# ---- Session setup ----
# NOTE(review): wiping the global environment is kept from the original script,
# but it also deletes anything the caller had in the workspace; consider
# running the script in a fresh R session instead.
rm(list = ls())

# Directory the script is started from; the relative paths used below
# ("func.R", "gen.R", "citeseer/...") are resolved against it.
# The original followed this with setwd(path), which is a no-op because `path`
# is already the working directory, so that call was dropped.
path <- getwd()

# Helper functions called further down (e.g. net.cv.realdata, admm.norm,
# clus.into.grp, label) are not defined in this script; presumably they come
# from these two files -- TODO confirm.
source("func.R")
source("gen.R")

# ---- Load the content table and the citation list ----
# W_df: first column is used as the document ID, the last column as the class
# label (see the label handling further down), everything in between as
# features.
file_content <- "citeseer/citeseer.content.txt"
file_cites <- "citeseer/citeseer.cites.txt"
W_df <- read.csv(file_content, header = FALSE)
ID <- W_df[, 1]
names(ID) <- seq_along(ID)
Edge <- read.csv(file_cites, header = FALSE)

# Translate every citation (pair of IDs) into a pair of row positions in W_df,
# stored as (smaller index, larger index) so that A->B and B->A coincide.
# match() replaces the former per-edge which(ID == ...) scan: it is a single
# vectorised lookup, and an ID that does not occur in the content file yields
# NA instead of a zero-length value that made `if (temp1 < temp2)` fail.
tempE <- local({
  id_key <- as.character(ID)
  from <- match(as.character(Edge[, 1]), id_key)
  to <- match(as.character(Edge[, 2]), id_key)

  known <- !is.na(from) & !is.na(to)
  if (!all(known)) {
    warning(
      sum(!known), " citation(s) refer to an ID that is missing from ",
      file_content, " and were dropped",
      call. = FALSE
    )
  }

  # Numeric (double) matrix with the same column count as before.
  pairs <- matrix(0, nrow = sum(known), ncol = ncol(Edge))
  pairs[, 1] <- pmin(from[known], to[known])
  pairs[, 2] <- pmax(from[known], to[known])
  pairs
})

# plyr::count collapses duplicate rows; only the two index columns are kept,
# so E is the matrix of distinct undirected edges.
E <- plyr::count(tempE, names(tempE))[, 1:2]
E <- as.matrix(E)
# NOTE(review): `a` is not used anywhere in the visible part of the script.
a <- plyr::count(E, names(E))

# ---- Feature matrix ----
# All columns of W_df except the first (ID) and the last (class label).
W_origin <- data.matrix(W_df[, 2:(ncol(W_df) - 1)])

# Keep only the feature columns whose column sum exceeds 50.
# NOTE(review): the original first assigned
#   W = W_origin[, colSums(W_origin) < 200]
# and then immediately overwrote W with the "> 50" selection taken from
# W_origin again, so the "< 200" filter never had any effect. The dead
# assignment was removed and the effective behaviour (only "> 50") is kept.
# If both bounds were intended, the two conditions need to be combined --
# confirm before changing, since the tuned parameters below depend on W.
# drop = FALSE keeps W a matrix even if a single column qualifies.
W <- W_origin[, which(colSums(W_origin) > 50), drop = FALSE]

# Number of documents (rows) and of retained features (columns); the bare
# names echo the values when the script is run at top level.
D <- nrow(W)
D
V <- ncol(W)
V
# ---- Graph structures derived from the edge list E ----
# Symmetric 0/1 adjacency matrix. Indexing a matrix with a two-column matrix
# addresses one (row, col) cell per edge, so both directions are filled
# without a loop (and an empty E is handled, unlike 1:nrow(E)).
adj <- matrix(0, D, D)
adj[E[, 1:2, drop = FALSE]] <- 1
adj[E[, 2:1, drop = FALSE]] <- 1

# Nodes without any edge (rows of adj that sum to zero).
lst <- which(rowSums(adj) == 0)

# Ebynode[[i]] = neighbours of node i, in the order the edges appear in E.
# The endpoints are interleaved edge by edge (E[1,1], E[1,2], E[2,1], ...)
# and paired with the opposite endpoint, which reproduces the order obtained
# by appending inside a loop over the edges, without repeatedly re-growing
# the vectors. Nodes without edges get an empty numeric vector.
Ebynode <- local({
  endpoints <- as.vector(t(E[, 1:2, drop = FALSE]))
  partners <- as.vector(t(E[, 2:1, drop = FALSE]))
  unname(split(partners, factor(endpoints, levels = seq_len(D))))
})

# ---- Number of documents per class ----
# The class label is the last column of W_df. The original compared the class
# name against every cell of W_df (all feature columns and the ID column),
# which is slow and would also count an ID that happens to equal a class
# name; counting is now restricted to the label column.
class_sizes <- table(factor(
  W_df[, ncol(W_df)],
  levels = c("Agents", "DB", "AI", "IR", "ML", "HCI")
))
# NOTE(review): the D1..D6 order (Agents, DB, AI, IR, ML, HCI) differs from
# the order of the `classes` vector defined further down -- confirm which
# order the consumers of D1..D3 expect.
D1 <- class_sizes[["Agents"]]
D2 <- class_sizes[["DB"]]
D3 <- class_sizes[["AI"]]
D4 <- class_sizes[["IR"]]
D5 <- class_sizes[["ML"]]
D6 <- class_sizes[["HCI"]]
# ---- Fill in documents whose retained feature row is all zero ----
# Such rows would make the row normalisation further down divide by zero.
miss <- which(rowSums(W) == 0)
miss

# Each empty row is replaced by its neighbours' (rounded) average row plus 1.
# Rows are processed in order and W is updated in place, so a neighbour that
# was itself filled in earlier contributes its new values.
# Iterating over `miss` directly (instead of 1:length(miss)) makes the loop a
# no-op when there is nothing to fill in.
for (doc in miss) {
  nbrs <- Ebynode[[doc]]
  if (length(nbrs) == 0) {
    # No neighbour to borrow from: the original took colMeans of zero rows
    # and wrote NaN into W. Use the "+ 1" offset alone (a flat row) instead.
    warning(
      "document ", doc, " has no features and no neighbours; ",
      "using a flat row",
      call. = FALSE
    )
    W[doc, ] <- 1
  } else if (length(nbrs) == 1) {
    W[doc, ] <- W[nbrs, ] + 1
  } else {
    W[doc, ] <- round(colMeans(W[nbrs, ])) + 1
  }
}
# ---- Ground-truth classes ----
# groupCiteseer[[cls]] : row indices of the documents labelled `cls`
# labelreal[d]         : position of document d's class in `classes`
#                        (0 if its label is none of them)
# Starting from list() (rather than NULL) guarantees a list even when a class
# has zero or one member.
groupCiteseer <- list()
labelreal <- rep(0, nrow(W_df))
classes <- c('Agents','AI','DB','IR','ML','HCI')
for (i in seq_along(classes)) {
  members <- which(W_df[, ncol(W_df)] == classes[i])
  groupCiteseer[[classes[i]]] <- members
  # The original indexed W_df[, 3] here, which is a feature column and never
  # equals a class name, so labelreal stayed all zeros. The class label is
  # the last column, i.e. exactly the rows found above.
  labelreal[members] <- i
}

# labelreal = c(rep(1,length(groupCiteseer[[1]])),rep(2,length(groupCiteseer[[2]])),rep(3,length(groupCiteseer[[3]])),
#               rep(4,length(groupCiteseer[[4]])),rep(5,length(groupCiteseer[[5]])),rep(6,length(groupCiteseer[[6]])))
# ---- Distances between documents and per-edge weights ----
# Scale every row of W to sum to 1 (the length-D vector of row sums is
# recycled down the columns, so row i is divided by its own sum), then take
# the pairwise distances between rows (dist() default metric).
net.data <- W / rowSums(W)
net.dis <- as.matrix(dist(net.data))

# One weight per edge in E: inverse cube of the distance between its two
# endpoints. A zero distance yields Inf; those entries are capped at the
# largest finite weight.
xi <- 1 / net.dis[E]^3
xi[which(xi == Inf)] <- max(xi[which(xi != Inf)])

# Label the rows of W with their position.
rownames(W) <- seq_len(nrow(W))
# ---- Cross-validation and final fit (method 'DM') ----
# net.cv.realdata and admm.norm are not defined in this script; presumably
# they are provided by the sourced helper files -- TODO confirm.
# NOTE(review): only D1..D3 of the six class counts are passed on -- confirm
# that this is what net.cv.realdata expects.
cv.result <- net.cv.realdata(
  W, adj, xi, net.dis, D1, D2, D3,
  method = 'DM',
  itvl = 0.98,
  testlambda = 120,
  rho = 1500,
  dataset = 'Citeseer'
)

# DM, lambda = 106.1683
result <- admm.norm(
  W, E, Ebynode, xi,
  lambda = 106.1683,
  rho = 1500,
  method = 'DM',
  s = 1.e-5
)

# ---- Turn the fitted result into groups ----
# clus.into.grp and label are not defined in this script; presumably they are
# provided by the sourced helper files -- TODO confirm.
grps <- clus.into.grp(
  result$z, E, Ebynode, result$lambda,
  del.vec = NULL, s = result$s
)
groups <- grps$group
labels <- label(groups)
groupdata <- groupCiteseer

# NOTE(review): lcount is only initialised here; it is not used in the
# visible part of the script.
lcount <- NULL

# Indices of the groups with at least 30 members. lengths() measures every
# group at once, which also copes with an empty `groups` (1:length(groups)
# would have tried to read groups[[1]]). As before, lst is NULL when no group
# qualifies.
lst <- which(lengths(groups) >= 30, useNames = FALSE)
if (length(lst) == 0) {
  lst <- NULL
}
lst


