SNAP Library 6.0, User Reference  2020-12-09 16:24:20
SNAP, a general purpose, high performance system for analysis and manipulation of large networks
cmty.cpp
Go to the documentation of this file.
1 // Community detection algorithms
3 namespace TSnap {
4 
5 
6 namespace TSnapDetail {
7 
8 // GIRVAN-NEWMAN algorithm
9 // 1. The betweenness of all existing edges in the network is calculated first.
10 // 2. The edge with the highest betweenness is removed.
11 // 3. The betweenness of all edges affected by the removal is recalculated.
12 // 4. Steps 2 and 3 are repeated until no edges remain.
13 // Girvan M. and Newman M. E. J., Community structure in social and biological networks, Proc. Natl. Acad. Sci. USA 99, 7821-7826 (2002)
14 // Keep removing edges from Graph until one of the connected components of Graph splits into two.
15 void CmtyGirvanNewmanStep(PUNGraph& Graph, TIntV& Cmty1, TIntV& Cmty2) {
16  TIntPrFltH BtwEH;
17  TBreathFS<PUNGraph> BFS(Graph);
18  Cmty1.Clr(false); Cmty2.Clr(false);
19  while (true) {
20  TSnap::GetBetweennessCentr(Graph, BtwEH);
21  BtwEH.SortByDat(false);
22  if (BtwEH.Empty()) { return; }
23  const int NId1 = BtwEH.GetKey(0).Val1;
24  const int NId2 = BtwEH.GetKey(0).Val2;
25  Graph->DelEdge(NId1, NId2);
26  BFS.DoBfs(NId1, true, false, NId2, TInt::Mx);
27  if (BFS.GetHops(NId1, NId2) == -1) { // two components
28  TSnap::GetNodeWcc(Graph, NId1, Cmty1);
29  TSnap::GetNodeWcc(Graph, NId2, Cmty2);
30  return;
31  }
32  }
33 }
34 
35 // Connected components of a graph define clusters
36 // OutDegH and OrigEdges stores node degrees and number of edges in the original graph
37 double _GirvanNewmanGetModularity(const PUNGraph& G, const TIntH& OutDegH, const int& OrigEdges, TCnComV& CnComV) {
38  TSnap::GetWccs(G, CnComV); // get communities
39  double Mod = 0;
40  for (int c = 0; c < CnComV.Len(); c++) {
41  const TIntV& NIdV = CnComV[c]();
42  double EIn = 0, EEIn = 0;
43  for (int i = 0; i < NIdV.Len(); i++) {
44  TUNGraph::TNodeI NI = G->GetNI(NIdV[i]);
45  EIn += NI.GetOutDeg();
46  EEIn += OutDegH.GetDat(NIdV[i]);
47  }
48  Mod += (EIn-EEIn*EEIn / (2.0*OrigEdges));
49  }
50  if (Mod == 0) { return 0; }
51  else { return Mod / (2.0*OrigEdges); }
52 }
53 
54 void MapEquationNew2Modules(PUNGraph& Graph, TIntH& Module, TIntFltH& Qi, int a, int b) {
55  float InModule = 0.0, OutModule = 0.0, Val;
56  int Mds[2] = { a, b };
57  for (int i = 0; i<2; i++) {
58  InModule = 0.0, OutModule = 0.0;
59  if (Qi.IsKey(Mds[i])) {
60  int CentralModule = Mds[i];
61 
62  //printf("central module: %i\n ",CentralModule);
63 
64  TIntV newM;
65  for (TUNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) {
66  if (Module.GetDat(NI.GetId()) == CentralModule)
67  newM.Add(NI.GetId());
68  }
69 
70  for (int j = 0; j<newM.Len(); j++) {
71  //int len1 = newM.Len();
72 
73  //int c = CentralModule;
74  for (int k = 0; k<Graph->GetNI(newM[j]).GetDeg(); k++) {
75  //int len = Graph->GetNI(newM[j]).GetDeg();
76 
77  int ids = Graph->GetNI(newM[j]).GetId();
78  int idd = Graph->GetNI(newM[j]).GetNbrNId(k);
79  int ms = Module.GetDat(ids);
80  int md = Module.GetDat(idd);
81  //int c = CentralModule;
82 
83  if (ms == md) {
84  InModule += 1.0;
85  //printf("IN: \t%i - %i; moduls: %i - %i\n", ids, idd, ms, md);
86  } else {
87  OutModule += 1.0;
88  //printf("OUT: \t%i - %i; moduls: %i - %i\n", ids, idd, ms, md);
89  }
90  }
91  }
92  if (InModule >1) InModule = InModule / 2;
93 
94  //printf("\n");
95 
96  Val = 0.0;
97  if (InModule + OutModule > 0) {
98  Val = OutModule / (InModule + OutModule);
99  }
100  //int control = Mds[i];
101  Qi.AddDat(Mds[i], Val);
102  } else {
103  //int control = Mds[i];
104  Qi.AddDat(Mds[i], 0.0);
105  }
106  }
107 }
108 
109 double Equation(TIntFltH& PAlpha, double& SumPAlphaLogPAlpha, TIntFltH& Qi){
110  double SumPAlpha = 1.0, SumQi = 0.0, SumQiLogQi = 0.0;
111  double SumQiSumPAlphaLogQiSumPAlpha = 0.0, logqi = 0.0, qi = 0.0;
112  for (int i = 0; i<Qi.Len(); i++) {
113  SumQi += Qi[i];
114  qi = Qi[i];
115  if (qi != 0) {
116  logqi = log(qi);
117  } else {
118  logqi = 0;
119  }
120  SumQiLogQi += Qi[i] * logqi;
121  SumQiSumPAlphaLogQiSumPAlpha += (Qi[i] + SumPAlpha)*log(Qi[i] + SumPAlpha);
122  }
123  return (SumQi*log(SumQi) - 2 * SumQiLogQi - SumPAlphaLogPAlpha +
124  SumQiSumPAlphaLogQiSumPAlpha);
125 }
126 
127 bool edgeIntersect(PNGraph& graph, TIntV& a, TIntV& b) {
128  for (int i = 0; i<a.Len(); i++) {
129  for (int j = 0; j<b.Len(); j++) {
130  if (graph->IsEdge(a[i], b[j]))
131  return true;
132  }
133  }
134 
135  return false;
136 }
137 
139  int count = 0;
140  for (int i = 0; i<a.Len(); i++) {
141  for (int j = 0; j<b.Len(); j++) {
142  if (a[i] == b[j])
143  count++;
144  }
145  }
146 
147  return count;
148 }
149 
150 bool inComp(PNGraph& g1, PNGraph& Graph, TIntH& inCompCount, int id, int neigh) {
151  bool out = true;
152 
153  int inCompN = 0;
154  int inComp = 0;
155 
156  if (g1->IsNode(id) && g1->IsNode(neigh)) {
157  int deg = g1->GetNI(id).GetDeg();
158  int neighDeg = g1->GetNI(neigh).GetDeg();
159 
160 
161  if (inCompCount.IsKey(id)) {
162  inComp = inCompCount.GetDat(id);
163  }
164  if (inCompCount.IsKey(neigh)) {
165  inCompN = inCompCount.GetDat(neigh);
166  }
167 
168  if (inCompN < neighDeg && inComp < deg && (!g1->IsNode(neigh) || Graph->GetNI(neigh).GetDeg() - neighDeg == 0)) {
169  inCompCount.AddDat(neigh, ++inCompN);
170  inCompCount.AddDat(id, ++inComp);
171  out = true;
172  } else {
173  out = false;
174  }
175  }
176  return out;
177 }
178 
180  for (int i = 0; i < a.Len(); i++) {
181  bool diff = false;
182  for (int j = 0; j < b.Len(); j++) {
183  if (a[i] == a[j]) {
184  diff = true;
185  break;
186  }
187  }
188  if (!diff) {
189  b.Add(a[i]);
190  break;
191  }
192  }
193 }
194 
195 bool chekIfCrossing(TIntV& a, TIntH& t, int f, int l, int TP) {
196  bool after = false;
197  bool before = false;
198  for (int i = 0; i < a.Len(); i++) {
199  if (t.GetDat(a[i]) < TP)
200  before = true;
201  if (t.GetDat(a[i]) > TP)
202  after = true;
203  }
204 
205  if (TP == f)
206  before = true;
207 
208  if (TP == l)
209  after = true;
210 
211  return (after && before);
212 }
213 
214 double InfomapOnlineIncrement(PUNGraph& Graph, int n1, int n2, TIntFltH& PAlpha, double& SumPAlphaLogPAlpha, TIntFltH& Qi, TIntH& Module, int& Br) {
215  // NOW NEW stuff add another additional iteration:
216 
217  bool n1new = false;
218  bool n2new = false;
219 
220  // add edge
221  if (!Graph->IsNode(n1)){
222  Graph->AddNode(n1);
223  n1new = true;
224  }
225 
226  if (!Graph->IsNode(n2)) {
227  Graph->AddNode(n2);
228  n2new = true;
229  }
230 
231  Graph->AddEdge(n1, n2);
232 
233  int e = Graph->GetEdges();
234 
235  // get previous alpha for 27
236  double oldAlphaN1 = 0.0;
237  double oldAlphaN2 = 0.0;
238 
239  if (!n1new)
240  oldAlphaN1 = PAlpha.GetDat(n1);
241 
242  if (!n2new)
243  oldAlphaN2 = PAlpha.GetDat(n2);
244 
245  // update alpha for 27
246  TUNGraph::TNodeI node = Graph->GetNI(n1);
247  int nodeDeg = node.GetDeg();
248  float d = ((float)nodeDeg / (float)(2 * e));
249  PAlpha.AddDat(n1, d);
250 
251  //update alphasum
252  SumPAlphaLogPAlpha = SumPAlphaLogPAlpha - oldAlphaN1 + d*log(d);
253 
254  if (n1new) {
255  Module.AddDat(n1, Br);
256  Qi.AddDat(Br, 1.0);
257  Br++;
258  }
259 
260  // update alpha for 28
261  node = Graph->GetNI(n2);
262  nodeDeg = node.GetDeg();
263  d = ((float)nodeDeg / (float)(2 * e));
264  PAlpha.AddDat(n2, d);
265 
266  //update alphasum
267  SumPAlphaLogPAlpha = SumPAlphaLogPAlpha - oldAlphaN2 + d*log(d);
268 
269  //add module
270  if (n2new) {
271  Module.AddDat(n2, Br);
272  Qi.AddDat(Br, 1.0);
273  Br++;
274  }
275 
276  // Start
277 
278  double MinCodeLength = TSnapDetail::Equation(PAlpha, SumPAlphaLogPAlpha, Qi);
279  double PrevIterationCodeLength = 0.0;
280 
281  do {
282  PrevIterationCodeLength = MinCodeLength;
283  int id[2] = { n1, n2 };
284  for (int k = 0; k<2; k++) {
285  for (int i = 0; i<Graph->GetNI(id[k]).GetDeg(); i++) {
286 
287  int OldModule = Module.GetDat(id[k]);
288  int NewModule = Module.GetDat(Graph->GetNI(id[k]).GetNbrNId(i));
289 
290  Module.AddDat(id[k], NewModule);
291 
292  TSnapDetail::MapEquationNew2Modules(Graph, Module, Qi, OldModule, NewModule);
293  double NewCodeLength = TSnapDetail::Equation(PAlpha, SumPAlphaLogPAlpha, Qi);
294  if (NewCodeLength<MinCodeLength) {
295  MinCodeLength = NewCodeLength;
296  OldModule = NewModule;
297  }
298  else {
299  Module.AddDat(id[k], OldModule);
300  }
301  }
302  }
303  } while (MinCodeLength<PrevIterationCodeLength);
304 
305  return MinCodeLength;
306 }
307 
308 } // namespace TSnapDetail
309 
310 // Maximum modularity clustering by Girvan-Newman algorithm (slow)
311 // Girvan M. and Newman M. E. J., Community structure in social and biological networks, Proc. Natl. Acad. Sci. USA 99, 7821-7826 (2002)
312 double CommunityGirvanNewman(PUNGraph& Graph, TCnComV& CmtyV) {
313  PUNGraph LocalGraph = TSnap::ConvertGraph<PUNGraph>(Graph, false);
314 
315  TIntH OutDegH;
316  const int NEdges = LocalGraph->GetEdges();
317  for (TUNGraph::TNodeI NI = LocalGraph->BegNI(); NI < LocalGraph->EndNI(); NI++) {
318  OutDegH.AddDat(NI.GetId(), NI.GetOutDeg());
319  }
320  double BestQ = -1; // modularity
321  TCnComV CurCmtyV;
322  CmtyV.Clr();
323  TIntV Cmty1, Cmty2;
324  while (true) {
325  TSnapDetail::CmtyGirvanNewmanStep(LocalGraph, Cmty1, Cmty2);
326  const double Q = TSnapDetail::_GirvanNewmanGetModularity(LocalGraph, OutDegH, NEdges, CurCmtyV);
327  //printf("current modularity: %f\n", Q);
328  if (Q > BestQ) {
329  BestQ = Q;
330  CmtyV.Swap(CurCmtyV);
331  }
332  if (Cmty1.Len() == 0 || Cmty2.Len() == 0) { break; }
333  }
334  return BestQ;
335 }
336 
337 // Rosvall-Bergstrom community detection algorithm based on information theoretic approach.
338 // See: Rosvall M., Bergstrom C. T., Maps of random walks on complex networks reveal community structure, Proc. Natl. Acad. Sci. USA 105, 1118-1123 (2008)
339 double Infomap(PUNGraph& Graph, TCnComV& CmtyV){
340 
341  TIntFltH PAlpha; // probability of visiting node alpha
342  TIntH Module; // module of each node
343  TIntFltH Qi; // probability of leaving each module
344 
345  double SumPAlphaLogPAlpha = 0.0;
346  int Br = 0;
347  const int e = Graph->GetEdges();
348 
349  // initial values
350  for (TUNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) {
351  int nodeId = NI.GetId();
352  int nodeDeg = NI.GetDeg();
353  float d = ((float)nodeDeg / (float)(2 * e));
354  PAlpha.AddDat(nodeId, d);
355  SumPAlphaLogPAlpha += d*log(d);
356  Module.AddDat(nodeId, Br);
357  Qi.AddDat(Br, 1.0);
358  Br += 1;
359  }
360 
361  double MinCodeLength = TSnapDetail::Equation(PAlpha, SumPAlphaLogPAlpha, Qi);
362  double NewCodeLength, PrevIterationCodeLength = 0.0;
363  int OldModule, NewModule;
364 
365  TIntV nodes;
366  for (TUNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++)
367  nodes.Add(NI.GetId());
368 
369  do {
370  PrevIterationCodeLength = MinCodeLength;
371  TRnd rnd;
372  rnd.Randomize();
373  nodes.Shuffle(rnd);
374  for (int ndcounter = 0; ndcounter<nodes.Len(); ndcounter++) {
375  MinCodeLength = TSnapDetail::Equation(PAlpha, SumPAlphaLogPAlpha, Qi);
376  int nodeId = nodes[ndcounter];
377  TUNGraph::TNodeI NI = Graph->GetNI(nodeId);
378  for (int i = 0; i<NI.GetDeg(); i++) {
379 
380  OldModule = Module.GetDat(nodeId);
381  NewModule = Module.GetDat(NI.GetNbrNId(i));
382 
383  if (OldModule != NewModule){
384 
385  Module.AddDat(nodeId, NewModule);
386 
387  TSnapDetail::MapEquationNew2Modules(Graph, Module, Qi, OldModule, NewModule);
388  NewCodeLength = TSnapDetail::Equation(PAlpha, SumPAlphaLogPAlpha, Qi);
389  if (NewCodeLength<MinCodeLength) {
390  MinCodeLength = NewCodeLength;
391  OldModule = NewModule;
392  }
393  else {
394  Module.AddDat(nodeId, OldModule);
395  }
396  }
397  }
398  }
399  } while (MinCodeLength<PrevIterationCodeLength);
400 
401  Module.SortByDat(true);
402 
403  int Mod = -1;
404  for (int i = 0; i<Module.Len(); i++) {
405  if (Module[i]>Mod){
406  Mod = Module[i];
407  TCnCom t;
408  for (TUNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++){
409  if (Module.GetDat(NI.GetId()) == Mod)
410  t.Add(NI.GetId());
411  }
412  CmtyV.Add(t);
413  }
414  }
415 
416  return MinCodeLength;
417 }
418 
419 double InfomapOnline(PUNGraph& Graph, int n1, int n2, TIntFltH& PAlpha, double& SumPAlphaLogPAlpha, TIntFltH& Qi, TIntH& Module, int& Br, TCnComV& CmtyV) {
420 
421  double MinCodeLength = TSnapDetail::InfomapOnlineIncrement(Graph, n1, n2, PAlpha, SumPAlphaLogPAlpha, Qi, Module, Br);
422 
423  Module.SortByDat(true);
424 
425  int Mod = -1;
426  for (int i = 0; i<Module.Len(); i++) {
427  if (Module[i]>Mod){
428  Mod = Module[i];
429  TCnCom t;
430  for (TUNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++){
431  if (Module.GetDat(NI.GetId()) == Mod)
432  t.Add(NI.GetId());
433  }
434  CmtyV.Add(t);
435  }
436  }
437 
438  return MinCodeLength;
439 }
440 
441 void CmtyEvolutionFileBatchV(TStr InFNm, TIntIntVH& sizesContV, TIntIntVH& cContV, TIntIntVH& edges, double alpha, double beta, int CmtyAlg) {
442  TIntIntHH sizesCont;
443  TIntIntHH cCont;
444  CmtyEvolutionFileBatch(InFNm, sizesCont, cCont, edges, alpha, beta, CmtyAlg);
445 
446  TIntV uniqueId;
447  for (int i = 0; i < cCont.Len(); i++){
448  for (THashKeyDatI<TInt, TInt> it = cCont[i].BegI(); !it.IsEnd(); it++){
449  if (!uniqueId.IsIn(it.GetKey()))
450  uniqueId.Add(it.GetKey());
451  }
452  }
453 
454  for (int j = 0; j<uniqueId.Len(); j++)
455  {
456  TIntV cV;
457  for (int i = 0; i<cCont.Len(); i++)
458  {
459  if (cCont[i].IsKey(uniqueId[j]))
460  cV.Add(cCont[i].GetDat(uniqueId[j]));
461  else
462  cV.Add(-1);
463  }
464  cContV.AddDat(uniqueId[j], cV);
465  }
466 
467  TIntV uniqueC;
468  for (int i = 0; i < sizesCont.Len(); i++){
469  for (THashKeyDatI<TInt, TInt> it = sizesCont[i].BegI(); !it.IsEnd(); it++){
470  if (!uniqueC.IsIn(it.GetKey()))
471  uniqueC.Add(it.GetKey());
472  }
473  }
474 
475  for (int j = 0; j<uniqueC.Len(); j++)
476  {
477  TIntV cV;
478  for (int i = 0; i<sizesCont.Len(); i++)
479  {
480  if (sizesCont[i].IsKey(uniqueC[j]))
481  cV.Add(sizesCont[i].GetDat(uniqueC[j]));
482  else
483  cV.Add(0);
484  }
485  sizesContV.AddDat(uniqueC[j], cV);
486  }
487 
488 }
489 
490 void CmtyEvolutionFileBatch(TStr InFNm, TIntIntHH& sizesCont, TIntIntHH& cCont, TIntIntVH& edges, double alpha, double beta, int CmtyAlg) {
491 
492 
493  // reading folder with networks and calculating core/periphery
494  int br = 0;
495  TIntIntH prev;
496  TIntH prev_sizes;
497 
498  TSsParser Ss(InFNm, ssfWhiteSep, true, false, true);
499  Ss.Next();
500  //int internal_year_counter = 0;
501  // variable for delimiter between networks
502  TStr Marker;
503  // defining variables for node ids and starting year
504  int SrcNId, DstNId; // , t = 1970;
505 
506  // temporal container for edges
507  TIntIntVH edges_;
508 
509  while (!Ss.Eof()) {
510 
511  //printf("%i\n", t);
512  Marker = Ss.GetLnStr();
513  // get the year from the network seperator
514  //t = Marker.GetSubStr(1, 4).GetInt();
515 
516  if (Marker.GetCh(0) == '#'){
517 
518  Ss.Next();
519  PUNGraph Graph = PUNGraph::TObj::New();
520  do{
521  if (!Ss.GetInt(0, SrcNId) || !Ss.GetInt(1, DstNId)) {
522  if (!Ss.Eof()){
523  Ss.Next();
524  if (!Ss.Eof())
525  Marker = Ss.GetLnStr();
526  }
527  continue;
528  }
529  if (!Graph->IsNode(SrcNId)) { Graph->AddNode(SrcNId); }
530  if (!Graph->IsNode(DstNId)) { Graph->AddNode(DstNId); }
531  Graph->AddEdge(SrcNId, DstNId);
532  Ss.Next();
533  if (!Ss.Eof())
534  Marker = Ss.GetLnStr();
535  } while (Marker.GetCh(0) != '#' && !Ss.Eof());
536 
537 
538  if (Graph->GetNodes()>0) {
539  // WORK
540 
541  TSnap::DelSelfEdges(Graph);
542  TCnComV CmtyV;
543  //double Q = 0.0;
544  TStr CmtyAlgStr;
545  if (CmtyAlg == 1) {
546  CmtyAlgStr = "Girvan-Newman";
547  //Q = TSnap::CommunityGirvanNewman(Graph, CmtyV);
548  }
549  else if (CmtyAlg == 2) {
550  CmtyAlgStr = "Clauset-Newman-Moore";
551  //Q = TSnap::CommunityCNM(Graph, CmtyV);
552  }
553  else if (CmtyAlg == 3) {
554  CmtyAlgStr = "Infomap";
555  //Q = TSnap::Infomap(Graph, CmtyV);
556  }
557  else { Fail; }
558 
559  TIntIntHH distCont;
560 
561  if (br == 0) {
562  prev.Clr();
563  //int size = 0;
564  for (int c = 0; c < CmtyV.Len(); c++) {
565  for (int i = 0; i < CmtyV[c].Len(); i++){
566  prev.AddDat(CmtyV[c][i].Val, c);
567  }
568  //int s = CmtyV[c].Len();
569  prev_sizes.AddDat(c, CmtyV[c].Len());
570  }
571  }
572  else {
573 
574  // containers for statistics
575 
576  //TIntFltHH stat1;
577  //TIntIntHH stat2;
578  TIntH dist;
579  TIntH map;
580 
581  int first_new_c_id = -1;
582 
583  // getting first free id for a new community
584  for (THashKeyDatI<TInt, TInt> it = prev_sizes.BegI(); !it.IsEnd(); it++)
585  if (it.GetKey() > first_new_c_id)
586  first_new_c_id = it.GetKey();
587  if (CmtyV.Len() - 1>first_new_c_id)
588  first_new_c_id = CmtyV.Len() - 1;
589  first_new_c_id++;
590 
591  for (int c = 0; c < CmtyV.Len(); c++) {
592 
593  TIntV stat;
594  TIntFltH statH1;
595  TIntFltH statH2;
596 
597  // initialize distributions to 0
598  for (THashKeyDatI<TInt, TInt> it = prev_sizes.BegI(); !it.IsEnd(); it++)
599  dist.AddDat(it.GetKey(), 0);
600  //for new nodes
601  dist.AddDat(-1, 0);
602 
603  for (int i = 0; i < CmtyV[c].Len(); i++) {
604  int id = CmtyV[c][i].Val;
605  int prev_comm = -1;
606  if (prev.IsKey(id))
607  prev_comm = prev.GetDat(CmtyV[c][i].Val);
608  stat.Add(prev_comm);
609  int pre_val = dist.GetDat(prev_comm);
610  dist.AddDat(prev_comm, pre_val + 1);
611  }
612 
613  double sumstat2 = 0;
614  for (THashKeyDatI<TInt, TInt> it = dist.BegI(); !it.IsEnd(); it++) {
615 
616  int k = it.GetKey();
617  int d = it.GetDat();
618  if (d > 0){
619  if (prev_sizes.IsKey(it.GetKey())){
620 
621  double stat1_ = (double)d / (double)prev_sizes.GetDat(k);
622  statH1.AddDat(k, stat1_);
623  }
624  double stat2_ = (double)d / (double)CmtyV[c].Len();
625  statH2.AddDat(k, stat2_);
626  sumstat2 += stat2_;
627 
628  TIntV edge;
629  edge.Add(k);
630  edge.Add(c);
631  edge.Add(d);
632  edge.Add(br - 1);
633  edge.Add(br);
634  edges_.AddDat(edges_.Len() + 1, edge);
635  }
636 
637  // adding edges between two communities in two neighbouring time points;
638 
639 
640  if (sumstat2 > 0.98) break;
641  }
642 
643  int n_of_c_greater_than_half = 0;
644  int id_of_c_greater_than_half = -1;
645  TIntV ids_of_c_greater_than_half;
646 
647  for (THashKeyDatI<TInt, TFlt> it = statH1.BegI(); !it.IsEnd(); it++){
648  if (it.GetDat()>alpha){
649  id_of_c_greater_than_half = it.GetKey();
650  ids_of_c_greater_than_half.Add(it.GetKey());
651  n_of_c_greater_than_half++;
652  }
653  }
654 
655  // if this community is build of majority of one previous community and the other parts of the community are fractions of other communities smaller than half, the new community gets its label
656  if (n_of_c_greater_than_half == 1){
657  map.AddDat(c, id_of_c_greater_than_half);
658  }
659  else{
660  int h2part_id = -2;
661  for (int i = 0; i<ids_of_c_greater_than_half.Len(); i++){
662  double H2 = statH2.GetDat(ids_of_c_greater_than_half[i]);
663  if (H2>beta){
664  h2part_id = ids_of_c_greater_than_half[i];
665  }
666  }
667  if (h2part_id != -2)
668  map.AddDat(c, h2part_id);
669  else{
670  map.AddDat(c, first_new_c_id);
671  first_new_c_id++;
672  }
673  }
674 
675  distCont.AddDat(c, dist);
676 
677  //stat1.AddDat(c,statH1);
678  //stat2.AddDat(c,statH2);
679 
680  }
681 
682 
683  prev.Clr();
684  prev_sizes.Clr();
685  for (int c = 0; c < CmtyV.Len(); c++){
686  for (int i = 0; i < CmtyV[c].Len(); i++){
687  prev.AddDat(CmtyV[c][i].Val, map.GetDat(c));
688  }
689  //int s = CmtyV[c].Len();
690  prev_sizes.AddDat(map.GetDat(c), CmtyV[c].Len());
691  }
692 
693  // filing the edges container - the key thing is the map(c)
694  for (THashKeyDatI<TInt, TIntV> it = edges_.BegI(); !it.IsEnd(); it++){
695  TIntV edgesV;
696  int a = it.GetDat()[0];
697  int b = it.GetDat()[1];
698  int v = it.GetDat()[2];
699  int d = it.GetDat()[3];
700  int e = it.GetDat()[4];
701  edgesV.Add(map.GetDat(b));
702  edgesV.Add(a);
703  edgesV.Add(v);
704  edgesV.Add(d);
705  edgesV.Add(e);
706  if (a != -1)
707  edges.AddDat(edges.Len(), edgesV);
708  }
709  edges_.Clr();
710 
711 
712  }
713 
714  sizesCont.AddDat(br, prev_sizes);
715  cCont.AddDat(br, prev);
716  br++;
717  // WORK - END
718  }
719  }
720  else Ss.Next();
721  }
722 
723 }
724 
725 void CmtyEvolutionJson(TStr& Json, TIntIntVH& sizesContV, TIntIntVH& cContV, TIntIntVH& edges){
727  // This function creates a JSON string with communities and edges for community evolution visualization using D3.js
729 
730  // writing json label for edges
731  Json.InsStr(Json.Len(), "{\n\"edges\":[\n");
732 
733  TInt br = 0;
734  // iterating hash of vector of edges and writing into string
735  for (THashKeyDatI<TInt, TIntV> it = edges.BegI(); !it.IsEnd(); it++)
736  {
737  // first node
738  TInt n1 = it.GetDat()[1];
739  // second node
740  TInt n2 = it.GetDat()[0];
741  // edge weight
742  TInt w = it.GetDat()[2];
743  // start time point
744  TInt t0 = it.GetDat()[3];
745  // end time point
746  TInt t1 = it.GetDat()[4];
747 
748  if (br>0)
749  Json.InsStr(Json.Len(), ",");
750 
751  // writing to string
752  Json.InsStr(Json.Len(), "{\"n1\":"); Json.InsStr(Json.Len(), n1.GetStr());
753  Json.InsStr(Json.Len(), ", \"n2\":"); Json.InsStr(Json.Len(), n2.GetStr());
754  Json.InsStr(Json.Len(), ", \"w\":"); Json.InsStr(Json.Len(), w.GetStr());
755  Json.InsStr(Json.Len(), ", \"t0\":"); Json.InsStr(Json.Len(), t0.GetStr());
756  Json.InsStr(Json.Len(), ", \"t1\":"); Json.InsStr(Json.Len(), t1.GetStr());
757  Json.InsStr(Json.Len(), " }\n");
758  br++;
759  }
760 
761  // json label for communities
762  Json.InsStr(Json.Len(), "],\n\"communities\":[\n");
763 
764  br = 0;
765  // printing communities into json file
766  for (int i = 0; i < sizesContV[0].Len(); i++)
767  {
768  for (THashKeyDatI<TInt, TIntV> it = sizesContV.BegI(); !it.IsEnd(); it++)
769  {
770  // id of community
771  TInt id = it.GetKey();
772  // community size
773  TInt size = it.GetDat()[i];
774  // time
775  TInt j = i;
776 
777  // if the community has size greater than 0, output it to json string
778  if (size > 0) {
779  if (br>0)
780  Json.InsStr(Json.Len(), ",");
781 
782  TInt size = it.GetDat()[i];
783  Json.InsStr(Json.Len(), "{\"id\":"); Json.InsStr(Json.Len(), id.GetStr());
784  Json.InsStr(Json.Len(), ", \"size\":"); Json.InsStr(Json.Len(), size.GetStr());
785  Json.InsStr(Json.Len(), ", \"t\":"); Json.InsStr(Json.Len(), j.GetStr());
786  Json.InsStr(Json.Len(), " }\n");
787 
788  br++;
789  }
790  }
791  }
792 
793  // printing communities into json file - alternative ordering
794  /*
795  for (THashKeyDatI<TInt, TIntV> it = sizesContV.BegI(); !it.IsEnd(); it++)
796  {
797  TInt id = it.GetKey();
798  int len = it.GetDat().Len();
799  for (int i=0; i < it.GetDat().Len(); i++)
800  {
801  TInt size = it.GetDat()[i];
802  TInt j = i;
803  if (size > 0) {
804 
805  if(br>0)
806  Json.InsStr(Json.Len(),",");
807 
808  TInt size = it.GetDat()[i];
809 
810  Json.InsStr(Json.Len(),"{\"id\":"); Json.InsStr(Json.Len(),id.GetStr());
811  Json.InsStr(Json.Len(),", \"size\":"); Json.InsStr(Json.Len(),size.GetStr());
812  Json.InsStr(Json.Len(),", \"t\":"); Json.InsStr(Json.Len(),j.GetStr());
813  Json.InsStr(Json.Len()," }\n");
814 
815  br++;
816 
817  }
818 
819  }
820  }
821  */
822 
823  Json.InsStr(Json.Len(), "]\n}");
824 
825 }
826 
827 TStr CmtyTest(TStr InFNm, int CmtyAlg){
828 
829  TIntIntVH sizesContV;
830  TIntIntVH cContV;
831  TIntIntVH edges;
832  double alpha = 0.5;
833  double beta = 0.75;
834  CmtyEvolutionFileBatchV(InFNm, sizesContV, cContV, edges, alpha, beta, CmtyAlg);
835  TStr out;
836  //int a = sizesContV.Len();
837  //int b = cContV.Len();
838  //int c = edges.Len();
839  CmtyEvolutionJson(out, sizesContV, cContV, edges);
840 
841  return out;
842 }
843 
844 void ReebSimplify(PNGraph& Graph, TIntH& t, int e, PNGraph& gFinal, TIntH& tFinal, bool collapse) {
845  TIntIntVH components;
846  TIntIntVH ct;
847 
848  int newId = 0; //get first new free id;
849 
850  // gett first and last t
851  int first = 429496729;
852  int last = -1;
853 
854  // smarter way of determining focus time points
855  TIntV timePoints;
856 
857  // get first and last time point
858  for (THashKeyDatI<TInt, TInt> it = t.BegI(); !it.IsEnd(); it++) {
859  if (it.GetDat()<first)
860  first = it.GetDat();
861  if (it.GetDat()>last)
862  last = it.GetDat();
863  }
864 
865  // adding focus timepoints
866  // this can be put in the previous (first, last time point detection) iteration if breaking borders is not an issue
867  for (THashKeyDatI<TInt, TInt> it = t.BegI(); !it.IsEnd(); it++) {
868  if (it.GetDat() - (e / 2) >= first)
869  timePoints.Add(it.GetDat() - (e / 2) /*- 0.1*/);
870  timePoints.Add(it.GetDat());
871  if (it.GetDat() + (e / 2) <= last)
872  timePoints.Add(it.GetDat() + (e / 2) /*+ 0.1*/);
873  }
874 
875 
876  //iterate each time point
877  for (int i = 0; i<timePoints.Len(); i++) {
878 
879  int focusTimePoint = timePoints[i];
880 
881  TIntV fnodes; // all the nodes int the focus in that step
882 
883  // getting nodes in focus -- in epsilon
884  for (THashKeyDatI<TInt, TInt> it = t.BegI(); !it.IsEnd(); it++) {
885  if ((it.GetDat() <= focusTimePoint + (e / 2)) && (it.GetDat() >= focusTimePoint - (e / 2)))
886  fnodes.Add(it.GetKey());
887  }
888 
889  // create graph from nodes in focus
890  PNGraph g1 = TNGraph::New();
891  for (int i = 0; i<fnodes.Len(); i++) {
892  if (!g1->IsNode(fnodes[i]))
893  g1->AddNode(fnodes[i]);
894  // lower star
895  for (int j = 0; j<Graph->GetNI(fnodes[i]).GetInDeg(); j++) {
896  int NeighId = Graph->GetNI(fnodes[i]).GetInNId(j);
897  if (t.GetDat(NeighId)<focusTimePoint - (e / 2)) {
898 
899  }
900  else {
901  if (!g1->IsNode(NeighId))
902  g1->AddNode(NeighId);
903  g1->AddEdge(NeighId, fnodes[i]);
904  }
905  }
906  // upper star
907  for (int j = 0; j<Graph->GetNI(fnodes[i]).GetOutDeg(); j++) {
908  int NeighId = Graph->GetNI(fnodes[i]).GetOutNId(j);
909  if (t.GetDat(NeighId)>focusTimePoint + (e / 2)) {
910 
911  }
912  else {
913  if (!g1->IsNode(NeighId))
914  g1->AddNode(NeighId);
915  g1->AddEdge(fnodes[i], NeighId);
916  }
917  }
918  }
919 
920  // getting results from commponents detection and recording elements of components and timestamps of components
921  TCnComV CnComV;
922  GetWccs(g1, CnComV);
923  TIntV communitiesAtT;
924  for (int cc = 0; cc < CnComV.Len(); cc++) {
925  components.AddDat(newId, CnComV[cc].NIdV);
926  communitiesAtT.Add(newId);
927  newId++;
928  }
929  if (CnComV.Len() > 0)
930  ct.AddDat(focusTimePoint, communitiesAtT);
931  } // end iterate each node
932 
933  // connecting neighbouring components
935  THashKeyDatI<TInt, TIntV> prelast = ct.EndI()--;
936  prelast--;
937  while (it < prelast) {
938  TIntV cms0;
939  TIntV cms1;
940  int focusTimePoint;
941  int focusTimePoint1;
942  focusTimePoint = it.GetKey();
943  cms0 = it.GetDat();
944  it++;
945  focusTimePoint1 = it.GetKey();
946  cms1 = it.GetDat();
947  if (cms0.Len()>0 && cms1.Len() > 0) {
948  for (int i = 0; i < cms0.Len(); i++) {
949  for (int j = 0; j < cms1.Len(); j++) {
950  TIntV ids0 = components.GetDat(cms0[i]);
951  TIntV ids1 = components.GetDat(cms1[j]);
952  if (ids0.IntrsLen(ids1) > 0 || TSnapDetail::edgeIntersect(Graph, ids0, ids1)) {
953  if (!gFinal->IsNode(cms0[i])) {
954  gFinal->AddNode(cms0[i]);
955  tFinal.AddDat(cms0[i], focusTimePoint);
956  }
957  if (!gFinal->IsNode(cms1[j])) {
958  gFinal->AddNode(cms1[j]);
959  tFinal.AddDat(cms1[j], focusTimePoint1);
960  }
961  gFinal->AddEdge(cms0[i], cms1[j]);
962  }
963  }
964  }
965  }
966  }// end connecting components
967 
968  // collapsing chains
969  if (collapse) {
970  for (TNGraph::TNodeI NI = gFinal->BegNI(); NI < gFinal->EndNI(); NI++) {
971  if (NI.GetInDeg() == 1 && NI.GetOutDeg() == 1)
972  if (gFinal->GetNI(NI.GetInNId(0)).GetOutDeg() == 1 && gFinal->GetNI(NI.GetOutNId(0)).GetInDeg() == 1)
973  {
974  gFinal->AddEdge(NI.GetInNId(0), NI.GetOutNId(0));
975  gFinal->DelEdge(NI.GetInNId(0), NI.GetId());
976  tFinal.DelKey(NI.GetId());
977  gFinal->DelNode(NI.GetId());
978  }
979  }
980  }// end collapsing
981 
982 }
983 
984 void ReebRefine(PNGraph& Graph, TIntH& t, int e, PNGraph& gFinal, TIntH& tFinal, bool collapse) {
985  TIntIntVH components;
986  TIntIntVH ct;
987 
988  int newId = 0; //get first new free id;
989 
990  // gett first and last t
991  int first = 429496729;
992  int last = -1;
993 
994  // smarter way of determining focus time points
995  TIntV timePoints;
996 
997  // get first and last time point
998  for (THashKeyDatI<TInt, TInt> it = t.BegI(); !it.IsEnd(); it++) {
999  if (it.GetDat() < first)
1000  first = it.GetDat();
1001  if (it.GetDat() > last)
1002  last = it.GetDat();
1003  }
1004 
1005  // adding focus timepoints
1006  // this can be put in the previous (first, last time point detection) iteration if breaking borders is not an issue
1007  for (THashKeyDatI<TInt, TInt> it = t.BegI(); !it.IsEnd(); it++) {
1008  if (it.GetDat() - (e / 2) >= first)
1009  timePoints.Add(it.GetDat() - (e / 2) /*- 0.1*/);
1010  timePoints.Add(it.GetDat());
1011  if (it.GetDat() + (e / 2) <= last)
1012  timePoints.Add(it.GetDat() + (e / 2) /*+ 0.1*/);
1013  }
1014 
1015  TIntV timePointsUnique;
1016  int prevtp = -1;
1017  //get unique time points
1018  for (int i = 0; i < timePoints.Len(); i++){
1019  if (timePoints[i] > prevtp)
1020  timePointsUnique.Add(timePoints[i]);
1021  prevtp = timePoints[i];
1022  }
1023 
1024  timePoints.Clr();
1025  timePoints = timePointsUnique;
1026 
1027  //iterate each time point
1028  for (int i = 0; i < timePoints.Len(); i++) {
1029 
1030  int focusTimePoint = timePoints[i];
1031 
1032  TIntV fnodes; // all the nodes int the focus in that step
1033 
1034  // getting nodes in focus -- in epsilon
1035  for (THashKeyDatI<TInt, TInt> it = t.BegI(); !it.IsEnd(); it++) {
1036  if ((it.GetDat() <= focusTimePoint + (e / 2)) && (it.GetDat() >= focusTimePoint - (e / 2)))
1037  fnodes.Add(it.GetKey());
1038  }
1039 
1040  // create graph from nodes in focus
1041  PNGraph g1 = TNGraph::New();
1042  for (int i = 0; i < fnodes.Len(); i++) {
1043  if (!g1->IsNode(fnodes[i]))
1044  g1->AddNode(fnodes[i]);
1045  // lower star
1046  for (int j = 0; j < Graph->GetNI(fnodes[i]).GetInDeg(); j++) {
1047  int NeighId = Graph->GetNI(fnodes[i]).GetInNId(j);
1048  if (t.GetDat(NeighId) < focusTimePoint - (e / 2)) {
1049 
1050  }
1051  else {
1052  if (!g1->IsNode(NeighId))
1053  g1->AddNode(NeighId);
1054  g1->AddEdge(NeighId, fnodes[i]);
1055  }
1056  }
1057  // upper star
1058  for (int j = 0; j < Graph->GetNI(fnodes[i]).GetOutDeg(); j++) {
1059  int NeighId = Graph->GetNI(fnodes[i]).GetOutNId(j);
1060  if (t.GetDat(NeighId) > focusTimePoint + (e / 2)) {
1061 
1062  }
1063  else {
1064  if (!g1->IsNode(NeighId))
1065  g1->AddNode(NeighId);
1066  g1->AddEdge(fnodes[i], NeighId);
1067  }
1068  }
1069  }
1070 
1071  // getting results from commponents detection and recording elements of components and timestamps of components
1072  TIntH inCompCount;
1073  TIntIntVH comps;
1074  int compBr = 0;
1075  TIntH nn_nodes;
1076 
1077  int FTP = focusTimePoint;
1078  TIntH TEdges;
1079 
1080  for (TNGraph::TNodeI NI = g1->BegNI(); NI < g1->EndNI(); NI++) {
1081 
1082 
1083  int FTPNode = NI.GetId();
1084  TNGraph::TNodeI GNI = Graph->GetNI(FTPNode);
1085  int FI, FO, RI, RO, I, O;
1086 
1087  RI = NI.GetInDeg();
1088  RO = NI.GetOutDeg();
1089 
1090  FI = Graph->GetNI(FTPNode).GetInDeg() - RI;
1091  FO = Graph->GetNI(FTPNode).GetOutDeg() - RO;
1092 
1093  if (focusTimePoint + (e / 2) == t.GetDat(NI.GetId())) { // if its on the right edge only in degree is observed
1094  RO = FO = 0;
1095  }
1096  if (focusTimePoint - (e / 2) == t.GetDat(NI.GetId())) { // if its on the left edge only out degree is observed
1097  RI = FI = 0;
1098  }
1099 
1100  I = RI + FI;
1101  O = RO + FO;
1102 
1103  // counting edges imidiately after time point
1104  int temp = 0;
1105  if (TEdges.IsKey(FTP))
1106  temp = TEdges.GetDat(FTP);
1107  TEdges.AddDat(FTP, O + temp);
1108 
1109  // FIND ELEMENTS
1110 
1111  // n - n,
1112  if (I > 1 && O > 1) {
1113  // number of nodes is in our out degree
1114  int nn = I;
1115  if (O > I)
1116  nn = O;
1117 
1118  TIntV nds;
1119  nds.Add(FTPNode);
1120  for (int i = 0; i < I; i++) {
1121  nds.Add(GNI.GetInNId(i));
1122  }
1123 
1124  for (int i = 0; i < O; i++) {
1125  nds.Add(GNI.GetOutNId(i));
1126  }
1127 
1128  for (int j = 0; j < nn; j++) {
1129  nn_nodes.AddDat(compBr);
1130  comps.AddDat(compBr, nds);
1131  compBr++;
1132  }
1133  }
1134 
1135  // 1 - n
1136  else if (I == 1 && O > 1) {
1137  for (int i = 0; i < O; i++) {
1138  TIntV nds;
1139  nds.Add(FTPNode);
1140  nds.Add(GNI.GetInNId(0));
1141  nds.Add(GNI.GetOutNId(i));
1142  comps.AddDat(compBr, nds);
1143  compBr++;
1144  }
1145  }
1146 
1147  // n - 1
1148  else if (I > 1 && O == 1) {
1149  for (int i = 0; i < I; i++) {
1150  TIntV nds;
1151  nds.Add(FTPNode);
1152  nds.Add(GNI.GetOutNId(0));
1153  nds.Add(GNI.GetInNId(i));
1154  comps.AddDat(compBr, nds);
1155  compBr++;
1156  }
1157  }
1158 
1159  // 0 - n
1160  else if (I == 0 && O > 1) {
1161  for (int i = 0; i < O; i++) {
1162  TIntV nds;
1163  nds.Add(FTPNode);
1164  nds.Add(GNI.GetOutNId(i));
1165  comps.AddDat(compBr, nds);
1166  compBr++;
1167  }
1168  }
1169 
1170  // n - 0
1171  else if (I > 1 && O == 0) {
1172  for (int i = 0; i < I; i++) {
1173  TIntV nds;
1174  nds.Add(FTPNode);
1175  nds.Add(GNI.GetInNId(i));
1176  comps.AddDat(compBr, nds);
1177  compBr++;
1178  }
1179  }
1180 
1181  // 1 - 1
1182  else if (I == 1 && O == 1) {
1183  TIntV nds;
1184  nds.Add(FTPNode);
1185  nds.Add(GNI.GetOutNId(0));
1186  nds.Add(GNI.GetInNId(0));
1187  comps.AddDat(compBr, nds);
1188  compBr++;
1189  }
1190 
1191  // 0 - 1
1192  else if (I == 0 && O == 1) {
1193  TIntV nds;
1194  nds.Add(FTPNode);
1195  nds.Add(GNI.GetOutNId(0));
1196  comps.AddDat(compBr, nds);
1197  compBr++;
1198  }
1199 
1200  // 1 - 0
1201  else if (I == 1 && O == 0) {
1202  TIntV nds;
1203  nds.Add(FTPNode);
1204  nds.Add(GNI.GetInNId(0));
1205  comps.AddDat(compBr, nds);
1206  compBr++;
1207  }
1208 
1209 
1210 
1211  } // end iterate each node
1212 
1213  // connecting inside of epsilon
1214 
1215  TIntIntVH elements;
1216  TIntH banned;
1217  for (int cc0 = 0; cc0 < comps.Len(); cc0++) {
1218  for (int cc1 = cc0; cc1 < comps.Len(); cc1++) {
1219  int smaller = comps[cc0].Len();
1220  int smaller_id = cc0;
1221  if (cc0 != cc1) {
1222  if (comps[cc1].Len() < smaller) {
1223  smaller = comps[cc1].Len();
1224  smaller_id = cc1;
1225  }
1226  int vi = TSnapDetail::vectorIntersect(comps[cc0], comps[cc1]);
1227  if (vi == smaller && !nn_nodes.IsKey(smaller_id)){
1228  banned.AddDat(smaller_id);
1229  }
1230  /*else if (smaller > 2 && vi == smaller - 1 && !nn_nodes.IsKey(smaller_id)) {
1231  TSnapDetail::transitiveTransform(comps[cc0], comps[cc1]);
1232  banned.AddDat(cc0);
1233  }*/
1234  }
1235  }
1236  }
1237 
1238  // add transitivity connection
1239 
1240  /*
1241  int max_out_tp = -1;
1242  int max_out = -1;
1243  for (THashKeyDatI<TInt, TInt> it = TEdges.BegI(); !it.IsEnd(); it++) {
1244  if (it.GetDat() > max_out) {
1245  max_out = it.GetDat();
1246  max_out_tp = it.GetKey();
1247  }
1248  }
1249  */
1250  for (int cc0 = 0; cc0 < comps.Len(); cc0++) {
1251  if (!banned.IsKey(cc0) /*&& TSnapDetail::chekIfCrossing(comps[cc0], t, first, last, max_out_tp)*/)
1252  elements.AddDat(cc0, comps[cc0]);
1253  }
1254 
1255 
1256  TIntV communitiesAtT;
1257  for (int cc = 0; cc < elements.Len(); cc++) {
1258  components.AddDat(newId, elements[cc]);
1259  communitiesAtT.Add(newId);
1260  newId++;
1261  }
1262  if (elements.Len() > 0)
1263  ct.AddDat(focusTimePoint, communitiesAtT);
1264 
1265  } // FOR
1266 
1267  // connecting neighbouring components
1268  THashKeyDatI<TInt, TIntV> it = ct.BegI();
1269  THashKeyDatI<TInt, TIntV> prelast = ct.EndI()--;
1270  prelast--;
1271  while (it < prelast) {
1272  TIntV cms0;
1273  TIntV cms1;
1274  int focusTimePoint;
1275  int focusTimePoint1;
1276  focusTimePoint = it.GetKey();
1277  cms0 = it.GetDat();
1278  it++;
1279  focusTimePoint1 = it.GetKey();
1280  cms1 = it.GetDat();
1281  if (cms0.Len() > 0 && cms1.Len() > 0) {
1282  for (int i = 0; i < cms0.Len(); i++) {
1283  for (int j = 0; j < cms1.Len(); j++) {
1284  TIntV ids0 = components.GetDat(cms0[i]);
1285  TIntV ids1 = components.GetDat(cms1[j]);
1286  int smaller = ids0.Len();
1287  if (ids1.Len() < smaller)
1288  smaller = ids1.Len();
1289 
1290  if (TSnapDetail::vectorIntersect(ids0, ids1) == smaller || (smaller > 2 && TSnapDetail::vectorIntersect(ids0, ids1) == (smaller -1 ))) {
1291  if (!gFinal->IsNode(cms0[i])) {
1292  gFinal->AddNode(cms0[i]);
1293  tFinal.AddDat(cms0[i], focusTimePoint);
1294  }
1295  if (!gFinal->IsNode(cms1[j])) {
1296  gFinal->AddNode(cms1[j]);
1297  tFinal.AddDat(cms1[j], focusTimePoint1);
1298  }
1299  gFinal->AddEdge(cms0[i], cms1[j]);
1300  }
1301  }
1302  }
1303  }
1304  }// end connecting components
1305 
1306  // collapsing chains
1307  if (collapse) {
1308  for (TNGraph::TNodeI NI = gFinal->BegNI(); NI < gFinal->EndNI(); NI++) {
1309  if (NI.GetInDeg() == 1 && NI.GetOutDeg() == 1)
1310  if (gFinal->GetNI(NI.GetInNId(0)).GetOutDeg() == 1 && gFinal->GetNI(NI.GetOutNId(0)).GetInDeg() == 1)
1311  {
1312  gFinal->AddEdge(NI.GetInNId(0), NI.GetOutNId(0));
1313  gFinal->DelEdge(NI.GetInNId(0), NI.GetId());
1314  tFinal.DelKey(NI.GetId());
1315  gFinal->DelNode(NI.GetId());
1316  }
1317  }
1318  }// end collapsing
1319 
1320 }
1321 
1322 namespace TSnapDetail {
1327 private:
1328  struct TCmtyDat {
1329  double DegFrac;
1331  int MxQId;
1332  TCmtyDat() : MxQId(-1) { }
1333  TCmtyDat(const double& NodeDegFrac, const int& OutDeg) :
1334  DegFrac(NodeDegFrac), NIdQH(OutDeg), MxQId(-1) { }
1335  void AddQ(const int& NId, const double& Q) {
1336  NIdQH.AddDat(NId, Q);
1337  if (MxQId == -1 || NIdQH[MxQId]<Q) { MxQId = NIdQH.GetKeyId(NId); }
1338  }
1339  void UpdateMaxQ() {
1340  MxQId = -1;
1341  for (int i = -1; NIdQH.FNextKeyId(i);) {
1342  if (MxQId == -1 || NIdQH[MxQId]< NIdQH[i]) { MxQId = i; }
1343  }
1344  }
1345  void DelLink(const int& K) {
1346  const int NId = GetMxQNId();
1347  NIdQH.DelKey(K); if (NId == K) { UpdateMaxQ(); }
1348  }
1349  int GetMxQNId() const { return NIdQH.GetKey(MxQId); }
1350  double GetMxQ() const { return NIdQH[MxQId]; }
1351  };
1352 private:
1356  double Q;
1357 public:
1358  TCNMQMatrix(const PUNGraph& Graph) : CmtyQH(Graph->GetNodes()),
1359  MxQHeap(Graph->GetNodes()), CmtyIdUF(Graph->GetNodes()) {
1360  Init(Graph);
1361  }
1362  void Init(const PUNGraph& Graph) {
1363  const double M = 0.5 / Graph->GetEdges(); // 1/2m
1364  Q = 0.0;
1365  for (TUNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) {
1366  CmtyIdUF.Add(NI.GetId());
1367  const int OutDeg = NI.GetOutDeg();
1368  if (OutDeg == 0) { continue; }
1369  TCmtyDat& Dat = CmtyQH.AddDat(NI.GetId(), TCmtyDat(M * OutDeg, OutDeg));
1370  for (int e = 0; e < NI.GetOutDeg(); e++) {
1371  const int DstNId = NI.GetOutNId(e);
1372  const double DstMod = 2 * M * (1.0 - OutDeg * Graph->GetNI(DstNId).GetOutDeg() * M);
1373  Dat.AddQ(DstNId, DstMod);
1374  }
1375  Q += -1.0*TMath::Sqr(OutDeg*M);
1376  if (NI.GetId() < Dat.GetMxQNId()) {
1377  MxQHeap.Add(TFltIntIntTr(Dat.GetMxQ(), NI.GetId(), Dat.GetMxQNId()));
1378  }
1379  }
1380  MxQHeap.MakeHeap();
1381  }
1383  while (true) {
1384  if (MxQHeap.Empty()) { break; }
1385  const TFltIntIntTr TopQ = MxQHeap.PopHeap();
1386  if (!CmtyQH.IsKey(TopQ.Val2) || !CmtyQH.IsKey(TopQ.Val3)) { continue; }
1387  if (TopQ.Val1 != CmtyQH.GetDat(TopQ.Val2).GetMxQ() && TopQ.Val1 != CmtyQH.GetDat(TopQ.Val3).GetMxQ()) { continue; }
1388  return TopQ;
1389  }
1390  return TFltIntIntTr(-1, -1, -1);
1391  }
1392 
1393  bool MergeBestQ() {
1394  const TFltIntIntTr TopQ = FindMxQEdge();
1395  if (TopQ.Val1 <= 0.0) { return false; }
1396  // joint communities
1397  const int I = TopQ.Val3;
1398  const int J = TopQ.Val2;
1399  CmtyIdUF.Union(I, J); // join
1400  Q += TopQ.Val1;
1401  TCmtyDat& DatJ = CmtyQH.GetDat(J);
1402  { TCmtyDat& DatI = CmtyQH.GetDat(I);
1403  DatI.DelLink(J); DatJ.DelLink(I);
1404  for (int i = -1; DatJ.NIdQH.FNextKeyId(i); ) {
1405  const int K = DatJ.NIdQH.GetKey(i);
1406  TCmtyDat& DatK = CmtyQH.GetDat(K);
1407  double NewQ = DatJ.NIdQH[i];
1408  if (DatI.NIdQH.IsKey(K)) { NewQ = NewQ + DatI.NIdQH.GetDat(K); DatK.DelLink(I); } // K connected to I and J
1409  else { NewQ = NewQ - 2 * DatI.DegFrac*DatK.DegFrac; } // K connected to J not I
1410  DatJ.AddQ(K, NewQ);
1411  DatK.AddQ(J, NewQ);
1412  MxQHeap.PushHeap(TFltIntIntTr(NewQ, TMath::Mn(J, K), TMath::Mx(J, K)));
1413  }
1414  for (int i = -1; DatI.NIdQH.FNextKeyId(i); ) {
1415  const int K = DatI.NIdQH.GetKey(i);
1416  if (!DatJ.NIdQH.IsKey(K)) { // K connected to I not J
1417  TCmtyDat& DatK = CmtyQH.GetDat(K);
1418  const double NewQ = DatI.NIdQH[i] - 2 * DatJ.DegFrac*DatK.DegFrac;
1419  DatJ.AddQ(K, NewQ);
1420  DatK.DelLink(I);
1421  DatK.AddQ(J, NewQ);
1422  MxQHeap.PushHeap(TFltIntIntTr(NewQ, TMath::Mn(J, K), TMath::Mx(J, K)));
1423  }
1424  }
1425  DatJ.DegFrac += DatI.DegFrac; }
1426  if (DatJ.NIdQH.Empty()) { CmtyQH.DelKey(J); } // isolated community (done)
1427  CmtyQH.DelKey(I);
1428  return true;
1429  }
1430  static double CmtyCMN(const PUNGraph& Graph, TCnComV& CmtyV) {
1431  TCNMQMatrix QMatrix(Graph);
1432  // maximize modularity
1433  while (QMatrix.MergeBestQ()) {}
1434  // reconstruct communities
1435  THash<TInt, TIntV> IdCmtyH;
1436  for (TUNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) {
1437  IdCmtyH.AddDat(QMatrix.CmtyIdUF.Find(NI.GetId())).Add(NI.GetId());
1438  }
1439  CmtyV.Gen(IdCmtyH.Len());
1440  for (int j = 0; j < IdCmtyH.Len(); j++) {
1441  CmtyV[j].NIdV.Swap(IdCmtyH[j]);
1442  }
1443  return QMatrix.Q;
1444  }
1445 };
1446 
1447 } // namespace TSnapDetail
1448 
1449 double CommunityCNM(const PUNGraph& Graph, TCnComV& CmtyV) {
1450  return TSnapDetail::TCNMQMatrix::CmtyCMN(Graph, CmtyV);
1451 }
1452 
1453 }; //namespace TSnap
double CommunityGirvanNewman(PUNGraph &Graph, TCnComV &CmtyV)
Definition: cmty.cpp:312
static const T & Mn(const T &LVal, const T &RVal)
Definition: xmath.h:36
Main namespace for all the Snap global entities.
Definition: alg.h:1
void Randomize()
Definition: dt.h:60
TStr GetStr() const
Definition: dt.h:1200
int Add(const int &Key)
Adds an element Key to the structure.
Definition: gbase.h:254
int Len() const
Definition: dt.h:490
void Add(const int &NodeId)
Definition: cncom.h:104
int GetHops(const int &SrcNId, const int &DstNId) const
Definition: bfsdfs.h:294
void Union(const int &Key1, const int &Key2)
Merges sets with elements Key1 and Key2.
Definition: gbase.cpp:40
bool Empty() const
Tests whether the heap is empty.
Definition: gbase.h:295
TFltIntIntTr FindMxQEdge()
Definition: cmty.cpp:1382
void Add(const TVal &Val)
Adds an element to the data structure. Heap property is not maintained by Add() and thus after all th...
Definition: gbase.h:301
Definition: dt.h:11
Definition: ds.h:130
static const T & Mx(const T &LVal, const T &RVal)
Definition: xmath.h:32
static PNGraph New()
Static constructor that returns a pointer to the graph. Call: PNGraph Graph = TNGraph::New().
Definition: graph.h:481
TNodeI GetNI(const int &NId) const
Returns an iterator referring to the node of ID NId in the graph.
Definition: graph.h:552
bool IsIn(const TVal &Val) const
Checks whether element Val is a member of the vector.
Definition: ds.h:828
void GetNodeWcc(const PGraph &Graph, const int &NId, TIntV &CnCom)
Returns (via output parameter CnCom) all nodes that are in the same connected component as node NId...
Definition: cncom.h:277
static const int Mx
Definition: dt.h:1142
TCNMQMatrix(const PUNGraph &Graph)
Definition: cmty.cpp:1358
#define Fail
Definition: bd.h:238
int DoBfs(const int &StartNode, const bool &FollowOut, const bool &FollowIn, const int &TargetNId=-1, const int &MxDist=TInt::Mx)
Performs BFS from node id StartNode for at maps MxDist steps by only following in-links (parameter Fo...
Definition: bfsdfs.h:128
TIter BegI() const
Definition: hash.h:213
int AddNode(int NId=-1)
Adds a node of ID NId to the graph.
Definition: graph.cpp:8
int GetEdges() const
Returns the number of edges in the graph.
Definition: graph.cpp:82
TSizeTy Len() const
Returns the number of elements in the vector.
Definition: ds.h:575
double InfomapOnline(PUNGraph &Graph, int n1, int n2, TIntFltH &PAlpha, double &SumPAlphaLogPAlpha, TIntFltH &Qi, TIntH &Module, int &Br, TCnComV &CmtyV)
Definition: cmty.cpp:419
void ReebSimplify(PNGraph &Graph, TIntH &t, int e, PNGraph &gFinal, TIntH &tFinal, bool collapse)
Definition: cmty.cpp:844
Node iterator. Only forward iteration (operator++) is supported.
Definition: graph.h:68
bool Empty() const
Definition: hash.h:227
void GetBetweennessCentr(const PGraph &Graph, TIntFltH &NIdBtwH, const double &NodeFrac=1.0, const bool &IsDir=false)
Definition: centr.h:489
Definition: ss.h:72
TVal1 Val1
Definition: ds.h:132
bool GetInt(const int &FldN, int &Val) const
If the field FldN is an integer its value is returned in Val and the function returns true...
Definition: ss.cpp:447
TChA GetLnStr() const
Returns the current line.
Definition: ss.h:124
const TDat & GetDat(const TKey &Key) const
Definition: hash.h:262
static double Sqr(const double &x)
Definition: xmath.h:12
TIter EndI() const
Definition: hash.h:218
int GetNodes() const
Returns the number of nodes in the graph.
Definition: graph.h:192
const TKey & GetKey() const
Definition: hash.h:80
void Swap(TVec< TVal, TSizeTy > &Vec)
Swaps the contents of the vector with Vec.
Definition: ds.h:1101
int GetDeg() const
Returns degree of the current node.
Definition: graph.h:90
void DelKey(const TKey &Key)
Definition: hash.h:404
TVal2 Val2
Definition: ds.h:133
bool Eof() const
Checks for end of file.
Definition: ss.h:122
int GetOutDeg() const
Returns out-degree of the current node (returns same as value GetDeg() since the graph is undirected)...
Definition: graph.h:94
void Clr(const bool &DoDel=true, const TSizeTy &NoDelLim=-1)
Clears the contents of the vector.
Definition: ds.h:1022
int Find(const int &Key)
Returns the set that contains element Key.
Definition: gbase.cpp:23
const TDat & GetDat() const
Definition: hash.h:81
Definition: cncom.h:88
bool IsEnd() const
Tests whether the iterator is pointing to the past-end element.
Definition: hash.h:78
void CmtyEvolutionFileBatch(TStr InFNm, TIntIntHH &sizesCont, TIntIntHH &cCont, TIntIntVH &edges, double alpha, double beta, int CmtyAlg)
Definition: cmty.cpp:490
bool IsEdge(const int &SrcNId, const int &DstNId, const bool &IsDir=true) const
Tests whether an edge from node IDs SrcNId to DstNId exists in the graph.
Definition: graph.cpp:363
const TVal & GetDat(const TVal &Val) const
Returns reference to the first occurrence of element Val.
Definition: ds.h:838
bool inComp(PNGraph &g1, PNGraph &Graph, TIntH &inCompCount, int id, int neigh)
Definition: cmty.cpp:150
Simple heap data structure.
Definition: gbase.h:269
Whitespace (space or tab) separated.
Definition: ss.h:11
TNodeI GetNI(const int &NId) const
Returns an iterator referring to the node of ID NId in the graph.
Definition: graph.h:241
bool FNextKeyId(int &KeyId) const
Definition: hash.h:478
bool IsNode(const int &NId) const
Tests whether ID NId is a node.
Definition: graph.h:546
bool chekIfCrossing(TIntV &a, TIntH &t, int f, int l, int TP)
Definition: cmty.cpp:195
static PUNGraph New()
Static constructor that returns a pointer to the graph. Call: PUNGraph Graph = TUNGraph::New().
Definition: graph.h:172
void CmtyEvolutionFileBatchV(TStr InFNm, TIntIntVH &sizesContV, TIntIntVH &cContV, TIntIntVH &edges, double alpha, double beta, int CmtyAlg)
Definition: cmty.cpp:441
TStr CmtyTest(TStr InFNm, int CmtyAlg)
Definition: cmty.cpp:827
void DelEdge(const int &SrcNId, const int &DstNId)
Deletes an edge between node IDs SrcNId and DstNId from the graph.
Definition: graph.cpp:124
TSizeTy IntrsLen(const TVec< TVal, TSizeTy > &ValV) const
Returns the size of the intersection of vectors this and ValV. Assumes the vectors are sorted! ...
Definition: ds.h:1479
char GetCh(const int &ChN) const
Definition: dt.h:486
int GetDeg() const
Returns degree of the current node, the sum of in-degree and out-degree.
Definition: graph.h:402
THeap< TFltIntIntTr > MxQHeap
Definition: cmty.cpp:1354
int AddEdge(const int &SrcNId, const int &DstNId)
Adds an edge between node IDs SrcNId and DstNId to the graph.
Definition: graph.cpp:92
Definition: dt.h:1137
int GetKeyId(const TKey &Key) const
Definition: hash.h:466
void CmtyEvolutionJson(TStr &Json, TIntIntVH &sizesContV, TIntIntVH &cContV, TIntIntVH &edges)
Definition: cmty.cpp:725
void MapEquationNew2Modules(PUNGraph &Graph, TIntH &Module, TIntFltH &Qi, int a, int b)
Definition: cmty.cpp:54
double InfomapOnlineIncrement(PUNGraph &Graph, int n1, int n2, TIntFltH &PAlpha, double &SumPAlphaLogPAlpha, TIntFltH &Qi, TIntH &Module, int &Br)
Definition: cmty.cpp:214
void AddQ(const int &NId, const double &Q)
Definition: cmty.cpp:1335
void transitiveTransform(TIntV &a, TIntV &b)
Definition: cmty.cpp:179
Definition: dt.h:412
double CommunityCNM(const PUNGraph &Graph, TCnComV &CmtyV)
Definition: cmty.cpp:1449
void PushHeap(const int &First, int HoleIdx, const int &Top, TVal Val)
Definition: gbase.h:325
void ReebRefine(PNGraph &Graph, TIntH &t, int e, PNGraph &gFinal, TIntH &tFinal, bool collapse)
Definition: cmty.cpp:984
void Shuffle(TRnd &Rnd)
Randomly shuffles the elements of the vector.
Definition: ds.h:1335
Node iterator. Only forward iteration (operator++) is supported.
Definition: graph.h:383
void Init(const PUNGraph &Graph)
Definition: cmty.cpp:1362
TNodeI EndNI() const
Returns an iterator referring to the past-the-end node in the graph.
Definition: graph.h:239
int vectorIntersect(TIntV &a, TIntV &b)
Definition: cmty.cpp:138
TVal1 Val1
Definition: ds.h:34
int GetNbrNId(const int &NodeN) const
Returns ID of NodeN-th neighboring node.
Definition: graph.h:111
TVal2 Val2
Definition: ds.h:35
void Clr(const bool &DoDel=true, const int &NoDelLim=-1, const bool &ResetDat=true)
Definition: hash.h:361
bool IsNode(const int &NId) const
Tests whether ID NId is a node.
Definition: graph.h:235
bool Next()
Loads next line from the input file.
Definition: ss.cpp:412
bool edgeIntersect(PNGraph &graph, TIntV &a, TIntV &b)
Definition: cmty.cpp:127
void Gen(const TSizeTy &_Vals)
Constructs a vector (an array) of _Vals elements.
Definition: ds.h:523
TVal PopHeap()
Removes the top element from the heap.
Definition: gbase.h:313
int GetId() const
Returns ID of the current node.
Definition: graph.h:88
bool IsKey(const TKey &Key) const
Definition: hash.h:258
int GetInNId(const int &NodeN) const
Returns ID of NodeN-th in-node (the node pointing to the current node).
Definition: graph.h:412
TCmtyDat(const double &NodeDegFrac, const int &OutDeg)
Definition: cmty.cpp:1333
TSizeTy Add()
Adds a new element at the end of the vector, after its current last element.
Definition: ds.h:602
TNodeI BegNI() const
Returns an iterator referring to the first node in the graph.
Definition: graph.h:237
void DelSelfEdges(const PGraph &Graph)
Removes all the self-edges from the graph.
Definition: alg.h:419
double Equation(TIntFltH &PAlpha, double &SumPAlphaLogPAlpha, TIntFltH &Qi)
Definition: cmty.cpp:109
int Len() const
Definition: hash.h:228
void InsStr(const int &BChN, const TStr &Str)
Definition: dt.cpp:825
double Infomap(PUNGraph &Graph, TCnComV &CmtyV)
Definition: cmty.cpp:339
TDat & AddDat(const TKey &Key)
Definition: hash.h:238
static double CmtyCMN(const PUNGraph &Graph, TCnComV &CmtyV)
Definition: cmty.cpp:1430
void GetWccs(const PGraph &Graph, TCnComV &CnComV)
Returns all weakly connected components in a Graph.
Definition: cncom.h:376
Union Find class (Disjoint-set data structure).
Definition: gbase.h:230
int GetOutNId(const int &NodeN) const
Returns ID of NodeN-th out-node (the node the current node points to).
Definition: graph.h:416
const TKey & GetKey(const int &KeyId) const
Definition: hash.h:252
TTriple< TFlt, TInt, TInt > TFltIntIntTr
Definition: ds.h:182
TVal3 Val3
Definition: ds.h:134
double _GirvanNewmanGetModularity(const PUNGraph &G, const TIntH &OutDegH, const int &OrigEdges, TCnComV &CnComV)
Definition: cmty.cpp:37
Vector is a sequence TVal objects representing an array that can change in size.
Definition: ds.h:430
THash< TInt, TCmtyDat > CmtyQH
Definition: cmty.cpp:1353
void CmtyGirvanNewmanStep(PUNGraph &Graph, TIntV &Cmty1, TIntV &Cmty2)
A single step of Girvan-Newman clustering procedure.
Definition: cmty.cpp:15
void MakeHeap(const int &First, const int &Len)
Definition: gbase.h:349
void SortByDat(const bool &Asc=true)
Definition: hash.h:292