SNAP Library 3.0, User Reference  2016-07-20 17:56:49
SNAP, a general purpose, high performance system for analysis and manipulation of large networks
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
ncp.cpp
Go to the documentation of this file.
1  #include "stdafx.h"
2 #include "ncp.h"
3 
5 // Local Spectral Clustering
6 
7 bool TLocClust::Verbose = true;
8 
9 int TLocClust::ApproxPageRank(const int& SeedNode, const double& Eps) {
10  for (int i = 0; i < ProbH.Len(); i++) { ProbH[i]=0.0; }
11  for (int i = 0; i < ResH.Len(); i++) { ResH[i]=0.0; }
12  ProbH.Clr(false, -1, false);
13  ResH.Clr(false, -1, false);
14  ResH.AddDat(SeedNode, 1.0);
15  int iter = 0;
16  double OldRes = 0.0;
17  NodeQ.Clr(false);
18  NodeQ.Push(SeedNode);
19  TExeTm ExeTm;
20  while (! NodeQ.Empty()) {
21  const int NId = NodeQ.Top(); NodeQ.Pop();
22  const TUNGraph::TNodeI& Node = Graph->GetNI(NId);
23  const int NIdDeg = Node.GetOutDeg();
24  const double PushVal = ResH.GetDat(NId) - 0.5*Eps*NIdDeg;
25  const double PutVal = (1.0-Alpha) * PushVal / double(NIdDeg);
26  ProbH.AddDat(NId) += Alpha*PushVal;
27  ResH.AddDat(NId) = 0.5 * Eps * NIdDeg;
28  for (int e = 0; e < NIdDeg; e++) {
29  const int DstNId = Node.GetOutNId(e);
30  const int DstDeg = Graph->GetNI(DstNId).GetOutDeg();
31  double& ResVal = ResH.AddDat(DstNId).Val;
32  OldRes = ResVal;
33  ResVal += PutVal;
34  if (ResVal >= Eps*DstDeg && OldRes < Eps*DstDeg) {
35  NodeQ.Push(DstNId); }
36  }
37  iter++;
38  if (iter % Mega(1) == 0) {
39  printf(" %d[%s]", NodeQ.Len(), ExeTm.GetStr());
40  if (iter/1000 > Graph->GetNodes() || ExeTm.GetSecs() > 4*3600) { // more than 2 hours
41  printf("Too many iterations! Stop to save time.\n");
42  return iter; }
43  }
44  }
45  // check that the residuals are sufficiently small
46  /*for (int i =0; i < ProbH.Len(); i++) {
47  const int Deg = Graph->GetNI(ResH.GetKey(i)).GetOutDeg();
48  IAssert(ResH[i] < Eps*Deg); } //*/
49  return iter;
50 }
51 
53  TExeTm ExeTm;
54  VolV.Clr(false); CutV.Clr(false); PhiV.Clr(false);
55  if (ProbH.Empty()) { return; }
56  const int TopNId = ProbH.GetKey(0);
57  const int TopNIdDeg = Graph->GetNI(TopNId).GetOutDeg();
58  int Vol = TopNIdDeg, Cut = TopNIdDeg;
59  double Phi = Cut/double(Vol);
60  VolV.Add(Vol); CutV.Add(Cut); PhiV.Add(1.0);
61  for (int i = 1; i < ProbH.Len(); i++) {
62  const int NId = ProbH.GetKey(i);
63  const TUNGraph::TNodeI& Node = Graph->GetNI(NId);
64  const int OutDeg = Node.GetOutDeg();
65  int CutSz = OutDeg; // edges outside
66  for (int e = 0; e < OutDeg; e++) {
67  const int Rank = ProbH.GetKeyId(Node.GetOutNId(e));
68  if ( Rank > -1 && Rank < i) { CutSz -= 2; }
69  }
70  Vol += OutDeg; Cut += CutSz;
71  if (Vol < Edges2) {
72  if (2*Vol > Edges2) { Phi = Cut / double(Edges2-Vol); }
73  else { Phi = Cut / double(Vol); }
74  } else {
75  Phi = 1.0;
76  }
77  IAssert((Phi+1e-6) >= double(1.0)/double(i*(i+1)+1)); // conductance is worse than the best possible
78  VolV.Add(Vol); CutV.Add(Cut); PhiV.Add(Phi);
79  }}
80 
81 void TLocClust::FindBestCut(const int& SeedNode, const int& ClustSz, const double& MinSizeFrac) {
82  double MaxPhi = TFlt::Mx;
83  BestCutIdx = -1;
84  SeedNId = SeedNode;
85  // calculate pagerank and cut sets
86  ApproxPageRank(SeedNId, 1.0/double(ClustSz));
87  for (int i = 0; i < ProbH.Len(); i++) {
88  ProbH[i] /= Graph->GetNI(ProbH.GetKey(i)).GetOutDeg(); }
89  ProbH.SortByDat(false);
90  SupportSweep();
91  // find best cut
92  NIdV.Clr(false);
93  for (int i = 0; i < PhiV.Len(); i++) {
94  const double Phi = PhiV[i];
95  NIdV.Add(ProbH.GetKey(i));
96  if (Phi < MaxPhi) { MaxPhi = Phi; BestCutIdx = i; }
97  }
98 }
99 
100 void TLocClust::PlotVolDistr(const TStr& OutFNm, TStr Desc) const {
101  TFltPrV RankValV(VolV.Len(), 0);
102  for (int i = 0; i < VolV.Len(); i++) {
103  RankValV.Add(TFltPr(i+1, VolV[i].Val)); }
104  if (Desc.Empty()) { Desc = OutFNm; }
105  TFltPrV NewV; TLocClustStat::MakeExpBins(RankValV, NewV);
106  TGnuPlot GP(OutFNm+"-VOL", TStr::Fmt("VOLUME. Seed node %d.", SeedNId, Desc.CStr()));
107  GP.AddPlot(NewV, gpwLinesPoints, ""); //, "pointtype 6 pointsize 1.5"
108  GP.SetXYLabel("Rank", "Volume");
109  //GP.SetScale(gpsLog10X);
110  GP.SavePng();
111 }
112 
113 void TLocClust::PlotCutDistr(const TStr& OutFNm, TStr Desc) const {
114  TFltPrV RankValV(CutV.Len(), 0);
115  for (int i = 0; i < CutV.Len(); i++) {
116  RankValV.Add(TFltPr(i+1, CutV[i].Val)); }
117  if (Desc.Empty()) { Desc = OutFNm; }
118  TFltPrV NewV; TLocClustStat::MakeExpBins(RankValV, NewV);
119  TGnuPlot GP(OutFNm+"-CUT", TStr::Fmt("CUT SIZE. Seed node %d.", SeedNId, Desc.CStr()));
120  GP.AddPlot(NewV, gpwLinesPoints, ""); //, "pointtype 6 pointsize 1.5"
121  GP.SetXYLabel("Rank", "Cut size");
122  //GP.SetScale(gpsLog10X);
123  GP.SavePng();
124 }
125 
126 void TLocClust::PlotPhiDistr(const TStr& OutFNm, TStr Desc) const {
127  TFltPrV RankValV(PhiV.Len(), 0);
128  for (int i = 0; i < PhiV.Len(); i++) {
129  RankValV.Add(TFltPr(i+1, PhiV[i])); }
130  if (Desc.Empty()) { Desc = OutFNm; }
131  TFltPrV NewV; TLocClustStat::MakeExpBins(RankValV, NewV);
132  TGnuPlot GP(OutFNm+"-PHI", TStr::Fmt("CONDUCTANCE (Cut size / Volume). Seed node %d.", SeedNId, Desc.CStr()));
133  GP.AddPlot(NewV, gpwLinesPoints, ""); //, "pointtype 6 pointsize 1.5"
134  GP.SetXYLabel("Rank", "Conductance (Cut size / Volume)");
135  //GP.SetScale(gpsLog10X);
136  GP.SavePng();
137 }
138 
139 void TLocClust::SavePajek(const TStr& OutFNm) const {
140  FILE *F = fopen(TStr::Fmt("%s.net", OutFNm.CStr()).CStr(), "wt");
141  TIntH NIdToIdH(Graph->GetNodes(), true);
142  TIntH ClustSet(BestCut()+1);
143  const int BucketSz = BestCutNodes()/ 4;
144  TStrV ClrV = TStrV::GetV("Black", "Gray80", "Gray60", "Gray40", "Gray20", "RedViolet");
145  for (int a = 0; a < BestCutNodes(); a++) {
146  ClustSet.AddDat(NIdV[a], (a+1)/BucketSz);
147  }
148  fprintf(F, "*Vertices %d\n", Graph->GetNodes());
149  int i = 0;
150  for (TUNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) {
151  const int NId = NI.GetId();
152  if (NId == SeedNId) {
153  fprintf(F, "%d \"%d\" diamond x_fact 2 y_fact 2 ic Green fos 10\n", i+1, NId); }
154  else if (ClustSet.IsKey(NId)) {
155  //fprintf(F, "%d \"%d\" box x_fact 1 y_fact 1 ic Red fos 10\n", i+1, NId); }
156  fprintf(F, "%d \"%d\" box x_fact 2 y_fact 2 ic %s fos 10\n", i+1, NId, ClrV[ClustSet.GetDat(NId)].CStr()); }
157  else {
158  fprintf(F, "%d \"%d\" ellipse x_fact 2 y_fact 2 ic Blue fos 10\n", i+1, NId); }
159  NIdToIdH.AddDat(NId, i+1);
160  i++;
161  }
162  fprintf(F, "*Arcs %d\n", Graph->GetEdges()); // arcs are directed, edges are undirected
163  for (TUNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) {
164  const int NId = NIdToIdH.GetDat(NI.GetId());
165  for (int e = 0; e < NI.GetOutDeg(); e++) {
166  fprintf(F, "%d %d %g c Tan\n", NId, NIdToIdH.GetDat(NI.GetOutNId(e)).Val, 1.0);
167  }
168  }
169  fclose(F);
170 }
171 
172 void TLocClust::DrawWhiskers(const PUNGraph& Graph, TStr FNmPref, const int& PlotN=10) {
173  TCnComV CnComV; TSnap::Get1CnCom(Graph, CnComV);
174  CnComV.Sort(false);
175  // plot size distribution
176  { TIntH SzCntH;
177  for (int i = 0; i < CnComV.Len(); i++) { SzCntH.AddDat(CnComV[i].Len()) += 1; }
178  TGnuPlot::PlotValCntH(SzCntH, "whiskSz."+FNmPref, TStr::Fmt("%s. G(%d, %d)", FNmPref.CStr(), Graph->GetNodes(),
179  Graph->GetEdges()), "Whisker size (Maximal component connected with a bridge edge)", "Count", gpsLog10XY, false); }
180  // draw them
181  int BrNodeId = -1;
182  for (int c = 0; c < TMath::Mn(CnComV.Len(), PlotN); c++) {
183  const PUNGraph BrClust = TSnap::GetSubGraph(Graph, CnComV[c].NIdV);
184  for (TUNGraph::TNodeI NI = BrClust->BegNI(); NI < BrClust->EndNI(); NI++) {
185  if (NI.GetOutDeg() != Graph->GetNI(NI.GetId()).GetOutDeg()) { BrNodeId=NI.GetId(); break; } }
186  TIntStrH ClrH; ClrH.AddDat(BrNodeId, "red");
187  TSnap::DrawGViz(BrClust, gvlNeato, TStr::Fmt("whisk-%s-%02d.gif", FNmPref.CStr(), c+1),
188  TStr::Fmt("Bridge node id: %d, n=%d, e=%d", BrNodeId, BrClust->GetNodes(), BrClust->GetEdges()), false, ClrH);
189  }
190 }
191 
192 void TLocClust::GetCutStat(const PUNGraph& Graph, const TIntV& NIdV, int& Vol, int& Cut, double& Phi, int GraphEdges) {
193  TIntSet NIdSet(NIdV.Len());
194  for (int i = 0; i < NIdV.Len(); i++) { NIdSet.AddKey(NIdV[i]); }
195  GetCutStat(Graph, NIdSet, Vol, Cut, Phi, GraphEdges);
196 }
197 
198 void TLocClust::GetCutStat(const PUNGraph& Graph, const TIntSet& NIdSet, int& Vol, int& Cut, double& Phi, int GraphEdges) {
199  const int Edges2 = GraphEdges>=0 ? 2*GraphEdges : Graph->GetEdges();
200  Vol=0; Cut=0; Phi=0.0;
201  for (int i = 0; i < NIdSet.Len(); i++) {
202  if (! Graph->IsNode(NIdSet[i])) { continue; }
203  TUNGraph::TNodeI NI = Graph->GetNI(NIdSet[i]);
204  for (int e = 0; e < NI.GetOutDeg(); e++) {
205  if (! NIdSet.IsKey(NI.GetOutNId(e))) { Cut += 1; }
206  }
207  Vol += NI.GetOutDeg();
208  }
209  // get conductance
210  if (Vol != Edges2) {
211  if (2*Vol > Edges2) { Phi = Cut / double(Edges2-Vol); }
212  else if (Vol == 0) { Phi = 0.0; }
213  else { Phi = Cut / double(Vol); }
214  } else {
215  if (Vol == Edges2) { Phi = 1.0; }
216  }
217 }
218 
219 void TLocClust::PlotNCP(const PUNGraph& Graph, const TStr& FNm, const TStr Desc, const bool& BagOfWhiskers, const bool& RewireNet, const int& KMin, const int& KMax, const int& Coverage, const bool& SaveTxtStat, const bool& PlotBoltzman) {
220  const double Alpha = 0.001, KFac = 1.5, SizeFrac = 0.001;
221  //const int KMin = 2, KMax = Mega(100), Coverage = 10;
222  TLocClustStat ClusStat1(Alpha, KMin, KMax, KFac, Coverage, SizeFrac);
223  ClusStat1.Run(Graph, false, PlotBoltzman, SaveTxtStat);
224  if (BagOfWhiskers) { ClusStat1.AddBagOfWhiskers(); }
225  TLocClustStat ClusStat2(Alpha, KMin, KMax, KFac, Coverage, SizeFrac);
226  ClusStat1.ImposeNCP(ClusStat2, FNm, Desc, "ORIGINAL", "REWIRED"); // plot before rewiring
227  if (SaveTxtStat) { // for every piece size store modularity
228  ClusStat1.SaveTxtInfo(FNm, Desc, false);
229  }
230  if (PlotBoltzman) {
231  ClusStat1.PlotBoltzmanCurve(FNm, Desc);
232  }
233  if (RewireNet) {
234  ClusStat2.Run(TSnap::GenRewire(Graph, 50, TInt::Rnd), false, false, false);
235  if (BagOfWhiskers) { ClusStat2.AddBagOfWhiskers(); }
236  ClusStat1.ImposeNCP(ClusStat2, FNm, Desc, "ORIGINAL", "REWIRED"); // if rewire, plot again
237  }
238 }
239 
241 // Local Clustering Statistics
242 
243 double TLocClustStat::BinFactor = 1.01;
244 
245 double TLocClustStat::TCutInfo::GetFracDegOut(const PUNGraph& Graph, double& MxFrac, double& AvgFrac, double& MedianFrac, double& Pct9Frac, double& Flake) const {
246  if (CutNIdV.Empty()) {
247  IAssert(Nodes<100 || ! CutNIdV.Empty());
248  MxFrac=1; AvgFrac=1; MedianFrac=1; Pct9Frac=1; Flake=1;
249  return 1;
250  }
251  TMom FracDegMom;
252  TIntSet InNIdSet(CutNIdV.Len());
253  int NHalfIn=0;
254  for (int i = 0; i < CutNIdV.Len(); i++) {
255  InNIdSet.AddKey(CutNIdV[i]); }
256  for (int n = 0; n < CutNIdV.Len(); n++) {
257  const TUNGraph::TNodeI NI = Graph->GetNI(CutNIdV[n]);
258  int EdgesOut = 0;
259  for (int i = 0; i < NI.GetDeg(); i++) {
260  if (! InNIdSet.IsKey(NI.GetNbrNId(i))) { EdgesOut++; }
261  }
262  const double FracOut = EdgesOut/double(NI.GetDeg());
263  if (FracOut <= 0.5) { NHalfIn++; }
264  FracDegMom.Add(FracOut);
265  }
266  FracDegMom.Def();
267  MxFrac = FracDegMom.GetMx();
268  AvgFrac = FracDegMom.GetMean();
269  MedianFrac = FracDegMom.GetMedian();
270  Pct9Frac = FracDegMom.GetDecile(9);
271  Flake = 1.0 - double(NHalfIn)/double(CutNIdV.Len());
272  return MxFrac;
273 }
274 
275 TLocClustStat::TLocClustStat(const double& _Alpha, const int& _KMin, const int& _KMax, const double& _KFac,
276  const int& _Coverage, const double& _SizeFrac) : Alpha(_Alpha), SizeFrac(_SizeFrac), KFac(_KFac),
277  KMin(_KMin), KMax(_KMax), Coverage(_Coverage) {
278 }
279 
280 void TLocClustStat::Save(TSOut& SOut) const {
281  // parameters
282  Alpha.Save(SOut);
283  SizeFrac.Save(SOut);
284  KFac.Save(SOut);
285  KMin.Save(SOut);
286  KMax.Save(SOut);
287  Coverage.Save(SOut);
288  // cluster stat
289  //SweepsV.Save(SOut);
290  //SizePhiH.Save(SOut);
291 }
292 
294  SweepsV.Clr(false); // node ids and conductances for each run of local clustering
295  SizePhiH.Clr(false); // conductances at cluster size K
296  BestCutH.Clr(false); // best cut (min conductance) at size K
297  BagOfWhiskerV.Clr(false); // (Size, Conductance) of bag of whiskers clusters
298 }
299 
300 void TLocClustStat::Run(const PUNGraph& _Graph, const bool& SaveAllSweeps, const bool& SaveAllCond, const bool& SaveBestNodesAtK) {
301  Graph = TSnap::GetMxWcc(_Graph);
302  const int Nodes = Graph->GetNodes();
303  const int Edges = Graph->GetEdges();
304  printf("\nLocal clustering: Graph(%d, %d)\n", Nodes, Edges);
305  printf(" Alpha: %g\n", Alpha());
306  printf(" K: %d -- %g -- %dm\n", KMin(), KFac(), int(KMax/Mega(1)));
307  printf(" Coverage: %d\n", Coverage());
308  printf(" SizeFrac: %g\n\n", SizeFrac());
309  TExeTm TotTm;
310  Clr();
311  TLocClust Clust(Graph, Alpha);
312  BestCut.CutNIdV.Clr(false);
313  BestCut.CutSz=-1; BestCut.Edges=-1;
314  double BestPhi = TFlt::Mx;
315  int prevK=-1;
316  bool NextDone=false;
317  if (SaveBestNodesAtK) { // fill buckets (only store nodes in clusters for sizes in SizeBucketSet)
318  SizeBucketSet.Clr();
319  double PrevBPos = 1, BPos = 1;
320  while (BPos <= Mega(100)) {
321  PrevBPos = (uint) floor(BPos);
322  BPos *= BinFactor;
323  if (floor(BPos) == PrevBPos) { BPos = PrevBPos + 1; }
324  SizeBucketSet.AddKey(int(floor(BPos) - 1));
325  }
326  }
327  for (int K = KMin, cnt=1; K < KMax; K = int(KFac * double(K))+1, cnt++) {
328  if (K == prevK) { K++; } prevK = K;
329  const int Runs = 2 + int(Coverage * floor(double(Graph->GetEdges()) / double(K)));
330  printf("%d] K: %d, %d runs\n", cnt+1, K, Runs);
331  if (NextDone) { break; } // done
332  if (K+1 > 2*Graph->GetEdges()) { K = Graph->GetEdges(); NextDone=true; }
333  //if (K+1 > Graph->GetEdges()) { K = Graph->GetEdges(); NextDone=true; }
334  TExeTm ExeTm, IterTm;
335  double MeanSz=0.0, MeanVol=0.0, Count=0.0;
336  for (int run = 0; run < Runs; run++) {
337  const int SeedNId = Graph->GetRndNId(); IterTm.Tick();
338  Clust.FindBestCut(SeedNId, K, SizeFrac);
339  const int Sz = Clust.BestCutNodes();
340  const int Vol = Clust.GetCutVol();
341  const double Phi = TMath::Round(Clust.GetCutPhi(), 4);
342  if (Sz == 0 || Vol == 0 || Phi == 0) { continue; }
343  MeanSz+=Sz; MeanVol+=Vol; Count+= 1;
344  if (SaveAllSweeps) { // save the full cut set and conductances for all trials
345  SweepsV.Add(TNodeSweep(SeedNId, Clust.GetNIdV(), Clust.GetPhiV())); }
346  int SAtBestPhi=-1;
347  for (int s = 0; s < Clust.Len(); s++) {
348  const int size = s+1;
349  const int cut = Clust.GetCut(s);
350  const int edges = (Clust.GetVol(s)-cut)/2;
351  const double phi = Clust.GetPhi(s);
352  if (( Clust.GetPhi(s) != double(cut)/double(2*edges+cut))) { continue; } // more than half of the edges
353  IAssert((Clust.GetVol(s) - cut) % 2 == 0);
354  IAssert(phi == double(cut)/double(2*edges+cut));
355  IAssert(phi >= 1.0/double((1+s)*s+1));
357  // TCutInfo CutInfo(size, edges, cut); Clust.GetNIdV().GetSubValV(0, size-1, CutInfo.CutNIdV);
358  //double MxFrac=0, AvgFrac=0, MedianFrac=0, Pct9Frac=0, Flake=0;
359  //CutInfo.GetFracDegOut(Graph, MxFrac, AvgFrac, MedianFrac, Pct9Frac, Flake);
360  //const double phi = MxFrac;
361  if (BestPhi >= phi) {
362  BestPhi = phi;
363  BestCut = TCutInfo(size, edges, cut);
364  SAtBestPhi = s;
365  }
367  //bool TAKE=false; if (! BestCutH.IsKey(size)) { TAKE=true; }
368  //else { BestCutH.GetDat(size).GetFracDegOut(Graph, MxFrac, AvgFrac, MedianFrac, Pct9Frac, Flake); if (MxFrac >= phi) { TAKE = true; } }
369  // if (TAKE) {
370  if (! BestCutH.IsKey(size) || BestCutH.GetDat(size).GetPhi() >= phi) { //new best cut (size, edges inside and nodes)
371  BestCutH.AddDat(size, TCutInfo(size, edges, cut)); // for every size store best cut (NIds inside the cut)
372  if (SaveBestNodesAtK) { // store node ids in best community for each size k
373  if (! SizeBucketSet.Empty() && ! SizeBucketSet.IsKey(size)) { continue; } // only save best clusters at SizeBucketSet
374  Clust.GetNIdV().GetSubValV(0, size-1, BestCutH.GetDat(size).CutNIdV); }
375  }
376  if (SaveAllCond) { // for every size store all conductances
377  SizePhiH.AddDat(size).Add(phi); }
378  }
379  if (SAtBestPhi != -1) { // take nodes in best cluster
380  const int size = SAtBestPhi+1;
381  Clust.GetNIdV().GetSubValV(0, size-1, BestCut.CutNIdV);
382  }
383  if (TLocClust::Verbose) {
384  printf(".");
385  if (run % 50 == 0) {
386  printf("\r %d / %d \r", run, Runs); }
387  }
388  }
389  if (TLocClust::Verbose) {
390  printf("\r %d / %d: %s \n", Runs, Runs, ExeTm.GetStr());
391  }
392  MeanSz/=Count; MeanVol/=Count;
393  printf(" Graph(%d, %d) ", Nodes, Edges);
394  printf(" mean: sz: %.2f vol: %.2f [%s] %s\n", MeanSz, MeanVol, ExeTm.GetStr(), TExeTm::GetCurTm());
395  }
396  SizePhiH.SortByKey();
397  for (int k = 0; k < SizePhiH.Len(); k++) {
398  SizePhiH[k].Sort(); }
399  SweepsV.Sort();
400  BestCutH.SortByKey();
401  printf("DONE. Graph(%d, %d): Total run time: %s\n\n", Nodes, Edges, TotTm.GetStr());
402 }
403 
405  BagOfWhiskers(Graph, BagOfWhiskerV, BestWhisk); // slow but exact
406  //BagOfWhiskers2(Graph, BagOfWhiskerV);
407 }
408 
409 // add a cut on NIdV nodes
410 void TLocClustStat::AddCut(const TIntV& NIdV) {
411  int Vol, Cut; double Phi;
412  TLocClust::GetCutStat(Graph, NIdV, Vol, Cut, Phi, Graph->GetEdges());
413  SizePhiH.AddDat(NIdV.Len()).Add(Phi);
414 }
415 
416 // add a cut of particular conductance
417 void TLocClustStat::AddCut(const int& ClustSz, const double& Phi) {
418  SizePhiH.AddDat(ClustSz).Add(Phi);
419 }
420 
421 // add the cut on particular set of nodes (calculate conductance and modularity)
422 void TLocClustStat::AddToBestCutH(const PUNGraph& Graph, const TIntV& NIdV, const bool& AddBestCond) {
423  const int K = NIdV.Len();
424  TCutInfo CutInfo(Graph, NIdV, true);
425  printf("new: %d\t%d\t%d\t%f\t%f\n", CutInfo.GetNodes(), CutInfo.GetEdges(),
426  CutInfo.GetCutSz(), CutInfo.GetPhi(), CutInfo.GetModular(Graph->GetEdges()));
427  if (! BestCutH.IsKey(K)) { BestCutH.AddDat(K, CutInfo); return; }
428  TCutInfo& CurCut = BestCutH.GetDat(K);
429  // keep the cluster with best conductance
430  if (AddBestCond && CurCut.GetPhi() > CutInfo.GetPhi()) { CurCut=CutInfo; }
431  // keep the cluster with best modularity
432  //const int E = Graph->GetEdges();
433  //if (! AddBestCond && CurCut.GetModular(E) < CutInfo.GetModular(E)) { CurCut=CutInfo; }
434 }
435 
436 const TLocClustStat::TCutInfo& TLocClustStat::FindBestCut(const int& Nodes) const {
437  double bestPhi = 1;
438  int CutId = -1;
439  if (Nodes > 0) {
440  IAssert(BestCutH.IsKey(Nodes));
441  return BestCutH.GetDat(Nodes);
442  } else {
443  for (int n = 0; n < BestCutH.Len(); n++) {
444  if (BestCutH[n].GetPhi() <= bestPhi) {
445  bestPhi = BestCutH[n].GetPhi(); CutId = n; }
446  }
447  IAssert(CutId != -1);
448  IAssert(! BestCutH[CutId].CutNIdV.Empty());
449  return BestCutH[CutId];
450  }
451 }
452 
453 // find the cut with the lowest Phi of particular size (if Nodes=-1 find absolute best cut)
454 double TLocClustStat::FindBestCut(const int& Nodes, TIntV& ClustNIdV) const {
455  const TCutInfo& Cut = FindBestCut(Nodes);
456  ClustNIdV = Cut.CutNIdV;
457  return Cut.GetPhi();
458 }
459 
460 int TLocClustStat::FindBestCut(const int& Nodes, int& Vol, int& Cut, double& Phi) const {
461  const TCutInfo& CutInfo = FindBestCut(Nodes);
462  Vol = CutInfo.GetVol();
463  Cut = CutInfo.GetCutSz();
464  Phi = CutInfo.GetPhi();
465  return CutInfo.GetNodes();
466 }
467 
468 // find best phi where the cut has N nodes, and the nodes in the cluster are not in the TabuSet
469 double TLocClustStat::FindBestCut(const int& Nodes, const TIntSet& TabuNIdSet, int& BestCutId) const {
470  double bestPhi = 1;
471  BestCutId = -1;
472  bool Tabu;
473  IAssert(! SweepsV.Empty());
474  for (int c = 0; c < SweepsV.Len(); c++) {
475  const TNodeSweep& Sweep = SweepsV[c];
476  if (Sweep.Len() < Nodes) { continue; }
477  if (Sweep.Phi(Nodes-1) > bestPhi) { continue; }
478  Tabu = false;
479  for (int i = 0; i < Nodes; i++) {
480  if (TabuNIdSet.IsKey(Sweep.NId(i))) { Tabu=true; break; }
481  }
482  if (Tabu) { continue; }
483  bestPhi = Sweep.Phi(Nodes-1);
484  BestCutId = c;
485  }
486  return bestPhi;
487 }
488 
489 // get statistics over all conductances at all cluster sizes
490 void TLocClustStat::GetCurveStat(TFltPrV& MeanV, TFltPrV& MedV, TFltPrV& Dec1V, TFltPrV& MinV, TFltPrV& MaxV) const {
491  TFltPrV BucketV;
492  MeanV.Clr(false); MedV.Clr(false); Dec1V.Clr(false); MinV.Clr(false); MaxV.Clr(false);
493  if (! SizePhiH.Empty()) { // stores conductances of all little clusters
494  const THash<TInt, TFltV>& KvH = SizePhiH;
495  for (int i = 0; i < KvH.Len(); i++) {
496  const double X = KvH.GetKey(i).Val; IAssert(X >= 1.0);
497  const TFltV& YVec = KvH[i];
498  TMom Mom;
499  for (int j = 0; j < YVec.Len(); j++) { Mom.Add(YVec[j]); }
500  Mom.Def();
501  /*IAssert(Mom.GetMean()>=0 && Mom.GetMean()<=1);
502  IAssert(Mom.GetMedian()>=0 && Mom.GetMedian()<=1);
503  IAssert(Mom.GetDecile(1)>=0 && Mom.GetDecile(1)<=1);
504  IAssert(Mom.GetMn()>=0 && Mom.GetMn()<=1);
505  IAssert(Mom.GetMx()>=0 && Mom.GetMx()<=1);*/
506  MeanV.Add(TFltPr(X, Mom.GetMean()));
507  MedV.Add(TFltPr(X, Mom.GetMedian()));
508  Dec1V.Add(TFltPr(X, Mom.GetDecile(1)));
509  MinV.Add(TFltPr(X, Mom.GetMn()));
510  MaxV.Add(TFltPr(X, Mom.GetMx()));
511  }
512  MeanV.Sort(); MedV.Sort(); Dec1V.Sort(); MinV.Sort(); MaxV.Sort();
513  // create log buckets (makes plots smaller and smoother)
514  TLocClustStat::MakeExpBins(MeanV, BucketV); MeanV.Swap(BucketV);
515  TLocClustStat::MakeExpBins(MedV, BucketV); MedV.Swap(BucketV);
516  TLocClustStat::MakeExpBins(Dec1V, BucketV); Dec1V.Swap(BucketV);
517  TLocClustStat::MakeExpBins(MinV, BucketV); MinV.Swap(BucketV);
518  TLocClustStat::MakeExpBins(MaxV, BucketV); MaxV.Swap(BucketV);
519  } else {
520  //const int GEdges = Graph->GetEdges();
521  for (int i = 0; i < BestCutH.Len(); i++) {
522  MinV.Add(TFltPr(BestCutH.GetKey(i).Val, BestCutH[i].GetPhi()));
523  }
524  MinV.Sort();
525  TLocClustStat::MakeExpBins(MinV, BucketV); MinV.Swap(BucketV);
526  }
527 }
528 
529 void TLocClustStat::GetBoltzmanCurveStat(const TFltV& TempV, TVec<TFltPrV>& NcpV) const {
530  IAssert(! SizePhiH.Empty()); // stores conductances of all little clusters
531  NcpV.Gen(TempV.Len());
532  TFltPrV BucketV;
533  for (int t = 0; t < TempV.Len(); t++) {
534  const double T = TempV[t];
535  for (int i = 0; i < SizePhiH.Len(); i++) {
536  const double X = SizePhiH.GetKey(i).Val; IAssert(X >= 1.0);
537  const TFltV& PhiV = SizePhiH[i];
538  double V = 0.0, SumW = 0.0;
539  for (int j = 0; j < PhiV.Len(); j++) {
540  V += PhiV[j] * exp(-PhiV[j]/T);
541  SumW += exp(-PhiV[j]/T);
542  }
543  //V /= PhiV.Len();
544  V /= SumW;
545  NcpV[t].Add(TFltPr(X, V));
546  }
547  TLocClustStat::MakeExpBins(NcpV[t], BucketV); NcpV[t].Swap(BucketV);
548  // normalize to start at (1,1)
549  //for (int i = 0; i < NcpV[t].Len(); i++) {
550  // NcpV[t][i].Val2 /= NcpV[t][0].Val2;
551  //}
552  }
553 }
554 
556  if (Graph.Empty()) {
557  return TStr::Fmt("A=%g, K=%d-%g-%s, Cvr=%d, SzFrc=%g", Alpha(), KMin(), KFac(), TInt::GetMegaStr(KMax()).CStr(), Coverage(), SizeFrac()); }
558  else {
559  return TStr::Fmt("A=%g, K=%d-%g-%s, Cvr=%d, SzFrc=%g G(%d, %d)", Alpha(), KMin(), KFac(), TInt::GetMegaStr(KMax()).CStr(), Coverage(), SizeFrac(),
560  Graph->GetNodes(), Graph->GetEdges());
561  }
562 }
563 
564 void TLocClustStat::PlotNCP(const TStr& OutFNm, TStr Desc) const {
565  if (Desc.Empty()) { Desc = OutFNm; }
566  TFltPrV MeanV, MedV, Dec1V, MinV, MaxV;
567  GetCurveStat(MeanV, MedV, Dec1V, MinV, MaxV);
568  TGnuPlot GP("ncp."+OutFNm, TStr::Fmt("%s. %s", Desc.CStr(), ParamStr().CStr()));
569  if (! MaxV.Empty()) { GP.AddPlot((MaxV), gpwLines, "MAX"); }
570  if (! MedV.Empty()) { GP.AddPlot((MedV), gpwLines, "MEDIAN"); }
571  if (! MeanV.Empty()) { GP.AddPlot((MeanV), gpwLines, "MEAN"); }
572  if (! Dec1V.Empty()) { GP.AddPlot((Dec1V), gpwLines, "1-st DECILE"); }
573  if (! MinV.Empty()) {
574  GP.AddPlot((MinV), gpwLines, "MIN");
575  //GP.AddPlot(MinV, gpwLines, "smooth MIN", "lw 2 smooth bezier");
576  }
577  if (! BagOfWhiskerV.Empty()) {
578  GP.AddPlot(BagOfWhiskerV, gpwLines, "Whiskers", "lw 1");
579  TFltPrV BestWhiskV; BestWhiskV.Add(TFltPr(BestWhisk));
580  GP.AddPlot(BestWhiskV, gpwPoints, "Best whisker", "pt 5 ps 2");
581  }
582  GP.SetScale(gpsLog10XY);
583  GP.SetXYLabel("k (number of nodes in the cluster)", "{/Symbol \\F} (conductance)");
584  GP.SetXRange(1, Graph->GetNodes()/2);
585  GP.SavePng();
586 }
587 
588 // NCP but with modularity on Y-axis
589 void TLocClustStat::PlotNCPModul(const TStr& OutFNm, TStr Desc) const {
590  if (Desc.Empty()) { Desc = OutFNm; }
591  TFltPrV MinV, BucketV;
592  const int GEdges = Graph->GetEdges();
593  for (int i = 0; i < BestCutH.Len(); i++) {
594  MinV.Add(TFltPr(BestCutH.GetKey(i).Val, BestCutH[i].GetModular(GEdges))); }
595  MinV.Sort();
596  TLocClustStat::MakeExpBins(MinV, BucketV); MinV.Swap(BucketV);
597  TGnuPlot GP("ncpMod."+OutFNm, TStr::Fmt("%s. %s", Desc.CStr(), ParamStr().CStr()));
598  if (! MinV.Empty()) {
599  GP.AddPlot((MinV), gpwLines, "MIN"); }
600  GP.SetScale(gpsLog10XY);
601  GP.SetXYLabel("k (number of nodes in the cluster)", "Q (modularity)");
602  GP.SetXRange(1, Graph->GetNodes()/2);
603  GP.SavePng();
604 }
605 
606 // plot top 10 surface/volume curves (disjont clusters of particular size)
607 void TLocClustStat::PlotNcpTop10(const TStr& OutFNm, TStr Desc, const int& TakeMinN) const {
608  if (Desc.Empty()) { Desc = OutFNm; }
609  const double BinFactor = 1.05;
610  double BinPos=1;
611  int PrevBPos=1, CurBPos=1, CutId;
612  bool AddSome;
613  TVec<TFltPrV> Curves(TakeMinN);
614  while (true) {
615  PrevBPos = CurBPos;
616  BinPos *= BinFactor; // do exponential binning
617  CurBPos = (int) floor(BinPos);
618  if (CurBPos == PrevBPos) { CurBPos=PrevBPos+1; BinPos=CurBPos; }
619  const int Nodes = CurBPos;
620  TIntSet TabuNIdSet(Graph->GetNodes());
621  AddSome = false;
622  for (int t = 0; t < TakeMinN; t++) {
623  const double Phi = FindBestCut(Nodes, TabuNIdSet, CutId);
624  if (CutId == -1) { break; }
625  Curves[t].Add(TFltPr(Nodes, Phi));
626  for (int n = 0; n < Nodes; n++) {
627  TabuNIdSet.AddKey(SweepsV[CutId].NId(n)); }
628  AddSome = true;
629  }
630  if (! AddSome) { break; }
631  }
632  TGnuPlot GP("ncpTop."+OutFNm, TStr::Fmt("%s. Top disjoint clusters. Take:%d, %s", Desc.CStr(), TakeMinN, ParamStr().CStr()));
633  for (int i = 0; i < Curves.Len(); i++) {
634  GP.AddPlot(Curves[i], gpwLines, TStr::Fmt("MIN %d", i+1), "lw 1"); }
635  //if (! BagOfWhiskerV.Empty()) { GP.AddPlot(BagOfWhiskerV, gpwLines, " Whiskers", "lw 2"); }
636  GP.SetScale(gpsLog10XY);
637  GP.SetXYLabel("k (number of nodes in the cluster)", "{/Symbol \\F} (conductance)");
638  GP.SetXRange(1, Graph->GetNodes()/2);
639  GP.SavePng();
640 }
641 
642 // plot conductance on the boundary / counductance inside vs. cluster size
643 void TLocClustStat::PlotPhiInOut(const TStr& OutFNm, TStr Desc) const {
644  IAssert(! BestCutH.Empty() && ! Graph.Empty());
645  TFltPrV PhiInV, PhiBoundV, PhiRatV;
646  FILE *F = fopen(TStr::Fmt("phiInOut.%s-all.tab", OutFNm.CStr()).CStr(), "wt");
647  fprintf(F, "#Nodes\tEdges\tVol\tCut\tPhi\tInNodes\tInEdges\tInVol\tInCut\tInPhi\n");
648  TLocClustStat ClustStat2(Alpha, 1, KMax, KFac, Coverage, SizeFrac);
649  const double IdxFact = 1.05;
650  int curIdx=1, prevIdx=1;
651  while (curIdx <= BestCutH.Len()) {
652  const TCutInfo& CutInfo = BestCutH[curIdx-1];
653  if (CutInfo.GetNodes() > 1) {
654  PUNGraph ClustG = TSnap::GetSubGraph(Graph, CutInfo.CutNIdV);
655  ClustStat2.Run(ClustG);
656  const TCutInfo& InCut = ClustStat2.FindBestCut(-1);
657  PhiInV.Add(TFltPr(CutInfo.GetNodes(), InCut.GetPhi()));
658  PhiBoundV.Add(TFltPr(CutInfo.GetNodes(), CutInfo.GetPhi()));
659  PhiRatV.Add(TFltPr(CutInfo.GetNodes(), InCut.GetPhi()/CutInfo.GetPhi()));
660  fprintf(F, "%d\t%d\t%d\t%d\t%f\t%d\t%d\t%d\t%d\t%f\n", CutInfo.GetNodes(), CutInfo.GetEdges(), CutInfo.GetVol(),
661  CutInfo.GetCutSz(), CutInfo.GetPhi(), InCut.GetNodes(), InCut.GetEdges(), InCut.GetVol(), InCut.GetCutSz(), InCut.GetPhi());
662  fflush(F);
663  }
664  prevIdx = curIdx;
665  curIdx = (int) TMath::Round(double(curIdx)*IdxFact);
666  if (prevIdx == curIdx) { curIdx++; }
667  }
668  fclose(F);
669  { TGnuPlot GP("phiInOut."+OutFNm, TStr::Fmt("%s. %s", Desc.CStr(), ParamStr().CStr()));
670  GP.AddPlot(PhiBoundV, gpwLines, "CUT conductance", "lw 1");
671  GP.AddPlot(PhiInV, gpwLines, "INSIDE conductance", "lw 1");
672  //GP.AddPlot(PhiRatV, gpwLines, "RATIO (inside/boundary)", "lw 1");
673  GP.SetXRange(1, Graph->GetNodes()/2); GP.SetScale(gpsLog10XY);
674  GP.SetXYLabel("k (number of nodes in the cluster)", "{/Symbol \\F} (conductance)");
675  //GP.AddCmd("set logscale xyy2 10");
676  //GP.AddCmd("set y2label \"Conductance ratio (inside/boundary -- higher better)\"");
677  GP.SavePng(); }
678  //system(TStr(TStr("replace_all.py phiInOut.")+OutFNm+".plt \"title \\\"RATIO (inside/boundary)\" \"axis x1y2 title \\\"RATIO (inside/boundary)\"").CStr());
679  //GP.RunGnuPlot();
680  { TGnuPlot GP("phiInOutRat."+OutFNm, TStr::Fmt("%s. %s", Desc.CStr(), ParamStr().CStr()));
681  GP.AddPlot(PhiRatV, gpwLines, "RATIO (Inside / Boundary)", "lw 1");
682  GP.SetXRange(1, Graph->GetNodes()/2); GP.SetScale(gpsLog10XY);
683  GP.SetXYLabel("Nodes", "Conductance ratio (inside/boundary) -- higher better");
684  GP.SavePng(); }
685 }
686 
687 // Plot edges inside, edges cut and conductance.
688 // run: replace_all.py cutEdges.*.plt "title \"Conductance" "axis x1y2 title \"Conductance"
689 void TLocClustStat::PlotBestClustDens(TStr OutFNm, TStr Desc) const {
690  if (Desc.Empty()) { Desc = OutFNm; }
691  const int len = BestCutH.Len();
692  TFltPrV CutV(len, 0), EdgesV(len, 0), PhiV(len,0);
693  for (int i = 0; i < BestCutH.Len(); i++) {
694  const double size = BestCutH.GetKey(i).Val;
695  CutV.Add(TFltPr(size, BestCutH[i].GetCutSz()));
696  EdgesV.Add(TFltPr(size, BestCutH[i].GetEdges()));
697  PhiV.Add(TFltPr(size, BestCutH[i].GetPhi()));
698  }
699  TGnuPlot GP("cutEdges."+OutFNm, TStr::Fmt("%s. %s", Desc.CStr(), ParamStr().CStr()));
700  TFltPrV NewV; TLocClustStat::MakeExpBins(EdgesV, NewV);
701  GP.AddPlot(NewV, gpwLines, "Edges inside");
702  TLocClustStat::MakeExpBins(CutV, NewV);
703  GP.AddPlot(NewV, gpwLines, "Cut edges");
704  TLocClustStat::MakeExpBins(PhiV, NewV);
705  GP.AddPlot(NewV, gpwLines, "Conductance");
706  GP.SetXYLabel("Cluster size", "Edges"); GP.SetScale(gpsLog10XY);
707  GP.AddCmd("set logscale xyy2 10");
708  GP.AddCmd("set y2label \"Conductance\"");
709  GP.SavePng();
710  system(TStr(TStr("replace_all.py cutEdges.")+OutFNm+".plt \"title \\\"Conductance\" \"axis x1y2 title \\\"Conductance\"").CStr());
711  GP.RunGnuPlot();
712 }
713 
714 // all different conducances of all sizes (not just lower envelope)
715 void TLocClustStat::PlotNCPScatter(const TStr& OutFNm, TStr Desc) const {
716  if (Desc.Empty()) { Desc = OutFNm; }
717  THashSet<TFltPr> PhiSzH;
718  IAssertR(! SizePhiH.Empty(), "Set SaveAllCond=true in TLocClustStat::Run()");
719  for (int k = 0; k < SizePhiH.Len(); k++) {
720  const int K = SizePhiH.GetKey(k);
721  const TFltV& PhiV = SizePhiH[k];
722  for (int p = 0; p < PhiV.Len(); p++) {
723  PhiSzH.AddKey(TFltPr(K, TMath::Round(PhiV[p], 3))); }
724  }
725  TFltPrV PhiSzV; PhiSzH.GetKeyV(PhiSzV);
726  TGnuPlot GP("ncpScatter."+OutFNm, TStr::Fmt("%s. %s", Desc.CStr(), ParamStr().CStr()));
727  GP.AddPlot(PhiSzV, gpwPoints, "", "pt 5 ps 0.2");
728  GP.SetScale(gpsLog10XY);
729  GP.SetXYLabel("k (number of nodes in the cluster)", "{/Symbol \\F} (conductance)");
730  GP.SetXRange(1, Graph->GetNodes()/2);
731  GP.SavePng();
732 }
733 
734 // histogram of conductances for a fixed CmtySz
735 void TLocClustStat::PlotPhiDistr(const int& CmtySz, const TStr& OutFNm, TStr Desc) const {
736  IAssert(! SizePhiH.Empty());
737  const TFltV& PhiV = SizePhiH.GetDat(CmtySz);
738  THash<TFlt, TInt> PhiCntH;
739  for (int i = 0; i < PhiV.Len(); i++) {
740  const double Buck = TMath::Round(PhiV[i], 3);
741  PhiCntH.AddDat(Buck) += 1;
742  }
743  TGnuPlot::PlotValCntH(PhiCntH, TStr::Fmt("phiDistr%03d.%s", CmtySz, OutFNm.CStr()),
744  TStr::Fmt("%s. %s", Desc.CStr(), ParamStr().CStr()), "{/Symbol \\F} (conductance)", "Count");
745 }
746 
747 void TLocClustStat::PlotBoltzmanCurve(const TStr& OutFNm, TStr Desc) const {
748  TFltPrV MeanV1, MedV1, Dec1V1, MinV1, MaxV1;
749  GetCurveStat(MeanV1, MedV1, Dec1V1, MinV1, MaxV1);
750  TVec<TFltPrV> NcpV;
751  //const TFltV TempV = TFltV::GetV(0.05, 0.01, 0.1, 10, 100, 1000);
752  const TFltV TempV = TFltV::GetV(0.001, 0.005, 0.01, 0.02, 0.05, 0.1, 0.5, 1);
753  GetBoltzmanCurveStat(TempV, NcpV);
754  TGnuPlot GP("ncp."+OutFNm+"-B", TStr::Fmt("%s. %s", Desc.CStr(), ParamStr().CStr()));
755  GP.AddPlot(MinV1, gpwLines, TStr::Fmt("%s MIN (%d, %d)", Desc.CStr(), Graph->GetNodes(), Graph->GetEdges()), "lw 1");
756  GP.AddPlot(MeanV1, gpwLines, "Avg", "lw 1");
757  GP.AddPlot(MedV1, gpwLines, "Median", "lw 1");
758  GP.AddPlot(Dec1V1, gpwLines, "Decile-1", "lw 1");
759  for (int t = 0; t < TempV.Len(); t++) {
760  GP.AddPlot(NcpV[t], gpwLines, TStr::Fmt("Temp %g", TempV[t]()), "lw 1");
761  }
762  GP.SetScale(gpsLog10XY);
763  GP.SetXYLabel("k (number of nodes in the cluster)", "{/Symbol \\F} (conductance)");
764  GP.SavePng();
765  //
766  TFltPrV SzNClustV;
767  int kCnt=1;
768  for (int i = 0; i < SizePhiH.Len(); i++) {
769  const int K = SizePhiH.GetKey(i);
770  const TFltV& PhiV = SizePhiH[i];
771  SzNClustV.Add(TFltPr(K, PhiV.Len()));
772  if (K>2 && (pow(10.0, (int)log10((double)K))==K || (K >=10 && K<=100 && K%10==0) || (K >=100 && K<=1000 && K%100==0))) {
773  THash<TFlt, TFlt> CntH;
774  for (int p = 0; p < PhiV.Len(); p++) {
775  CntH.AddDat(TMath::Round(log10(PhiV[p].Val),1)) += 1;
776  }
777  TGnuPlot::PlotValCntH(CntH, TStr::Fmt("ncp.%s-c%02d", OutFNm.CStr(), kCnt++), TStr::Fmt("%s. K=%d, NPieces=%d, %s",
778  Desc.CStr(), K, PhiV.Len(), ParamStr().CStr()), "log_10 {/Symbol \\F} (conductance)",
779  TStr::Fmt("Number of pieces of such conductance, K=%d, NPieces=%d)", K, PhiV.Len()));
780  }
781  }
782  TGnuPlot::PlotValV(SzNClustV, "ncp."+OutFNm+"-ClSz", TStr::Fmt("%s. %s", Desc.CStr(), ParamStr().CStr()),
783  "k (cluster size)", "c(k) (number of extracted clusters)", gpsLog);
784 }
785 
786 void TLocClustStat::ImposeNCP(const TLocClustStat& LcStat2, TStr OutFNm, TStr Desc, TStr Desc1, TStr Desc2) const {
787  if (Desc.Empty()) { Desc = OutFNm; }
788  TFltPrV MeanV1, MedV1, Dec1V1, MinV1, MaxV1;
789  TFltPrV MeanV2, MedV2, Dec1V2, MinV2, MaxV2;
790  GetCurveStat(MeanV1, MedV1, Dec1V1, MinV1, MaxV1);
791  LcStat2.GetCurveStat(MeanV2, MedV2, Dec1V2, MinV2, MaxV2);
792  // plot
793  TGnuPlot GP("ncp."+OutFNm, TStr::Fmt("%s. %s", Desc.CStr(), ParamStr().CStr()));
794  if (! MinV1.Empty()) { GP.AddPlot(MinV1, gpwLines, TStr::Fmt("%s MIN (%d, %d)", Desc1.CStr(), Graph->GetNodes(), Graph->GetEdges()), "lw 1"); }
795  if (! MinV2.Empty()) { GP.AddPlot(MinV2, gpwLines, TStr::Fmt("%s MIN (%d, %d)", Desc2.CStr(), LcStat2.Graph->GetNodes(), LcStat2.Graph->GetEdges()), "lw 1"); }
796  if (! BagOfWhiskerV.Empty()) {
797  GP.AddPlot(BagOfWhiskerV, gpwLines, Desc1+" whiskers", "lw 1");
798  TFltPrV BestWhiskV; BestWhiskV.Add(BestWhisk);
799  GP.AddPlot(BestWhiskV, gpwPoints, Desc1+" Best whisker", "pt 5 ps 2"); }
800  if (! LcStat2.BagOfWhiskerV.Empty()) {
801  GP.AddPlot(LcStat2.BagOfWhiskerV, gpwLines, Desc2+" whiskers", "lw 1");
802  TFltPrV BestWhiskV; BestWhiskV.Add(LcStat2.BestWhisk);
803  GP.AddPlot(BestWhiskV, gpwPoints, Desc2+" Best whisker", "pt 5 ps 2"); }
804  GP.SetScale(gpsLog10XY);
805  GP.SetXYLabel("k (number of nodes in the cluster)", "{/Symbol \\F} (conductance)");
806  //GP.SetXRange(1, Graph->GetNodes()/2);
807  GP.SavePng();
808 }
809 
810 void TLocClustStat::ImposeNCP(const TLocClustStat& LcStat2, const TLocClustStat& LcStat3, TStr OutFNm, TStr Desc, TStr Desc1, TStr Desc2, TStr Desc3) const {
811  if (Desc.Empty()) { Desc = OutFNm; }
812  TFltPrV MeanV1, MedV1, Dec1V1, MinV1, MaxV1;
813  TFltPrV MeanV2, MedV2, Dec1V2, MinV2, MaxV2;
814  TFltPrV MeanV3, MedV3, Dec1V3, MinV3, MaxV3;
815  GetCurveStat(MeanV1, MedV1, Dec1V1, MinV1, MaxV1);
816  LcStat2.GetCurveStat(MeanV2, MedV2, Dec1V2, MinV2, MaxV2);
817  LcStat3.GetCurveStat(MeanV3, MedV3, Dec1V3, MinV3, MaxV3);
818  // plot
819  TGnuPlot GP("phiTR."+OutFNm, TStr::Fmt("%s. %s", Desc.CStr(), ParamStr().CStr()));
820  if (! MinV1.Empty()) { GP.AddPlot(MinV1, gpwLines, Desc1+" MIN", "lw 1"); }
821  if (! MinV2.Empty()) { GP.AddPlot(MinV2, gpwLines, Desc2+" MIN", "lw 1"); }
822  if (! MinV3.Empty()) { GP.AddPlot(MinV3, gpwLines, Desc3+" MIN", "lw 1"); }
823  if (! BagOfWhiskerV.Empty()) {
824  GP.AddPlot(BagOfWhiskerV, gpwLines, Desc1+" whiskers", "lw 1");
825  TFltPrV BestWhiskV; BestWhiskV.Add(BestWhisk);
826  GP.AddPlot(BestWhiskV, gpwPoints, Desc1+" Best whisker", "pt 5 ps 2"); }
827  if (! LcStat2.BagOfWhiskerV.Empty()) {
828  GP.AddPlot(LcStat2.BagOfWhiskerV, gpwLines, Desc2+" whiskers", "lw 1");
829  TFltPrV BestWhiskV; BestWhiskV.Add(LcStat2.BestWhisk);
830  GP.AddPlot(BestWhiskV, gpwPoints, Desc2+" Best whisker", "pt 5 ps 2"); }
831  if (! LcStat3.BagOfWhiskerV.Empty()) {
832  GP.AddPlot(LcStat3.BagOfWhiskerV, gpwLines, Desc3+" whiskers", "lw 1");
833  TFltPrV BestWhiskV; BestWhiskV.Add(LcStat3.BestWhisk);
834  GP.AddPlot(BestWhiskV, gpwPoints, Desc3+" Best whisker", "pt 5 ps 2"); }
835  GP.SetScale(gpsLog10XY);
836  GP.SetXYLabel("k (number of nodes in the cluster)", "{/Symbol \\F} (conductance)");
837  GP.SetXRange(1, Graph->GetNodes()/2);
838  GP.SavePng();
839 }
840 
841 void TLocClustStat::SaveTxtInfo(const TStr& OutFNmPref, const TStr& Desc, const bool& SetMaxAt1) const {
842  printf("Save text info...");
843  TExeTm ExeTm;
844  const int GNodes = Graph->GetNodes();
845  const int GEdges = Graph->GetEdges();
846  TVec<TFltV> ColV(17);
847  double MxFrac=0, AvgFrac=0, MedianFrac=0, Pct9Frac=0, Flake=0;
848  // double PrevBPos = 1, BPos = 1;
849  for (int i = 0; i < SizeBucketSet.Len(); i++) {
850  if ( !BestCutH.IsKey(SizeBucketSet[i])) { continue; }
852  C.GetFracDegOut(Graph, MxFrac, AvgFrac, MedianFrac, Pct9Frac, Flake);
853  ColV[0].Add(C.Nodes()); ColV[1].Add(C.Edges());
854  ColV[2].Add(C.CutSz()); ColV[3].Add(C.GetPhi());
855  ColV[4].Add(C.GetExpansion()); ColV[5].Add(C.GetIntDens());
856  ColV[6].Add(C.GetCutRatio(GNodes)); ColV[7].Add(C.GetNormCut(GEdges));
857  ColV[8].Add(MxFrac); ColV[9].Add(AvgFrac);
858  ColV[10].Add(MedianFrac); ColV[11].Add(Pct9Frac); ColV[12].Add(Flake);
859  ColV[13].Add(double(2.0*C.Edges)); ColV[14].Add(C.GetExpEdgesIn(GEdges));
860  ColV[15].Add(C.GetModular(GEdges)); ColV[16].Add(C.GetModRat(GEdges));
861  printf(".");
862  }
863  // normalize so that maximum value is at 1
864  if (SetMaxAt1) {
865  for (int c = 1; c < ColV.Len(); c++) {
866  double MaxVal=1e-10;
867  for (int r = 0; r < ColV[c].Len(); r++) { MaxVal = TMath::Mx(MaxVal, ColV[c][r]()); }
868  for (int r = 0; r < ColV[c].Len(); r++) { ColV[c][r] /= MaxVal; }
869  }
870  }
871  // save
872  const TStr DatFNm = TStr::Fmt("ncp.%s.INFO.tab", OutFNmPref.CStr());
873  FILE *F = fopen(DatFNm.CStr(), "wt");
874  fprintf(F, "# %s %s\n", Desc.CStr(), ParamStr().CStr());
875  fprintf(F, "#N_inside\tE_inside\tE_across\tConductance\tExpansion\tIntDensity\tCutRatio\tNormCut\tMx_FracDegOut\tAvg_FDO\tMedian_FDO\t90Pct_FDO\tFlake_FDO\tVolume\tExpVolume\tModularity\tModRatio\n");
876  for (int r = 0; r < ColV[0].Len(); r++) {
877  fprintf(F, "%g", ColV[0][r]());
878  for (int c = 1; c < ColV.Len(); c++) {
879  fprintf(F, "\t%g", ColV[c][r]()); }
880  fprintf(F, "\n");
881  }
882  fclose(F);
883  printf("[%s]\n", ExeTm.GetStr());
884  TGnuPlot GP(TStr::Fmt("ncp.%s.All", OutFNmPref.CStr()), TStr::Fmt("%s %s",
885  Desc.CStr(), ParamStr().CStr()));
886  GP.AddPlot(DatFNm, 1, 4, gpwLines, "Conductance", "lw 2");
887  GP.AddPlot(DatFNm, 1, 5, gpwPoints, "Expansion", "pt 3");
888  GP.AddPlot(DatFNm, 1, 6, gpwPoints, "Internal Density", "pt 5 ps 0.8");
889  GP.AddPlot(DatFNm, 1, 7, gpwPoints, "Cut Ratio", "pt 6");
890  GP.AddPlot(DatFNm, 1, 8, gpwPoints, "Normalized Cut", "pt 7");
891  GP.AddPlot(DatFNm, 1, 9, gpwPoints, "Maximum FDO", "pt 9");
892  GP.AddPlot(DatFNm, 1, 10, gpwPoints, "Avg FDO", "pt 11");
893  GP.AddPlot(DatFNm, 1, 13, gpwPoints, "Flake FDO", "pt 13");
894  GP.SetScale(gpsLog10XY);
895  GP.SetXYLabel("k (number of nodes in the cluster)", "Normalized community score (lower is better)");
896  GP.SavePng();
897 }
898 
899 // conductances if clusters are composed of disjoint pieces that can be separated
900 // from the graph by a single edge
901 void TLocClustStat::BagOfWhiskers(const PUNGraph& Graph, TFltPrV& SizePhiV, TFltPr& MaxWhisk) {
902  TCnComV Cn1ComV;
903  TSnap::Get1CnCom(Graph, Cn1ComV);
904  TIntPrV SzVolV;
905  int MxSize=0;
906  if (Cn1ComV.Empty()) { printf("** No bridges\n"); SizePhiV.Clr(); return; }
907  // Graph->SaveEdgeList("g-vol.txt"); TGraphViz::Plot(Graph, gvlNeato, "g-vol.gif"); Fail; } IAssert(vol <= sz*(sz-1));
908  printf(" 1-connected components: %d\n", Cn1ComV.Len());
909  MaxWhisk = TFltPr(1,1);
910  for (int c = 0; c < Cn1ComV.Len(); c++) {
911  const TIntV& NIdV = Cn1ComV[c].NIdV;
912  const int sz = NIdV.Len();
913  if (sz < 2) { continue; }
914  int vol = 0; // volume is the size of degrees
915  for (int n = 0; n < sz; n++) {
916  vol += Graph->GetNI(NIdV[n]).GetOutDeg(); }
917  SzVolV.Add(TIntPr(sz, vol));
918  MxSize += sz;
919  if (1.0/double(vol) < MaxWhisk.Val2) { MaxWhisk=TFltPr(NIdV.Len(), 1.0/double(vol)); }
920  }
921  SzVolV.Sort(false);
922  // compose clusters
923  THash<TInt, TIntSet> ItemSetH(MxSize, true);
924  THash<TInt, TInt> VolH(MxSize, true);
925  THash<TInt, TFlt> CostH(MxSize, true);
926  ItemSetH.AddKey(0); VolH.AddKey(0);
927  TExeTm ExeTm;
928  // exact up to size 1000
929  for (int size = 2; size <= TMath::Mn(MxSize, 1000); size++) {
930  for (int item = 0; item <SzVolV.Len(); item++) {
931  const int smallSz = size-SzVolV[item].Val1;
932  if (ItemSetH.IsKey(smallSz)) {
933  const TIntSet& SmallSet = ItemSetH.GetDat(smallSz);
934  if (SmallSet.IsKey(item)) { continue; }
935  const int SmallVol = VolH.GetDat(smallSz);
936  // combine smaller nad new solution
937  const double curCost = CostH.IsKey(size) ? double(CostH.GetDat(size)) : double(10e10);
938  const double newCost = double(SmallSet.Len()+1) / double(SmallVol+SzVolV[item].Val2);
939  if (curCost < newCost) { continue; }
940  VolH.AddDat(size, SmallVol+SzVolV[item].Val2);
941  ItemSetH.AddDat(size, SmallSet);
942  ItemSetH.GetDat(size).AddKey(item);
943  CostH.AddDat(size, newCost);
944  }
945  }
946  if (VolH.IsKey(size) && size%100==0) {
947  printf("\rSize: %d/%d: vol: %d, items: %d/%d [%s]", size, MxSize, VolH.GetDat(size).Val,
948  ItemSetH.GetDat(size).Len(), SzVolV.Len(), ExeTm.GetStr());
949  }
950  }
951  // add sizes larger than 1000
952  printf("\nAdding sizes > 1000 nodes...");
953  int partSz=0; double partVol=0.0;
954  for (int i = 0; i < SzVolV.Len(); i++) {
955  partSz += SzVolV[i].Val1();
956  partVol += SzVolV[i].Val2();
957  if (partSz < 1000) { continue; }
958  const double curCost = CostH.IsKey(partSz) ? double(CostH.GetDat(partSz)) : double(10e10);
959  const double partPhi = double(i+1)/partVol;
960  if (partPhi < curCost) {
961  CostH.AddDat(partSz, partPhi);
962  }
963  }
964  VolH.SortByKey();
965  CostH.SortByKey();
966  SizePhiV.Gen(CostH.Len(), 0);
967  SizePhiV.Add(TFltPr(1, 1));
968  for (int s = 0; s < CostH.Len(); s++) {
969  const int size = CostH.GetKey(s);
970  const double cond = CostH[s]; //double(ItemSetH.GetDat(size).Len())/double(VolH[s]);
971  SizePhiV.Add(TFltPr(size, cond));
972  }
973  printf("done\n");
974 }
975 
976 // faster greedy version
977 void TLocClustStat::BagOfWhiskers2(const PUNGraph& Graph, TFltPrV& SizePhiV) {
978  TCnComV Cn1ComV;
979  TSnap::Get1CnCom(Graph, Cn1ComV);
980  TIntPrV SzVolV;
981  int MxSize=0, TotVol=0;
982  if (Cn1ComV.Empty()) { printf("** No bridges\n"); SizePhiV.Clr(); return; }
983  printf(" 1-connected components: %d\n", Cn1ComV.Len());
984  for (int c = 0; c < Cn1ComV.Len(); c++) {
985  const TIntV& NIdV = Cn1ComV[c].NIdV;
986  const int sz = NIdV.Len();
987  if (sz < 2) { continue; }
988  int vol = 0; // volume is the size of degrees
989  for (int n = 0; n < sz; n++) {
990  vol += Graph->GetNI(NIdV[n]).GetOutDeg(); }
991  SzVolV.Add(TIntPr(sz, vol));
992  MxSize += sz; TotVol += vol;
993  }
994  printf(" Total size: %d\t Total vol: %d\n", MxSize, TotVol);
995  SzVolV.Sort(false);
996  // compose clusters
997  THash<TFlt, TFlt> SizePhiH(MxSize, true);
998  for (int i = 0; i < SzVolV.Len(); i++) {
999  const int Sz = SzVolV[i].Val1();
1000  const double Phi = 1.0/double(SzVolV[i].Val2());
1001  if ((! SizePhiH.IsKey(Sz)) || SizePhiH.GetDat(Sz) > Phi) {
1002  SizePhiH.AddDat(Sz, Phi); }
1003  }
1004  double partSz=0.0, partVol=0.0;
1005  for (int i = 0; i < SzVolV.Len(); i++) {
1006  partSz += SzVolV[i].Val1();
1007  partVol += SzVolV[i].Val2();
1008  const double partPhi = double(i+1)/partVol;
1009  if ((! SizePhiH.IsKey(partSz)) || partPhi < SizePhiH.GetDat(partSz)) {
1010  SizePhiH.AddDat(partSz, partPhi); }
1011  }
1012  SizePhiV.Gen(SizePhiH.Len()+1, 0);
1013  SizePhiV.Add(TFltPr(1, 1));
1014  SizePhiH.SortByKey();
1015  for (int s = 0; s < SizePhiH.Len(); s++) {
1016  SizePhiV.Add(TFltPr(SizePhiH.GetKey(s), SizePhiH[s]));
1017  }
1018 }
1019 
1020 void TLocClustStat::MakeExpBins(const TFltPrV& ValV, TFltPrV& NewV) {
1021  if (ValV.Empty()) { NewV.Clr(false); return; }
1022  NewV.Gen(1000, 0);
1023  double PrevBPos = 1, BPos = 1;
1024  int i = 0;
1025  while (BPos <= ValV.Last().Val1) {
1026  //if (TakeValAt == 1) {
1027  // while (i < ValV.Len() && ValV[i].Val1 <= BPos) { i++; }
1028  // NewV.Add(ValV[i-1]); }
1029  //else if (TakeValAt == 2) {
1030  int MinI=-1; double MinCnt=TFlt::Mx;
1031  while (i < ValV.Len() && ValV[i].Val1 <= BPos) {
1032  if (ValV[i].Val2 < MinCnt) { MinCnt=ValV[i].Val2; MinI=i; } i++; }
1033  if (MinI>=0 && MinI<ValV.Len()) {
1034  NewV.Add(ValV[MinI]); }
1035  PrevBPos = (uint) floor(BPos);
1036  BPos *= BinFactor;
1037  if (floor(BPos) == PrevBPos) { BPos = PrevBPos + 1; }
1038  }
1039  NewV.Add(ValV.Last());
1040 }
1041 
1042 void TLocClustStat::MakeExpBins(const TFltV& ValV, TFltV& NewV) {
1043  if (ValV.Empty()) { NewV.Clr(false); return; }
1044  NewV.Gen(1000, 0);
1045  double PrevBPos = 1, BPos = 1;
1046  int i = 1;
1047  NewV.Add(ValV[0]);
1048  while (BPos <= ValV.Len()) {
1049  int MinI=-1; double MinCnt=TFlt::Mx;
1050  while (i < ValV.Len() && i <= BPos) {
1051  if (ValV[i] < MinCnt) { MinCnt=ValV[i]; MinI=i; } i++; }
1052  if (MinI>=0 && MinI<ValV.Len()) {
1053  NewV.Add(ValV[MinI]); }
1054  PrevBPos = (uint) floor(BPos);
1055  BPos *= BinFactor;
1056  if (floor(BPos) == PrevBPos) { BPos = PrevBPos + 1; }
1057  }
1058  NewV.Add(ValV.Last());
1059 }
1060 
1061 
1063 // Local clustering for a set of graphs (loads ncp-*.tab files)
1065  TStr FNm;
1066  for (TFFile FFile(FNmWc); FFile.Next(FNm); ) {
1067  TSsParser Ss(FNm, ssfTabSep, true, false);
1068  int TrueNcpId=-1, WhiskId=-1, RewBestWhiskId=-1, RewId=-1, BestWhiskId=-1;
1069  while (Ss.Next()) {
1070  for (int f = 0; f < Ss.GetFlds(); f++) {
1071  // load ForestFire parameter (fwd burn prob)
1072  if (strstr(Ss[f], "FWD:")) {
1073  TStr S(Ss[f]); const int x = S.SearchStr("FWD:");
1074  ParamValV.Add(S.GetSubStr(x+4, S.SearchCh(' ', x+1)-1).GetFlt());
1075  }
1076  // extract column names
1077  if (strstr(Ss[f], "ORIGINAL MIN")!=NULL) {
1078  GNmV.Add(TStr::Fmt("%s %s", FNm.GetSubStr(FNm.SearchCh('.')+1, FNm.SearchChBack('.')-1).CStr(), strchr(Ss[f], '(')));
1079  int Nodes=0,Edges=0; sscanf(strchr(Ss[f], '(')+1, "%d,%d)", &Nodes, &Edges);
1080  GSizeV.Add(TIntPr(Nodes, Edges));
1081  printf("%s: %d %d\n", GNmV.Last().CStr(), Nodes, Edges);
1082  TrueNcpId=f;
1083  }
1084  if (strstr(Ss[f], "ORIGINAL whisker")!=NULL || strstr(Ss[f], "TRUE whisker")!=NULL) { WhiskId=f; }
1085  if (strstr(Ss[f], "ORIGINAL Best whisker")!=NULL || strstr(Ss[f], "TRUE Best whisker")!=NULL) { BestWhiskId=f; }
1086  if (strstr(Ss[f], "REWIRED MIN")!=NULL || strstr(Ss[f], "RAND MIN")!=NULL) { RewId=f; }
1087  if (strstr(Ss[f], "REWIRED Best whisker")!=NULL || strstr(Ss[f], "RAND Best whisker")!=NULL) { RewBestWhiskId=f; }
1088  }
1089  if (TrueNcpId!=-1 || WhiskId!=-1) { break; }
1090  }
1091  if (TrueNcpId < 0) { printf("%s\n", FNm.GetFMid().CStr()); break; }
1092  if (BestWhiskId < 0) { WhiskerV.Add(TFltPr(1,1)); }
1093  if (RewBestWhiskId < 0) { RewWhiskerV.Add(TFltPr(1,1)); }
1094  NcpV.Add(); WhiskNcpV.Add(); RewNcpV.Add();
1095  TFltPrV& Ncp = NcpV.Last();
1096  TFltPrV& WhiskNcp = WhiskNcpV.Last();
1097  TFltPrV& RewNcp = RewNcpV.Last();
1098  bool Once=false, Once2=false;
1099  while (Ss.Next()) {
1100  if (TrueNcpId < Ss.GetFlds()&& Ss.IsFlt(TrueNcpId)) { Ncp.Add(TFltPr(Ss.GetFlt(TrueNcpId-1), Ss.GetFlt(TrueNcpId))); }
1101  if (WhiskId>=0 && WhiskId < Ss.GetFlds() && Ss.IsFlt(WhiskId)) { WhiskNcp.Add(TFltPr(Ss.GetFlt(WhiskId-1), Ss.GetFlt(WhiskId))); }
1102  if (RewId >=0 && RewId < Ss.GetFlds()&& Ss.IsFlt(RewId)) { RewNcp.Add(TFltPr(Ss.GetFlt(RewId-1), Ss.GetFlt(RewId))); }
1103  if (BestWhiskId>=0 && BestWhiskId < Ss.GetFlds() && ! Once) { Once=true;
1104  int W2=BestWhiskId-1; while (W2 > 0 && Ss.GetFlt(W2)!=(double)int(Ss.GetFlt(W2))) { W2--; }
1105  WhiskerV.Add(TFltPr(Ss.GetFlt(W2), Ss.GetFlt(BestWhiskId))); }
1106  if (RewBestWhiskId>=0 && RewBestWhiskId < Ss.GetFlds() && ! Once2) { Once2=true;
1107  int W2=RewBestWhiskId-1; while (W2 > 0 && Ss.GetFlt(W2)!=(double)int(Ss.GetFlt(W2))) { W2--; }
1108  RewWhiskerV.Add(TFltPr(Ss.GetFlt(W2), Ss.GetFlt(RewBestWhiskId))); }
1109  }
1110  printf(" ncp:%d whisk:%d rew:%d\n", NcpV.Last().Len(), WhiskNcpV.Last().Len(), RewNcpV.Last().Len());
1111  }
1112  IAssert(NcpV.Len() == GSizeV.Len());
1113 }
1114 TNcpGraphsBase::TNcpGraphsBase(TSIn& SIn) : GNmV(SIn), GSizeV(SIn), WhiskerV(SIn),
1115  RewWhiskerV(SIn),NcpV(SIn), RewNcpV(SIn),WhiskNcpV(SIn) {
1116 }
1117 
1118 void TNcpGraphsBase::Save(TSOut& SOut) const {
1119  GNmV.Save(SOut); GSizeV.Save(SOut);
1120  WhiskerV.Save(SOut); RewWhiskerV.Save(SOut); NcpV.Save(SOut);
1121  RewNcpV.Save(SOut); WhiskNcpV.Save(SOut);
1122 }
1123 
1124 void TNcpGraphsBase::Impose(const TStr& OutFNm, const int& TopN, const bool& Smooth) {
1125  TGnuPlot GP(OutFNm);
1126  for (int i = 0; i < TMath::Mn(NcpV.Len(), TopN); i++) {
1127  GP.AddPlot(NcpV[i], gpwLines, GNmV[i], Smooth?"smooth csplines":"");
1128  }
1129  GP.SetScale(gpsLog10XY);
1130  GP.SavePng();
1131 }
1132 
1133 double TNcpGraphsBase::GetXAtMinY(const TFltPrV& Ncp, const int& NNodes) {
1134  double MinX1=1, MinY1=1;
1135  for (int k = 0; k < Ncp.Len(); k++) {
1136  if (Ncp[k].Val2<MinY1) { MinX1=Ncp[k].Val1; MinY1=Ncp[k].Val2; } }
1137  return MinX1<1 ? 1 : MinX1;
1138  //if (NNodes < 1000) return MinX1;
1139  // smooth
1140  /*const int WndSize = 50;
1141  double MinX=1, MinY=1;
1142  TFltPrV Ncp2V;
1143  for (int k = 0; k < Ncp.Len(); k++) {
1144  int WndSz = k > WndSize ? WndSize : k;
1145  double SmoothVal=0.0, SmoothCnt=0;
1146  for (int i = -WndSz; i <= WndSz; i++) {
1147  if (k+i > -1 && k+i < Ncp.Len()) { SmoothCnt+=pow(1.1, -abs(i));
1148  SmoothVal+=pow(1.1, -abs(i)) * Ncp[k+i].Val2; }
1149  }
1150  SmoothVal = SmoothVal/SmoothCnt;
1151  Ncp2V.Add(TFltPr(Ncp[k].Val1, SmoothVal));
1152  if (SmoothVal<MinY) { MinX=Ncp[k].Val1; MinY=SmoothVal; }
1153  }
1154  static int cnt = 0;
1155  if (Ncp2V.Len() > 10 && cnt < 10) {
1156  TGnuPlot GP(TStr::Fmt("test-%03d", ++cnt));
1157  GP.SetScale(gpsLog10XY);
1158  GP.AddPlot(Ncp, gpwLines, "true");
1159  GP.AddPlot(Ncp2V, gpwLines, "smooth");
1160  GP.SavePng();
1161  }
1162  if (MinX < 1) { return 1; } else if (MinX > 1000) { return 1000; }
1163  return MinX;*/
1164 }
1165 
1166 TFltPr TNcpGraphsBase::GetXYAtMinY(const TFltPrV& Ncp, const int& NNodes) {
1167  double MinX1=1, MinY1=1;
1168  for (int k = 0; k < Ncp.Len(); k++) {
1169  if (Ncp[k].Val2<MinY1) { MinX1=Ncp[k].Val1; MinY1=Ncp[k].Val2; } }
1170  return TFltPr(MinX1<1?1:MinX1, MinY1);
1171 }
1172 
1173 void TNcpGraphsBase::PlotNcpMin(const TStr& OutFNm, const bool& VsGraphN) {
1174  TFltPrV GSzMinK, GSzMinY;
1175  for (int i = 0; i < NcpV.Len(); i++) {
1176  const TFltPr XYAtMinY = GetXYAtMinY(NcpV[i], GSizeV[i].Val1);
1177  const double X = VsGraphN ? (!ParamValV.Empty()?ParamValV[i]():i+1) : GSizeV[i].Val1();
1178  GSzMinK.Add(TFltPr(X, XYAtMinY.Val1));
1179  GSzMinY.Add(TFltPr(X, XYAtMinY.Val2));
1180  }
1181  GSzMinK.Sort(); GSzMinY.Sort();
1182  const TStr XLabel = VsGraphN ? (!ParamValV.Empty()?"parameter value":"network number") : "network size";
1183  TGnuPlot::PlotValV(GSzMinK, TStr("bestK-")+OutFNm, "Network", XLabel, "size of best cluster", VsGraphN?gpsLog10Y:gpsLog, false, gpwLinesPoints);
1184  TGnuPlot::PlotValV(GSzMinY, TStr("condAtBestK-")+OutFNm, "Network", XLabel, "conductance of best cluster", VsGraphN?gpsLog10Y:gpsLog, false, gpwLinesPoints);
1185 }
1186 
1187 void TNcpGraphsBase::SaveTxtNcpMin(const TStr& OutFNm, const bool& VsGraphN) {
1189  for (int i = 0; i < NcpV.Len(); i++) {
1190  const TFltPr XYAtMinY = GetXYAtMinY(NcpV[i], GSizeV[i].Val1);
1191  const double X = VsGraphN ? (!ParamValV.Empty()?ParamValV[i]():i+1) : GSizeV[i].Val1();
1192  GSzMinK.Add(TQuad<TInt, TInt, TFlt, TStr>((int)X, (int)XYAtMinY.Val1(), XYAtMinY.Val2, GNmV[i]));
1193  }
1194  GSzMinK.Sort();
1195  FILE *F = fopen(TStr::Fmt("bestK-%s.txt", OutFNm.CStr()).CStr(), "wt");
1196  fprintf(F, "#nodes\tbestK\tcondAtBestK\tgraph name\n");
1197  for (int i = 0; i < GSzMinK.Len(); i++) {
1198  fprintf(F, "%d\t%d\t%f\t%s\n", GSzMinK[i].Val1(), GSzMinK[i].Val2(), GSzMinK[i].Val3(), GSzMinK[i].Val4.CStr());
1199  }
1200  fclose(F);
1201 }
1202 
1203 void TNcpGraphsBase::PlotRewNcpMin(const TStr& OutFNm, const bool& VsGraphN) {
1204  TFltPrV GSzMinK, GSzMinY;
1205  for (int i = 0; i < NcpV.Len(); i++) {
1206  const TFltPr XYAtMinY = GetXYAtMinY(RewNcpV[i], GSizeV[i].Val1);
1207  const double X = VsGraphN ? (!ParamValV.Empty()?ParamValV[i]():i+1) : GSizeV[i].Val1();
1208  GSzMinK.Add(TFltPr(X, XYAtMinY.Val1));
1209  GSzMinY.Add(TFltPr(X, XYAtMinY.Val2));
1210  }
1211  GSzMinK.Sort(); GSzMinY.Sort();
1212  const TStr XLabel = VsGraphN ? (!ParamValV.Empty()?"parameter value":"network number") : "network size";
1213  TGnuPlot::PlotValV(GSzMinK, TStr("bestR-")+OutFNm, "Rewired network", XLabel, "size of best cluster", VsGraphN?gpsLog10Y:gpsLog, false, gpwLinesPoints);
1214  TGnuPlot::PlotValV(GSzMinY, TStr("condAtBestR-")+OutFNm, "Rewired network", XLabel, "conductance of best cluster", VsGraphN?gpsLog10Y:gpsLog, false, gpwLinesPoints);
1215 }
1216 
1217 void TNcpGraphsBase::PlotBestWhisker(const TStr& OutFNm, const bool& VsGraphN) {
1218  TFltPrV GSzMinK, GSzMinY;
1219  for (int i = 0; i < GSizeV.Len(); i++) {
1220  if (WhiskerV[i].Val1()>0) {
1221  const double X = VsGraphN ? (!ParamValV.Empty()?ParamValV[i]():i+1) : GSizeV[i].Val1();
1222  GSzMinK.Add(TFltPr(X, WhiskerV[i].Val1()));
1223  GSzMinY.Add(TFltPr(X, WhiskerV[i].Val2()));
1224  }
1225  }
1226  GSzMinK.Sort(); GSzMinY.Sort();
1227  const TStr XLabel = VsGraphN ? (!ParamValV.Empty()?"parameter value":"network number") : "network size";
1228  TGnuPlot::PlotValV(GSzMinK, TStr("bestW-")+OutFNm, "Network", XLabel, "size of best whisker", VsGraphN?gpsLog10Y:gpsLog, false, gpwLinesPoints);
1229  TGnuPlot::PlotValV(GSzMinY, TStr("condAtBestW-")+OutFNm, "Network", XLabel, "conductance of best whisker", VsGraphN?gpsLog10Y:gpsLog, false, gpwLinesPoints);
1230 }
1231 
1232 void TNcpGraphsBase::PlotRewBestWhisker(const TStr& OutFNm, const bool& VsGraphN) {
1233  TFltPrV GSzMinK, GSzMinY;
1234  for (int i = 0; i < GSizeV.Len(); i++) {
1235  if (WhiskerV[i].Val1()>0) {
1236  const double X = VsGraphN ? (!ParamValV.Empty()?ParamValV[i]():i+1) : GSizeV[i].Val1();
1237  GSzMinK.Add(TFltPr(X, RewWhiskerV[i].Val1()));
1238  GSzMinY.Add(TFltPr(X, RewWhiskerV[i].Val2()));
1239  }
1240  }
1241  GSzMinK.Sort(); GSzMinY.Sort();
1242  const TStr XLabel = VsGraphN ? (!ParamValV.Empty()?"parameter value":"network number") : "network size";
1243  TGnuPlot::PlotValV(GSzMinK, TStr("bestWR-")+OutFNm, "Rewired network", XLabel, "size of best rewired whisker", VsGraphN?gpsLog10Y:gpsLog, false, gpwLinesPoints);
1244  TGnuPlot::PlotValV(GSzMinY, TStr("condAtBestWR-")+OutFNm, "Rewired network", XLabel, "conductance of best rewired whisker", VsGraphN?gpsLog10Y:gpsLog, false, gpwLinesPoints);
1245 }
1246 
1247 void TNcpGraphsBase::PlotAvgNcp(const TStr& OutFNm, const TVec<TFltPrV>& NcpVec, const int& MinSz, const double& MaxMinY) {
1248  THash<TFlt, TMom> MomH;
1249  int Cnt=0;
1250  for (int i = 0; i < NcpVec.Len(); i++) {
1251  if (GSizeV[i].Val1 < MinSz) { continue; }
1252  const TFltPrV& Ncp = NcpVec[i];
1253  double MinX=1, MinY=1;
1254  for (int k = 0; k < Ncp.Len(); k++){
1255  if (Ncp[k].Val2<MinY) { MinX=Ncp[k].Val1; MinY=Ncp[k].Val2; } }
1256  if (MinY > MaxMinY) { continue; } Cnt++;
1257  //const double Coef = (1-0.0001)/(1.0-MinY);
1258  for (int k = 0; k < Ncp.Len(); k++){
1259  //MomH.AddDat(TMath::Round(exp(TMath::Round(log(Ncp[k].Val1()), 2)),2)).Add(0.0001+(Ncp[k].Val2-MinY)*Coef);
1260  MomH.AddDat(TMath::Round(exp(TMath::Round(log(Ncp[k].Val1()), 1)),0)).Add(Ncp[k].Val2);
1261  }
1262  }
1263  TGnuPlot::PlotValMomH(MomH, OutFNm, "", "size of the cluster, k", "phi(k)", gpsLog, gpwLines, true, true,true,true);
1264  printf(" minSz: %d, miny %g\t%d\n", MinSz, MaxMinY, Cnt);
1265 }
1266 
1267 void TNcpGraphsBase::SaveTxt(const TStr& OutFNm) {
1268  FILE *F=fopen(OutFNm.CStr(), "wt");
1269  fprintf(F, "#Nodes\tEdges\tBestK\tPhi(BestK)\tMaxWhiskN\tPhi(MaxWhisk)\tGraph\n");
1270  for (int i = 0; i < NcpV.Len(); i++) {
1271  const TFltPrV& Ncp = NcpV[i];
1272  double MinX=1, MinY=1;
1273  for (int k = 0; k < Ncp.Len(); k++){
1274  if (Ncp[k].Val2<MinY) { MinX=Ncp[k].Val1; MinY=Ncp[k].Val2; } }
1275  fprintf(F, "%d\t%d\t%d\t%f\t%d\t%f\t%s\n", GSizeV[i].Val1(), GSizeV[i].Val2(),
1276  int(MinX), MinY, int(WhiskerV[i].Val1), WhiskerV[i].Val2(), GNmV[i].CStr());
1277  }
1278  fclose(F);
1279 }
1280 
1281 void TNcpGraphsBase::PlotDataset(const TStr& InFNmWc, const TStr& OutFNm, const bool& ImposeNcp, const bool& VsGraphN) {
1282  TNcpGraphsBase NcpBs(InFNmWc);
1283  //NcpBs.Save(TFOut(OutFNm+".NcpBase"));
1284  //TNcpGraphsBase NcpBs(TFIn(OutFNm+".NcpBase"));
1285  if (ImposeNcp) {
1286  NcpBs.Impose(OutFNm+"5R", 5, false); NcpBs.Impose(OutFNm+"5S", 5, true);
1287  NcpBs.Impose(OutFNm+"R", 10, false); NcpBs.Impose(OutFNm+"S", 10, true);
1288  }
1289  NcpBs.PlotNcpMin(OutFNm, VsGraphN);
1290  //NcpBs.SaveTxtNcpMin(OutFNm, VsGraphN);
1291  NcpBs.PlotRewNcpMin(OutFNm, VsGraphN);
1292  NcpBs.PlotBestWhisker(OutFNm, VsGraphN);
1293  NcpBs.PlotRewBestWhisker(OutFNm, VsGraphN);
1294 
1295  //NcpBs.PlotAvgNcp(OutFNm+"AvgNcp", NcpBs.NcpV, 1, 1);
1296  //NcpBs.PlotAvgNcp(OutFNm+"AvgRewNcp", NcpBs.RewNcpV, 1, 1);
1297  /*NcpBs.PlotAvgNcp(OutFNm+"AvgNcp2", NcpBs.NcpV, 100, 0.1);
1298  NcpBs.PlotAvgNcp(OutFNm+"AvgNcp3", NcpBs.NcpV, 100, 0.01);
1299  NcpBs.PlotAvgNcp(OutFNm+"AvgNcp4", NcpBs.NcpV, 100, 0.001);
1300  NcpBs.PlotAvgNcp(OutFNm+"AvgNcp5", NcpBs.NcpV, 100, 0.0001);
1301  NcpBs.PlotAvgNcp(OutFNm+"RewNcp1", NcpBs.RewNcpV, 1000, 1);
1302  NcpBs.PlotAvgNcp(OutFNm+"RewNcp2", NcpBs.RewNcpV, 100, 0.1);
1303  NcpBs.PlotAvgNcp(OutFNm+"RewNcp3", NcpBs.RewNcpV, 100, 0.01);
1304  NcpBs.PlotAvgNcp(OutFNm+"RewNcp4", NcpBs.RewNcpV, 100, 0.001);
1305  NcpBs.PlotAvgNcp(OutFNm+"RewNcp5", NcpBs.RewNcpV, 100, 0.0001);
1306  NcpBs.PlotAvgNcp(OutFNm+"WhiskNcp1", NcpBs.WhiskNcpV, 1000, 1);
1307  NcpBs.PlotAvgNcp(OutFNm+"WhiskNcp2", NcpBs.WhiskNcpV, 100, 0.1);
1308  NcpBs.PlotAvgNcp(OutFNm+"WhiskNcp3", NcpBs.WhiskNcpV, 100, 0.01);
1309  NcpBs.PlotAvgNcp(OutFNm+"WhiskNcp4", NcpBs.WhiskNcpV, 100, 0.001);
1310  NcpBs.PlotAvgNcp(OutFNm+"WhiskNcp5", NcpBs.WhiskNcpV, 100, 0.0001);
1311  NcpBs.PlotAvgNcp(OutFNm+"WhiskNcp6", NcpBs.WhiskNcpV, 100, 0.00004);
1312  NcpBs.PlotAvgNcp(OutFNm+"WhiskNcp7", NcpBs.WhiskNcpV, 100, 0.00005);*/
1313  //NcpBs.SaveTxt(OutFNm+"bestK.txt");
1314 }
double GetModRat(const int &GEdges) const
Definition: ncp.h:171
void Clr(const bool &DoDel=true, const int &NoDelLim=-1)
Definition: shash.h:1243
void DrawGViz(const PGraph &Graph, const TGVizLayout &Layout, const TStr &PltFNm, const TStr &Desc=TStr(), const bool &NodeLabels=false, const TIntStrH &NIdColorH=TIntStrH())
Draws a given Graph using a selected GraphViz Layout engine with nodes colored.
Definition: gviz.h:34
#define IAssert(Cond)
Definition: bd.h:262
static const T & Mn(const T &LVal, const T &RVal)
Definition: xmath.h:36
TFlt SizeFrac
Definition: ncp.h:178
TFltPrV RewWhiskerV
Definition: ncp.h:246
TPair< TInt, TInt > TIntPr
Definition: ds.h:83
void SavePajek(const TStr &OutFNm) const
Saves the network in the Pajek format so it can be visualized. Red node represents the seed and color...
Definition: ncp.cpp:139
int SearchCh(const char &Ch, const int &BChN=0) const
Definition: dt.cpp:1043
TVec< TNodeSweep > SweepsV
Definition: ncp.h:183
static void PlotDataset(const TStr &InFNmWc, const TStr &OutFNm, const bool &ImposeNcp=false, const bool &VsGraphN=false)
Definition: ncp.cpp:1281
void PlotAvgNcp(const TStr &OutFNm, const TVec< TFltPrV > &NcpVec, const int &MinSz, const double &MaxMinY)
Definition: ncp.cpp:1247
static void BagOfWhiskers2(const PUNGraph &Graph, TFltPrV &SizePhiV)
Definition: ncp.cpp:977
int BestCutIdx
Definition: ncp.h:37
#define IAssertR(Cond, Reason)
Definition: bd.h:265
TIntFltH ResH
Definition: ncp.h:30
static char * GetCurTm()
Definition: tm.h:374
static void DrawWhiskers(const PUNGraph &Graph, TStr FNmPref, const int &PlotN)
Draws the 'whiskers' of the graph. Whiskers are small sub-graphs that are attached to the rest of the...
Definition: ncp.cpp:172
Definition: xfl.h:30
TStr GetFMid() const
Definition: dt.cpp:1403
int GetVol(const int &Nodes) const
Returns the volume of the set of first NodeN nodes in the sweep vector.
Definition: ncp.h:51
Definition: tm.h:355
static TStr GetMegaStr(const int &Val)
Definition: dt.h:1133
void SetXRange(const double &Min, const double &Max)
Definition: gnuplot.h:79
double GetMedian() const
Definition: xmath.h:244
void SupportSweep()
After the function ApproxPageRank() has been run the SupportSweep() computes the volume, cut size, node ids, conductance vectors.
Definition: ncp.cpp:52
TInt KMax
Definition: ncp.h:179
TVec< TFltPrV > NcpV
Definition: ncp.h:247
bool Empty() const
Definition: ds.h:2580
TFltV PhiV
Definition: ncp.h:36
static void GetCutStat(const PUNGraph &Graph, const TIntV &NIdV, int &Vol, int &Cut, double &Phi, int GraphEdges=-1)
For a given Graph and a set of nodes NIdV the function returns the Volume, CutSize and the Conductanc...
Definition: ncp.cpp:192
int GetCut(const int &Nodes) const
Returns the size of the cut of the first Nodes nodes in the sweep vector.
Definition: ncp.h:55
PUNGraph GenRewire(const PUNGraph &OrigGraph, const int &NSwitch, TRnd &Rnd)
Rewire a random undirected graph. Keeps node degrees the same, but randomly rewires the edges...
Definition: ggen.cpp:165
const TIntV & GetNIdV() const
Vector of node IDs sorted in the random walk order.
Definition: ncp.h:62
void SavePng(const int &SizeX=1000, const int &SizeY=800, const TStr &Comment=TStr())
Definition: gnuplot.h:120
void Run(const PUNGraph &Graph, const bool &SaveAllSweeps=false, const bool &SaveAllCond=false, const bool &SaveBestNodesAtK=false)
Definition: ncp.cpp:300
PGraph GetMxWcc(const PGraph &Graph)
Returns a graph representing the largest weakly connected component on an input Graph.
Definition: cncom.h:452
static const T & Mx(const T &LVal, const T &RVal)
Definition: xmath.h:32
int Val
Definition: dt.h:1046
int SearchChBack(const char &Ch, int BChN=-1) const
Definition: dt.cpp:1053
TVec< TFltPrV > WhiskNcpV
Definition: ncp.h:249
void Save(TSOut &SOut) const
Definition: dt.h:1060
TFlt KFac
Definition: ncp.h:178
int ApproxPageRank(const int &SeedNode, const double &Eps)
Computes Approximate PageRank from the seed node SeedNId and with tolerance Eps.
Definition: ncp.cpp:9
unsigned int uint
Definition: bd.h:11
double GetPhi() const
Definition: ncp.h:164
TInt Coverage
Definition: ncp.h:179
TStr ParamStr() const
Definition: ncp.cpp:555
bool Empty() const
Definition: bd.h:501
TFltPr GetXYAtMinY(const TFltPrV &Ncp, const int &NNodes)
Definition: ncp.cpp:1166
void PlotNcpMin(const TStr &OutFNm, const bool &VsGraphN=false)
Definition: ncp.cpp:1173
void PlotPhiDistr(const int &CmtySz, const TStr &OutFNm, TStr Desc=TStr()) const
Definition: ncp.cpp:735
void FindBestCut(const int &SeedNode, const int &ClustSz, const double &MinSizeFrac=0.2)
Finds minimum conductance cut in the graph around the seed node.
Definition: ncp.cpp:81
double Val
Definition: dt.h:1295
int GetEdges() const
Returns the number of edges in the graph.
Definition: graph.cpp:82
TSizeTy Len() const
Returns the number of elements in the vector.
Definition: ds.h:547
void PlotPhiDistr(const TStr &OutFNm, TStr Desc=TStr()) const
Plots the cluster conductance vs. cluster size K (cluster is composed of nodes NIdV[1...K]).
Definition: ncp.cpp:126
PUNGraph Graph
Definition: ncp.h:180
Node iterator. Only forward iteration (operator++) is supported.
Definition: graph.h:64
double GetExpEdgesIn(const int &GEdges) const
Definition: ncp.h:172
void PlotCutDistr(const TStr &OutFNm, TStr Desc=TStr()) const
Plots the cluster cut size vs. cluster size K (cluster is composed of nodes NIdV[1...K]).
Definition: ncp.cpp:113
void GetBoltzmanCurveStat(const TFltV &TempV, TVec< TFltPrV > &NcpV) const
Definition: ncp.cpp:529
static bool Verbose
Definition: ncp.h:26
bool Empty() const
Definition: hash.h:185
const TCutInfo & GetKNodeCut(const int &Nodes) const
Definition: ncp.h:208
int Len() const
Returns the support of the approximate random walk, the number of nodes with non-zero PageRank score...
Definition: ncp.h:42
void GetKeyV(TVec< TKey > &KeyV) const
Definition: shash.h:1347
Definition: ss.h:72
void PlotNCP(const TStr &OutFNm, TStr Desc=TStr()) const
Definition: ncp.cpp:564
TInt KMin
Definition: ncp.h:179
TStr GetSubStr(const int &BChN, const int &EChN) const
Definition: dt.cpp:811
void SetXYLabel(const TStr &XLabel, const TStr &YLabel)
Definition: gnuplot.h:73
int GetVol() const
Definition: ncp.h:161
static double BinFactor
Definition: ncp.h:127
Definition: xmath.h:129
double GetNormCut(const int &GEdges) const
Definition: ncp.h:168
const TDat & GetDat(const TKey &Key) const
Definition: hash.h:220
int GetFlds() const
Returns the number of fields in the current line.
Definition: ss.h:116
static TRnd Rnd
Definition: dt.h:1053
static const double Mx
Definition: dt.h:1298
bool IsKey(const TKey &Key) const
Definition: shash.h:1148
int GetNodes() const
Returns the number of nodes in the graph.
Definition: graph.h:168
double GetCutPhi() const
Conductance of the 'best' (minimum conductance) cut.
Definition: ncp.h:81
void PlotRewBestWhisker(const TStr &OutFNm, const bool &VsGraphN=false)
Definition: ncp.cpp:1232
Definition: gviz.h:3
TCutInfo BestCut
Definition: ncp.h:188
int GetCutSz() const
Definition: ncp.h:162
Definition: fl.h:58
void Save(TSOut &SOut) const
Definition: ds.h:903
int GetRndNId(TRnd &Rnd=TInt::Rnd)
Returns an ID of a random node in the graph.
Definition: graph.h:250
bool Empty() const
Tests whether the vector is empty.
Definition: ds.h:542
void PlotNCPModul(const TStr &OutFNm, TStr Desc=TStr()) const
Definition: ncp.cpp:589
double GetExpansion() const
Definition: ncp.h:165
static void PlotValMomH(const THash< TVal1, TMom > &ValMomH, const TStr &OutFNmPref, const TStr &Desc="", const TStr &XLabel="", const TStr &YLabel="", const TGpScaleTy &ScaleTy=gpsAuto, const TGpSeriesTy &SeriesTy=gpwLinesPoints, bool PlotAvg=true, bool PlotMed=true, bool PlotMin=false, bool PlotMax=false, bool PlotSDev=false, bool PlotStdErr=true, bool PlotScatter=false)
Definition: gnuplot.h:491
void Swap(TVec< TVal, TSizeTy > &Vec)
Swaps the contents of the vector with Vec.
Definition: ds.h:1047
int GetDeg() const
Returns degree of the current node.
Definition: graph.h:86
bool Empty() const
Definition: shash.h:1120
Local-Spectral-Clustering statistics of a given Graph.
Definition: ncp.h:125
void Pop()
Definition: ds.h:2584
TFltPrV WhiskerV
Definition: ncp.h:246
int GetOutDeg() const
Returns out-degree of the current node (returns same as value GetDeg() since the graph is undirected)...
Definition: graph.h:90
static void BagOfWhiskers(const PUNGraph &Graph, TFltPrV &SizePhiV, TFltPr &BestWhisk)
Definition: ncp.cpp:901
void Clr(const bool &DoDel=true, const TSizeTy &NoDelLim=-1)
Clears the contents of the vector.
Definition: ds.h:971
void AddCmd(const TStr &Cmd)
Definition: gnuplot.h:81
double GetMx() const
Definition: xmath.h:238
void AddToBestCutH(const PUNGraph &Graph, const TIntV &NIdV, const bool &AddBestCond=true)
Definition: ncp.cpp:422
void Add(const TFlt &Val, const TFlt &Wgt=1)
Definition: xmath.h:217
Local Spectral Clustering algorithm.
Definition: ncp.h:24
void Sort(const bool &Asc=true)
Sorts the elements of the vector.
Definition: ds.h:1254
Definition: gnuplot.h:7
void SaveTxtNcpMin(const TStr &OutFNm, const bool &VsGraphN=false)
Definition: ncp.cpp:1187
TStrV GNmV
Definition: ncp.h:243
void PlotBestWhisker(const TStr &OutFNm, const bool &VsGraphN=false)
Definition: ncp.cpp:1217
double GetCutRatio(const int &GNodes) const
Definition: ncp.h:167
int Edges2
Definition: ncp.h:29
void Save(TSOut &SOut) const
Definition: ncp.cpp:1118
int SearchStr(const TStr &Str, const int &BChN=0) const
Definition: dt.cpp:1065
const TFltV & GetPhiV() const
Conducatance of the cut separating first K-nodes in the node-id vector (GetNIdV()) from the rest of t...
Definition: ncp.h:68
PUNGraph GetSubGraph(const PUNGraph &Graph, const TIntV &NIdV, const bool &RenumberNodes)
Returns an induced subgraph of an undirected graph Graph with NIdV nodes with an optional node renumb...
Definition: subgraph.cpp:7
void PlotRewNcpMin(const TStr &OutFNm, const bool &VsGraphN=false)
Definition: ncp.cpp:1203
TVec< TFltPrV > RewNcpV
Definition: ncp.h:248
double GetIntDens() const
Definition: ncp.h:166
double FindBestCut(const int &Nodes, const TIntSet &TabuNIdSet, int &BestCutId) const
Definition: ncp.cpp:469
static double Round(const double &Val)
Definition: xmath.h:16
TNodeI GetNI(const int &NId) const
Returns an iterator referring to the node of ID NId in the graph.
Definition: graph.h:213
#define Mega(n)
Definition: gbase.h:4
TIntV VolV
Definition: ncp.h:35
int Len() const
Definition: ds.h:2581
int AddKey(const TKey &Key)
Definition: shash.h:1254
const TVal & Last() const
Returns a reference to the last element of the vector.
Definition: ds.h:551
Tab separated.
Definition: ss.h:6
int GetNodes() const
Definition: ncp.h:159
bool GetFlt(const int &FldN, double &Val) const
If the field FldN is a float its value is returned in Val and the function returns true...
Definition: ss.cpp:485
static void MakeExpBins(const TFltPrV &ValV, TFltPrV &NewV)
Definition: ncp.cpp:1020
void Tick()
Definition: tm.h:364
void Clr()
Definition: ncp.cpp:293
const TVal & Top() const
Definition: ds.h:2582
TPair< TFlt, TFlt > TFltPr
Definition: ds.h:99
Definition: fl.h:128
double Phi(const int i) const
Definition: ncp.h:143
double GetPhi(const int &ValId) const
Returns the conductance of the cut separating the first Nodes nodes in the sweep vector from the rest...
Definition: ncp.h:59
int GetCutVol() const
Volume of the 'best' (minimum conductance) cut.
Definition: ncp.h:79
THash< TInt, TFltV > SizePhiH
Definition: ncp.h:184
void PlotNCPScatter(const TStr &OutFNm, TStr Desc=TStr()) const
Definition: ncp.cpp:715
void PlotPhiInOut(const TStr &OutFNm, TStr Desc=TStr()) const
Definition: ncp.cpp:643
double Alpha
Definition: ncp.h:32
void GetCurveStat(TFltPrV &MeanV, TFltPrV &MedV, TFltPrV &Dec1V, TFltPrV &MinV, TFltPrV &MaxV) const
Definition: ncp.cpp:490
double GetFracDegOut(const PUNGraph &Graph, double &MxFrac, double &AvgFrac, double &MedianFrac, double &Pct9Frac, double &Flake) const
Definition: ncp.cpp:245
void SortByKey(const bool &Asc=true)
Definition: hash.h:249
THash< TInt, TCutInfo > BestCutH
Definition: ncp.h:185
void SaveTxt(const TStr &OutFNm)
Definition: ncp.cpp:1267
double GetModular(const int &GEdges) const
Definition: ncp.h:170
int GetKeyId(const TKey &Key) const
Definition: hash.h:424
void Get1CnCom(const PUNGraph &Graph, TCnComV &Cn1ComV)
Returns 1-components: maximal connected components of that can be disconnected from the Graph by remo...
Definition: cncom.cpp:98
TIntPrV GSizeV
Definition: ncp.h:245
int GetEdges() const
Definition: ncp.h:160
TIntV CutV
Definition: ncp.h:35
void ImposeNCP(const TLocClustStat &LcStat2, TStr OutFNm, TStr Desc, TStr Desc1, TStr Desc2) const
Definition: ncp.cpp:786
int Len() const
Definition: shash.h:1121
int AddKey(const TKey &Key)
Definition: hash.h:331
TFlt Alpha
Definition: ncp.h:178
int GetOutNId(const int &NodeN) const
Returns ID of NodeN-th out-node (the node the current node points to).
Definition: graph.h:102
void PlotNcpTop10(const TStr &OutFNm, TStr Desc, const int &TakeMinN) const
Definition: ncp.cpp:607
void SetScale(const TGpScaleTy &GpScaleTy)
Definition: gnuplot.h:78
TFltPr BestWhisk
Definition: ncp.h:187
double GetMean() const
Definition: xmath.h:240
TIntV NIdV
Definition: ncp.h:35
Definition: dt.h:412
Definition: ds.h:218
bool Empty() const
Definition: dt.h:488
static TStr Fmt(const char *FmtStr,...)
Definition: dt.cpp:1599
void PlotBestClustDens(TStr OutFNm, TStr Desc=TStr()) const
Definition: ncp.cpp:689
PUNGraph Graph
Definition: ncp.h:28
TIntQ NodeQ
Definition: ncp.h:31
double GetSecs() const
Definition: tm.h:366
TNodeI EndNI() const
Returns an iterator referring to the past-the-end node in the graph.
Definition: graph.h:211
TNcpGraphsBase(const TStr &FNmWc)
Definition: ncp.cpp:1064
TVal1 Val1
Definition: ds.h:34
int AddPlot(const TIntV &YValV, const TGpSeriesTy &SeriesTy=gpwLinesPoints, const TStr &Label=TStr(), const TStr &Style=TStr())
Definition: gnuplot.cpp:186
static void PlotValCntH(const THash< TKey, TVal, THashFunc > &ValCntH, const TStr &OutFNmPref, const TStr &Desc="", const TStr &XLabel="", const TStr &YLabel="", const TGpScaleTy &ScaleTy=gpsAuto, const bool &PowerFit=false, const TGpSeriesTy &SeriesTy=gpwLinesPoints, const bool &PlotCCDF=false, const bool &ExpBucket=false)
Definition: gnuplot.h:434
int GetNbrNId(const int &NodeN) const
Returns ID of NodeN-th neighboring node.
Definition: graph.h:107
TVal2 Val2
Definition: ds.h:35
void Clr(const bool &DoDel=true, const int &NoDelLim=-1, const bool &ResetDat=true)
Definition: hash.h:319
int BestCut() const
Index K of the cut of the minimum conductance around the seed node.
Definition: ncp.h:73
bool IsNode(const int &NId) const
Tests whether ID NId is a node.
Definition: graph.h:207
bool Next()
Loads next line from the input file.
Definition: ss.cpp:412
static void Sweep(const TSparseColMatrix &W, const TFltV &fvec, TFltV &conds, TIntV &order)
void Push(const TVal &Val)
Definition: ds.h:2587
double GetDecile(const int &DecileN) const
Definition: xmath.h:248
TIntSet SizeBucketSet
Definition: ncp.h:189
void Gen(const TSizeTy &_Vals)
Constructs a vector (an array) of _Vals elements.
Definition: ds.h:495
TFltPrV BagOfWhiskerV
Definition: ncp.h:186
bool Next(TStr &FNm)
int SeedNId
Definition: ncp.h:33
void Clr(const bool &DoDel=true)
Definition: ds.h:2569
void PlotBoltzmanCurve(const TStr &OutFNm, TStr Desc=TStr()) const
Definition: ncp.cpp:747
static TVec< TVal, TSizeTy > GetV(const TVal &Val1)
Returns a vector on element Val1.
Definition: ds.h:817
void PlotVolDistr(const TStr &OutFNm, TStr Desc=TStr()) const
Plots the cluster volume vs. cluster size K (cluster is composed of nodes NIdV[1...K]).
Definition: ncp.cpp:100
void AddCut(const TIntV &NIdV)
Definition: ncp.cpp:410
const char * GetStr() const
Definition: tm.h:368
char * CStr()
Definition: dt.h:476
bool IsKey(const TKey &Key) const
Definition: hash.h:216
void Save(TSOut &SOut) const
Definition: ncp.cpp:280
void AddBagOfWhiskers()
Definition: ncp.cpp:404
void RunGnuPlot() const
Definition: gnuplot.cpp:873
TFltV ParamValV
Definition: ncp.h:244
TSizeTy Add()
Adds a new element at the end of the vector, after its current last element.
Definition: ds.h:574
TNodeI BegNI() const
Returns an iterator referring to the first node in the graph.
Definition: graph.h:209
int Len() const
Definition: hash.h:186
TDat & AddDat(const TKey &Key)
Definition: hash.h:196
void SaveTxtInfo(const TStr &OutFNmPref, const TStr &Desc, const bool &SetMaxAt1) const
Definition: ncp.cpp:841
int BestCutNodes() const
Number of nodes inside the 'best' (minimum conductance) cut.
Definition: ncp.h:75
static void PlotValV(const TVec< TVal1 > &ValV, const TStr &OutFNmPref, const TStr &Desc="", const TStr &XLabel="", const TStr &YLabel="", const TGpScaleTy &ScaleTy=gpsAuto, const bool &PowerFit=false, const TGpSeriesTy &SeriesTy=gpwLinesPoints)
Definition: gnuplot.h:398
double GetMn() const
Definition: xmath.h:237
int Len() const
Definition: ncp.h:140
double GetXAtMinY(const TFltPrV &Ncp, const int &NNodes)
Definition: ncp.cpp:1133
void Def()
Definition: xmath.cpp:339
void Impose(const TStr &OutFNm, const int &TopN, const bool &Smooth)
Definition: ncp.cpp:1124
static void PlotNCP(const PUNGraph &Graph, const TStr &FNm, const TStr Desc="", const bool &BagOfWhiskers=true, const bool &RewireNet=false, const int &KMin=10, const int &KMax=Mega(100), const int &Coverage=10, const bool &SaveTxtStat=false, const bool &PlotBoltzman=false)
Definition: ncp.cpp:219
void Save(TSOut &SOut) const
Definition: dt.h:1309
Local-Spectral-Clustering for a set of graphs (loads ncp-*.tab files)
Definition: ncp.h:241
const TKey & GetKey(const int &KeyId) const
Definition: hash.h:210
int NId(const int i) const
Definition: ncp.h:142
bool IsFlt(const int &FldN) const
Checks whether fields FldN is a float.
Definition: ss.h:148
void GetSubValV(const TSizeTy &BValN, const TSizeTy &EValN, TVec< TVal, TSizeTy > &ValV) const
Fills ValV with elements at positions BValN...EValN.
Definition: ds.h:1112
TLocClustStat(const double &_Alpha, const int &_KMin, const int &_KMax, const double &_KFac, const int &_Coverage, const double &_SizeFrac)
Definition: ncp.cpp:275
TIntFltH ProbH
Definition: ncp.h:30
void SortByDat(const bool &Asc=true)
Definition: hash.h:250