SNAP Library 6.0, User Reference  2020-12-09 16:24:20
SNAP, a general purpose, high performance system for analysis and manipulation of large networks
TLocClustStat Class Reference

Local-Spectral-Clustering statistics of a given Graph. More...

#include <ncp.h>

Classes

class  TCutInfo
 
class  TNodeSweep
 

Public Member Functions

 TLocClustStat (const double &_Alpha, const int &_KMin, const int &_KMax, const double &_KFac, const int &_Coverage, const double &_SizeFrac)
 
void Save (TSOut &SOut) const
 
void Clr ()
 
void SetGraph (const PUNGraph &GraphPt)
 
void Run (const PUNGraph &Graph, const bool &SaveAllSweeps=false, const bool &SaveAllCond=false, const bool &SaveBestNodesAtK=false)
 
void AddBagOfWhiskers ()
 
void AddCut (const TIntV &NIdV)
 
void AddCut (const int &ClustSz, const double &Phi)
 
void AddToBestCutH (const PUNGraph &Graph, const TIntV &NIdV, const bool &AddBestCond=true)
 
double FindBestCut (const int &Nodes, const TIntSet &TabuNIdSet, int &BestCutId) const
 
const TCutInfoFindBestCut (const int &Nodes=-1) const
 
double FindBestCut (const int &Nodes, TIntV &ClustNIdV) const
 
int FindBestCut (const int &Nodes, int &Vol, int &Cut, double &Phi) const
 
const TCutInfoGetBestCut () const
 
int GetCuts () const
 
const TCutInfoGetKNodeCut (const int &Nodes) const
 
const TCutInfoGetCutN (const int &N) const
 
int BestWhiskNodes () const
 
int BestWhiskEdges () const
 
double BestWhiskPhi () const
 
TFltPr GetBestWhisk () const
 
const TFltPrVGetBagOfWhiskersV () const
 
void GetCurveStat (TFltPrV &MeanV, TFltPrV &MedV, TFltPrV &Dec1V, TFltPrV &MinV, TFltPrV &MaxV) const
 
void GetBoltzmanCurveStat (const TFltV &TempV, TVec< TFltPrV > &NcpV) const
 
TStr ParamStr () const
 
void PlotNCP (const TStr &OutFNm, TStr Desc=TStr()) const
 
void PlotNCPModul (const TStr &OutFNm, TStr Desc=TStr()) const
 
void PlotNcpTop10 (const TStr &OutFNm, TStr Desc, const int &TakeMinN) const
 
void PlotPhiInOut (const TStr &OutFNm, TStr Desc=TStr()) const
 
void PlotBestClustDens (TStr OutFNm, TStr Desc=TStr()) const
 
void PlotNCPScatter (const TStr &OutFNm, TStr Desc=TStr()) const
 
void PlotPhiDistr (const int &CmtySz, const TStr &OutFNm, TStr Desc=TStr()) const
 
void PlotBoltzmanCurve (const TStr &OutFNm, TStr Desc=TStr()) const
 
void ImposeNCP (const TLocClustStat &LcStat2, TStr OutFNm, TStr Desc, TStr Desc1, TStr Desc2) const
 
void ImposeNCP (const TLocClustStat &LcStat2, const TLocClustStat &LcStat3, TStr OutFNm, TStr Desc, TStr Desc1, TStr Desc2, TStr Desc3) const
 
void SaveTxtInfo (const TStr &OutFNmPref, const TStr &Desc, const bool &SetMaxAt1) const
 

Static Public Member Functions

static void BagOfWhiskers (const PUNGraph &Graph, TFltPrV &SizePhiV, TFltPr &BestWhisk)
 
static void BagOfWhiskers2 (const PUNGraph &Graph, TFltPrV &SizePhiV)
 
static void MakeExpBins (const TFltPrV &ValV, TFltPrV &NewV)
 
static void MakeExpBins (const TFltV &ValV, TFltV &NewV)
 

Public Attributes

TVec< TNodeSweepSweepsV
 
THash< TInt, TFltVSizePhiH
 
THash< TInt, TCutInfoBestCutH
 
TFltPrV BagOfWhiskerV
 
TFltPr BestWhisk
 
TCutInfo BestCut
 
TIntSet SizeBucketSet
 

Static Public Attributes

static double BinFactor = 1.01
 
static int TakeValAt
 

Private Attributes

TFlt Alpha
 
TFlt SizeFrac
 
TFlt KFac
 
TInt KMin
 
TInt KMax
 
TInt Coverage
 
PUNGraph Graph
 

Detailed Description

Local-Spectral-Clustering statistics of a given Graph.

Definition at line 125 of file ncp.h.

Constructor & Destructor Documentation

TLocClustStat::TLocClustStat ( const double &  _Alpha,
const int &  _KMin,
const int &  _KMax,
const double &  _KFac,
const int &  _Coverage,
const double &  _SizeFrac 
)

Definition at line 275 of file ncp.cpp.

276  : Alpha(_Alpha), SizeFrac(_SizeFrac), KFac(_KFac),
277  KMin(_KMin), KMax(_KMax), Coverage(_Coverage) {
278 }
TFlt SizeFrac
Definition: ncp.h:178
TInt KMax
Definition: ncp.h:179
TFlt KFac
Definition: ncp.h:178
TInt Coverage
Definition: ncp.h:179
TInt KMin
Definition: ncp.h:179
TFlt Alpha
Definition: ncp.h:178

Member Function Documentation

void TLocClustStat::AddBagOfWhiskers ( )

Definition at line 404 of file ncp.cpp.

404  {
405  BagOfWhiskers(Graph, BagOfWhiskerV, BestWhisk); // slow but exact
406  //BagOfWhiskers2(Graph, BagOfWhiskerV);
407 }
PUNGraph Graph
Definition: ncp.h:180
static void BagOfWhiskers(const PUNGraph &Graph, TFltPrV &SizePhiV, TFltPr &BestWhisk)
Definition: ncp.cpp:901
TFltPr BestWhisk
Definition: ncp.h:187
TFltPrV BagOfWhiskerV
Definition: ncp.h:186
void TLocClustStat::AddCut ( const TIntV NIdV)

Definition at line 410 of file ncp.cpp.

410  {
411  int Vol, Cut; double Phi;
412  TLocClust::GetCutStat(Graph, NIdV, Vol, Cut, Phi, Graph->GetEdges());
413  SizePhiH.AddDat(NIdV.Len()).Add(Phi);
414 }
static void GetCutStat(const PUNGraph &Graph, const TIntV &NIdV, int &Vol, int &Cut, double &Phi, int GraphEdges=-1)
For a given Graph and a set of nodes NIdV the function returns the Volume, CutSize and the Conductanc...
Definition: ncp.cpp:192
int GetEdges() const
Returns the number of edges in the graph.
Definition: graph.cpp:82
TSizeTy Len() const
Returns the number of elements in the vector.
Definition: ds.h:575
PUNGraph Graph
Definition: ncp.h:180
THash< TInt, TFltV > SizePhiH
Definition: ncp.h:184
TDat & AddDat(const TKey &Key)
Definition: hash.h:238
void TLocClustStat::AddCut ( const int &  ClustSz,
const double &  Phi 
)

Definition at line 417 of file ncp.cpp.

417  {
418  SizePhiH.AddDat(ClustSz).Add(Phi);
419 }
THash< TInt, TFltV > SizePhiH
Definition: ncp.h:184
TSizeTy Add()
Adds a new element at the end of the vector, after its current last element.
Definition: ds.h:602
TDat & AddDat(const TKey &Key)
Definition: hash.h:238
void TLocClustStat::AddToBestCutH ( const PUNGraph Graph,
const TIntV NIdV,
const bool &  AddBestCond = true 
)

Definition at line 422 of file ncp.cpp.

422  {
423  const int K = NIdV.Len();
424  TCutInfo CutInfo(Graph, NIdV, true);
425  printf("new: %d\t%d\t%d\t%f\t%f\n", CutInfo.GetNodes(), CutInfo.GetEdges(),
426  CutInfo.GetCutSz(), CutInfo.GetPhi(), CutInfo.GetModular(Graph->GetEdges()));
427  if (! BestCutH.IsKey(K)) { BestCutH.AddDat(K, CutInfo); return; }
428  TCutInfo& CurCut = BestCutH.GetDat(K);
429  // keep the cluster with best conductance
430  if (AddBestCond && CurCut.GetPhi() > CutInfo.GetPhi()) { CurCut=CutInfo; }
431  // keep the cluster with best modularity
432  //const int E = Graph->GetEdges();
433  //if (! AddBestCond && CurCut.GetModular(E) < CutInfo.GetModular(E)) { CurCut=CutInfo; }
434 }
TSizeTy Len() const
Returns the number of elements in the vector.
Definition: ds.h:575
THash< TInt, TCutInfo > BestCutH
Definition: ncp.h:185
void TLocClustStat::BagOfWhiskers ( const PUNGraph Graph,
TFltPrV SizePhiV,
TFltPr BestWhisk 
)
static

Definition at line 901 of file ncp.cpp.

901  {
902  TCnComV Cn1ComV;
903  TSnap::Get1CnCom(Graph, Cn1ComV);
904  TIntPrV SzVolV;
905  int MxSize=0;
906  if (Cn1ComV.Empty()) { printf("** No bridges\n"); SizePhiV.Clr(); return; }
907  // Graph->SaveEdgeList("g-vol.txt"); TGraphViz::Plot(Graph, gvlNeato, "g-vol.gif"); Fail; } IAssert(vol <= sz*(sz-1));
908  printf(" 1-connected components: %d\n", Cn1ComV.Len());
909  MaxWhisk = TFltPr(1,1);
910  for (int c = 0; c < Cn1ComV.Len(); c++) {
911  const TIntV& NIdV = Cn1ComV[c].NIdV;
912  const int sz = NIdV.Len();
913  if (sz < 2) { continue; }
914  int vol = 0; // volume is the size of degrees
915  for (int n = 0; n < sz; n++) {
916  vol += Graph->GetNI(NIdV[n]).GetOutDeg(); }
917  SzVolV.Add(TIntPr(sz, vol));
918  MxSize += sz;
919  if (1.0/double(vol) < MaxWhisk.Val2) { MaxWhisk=TFltPr(NIdV.Len(), 1.0/double(vol)); }
920  }
921  SzVolV.Sort(false);
922  // compose clusters
923  THash<TInt, TIntSet> ItemSetH(MxSize, true);
924  THash<TInt, TInt> VolH(MxSize, true);
925  THash<TInt, TFlt> CostH(MxSize, true);
926  ItemSetH.AddKey(0); VolH.AddKey(0);
927  TExeTm ExeTm;
928  // exact up to size 1000
929  for (int size = 2; size <= TMath::Mn(MxSize, 1000); size++) {
930  for (int item = 0; item <SzVolV.Len(); item++) {
931  const int smallSz = size-SzVolV[item].Val1;
932  if (ItemSetH.IsKey(smallSz)) {
933  const TIntSet& SmallSet = ItemSetH.GetDat(smallSz);
934  if (SmallSet.IsKey(item)) { continue; }
935  const int SmallVol = VolH.GetDat(smallSz);
936  // combine smaller nad new solution
937  const double curCost = CostH.IsKey(size) ? double(CostH.GetDat(size)) : double(10e10);
938  const double newCost = double(SmallSet.Len()+1) / double(SmallVol+SzVolV[item].Val2);
939  if (curCost < newCost) { continue; }
940  VolH.AddDat(size, SmallVol+SzVolV[item].Val2);
941  ItemSetH.AddDat(size, SmallSet);
942  ItemSetH.GetDat(size).AddKey(item);
943  CostH.AddDat(size, newCost);
944  }
945  }
946  if (VolH.IsKey(size) && size%100==0) {
947  printf("\rSize: %d/%d: vol: %d, items: %d/%d [%s]", size, MxSize, VolH.GetDat(size).Val,
948  ItemSetH.GetDat(size).Len(), SzVolV.Len(), ExeTm.GetStr());
949  }
950  }
951  // add sizes larger than 1000
952  printf("\nAdding sizes > 1000 nodes...");
953  int partSz=0; double partVol=0.0;
954  for (int i = 0; i < SzVolV.Len(); i++) {
955  partSz += SzVolV[i].Val1();
956  partVol += SzVolV[i].Val2();
957  if (partSz < 1000) { continue; }
958  const double curCost = CostH.IsKey(partSz) ? double(CostH.GetDat(partSz)) : double(10e10);
959  const double partPhi = double(i+1)/partVol;
960  if (partPhi < curCost) {
961  CostH.AddDat(partSz, partPhi);
962  }
963  }
964  VolH.SortByKey();
965  CostH.SortByKey();
966  SizePhiV.Gen(CostH.Len(), 0);
967  SizePhiV.Add(TFltPr(1, 1));
968  for (int s = 0; s < CostH.Len(); s++) {
969  const int size = CostH.GetKey(s);
970  const double cond = CostH[s]; //double(ItemSetH.GetDat(size).Len())/double(VolH[s]);
971  SizePhiV.Add(TFltPr(size, cond));
972  }
973  printf("done\n");
974 }
static const T & Mn(const T &LVal, const T &RVal)
Definition: xmath.h:36
TPair< TInt, TInt > TIntPr
Definition: ds.h:83
Definition: tm.h:355
TSizeTy Len() const
Returns the number of elements in the vector.
Definition: ds.h:575
bool IsKey(const TKey &Key) const
Definition: shash.h:1148
bool Empty() const
Tests whether the vector is empty.
Definition: ds.h:570
void Clr(const bool &DoDel=true, const TSizeTy &NoDelLim=-1)
Clears the contents of the vector.
Definition: ds.h:1022
void Sort(const bool &Asc=true)
Sorts the elements of the vector.
Definition: ds.h:1318
TPair< TFlt, TFlt > TFltPr
Definition: ds.h:99
void Get1CnCom(const PUNGraph &Graph, TCnComV &Cn1ComV)
Returns 1-components: maximal connected components of that can be disconnected from the Graph by remo...
Definition: cncom.cpp:98
int Len() const
Definition: shash.h:1121
Definition: hash.h:97
void Gen(const TSizeTy &_Vals)
Constructs a vector (an array) of _Vals elements.
Definition: ds.h:523
const char * GetStr() const
Definition: tm.h:368
TSizeTy Add()
Adds a new element at the end of the vector, after its current last element.
Definition: ds.h:602
void TLocClustStat::BagOfWhiskers2 ( const PUNGraph Graph,
TFltPrV SizePhiV 
)
static

Definition at line 977 of file ncp.cpp.

977  {
978  TCnComV Cn1ComV;
979  TSnap::Get1CnCom(Graph, Cn1ComV);
980  TIntPrV SzVolV;
981  int MxSize=0, TotVol=0;
982  if (Cn1ComV.Empty()) { printf("** No bridges\n"); SizePhiV.Clr(); return; }
983  printf(" 1-connected components: %d\n", Cn1ComV.Len());
984  for (int c = 0; c < Cn1ComV.Len(); c++) {
985  const TIntV& NIdV = Cn1ComV[c].NIdV;
986  const int sz = NIdV.Len();
987  if (sz < 2) { continue; }
988  int vol = 0; // volume is the size of degrees
989  for (int n = 0; n < sz; n++) {
990  vol += Graph->GetNI(NIdV[n]).GetOutDeg(); }
991  SzVolV.Add(TIntPr(sz, vol));
992  MxSize += sz; TotVol += vol;
993  }
994  printf(" Total size: %d\t Total vol: %d\n", MxSize, TotVol);
995  SzVolV.Sort(false);
996  // compose clusters
997  THash<TFlt, TFlt> SizePhiH(MxSize, true);
998  for (int i = 0; i < SzVolV.Len(); i++) {
999  const int Sz = SzVolV[i].Val1();
1000  const double Phi = 1.0/double(SzVolV[i].Val2());
1001  if ((! SizePhiH.IsKey(Sz)) || SizePhiH.GetDat(Sz) > Phi) {
1002  SizePhiH.AddDat(Sz, Phi); }
1003  }
1004  double partSz=0.0, partVol=0.0;
1005  for (int i = 0; i < SzVolV.Len(); i++) {
1006  partSz += SzVolV[i].Val1();
1007  partVol += SzVolV[i].Val2();
1008  const double partPhi = double(i+1)/partVol;
1009  if ((! SizePhiH.IsKey(partSz)) || partPhi < SizePhiH.GetDat(partSz)) {
1010  SizePhiH.AddDat(partSz, partPhi); }
1011  }
1012  SizePhiV.Gen(SizePhiH.Len()+1, 0);
1013  SizePhiV.Add(TFltPr(1, 1));
1014  SizePhiH.SortByKey();
1015  for (int s = 0; s < SizePhiH.Len(); s++) {
1016  SizePhiV.Add(TFltPr(SizePhiH.GetKey(s), SizePhiH[s]));
1017  }
1018 }
TPair< TInt, TInt > TIntPr
Definition: ds.h:83
TSizeTy Len() const
Returns the number of elements in the vector.
Definition: ds.h:575
const TDat & GetDat(const TKey &Key) const
Definition: hash.h:262
bool Empty() const
Tests whether the vector is empty.
Definition: ds.h:570
void Clr(const bool &DoDel=true, const TSizeTy &NoDelLim=-1)
Clears the contents of the vector.
Definition: ds.h:1022
void Sort(const bool &Asc=true)
Sorts the elements of the vector.
Definition: ds.h:1318
TPair< TFlt, TFlt > TFltPr
Definition: ds.h:99
THash< TInt, TFltV > SizePhiH
Definition: ncp.h:184
void SortByKey(const bool &Asc=true)
Definition: hash.h:291
void Get1CnCom(const PUNGraph &Graph, TCnComV &Cn1ComV)
Returns 1-components: maximal connected components of that can be disconnected from the Graph by remo...
Definition: cncom.cpp:98
Definition: hash.h:97
void Gen(const TSizeTy &_Vals)
Constructs a vector (an array) of _Vals elements.
Definition: ds.h:523
bool IsKey(const TKey &Key) const
Definition: hash.h:258
TSizeTy Add()
Adds a new element at the end of the vector, after its current last element.
Definition: ds.h:602
int Len() const
Definition: hash.h:228
TDat & AddDat(const TKey &Key)
Definition: hash.h:238
const TKey & GetKey(const int &KeyId) const
Definition: hash.h:252
int TLocClustStat::BestWhiskEdges ( ) const
inline

Definition at line 212 of file ncp.h.

212 { return (int)TMath::Round(1.0/BestWhisk.Val2.Val)/2-1; }
double Val
Definition: dt.h:1388
static double Round(const double &Val)
Definition: xmath.h:16
TFltPr BestWhisk
Definition: ncp.h:187
TVal2 Val2
Definition: ds.h:35
int TLocClustStat::BestWhiskNodes ( ) const
inline

Definition at line 211 of file ncp.h.

211 { return int(BestWhisk.Val1.Val); }
double Val
Definition: dt.h:1388
TFltPr BestWhisk
Definition: ncp.h:187
TVal1 Val1
Definition: ds.h:34
double TLocClustStat::BestWhiskPhi ( ) const
inline

Definition at line 213 of file ncp.h.

213 { return BestWhisk.Val2; }
TFltPr BestWhisk
Definition: ncp.h:187
TVal2 Val2
Definition: ds.h:35
void TLocClustStat::Clr ( )

Definition at line 293 of file ncp.cpp.

293  {
294  SweepsV.Clr(false); // node ids and conductances for each run of local clustering
295  SizePhiH.Clr(false); // conductances at cluster size K
296  BestCutH.Clr(false); // best cut (min conductance) at size K
297  BagOfWhiskerV.Clr(false); // (Size, Conductance) of bag of whiskers clusters
298 }
TVec< TNodeSweep > SweepsV
Definition: ncp.h:183
void Clr(const bool &DoDel=true, const TSizeTy &NoDelLim=-1)
Clears the contents of the vector.
Definition: ds.h:1022
THash< TInt, TFltV > SizePhiH
Definition: ncp.h:184
THash< TInt, TCutInfo > BestCutH
Definition: ncp.h:185
void Clr(const bool &DoDel=true, const int &NoDelLim=-1, const bool &ResetDat=true)
Definition: hash.h:361
TFltPrV BagOfWhiskerV
Definition: ncp.h:186
double TLocClustStat::FindBestCut ( const int &  Nodes,
const TIntSet TabuNIdSet,
int &  BestCutId 
) const

Definition at line 469 of file ncp.cpp.

469  {
470  double bestPhi = 1;
471  BestCutId = -1;
472  bool Tabu;
473  IAssert(! SweepsV.Empty());
474  for (int c = 0; c < SweepsV.Len(); c++) {
475  const TNodeSweep& Sweep = SweepsV[c];
476  if (Sweep.Len() < Nodes) { continue; }
477  if (Sweep.Phi(Nodes-1) > bestPhi) { continue; }
478  Tabu = false;
479  for (int i = 0; i < Nodes; i++) {
480  if (TabuNIdSet.IsKey(Sweep.NId(i))) { Tabu=true; break; }
481  }
482  if (Tabu) { continue; }
483  bestPhi = Sweep.Phi(Nodes-1);
484  BestCutId = c;
485  }
486  return bestPhi;
487 }
#define IAssert(Cond)
Definition: bd.h:262
TVec< TNodeSweep > SweepsV
Definition: ncp.h:183
bool IsKey(const TKey &Key) const
Definition: shash.h:1148
static void Sweep(const TSparseColMatrix &W, const TFltV &fvec, TFltV &conds, TIntV &order)
const TLocClustStat::TCutInfo & TLocClustStat::FindBestCut ( const int &  Nodes = -1) const

Definition at line 436 of file ncp.cpp.

436  {
437  double bestPhi = 1;
438  int CutId = -1;
439  if (Nodes > 0) {
440  IAssert(BestCutH.IsKey(Nodes));
441  return BestCutH.GetDat(Nodes);
442  } else {
443  for (int n = 0; n < BestCutH.Len(); n++) {
444  if (BestCutH[n].GetPhi() <= bestPhi) {
445  bestPhi = BestCutH[n].GetPhi(); CutId = n; }
446  }
447  IAssert(CutId != -1);
448  IAssert(! BestCutH[CutId].CutNIdV.Empty());
449  return BestCutH[CutId];
450  }
451 }
#define IAssert(Cond)
Definition: bd.h:262
THash< TInt, TCutInfo > BestCutH
Definition: ncp.h:185
double TLocClustStat::FindBestCut ( const int &  Nodes,
TIntV ClustNIdV 
) const

Definition at line 454 of file ncp.cpp.

454  {
455  const TCutInfo& Cut = FindBestCut(Nodes);
456  ClustNIdV = Cut.CutNIdV;
457  return Cut.GetPhi();
458 }
double FindBestCut(const int &Nodes, const TIntSet &TabuNIdSet, int &BestCutId) const
Definition: ncp.cpp:469
int TLocClustStat::FindBestCut ( const int &  Nodes,
int &  Vol,
int &  Cut,
double &  Phi 
) const

Definition at line 460 of file ncp.cpp.

460  {
461  const TCutInfo& CutInfo = FindBestCut(Nodes);
462  Vol = CutInfo.GetVol();
463  Cut = CutInfo.GetCutSz();
464  Phi = CutInfo.GetPhi();
465  return CutInfo.GetNodes();
466 }
double FindBestCut(const int &Nodes, const TIntSet &TabuNIdSet, int &BestCutId) const
Definition: ncp.cpp:469
const TFltPrV& TLocClustStat::GetBagOfWhiskersV ( ) const
inline

Definition at line 215 of file ncp.h.

215 { return BagOfWhiskerV; }
TFltPrV BagOfWhiskerV
Definition: ncp.h:186
const TCutInfo& TLocClustStat::GetBestCut ( ) const
inline

Definition at line 206 of file ncp.h.

206 { return BestCut; } // overall best cut
TCutInfo BestCut
Definition: ncp.h:188
TFltPr TLocClustStat::GetBestWhisk ( ) const
inline

Definition at line 214 of file ncp.h.

214 { return BestWhisk; }
TFltPr BestWhisk
Definition: ncp.h:187
void TLocClustStat::GetBoltzmanCurveStat ( const TFltV TempV,
TVec< TFltPrV > &  NcpV 
) const

Definition at line 529 of file ncp.cpp.

529  {
530  IAssert(! SizePhiH.Empty()); // stores conductances of all little clusters
531  NcpV.Gen(TempV.Len());
532  TFltPrV BucketV;
533  for (int t = 0; t < TempV.Len(); t++) {
534  const double T = TempV[t];
535  for (int i = 0; i < SizePhiH.Len(); i++) {
536  const double X = SizePhiH.GetKey(i).Val; IAssert(X >= 1.0);
537  const TFltV& PhiV = SizePhiH[i];
538  double V = 0.0, SumW = 0.0;
539  for (int j = 0; j < PhiV.Len(); j++) {
540  V += PhiV[j] * exp(-PhiV[j]/T);
541  SumW += exp(-PhiV[j]/T);
542  }
543  //V /= PhiV.Len();
544  V /= SumW;
545  NcpV[t].Add(TFltPr(X, V));
546  }
547  TLocClustStat::MakeExpBins(NcpV[t], BucketV); NcpV[t].Swap(BucketV);
548  // normalize to start at (1,1)
549  //for (int i = 0; i < NcpV[t].Len(); i++) {
550  // NcpV[t][i].Val2 /= NcpV[t][0].Val2;
551  //}
552  }
553 }
#define IAssert(Cond)
Definition: bd.h:262
int Val
Definition: dt.h:1139
TSizeTy Len() const
Returns the number of elements in the vector.
Definition: ds.h:575
bool Empty() const
Definition: hash.h:227
void Swap(TVec< TVal, TSizeTy > &Vec)
Swaps the contents of the vector with Vec.
Definition: ds.h:1101
static void MakeExpBins(const TFltPrV &ValV, TFltPrV &NewV)
Definition: ncp.cpp:1020
TPair< TFlt, TFlt > TFltPr
Definition: ds.h:99
THash< TInt, TFltV > SizePhiH
Definition: ncp.h:184
void Gen(const TSizeTy &_Vals)
Constructs a vector (an array) of _Vals elements.
Definition: ds.h:523
TSizeTy Add()
Adds a new element at the end of the vector, after its current last element.
Definition: ds.h:602
int Len() const
Definition: hash.h:228
const TKey & GetKey(const int &KeyId) const
Definition: hash.h:252
Vector is a sequence TVal objects representing an array that can change in size.
Definition: ds.h:430
void TLocClustStat::GetCurveStat ( TFltPrV MeanV,
TFltPrV MedV,
TFltPrV Dec1V,
TFltPrV MinV,
TFltPrV MaxV 
) const

Definition at line 490 of file ncp.cpp.

490  {
491  TFltPrV BucketV;
492  MeanV.Clr(false); MedV.Clr(false); Dec1V.Clr(false); MinV.Clr(false); MaxV.Clr(false);
493  if (! SizePhiH.Empty()) { // stores conductances of all little clusters
494  const THash<TInt, TFltV>& KvH = SizePhiH;
495  for (int i = 0; i < KvH.Len(); i++) {
496  const double X = KvH.GetKey(i).Val; IAssert(X >= 1.0);
497  const TFltV& YVec = KvH[i];
498  TMom Mom;
499  for (int j = 0; j < YVec.Len(); j++) { Mom.Add(YVec[j]); }
500  Mom.Def();
501  /*IAssert(Mom.GetMean()>=0 && Mom.GetMean()<=1);
502  IAssert(Mom.GetMedian()>=0 && Mom.GetMedian()<=1);
503  IAssert(Mom.GetDecile(1)>=0 && Mom.GetDecile(1)<=1);
504  IAssert(Mom.GetMn()>=0 && Mom.GetMn()<=1);
505  IAssert(Mom.GetMx()>=0 && Mom.GetMx()<=1);*/
506  MeanV.Add(TFltPr(X, Mom.GetMean()));
507  MedV.Add(TFltPr(X, Mom.GetMedian()));
508  Dec1V.Add(TFltPr(X, Mom.GetDecile(1)));
509  MinV.Add(TFltPr(X, Mom.GetMn()));
510  MaxV.Add(TFltPr(X, Mom.GetMx()));
511  }
512  MeanV.Sort(); MedV.Sort(); Dec1V.Sort(); MinV.Sort(); MaxV.Sort();
513  // create log buckets (makes plots smaller and smoother)
514  TLocClustStat::MakeExpBins(MeanV, BucketV); MeanV.Swap(BucketV);
515  TLocClustStat::MakeExpBins(MedV, BucketV); MedV.Swap(BucketV);
516  TLocClustStat::MakeExpBins(Dec1V, BucketV); Dec1V.Swap(BucketV);
517  TLocClustStat::MakeExpBins(MinV, BucketV); MinV.Swap(BucketV);
518  TLocClustStat::MakeExpBins(MaxV, BucketV); MaxV.Swap(BucketV);
519  } else {
520  //const int GEdges = Graph->GetEdges();
521  for (int i = 0; i < BestCutH.Len(); i++) {
522  MinV.Add(TFltPr(BestCutH.GetKey(i).Val, BestCutH[i].GetPhi()));
523  }
524  MinV.Sort();
525  TLocClustStat::MakeExpBins(MinV, BucketV); MinV.Swap(BucketV);
526  }
527 }
#define IAssert(Cond)
Definition: bd.h:262
double GetMedian() const
Definition: xmath.h:244
int Val
Definition: dt.h:1139
TSizeTy Len() const
Returns the number of elements in the vector.
Definition: ds.h:575
bool Empty() const
Definition: hash.h:227
Definition: xmath.h:129
void Swap(TVec< TVal, TSizeTy > &Vec)
Swaps the contents of the vector with Vec.
Definition: ds.h:1101
void Clr(const bool &DoDel=true, const TSizeTy &NoDelLim=-1)
Clears the contents of the vector.
Definition: ds.h:1022
double GetMx() const
Definition: xmath.h:238
void Add(const TFlt &Val, const TFlt &Wgt=1)
Definition: xmath.h:217
void Sort(const bool &Asc=true)
Sorts the elements of the vector.
Definition: ds.h:1318
static void MakeExpBins(const TFltPrV &ValV, TFltPrV &NewV)
Definition: ncp.cpp:1020
TPair< TFlt, TFlt > TFltPr
Definition: ds.h:99
THash< TInt, TFltV > SizePhiH
Definition: ncp.h:184
THash< TInt, TCutInfo > BestCutH
Definition: ncp.h:185
double GetMean() const
Definition: xmath.h:240
double GetDecile(const int &DecileN) const
Definition: xmath.h:248
TSizeTy Add()
Adds a new element at the end of the vector, after its current last element.
Definition: ds.h:602
int Len() const
Definition: hash.h:228
double GetMn() const
Definition: xmath.h:237
void Def()
Definition: xmath.cpp:339
const TKey & GetKey(const int &KeyId) const
Definition: hash.h:252
Vector is a sequence TVal objects representing an array that can change in size.
Definition: ds.h:430
const TCutInfo& TLocClustStat::GetCutN ( const int &  N) const
inline

Definition at line 209 of file ncp.h.

209 { return BestCutH[N]; }
THash< TInt, TCutInfo > BestCutH
Definition: ncp.h:185
int TLocClustStat::GetCuts ( ) const
inline

Definition at line 207 of file ncp.h.

207 { return BestCutH.Len(); }
THash< TInt, TCutInfo > BestCutH
Definition: ncp.h:185
const TCutInfo& TLocClustStat::GetKNodeCut ( const int &  Nodes) const
inline

Definition at line 208 of file ncp.h.

208 { return BestCutH.GetDat(Nodes); }
THash< TInt, TCutInfo > BestCutH
Definition: ncp.h:185
void TLocClustStat::ImposeNCP ( const TLocClustStat LcStat2,
TStr  OutFNm,
TStr  Desc,
TStr  Desc1,
TStr  Desc2 
) const

Definition at line 786 of file ncp.cpp.

786  {
787  if (Desc.Empty()) { Desc = OutFNm; }
788  TFltPrV MeanV1, MedV1, Dec1V1, MinV1, MaxV1;
789  TFltPrV MeanV2, MedV2, Dec1V2, MinV2, MaxV2;
790  GetCurveStat(MeanV1, MedV1, Dec1V1, MinV1, MaxV1);
791  LcStat2.GetCurveStat(MeanV2, MedV2, Dec1V2, MinV2, MaxV2);
792  // plot
793  TGnuPlot GP("ncp."+OutFNm, TStr::Fmt("%s. %s", Desc.CStr(), ParamStr().CStr()));
794  if (! MinV1.Empty()) { GP.AddPlot(MinV1, gpwLines, TStr::Fmt("%s MIN (%d, %d)", Desc1.CStr(), Graph->GetNodes(), Graph->GetEdges()), "lw 1"); }
795  if (! MinV2.Empty()) { GP.AddPlot(MinV2, gpwLines, TStr::Fmt("%s MIN (%d, %d)", Desc2.CStr(), LcStat2.Graph->GetNodes(), LcStat2.Graph->GetEdges()), "lw 1"); }
796  if (! BagOfWhiskerV.Empty()) {
797  GP.AddPlot(BagOfWhiskerV, gpwLines, Desc1+" whiskers", "lw 1");
798  TFltPrV BestWhiskV; BestWhiskV.Add(BestWhisk);
799  GP.AddPlot(BestWhiskV, gpwPoints, Desc1+" Best whisker", "pt 5 ps 2"); }
800  if (! LcStat2.BagOfWhiskerV.Empty()) {
801  GP.AddPlot(LcStat2.BagOfWhiskerV, gpwLines, Desc2+" whiskers", "lw 1");
802  TFltPrV BestWhiskV; BestWhiskV.Add(LcStat2.BestWhisk);
803  GP.AddPlot(BestWhiskV, gpwPoints, Desc2+" Best whisker", "pt 5 ps 2"); }
804  GP.SetScale(gpsLog10XY);
805  GP.SetXYLabel("k (number of nodes in the cluster)", "{/Symbol \\F} (conductance)");
806  //GP.SetXRange(1, Graph->GetNodes()/2);
807  GP.SavePng();
808 }
TStr ParamStr() const
Definition: ncp.cpp:555
int GetEdges() const
Returns the number of edges in the graph.
Definition: graph.cpp:82
PUNGraph Graph
Definition: ncp.h:180
int GetNodes() const
Returns the number of nodes in the graph.
Definition: graph.h:192
bool Empty() const
Tests whether the vector is empty.
Definition: ds.h:570
void GetCurveStat(TFltPrV &MeanV, TFltPrV &MedV, TFltPrV &Dec1V, TFltPrV &MinV, TFltPrV &MaxV) const
Definition: ncp.cpp:490
TFltPr BestWhisk
Definition: ncp.h:187
bool Empty() const
Definition: dt.h:491
static TStr Fmt(const char *FmtStr,...)
Definition: dt.cpp:1599
TFltPrV BagOfWhiskerV
Definition: ncp.h:186
char * CStr()
Definition: dt.h:479
TSizeTy Add()
Adds a new element at the end of the vector, after its current last element.
Definition: ds.h:602
Vector is a sequence TVal objects representing an array that can change in size.
Definition: ds.h:430
void TLocClustStat::ImposeNCP ( const TLocClustStat LcStat2,
const TLocClustStat LcStat3,
TStr  OutFNm,
TStr  Desc,
TStr  Desc1,
TStr  Desc2,
TStr  Desc3 
) const

Definition at line 810 of file ncp.cpp.

810  {
811  if (Desc.Empty()) { Desc = OutFNm; }
812  TFltPrV MeanV1, MedV1, Dec1V1, MinV1, MaxV1;
813  TFltPrV MeanV2, MedV2, Dec1V2, MinV2, MaxV2;
814  TFltPrV MeanV3, MedV3, Dec1V3, MinV3, MaxV3;
815  GetCurveStat(MeanV1, MedV1, Dec1V1, MinV1, MaxV1);
816  LcStat2.GetCurveStat(MeanV2, MedV2, Dec1V2, MinV2, MaxV2);
817  LcStat3.GetCurveStat(MeanV3, MedV3, Dec1V3, MinV3, MaxV3);
818  // plot
819  TGnuPlot GP("phiTR."+OutFNm, TStr::Fmt("%s. %s", Desc.CStr(), ParamStr().CStr()));
820  if (! MinV1.Empty()) { GP.AddPlot(MinV1, gpwLines, Desc1+" MIN", "lw 1"); }
821  if (! MinV2.Empty()) { GP.AddPlot(MinV2, gpwLines, Desc2+" MIN", "lw 1"); }
822  if (! MinV3.Empty()) { GP.AddPlot(MinV3, gpwLines, Desc3+" MIN", "lw 1"); }
823  if (! BagOfWhiskerV.Empty()) {
824  GP.AddPlot(BagOfWhiskerV, gpwLines, Desc1+" whiskers", "lw 1");
825  TFltPrV BestWhiskV; BestWhiskV.Add(BestWhisk);
826  GP.AddPlot(BestWhiskV, gpwPoints, Desc1+" Best whisker", "pt 5 ps 2"); }
827  if (! LcStat2.BagOfWhiskerV.Empty()) {
828  GP.AddPlot(LcStat2.BagOfWhiskerV, gpwLines, Desc2+" whiskers", "lw 1");
829  TFltPrV BestWhiskV; BestWhiskV.Add(LcStat2.BestWhisk);
830  GP.AddPlot(BestWhiskV, gpwPoints, Desc2+" Best whisker", "pt 5 ps 2"); }
831  if (! LcStat3.BagOfWhiskerV.Empty()) {
832  GP.AddPlot(LcStat3.BagOfWhiskerV, gpwLines, Desc3+" whiskers", "lw 1");
833  TFltPrV BestWhiskV; BestWhiskV.Add(LcStat3.BestWhisk);
834  GP.AddPlot(BestWhiskV, gpwPoints, Desc3+" Best whisker", "pt 5 ps 2"); }
835  GP.SetScale(gpsLog10XY);
836  GP.SetXYLabel("k (number of nodes in the cluster)", "{/Symbol \\F} (conductance)");
837  GP.SetXRange(1, Graph->GetNodes()/2);
838  GP.SavePng();
839 }
TStr ParamStr() const
Definition: ncp.cpp:555
PUNGraph Graph
Definition: ncp.h:180
int GetNodes() const
Returns the number of nodes in the graph.
Definition: graph.h:192
bool Empty() const
Tests whether the vector is empty.
Definition: ds.h:570
void GetCurveStat(TFltPrV &MeanV, TFltPrV &MedV, TFltPrV &Dec1V, TFltPrV &MinV, TFltPrV &MaxV) const
Definition: ncp.cpp:490
TFltPr BestWhisk
Definition: ncp.h:187
bool Empty() const
Definition: dt.h:491
static TStr Fmt(const char *FmtStr,...)
Definition: dt.cpp:1599
TFltPrV BagOfWhiskerV
Definition: ncp.h:186
char * CStr()
Definition: dt.h:479
TSizeTy Add()
Adds a new element at the end of the vector, after its current last element.
Definition: ds.h:602
Vector is a sequence TVal objects representing an array that can change in size.
Definition: ds.h:430
void TLocClustStat::MakeExpBins ( const TFltPrV ValV,
TFltPrV NewV 
)
static

Definition at line 1020 of file ncp.cpp.

1020  {
1021  if (ValV.Empty()) { NewV.Clr(false); return; }
1022  NewV.Gen(1000, 0);
1023  double PrevBPos = 1, BPos = 1;
1024  int i = 0;
1025  while (BPos <= ValV.Last().Val1) {
1026  //if (TakeValAt == 1) {
1027  // while (i < ValV.Len() && ValV[i].Val1 <= BPos) { i++; }
1028  // NewV.Add(ValV[i-1]); }
1029  //else if (TakeValAt == 2) {
1030  int MinI=-1; double MinCnt=TFlt::Mx;
1031  while (i < ValV.Len() && ValV[i].Val1 <= BPos) {
1032  if (ValV[i].Val2 < MinCnt) { MinCnt=ValV[i].Val2; MinI=i; } i++; }
1033  if (MinI>=0 && MinI<ValV.Len()) {
1034  NewV.Add(ValV[MinI]); }
1035  PrevBPos = (uint) floor(BPos);
1036  BPos *= BinFactor;
1037  if (floor(BPos) == PrevBPos) { BPos = PrevBPos + 1; }
1038  }
1039  NewV.Add(ValV.Last());
1040 }
unsigned int uint
Definition: bd.h:11
TSizeTy Len() const
Returns the number of elements in the vector.
Definition: ds.h:575
static double BinFactor
Definition: ncp.h:127
static const double Mx
Definition: dt.h:1391
bool Empty() const
Tests whether the vector is empty.
Definition: ds.h:570
void Clr(const bool &DoDel=true, const TSizeTy &NoDelLim=-1)
Clears the contents of the vector.
Definition: ds.h:1022
const TVal & Last() const
Returns a reference to the last element of the vector.
Definition: ds.h:579
void Gen(const TSizeTy &_Vals)
Constructs a vector (an array) of _Vals elements.
Definition: ds.h:523
TSizeTy Add()
Adds a new element at the end of the vector, after its current last element.
Definition: ds.h:602
void TLocClustStat::MakeExpBins ( const TFltV ValV,
TFltV NewV 
)
static

Definition at line 1042 of file ncp.cpp.

1042  {
1043  if (ValV.Empty()) { NewV.Clr(false); return; }
1044  NewV.Gen(1000, 0);
1045  double PrevBPos = 1, BPos = 1;
1046  int i = 1;
1047  NewV.Add(ValV[0]);
1048  while (BPos <= ValV.Len()) {
1049  int MinI=-1; double MinCnt=TFlt::Mx;
1050  while (i < ValV.Len() && i <= BPos) {
1051  if (ValV[i] < MinCnt) { MinCnt=ValV[i]; MinI=i; } i++; }
1052  if (MinI>=0 && MinI<ValV.Len()) {
1053  NewV.Add(ValV[MinI]); }
1054  PrevBPos = (uint) floor(BPos);
1055  BPos *= BinFactor;
1056  if (floor(BPos) == PrevBPos) { BPos = PrevBPos + 1; }
1057  }
1058  NewV.Add(ValV.Last());
1059 }
unsigned int uint
Definition: bd.h:11
TSizeTy Len() const
Returns the number of elements in the vector.
Definition: ds.h:575
static double BinFactor
Definition: ncp.h:127
static const double Mx
Definition: dt.h:1391
bool Empty() const
Tests whether the vector is empty.
Definition: ds.h:570
void Clr(const bool &DoDel=true, const TSizeTy &NoDelLim=-1)
Clears the contents of the vector.
Definition: ds.h:1022
const TVal & Last() const
Returns a reference to the last element of the vector.
Definition: ds.h:579
void Gen(const TSizeTy &_Vals)
Constructs a vector (an array) of _Vals elements.
Definition: ds.h:523
TSizeTy Add()
Adds a new element at the end of the vector, after its current last element.
Definition: ds.h:602
TStr TLocClustStat::ParamStr ( ) const

Definition at line 555 of file ncp.cpp.

555  {
556  if (Graph.Empty()) {
557  return TStr::Fmt("A=%g, K=%d-%g-%s, Cvr=%d, SzFrc=%g", Alpha(), KMin(), KFac(), TInt::GetMegaStr(KMax()).CStr(), Coverage(), SizeFrac()); }
558  else {
559  return TStr::Fmt("A=%g, K=%d-%g-%s, Cvr=%d, SzFrc=%g G(%d, %d)", Alpha(), KMin(), KFac(), TInt::GetMegaStr(KMax()).CStr(), Coverage(), SizeFrac(),
560  Graph->GetNodes(), Graph->GetEdges());
561  }
562 }
TFlt SizeFrac
Definition: ncp.h:178
static TStr GetMegaStr(const int &Val)
Definition: dt.h:1226
TInt KMax
Definition: ncp.h:179
TFlt KFac
Definition: ncp.h:178
TInt Coverage
Definition: ncp.h:179
bool Empty() const
Definition: bd.h:501
int GetEdges() const
Returns the number of edges in the graph.
Definition: graph.cpp:82
PUNGraph Graph
Definition: ncp.h:180
TInt KMin
Definition: ncp.h:179
int GetNodes() const
Returns the number of nodes in the graph.
Definition: graph.h:192
TFlt Alpha
Definition: ncp.h:178
static TStr Fmt(const char *FmtStr,...)
Definition: dt.cpp:1599
void TLocClustStat::PlotBestClustDens ( TStr  OutFNm,
TStr  Desc = TStr() 
) const

Definition at line 689 of file ncp.cpp.

689  {
690  if (Desc.Empty()) { Desc = OutFNm; }
691  const int len = BestCutH.Len();
692  TFltPrV CutV(len, 0), EdgesV(len, 0), PhiV(len,0);
693  for (int i = 0; i < BestCutH.Len(); i++) {
694  const double size = BestCutH.GetKey(i).Val;
695  CutV.Add(TFltPr(size, BestCutH[i].GetCutSz()));
696  EdgesV.Add(TFltPr(size, BestCutH[i].GetEdges()));
697  PhiV.Add(TFltPr(size, BestCutH[i].GetPhi()));
698  }
699  TGnuPlot GP("cutEdges."+OutFNm, TStr::Fmt("%s. %s", Desc.CStr(), ParamStr().CStr()));
700  TFltPrV NewV; TLocClustStat::MakeExpBins(EdgesV, NewV);
701  GP.AddPlot(NewV, gpwLines, "Edges inside");
702  TLocClustStat::MakeExpBins(CutV, NewV);
703  GP.AddPlot(NewV, gpwLines, "Cut edges");
704  TLocClustStat::MakeExpBins(PhiV, NewV);
705  GP.AddPlot(NewV, gpwLines, "Conductance");
706  GP.SetXYLabel("Cluster size", "Edges"); GP.SetScale(gpsLog10XY);
707  GP.AddCmd("set logscale xyy2 10");
708  GP.AddCmd("set y2label \"Conductance\"");
709  GP.SavePng();
710  system(TStr(TStr("replace_all.py cutEdges.")+OutFNm+".plt \"title \\\"Conductance\" \"axis x1y2 title \\\"Conductance\"").CStr());
711  GP.RunGnuPlot();
712 }
TStr ParamStr() const
Definition: ncp.cpp:555
static void MakeExpBins(const TFltPrV &ValV, TFltPrV &NewV)
Definition: ncp.cpp:1020
TPair< TFlt, TFlt > TFltPr
Definition: ds.h:99
THash< TInt, TCutInfo > BestCutH
Definition: ncp.h:185
Definition: dt.h:412
bool Empty() const
Definition: dt.h:491
static TStr Fmt(const char *FmtStr,...)
Definition: dt.cpp:1599
char * CStr()
Definition: dt.h:479
Vector is a sequence TVal objects representing an array that can change in size.
Definition: ds.h:430
void TLocClustStat::PlotBoltzmanCurve ( const TStr OutFNm,
TStr  Desc = TStr() 
) const

Definition at line 747 of file ncp.cpp.

747  {
748  TFltPrV MeanV1, MedV1, Dec1V1, MinV1, MaxV1;
749  GetCurveStat(MeanV1, MedV1, Dec1V1, MinV1, MaxV1);
750  TVec<TFltPrV> NcpV;
751  //const TFltV TempV = TFltV::GetV(0.05, 0.01, 0.1, 10, 100, 1000);
752  const TFltV TempV = TFltV::GetV(0.001, 0.005, 0.01, 0.02, 0.05, 0.1, 0.5, 1);
753  GetBoltzmanCurveStat(TempV, NcpV);
754  TGnuPlot GP("ncp."+OutFNm+"-B", TStr::Fmt("%s. %s", Desc.CStr(), ParamStr().CStr()));
755  GP.AddPlot(MinV1, gpwLines, TStr::Fmt("%s MIN (%d, %d)", Desc.CStr(), Graph->GetNodes(), Graph->GetEdges()), "lw 1");
756  GP.AddPlot(MeanV1, gpwLines, "Avg", "lw 1");
757  GP.AddPlot(MedV1, gpwLines, "Median", "lw 1");
758  GP.AddPlot(Dec1V1, gpwLines, "Decile-1", "lw 1");
759  for (int t = 0; t < TempV.Len(); t++) {
760  GP.AddPlot(NcpV[t], gpwLines, TStr::Fmt("Temp %g", TempV[t]()), "lw 1");
761  }
762  GP.SetScale(gpsLog10XY);
763  GP.SetXYLabel("k (number of nodes in the cluster)", "{/Symbol \\F} (conductance)");
764  GP.SavePng();
765  //
766  TFltPrV SzNClustV;
767  int kCnt=1;
768  for (int i = 0; i < SizePhiH.Len(); i++) {
769  const int K = SizePhiH.GetKey(i);
770  const TFltV& PhiV = SizePhiH[i];
771  SzNClustV.Add(TFltPr(K, PhiV.Len()));
772  if (K>2 && (pow(10.0, (int)log10((double)K))==K || (K >=10 && K<=100 && K%10==0) || (K >=100 && K<=1000 && K%100==0))) {
773  THash<TFlt, TFlt> CntH;
774  for (int p = 0; p < PhiV.Len(); p++) {
775  CntH.AddDat(TMath::Round(log10(PhiV[p].Val),1)) += 1;
776  }
777  TGnuPlot::PlotValCntH(CntH, TStr::Fmt("ncp.%s-c%02d", OutFNm.CStr(), kCnt++), TStr::Fmt("%s. K=%d, NPieces=%d, %s",
778  Desc.CStr(), K, PhiV.Len(), ParamStr().CStr()), "log_10 {/Symbol \\F} (conductance)",
779  TStr::Fmt("Number of pieces of such conductance, K=%d, NPieces=%d)", K, PhiV.Len()));
780  }
781  }
782  TGnuPlot::PlotValV(SzNClustV, "ncp."+OutFNm+"-ClSz", TStr::Fmt("%s. %s", Desc.CStr(), ParamStr().CStr()),
783  "k (cluster size)", "c(k) (number of extracted clusters)", gpsLog);
784 }
TStr ParamStr() const
Definition: ncp.cpp:555
int GetEdges() const
Returns the number of edges in the graph.
Definition: graph.cpp:82
TSizeTy Len() const
Returns the number of elements in the vector.
Definition: ds.h:575
PUNGraph Graph
Definition: ncp.h:180
void GetBoltzmanCurveStat(const TFltV &TempV, TVec< TFltPrV > &NcpV) const
Definition: ncp.cpp:529
int GetNodes() const
Returns the number of nodes in the graph.
Definition: graph.h:192
Definition: gnuplot.h:7
static double Round(const double &Val)
Definition: xmath.h:16
TPair< TFlt, TFlt > TFltPr
Definition: ds.h:99
THash< TInt, TFltV > SizePhiH
Definition: ncp.h:184
void GetCurveStat(TFltPrV &MeanV, TFltPrV &MedV, TFltPrV &Dec1V, TFltPrV &MinV, TFltPrV &MaxV) const
Definition: ncp.cpp:490
static TStr Fmt(const char *FmtStr,...)
Definition: dt.cpp:1599
Definition: hash.h:97
static void PlotValCntH(const THash< TKey, TVal, THashFunc > &ValCntH, const TStr &OutFNmPref, const TStr &Desc="", const TStr &XLabel="", const TStr &YLabel="", const TGpScaleTy &ScaleTy=gpsAuto, const bool &PowerFit=false, const TGpSeriesTy &SeriesTy=gpwLinesPoints, const bool &PlotCCDF=false, const bool &ExpBucket=false)
Definition: gnuplot.h:434
static TVec< TFlt, int > GetV(const TFlt &Val1)
Returns a vector on element Val1.
Definition: ds.h:848
char * CStr()
Definition: dt.h:479
TSizeTy Add()
Adds a new element at the end of the vector, after its current last element.
Definition: ds.h:602
int Len() const
Definition: hash.h:228
TDat & AddDat(const TKey &Key)
Definition: hash.h:238
static void PlotValV(const TVec< TVal1 > &ValV, const TStr &OutFNmPref, const TStr &Desc="", const TStr &XLabel="", const TStr &YLabel="", const TGpScaleTy &ScaleTy=gpsAuto, const bool &PowerFit=false, const TGpSeriesTy &SeriesTy=gpwLinesPoints)
Definition: gnuplot.h:398
const TKey & GetKey(const int &KeyId) const
Definition: hash.h:252
Vector is a sequence TVal objects representing an array that can change in size.
Definition: ds.h:430
void TLocClustStat::PlotNCP ( const TStr OutFNm,
TStr  Desc = TStr() 
) const

Definition at line 564 of file ncp.cpp.

564  {
565  if (Desc.Empty()) { Desc = OutFNm; }
566  TFltPrV MeanV, MedV, Dec1V, MinV, MaxV;
567  GetCurveStat(MeanV, MedV, Dec1V, MinV, MaxV);
568  TGnuPlot GP("ncp."+OutFNm, TStr::Fmt("%s. %s", Desc.CStr(), ParamStr().CStr()));
569  if (! MaxV.Empty()) { GP.AddPlot((MaxV), gpwLines, "MAX"); }
570  if (! MedV.Empty()) { GP.AddPlot((MedV), gpwLines, "MEDIAN"); }
571  if (! MeanV.Empty()) { GP.AddPlot((MeanV), gpwLines, "MEAN"); }
572  if (! Dec1V.Empty()) { GP.AddPlot((Dec1V), gpwLines, "1-st DECILE"); }
573  if (! MinV.Empty()) {
574  GP.AddPlot((MinV), gpwLines, "MIN");
575  //GP.AddPlot(MinV, gpwLines, "smooth MIN", "lw 2 smooth bezier");
576  }
577  if (! BagOfWhiskerV.Empty()) {
578  GP.AddPlot(BagOfWhiskerV, gpwLines, "Whiskers", "lw 1");
579  TFltPrV BestWhiskV; BestWhiskV.Add(TFltPr(BestWhisk));
580  GP.AddPlot(BestWhiskV, gpwPoints, "Best whisker", "pt 5 ps 2");
581  }
582  GP.SetScale(gpsLog10XY);
583  GP.SetXYLabel("k (number of nodes in the cluster)", "{/Symbol \\F} (conductance)");
584  GP.SetXRange(1, Graph->GetNodes()/2);
585  GP.SavePng();
586 }
TStr ParamStr() const
Definition: ncp.cpp:555
PUNGraph Graph
Definition: ncp.h:180
int GetNodes() const
Returns the number of nodes in the graph.
Definition: graph.h:192
bool Empty() const
Tests whether the vector is empty.
Definition: ds.h:570
TPair< TFlt, TFlt > TFltPr
Definition: ds.h:99
void GetCurveStat(TFltPrV &MeanV, TFltPrV &MedV, TFltPrV &Dec1V, TFltPrV &MinV, TFltPrV &MaxV) const
Definition: ncp.cpp:490
TFltPr BestWhisk
Definition: ncp.h:187
bool Empty() const
Definition: dt.h:491
static TStr Fmt(const char *FmtStr,...)
Definition: dt.cpp:1599
TFltPrV BagOfWhiskerV
Definition: ncp.h:186
char * CStr()
Definition: dt.h:479
TSizeTy Add()
Adds a new element at the end of the vector, after its current last element.
Definition: ds.h:602
Vector is a sequence TVal objects representing an array that can change in size.
Definition: ds.h:430
void TLocClustStat::PlotNCPModul ( const TStr OutFNm,
TStr  Desc = TStr() 
) const

Definition at line 589 of file ncp.cpp.

589  {
590  if (Desc.Empty()) { Desc = OutFNm; }
591  TFltPrV MinV, BucketV;
592  const int GEdges = Graph->GetEdges();
593  for (int i = 0; i < BestCutH.Len(); i++) {
594  MinV.Add(TFltPr(BestCutH.GetKey(i).Val, BestCutH[i].GetModular(GEdges))); }
595  MinV.Sort();
596  TLocClustStat::MakeExpBins(MinV, BucketV); MinV.Swap(BucketV);
597  TGnuPlot GP("ncpMod."+OutFNm, TStr::Fmt("%s. %s", Desc.CStr(), ParamStr().CStr()));
598  if (! MinV.Empty()) {
599  GP.AddPlot((MinV), gpwLines, "MIN"); }
600  GP.SetScale(gpsLog10XY);
601  GP.SetXYLabel("k (number of nodes in the cluster)", "Q (modularity)");
602  GP.SetXRange(1, Graph->GetNodes()/2);
603  GP.SavePng();
604 }
TStr ParamStr() const
Definition: ncp.cpp:555
int GetEdges() const
Returns the number of edges in the graph.
Definition: graph.cpp:82
PUNGraph Graph
Definition: ncp.h:180
int GetNodes() const
Returns the number of nodes in the graph.
Definition: graph.h:192
bool Empty() const
Tests whether the vector is empty.
Definition: ds.h:570
void Swap(TVec< TVal, TSizeTy > &Vec)
Swaps the contents of the vector with Vec.
Definition: ds.h:1101
void Sort(const bool &Asc=true)
Sorts the elements of the vector.
Definition: ds.h:1318
static void MakeExpBins(const TFltPrV &ValV, TFltPrV &NewV)
Definition: ncp.cpp:1020
TPair< TFlt, TFlt > TFltPr
Definition: ds.h:99
THash< TInt, TCutInfo > BestCutH
Definition: ncp.h:185
bool Empty() const
Definition: dt.h:491
static TStr Fmt(const char *FmtStr,...)
Definition: dt.cpp:1599
char * CStr()
Definition: dt.h:479
TSizeTy Add()
Adds a new element at the end of the vector, after its current last element.
Definition: ds.h:602
Vector is a sequence TVal objects representing an array that can change in size.
Definition: ds.h:430
void TLocClustStat::PlotNCPScatter ( const TStr OutFNm,
TStr  Desc = TStr() 
) const

Definition at line 715 of file ncp.cpp.

715  {
716  if (Desc.Empty()) { Desc = OutFNm; }
717  THashSet<TFltPr> PhiSzH;
718  IAssertR(! SizePhiH.Empty(), "Set SaveAllCond=true in TLocClustStat::Run()");
719  for (int k = 0; k < SizePhiH.Len(); k++) {
720  const int K = SizePhiH.GetKey(k);
721  const TFltV& PhiV = SizePhiH[k];
722  for (int p = 0; p < PhiV.Len(); p++) {
723  PhiSzH.AddKey(TFltPr(K, TMath::Round(PhiV[p], 3))); }
724  }
725  TFltPrV PhiSzV; PhiSzH.GetKeyV(PhiSzV);
726  TGnuPlot GP("ncpScatter."+OutFNm, TStr::Fmt("%s. %s", Desc.CStr(), ParamStr().CStr()));
727  GP.AddPlot(PhiSzV, gpwPoints, "", "pt 5 ps 0.2");
728  GP.SetScale(gpsLog10XY);
729  GP.SetXYLabel("k (number of nodes in the cluster)", "{/Symbol \\F} (conductance)");
730  GP.SetXRange(1, Graph->GetNodes()/2);
731  GP.SavePng();
732 }
#define IAssertR(Cond, Reason)
Definition: bd.h:265
TStr ParamStr() const
Definition: ncp.cpp:555
TSizeTy Len() const
Returns the number of elements in the vector.
Definition: ds.h:575
PUNGraph Graph
Definition: ncp.h:180
bool Empty() const
Definition: hash.h:227
void GetKeyV(TVec< TKey > &KeyV) const
Definition: shash.h:1347
int GetNodes() const
Returns the number of nodes in the graph.
Definition: graph.h:192
static double Round(const double &Val)
Definition: xmath.h:16
int AddKey(const TKey &Key)
Definition: shash.h:1254
TPair< TFlt, TFlt > TFltPr
Definition: ds.h:99
THash< TInt, TFltV > SizePhiH
Definition: ncp.h:184
bool Empty() const
Definition: dt.h:491
static TStr Fmt(const char *FmtStr,...)
Definition: dt.cpp:1599
char * CStr()
Definition: dt.h:479
int Len() const
Definition: hash.h:228
const TKey & GetKey(const int &KeyId) const
Definition: hash.h:252
void TLocClustStat::PlotNcpTop10 ( const TStr OutFNm,
TStr  Desc,
const int &  TakeMinN 
) const

Definition at line 607 of file ncp.cpp.

607  {
608  if (Desc.Empty()) { Desc = OutFNm; }
609  const double BinFactor = 1.05;
610  double BinPos=1;
611  int PrevBPos=1, CurBPos=1, CutId;
612  bool AddSome;
613  TVec<TFltPrV> Curves(TakeMinN);
614  while (true) {
615  PrevBPos = CurBPos;
616  BinPos *= BinFactor; // do exponential binning
617  CurBPos = (int) floor(BinPos);
618  if (CurBPos == PrevBPos) { CurBPos=PrevBPos+1; BinPos=CurBPos; }
619  const int Nodes = CurBPos;
620  TIntSet TabuNIdSet(Graph->GetNodes());
621  AddSome = false;
622  for (int t = 0; t < TakeMinN; t++) {
623  const double Phi = FindBestCut(Nodes, TabuNIdSet, CutId);
624  if (CutId == -1) { break; }
625  Curves[t].Add(TFltPr(Nodes, Phi));
626  for (int n = 0; n < Nodes; n++) {
627  TabuNIdSet.AddKey(SweepsV[CutId].NId(n)); }
628  AddSome = true;
629  }
630  if (! AddSome) { break; }
631  }
632  TGnuPlot GP("ncpTop."+OutFNm, TStr::Fmt("%s. Top disjoint clusters. Take:%d, %s", Desc.CStr(), TakeMinN, ParamStr().CStr()));
633  for (int i = 0; i < Curves.Len(); i++) {
634  GP.AddPlot(Curves[i], gpwLines, TStr::Fmt("MIN %d", i+1), "lw 1"); }
635  //if (! BagOfWhiskerV.Empty()) { GP.AddPlot(BagOfWhiskerV, gpwLines, " Whiskers", "lw 2"); }
636  GP.SetScale(gpsLog10XY);
637  GP.SetXYLabel("k (number of nodes in the cluster)", "{/Symbol \\F} (conductance)");
638  GP.SetXRange(1, Graph->GetNodes()/2);
639  GP.SavePng();
640 }
TVec< TNodeSweep > SweepsV
Definition: ncp.h:183
TStr ParamStr() const
Definition: ncp.cpp:555
PUNGraph Graph
Definition: ncp.h:180
static double BinFactor
Definition: ncp.h:127
int GetNodes() const
Returns the number of nodes in the graph.
Definition: graph.h:192
double FindBestCut(const int &Nodes, const TIntSet &TabuNIdSet, int &BestCutId) const
Definition: ncp.cpp:469
TPair< TFlt, TFlt > TFltPr
Definition: ds.h:99
bool Empty() const
Definition: dt.h:491
static TStr Fmt(const char *FmtStr,...)
Definition: dt.cpp:1599
char * CStr()
Definition: dt.h:479
Vector is a sequence TVal objects representing an array that can change in size.
Definition: ds.h:430
void TLocClustStat::PlotPhiDistr ( const int &  CmtySz,
const TStr OutFNm,
TStr  Desc = TStr() 
) const

Definition at line 735 of file ncp.cpp.

735  {
736  IAssert(! SizePhiH.Empty());
737  const TFltV& PhiV = SizePhiH.GetDat(CmtySz);
738  THash<TFlt, TInt> PhiCntH;
739  for (int i = 0; i < PhiV.Len(); i++) {
740  const double Buck = TMath::Round(PhiV[i], 3);
741  PhiCntH.AddDat(Buck) += 1;
742  }
743  TGnuPlot::PlotValCntH(PhiCntH, TStr::Fmt("phiDistr%03d.%s", CmtySz, OutFNm.CStr()),
744  TStr::Fmt("%s. %s", Desc.CStr(), ParamStr().CStr()), "{/Symbol \\F} (conductance)", "Count");
745 }
#define IAssert(Cond)
Definition: bd.h:262
TStr ParamStr() const
Definition: ncp.cpp:555
bool Empty() const
Definition: hash.h:227
const TDat & GetDat(const TKey &Key) const
Definition: hash.h:262
static double Round(const double &Val)
Definition: xmath.h:16
THash< TInt, TFltV > SizePhiH
Definition: ncp.h:184
static TStr Fmt(const char *FmtStr,...)
Definition: dt.cpp:1599
Definition: hash.h:97
static void PlotValCntH(const THash< TKey, TVal, THashFunc > &ValCntH, const TStr &OutFNmPref, const TStr &Desc="", const TStr &XLabel="", const TStr &YLabel="", const TGpScaleTy &ScaleTy=gpsAuto, const bool &PowerFit=false, const TGpSeriesTy &SeriesTy=gpwLinesPoints, const bool &PlotCCDF=false, const bool &ExpBucket=false)
Definition: gnuplot.h:434
char * CStr()
Definition: dt.h:479
TDat & AddDat(const TKey &Key)
Definition: hash.h:238
void TLocClustStat::PlotPhiInOut ( const TStr OutFNm,
TStr  Desc = TStr() 
) const

Definition at line 643 of file ncp.cpp.

643  {
644  IAssert(! BestCutH.Empty() && ! Graph.Empty());
645  TFltPrV PhiInV, PhiBoundV, PhiRatV;
646  FILE *F = fopen(TStr::Fmt("phiInOut.%s-all.tab", OutFNm.CStr()).CStr(), "wt");
647  fprintf(F, "#Nodes\tEdges\tVol\tCut\tPhi\tInNodes\tInEdges\tInVol\tInCut\tInPhi\n");
648  TLocClustStat ClustStat2(Alpha, 1, KMax, KFac, Coverage, SizeFrac);
649  const double IdxFact = 1.05;
650  int curIdx=1, prevIdx=1;
651  while (curIdx <= BestCutH.Len()) {
652  const TCutInfo& CutInfo = BestCutH[curIdx-1];
653  if (CutInfo.GetNodes() > 1) {
654  PUNGraph ClustG = TSnap::GetSubGraph(Graph, CutInfo.CutNIdV);
655  ClustStat2.Run(ClustG);
656  const TCutInfo& InCut = ClustStat2.FindBestCut(-1);
657  PhiInV.Add(TFltPr(CutInfo.GetNodes(), InCut.GetPhi()));
658  PhiBoundV.Add(TFltPr(CutInfo.GetNodes(), CutInfo.GetPhi()));
659  PhiRatV.Add(TFltPr(CutInfo.GetNodes(), InCut.GetPhi()/CutInfo.GetPhi()));
660  fprintf(F, "%d\t%d\t%d\t%d\t%f\t%d\t%d\t%d\t%d\t%f\n", CutInfo.GetNodes(), CutInfo.GetEdges(), CutInfo.GetVol(),
661  CutInfo.GetCutSz(), CutInfo.GetPhi(), InCut.GetNodes(), InCut.GetEdges(), InCut.GetVol(), InCut.GetCutSz(), InCut.GetPhi());
662  fflush(F);
663  }
664  prevIdx = curIdx;
665  curIdx = (int) TMath::Round(double(curIdx)*IdxFact);
666  if (prevIdx == curIdx) { curIdx++; }
667  }
668  fclose(F);
669  { TGnuPlot GP("phiInOut."+OutFNm, TStr::Fmt("%s. %s", Desc.CStr(), ParamStr().CStr()));
670  GP.AddPlot(PhiBoundV, gpwLines, "CUT conductance", "lw 1");
671  GP.AddPlot(PhiInV, gpwLines, "INSIDE conductance", "lw 1");
672  //GP.AddPlot(PhiRatV, gpwLines, "RATIO (inside/boundary)", "lw 1");
673  GP.SetXRange(1, Graph->GetNodes()/2); GP.SetScale(gpsLog10XY);
674  GP.SetXYLabel("k (number of nodes in the cluster)", "{/Symbol \\F} (conductance)");
675  //GP.AddCmd("set logscale xyy2 10");
676  //GP.AddCmd("set y2label \"Conductance ratio (inside/boundary -- higher better)\"");
677  GP.SavePng(); }
678  //system(TStr(TStr("replace_all.py phiInOut.")+OutFNm+".plt \"title \\\"RATIO (inside/boundary)\" \"axis x1y2 title \\\"RATIO (inside/boundary)\"").CStr());
679  //GP.RunGnuPlot();
680  { TGnuPlot GP("phiInOutRat."+OutFNm, TStr::Fmt("%s. %s", Desc.CStr(), ParamStr().CStr()));
681  GP.AddPlot(PhiRatV, gpwLines, "RATIO (Inside / Boundary)", "lw 1");
682  GP.SetXRange(1, Graph->GetNodes()/2); GP.SetScale(gpsLog10XY);
683  GP.SetXYLabel("Nodes", "Conductance ratio (inside/boundary) -- higher better");
684  GP.SavePng(); }
685 }
#define IAssert(Cond)
Definition: bd.h:262
TFlt SizeFrac
Definition: ncp.h:178
TInt KMax
Definition: ncp.h:179
TFlt KFac
Definition: ncp.h:178
TInt Coverage
Definition: ncp.h:179
TStr ParamStr() const
Definition: ncp.cpp:555
bool Empty() const
Definition: bd.h:501
PUNGraph Graph
Definition: ncp.h:180
int GetNodes() const
Returns the number of nodes in the graph.
Definition: graph.h:192
Local-Spectral-Clustering statistics of a given Graph.
Definition: ncp.h:125
PUNGraph GetSubGraph(const PUNGraph &Graph, const TIntV &NIdV, const bool &RenumberNodes)
Returns an induced subgraph of an undirected graph Graph with NIdV nodes with an optional node renumb...
Definition: subgraph.cpp:7
static double Round(const double &Val)
Definition: xmath.h:16
TPair< TFlt, TFlt > TFltPr
Definition: ds.h:99
THash< TInt, TCutInfo > BestCutH
Definition: ncp.h:185
TFlt Alpha
Definition: ncp.h:178
static TStr Fmt(const char *FmtStr,...)
Definition: dt.cpp:1599
Definition: bd.h:196
char * CStr()
Definition: dt.h:479
Vector is a sequence TVal objects representing an array that can change in size.
Definition: ds.h:430
void TLocClustStat::Run ( const PUNGraph Graph,
const bool &  SaveAllSweeps = false,
const bool &  SaveAllCond = false,
const bool &  SaveBestNodesAtK = false 
)

pow(1.1, cnt)

Definition at line 300 of file ncp.cpp.

300  {
301  Graph = TSnap::GetMxWcc(_Graph);
302  const int Nodes = Graph->GetNodes();
303  const int Edges = Graph->GetEdges();
304  printf("\nLocal clustering: Graph(%d, %d)\n", Nodes, Edges);
305  printf(" Alpha: %g\n", Alpha());
306  printf(" K: %d -- %g -- %dm\n", KMin(), KFac(), int(KMax/Mega(1)));
307  printf(" Coverage: %d\n", Coverage());
308  printf(" SizeFrac: %g\n\n", SizeFrac());
309  TExeTm TotTm;
310  Clr();
311  TLocClust Clust(Graph, Alpha);
312  BestCut.CutNIdV.Clr(false);
313  BestCut.CutSz=-1; BestCut.Edges=-1;
314  double BestPhi = TFlt::Mx;
315  int prevK=-1;
316  bool NextDone=false;
317  if (SaveBestNodesAtK) { // fill buckets (only store nodes in clusters for sizes in SizeBucketSet)
318  SizeBucketSet.Clr();
319  double PrevBPos = 1, BPos = 1;
320  while (BPos <= Mega(100)) {
321  PrevBPos = (uint) floor(BPos);
322  BPos *= BinFactor;
323  if (floor(BPos) == PrevBPos) { BPos = PrevBPos + 1; }
324  SizeBucketSet.AddKey(int(floor(BPos) - 1));
325  }
326  }
327  for (int K = KMin, cnt=1; K < KMax; K = int(KFac * double(K))+1, cnt++) {
328  if (K == prevK) { K++; } prevK = K;
329  const int Runs = 2 + int(Coverage * floor(double(Graph->GetEdges()) / double(K)));
330  printf("%d] K: %d, %d runs\n", cnt+1, K, Runs);
331  if (NextDone) { break; } // done
332  if (K+1 > 2*Graph->GetEdges()) { K = Graph->GetEdges(); NextDone=true; }
333  //if (K+1 > Graph->GetEdges()) { K = Graph->GetEdges(); NextDone=true; }
334  TExeTm ExeTm, IterTm;
335  double MeanSz=0.0, MeanVol=0.0, Count=0.0;
336  for (int run = 0; run < Runs; run++) {
337  const int SeedNId = Graph->GetRndNId(); IterTm.Tick();
338  Clust.FindBestCut(SeedNId, K, SizeFrac);
339  const int Sz = Clust.BestCutNodes();
340  const int Vol = Clust.GetCutVol();
341  const double Phi = TMath::Round(Clust.GetCutPhi(), 4);
342  if (Sz == 0 || Vol == 0 || Phi == 0) { continue; }
343  MeanSz+=Sz; MeanVol+=Vol; Count+= 1;
344  if (SaveAllSweeps) { // save the full cut set and conductances for all trials
345  SweepsV.Add(TNodeSweep(SeedNId, Clust.GetNIdV(), Clust.GetPhiV())); }
346  int SAtBestPhi=-1;
347  for (int s = 0; s < Clust.Len(); s++) {
348  const int size = s+1;
349  const int cut = Clust.GetCut(s);
350  const int edges = (Clust.GetVol(s)-cut)/2;
351  const double phi = Clust.GetPhi(s);
352  if (( Clust.GetPhi(s) != double(cut)/double(2*edges+cut))) { continue; } // more than half of the edges
353  IAssert((Clust.GetVol(s) - cut) % 2 == 0);
354  IAssert(phi == double(cut)/double(2*edges+cut));
355  IAssert(phi >= 1.0/double((1+s)*s+1));
357  // TCutInfo CutInfo(size, edges, cut); Clust.GetNIdV().GetSubValV(0, size-1, CutInfo.CutNIdV);
358  //double MxFrac=0, AvgFrac=0, MedianFrac=0, Pct9Frac=0, Flake=0;
359  //CutInfo.GetFracDegOut(Graph, MxFrac, AvgFrac, MedianFrac, Pct9Frac, Flake);
360  //const double phi = MxFrac;
361  if (BestPhi >= phi) {
362  BestPhi = phi;
363  BestCut = TCutInfo(size, edges, cut);
364  SAtBestPhi = s;
365  }
367  //bool TAKE=false; if (! BestCutH.IsKey(size)) { TAKE=true; }
368  //else { BestCutH.GetDat(size).GetFracDegOut(Graph, MxFrac, AvgFrac, MedianFrac, Pct9Frac, Flake); if (MxFrac >= phi) { TAKE = true; } }
369  // if (TAKE) {
370  if (! BestCutH.IsKey(size) || BestCutH.GetDat(size).GetPhi() >= phi) { //new best cut (size, edges inside and nodes)
371  BestCutH.AddDat(size, TCutInfo(size, edges, cut)); // for every size store best cut (NIds inside the cut)
372  if (SaveBestNodesAtK) { // store node ids in best community for each size k
373  if (! SizeBucketSet.Empty() && ! SizeBucketSet.IsKey(size)) { continue; } // only save best clusters at SizeBucketSet
374  Clust.GetNIdV().GetSubValV(0, size-1, BestCutH.GetDat(size).CutNIdV); }
375  }
376  if (SaveAllCond) { // for every size store all conductances
377  SizePhiH.AddDat(size).Add(phi); }
378  }
379  if (SAtBestPhi != -1) { // take nodes in best cluster
380  const int size = SAtBestPhi+1;
381  Clust.GetNIdV().GetSubValV(0, size-1, BestCut.CutNIdV);
382  }
383  if (TLocClust::Verbose) {
384  printf(".");
385  if (run % 50 == 0) {
386  printf("\r %d / %d \r", run, Runs); }
387  }
388  }
389  if (TLocClust::Verbose) {
390  printf("\r %d / %d: %s \n", Runs, Runs, ExeTm.GetStr());
391  }
392  MeanSz/=Count; MeanVol/=Count;
393  printf(" Graph(%d, %d) ", Nodes, Edges);
394  printf(" mean: sz: %.2f vol: %.2f [%s] %s\n", MeanSz, MeanVol, ExeTm.GetStr(), TExeTm::GetCurTm());
395  }
397  for (int k = 0; k < SizePhiH.Len(); k++) {
398  SizePhiH[k].Sort(); }
399  SweepsV.Sort();
400  BestCutH.SortByKey();
401  printf("DONE. Graph(%d, %d): Total run time: %s\n\n", Nodes, Edges, TotTm.GetStr());
402 }
void Clr(const bool &DoDel=true, const int &NoDelLim=-1)
Definition: shash.h:1243
#define IAssert(Cond)
Definition: bd.h:262
TFlt SizeFrac
Definition: ncp.h:178
TVec< TNodeSweep > SweepsV
Definition: ncp.h:183
static char * GetCurTm()
Definition: tm.h:374
Definition: tm.h:355
TInt KMax
Definition: ncp.h:179
void Sort(const bool &CmpKey, const bool &Asc)
Definition: hash.h:570
PGraph GetMxWcc(const PGraph &Graph)
Returns a graph representing the largest weakly connected component on an input Graph.
Definition: cncom.h:452
TFlt KFac
Definition: ncp.h:178
unsigned int uint
Definition: bd.h:11
TInt Coverage
Definition: ncp.h:179
static bool Verbose
Definition: ncp.h:26
TInt KMin
Definition: ncp.h:179
static double BinFactor
Definition: ncp.h:127
static const double Mx
Definition: dt.h:1391
bool IsKey(const TKey &Key) const
Definition: shash.h:1148
TCutInfo BestCut
Definition: ncp.h:188
bool Empty() const
Definition: shash.h:1120
void Clr(const bool &DoDel=true, const TSizeTy &NoDelLim=-1)
Clears the contents of the vector.
Definition: ds.h:1022
Local Spectral Clustering algorithm.
Definition: ncp.h:24
static double Round(const double &Val)
Definition: xmath.h:16
#define Mega(n)
Definition: gbase.h:4
int AddKey(const TKey &Key)
Definition: shash.h:1254
void Tick()
Definition: tm.h:364
void Clr()
Definition: ncp.cpp:293
THash< TInt, TFltV > SizePhiH
Definition: ncp.h:184
void SortByKey(const bool &Asc=true)
Definition: hash.h:291
THash< TInt, TCutInfo > BestCutH
Definition: ncp.h:185
TFlt Alpha
Definition: ncp.h:178
TIntSet SizeBucketSet
Definition: ncp.h:189
const char * GetStr() const
Definition: tm.h:368
TSizeTy Add()
Adds a new element at the end of the vector, after its current last element.
Definition: ds.h:602
int Len() const
Definition: hash.h:228
TDat & AddDat(const TKey &Key)
Definition: hash.h:238
void TLocClustStat::Save ( TSOut SOut) const

Definition at line 280 of file ncp.cpp.

280  {
281  // parameters
282  Alpha.Save(SOut);
283  SizeFrac.Save(SOut);
284  KFac.Save(SOut);
285  KMin.Save(SOut);
286  KMax.Save(SOut);
287  Coverage.Save(SOut);
288  // cluster stat
289  //SweepsV.Save(SOut);
290  //SizePhiH.Save(SOut);
291 }
TFlt SizeFrac
Definition: ncp.h:178
TInt KMax
Definition: ncp.h:179
void Save(TSOut &SOut) const
Definition: dt.h:1153
TFlt KFac
Definition: ncp.h:178
TInt Coverage
Definition: ncp.h:179
TInt KMin
Definition: ncp.h:179
TFlt Alpha
Definition: ncp.h:178
void Save(TSOut &SOut) const
Definition: dt.h:1402
void TLocClustStat::SaveTxtInfo ( const TStr OutFNmPref,
const TStr Desc,
const bool &  SetMaxAt1 
) const

Definition at line 841 of file ncp.cpp.

841  {
842  printf("Save text info...");
843  TExeTm ExeTm;
844  const int GNodes = Graph->GetNodes();
845  const int GEdges = Graph->GetEdges();
846  TVec<TFltV> ColV(17);
847  double MxFrac=0, AvgFrac=0, MedianFrac=0, Pct9Frac=0, Flake=0;
848  // double PrevBPos = 1, BPos = 1;
849  for (int i = 0; i < SizeBucketSet.Len(); i++) {
850  if ( !BestCutH.IsKey(SizeBucketSet[i])) { continue; }
852  C.GetFracDegOut(Graph, MxFrac, AvgFrac, MedianFrac, Pct9Frac, Flake);
853  ColV[0].Add(C.Nodes()); ColV[1].Add(C.Edges());
854  ColV[2].Add(C.CutSz()); ColV[3].Add(C.GetPhi());
855  ColV[4].Add(C.GetExpansion()); ColV[5].Add(C.GetIntDens());
856  ColV[6].Add(C.GetCutRatio(GNodes)); ColV[7].Add(C.GetNormCut(GEdges));
857  ColV[8].Add(MxFrac); ColV[9].Add(AvgFrac);
858  ColV[10].Add(MedianFrac); ColV[11].Add(Pct9Frac); ColV[12].Add(Flake);
859  ColV[13].Add(double(2.0*C.Edges)); ColV[14].Add(C.GetExpEdgesIn(GEdges));
860  ColV[15].Add(C.GetModular(GEdges)); ColV[16].Add(C.GetModRat(GEdges));
861  printf(".");
862  }
863  // normalize so that maximum value is at 1
864  if (SetMaxAt1) {
865  for (int c = 1; c < ColV.Len(); c++) {
866  double MaxVal=1e-10;
867  for (int r = 0; r < ColV[c].Len(); r++) { MaxVal = TMath::Mx(MaxVal, ColV[c][r]()); }
868  for (int r = 0; r < ColV[c].Len(); r++) { ColV[c][r] /= MaxVal; }
869  }
870  }
871  // save
872  const TStr DatFNm = TStr::Fmt("ncp.%s.INFO.tab", OutFNmPref.CStr());
873  FILE *F = fopen(DatFNm.CStr(), "wt");
874  fprintf(F, "# %s %s\n", Desc.CStr(), ParamStr().CStr());
875  fprintf(F, "#N_inside\tE_inside\tE_across\tConductance\tExpansion\tIntDensity\tCutRatio\tNormCut\tMx_FracDegOut\tAvg_FDO\tMedian_FDO\t90Pct_FDO\tFlake_FDO\tVolume\tExpVolume\tModularity\tModRatio\n");
876  for (int r = 0; r < ColV[0].Len(); r++) {
877  fprintf(F, "%g", ColV[0][r]());
878  for (int c = 1; c < ColV.Len(); c++) {
879  fprintf(F, "\t%g", ColV[c][r]()); }
880  fprintf(F, "\n");
881  }
882  fclose(F);
883  printf("[%s]\n", ExeTm.GetStr());
884  TGnuPlot GP(TStr::Fmt("ncp.%s.All", OutFNmPref.CStr()), TStr::Fmt("%s %s",
885  Desc.CStr(), ParamStr().CStr()));
886  GP.AddPlot(DatFNm, 1, 4, gpwLines, "Conductance", "lw 2");
887  GP.AddPlot(DatFNm, 1, 5, gpwPoints, "Expansion", "pt 3");
888  GP.AddPlot(DatFNm, 1, 6, gpwPoints, "Internal Density", "pt 5 ps 0.8");
889  GP.AddPlot(DatFNm, 1, 7, gpwPoints, "Cut Ratio", "pt 6");
890  GP.AddPlot(DatFNm, 1, 8, gpwPoints, "Normalized Cut", "pt 7");
891  GP.AddPlot(DatFNm, 1, 9, gpwPoints, "Maximum FDO", "pt 9");
892  GP.AddPlot(DatFNm, 1, 10, gpwPoints, "Avg FDO", "pt 11");
893  GP.AddPlot(DatFNm, 1, 13, gpwPoints, "Flake FDO", "pt 13");
894  GP.SetScale(gpsLog10XY);
895  GP.SetXYLabel("k (number of nodes in the cluster)", "Normalized community score (lower is better)");
896  GP.SavePng();
897 }
double GetModRat(const int &GEdges) const
Definition: ncp.h:171
Definition: tm.h:355
static const T & Mx(const T &LVal, const T &RVal)
Definition: xmath.h:32
double GetPhi() const
Definition: ncp.h:164
TStr ParamStr() const
Definition: ncp.cpp:555
int GetEdges() const
Returns the number of edges in the graph.
Definition: graph.cpp:82
PUNGraph Graph
Definition: ncp.h:180
double GetExpEdgesIn(const int &GEdges) const
Definition: ncp.h:172
const TCutInfo & GetKNodeCut(const int &Nodes) const
Definition: ncp.h:208
double GetNormCut(const int &GEdges) const
Definition: ncp.h:168
int GetNodes() const
Returns the number of nodes in the graph.
Definition: graph.h:192
double GetExpansion() const
Definition: ncp.h:165
double GetCutRatio(const int &GNodes) const
Definition: ncp.h:167
double GetIntDens() const
Definition: ncp.h:166
double GetFracDegOut(const PUNGraph &Graph, double &MxFrac, double &AvgFrac, double &MedianFrac, double &Pct9Frac, double &Flake) const
Definition: ncp.cpp:245
THash< TInt, TCutInfo > BestCutH
Definition: ncp.h:185
double GetModular(const int &GEdges) const
Definition: ncp.h:170
int Len() const
Definition: shash.h:1121
Definition: dt.h:412
static TStr Fmt(const char *FmtStr,...)
Definition: dt.cpp:1599
TIntSet SizeBucketSet
Definition: ncp.h:189
const char * GetStr() const
Definition: tm.h:368
char * CStr()
Definition: dt.h:479
void TLocClustStat::SetGraph ( const PUNGraph GraphPt)
inline

Definition at line 194 of file ncp.h.

194 { Graph=GraphPt; }
PUNGraph Graph
Definition: ncp.h:180

Member Data Documentation

TFlt TLocClustStat::Alpha
private

Definition at line 178 of file ncp.h.

TFltPrV TLocClustStat::BagOfWhiskerV

Definition at line 186 of file ncp.h.

TCutInfo TLocClustStat::BestCut

Definition at line 188 of file ncp.h.

THash<TInt, TCutInfo> TLocClustStat::BestCutH

Definition at line 185 of file ncp.h.

TFltPr TLocClustStat::BestWhisk

Definition at line 187 of file ncp.h.

double TLocClustStat::BinFactor = 1.01
static

Definition at line 127 of file ncp.h.

TInt TLocClustStat::Coverage
private

Definition at line 179 of file ncp.h.

PUNGraph TLocClustStat::Graph
private

Definition at line 180 of file ncp.h.

TFlt TLocClustStat::KFac
private

Definition at line 178 of file ncp.h.

TInt TLocClustStat::KMax
private

Definition at line 179 of file ncp.h.

TInt TLocClustStat::KMin
private

Definition at line 179 of file ncp.h.

TIntSet TLocClustStat::SizeBucketSet

Definition at line 189 of file ncp.h.

TFlt TLocClustStat::SizeFrac
private

Definition at line 178 of file ncp.h.

THash<TInt, TFltV> TLocClustStat::SizePhiH

Definition at line 184 of file ncp.h.

TVec<TNodeSweep> TLocClustStat::SweepsV

Definition at line 183 of file ncp.h.

int TLocClustStat::TakeValAt
static

Definition at line 128 of file ncp.h.


The documentation for this class was generated from the following files: