SNAP Library 6.0, Developer Reference  2020-12-09 16:24:20
SNAP, a general purpose, high performance system for analysis and manipulation of large networks
statplot.h
Go to the documentation of this file.
1 namespace TSnap {
2 
4 // Plot graph properties
5 
9 template <class PGraph> void PlotInDegDistr(const PGraph& Graph, const TStr& FNmPref, TStr DescStr=TStr(), const bool& PlotCCdf=false, const bool& PowerFit=false);
13 template <class PGraph> void PlotOutDegDistr(const PGraph& Graph, const TStr& FNmPref, TStr DescStr=TStr(), const bool& PlotCCdf=false, const bool& PowerFit=false);
15 template <class PGraph> void PlotWccDistr(const PGraph& Graph, const TStr& FNmPref, TStr DescStr=TStr());
17 template <class PGraph> void PlotSccDistr(const PGraph& Graph, const TStr& FNmPref, TStr DescStr=TStr());
19 template <class PGraph> void PlotClustCf(const PGraph& Graph, const TStr& FNmPref, TStr DescStr=TStr());
22 template <class PGraph> void PlotHops(const PGraph& Graph, const TStr& FNmPref, TStr DescStr=TStr(), const bool& IsDir=false, const int& NApprox=32);
24 template <class PGraph> void PlotShortPathDistr(const PGraph& Graph, const TStr& FNmPref, TStr DescStr=TStr(), int TestNodes=TInt::Mx);
26 template <class PGraph> void PlotKCoreNodes(const PGraph& Graph, const TStr& FNmPref, TStr DescStr=TStr());
28 template <class PGraph> void PlotKCoreEdges(const PGraph& Graph, const TStr& FNmPref, TStr DescStr=TStr());
29 
31 void PlotEigValRank(const PUNGraph& Graph, const int& EigVals, const TStr& FNmPref, TStr DescStr=TStr());
33 void PlotEigValDistr(const PUNGraph& Graph, const int& EigVals, const TStr& FNmPref, TStr DescStr=TStr());
36 void PlotInvParticipRat(const PUNGraph& Graph, const int& MaxEigVecs, const int& TimeLimit, const TStr& FNmPref, TStr DescStr=TStr());
38 void PlotSngValRank(const PNGraph& Graph, const int& SngVals, const TStr& FNmPref, TStr DescStr=TStr());
40 void PlotSngValDistr(const PNGraph& Graph, const int& SngVals, const TStr& FNmPref, TStr DescStr=TStr());
42 void PlotSngVec(const PNGraph& Graph, const TStr& FNmPref, TStr DescStr=TStr());
43 
45 // Implementation
46 template <class PGraph>
47 void PlotInDegDistr(const PGraph& Graph, const TStr& FNmPref, TStr DescStr, const bool& PlotCCdf, const bool& PowerFit) {
48  TIntPrV DegCntV;
49  TSnap::GetInDegCnt(Graph, DegCntV);
50  const double AvgDeg = 2*Graph->GetEdges()/double(Graph->GetNodes());
51  int AboveAvg=0, Above2Avg=0;
52  for (int i = 0; i < DegCntV.Len(); i++) {
53  if (DegCntV[i].Val1 > AvgDeg) { AboveAvg += DegCntV[i].Val2; }
54  if (DegCntV[i].Val1 > 2*AvgDeg) { Above2Avg += DegCntV[i].Val2; }
55  }
56  if (PlotCCdf) {
57  DegCntV = TGUtil::GetCCdf(DegCntV); }
58  if (DescStr.Empty()) { DescStr = FNmPref; }
59  TGnuPlot::PlotValV(DegCntV, (PlotCCdf?"inDegC.":"inDeg.")+FNmPref,
60  TStr::Fmt("%s. G(%d, %d). %d (%.4f) nodes with in-deg > avg deg (%.1f), %d (%.4f) with >2*avg.deg", DescStr.CStr(),
61  Graph->GetNodes(), Graph->GetEdges(), AboveAvg, AboveAvg/double(Graph->GetNodes()), AvgDeg, Above2Avg, Above2Avg/double(Graph->GetNodes())),
62  "In-degree", PlotCCdf?"Count (CCDF)":"Count", gpsLog10XY, PowerFit, gpwLinesPoints);
63 }
64 
65 template <class PGraph>
66 void PlotOutDegDistr(const PGraph& Graph, const TStr& FNmPref, TStr DescStr, const bool& PlotCCdf, const bool& PowerFit) {
67  TIntPrV DegCntV;
68  TSnap::GetOutDegCnt(Graph, DegCntV);
69  const double AvgDeg = 2*Graph->GetEdges()/double(Graph->GetNodes());
70  int AboveAvg=0, Above2Avg=0;
71  for (int i = 0; i < DegCntV.Len(); i++) {
72  if (DegCntV[i].Val1 > AvgDeg) { AboveAvg += DegCntV[i].Val2; }
73  if (DegCntV[i].Val1 > 2*AvgDeg) { Above2Avg += DegCntV[i].Val2; }
74  }
75  if (PlotCCdf) {
76  DegCntV = TGUtil::GetCCdf(DegCntV); }
77  if (DescStr.Empty()) { DescStr = FNmPref; }
78  TGnuPlot::PlotValV(DegCntV, (PlotCCdf?"outDegC.":"outDeg.")+FNmPref,
79  TStr::Fmt("%s. G(%d, %d). %d (%.4f) nodes with out-deg > avg deg (%.1f), %d (%.4f) with >2*avg.deg", DescStr.CStr(),
80  Graph->GetNodes(), Graph->GetEdges(), AboveAvg, AboveAvg/double(Graph->GetNodes()), AvgDeg, Above2Avg, Above2Avg/double(Graph->GetNodes())),
81  "Out-degree", PlotCCdf?"Count (CCDF)":"Count", gpsLog10XY, PowerFit, gpwLinesPoints);
82 }
83 
84 template <class PGraph>
85 void PlotWccDistr(const PGraph& Graph, const TStr& FNmPref, TStr DescStr) {
86  TIntPrV WccSzCnt;
87  TSnap::GetWccSzCnt(Graph, WccSzCnt);
88  if (DescStr.Empty()) { DescStr = FNmPref; }
89  TGnuPlot GnuPlot("wcc."+FNmPref, TStr::Fmt("%s. G(%d, %d). Largest component has %f nodes",
90  DescStr.CStr(), Graph->GetNodes(), Graph->GetEdges(), WccSzCnt.Last().Val1/double(Graph->GetNodes())));
91  GnuPlot.AddPlot(WccSzCnt, gpwLinesPoints, "", "pt 6");
92  GnuPlot.SetXYLabel("Size of weakly connected component", "Number of components");
93  GnuPlot.SetScale(gpsLog10XY);
94  GnuPlot.SavePng();
95 }
96 
97 template <class PGraph>
98 void PlotSccDistr(const PGraph& Graph, const TStr& FNmPref, TStr DescStr) {
99  TIntPrV SccSzCnt;
100  TSnap::GetSccSzCnt(Graph, SccSzCnt);
101  if (DescStr.Empty()) { DescStr = FNmPref; }
102  TGnuPlot GnuPlot("scc."+FNmPref, TStr::Fmt("%s. G(%d, %d). Largest component has %f nodes",
103  DescStr.CStr(), Graph->GetNodes(), Graph->GetEdges(), SccSzCnt.Last().Val1/double(Graph->GetNodes())));
104  GnuPlot.AddPlot(SccSzCnt, gpwLinesPoints, "", "pt 6");
105  GnuPlot.SetXYLabel("Size of strongly connected component", "Number of components");
106  GnuPlot.SetScale(gpsLog10XY);
107  GnuPlot.SavePng();
108 }
109 
110 template <class PGraph>
111 void PlotClustCf(const PGraph& Graph, const TStr& FNmPref, TStr DescStr) {
112  TFltPrV DegToCCfV;
113  int64 ClosedTriads, OpenTriads;
114  const double CCF = GetClustCf(Graph, DegToCCfV, ClosedTriads, OpenTriads);
115  if (DescStr.Empty()) { DescStr = FNmPref; }
116  TGnuPlot GnuPlot("ccf."+FNmPref,
117  TStr::Fmt("%s. G(%d, %d). Average clustering: %.4f OpenTriads: %d (%.4f) ClosedTriads: %d (%.4f)", DescStr.CStr(), Graph->GetNodes(), Graph->GetEdges(),
118  CCF, OpenTriads, OpenTriads/double(OpenTriads+ClosedTriads), ClosedTriads, ClosedTriads/double(OpenTriads+ClosedTriads)));
119  GnuPlot.AddPlot(DegToCCfV, gpwLinesPoints, "", "pt 6");
120  GnuPlot.SetXYLabel("Node degree", "Average clustering coefficient");
121  GnuPlot.SetScale(gpsLog10XY);
122  GnuPlot.SavePng();
123 }
124 
125 template <class PGraph>
126 void PlotHops(const PGraph& Graph, const TStr& FNmPref, TStr DescStr, const bool& IsDir, const int& NApprox) {
127  TIntFltKdV DistNbrsV;
128  TSnap::GetAnf(Graph, DistNbrsV, -1, IsDir, NApprox);
129  const double EffDiam = TSnap::TSnapDetail::CalcEffDiam(DistNbrsV, 0.9);
130  if (DescStr.Empty()) { DescStr = FNmPref; }
131  TGnuPlot GnuPlot("hop."+FNmPref, TStr::Fmt("%s. Hop plot. EffDiam: %g, G(%d, %d)",
132  DescStr.CStr(), EffDiam, Graph->GetNodes(), Graph->GetEdges()));
133  GnuPlot.SetXYLabel("Number of hops", "Number of pairs of nodes");
134  GnuPlot.SetScale(gpsLog10Y);
135  GnuPlot.AddPlot(DistNbrsV, gpwLinesPoints, "", "pt 6");
136  GnuPlot.SavePng();
137 }
138 
139 template <class PGraph>
140 void PlotShortPathDistr(const PGraph& Graph, const TStr& FNmPref, TStr DescStr, int TestNodes) {
141  TIntH DistToCntH;
142  TBreathFS<PGraph> BFS(Graph);
143  // shotest paths
144  TIntV NodeIdV;
145  Graph->GetNIdV(NodeIdV); NodeIdV.Shuffle(TInt::Rnd);
146  for (int tries = 0; tries < TMath::Mn(TestNodes, Graph->GetNodes()); tries++) {
147  const int NId = NodeIdV[tries];
148  BFS.DoBfs(NId, true, false, -1, TInt::Mx);
149  for (int i = 0; i < BFS.NIdDistH.Len(); i++) {
150  DistToCntH.AddDat(BFS.NIdDistH[i]) += 1; }
151  }
152  DistToCntH.SortByKey(true);
153  TFltPrV DistNbrsPdfV;
154  for (int i = 0; i < DistToCntH.Len(); i++) {
155  DistNbrsPdfV.Add(TFltPr(DistToCntH.GetKey(i)(), DistToCntH[i]()));
156  }
157  const double EffDiam = TSnap::TSnapDetail::CalcEffDiamPdf(DistNbrsPdfV, 0.9);
158  const double AvgDiam = TSnap::TSnapDetail::CalcAvgDiamPdf(DistNbrsPdfV);
159  const int FullDiam = (int) DistNbrsPdfV.Last().Val1;
160  if (DescStr.Empty()) { DescStr = FNmPref; }
161  TGnuPlot::PlotValV(DistNbrsPdfV, "diam."+FNmPref,
162  TStr::Fmt("%s. G(%d, %d). Diam: avg:%.2f eff:%.2f max:%d", DescStr.CStr(), Graph->GetNodes(), Graph->GetEdges(),
163  AvgDiam, EffDiam, FullDiam), "Number of hops", "Number of shortest paths", gpsLog10Y, false, gpwLinesPoints);
164 }
165 
166 template <class PGraph>
167 void PlotKCoreNodes(const PGraph& Graph, const TStr& FNmPref, TStr DescStr) {
168  TIntPrV CoreNodesV;
169  TSnap::GetKCoreNodes(Graph, CoreNodesV);
170  if (DescStr.Empty()) { DescStr = FNmPref; }
171  TGnuPlot::PlotValV(CoreNodesV, "coreNodes."+FNmPref, TStr::Fmt("%s. G(%d, %d).", DescStr.CStr(), Graph->GetNodes(), Graph->GetEdges()), "k-Core", "Number of nodes in the k-Core", gpsLog10Y, false, gpwLinesPoints);
172 }
173 
174 template <class PGraph>
175 void PlotKCoreEdges(const PGraph& Graph, const TStr& FNmPref, TStr DescStr) {
176  TIntPrV CoreEdgesV;
177  TSnap::GetKCoreEdges(Graph, CoreEdgesV);
178  if (DescStr.Empty()) { DescStr = FNmPref; }
179  TGnuPlot::PlotValV(CoreEdgesV, "coreEdges."+FNmPref, TStr::Fmt("%s. G(%d, %d).", DescStr.CStr(), Graph->GetNodes(), Graph->GetEdges()), "k-Core", "Number of edges in the k-Core", gpsLog10Y, false, gpwLinesPoints);
180 }
181 
182 //TIntPrV CoreNV, CoreEV;
183 //TSnap::GetKCoreNodes(UG, CoreNV);
184 //TSnap::GetKCoreEdges(UG, CoreEV);
185 //TGnuPlot::PlotValV(CoreNV, "kcoreN.fullUndir", "kCore nodes size", "k-Core", "Number of nodes", gpsLog10Y);
186 //TGnuPlot::PlotValV(CoreEV, "kcoreE.fullUndir", "kCore edges size", "k-Core", "Number of edges", gpsLog10Y);
187 
188 
189 } // namespace TSnap
void PlotSccDistr(const PGraph &Graph, const TStr &FNmPref, TStr DescStr=TStr())
Plots the distribution of sizes of strongly connected components of a Graph.
Definition: statplot.h:98
static const T & Mn(const T &LVal, const T &RVal)
Definition: xmath.h:36
Main namespace for all the Snap global entities.
Definition: alg.h:1
int GetKCoreNodes(const PGraph &Graph, TIntPrV &CoreIdSzV)
Returns the number of nodes in each core of order K (where K=0, 1, ...)
Definition: kcore.h:114
void PlotKCoreNodes(const PGraph &Graph, const TStr &FNmPref, TStr DescStr=TStr())
Plots the k-Core node-size distribution: Core k vs. number of nodes in k-core.
Definition: statplot.h:167
void GetOutDegCnt(const PGraph &Graph, TIntPrV &DegToCntV)
Returns an out-degree histogram: a set of pairs (out-degree, number of nodes of such out-degree) ...
Definition: alg.h:201
void SavePng(const int &SizeX=1000, const int &SizeY=800, const TStr &Comment=TStr())
Definition: gnuplot.h:120
static const int Mx
Definition: dt.h:1142
void PlotOutDegDistr(const PGraph &Graph, const TStr &FNmPref, TStr DescStr=TStr(), const bool &PlotCCdf=false, const bool &PowerFit=false)
Definition: statplot.h:66
int DoBfs(const int &StartNode, const bool &FollowOut, const bool &FollowIn, const int &TargetNId=-1, const int &MxDist=TInt::Mx)
Performs BFS from node id StartNode for at maps MxDist steps by only following in-links (parameter Fo...
Definition: bfsdfs.h:128
TSizeTy Len() const
Returns the number of elements in the vector.
Definition: ds.h:575
void GetAnf(const PGraph &Graph, const int &SrcNId, TIntFltKdV &DistNbrsV, const int &MxDist, const bool &IsDir, const int &NApprox=32)
Definition: anf.h:204
void SetXYLabel(const TStr &XLabel, const TStr &YLabel)
Definition: gnuplot.h:73
void PlotSngValRank(const PNGraph &Graph, const int &SngVals, const TStr &FNmPref, TStr DescStr)
Plots the rank distribution of singular values of the Graph adjacency matrix. Plots first SngVals val...
Definition: statplot.cpp:49
void PlotEigValRank(const PUNGraph &Graph, const int &EigVals, const TStr &FNmPref, TStr DescStr)
Plots the eigen-value rank distribution of the Graph adjacency matrix. Plots first EigVals eigenvalue...
Definition: statplot.cpp:5
void PlotEigValDistr(const PUNGraph &Graph, const int &EigVals, const TStr &FNmPref, TStr DescStr)
Plots the distribution of components of the leading eigen-vector of the Graph adjacency matrix...
Definition: statplot.cpp:14
void PlotInvParticipRat(const PUNGraph &Graph, const int &MaxEigVecs, const int &TimeLimit, const TStr &FNmPref, TStr DescStr)
Definition: statplot.cpp:39
static TRnd Rnd
Definition: dt.h:1146
void GetSccSzCnt(const PGraph &Graph, TIntPrV &SccSzCnt)
Returns a distribution of strongly connected component sizes.
Definition: cncom.h:420
void GetInDegCnt(const PGraph &Graph, TIntPrV &DegToCntV)
Returns an in-degree histogram: a set of pairs (in-degree, number of nodes of such in-degree) ...
Definition: alg.h:179
void PlotWccDistr(const PGraph &Graph, const TStr &FNmPref, TStr DescStr=TStr())
Plots the distribution of sizes of weakly connected components of a Graph.
Definition: statplot.h:85
double CalcEffDiamPdf(const TIntFltKdV &DistNbrsPdfV, const double &Percentile)
Helper function for computing a given Percentile of a (unnormalized) probability distribution functio...
Definition: anf.cpp:29
int GetKCoreEdges(const PGraph &Graph, TIntPrV &CoreIdSzV)
Returns the number of edges in each core of order K (where K=0, 1, ...)
Definition: kcore.h:126
const TVal & Last() const
Returns a reference to the last element of the vector.
Definition: ds.h:579
void PlotSngValDistr(const PNGraph &Graph, const int &SngVals, const TStr &FNmPref, TStr DescStr)
Plots the rank distribution of singular values of the Graph adjacency matrix. Plots first SngVals val...
Definition: statplot.cpp:58
TPair< TFlt, TFlt > TFltPr
Definition: ds.h:99
void PlotKCoreEdges(const PGraph &Graph, const TStr &FNmPref, TStr DescStr=TStr())
Plots the k-Core edge-size distribution: Core k vs. number of edges in k-core.
Definition: statplot.h:175
void PlotHops(const PGraph &Graph, const TStr &FNmPref, TStr DescStr=TStr(), const bool &IsDir=false, const int &NApprox=32)
Definition: statplot.h:126
void PlotSngVec(const PNGraph &Graph, const TStr &FNmPref, TStr DescStr)
Plots the distribution of the values of the leading left singular vector of the Graph adjacency matri...
Definition: statplot.cpp:81
void SortByKey(const bool &Asc=true)
Definition: hash.h:291
static void GetCCdf(const TIntPrV &PdfV, TIntPrV &CCdfV)
Definition: util.cpp:33
double CalcAvgDiamPdf(const TIntFltKdV &DistNbrsPdfV)
Helper function for computing the mean of a (unnormalized) probability distribution function...
Definition: anf.cpp:41
double CalcEffDiam(const TIntFltKdV &DistNbrsCdfV, const double &Percentile)
Helper function for computing a given Percentile of a (unnormalized) cumulative distribution function...
Definition: anf.cpp:7
long long int64
Definition: bd.h:27
void SetScale(const TGpScaleTy &GpScaleTy)
Definition: gnuplot.h:78
Definition: dt.h:412
bool Empty() const
Definition: dt.h:491
static TStr Fmt(const char *FmtStr,...)
Definition: dt.cpp:1599
void PlotShortPathDistr(const PGraph &Graph, const TStr &FNmPref, TStr DescStr=TStr(), int TestNodes=TInt::Mx)
Plots the distribution of the shortest path lengths of a Graph. Implementation is based on BFS...
Definition: statplot.h:140
Definition: hash.h:97
void Shuffle(TRnd &Rnd)
Randomly shuffles the elements of the vector.
Definition: ds.h:1335
void PlotClustCf(const PGraph &Graph, const TStr &FNmPref, TStr DescStr=TStr())
Plots the distribution of clustering coefficient of a Graph.
Definition: statplot.h:111
double GetClustCf(const PGraph &Graph, int SampleNodes=-1)
Computes the average clustering coefficient as defined in Watts and Strogatz, Collective dynamics of ...
Definition: triad.h:137
TVal1 Val1
Definition: ds.h:34
int AddPlot(const TIntV &YValV, const TGpSeriesTy &SeriesTy=gpwLinesPoints, const TStr &Label=TStr(), const TStr &Style=TStr())
Definition: gnuplot.cpp:186
Definition: bd.h:196
char * CStr()
Definition: dt.h:479
TSizeTy Add()
Adds a new element at the end of the vector, after its current last element.
Definition: ds.h:602
TIntH NIdDistH
Definition: bfsdfs.h:88
int Len() const
Definition: hash.h:228
TDat & AddDat(const TKey &Key)
Definition: hash.h:238
void GetWccSzCnt(const PGraph &Graph, TIntPrV &WccSzCnt)
Returns a distribution of weakly connected component sizes.
Definition: cncom.h:337
static void PlotValV(const TVec< TVal1 > &ValV, const TStr &OutFNmPref, const TStr &Desc="", const TStr &XLabel="", const TStr &YLabel="", const TGpScaleTy &ScaleTy=gpsAuto, const bool &PowerFit=false, const TGpSeriesTy &SeriesTy=gpwLinesPoints)
Definition: gnuplot.h:398
void PlotInDegDistr(const PGraph &Graph, const TStr &FNmPref, TStr DescStr=TStr(), const bool &PlotCCdf=false, const bool &PowerFit=false)
Definition: statplot.h:47
const TKey & GetKey(const int &KeyId) const
Definition: hash.h:252