SNAP Library 3.0, User Reference  2016-07-20 17:56:49
SNAP, a general purpose, high performance system for analysis and manipulation of large networks
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
timenet.cpp
Go to the documentation of this file.
1 // Time Network
4  if (this != &TimeNet) {
5  TNet::operator=(TimeNet);
6  }
7  return *this;
8 }
9 
10 PTimeNet TTimeNet::GetSubGraph(const TIntV& NIdV) const {
11  PTimeNet NewNetPt = TTimeNet::New();
12  TTimeNet& NewNet = *NewNetPt;
13  NewNet.Reserve(NIdV.Len(), -1);
14  int node, edge;
15  TNodeI NI;
16  for (node = 0; node < NIdV.Len(); node++) {
17  NewNet.AddNode(NIdV[node], GetNDat(NIdV[node])); // also copy the node data
18  }
19  for (node = 0; node < NIdV.Len(); node++) {
20  NI = GetNI(NIdV[node]);
21  const int SrcNId = NI.GetId();
22  for (edge = 0; edge < NI.GetOutDeg(); edge++) {
23  const int OutNId = NI.GetOutNId(edge);
24  if (NewNet.IsNode(OutNId)) {
25  NewNet.AddEdge(SrcNId, OutNId); }
26  }
27  }
28  NewNet.Defrag();
29  return NewNetPt;
30 }
31 
33  TIntV NIdV; GetNIdByTm(NIdV);
35  for (int i = 0; i < NIdV.Len(); i++) {
36  const int Src = NIdV[i];
37  const TTimeNet::TNodeI NI = GetNI(Src);
38  const TSecTm SrcTm = NI.GetDat();
39  if (! OutNet->IsNode(Src)) { OutNet->AddNode(Src, SrcTm); }
40  for (int e = 0; e < NI.GetOutDeg(); e++) {
41  if (! OutNet->IsNode(NI.GetOutNId(e))) { OutNet->AddNode(NI.GetOutNId(e), SrcTm); }
42  OutNet->AddEdge(Src, NI.GetOutNId(e), -1, SrcTm);
43  }
44  }
45  return OutNet;
46 }
47 
48 void TTimeNet::GetNIdByTm(TIntV& NIdV) const {
49  TVec<TKeyDat<TSecTm, TInt> > TmToNIdV(GetNodes(), 0);
50  for (TNodeI NodeI = BegNI(); NodeI < EndNI(); NodeI++) {
51  TmToNIdV.Add(TKeyDat<TSecTm, TInt>(NodeI.GetDat(), NodeI.GetId())); }
52  TmToNIdV.Sort();
53  NIdV.Gen(GetNodes(), 0);
54  for (int i = 0; i < TmToNIdV.Len(); i++) {
55  NIdV.Add(TmToNIdV[i].Dat); }
56 }
57 
58 // put nodes into buckets by TmUnit
59 void TTimeNet::GetTmBuckets(const TTmUnit& TmUnit, TTmBucketV& TmBucketV) const {
60  THash<TInt, TIntV> TmIdToNIdVH;
61  for (TNodeI NodeI = BegNI(); NodeI < EndNI(); NodeI++) {
62  const int TmId = NodeI().Round(TmUnit);
63  if (! TmIdToNIdVH.IsKey(TmId)) TmIdToNIdVH.AddKey(TmId);
64  TmIdToNIdVH.GetDat(TmId).Add(NodeI.GetId());
65  }
66  TVec<TPair<TInt, TIntV> > TmIdNIdVV;
67  TmIdToNIdVH.GetKeyDatPrV(TmIdNIdVV);
68  TmIdNIdVV.Sort();
69  TmBucketV.Gen(TmIdNIdVV.Len());
70  for (int i = 0; i < TmIdNIdVV.Len(); i++) {
71  TTmBucket& Bucket = TmBucketV[i];
72  Bucket.BegTm = TmIdNIdVV[i].Val1;
73  Bucket.NIdV = TmIdNIdVV[i].Val2;
74  }
75 }
76 
77 void TTimeNet::GetNodeBuckets(const int NodesPerBucket, TTimeNet::TTmBucketV& TmBucketV) const {
78  TIntV NIdV;
79  GetNIdByTm(NIdV);
80  TmBucketV.Gen(NIdV.Len() / NodesPerBucket + 1, 0);
81  for (int i = 0; i < NIdV.Len(); i++) {
82  const int b = i/NodesPerBucket;
83  if (TmBucketV.Len() <= b) { TmBucketV.Add(TTimeNet::TTmBucket(TSecTm(b))); }
84  TmBucketV[b].NIdV.Add(NIdV[i]);
85  }
86 }
87 
88 PGStatVec TTimeNet::TimeGrowth(const TTmUnit& TmUnit, const TFSet& TakeStat, const TSecTm& StartTm) const {
89  PGStatVec GrowthStat = new TGStatVec(TmUnit, TakeStat);
90  TTmBucketV TmBucketV;
91  GetTmBuckets(TmUnit, TmBucketV);
92  TIntV NodeIdV;
93  TExeTm ExeTm;
94  for (int t = 0; t < TmBucketV.Len(); t++) {
95  NodeIdV.AddV(TmBucketV[t].NIdV); // nodes up to time T
96  printf("\n=== %d/%d] %s (%d nodes)\n", t+1, TmBucketV.Len(),
97  TmBucketV[t].BegTm.GetStr().CStr(), NodeIdV.Len()); ExeTm.Tick();
98  if (TmBucketV[t].BegTm < StartTm) continue;
99  //PNGraph PreGraph = GetSubGraph(NodeIdV, true); // renumber nodes
100  PNGraph PreGraph = TSnap::ConvertSubGraph<PNGraph>(PTimeNet((TTimeNet*)this), NodeIdV); // don't renumber nodes
101  GrowthStat->Add(PreGraph, TmBucketV[t].BegTm);
102  }
103  return GrowthStat;
104 }
105 
106 void TTimeNet::PlotEffDiam(const TStr& FNmPref, const TStr& Desc, const TTmUnit& TmUnit,
107  const TSecTm& StartTm, const int& NDiamRuns, const bool& OnlyWcc, const bool& AlsoRewire) const {
108  const TStr WccStr = OnlyWcc ? "WCC " : TStr::GetNullStr();
109  TTmBucketV TmBucketV;
110  GetTmBuckets(TmUnit, TmBucketV);
111  TIntV NodeIdV;
112  TExeTm ExeTm, Run1Tm;
113  TFltTrV TmDiamV, NdsDiamV;
114  TFltTrV RwTmDiamV, RwNdsDiamV;
115  for (int t = 0; t < TmBucketV.Len(); t++) {
116  NodeIdV.AddV(TmBucketV[t].NIdV); // nodes up to time T
117  printf("\n*** %d/%d] at %s (%d nodes)\n", t+1, TmBucketV.Len(),
118  TmBucketV[t].BegTm.GetStr(TmUnit).CStr(), NodeIdV.Len()); ExeTm.Tick();
119  if (TmBucketV[t].BegTm < StartTm) continue;
120  //PUNGraph PreGraph = GetSubUNGraph(NodeIdV, true);
121  PUNGraph PreGraph = TSnap::ConvertSubGraph<PUNGraph>(PTimeNet((TTimeNet*)this), NodeIdV);
122  { TMom Mom;
123  for (int r = 0; r < NDiamRuns; r++) {
124  printf("%d...", r+1); Run1Tm.Tick();
125  const double EffDiam = TSnap::GetAnfEffDiam(OnlyWcc ? TSnap::GetMxWcc(PreGraph) : PreGraph);
126  Mom.Add(EffDiam); printf("[%s]\r", Run1Tm.GetTmStr());
127  }
128  Mom.Def();
129  TmDiamV.Add(TFltTr((int)TmBucketV[t].BegTm.GetInUnits(TmUnit), Mom.GetMean(), Mom.GetSDev()));
130  NdsDiamV.Add(TFltTr(PreGraph->GetNodes(), Mom.GetMean(), Mom.GetSDev()));
131  NdsDiamV.Sort();
132  printf(" [%s] \n", ExeTm.GetTmStr()); }
133  if (AlsoRewire) {
134  //PUNGraph RwGraph = TGGen::GenRndDegSeqS(PreGraph, 50, TInt::Rnd); // edge switching model
135  TIntV DegSeqV(PreGraph->GetNodes(), 0);
136  for (TUNGraph::TNodeI NI = PreGraph->BegNI(); NI < PreGraph->EndNI(); NI++) { DegSeqV.Add(NI.GetDeg()); }
137  PUNGraph RwGraph = TSnap::GenConfModel(DegSeqV, TInt::Rnd);
138  printf("Configuration model: (%d, %d) --> (%d, %d)\n", PreGraph->GetNodes(), PreGraph->GetEdges(), RwGraph->GetNodes(), RwGraph->GetEdges());
139  TMom Mom;
140  for (int r = 0; r < NDiamRuns; r++) {
141  printf(" diam run %d...", r+1); Run1Tm.Tick();
142  const double EffDiam = TSnap::GetAnfEffDiam(OnlyWcc ? TSnap::GetMxWcc(PreGraph):PreGraph);
143  Mom.Add(EffDiam); printf(" current run [%s]\n", Run1Tm.GetTmStr());
144  }
145  Mom.Def();
146  RwTmDiamV.Add(TFltTr((int)TmBucketV[t].BegTm.GetInUnits(TmUnit), Mom.GetMean(), Mom.GetSDev()));
147  RwNdsDiamV.Add(TFltTr(PreGraph->GetNodes(), Mom.GetMean(), Mom.GetSDev()));
148  RwNdsDiamV.Sort();
149  printf("done with diameter. Total time [%s] \n", ExeTm.GetTmStr());
150  }
151  // plot
152  { TGnuPlot GnuPlot("diamEff-T."+FNmPref, TStr::Fmt("%s. G(%d, %d)", Desc.CStr(), GetNodes(), GetEdges()));
153  GnuPlot.SetXYLabel(TStr::Fmt("TIME [%s]", TTmInfo::GetTmUnitStr(TmUnit).CStr()), WccStr+"Effective Diameter");
154  GnuPlot.AddErrBar(TmDiamV, "True", "");
155  if (! RwTmDiamV.Empty()) { GnuPlot.AddErrBar(RwTmDiamV, "Rewired", "");}
156  GnuPlot.SavePng(); }
157  { TGnuPlot GnuPlot("diamEff-N."+FNmPref, TStr::Fmt("%s. G(%d, %d)", Desc.CStr(), GetNodes(), GetEdges()));
158  GnuPlot.SetXYLabel("NODES", WccStr+"Effective Diameter");
159  GnuPlot.AddErrBar(NdsDiamV, "True", "");
160  if (! RwNdsDiamV.Empty()) { GnuPlot.AddErrBar(RwNdsDiamV, "Rewired", "");}
161  GnuPlot.SavePng(); }
162  }
163 }
164 
165 // pretend we only have link data starting in PostTmDiam
166 // compare the diameter of the nodes after PostTmDiam with diameter
167 // of the nodes after PostTmDiam but *also* using nodes and edges
168 // from before PostTmDiam
169 void TTimeNet::PlotMissingPast(const TStr& FNmPref, const TStr& Desc, const TTmUnit& TmUnit,
170  const TSecTm& DelPreTmEdges, const TSecTm& PostTmDiam) const {
171  printf("\nGrowth over time: degree distribution, Growth Power Law, Diameter.\n %s group by %s.\n",
172  FNmPref.CStr(), TTmInfo::GetTmUnitStr(TmUnit).CStr());
173  printf(" Delete out-edges of pre time %s nodes.\n Take subgraph of post year %s subgraph.\n\n",
174  DelPreTmEdges.GetStr().CStr(), PostTmDiam.GetStr().CStr());
175  const int NDiamRuns = 10;
176  const int NSamples = 100;
177  //PUNGraph FullGraph = GetUNGraph();
178  PUNGraph FullGraph = TSnap::ConvertGraph<PUNGraph>(PTimeNet((TTimeNet*)this));
179  // delete past
180  if (DelPreTmEdges.IsDef()) {
181  int NDelNodes = 0, NDelEdges = 0;
182  printf("Deleting pre-%s node out-links\n", DelPreTmEdges.GetStr().CStr());
183  for (TNodeI NodeI = BegNI(); NodeI < EndNI(); NodeI++) {
184  if (NodeI() < DelPreTmEdges) {
185  const int NodeId = NodeI.GetId();
186  for (int edge = 0; edge < NodeI.GetOutDeg(); edge++) {
187  FullGraph->DelEdge(NodeId, NodeI.GetOutNId(edge)); }
188  NDelEdges += NodeI.GetOutDeg(); NDelNodes++;
189  }
190  }
191  printf(" Deleted %d nodes out-edges (%d edges total).\n", NDelNodes, NDelEdges);
192  FullGraph->Defrag(true);
193  }
194  PGStatVec GrowthStat = TGStatVec::New(TmUnit);
195  TFltV PreDiamSDev, PreEffDiamSDev, WccDiamSDev, WccEffDiamSDev;
196  TIntV NodeIdV;
197  TExeTm ExeTm;
198  TTmBucketV TmBucketV;
199  GetTmBuckets(TmUnit, TmBucketV);
200  for (int t = 0; t < TmBucketV.Len(); t++) {
201  printf("\nGraph: %s (%d / %d) [%s]\n", TmBucketV[t].BegTm.GetTmStr().CStr(),
202  t+1, TmBucketV.Len(), TExeTm::GetCurTm());
203  // up-to-year subgraph
204  NodeIdV.AddV(TmBucketV[t].NIdV); // nodes up to time T
205  if (TmBucketV[t].BegTm < PostTmDiam) { continue; }
206  const PUNGraph PreGraph = TSnap::GetSubGraph(FullGraph, NodeIdV, true);
207  const PUNGraph WccGraph = TSnap::GetMxWcc(PreGraph);
208  TIntV PostYearNIdV, WccPostYearNIdV;
209  for (TUNGraph::TNodeI NI = PreGraph->BegNI(); NI < PreGraph->EndNI(); NI++) {
210  if (GetNDat(NI.GetId()) >= PostTmDiam) {
211  PostYearNIdV.Add(NI.GetId());
212  if (WccGraph->IsNode(NI.GetId())) { WccPostYearNIdV.Add(NI.GetId()); }
213  }
214  }
215  TMom PreDiamMom, PreEffDiamMom, WccDiamMom, WccEffDiamMom;
216  // diameter of PostYearDiam subgraph using whole graph (all edges)
217  int FullDiam; double EffDiam;
218  for (int r = 0; r < NDiamRuns; r++) {
219  if (! PostYearNIdV.Empty()) {
220  TSnap::GetBfsEffDiam(PreGraph, NSamples, PostYearNIdV, false, EffDiam, FullDiam);
221  PreDiamMom.Add(FullDiam); PreEffDiamMom.Add(EffDiam);
222  }
223  if (! WccPostYearNIdV.Empty()) {
224  TSnap::GetBfsEffDiam(WccGraph, NSamples, WccPostYearNIdV, false, EffDiam, FullDiam);
225  WccDiamMom.Add(FullDiam); WccEffDiamMom.Add(EffDiam);
226  }
227  printf(" diam: %d [%s] \r", r+1, ExeTm.GetTmStr()); ExeTm.Tick();
228  }
229  PreDiamMom.Def(); PreEffDiamMom.Def();
230  WccDiamMom.Def(); WccEffDiamMom.Def();
231  // save stat
232  PGStat GraphStatPt = GrowthStat->Add(TmBucketV[t].BegTm);
233  TGStat& GS = *GraphStatPt;
234  GS.TakeBasicStat(PreGraph, false);
235  GS.TakeBasicStat(WccGraph, true);
236  GS.SetVal(gsvFullDiam, PreDiamMom.GetMean()); // mean
237  GS.SetVal(gsvEffDiam, PreEffDiamMom.GetMean());
238  GS.SetVal(gsvFullWccDiam, WccDiamMom.GetMean());
239  GS.SetVal(gsvEffWccDiam, WccEffDiamMom.GetMean());
240  GS.SetVal(gsvFullDiamDev, PreDiamMom.GetSDev()); // variance
241  GS.SetVal(gsvEffDiamDev, PreEffDiamMom.GetSDev());
242  GS.SetVal(gsvFullWccDiamDev, WccDiamMom.GetSDev());
243  GS.SetVal(gsvEffWccDiamDev, WccEffDiamMom.GetSDev());
244  { TFOut FOut("growth."+FNmPref+".gStatVec"); GrowthStat->Save(FOut); }
245  GrowthStat->SaveTxt(FNmPref, TStr::Fmt("%s. MISSING PAST DIAMETER\nDelPreEdges\t%s\nPostYearDiam\t%s\n",
246  Desc.CStr(), DelPreTmEdges.GetStr().CStr(), PostTmDiam.GetStr().CStr()));
247  }
248  // diameter plots
249  //GrowthStat->PlotDiam(FNmPref, Desc + TStr::Fmt(" MISSING PAST. DelPre:%d PostYear:%d.",
250  // DelPreEdges, PostYearDiam));*/
251 }
252 
253 void TTimeNet::PlotCCfOverTm(const TStr& FNmPref, TStr Desc, const TTmUnit& TmUnit, const int& NodesBucket) const {
254  if (Desc.Empty()) { Desc = FNmPref; }
255  TTimeNet::TTmBucketV TmBucketV;
256  TStr XLbl;
257  if (TmUnit == tmuNodes) {
258  XLbl = "Number of nodes (time)";
259  IAssert(NodesBucket > 0);
260  GetNodeBuckets(NodesBucket, TmBucketV); }
261  else {
262  XLbl = TStr::Fmt("Time (%s)", TTmInfo::GetTmUnitStr(TmUnit).CStr());
263  GetTmBuckets(TmUnit, TmBucketV);
264  }
265  TIntV NodeIdV;
266  TFltPrV DegToCCfV, CcfV, OpClV, OpV;
267  TVec<TTuple<TFlt, 4> > OpenClsV;
268  TTuple<TFlt, 4> Tuple;
269  TExeTm ExeTm;
270  int XVal = 0;
271  printf("Clustering coefficient over time:\n %d edges, %d edges per bucket, %d buckets \n", GetEdges(), 100000, TmBucketV.Len());
272  PUNGraph UNGraph = TSnap::ConvertGraph<PUNGraph>(PTimeNet((TTimeNet*)this));
273  for (int t = 0; t < TmBucketV.Len(); t++) {
274  printf("\r %d/%d: ", t+1, TmBucketV.Len());
275  NodeIdV.AddV(TmBucketV[t].NIdV); // edges up to time T
276  int64 Open=0, Close=0;
277  const PUNGraph Graph = TSnap::GetSubGraph(UNGraph, NodeIdV);
278  const double CCf = TSnap::GetClustCf(Graph, DegToCCfV, Open, Close);
279  if (TmUnit == tmuNodes) { XVal = Graph->GetNodes(); }
280  else { XVal = TmBucketV[t].BegTm.GetInUnits(TmUnit); }
281  CcfV.Add(TFltPr(XVal, CCf));
282  double FltOpen = static_cast<double>(Open);
283  double FltClose = static_cast<double>(Close);
284  OpClV.Add(TFltPr(XVal, (Open+Close==0 ? 0.0 : FltClose/(FltOpen+FltClose))));
285  OpV.Add(TFltPr(XVal, (Open==0 ? 0.0 : FltClose/FltOpen)));
286  Tuple[0]=Graph->GetNodes();
287  Tuple[1]=Graph->GetEdges();
288  Tuple[2]=FltClose; Tuple[3]=FltOpen;
289  OpenClsV.Add(Tuple);
290  printf(" %s", ExeTm.GetStr());
291  TGnuPlot::PlotValV(DegToCCfV, TStr::Fmt("ccfAt%02dtm.%s", t+1, FNmPref.CStr()),
292  TStr::Fmt("%s. At time %d. Clustering Coefficient. G(%d,%d)", Desc.CStr(), t+1, Graph->GetNodes(), Graph->GetEdges()),
293  "Degree", "Clustering coefficient", gpsLog10XY, false);
294  }
295  TGnuPlot::PlotValV(CcfV, "ccfOverTm."+FNmPref, Desc+". Average Clustering Coefficient", XLbl, "Average clustering coefficient", gpsAuto, false);
296  TGnuPlot::PlotValV(OpClV, "ClsOpnTr1."+FNmPref, Desc+". Close/(Open+Closed) triads", XLbl, "Close / (Open+Closed) triads", gpsAuto, false);
297  TGnuPlot::PlotValV(OpV, "ClsOpnTr2."+FNmPref, Desc+". Close/Open triads", XLbl, "Close / Open triads", gpsAuto, false);
298  TGnuPlot::SaveTs(OpenClsV, "ClsOpnTr."+FNmPref+".tab", TStr::Fmt("#%s\n#Nodes\tEdges\tClosed\tOpenTriads", Desc.CStr()));
299  printf("\n");
300 }
301 
302 void TTimeNet::PlotMedianDegOverTm(const TStr& FNmPref, const TTmUnit& TmUnit, const int& NodesPerBucket) const {
303  TTimeNet::TTmBucketV TmBucketV;
304  TStr XLbl;
305  if (TmUnit == tmuNodes) {
306  XLbl = "Number of nodes (time)"; IAssert(NodesPerBucket > 0);
307  GetNodeBuckets(NodesPerBucket, TmBucketV); }
308  else {
309  XLbl = TStr::Fmt("Time (%s)", TTmInfo::GetTmUnitStr(TmUnit).CStr());
310  GetTmBuckets(TmUnit, TmBucketV); }
311  printf("\n\n%s\nMedian degree over time:\n %d edges, %d edges per bucket, %d buckets \n", FNmPref.CStr(), GetEdges(), NodesPerBucket, TmBucketV.Len());
312  TFltPrV MedDegV, MedInDegV, MedOutDegV;
313  TIntV NodeIdV;
314  int XVal;
315  PUNGraph UNGraph = TSnap::ConvertGraph<PUNGraph>(PTimeNet((TTimeNet*)this));
316  PNGraph NGraph = TSnap::ConvertGraph<PNGraph>(PTimeNet((TTimeNet*)this));
317  FILE *F = fopen(("gStat-"+FNmPref+".tab").CStr(), "wt");
318  fprintf(F, "UndirNodes\tUndirEdges\tUndirNonZNodes\tMedianDeg\tMeanDeg\tDirNodes\tDirEdges\tDirNonzNodes\tMedInDeg\tMedOutDeg\tMeanInDeg\tMeanOutDeg\n");
319  for (int t = 0; t < TmBucketV.Len(); t++) {
320  printf("\r %d/%d: ", t+1, TmBucketV.Len());
321  NodeIdV.AddV(TmBucketV[t].NIdV); // edges up to time T
322  if (TmUnit == tmuNodes) { XVal = NodeIdV.Len(); }
323  else { XVal = TmBucketV[t].BegTm.GetInUnits(TmUnit); }
324  // un graph
325  { const PUNGraph Graph = TSnap::GetSubGraph(UNGraph, NodeIdV); TMom Mom;
326  for (TUNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) { if (NI.GetOutDeg()>0) { Mom.Add(NI.GetOutDeg());} }
327  Mom.Def(); MedDegV.Add(TFltPr(XVal, Mom.GetMedian()));
328  fprintf(F, "%d\t%d\t%d\t%f\t%f", Graph->GetNodes(), Graph->GetEdges(), TSnap::CntNonZNodes(Graph), (float)Mom.GetMedian(), (float)Mom.GetMean()); }
329  // directed graph
330  { const PNGraph Graph = TSnap::GetSubGraph<PNGraph>(NGraph, NodeIdV); TMom MomOut, MomIn;
331  for (TNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) {
332  if (NI.GetOutDeg()>0) { MomOut.Add(NI.GetOutDeg()); }
333  if (NI.GetInDeg()>0) { MomIn.Add(NI.GetInDeg()); } }
334  MomOut.Def(); MedOutDegV.Add(TFltPr(XVal, MomOut.GetMedian()));
335  MomIn.Def(); MedInDegV.Add(TFltPr(XVal, MomIn.GetMedian()));
336  fprintf(F, "\t%d\t%d\t%d\t%f\t%f\t%f\t%f\n", Graph->GetNodes(), Graph->GetEdges(), (int)TSnap::CntNonZNodes(Graph), (float)MomIn.GetMedian(), (float)MomOut.GetMedian(), (float)MomIn.GetMean(), (float)MomOut.GetMean()); }
337  }
338  fclose(F);
339  TGnuPlot::PlotValV(MedDegV, "medDeg."+FNmPref, FNmPref+" Median degree", TTmInfo::GetTmUnitStr(TmUnit), "Median degree");
340  TGnuPlot::PlotValV(MedOutDegV, "medOutDeg."+FNmPref, FNmPref+" Median OUT degree", TTmInfo::GetTmUnitStr(TmUnit), "Median OUT degree");
341  TGnuPlot::PlotValV(MedInDegV, "medInDeg."+FNmPref, FNmPref+" Median IN degree", TTmInfo::GetTmUnitStr(TmUnit), "Median IN degree");
342 }
343 
344 // load Arxiv affiliation network (papers to authors bipartite graph)
345 // <set1_id> <time> <set2_id1> <set2_id2> ... (all ids have to be unique)
347  PTimeNet TimeNetPt = TTimeNet::New();
348  TTimeNet& TimeNet = *TimeNetPt;
349  PSs Ss = TSs::LoadTxt(ssfTabSep, InFNm.CStr());
350  TIntH Set1IdH; // paper ids
351  TStrV StrTimeV;
352  for (int y = 0; y < Ss->GetYLen(); y++) {
353  if (Ss->At(0, y)[0] == '#') continue; // skip comments
354  if (Ss->GetXLen(y) < 3) continue; // there must be at least one author
355  const int& SrcId = Ss->At(0, y).GetInt();
356  IAssert(! Set1IdH.IsKey(SrcId));
357  IAssert(! TimeNet.IsNode(SrcId));
358  Set1IdH.AddKey(SrcId);
359  Ss->At(1, y).SplitOnAllCh('-', StrTimeV);
360  const int Year = StrTimeV[0].GetInt();
361  const int Month = StrTimeV[1].GetInt();
362  const int Day = StrTimeV[2].GetInt();
363  const TSecTm NodeTm(Year, Month, Day);
364  TimeNet.AddNode(SrcId, NodeTm);
365  for (int dst = 2; dst < Ss->GetXLen(y); dst++) {
366  const int DstId = Ss->At(dst, y).GetInt();
367  IAssert(! Set1IdH.IsKey(DstId));
368  if (! TimeNet.IsNode(DstId)) { TimeNet.AddNode(DstId, NodeTm); }
369  else { TimeNet.GetNDat(DstId) = TMath::Mn(NodeTm, TimeNet.GetNDat(DstId)); }
370  if (! TimeNet.IsEdge(SrcId, DstId)) { TimeNet.AddEdge(SrcId, DstId); }
371  }
372  }
373  TimeNet.Defrag();
374  printf("Bipartate graph: nodes: %d edges: %d\n", TimeNet.GetNodes(), TimeNet.GetEdges());
375  printf(" Bipartate sets: %d nodes --> %d nodes\n", TSnap::CntInDegNodes(TimeNetPt, 0),
376  TSnap::CntOutDegNodes(TimeNetPt, 0));
377  return TimeNetPt;
378 }
379 
380 // load Arxiv paper citation network
381 // PaperFNm: <paper> <time>
382 // CiteFNm: <source> <destination>
383 PTimeNet TTimeNet::LoadArxiv(const TStr& PaperFNm, const TStr& CiteFNm) {
384  TExeTm ExeTm;
385  PTimeNet TimeNetPt = TTimeNet::New();
386  TTimeNet& TimeNet = *TimeNetPt;
387  printf("Arxiv citation graph (paper publication year)...\n");
388  // load time data (file hep-ph-slacdates)
389  char Line [1024];
390  FILE *PprF = fopen(PaperFNm.CStr(), "rt");
391  TStr StrId, StrTime;
392  TStrV StrV, StrTimeV;
393  int N = 0, DuplicateNode = 0;
394  while (! feof(PprF)) {
395  Line[0] = 0;
396  fgets(Line, 1024, PprF);
397  if (strlen(Line) == 0 || Line[0] == '#') continue;
398  Line[strlen(Line)-1] = 0; // delete trailing '\n'
399  TStr(Line).SplitOnWs(StrV); IAssert(StrV.Len() == 2);
400  StrId = StrV[0]; StrTime = StrV[1]; IAssert(!StrId.Empty() && !StrTime.Empty());
401  StrTime.SplitOnAllCh('-', StrTimeV); IAssert(StrTimeV.Len() == 3);
402  const int NodeId = StrId.GetInt();
403  if (! TimeNet.IsNode(NodeId)) {
404  const int Year = StrTimeV[0].GetInt();
405  const int Month = StrTimeV[1].GetInt();
406  const int Day = StrTimeV[2].GetInt();
407  TimeNet.AddNode(NodeId, TSecTm(Year, Month, Day));
408  } else { DuplicateNode++; }
409  if (++N % 10000 == 0) printf("\r %dk", N/1000);
410  }
411  printf("\r %d nodes read. %d duplicate nodes. %s\n", N, DuplicateNode, ExeTm.GetTmStr());
412  fclose(PprF);
413  // load citations (file hep-ph-citations)
414  int NewSrcIds=0, NewDstIds=0, DupLinks=0, NewCits=0;
415  FILE *CiteF = fopen(CiteFNm.CStr(), "rt");
416  N = 0; ExeTm.Tick();
417  printf("Loading Arxiv citations...\n");
418  TIntPrV EdgeV;
419  THash<TInt, TSecTm> NIdToTimeH;
420  while (! feof(CiteF)) {
421  Line[0] = 0;
422  fgets(Line, 1024, CiteF);
423  if (strlen(Line) == 0 || Line[0] == '#') continue;
424  Line[strlen(Line)-1] = 0; // delete trailing '\n'
425  TStr(Line).SplitOnWs(StrV); IAssert(StrV.Len() == 2);
426  const int SrcNId = StrV[0].GetInt();
427  const int DstNId = StrV[1].GetInt();
428  EdgeV.Add(TIntPr(SrcNId, DstNId));
429  // infer time of destination node -- earliest paper that cites the node (paper)
430  if (! TimeNet.IsNode(DstNId) && TimeNet.IsNode(SrcNId)) {
431  const TSecTm& SrcTm = TimeNet.GetNDat(SrcNId);
432  if (! NIdToTimeH.IsKey(DstNId)) {
433  NIdToTimeH.AddDat(DstNId, SrcTm);
434  NewDstIds++;
435  }
436  else if (NIdToTimeH.GetDat(DstNId) < SrcTm) {
437  NIdToTimeH.GetDat(DstNId) = SrcTm; }
438  }
439  if (++N % 10000 == 0) printf("\r %dk", N/1000);
440  }
441  fclose(CiteF);
442  // add infeered time nodes (nodes which are cited by papers with known time)
443  for (int i = 0; i < NIdToTimeH.Len(); i++) {
444  TimeNet.AddNode(NIdToTimeH.GetKey(i), NIdToTimeH[i]);
445  }
446  // add links
447  for (int i = 0; i < EdgeV.Len(); i++) {
448  const int SrcNId = EdgeV[i].Val1;
449  const int DstNId = EdgeV[i].Val2;
450  if (TimeNet.IsNode(SrcNId) && TimeNet.IsNode(DstNId)) {
451  if (! TimeNet.IsEdge(SrcNId, DstNId)) { TimeNet.AddEdge(SrcNId, DstNId); }
452  else { DupLinks++; }
453  } else {
454  if (! TimeNet.IsNode(SrcNId)) {
455  NewSrcIds++;
456  if (! TimeNet.IsNode(DstNId)) { NewCits++; }
457  }
458  }
459  }
460  printf("\r %d citations read. %s\n", N, ExeTm.GetTmStr());
461  printf("Graph: nodes: %d edges: %d\n", TimeNet.GetNodes(), TimeNet.GetEdges());
462  printf("Removing 0-degree nodes: %d nodes\n", TSnap::CntDegNodes(TimeNetPt, 0));
463  TIntV RmNIdV;
464  for (TTimeNet::TNodeI ni = TimeNet.BegNI(); ni < TimeNet.EndNI(); ni++) {
465  if (ni.GetDeg() == 0) { RmNIdV.Add(ni.GetId()); }
466  }
467  for (int i = 0; i < RmNIdV.Len(); i++) {
468  TimeNet.DelNode(RmNIdV[i]);
469  }
470  TimeNet.Defrag(true);
471  printf("\nFinal graph: nodes: %d edges: %d\n", TimeNet.GetNodes(), TimeNet.GetEdges());
472  printf(" Duplicate citations : %d\n", DupLinks);
473  printf(" Nodes without time which are cited : %d (add them to graph, use time of the earliest source node)\n", NewDstIds);
474  printf(" Citations between unknown time nodes : %d\n", NewCits);
475  printf(" Nodes without time which make citations: %d (do not add them into the graph)\n", NewSrcIds);
476  return TimeNetPt;
477 }
478 
479 // Patent citation network. Time units are seconds even though they mean years
480 PTimeNet TTimeNet::LoadPatents(const TStr& PatentFNm, const TStr& CiteFNm) {
481  int N = 0;
482  TExeTm ExeTm;
483  PTimeNet TimeNetPt = TTimeNet::New();
484  TTimeNet& TimeNet = *TimeNetPt;
485  TimeNet.Reserve(4000000, 160000000);
486  printf("parsing patent data (patent grant year)...\n");
487  // load time data (file pat63_99.txt)
488  const int& PatIdCol = 0;
489  const int& GYearCol = 1;
490  TStrV ColV;
491  char Line [1024];
492  FILE *PatF = fopen(PatentFNm.CStr(), "rt");
493  fgets(Line, 1024, PatF); // skip 1st line
494  while (! feof(PatF)) {
495  Line[0] = 0;
496  fgets(Line, 1024, PatF);
497  if (strlen(Line) == 0) break;
498  TStr(Line).SplitOnAllCh(',', ColV, false);
499  IAssert(ColV.Len() == 23);
500  const int PatentId = ColV[PatIdCol].GetInt();
501  const int GrantYear = ColV[GYearCol].GetInt();
502  IAssert(! TimeNet.IsNode(PatentId));
503  TimeNet.AddNode(PatentId, TSecTm(GrantYear)); // pretend year is a second
504  if (++N % 100000 == 0) printf("\r %dk", N/1000);
505  }
506  printf("\r %d patents read. %s\n", N, ExeTm.GetTmStr());
507  fclose(PatF);
508  // load citations (file cite75_99.txt)
509  printf("\nLoading patent citations...\n");
510  int NewSrcIds=0, NewDstIds=0, DupLinks=0, NewCits=0;
511  N = 0; ExeTm.Tick();
512  TStr SrcId, DstId;
513  FILE *CiteF = fopen(CiteFNm.CStr(), "rt");
514  fgets(Line, 1024, CiteF); // skip 1st line
515  while (! feof(CiteF)) {
516  Line[0] = 0;
517  fgets(Line, 1024, CiteF);
518  if (strlen(Line) == 0) break;
519  Line[strlen(Line)-1] = 0; // delete trailing '\n'
520  TStr(Line).SplitOnCh(SrcId, ',', DstId);
521  const int SrcNId = SrcId.GetInt();
522  const int DstNId = DstId.GetInt();
523  if (! TimeNet.IsNode(SrcNId) && ! TimeNet.IsNode(DstNId)) {
524  //TimeNet.AddNode(SrcNId, TSecTm(1, 1, 1)); NewSrcIds++;
525  //TimeNet.AddNode(DstNId, TSecTm(1, 1, 1)); NewDstIds++;
526  NewCits++;
527  continue;
528  }
529  else if (TimeNet.IsNode(SrcNId) && ! TimeNet.IsNode(DstNId)) {
530  TimeNet.AddNode(DstNId, TimeNet.GetNDat(SrcNId)); NewDstIds++;
531  }
532  else if (! TimeNet.IsNode(SrcNId) && TimeNet.IsNode(DstNId)) {
533  TimeNet.AddNode(SrcNId, TimeNet.GetNDat(DstNId)); NewSrcIds++;
534  }
535  if (! TimeNet.IsEdge(SrcNId, DstNId)) {
536  TimeNet.AddEdge(SrcNId, DstNId);
537  } else { DupLinks++; }
538  if (++N % 100000 == 0) printf("\r %dk", N/1000);
539  }
540  fclose(CiteF);
541  printf("\r %d citations read. %s\n\n", N, ExeTm.GetTmStr());
542  printf("Graph: nodes: %d edges: %d\n", TimeNet.GetNodes(), TimeNet.GetEdges());
543  printf("Removing 0-degree nodes: %d nodes\n", TSnap::CntDegNodes(TimeNetPt, 0));
544  TIntV RmNIdV;
545  for (TTimeNet::TNodeI ni = TimeNet.BegNI(); ni < TimeNet.EndNI(); ni++) {
546  if (ni.GetDeg() == 0) { RmNIdV.Add(ni.GetId()); }
547  }
548  for (int i = 0; i < RmNIdV.Len(); i++) {
549  TimeNet.DelNode(RmNIdV[i]);
550  }
551  TimeNet.Defrag(true);
552  printf("\nFinal graph: nodes: %d edges: %d\n", TimeNet.GetNodes(), TimeNet.GetEdges());
553  printf(" Duplicate citations : %d\n", DupLinks);
554  printf(" Citations between unknown time nodes : %d\n", NewCits);
555  printf(" Nodes without time which make citations: %d\n", NewSrcIds);
556  printf(" Nodes without time which are cited : %d\n", NewDstIds);
557  return TimeNetPt;
558 }
559 
560 // Amazon ShareTheLove: nodes are people, we add an edge when there is a first
561 // recommendation between 2 people (we count each edge only once (regardless
562 // of how many recommendations there were between 2 persons)
564  PTimeNet TimeNetPt = TTimeNet::New();
565  TTimeNet& TimeNet = *TimeNetPt;
566  TimeNet.Reserve(3953993, -1);
567  printf("Amazon Share-the-Love...\n");
568  char line [2024], MonthStr[4];
569  int NLines=0;
570  TStrV ColV;
571  FILE *F = fopen(StlFNm.CStr(), "rt");
572  while (! feof(F)) {
573  memset(line, 0, 2024);
574  fgets(line, 2024, F);
575  if (strlen(line) == 0) break;
576  TStr(line).SplitOnAllCh(',', ColV);
577  const int SrcNId = ColV[0].GetInt();
578  const int DstNId = ColV[1].GetInt();
579  // time data
580  TStr TmStr = ColV[2]; // time-format: 29JAN02:21:55:23
581  int Year = TmStr.GetSubStr(5, 6).GetInt();
582  if (Year < 10) { Year += 2000; } else { Year += 1900; }
583  MonthStr[0]=toupper(TmStr[2]); MonthStr[1]=tolower(TmStr[3]);
584  MonthStr[2]=tolower(TmStr[4]); MonthStr[3]=0;
585  const int Month = TTmInfo::GetMonthN(MonthStr, lUs);
586  const int Day = TmStr.GetSubStr(0, 1).GetInt();
587  const int Hour = TmStr.GetSubStr(8, 9).GetInt();
588  const int Min = TmStr.GetSubStr(11, 12).GetInt();
589  const int Sec = TmStr.GetSubStr(14, 15).GetInt();
590  // add nodes and links
591  if (! TimeNet.IsNode(SrcNId)) { TimeNet.AddNode(SrcNId, TSecTm(Year, Month, Day, Hour, Min, Sec)); }
592  if (! TimeNet.IsNode(DstNId)) { TimeNet.AddNode(DstNId, TSecTm(Year, Month, Day, Hour, Min, Sec)); }
593  if (! TimeNet.IsEdge(SrcNId, DstNId)) { TimeNet.AddEdge(SrcNId, DstNId); }
594  if (++NLines % 100000 == 0) printf("\r %dk", NLines/1000);
595  }
596  fclose(F);
597  printf("\r %d lines read\n", NLines);
598  printf("Graph: nodes: %d edges: %d\n", TimeNet.GetNodes(), TimeNet.GetEdges());
599  TimeNet.Defrag(true);
600  return TimeNetPt;
601 }
602 
604 // Time Node-Edge Network
606  if (this != &TimeNet) {
607  TNet::operator=(TimeNet);
608  }
609  return *this;
610 }
611 
613  PTimeNet NewNet = TTimeNet::New();
614  NewNet->Reserve(GetNodes(), -1);
615  for (TNodeI NI = BegNI(); NI < EndNI(); NI++) {
616  NewNet->AddNode(NI.GetId(), NI.GetDat());
617  }
618  for (TEdgeI EI = BegEI(); EI < EndEI(); EI++) {
619  const int src = EI.GetSrcNId();
620  const int dst = EI.GetDstNId();
621  if (! NewNet->IsEdge(src, dst)) {
622  NewNet->AddEdge(src, dst); }
623  }
624  NewNet->Defrag();
625  return NewNet;
626 }
627 
628 // only take earliest edge between the nodes
630  PTimeNENet Net = TTimeNENet::New();
631  Net->Reserve(GetNodes(), -1);
632  for (TNodeI NI = BegNI(); NI < EndNI(); NI++) {
633  Net->AddNode(NI.GetId(), NI.GetDat()); }
634  TIntV EIdV; GetEIdByTm(EIdV);
635  TIntPrSet EdgeSet(GetEdges());
636  for (int edge = 0; edge < EIdV.Len(); edge++) {
637  const TEdgeI EI = GetEI(EIdV[edge]);
638  const int Src = EI.GetSrcNId();
639  const int Dst = EI.GetDstNId();
640  if (Src==Dst || EdgeSet.IsKey(TIntPr(TMath::Mn(Src, Dst), TMath::Mx(Src, Dst)))) { continue; } // take only 1st edge
641  EdgeSet.AddKey(TIntPr(TMath::Mn(Src, Dst), TMath::Mx(Src, Dst)));
642  Net->AddEdge(EI);
643  }
644  return Net;
645 }
646 
648  PTimeNENet NewNetPt = TTimeNENet::New();
649  TTimeNENet& NewNet = *NewNetPt;
650  NewNet.Reserve(NIdV.Len(), -1);
651  int node, edge;
652  TNodeI NI;
653  for (node = 0; node < NIdV.Len(); node++) {
654  NewNet.AddNode(NIdV[node], GetNDat(NIdV[node]));
655  }
656  for (node = 0; node < NIdV.Len(); node++) {
657  NI = GetNI(NIdV[node]);
658  for (edge = 0; edge < NI.GetOutDeg(); edge++) {
659  const TEdgeI EI = GetEI(NI.GetOutEId(edge));
660  if (NewNet.IsNode(EI.GetDstNId())) {
661  NewNet.AddEdge(EI); }
662  }
663  }
664  NewNet.Defrag();
665  return NewNetPt;
666 }
667 
669  PTimeNENet NewNetPt = TTimeNENet::New();
670  TTimeNENet& NewNet = *NewNetPt;
671  NewNet.Reserve(-1, EIdV.Len());
672  for (int edge = 0; edge < EIdV.Len(); edge++) {
673  const TEdgeI Edge = GetEI(EIdV[edge]);
674  if (! NewNet.IsNode(Edge.GetSrcNId()))
675  NewNet.AddNode(GetNI(Edge.GetSrcNId()));
676  if (! NewNet.IsNode(Edge.GetDstNId()))
677  NewNet.AddNode(GetNI(Edge.GetDstNId()));
678  NewNet.AddEdge(Edge);
679  }
680  NewNet.Defrag();
681  return NewNetPt;
682 }
683 
684 // assumes that the edges of the network are sorted in time
686  PTimeNENet NewNetPt = TTimeNENet::New();
687  TTimeNENet& NewNet = *NewNetPt;
688  TSecTm PrevTm;
689  for (TEdgeI EI = BegEI(); EI < EndEI(); EI++) {
690  if (EI() > MaxEdgeTm) { break; }
691  if (! NewNet.IsNode(EI.GetSrcNId()))
692  NewNet.AddNode(GetNI(EI.GetSrcNId()));
693  if (! NewNet.IsNode(EI.GetDstNId()))
694  NewNet.AddNode(GetNI(EI.GetDstNId()));
695  NewNet.AddEdge(EI);
696  IAssert(! PrevTm.IsDef() || PrevTm <= EI()); // edge times must be sorted
697  PrevTm = EI();
698  }
699  NewNet.Defrag();
700  return NewNetPt;
701 }
702 
704  NodeH.SortByDat(true);
705  EdgeH.SortByDat(true);
706 }
707 
708 // node time must be smaller than times of incoming or outgoing edges
710  int Cnt = 0;
711  for (TNodeI NI = BegNI(); NI < EndNI(); NI++) {
712  TSecTm& NodeTm = NI();
713  for (int edge = 0; edge < NI.GetOutDeg(); edge++) {
714  const TSecTm& EdgeTm = GetEDat(NI.GetOutEId(edge));
715  if (! NodeTm.IsDef() || EdgeTm < NodeTm) { NodeTm = EdgeTm; Cnt++; }
716  }
717  for (int edge = 0; edge < NI.GetInDeg(); edge++) {
718  const TSecTm& EdgeTm = GetEDat(NI.GetInEId(edge));
719  if (! NodeTm.IsDef() || EdgeTm < NodeTm) { NodeTm = EdgeTm; Cnt++; }
720  }
721  }
722  printf("Update node times: %d/%d updates\n", Cnt, GetNodes());
723 }
724 
726  int Cnt = 0;
727  for (TNodeI NI = BegNI(); NI < EndNI(); NI++) {
728  if (NI.GetDeg() == 0) { continue; }
729  TSecTm NodeTm;
730  for (int edge = 0; edge < NI.GetOutDeg(); edge++) {
731  const TSecTm& EdgeTm = GetEDat(NI.GetOutEId(edge)); IAssert(EdgeTm.IsDef());
732  if (! NodeTm.IsDef() || EdgeTm < NodeTm) { NodeTm = EdgeTm; Cnt++; }
733  }
734  for (int edge = 0; edge < NI.GetInDeg(); edge++) {
735  const TSecTm& EdgeTm = GetEDat(NI.GetInEId(edge)); IAssert(EdgeTm.IsDef());
736  if (! NodeTm.IsDef() || EdgeTm < NodeTm) { NodeTm = EdgeTm; Cnt++; }
737  }
738  GetNDat(NI.GetId()) = NodeTm;
739  }
740  printf("Node times set: %d/%d updates\n", Cnt, GetNodes());
741 }
742 
743 // shuffle edge arrivals
744 void TTimeNENet::SetRndEdgeTimes(const int& MinTmEdge) {
745  printf("Shuffling last %d (%d%%) edge arrival times..\n", GetEdges()-MinTmEdge, int(100.0*(GetEdges()-MinTmEdge)/double(GetEdges())));
746  TIntV RndEIdV; GetEIdByTm(RndEIdV);
747  TIntV TrueEIdV = RndEIdV;
748  TSecTmV TrueTmV;
749  const int SwapLen = RndEIdV.Len()-MinTmEdge;
750  for (int R = 0; R < 10; R++) {
751  for (int i = MinTmEdge; i < RndEIdV.Len(); i++) {
752  RndEIdV.Swap(TInt::Rnd.GetUniDevInt(SwapLen)+MinTmEdge, TInt::Rnd.GetUniDevInt(SwapLen)+MinTmEdge); }
753  }
754  for (int e = 0; e < TrueEIdV.Len(); e++) {
755  TrueTmV.Add(GetEDat(TrueEIdV[e])); }
756  for (int e = 0; e < RndEIdV.Len(); e++) {
757  GetEDat(RndEIdV[e]) = TrueTmV[e]; }
758  UpdateNodeTimes();
759 }
760 
762  TSecTm MnNodeTm, MxNodeTm;
763  TSecTm MnEdgeTm, MxEdgeTm;
764  for (TNodeI NI = BegNI(); NI < EndNI(); NI++) {
765  const TSecTm NodeTm = NI();
766  if (! MnNodeTm.IsDef() || MnNodeTm>NodeTm) { MnNodeTm = NodeTm; }
767  if (! MxNodeTm.IsDef() || MxNodeTm<NodeTm) { MxNodeTm = NodeTm; }
768  }
769  printf("Node times:\n %s\n %s\n", MnNodeTm.GetStr().CStr(), MxNodeTm.GetStr().CStr());
770  for (TEdgeI EI= BegEI(); EI < EndEI(); EI++) {
771  const TSecTm EdgeTm = EI();
772  if (! MnEdgeTm.IsDef() || MnEdgeTm>EdgeTm) { MnEdgeTm = EdgeTm; }
773  if (! MxEdgeTm.IsDef() || MxEdgeTm<EdgeTm) { MxEdgeTm = EdgeTm; }
774  }
775  printf("Edge times:\n %s\n %s\n", MnEdgeTm.GetStr().CStr(), MxEdgeTm.GetStr().CStr());
776 }
777 
778 void TTimeNENet::GetNIdByTm(TIntV& NIdV) const {
779  TVec<TKeyDat<TSecTm, TInt> > TmToNIdV(GetNodes(), 0);
780  for (TNodeI NodeI = BegNI(); NodeI < EndNI(); NodeI++) {
781  TmToNIdV.Add(TKeyDat<TSecTm, TInt>(NodeI.GetDat(), NodeI.GetId())); }
782  TmToNIdV.Sort();
783  NIdV.Gen(GetNodes(), 0);
784  for (int i = 0; i < TmToNIdV.Len(); i++) {
785  NIdV.Add(TmToNIdV[i].Dat); }
786 }
787 
788 void TTimeNENet::GetEIdByTm(TIntV& EIdV) const {
789  TVec<TKeyDat<TSecTm, TInt> > TmToEIdV(GetEdges(), 0);
790  for (TEdgeI EI= BegEI(); EI < EndEI(); EI++) {
791  TmToEIdV.Add(TKeyDat<TSecTm, TInt>(EI.GetDat(), EI.GetId())); }
792  TmToEIdV.Sort();
793  EIdV.Gen(GetEdges(), 0);
794  for (int i = 0; i < TmToEIdV.Len(); i++) {
795  EIdV.Add(TmToEIdV[i].Dat); }
796 }
797 
798 void TTimeNENet::GetTmBuckets(const TTmUnit& TmUnit, TTimeNet::TTmBucketV& TmBucketV) const {
799  THash<TInt, TIntV> TmIdToNIdVH;
800  TIntV NIdV; GetNIdByTm(NIdV);
801  for (int n = 0; n < NIdV.Len(); n++) {
802  const int TmId = GetNDat(NIdV[n]).Round(TmUnit).GetAbsSecs();
803  if (! TmIdToNIdVH.IsKey(TmId)) { TmIdToNIdVH.AddKey(TmId); }
804  TmIdToNIdVH.GetDat(TmId).Add(NIdV[n]);
805  }
806  TVec<TPair<TInt, TIntV> > TmIdNIdVV;
807  TmIdToNIdVH.GetKeyDatPrV(TmIdNIdVV);
808  TmIdNIdVV.Sort();
809  TmBucketV.Gen(TmIdNIdVV.Len());
810  for (int i = 0; i < TmIdNIdVV.Len(); i++) {
811  TTimeNet::TTmBucket& Bucket = TmBucketV[i];
812  Bucket.BegTm = TmIdNIdVV[i].Val1;
813  Bucket.NIdV = TmIdNIdVV[i].Val2;
814  }
815 }
816 
817 void TTimeNENet::GetEdgeTmBuckets(const TTmUnit& TmUnit, TTimeNet::TTmBucketV& TmBucketV) const {
818  THash<TInt, TIntV> TmIdToEIdVH;
819  TIntV EIdV; GetEIdByTm(EIdV);
820  for (int e = 0; e < EIdV.Len(); e++) {
821  const int TmId = GetEDat(EIdV[e]).Round(TmUnit).GetAbsSecs();
822  if (! TmIdToEIdVH.IsKey(TmId)) { TmIdToEIdVH.AddKey(TmId); }
823  TmIdToEIdVH.GetDat(TmId).Add(EIdV[e]);
824  }
825  TVec<TPair<TInt, TIntV> > TmIdEIdVV;
826  TmIdToEIdVH.GetKeyDatPrV(TmIdEIdVV);
827  TmIdEIdVV.Sort();
828  TmBucketV.Gen(TmIdEIdVV.Len());
829  for (int i = 0; i < TmIdEIdVV.Len(); i++) {
830  TTimeNet::TTmBucket& Bucket = TmBucketV[i];
831  Bucket.BegTm = TmIdEIdVV[i].Val1;
832  Bucket.NIdV = TmIdEIdVV[i].Val2;
833  }
834 }
835 
836 void TTimeNENet::GetNodeBuckets(const int NodesPerBucket, TTimeNet::TTmBucketV& TmBucketV) const {
837  TIntV NIdV; GetNIdByTm(NIdV);
838  TmBucketV.Gen(NIdV.Len() / NodesPerBucket + 1, 0);
839  for (int i = 0; i < NIdV.Len(); i++) {
840  const int b = i/NodesPerBucket;
841  if (TmBucketV.Len() <= b) { TmBucketV.Add(TTimeNet::TTmBucket(TSecTm(b))); }
842  TmBucketV[b].NIdV.Add(NIdV[i]);
843  }
844 }
845 
846 void TTimeNENet::GetEdgeBuckets(const int EdgesPerBucket, TTimeNet::TTmBucketV& TmBucketV) const {
847  TIntV EIdV; GetEIdByTm(EIdV);
848  TmBucketV.Gen(EIdV.Len()/EdgesPerBucket + 1, 0);
849  for (int i = 0; i < EIdV.Len(); i++) {
850  const int b = i/EdgesPerBucket;
851  if (TmBucketV.Len() <= b) { TmBucketV.Add(TTimeNet::TTmBucket(TSecTm(b))); }
852  TmBucketV[b].NIdV.Add(EIdV[i]);
853  }
854 }
855 
856 // get edges that close triangles over time
857 int TTimeNENet::GetTriadEdges(TIntV& TriadEIdV) const {
858  PUNGraph Graph = TUNGraph::New(GetNodes(), GetEdges());
859  TIntV EIdV; GetEIdByTm(EIdV);
860  TriadEIdV.Clr();
861  TExeTm ExeTm;
862  for (int edge = 0; edge < EIdV.Len(); edge++) {
863  const TEdgeI EI = GetEI(EIdV[edge]);
864  const int Src = EI.GetSrcNId();
865  const int Dst = EI.GetDstNId();
866  if (Src==Dst || Graph->IsEdge(Src, Dst)) { continue; } // take only 1st edge
867  if (! Graph->IsNode(Src)) { Graph->AddNode(Src); }
868  if (! Graph->IsNode(Dst)) { Graph->AddNode(Dst); }
869  if (TSnap::GetCmnNbrs(Graph, Src, Dst) > 0) { TriadEIdV.Add(EIdV[edge]); }
870  Graph->AddEdge(Src, Dst);
871  if (edge % 10000 == 0) {
872  printf("\redges %dk / %dk: triangle edges: %dk [total %s]", edge/1000, EIdV.Len()/1000,
873  TriadEIdV.Len()/1000, ExeTm.GetStr()); }
874  }
875  return Graph->GetEdges();
876 }
877 
878 PGStatVec TTimeNENet::TimeGrowth(const TTmUnit& TimeStep, const TFSet& TakeStat, const TSecTm& StartTm) const {
879  TExeTm ExeTm;
880  PGStatVec GStatVec = TGStatVec::New(TimeStep, TakeStat);
881  TTimeNet::TTmBucketV TmBucketV;
882  GetEdgeTmBuckets(TimeStep, TmBucketV);
883  const PNEGraph FullGraph = TSnap::ConvertGraph<PNEGraph>(PTimeNENet((TTimeNENet*)this));
884  TIntV EdgeIdV;
885  for (int t = 0; t < TmBucketV.Len(); t++) {
886  EdgeIdV.AddV(TmBucketV[t].NIdV); // edges up to time T
887  printf("\n***%d/%d: %s (%d edges) ", t+1, TmBucketV.Len(), TmBucketV[t].BegTm.GetStr().CStr(), EdgeIdV.Len()); ExeTm.Tick();
888  if (TmBucketV[t].BegTm < StartTm) { continue; }
889  const PNEGraph PreGraph = TSnap::GetESubGraph(FullGraph, EdgeIdV);
890  GStatVec->Add(PreGraph, TmBucketV[t].BegTm);
891  printf(" [%s]\n", ExeTm.GetTmStr());
892  //{ TFOut FOut("LinkedIn.GStatVec"); GStatVec->Save(FOut); }
893  }
894  return GStatVec;
895 }
896 
897 // take network from last TakeNTmUnits and measure properties
898 PGStatVec TTimeNENet::TimeGrowth(const TStr& FNmPref, const TStr& Desc, const TFSet& TakeStat, const int& NDiamRuns,
899  const TTmUnit& TmUnit, const int& TakeNTmUnits, const bool& LinkBWays) const {
900  TGStat::NDiamRuns = NDiamRuns;
901  PGStatVec GrowthStat = TGStatVec::New(TmUnit, TakeStat);
902  TTimeNet::TTmBucketV TmBucketV;
903  GetEdgeTmBuckets(TmUnit, TmBucketV);
904  TIntV EdgeIdV;
905  TExeTm ExeTm;
906  for (int t = 0; t < TmBucketV.Len(); t++) {
907  // take graph over last TakeNTmUnits time units
908  if (TakeNTmUnits == -1) {
909  EdgeIdV.AddV(TmBucketV[t].NIdV); }
910  else {
911  if (t < TakeNTmUnits) { continue; }
912  EdgeIdV.Clr(false);
913  for (int i = t-TakeNTmUnits; i < t; i++) { EdgeIdV.AddV(TmBucketV[i].NIdV); }
914  }
915  printf("*** %s (%d edges)\n", TmBucketV[t].BegTm.GetStr().CStr(), EdgeIdV.Len()); ExeTm.Tick();
916  PNEGraph PreGraph = TSnap::ConvertESubGraph<PNEGraph>(PTimeNENet((TTimeNENet*)this), EdgeIdV);
917  if (LinkBWays) {
918  TIntV KeepEIdV; // keep only nodes that have in- and out-links (send and receive email)
919  for (TNEGraph::TEdgeI EI = PreGraph->BegEI(); EI < PreGraph->EndEI(); EI++) {
920  if (PreGraph->IsEdge(EI.GetDstNId(), EI.GetSrcNId(), true)) { KeepEIdV.Add(EI.GetId()); }
921  }
922  PreGraph = TSnap::GetESubGraph(PreGraph, KeepEIdV);
923  }
924  // take statistics
925  GrowthStat->Add(PreGraph, TmBucketV[t].BegTm);
926  { TFOut FOut(TStr::Fmt("growth.%s.gStatVec", FNmPref.CStr()));
927  GrowthStat->Save(FOut); }
928  GrowthStat->SaveTxt(FNmPref, Desc);
929  printf(" [%s]\n", ExeTm.GetTmStr());
930  }
931  return GrowthStat;
932 }
933 
934 void TTimeNENet::PlotEffDiam(const TStr& FNmPref, const TStr& Desc, const TTmUnit& TmUnit,
935  const TSecTm& StartTm, const int& NDiamRuns, const bool& OnlyWcc) const {
936  TTimeNet::TTmBucketV TmBucketV;
937  GetEdgeTmBuckets(TmUnit, TmBucketV);
938  PNEGraph FullGraph = TSnap::ConvertGraph<PNEGraph>(PTimeNENet((TTimeNENet*)this));
939  TIntV EdgeIdV;
940  TExeTm ExeTm, Run1Tm;
941  TFltTrV TmDiamV, NdsDiamV;
942  for (int t = 0; t < TmBucketV.Len(); t++) {
943  EdgeIdV.AddV(TmBucketV[t].NIdV); // edges up to time T
944  printf("\n*** %s (%d edges)\n", TmBucketV[t].BegTm.GetStr(TmUnit).CStr(), EdgeIdV.Len()); ExeTm.Tick();
945  if (TmBucketV[t].BegTm < StartTm) continue;
946  PNGraph PreGraph = TSnap::ConvertESubGraph<PNGraph>(FullGraph, EdgeIdV);
947  TMom Mom;
948  double EffDiam = 0.0;
949  for (int r = 0; r < NDiamRuns; r++) {
950  printf("%d...", r+1); Run1Tm.Tick();
951  if (OnlyWcc) { EffDiam = TSnap::GetAnfEffDiam(TSnap::GetMxWcc(PreGraph)); }
952  else { EffDiam = TSnap::GetAnfEffDiam(PreGraph); }
953  Mom.Add(EffDiam);
954  printf("[%s]\r", Run1Tm.GetTmStr());
955  }
956  Mom.Def();
957  TmDiamV.Add(TFltTr(TmBucketV[t].BegTm.Round(TmUnit).GetAbsSecs(), Mom.GetMean(), Mom.GetSDev()));
958  NdsDiamV.Add(TFltTr(PreGraph->GetNodes(), Mom.GetMean(), Mom.GetSDev()));
959  NdsDiamV.Sort();
960  printf(" [%s] \n", ExeTm.GetTmStr());
961  const TStr WccStr = OnlyWcc ? "WCC " : TStr::GetNullStr();
962  { TGnuPlot GnuPlot("diamEff1."+FNmPref, TStr::Fmt("%s. G(%d, %d). %d RUNS.", Desc.CStr(), GetNodes(), GetEdges(), NDiamRuns));
963  GnuPlot.SetXYLabel(TStr::Fmt("TIME [%s]", TTmInfo::GetTmUnitStr(TmUnit).CStr()), "AVERAGE "+WccStr+"Effective Diameter");
964  GnuPlot.AddErrBar(TmDiamV, "", "");
965  GnuPlot.SavePng(); }
966  { TGnuPlot GnuPlot("diamEff2."+FNmPref, TStr::Fmt("%s. G(%d, %d). %d RUNS.", Desc.CStr(), GetNodes(), GetEdges(), NDiamRuns));
967  GnuPlot.SetXYLabel("NODES", "AVERAGE "+WccStr+"Effective Diameter");
968  GnuPlot.AddErrBar(NdsDiamV, "", "");
969  GnuPlot.SavePng(); }
970  }
971 }
972 
973 // pretend we only have link data starting in PostTmDiam
974 // compare the diameter of the nodes after PostTmDiam with diameter
975 // of the nodes after PostTmDiam but *also* using nodes and edges
976 // from before PostTmDiam
977 void TTimeNENet::PlotMissingPast(const TStr& FNmPref, const TStr& Desc, const TTmUnit& TmUnit,
978  const TSecTm& DelPreTmEdges, const TSecTm& PostTmDiam, const bool& LinkBWays) {
979  printf("\nGrowth over time: degree distribution, Growth Power Law, Diameter.\n %s group by %s.\n",
980  FNmPref.CStr(), TTmInfo::GetTmUnitStr(TmUnit).CStr());
981  printf(" Delete out-edges of pre time %s nodes.\n Take subgraph of post year %s subgraph.\n\n",
982  DelPreTmEdges.GetStr().CStr(), PostTmDiam.GetStr().CStr());
983  // run diameter
984  /*const int NSamples = 100;
985  const int NDiamRuns = 10;
986  // delete past
987  if (DelPreEdges != -1) {
988  TIntV EdgeIdV;
989  printf("Deleting pre-year %d edges\n", DelPreEdges);
990  for (TEdgeI EI = BegEI(); EI < EndEI(); EI++) {
991  if (EI().GetYear() < DelPreEdges) { EdgeIdV.Add(EI.GetId()); } }
992  for (int e = 0; e < EdgeIdV.Len(); e++) { DelEdge(EdgeIdV[e]); }
993  printf(" Deleted %d edges (out of %d edges total).\n", EdgeIdV.Len(), GetEdges());
994  }
995  PNEGraph FullGraph = GetNEGraph();
996  TSnap::DelZeroDegNodes(FullGraph);
997 
998  PGrowthStat GrowthStat = TGrowthStat::New(TmUnit, TGraphStat::AllStat());
999  TFltV PreDiamSDev, PreEffDiamSDev, WccDiamSDev, WccEffDiamSDev;
1000  TIntV EdgeIdV;
1001  TExeTm ExeTm;
1002  TTimeNet::TTmBucketV EdgeTmBucketV;
1003  GetEdgeTmBuckets(TmUnit, EdgeTmBucketV);
1004  for (int t = 0; t < EdgeTmBucketV.Len(); t++) {
1005  printf("\nGraph: %s (%d / %d) [%s]\n", EdgeTmBucketV[t].BegTm.GetTmStr().CStr(),
1006  t+1, EdgeTmBucketV.Len(), TExeTm::GetCurTm());
1007  // up-to-year subgraph
1008  EdgeIdV.AddV(EdgeTmBucketV[t].NIdV); // nodes up to time T
1009  if (EdgeTmBucketV[t].BegTm.GetYear() < PostYearDiam) continue;
1010 
1011  const PNEGraph PreNEGraph = EdgeBothWays ? FullGraph->GetEdgeSubGraph(EdgeIdV) : FullGraph;
1012  PNGraph PreGraph = PreNEGraph->GetBWayGraph();
1013  PNGraph WccGraph = TSnap::GetMxWcc(PreGraph);
1014 
1015  // find nodes that are not in missing past
1016  TIntV PostYearNIdV, WccPostYearNIdV;
1017  THash<TIntPr, TInt> PostYearEdgeH;
1018  for (TNEGraph::TEdgeI EI = PreNEGraph->BegEI(); EI < PreNEGraph->EndEI(); EI++) {
1019  if (GetEDat(EI.GetId()).GetYear() >= PostYearDiam) {
1020  PostYearEdgeH.AddKey(TIntPr(EI.GetSrcNId(), EI.GetDstNId())); }
1021  }
1022  TIntH PostYearNIdH;
1023  for (int i = 0; i < PostYearEdgeH.Len(); i++) {
1024  if ((! EdgeBothWays) || (EdgeBothWays && PostYearEdgeH.IsKey(TIntPr(PostYearEdgeH.GetKey(i).Val2, PostYearEdgeH.GetKey(i).Val1)))) { //reverse edge exists
1025  PostYearNIdH.AddKey(PostYearEdgeH.GetKey(i).Val1);
1026  PostYearNIdH.AddKey(PostYearEdgeH.GetKey(i).Val2);
1027  }
1028  }
1029  PostYearNIdH.GetKeyV(PostYearNIdV);
1030  for (int i = 0; i < PostYearNIdV.Len(); i++) {
1031  if (WccGraph->IsNode(PostYearNIdV[i])) {
1032  WccPostYearNIdV.Add(PostYearNIdV[i]); }
1033  }
1034 
1035  // diameter of PostYearDiam subgraph using whole graph (all edges)
1036  TMom PreDiamMom, PreEffDiamMom, WccDiamMom, WccEffDiamMom;
1037  for (int r = 0; r < NDiamRuns; r++) {
1038  if (! PostYearNIdV.Empty()) {
1039  PreDiamMom.Add(-1); //PreDiamMom.Add(TSnap::GetBfsFullDiam(PreGraph, NSamples, PostYearNIdV, false));
1040  PreEffDiamMom.Add(TSnap::GetBfsEffDiam(PreGraph, NSamples, PostYearNIdV, false));
1041  }
1042  if (! WccPostYearNIdV.Empty()) {
1043  //WccDiamMom.Add(TSnap::GetBfsFullDiam(WccGraph, NSamples, WccPostYearNIdV, false));
1044  //WccEffDiamMom.Add(TSnap::GetBfsEffDiam(WccGraph, NSamples, WccPostYearNIdV, false));
1045  WccDiamMom.Add(-1); WccEffDiamMom.Add(-1);
1046  }
1047  printf(" diam: %d [%s] %.2f\r", r+1, ExeTm.GetTmStr(), PreEffDiamMom.GetValV().Last().Val); ExeTm.Tick();
1048  }
1049  PreDiamMom.Def(); PreEffDiamMom.Def();
1050  WccDiamMom.Def(); WccEffDiamMom.Def();
1051  // save stat
1052  PGraphStat GraphStatPt = GrowthStat->Add(EdgeTmBucketV[t].BegTm);
1053  TGraphStat& GraphStat = *GraphStatPt;
1054  GraphStat.Nodes = PreGraph->GetNodes();
1055  GraphStat.NonZNodes = PreGraph->GetNodes() - TSnap::CntDegNodes<PNGraph>(PreGraph, 0);
1056  GraphStat.Srcs = GraphStat.Nodes - TSnap::CntOutDegNodes<PNGraph>(PreGraph, 0);
1057  GraphStat.Edges = PreGraph->GetEdges();
1058  GraphStat.WccNodes= WccGraph->GetNodes();
1059  GraphStat.WccEdges = WccGraph->GetEdges();
1060  GraphStat.FullDiam = PreDiamMom.GetMean(); // mean
1061  GraphStat.EffDiam = PreEffDiamMom.GetMean();
1062  GraphStat.FullWccDiam = WccDiamMom.GetMean();
1063  GraphStat.EffWccDiam = WccEffDiamMom.GetMean();
1064  GraphStat.FullDiamDev = PreDiamMom.GetSDev(); // variance
1065  GraphStat.EffDiamDev = PreEffDiamMom.GetSDev();
1066  GraphStat.FullWccDiamDev = WccDiamMom.GetSDev();
1067  GraphStat.EffWccDiamDev = WccEffDiamMom.GetSDev();
1068  { TFOut FOut("growth."+FNmPref+".bin");
1069  GrowthStat->Save(FOut); }
1070  const TStr BigDesc = TStr::Fmt("%s. MISSING PAST DIAMETER\nDelPreEdges\t%d\nPostYearDiam\t%d\n",
1071  Desc.CStr(), DelPreEdges, PostYearDiam);
1072  GrowthStat->SaveTxt(FNmPref, BigDesc);
1073  }
1074  // diameter plots
1075  GrowthStat->PlotDiam(FNmPref, Desc + TStr::Fmt(" MISSING PAST. DelPre:%d PostYear:%d.",
1076  DelPreEdges, PostYearDiam));
1077  */
1078 }
1079 
1080 // plot time difference of the node ages when an edge arrives
1081 /*void TTimeNENet::PlotEdgeNodeTimeDiff(const TStr& FNmPref, TStr Desc) const {
1082  if (Desc.Empty()) { Desc = FNmPref; }
1083  printf("Edge node time differences:\n");
1084  THash<TInt, TInt> NodeAgeH, Node1AgeH; // (time_dst - time_src), only 1st edge
1085  THash<TInt, TInt> SrcTmH, DstTmH; // time_edge - time_src/time_dst)
1086  PUNGraph Graph = TUNGraph::New(GetNodes(), -1);
1087  TIntV EIdV; GetEIdByTm(EIdV);
1088  TIntH DegCntH(10, true);
1089  TExeTm ExeTm;
1090  for (int edge = 0; edge < EIdV.Len(); edge++) {
1091  const TEdgeI EI = GetEI(EIdV[edge]);
1092  const int Src = EI.GetSrcNId();
1093  const int Dst = EI.GetDstNId();
1094  if (Src==Dst || Graph->IsEdge(Src, Dst)) { continue; } // take only 1st edge
1095  if (! Graph->IsNode(Src)) { Graph->AddNode(Src); }
1096  if (! Graph->IsNode(Dst)) { Graph->AddNode(Dst); }
1097  const int SrcDeg = Graph->GetNode(Src).GetOutDeg();
1098  const int DstDeg = Graph->GetNode(Dst).GetInDeg();
1099  const int EdgeTm = EI.GetDat().GetAbsSecs();
1100  const int SrcTm = EI.GetSrcNDat().GetAbsSecs();
1101  const int DstTm = EI.GetDstNDat().GetAbsSecs();
1102  NodeAgeH.AddDat((DstTm-SrcTm)/3600) += 1;
1103  if (SrcDeg == 0 || DstDeg == 0) {
1104  Node1AgeH.AddDat((DstTm-SrcTm)/3600) += 1; }
1105  SrcTmH.AddDat((EdgeTm-SrcTm)/3600) += 1;
1106  DstTmH.AddDat((EdgeTm-DstTm)/3600) += 1;
1107  // add edge
1108  Graph->AddEdge(Src, Dst);
1109  if (edge % 1000 == 0) {
1110  printf("\r%dk / %dk [total %s]", edge/1000, EIdV.Len()/1000, ExeTm.GetStr()); }
1111  }
1112  TGnuPlot::PlotValCntH(NodeAgeH, "tmDstSrc."+FNmPref, TStr::Fmt("%s, Node time difference.", Desc.CStr()),
1113  "Time(destination) - Time(source) [hours]", "Count");
1114  TGnuPlot::PlotValCntH(Node1AgeH, "tmDstSrc1."+FNmPref, TStr::Fmt("%s. 1-ST EDGE of one of the nodes.", Desc.CStr()),
1115  "Time(destination) - Time(source) [hours]", "Count");
1116  TGnuPlot::PlotValCntH(SrcTmH, "tmEdgeSrc."+FNmPref, TStr::Fmt("%s. Edge time - source node time.", Desc.CStr()),
1117  "Edge time - source node time [hours]", "Count");
1118  TGnuPlot::PlotValCntH(DstTmH, "tmEdgeDst."+FNmPref, TStr::Fmt("%s. Edge time - destination node time.", Desc.CStr()),
1119  "Edge time - destination node time [hours]", "Count");
1120  printf("\n");
1121 }
1122 
1123 // plot the distribution whether the edge to node that closes the triad was
1124 // chosen random or preferentially
1125 void TTimeNENet::PlotCloseTriad(const TStr& FNmPref, TStr Desc, const bool& OnlyBackEdges) const {
1126  if (Desc.Empty()) { Desc = FNmPref; }
1127  printf("Random vs. preferential triad closing: %s\n", OnlyBackEdges?"OnlyBackEdges":"");
1128  THash<TInt, TInt> NodeAgeH, Node1AgeH; // (time_dst - time_src), only 1st edge
1129  THash<TInt, TInt> SrcTmH, DstTmH; // time_edge - time_src/time_dst)
1130  PUNGraph Graph = TUNGraph::New(GetNodes(), -1);
1131  TIntV EIdV; GetEIdByTm(EIdV);
1132  TIntV CmnV, SrcNbrV, DstNbrV;
1133  TExeTm ExeTm;
1134  TFltV RndRdnV, PrefPrefV, PrefRndV, RndPrefV;
1135  TFltFltH RndRndH, PrefPrefH, PrefRndH, RndPrefH;
1136  int TriadEdges = 0;
1137  FILE *F = fopen(TStr::Fmt("%s-prob.tab", FNmPref.CStr()).CStr(), "wt");
1138  fprintf(F, "#Probability of picking a node that closes the triangle (pick preferentially vs. uniformly at random)\n");
1139  fprintf(F, "#i\tEId\tRndRnd\tRndPref\tPrefRnd\tPrefPref\n");
1140  for (int edge = 0; edge < EIdV.Len(); edge++) {
1141  const TEdgeI EI = GetEI(EIdV[edge]);
1142  const int Src = EI.GetSrcNId();
1143  const int Dst = EI.GetDstNId();
1144  if (Src==Dst || Graph->IsEdge(Src, Dst)) { continue; } // take only 1st edge
1145  if (! Graph->IsNode(Src)) { Graph->AddNode(Src); }
1146  if (! Graph->IsNode(Dst)) { Graph->AddNode(Dst); }
1147  if (OnlyBackEdges && EI.GetSrcNDat().GetAbsSecs()-EI.GetDstNDat().GetAbsSecs() < 0) {
1148  Graph->AddEdge(Src, Dst); continue;
1149  }
1150  // find common neighbors
1151  const TUNGraph::TNodeI SrcNI = Graph->GetNI(Src);
1152  const TUNGraph::TNodeI DstNI = Graph->GetNI(Dst);
1153  SrcNbrV.Clr(false); DstNbrV.Clr(false);
1154  for (int i = 0; i < SrcNI.GetOutDeg(); i++) { SrcNbrV.Add(SrcNI.GetOutNId(i)); }
1155  for (int i = 0; i < DstNI.GetOutDeg(); i++) { DstNbrV.Add(DstNI.GetOutNId(i)); }
1156  SrcNbrV.Sort(); DstNbrV.Sort();
1157  SrcNbrV.Intrs(DstNbrV, CmnV);
1158  if (! CmnV.Empty()) {
1159  // triad completion
1160  const int SrcDeg = Graph->GetNode(Src).GetOutDeg();
1161  const int DstDeg = Graph->GetNode(Dst).GetInDeg();
1162  double RndRnd=0, RndPref=0, PrefRnd=0, PrefPref=0;
1163  int AllNbrDegSum=0;//, NbrDegSum = 0;
1164  //for (int i = 0; i < CmnV.Len(); i++) {
1165  // NbrDegSum += Graph->GetNode(CmnV[i]).GetOutDeg(); }
1166  for (int i = 0; i < SrcNI.GetOutDeg(); i++) {
1167  AllNbrDegSum += Graph->GetNode(SrcNI.GetOutNId(i)).GetOutDeg(); }
1168  // uniform-uniform node selection = 1/|cmn| sum_{i\in cmn} 1/(d_i-1)
1169  // preferential-uniform
1170  for (int i = 0; i < CmnV.Len(); i++) {
1171  const int Deg = Graph->GetNode(CmnV[i]).GetOutDeg();
1172  RndRnd += (1.0/double(SrcDeg)) * (1.0/double(Deg-1));
1173  PrefRnd += (Deg/double(AllNbrDegSum)) * (1.0/double(Deg-1));
1174  }
1175  // uniform-preferential selection = 1/|cmn| sum_{i\in cmn} d_t / sum_{i--j} d_j
1176  // preferential-preferential
1177  for (int i = 0; i < CmnV.Len(); i++) {
1178  const TUNGraph::TNode N = Graph->GetNode(CmnV[i]);
1179  const int Deg = N.GetOutDeg();
1180  int DegSum = 0;
1181  for (int j = 0; j < N.GetOutDeg(); j++) {
1182  if (N.GetOutNId(j) != Src) {
1183  DegSum += Graph->GetNode(N.GetOutNId(j)).GetOutDeg(); }
1184  }
1185  RndPref += 1.0/double(SrcDeg) * DstDeg/double(DegSum);
1186  PrefPref += (Deg/double(AllNbrDegSum)) * (DstDeg/double(DegSum));
1187  }
1188  IAssert(0.0<RndRnd && RndRnd<=1.0); IAssert(0.0<RndPref && RndPref<=1.0);
1189  IAssert(0.0<PrefRnd && PrefRnd<=1.0); IAssert(0.0<PrefPref && PrefPref<=1.0);
1190  RndRndH.AddDat(TMath::Round(RndRnd, 2)) += 1.0;
1191  RndPrefH.AddDat(TMath::Round(RndPref, 2)) += 1.0;
1192  PrefRndH.AddDat(TMath::Round(PrefRnd, 2)) += 1.0;
1193  PrefPrefH.AddDat(TMath::Round(PrefPref, 2)) += 1.0;
1194  fprintf(F, "%d\t%d\t%g\t%g\t%g\t%g\n", edge, EI.GetId(), RndRnd, RndPref, PrefRnd, PrefPref);
1195  TriadEdges++;
1196  }
1197  Graph->AddEdge(Src, Dst);
1198  if (edge % 1000 == 0) {
1199  printf("\r%dk / %dk triad edges %dk [total %s]", edge/1000, EIdV.Len()/1000, TriadEdges/1000, ExeTm.GetStr()); }
1200  }
1201  fclose(F);
1202  printf("\nRandom vs. preferential triad closing\n");
1203  printf("All edges: %d\n", Graph->GetEdges());
1204  printf("Triad closing edges: %d\n", TriadEdges);
1205  // plot distribution
1206  TFltPrV RndRndPrV; RndRndH.GetKeyDatPrV(RndRndPrV); RndRndPrV.Sort();
1207  TFltPrV RndPrefPrV; RndPrefH.GetKeyDatPrV(RndPrefPrV); RndPrefPrV.Sort();
1208  TFltPrV PrefRndPrV; PrefRndH.GetKeyDatPrV(PrefRndPrV); PrefRndPrV.Sort();
1209  TFltPrV PrefPrefPrV; PrefPrefH.GetKeyDatPrV(PrefPrefPrV); PrefPrefPrV.Sort();
1210  { TGnuPlot GP(TStr::Fmt("probPdf.%s", FNmPref.CStr()), TStr::Fmt("%s. %d/%d edges. CDF. Uniform vs. Preferential triangle closure.", Desc.CStr(), TriadEdges,Graph->GetEdges()));
1211  GP.AddPlot(RndRndPrV, gpwLinesPoints, "Random--Random");
1212  GP.AddPlot(RndPrefPrV, gpwLinesPoints, "Random--Preferential");
1213  GP.AddPlot(PrefRndPrV, gpwLinesPoints, "Preferential--Random");
1214  GP.AddPlot(PrefPrefPrV, gpwLinesPoints, "Preferential--Preferential");
1215  GP.SetXYLabel("Probability", "Count");
1216  GP.SavePng(); }
1217  { TGnuPlot GP(TStr::Fmt("probCdf.%s", FNmPref.CStr()), TStr::Fmt("%s. %d/%d edges. CDF. Uniform vs. Preferential triangle closure.", Desc.CStr(), TriadEdges,Graph->GetEdges()));
1218  GP.AddPlot(TGUtil::GetCdf(RndRndPrV), gpwLinesPoints, "Random--Random");
1219  GP.AddPlot(TGUtil::GetCdf(RndPrefPrV), gpwLinesPoints, "Random--Preferential");
1220  GP.AddPlot(TGUtil::GetCdf(PrefRndPrV), gpwLinesPoints, "Preferential--Random");
1221  GP.AddPlot(TGUtil::GetCdf(PrefPrefPrV), gpwLinesPoints, "Preferential--Preferential");
1222  GP.SetXYLabel("Cumulative probability", "Count");
1223  GP.SavePng(); }
1224 }*/
1225 
1226 PTimeNENet TTimeNENet::GetGnmRndNet(const int& Nodes, const int& Edges) {
1227  printf("Generating G_nm(%d, %d)\n", Nodes, Edges);
1228  int Src, Dst;
1229  PTimeNENet Net = TTimeNENet::New();
1230  Net->Reserve(Nodes, Edges);
1231  for (int e = 0; e < Edges; e++) {
1232  Src = TInt::Rnd.GetUniDevInt(Nodes);
1233  Dst = TInt::Rnd.GetUniDevInt(Nodes);
1234  while (Dst == Src || Net->IsEdge(Src, Dst)) {
1235  Dst = TInt::Rnd.GetUniDevInt(Nodes); }
1236  if (! Net->IsNode(Src)) { Net->AddNode(Src, TSecTm(e)); }
1237  if (! Net->IsNode(Dst)) { Net->AddNode(Dst, TSecTm(e)); }
1238  Net->AddEdge(Src, Dst, -1, TSecTm(e));
1239  }
1240  return Net;
1241 }
1242 
1243 // ACL, model B: Aiello, Chung, Lu: Random evolution in massive graphs
1244 PTimeNENet TTimeNENet::GetPrefAttach(const int& Nodes, const int& Edges, const double& GammaIn, const double& GammaOut) {
1245  const double Alpha = Nodes/double(Edges);
1246  printf("Generating PA(%d, %d), with slope in:%.1f, out: %.1f\n", Nodes, Edges,
1247  2+GammaIn/(Alpha/(1-Alpha)), 2+GammaOut/(Alpha/(1-Alpha)));
1248  // init
1249  int nodes=0, edges=0, time=0, iter=0;
1250  TIntV OutW(Edges, 0), InW(Edges, 0);
1251  PTimeNENet Net = TTimeNENet::New();
1252  Net->Reserve(Nodes, Edges);
1253  // 1st node
1254  Net->AddNode(0, TSecTm(time++)); nodes++;
1255  OutW.Add(0); InW.Add(0);
1256  while (edges < Edges) {
1257  int Src=-1, Dst=-1; iter++;
1258  if (TInt::Rnd.GetUniDev() < Alpha) {
1259  if (nodes < Nodes) {
1260  IAssert(Net->AddNode(nodes, TSecTm(time++)));
1261  nodes++; }
1262  } else {
1263  if (TInt::Rnd.GetUniDev() < nodes*GammaIn/double(edges+nodes*GammaIn)) {
1264  Src = TInt::Rnd.GetUniDevInt(nodes); }
1265  else { Src = OutW[TInt::Rnd.GetUniDevInt(OutW.Len())]; }
1266  if (TInt::Rnd.GetUniDev() < nodes*GammaOut/double(edges+nodes*GammaOut)) {
1267  Dst = TInt::Rnd.GetUniDevInt(nodes); }
1268  else { Dst = InW[TInt::Rnd.GetUniDevInt(InW.Len())]; }
1269  }
1270  if (Src == Dst || Net->IsEdge(Src, Dst)) {
1271  continue;
1272  }
1273  //printf("%d/%d %d %d\n", edges, iter, Src, Dst);
1274  if (! Net->IsNode(Src)) { Net->AddNode(Src, TSecTm(time++)); nodes++; }
1275  if (! Net->IsNode(Dst)) { Net->AddNode(Dst, TSecTm(time++)); nodes++; }
1276  Net->AddEdge(Src, Dst, -1, TSecTm(time++));
1277  OutW.Add(Src); InW.Add(Dst); edges++;
1278  }
1279  for (int node = 0; node < Nodes; node++) {
1280  if (! Net->IsNode(node)) {
1281  Net->AddNode(node, TSecTm(time++)); }
1282  }
1283  return Net;
1284 }
1285 
1286 PTimeNENet TTimeNENet::GetPrefAttach(const int& Nodes, const int& OutDeg) {
1287  printf("Generating PA, nodes:%d, out-deg:%d\n", Nodes, OutDeg);
1288  // init
1289  int time=0;
1290  PTimeNENet Net = TTimeNENet::New();
1291  Net->Reserve(Nodes, OutDeg*Nodes);
1292  Net->AddNode(0, TSecTm(++time)); Net->AddNode(1, TSecTm(++time));
1293  Net->AddEdge(0, 1, -1, TSecTm(++time));
1294  TIntV NIdV; NIdV.Add(0); NIdV.Add(1);
1295  TIntSet NodeSet;
1296  for (int node = 2; node <= Nodes; node++) {
1297  NodeSet.Clr(false);
1298  while (NodeSet.Len() < OutDeg && NodeSet.Len() < node) {
1299  NodeSet.AddKey(NIdV[TInt::Rnd.GetUniDevInt(NIdV.Len())]);
1300  }
1301  const int N = Net->AddNode(node, TSecTm(++time));
1302  for (int i = 0; i < NodeSet.Len(); i++) {
1303  Net->AddEdge(node, NodeSet[i], -1, TSecTm(++time));
1304  NIdV.Add(N); NIdV.Add(NodeSet[i]);
1305  }
1306  }
1307  return Net;
1308 }
1309 
1310 void TTimeNENet::SaveEdgeTm(const TStr& EdgeFNm, const bool& RenumberNId, const bool& RelativeTm) const {
1311  TIntV EIdV; GetEIdByTm(EIdV);
1312  const int BegTm = RelativeTm ? GetEDat(EIdV[0]).GetAbsSecs() : 0;
1313  TIntSet NIdMap;
1314  if (RenumberNId) { NIdMap.Gen(GetNodes()); }
1315  FILE *F = fopen(EdgeFNm.CStr(), "wt");
1316  //fprintf(F, "#Nodes\t%d\n#Edges\t%d\n", GetNodes(), GetEdges());
1317  //fprintf(F, "#<src>\t<dst>\t<time>\n");
1318  for (int e =0; e < EIdV.Len(); e++) {
1319  const TEdgeI EI = GetEI(EIdV[e]);
1320  if (RenumberNId) {
1321  const int src = EI.GetSrcNId();
1322  const int dst = EI.GetDstNId();
1323  NIdMap.AddKey(src); NIdMap.AddKey(dst);
1324  fprintf(F, "%d\t%d\t%d\n", NIdMap.GetKeyId(src), NIdMap.GetKeyId(dst), EI().GetAbsSecs()-BegTm);
1325  }else {
1326  fprintf(F, "%d\t%d\t%d\n", EI.GetSrcNId(), EI.GetDstNId(), EI().GetAbsSecs()-BegTm); }
1327  }
1328  fclose(F);
1329 }
1330 
1332  PTimeNENet Net = TTimeNENet::New();
1333  for (int i = 1; i <= 6; i++) {
1334  Net->AddNode(i, TSecTm(0)); }
1335  int tm = 1;
1336  Net->AddEdge(1, 2, -1, TSecTm(tm++));
1337  Net->AddEdge(3, 4, -1, TSecTm(tm++));
1338  Net->AddEdge(3, 1, -1, TSecTm(tm++));
1339  Net->AddEdge(5, 6, -1, TSecTm(tm++));
1340  Net->AddEdge(6, 4, -1, TSecTm(tm++));
1341  Net->AddEdge(5, 3, -1, TSecTm(tm++));
1342  Net->AddEdge(5, 4, -1, TSecTm(tm++));
1343  Net->AddEdge(5, 2, -1, TSecTm(tm++));
1344  return Net;
1345 }
1346 
1347 PTimeNENet TTimeNENet::LoadFlickr(const TStr& NodeFNm, const TStr& EdgeFNm) {
1348  const int BegOfTm = 1047369600; // Tue Mar 11 01:00:00 2003 (begining of Flickr)
1349  PTimeNENet Net = TTimeNENet::New();
1350  printf("Adding nodes...");
1351  { TSsParser Ss(NodeFNm, ssfWhiteSep);
1352  while (Ss.Next()) {
1353  const int NId = Ss.GetInt(0);
1354  const int Tm = Ss.GetInt(1)+BegOfTm;
1355  if (TSecTm(Tm) < TSecTm(2002, 1, 1)) {
1356  printf(" skip node %g (time %d)\n", (double) Ss.GetLineNo(), Ss.GetInt(1)); continue; }
1357  Net->AddNode(NId, TSecTm(Tm));
1358  } }
1359  printf(" %d nodes\n", Net->GetNodes());
1360  printf("Adding edges...");
1361  int SkipCnt=0;
1362  //TIntH SkipDiffCnt;
1363  { TSsParser Ss(EdgeFNm, ssfWhiteSep);
1364  while (Ss.Next()) {
1365  const int NId1 = Ss.GetInt(0);
1366  const int NId2 = Ss.GetInt(1);
1367  const TSecTm Tm = TSecTm(Ss.GetInt(2)+BegOfTm);
1368  if (! Net->IsNode(NId1) || ! Net->IsNode(NId2)) { printf("not node\n"); continue; }
1369  if (Tm < TSecTm(2002, 1, 1)) { SkipCnt++;
1370  printf(" skip edge %g (time %s)\n", (double) Ss.GetLineNo(), Tm.GetStr().CStr()); continue; }
1371  if (Tm+600 < Net->GetNDat(NId1)) {
1372  printf(" 1:skip edge %g (time %s < %s)\n", (double) Ss.GetLineNo(), Tm.GetStr().CStr(), Net->GetNDat(NId1).GetStr().CStr());
1373  //SkipDiffCnt.AddDat(-Tm.GetAbsSecs()+Net->GetNDat(NId1).GetAbsSecs()) += 1;
1374  SkipCnt++; continue; }
1375  if (Tm+600 < Net->GetNDat(NId2)) { SkipCnt++;
1376  printf(" 2:skip edge %g (time %s < %s)\n", (double) Ss.GetLineNo(), Tm.GetStr().CStr(), Net->GetNDat(NId2).GetStr().CStr());
1377  //SkipDiffCnt.AddDat(-Tm.GetAbsSecs()+Net->GetNDat(NId2).GetAbsSecs()) += 1;
1378  SkipCnt++; continue; }
1379  Net->AddEdge(NId1, NId2, -1, TSecTm(Tm));
1380  } }
1381  //TGnuPlot::PlotValCntH(SkipDiffCnt, "flickr-edgeNodeDiff", "", "seconds", "count");
1382  printf(" %d edges\n", Net->GetEdges());
1383  printf(" %d edges skipped (edge time < node time)\n", SkipCnt);
1384  Net->UpdateNodeTimes();
1385  return Net;
1386 }
1387 
1388 // load network where fields are separated by Separator and each line contains (*Fld are column indexes)
1389 // .. <source> .. <destination> .. <time>
1390 PTimeNENet TTimeNENet::LoadEdgeTm(const TStr& EdgeFNm, const int& SrcFld, const int& DstFld, const int& TimeFld, const TSsFmt& Separator) {
1391  printf("Loading %s\n", EdgeFNm.CStr());
1392  PTimeNENet Net = TTimeNENet::New();
1393  TStrHash<TInt> StrToId(Mega(1), true); // node id to string
1394  int LineCnt=0;
1395  TExeTm ExeTm;
1396  TSsParser Ss(EdgeFNm, Separator);
1397  TSecTm MinTm=TSecTm::GetCurTm(), MaxTm=TSecTm(100);
1398  while (Ss.Next()) {
1399  if (Ss.IsCmt()) { continue; }
1400  IAssert(Ss.Len() > TimeFld);
1401  const char* Node1 = Ss.GetFld(SrcFld);
1402  const char* Node2 = Ss.GetFld(DstFld);
1403  const char* TmStr = Ss.GetFld(TimeFld);
1404  if (strcmp(TmStr,"NULL")==0) { continue; }
1405  //const TSecTm Tm = TSecTm::GetDtTmFromYmdHmsStr(TmStr);
1406  const TSecTm Tm(atoi(TmStr));
1407  const int NId1 = StrToId.AddKey(Node1);
1408  const int NId2 = StrToId.AddKey(Node2);
1409  if (! Net->IsNode(NId1)) { Net->AddNode(NId1, TSecTm()); }
1410  if (! Net->IsNode(NId2)) { Net->AddNode(NId2, TSecTm()); }
1411  MinTm=TMath::Mn(MinTm, Tm);
1412  MaxTm=TMath::Mx(MaxTm, Tm);
1413  Net->AddEdge(NId1, NId2, -1, Tm);
1414  if (++LineCnt % 1000 == 0) {
1415  printf("\r %dk lines processed: %d %d [%s]", LineCnt/1000, Net->GetNodes(), Net->GetEdges(), ExeTm.GetStr()); }
1416  }
1417  printf("\r %d lines processed: %d %d [%s]\n", LineCnt, Net->GetNodes(), Net->GetEdges(), ExeTm.GetStr());
1418  printf(" Data range %s -- %s\n", MinTm.GetStr().CStr(), MaxTm.GetStr().CStr());
1419  //TSnap::PrintInfo(Net, "", "", false);
1420  Net->UpdateNodeTimes();
1421  return Net;
1422 }
Definition: ds.h:345
void Clr(const bool &DoDel=true, const int &NoDelLim=-1)
Definition: shash.h:1243
PTimeNENet GetTimeNENet() const
Definition: timenet.cpp:32
#define IAssert(Cond)
Definition: bd.h:262
int GetInt() const
Definition: dt.h:578
static const T & Mn(const T &LVal, const T &RVal)
Definition: xmath.h:36
TPair< TInt, TInt > TIntPr
Definition: ds.h:83
void PlotEffDiam(const TStr &FNmPref, const TStr &Desc, const TTmUnit &GroupBy, const TSecTm &StartTm, const int &NDiamRuns=10, const bool &OnlyWcc=false) const
Definition: timenet.cpp:934
static char * GetCurTm()
Definition: tm.h:374
static PTimeNet LoadArxiv(const TStr &PaperFNm, const TStr &CiteFNm)
Definition: timenet.cpp:383
TNodeI BegNI() const
Returns an iterator referring to the first node in the graph.
Definition: graph.h:479
TEdgeI BegEI() const
Returns an iterator referring to the first edge in the network.
Definition: network.h:1251
Definition: tm.h:355
double GetMedian() const
Definition: xmath.h:244
static PSs LoadTxt(const TSsFmt &SsFmt, const TStr &FNm, const PNotify &Notify=NULL, const bool &IsExcelEoln=true, const int &MxY=-1, const TIntV &AllowedColNV=TIntV(), const bool &IsQStr=true)
Definition: ss.cpp:100
void Reserve(const int &Nodes, const int &Edges)
Reserves memory for a network of Nodes nodes and Edges edges.
Definition: network.h:1285
Definition: ds.h:271
static PTimeNENet GetSmallNet()
Definition: timenet.cpp:1331
static PTimeNet New()
Definition: timenet.h:39
bool IsDef() const
Definition: tm.h:123
double GetBfsEffDiam(const PGraph &Graph, const int &NTestNodes, const bool &IsDir=false)
Returns the (approximation of the) Effective Diameter (90-th percentile of the distribution of shorte...
Definition: bfsdfs.h:415
void SavePng(const int &SizeX=1000, const int &SizeY=800, const TStr &Comment=TStr())
Definition: gnuplot.h:120
PGraph GetMxWcc(const PGraph &Graph)
Returns a graph representing the largest weakly connected component on an input Graph.
Definition: cncom.h:452
static const T & Mx(const T &LVal, const T &RVal)
Definition: xmath.h:32
TNodeI BegNI() const
Returns an iterator referring to the first node in the network.
Definition: network.h:1200
PTimeNENet Get1stEdgeNet() const
Definition: timenet.cpp:629
int GetKeyId(const TKey &Key) const
Definition: shash.h:1328
void PlotMissingPast(const TStr &FNmPref, const TStr &Desc, const TTmUnit &GroupBy, const TSecTm &DelPreTmEdges, const TSecTm &PostTmDiam) const
Definition: timenet.cpp:169
void GetNIdByTm(TIntV &NIdV) const
Definition: timenet.cpp:778
void PlotMissingPast(const TStr &FNmPref, const TStr &Desc, const TTmUnit &TmUnit, const TSecTm &DelPreTmEdges, const TSecTm &PostTmDiam, const bool &LinkBWays)
Definition: timenet.cpp:977
int GetEdges() const
Returns the number of edges in the graph.
Definition: graph.cpp:313
void GetEIdByTm(TIntV &EIdV) const
Definition: timenet.cpp:788
int AddNode(int NId=-1)
Adds a node of ID NId to the graph.
Definition: graph.cpp:8
Definition: fl.h:319
Definition: bits.h:119
int GetEdges() const
Returns the number of edges in the graph.
Definition: graph.cpp:82
TSizeTy Len() const
Returns the number of elements in the vector.
Definition: ds.h:547
void PlotCCfOverTm(const TStr &FNmPref, TStr Desc, const TTmUnit &TmUnit, const int &NodesBucket=-1) const
Definition: timenet.cpp:253
Node iterator. Only forward iteration (operator++) is supported.
Definition: graph.h:64
bool IsNode(const int &NId) const
Tests whether ID NId is a node.
Definition: network.h:1198
int GetNodes() const
Returns the number of nodes in the network.
Definition: network.h:1175
double GetAnfEffDiam(const PGraph &Graph, const bool &IsDir, const double &Percentile, const int &NApprox)
Definition: anf.h:217
Definition: ss.h:72
void Gen(const int &ExpectVals)
Definition: shash.h:1115
TNodeI EndNI() const
Returns an iterator referring to the past-the-end node in the network.
Definition: network.h:186
int GetEdges() const
Returns the number of edges in the network.
PGStatVec TimeGrowth(const TTmUnit &TimeStep, const TFSet &TakeStat, const TSecTm &StartTm=TSecTm(1)) const
Definition: timenet.cpp:878
int GetNodes() const
Returns the number of nodes in the graph.
Definition: graph.h:438
TStr GetSubStr(const int &BChN, const int &EChN) const
Definition: dt.cpp:811
void SetXYLabel(const TStr &XLabel, const TStr &YLabel)
Definition: gnuplot.h:73
void GetEdgeBuckets(const int EdgesPerBucket, TTimeNet::TTmBucketV &TmBucketV) const
Definition: timenet.cpp:846
void GetTmBuckets(const TTmUnit &GroupBy, TTmBucketV &TmBucketV) const
Definition: timenet.cpp:59
int AddNode(int NId=-1)
Adds a node of ID NId to the network.
Definition: network.h:1345
static PTimeNet LoadPatents(const TStr &PatentFNm, const TStr &CiteFNm)
Definition: timenet.cpp:480
bool GetInt(const int &FldN, int &Val) const
If the field FldN is an integer its value is returned in Val and the function returns true...
Definition: ss.cpp:447
Definition: xmath.h:129
TSecTm & GetNDat(const int &NId)
Returns node data for the node of ID NId in the network.
Definition: network.h:1208
Edge iterator. Only forward iteration (operator++) is supported.
Definition: graph.h:686
TSecTm & GetNDat(const int &NId)
Returns node data for the node of ID NId in the network.
Definition: network.h:194
const TDat & GetDat(const TKey &Key) const
Definition: hash.h:220
void Defrag(const bool &OnlyNodeLinks=false)
Defragments the network.
Definition: network.h:1500
int GetNodes() const
Returns the number of nodes in the network.
Definition: network.h:160
Definition: gnuplot.h:7
TEdgeI GetEI(const int &EId) const
Not supported/implemented!
Definition: network.h:1255
static TRnd Rnd
Definition: dt.h:1053
TNodeI BegNI() const
Returns an iterator referring to the first node in the network.
Definition: network.h:184
double GetSDev() const
Definition: xmath.h:242
bool IsKey(const TKey &Key) const
Definition: shash.h:1148
PTimeNENet GetGraphUpToTm(const TSecTm &MaxEdgeTm) const
Definition: timenet.cpp:685
int GetNodes() const
Returns the number of nodes in the graph.
Definition: graph.h:168
const char * GetFld(const int &FldN) const
Returns the contents of the field at index FldN.
Definition: ss.h:129
bool IsNode(const int &NId) const
Tests whether ID NId is a node.
Definition: network.h:182
TSsFmt
Spread-Sheet Separator Format.
Definition: ss.h:5
TNodeI GetNI(const int &NId) const
Returns an iterator referring to the node of ID NId in the network.
Definition: network.h:188
int AddEdge(const int &SrcNId, const int &DstNId, int EId=-1)
Adds an edge between node IDs SrcNId and DstNId to the graph.
Definition: network.h:1407
bool Empty() const
Tests whether the vector is empty.
Definition: ds.h:542
void GetEdgeTmBuckets(const TTmUnit &GroupBy, TTimeNet::TTmBucketV &TmBucketV) const
Definition: timenet.cpp:817
static PTimeNENet LoadEdgeTm(const TStr &EdgeFNm, const int &SrcFld=0, const int &DstFld=1, const int &TimeFld=2, const TSsFmt &Separator=ssfTabSep)
Definition: timenet.cpp:1390
Definition: bd.h:63
void SaveEdgeTm(const TStr &EdgeFNm, const bool &RenumberNId=false, const bool &RelativeTm=false) const
Definition: timenet.cpp:1310
Graph Statistics Sequence.
Definition: gstat.h:155
void SplitOnCh(TStr &LStr, const char &SplitCh, TStr &RStr) const
Definition: dt.cpp:901
void Swap(TVec< TVal, TSizeTy > &Vec)
Swaps the contents of the vector with Vec.
Definition: ds.h:1047
static void SaveTs(const TIntKdV &KdV, const TStr &FNm, const TStr &HeadLn=TStr())
Definition: gnuplot.cpp:717
const char * GetTmStr() const
Definition: tm.h:370
virtual void DelNode(const int &NId)
Deletes node of ID NId from the network.
Definition: network.h:300
void PlotEffDiam(const TStr &FNmPref, const TStr &Desc, const TTmUnit &GroupBy, const TSecTm &StartTm, const int &NDiamRuns=10, const bool &OnlyWcc=false, const bool &AlsoRewire=false) const
Definition: timenet.cpp:106
void GetNodeBuckets(const int NodesPerBucket, TTimeNet::TTmBucketV &TmBucketV) const
Definition: timenet.cpp:836
void Clr(const bool &DoDel=true, const TSizeTy &NoDelLim=-1)
Clears the contents of the vector.
Definition: ds.h:971
static PTimeNENet GetPrefAttach(const int &Nodes, const int &Edges, const double &GammaIn, const double &GammaOut)
Definition: timenet.cpp:1244
void Add(const TFlt &Val, const TFlt &Wgt=1)
Definition: xmath.h:217
void Sort(const bool &Asc=true)
Sorts the elements of the vector.
Definition: ds.h:1254
Statistics of a Graph Snapshot.
Definition: gstat.h:36
int AddEdge(const int &SrcNId, const int &DstNId)
Adds an edge from node IDs SrcNId to node DstNId to the network.
Definition: network.h:334
static PTimeNENet New()
Definition: timenet.h:86
void SetNodeTmToFirstEdgeTm()
Definition: timenet.cpp:725
int CntNonZNodes(const PGraph &Graph)
Returns the number of nodes with degree greater than 0.
Definition: alg.h:114
PUNGraph GetSubGraph(const PUNGraph &Graph, const TIntV &NIdV, const bool &RenumberNodes)
Returns an induced subgraph of an undirected graph Graph with NIdV nodes with an optional node renumb...
Definition: subgraph.cpp:7
Whitespace (space or tab) separated.
Definition: ss.h:11
uint GetAbsSecs() const
Definition: tm.h:150
#define Mega(n)
Definition: gbase.h:4
THash< TInt, TNode > NodeH
Definition: network.h:1153
static PUNGraph New()
Static constructor that returns a pointer to the graph. Call: PUNGraph Graph = TUNGraph::New().
Definition: graph.h:155
int AddKey(const TKey &Key)
Definition: shash.h:1254
void DumpTimeStat() const
Definition: timenet.cpp:761
Tab separated.
Definition: ss.h:6
void SetRndEdgeTimes(const int &MinTmEdge=0)
Definition: timenet.cpp:744
void DelEdge(const int &SrcNId, const int &DstNId)
Deletes an edge between node IDs SrcNId and DstNId from the graph.
Definition: graph.cpp:124
int Len() const
Returns the number of fields in the current line.
Definition: ss.h:114
TTriple< TFlt, TFlt, TFlt > TFltTr
Definition: ds.h:180
Definition: tm.h:14
PUNGraph GenConfModel(const TIntV &DegSeqV, TRnd &Rnd)
Generates a random undirect graph with a given degree sequence.
Definition: ggen.cpp:119
TNodeEdgeNet & operator=(const TNodeEdgeNet &Net)
Definition: network.h:1171
static TStr GetNullStr()
Definition: dt.cpp:1626
void Tick()
Definition: tm.h:364
PTimeNENet GetSubGraph(const TIntV &NIdV) const
Definition: timenet.cpp:647
THash< TInt, TEdge > EdgeH
Definition: network.h:1154
TPair< TFlt, TFlt > TFltPr
Definition: ds.h:99
void PlotMedianDegOverTm(const TStr &FNmPref, const TTmUnit &TmUnit, const int &NodesPerBucket=-1) const
Definition: timenet.cpp:302
void SetVal(const TGStatVal &StatVal, const double &Val)
Definition: gstat.cpp:88
static PTimeNet LoadBipartite(const TStr &InFNm)
Definition: timenet.cpp:346
int CntInDegNodes(const PGraph &Graph, const int &NodeInDeg)
Returns the number of nodes with in-degree NodeInDeg.
Definition: alg.h:87
static int NDiamRuns
Definition: gstat.h:38
TSecTm & GetEDat(const int &EId)
Returns edge data for the edge with ID EId.
Definition: network.h:1261
int AddEdge(const int &SrcNId, const int &DstNId)
Adds an edge between node IDs SrcNId and DstNId to the graph.
Definition: graph.cpp:92
int AddKey(const char *Key)
Definition: hash.h:896
bool IsEdge(const int &SrcNId, const int &DstNId, const bool &IsDir=true) const
Tests whether an edge from node IDs SrcNId to DstNId exists in the network.
Definition: network.h:354
static PTimeNENet LoadFlickr(const TStr &NodeFNm, const TStr &EdgeFNm)
Definition: timenet.cpp:1347
Definition: hash.h:729
PTimeNENet GetESubGraph(const TIntV &EIdV) const
Definition: timenet.cpp:668
int AddNode(int NId=-1)
Adds a node of ID NId to the network.
Definition: network.h:276
int Len() const
Definition: shash.h:1121
Definition: tm.h:81
void GetNodeBuckets(const int NodesPerBucket, TTimeNet::TTmBucketV &TmBucketV) const
Definition: timenet.cpp:77
Definition: ds.h:32
static PTimeNENet GetGnmRndNet(const int &Nodes, const int &Edges)
Definition: timenet.cpp:1226
int AddKey(const TKey &Key)
Definition: hash.h:331
TNodeI EndNI() const
Returns an iterator referring to the past-the-end node in the graph.
Definition: graph.h:481
TTimeNENet & operator=(const TTimeNENet &TimeNet)
Definition: timenet.cpp:605
TNodeI GetNI(const int &NId) const
Returns an iterator referring to the node of ID NId in the network.
Definition: network.h:1204
void GetNIdByTm(TIntV &NIdV) const
Definition: timenet.cpp:48
static TStr GetTmUnitStr(const TTmUnit &TmUnit)
Definition: tm.cpp:108
long long int64
Definition: bd.h:27
double GetMean() const
Definition: xmath.h:240
void Save(TSOut &SOut) const
Definition: xmlser.h:16
Definition: dt.h:412
TStr GetStr(const TLoc &Loc=lUs) const
Definition: tm.cpp:457
bool Empty() const
Definition: dt.h:488
TNodeNet & operator=(const TNodeNet &NodeNet)
Definition: network.h:156
static TStr Fmt(const char *FmtStr,...)
Definition: dt.cpp:1599
uint64 GetLineNo() const
Returns the line number of the current line.
Definition: ss.h:118
int GetTriadEdges(TIntV &TriadEIdV) const
Definition: timenet.cpp:857
void SplitOnAllCh(const char &SplitCh, TStrV &StrV, const bool &SkipEmpty=true) const
Definition: dt.cpp:926
int GetEdges() const
Returns the number of edges in the network.
Definition: network.h:1216
static int GetMonthN(const TStr &MonthNm, const TLoc &Loc=lUs)
Definition: tm.cpp:39
void UpdateNodeTimes()
Definition: timenet.cpp:709
Node iterator. Only forward iteration (operator++) is supported.
Definition: graph.h:339
void Defrag(const bool &OnlyNodeLinks=false)
Defragments the graph.
Definition: graph.cpp:160
void GetKeyDatPrV(TVec< TPair< TKey, TDat > > &KeyDatPrV) const
Definition: hash.h:458
void GetTmBuckets(const TTmUnit &GroupBy, TTimeNet::TTmBucketV &TmBucketV) const
Definition: timenet.cpp:798
TNodeI EndNI() const
Returns an iterator referring to the past-the-end node in the graph.
Definition: graph.h:211
static TSecTm GetCurTm()
Definition: tm.cpp:697
double GetClustCf(const PGraph &Graph, int SampleNodes=-1)
Computes the average clustering coefficient as defined in Watts and Strogatz, Collective dynamics of ...
Definition: triad.h:117
bool IsNode(const int &NId) const
Tests whether ID NId is a node.
Definition: graph.h:207
bool Next()
Loads next line from the input file.
Definition: ss.cpp:412
Definition: bd.h:196
void SortNodeEdgeTimes()
Definition: timenet.cpp:703
void Defrag(const bool &OnlyNodeLinks=false)
Defragments the network.
Definition: network.h:376
void Gen(const TSizeTy &_Vals)
Constructs a vector (an array) of _Vals elements.
Definition: ds.h:495
TPt< TTimeNet > PTimeNet
Definition: timenet.h:8
int GetUniDevInt(const int &Range=0)
Definition: dt.cpp:39
PTimeNet GetTimeNet() const
Definition: timenet.cpp:612
PGraph GetESubGraph(const PGraph &Graph, const TIntV &EIdV)
Returns a subgraph of graph Graph with EIdV edges.
Definition: subgraph.h:206
void SplitOnWs(TStrV &StrV) const
Definition: dt.cpp:972
TTimeNet & operator=(const TTimeNet &TimeNet)
Definition: timenet.cpp:3
void Reserve(const int &Nodes, const int &Edges)
Reserves memory for a network of Nodes nodes and Edges edges.
Definition: network.h:243
int AddErrBar(const TFltTrV &XYDValV, const TStr &Label=TStr())
Definition: gnuplot.cpp:265
const char * GetStr() const
Definition: tm.h:368
char * CStr()
Definition: dt.h:476
TTmUnit
Definition: tm.h:11
bool IsEdge(const int &SrcNId, const int &DstNId) const
Tests whether an edge between node IDs SrcNId and DstNId exists in the graph.
Definition: graph.cpp:137
int CntDegNodes(const PGraph &Graph, const int &NodeDeg)
Returns the number of nodes with degree NodeDeg.
Definition: alg.h:105
bool IsKey(const TKey &Key) const
Definition: hash.h:216
static PGStatVec New(const TTmUnit &_TmUnit=tmu1Sec)
Definition: gstat.cpp:426
static PTimeNet LoadAmazon(const TStr &StlFNm)
Definition: timenet.cpp:563
TSizeTy Add()
Adds a new element at the end of the vector, after its current last element.
Definition: ds.h:574
PTimeNet GetSubGraph(const TIntV &NIdV) const
Definition: timenet.cpp:10
TNodeI BegNI() const
Returns an iterator referring to the first node in the graph.
Definition: graph.h:209
int Len() const
Definition: hash.h:186
TDat & AddDat(const TKey &Key)
Definition: hash.h:196
TEdgeI EndEI() const
Returns an iterator referring to the past-the-end edge in the network.
Definition: network.h:1253
PGStatVec TimeGrowth(const TTmUnit &TmUnit, const TFSet &TakeStat, const TSecTm &StartTm) const
Definition: timenet.cpp:88
static void PlotValV(const TVec< TVal1 > &ValV, const TStr &OutFNmPref, const TStr &Desc="", const TStr &XLabel="", const TStr &YLabel="", const TGpScaleTy &ScaleTy=gpsAuto, const bool &PowerFit=false, const TGpSeriesTy &SeriesTy=gpwLinesPoints)
Definition: gnuplot.h:398
TNodeI EndNI() const
Returns an iterator referring to the past-the-end node in the network.
Definition: network.h:1202
int GetCmnNbrs(const PGraph &Graph, const int &NId1, const int &NId2)
Returns a number of shared neighbors between a pair of nodes NId1 and NId2.
Definition: triad.h:725
void Def()
Definition: xmath.cpp:339
const TKey & GetKey(const int &KeyId) const
Definition: hash.h:210
bool IsCmt() const
Checks whether the current line is a comment (starts with '#').
Definition: ss.h:120
int CntOutDegNodes(const PGraph &Graph, const int &NodeOutDeg)
Returns the number of nodes with out-degree NodeOutDeg.
Definition: alg.h:96
TSecTm Round(const TTmUnit &TmUnit) const
Definition: tm.cpp:625
TPt< TTimeNENet > PTimeNENet
Definition: timenet.h:11
TSizeTy AddV(const TVec< TVal, TSizeTy > &ValV)
Adds the elements of the vector ValV to the to end of the vector.
Definition: ds.h:1056
void TakeBasicStat(const PGraph &Graph, const bool &IsMxWcc=false)
Definition: gstat.h:257
void SortByDat(const bool &Asc=true)
Definition: hash.h:250