SNAP Library 6.0, Developer Reference  2020-12-09 16:24:20
SNAP, a general purpose, high performance system for analysis and manipulation of large networks
kronecker.cpp
Go to the documentation of this file.
1 #include "stdafx.h"
2 #include "kronecker.h"
3 
5 // Kronecker Graphs
6 const double TKronMtx::NInf = -DBL_MAX;
8 
9 TKronMtx::TKronMtx(const TFltV& SeedMatrix) : SeedMtx(SeedMatrix) {
10  MtxDim = (int) sqrt((double)SeedMatrix.Len());
12 }
13 
14 void TKronMtx::SaveTxt(const TStr& OutFNm) const {
15  FILE *F = fopen(OutFNm.CStr(), "wt");
16  for (int i = 0; i < GetDim(); i++) {
17  for (int j = 0; j < GetDim(); j++) {
18  if (j > 0) fprintf(F, "\t");
19  fprintf(F, "%f", At(i,j)); }
20  fprintf(F, "\n");
21  }
22  fclose(F);
23 }
24 
26  if (this != &Kronecker){
27  MtxDim=Kronecker.MtxDim;
28  SeedMtx=Kronecker.SeedMtx;
29  }
30  return *this;
31 }
32 
33 bool TKronMtx::IsProbMtx() const {
34  for (int i = 0; i < Len(); i++) {
35  if (At(i) < 0.0 || At(i) > 1.0) return false;
36  }
37  return true;
38 }
39 
40 void TKronMtx::SetRndMtx(const int& PrmMtxDim, const double& MinProb) {
41  MtxDim = PrmMtxDim;
43  for (int p = 0; p < SeedMtx.Len(); p++) {
44  do {
45  SeedMtx[p] = TKronMtx::Rnd.GetUniDev();
46  } while (SeedMtx[p] < MinProb);
47  }
48 }
49 
50 void TKronMtx::SetEpsMtx(const double& Eps1, const double& Eps0, const int& Eps1Val, const int& Eps0Val) {
51  for (int i = 0; i < Len(); i++) {
52  double& Val = At(i);
53  if (Val == Eps1Val) Val = double(Eps1);
54  else if (Val == Eps0Val) Val = double(Eps0);
55  }
56 }
57 
58 // scales parameter values to allow Edges
59 void TKronMtx::SetForEdges(const int& Nodes, const int& Edges) {
60  const int KronIter = GetKronIter(Nodes);
61  const double EZero = pow((double) Edges, 1.0/double(KronIter));
62  const double Factor = EZero / GetMtxSum();
63  for (int i = 0; i < Len(); i++) {
64  At(i) *= Factor;
65  if (At(i) > 1) { At(i) = 1; }
66  }
67 }
68 
69 void TKronMtx::AddRndNoise(const double& SDev) {
70  Dump("before");
71  double NewVal;
72  int c =0;
73  for (int i = 0; i < Len(); i++) {
74  for(c = 0; ((NewVal = At(i)*Rnd.GetNrmDev(1, SDev, 0.8, 1.2)) < 0.01 || NewVal>0.99) && c <1000; c++) { }
75  if (c < 999) { At(i) = NewVal; } else { printf("XXXXX\n"); }
76  }
77  Dump("after");
78 }
79 
81  TChA ChA("[");
82  for (int i = 0; i < Len(); i++) {
83  ChA += TStr::Fmt("%g", At(i));
84  if ((i+1)%GetDim()==0 && (i+1<Len())) { ChA += "; "; }
85  else if (i+1<Len()) { ChA += ", "; }
86  }
87  ChA += "]";
88  return TStr(ChA);
89 }
90 
92  for (int i = 0; i < Len(); i++) {
93  IAssert(At(i) >= 0.0 && At(i) <= 1.0);
94  At(i) = 1.0 - At(i);
95  }
96 }
97 
99  LLMtx.GenMtx(MtxDim);
100  for (int i = 0; i < Len(); i++) {
101  if (At(i) != 0.0) { LLMtx.At(i) = log(At(i)); }
102  else { LLMtx.At(i) = NInf; }
103  }
104 }
105 
107  ProbMtx.GenMtx(MtxDim);
108  for (int i = 0; i < Len(); i++) {
109  if (At(i) != NInf) { ProbMtx.At(i) = exp(At(i)); }
110  else { ProbMtx.At(i) = 0.0; }
111  }
112 }
113 
114 void TKronMtx::Swap(TKronMtx& KronMtx) {
115  ::Swap(MtxDim, KronMtx.MtxDim);
116  SeedMtx.Swap(KronMtx.SeedMtx);
117 }
118 
119 int TKronMtx::GetNodes(const int& NIter) const {
120  return (int) pow(double(GetDim()), double(NIter));
121 }
122 
123 int TKronMtx::GetEdges(const int& NIter) const {
124  return (int) pow(double(GetMtxSum()), double(NIter));
125 }
126 
127 int TKronMtx::GetKronIter(const int& Nodes) const {
128  return (int) ceil(log(double(Nodes)) / log(double(GetDim()))); // upper bound
129  //return (int) TMath::Round(log(double(Nodes)) / log(double(GetDim()))); // round to nearest power
130 }
131 
132 int TKronMtx::GetNZeroK(const PNGraph& Graph) const {
133  return GetNodes(GetKronIter(Graph->GetNodes()));
134 }
135 
136 double TKronMtx::GetEZero(const int& Edges, const int& KronIters) const {
137  return pow((double) Edges, 1.0/double(KronIters));
138 }
139 
140 double TKronMtx::GetMtxSum() const {
141  double Sum = 0;
142  for (int i = 0; i < Len(); i++) {
143  Sum += At(i); }
144  return Sum;
145 }
146 
147 double TKronMtx::GetRowSum(const int& RowId) const {
148  double Sum = 0;
149  for (int c = 0; c < GetDim(); c++) {
150  Sum += At(RowId, c); }
151  return Sum;
152 }
153 
154 double TKronMtx::GetColSum(const int& ColId) const {
155  double Sum = 0;
156  for (int r = 0; r < GetDim(); r++) {
157  Sum += At(r, ColId); }
158  return Sum;
159 }
160 
161 double TKronMtx::GetEdgeProb(int NId1, int NId2, const int& NKronIters) const {
162  double Prob = 1.0;
163  for (int level = 0; level < NKronIters; level++) {
164  Prob *= At(NId1 % MtxDim, NId2 % MtxDim);
165  if (Prob == 0.0) { return 0.0; }
166  NId1 /= MtxDim; NId2 /= MtxDim;
167  }
168  return Prob;
169 }
170 
171 double TKronMtx::GetNoEdgeProb(int NId1, int NId2, const int& NKronIters) const {
172  return 1.0 - GetEdgeProb(NId1, NId2, NKronIters);
173 }
174 
175 double TKronMtx::GetEdgeLL(int NId1, int NId2, const int& NKronIters) const {
176  double LL = 0.0;
177  for (int level = 0; level < NKronIters; level++) {
178  const double& LLVal = At(NId1 % MtxDim, NId2 % MtxDim);
179  if (LLVal == NInf) return NInf;
180  LL += LLVal;
181  NId1 /= MtxDim; NId2 /= MtxDim;
182  }
183  return LL;
184 }
185 
186 double TKronMtx::GetNoEdgeLL(int NId1, int NId2, const int& NKronIters) const {
187  return log(1.0 - exp(GetEdgeLL(NId1, NId2, NKronIters)));
188 }
189 
190 // 2nd order Taylor approximation log(1-x) ~ -x - 0.5x^2
191 double TKronMtx::GetApxNoEdgeLL(int NId1, int NId2, const int& NKronIters) const {
192  const double EdgeLL = GetEdgeLL(NId1, NId2, NKronIters);
193  return -exp(EdgeLL) - 0.5*exp(2*EdgeLL);
194 }
195 
196 bool TKronMtx::IsEdgePlace(int NId1, int NId2, const int& NKronIters, const double& ProbTresh) const {
197  double Prob = 1.0;
198  for (int level = 0; level < NKronIters; level++) {
199  Prob *= At(NId1 % MtxDim, NId2 % MtxDim);
200  if (ProbTresh > Prob) { return false; }
201  NId1 /= MtxDim; NId2 /= MtxDim;
202  }
203  return true;
204 }
205 
206 // deriv a*log(x) = a/x
207 double TKronMtx::GetEdgeDLL(const int& ParamId, int NId1, int NId2, const int& NKronIters) const {
208  const int ThetaX = ParamId % GetDim();
209  const int ThetaY = ParamId / GetDim();
210  int ThetaCnt = 0;
211  for (int level = 0; level < NKronIters; level++) {
212  if ((NId1 % MtxDim) == ThetaX && (NId2 % MtxDim) == ThetaY) {
213  ThetaCnt++; }
214  NId1 /= MtxDim; NId2 /= MtxDim;
215  }
216  return double(ThetaCnt) / exp(At(ParamId));
217 }
218 
219 // deriv log(1-x^a*y^b..) = -x'/(1-x) = (-a*x^(a-1)*y^b..) / (1-x^a*y^b..)
220 double TKronMtx::GetNoEdgeDLL(const int& ParamId, int NId1, int NId2, const int& NKronIters) const {
221  const int& ThetaX = ParamId % GetDim();
222  const int& ThetaY = ParamId / GetDim();
223  int ThetaCnt = 0;
224  double DLL = 0, LL = 0;
225  for (int level = 0; level < NKronIters; level++) {
226  const int X = NId1 % MtxDim;
227  const int Y = NId2 % MtxDim;
228  const double LVal = At(X, Y);
229  if (X == ThetaX && Y == ThetaY) {
230  if (ThetaCnt != 0) { DLL += LVal; }
231  ThetaCnt++;
232  } else { DLL += LVal; }
233  LL += LVal;
234  NId1 /= MtxDim; NId2 /= MtxDim;
235  }
236  return -ThetaCnt*exp(DLL) / (1.0 - exp(LL));
237 }
238 
239 // 2nd order Taylor approximation log(1-x) ~ -x - 0.5x^2
240 double TKronMtx::GetApxNoEdgeDLL(const int& ParamId, int NId1, int NId2, const int& NKronIters) const {
241  const int& ThetaX = ParamId % GetDim();
242  const int& ThetaY = ParamId / GetDim();
243  int ThetaCnt = 0;
244  double DLL = 0;//, LL = 0;
245  for (int level = 0; level < NKronIters; level++) {
246  const int X = NId1 % MtxDim;
247  const int Y = NId2 % MtxDim;
248  const double LVal = At(X, Y); IAssert(LVal > NInf);
249  if (X == ThetaX && Y == ThetaY) {
250  if (ThetaCnt != 0) { DLL += LVal; }
251  ThetaCnt++;
252  } else { DLL += LVal; }
253  //LL += LVal;
254  NId1 /= MtxDim; NId2 /= MtxDim;
255  }
256  //return -ThetaCnt*exp(DLL)*(1.0 + exp(LL)); // -x'/(1+x) WRONG!
257  // deriv = -(ax^(a-1)*y^b..) - a*x^(2a-1)*y^2b..
258  // = - (ax^(a-1)*y^b..) - a*x*(x^(a-1)*y^b..)^2
259  return -ThetaCnt*exp(DLL) - ThetaCnt*exp(At(ThetaX, ThetaY)+2*DLL);
260 }
261 
262 uint TKronMtx::GetNodeSig(const double& OneProb) {
263  uint Sig = 0;
264  for (int i = 0; i < (int)(8*sizeof(uint)); i++) {
265  if (TKronMtx::Rnd.GetUniDev() < OneProb) {
266  Sig |= (1u<<i); }
267  }
268  return Sig;
269 }
270 
271 double TKronMtx::GetEdgeProb(const uint& NId1Sig, const uint& NId2Sig, const int& NIter) const {
272  Assert(GetDim() == 2);
273  double Prob = 1.0;
274  for (int i = 0; i < NIter; i++) {
275  const uint Mask = (1u<<i);
276  const uint Bit1 = NId1Sig & Mask;
277  const uint Bit2 = NId2Sig & Mask;
278  Prob *= At(int(Bit1!=0), int(Bit2!=0));
279  }
280  return Prob;
281 }
282 
283 PNGraph TKronMtx::GenThreshGraph(const double& Thresh) const {
284  PNGraph Graph = TNGraph::New();
285  for (int i = 0; i < GetDim(); i++) {
286  Graph->AddNode(i); }
287  for (int r = 0; r < GetDim(); r++) {
288  for (int c = 0; c < GetDim(); c++) {
289  if (At(r, c) >= Thresh) { Graph->AddEdge(r, c); }
290  }
291  }
292  return Graph;
293 }
294 
295 PNGraph TKronMtx::GenRndGraph(const double& RndFact) const {
296  PNGraph Graph = TNGraph::New();
297  for (int i = 0; i < GetDim(); i++) {
298  Graph->AddNode(i); }
299  for (int r = 0; r < GetDim(); r++) {
300  for (int c = 0; c < GetDim(); c++) {
301  if (RndFact * At(r, c) >= TKronMtx::Rnd.GetUniDev()) { Graph->AddEdge(r, c); }
302  }
303  }
304  return Graph;
305 }
306 
307 int TKronMtx::GetKronIter(const int& GNodes, const int& SeedMtxSz) {
308  return (int) ceil(log(double(GNodes)) / log(double(SeedMtxSz)));
309 }
310 
311 // slow but exaxt procedure (we flip all O(N^2) edges)
312 PNGraph TKronMtx::GenKronecker(const TKronMtx& SeedMtx, const int& NIter, const bool& IsDir, const int& Seed) {
313  const TKronMtx& SeedGraph = SeedMtx;
314  const int NNodes = SeedGraph.GetNodes(NIter);
315  printf(" Kronecker: %d nodes, %s...\n", NNodes, IsDir ? "Directed":"UnDirected");
316  PNGraph Graph = TNGraph::New(NNodes, -1);
317  TExeTm ExeTm;
318  TRnd Rnd(Seed);
319  int edges = 0;
320  for (int node1 = 0; node1 < NNodes; node1++) {
321  Graph->AddNode(node1); }
322  if (IsDir) {
323  for (int node1 = 0; node1 < NNodes; node1++) {
324  for (int node2 = 0; node2 < NNodes; node2++) {
325  if (SeedGraph.IsEdgePlace(node1, node2, NIter, Rnd.GetUniDev())) {
326  Graph->AddEdge(node1, node2);
327  edges++;
328  }
329  }
330  if (node1 % 1000 == 0) printf("\r...%dk, %dk", node1/1000, edges/1000);
331  }
332  } else {
333  for (int node1 = 0; node1 < NNodes; node1++) {
334  for (int node2 = node1; node2 < NNodes; node2++) {
335  if (SeedGraph.IsEdgePlace(node1, node2, NIter, Rnd.GetUniDev())) {
336  Graph->AddEdge(node1, node2);
337  Graph->AddEdge(node2, node1);
338  edges++;
339  }
340  }
341  if (node1 % 1000 == 0) printf("\r...%dk, %dk", node1/1000, edges/1000);
342  }
343  }
344  printf("\r %d edges [%s]\n", Graph->GetEdges(), ExeTm.GetTmStr());
345  return Graph;
346 }
347 
348 // use RMat like recursive descent to quickly generate a Kronecker graph
349 PNGraph TKronMtx::GenFastKronecker(const TKronMtx& SeedMtx, const int& NIter, const bool& IsDir, const int& Seed) {
350  const TKronMtx& SeedGraph = SeedMtx;
351  const int MtxDim = SeedGraph.GetDim();
352  const double MtxSum = SeedGraph.GetMtxSum();
353  const int NNodes = SeedGraph.GetNodes(NIter);
354  const int NEdges = SeedGraph.GetEdges(NIter);
355  //const double DiagEdges = NNodes * pow(SeedGraph.At(0,0), double(NIter));
356  //const int NEdges = (int) TMath::Round(((pow(double(SeedGraph.GetMtxSum()), double(NIter)) - DiagEdges) /2.0));
357  printf(" FastKronecker: %d nodes, %d edges, %s...\n", NNodes, NEdges, IsDir ? "Directed":"UnDirected");
358  PNGraph Graph = TNGraph::New(NNodes, -1);
359  TRnd Rnd(Seed);
360  TExeTm ExeTm;
361  // prepare cell probability vector
362  TVec<TFltIntIntTr> ProbToRCPosV; // row, col position
363  double CumProb = 0.0;
364  for (int r = 0; r < MtxDim; r++) {
365  for (int c = 0; c < MtxDim; c++) {
366  const double Prob = SeedGraph.At(r, c);
367  if (Prob > 0.0) {
368  CumProb += Prob;
369  ProbToRCPosV.Add(TFltIntIntTr(CumProb/MtxSum, r, c));
370  }
371  }
372  }
373  // add nodes
374  for (int i = 0; i < NNodes; i++) {
375  Graph->AddNode(i); }
376  // add edges
377  int Rng, Row, Col, Collision=0, n = 0;
378  for (int edges = 0; edges < NEdges; ) {
379  Rng=NNodes; Row=0; Col=0;
380  for (int iter = 0; iter < NIter; iter++) {
381  const double& Prob = Rnd.GetUniDev();
382  n = 0; while(Prob > ProbToRCPosV[n].Val1) { n++; }
383  const int MtxRow = ProbToRCPosV[n].Val2;
384  const int MtxCol = ProbToRCPosV[n].Val3;
385  Rng /= MtxDim;
386  Row += MtxRow * Rng;
387  Col += MtxCol * Rng;
388  }
389  if (! Graph->IsEdge(Row, Col)) { // allow self-loops
390  Graph->AddEdge(Row, Col); edges++;
391  if (! IsDir) {
392  if (Row != Col) Graph->AddEdge(Col, Row);
393  edges++;
394  }
395  } else { Collision++; }
396  //if (edges % 1000 == 0) printf("\r...%dk", edges/1000);
397  }
398  //printf(" %d edges [%s]\n", Graph->GetEdges(), ExeTm.GetTmStr());
399  printf(" collisions: %d (%.4f)\n", Collision, Collision/(double)Graph->GetEdges());
400  return Graph;
401 }
402 
403 // use RMat like recursive descent to quickly generate a Kronecker graph
404 PNGraph TKronMtx::GenFastKronecker(const TKronMtx& SeedMtx, const int& NIter, const int& Edges, const bool& IsDir, const int& Seed) {
405  const TKronMtx& SeedGraph = SeedMtx;
406  const int MtxDim = SeedGraph.GetDim();
407  const double MtxSum = SeedGraph.GetMtxSum();
408  const int NNodes = SeedGraph.GetNodes(NIter);
409  const int NEdges = Edges;
410  //const double DiagEdges = NNodes * pow(SeedGraph.At(0,0), double(NIter));
411  //const int NEdges = (int) TMath::Round(((pow(double(SeedGraph.GetMtxSum()), double(NIter)) - DiagEdges) /2.0));
412  printf(" RMat Kronecker: %d nodes, %d edges, %s...\n", NNodes, NEdges, IsDir ? "Directed":"UnDirected");
413  PNGraph Graph = TNGraph::New(NNodes, -1);
414  TRnd Rnd(Seed);
415  TExeTm ExeTm;
416  // prepare cell probability vector
417  TVec<TFltIntIntTr> ProbToRCPosV; // row, col position
418  double CumProb = 0.0;
419  for (int r = 0; r < MtxDim; r++) {
420  for (int c = 0; c < MtxDim; c++) {
421  const double Prob = SeedGraph.At(r, c);
422  if (Prob > 0.0) {
423  CumProb += Prob;
424  ProbToRCPosV.Add(TFltIntIntTr(CumProb/MtxSum, r, c));
425  }
426  }
427  }
428  // add nodes
429  for (int i = 0; i < NNodes; i++) {
430  Graph->AddNode(i); }
431  // add edges
432  int Rng, Row, Col, Collision=0, n = 0;
433  for (int edges = 0; edges < NEdges; ) {
434  Rng=NNodes; Row=0; Col=0;
435  for (int iter = 0; iter < NIter; iter++) {
436  const double& Prob = Rnd.GetUniDev();
437  n = 0; while(Prob > ProbToRCPosV[n].Val1) { n++; }
438  const int MtxRow = ProbToRCPosV[n].Val2;
439  const int MtxCol = ProbToRCPosV[n].Val3;
440  Rng /= MtxDim;
441  Row += MtxRow * Rng;
442  Col += MtxCol * Rng;
443  }
444  if (! Graph->IsEdge(Row, Col)) { // allow self-loops
445  Graph->AddEdge(Row, Col); edges++;
446  if (! IsDir) {
447  if (Row != Col) Graph->AddEdge(Col, Row);
448  edges++;
449  }
450  } else { Collision++; }
451  //if (edges % 1000 == 0) printf("\r...%dk", edges/1000);
452  }
453  //printf(" %d edges [%s]\n", Graph->GetEdges(), ExeTm.GetTmStr());
454  printf(" collisions: %d (%.4f)\n", Collision, Collision/(double)Graph->GetEdges());
455  return Graph;
456 }
457 
458 PNGraph TKronMtx::GenDetKronecker(const TKronMtx& SeedMtx, const int& NIter, const bool& IsDir) {
459  const TKronMtx& SeedGraph = SeedMtx;
460  const int NNodes = SeedGraph.GetNodes(NIter);
461  printf(" Deterministic Kronecker: %d nodes, %s...\n", NNodes, IsDir ? "Directed":"UnDirected");
462  PNGraph Graph = TNGraph::New(NNodes, -1);
463  TExeTm ExeTm;
464  int edges = 0;
465  for (int node1 = 0; node1 < NNodes; node1++) { Graph->AddNode(node1); }
466 
467  for (int node1 = 0; node1 < NNodes; node1++) {
468  for (int node2 = 0; node2 < NNodes; node2++) {
469  if (SeedGraph.IsEdgePlace(node1, node2, NIter, Rnd.GetUniDev())) {
470  Graph->AddEdge(node1, node2);
471  edges++;
472  }
473  }
474  if (node1 % 1000 == 0) printf("\r...%dk, %dk", node1/1000, edges/1000);
475  }
476  return Graph;
477 }
478 
479 void TKronMtx::PlotCmpGraphs(const TKronMtx& SeedMtx, const PNGraph& Graph, const TStr& FNmPref, const TStr& Desc) {
480  const int KronIters = SeedMtx.GetKronIter(Graph->GetNodes());
481  PNGraph KronG, WccG;
482  const bool FastGen = true;
483  if (FastGen) { KronG = TKronMtx::GenFastKronecker(SeedMtx, KronIters, true, 0); }
484  else { KronG = TKronMtx::GenKronecker(SeedMtx, KronIters, true, 0); }
485  TSnap::DelZeroDegNodes(KronG);
486  WccG = TSnap::GetMxWcc(KronG);
487  const TStr Desc1 = TStr::Fmt("%s", Desc.CStr());
489  //gsdHops
490  //gsWccHops, gsdSngVal, gsdSngVec, gsdClustCf
491  GS.Add(Graph, TSecTm(1), TStr::Fmt("GRAPH G(%d, %d)", Graph->GetNodes(), Graph->GetEdges()));
492  GS.Add(KronG, TSecTm(2), TStr::Fmt("KRONECKER K(%d, %d)", KronG->GetNodes(), KronG->GetEdges()));
493  GS.Add(WccG, TSecTm(3), TStr::Fmt("KRONECKER wccK(%d, %d)", WccG->GetNodes(), WccG->GetEdges()));
494  const TStr Style = "linewidth 1 pointtype 6 pointsize 1";
495  GS.ImposeDistr(gsdInDeg, FNmPref, Desc1, false, false, gpwLinesPoints, Style);
496  GS.ImposeDistr(gsdInDeg, FNmPref+"-B", Desc1, true, false, gpwLinesPoints, Style);
497  GS.ImposeDistr(gsdOutDeg, FNmPref, Desc1, false, false, gpwLinesPoints, Style);
498  GS.ImposeDistr(gsdOutDeg, FNmPref+"-B", Desc1, true, false, gpwLinesPoints, Style);
499  GS.ImposeDistr(gsdHops, FNmPref, Desc1, false, false, gpwLinesPoints, Style);
500  GS.ImposeDistr(gsdClustCf, FNmPref, Desc1, false, false, gpwLinesPoints, Style);
501  GS.ImposeDistr(gsdClustCf, FNmPref+"-B", Desc1, true, false, gpwLinesPoints, Style);
502  GS.ImposeDistr(gsdSngVal, FNmPref, Desc1, false, false, gpwLinesPoints, Style);
503  GS.ImposeDistr(gsdSngVal, FNmPref+"-B", Desc1, true, false, gpwLinesPoints, Style);
504  GS.ImposeDistr(gsdSngVec, FNmPref, Desc1, false, false, gpwLinesPoints, Style);
505  GS.ImposeDistr(gsdSngVec, FNmPref+"-B", Desc1, true, false, gpwLinesPoints, Style);
506  GS.ImposeDistr(gsdWcc, FNmPref, Desc1, false, false, gpwLinesPoints, Style);
507  GS.ImposeDistr(gsdWcc, FNmPref+"-B", Desc1, true, false, gpwLinesPoints, Style);
508  GS.ImposeDistr(gsdScc, FNmPref, Desc1, false, false, gpwLinesPoints, Style);
509  GS.ImposeDistr(gsdScc, FNmPref+"-B", Desc1, true, false, gpwLinesPoints, Style);
510 // typedef enum { distrUndef, distrInDeg, distrOutDeg, distrWcc, distrScc,
511 // distrHops, distrWccHops, distrSngVal, distrSngVec, distrClustCf, distrMx } TGraphDistr;*/
512 }
513 
514 void TKronMtx::PlotCmpGraphs(const TKronMtx& SeedMtx1, const TKronMtx& SeedMtx2, const PNGraph& Graph, const TStr& FNmPref, const TStr& Desc) {
515  const int KronIters1 = SeedMtx1.GetKronIter(Graph->GetNodes());
516  const int KronIters2 = SeedMtx2.GetKronIter(Graph->GetNodes());
517  PNGraph KronG1, KronG2;
518  const bool FastGen = true;
519  if (FastGen) {
520  KronG1 = TKronMtx::GenFastKronecker(SeedMtx1, KronIters1, true, 0);
521  KronG2 = TKronMtx::GenFastKronecker(SeedMtx2, KronIters2, false, 0); }
522  else {
523  KronG1 = TKronMtx::GenKronecker(SeedMtx1, KronIters1, true, 0);
524  KronG2 = TKronMtx::GenKronecker(SeedMtx2, KronIters2, true, 0); }
525  TSnap::DelZeroDegNodes(KronG1);
526  TSnap::DelZeroDegNodes(KronG2);
527  const TStr Desc1 = TStr::Fmt("%s", Desc.CStr());
529  //gsdHops
530  //gsWccHops, gsdSngVal, gsdSngVec, gsdClustCf
531  GS.Add(Graph, TSecTm(1), TStr::Fmt("GRAPH G(%d, %d)", Graph->GetNodes(), Graph->GetEdges()));
532  GS.Add(KronG1, TSecTm(2), TStr::Fmt("KRONECKER1 K(%d, %d) %s", KronG1->GetNodes(), KronG1->GetEdges(), SeedMtx1.GetMtxStr().CStr()));
533  GS.Add(KronG2, TSecTm(3), TStr::Fmt("KRONECKER2 K(%d, %d) %s", KronG2->GetNodes(), KronG2->GetEdges(), SeedMtx2.GetMtxStr().CStr()));
534  const TStr Style = "linewidth 1 pointtype 6 pointsize 1";
535  // raw data
536  GS.ImposeDistr(gsdInDeg, FNmPref, Desc1, false, false, gpwLinesPoints, Style);
537  GS.ImposeDistr(gsdOutDeg, FNmPref, Desc1, false, false, gpwLinesPoints, Style);
538  GS.ImposeDistr(gsdHops, FNmPref, Desc1, false, false, gpwLinesPoints, Style);
539  GS.ImposeDistr(gsdClustCf, FNmPref, Desc1, false, false, gpwLinesPoints, Style);
540  GS.ImposeDistr(gsdSngVal, FNmPref, Desc1, false, false, gpwLinesPoints, Style);
541  GS.ImposeDistr(gsdSngVec, FNmPref, Desc1, false, false, gpwLinesPoints, Style);
542  GS.ImposeDistr(gsdWcc, FNmPref, Desc1, false, false, gpwLinesPoints, Style);
543  GS.ImposeDistr(gsdScc, FNmPref, Desc1, false, false, gpwLinesPoints, Style);
544  GS.ImposeDistr(gsdTriadPart, FNmPref, Desc1, false, false, gpwLinesPoints, Style);
545  // smooth
546  GS.ImposeDistr(gsdInDeg, FNmPref+"-B", Desc1, true, false, gpwLinesPoints, Style);
547  GS.ImposeDistr(gsdOutDeg, FNmPref+"-B", Desc1, true, false, gpwLinesPoints, Style);
548  GS.ImposeDistr(gsdClustCf, FNmPref+"-B", Desc1, true, false, gpwLinesPoints, Style);
549  GS.ImposeDistr(gsdScc, FNmPref+"-B", Desc1, true, false, gpwLinesPoints, Style);
550  GS.ImposeDistr(gsdWcc, FNmPref+"-B", Desc1, true, false, gpwLinesPoints, Style);
551  GS.ImposeDistr(gsdSngVec, FNmPref+"-B", Desc1, true, false, gpwLinesPoints, Style);
552  GS.ImposeDistr(gsdSngVal, FNmPref+"-B", Desc1, true, false, gpwLinesPoints, Style);
553  GS.ImposeDistr(gsdTriadPart, FNmPref+"-B", Desc1, true, false, gpwLinesPoints, Style);
554 }
555 
556 void TKronMtx::PlotCmpGraphs(const TVec<TKronMtx>& SeedMtxV, const PNGraph& Graph, const TStr& FNmPref, const TStr& Desc) {
557  const TStr Desc1 = TStr::Fmt("%s", Desc.CStr());
559  GS.Add(Graph, TSecTm(1), TStr::Fmt("GRAPH G(%d, %d)", Graph->GetNodes(), Graph->GetEdges()));
560  //gsdHops
561  //gsWccHops, gsdSngVal, gsdSngVec, gsdClustCf
562  for (int m = 0; m < SeedMtxV.Len(); m++) {
563  const int KronIters = SeedMtxV[m].GetKronIter(Graph->GetNodes());
564  PNGraph KronG1 = TKronMtx::GenFastKronecker(SeedMtxV[m], KronIters, true, 0);
565  printf("*** K(%d, %d) n0=%d\n", KronG1->GetNodes(), KronG1->GetEdges(), SeedMtxV[m].GetDim());
566  TSnap::DelZeroDegNodes(KronG1);
567  printf(" del zero deg K(%d, %d) n0=%d\n", KronG1->GetNodes(), KronG1->GetEdges(), m);
568  GS.Add(KronG1, TSecTm(m+2), TStr::Fmt("K(%d, %d) n0^k=%d n0=%d", KronG1->GetNodes(), KronG1->GetEdges(), SeedMtxV[m].GetNZeroK(Graph), SeedMtxV[m].GetDim()));
569  // plot after each Kronecker is done
570  const TStr Style = "linewidth 1 pointtype 6 pointsize 1";
571  GS.ImposeDistr(gsdInDeg, FNmPref, Desc1, false, false, gpwLines, Style);
572  GS.ImposeDistr(gsdInDeg, FNmPref+"-B", Desc1, true, false, gpwLines, Style);
573  GS.ImposeDistr(gsdOutDeg, FNmPref, Desc1, false, false, gpwLines, Style);
574  GS.ImposeDistr(gsdOutDeg, FNmPref+"-B", Desc1, true, false, gpwLines, Style);
575  GS.ImposeDistr(gsdHops, FNmPref, Desc1, false, false, gpwLines, Style);
576  GS.ImposeDistr(gsdClustCf, FNmPref, Desc1, false, false, gpwLines, Style);
577  GS.ImposeDistr(gsdClustCf, FNmPref+"-B", Desc1, true, false, gpwLines, Style);
578  GS.ImposeDistr(gsdSngVal, FNmPref, Desc1, false, false, gpwLines, Style);
579  GS.ImposeDistr(gsdSngVal, FNmPref+"-B", Desc1, true, false, gpwLines, Style);
580  GS.ImposeDistr(gsdSngVec, FNmPref, Desc1, false, false, gpwLines, Style);
581  GS.ImposeDistr(gsdSngVec, FNmPref+"-B", Desc1, true, false, gpwLines, Style);
582  GS.ImposeDistr(gsdWcc, FNmPref, Desc1, false, false, gpwLines, Style);
583  GS.ImposeDistr(gsdWcc, FNmPref+"-B", Desc1, true, false, gpwLines, Style);
584  GS.ImposeDistr(gsdScc, FNmPref, Desc1, false, false, gpwLines, Style);
585  GS.ImposeDistr(gsdScc, FNmPref+"-B", Desc1, true, false, gpwLines, Style);
586  }
587  // typedef enum { distrUndef, distrInDeg, distrOutDeg, distrWcc, distrScc,
588  // distrHops, distrWccHops, distrSngVal, distrSngVec, distrClustCf, distrMx } TGraphDistr;*/
589 }
590 
591 void TKronMtx::KronMul(const TKronMtx& Left, const TKronMtx& Right, TKronMtx& Result) {
592  const int LDim = Left.GetDim();
593  const int RDim = Right.GetDim();
594  Result.GenMtx(LDim * RDim);
595  for (int r1 = 0; r1 < LDim; r1++) {
596  for (int c1 = 0; c1 < LDim; c1++) {
597  const double& Val = Left.At(r1, c1);
598  for (int r2 = 0; r2 < RDim; r2++) {
599  for (int c2 = 0; c2 < RDim; c2++) {
600  Result.At(r1*RDim+r2, c1*RDim+c2) = Val * Right.At(r2, c2);
601  }
602  }
603  }
604  }
605 }
606 
607 void TKronMtx::KronSum(const TKronMtx& Left, const TKronMtx& Right, TKronMtx& Result) {
608  const int LDim = Left.GetDim();
609  const int RDim = Right.GetDim();
610  Result.GenMtx(LDim * RDim);
611  for (int r1 = 0; r1 < LDim; r1++) {
612  for (int c1 = 0; c1 < LDim; c1++) {
613  const double& Val = Left.At(r1, c1);
614  for (int r2 = 0; r2 < RDim; r2++) {
615  for (int c2 = 0; c2 < RDim; c2++) {
616  if (Val == NInf || Right.At(r2, c2) == NInf) {
617  Result.At(r1*RDim+r2, c1*RDim+c2) = NInf; }
618  else {
619  Result.At(r1*RDim+r2, c1*RDim+c2) = Val + Right.At(r2, c2); }
620  }
621  }
622  }
623  }
624 }
625 
626 void TKronMtx::KronPwr(const TKronMtx& KronMtx, const int& NIter, TKronMtx& OutMtx) {
627  OutMtx = KronMtx;
628  TKronMtx NewOutMtx;
629  for (int iter = 0; iter < NIter; iter++) {
630  KronMul(OutMtx, KronMtx, NewOutMtx);
631  NewOutMtx.Swap(OutMtx);
632  }
633 
634 }
635 
636 void TKronMtx::Dump(const TStr& MtxNm, const bool& Sort) const {
637  /*printf("%s: %d x %d\n", MtxNm.Empty()?"Mtx":MtxNm.CStr(), GetDim(), GetDim());
638  for (int r = 0; r < GetDim(); r++) {
639  for (int c = 0; c < GetDim(); c++) { printf(" %8.2g", At(r, c)); }
640  printf("\n");
641  }*/
642  if (! MtxNm.Empty()) printf("%s\n", MtxNm.CStr());
643  double Sum=0.0;
644  TFltV ValV = SeedMtx;
645  if (Sort) { ValV.Sort(false); }
646  for (int i = 0; i < ValV.Len(); i++) {
647  printf(" %10.4g", ValV[i]());
648  Sum += ValV[i];
649  if ((i+1) % GetDim() == 0) { printf("\n"); }
650  }
651  printf(" (sum:%.4f)\n", Sum);
652 }
653 
654 // average difference in the parameters
655 double TKronMtx::GetAvgAbsErr(const TKronMtx& Kron1, const TKronMtx& Kron2) {
656  TFltV P1 = Kron1.GetMtx();
657  TFltV P2 = Kron2.GetMtx();
658  IAssert(P1.Len() == P2.Len());
659  P1.Sort(); P2.Sort();
660  double delta = 0.0;
661  for (int i = 0; i < P1.Len(); i++) {
662  delta += fabs(P1[i] - P2[i]);
663  }
664  return delta/P1.Len();
665 }
666 
667 // average L2 difference in the parameters
668 double TKronMtx::GetAvgFroErr(const TKronMtx& Kron1, const TKronMtx& Kron2) {
669  TFltV P1 = Kron1.GetMtx();
670  TFltV P2 = Kron2.GetMtx();
671  IAssert(P1.Len() == P2.Len());
672  P1.Sort(); P2.Sort();
673  double delta = 0.0;
674  for (int i = 0; i < P1.Len(); i++) {
675  delta += pow(P1[i] - P2[i], 2);
676  }
677  return sqrt(delta/P1.Len());
678 }
679 
680 // get matrix from matlab matrix notation
682  TStrV RowStrV, ColStrV;
683  MatlabMtxStr.ChangeChAll(',', ' ');
684  MatlabMtxStr.SplitOnAllCh(';', RowStrV); IAssert(! RowStrV.Empty());
685  RowStrV[0].SplitOnWs(ColStrV); IAssert(! ColStrV.Empty());
686  const int Rows = RowStrV.Len();
687  const int Cols = ColStrV.Len();
688  IAssert(Rows == Cols);
689  TKronMtx Mtx(Rows);
690  for (int r = 0; r < Rows; r++) {
691  RowStrV[r].SplitOnWs(ColStrV);
692  IAssert(ColStrV.Len() == Cols);
693  for (int c = 0; c < Cols; c++) {
694  Mtx.At(r, c) = (double) ColStrV[c].GetFlt(); }
695  }
696  return Mtx;
697 }
698 
699 TKronMtx TKronMtx::GetRndMtx(const int& Dim, const double& MinProb) {
700  TKronMtx Mtx;
701  Mtx.SetRndMtx(Dim, MinProb);
702  return Mtx;
703 }
704 
705 TKronMtx TKronMtx::GetInitMtx(const int& Dim, const int& Nodes, const int& Edges) {
706  const double MxParam = 0.8+TKronMtx::Rnd.GetUniDev()/5.0;
707  const double MnParam = 0.2-TKronMtx::Rnd.GetUniDev()/5.0;
708  const double Step = (MxParam-MnParam) / (Dim*Dim-1);
709  TFltV ParamV(Dim*Dim);
710  if (Dim == 1) { ParamV.PutAll(0.5); } // random graph
711  else {
712  for (int p = 0; p < ParamV.Len(); p++) {
713  ParamV[p] = MxParam - p*Step; }
714  }
715  //IAssert(ParamV[0]==MxParam && ParamV.Last()==MnParam);
716  TKronMtx Mtx(ParamV);
717  Mtx.SetForEdges(Nodes, Edges);
718  return Mtx;
719 }
720 
721 TKronMtx TKronMtx::GetInitMtx(const TStr& MtxStr, const int& Dim, const int& Nodes, const int& Edges) {
722  TKronMtx Mtx(Dim);
723  if (TCh::IsNum(MtxStr[0])) { Mtx = TKronMtx::GetMtx(MtxStr); }
724  else if (MtxStr[0] == 'r') { Mtx = TKronMtx::GetRndMtx(Dim, 0.1); }
725  else if (MtxStr[0] == 'a') {
726  const double Prob = TKronMtx::Rnd.GetUniDev();
727  if (Prob < 0.4) {
728  Mtx = TKronMtx::GetInitMtx(Dim, Nodes, Edges); }
729  else { // interpolate so that there are in the corners 0.9, 0.5, 0.1, 0.5
730  const double Max = 0.9+TKronMtx::Rnd.GetUniDev()/10.0;
731  const double Min = 0.1-TKronMtx::Rnd.GetUniDev()/10.0;
732  const double Med = (Max-Min)/2.0;
733  Mtx.At(0,0) = Max; Mtx.At(0,Dim-1) = Med;
734  Mtx.At(Dim-1, 0) = Med; Mtx.At(Dim-1, Dim-1) = Min;
735  for (int i = 1; i < Dim-1; i++) {
736  Mtx.At(i,i) = Max - double(i)*(Max-Min)/double(Dim-1);
737  Mtx.At(i, 0) = Mtx.At(0, i) = Max - double(i)*(Max-Med)/double(Dim-1);
738  Mtx.At(i, Dim-1) = Mtx.At(Dim-1, i) = Med - double(i)*(Med-Min)/double(Dim-1);
739  }
740  for (int i = 1; i < Dim-1; i++) {
741  for (int j = 1; j < Dim-1; j++) {
742  if (i >= j) { continue; }
743  Mtx.At(i,j) = Mtx.At(j,i) = Mtx.At(i,i) - (j-i)*(Mtx.At(i,i)-Mtx.At(i,Dim-1))/(Dim-i-1);
744  }
745  }
746  Mtx.AddRndNoise(0.1);
747  }
748  } else { FailR("Wrong mtx: matlab str, or random (r), or all (a)"); }
749  Mtx.SetForEdges(Nodes, Edges);
750  return Mtx;
751 }
752 
754  if (MtxNm == "3chain") return TKronMtx::GetMtx("1 1 0; 1 1 1; 0 1 1");
755  else if (MtxNm == "4star") return TKronMtx::GetMtx("1 1 1 1; 1 1 0 0 ; 1 0 1 0; 1 0 0 1");
756  else if (MtxNm == "4chain") return TKronMtx::GetMtx("1 1 0 0; 1 1 1 0 ; 0 1 1 1; 0 0 1 1");
757  else if (MtxNm == "4square") return TKronMtx::GetMtx("1 1 0 1; 1 1 1 0 ; 0 1 1 1; 1 0 1 1");
758  else if (MtxNm == "5star") return TKronMtx::GetMtx("1 1 1 1 1; 1 1 0 0 0; 1 0 1 0 0; 1 0 0 1 0; 1 0 0 0 1");
759  else if (MtxNm == "6star") return TKronMtx::GetMtx("1 1 1 1 1 1; 1 1 0 0 0 0; 1 0 1 0 0 0; 1 0 0 1 0 0; 1 0 0 0 1 0; 1 0 0 0 0 1");
760  else if (MtxNm == "7star") return TKronMtx::GetMtx("1 1 1 1 1 1 1; 1 1 0 0 0 0 0; 1 0 1 0 0 0 0; 1 0 0 1 0 0 0; 1 0 0 0 1 0 0; 1 0 0 0 0 1 0; 1 0 0 0 0 0 1");
761  else if (MtxNm == "5burst") return TKronMtx::GetMtx("1 1 1 1 0; 1 1 0 0 0; 1 0 1 0 0; 1 0 0 1 1; 0 0 0 1 1");
762  else if (MtxNm == "7burst") return TKronMtx::GetMtx("1 0 0 1 0 0 0; 0 1 0 1 0 0 0; 0 0 1 1 0 0 0; 1 1 1 1 1 0 0; 0 0 0 1 1 1 1; 0 0 0 0 1 1 0; 0 0 0 0 1 0 1");
763  else if (MtxNm == "7cross") return TKronMtx::GetMtx("1 0 0 1 0 0 0; 0 1 0 1 0 0 0; 0 0 1 1 0 0 0; 1 1 1 1 1 0 0; 0 0 0 1 1 1 0; 0 0 0 0 1 1 1; 0 0 0 0 0 1 1");
764  FailR(TStr::Fmt("Unknow matrix: '%s'", MtxNm.CStr()).CStr());
765  return TKronMtx();
766 }
767 
769  PSs Ss = TSs::LoadTxt(ssfTabSep, MtxFNm);
770  IAssertR(Ss->GetXLen() == Ss->GetYLen(), "Not a square matrix");
771  IAssert(Ss->GetYLen() == Ss->GetXLen());
772  TKronMtx Mtx(Ss->GetYLen());
773  for (int r = 0; r < Ss->GetYLen(); r++) {
774  for (int c = 0; c < Ss->GetXLen(); c++) {
775  Mtx.At(r, c) = (double) Ss->At(c, r).GetFlt(); }
776  }
777  return Mtx;
778 }
779 
780 
782 // Kronecker Log Likelihood
783 TKroneckerLL::TKroneckerLL(const PNGraph& GraphPt, const TFltV& ParamV, const double& PermPSwapNd): PermSwapNodeProb(PermPSwapNd) {
784  InitLL(GraphPt, TKronMtx(ParamV));
785 }
786 
787 TKroneckerLL::TKroneckerLL(const PNGraph& GraphPt, const TKronMtx& ParamMtx, const double& PermPSwapNd) : PermSwapNodeProb(PermPSwapNd) {
788  InitLL(GraphPt, ParamMtx);
789 }
790 
791 TKroneckerLL::TKroneckerLL(const PNGraph& GraphPt, const TKronMtx& ParamMtx, const TIntV& NodeIdPermV, const double& PermPSwapNd) : PermSwapNodeProb(PermPSwapNd) {
792  InitLL(GraphPt, ParamMtx);
793  NodePerm = NodeIdPermV;
795 }
796 
797 PKroneckerLL TKroneckerLL::New(const PNGraph& GraphPt, const TKronMtx& ParamMtx, const double& PermPSwapNd) {
798  return new TKroneckerLL(GraphPt, ParamMtx, PermPSwapNd);
799 }
800 
801 PKroneckerLL TKroneckerLL::New(const PNGraph& GraphPt, const TKronMtx& ParamMtx, const TIntV& NodeIdPermV, const double& PermPSwapNd) {
802  return new TKroneckerLL(GraphPt, ParamMtx, NodeIdPermV, PermPSwapNd);
803 }
804 
805 void TKroneckerLL::SetPerm(const char& PermId) {
806  if (PermId == 'o') { SetOrderPerm(); }
807  else if (PermId == 'd') { SetDegPerm(); }
808  else if (PermId == 'r') { SetRndPerm(); }
809  else if (PermId == 'b') { SetBestDegPerm(); }
810  else FailR("Unknown permutation type (o,d,r)");
811 }
812 
814  NodePerm.Gen(Nodes, 0);
815  for (int i = 0; i < Graph->GetNodes(); i++) {
816  NodePerm.Add(i); }
818 }
819 
821  NodePerm.Gen(Nodes, 0);
822  for (int i = 0; i < Graph->GetNodes(); i++) {
823  NodePerm.Add(i); }
824  NodePerm.Shuffle(TKronMtx::Rnd);
826 }
827 
829  TIntPrV DegNIdV;
830  for (TNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) {
831  DegNIdV.Add(TIntPr(NI.GetDeg(), NI.GetId()));
832  }
833  DegNIdV.Sort(false);
834  NodePerm.Gen(DegNIdV.Len(), 0);
835  for (int i = 0; i < DegNIdV.Len(); i++) {
836  NodePerm.Add(DegNIdV[i].Val2);
837  }
839 }
840 
843  NodePerm.Gen(Nodes);
844  const int NZero = ProbMtx.GetDim();
845  TFltIntPrV DegV(Nodes), CDegV(Nodes);
846  TFltV Row(NZero);
847  TFltV Col(NZero);
848  for(int i = 0; i < NZero; i++) {
849  for(int j = 0; j < NZero; j++) {
850  Row[i] += ProbMtx.At(i, j);
851  Col[i] += ProbMtx.At(j, i);
852  }
853  }
854 
855  for(int i = 0; i < Nodes; i++) {
856  TNGraph::TNodeI NodeI = Graph->GetNI(i);
857  int NId = i;
858  double RowP = 1.0, ColP = 1.0;
859  for(int j = 0; j < KronIters; j++) {
860  int Bit = NId % NZero;
861  RowP *= Row[Bit]; ColP *= Col[Bit];
862  NId /= NZero;
863  }
864  CDegV[i] = TFltIntPr(RowP + ColP, i);
865  DegV[i] = TFltIntPr(NodeI.GetDeg(), i);
866  }
867  DegV.Sort(false); CDegV.Sort(false);
868  for(int i = 0; i < Nodes; i++) {
869  NodePerm[DegV[i].Val2] = CDegV[i].Val2;
870  }
872 }
873 
875 void TKroneckerLL::SetIPerm(const TIntV& Perm) {
876  InvertPerm.Gen(Perm.Len());
877  for (int i = 0; i < Perm.Len(); i++) {
878  InvertPerm[Perm[i]] = i;
879  }
880 }
881 
882 void TKroneckerLL::SetGraph(const PNGraph& GraphPt) {
883  Graph = GraphPt;
884  bool NodesOk = true;
885  // check that nodes IDs are {0,1,..,Nodes-1}
886  for (int nid = 0; nid < Graph->GetNodes(); nid++) {
887  if (! Graph->IsNode(nid)) { NodesOk=false; break; } }
888  if (! NodesOk) {
889  TIntV NIdV; GraphPt->GetNIdV(NIdV);
890  Graph = TSnap::GetSubGraph(GraphPt, NIdV, true);
891  for (int nid = 0; nid < Graph->GetNodes(); nid++) {
892  IAssert(Graph->IsNode(nid)); }
893  }
894  Nodes = Graph->GetNodes();
895  IAssert(LLMtx.GetDim() > 1 && LLMtx.Len() == ProbMtx.Len());
896  KronIters = (int) ceil(log(double(Nodes)) / log(double(ProbMtx.GetDim())));
897  // edge vector (for swap-edge permutation proposal)
898 // if (PermSwapNodeProb < 1.0) { /// !!!!! MYUNGHWAN, CHECK! WHY IS THIS COMMENTED OUT
899  GEdgeV.Gen(Graph->GetEdges(), 0);
900  for (TNGraph::TEdgeI EI = Graph->BegEI(); EI < Graph->EndEI(); EI++) {
901  if (EI.GetSrcNId() != EI.GetDstNId()) {
902  GEdgeV.Add(TIntTr(EI.GetSrcNId(), EI.GetDstNId(), -1));
903  }
904  }
905 // }
906 
907  RealNodes = Nodes;
908  RealEdges = Graph->GetEdges();
909  LEdgeV = TIntTrV();
910  LSelfEdge = 0;
911 }
912 
913 
915  Nodes = (int) pow((double)ProbMtx.GetDim(), KronIters);
916  // add nodes until filling the Kronecker graph model
917  for (int nid = Graph->GetNodes(); nid < Nodes; nid++) {
918  Graph->AddNode(nid);
919  }
920 }
921 
923 void TKroneckerLL::RestoreGraph(const bool RestoreNodes) {
924  // remove from Graph
925  int NId1, NId2;
926  for (int e = 0; e < LEdgeV.Len(); e++) {
927  NId1 = LEdgeV[e].Val1; NId2 = LEdgeV[e].Val2;
928  Graph->DelEdge(NId1, NId2);
929 // GEdgeV.DelIfIn(LEdgeV[e]);
930  }
931  if(LEdgeV.Len() - LSelfEdge)
932  GEdgeV.Del(GEdgeV.Len() - LEdgeV.Len() + LSelfEdge, GEdgeV.Len() - 1);
933  LEdgeV.Clr();
934  LSelfEdge = 0;
935 
936  if(RestoreNodes) {
937  for(int i = Graph->GetNodes()-1; i >= RealNodes; i--) {
938  Graph->DelNode(i);
939  }
940  }
941 }
942 
944  // the number of times a seed matrix element appears in
945  // the full kronecker adjacency matrix after KronIter
946  // kronecker multiplications
947  double ElemCnt = 1;
948  const double dim = LLMtx.GetDim();
949  // count number of times x appears in the full kronecker matrix
950  for (int i = 1; i < KronIters; i++) {
951  ElemCnt = dim*dim*ElemCnt + TMath::Power(dim, 2*i);
952  }
953  return ElemCnt * LLMtx.GetMtxSum();
954 }
955 
956 double TKroneckerLL::GetFullRowLL(int RowId) const {
957  double RowLL = 0.0;
958  const int MtxDim = LLMtx.GetDim();
959  for (int level = 0; level < KronIters; level++) {
960  RowLL += LLMtx.GetRowSum(RowId % MtxDim);
961  RowId /= MtxDim;
962  }
963  return RowLL;
964 }
965 
966 double TKroneckerLL::GetFullColLL(int ColId) const {
967  double ColLL = 0.0;
968  const int MtxDim = LLMtx.GetDim();
969  for (int level = 0; level < KronIters; level++) {
970  ColLL += LLMtx.GetColSum(ColId % MtxDim);
971  ColId /= MtxDim;
972  }
973  return ColLL;
974 }
975 
977  double LL = 0;
978  for (int NId1 = 0; NId1 < LLMtx.GetNodes(KronIters); NId1++) {
979  for (int NId2 = 0; NId2 < LLMtx.GetNodes(KronIters); NId2++) {
980  LL = LL + LLMtx.GetNoEdgeLL(NId1, NId2, KronIters);
981  }
982  }
983  return LL;
984 }
985 
986 // 2nd prder Taylor approximation, log(1-x) ~ -x - 0.5x^2
988  double Sum=0.0, SumSq=0.0;
989  for (int i = 0; i < ProbMtx.Len(); i++) {
990  Sum += ProbMtx.At(i);
991  SumSq += TMath::Sqr(ProbMtx.At(i));
992  }
993  return -pow(Sum, KronIters) - 0.5*pow(SumSq, KronIters);
994 }
995 
996 void TKroneckerLL::InitLL(const TFltV& ParamV) {
997  InitLL(TKronMtx(ParamV));
998 }
999 
1000 void TKroneckerLL::InitLL(const TKronMtx& ParamMtx) {
1001  IAssert(ParamMtx.IsProbMtx());
1002  ProbMtx = ParamMtx;
1003  ProbMtx.GetLLMtx(LLMtx);
1005  if (GradV.Len() != ProbMtx.Len()) {
1006  GradV.Gen(ProbMtx.Len()); }
1007  GradV.PutAll(0.0);
1008 }
1009 
1010 void TKroneckerLL::InitLL(const PNGraph& GraphPt, const TKronMtx& ParamMtx) {
1011  IAssert(ParamMtx.IsProbMtx());
1012  ProbMtx = ParamMtx;
1013  ProbMtx.GetLLMtx(LLMtx);
1014  SetGraph(GraphPt);
1016  if (GradV.Len() != ProbMtx.Len()) {
1017  GradV.Gen(ProbMtx.Len()); }
1018  GradV.PutAll(0.0);
1019 }
1020 
1021 // exact graph log-likelihood, takes O(N^2 + E)
1023  LogLike = GetEmptyGraphLL(); // takes O(N^2)
1024  for (int nid = 0; nid < Nodes; nid++) {
1025  const TNGraph::TNodeI Node = Graph->GetNI(nid);
1026  const int SrcNId = NodePerm[nid];
1027  for (int e = 0; e < Node.GetOutDeg(); e++) {
1028  const int DstNId = NodePerm[Node.GetOutNId(e)];
1029  LogLike = LogLike - LLMtx.GetNoEdgeLL(SrcNId, DstNId, KronIters)
1030  + LLMtx.GetEdgeLL(SrcNId, DstNId, KronIters);
1031  }
1032  }
1033  return LogLike;
1034 }
1035 
1036 // approximate graph log-likelihood, takes O(E + N_0)
1038  LogLike = GetApxEmptyGraphLL(); // O(N_0)
1039  for (int nid = 0; nid < Nodes; nid++) {
1040  const TNGraph::TNodeI Node = Graph->GetNI(nid);
1041  const int SrcNId = NodePerm[nid];
1042  for (int e = 0; e < Node.GetOutDeg(); e++) {
1043  const int DstNId = NodePerm[Node.GetOutNId(e)];
1044  LogLike = LogLike - LLMtx.GetApxNoEdgeLL(SrcNId, DstNId, KronIters)
1045  + LLMtx.GetEdgeLL(SrcNId, DstNId, KronIters);
1046  }
1047  }
1048  return LogLike;
1049 }
1050 
1051 // Used in TKroneckerLL::SwapNodesLL: DeltaLL if we
1052 // add the node to the matrix (node gets/creates all
1053 // of its in- and out-edges).
1054 // Zero is for the empty row/column (isolated node)
1055 double TKroneckerLL::NodeLLDelta(const int& NId) const {
1056  if (! Graph->IsNode(NId)) { return 0.0; } // zero degree node
1057  double Delta = 0.0;
1058  const TNGraph::TNodeI Node = Graph->GetNI(NId);
1059  // out-edges
1060  const int SrcRow = NodePerm[NId];
1061  for (int e = 0; e < Node.GetOutDeg(); e++) {
1062  const int DstCol = NodePerm[Node.GetOutNId(e)];
1063  Delta += - LLMtx.GetApxNoEdgeLL(SrcRow, DstCol, KronIters)
1064  + LLMtx.GetEdgeLL(SrcRow, DstCol, KronIters);
1065  }
1066  //in-edges
1067  const int SrcCol = NodePerm[NId];
1068  for (int e = 0; e < Node.GetInDeg(); e++) {
1069  const int DstRow = NodePerm[Node.GetInNId(e)];
1070  Delta += - LLMtx.GetApxNoEdgeLL(DstRow, SrcCol, KronIters)
1071  + LLMtx.GetEdgeLL(DstRow, SrcCol, KronIters);
1072  }
1073  // double counted self-edge
1074  if (Graph->IsEdge(NId, NId)) {
1075  Delta += + LLMtx.GetApxNoEdgeLL(SrcRow, SrcCol, KronIters)
1076  - LLMtx.GetEdgeLL(SrcRow, SrcCol, KronIters);
1077  IAssert(SrcRow == SrcCol);
1078  }
1079  return Delta;
1080 }
1081 
1082 // swapping two nodes, only need to go over two rows and columns
1083 double TKroneckerLL::SwapNodesLL(const int& NId1, const int& NId2) {
1084  // subtract old LL (remove nodes)
1085  LogLike = LogLike - NodeLLDelta(NId1) - NodeLLDelta(NId2);
1086  const int PrevId1 = NodePerm[NId1], PrevId2 = NodePerm[NId2];
1087  // double-counted edges
1088  if (Graph->IsEdge(NId1, NId2)) {
1089  LogLike += - LLMtx.GetApxNoEdgeLL(PrevId1, PrevId2, KronIters)
1090  + LLMtx.GetEdgeLL(PrevId1, PrevId2, KronIters); }
1091  if (Graph->IsEdge(NId2, NId1)) {
1092  LogLike += - LLMtx.GetApxNoEdgeLL(PrevId2, PrevId1, KronIters)
1093  + LLMtx.GetEdgeLL(PrevId2, PrevId1, KronIters); }
1094  // swap
1095  NodePerm.Swap(NId1, NId2);
1096  InvertPerm.Swap(NodePerm[NId1], NodePerm[NId2]);
1097  // add new LL (add nodes)
1098  LogLike = LogLike + NodeLLDelta(NId1) + NodeLLDelta(NId2);
1099  const int NewId1 = NodePerm[NId1], NewId2 = NodePerm[NId2];
1100  // correct for double-counted edges
1101  if (Graph->IsEdge(NId1, NId2)) {
1102  LogLike += + LLMtx.GetApxNoEdgeLL(NewId1, NewId2, KronIters)
1103  - LLMtx.GetEdgeLL(NewId1, NewId2, KronIters); }
1104  if (Graph->IsEdge(NId2, NId1)) {
1105  LogLike += + LLMtx.GetApxNoEdgeLL(NewId2, NewId1, KronIters)
1106  - LLMtx.GetEdgeLL(NewId2, NewId1, KronIters); }
1107  return LogLike;
1108 }
1109 
1110 // metropolis sampling from P(permutation|graph)
1111 bool TKroneckerLL::SampleNextPerm(int& NId1, int& NId2) {
1112  // pick 2 uniform nodes and swap
1113  if (TKronMtx::Rnd.GetUniDev() < PermSwapNodeProb) {
1114  NId1 = TKronMtx::Rnd.GetUniDevInt(Nodes);
1115  NId2 = TKronMtx::Rnd.GetUniDevInt(Nodes);
1116  while (NId2 == NId1) { NId2 = TKronMtx::Rnd.GetUniDevInt(Nodes); }
1117  } else {
1118  // pick uniform edge and swap endpoints (slow as it moves around high degree nodes)
1119  const int e = TKronMtx::Rnd.GetUniDevInt(GEdgeV.Len());
1120  NId1 = GEdgeV[e].Val1; NId2 = GEdgeV[e].Val2;
1121  }
1122  const double U = TKronMtx::Rnd.GetUniDev();
1123  const double OldLL = LogLike;
1124  const double NewLL = SwapNodesLL(NId1, NId2);
1125  const double LogU = log(U);
1126  if (LogU > NewLL - OldLL) { // reject
1127  LogLike = OldLL;
1128  NodePerm.Swap(NId2, NId1); //swap back
1129  InvertPerm.Swap(NodePerm[NId2], NodePerm[NId1]); // swap back
1130  return false;
1131  }
1132  return true; // accept new sample
1133 }
1134 
1135 // exact gradient of an empty graph, O(N^2)
1136 double TKroneckerLL::GetEmptyGraphDLL(const int& ParamId) const {
1137  double DLL = 0.0;
1138  for (int NId1 = 0; NId1 < Nodes; NId1++) {
1139  for (int NId2 = 0; NId2 < Nodes; NId2++) {
1140  DLL += LLMtx.GetNoEdgeDLL(ParamId, NodePerm[NId1], NodePerm[NId2], KronIters);
1141  }
1142  }
1143  return DLL;
1144 }
1145 
1146 // approx gradient, using 2nd order Taylor approximation, O(N_0^2)
1147 double TKroneckerLL::GetApxEmptyGraphDLL(const int& ParamId) const {
1148  double Sum=0.0, SumSq=0.0;
1149  for (int i = 0; i < ProbMtx.Len(); i++) {
1150  Sum += ProbMtx.At(i);
1151  SumSq += TMath::Sqr(ProbMtx.At(i));
1152  }
1153  // d/dx -sum(x_i) - 0.5sum(x_i^2) = d/dx sum(theta)^k - 0.5 sum(theta^2)^k
1154  return -KronIters*pow(Sum, KronIters-1) - KronIters*pow(SumSq, KronIters-1)*ProbMtx.At(ParamId);
1155 }
1156 
1157 // exact graph gradient, runs O(N^2)
1159  for (int ParamId = 0; ParamId < LLMtx.Len(); ParamId++) {
1160  double DLL = 0.0;
1161  for (int NId1 = 0; NId1 < Nodes; NId1++) {
1162  for (int NId2 = 0; NId2 < Nodes; NId2++) {
1163  if (Graph->IsEdge(NId1, NId2)) {
1164  DLL += LLMtx.GetEdgeDLL(ParamId, NodePerm[NId1], NodePerm[NId2], KronIters);
1165  } else {
1166  DLL += LLMtx.GetNoEdgeDLL(ParamId, NodePerm[NId1], NodePerm[NId2], KronIters);
1167  }
1168  }
1169  }
1170  GradV[ParamId] = DLL;
1171  }
1172  return GradV;
1173 }
1174 
1175 // slow (but correct) approximate gradient, runs O(N^2)
1177  for (int ParamId = 0; ParamId < LLMtx.Len(); ParamId++) {
1178  double DLL = 0.0;
1179  for (int NId1 = 0; NId1 < Nodes; NId1++) {
1180  for (int NId2 = 0; NId2 < Nodes; NId2++) {
1181  if (Graph->IsEdge(NId1, NId2)) {
1182  DLL += LLMtx.GetEdgeDLL(ParamId, NodePerm[NId1], NodePerm[NId2], KronIters);
1183  } else {
1184  DLL += LLMtx.GetApxNoEdgeDLL(ParamId, NodePerm[NId1], NodePerm[NId2], KronIters);
1185  }
1186  }
1187  }
1188  GradV[ParamId] = DLL;
1189  }
1190  return GradV;
1191 }
1192 
1193 // fast approximate gradient, runs O(E)
1195  for (int ParamId = 0; ParamId < LLMtx.Len(); ParamId++) {
1196  double DLL = GetApxEmptyGraphDLL(ParamId);
1197  for (int nid = 0; nid < Nodes; nid++) {
1198  const TNGraph::TNodeI Node = Graph->GetNI(nid);
1199  const int SrcNId = NodePerm[nid];
1200  for (int e = 0; e < Node.GetOutDeg(); e++) {
1201  const int DstNId = NodePerm[Node.GetOutNId(e)];
1202  DLL = DLL - LLMtx.GetApxNoEdgeDLL(ParamId, SrcNId, DstNId, KronIters)
1203  + LLMtx.GetEdgeDLL(ParamId, SrcNId, DstNId, KronIters);
1204  }
1205  }
1206  GradV[ParamId] = DLL;
1207  }
1208  return GradV;
1209 }
1210 
1211 // Used in TKroneckerLL::UpdateGraphDLL: DeltaDLL if we
1212 // add the node to the empty matrix/graph (node
1213 // gets/creates all of its in- and out-edges).
1214 double TKroneckerLL::NodeDLLDelta(const int ParamId, const int& NId) const {
1215  if (! Graph->IsNode(NId)) { return 0.0; } // zero degree node
1216  double Delta = 0.0;
1217  const TNGraph::TNodeI Node = Graph->GetNI(NId);
1218  const int SrcRow = NodePerm[NId];
1219  for (int e = 0; e < Node.GetOutDeg(); e++) {
1220  const int DstCol = NodePerm[Node.GetOutNId(e)];
1221  Delta += - LLMtx.GetApxNoEdgeDLL(ParamId, SrcRow, DstCol, KronIters)
1222  + LLMtx.GetEdgeDLL(ParamId, SrcRow, DstCol, KronIters);
1223  }
1224  const int SrcCol = NodePerm[NId];
1225  for (int e = 0; e < Node.GetInDeg(); e++) {
1226  const int DstRow = NodePerm[Node.GetInNId(e)];
1227  Delta += - LLMtx.GetApxNoEdgeDLL(ParamId, DstRow, SrcCol, KronIters)
1228  + LLMtx.GetEdgeDLL(ParamId, DstRow, SrcCol, KronIters);
1229  }
1230  // double counter self-edge
1231  if (Graph->IsEdge(NId, NId)) {
1232  Delta += + LLMtx.GetApxNoEdgeDLL(ParamId, SrcRow, SrcCol, KronIters)
1233  - LLMtx.GetEdgeDLL(ParamId, SrcRow, SrcCol, KronIters);
1234  IAssert(SrcRow == SrcCol);
1235  }
1236  return Delta;
1237 }
1238 
1239 // given old DLL and new permutation, efficiently updates the DLL
1240 // permutation is new, but DLL is old
1241 void TKroneckerLL::UpdateGraphDLL(const int& SwapNId1, const int& SwapNId2) {
1242  for (int ParamId = 0; ParamId < LLMtx.Len(); ParamId++) {
1243  // permutation before the swap (swap back to previous position)
1244  NodePerm.Swap(SwapNId1, SwapNId2);
1245  // subtract old DLL
1246  TFlt& DLL = GradV[ParamId];
1247  DLL = DLL - NodeDLLDelta(ParamId, SwapNId1) - NodeDLLDelta(ParamId, SwapNId2);
1248  // double-counted edges
1249  const int PrevId1 = NodePerm[SwapNId1], PrevId2 = NodePerm[SwapNId2];
1250  if (Graph->IsEdge(SwapNId1, SwapNId2)) {
1251  DLL += - LLMtx.GetApxNoEdgeDLL(ParamId, PrevId1, PrevId2, KronIters)
1252  + LLMtx.GetEdgeDLL(ParamId, PrevId1, PrevId2, KronIters); }
1253  if (Graph->IsEdge(SwapNId2, SwapNId1)) {
1254  DLL += - LLMtx.GetApxNoEdgeDLL(ParamId, PrevId2, PrevId1, KronIters)
1255  + LLMtx.GetEdgeDLL(ParamId, PrevId2, PrevId1, KronIters); }
1256  // permutation after the swap (restore the swap)
1257  NodePerm.Swap(SwapNId1, SwapNId2);
1258  // add new DLL
1259  DLL = DLL + NodeDLLDelta(ParamId, SwapNId1) + NodeDLLDelta(ParamId, SwapNId2);
1260  const int NewId1 = NodePerm[SwapNId1], NewId2 = NodePerm[SwapNId2];
1261  // double-counted edges
1262  if (Graph->IsEdge(SwapNId1, SwapNId2)) {
1263  DLL += + LLMtx.GetApxNoEdgeDLL(ParamId, NewId1, NewId2, KronIters)
1264  - LLMtx.GetEdgeDLL(ParamId, NewId1, NewId2, KronIters); }
1265  if (Graph->IsEdge(SwapNId2, SwapNId1)) {
1266  DLL += + LLMtx.GetApxNoEdgeDLL(ParamId, NewId2, NewId1, KronIters)
1267  - LLMtx.GetEdgeDLL(ParamId, NewId2, NewId1, KronIters); }
1268  }
1269 }
1270 
1271 void TKroneckerLL::SampleGradient(const int& WarmUp, const int& NSamples, double& AvgLL, TFltV& AvgGradV) {
1272  printf("SampleGradient: %s (%s warm-up):", TInt::GetMegaStr(NSamples).CStr(), TInt::GetMegaStr(WarmUp).CStr());
1273  int NId1=0, NId2=0, NAccept=0;
1274  TExeTm ExeTm1;
1275  if (WarmUp > 0) {
1276  CalcApxGraphLL();
1277  for (int s = 0; s < WarmUp; s++) { SampleNextPerm(NId1, NId2); }
1278  printf(" warm-up:%s,", ExeTm1.GetTmStr()); ExeTm1.Tick();
1279  }
1280  CalcApxGraphLL(); // re-calculate LL (due to numerical errors)
1281  CalcApxGraphDLL();
1282  AvgLL = 0;
1283  AvgGradV.Gen(LLMtx.Len()); AvgGradV.PutAll(0.0);
1284  printf(" sampl");
1285  for (int s = 0; s < NSamples; s++) {
1286  if (SampleNextPerm(NId1, NId2)) { // new permutation
1287  UpdateGraphDLL(NId1, NId2); NAccept++; }
1288  for (int m = 0; m < LLMtx.Len(); m++) { AvgGradV[m] += GradV[m]; }
1289  AvgLL += GetLL();
1290  }
1291  printf("ing");
1292  AvgLL = AvgLL / double(NSamples);
1293  for (int m = 0; m < LLMtx.Len(); m++) {
1294  AvgGradV[m] = AvgGradV[m] / double(NSamples); }
1295  printf(":%s (%.0f/s), accept %.1f%%\n", ExeTm1.GetTmStr(), double(NSamples)/ExeTm1.GetSecs(),
1296  double(100*NAccept)/double(NSamples));
1297 }
1298 
1299 double TKroneckerLL::GradDescent(const int& NIter, const double& LrnRate, double MnStep, double MxStep, const int& WarmUp, const int& NSamples) {
1300  printf("\n----------------------------------------------------------------------\n");
1301  printf("Fitting graph on %d nodes, %d edges\n", Graph->GetNodes(), Graph->GetEdges());
1302  printf("Kron iters: %d (== %d nodes)\n\n", KronIters(), ProbMtx.GetNodes(KronIters()));
1303  TExeTm IterTm, TotalTm;
1304  double OldLL=-1e10, CurLL=0;
1305  const double EZero = pow((double) Graph->GetEdges(), 1.0/double(KronIters));
1306  TFltV CurGradV, LearnRateV(GetParams()), LastStep(GetParams());
1307  LearnRateV.PutAll(LrnRate);
1308  TKronMtx NewProbMtx = ProbMtx;
1309 
1310  if(DebugMode) {
1311  LLV.Gen(NIter, 0);
1312  MtxV.Gen(NIter, 0);
1313  }
1314 
1315  for (int Iter = 0; Iter < NIter; Iter++) {
1316  printf("%03d] ", Iter);
1317  SampleGradient(WarmUp, NSamples, CurLL, CurGradV);
1318  for (int p = 0; p < GetParams(); p++) {
1319  LearnRateV[p] *= 0.95;
1320  if (Iter < 1) {
1321  while (fabs(LearnRateV[p]*CurGradV[p]) > MxStep) { LearnRateV[p] *= 0.95; }
1322  while (fabs(LearnRateV[p]*CurGradV[p]) < 0.02) { LearnRateV[p] *= (1.0/0.95); } // move more
1323  } else {
1324  // set learn rate so that move for each parameter is inside the [MnStep, MxStep]
1325  while (fabs(LearnRateV[p]*CurGradV[p]) > MxStep) { LearnRateV[p] *= 0.95; printf(".");}
1326  while (fabs(LearnRateV[p]*CurGradV[p]) < MnStep) { LearnRateV[p] *= (1.0/0.95); printf("*");}
1327  if (MxStep > 3*MnStep) { MxStep *= 0.95; }
1328  }
1329  NewProbMtx.At(p) = ProbMtx.At(p) + LearnRateV[p]*CurGradV[p];
1330  if (NewProbMtx.At(p) > 0.9999) { NewProbMtx.At(p)=0.9999; }
1331  if (NewProbMtx.At(p) < 0.0001) { NewProbMtx.At(p)=0.0001; }
1332  }
1333  printf(" trueE0: %.2f (%d), estE0: %.2f (%d), ERR: %f\n", EZero, Graph->GetEdges(),
1335  printf(" currLL: %.4f, deltaLL: %.4f\n", CurLL, CurLL-OldLL); // positive is good
1336  for (int p = 0; p < GetParams(); p++) {
1337  printf(" %d] %f <-- %f + %9f Grad: %9.1f Rate: %g\n", p, NewProbMtx.At(p),
1338  ProbMtx.At(p), (double)(LearnRateV[p]*CurGradV[p]), CurGradV[p](), LearnRateV[p]());
1339  }
1340  if (Iter+1 < NIter) { // skip last update
1341  ProbMtx = NewProbMtx; ProbMtx.GetLLMtx(LLMtx); }
1342  OldLL=CurLL;
1343  printf("\n"); fflush(stdout);
1344 
1345  if(DebugMode) {
1346  LLV.Add(CurLL);
1347  MtxV.Add(NewProbMtx);
1348  }
1349  }
1350  printf("TotalExeTm: %s %g\n", TotalTm.GetStr(), TotalTm.GetSecs());
1351  ProbMtx.Dump("FITTED PARAMS", false);
1352  return CurLL;
1353 }
1354 
1355 double TKroneckerLL::GradDescent2(const int& NIter, const double& LrnRate, double MnStep, double MxStep, const int& WarmUp, const int& NSamples) {
1356  printf("\n----------------------------------------------------------------------\n");
1357  printf("GradDescent2\n");
1358  printf("Fitting graph on %d nodes, %d edges\n", Graph->GetNodes(), Graph->GetEdges());
1359  printf("Skip moves that make likelihood smaller\n");
1360  printf("Kron iters: %d (== %d nodes)\n\n", KronIters(), ProbMtx.GetNodes(KronIters()));
1361  TExeTm IterTm, TotalTm;
1362  double CurLL=0, NewLL=0;
1363  const double EZero = pow((double) Graph->GetEdges(), 1.0/double(KronIters));
1364  TFltV CurGradV, NewGradV, LearnRateV(GetParams()), LastStep(GetParams());
1365  LearnRateV.PutAll(LrnRate);
1366  TKronMtx NewProbMtx=ProbMtx, CurProbMtx=ProbMtx;
1367  bool GoodMove = false;
1368  // Start
1369  for (int Iter = 0; Iter < NIter; Iter++) {
1370  printf("%03d] ", Iter);
1371  if (! GoodMove) { SampleGradient(WarmUp, NSamples, CurLL, CurGradV); }
1372  CurProbMtx = ProbMtx;
1373  // update parameters
1374  for (int p = 0; p < GetParams(); p++) {
1375  while (fabs(LearnRateV[p]*CurGradV[p]) > MxStep) { LearnRateV[p] *= 0.95; printf(".");}
1376  while (fabs(LearnRateV[p]*CurGradV[p]) < MnStep) { LearnRateV[p] *= (1.0/0.95); printf("*");}
1377  NewProbMtx.At(p) = CurProbMtx.At(p) + LearnRateV[p]*CurGradV[p];
1378  if (NewProbMtx.At(p) > 0.9999) { NewProbMtx.At(p)=0.9999; }
1379  if (NewProbMtx.At(p) < 0.0001) { NewProbMtx.At(p)=0.0001; }
1380  LearnRateV[p] *= 0.95;
1381  }
1382  printf(" ");
1383  ProbMtx=NewProbMtx; ProbMtx.GetLLMtx(LLMtx);
1384  SampleGradient(WarmUp, NSamples, NewLL, NewGradV);
1385  if (NewLL > CurLL) { // accept the move
1386  printf("== Good move:\n");
1387  printf(" trueE0: %.2f (%d), estE0: %.2f (%d), ERR: %f\n", EZero, Graph->GetEdges(),
1389  printf(" currLL: %.4f deltaLL: %.4f\n", CurLL, NewLL-CurLL); // positive is good
1390  for (int p = 0; p < GetParams(); p++) {
1391  printf(" %d] %f <-- %f + %9f Grad: %9.1f Rate: %g\n", p, NewProbMtx.At(p),
1392  CurProbMtx.At(p), (double)(LearnRateV[p]*CurGradV[p]), CurGradV[p](), LearnRateV[p]()); }
1393  CurLL = NewLL;
1394  CurGradV = NewGradV;
1395  GoodMove = true;
1396  } else {
1397  printf("** BAD move:\n");
1398  printf(" *trueE0: %.2f (%d), estE0: %.2f (%d), ERR: %f\n", EZero, Graph->GetEdges(),
1400  printf(" *curLL: %.4f deltaLL: %.4f\n", CurLL, NewLL-CurLL); // positive is good
1401  for (int p = 0; p < GetParams(); p++) {
1402  printf(" b%d] %f <-- %f + %9f Grad: %9.1f Rate: %g\n", p, NewProbMtx.At(p),
1403  CurProbMtx.At(p), (double)(LearnRateV[p]*CurGradV[p]), CurGradV[p](), LearnRateV[p]()); }
1404  // move to old position
1405  ProbMtx = CurProbMtx; ProbMtx.GetLLMtx(LLMtx);
1406  GoodMove = false;
1407  }
1408  printf("\n"); fflush(stdout);
1409  }
1410  printf("TotalExeTm: %s %g\n", TotalTm.GetStr(), TotalTm.GetSecs());
1411  ProbMtx.Dump("FITTED PARAMS\n", false);
1412  return CurLL;
1413 }
1414 
1416 // filling in random edges for KronEM
1417 void TKroneckerLL::SetRandomEdges(const int& NEdges, const bool isDir) {
1418  int count = 0, added = 0, collision = 0;
1419  const int MtxDim = ProbMtx.GetDim();
1420  const double MtxSum = ProbMtx.GetMtxSum();
1421  TVec<TFltIntIntTr> ProbToRCPosV; // row, col position
1422  double CumProb = 0.0;
1423 
1424  for(int r = 0; r < MtxDim; r++) {
1425  for(int c = 0; c < MtxDim; c++) {
1426  const double Prob = ProbMtx.At(r, c);
1427  if (Prob > 0.0) {
1428  CumProb += Prob;
1429  ProbToRCPosV.Add(TFltIntIntTr(CumProb/MtxSum, r, c));
1430  }
1431  }
1432  }
1433 
1434  int Rng, Row, Col, n, NId1, NId2;
1435  while(added < NEdges) {
1436  Rng = Nodes; Row = 0; Col = 0;
1437  for (int iter = 0; iter < KronIters; iter++) {
1438  const double& Prob = TKronMtx::Rnd.GetUniDev();
1439  n = 0; while(Prob > ProbToRCPosV[n].Val1) { n++; }
1440  const int MtxRow = ProbToRCPosV[n].Val2;
1441  const int MtxCol = ProbToRCPosV[n].Val3;
1442  Rng /= MtxDim;
1443  Row += MtxRow * Rng;
1444  Col += MtxCol * Rng;
1445  }
1446 
1447  count++;
1448 
1449  NId1 = InvertPerm[Row]; NId2 = InvertPerm[Col];
1450 
1451  // Check conflicts
1452  if(EMType != kronEdgeMiss && IsObsEdge(NId1, NId2)) {
1453  continue;
1454  }
1455 
1456  if (! Graph->IsEdge(NId1, NId2)) {
1457  Graph->AddEdge(NId1, NId2);
1458  if(NId1 != NId2) { GEdgeV.Add(TIntTr(NId1, NId2, LEdgeV.Len())); }
1459  else { LSelfEdge++; }
1460  LEdgeV.Add(TIntTr(NId1, NId2, GEdgeV.Len()-1));
1461  added++;
1462  if (! isDir) {
1463  if (NId1 != NId2) {
1464  Graph->AddEdge(NId2, NId1);
1465  GEdgeV.Add(TIntTr(NId2, NId1, LEdgeV.Len()));
1466  LEdgeV.Add(TIntTr(NId2, NId1, GEdgeV.Len()-1));
1467  added++;
1468  }
1469  }
1470  } else { collision ++; }
1471  }
1472 // printf("total = %d / added = %d / collision = %d\n", count, added, collision);
1473 }
1474 
1475 // sampling setup for KronEM
1476 void TKroneckerLL::MetroGibbsSampleSetup(const int& WarmUp) {
1477  double alpha = log(ProbMtx.GetMtxSum()) / log(double(ProbMtx.GetDim()));
1478  int NId1 = 0, NId2 = 0;
1479  int NMissing;
1480 
1481  RestoreGraph(false);
1482  if(EMType == kronEdgeMiss) {
1483  CalcApxGraphLL();
1484  for (int s = 0; s < WarmUp; s++) SampleNextPerm(NId1, NId2);
1485  }
1486 
1487  if(EMType == kronFutureLink) {
1488  NMissing = (int) (pow(ProbMtx.GetMtxSum(), KronIters) - pow(double(RealNodes), alpha));
1489  } else if(EMType == kronEdgeMiss) {
1490  NMissing = MissEdges;
1491  } else {
1492  NMissing = (int) (pow(ProbMtx.GetMtxSum(), KronIters) * (1.0 - pow(double(RealNodes) / double(Nodes), 2)));
1493  }
1494  NMissing = (NMissing < 1) ? 1 : NMissing;
1495 
1496  SetRandomEdges(NMissing, true);
1497 
1498  CalcApxGraphLL();
1499  for (int s = 0; s < WarmUp; s++) SampleNextPerm(NId1, NId2);
1500 }
1501 
1502 // Metropolis-Hastings steps for KronEM
1503 void TKroneckerLL::MetroGibbsSampleNext(const int& WarmUp, const bool DLLUpdate) {
1504  int NId1 = 0, NId2 = 0, hit = 0, GId = 0;
1505  TIntTr EdgeToRemove, NewEdge;
1506  double RndAccept;
1507 
1508  if(LEdgeV.Len()) {
1509  for(int i = 0; i < WarmUp; i++) {
1510  hit = TKronMtx::Rnd.GetUniDevInt(LEdgeV.Len());
1511 
1512  NId1 = LEdgeV[hit].Val1; NId2 = LEdgeV[hit].Val2;
1513  GId = LEdgeV[hit].Val3;
1514  SetRandomEdges(1, true);
1515  NewEdge = LEdgeV.Last();
1516 
1517  RndAccept = (1.0 - exp(LLMtx.GetEdgeLL(NewEdge.Val1, NewEdge.Val2, KronIters))) / (1.0 - exp(LLMtx.GetEdgeLL(NId1, NId2, KronIters)));
1518  RndAccept = (RndAccept > 1.0) ? 1.0 : RndAccept;
1519 
1520  if(TKronMtx::Rnd.GetUniDev() > RndAccept) { // reject
1521  Graph->DelEdge(NewEdge.Val1, NewEdge.Val2);
1522  if(NewEdge.Val1 != NewEdge.Val2) { GEdgeV.DelLast(); }
1523  else { LSelfEdge--; }
1524  LEdgeV.DelLast();
1525  } else { // accept
1526  Graph->DelEdge(NId1, NId2);
1527  LEdgeV[hit] = LEdgeV.Last();
1528  LEdgeV.DelLast();
1529  if(NId1 == NId2) {
1530  LSelfEdge--;
1531  if(NewEdge.Val1 != NewEdge.Val2) {
1532  GEdgeV[GEdgeV.Len()-1].Val3 = hit;
1533  }
1534  } else {
1535  IAssertR(GEdgeV.Last().Val3 >= 0, "Invalid indexing");
1536 
1537  GEdgeV[GId] = GEdgeV.Last();
1538  if(NewEdge.Val1 != NewEdge.Val2) {
1539  GEdgeV[GId].Val3 = hit;
1540  }
1541  LEdgeV[GEdgeV[GId].Val3].Val3 = GId;
1542  GEdgeV.DelLast();
1543  }
1544 
1545  LogLike += LLMtx.GetApxNoEdgeLL(EdgeToRemove.Val1, EdgeToRemove.Val2, KronIters) - LLMtx.GetEdgeLL(EdgeToRemove.Val1, EdgeToRemove.Val2, KronIters);
1546  LogLike += -LLMtx.GetApxNoEdgeLL(NewEdge.Val1, NewEdge.Val2, KronIters) + LLMtx.GetEdgeLL(NewEdge.Val1, NewEdge.Val2, KronIters);
1547 
1548  if(DLLUpdate) {
1549  for (int p = 0; p < LLMtx.Len(); p++) {
1550  GradV[p] += LLMtx.GetApxNoEdgeDLL(p, EdgeToRemove.Val1, EdgeToRemove.Val2, KronIters) - LLMtx.GetEdgeDLL(p, EdgeToRemove.Val1, EdgeToRemove.Val2, KronIters);
1551  GradV[p] += -LLMtx.GetApxNoEdgeDLL(p, NewEdge.Val1, NewEdge.Val2, KronIters) + LLMtx.GetEdgeDLL(p, NewEdge.Val1, NewEdge.Val2, KronIters);
1552  }
1553  }
1554  }
1555  }
1556  }
1557 
1558 // CalcApxGraphLL();
1559  for (int s = 0; s < WarmUp; s++) {
1560  if(SampleNextPerm(NId1, NId2)) {
1561  if(DLLUpdate) UpdateGraphDLL(NId1, NId2);
1562  }
1563  }
1564 }
1565 
1566 // E-step in KronEM
1567 void TKroneckerLL::RunEStep(const int& GibbsWarmUp, const int& WarmUp, const int& NSamples, TFltV& LLV, TVec<TFltV>& DLLV) {
1568  TExeTm ExeTm, TotalTm;
1569  LLV.Gen(NSamples, 0);
1570  DLLV.Gen(NSamples, 0);
1571 
1572  ExeTm.Tick();
1573  for(int i = 0; i < 2; i++) MetroGibbsSampleSetup(WarmUp);
1574  printf(" Warm-Up [%u] : %s\n", WarmUp, ExeTm.GetTmStr());
1575  CalcApxGraphLL();
1576  for(int i = 0; i < GibbsWarmUp; i++) MetroGibbsSampleNext(10, false);
1577  printf(" Gibbs Warm-Up [%u] : %s\n", GibbsWarmUp, ExeTm.GetTmStr());
1578 
1579  ExeTm.Tick();
1580  CalcApxGraphLL();
1581  CalcApxGraphDLL();
1582  for(int i = 0; i < NSamples; i++) {
1583  MetroGibbsSampleNext(50, false);
1584 
1585  LLV.Add(LogLike);
1586  DLLV.Add(GradV);
1587 
1588  int OnePercent = (i+1) % (NSamples / 10);
1589  if(OnePercent == 0) {
1590  int TenPercent = ((i+1) / (NSamples / 10)) * 10;
1591  printf(" %3u%% done : %s\n", TenPercent, ExeTm.GetTmStr());
1592  }
1593  }
1594 }
1595 
1596 // M-step in KronEM
1597 double TKroneckerLL::RunMStep(const TFltV& LLV, const TVec<TFltV>& DLLV, const int& GradIter, const double& LrnRate, double MnStep, double MxStep) {
1598  TExeTm IterTm, TotalTm;
1599  double OldLL=LogLike, CurLL=0;
1600  const double alpha = log(double(RealEdges)) / log(double(RealNodes));
1601  const double EZero = pow(double(ProbMtx.GetDim()), alpha);
1602 
1603  TFltV CurGradV(GetParams()), LearnRateV(GetParams()), LastStep(GetParams());
1604  LearnRateV.PutAll(LrnRate);
1605 
1606  TKronMtx NewProbMtx = ProbMtx;
1607  const int NSamples = LLV.Len();
1608  const int ReCalcLen = NSamples / 10;
1609 
1610  for (int s = 0; s < LLV.Len(); s++) {
1611  CurLL += LLV[s];
1612  for(int p = 0; p < GetParams(); p++) { CurGradV[p] += DLLV[s][p]; }
1613  }
1614  CurLL /= NSamples;
1615  for(int p = 0; p < GetParams(); p++) { CurGradV[p] /= NSamples; }
1616 
1617  double MaxLL = CurLL;
1618  TKronMtx MaxProbMtx = ProbMtx;
1619  TKronMtx OldProbMtx = ProbMtx;
1620 
1621  for (int Iter = 0; Iter < GradIter; Iter++) {
1622  printf(" %03d] ", Iter+1);
1623  IterTm.Tick();
1624 
1625  for (int p = 0; p < GetParams(); p++) {
1626  if (Iter < 1) {
1627  while (fabs(LearnRateV[p]*CurGradV[p]) > MxStep) { LearnRateV[p] *= 0.95; }
1628  while (fabs(LearnRateV[p]*CurGradV[p]) < 5 * MnStep) { LearnRateV[p] *= (1.0/0.95); } // move more
1629  } else {
1630  // set learn rate so that move for each parameter is inside the [MnStep, MxStep]
1631  while (fabs(LearnRateV[p]*CurGradV[p]) > MxStep) { LearnRateV[p] *= 0.95; printf(".");}
1632  while (fabs(LearnRateV[p]*CurGradV[p]) < MnStep) { LearnRateV[p] *= (1.0/0.95); printf("*");}
1633  if (MxStep > 3*MnStep) { MxStep *= 0.95; }
1634  }
1635  NewProbMtx.At(p) = ProbMtx.At(p) + LearnRateV[p]*CurGradV[p];
1636  if (NewProbMtx.At(p) > 0.9999) { NewProbMtx.At(p)=0.9999; }
1637  if (NewProbMtx.At(p) < 0.0001) { NewProbMtx.At(p)=0.0001; }
1638  LearnRateV[p] *= 0.95;
1639  }
1640  printf(" trueE0: %.2f (%u from %u), estE0: %.2f (%u from %u), ERR: %f\n", EZero, RealEdges(), RealNodes(), ProbMtx.GetMtxSum(), Graph->GetEdges(), Graph->GetNodes(), fabs(EZero-ProbMtx.GetMtxSum()));
1641  printf(" currLL: %.4f, deltaLL: %.4f\n", CurLL, CurLL-OldLL); // positive is good
1642  for (int p = 0; p < GetParams(); p++) {
1643  printf(" %d] %f <-- %f + %9f Grad: %9.1f Rate: %g\n", p, NewProbMtx.At(p),
1644  ProbMtx.At(p), (double)(LearnRateV[p]*CurGradV[p]), CurGradV[p](), LearnRateV[p]());
1645  }
1646 
1647  OldLL=CurLL;
1648  if(Iter == GradIter - 1) {
1649  break;
1650  }
1651 
1652  CurLL = 0;
1653  CurGradV.PutAll(0.0);
1654  TFltV OneDLL;
1655 
1656  CalcApxGraphLL();
1657  CalcApxGraphDLL();
1658 
1659  for(int s = 0; s < NSamples; s++) {
1660  ProbMtx = OldProbMtx; ProbMtx.GetLLMtx(LLMtx);
1661  MetroGibbsSampleNext(10, true);
1662  ProbMtx = NewProbMtx; ProbMtx.GetLLMtx(LLMtx);
1663  if(s % ReCalcLen == ReCalcLen/2) {
1664  CurLL += CalcApxGraphLL();
1665  OneDLL = CalcApxGraphDLL();
1666  } else {
1667  CurLL += LogLike;
1668  OneDLL = GradV;
1669  }
1670  for(int p = 0; p < GetParams(); p++) {
1671  CurGradV[p] += OneDLL[p];
1672  }
1673  }
1674  CurLL /= NSamples;
1675 
1676  if(MaxLL < CurLL) {
1677  MaxLL = CurLL; MaxProbMtx = ProbMtx;
1678  }
1679 
1680  printf(" Time: %s\n", IterTm.GetTmStr());
1681  printf("\n"); fflush(stdout);
1682  }
1683  ProbMtx = MaxProbMtx; ProbMtx.GetLLMtx(LLMtx);
1684 
1685  printf(" FinalLL : %f, TotalExeTm: %s\n", MaxLL, TotalTm.GetTmStr());
1686  ProbMtx.Dump(" FITTED PARAMS", false);
1687 
1688  return MaxLL;
1689 }
1690 
1691 // KronEM
1692 void TKroneckerLL::RunKronEM(const int& EMIter, const int& GradIter, double LrnRate, double MnStep, double MxStep, const int& GibbsWarmUp, const int& WarmUp, const int& NSamples, const TKronEMType& Type, const int& NMissing) {
1693  printf("\n----------------------------------------------------------------------\n");
1694  printf("Fitting graph on %d nodes, %d edges\n", int(RealNodes), int(RealEdges));
1695  printf("Kron iters: %d (== %d nodes)\n\n", KronIters(), ProbMtx.GetNodes(KronIters()));
1696 
1697  TFltV LLV(NSamples);
1698  TVec<TFltV> DLLV(NSamples);
1699  //int count = 0;
1700 
1701  EMType = Type;
1702  MissEdges = NMissing;
1703  AppendIsoNodes();
1704  SetRndPerm();
1705 
1706  if(DebugMode) {
1707  LLV.Gen(EMIter, 0);
1708  MtxV.Gen(EMIter, 0);
1709  }
1710 
1711  for(int i = 0; i < EMIter; i++) {
1712  printf("\n----------------------------------------------------------------------\n");
1713  printf("%03d EM-iter] E-Step\n", i+1);
1714  RunEStep(GibbsWarmUp, WarmUp, NSamples, LLV, DLLV);
1715  printf("\n\n");
1716 
1717  printf("%03d EM-iter] M-Step\n", i+1);
1718  double CurLL = RunMStep(LLV, DLLV, GradIter, LrnRate, MnStep, MxStep);
1719  printf("\n\n");
1720 
1721  if(DebugMode) {
1722  LLV.Add(CurLL);
1723  MtxV.Add(ProbMtx);
1724  }
1725  }
1726 
1727  RestoreGraph();
1728 }
1729 
1730 
1731 
1732 void GetMinMax(const TFltPrV& XYValV, double& Min, double& Max, const bool& ResetMinMax) {
1733  if (ResetMinMax) { Min = TFlt::Mx; Max = TFlt::Mn; }
1734  for (int i = 0; i < XYValV.Len(); i++) {
1735  Min = TMath::Mn(Min, XYValV[i].Val2.Val);
1736  Max = TMath::Mx(Max, XYValV[i].Val2.Val);
1737  }
1738 }
1739 
1740 void PlotGrad(const TFltPrV& EstLLV, const TFltPrV& TrueLLV, const TVec<TFltPrV>& GradVV, const TFltPrV& AcceptV, const TStr& OutFNm, const TStr& Desc) {
1741  double Min, Max, Min1, Max1;
1742  // plot log-likelihood
1743  { TGnuPlot GP("sLL-"+OutFNm, TStr::Fmt("Log-likelihood (avg 1k samples). %s", Desc.CStr()), true);
1744  GP.AddPlot(EstLLV, gpwLines, "Esimated LL", "linewidth 1");
1745  if (! TrueLLV.Empty()) { GP.AddPlot(TrueLLV, gpwLines, "TRUE LL", "linewidth 1"); }
1746  //GetMinMax(EstLLV, Min, Max, true); GetMinMax(TrueLLV, Min, Max, false);
1747  //GP.SetYRange((int)floor(Min-1), (int)ceil(Max+1));
1748  GP.SetXYLabel("Sample Index (time)", "Log-likelihood");
1749  GP.SavePng(); }
1750  // plot accept
1751  { TGnuPlot GP("sAcc-"+OutFNm, TStr::Fmt("Pct. accepted rnd moves (over 1k samples). %s", Desc.CStr()), true);
1752  GP.AddPlot(AcceptV, gpwLines, "Pct accepted swaps", "linewidth 1");
1753  GP.SetXYLabel("Sample Index (time)", "Pct accept permutation swaps");
1754  GP.SavePng(); }
1755  // plot grads
1756  TGnuPlot GPAll("sGradAll-"+OutFNm, TStr::Fmt("Gradient (avg 1k samples). %s", Desc.CStr()), true);
1757  GetMinMax(GradVV[0], Min1, Max1, true);
1758  for (int g = 0; g < GradVV.Len(); g++) {
1759  GPAll.AddPlot(GradVV[g], gpwLines, TStr::Fmt("param %d", g+1), "linewidth 1");
1760  GetMinMax(GradVV[g], Min1, Max1, false);
1761  TGnuPlot GP(TStr::Fmt("sGrad%02d-", g+1)+OutFNm, TStr::Fmt("Gradient (avg 1k samples). %s", Desc.CStr()), true);
1762  GP.AddPlot(GradVV[g], gpwLines, TStr::Fmt("param id %d", g+1), "linewidth 1");
1763  GetMinMax(GradVV[g], Min, Max, true);
1764  GP.SetYRange((int)floor(Min-1), (int)ceil(Max+1));
1765  GP.SetXYLabel("Sample Index (time)", "Gradient");
1766  GP.SavePng();
1767  }
1768  GPAll.SetYRange((int)floor(Min1-1), (int)ceil(Max1+1));
1769  GPAll.SetXYLabel("Sample Index (time)", "Gradient");
1770  GPAll.SavePng();
1771 }
1772 
1773 void PlotAutoCorrelation(const TFltV& ValV, const int& MaxK, const TStr& OutFNm, const TStr& Desc) {
1774  double Avg=0.0, Var=0.0;
1775  for (int i = 0; i < ValV.Len(); i++) { Avg += ValV[i]; }
1776  Avg /= (double) ValV.Len();
1777  for (int i = 0; i < ValV.Len(); i++) { Var += TMath::Sqr(ValV[i]-Avg); }
1778  TFltPrV ACorrV;
1779  for (int k = 0; k < TMath::Mn(ValV.Len(), MaxK); k++) {
1780  double corr = 0.0;
1781  for (int i = 0; i < ValV.Len() - k; i++) {
1782  corr += (ValV[i]-Avg)*(ValV[i+k]-Avg);
1783  }
1784  ACorrV.Add(TFltPr(k, corr/Var));
1785  }
1786  // plot grads
1787  TGnuPlot GP("sAutoCorr-"+OutFNm, TStr::Fmt("AutoCorrelation (%d samples). %s", ValV.Len(), Desc.CStr()), true);
1788  GP.AddPlot(ACorrV, gpwLines, "", "linewidth 1");
1789  GP.SetXYLabel("Lag, k", "Autocorrelation, r_k");
1790  GP.SavePng();
1791 }
1792 
1793 // sample permutations and plot the current gradient and log-likelihood as the function
1794 // of the number of samples
1795 TFltV TKroneckerLL::TestSamplePerm(const TStr& OutFNm, const int& WarmUp, const int& NSamples, const TKronMtx& TrueMtx, const bool& DoPlot) {
1796  printf("Sample permutations: %s (warm-up: %s)\n", TInt::GetMegaStr(NSamples).CStr(), TInt::GetMegaStr(WarmUp).CStr());
1797  int NId1=0, NId2=0, NAccept=0;
1798  TExeTm ExeTm;
1799  const int PlotLen = NSamples/1000+1;
1800  double TrueLL=-1, AvgLL=0.0;
1801  TFltV AvgGradV(GetParams());
1802  TFltPrV TrueLLV(PlotLen, 0); // true log-likelihood (under the correct permutation)
1803  TFltPrV EstLLV(PlotLen, 0); // estiamted log-likelihood (averaged over last 1k permutation)
1804  TFltPrV AcceptV; // sample acceptance ratio
1805  TFltV SampleLLV(NSamples, 0);
1806  TVec<TFltPrV> GradVV(GetParams());
1807  for (int g = 0; g < GetParams(); g++) {
1808  GradVV[g].Gen(PlotLen, 0); }
1809  if (! TrueMtx.Empty()) {
1810  TIntV PermV=NodePerm; TKronMtx CurMtx=ProbMtx; ProbMtx.Dump();
1811  InitLL(TrueMtx); SetOrderPerm(); CalcApxGraphLL(); printf("TrueLL: %f\n", LogLike());
1812  TrueLL=LogLike; InitLL(CurMtx); NodePerm=PermV;
1813  }
1814  CalcApxGraphLL();
1815  printf("LogLike at start: %f\n", LogLike());
1816  if (WarmUp > 0) {
1817  EstLLV.Add(TFltPr(0, LogLike));
1818  if (TrueLL != -1) { TrueLLV.Add(TFltPr(0, TrueLL)); }
1819  for (int s = 0; s < WarmUp; s++) { SampleNextPerm(NId1, NId2); }
1820  printf(" warm-up:%s,", ExeTm.GetTmStr()); ExeTm.Tick();
1821  }
1822  printf("LogLike afterm warm-up: %f\n", LogLike());
1823  CalcApxGraphLL(); // re-calculate LL (due to numerical errors)
1824  CalcApxGraphDLL();
1825  EstLLV.Add(TFltPr(WarmUp, LogLike));
1826  if (TrueLL != -1) { TrueLLV.Add(TFltPr(WarmUp, TrueLL)); }
1827  printf(" recalculated: %f\n", LogLike());
1828  // start sampling
1829  printf(" sampling (average per 1000 samples)\n");
1830  TVec<TFltV> SamplVV(5);
1831  for (int s = 0; s < NSamples; s++) {
1832  if (SampleNextPerm(NId1, NId2)) { // new permutation
1833  UpdateGraphDLL(NId1, NId2); NAccept++; }
1834  for (int m = 0; m < AvgGradV.Len(); m++) { AvgGradV[m] += GradV[m]; }
1835  AvgLL += GetLL();
1836  SampleLLV.Add(GetLL());
1837  /*SamplVV[0].Add(GetLL()); // gives worse autocoreelation than the avg below
1838  SamplVV[1].Add(GradV[0]);
1839  SamplVV[2].Add(GradV[1]);
1840  SamplVV[3].Add(GradV[2]);
1841  SamplVV[4].Add(GradV[3]);*/
1842  if (s > 0 && s % 1000 == 0) {
1843  printf(".");
1844  for (int g = 0; g < AvgGradV.Len(); g++) {
1845  GradVV[g].Add(TFltPr(WarmUp+s, AvgGradV[g] / 1000.0)); }
1846  EstLLV.Add(TFltPr(WarmUp+s, AvgLL / 1000.0));
1847  if (TrueLL != -1) { TrueLLV.Add(TFltPr(WarmUp+s, TrueLL)); }
1848  AcceptV.Add(TFltPr(WarmUp+s, NAccept/1000.0));
1849  // better (faster decaying) autocorrelation when one takes avg. of 1000 consecutive samples
1850  /*SamplVV[0].Add(AvgLL);
1851  SamplVV[1].Add(AvgGradV[0]);
1852  SamplVV[2].Add(AvgGradV[1]);
1853  SamplVV[3].Add(AvgGradV[2]);
1854  SamplVV[4].Add(AvgGradV[3]); //*/
1855  if (s % 100000 == 0 && DoPlot) {
1856  const TStr Desc = TStr::Fmt("P(NodeSwap)=%g. Nodes: %d, Edges: %d, Params: %d, WarmUp: %s, Samples: %s", PermSwapNodeProb(),
1857  Graph->GetNodes(), Graph->GetEdges(), GetParams(), TInt::GetMegaStr(WarmUp).CStr(), TInt::GetMegaStr(NSamples).CStr());
1858  PlotGrad(EstLLV, TrueLLV, GradVV, AcceptV, OutFNm, Desc);
1859  for (int n = 0; n < SamplVV.Len(); n++) {
1860  PlotAutoCorrelation(SamplVV[n], 1000, TStr::Fmt("%s-n%d", OutFNm.CStr(), n), Desc); }
1861  printf(" samples: %d, time: %s, samples/s: %.1f\n", s, ExeTm.GetTmStr(), double(s+1)/ExeTm.GetSecs());
1862  }
1863  AvgLL = 0; AvgGradV.PutAll(0); NAccept=0;
1864  }
1865  }
1866  if (DoPlot) {
1867  const TStr Desc = TStr::Fmt("P(NodeSwap)=%g. Nodes: %d, Edges: %d, Params: %d, WarmUp: %s, Samples: %s", PermSwapNodeProb(),
1868  Graph->GetNodes(), Graph->GetEdges(), GetParams(), TInt::GetMegaStr(WarmUp).CStr(), TInt::GetMegaStr(NSamples).CStr());
1869  PlotGrad(EstLLV, TrueLLV, GradVV, AcceptV, OutFNm, Desc);
1870  for (int n = 0; n < SamplVV.Len(); n++) {
1871  PlotAutoCorrelation(SamplVV[n], 1000, TStr::Fmt("%s-n%d", OutFNm.CStr(), n), Desc); }
1872  }
1873  return SampleLLV; // seems to work better for potential scale reduction plot
1874 }
1875 
1876 void McMcGetAvgAvg(const TFltV& AvgJV, double& AvgAvg) {
1877  AvgAvg = 0.0;
1878  for (int j = 0; j < AvgJV.Len(); j++) {
1879  AvgAvg += AvgJV[j]; }
1880  AvgAvg /= AvgJV.Len();
1881 }
1882 
1883 void McMcGetAvgJ(const TVec<TFltV>& ChainLLV, TFltV& AvgJV) {
1884  for (int j = 0; j < ChainLLV.Len(); j++) {
1885  const TFltV& ChainV = ChainLLV[j];
1886  double Avg = 0;
1887  for (int i = 0; i < ChainV.Len(); i++) {
1888  Avg += ChainV[i];
1889  }
1890  AvgJV.Add(Avg/ChainV.Len());
1891  }
1892 }
1893 
1894 // calculates the chain potential scale reduction (see Gelman Bayesian Statistics book)
1895 double TKroneckerLL::CalcChainR2(const TVec<TFltV>& ChainLLV) {
1896  const double J = ChainLLV.Len();
1897  const double K = ChainLLV[0].Len();
1898  TFltV AvgJV; McMcGetAvgJ(ChainLLV, AvgJV);
1899  double AvgAvg; McMcGetAvgAvg(AvgJV, AvgAvg);
1900  IAssert(AvgJV.Len() == ChainLLV.Len());
1901  double InChainVar=0, OutChainVar=0;
1902  // between chain var
1903  for (int j = 0; j < AvgJV.Len(); j++) {
1904  OutChainVar += TMath::Sqr(AvgJV[j] - AvgAvg); }
1905  OutChainVar = OutChainVar * (K/double(J-1));
1906  printf("*** %g chains of len %g\n", J, K);
1907  printf(" ** between chain var: %f\n", OutChainVar);
1908  //within chain variance
1909  for (int j = 0; j < AvgJV.Len(); j++) {
1910  const TFltV& ChainV = ChainLLV[j];
1911  for (int k = 0; k < ChainV.Len(); k++) {
1912  InChainVar += TMath::Sqr(ChainV[k] - AvgJV[j]); }
1913  }
1914  InChainVar = InChainVar * 1.0/double(J*(K-1));
1915  printf(" ** within chain var: %f\n", InChainVar);
1916  const double PostVar = (K-1)/K * InChainVar + 1.0/K * OutChainVar;
1917  printf(" ** posterior var: %f\n", PostVar);
1918  const double ScaleRed = sqrt(PostVar/InChainVar);
1919  printf(" ** scale reduction (< 1.2): %f\n\n", ScaleRed);
1920  return ScaleRed;
1921 }
1922 
1923 // Gelman-Rubin-Brooks plot: how does potential scale reduction chainge with chain length
1924 void TKroneckerLL::ChainGelmapRubinPlot(const TVec<TFltV>& ChainLLV, const TStr& OutFNm, const TStr& Desc) {
1925  TFltPrV LenR2V; // how does potential scale reduction chainge with chain length
1926  TVec<TFltV> SmallLLV(ChainLLV.Len());
1927  const int K = ChainLLV[0].Len();
1928  const int Buckets=1000;
1929  const int BucketSz = K/Buckets;
1930  for (int b = 1; b < Buckets; b++) {
1931  const int End = TMath::Mn(BucketSz*b, K-1);
1932  for (int c = 0; c < ChainLLV.Len(); c++) {
1933  ChainLLV[c].GetSubValV(0, End, SmallLLV[c]); }
1934  LenR2V.Add(TFltPr(End, TKroneckerLL::CalcChainR2(SmallLLV)));
1935  }
1936  LenR2V.Add(TFltPr(K, TKroneckerLL::CalcChainR2(ChainLLV)));
1937  TGnuPlot::PlotValV(LenR2V, TStr::Fmt("gelman-%s", OutFNm.CStr()), TStr::Fmt("%s. %d chains of len %d. BucketSz: %d.",
1938  Desc.CStr(), ChainLLV.Len(), ChainLLV[0].Len(), BucketSz), "Chain length", "Potential scale reduction");
1939 }
1940 
1941 // given a Kronecker graph generate from TrueParam, try to recover the parameters
1942 TFltQu TKroneckerLL::TestKronDescent(const bool& DoExact, const bool& TruePerm, double LearnRate, const int& WarmUp, const int& NSamples, const TKronMtx& TrueParam) {
1943  printf("Test gradient descent on a synthetic kronecker graphs:\n");
1944  if (DoExact) { printf(" -- Exact gradient calculations\n"); }
1945  else { printf(" -- Approximate gradient calculations\n"); }
1946  if (TruePerm) { printf(" -- No permutation sampling (use true permutation)\n"); }
1947  else { printf(" -- Sample permutations (start with degree permutation)\n"); }
1948  TExeTm IterTm;
1949  int Iter;
1950  double OldLL=0, MyLL=0, AvgAbsErr, AbsSumErr;
1951  TFltV MyGradV, SDevV;
1952  TFltV LearnRateV(GetParams()); LearnRateV.PutAll(LearnRate);
1953  if (TruePerm) {
1954  SetOrderPerm();
1955  }
1956  else {
1957  /*printf("SET EIGEN VECTOR PERMUTATIONS\n");
1958  TFltV LeftSV, RightSV;
1959  TGSvd::GetSngVec(Graph, LeftSV, RightSV);
1960  TFltIntPrV V;
1961  for (int v=0; v<LeftSV.Len();v++) { V.Add(TFltIntPr(LeftSV[v], v)); }
1962  V.Sort(false);
1963  NodePerm.Gen(Nodes, 0);
1964  for (int v=0; v < V.Len();v++) { NodePerm.Add(V[v].Val2); } //*/
1965  //printf("RANDOM PERMUTATION\n"); SetRndPerm();
1966  printf("DEGREE PERMUTATION\n"); SetDegPerm();
1967  }
1968  for (Iter = 0; Iter < 100; Iter++) {
1969  if (TruePerm) {
1970  // don't sample over permutations
1971  if (DoExact) { CalcGraphDLL(); CalcGraphLL(); } // slow, O(N^2)
1972  else { CalcApxGraphDLL(); CalcApxGraphLL(); } // fast
1973  MyLL = LogLike; MyGradV = GradV;
1974  } else {
1975  printf(".");
1976  // sample over permutations (approximate calculations)
1977  SampleGradient(WarmUp, NSamples, MyLL, MyGradV);
1978  }
1979  printf("%d] LL: %g, ", Iter, MyLL);
1980  AvgAbsErr = TKronMtx::GetAvgAbsErr(ProbMtx, TrueParam);
1981  AbsSumErr = fabs(ProbMtx.GetMtxSum() - TrueParam.GetMtxSum());
1982  printf(" avgAbsErr: %.4f, absSumErr: %.4f, newLL: %.2f, deltaLL: %.2f\n", AvgAbsErr, AbsSumErr, MyLL, OldLL-MyLL);
1983  for (int p = 0; p < GetParams(); p++) {
1984  // set learn rate so that move for each parameter is inside the [0.01, 0.1]
1985  LearnRateV[p] *= 0.9;
1986  //printf("%d: rate: %f delta:%f\n", p, LearnRateV[p], fabs(LearnRateV[p]*MyGradV[p]));
1987  while (fabs(LearnRateV[p]*MyGradV[p]) > 0.1) { LearnRateV[p] *= 0.9; }
1988  //printf(" rate: %f delta:%f\n", LearnRateV[p], fabs(LearnRateV[p]*MyGradV[p]));
1989  while (fabs(LearnRateV[p]*MyGradV[p]) < 0.001) { LearnRateV[p] *= (1.0/0.9); }
1990  //printf(" rate: %f delta:%f\n", LearnRateV[p], fabs(LearnRateV[p]*MyGradV[p]));
1991  printf(" %d] %f <-- %f + %f lrnRate:%g\n", p, ProbMtx.At(p) + LearnRateV[p]*MyGradV[p],
1992  ProbMtx.At(p), (double)(LearnRateV[p]*MyGradV[p]), LearnRateV[p]());
1993  ProbMtx.At(p) = ProbMtx.At(p) + LearnRateV[p]*MyGradV[p];
1994  // box constraints
1995  if (ProbMtx.At(p) > 0.99) { ProbMtx.At(p)=0.99; }
1996  if (ProbMtx.At(p) < 0.01) { ProbMtx.At(p)=0.01; }
1997  }
1998  ProbMtx.GetLLMtx(LLMtx); OldLL = MyLL;
1999  if (AvgAbsErr < 0.01) { printf("***CONVERGED!\n"); break; }
2000  printf("\n"); fflush(stdout);
2001  }
2002  TrueParam.Dump("True Thetas", true);
2003  ProbMtx.Dump("Final Thetas", true);
2004  printf(" AvgAbsErr: %f\n AbsSumErr: %f\n Iterations: %d\n", AvgAbsErr, AbsSumErr, Iter);
2005  printf("Iteration run time: %s, sec: %g\n\n", IterTm.GetTmStr(), IterTm.GetSecs());
2006  return TFltQu(AvgAbsErr, AbsSumErr, Iter, IterTm.GetSecs());
2007 }
2008 
2009 void PlotTrueAndEst(const TStr& OutFNm, const TStr& Desc, const TStr& YLabel, const TFltPrV& EstV, const TFltPrV& TrueV) {
2010  TGnuPlot GP(OutFNm, Desc.CStr(), true);
2011  GP.AddPlot(EstV, gpwLinesPoints, YLabel, "linewidth 1 pointtype 6 pointsize 1");
2012  if (! TrueV.Empty()) { GP.AddPlot(TrueV, gpwLines, "TRUE"); }
2013  GP.SetXYLabel("Gradient descent iterations", YLabel);
2014  GP.SavePng();
2015 }
2016 
2017 void TKroneckerLL::GradDescentConvergence(const TStr& OutFNm, const TStr& Desc1, const bool& SamplePerm, const int& NIters,
2018  double LearnRate, const int& WarmUp, const int& NSamples, const int& AvgKGraphs, const TKronMtx& TrueParam) {
2019  TExeTm IterTm;
2020  int Iter;
2021  double OldLL=0, MyLL=0, AvgAbsErr=0, AbsSumErr=0;
2022  TFltV MyGradV, SDevV;
2023  TFltV LearnRateV(GetParams()); LearnRateV.PutAll(LearnRate);
2024  TFltPrV EZeroV, DiamV, Lambda1V, Lambda2V, AvgAbsErrV, AvgLLV;
2025  TFltPrV TrueEZeroV, TrueDiamV, TrueLambda1V, TrueLambda2V, TrueLLV;
2026  TFltV SngValV; TSnap::GetSngVals(Graph, 2, SngValV); SngValV.Sort(false);
2027  const double TrueEZero = pow((double) Graph->GetEdges(), 1.0/double(KronIters));
2028  const double TrueEffDiam = TSnap::GetAnfEffDiam(Graph, false, 10);
2029  const double TrueLambda1 = SngValV[0];
2030  const double TrueLambda2 = SngValV[1];
2031  if (! TrueParam.Empty()) {
2032  const TKronMtx CurParam = ProbMtx; ProbMtx.Dump();
2033  InitLL(TrueParam); SetOrderPerm(); CalcApxGraphLL(); printf("TrueLL: %f\n", LogLike());
2034  OldLL = LogLike; InitLL(CurParam);
2035  }
2036  const double TrueLL = OldLL;
2037  if (! SamplePerm) { SetOrderPerm(); } else { SetDegPerm(); }
2038  for (Iter = 0; Iter < NIters; Iter++) {
2039  if (! SamplePerm) {
2040  // don't sample over permutations
2041  CalcApxGraphDLL(); CalcApxGraphLL(); // fast
2042  MyLL = LogLike; MyGradV = GradV;
2043  } else {
2044  // sample over permutations (approximate calculations)
2045  SampleGradient(WarmUp, NSamples, MyLL, MyGradV);
2046  }
2047  double SumDiam=0, SumSngVal1=0, SumSngVal2=0;
2048  for (int trial = 0; trial < AvgKGraphs; trial++) {
2049  // generate kronecker graph
2050  PNGraph KronGraph = TKronMtx::GenFastKronecker(ProbMtx, KronIters, true, 0); // approx
2051  //PNGraph KronGraph = TKronMtx::GenKronecker(ProbMtx, KronIters, true, 0); // true
2052  SngValV.Clr(true); TSnap::GetSngVals(KronGraph, 2, SngValV); SngValV.Sort(false);
2053  SumDiam += TSnap::GetAnfEffDiam(KronGraph, false, 10);
2054  SumSngVal1 += SngValV[0]; SumSngVal2 += SngValV[1];
2055  }
2056  // how good is the current fit
2057  AvgLLV.Add(TFltPr(Iter, MyLL));
2058  EZeroV.Add(TFltPr(Iter, ProbMtx.GetMtxSum()));
2059  DiamV.Add(TFltPr(Iter, SumDiam/double(AvgKGraphs)));
2060  Lambda1V.Add(TFltPr(Iter, SumSngVal1/double(AvgKGraphs)));
2061  Lambda2V.Add(TFltPr(Iter, SumSngVal2/double(AvgKGraphs)));
2062  TrueLLV.Add(TFltPr(Iter, TrueLL));
2063  TrueEZeroV.Add(TFltPr(Iter, TrueEZero));
2064  TrueDiamV.Add(TFltPr(Iter, TrueEffDiam));
2065  TrueLambda1V.Add(TFltPr(Iter, TrueLambda1));
2066  TrueLambda2V.Add(TFltPr(Iter, TrueLambda2));
2067  if (Iter % 10 == 0) {
2068  const TStr Desc = TStr::Fmt("%s. Iter: %d, G(%d, %d) K(%d, %d)", Desc1.Empty()?OutFNm.CStr():Desc1.CStr(),
2070  PlotTrueAndEst("LL."+OutFNm, Desc, "Average LL", AvgLLV, TrueLLV);
2071  PlotTrueAndEst("E0."+OutFNm, Desc, "E0 (expected number of edges)", EZeroV, TrueEZeroV);
2072  PlotTrueAndEst("Diam."+OutFNm+"-Diam", Desc, "Effective diameter", DiamV, TrueDiamV);
2073  PlotTrueAndEst("Lambda1."+OutFNm, Desc, "Lambda 1", Lambda1V, TrueLambda1V);
2074  PlotTrueAndEst("Lambda2."+OutFNm, Desc, "Lambda 2", Lambda2V, TrueLambda2V);
2075  if (! TrueParam.Empty()) {
2076  PlotTrueAndEst("AbsErr."+OutFNm, Desc, "Average Absolute Error", AvgAbsErrV, TFltPrV()); }
2077  }
2078  if (! TrueParam.Empty()) {
2079  AvgAbsErr = TKronMtx::GetAvgAbsErr(ProbMtx, TrueParam);
2080  AvgAbsErrV.Add(TFltPr(Iter, AvgAbsErr));
2081  } else { AvgAbsErr = 1.0; }
2082  // update parameters
2083  AbsSumErr = fabs(ProbMtx.GetMtxSum() - TrueEZero);
2084  // update parameters
2085  for (int p = 0; p < GetParams(); p++) {
2086  LearnRateV[p] *= 0.99;
2087  while (fabs(LearnRateV[p]*MyGradV[p]) > 0.1) { LearnRateV[p] *= 0.99; printf(".");}
2088  while (fabs(LearnRateV[p]*MyGradV[p]) < 0.002) { LearnRateV[p] *= (1.0/0.95); printf("*");}
2089  printf(" %d] %f <-- %f + %9f Grad: %9.1f, Rate:%g\n", p, ProbMtx.At(p) + LearnRateV[p]*MyGradV[p],
2090  ProbMtx.At(p), (double)(LearnRateV[p]*MyGradV[p]), MyGradV[p](), LearnRateV[p]());
2091  ProbMtx.At(p) = ProbMtx.At(p) + LearnRateV[p]*MyGradV[p];
2092  // box constraints
2093  if (ProbMtx.At(p) > 1.0) { ProbMtx.At(p)=1.0; }
2094  if (ProbMtx.At(p) < 0.001) { ProbMtx.At(p)=0.001; }
2095  }
2096  printf("%d] LL: %g, ", Iter, MyLL);
2097  printf(" avgAbsErr: %.4f, absSumErr: %.4f, newLL: %.2f, deltaLL: %.2f\n", AvgAbsErr, AbsSumErr, MyLL, OldLL-MyLL);
2098  if (AvgAbsErr < 0.001) { printf("***CONVERGED!\n"); break; }
2099  printf("\n"); fflush(stdout);
2100  ProbMtx.GetLLMtx(LLMtx); OldLL = MyLL;
2101  }
2102  TrueParam.Dump("True Thetas", true);
2103  ProbMtx.Dump("Final Thetas", true);
2104  printf(" AvgAbsErr: %f\n AbsSumErr: %f\n Iterations: %d\n", AvgAbsErr, AbsSumErr, Iter);
2105  printf("Iteration run time: %s, sec: %g\n\n", IterTm.GetTmStr(), IterTm.GetSecs());
2106 }
2107 
2108 // given true N0, fit the parameters, get likelihood and calculate BIC (MDL), plot n0 vs. BIC
2109 void TKroneckerLL::TestBicCriterion(const TStr& OutFNm, const TStr& Desc1, const PNGraph& G, const int& GradIters,
2110  double LearnRate, const int& WarmUp, const int& NSamples, const int& TrueN0) {
2111  TFltPrV BicV, MdlV, LLV;
2112  const double rndGP = G->GetEdges()/TMath::Sqr(double(G->GetNodes()));
2113  const double RndGLL = G->GetEdges()*log(rndGP )+ (TMath::Sqr(double(G->GetNodes()))-G->GetEdges())*log(1-rndGP);
2114  LLV.Add(TFltPr(1, RndGLL));
2115  BicV.Add(TFltPr(1, -RndGLL + 0.5*TMath::Sqr(1)*log(TMath::Sqr(G->GetNodes()))));
2116  MdlV.Add(TFltPr(1, -RndGLL + 32*TMath::Sqr(1)+2*(log((double)1)+log((double)G->GetNodes()))));
2117  for (int NZero = 2; NZero < 10; NZero++) {
2118  const TKronMtx InitKronMtx = TKronMtx::GetInitMtx(NZero, G->GetNodes(), G->GetEdges());
2119  InitKronMtx.Dump("INIT PARAM", true);
2120  TKroneckerLL KronLL(G, InitKronMtx);
2121  KronLL.SetPerm('d'); // degree perm
2122  const double LastLL = KronLL.GradDescent(GradIters, LearnRate, 0.001, 0.01, WarmUp, NSamples);
2123  LLV.Add(TFltPr(NZero, LastLL));
2124  BicV.Add(TFltPr(NZero, -LastLL + 0.5*TMath::Sqr(NZero)*log(TMath::Sqr(G->GetNodes()))));
2125  MdlV.Add(TFltPr(NZero, -LastLL + 32*TMath::Sqr(NZero)+2*(log((double)NZero)+log((double)KronLL.GetKronIters()))));
2126  { TGnuPlot GP("LL-"+OutFNm, Desc1);
2127  GP.AddPlot(LLV, gpwLinesPoints, "Log-likelihood", "linewidth 1 pointtype 6 pointsize 2");
2128  GP.SetXYLabel("NZero", "Log-Likelihood"); GP.SavePng(); }
2129  { TGnuPlot GP("BIC-"+OutFNm, Desc1);
2130  GP.AddPlot(BicV, gpwLinesPoints, "BIC", "linewidth 1 pointtype 6 pointsize 2");
2131  GP.SetXYLabel("NZero", "BIC"); GP.SavePng(); }
2132  { TGnuPlot GP("MDL-"+OutFNm, Desc1);
2133  GP.AddPlot(MdlV, gpwLinesPoints, "MDL", "linewidth 1 pointtype 6 pointsize 2");
2134  GP.SetXYLabel("NZero", "MDL"); GP.SavePng(); }
2135  }
2136 }
2137 
2138 void TKroneckerLL::TestGradDescent(const int& KronIters, const int& KiloSamples, const TStr& Permutation) {
2139  const TStr OutFNm = TStr::Fmt("grad-%s%d-%dk", Permutation.CStr(), KronIters, KiloSamples);
2140  TKronMtx KronParam = TKronMtx::GetMtx("0.8 0.6; 0.6 0.4");
2141  PNGraph Graph = TKronMtx::GenFastKronecker(KronParam, KronIters, true, 0);
2142  TKroneckerLL KronLL(Graph, KronParam);
2143  TVec<TFltV> GradVV(4), SDevVV(4); TFltV XValV;
2144  int NId1 = 0, NId2 = 0, NAccept = 0;
2145  TVec<TMom> GradMomV(4);
2146  TExeTm ExeTm;
2147  if (Permutation == "r") KronLL.SetRndPerm();
2148  else if (Permutation == "d") KronLL.SetDegPerm();
2149  else if (Permutation == "o") KronLL.SetOrderPerm();
2150  else FailR("Unknown permutation (r,d,o)");
2151  KronLL.CalcApxGraphLL();
2152  KronLL.CalcApxGraphDLL();
2153  for (int s = 0; s < 1000*KiloSamples; s++) {
2154  if (KronLL.SampleNextPerm(NId1, NId2)) { // new permutation
2155  KronLL.UpdateGraphDLL(NId1, NId2); NAccept++; }
2156  if (s > 50000) { //warm up period
2157  for (int m = 0; m < 4; m++) { GradVV[m].Add(KronLL.GradV[m]); }
2158  if ((s+1) % 1000 == 0) {
2159  printf(".");
2160  for (int m = 0; m < 4; m++) { GradVV[m].Add(KronLL.GradV[m]); }
2161  XValV.Add((s+1));
2162  if ((s+1) % 100000 == 0) {
2163  TGnuPlot GP(OutFNm, TStr::Fmt("Gradient vs. samples. %d nodes, %d edges", Graph->GetNodes(), Graph->GetEdges()), true);
2164  for (int g = 0; g < GradVV.Len(); g++) {
2165  GP.AddPlot(XValV, GradVV[g], gpwLines, TStr::Fmt("grad %d", g)); }
2166  GP.SetXYLabel("sample index","log Gradient");
2167  GP.SavePng();
2168  }
2169  }
2170  }
2171  }
2172  printf("\n");
2173  for (int m = 0; m < 4; m++) {
2174  GradMomV[m].Def();
2175  printf("grad %d: mean: %12f sDev: %12f median: %12f\n", m,
2176  GradMomV[m].GetMean(), GradMomV[m].GetSDev(), GradMomV[m].GetMedian());
2177  }
2178 }
2179 /*
2180 // sample over permutations
2181 void TKroneckerLL::GradDescent(const double& LearnRate, const int& WarmUp, const int& NSamples, const int& NIter) {
2182  TFltV GradV, SDevV;
2183  double AvgLL;
2184  for (int Iter = 0; Iter < 100; Iter++) {
2185  //SampleGradient(WarmUp, NSamples, AvgLL, GradV, SDevV, true);
2186  SampleGradient(WarmUp, NSamples, AvgLL, GradV);
2187  for (int theta = 0; theta < GetParams(); theta++) {
2188  printf("%d] %f <-- %f + %f\n", theta, ProbMtx.At(theta) + LearnRate*GradV[theta], ProbMtx.At(theta), LearnRate*GradV[theta]);
2189  ProbMtx.At(theta) = ProbMtx.At(theta) + LearnRate*GradV[theta];
2190  // box constraints
2191  if (ProbMtx.At(theta) > 0.99) ProbMtx.At(theta)=0.99;
2192  if (ProbMtx.At(theta) < 0.01) ProbMtx.At(theta)=0.01;
2193  }
2194  ProbMtx.GetLLMtx(LLMtx);
2195  }
2196  ProbMtx.Dump("Final Thetas");
2197 }
2198 */
2199 
2200 
2202 // Add Noise to Graph
2204 int TKronNoise::RemoveNodeNoise(PNGraph& Graph, const int& NNodes, const bool Random) {
2205  IAssert(NNodes > 0 && NNodes < (Graph->GetNodes() / 2));
2206 
2207  int i = 0;
2208  TIntV ShufflePerm;
2209  Graph->GetNIdV(ShufflePerm);
2210  if(Random) {
2211  ShufflePerm.Shuffle(TKronMtx::Rnd);
2212  for(i = 0; i < NNodes; i++) {
2213  Graph->DelNode(int(ShufflePerm[i]));
2214  }
2215  } else {
2216  for(i = 0; i < NNodes; i++) {
2217  Graph->DelNode(int(ShufflePerm[ShufflePerm.Len() - 1 - i]));
2218  }
2219  }
2220 
2221  return Graph->GetNodes();
2222 }
2223 
2224 int TKronNoise::RemoveNodeNoise(PNGraph& Graph, const double& Rate, const bool Random) {
2225  IAssert(Rate > 0 && Rate < 0.5);
2226  return TKronNoise::RemoveNodeNoise(Graph, (int) floor(Rate * double(Graph->GetNodes())), Random);
2227 }
2228 
2229 int TKronNoise::FlipEdgeNoise(PNGraph& Graph, const int& NEdges, const bool Random) {
2230  IAssert(NEdges > 0 && NEdges < Graph->GetEdges());
2231 
2232  const int Nodes = Graph->GetNodes();
2233  const int Edges = Graph->GetEdges();
2234  int Src, Dst;
2235 
2236  TIntV NIdV, TempV;
2237  TIntPrV ToAdd, ToDel;
2238  Graph->GetNIdV(NIdV);
2239 
2240  ToAdd.Gen(NEdges / 2, 0);
2241  for(int i = 0; i < NEdges / 2; i++) {
2242  Src = NIdV[TKronMtx::Rnd.GetUniDevInt(Nodes)];
2243  Dst = NIdV[TKronMtx::Rnd.GetUniDevInt(Nodes)];
2244  if(Graph->IsEdge(Src, Dst)) { i--; continue; }
2245 
2246  ToAdd.Add(TIntPr(Src, Dst));
2247  }
2248 
2249  ToDel.Gen(Edges, 0);
2250  for(TNGraph::TEdgeI EI = Graph->BegEI(); EI < Graph->EndEI(); EI++) {
2251  ToDel.Add(TIntPr(EI.GetSrcNId(), EI.GetDstNId()));
2252  }
2253  ToDel.Shuffle(TKronMtx::Rnd);
2254 
2255  for(int i = 0; i < NEdges / 2; i++) {
2256  Graph->DelEdge(ToDel[i].Val1, ToDel[i].Val2);
2257  Graph->AddEdge(ToAdd[i].Val1, ToAdd[i].Val2);
2258  }
2259 
2260  return Graph->GetEdges();
2261 }
2262 
2263 int TKronNoise::FlipEdgeNoise(PNGraph& Graph, const double& Rate, const bool Random) {
2264  IAssert(Rate > 0 && Rate < 0.5);
2265  return TKronNoise::FlipEdgeNoise(Graph, (int) floor(Rate * double(Graph->GetEdges())), Random);
2266 }
2267 
2268 int TKronNoise::RemoveEdgeNoise(PNGraph& Graph, const int& NEdges) {
2269  IAssert(NEdges > 0 && NEdges < Graph->GetEdges());
2270 
2271  TIntPrV ToDel;
2272 
2273  ToDel.Gen(Graph->GetEdges(), 0);
2274  for(TNGraph::TEdgeI EI = Graph->BegEI(); EI < Graph->EndEI(); EI++) {
2275  if(EI.GetSrcNId() != EI.GetDstNId()) {
2276  ToDel.Add(TIntPr(EI.GetSrcNId(), EI.GetDstNId()));
2277  }
2278  }
2279  ToDel.Shuffle(TKronMtx::Rnd);
2280 
2281  for(int i = 0; i < NEdges; i++) {
2282  Graph->DelEdge(ToDel[i].Val1, ToDel[i].Val2);
2283  }
2284 
2285  return Graph->GetEdges();
2286 }
2287 
2288 int TKronNoise::RemoveEdgeNoise(PNGraph& Graph, const double& Rate) {
2289  IAssert(Rate > 0 && Rate < 0.5);
2290  return TKronNoise::RemoveEdgeNoise(Graph, (int) floor(Rate * double(Graph->GetEdges())));
2291 }
2292 
2293 
2294 
2296 // Kronecker Log Likelihood Maximization
2297 void TKronMaxLL::SetPerm(const char& PermId) {
2298  if (PermId == 'o') KronLL.SetOrderPerm();
2299  else if (PermId == 'd') KronLL.SetDegPerm();
2300  else if (PermId == 'r') KronLL.SetRndPerm();
2301  else FailR("Unknown permutation type (o,d,r)");
2302 }
2303 
2304 /* void TKronMaxLL::EvalNewEdgeP(const TKronMtx& ProbMtx) {
2305  ProbMtx.Dump("\n***EVAL:");
2306  for (int i = 0; i < ProbMtx.Len(); i++) {
2307  // parameters are out of bounds
2308  if (ProbMtx.At(i) < 0.05 || ProbMtx.At(i) > 0.95) {
2309  TFltV MxDerivV(ProbMtx.Len()); MxDerivV.PutAll(1e5);
2310  FEvalH.AddDat(ProbMtx, TFEval(-1e10, MxDerivV));
2311  return;
2312  }
2313  }
2314  double AvgLL;
2315  TFltV GradV, SDevV;
2316  KronLL.InitLL(ProbMtx); // set current point
2317  //KronLL.SampleGradient(WarmUp, NSamples, AvgLL, GradV, SDevV, true); //sample the gradient
2318  KronLL.SampleGradient(WarmUp, NSamples, AvgLL, GradV);
2319  FEvalH.AddDat(ProbMtx, TFEval(AvgLL, GradV));
2320 }
2321 
2322 double TKronMaxLL::GetLL(const TFltV& ThetaV) {
2323  TKronMtx ProbMtx; RoundTheta(ThetaV, ProbMtx);
2324  if (! FEvalH.IsKey(ProbMtx)) {
2325  EvalNewEdgeP(ProbMtx);
2326  }
2327  return FEvalH.GetDat(ProbMtx).LogLike;
2328 }
2329 
2330 void TKronMaxLL::GetDLL(const TFltV& ThetaV, TFltV& GradV) {
2331  TKronMtx ProbMtx; RoundTheta(ThetaV, ProbMtx);
2332  if (! FEvalH.IsKey(ProbMtx)) {
2333  EvalNewEdgeP(ProbMtx);
2334  }
2335  GradV = FEvalH.GetDat(ProbMtx).GradV;
2336 }
2337 
2338 double TKronMaxLL::GetDLL(const TFltV& ThetaV, const int& ParamId) {
2339  TKronMtx ProbMtx; RoundTheta(ThetaV, ProbMtx);
2340  if (! FEvalH.IsKey(ProbMtx)) {
2341  EvalNewEdgeP(ProbMtx);
2342  }
2343  return FEvalH.GetDat(ProbMtx).GradV[ParamId];
2344 }
2345 void TKronMaxLL::MaximizeLL(const int& NWarmUp, const int& Samples) {
2346  WarmUp = NWarmUp;
2347  NSamples = Samples;
2348  TConjGrad<TFunc> ConjGrad(KronLL.GetProbMtx().GetMtx(), TFunc(this));
2349  //TConjGrad<TLogBarFunc> ConjGrad(KronLL.GetEdgeP().GetV(), TLogBarFunc(this, 0.1));
2350  ConjGrad.ConjGradMin(0.1);
2351 }*/
2352 
2353 // round to 3 decimal places
2354 void TKronMaxLL::RoundTheta(const TFltV& ThetaV, TFltV& NewThetaV) {
2355  NewThetaV.Gen(ThetaV.Len());
2356  for (int i = 0; i < ThetaV.Len(); i++) {
2357  NewThetaV[i] = TMath::Round(ThetaV[i], 3); }
2358 }
2359 
2360 // round to 3 decimal places
2361 void TKronMaxLL::RoundTheta(const TFltV& ThetaV, TKronMtx& Kronecker) {
2362  Kronecker.GenMtx((int)sqrt((double)ThetaV.Len()));
2363  for (int i = 0; i < ThetaV.Len(); i++) {
2364  Kronecker.At(i) = TMath::Round(ThetaV[i], 3); }
2365 }
2366 
2369  TKronMtx KronParam = TKronMtx::GetMtx("0.8 0.7; 0.6 0.5");
2370  PNGraph Graph = TKronMtx::GenFastKronecker(KronParam, 8, true, 1);
2371 
2372  TKronMaxLL KronMaxLL(Graph, TFltV::GetV(0.9, 0.7, 0.5, 0.3));
2373  KronMaxLL.SetPerm('d');
2374  //KronMaxLL.MaximizeLL(10000, 50000);
2375  /*TKroneckerLL KronLL(Graph, *TKronMtx::GetMtx("0.9 0.7; 0.5 0.3"));
2376  KronLL.SetDegPerm();
2377  KronLL.GradDescent(0.005/double(Graph->GetNodes()));*/
2378 }
2379 
2381 // Kronecker Phase Plot
2382 /*
2383 void TKronPhasePlot::SaveMatlab(const TStr& OutFNm) const {
2384  FILE *F = fopen(OutFNm.CStr(), "wt");
2385  fprintf(F, "#Take last graph stats\n");
2386  fprintf(F, "#i\tAlpha\tBeta\tNodes\tNonZNodes\tEdges\tWccNodes\tWccEdges\tDiam\tEffDiam\tWccDiam\tWccEffDiam\t1StEigVal\tMxEigVal\n");
2387  for (int i = 0 ; i < PhaseV.Len(); i++) {
2388  const TPhasePoint& Point = PhaseV[i];
2389  const TGrowthStat& GrowthStat = Point.GrowthStat;
2390  const PGraphStat& FirstGrowth = GrowthStat[0];
2391  const PGraphStat& LastGrowth = GrowthStat.Last();
2392  fprintf(F, "%d\t%g\t%g\t", i, Point.Alpha, Point.Beta);
2393  fprintf(F, "%d\t%d\t%d\t", LastGrowth->Nodes, LastGrowth->Edges, LastGrowth->NonZNodes);
2394  fprintf(F, "%d\t%d\t", LastGrowth->WccNodes, LastGrowth->WccEdges);
2395  fprintf(F, "%f\t%f\t%f\t%f\t", LastGrowth->FullDiam, LastGrowth->EffDiam, LastGrowth->FullWccDiam, LastGrowth->EffWccDiam);
2396  //fprintf(F, "%f\t%f", FirstGrowth.MxEigVal, LastGrowth.MxEigVal);
2397  fprintf(F, "\n");
2398  }
2399  fclose(F);
2400 }
2401 
2402 void TKronPhasePlot::KroneckerPhase(const TStr& MtxId, const int& MxIter,
2403  const double& MnAlpha, const double& MxAlpha, const double& AlphaStep,
2404  const double& MnBeta, const double& MxBeta, const double& BetaStep,
2405  const TStr& FNmPref) {
2406  TKronPhasePlot PhasePlot;
2407  TExeTm KronTm;
2408  int AlphaCnt=0, BetaCnt=0;
2409  for (double Alpha = MnAlpha; (Alpha-1e-6) <= MxAlpha; Alpha += AlphaStep) {
2410  AlphaCnt++; BetaCnt = 0;
2411  printf("\n\n****A:%g***********************************************************************", Alpha);
2412  for (double Beta = MnBeta; (Beta-1e-6) <= MxBeta; Beta += BetaStep) {
2413  printf("\n\n==A[%d]:%g====B[%d]:%g=====================================================\n", AlphaCnt, Alpha, BetaCnt, Beta);
2414  BetaCnt++;
2415  TGrowthStat GrowthStat;
2416  PNGraph Graph;
2417  // run Kronecker
2418  TFullRectMtx SeedMtx = TFullRectMtx::GetMtxFromNm(MtxId);
2419  SeedMtx.Epsilonize(Alpha, Beta);
2420  for (int iter = 1; iter < MxIter + 1; iter++) {
2421  printf("%2d] at %s\n", iter, TExeTm::GetCurTm().CStr());
2422  Graph = PNGraph(); KronTm.Tick();
2423  Graph = SeedMtx.GenRMatKronecker(iter, false, 0);
2424  GrowthStat.Add(Graph, TNodeTm(iter));
2425  if (KronTm.GetSecs() > 30 * 60) {
2426  printf("*** TIME LIMIT [%s]\n", KronTm.GetTmStr().CStr()); break; }
2427  }
2428  const TStr Desc = TStr::Fmt("%s. Alpha:%g. Beta:%g", MtxId.CStr(), Alpha, Beta);
2429  const TStr FNmPref1 = TStr::Fmt("%s.a%02d.b%02d", FNmPref.CStr(), AlphaCnt, BetaCnt);
2430  TGPlot::PlotDegDist(Graph, FNmPref1, Desc, false, true, true);
2431  TGPlot::PlotWccDist(Graph, FNmPref1, Desc);
2432  TGPlot::PlotSccDist(Graph, FNmPref1, Desc);
2433  GrowthStat.PlotAll(FNmPref1, Desc);
2434  GrowthStat.SaveTxt(FNmPref1, Desc);
2435  PhasePlot.PhaseV.Add(TPhasePoint(Alpha, Beta, GrowthStat));
2436  }
2437  {TFOut FOut(TStr::Fmt("phase.%s.bin", FNmPref.CStr()));
2438  PhasePlot.Save(FOut); }
2439  }
2440 }
2441 */
2442 /*void TKroneckerLL::SetRndThetas() {
2443  ProbMtx.Dump("TRUE parameters");
2444  TFltV RndV;
2445  double SumRnd = 0.0;
2446  for (int i = 0; i < ProbMtx.Len(); i++) {
2447  RndV.Add(0.1+TKronMtx::Rnd.GetUniDev());
2448  SumRnd += RndV.Last();
2449  }
2450  RndV.Sort(false);
2451  for (int i = 0; i < ProbMtx.Len(); i++) { ProbMtx.At(i) = RndV[i]; }
2452  ProbMtx.Dump("Random parameters");
2453  const double EdgePSum = pow(Graph->GetEdges(), 1.0/KronIters);
2454  bool Repeat = true;
2455  while (Repeat) {
2456  const double Scale = EdgePSum / SumRnd;
2457  Repeat=false; SumRnd = 0.0;
2458  for (int i = 0; i < ProbMtx.Len(); i++) {
2459  ProbMtx.At(i) = ProbMtx.At(i)*Scale;
2460  if (ProbMtx.At(i) > 0.95) { ProbMtx.At(i)=0.95; Repeat=true; }
2461  SumRnd += ProbMtx.At(i);
2462  }
2463  }
2464  ProbMtx.Dump("INIT parameters");
2465  ProbMtx.GetLLMtx(LLMtx);
2466 }*/
2467 
2468 /*
2469 void TKroneckerLL::TestLL() {
2470  TExeTm ExeTm;
2471  // approximation to empty graph log-likelihood
2472  */
2473  /*{ PNGraph Graph = TNGraph::New();
2474  for (uint i = 0; i < TMath::Pow2(4); i++) { Graph->AddNode(i); } //8k nodes
2475  PKronecker KronParam = TKronMtx::GetMtx("0.8 0.6; 0.7 0.3");
2476  TKroneckerLL KronLL(Graph, KronParam);
2477  printf("\nNodes: %d\n", KronLL.GetNodes());
2478  printf("Full Graph LL: %f\n", KronLL.GetFullGraphLL());
2479  printf("Empty Graph Exact LL: %f\n", KronLL.GetEmptyGraphLL());
2480  printf("Empty Approx x=log(1-x) LL: %f\n", KronLL.GetApxEmptyGraphLL());
2481  printf("Empty Sample LL (100/node): %f\n", KronLL.GetSampleEmptyGraphLL(Graph->GetNodes() * 100));
2482  KronLL.SetOrderPerm();
2483  printf("\nEdge prob: %f, LL: %f\n", KronParam->GetEdgeProb(0,0,8), log(KronParam->GetEdgeProb(0,0,8)));
2484  printf("No Edge prob: %f, LL: %f\n", KronParam->GetNoEdgeProb(0,0,8), log(KronParam->GetNoEdgeProb(0,0,8)));
2485  printf("Empty Graph LL: %f\n", KronLL.CalcGraphLL());
2486  printf("Apx Empty Graph LL: %f\n", KronLL.CalcApxGraphLL());
2487  Graph->AddEdge(0, 0);
2488  printf("add 1 edge. LL: %f\n", KronLL.CalcGraphLL());
2489  printf("Apx add 1 edge. LL: %f\n", KronLL.CalcApxGraphLL()); }
2490  */
2491 
2492  // log-likelihood versus different Kronecker parameters
2493  /*{ PKronecker KronParam = TKronMtx::GetMtx("0.9 0.6; 0.6 0.2");
2494  PNGraph Graph = TKronMtx::GenKronecker(KronParam, 10, true, 10);
2495  TVec<PKronecker> ParamV;
2496  ParamV.Add(KronParam);
2497  ParamV.Add(TKronMtx::GetMtx("0.6 0.6; 0.6 0.5")); // sum = 2.3
2498  //ParamV.Add(TKronMtx::GetMtx("0.9 0.9; 0.4 0.1")); // sum = 2.3
2499  //ParamV.Add(TKronMtx::GetMtx("0.8 0.7; 0.6 0.2")); // sum = 2.3
2500  ParamV.Add(TKronMtx::GetMtx("0.9 0.9; 0.6 0.2")); // sum = 2.6
2501  for (int i = 0; i < ParamV.Len(); i++) {
2502  ParamV[i]->Dump();
2503  TKroneckerLL KronLL(Graph, ParamV[i]);
2504  for (int k = 0; k < 3; k++) {
2505  if (k==0) { KronLL.SetOrderPerm(); printf("Order permutation:\n"); }
2506  if (k==1) { KronLL.SetDegPerm(); printf("Degree permutation:\n"); }
2507  if (k==2) { KronLL.SetRndPerm(); printf("Random permutation:\n"); }
2508  const double LL = KronLL.CalcGraphLL(), aLL = KronLL.CalcApxGraphLL();
2509  printf(" Exact Graph LL: %f\n", LL);
2510  printf(" Approx Graph LL: %f\n", aLL);
2511  printf(" error : %.12f\n", -fabs(LL-aLL)/LL);
2512  }
2513  } }
2514  */
2515  // exact vs. approximate log-likelihood
2516  /*{ PKronecker KronParam = TKronMtx::GetMtx("0.9 0.6; 0.6 0.2");
2517  PNGraph Graph = TKronMtx::GenFastKronecker(KronParam, 16, true, 0);
2518  TKroneckerLL KronLL(Graph, KronParam);
2519  TMom ExactLL, ApxLL;
2520  printf("Random permutation:\n");
2521  for (int i = 0; i < 100; i++) {
2522  KronLL.SetRndPerm();
2523  //ExactLL.Add(KronLL.CalcGraphLL());
2524  ApxLL.Add(KronLL.CalcApxGraphLL());
2525  //printf(" Exact Graph LL: %f\n", ExactLL.GetVal(ExactLL.GetVals()-1));
2526  printf(" Approx Graph LL: %f\n", ApxLL.GetVal(ApxLL.GetVals()-1));
2527  }
2528  ExactLL.Def(); ApxLL.Def();
2529  //printf("EXACT: %f (%f)\n", ExactLL.GetMean(), ExactLL.GetSDev());
2530  printf("APPROX: %f (%f)\n", ApxLL.GetMean(), ApxLL.GetSDev());
2531  KronLL.SetOrderPerm();
2532  printf("Order permutation:\n");
2533  printf(" Exact Graph LL: %f\n", KronLL.CalcGraphLL());
2534  printf(" Approx Graph LL: %f\n", KronLL.CalcApxGraphLL());
2535  }
2536  */
2537 
2538  // start from random permultation and sort it using bubble sort
2539  // compare the end result with ordered permutation
2540  //PKronecker KronParam = TKronMtx::GetMtx("0.9 0.6; 0.6 0.2");
2541  //PNGraph Graph = TKronMtx::GenFastKronecker(KronParam, 10, true, 1);
2542  /*PKronecker KronParam = TKronMtx::GetMtx("0.9 0.7; 0.9 0.5");
2543  PNGraph Graph = TKronMtx::GenFastKronecker(KronParam, 6, true, 2);
2544  TGAlg::SaveFullMtx(Graph, "kron32.tab");
2545 
2546  TKroneckerLL KronLL(Graph, KronParam);
2547  KronLL.SetOrderPerm();
2548  KronLL.LogLike = KronLL.CalcApxGraphLL();
2549  printf(" Approx Graph LL: %f\n", KronLL.CalcApxGraphLL());
2550  printf(" swap 1-20: %f\n", KronLL.SwapNodesLL(1, 20));
2551  printf(" swap 20-30: %f\n", KronLL.SwapNodesLL(20, 30));
2552  printf(" swap 30-1: %f\n", KronLL.SwapNodesLL(1, 30));
2553  printf(" swap 20-30: %f\n", KronLL.SwapNodesLL(30, 20));
2554  IAssert(KronLL.GetPerm().IsSorted());
2555  KronLL.SetRndPerm();
2556  KronLL.LogLike = KronLL.CalcApxGraphLL();
2557  for (int i = 0; i < 1000000; i++) {
2558  const int nid1 = TInt::Rnd.GetUniDevInt(KronLL.Nodes);
2559  const int nid2 = TInt::Rnd.GetUniDevInt(KronLL.Nodes);
2560  printf("%3d] swap LL: %f\n", i, KronLL.SwapNodesLL(nid1, nid2));
2561  }
2562  printf("*** approx LL: %f\n", KronLL.CalcApxGraphLL());
2563  printf("*** exact LL: %f\n", KronLL.CalcGraphLL());
2564  */
2565  /*ExeTm.Tick();
2566  // bubble sort
2567  for (int i = 0; i < Graph->GetNodes()-1; i++) {
2568  for (int j = 1; j < Graph->GetNodes(); j++) {
2569  if (KronLL.GetPerm()[j-1] > KronLL.GetPerm()[j]) {
2570  const double oldLL = KronLL.GetLL();
2571  const double newLL = KronLL.SwapNodesLL(j-1, j);
2572  //const double trueLL = KronLL.CalcApxGraphLL();
2573  //printf("swap %3d - %3d: old: %f new: %f true:%f\n",
2574  // KronLL.GetPerm()[j-1], KronLL.GetPerm()[j], oldLL, newLL, trueLL);
2575  }
2576  }
2577  }
2578  //for (int i = 0; i < 100000; i++) {
2579  // KronLL.SwapNodesLL(TInt::Rnd.GetUniDevInt(TMath::Pow2(16)), TInt::Rnd.GetUniDevInt(TMath::Pow2(16))); }
2580  printf("\nPermutation is %s\n", KronLL.GetPerm().IsSorted()? "SORTED" : "NOT SORTED");
2581  printf(" Swap Graph LL: %f\n", KronLL.GetLL());
2582  printf(" Approx Graph LL: %f\n", KronLL.CalcApxGraphLL());
2583  KronLL.SetOrderPerm();
2584  printf(" Order Graph LL: %f\n\n", KronLL.CalcApxGraphLL());
2585  printf("Permutation is %s\n", KronLL.GetPerm().IsSorted()? "SORTED" : "NOT SORTED");
2586  printf("time: %f\n", ExeTm.GetSecs());
2587  */
2588  // evaluate the derivatives
2589  /*{ PNGraph Graph = TNGraph::New();
2590  TKronMtx KronParam = TKronMtx::GetMtx("0.8 0.4; 0.4 0.2");
2591  //for (uint i = 0; i < TMath::Pow2(4); i++) { Graph->AddNode(i); } //8k nodes
2592  Graph = TKronMtx::GenFastKronecker(KronParam, 8, true, 2); //TGAlg::SaveFullMtx(Graph, "kron16.txt");
2593  TKroneckerLL KronLL(Graph, KronParam);
2594  KronLL.SetOrderPerm();
2595  printf("\nNodes: %d\n", KronLL.GetNodes());
2596  printf("Full Graph Exact LL: %f\n", KronLL.GetFullGraphLL());
2597  printf("Empty Graph Exact LL: %f\n", KronLL.GetEmptyGraphLL());
2598  printf("Empty Approx LL: %f\n", KronLL.GetApxEmptyGraphLL());
2599  printf("Exact Graph LL: %f\n", KronLL.CalcGraphLL());
2600  printf("Apx Graph LL: %f\n\n", KronLL.CalcApxGraphLL());
2601  // derivatives
2602  printf("Empty graph Exact DLL: %f\n", KronLL.GetEmptyGraphDLL(0));
2603  printf("Empty graph Apx DLL: %f\n", KronLL.GetApxEmptyGraphDLL(0));
2604  printf("Theta0 edge(0,1) DLL: %f\n", KronLL.LLMtx.GetEdgeDLL(0, 0, 1, 4));
2605  printf("Theta0 NO edge(0,1) DLL: %f\n", KronLL.LLMtx.GetNoEdgeDLL(0, 0, 1, 4));
2606  printf("Theta0 NO edge(0,1) DLL: %f\n", KronLL.LLMtx.GetApxNoEdgeDLL(0, 0, 1, 4));
2607  KronLL.CalcGraphDLL(); printf("Exact Theta0 DLL:"); KronLL.GetDLL(0);
2608  KronLL.CalcApxGraphDLL(); printf("Apx Theta0 DLL:"); KronLL.GetDLL(0);
2609  KronLL.CalcFullApxGraphDLL(); printf("Apx Theta0 DLL:"); KronLL.GetDLL(0);
2610  // swap
2611  */
2612  /*for (int i = 0; i < 100; i++) {
2613  const int A = TInt::Rnd.GetUniDevInt(KronLL.Nodes);
2614  KronLL.NodePerm.Swap(i, A);
2615  //KronLL.UpdateGraphDLL(i, A); printf("Fast Theta0 DLL:"); KronLL.GetDLL(0);
2616  KronLL.CalcApxGraphDLL(); printf("Apx Theta0 DLL:"); KronLL.GetDLL(0);
2617  //KronLL.CalcFullApxGraphDLL(); printf("Apx Theta0 DLL:"); KronLL.GetDLL(0);
2618  //KronLL.CalcGraphDLL(); printf("Exact Theta0 DLL:"); KronLL.GetDLL(0);
2619  printf("\n");
2620  } */
2621  //}
2622 //}
2623 
2624 /*void TKroneckerLL::SampleGradient(const int& WarmUp, const int& NSamples, double& AvgLL, TFltV& AvgGradV, TFltV& SDevV, const bool& Plot) {
2625  printf("Samples: %s (warm-up: %s)\n", TInt::GetMegaStr(NSamples).CStr(), TInt::GetMegaStr(WarmUp).CStr());
2626  int NId1 = 0, NId2 = 0;
2627  TExeTm ExeTm;
2628  CalcApxGraphLL();
2629  for (int s = 0; s < WarmUp; s++) {
2630  SampleNextPerm(NId1, NId2); }
2631  printf(" warm-up:%s", ExeTm.GetTmStr()); ExeTm.Tick();
2632  CalcApxGraphLL();
2633  CalcApxGraphDLL();
2634  AvgLL = 0;
2635  TVec<TMom> DLLMomV(LLMtx.Len());
2636  for (int s = 0; s < NSamples; s++) {
2637  if (SampleNextPerm(NId1, NId2)) { // new permutation
2638  UpdateGraphDLL(NId1, NId2);
2639  }
2640  AvgLL += GetLL();
2641  for (int m = 0; m < LLMtx.Len(); m++) { DLLMomV[m].Add(GradV[m]); }
2642  }
2643  AvgLL = AvgLL / (NSamples*Nodes);
2644  // plot gradients over sampling time
2645  if (Plot) {
2646  TVec<TFltV> FltVV(LLMtx.Len()+1);
2647  for (int s = 0; s < DLLMomV[0].GetVals(); s += 1000) {
2648  for (int m = 0; m < LLMtx.Len(); m++) { FltVV[m].Add(DLLMomV[m].GetVal(s)); }
2649  FltVV.Last().Add(s); }
2650  const TStr FNm = TFile::GetUniqueFNm(TStr::Fmt("grad%dW%sS%s-#.png", KronIters, TInt::GetMegaStr(WarmUp).CStr(), TInt::GetMegaStr(NSamples).CStr()));
2651  TGnuPlot GP(FNm.GetFMid(), TStr::Fmt("Gradient vs. Sample Index. Nodes: %d, WarmUp: %s, Samples: %s, Avg LL: %f", Nodes,
2652  TInt::GetMegaStr(WarmUp).CStr(), TInt::GetMegaStr(NSamples).CStr(), AvgLL), true);
2653  for (int m = 0; m < LLMtx.Len(); m++) {
2654  GP.AddPlot(FltVV.Last(), FltVV[m], gpwLines, TStr::Fmt("Grad %d", m+1), "linewidth 5"); }
2655  GP.SetXYLabel("Sample Index (time)", "Log-likelihood gradient");
2656  GP.SavePng();
2657  }
2658  // average gradients
2659  printf(" sampling:%s\n", ExeTm.GetTmStr());
2660  printf(" AverageLL: %f\n", AvgLL);
2661  printf("Gradients:\n");
2662  AvgGradV.Gen(LLMtx.Len());
2663  SDevV.Gen(LLMtx.Len());
2664  for (int m = 0; m < LLMtx.Len(); m++) {
2665  DLLMomV[m].Def();
2666  AvgGradV[m] = DLLMomV[m].GetMean() / (Nodes*Nodes);
2667  SDevV[m] = DLLMomV[m].GetSDev() / (Nodes*Nodes);
2668  printf(" %d] mean: %16f sDev: %16f\n", m, AvgGradV[m], SDevV[m]);
2669  }
2670 }
2671 
2672 void TKronMaxLL::TFunc::FDeriv(const TFltV& Point, TFltV& GradV) {
2673  CallBack->GetDLL(Point, GradV);
2674  for (int i = 0; i < GradV.Len(); i++) { GradV[i] = -GradV[i]; }
2675 }
2676 
2677 double TKronMaxLL::TLogBarFunc::FVal(const TFltV& Point) {
2678  // log-likelihood
2679  const double LogLL = CallBack->GetLL(Point);
2680  // log-barrier
2681  const double MinBarrier = 0.05;
2682  const double MaxBarrier = 0.95;
2683  const double T1 = 1.0/T;
2684  double Barrier = 0.0;
2685  for (int i = 0; i < Point.Len(); i++) {
2686  if(Point[i].Val > MinBarrier && Point[i].Val < MaxBarrier) {
2687  Barrier += - T1 * (log(Point[i]-MinBarrier) + log(MaxBarrier-Point[i])); //log-barrier
2688  } else { Barrier = 1e5; }
2689  }
2690  IAssert(Barrier > 0.0);
2691  printf("barrrier: %f\n", Barrier);
2692  return -LogLL + Barrier; // minus LL since we want to maximize it
2693 }
2694 
2695 void TKronMaxLL::TLogBarFunc::FDeriv(const TFltV& Point, TFltV& DerivV) {
2696  // derivative of log-likelihood
2697  CallBack->GetDLL(Point, DerivV);
2698  // derivative of log barrier
2699  const double MinBarrier = 0.05;
2700  const double MaxBarrier = 0.95;
2701  const double T1 = 1.0/T;
2702  for (int i = 0; i < Point.Len(); i++) {
2703  DerivV[i] = - DerivV[i] + (- T1*(1.0/(Point[i]-MinBarrier) - 1.0/(MaxBarrier-Point[i])));
2704  }
2705 }
2706 */
#define IAssert(Cond)
Definition: bd.h:262
static const T & Mn(const T &LVal, const T &RVal)
Definition: xmath.h:36
void SetRndMtx(const int &MtxDim, const double &MinProb=0.0)
Definition: kronecker.cpp:40
TPair< TInt, TInt > TIntPr
Definition: ds.h:83
TKronMtx & operator=(const TKronMtx &Kronecker)
Definition: kronecker.cpp:25
static int RemoveNodeNoise(PNGraph &Graph, const int &NNodes, const bool Random=true)
!!!!! MYUNGHWAN, CHECK!
Definition: kronecker.cpp:2204
static PNGraph GenKronecker(const TKronMtx &SeedMtx, const int &NIter, const bool &IsDir, const int &Seed=0)
Definition: kronecker.cpp:312
static PKroneckerLL New()
Definition: kronecker.h:154
double GetEdgeProb(int NId1, int NId2, const int &NKronIters) const
Definition: kronecker.cpp:161
void GetNIdV(TIntV &NIdV) const
Gets a vector IDs of all nodes in the graph.
Definition: graph.cpp:376
#define IAssertR(Cond, Reason)
Definition: bd.h:265
static TRnd Rnd
Definition: kronecker.h:14
TPair< TFlt, TInt > TFltIntPr
Definition: ds.h:97
TNodeI BegNI() const
Returns an iterator referring to the first node in the graph.
Definition: graph.h:548
int GetParams() const
Definition: kronecker.h:164
int GetEdges(const int &NIter) const
Definition: kronecker.cpp:123
static void TestBicCriterion(const TStr &OutFNm, const TStr &Desc1, const PNGraph &G, const int &GradIters, double LearnRate, const int &WarmUp, const int &NSamples, const int &TrueN0)
Definition: kronecker.cpp:2109
Definition: tm.h:355
static TStr GetMegaStr(const int &Val)
Definition: dt.h:1226
double GetMtxSum() const
Definition: kronecker.cpp:140
static void KronMul(const TKronMtx &LeftPt, const TKronMtx &RightPt, TKronMtx &OutMtx)
Definition: kronecker.cpp:591
TFltV TestSamplePerm(const TStr &OutFNm, const int &WarmUp, const int &NSamples, const TKronMtx &TrueMtx, const bool &DoPlot=true)
Definition: kronecker.cpp:1795
static TKronMtx LoadTxt(const TStr &MtxFNm)
Definition: kronecker.cpp:768
static PSs LoadTxt(const TSsFmt &SsFmt, const TStr &FNm, const PNotify &Notify=NULL, const bool &IsExcelEoln=true, const int &MxY=-1, const TIntV &AllowedColNV=TIntV(), const bool &IsQStr=true)
Definition: ss.cpp:100
TFltV & GetMtx()
Definition: kronecker.h:35
static bool IsNum(const char &Ch)
Definition: dt.h:1067
Definition: dt.h:11
void SetRandomEdges(const int &NEdges, const bool isDir=true)
!!!!! MYUNGHWAN, CHECK!
Definition: kronecker.cpp:1417
Definition: ds.h:130
void Del(const TSizeTy &ValN)
Removes the element at position ValN.
Definition: ds.h:1189
int GetKronIter(const int &Nodes) const
Definition: kronecker.cpp:127
TKronEMType
Definition: kronecker.h:114
void SavePng(const int &SizeX=1000, const int &SizeY=800, const TStr &Comment=TStr())
Definition: gnuplot.h:120
PGraph GetMxWcc(const PGraph &Graph)
Returns a graph representing the largest weakly connected component on an input Graph.
Definition: cncom.h:452
static const T & Mx(const T &LVal, const T &RVal)
Definition: xmath.h:32
static PNGraph New()
Static constructor that returns a pointer to the graph. Call: PNGraph Graph = TNGraph::New().
Definition: graph.h:481
void RunEStep(const int &GibbsWarmUp, const int &WarmUp, const int &NSamples, TFltV &LLV, TVec< TFltV > &DLLV)
Definition: kronecker.cpp:1567
TNodeI GetNI(const int &NId) const
Returns an iterator referring to the node of ID NId in the graph.
Definition: graph.h:552
void UpdateGraphDLL(const int &SwapNId1, const int &SwapNId2)
Definition: kronecker.cpp:1241
unsigned int uint
Definition: bd.h:11
static double CalcChainR2(const TVec< TFltV > &ChainLLV)
Definition: kronecker.cpp:1895
void SetOrderPerm()
Definition: kronecker.cpp:813
double GetApxNoEdgeLL(int NId1, int NId2, const int &NKronIters) const
Definition: kronecker.cpp:191
TEdgeI EndEI() const
Returns an iterator referring to the past-the-end edge in the graph.
Definition: graph.h:596
Definition: gstat.h:28
double GetFullGraphLL() const
Definition: kronecker.cpp:943
int GetEdges() const
Returns the number of edges in the graph.
Definition: graph.cpp:313
bool IsEdgePlace(int NId1, int NId2, const int &NKronIters, const double &ProbTresh) const
Definition: kronecker.cpp:196
Definition: bits.h:119
void Swap(TKronMtx &KronMtx)
Definition: kronecker.cpp:114
TSizeTy Len() const
Returns the number of elements in the vector.
Definition: ds.h:575
int Len() const
Definition: kronecker.h:31
void GetMinMax(const TFltPrV &XYValV, double &Min, double &Max, const bool &ResetMinMax)
Definition: kronecker.cpp:1732
void Dump(const TStr &MtxNm=TStr(), const bool &Sort=false) const
Definition: kronecker.cpp:636
TEdgeI BegEI() const
Returns an iterator referring to the first edge in the graph.
Definition: graph.h:594
double GetAnfEffDiam(const PGraph &Graph, const bool &IsDir, const double &Percentile, const int &NApprox)
Definition: anf.h:216
double NodeDLLDelta(const int ParamId, const int &NId) const
Definition: kronecker.cpp:1214
double GetFullRowLL(int RowId) const
Definition: kronecker.cpp:956
void SetPerm(const char &PermId)
Definition: kronecker.cpp:2297
int GetNodes() const
Returns the number of nodes in the graph.
Definition: graph.h:503
double GetEdgeLL(int NId1, int NId2, const int &NKronIters) const
Definition: kronecker.cpp:175
void SetXYLabel(const TStr &XLabel, const TStr &YLabel)
Definition: gnuplot.h:73
double GetEmptyGraphDLL(const int &ParamId) const
Definition: kronecker.cpp:1136
TVec< TFltPr > TFltPrV
Definition: ds.h:1604
double GetEmptyGraphLL() const
Definition: kronecker.cpp:976
int AddNode(int NId=-1)
Adds a node of ID NId to the graph.
Definition: graph.cpp:236
static const double NInf
Definition: kronecker.h:13
TVal1 Val1
Definition: ds.h:132
static uint GetNodeSig(const double &OneProb=0.5)
Definition: kronecker.cpp:262
bool SampleNextPerm(int &NId1, int &NId2)
Definition: kronecker.cpp:1111
static TKronMtx GetInitMtx(const int &Dim, const int &Nodes, const int &Edges)
Definition: kronecker.cpp:705
TStr GetMtxStr() const
Definition: kronecker.cpp:80
void RunKronEM(const int &EMIter, const int &GradIter, double LrnRate, double MnStep, double MxStep, const int &GibbsWarmUp, const int &WarmUp, const int &NSamples, const TKronEMType &Type=kronNodeMiss, const int &NMissing=-1)
Definition: kronecker.cpp:1692
static double Sqr(const double &x)
Definition: xmath.h:12
void SetIPerm(const TIntV &Perm)
!!!!! MYUNGHWAN, CHECK!
Definition: kronecker.cpp:875
void SetYRange(const double &Min, const double &Max)
Definition: gnuplot.h:80
void SetForEdges(const int &Nodes, const int &Edges)
Definition: kronecker.cpp:59
TKroneckerLL KronLL
Definition: kronecker.h:281
TVec< TIntTr > TIntTrV
Definition: ds.h:1602
static void KronPwr(const TKronMtx &KronPt, const int &NIter, TKronMtx &OutMtx)
Definition: kronecker.cpp:626
static const double Mx
Definition: dt.h:1391
TFlt LogLike
Definition: kronecker.h:135
int ChangeChAll(const char &SrcCh, const char &DstCh)
Definition: dt.cpp:1113
TFlt PermSwapNodeProb
Definition: kronecker.h:123
const double & At(const int &Row, const int &Col) const
Definition: kronecker.h:46
void SetEpsMtx(const double &Eps1, const double &Eps0, const int &Eps1Val=1, const int &Eps0Val=0)
Definition: kronecker.cpp:50
double CalcGraphLL()
Definition: kronecker.cpp:1022
double GetEdgeDLL(const int &ParamId, int NId1, int NId2, const int &NKronIters) const
Definition: kronecker.cpp:207
Definition: dt.h:1386
void RestoreGraph(const bool RestoreNodes=true)
!!!!! MYUNGHWAN, CHECK!
Definition: kronecker.cpp:923
bool Empty() const
Tests whether the vector is empty.
Definition: ds.h:570
Definition: gstat.h:29
static void TestGradDescent(const int &KronIters, const int &KiloSamples, const TStr &Permutation)
Definition: kronecker.cpp:2138
Definition: gstat.h:28
bool IsProbMtx() const
Definition: kronecker.cpp:33
bool IsObsEdge(const int &NId1, const int &NId2) const
Definition: kronecker.h:173
TIntV NodePerm
Definition: kronecker.h:128
TInt MtxDim
Definition: kronecker.h:16
Graph Statistics Sequence.
Definition: gstat.h:155
double CalcApxGraphLL()
Definition: kronecker.cpp:1037
void DelNode(const int &NId)
Deletes node of ID NId from the graph.
Definition: graph.cpp:294
void Swap(TVec< TVal, TSizeTy > &Vec)
Swaps the contents of the vector with Vec.
Definition: ds.h:1101
void GetProbMtx(TKronMtx &ProbMtx)
Definition: kronecker.cpp:106
const char * GetTmStr() const
Definition: tm.h:370
void SetBestDegPerm()
!!!!! MYUNGHWAN, CHECK!
Definition: kronecker.cpp:842
void DelZeroDegNodes(PGraph &Graph)
Removes all the zero-degree nodes, that isolated nodes, from the graph.
Definition: alg.h:432
TVal2 Val2
Definition: ds.h:133
void SetDegPerm()
Definition: kronecker.cpp:828
TVec< TKronMtx > MtxV
Definition: kronecker.h:143
void Clr(const bool &DoDel=true, const TSizeTy &NoDelLim=-1)
Clears the contents of the vector.
Definition: ds.h:1022
void SetPerm(const char &PermId)
Definition: kronecker.cpp:805
static int FlipEdgeNoise(PNGraph &Graph, const int &NEdges, const bool Random=true)
Definition: kronecker.cpp:2229
void Sort(const bool &Asc=true)
Sorts the elements of the vector.
Definition: ds.h:1318
double GetApxEmptyGraphLL() const
Definition: kronecker.cpp:987
Edge iterator. Only forward iteration (operator++) is supported.
Definition: graph.h:430
static void PutRndSeed(const int &Seed)
Definition: kronecker.h:108
double GetApxNoEdgeDLL(const int &ParamId, int NId1, int NId2, const int &NKronIters) const
Definition: kronecker.cpp:240
static TKronMtx GetMtxFromNm(const TStr &MtxNm)
Definition: kronecker.cpp:753
int AddEdge(const int &SrcNId, const int &DstNId)
Adds an edge from node SrcNId to node DstNId to the graph.
Definition: graph.cpp:321
void SetRndPerm()
Definition: kronecker.cpp:820
void PutAll(const TVal &Val)
Sets all elements of the vector to value Val.
Definition: ds.h:1229
void PlotGrad(const TFltPrV &EstLLV, const TFltPrV &TrueLLV, const TVec< TFltPrV > &GradVV, const TFltPrV &AcceptV, const TStr &OutFNm, const TStr &Desc)
Definition: kronecker.cpp:1740
void SampleGradient(const int &WarmUp, const int &NSamples, double &AvgLL, TFltV &GradV)
Definition: kronecker.cpp:1271
PNGraph GenRndGraph(const double &RndFact=1.0) const
Definition: kronecker.cpp:295
double NodeLLDelta(const int &NId) const
Definition: kronecker.cpp:1055
void DelEdge(const int &SrcNId, const int &DstNId, const bool &IsDir=true)
Deletes an edge from node IDs SrcNId to DstNId from the graph.
Definition: graph.cpp:345
bool IsEdge(const int &SrcNId, const int &DstNId, const bool &IsDir=true) const
Tests whether an edge from node IDs SrcNId to DstNId exists in the graph.
Definition: graph.cpp:363
PUNGraph GetSubGraph(const PUNGraph &Graph, const TIntV &NIdV, const bool &RenumberNodes)
Returns an induced subgraph of an undirected graph Graph with NIdV nodes with an optional node renumb...
Definition: subgraph.cpp:7
double GetApxEmptyGraphDLL(const int &ParamId) const
Definition: kronecker.cpp:1147
!!!!! MYUNGHWAN, CHECK!
Definition: kronecker.h:116
void PlotAutoCorrelation(const TFltV &ValV, const int &MaxK, const TStr &OutFNm, const TStr &Desc)
Definition: kronecker.cpp:1773
double RunMStep(const TFltV &LLV, const TVec< TFltV > &DLLV, const int &GradIter, const double &LrnRate, double MnStep, double MxStep)
Definition: kronecker.cpp:1597
static double Round(const double &Val)
Definition: xmath.h:16
int GetKronIters() const
Definition: kronecker.h:159
TBool DebugMode
Definition: kronecker.h:141
#define Assert(Cond)
Definition: bd.h:251
bool IsNode(const int &NId) const
Tests whether ID NId is a node.
Definition: graph.h:546
static double Power(const double &Base, const double &Exponent)
Definition: xmath.h:25
bool Empty() const
Definition: kronecker.h:32
TInt RealEdges
Definition: kronecker.h:132
void AddRndNoise(const double &SDev)
Definition: kronecker.cpp:69
const TVal & Last() const
Returns a reference to the last element of the vector.
Definition: ds.h:579
static double GetAvgFroErr(const TKronMtx &Kron1, const TKronMtx &Kron2)
Definition: kronecker.cpp:668
static void Test()
Definition: kronecker.cpp:2367
Tab separated.
Definition: ss.h:6
void ToOneMinusMtx()
Definition: kronecker.cpp:91
void GradDescentConvergence(const TStr &OutFNm, const TStr &Desc1, const bool &SamplePerm, const int &NIters, double LearnRate, const int &WarmUp, const int &NSamples, const int &AvgKGraphs, const TKronMtx &TrueParam)
Definition: kronecker.cpp:2017
static void ChainGelmapRubinPlot(const TVec< TFltV > &ChainLLV, const TStr &OutFNm, const TStr &Desc)
Definition: kronecker.cpp:1924
TQuad< TFlt, TFlt, TFlt, TFlt > TFltQu
Definition: ds.h:264
Definition: tm.h:14
#define FailR(Reason)
Definition: bd.h:240
double GetFullColLL(int ColId) const
Definition: kronecker.cpp:966
void Tick()
Definition: tm.h:364
void InitLL(const TFltV &ParamV)
Definition: kronecker.cpp:996
TPair< TFlt, TFlt > TFltPr
Definition: ds.h:99
int GetDeg() const
Returns degree of the current node, the sum of in-degree and out-degree.
Definition: graph.h:402
double GetEZero(const int &Edges, const int &KronIter) const
Definition: kronecker.cpp:136
double GetNoEdgeProb(int NId1, int NId2, const int &NKronIters) const
Definition: kronecker.cpp:171
int GetNZeroK(const PNGraph &Graph) const
Definition: kronecker.cpp:132
void GetLLMtx(TKronMtx &LLMtx)
Definition: kronecker.cpp:98
const TFltV & CalcGraphDLL()
Definition: kronecker.cpp:1158
void MetroGibbsSampleSetup(const int &WarmUp)
Definition: kronecker.cpp:1476
const TFltV & CalcFullApxGraphDLL()
Definition: kronecker.cpp:1176
void MetroGibbsSampleNext(const int &WarmUp, const bool DLLUpdate=false)
Definition: kronecker.cpp:1503
void GetSngVals(const PNGraph &Graph, const int &SngVals, TFltV &SngValV)
Computes largest SngVals singular values of the adjacency matrix representing a directed Graph...
Definition: gsvd.cpp:175
Definition: dt.h:201
double GradDescent2(const int &NIter, const double &LrnRate, double MnStep, double MxStep, const int &WarmUp, const int &NSamples)
Definition: kronecker.cpp:1355
static PNGraph GenDetKronecker(const TKronMtx &SeedMtx, const int &NIter, const bool &IsDir)
Definition: kronecker.cpp:458
double GetNrmDev()
Definition: dt.cpp:63
Definition: tm.h:81
void GenMtx(const int &Dim)
Definition: kronecker.h:40
void AppendIsoNodes()
Definition: kronecker.cpp:914
TIntTrV GEdgeV
Definition: kronecker.h:125
Definition: gstat.h:28
double GetColSum(const int &ColId) const
Definition: kronecker.cpp:154
TNodeI EndNI() const
Returns an iterator referring to the past-the-end node in the graph.
Definition: graph.h:550
TInt LSelfEdge
Definition: kronecker.h:127
TIntTrV LEdgeV
Definition: kronecker.h:126
TInt KronIters
Definition: kronecker.h:121
TKronMtx ProbMtx
Definition: kronecker.h:134
void PlotTrueAndEst(const TStr &OutFNm, const TStr &Desc, const TStr &YLabel, const TFltPrV &EstV, const TFltPrV &TrueV)
Definition: kronecker.cpp:2009
TInt MissEdges
Definition: kronecker.h:139
int GetOutDeg() const
Returns out-degree of the current node.
Definition: graph.h:406
int GetDim() const
Definition: kronecker.h:30
const TFltV & CalcApxGraphDLL()
Definition: kronecker.cpp:1194
double GetNoEdgeLL(int NId1, int NId2, const int &NKronIters) const
Definition: kronecker.cpp:186
static TKronMtx GetRndMtx(const int &Dim, const double &MinProb)
Definition: kronecker.cpp:699
Definition: dt.h:412
Definition: ds.h:219
void SaveTxt(const TStr &OutFNm) const
Definition: kronecker.cpp:14
double SwapNodesLL(const int &NId1, const int &NId2)
Definition: kronecker.cpp:1083
bool Empty() const
Definition: dt.h:491
static TStr Fmt(const char *FmtStr,...)
Definition: dt.cpp:1599
TKronMtx LLMtx
Definition: kronecker.h:134
void SplitOnAllCh(const char &SplitCh, TStrV &StrV, const bool &SkipEmpty=true) const
Definition: dt.cpp:926
TInt RealNodes
Definition: kronecker.h:131
void Shuffle(TRnd &Rnd)
Randomly shuffles the elements of the vector.
Definition: ds.h:1335
Node iterator. Only forward iteration (operator++) is supported.
Definition: graph.h:383
PNGraph Graph
Definition: kronecker.h:120
void McMcGetAvgAvg(const TFltV &AvgJV, double &AvgAvg)
Definition: kronecker.cpp:1876
double GetSecs() const
Definition: tm.h:366
double GetUniDev()
Definition: dt.h:30
TFltV GradV
Definition: kronecker.h:136
int AddPlot(const TIntV &YValV, const TGpSeriesTy &SeriesTy=gpwLinesPoints, const TStr &Label=TStr(), const TStr &Style=TStr())
Definition: gnuplot.cpp:186
static void KronSum(const TKronMtx &LeftPt, const TKronMtx &RightPt, TKronMtx &OutMtx)
Definition: kronecker.cpp:607
double GetRowSum(const int &RowId) const
Definition: kronecker.cpp:147
TTriple< TInt, TInt, TInt > TIntTr
Definition: ds.h:171
int GetNodes(const int &NIter) const
Definition: kronecker.cpp:119
void Gen(const TSizeTy &_Vals)
Constructs a vector (an array) of _Vals elements.
Definition: ds.h:523
int GetUniDevInt(const int &Range=0)
Definition: dt.cpp:39
PNGraph GenThreshGraph(const double &Thresh) const
Definition: kronecker.cpp:283
double GetNoEdgeDLL(const int &ParamId, int NId1, int NId2, const int &NKronIters) const
Definition: kronecker.cpp:220
static TVec< TFlt, int > GetV(const TFlt &Val1)
Returns a vector on element Val1.
Definition: ds.h:848
void SetGraph(const PNGraph &GraphPt)
Definition: kronecker.cpp:882
int GetInDeg() const
Returns in-degree of the current node.
Definition: graph.h:404
TKronEMType EMType
Definition: kronecker.h:138
TFltV SeedMtx
Definition: kronecker.h:17
char * CStr()
Definition: dt.h:479
TKronMtx()
Definition: kronecker.h:19
int GetInNId(const int &NodeN) const
Returns ID of NodeN-th in-node (the node pointing to the current node).
Definition: graph.h:412
TFltQu TestKronDescent(const bool &DoExact, const bool &TruePerm, double LearnRate, const int &WarmUp, const int &NSamples, const TKronMtx &TrueParam)
Definition: kronecker.cpp:1942
TSizeTy Add()
Adds a new element at the end of the vector, after its current last element.
Definition: ds.h:602
void DelLast()
Removes the last element of the vector.
Definition: ds.h:665
TIntV InvertPerm
Definition: kronecker.h:129
double GetLL() const
Definition: kronecker.h:204
static double GetAvgAbsErr(const TKronMtx &Kron1, const TKronMtx &Kron2)
Definition: kronecker.cpp:655
static void PlotCmpGraphs(const TKronMtx &SeedMtx, const PNGraph &Graph, const TStr &OutFNm, const TStr &Desc)
Definition: kronecker.cpp:479
static void RoundTheta(const TFltV &ThetaV, TFltV &NewThetaV)
Definition: kronecker.cpp:2354
static void PlotValV(const TVec< TVal1 > &ValV, const TStr &OutFNmPref, const TStr &Desc="", const TStr &XLabel="", const TStr &YLabel="", const TGpScaleTy &ScaleTy=gpsAuto, const bool &PowerFit=false, const TGpSeriesTy &SeriesTy=gpwLinesPoints)
Definition: gnuplot.h:398
double GradDescent(const int &NIter, const double &LrnRate, double MnStep, double MxStep, const int &WarmUp, const int &NSamples)
Definition: kronecker.cpp:1299
int GetOutNId(const int &NodeN) const
Returns ID of NodeN-th out-node (the node the current node points to).
Definition: graph.h:416
TTriple< TFlt, TInt, TInt > TFltIntIntTr
Definition: ds.h:182
void McMcGetAvgJ(const TVec< TFltV > &ChainLLV, TFltV &AvgJV)
Definition: kronecker.cpp:1883
TVal3 Val3
Definition: ds.h:134
static int RemoveEdgeNoise(PNGraph &Graph, const int &NEdges)
Definition: kronecker.cpp:2268
static const double Mn
Definition: dt.h:1390
static PNGraph GenFastKronecker(const TKronMtx &SeedMtx, const int &NIter, const bool &IsDir, const int &Seed=0)
Definition: kronecker.cpp:349
void GetSubValV(const TSizeTy &BValN, const TSizeTy &EValN, TVec< TVal, TSizeTy > &ValV) const
Fills ValV with elements at positions BValN...EValN.
Definition: ds.h:1170