SNAP Library 6.0, Developer Reference  2020-12-09 16:24:20
SNAP, a general purpose, high performance system for analysis and manipulation of large networks
mag.cpp
Go to the documentation of this file.
1 #include "stdafx.h"
2 #include "mag.h"
3 
7 
8 
10 // MAG affinity matrix
11 
12 const double TMAGAffMtx::NInf = -DBL_MAX;
13 
14 TMAGAffMtx::TMAGAffMtx(const TFltV& SeedMatrix) : SeedMtx(SeedMatrix) {
15  MtxDim = (int) sqrt((double)SeedMatrix.Len());
17 }
18 
20  if (this != &Kronecker){
21  MtxDim=Kronecker.MtxDim;
22  SeedMtx=Kronecker.SeedMtx;
23  }
24  return *this;
25 }
26 
27 bool TMAGAffMtx::IsProbMtx() const {
28  for (int i = 0; i < Len(); i++) {
29  if (At(i) < 0.0 || At(i) > 1.0) return false;
30  }
31  return true;
32 }
33 
34 void TMAGAffMtx::SetRndMtx(TRnd& Rnd, const int& PrmMtxDim, const double& MinProb) {
35  MtxDim = PrmMtxDim;
37  for (int p = 0; p < SeedMtx.Len(); p++) {
38  do {
39  SeedMtx[p] = Rnd.GetUniDev();
40  } while (SeedMtx[p] < MinProb);
41  }
42 }
43 
44 void TMAGAffMtx::SetEpsMtx(const double& Eps1, const double& Eps0, const int& Eps1Val, const int& Eps0Val) {
45  for (int i = 0; i < Len(); i++) {
46  double& Val = At(i);
47  if (Val == Eps1Val) Val = double(Eps1);
48  else if (Val == Eps0Val) Val = double(Eps0);
49  }
50 }
51 
52 void TMAGAffMtx::AddRndNoise(TRnd& Rnd, const double& SDev) {
53  Dump("before");
54  double NewVal;
55  int c =0;
56  for (int i = 0; i < Len(); i++) {
57  for(c = 0; ((NewVal = At(i)*Rnd.GetNrmDev(1, SDev, 0.8, 1.2)) < 0.01 || NewVal>0.99) && c <1000; c++) { }
58  if (c < 999) { At(i) = NewVal; } else { printf("XXXXX\n"); }
59  }
60  Dump("after");
61 }
62 
64  TChA ChA("[");
65  for (int i = 0; i < Len(); i++) {
66  ChA += TStr::Fmt("%g", At(i));
67  if ((i+1)%GetDim()==0 && (i+1<Len())) { ChA += "; "; }
68  else if (i+1<Len()) { ChA += " "; }
69  }
70  ChA += "]";
71  return TStr(ChA);
72 }
73 
75  LLMtx.GenMtx(MtxDim);
76  for (int i = 0; i < Len(); i++) {
77  if (At(i) != 0.0) { LLMtx.At(i) = log(At(i)); }
78  else { LLMtx.At(i) = NInf; }
79  }
80 }
81 
83  ProbMtx.GenMtx(MtxDim);
84  for (int i = 0; i < Len(); i++) {
85  if (At(i) != NInf) { ProbMtx.At(i) = exp(At(i)); }
86  else { ProbMtx.At(i) = 0.0; }
87  }
88 }
89 
91  ::Swap(MtxDim, Mtx.MtxDim);
92  SeedMtx.Swap(Mtx.SeedMtx);
93 }
94 
95 double TMAGAffMtx::GetMtxSum() const {
96  double Sum = 0;
97  for (int i = 0; i < Len(); i++) {
98  Sum += At(i); }
99  return Sum;
100 }
101 
102 double TMAGAffMtx::GetRowSum(const int& RowId) const {
103  double Sum = 0;
104  for (int c = 0; c < GetDim(); c++) {
105  Sum += At(RowId, c); }
106  return Sum;
107 }
108 
109 double TMAGAffMtx::GetColSum(const int& ColId) const {
110  double Sum = 0;
111  for (int r = 0; r < GetDim(); r++) {
112  Sum += At(r, ColId); }
113  return Sum;
114 }
115 
117  double Sum = GetMtxSum();
118  if(Sum == 0) {
119  return 0;
120  }
121 
122  for(int i = 0; i < Len(); i++) {
123  At(i) = At(i) / Sum;
124  }
125  return Sum;
126 }
127 
128 void TMAGAffMtx::Dump(const TStr& MtxNm, const bool& Sort) const {
129  /*printf("%s: %d x %d\n", MtxNm.Empty()?"Mtx":MtxNm.CStr(), GetDim(), GetDim());
130  for (int r = 0; r < GetDim(); r++) {
131  for (int c = 0; c < GetDim(); c++) { printf(" %8.2g", At(r, c)); }
132  printf("\n");
133  }*/
134  if (! MtxNm.Empty()) printf("%s\n", MtxNm.CStr());
135  double Sum=0.0;
136  TFltV ValV = SeedMtx;
137  if (Sort) { ValV.Sort(false); }
138  for (int i = 0; i < ValV.Len(); i++) {
139  printf(" %10.4g", ValV[i]());
140  Sum += ValV[i];
141  if ((i+1) % GetDim() == 0) { printf("\n"); }
142  }
143  printf(" (sum:%.4f)\n", Sum);
144 }
145 
146 // average difference in the parameters
147 double TMAGAffMtx::GetAvgAbsErr(const TMAGAffMtx& Mtx1, const TMAGAffMtx& Mtx2) {
148  TFltV P1 = Mtx1.GetMtx();
149  TFltV P2 = Mtx2.GetMtx();
150  IAssert(P1.Len() == P2.Len());
151  P1.Sort(); P2.Sort();
152  double delta = 0.0;
153  for (int i = 0; i < P1.Len(); i++) {
154  delta += fabs(P1[i] - P2[i]);
155  }
156  return delta/P1.Len();
157 }
158 
159 // average L2 difference in the parameters
160 double TMAGAffMtx::GetAvgFroErr(const TMAGAffMtx& Mtx1, const TMAGAffMtx& Mtx2) {
161  TFltV P1 = Mtx1.GetMtx();
162  TFltV P2 = Mtx2.GetMtx();
163  IAssert(P1.Len() == P2.Len());
164  P1.Sort(); P2.Sort();
165  double delta = 0.0;
166  for (int i = 0; i < P1.Len(); i++) {
167  delta += pow(P1[i] - P2[i], 2);
168  }
169  return sqrt(delta/P1.Len());
170 }
171 
172 // get matrix from matlab matrix notation
174  TStrV RowStrV, ColStrV;
175  MatlabMtxStr.ChangeChAll(',', ' ');
176  MatlabMtxStr.SplitOnAllCh(';', RowStrV); IAssert(! RowStrV.Empty());
177  RowStrV[0].SplitOnWs(ColStrV); IAssert(! ColStrV.Empty());
178  const int Rows = RowStrV.Len();
179  const int Cols = ColStrV.Len();
180  IAssert(Rows == Cols);
181  TMAGAffMtx Mtx(Rows);
182  for (int r = 0; r < Rows; r++) {
183  RowStrV[r].SplitOnWs(ColStrV);
184  IAssert(ColStrV.Len() == Cols);
185  for (int c = 0; c < Cols; c++) {
186  Mtx.At(r, c) = (double) ColStrV[c].GetFlt(); }
187  }
188  return Mtx;
189 }
190 
191 TMAGAffMtx TMAGAffMtx::GetRndMtx(TRnd& Rnd, const int& Dim, const double& MinProb) {
192  TMAGAffMtx Mtx;
193  Mtx.SetRndMtx(Rnd, Dim, MinProb);
194  return Mtx;
195 }
196 
197 
199 // Simple MAG node attributes (Same Bernoulli for every attribute)
200 
201 void TMAGNodeSimple::AttrGen(TIntVV& AttrVV, const int& NNodes) {
202  IAssert(Dim > 0);
203  AttrVV.Gen(NNodes, Dim);
204  AttrVV.PutAll(0);
205 
206  for(int i = 0; i < NNodes; i++) {
207  for(int l = 0; l < Dim; l++) {
208  if((TMAGNodeSimple::Rnd).GetUniDev() > Mu) {
209  AttrVV.At(i, l) = 1;
210  }
211  }
212  }
213 }
214 
215 void TMAGNodeSimple::LoadTxt(const TStr& InFNm) {
216  FILE *fp = fopen(InFNm.CStr(), "r");
217  IAssertR(fp != NULL, "File does not exist: " + InFNm);
218 
219  char buf[128];
220  char *token;
221  TStr TokenStr;
222  TFlt Val;
223 
224  token = strtok(buf, "&");
225  token = strtok(token, " \t");
226  TokenStr = TStr(token);
227  Mu = TokenStr.GetFlt(Val);
228 
229  fclose(fp);
230 }
231 
232 void TMAGNodeSimple::SaveTxt(TStrV& OutStrV) const {
233  OutStrV.Gen(Dim, 0);
234 
235  for(int i = 0; i < Dim; i++) {
236  OutStrV.Add(TStr::Fmt("%f", double(Mu)));
237  }
238 }
239 
240 
242 // MAG node attributes with (different) Bernoulli
243 
245  MuV = Dist.MuV;
246  Dim = Dist.Dim;
247  return (*this);
248 }
249 
250 void TMAGNodeBern::AttrGen(TIntVV& AttrVV, const int& NNodes) {
251  IAssert(Dim > 0);
252  AttrVV.Gen(NNodes, Dim);
253  AttrVV.PutAll(0);
254 
255  for(int i = 0; i < NNodes; i++) {
256  for(int l = 0; l < Dim; l++) {
257  if((TMAGNodeBern::Rnd).GetUniDev() > MuV[l]) {
258  AttrVV.At(i, l) = 1;
259  }
260  }
261  }
262 }
263 
264 void TMAGNodeBern::LoadTxt(const TStr& InFNm) {
265  FILE *fp = fopen(InFNm.CStr(), "r");
266  IAssertR(fp != NULL, "File does not exist: " + InFNm);
267 
268  Dim = 0;
269  MuV.Gen(10, 0);
270 
271  char buf[128];
272  char *token;
273  TStr TokenStr;
274  TFlt Val;
275 
276  while(fgets(buf, sizeof(buf), fp) != NULL) {
277  token = strtok(buf, "&");
278  token = strtok(token, " \t");
279  TokenStr = TStr(token);
280  MuV.Add(TokenStr.GetFlt(Val));
281  }
282  Dim = MuV.Len();
283 
284  fclose(fp);
285 }
286 
287 void TMAGNodeBern::SaveTxt(TStrV& OutStrV) const {
288  OutStrV.Gen(Dim, 0);
289 
290  for(int i = 0; i < Dim; i++) {
291  OutStrV.Add(TStr::Fmt("%f", double(MuV[i])));
292  }
293 }
294 
295 
297 // MAG node attributes with Beta + Bernoulli family
298 
300  AlphaV = Dist.AlphaV;
301  BetaV = Dist.BetaV;
302  Dim = Dist.Dim;
303  MuV = Dist.MuV;
304  Dirty = Dist.Dirty;
305  return (*this);
306 }
307 
308 void TMAGNodeBeta::SetBeta(const int& Attr, const double& Alpha, const double& Beta) {
309  IAssert(Attr < Dim);
310  AlphaV[Attr] = Alpha;
311  BetaV[Attr] = Beta;
312  Dirty = true;
313 }
314 
315 void TMAGNodeBeta::SetBetaV(const TFltV& _AlphaV, const TFltV& _BetaV) {
316  IAssert(_AlphaV.Len() == _BetaV.Len());
317  AlphaV = _AlphaV;
318  BetaV = _BetaV;
319  Dim = _AlphaV.Len();
320  Dirty = true;
321 }
322 
323 void TMAGNodeBeta::AttrGen(TIntVV& AttrVV, const int& NNodes) {
324  IAssert(Dim > 0);
325  AttrVV.Gen(NNodes, Dim);
326  AttrVV.PutAll(0);
327 
328 // printf("AlphaV = %d, BetaV = %d, MuV = %d\n", AlphaV.Len(), BetaV.Len(), MuV.Len());
329 
330  for(int i = 0; i < NNodes; i++) {
331  for(int l = 0; l < Dim; l++) {
332  double x = TMAGNodeBeta::Rnd.GetGammaDev((int)AlphaV[l]);
333  double y = TMAGNodeBeta::Rnd.GetGammaDev((int)BetaV[l]);
334  MuV[l] = x / (x + y);
335  if((TMAGNodeBeta::Rnd).GetUniDev() > MuV[l]) {
336  AttrVV.At(i, l) = 1;
337  }
338  }
339  }
340  Dirty = false;
341 }
342 
343 void TMAGNodeBeta::LoadTxt(const TStr& InFNm) {
344  FILE *fp = fopen(InFNm.CStr(), "r");
345  IAssertR(fp != NULL, "File does not exist: " + InFNm);
346 
347  Dim = 0;
348  AlphaV.Gen(10, 0);
349  BetaV.Gen(10, 0);
350 
351  char buf[128];
352  char *token;
353  TStr TokenStr;
354  TFlt Val;
355 
356  while(fgets(buf, sizeof(buf), fp) != NULL) {
357  token = strtok(buf, "&");
358 
359  token = strtok(token, " \t");
360  TokenStr = TStr(token);
361  AlphaV.Add(TokenStr.GetFlt(Val));
362 
363  token = strtok(NULL, " \t");
364  TokenStr = TStr(token);
365  BetaV.Add(TokenStr.GetFlt(Val));
366 
367  Dim++;
368  }
369 
370  fclose(fp);
371 }
372 
373 void TMAGNodeBeta::SaveTxt(TStrV& OutStrV) const {
374  OutStrV.Gen(Dim, 0);
375 
376  for(int i = 0; i < Dim; i++) {
377  OutStrV.Add(TStr::Fmt("%f %f", double(AlphaV[i]), double(BetaV[i])));
378  }
379 }
380 
381 
383 // MAGFit with Bernoulli node attributes
384 
385 void TMAGFitBern::SetGraph(const PNGraph& GraphPt) {
386  Graph = GraphPt;
387  bool NodesOk = true;
388  // check that nodes IDs are {0,1,..,Nodes-1}
389  for (int nid = 0; nid < Graph->GetNodes(); nid++) {
390  if (! Graph->IsNode(nid)) { NodesOk=false; break; } }
391  if (! NodesOk) {
392  TIntV NIdV; GraphPt->GetNIdV(NIdV);
393  Graph = TSnap::GetSubGraph(GraphPt, NIdV, true);
394  for (int nid = 0; nid < Graph->GetNodes(); nid++) {
395  IAssert(Graph->IsNode(nid)); }
396  }
397 }
398 
399 void TMAGFitBern::SetPhiVV(const TIntVV& AttrVV, const int KnownIds) {
400  const int NNodes = Param.GetNodes();
401  const int NAttrs = Param.GetAttrs();
402 
403  PhiVV.Gen(NNodes, NAttrs);
404  KnownVV.Gen(NNodes, NAttrs);
405 
406  for(int l = 0; l < NAttrs; l++) {
407  for(int i = 0; i < NNodes; i++) {
408  if(int(AttrVV(i, l)) == 0) {
409  PhiVV(i, l) = 0.9999;
410  } else {
411  PhiVV(i, l) = 0.0001;
412  }
413  }
414 
415  if(l < KnownIds) {
416  KnownVV.PutY(l, true);
417  } else {
418  KnownVV.PutY(l, false);
419  }
420  }
421 }
422 
423 void TMAGFitBern::SaveTxt(const TStr& FNm) {
424  const int NNodes = Param.GetNodes();
425  const int NAttrs = Param.GetAttrs();
426  const TFltV MuV = GetMuV();
427  TMAGAffMtxV MtxV;
428  Param.GetMtxV(MtxV);
429 
430  FILE *fp = fopen(FNm.GetCStr(), "w");
431  for(int l = 0; l < NAttrs; l++) {
432  fprintf(fp, "%.4f\t", double(MuV[l]));
433  for(int row = 0; row < 2; row++) {
434  for(int col = 0; col < 2; col++) {
435  fprintf(fp, " %.4f", double(MtxV[l].At(row, col)));
436  }
437  fprintf(fp, (row == 0) ? ";" : "\n");
438  }
439  }
440  fclose(fp);
441 
442  fp = fopen((FNm + "f").CStr(), "w");
443  for(int i = 0; i < NNodes; i++) {
444  for(int l = 0; l < NAttrs; l++) {
445  fprintf(fp, "%f ", double(PhiVV(i, l)));
446  }
447  fprintf(fp, "\n");
448  }
449  fclose(fp);
450 }
451 
452 void TMAGFitBern::Init(const TFltV& MuV, const TMAGAffMtxV& AffMtxV) {
453  TMAGNodeBern DistParam(MuV);
454  Param.SetNodeAttr(DistParam);
455  Param.SetMtxV(AffMtxV);
456 
457  const int NNodes = Param.GetNodes();
458  const int NAttrs = Param.GetAttrs();
459 
460  PhiVV.Gen(NNodes, NAttrs);
461  KnownVV.Gen(NNodes, NAttrs);
462  KnownVV.PutAll(false);
463 }
464 
465 #if 0
466 void TMAGFitBern::PerturbInit(const TFltV& MuV, const TMAGAffMtxV& AffMtxV, const double& PerturbRate) {
467  IAssert(PerturbRate < 1.0);
468 
469  TFltV InitMuV = MuV; //InitMuV.PutAll(0.5);
470  TMAGNodeBern DistParam(InitMuV);
471  Param.SetMtxV(AffMtxV);
472  TRnd& Rnd = TMAGNodeBern::Rnd;
473  TMAGAffMtxV PerturbMtxV = AffMtxV;
474 
475  const int NNodes = Param.GetNodes();
476  const int NAttrs = Param.GetAttrs();
477 
478  for(int l = 0; l < NAttrs; l++) {
479  double Mu = MuV[l] + PerturbRate * (Rnd.GetUniDev() - 0.5) * 2;
480 // double Mu = 0.5;
481  if(Mu < 0.01) { Mu = 0.01; }
482  if(Mu > 0.99) { Mu = 0.99; }
483  DistParam.SetMu(l, Mu);
484 // PhiVV.PutY(l, MuV[l] + Perturb);
485  TMAGAffMtx AffMtx(AffMtxV[l]);
486  for(int p = 0; p < 4; p++) {
487  AffMtx.At(p) += PerturbRate * (Rnd.GetUniDev() - 0.5) * 2;
488  if(AffMtx.At(p) < 0.05) { AffMtx.At(p) = 0.05; }
489  if(AffMtx.At(p) > 0.95) { AffMtx.At(p) = 0.95; }
490  }
491  AffMtx.At(0, 1) = AffMtx.At(1, 0);
492  PerturbMtxV[l] = AffMtx;
493  }
494 // NormalizeAffMtxV(PerturbMtxV);
495 
496  printf("\n");
497  for(int l = 0; l < NAttrs; l++) {
498  printf("Mu = %.3f ", DistParam.GetMu(l));
499  printf("AffMtx = %s\n", PerturbMtxV[l].GetMtxStr().GetCStr());
500  }
501  Param.SetMtxV(PerturbMtxV);
502  Param.SetNodeAttr(DistParam);
503 
504  PhiVV.Gen(NNodes, NAttrs);
505  KnownVV.Gen(NNodes, NAttrs);
506  KnownVV.PutAll(false);
507 }
508 #endif // 0
509 
510 void TMAGFitBern::RandomInit(const TFltV& MuV, const TMAGAffMtxV& AffMtxV, const int& Seed) {
511  TRnd& Rnd = TMAGNodeBern::Rnd;
512  Rnd.PutSeed(Seed);
513 
514  TFltV InitMuV = MuV; InitMuV.PutAll(0.5);
515  TMAGNodeBern DistParam(InitMuV);
516  Param.SetMtxV(AffMtxV);
517 
518  const int NNodes = Param.GetNodes();
519  const int NAttrs = Param.GetAttrs();
520 
521  PhiVV.Gen(NNodes, NAttrs);
522  KnownVV.Gen(NNodes, NAttrs);
523  KnownVV.PutAll(false);
524 
525  for(int i = 0; i < NNodes; i++) {
526  for(int l = 0; l < NAttrs; l++) {
527  PhiVV.At(i, l) = Rnd.GetUniDev();
528 // PhiVV.At(i, l) = 0.5;
529  }
530  }
531 
532  TMAGAffMtxV RndMtxV = AffMtxV;
533  for(int l = 0; l < NAttrs; l++) {
534  for(int p = 0; p < 4; p++) {
535  RndMtxV[l].At(p) = TMAGNodeBern::Rnd.GetUniDev();
536  if(RndMtxV[l].At(p) < 0.1) { RndMtxV[l].At(p) = 0.1; }
537  if(RndMtxV[l].At(p) > 0.9) { RndMtxV[l].At(p) = 0.9; }
538  }
539  RndMtxV[l].At(0, 1) = RndMtxV[l].At(1, 0);
540  }
541 
542  printf("\n");
543  for(int l = 0; l < NAttrs; l++) {
544  printf("AffMtx = %s\n", RndMtxV[l].GetMtxStr().GetCStr());
545  }
546  Param.SetMtxV(RndMtxV);
547  Param.SetNodeAttr(DistParam);
548 }
549 
550 const double TMAGFitBern::GetInCoeff(const int& i, const int& j, const int& l, const int& A, const TMAGAffMtx& Theta) const {
551  return (PhiVV.At(i, l) * Theta.At(0, A) + (1.0 - PhiVV.At(i, l)) * Theta.At(1, A));
552 }
553 
554 const double TMAGFitBern::GetOutCoeff(const int& i, const int& j, const int& l, const int& A, const TMAGAffMtx& Theta) const {
555  return (PhiVV.At(j, l) * Theta.At(A, 0) + (1.0 - PhiVV.At(j, l)) * Theta.At(A, 1));
556 }
557 
558 const double TMAGFitBern::GetAvgInCoeff(const int& i, const int& AId, const int& A, const TMAGAffMtx& Theta) const {
559  const int NNodes = Param.GetNodes();
560  const double Mu_l = AvgPhiV[AId] / double(NNodes);
561  return (Mu_l * Theta.At(0, A) + (1.0 - Mu_l) * Theta.At(1, A));
562 }
563 
564 const double TMAGFitBern::GetAvgOutCoeff(const int& i, const int& AId, const int& A, const TMAGAffMtx& Theta) const {
565  const int NNodes = Param.GetNodes();
566  const double Mu_l = AvgPhiV[AId] / double(NNodes);
567  return (Mu_l * Theta.At(A, 0) + (1.0 - Mu_l) * Theta.At(A, 1));
568 }
569 
570 const double TMAGFitBern::GetProbPhi(const int& NId1, const int& NId2, const int& AId, const int& Attr1, const int& Attr2) const {
571  double Prob1 = (Attr1 == 0) ? double(PhiVV.At(NId1, AId)) : (1.0 - PhiVV.At(NId1, AId));
572  double Prob2 = (Attr2 == 0) ? double(PhiVV.At(NId2, AId)) : (1.0 - PhiVV.At(NId2, AId));
573  return (Prob1 * Prob2);
574 }
575 
576 const double TMAGFitBern::GetProbMu(const int& NId1, const int& NId2, const int& AId, const int& Attr1, const int& Attr2, const bool Left, const bool Right) const {
577  TMAGNodeBern DistParam = Param.GetNodeAttr();
578 // double Mu = DistParam.GetMu(AId);
579  double Mu = AvgPhiV[AId] / double(Param.GetNodes());
580  double Prob1 = (Left) ? double(PhiVV.At(NId1, AId)) : double(Mu);
581  double Prob2 = (Right)? double(PhiVV.At(NId2, AId)) : double(Mu);
582  Prob1 = (Attr1 == 0) ? Prob1 : 1.0 - Prob1;
583  Prob2 = (Attr2 == 0) ? Prob2 : 1.0 - Prob2;
584  return (Prob1 * Prob2);
585 }
586 
587 const double TMAGFitBern::GetThetaLL(const int& NId1, const int& NId2, const int& AId) const {
588  double LL = 0.0;
589  const TMAGAffMtx& Mtx = Param.GetMtx(AId);
590  for(int A1 = 0; A1 < 2; A1++) {
591  for(int A2 = 0; A2 < 2; A2++) {
592  LL += GetProbPhi(NId1, NId2, AId, A1, A2) * Mtx.At(A1, A2);
593  }
594  }
595  return log(LL);
596 }
597 
598 const double TMAGFitBern::GetAvgThetaLL(const int& NId1, const int& NId2, const int& AId, const bool Left, const bool Right) const {
599  double LL = 0.0;
600  const TMAGAffMtx& Mtx = Param.GetMtx(AId);
601  for(int A1 = 0; A1 < 2; A1++) {
602  for(int A2 = 0; A2 < 2; A2++) {
603  LL += GetProbMu(NId1, NId2, AId, A1, A2, Left, Right) * Mtx.At(A1, A2);
604  }
605  }
606  return log(LL);
607 }
608 
609 const double TMAGFitBern::GetSqThetaLL(const int& NId1, const int& NId2, const int& AId) const {
610  double LL = 0.0;
611  const TMAGAffMtx& Mtx = Param.GetMtx(AId);
612  for(int A1 = 0; A1 < 2; A1++) {
613  for(int A2 = 0; A2 < 2; A2++) {
614  LL += GetProbPhi(NId1, NId2, AId, A1, A2) * Mtx.At(A1, A2) * Mtx.At(A1, A2);
615  }
616  }
617  return log(LL);
618 }
619 
620 const double TMAGFitBern::GetAvgSqThetaLL(const int& NId1, const int& NId2, const int& AId, const bool Left, const bool Right) const {
621  double LL = 0.0;
622  const TMAGAffMtx& Mtx = Param.GetMtx(AId);
623  for(int A1 = 0; A1 < 2; A1++) {
624  for(int A2 = 0; A2 < 2; A2++) {
625  LL += GetProbMu(NId1, NId2, AId, A1, A2, Left, Right) * Mtx.At(A1, A2) * Mtx.At(A1, A2);
626  }
627  }
628  return log(LL);
629 }
630 
631 const double TMAGFitBern::GetProdLinWeight(const int& NId1, const int& NId2) const {
632  const int NAttrs = Param.GetAttrs();
633  double LL = 0.0;
634 
635  for(int l = 0; l < NAttrs; l++) {
636  LL += GetThetaLL(NId1, NId2, l);
637  }
638 // return LL;
639  return LL + log(NormConst);
640 }
641 
642 const double TMAGFitBern::GetAvgProdLinWeight(const int& NId1, const int& NId2, const bool Left, const bool Right) const {
643  const int NAttrs = Param.GetAttrs();
644  double LL = 0.0;
645 
646  for(int l = 0; l < NAttrs; l++) {
647  LL += GetAvgThetaLL(NId1, NId2, l, Left, Right);
648  }
649 // return LL;
650  return LL + log(NormConst);
651 }
652 
653 const double TMAGFitBern::GetProdSqWeight(const int& NId1, const int& NId2) const {
654  const int NAttrs = Param.GetAttrs();
655  double LL = 0.0;
656 
657  for(int l = 0; l < NAttrs; l++) {
658  LL += GetSqThetaLL(NId1, NId2, l);
659  }
660 // return LL;
661  return LL + 2 * log(NormConst);
662 }
663 
664 const double TMAGFitBern::GetAvgProdSqWeight(const int& NId1, const int& NId2, const bool Left, const bool Right) const {
665  const int NAttrs = Param.GetAttrs();
666  double LL = 0.0;
667 
668  for(int l = 0; l < NAttrs; l++) {
669  LL += GetAvgSqThetaLL(NId1, NId2, l, Left, Right);
670  }
671 // return LL;
672  return LL + 2 * log(NormConst);
673 }
674 
675 const double LogSumExp(const double LogVal1, const double LogVal2) {
676  double MaxExp = (LogVal1 > LogVal2) ? LogVal1 : LogVal2;
677  double Sum = exp(LogVal1 - MaxExp) + exp(LogVal2 - MaxExp);
678  return (log(Sum) + MaxExp);
679 }
680 
681 const double LogSumExp(const TFltV& LogValV) {
682  const int Len = LogValV.Len();
683  double MaxExp = -DBL_MAX;
684 
685  for(int i = 0; i < Len; i++) {
686  if(MaxExp < LogValV[i]) { MaxExp = LogValV[i]; }
687  }
688 
689  double Sum = 0.0;
690  for(int i = 0; i < Len; i++) {
691  Sum += exp(LogValV[i] - MaxExp);
692  }
693 
694  return (log(Sum) + MaxExp);
695 }
696 
697 const double LogSumExp(const double *LogValArray, const int Len) {
698  TFltV TmpV(Len);
699  for(int i = 0; i < Len; i++) { TmpV[i] = LogValArray[i]; }
700  return LogSumExp(TmpV);
701 }
702 
703 const double TMAGFitBern::GradPhiMI(const double& x, const int& NId, const int& AId, const double& Lambda, const double& DeltaQ, const TFltVV& CntVV) {
704  const int NAttrs = CntVV.GetYDim();
705  double Grad = DeltaQ - log(x) + log(1.0-x);
706 
707  for(int l = 0; l < NAttrs; l++) {
708  if(l == AId) { continue; }
709  const double C0 = PhiVV(NId, l);
710  const double C1 = 1.0 - C0;
711  Grad -= Lambda * C0 * log(CntVV(0, l) + C0 * x);
712  Grad -= Lambda * C1 * log(CntVV(1, l) + C1 * x);
713  Grad += Lambda * C0 * log(CntVV(2, l) + C0 * (1-x));
714  Grad += Lambda * C1 * log(CntVV(3, l) + C1 * (1-x));
715  Grad -= Lambda * log(CntVV(0, l) + CntVV(1, l) + x);
716  Grad += Lambda * log(CntVV(2, l) + CntVV(3, l) + (1-x));
717  }
718 
719  return Grad;
720 }
721 
722 const double TMAGFitBern::ObjPhiMI(const double& x, const int& NId, const int& AId, const double& Lambda, const double& Q0, const double& Q1, const TFltVV& CntVV) {
723  const int NAttrs = CntVV.GetYDim();
724  double Val = x*(Q0 - log(x)) + (1-x)*(Q1 - log(1.0-x));
725 
726  for(int l = 0; l < NAttrs; l++) {
727  if(l == AId) { continue; }
728  const double C0 = PhiVV(NId, l);
729  const double C1 = 1.0 - C0;
730  Val -= Lambda * (CntVV(0, l) + C0 * x) * log(CntVV(0, l) + C0 * x);
731  Val -= Lambda * (CntVV(1, l) + C1 * x) * log(CntVV(1, l) + C1 * x);
732  Val -= Lambda * (CntVV(2, l) + C0 * (1-x)) * log(CntVV(2, l) + C0 * (1-x));
733  Val -= Lambda * (CntVV(3, l) + C1 * (1-x)) * log(CntVV(3, l) + C1 * (1-x));
734  Val += Lambda * (CntVV(0, l) + CntVV(1, l) + x) * log(CntVV(0, l) + CntVV(1, l) + x);
735  Val += Lambda * (CntVV(2, l) + CntVV(3, l) + 1 - x) * log(CntVV(2, l) + CntVV(3, l) + (1-x));
736 
737  if(!(CntVV(0, l) > 0)) printf("CntVV(0, %d) = %.2f\n", l, double(CntVV(0, l)));
738  if(!(CntVV(1, l) > 0)) printf("CntVV(1, %d) = %.2f\n", l, double(CntVV(1, l)));
739  if(!(CntVV(2, l) > 0)) printf("CntVV(2, %d) = %.2f\n", l, double(CntVV(2, l)));
740  if(!(CntVV(3, l) > 0)) printf("CntVV(3, %d) = %.2f\n", l, double(CntVV(3, l)));
741  }
742 
743  return Val;
744 }
745 
746 const double TMAGFitBern::GetEstNoEdgeLL(const int& NId, const int& AId) const {
747  // const int NNodes = Param.GetNodes();
748  // const int NAttrs = Param.GetAttrs();
749 
750  TMAGNodeBern DistParam = Param.GetNodeAttr();
751  double LL = 0.0;
752 
753  return LL;
754 }
755 
756 const double TMAGFitBern::UpdatePhi(const int& NId, const int& AId, double& Phi) {
757  TMAGAffMtx LLTheta, Theta = Param.GetMtx(AId);
758  TMAGAffMtx SqTheta(Theta);
759  const int NNodes = Param.GetNodes();
760  // const int NAttrs = Param.GetAttrs();
761  Theta.GetLLMtx(LLTheta);
762  TMAGNodeBern DistParam = Param.GetNodeAttr();
763  const double Mu = DistParam.GetMu(AId);
764 
765  for(int i = 0; i < Theta.Len(); i++) {
766  SqTheta.At(i) = SqTheta.At(i) * SqTheta.At(i);
767  }
768 
769  // Using log-sum-exp trick
770  double EdgeQ[2], NonEdgeQ[2], MaxExp[2];
771  TFltV NonEdgeLLV[2];
772  for(int i = 0; i < 2; i++) {
773  EdgeQ[i] = 0.0;
774  NonEdgeQ[i] = 0.0;
775  MaxExp[i] = -DBL_MAX;
776  NonEdgeLLV[i].Gen(4 * NNodes, 0);
777  }
778 
779  for(int j = 0; j < NNodes; j++) {
780  if(j == NId) { continue; }
781 
782  if(Graph->IsEdge(NId, j)) {
783  EdgeQ[0] += GetOutCoeff(NId, j, AId, 0, LLTheta);
784  EdgeQ[1] += GetOutCoeff(NId, j, AId, 1, LLTheta);
785  } else {
786  double LinW = GetProdLinWeight(NId, j) - GetThetaLL(NId, j, AId);
787  double SqW = GetProdSqWeight(NId, j) - GetSqThetaLL(NId, j, AId);
788 
789  for(int i = 0; i < 2; i++) {
790  NonEdgeLLV[i].Add(LinW + log(GetOutCoeff(NId, j, AId, i, Theta)));
791  NonEdgeLLV[i].Add(SqW + log(GetOutCoeff(NId, j, AId, i, SqTheta)) + log(0.5));
792  }
793  }
794 
795  if(Graph->IsEdge(j, NId)) {
796  EdgeQ[0] += GetInCoeff(j, NId, AId, 0, LLTheta);
797  EdgeQ[1] += GetInCoeff(j, NId, AId, 1, LLTheta);
798  } else {
799  double LinW = GetProdLinWeight(j, NId) - GetThetaLL(j, NId, AId);
800  double SqW = GetProdSqWeight(j, NId) - GetSqThetaLL(j, NId, AId);
801 
802  for(int i = 0; i < 2; i++) {
803  NonEdgeLLV[i].Add(LinW + log(GetInCoeff(j, NId, AId, i, Theta)));
804  NonEdgeLLV[i].Add(SqW + log(GetInCoeff(j, NId, AId, i, SqTheta)) + log(0.5));
805  }
806  }
807  }
808 
809  NonEdgeQ[0] = LogSumExp(NonEdgeLLV[0]);
810  NonEdgeQ[1] = LogSumExp(NonEdgeLLV[1]);
811 
812  double Q[2];
813  Q[0] = log(Mu) + EdgeQ[0] - exp(NonEdgeQ[0]);
814  Q[1] = log(1.0 - Mu) + EdgeQ[1] - exp(NonEdgeQ[1]);
815 // double Q = Q1 - Q0;
816 // printf(" [Phi_{%d}{%d}] :: Q0 = %f, Q1 = %f\n", NId, AId, Q0, Q1);
817 
818 // Phi = 1.0 / (1.0 + exp(Q));
819  Phi = Q[0] - LogSumExp(Q, 2);
820  Phi = exp(Phi);
821 
822  return Phi - PhiVV.At(NId, AId);
823 }
824 
825 
826 const double TMAGFitBern::UpdatePhiMI(const double& Lambda, const int& NId, const int& AId, double& Phi) {
827  TMAGAffMtx LLTheta, Theta = Param.GetMtx(AId);
828  TMAGAffMtx SqTheta(Theta);
829  const int NNodes = Param.GetNodes();
830  const int NAttrs = Param.GetAttrs();
831  Theta.GetLLMtx(LLTheta);
832  TMAGNodeBern DistParam = Param.GetNodeAttr();
833  const double Mu = DistParam.GetMu(AId);
834 
835  for(int i = 0; i < Theta.Len(); i++) {
836  SqTheta.At(i) = SqTheta.At(i) * SqTheta.At(i);
837  }
838 
839  // Using log-sum-exp trick
840  double EdgeQ[2], NonEdgeQ[2], MaxExp[2];
841  TFltV NonEdgeLLV[2];
842  TFltVV CntVV(4, NAttrs); CntVV.PutAll(0.0);
843  for(int i = 0; i < 2; i++) {
844  EdgeQ[i] = 0.0;
845  NonEdgeQ[i] = 0.0;
846  MaxExp[i] = -DBL_MAX;
847  NonEdgeLLV[i].Gen(4 * NNodes, 0);
848  }
849 
850  for(int j = 0; j < NNodes; j++) {
851  if(j == NId) { continue; }
852 
853  for(int l = 0; l < NAttrs; l++) {
854  if(l == AId) { continue; }
855  CntVV(0, l) = CntVV(0, l) + PhiVV(j, AId) * PhiVV(j, l);
856  CntVV(1, l) = CntVV(1, l) + PhiVV(j, AId) * (1.0-PhiVV(j, l));
857  CntVV(2, l) = CntVV(2, l) + (1.0-PhiVV(j, AId)) * PhiVV(j, l);
858  CntVV(3, l) = CntVV(3, l) + (1.0-PhiVV(j, AId)) * (1.0-PhiVV(j, l));
859  }
860 
861  if(Graph->IsEdge(NId, j)) {
862  EdgeQ[0] += GetOutCoeff(NId, j, AId, 0, LLTheta);
863  EdgeQ[1] += GetOutCoeff(NId, j, AId, 1, LLTheta);
864  } else {
865  double LinW = GetProdLinWeight(NId, j) - GetThetaLL(NId, j, AId);
866  double SqW = GetProdSqWeight(NId, j) - GetSqThetaLL(NId, j, AId);
867 
868  for(int i = 0; i < 2; i++) {
869  NonEdgeLLV[i].Add(LinW + log(GetOutCoeff(NId, j, AId, i, Theta)));
870  NonEdgeLLV[i].Add(SqW + log(GetOutCoeff(NId, j, AId, i, SqTheta)) + log(0.5));
871  }
872  }
873 
874  if(Graph->IsEdge(j, NId)) {
875  EdgeQ[0] += GetInCoeff(j, NId, AId, 0, LLTheta);
876  EdgeQ[1] += GetInCoeff(j, NId, AId, 1, LLTheta);
877  } else {
878  double LinW = GetProdLinWeight(j, NId) - GetThetaLL(j, NId, AId);
879  double SqW = GetProdSqWeight(j, NId) - GetSqThetaLL(j, NId, AId);
880 
881  for(int i = 0; i < 2; i++) {
882  NonEdgeLLV[i].Add(LinW + log(GetInCoeff(j, NId, AId, i, Theta)));
883  NonEdgeLLV[i].Add(SqW + log(GetInCoeff(j, NId, AId, i, SqTheta)) + log(0.5));
884  }
885  }
886  }
887 
888  NonEdgeQ[0] = LogSumExp(NonEdgeLLV[0]);
889  NonEdgeQ[1] = LogSumExp(NonEdgeLLV[1]);
890 
891  double Q[2];
892  Q[0] = log(Mu) + EdgeQ[0] - exp(NonEdgeQ[0]);
893  Q[1] = log(1.0 - Mu) + EdgeQ[1] - exp(NonEdgeQ[1]);
894  double DeltaQ = Q[0] - Q[1];
895 
896 // double x[] = {0.1, 0.3, 0.5, 0.7, 0.9};
897  double x[] = {PhiVV(NId, AId)};
898 // for(int n = 0; n < 5; n++) {
899  for(int n = 0; n < 1; n++) {
900 // double LrnRate = 0.0002;
901  double LrnRate = 0.001;
902  for(int step = 0; step < 200; step++) {
903  double Grad = GradPhiMI(x[n], NId, AId, Lambda, DeltaQ, CntVV);
904  if(Grad > 0.0) { x[n] += LrnRate; }
905  else { x[n] -= LrnRate; }
906  if(x[n] > 0.9999) { x[n] = 0.9999; }
907  if(x[n] < 0.0001) { x[n] = 0.0001; }
908  LrnRate *= 0.995;
909  }
910  }
911 
912  double MaxVal = -DBL_MAX;
913  int MaxX = -1;
914 // for(int n = 0; n < 5; n++) {
915  for(int n = 0; n < 1; n++) {
916  double Val = ObjPhiMI(x[n], NId, AId, Lambda, Q[0], Q[1], CntVV);
917  if(Val > MaxVal) {
918  MaxVal = Val;
919  MaxX = n;
920  }
921  }
922  IAssert(MaxX >= 0);
923 
924  Phi = x[MaxX];
925 
926  return Phi - PhiVV.At(NId, AId);
927 }
928 
929 
930 const double TMAGFitBern::UpdateApxPhiMI(const double& Lambda, const int& NId, const int& AId, double& Phi, TFltVV& ProdVV) {
931  TMAGAffMtx LLTheta, Theta = Param.GetMtx(AId);
932  const int NNodes = Param.GetNodes();
933  const int NAttrs = Param.GetAttrs();
934  Theta.GetLLMtx(LLTheta);
935  TMAGNodeBern DistParam = Param.GetNodeAttr();
936  const double Mu = DistParam.GetMu(AId);
937 
938  TMAGAffMtx SqTheta(Theta);
939  for(int i = 0; i < Theta.Len(); i++) {
940  SqTheta.At(i) = SqTheta.At(i) * SqTheta.At(i);
941  }
942 
943  TFltV ProdV; ProdVV.GetRow(NId, ProdV);
944  ProdV[0] -= GetAvgThetaLL(NId, NId, AId, true, false);
945  ProdV[1] -= GetAvgThetaLL(NId, NId, AId, false, true);
946  ProdV[2] -= log(2.0) + GetAvgSqThetaLL(NId, NId, AId, true, false);
947  ProdV[3] -= log(2.0) + GetAvgSqThetaLL(NId, NId, AId, false, true);
948 
949  // Using log-sum-exp trick
950  double EdgeQ[2], MaxExp[2];
951  TFltV NonEdgeLLV[2];
952  TFltVV CntVV(4, NAttrs); CntVV.PutAll(0.0);
953  for(int i = 0; i < 2; i++) {
954  EdgeQ[i] = 0.0;
955  MaxExp[i] = -DBL_MAX;
956  NonEdgeLLV[i].Gen(4 * NNodes, 0);
957  }
958 
959  for(int F = 0; F < 2; F++) {
960  NonEdgeLLV[F].Add(ProdV[0] + log(GetAvgOutCoeff(NId, AId, F, Theta)));
961  NonEdgeLLV[F].Add(ProdV[1] + log(GetAvgInCoeff(NId, AId, F, Theta)));
962  NonEdgeLLV[F].Add(ProdV[2] + log(GetAvgOutCoeff(NId, AId, F, SqTheta)));
963  NonEdgeLLV[F].Add(ProdV[3] + log(GetAvgInCoeff(NId, AId, F, SqTheta)));
964  }
965  EdgeQ[0] = -(NNodes - 1) * exp(LogSumExp(NonEdgeLLV[0]));
966  EdgeQ[1] = -(NNodes - 1) * exp(LogSumExp(NonEdgeLLV[1]));
967 
968 
969  for(int l = 0; l < NAttrs; l++) {
970  if(l == AId) { continue; }
971  int BgId = (AId > l) ? AId : l;
972  int SmId = (AId + l) - BgId;
973  int SmL = (l < AId) ? 1 : 0;
974  BgId *= 4;
975  CntVV(0, l) = AvgPhiPairVV(SmId, BgId) - PhiVV(NId, AId) * PhiVV(NId, l);
976  CntVV(1+SmL, l) = AvgPhiPairVV(SmId, BgId+1+SmL) - PhiVV(NId, AId) * (1.0-PhiVV(NId, l));
977  CntVV(2-SmL, l) = AvgPhiPairVV(SmId, BgId+2-SmL) - (1.0-PhiVV(NId, AId)) * PhiVV(NId, l);
978  CntVV(3, l) = AvgPhiPairVV(SmId, BgId+3) - (1.0-PhiVV(NId, AId)) * (1.0-PhiVV(NId, l));
979  }
980 
981  TNGraph::TNodeI NI = Graph->GetNI(NId);
982  for(int d = 0; d < NI.GetOutDeg(); d++) {
983  int Out = NI.GetOutNId(d);
984  if(NId == Out) { continue; }
985  double LinW = GetProdLinWeight(NId, Out) - GetThetaLL(NId, Out, AId);
986  double SqW = GetProdSqWeight(NId, Out) - GetSqThetaLL(NId, Out, AId);
987 
988  for(int F = 0; F < 2; F++) {
989  EdgeQ[F] += GetOutCoeff(NId, Out, AId, F, LLTheta);
990  EdgeQ[F] += exp(LinW + log(GetOutCoeff(NId, Out, AId, F, Theta)));
991  EdgeQ[F] += 0.5 * exp(SqW + log(GetOutCoeff(NId, Out, AId, F, SqTheta)));
992  }
993  }
994  for(int d = 0; d < NI.GetInDeg(); d++) {
995  int In = NI.GetInNId(d);
996  if(NId == In) { continue; }
997  double LinW = GetProdLinWeight(In, NId) - GetThetaLL(In, NId, AId);
998  double SqW = GetProdSqWeight(In, NId) - GetSqThetaLL(In, NId, AId);
999 
1000  for(int F = 0; F < 2; F++) {
1001  EdgeQ[F] += GetInCoeff(In, NId, AId, F, LLTheta);
1002  EdgeQ[F] += exp(LinW + log(GetInCoeff(In, NId, AId, F, Theta)));
1003  EdgeQ[F] += 0.5 * exp(SqW + log(GetInCoeff(In, NId, AId, F, SqTheta)));
1004  }
1005  }
1006 
1007  EdgeQ[0] += log(Mu);
1008  EdgeQ[1] += log(1.0 - Mu);
1009  double DeltaQ = EdgeQ[0] - EdgeQ[1];
1010 // printf("(%d, %d) :: Q[0] = %f, Q[1] = %f\n", NId, AId, EdgeQ[0], EdgeQ[1]);
1011 
1012 // double x[] = {0.1, 0.3, 0.5, 0.7, 0.9};
1013  double x[] = {PhiVV(NId, AId)};
1014  TFltV ObjValV; ObjValV.Gen(60, 0);
1015 // for(int n = 0; n < 5; n++) {
1016  for(int n = 0; n < 1; n++) {
1017 // double LrnRate = 0.0002;
1018  double LrnRate = 0.001;
1019  for(int step = 0; step < 50; step++) {
1020 // for(int step = 0; step < 10; step++) {
1021  double Grad = GradPhiMI(x[n], NId, AId, Lambda, DeltaQ, CntVV);
1022  if(Grad > 0.0) { x[n] += LrnRate; }
1023  else { x[n] -= LrnRate; }
1024  if(x[n] > 0.9999) { x[n] = 0.9999; }
1025  if(x[n] < 0.0001) { x[n] = 0.0001; }
1026  if(x[n] == 0.9999 || x[n] == 0.0001) {
1027  break;
1028  }
1029  LrnRate *= 0.995;
1030  }
1031  ObjValV.Add(x[n]);
1032 // ObjValV.Add(PhiVV(NId, AId));
1033  }
1034 
1035  double MaxVal = -DBL_MAX;
1036  int MaxX = -1;
1037 // for(int n = 0; n < 5; n++) {
1038  for(int n = 0; n < ObjValV.Len(); n++) {
1039  double Val = ObjPhiMI(ObjValV[n], NId, AId, Lambda, EdgeQ[0], EdgeQ[1], CntVV);
1040  if(Val > MaxVal) {
1041  MaxVal = Val;
1042  MaxX = n;
1043  } else if(MaxX < 0) {
1044  printf("(%d, %d) : %f Q[0] = %f Q[1] = %f Val = %f\n", NId, AId, double(x[n]), double(EdgeQ[0]), double(EdgeQ[1]), Val);
1045  }
1046  }
1047  IAssert(MaxX >= 0);
1048 
1049  Phi = ObjValV[MaxX];
1050 
1051  return Phi - PhiVV.At(NId, AId);
1052 }
1053 
1054 
1055 
1056 double TMAGFitBern::DoEStepOneIter(const TFltV& TrueMuV, TFltVV& NewPhiVV, const double& Lambda) {
1057  const int NNodes = Param.GetNodes();
1058  const int NAttrs = Param.GetAttrs();
1059  double MaxDelta = 0, L1 = 0;
1060  double Val;
1061  TFltIntIntTrV NewVal;
1062  int RndCount = 0;
1063  // double OldMI = 0.0, NewMI = 0.0;
1064  TFltV MuV(NAttrs); MuV.PutAll(0.0);
1065  TIntV NIndV(NNodes), AIndV(NAttrs);
1066 
1067  // Update Phi
1068  /*
1069  for(int i = 0; i < NNodes; i++) { NIndV[i] = i; }
1070  for(int l = 0; l < NAttrs; l++) { AIndV[l] = l; }
1071  if(Randomized) {
1072  NIndV.Shuffle(TMAGNodeBern::Rnd);
1073  AIndV.Shuffle(TMAGNodeBern::Rnd);
1074  }
1075  */
1076 
1077  NewVal.Gen(NAttrs * 2);
1078  for(int i = 0; i < NNodes; i++) {
1079 // const int NId = NIndV[i]%NNodes;
1080  for(int l = 0; l < NAttrs * 2; l++) {
1081  const int NId = TMAGNodeBern::Rnd.GetUniDevInt(NNodes);
1082  const int AId = TMAGNodeBern::Rnd.GetUniDevInt(NAttrs);
1083 // const int AId = AIndV[l]%NAttrs;
1084 // double Delta = UpdatePhi(NId, AId, Val);
1085  double Delta = 0.0;
1086  if(KnownVV(NId, AId)) {
1087  Val = PhiVV.At(NId, AId);
1088  } else {
1089  Delta = UpdatePhiMI(Lambda, NId, AId, Val);
1090  }
1091 
1092 // PhiVV.At(NId, AId) = Val;
1093  NewVal[l] = TFltIntIntTr(Val, NId, AId);
1094 
1095 // MuV[AId] = MuV[AId] + Val;
1096  if(fabs(Delta) > MaxDelta) {
1097  MaxDelta = fabs(Delta);
1098  }
1099  if(Val > 0.3 && Val < 0.7) { RndCount++; }
1100  }
1101 
1102  for(int l = 0; l < NAttrs * 2; l++) {
1103  const int NId = NewVal[l].Val2;
1104  const int AId = NewVal[l].Val3;
1105  PhiVV.At(NId, AId) = NewVal[l].Val1;
1106  }
1107  }
1108  for(int i = 0; i < NNodes; i++) {
1109  for(int l = 0; l < NAttrs; l++) {
1110  MuV[l] = MuV[l] + PhiVV.At(i, l);
1111  }
1112  }
1113  for(int l = 0; l < NAttrs; l++) {
1114  MuV[l] = MuV[l] / double(NNodes);
1115  }
1116 
1117  TFltV SortMuV = MuV;
1118  double Avg = 0.0;
1119  SortMuV.Sort(false);
1120  for(int l = 0; l < NAttrs; l++) {
1121  printf(" F[%d] = %.3f", l, double(MuV[l]));
1122  Avg += SortMuV[l];
1123  L1 += fabs(TrueMuV[l] - SortMuV[l]);
1124  }
1125  printf("\n");
1126  printf(" Rnd = %d(%.3f)", RndCount, double(RndCount) / double(NNodes * NAttrs));
1127  printf(" Avg = %.3f\n", Avg / double(NAttrs));
1128  L1 /= double(NAttrs);
1129 
1130  return L1;
1131 }
1132 
1133 
1134 double TMAGFitBern::DoEStepApxOneIter(const TFltV& TrueMuV, TFltVV& NewPhiVV, const double& Lambda) {
1135  const int NNodes = Param.GetNodes();
1136  const int NAttrs = Param.GetAttrs();
1137  double MaxDelta = 0, L1 = 0;
1138  double Val;
1139  TFltIntIntTrV NewVal;
1140  int RndCount = 0;
1141  // double OldMI = 0.0, NewMI = 0.0;
1142  TFltV MuV(NAttrs); MuV.PutAll(0.0);
1143  TFltVV ProdVV(NNodes, 4); ProdVV.PutAll(0.0);
1144  TIntV NIndV(NNodes), AIndV(NAttrs);
1145 
1146  // Update Phi
1147  /*
1148  for(int i = 0; i < NNodes; i++) { NIndV[i] = i; }
1149  for(int l = 0; l < NAttrs; l++) { AIndV[l] = l; }
1150  if(Randomized) {
1151  NIndV.Shuffle(TMAGNodeBern::Rnd);
1152  AIndV.Shuffle(TMAGNodeBern::Rnd);
1153  }
1154  */
1155 
1156  AvgPhiV.Gen(NAttrs); AvgPhiV.PutAll(0.0);
1157  AvgPhiPairVV.Gen(NAttrs, 4*NAttrs); AvgPhiPairVV.PutAll(0.0);
1158  for(int i = 0; i < NNodes; i++) {
1159  for(int l = 0; l < NAttrs; l++) {
1160  for(int p = l+1; p < NAttrs; p++) {
1161  int index = 4 * p;
1162  AvgPhiPairVV(l, index) += PhiVV(i, l) * PhiVV(i, p);
1163  AvgPhiPairVV(l, index+1) += PhiVV(i, l) * (1.0-PhiVV(i, p));
1164  AvgPhiPairVV(l, index+2) += (1.0-PhiVV(i, l)) * PhiVV(i, p);
1165  AvgPhiPairVV(l, index+3) += (1.0-PhiVV(i, l)) * (1.0-PhiVV(i, p));
1166  }
1167  AvgPhiV[l] += PhiVV(i, l);
1168  }
1169  }
1170  for(int i = 0; i < NNodes; i++) {
1171  ProdVV(i, 0) = GetAvgProdLinWeight(i, i, true, false);
1172  ProdVV(i, 1) = GetAvgProdLinWeight(i, i, false, true);
1173  ProdVV(i, 2) = GetAvgProdSqWeight(i, i, true, false);
1174  ProdVV(i, 3) = GetAvgProdSqWeight(i, i, false, true);
1175  }
1176 
1177  const int Iter = 3;
1178 
1179  NewVal.Gen(NAttrs * Iter);
1180  for(int i = 0; i < NNodes * Iter; i++) {
1181  for(int l = 0; l < NAttrs; l++) {
1182  const int NId = TMAGNodeBern::Rnd.GetUniDevInt(NNodes);
1183  const int AId = TMAGNodeBern::Rnd.GetUniDevInt(NAttrs);
1184  double Delta = 0.0;
1185  if(KnownVV(NId, AId)) {
1186  Val = PhiVV.At(NId, AId);
1187  } else {
1188  Delta = UpdateApxPhiMI(Lambda, NId, AId, Val, ProdVV);
1189  }
1190 
1191 // PhiVV.At(NId, AId) = Val;
1192  NewVal[l] = TFltIntIntTr(Val, NId, AId);
1193 
1194 // MuV[AId] = MuV[AId] + Val;
1195  if(fabs(Delta) > MaxDelta) {
1196  MaxDelta = fabs(Delta);
1197  }
1198  if(Val > 0.3 && Val < 0.7) { RndCount++; }
1199  }
1200 
1201  for(int l = 0; l < NAttrs; l++) {
1202  const int NId = NewVal[l].Val2;
1203  const int AId = NewVal[l].Val3;
1204 
1205  ProdVV(NId, 0) -= GetAvgThetaLL(NId, NId, AId, true, false);
1206  ProdVV(NId, 1) -= GetAvgThetaLL(NId, NId, AId, false, true);
1207  ProdVV(NId, 2) -= GetAvgSqThetaLL(NId, NId, AId, true, false);
1208  ProdVV(NId, 3) -= GetAvgSqThetaLL(NId, NId, AId, false, true);
1209  for(int p = 0; p < NAttrs; p++) {
1210  if(p > AId) {
1211  int index = 4 * p;
1212  AvgPhiPairVV(AId, index) -= PhiVV(NId, AId) * PhiVV(NId, p);
1213  AvgPhiPairVV(AId, index+1) -= PhiVV(NId, AId) * (1.0-PhiVV(NId, p));
1214  AvgPhiPairVV(AId, index+2) -= (1.0-PhiVV(NId, AId)) * PhiVV(NId, p);
1215  AvgPhiPairVV(AId, index+3) -= (1.0-PhiVV(NId, AId)) * (1.0-PhiVV(NId, p));
1216  } else if (p < AId) {
1217  int index = 4 * AId;
1218  AvgPhiPairVV(p, index) -= PhiVV(NId, p) * PhiVV(NId, AId);
1219  AvgPhiPairVV(p, index+1) -= PhiVV(NId, p) * (1.0-PhiVV(NId, AId));
1220  AvgPhiPairVV(p, index+2) -= (1.0-PhiVV(NId, p)) * PhiVV(NId, AId);
1221  AvgPhiPairVV(p, index+3) -= (1.0-PhiVV(NId, p)) * (1.0-PhiVV(NId, AId));
1222  }
1223  }
1224  AvgPhiV[AId] -= PhiVV(NId, AId);
1225 
1226  PhiVV.At(NId, AId) = NewVal[l].Val1;
1227 
1228  ProdVV(NId, 0) += GetAvgThetaLL(NId, NId, AId, true, false);
1229  ProdVV(NId, 1) += GetAvgThetaLL(NId, NId, AId, false, true);
1230  ProdVV(NId, 2) += GetAvgSqThetaLL(NId, NId, AId, true, false);
1231  ProdVV(NId, 3) += GetAvgSqThetaLL(NId, NId, AId, false, true);
1232  for(int p = 0; p < NAttrs; p++) {
1233  if(p > AId) {
1234  int index = 4 * p;
1235  AvgPhiPairVV(AId, index) += PhiVV(NId, AId) * PhiVV(NId, p);
1236  AvgPhiPairVV(AId, index+1) += PhiVV(NId, AId) * (1.0-PhiVV(NId, p));
1237  AvgPhiPairVV(AId, index+2) += (1.0-PhiVV(NId, AId)) * PhiVV(NId, p);
1238  AvgPhiPairVV(AId, index+3) += (1.0-PhiVV(NId, AId)) * (1.0-PhiVV(NId, p));
1239  } else if (p < AId) {
1240  int index = 4 * AId;
1241  AvgPhiPairVV(p, index) += PhiVV(NId, p) * PhiVV(NId, AId);
1242  AvgPhiPairVV(p, index+1) += PhiVV(NId, p) * (1.0-PhiVV(NId, AId));
1243  AvgPhiPairVV(p, index+2) += (1.0-PhiVV(NId, p)) * PhiVV(NId, AId);
1244  AvgPhiPairVV(p, index+3) += (1.0-PhiVV(NId, p)) * (1.0-PhiVV(NId, AId));
1245  }
1246  }
1247  AvgPhiV[AId] += PhiVV(NId, AId);
1248  }
1249  }
1250 
1251  for(int l = 0; l < NAttrs; l++) {
1252  MuV[l] = AvgPhiV[l] / double(NNodes);
1253  }
1254 
1255  TFltV SortMuV = MuV;
1256  double Avg = 0.0;
1257 // SortMuV.Sort(false);
1258  for(int l = 0; l < NAttrs; l++) {
1259  printf(" F[%d] = %.3f", l, double(MuV[l]));
1260  Avg += SortMuV[l];
1261 // L1 += fabs(TrueMuV[l] - SortMuV[l]);
1262  }
1263  printf("\n");
1264  printf(" Rnd = %d(%.3f)", RndCount, double(RndCount) / double(NNodes * NAttrs));
1265  printf(" Avg = %.3f\n", Avg / double(NAttrs));
1266 // printf(" Linf = %f\n", MaxDelta);
1267 // L1 /= double(NAttrs);
1268 
1269  return L1;
1270 }
1271 
1272 double TMAGFitBern::DoEStep(const TFltV& TrueMuV, const int& NIter, double& LL, const double& Lambda) {
1273  const int NNodes = Param.GetNodes();
1274  const int NAttrs = Param.GetAttrs();
1275  TFltVV NewPhiVV(NNodes, NAttrs);
1276  // double MI;
1277 
1278  TFltV Delta(NIter);
1279 
1280  for(int i = 0; i < NIter; i++) {
1281  TExeTm IterTm;
1282  printf("EStep iteration : %d\n", (i+1));
1283  if(ESpeedUp) {
1284  Delta[i] = DoEStepApxOneIter(TrueMuV, NewPhiVV, Lambda);
1285  } else {
1286  Delta[i] = DoEStepOneIter(TrueMuV, NewPhiVV, Lambda);
1287  }
1288 // PhiVV = NewPhiVV;
1289 
1290  printf(" (Time = %s)\n", IterTm.GetTmStr());
1291  }
1292  printf("\n");
1293 
1294  NewPhiVV.Clr();
1295 
1296  return Delta.Last();
1297 }
1298 
1299 const double TMAGFitBern::UpdateMu(const int& AId) {
1300  const int NNodes = Param.GetNodes();
1301  TMAGNodeBern DistParam = Param.GetNodeAttr();
1302  const double OldMu = DistParam.GetMu(AId);
1303  double NewMu = 0.0;
1304 
1305  for(int i = 0; i < NNodes; i++) {
1306  NewMu += PhiVV.At(i, AId);
1307  }
1308  AvgPhiV[AId] = NewMu;
1309  NewMu /= double(NNodes);
1310 
1311  printf(" [Posterior Mu] = %.4f\n", NewMu);
1312 
1313  double Delta = fabs(NewMu - OldMu);
1314  DistParam.SetMu(AId, NewMu);
1315  Param.SetNodeAttr(DistParam);
1316 
1317  return Delta;
1318 }
1319 
1320 const void TMAGFitBern::GradAffMtx(const int& AId, const TFltVV& ProdVV, const TFltVV& SqVV, const TMAGAffMtx& CurMtx, TFltV& GradV) {
1321  const int NNodes = Param.GetNodes();
1322  // const int NAttrs = Param.GetAttrs();
1323  GradV.PutAll(0.0);
1324 
1325  for(int i = 0; i < NNodes; i++) {
1326  for(int j = 0; j < NNodes; j++) {
1327  double Prod = ProdVV(i, j) - GetThetaLL(i, j, AId);
1328  double Sq = SqVV(i, j) - GetSqThetaLL(i, j, AId);
1329 
1330  for(int p = 0; p < 4; p++) {
1331  int Ai = p / 2;
1332  int Aj = p % 2;
1333  double Prob = GetProbPhi(i, j, AId, Ai, Aj);
1334  if(Graph->IsEdge(i, j)) {
1335  GradV[p] += Prob / CurMtx.At(p);
1336  } else {
1337  GradV[p] -= Prob * exp(Prod);
1338  GradV[p] -= Prob * exp(Sq) * CurMtx.At(p);
1339  }
1340  }
1341  }
1342  }
1343 }
1344 
1345 
1346 const void TMAGFitBern::GradApxAffMtx(const int& AId, const TFltVV& ProdVV, const TFltVV& SqVV, const TMAGAffMtx& CurMtx, TFltV& GradV) {
1347  const int NNodes = Param.GetNodes();
1348  // const int NAttrs = Param.GetAttrs();
1349  // const int NSq = NNodes * (NNodes - 1);
1350  GradV.PutAll(0.0);
1351 
1352  TFltV LogSumV;
1353  for(int p = 0; p < 4; p++) {
1354  int Ai = p / 2;
1355  int Aj = p % 2;
1356  LogSumV.Gen(NNodes * 4, 0);
1357 
1358  for(int i = 0; i < NNodes; i++) {
1359  const double LProd = ProdVV(i, 0) - GetAvgThetaLL(i, i, AId, true, false);
1360  const double LSq = SqVV(i, 0) - GetAvgSqThetaLL(i, i, AId, true, false);
1361  const double RProd = ProdVV(i, 1) - GetAvgThetaLL(i, i, AId, false, true);
1362  const double RSq = SqVV(i, 1) - GetAvgSqThetaLL(i, i, AId, false, true);
1363 
1364  LogSumV.Add(LProd + log(GetProbMu(i, i, AId, Ai, Aj, true, false)));
1365  LogSumV.Add(LSq + log(GetProbMu(i, i, AId, Ai, Aj, true, false)) + log(CurMtx.At(p)));
1366  LogSumV.Add(RProd + log(GetProbMu(i, i, AId, Ai, Aj, false, true)));
1367  LogSumV.Add(RSq + log(GetProbMu(i, i, AId, Ai, Aj, false, true)) + log(CurMtx.At(p)));
1368  }
1369  double LogSum = LogSumExp(LogSumV);
1370  GradV[p] -= (NNodes - 1) * 0.5 * exp(LogSum);
1371  }
1372 
1373  for(TNGraph::TEdgeI EI = Graph->BegEI(); EI < Graph->EndEI(); EI++) {
1374  const int NId1 = EI.GetSrcNId();
1375  const int NId2 = EI.GetDstNId();
1376  const double ProdOne = GetProdLinWeight(NId1, NId2) - GetThetaLL(NId1, NId2, AId);
1377  const double SqOne = GetProdSqWeight(NId1, NId2) - GetSqThetaLL(NId1, NId2, AId);
1378 
1379  for(int p = 0; p < 4; p++) {
1380  int Ai = p / 2;
1381  int Aj = p % 2;
1382  double Prob = GetProbPhi(NId1, NId2, AId, Ai, Aj);
1383  GradV[p] += Prob / CurMtx.At(p);
1384  GradV[p] += Prob * exp(ProdOne);
1385  GradV[p] += Prob * exp(SqOne) * CurMtx.At(p);
1386  }
1387  }
1388 
1389 #if 0
1390  const double Prod = ProdVV(0, 0) - GetAvgThetaLL(0, 0, AId, false, false);
1391  const double Sq = SqVV(0, 0) - GetAvgSqThetaLL(0, 0, AId, false, false);
1392  for(int p = 0; p < 4; p++) {
1393  int Ai = p / 2;
1394  int Aj = p % 2;
1395  GradV[p] -= NSq * exp(Prod) * GetProbMu(0, 0, AId, Ai, Aj, false, false);
1396  GradV[p] -= NSq * exp(Sq) * GetProbMu(0, 0, AId, Ai, Aj, false, false) * CurMtx.At(p);
1397  }
1398 
1399  for(TNGraph::TEdgeI EI = Graph->BegEI(); EI < Graph->EndEI(); EI++) {
1400  const int NId1 = EI.GetSrcNId();
1401  const int NId2 = EI.GetDstNId();
1402  const double ProdOne = GetProdLinWeight(NId1, NId2) - GetThetaLL(NId1, NId2, AId);
1403  const double SqOne = GetProdSqWeight(NId1, NId2) - GetSqThetaLL(NId1, NId2, AId);
1404 
1405  for(int p = 0; p < 4; p++) {
1406  int Ai = p / 2;
1407  int Aj = p % 2;
1408  double Prob = GetProbPhi(NId1, NId2, AId, Ai, Aj);
1409 // GradV[p] += Prob / CurMtx.At(p);
1410 // GradV[p] += Prob * exp(ProdOne);
1411 // GradV[p] += Prob * exp(SqOne) * CurMtx.At(p);
1412  }
1413  }
1414 #endif
1415 }
1416 
1417 
1418 const double TMAGFitBern::UpdateAffMtx(const int& AId, const double& LrnRate, const double& MaxGrad, const double& Lambda, TFltVV& ProdVV, TFltVV& SqVV, TMAGAffMtx& NewMtx) {
1419  double Delta = 0.0;
1420  // const int NNodes = Param.GetNodes();
1421  // const int NAttrs = Param.GetAttrs();
1422  TMAGAffMtx AffMtx = Param.GetMtx(AId);
1423 
1424  TFltV GradV(4);
1425  TFltV HessV(4);
1426  if(MSpeedUp) {
1427  GradApxAffMtx(AId, ProdVV, SqVV, NewMtx, GradV);
1428  } else {
1429  GradAffMtx(AId, ProdVV, SqVV, NewMtx, GradV);
1430  }
1431 
1432  double Ratio = 1.0;
1433  for(int p = 0; p < 4; p++) {
1434  if(fabs(Ratio * LrnRate * GradV[p]) > MaxGrad) {
1435  Ratio = MaxGrad / fabs(LrnRate * GradV[p]);
1436  }
1437  }
1438 
1439  for(int p = 0; p < 4; p++) {
1440  GradV[p] *= (Ratio * LrnRate);
1441  NewMtx.At(p) = AffMtx.At(p) + GradV[p];
1442 // if(NewMtx.At(p) > 0.9999) { NewMtx.At(p) = 0.9999; }
1443  if(NewMtx.At(p) < 0.0001) { NewMtx.At(p) = 0.0001; }
1444  }
1445 
1446  printf(" [Attr = %d]\n", AId);
1447  printf(" %s + [%f, %f; %f %f] -----> %s\n", (AffMtx.GetMtxStr()).GetCStr(), double(GradV[0]), double(GradV[1]), double(GradV[2]), double(GradV[3]), (NewMtx.GetMtxStr()).GetCStr());
1448 
1449 // Param.SetMtx(AId, NewMtx);
1450  return Delta;
1451 }
1452 
1453 
1454 void TMAGFitBern::NormalizeAffMtxV(TMAGAffMtxV& MtxV, const bool UseMu) {
1455  const int NNodes = Param.GetNodes();
1456  const int NAttrs = MtxV.Len();
1457  TFltV MuV = GetMuV();
1458  double Product = 1.0, ExpEdge = NNodes * (NNodes - 1);
1459 
1460  TFltV SumV(NAttrs), EdgeSumV(NAttrs);
1461  SumV.PutAll(0.0); EdgeSumV.PutAll(0.0);
1462  for(int l = 0; l < NAttrs; l++) {
1463  double Mu = (UseMu) ? double(MuV[l]) : (AvgPhiV[l] / double(NNodes));
1464  EdgeSumV[l] += Mu * Mu * MtxV[l].At(0, 0);
1465  EdgeSumV[l] += Mu * (1.0-Mu) * MtxV[l].At(0, 1);
1466  EdgeSumV[l] += Mu * (1.0-Mu) * MtxV[l].At(1, 0);
1467  EdgeSumV[l] += (1.0-Mu) * (1.0-Mu) * MtxV[l].At(1, 1);
1468  SumV[l] = SumV[l] + MtxV[l].At(0, 0);
1469  SumV[l] = SumV[l] + MtxV[l].At(0, 1);
1470  SumV[l] = SumV[l] + MtxV[l].At(1, 0);
1471  SumV[l] = SumV[l] + MtxV[l].At(1, 1);
1472  Product *= SumV[l];
1473  ExpEdge *= EdgeSumV[l];
1474  }
1475  ExpEdge = Graph->GetEdges() / ExpEdge;
1476  NormConst *= Product;
1477 // NormConst = ExpEdge;
1478  Product = 1.0;
1479 // Product = pow(Product * ExpEdge, 1.0 / double(NAttrs));
1480 
1481  for(int l = 0; l < NAttrs; l++) {
1482  for(int p = 0; p < 4; p++) {
1483  MtxV[l].At(p) = MtxV[l].At(p) * Product / SumV[l];
1484 // MtxV[l].At(p) = MtxV[l].At(p) * Product / MtxV[l].At(0, 0);
1485 // MtxV[l].At(p) = MtxV[l].At(p) * Product;
1486 // if(MtxV[l].At(p) > 0.9999) { MtxV[l].At(p) = 0.9999; }
1487 // if(MtxV[l].At(p) < 0.0001) { MtxV[l].At(p) = 0.0001; }
1488  }
1489  }
1490 }
1491 
1492 void TMAGFitBern::UnNormalizeAffMtxV(TMAGAffMtxV& MtxV, const bool UseMu) {
1493  const int NNodes = Param.GetNodes();
1494  const int NAttrs = MtxV.Len();
1495  TFltIntPrV MaxEntV(NAttrs);
1496  TFltV MuV = GetMuV();
1497  NormalizeAffMtxV(MtxV, UseMu);
1498 
1499  double ExpEdge = NNodes * (NNodes - 1);
1500  for(int l = 0; l < NAttrs; l++) {
1501  double Mu = MuV[l];
1502  double EdgeSum = Mu * Mu * MtxV[l].At(0, 0);
1503  EdgeSum += Mu * (1.0-Mu) * MtxV[l].At(0, 1);
1504  EdgeSum += Mu * (1.0-Mu) * MtxV[l].At(1, 0);
1505  EdgeSum += (1.0-Mu) * (1.0-Mu) * MtxV[l].At(1, 1);
1506  ExpEdge *= EdgeSum;
1507  }
1508  NormConst = double(Graph->GetEdges()) / ExpEdge;
1509 // NormConst *= ExpEdge;
1510 
1511  for(int l = 0; l < NAttrs; l++) {
1512  MaxEntV[l] = TFltIntPr(-1, l);
1513  for(int p = 0; p < 4; p++) {
1514  if(MaxEntV[l].Val1 < MtxV[l].At(p)) { MaxEntV[l].Val1 = MtxV[l].At(p); }
1515  }
1516  }
1517  MaxEntV.Sort(false);
1518 
1519  for(int l = 0; l < NAttrs; l++) {
1520  int CurId = MaxEntV[l].Val2;
1521  double Factor = pow(NormConst, 1.0 / double(NAttrs - l));
1522  double MaxFactor = 0.9999 / MaxEntV[l].Val1;
1523  Factor = (Factor > MaxFactor) ? MaxFactor : Factor;
1524  NormConst = NormConst / Factor;
1525 
1526  for(int p = 0; p < 4; p++) {
1527  MtxV[CurId].At(p) = MtxV[CurId].At(p) * Factor;
1528  }
1529  }
1530 }
1531 
1532 const void TMAGFitBern::PrepareUpdateAffMtx(TFltVV& ProdVV, TFltVV& SqVV) {
1533  const int NNodes = Param.GetNodes();
1534  ProdVV.Gen(NNodes, NNodes);
1535  SqVV.Gen(NNodes, NNodes);
1536 
1537  for(int i = 0; i < NNodes; i++) {
1538  for(int j = 0; j < NNodes; j++) {
1539  ProdVV(i, j) = GetProdLinWeight(i, j);
1540  SqVV(i, j) = GetProdSqWeight(i, j);
1541  }
1542  }
1543 }
1544 
1546  const int NNodes = Param.GetNodes();
1547  ProdVV.Gen(NNodes, 2);
1548  SqVV.Gen(NNodes, 2);
1549 
1550  for(int i = 0; i < NNodes; i++) {
1551  ProdVV(i, 0) = GetAvgProdLinWeight(i, i, true, false);
1552  ProdVV(i, 1) = GetAvgProdLinWeight(i, i, false, true);
1553  SqVV(i, 0) = GetAvgProdSqWeight(i, i, true, false);
1554  SqVV(i, 1) = GetAvgProdSqWeight(i, i, false, true);
1555  }
1556 }
1557 
1558 const double TMAGFitBern::UpdateAffMtxV(const int& GradIter, const double& LrnRate, const double& MaxGrad, const double& Lambda, const int& NReal) {
1559  const int NNodes = Param.GetNodes();
1560  const int NAttrs = Param.GetAttrs();
1561  const TMAGNodeBern DistParam = Param.GetNodeAttr();
1562  const TFltV MuV = DistParam.GetMuV();
1563  double Delta = 0.0;
1564  double DecLrnRate = LrnRate, DecMaxGrad = MaxGrad;
1565 
1566  TFltVV ProdVV(NNodes, NNodes), SqVV(NNodes, NNodes);
1567  TMAGAffMtxV NewMtxV, OldMtxV;
1568  Param.GetMtxV(OldMtxV);
1569  Param.GetMtxV(NewMtxV);
1570 
1571  for(int g = 0; g < GradIter; g++) {
1572  if(MSpeedUp) {
1573  PrepareUpdateApxAffMtx(ProdVV, SqVV);
1574  } else {
1575  PrepareUpdateAffMtx(ProdVV, SqVV);
1576  }
1577 
1578  printf(" [Grad step = %d]\n", (g+1));
1579 // for(int l = 0; l < NAttrs; l++) {
1580  for(int l = NReal; l < NAttrs; l++) {
1581  UpdateAffMtx(l, DecLrnRate, DecMaxGrad, Lambda, ProdVV, SqVV, NewMtxV[l]);
1582  Param.SetMtxV(NewMtxV);
1583  }
1584  DecLrnRate *= 0.97;
1585  DecMaxGrad *= 0.97;
1586 
1587  printf("\n");
1588  NormalizeAffMtxV(NewMtxV, true);
1589  Param.SetMtxV(NewMtxV);
1590  }
1591  NormalizeAffMtxV(NewMtxV, true);
1592 
1593  printf( "\nFinal\n");
1594  for(int l = 0; l < NAttrs; l++) {
1595  printf(" [");
1596  for(int p = 0; p < 4; p++) {
1597 // NewMtxV[l].At(p) = NewMtxV[l].At(p) * Product / SumV[l];
1598  Delta += fabs(OldMtxV[l].At(p) - NewMtxV[l].At(p));
1599  printf(" %.4f ", double(NewMtxV[l].At(p)));
1600  }
1601  printf("]\n");
1602  }
1603  Param.SetMtxV(NewMtxV);
1604  ProdVV.Clr(); SqVV.Clr();
1605  return Delta;
1606 }
1607 
1608 void TMAGFitBern::DoMStep(const int& GradIter, const double& LrnRate, const double& MaxGrad, const double& Lambda, const int& NReal) {
1609  // const int NNodes = Param.GetNodes();
1610  const int NAttrs = Param.GetAttrs();
1611  double MuDelta = 0.0, AffMtxDelta = 0.0;
1612 
1613  TExeTm ExeTm;
1614 
1615  printf("\n");
1616  AvgPhiV.Gen(NAttrs); AvgPhiV.PutAll(0.0);
1617  for(int l = 0; l < NAttrs; l++) {
1618 // printf(" [Attr = %d]\n", l);
1619  MuDelta += UpdateMu(l);
1620  }
1621  printf("\n");
1622 
1623  printf(" == Update Theta\n");
1624  AffMtxDelta += UpdateAffMtxV(GradIter, LrnRate, MaxGrad, Lambda, NReal);
1625  printf("\n");
1626  printf("Elpased time = %s\n", ExeTm.GetTmStr());
1627  printf("\n");
1628 }
1629 
1630 void TMAGFitBern::DoEMAlg(const int& NStep, const int& NEstep, const int& NMstep, const double& LrnRate, const double& MaxGrad, const double& Lambda, const double& ReInit, const int& NReal) {
1631  const int NNodes = Param.GetNodes();
1632  const int NAttrs = Param.GetAttrs();
1633  TIntV IndexV;
1634  double LL;
1635  // double MuDist, MtxDist;
1636 
1637  MuHisV.Gen(NStep + 1, 0);
1638  MtxHisV.Gen(NStep + 1, 0);
1639  LLHisV.Gen(NStep + 1, 0);
1640 
1641  printf("--------------------------------------------\n");
1642  printf("Before EM Iteration\n");
1643  printf("--------------------------------------------\n");
1644 
1645  TMAGAffMtxV InitMtxV;
1646  TMAGNodeBern NodeAttr = Param.GetNodeAttr();
1647  Param.GetMtxV(InitMtxV);
1648  TFltV InitMuV = NodeAttr.GetMuV();
1649 
1650  for(int i = 0; i < NNodes; i++) {
1651  for(int l = 0; l < NAttrs; l++) {
1652  if(! KnownVV(i, l)) {
1653  PhiVV.At(i, l) = TMAGNodeBern::Rnd.GetUniDev();
1654  }
1655  }
1656  }
1657 
1658  if(Debug) {
1659  double LL = ComputeApxLL();
1660  MuHisV.Add(InitMuV);
1661  MtxHisV.Add(InitMtxV);
1662  LLHisV.Add(LL);
1663  }
1664 
1665  NormalizeAffMtxV(InitMtxV, true);
1666  Param.SetMtxV(InitMtxV);
1667 
1668  for(int n = 0; n < NStep; n++) {
1669  printf("--------------------------------------------\n");
1670  printf("EM Iteration : %d\n", (n+1));
1671  printf("--------------------------------------------\n");
1672 
1673  NodeAttr = Param.GetNodeAttr();
1674  for(int i = 0; i < NNodes; i++) {
1675  for(int l = 0; l < NAttrs; l++) {
1676  if(!KnownVV(i, l) && TMAGNodeBern::Rnd.GetUniDev() < ReInit) {
1677  PhiVV.At(i, l) = TMAGNodeBern::Rnd.GetUniDev();
1678  }
1679  }
1680  }
1681  DoEStep(InitMuV, NEstep, LL, Lambda);
1682  Param.GetMtxV(InitMtxV);
1683 // NormalizeAffMtxV(InitMtxV);
1684  Param.SetMtxV(InitMtxV);
1685  DoMStep(NMstep, LrnRate, MaxGrad, Lambda, NReal);
1686 
1687  printf("\n");
1688 
1689  if(Debug) {
1690  double LL = ComputeApxLL();
1691  MuHisV.Add(InitMuV);
1692  MtxHisV.Add(InitMtxV);
1693  LLHisV.Add(LL);
1694  printf(" ApxLL = %.2f (Const = %f)\n", LL, double(NormConst));
1695  }
1696 
1697  }
1698  Param.GetMtxV(InitMtxV);
1699  UnNormalizeAffMtxV(InitMtxV, true);
1700  Param.SetMtxV(InitMtxV);
1701 }
1702 
1703 void TMAGFitBern::MakeCCDF(const TFltPrV& RawV, TFltPrV& CcdfV) {
1704  double Total = 0.0;
1705  CcdfV.Gen(RawV.Len(), 0);
1706 
1707  for(int i = 0; i < RawV.Len(); i++) {
1708  if(RawV[i].Val2 <= 0) { continue; }
1709  Total += RawV[i].Val2;
1710  CcdfV.Add(RawV[i]);
1711  IAssert(RawV[i].Val2 > 0);
1712  }
1713  for(int i = 1; i < CcdfV.Len(); i++) {
1714  CcdfV[i].Val2 += CcdfV[i-1].Val2;
1715  }
1716 
1717  for(int i = CcdfV.Len() - 1; i > 0; i--) {
1718  CcdfV[i].Val2 = (Total - CcdfV[i-1].Val2) ;
1719  if(CcdfV[i].Val2 <= 0) { printf("CCDF = %f\n", double(CcdfV[i].Val2));}
1720  IAssert(CcdfV[i].Val2 > 0);
1721  }
1722  CcdfV[0].Val2 = Total;
1723 // CcdfV[0].Val2 = 1.0;
1724 }
1725 
1726 
1728  const int NNodes = Param.GetNodes();
1729  const int NAttrs = Param.GetAttrs();
1730  TMAGParam<TMAGNodeBern> MAGGen(NNodes, NAttrs);
1731  TMAGNodeBern MAGNode = Param.GetNodeAttr();
1732  MAGGen.SetNodeAttr(MAGNode);
1733  TMAGAffMtxV MtxV; Param.GetMtxV(MtxV);
1734  MAGGen.SetMtxV(MtxV);
1735 
1736  PNGraph TrG = new TNGraph;
1737  *TrG = *Graph;
1738 
1739  TIntVV AttrVV(NNodes, NAttrs);
1740  for(int i = 0; i < NNodes; i++) {
1741  for(int j = 0; j < NAttrs; j++) {
1742  if(PhiVV(i, j) > TMAGNodeBern::Rnd.GetUniDev()) AttrVV(i, j) = 0;
1743  else AttrVV(i, j) = 1;
1744  }
1745  }
1746  PNGraph MAG = MAGGen.GenMAG(AttrVV, true, 10000);
1747 // PNGraph MAG = MAGGen.GenAttrMAG(AttrVV, true, 10000);
1748  printf("%d edges created for MAG...\n", MAG->GetEdges());
1749 
1752 
1754 
1755  TGnuPlot InDegP(FNm + "-InDeg"), OutDegP(FNm + "-OutDeg"), SvalP(FNm + "-Sval"), SvecP(FNm + "-Svec"), WccP(FNm + "-Wcc"), HopP(FNm + "-Hop"), TriadP(FNm + "-Triad"), CcfP(FNm + "-Ccf");;
1756 
1757  InDegP.SetXYLabel("Degree", "# of nodes");
1758  OutDegP.SetXYLabel("Degree", "# of nodes");
1759  SvalP.SetXYLabel("Rank", "Singular value");
1760  SvecP.SetXYLabel("Rank", "Primary SngVec component");
1761  WccP.SetXYLabel("Size of component", "# of components");
1762  CcfP.SetXYLabel("Degree", "Clustering coefficient");
1763  HopP.SetXYLabel("Hops", "# of node pairs");
1764  TriadP.SetXYLabel("# of triads", "# of participating nodes");
1765 
1766  InDegP.SetScale(gpsLog10XY); InDegP.AddCmd("set key top right");
1767  OutDegP.SetScale(gpsLog10XY); OutDegP.AddCmd("set key top right");
1768  SvalP.SetScale(gpsLog10XY); SvalP.AddCmd("set key top right");
1769  SvecP.SetScale(gpsLog10XY); SvecP.AddCmd("set key top right");
1770  CcfP.SetScale(gpsLog10XY); CcfP.AddCmd("set key top right");
1771  HopP.SetScale(gpsLog10XY); HopP.AddCmd("set key top right");
1772  TriadP.SetScale(gpsLog10XY); TriadP.AddCmd("set key top right");
1773  InDegP.ShowGrid(false);
1774  OutDegP.ShowGrid(false);
1775  SvalP.ShowGrid(false);
1776  SvecP.ShowGrid(false);
1777  CcfP.ShowGrid(false);
1778  HopP.ShowGrid(false);
1779  TriadP.ShowGrid(false);
1780 
1781  const TStr Style[2] = {"lt 1 lw 3 lc rgb 'black'", "lt 2 lw 3 lc rgb 'red'"};
1782  const TStr Name[2] = {"Real", "MAG"};
1783  GS.Add(Graph, TSecTm(1), "Real Graph");
1784  GS.Add(MAG, TSecTm(2), "MAG");
1785 
1786  TFltPrV InDegV, OutDegV, SvalV, SvecV, HopV, WccV, CcfV, TriadV;
1787  for(int i = 0; i < GS.Len(); i++) {
1788  MakeCCDF(GS.At(i)->GetDistr(gsdInDeg), InDegV);
1789  MakeCCDF(GS.At(i)->GetDistr(gsdOutDeg), OutDegV);
1790  SvalV = GS.At(i)->GetDistr(gsdSngVal);
1791  SvecV = GS.At(i)->GetDistr(gsdSngVec);
1792  MakeCCDF(GS.At(i)->GetDistr(gsdClustCf), CcfV);
1793  HopV = GS.At(i)->GetDistr(gsdHops);
1794  MakeCCDF(GS.At(i)->GetDistr(gsdTriadPart), TriadV);
1795 
1796  InDegP.AddPlot(InDegV, gpwLines, Name[i], Style[i]);
1797  OutDegP.AddPlot(OutDegV, gpwLines, Name[i], Style[i]);
1798  SvalP.AddPlot(SvalV, gpwLines, Name[i], Style[i]);
1799  SvecP.AddPlot(SvecV, gpwLines, Name[i], Style[i]);
1800  CcfP.AddPlot(CcfV, gpwLines, Name[i], Style[i]);
1801  HopP.AddPlot(HopV, gpwLines, Name[i], Style[i]);
1802  TriadP.AddPlot(TriadV, gpwLines, Name[i], Style[i]);
1803  }
1804 
1805  InDegP.SaveEps(30);
1806  OutDegP.SaveEps(30);
1807  SvalP.SaveEps(30);
1808  SvecP.SaveEps(30);
1809  CcfP.SaveEps(30);
1810  HopP.SaveEps(30);
1811  TriadP.SaveEps(30);
1812 }
1813 
1814 void TMAGFitBern::CountAttr(TFltV& EstMuV) const {
1815  const int NNodes = PhiVV.GetXDim();
1816  const int NAttrs = PhiVV.GetYDim();
1817  EstMuV.Gen(NAttrs);
1818  EstMuV.PutAll(0.0);
1819 
1820  for(int l = 0; l < NAttrs; l++) {
1821  for(int i = 0; i < NNodes; i++) {
1822  EstMuV[l] = EstMuV[l] + PhiVV(i, l);
1823  }
1824  EstMuV[l] = EstMuV[l] / double(NNodes);
1825  }
1826 }
1827 
1828 void TMAGFitBern::SortAttrOrdering(const TFltV& TrueMuV, TIntV& IndexV) const {
1829  const int NAttrs = TrueMuV.Len();
1830  // const int NNodes = PhiVV.GetXDim();
1831  TFltV EstMuV, SortedTrueMuV, SortedEstMuV, TrueIdxV, EstIdxV;
1832  IndexV.Gen(NAttrs);
1833  TrueIdxV.Gen(NAttrs);
1834  EstIdxV.Gen(NAttrs);
1835 
1836  for(int l = 0; l < NAttrs; l++) {
1837  TrueIdxV[l] = l;
1838  EstIdxV[l] = l;
1839  }
1840 
1841  CountAttr(EstMuV);
1842  SortedTrueMuV = TrueMuV;
1843  SortedEstMuV = EstMuV;
1844  for(int i = 0; i < NAttrs; i++) {
1845  if(SortedTrueMuV[i] > 0.5) { SortedTrueMuV[i] = 1.0 - SortedTrueMuV[i]; }
1846  if(SortedEstMuV[i] > 0.5) { SortedEstMuV[i] = 1.0 - SortedEstMuV[i]; }
1847  }
1848 
1849  for(int i = 0; i < NAttrs; i++) {
1850  for(int j = i+1; j < NAttrs; j++) {
1851  if(SortedTrueMuV[i] < SortedTrueMuV[j]) {
1852  SortedTrueMuV.Swap(i, j);
1853  TrueIdxV.Swap(i, j);
1854  }
1855  if(SortedEstMuV[i] < SortedEstMuV[j]) {
1856  EstIdxV.Swap((int)SortedEstMuV[i], (int)SortedEstMuV[j]);
1857  SortedEstMuV.Swap(i, j);
1858  }
1859  }
1860  }
1861 
1862  for(int l = 0; l < NAttrs; l++) {
1863  IndexV[l] = (int)TrueIdxV[(int)EstIdxV[l]];
1864  }
1865 }
1866 
1867 const bool TMAGFitBern::NextPermutation(TIntV& IndexV) const {
1868  const int NAttrs = IndexV.Len();
1869  int Pos = NAttrs - 1;
1870  while(Pos > 0) {
1871  if(IndexV[Pos-1] < IndexV[Pos]) {
1872  break;
1873  }
1874  Pos--;
1875  }
1876  if(Pos == 0) {
1877  return false;
1878  }
1879 
1880  int Val = NAttrs, NewPos = -1;
1881  for(int i = Pos; i < NAttrs; i++) {
1882  if(IndexV[i] > IndexV[Pos - 1] && IndexV[i] < Val) {
1883  NewPos = i;
1884  Val = IndexV[i];
1885  }
1886  }
1887  IndexV[NewPos] = IndexV[Pos - 1];
1888  IndexV[Pos - 1] = Val;
1889 
1890  TIntV SubIndexV;
1891  IndexV.GetSubValV(Pos, NAttrs - 1, SubIndexV);
1892  SubIndexV.Sort(true);
1893  for(int i = Pos; i < NAttrs; i++) {
1894  IndexV[i] = SubIndexV[i - Pos];
1895  }
1896 
1897  return true;
1898 }
1899 
1900 const double TMAGFitBern::ComputeJointOneLL(const TIntVV& AttrVV) const {
1901  double LL = 0.0;
1902  const int NNodes = Param.GetNodes();
1903  const int NAttrs = Param.GetAttrs();
1904  TMAGAffMtxV MtxV(NAttrs); Param.GetMtxV(MtxV);
1905  const TMAGNodeBern NodeAttr = Param.GetNodeAttr();
1906  const TFltV MuV = NodeAttr.GetMuV();
1907 
1908  for(int l = 0; l < NAttrs; l++) {
1909  for(int i = 0; i < MtxV[l].Len(); i++) {
1910  MtxV[l].At(i) = log(MtxV[l].At(i));
1911  }
1912  }
1913 
1914  for(int i = 0; i < NNodes; i++) {
1915  for(int l = 0; l < NAttrs; l++) {
1916  if(AttrVV.At(i, l) == 0) {
1917  LL += log(MuV[l]);
1918  } else {
1919  LL += log(1.0 - MuV[l]);
1920  }
1921  }
1922 
1923  for(int j = 0; j < NNodes; j++) {
1924  if(i == j) { continue; }
1925 
1926  double ProbLL = 0.0;
1927  for(int l = 0; l < NAttrs; l++) {
1928  ProbLL += MtxV[l].At(AttrVV.At(i, l), AttrVV.At(j, l));
1929  }
1930 
1931  if(Graph->IsEdge(i, j)) {
1932  LL += ProbLL;
1933  } else {
1934  LL += log(1-exp(ProbLL));
1935  }
1936  }
1937  }
1938 
1939  return LL;
1940 }
1941 
1942 const double TMAGFitBern::ComputeJointAdjLL(const TIntVV& AttrVV) const {
1943  double LL = 0.0;
1944  const int NNodes = Param.GetNodes();
1945  const int NAttrs = Param.GetAttrs();
1946  TMAGAffMtxV MtxV(NAttrs); Param.GetMtxV(MtxV);
1947  const TMAGNodeBern NodeAttr = Param.GetNodeAttr();
1948  const TFltV MuV = NodeAttr.GetMuV();
1949 
1950  for(int l = 0; l < NAttrs; l++) {
1951  for(int i = 0; i < MtxV[l].Len(); i++) {
1952  MtxV[l].At(i) = log(MtxV[l].At(i));
1953  }
1954  }
1955 
1956  for(int i = 0; i < NNodes; i++) {
1957  for(int j = 0; j < NNodes; j++) {
1958  if(i == j) { continue; }
1959 
1960  double ProbLL = 0.0;
1961  for(int l = 0; l < NAttrs; l++) {
1962  ProbLL += MtxV[l].At(AttrVV.At(i, l), AttrVV.At(j, l));
1963  }
1964 
1965  if(Graph->IsEdge(i, j)) {
1966  LL += ProbLL;
1967  } else {
1968  LL += log(1-exp(ProbLL));
1969  }
1970  }
1971  }
1972 
1973  return LL;
1974 }
1975 
1976 const double TMAGFitBern::ComputeJointLL(int NSample) const {
1977  double LL = 0.0;
1978  const int NNodes = Param.GetNodes();
1979  const int NAttrs = Param.GetAttrs();
1980 
1981  TRnd Rnd(2000);
1982  TIntVV AttrVV(NNodes, NAttrs);
1983  int count = 0;
1984  for(int s = 0; s < NSample; s++) {
1985  for(int i = 0; i < NNodes; i++) {
1986  for(int l = 0; l < NAttrs; l++) {
1987 
1988  if(Rnd.GetUniDev() <= PhiVV(i, l)) {
1989  AttrVV.At(i, l) = 0;
1990  } else {
1991  AttrVV.At(i, l) = 1;
1992  }
1993 
1994  if(PhiVV(i, l) > 0.05 && PhiVV(i, l) < 0.95) count++;
1995  }
1996  }
1997 
1998  LL += ComputeJointOneLL(AttrVV);
1999  }
2000  AttrVV.Clr();
2001 
2002  return LL / double(NSample);
2003 }
2004 
2005 const double TMAGFitBern::ComputeApxLL() const {
2006  double LL = 0.0;
2007  // double LLSelf = 0.0;
2008  const int NNodes = Param.GetNodes();
2009  const int NAttrs = Param.GetAttrs();
2010  TMAGNodeBern NodeAttr = Param.GetNodeAttr();
2011  TFltV MuV = NodeAttr.GetMuV();
2012  TMAGAffMtxV LLMtxV(NAttrs);
2013 
2014  for(int l = 0; l < NAttrs; l++) {
2015  for(int i = 0; i < NNodes; i++) {
2016  LL += PhiVV(i, l) * log(MuV[l]);
2017  LL += (1.0 - PhiVV(i, l)) * log(1.0 - MuV[l]);
2018  LL -= PhiVV(i, l) * log(PhiVV(i, l));
2019  LL -= (1.0 - PhiVV(i, l)) * log(1.0 - PhiVV(i, l));
2020  }
2021  TMAGAffMtx Theta = Param.GetMtx(l);
2022  Theta.GetLLMtx(LLMtxV[l]);
2023  }
2024 
2025  for(int i = 0; i < NNodes; i++) {
2026  for(int j = 0; j < NNodes; j++) {
2027  if(i == j) { continue; }
2028 
2029  if(Graph->IsEdge(i, j)) {
2030  for(int l = 0; l < NAttrs; l++) {
2031  LL += GetProbPhi(i, j, l, 0, 0) * LLMtxV[l].At(0, 0);
2032  LL += GetProbPhi(i, j, l, 0, 1) * LLMtxV[l].At(0, 1);
2033  LL += GetProbPhi(i, j, l, 1, 0) * LLMtxV[l].At(1, 0);
2034  LL += GetProbPhi(i, j, l, 1, 1) * LLMtxV[l].At(1, 1);
2035  }
2036  LL += log(NormConst);
2037  } else {
2038  LL += log(1-exp(GetProdLinWeight(i, j)));
2039  }
2040  }
2041  }
2042 
2043  return LL;
2044 }
2045 
2046 const double TMAGFitBern::ComputeApxAdjLL() const {
2047  double LL = 0.0;
2048  // double LLSelf = 0.0;
2049  const int NNodes = Param.GetNodes();
2050  const int NAttrs = Param.GetAttrs();
2051  TMAGNodeBern NodeAttr = Param.GetNodeAttr();
2052  TFltV MuV = NodeAttr.GetMuV();
2053  MuV.PutAll(0.0);
2054  TMAGAffMtxV LLMtxV(NAttrs);
2055  double TotalEdge = 0.0;
2056  for(int l = 0; l < NAttrs; l++) {
2057  TMAGAffMtx Theta = Param.GetMtx(l);
2058  Theta.GetLLMtx(LLMtxV[l]);
2059  }
2060 
2061  for(int i = 0; i < NNodes; i++) {
2062  for(int j = 0; j < NNodes; j++) {
2063  if(i == j) { continue; }
2064 
2065  if(Graph->IsEdge(i, j)) {
2066  for(int l = 0; l < NAttrs; l++) {
2067  LL += GetProbPhi(i, j, l, 0, 0) * LLMtxV[l].At(0, 0);
2068  LL += GetProbPhi(i, j, l, 0, 1) * LLMtxV[l].At(0, 1);
2069  LL += GetProbPhi(i, j, l, 1, 0) * LLMtxV[l].At(1, 0);
2070  LL += GetProbPhi(i, j, l, 1, 1) * LLMtxV[l].At(1, 1);
2071  }
2072  } else {
2073  LL += log(1-exp(GetProdLinWeight(i, j)));
2074  }
2075 
2076  double TempLL = 1.0;
2077  for(int l = 0; l < NAttrs; l++) {
2078  int Ai = (double(PhiVV(i, l)) > 0.5) ? 0 : 1;
2079  int Aj = (double(PhiVV(j, l)) > 0.5) ? 0 : 1;
2080  TempLL *= Param.GetMtx(l).At(Ai, Aj);
2081  }
2082  if(TMAGNodeBern::Rnd.GetUniDev() < TempLL) {
2083  TotalEdge += 1.0;
2084  }
2085  }
2086  }
2087 
2088  return LL;
2089 }
2090 
2091 const double TMAGFitBern::ComputeMI(const TIntVV& AttrV, const int AId1, const int AId2) {
2092  const int NNodes = AttrV.GetXDim();
2093  double MI = 0.0;
2094  double Cor = 0.0;
2095 
2096  TFltVV Pxy(2,2);
2097  TFltV Px(2), Py(2);
2098  Pxy.PutAll(0.0);
2099  Px.PutAll(0.0);
2100  Py.PutAll(0.0);
2101 
2102  for(int i = 0; i < NNodes; i++) {
2103  int X = AttrV(i, AId1);
2104  int Y = AttrV(i, AId2);
2105  Pxy(X, Y) = Pxy(X, Y) + 1;
2106  Px[X] = Px[X] + 1;
2107  Py[Y] = Py[Y] + 1;
2108  Cor += double(X * Y);
2109  }
2110 
2111  for(int x = 0; x < 2; x++) {
2112  for(int y = 0; y < 2; y++) {
2113  MI += Pxy(x, y) / double(NNodes) * (log(Pxy(x, y).Val) - log(Px[x].Val) - log(Py[y].Val) + log((double)NNodes));
2114  }
2115  }
2116 
2117  return MI;
2118 }
2119 
2120 const double TMAGFitBern::ComputeMI(const TFltVV& AttrV, const int AId1, const int AId2) {
2121  const int NNodes = AttrV.GetXDim();
2122  double MI = 0.0;
2123  double Cor = 0.0;
2124 
2125  TFltVV Pxy(2,2);
2126  TFltV Px(2), Py(2);
2127  Pxy.PutAll(0.0);
2128  Px.PutAll(0.0);
2129  Py.PutAll(0.0);
2130 
2131  for(int i = 0; i < NNodes; i++) {
2132  double X = AttrV(i, AId1);
2133  double Y = AttrV(i, AId2);
2134  Pxy(0, 0) = Pxy(0, 0) + X * Y;
2135  Pxy(0, 1) = Pxy(0, 1) + X * (1 - Y);
2136  Pxy(1, 0) = Pxy(1, 0) + (1 - X) * Y;
2137  Pxy(1, 1) = (i+1) - Pxy(0, 0) - Pxy(0, 1) - Pxy(1, 0);
2138  Px[0] = Px[0] + X;
2139  Py[0] = Py[0] + Y;
2140  Cor += double((1-X) * (1-Y));
2141  }
2142  Px[1] = NNodes - Px[0];
2143  Py[1] = NNodes - Py[0];
2144 
2145  for(int x = 0; x < 2; x++) {
2146  for(int y = 0; y < 2; y++) {
2147  MI += Pxy(x, y) / double(NNodes) * (log(Pxy(x, y)) - log(Px[x]) - log(Py[y]) + log(double(NNodes)));
2148  }
2149  }
2150 
2151  return MI;
2152 }
2153 
2154 const double TMAGFitBern::ComputeMI(const TIntVV& AttrV) {
2155  // const int NNodes = AttrV.GetXDim();
2156  const int NAttrs = AttrV.GetYDim();
2157  double MI = 0.0;
2158 
2159  for(int l = 0; l < NAttrs; l++) {
2160  for(int k = l+1; k < NAttrs; k++) {
2161  MI += ComputeMI(AttrV, l, k);
2162  }
2163  }
2164 
2165  return MI;
2166 }
2167 
2168 const double TMAGFitBern::ComputeMI(const TFltVV& AttrV) {
2169  // const int NNodes = AttrV.GetXDim();
2170  const int NAttrs = AttrV.GetYDim();
2171  double MI = 0.0;
2172 
2173  for(int l = 0; l < NAttrs; l++) {
2174  for(int k = l+1; k < NAttrs; k++) {
2175  MI += ComputeMI(AttrV, l, k);
2176  }
2177  }
2178 
2179  return MI;
2180 }
const double ComputeJointAdjLL(const TIntVV &AttrVV) const
Definition: mag.cpp:1942
#define IAssert(Cond)
Definition: bd.h:262
char * GetCStr() const
Definition: dt.h:688
void SetNodeAttr(const TNodeAttr &Dist)
Definition: mag.h:176
TMAGNodeBern & operator=(const TMAGNodeBern &Dist)
Definition: mag.cpp:244
const double GetAvgSqThetaLL(const int &NId1, const int &NId2, const int &AId, const bool Left=false, const bool Right=false) const
Definition: mag.cpp:620
void DoEMAlg(const int &NStep, const int &NEstep, const int &NMstep, const double &LrnRate, const double &MaxGrad, const double &Lambda, const double &ReInit, const int &NReal=0)
Definition: mag.cpp:1630
void Clr()
Definition: ds.h:2246
TFlt Mu
Definition: mag.h:71
void GetNIdV(TIntV &NIdV) const
Gets a vector IDs of all nodes in the graph.
Definition: graph.cpp:376
PGStat Add()
Definition: gstat.cpp:449
#define IAssertR(Cond, Reason)
Definition: bd.h:265
const bool NextPermutation(TIntV &IndexV) const
Definition: mag.cpp:1867
TPair< TFlt, TInt > TFltIntPr
Definition: ds.h:97
bool Debug
Definition: mag.h:347
TVec< TFltV > MuHisV
Definition: mag.h:352
Definition: tm.h:355
TFltV LLHisV
Definition: mag.h:354
const int GetAttrs() const
Definition: mag.h:174
TMAGParam< TMAGNodeBern > Param
Definition: mag.h:346
Definition: dt.h:11
const double GetAvgInCoeff(const int &i, const int &AId, const int &A, const TMAGAffMtx &Theta) const
Definition: mag.cpp:558
TMAGAffMtx & operator=(const TMAGAffMtx &Kronecker)
Definition: mag.cpp:19
const double GetEstNoEdgeLL(const int &NId, const int &AId) const
Definition: mag.cpp:746
TNodeI GetNI(const int &NId) const
Returns an iterator referring to the node of ID NId in the graph.
Definition: graph.h:552
void Swap(TMAGAffMtx &Mtx)
Definition: mag.cpp:90
TFltV BetaV
Definition: mag.h:120
TInt Dim
Definition: mag.h:122
TFltV SeedMtx
Definition: mag.h:14
void Dump(const TStr &MtxNm=TStr(), const bool &Sort=false) const
Definition: mag.cpp:128
TEdgeI EndEI() const
Returns an iterator referring to the past-the-end edge in the graph.
Definition: graph.h:596
TFltV MuV
Definition: mag.h:121
static double GetAvgAbsErr(const TMAGAffMtx &Mtx1, const TMAGAffMtx &Mtx2)
Definition: mag.cpp:147
void NormalizeAffMtxV(TMAGAffMtxV &MtxV, const bool UseMu=false)
Definition: mag.cpp:1454
Definition: gstat.h:28
const double GetOutCoeff(const int &i, const int &j, const int &l, const int &A, const TMAGAffMtx &Theta) const
Definition: mag.cpp:554
int GetEdges() const
Returns the number of edges in the graph.
Definition: graph.cpp:313
TInt Dim
Definition: mag.h:93
Definition: bits.h:119
TSizeTy Len() const
Returns the number of elements in the vector.
Definition: ds.h:575
void MakeCCDF(const TFltPrV &RawV, TFltPrV &CcdfV)
Definition: mag.cpp:1703
TEdgeI BegEI() const
Returns an iterator referring to the first edge in the graph.
Definition: graph.h:594
static const double NInf
Definition: mag.h:11
void CountAttr(TFltV &EstMuV) const
Definition: mag.cpp:1814
int GetNodes() const
Returns the number of nodes in the graph.
Definition: graph.h:503
void SetXYLabel(const TStr &XLabel, const TStr &YLabel)
Definition: gnuplot.h:73
void SaveTxt(TStrV &OutStrV) const
Definition: mag.cpp:232
double GetRowSum(const int &RowId) const
Definition: mag.cpp:102
TMAGNodeBeta & operator=(const TMAGNodeBeta &Dist)
Definition: mag.cpp:299
const double UpdatePhiMI(const double &Lambda, const int &NId, const int &AId, double &Phi)
Definition: mag.cpp:826
const double GetSqThetaLL(const int &NId1, const int &NId2, const int &AId) const
Definition: mag.cpp:609
double Normalize()
Definition: mag.cpp:116
const double GetProdSqWeight(const int &NId1, const int &NId2) const
Definition: mag.cpp:653
TVec< TMAGAffMtxV > MtxHisV
Definition: mag.h:353
int ChangeChAll(const char &SrcCh, const char &DstCh)
Definition: dt.cpp:1113
void LoadTxt(const TStr &InFNm)
Definition: mag.cpp:343
const void GradApxAffMtx(const int &AId, const TFltVV &ProdVV, const TFltVV &SqVV, const TMAGAffMtx &CurMtx, TFltV &GradV)
Definition: mag.cpp:1346
int Len() const
Definition: gstat.h:183
static TRnd Rnd
Definition: mag.h:117
Definition: dt.h:1386
TBool Dirty
Definition: mag.h:123
bool Empty() const
Tests whether the vector is empty.
Definition: ds.h:570
Definition: gstat.h:29
double GetGammaDev(const int &Order)
Definition: dt.cpp:95
double GetFlt() const
Definition: dt.h:631
const int MaxExp
Definition: word2vec.h:10
void UnNormalizeAffMtxV(TMAGAffMtxV &MtxV, const bool UseMu=false)
Definition: mag.cpp:1492
const double LogSumExp(const double LogVal1, const double LogVal2)
Definition: mag.cpp:675
void AttrGen(TIntVV &AttrVV, const int &NNodes)
Definition: mag.cpp:201
void SetPhiVV(const TIntVV &AttrVV, const int KnownIds=0)
Definition: mag.cpp:399
Graph Statistics Sequence.
Definition: gstat.h:155
void Swap(TVec< TVal, TSizeTy > &Vec)
Swaps the contents of the vector with Vec.
Definition: ds.h:1101
void RandomInit(const TFltV &MuV, const TMAGAffMtxV &AffMtxV, const int &Seed)
Definition: mag.cpp:510
const char * GetTmStr() const
Definition: tm.h:370
static double GetAvgFroErr(const TMAGAffMtx &Mtx1, const TMAGAffMtx &Mtx2)
Definition: mag.cpp:160
void GetMtxV(TMAGAffMtxV &MtxV) const
Definition: mag.h:188
void LoadTxt(const TStr &InFNm)
Definition: mag.cpp:215
void DelZeroDegNodes(PGraph &Graph)
Removes all the zero-degree nodes, that isolated nodes, from the graph.
Definition: alg.h:432
void SetGraph(const PNGraph &GraphPt)
Definition: mag.cpp:385
bool IsProbMtx() const
Definition: mag.cpp:27
const double GetAvgProdSqWeight(const int &NId1, const int &NId2, const bool Left=false, const bool Right=false) const
Definition: mag.cpp:664
const double GetInCoeff(const int &i, const int &j, const int &l, const int &A, const TMAGAffMtx &Theta) const
Definition: mag.cpp:550
const void GradAffMtx(const int &AId, const TFltVV &ProdVV, const TFltVV &SqVV, const TMAGAffMtx &CurMtx, TFltV &GradV)
Definition: mag.cpp:1320
bool ESpeedUp
Definition: mag.h:347
void PlotProperties(const TStr &FNm)
Definition: mag.cpp:1727
void AddCmd(const TStr &Cmd)
Definition: gnuplot.h:81
const TFltV & GetMuV() const
Definition: mag.h:379
int Len() const
Definition: mag.h:27
void GetProbMtx(TMAGAffMtx &ProbMtx)
Definition: mag.cpp:82
const double GetAvgThetaLL(const int &NId1, const int &NId2, const int &AId, const bool Left=false, const bool Right=false) const
Definition: mag.cpp:598
void Sort(const bool &Asc=true)
Sorts the elements of the vector.
Definition: ds.h:1318
Edge iterator. Only forward iteration (operator++) is supported.
Definition: graph.h:430
const double UpdateAffMtx(const int &AId, const double &LrnRate, const double &MaxGrad, const double &Lambda, TFltVV &ProdVV, TFltVV &SqVV, TMAGAffMtx &NewMtx)
Definition: mag.cpp:1418
Definition: ds.h:2223
TInt MtxDim
Definition: mag.h:13
TBoolVV KnownVV
Definition: mag.h:344
const void PrepareUpdateApxAffMtx(TFltVV &ProdVV, TFltVV &SqVV)
Definition: mag.cpp:1545
const double GetProdLinWeight(const int &NId1, const int &NId2) const
Definition: mag.cpp:631
const double UpdateMu(const int &AId)
Definition: mag.cpp:1299
TFltV MuV
Definition: mag.h:92
const double ComputeJointOneLL(const TIntVV &AttrVV) const
Definition: mag.cpp:1900
void PutAll(const TVal &Val)
Sets all elements of the vector to value Val.
Definition: ds.h:1229
void SetBetaV(const TFltV &_AlphaV, const TFltV &_BetaV)
Definition: mag.cpp:315
TSizeTy GetYDim() const
Definition: ds.h:2251
const TFltV & GetMuV() const
Definition: mag.h:103
bool IsEdge(const int &SrcNId, const int &DstNId, const bool &IsDir=true) const
Tests whether an edge from node IDs SrcNId to DstNId exists in the graph.
Definition: graph.cpp:363
void PutAll(const TVal &Val)
Definition: ds.h:2268
const double GetThetaLL(const int &NId1, const int &NId2, const int &AId) const
Definition: mag.cpp:587
PUNGraph GetSubGraph(const PUNGraph &Graph, const TIntV &NIdV, const bool &RenumberNodes)
Returns an induced subgraph of an undirected graph Graph with NIdV nodes with an optional node renumb...
Definition: subgraph.cpp:7
const double UpdatePhi(const int &NId, const int &AId, double &Phi)
Definition: mag.cpp:756
bool IsNode(const int &NId) const
Tests whether ID NId is a node.
Definition: graph.h:546
double GetMu(const int &Attr) const
Definition: mag.h:105
double GetColSum(const int &ColId) const
Definition: mag.cpp:109
double DoEStepOneIter(const TFltV &TrueMuV, TFltVV &NewPhi, const double &Lambda)
Definition: mag.cpp:1056
TMAGAffMtx()
Definition: mag.h:16
const TVal & Last() const
Returns a reference to the last element of the vector.
Definition: ds.h:579
void AttrGen(TIntVV &AttrVV, const int &NNodes)
Definition: mag.cpp:250
void SetBeta(const int &Attr, const double &Alpha, const double &Beta)
Definition: mag.cpp:308
TFlt NormConst
Definition: mag.h:350
const double GetProbPhi(const int &NId1, const int &NId2, const int &AId, const int &Attr1, const int &Attr2) const
Definition: mag.cpp:570
double DoEStepApxOneIter(const TFltV &TrueMuV, TFltVV &NewPhi, const double &Lambda)
Definition: mag.cpp:1134
Definition: tm.h:14
Definition: mag.h:10
void SaveEps(const int &FontSz=30, const TStr &Comment=TStr())
Definition: gnuplot.h:123
void AttrGen(TIntVV &AttrVV, const int &NNodes)
Definition: mag.cpp:323
void GetRow(const TSizeTy &RowN, TVec< TVal, TSizeTy > &Vec) const
Definition: ds.h:2382
void SetMtxV(const TMAGAffMtxV &MtxV)
Definition: mag.h:184
static TMAGAffMtx GetRndMtx(TRnd &Rnd, const int &Dim=2, const double &MinProb=0.0)
Definition: mag.cpp:191
TFltV AlphaV
Definition: mag.h:119
const double ComputeJointLL(int NSample) const
Definition: mag.cpp:1976
TFltVV PhiVV
Definition: mag.h:343
const TMAGAffMtx & GetMtx(const int &Attr) const
Definition: mag.h:187
const double GetAvgProdLinWeight(const int &NId1, const int &NId2, const bool Left=false, const bool Right=false) const
Definition: mag.cpp:642
void SortAttrOrdering(const TFltV &TrueMuV, TIntV &IndexV) const
Definition: mag.cpp:1828
Definition: dt.h:201
TStr GetMtxStr() const
Definition: mag.cpp:63
Directed graph.
Definition: graph.h:346
TFltV AvgPhiV
Definition: mag.h:348
PNGraph GenMAG(TIntVV &AttrVV, const bool &IsDir=false, const int &Seed=1)
Definition: mag.h:295
double GetNrmDev()
Definition: dt.cpp:63
Definition: tm.h:81
void PutSeed(const int &_Seed)
Definition: dt.cpp:18
PGStat At(const int &ValN) const
Definition: gstat.h:186
const double ComputeApxAdjLL() const
Definition: mag.cpp:2046
Definition: gstat.h:28
void SaveTxt(TStrV &OutStrV) const
Definition: mag.cpp:287
void SetScale(const TGpScaleTy &GpScaleTy)
Definition: gnuplot.h:78
int GetOutDeg() const
Returns out-degree of the current node.
Definition: graph.h:406
void GetLLMtx(TMAGAffMtx &LLMtx)
Definition: mag.cpp:74
static TRnd Rnd
Definition: mag.h:69
Definition: dt.h:412
const void PrepareUpdateAffMtx(TFltVV &ProdVV, TFltVV &SqVV)
Definition: mag.cpp:1532
bool Empty() const
Definition: dt.h:491
static TStr Fmt(const char *FmtStr,...)
Definition: dt.cpp:1599
TSizeTy GetXDim() const
Definition: ds.h:2250
double DoEStep(const TFltV &TrueMuV, const int &NIter, double &LL, const double &Lambda)
Definition: mag.cpp:1272
const double GetAvgOutCoeff(const int &i, const int &AId, const int &A, const TMAGAffMtx &Theta) const
Definition: mag.cpp:564
const TNodeAttr & GetNodeAttr() const
Definition: mag.h:177
void SplitOnAllCh(const char &SplitCh, TStrV &StrV, const bool &SkipEmpty=true) const
Definition: dt.cpp:926
TInt Dim
Definition: mag.h:72
Node iterator. Only forward iteration (operator++) is supported.
Definition: graph.h:383
static TRnd Rnd
Definition: mag.h:90
double GetUniDev()
Definition: dt.h:30
void SetMu(const int &Attr, const double &Prob)
Definition: mag.h:104
void SetEpsMtx(const double &Eps1, const double &Eps0, const int &Eps1Val=1, const int &Eps0Val=0)
Definition: mag.cpp:44
int AddPlot(const TIntV &YValV, const TGpSeriesTy &SeriesTy=gpwLinesPoints, const TStr &Label=TStr(), const TStr &Style=TStr())
Definition: gnuplot.cpp:186
double GetMtxSum() const
Definition: mag.cpp:95
void SetRndMtx(TRnd &Rnd, const int &PrmMtxDim=2, const double &MinProb=0.0)
Definition: mag.cpp:34
void SaveTxt(const TStr &FNm)
Definition: mag.cpp:423
const int GetNodes() const
Definition: mag.h:171
const double UpdateApxPhiMI(const double &Lambda, const int &NId, const int &AId, double &Phi, TFltVV &ProdVV)
Definition: mag.cpp:930
void Gen(const TSizeTy &_Vals)
Constructs a vector (an array) of _Vals elements.
Definition: ds.h:523
void DoMStep(const int &GradIter, const double &LrnRate, const double &MaxGrad, const double &Lambda, const int &NReal=0)
Definition: mag.cpp:1608
const double & At(const int &Row, const int &Col) const
Definition: mag.h:41
int GetUniDevInt(const int &Range=0)
Definition: dt.cpp:39
void ShowGrid(const bool &Show)
Definition: gnuplot.h:76
TFltV & GetMtx()
Definition: mag.h:31
int GetInDeg() const
Returns in-degree of the current node.
Definition: graph.h:404
void SaveTxt(TStrV &OutStrV) const
Definition: mag.cpp:373
char * CStr()
Definition: dt.h:479
int GetInNId(const int &NodeN) const
Returns ID of NodeN-th in-node (the node pointing to the current node).
Definition: graph.h:412
TSizeTy Add()
Adds a new element at the end of the vector, after its current last element.
Definition: ds.h:602
static const double ComputeMI(const TIntVV &AttrV, const int AId1, const int AId2)
Definition: mag.cpp:2091
void AddRndNoise(TRnd &Rnd, const double &SDev)
Definition: mag.cpp:52
PNGraph Graph
Definition: mag.h:345
const double UpdateAffMtxV(const int &GradIter, const double &LrnRate, const double &MaxGrad, const double &Lambda, const int &NReal=0)
Definition: mag.cpp:1558
bool MSpeedUp
Definition: mag.h:347
void Init(const TFltV &MuV, const TMAGAffMtxV &AffMtxV)
Definition: mag.cpp:452
int GetDim() const
Definition: mag.h:26
int GetOutNId(const int &NodeN) const
Returns ID of NodeN-th out-node (the node the current node points to).
Definition: graph.h:416
void Gen(const TSizeTy &_XDim, const TSizeTy &_YDim)
Definition: ds.h:2247
TFltVV AvgPhiPairVV
Definition: mag.h:349
void PutY(const TSizeTy &Y, const TVal &Val)
Definition: ds.h:2271
TTriple< TFlt, TInt, TInt > TFltIntIntTr
Definition: ds.h:182
const double ComputeApxLL() const
Definition: mag.cpp:2005
const double ObjPhiMI(const double &x, const int &NId, const int &AId, const double &Lambda, const double &Q0, const double &Q1, const TFltVV &CntVV)
Definition: mag.cpp:722
void LoadTxt(const TStr &InFNm)
Definition: mag.cpp:264
void GenMtx(const int &Dim)
Definition: mag.h:36
const double GetProbMu(const int &NId1, const int &NId2, const int &AId, const int &Attr1, const int &Attr2, const bool Left=false, const bool Right=false) const
Definition: mag.cpp:576
const double GradPhiMI(const double &x, const int &NId, const int &AId, const double &Lambda, const double &DeltaQ, const TFltVV &CntVV)
Definition: mag.cpp:703
void GetSubValV(const TSizeTy &BValN, const TSizeTy &EValN, TVec< TVal, TSizeTy > &ValV) const
Fills ValV with elements at positions BValN...EValN.
Definition: ds.h:1170
const TVal & At(const TSizeTy &X, const TSizeTy &Y) const
Definition: ds.h:2256