SNAP Library 6.0, User Reference  2020-12-09 16:24:20
SNAP, a general purpose, high performance system for analysis and manipulation of large networks
agmdirected.h
Go to the documentation of this file.
1 #ifndef yanglib_agmdirected_h
2 #define yanglib_agmdirected_h
3 #include "Snap.h"
4 #include "agm.h"
5 #include "agmfast.h"
6 
7 class TCoda { //sparse AGM-fast with coordinate ascent for directed affiliation
8 private:
9  PNGraph G; //graph to fit
10  TVec<TIntFltH> F; // outdegree membership for each user (Size: Nodes * Coms)
11  TVec<TIntFltH> H; // in-degree membership for each user (Size: Nodes * Coms) A ~ F * H'
12  TRnd Rnd; // random number generator
13  TIntV NIDV; // original node ID vector
14  TFlt RegCoef; //Regularization coefficient when we fit for P_c +: L1, -: L2
15  TFltV SumFV; // sum_u F_uc for each community c. Needed for efficient calculation
16  TFltV SumHV; // sum_u H_uc for each community c. Needed for efficient calculation
17  TBool NodesOk; // Node ID is from 0 ~ N-1
18  TInt NumComs; // number of communities
19  TVec<TIntSet> HOVIDSV; //NID pairs to hold out for cross validation
20 public:
21  TFlt MinVal; // minimum value of F (0)
22  TFlt MaxVal; // maximum value of F (for numerical reason)
23  TFlt NegWgt; // weight of negative example (a pair of nodes without an edge)
24  TFlt PNoCom; // base probability \varepsilon (edge probability between a pair of nodes sharing no community
25  TBool DoParallel; // whether to use parallelism for computation
26 
27  TCoda(const PNGraph& GraphPt, const int& InitComs, const int RndSeed = 0): Rnd(RndSeed), RegCoef(0),
28  NodesOk(true), MinVal(0.0), MaxVal(1000.0), NegWgt(1.0) { SetGraph(GraphPt); RandomInit(InitComs); }
29  TCoda() { G = TNGraph::New(); }
30  void SetGraph(const PNGraph& GraphPt);
31  PNGraph GetGraph() { return G; }
33  void SetRegCoef(const double _RegCoef) { RegCoef = _RegCoef; }
34  double GetRegCoef() { return RegCoef; }
35  void RandomInit(const int InitComs);
36  int GetNumComs() { return NumComs.Val; }
37  void NeighborComInit(const int InitComs);
38  void NeighborComInit(TFltIntPrV& NIdPhiV, const int InitComs);
39  void SetCmtyVV(const TVec<TIntV>& CmtyVVOut, const TVec<TIntV>& CmtyVVIn);
40  double Likelihood(const bool DoParallel = false);
41  double LikelihoodForNode(const bool IsRow, const int UID);
42  double LikelihoodForNode(const bool IsRow, const int UID, const TIntFltH& FU);
43  void GetNonEdgePairScores(TFltIntIntTrV& ScoreV);
44  void GetNIDValH(TIntFltH& NIdValInOutH, TIntFltH& NIdValOutH, TIntFltH& NIdValInH, const int CID, const double Thres);
45  void DumpMemberships(const TStr& OutFNm, const TStrHash<TInt>& NodeNameH) { DumpMemberships(OutFNm, NodeNameH, sqrt(PNoCom)); }
46  void DumpMemberships(const TStr& OutFNm, const TStrHash<TInt>& NodeNameH, const double Thres);
47  void GetCmtyS(TIntSet& CmtySOut, TIntSet& CmtySIn, const int CID, const double Thres);
48  void DumpMemberships(const TStr& OutFNm, const double Thres);
49  void DumpMemberships(const TStr& OutFNm) { DumpMemberships(OutFNm, sqrt(PNoCom)); }
50  void GetCommunity(TIntV& CmtyVIn, TIntV& CmtyVOut, const int CID) { GetCommunity(CmtyVIn, CmtyVOut, CID, sqrt(PNoCom)); }
51  void GetCommunity(TIntV& CmtyVIn, TIntV& CmtyVOut, const int CID, const double Thres);
52  void GetTopCIDs(TIntV& CIdV, const int TopK, const int IsAverage = 1, const int MinSz = 1);
53  void GradientForNode(const bool IsRow, const int UID, TIntFltH& GradU, const TIntSet& CIDSet);
54  void SetHoldOut(const double HOFrac) { TVec<TIntSet> HoldOut; TAGMFastUtil::GenHoldOutPairs(G, HoldOut, HOFrac, Rnd); HOVIDSV = HoldOut; }
55  //double LikelihoodForRow(const int UID);
56  //double LikelihoodForRow(const int UID, const TIntFltH& FU);
57  //double LikelihoodForCol(const int VID);
58  //double LikelihoodForCol(const int VID, const TIntFltH& HV);
59  //void GradientForRow(const int UID, TIntFltH& GradU, const TIntSet& CIDSet);
60  void GetCmtyVV(TVec<TIntV>& CmtyVVOut, TVec<TIntV>& CmtyVVIn, const int MinSz = 3);
61  void GetCmtyVV(TVec<TIntV>& CmtyVVOut, TVec<TIntV>& CmtyVVIn, const double ThresOut, const double ThresIn, const int MinSz = 3);
62  void GetCmtyVV(const bool IsOut, TVec<TIntV>& CmtyVV);
63  void GetCmtyVV(const bool IsOut, TVec<TIntV>& CmtyVV, const double Thres, const int MinSz = 3);
64  void GetCmtyVVUnSorted(const bool IsOut, TVec<TIntV>& CmtyVV, const double Thres, const int MinSz = 3);
65  void GetCmtyVVUnSorted(TVec<TIntV>& CmtyVVOut, TVec<TIntV>& CmtyVVIn);
66  //void GetCmtyVVIn(TVec<TIntV>& CmtyVV);
67  //void GetCmtyVVIn(TVec<TIntV>& CmtyVV, const double Thres, const int MinSz = 3);
68  int FindComsByCV(TIntV& ComsV, const double HOFrac = 0.2, const int NumThreads = 20, const TStr PlotLFNm = TStr(), const int EdgesForCV = 100, const double StepAlpha = 0.3, const double StepBeta = 0.1);
69  int FindComsByCV(const int NumThreads, const int MaxComs, const int MinComs, const int DivComs, const TStr OutFNm, const int EdgesForCV = 100, const double StepAlpha = 0.3, const double StepBeta = 0.3);
70  double LikelihoodHoldOut(const bool DoParallel = false);
71  double GetStepSizeByLineSearch(const bool IsRow, const int UID, const TIntFltH& DeltaV, const TIntFltH& GradV, const double& Alpha, const double& Beta, const int MaxIter = 10);
72  int MLEGradAscent(const double& Thres, const int& MaxIter, const TStr PlotNm, const double StepAlpha = 0.3, const double StepBeta = 0.1);
73  int MLEGradAscentParallel(const double& Thres, const int& MaxIter, const int ChunkNum, const int ChunkSize, const TStr PlotNm, const double StepAlpha = 0.3, const double StepBeta = 0.1);
74  int MLEGradAscentParallel(const double& Thres, const int& MaxIter, const int ChunkNum, const TStr PlotNm = TStr(), const double StepAlpha = 0.3, const double StepBeta = 0.1) {
75  int ChunkSize = G->GetNodes() / 10 / ChunkNum;
76  if (ChunkSize == 0) { ChunkSize = 1; }
77  return MLEGradAscentParallel(Thres, MaxIter, ChunkNum, ChunkSize, PlotNm, StepAlpha, StepBeta);
78  }
79  //int MLENewton(const double& Thres, const int& MaxIter, const TStr PlotNm = TStr());
80  //double GradientForOneVar(const TFltV& AlphaKV, const int UID, const int CID, const double& Val);
81  //double HessianForOneVar(const TFltV& AlphaKV, const int UID, const int CID, const double& Val);
82  //double LikelihoodForOneVar(const TFltV& AlphaKV, const int UID, const int CID, const double& Val);
83  //double FindOptimalThres(const TVec<TIntV>& TrueCmtyVV, TVec<TIntV>& CmtyVV);
84  void Save(TSOut& SOut);
85  void Load(TSIn& SIn, const int& RndSeed = 0);
86  TFlt& GetSumVal(const bool IsOut, const int CID) {
87  if (IsOut) {
88  return SumFV[CID];
89  } else {
90  return SumHV[CID];
91  }
92  }
93  double inline GetCom(const bool IsOut, const int& NID, const int& CID) {
94  if (IsOut) {
95  return GetComOut(NID, CID);
96  } else {
97  return GetComIn(NID, CID);
98  }
99  }
100  double inline GetComOut(const int& NID, const int& CID) {
101  if (F[NID].IsKey(CID)) {
102  return F[NID].GetDat(CID);
103  } else {
104  return 0.0;
105  }
106  }
107  double inline GetComIn(const int& NID, const int& CID) {
108  if (H[NID].IsKey(CID)) {
109  return H[NID].GetDat(CID);
110  } else {
111  return 0.0;
112  }
113  }
114  void inline AddCom(const bool IsOut, const int& NID, const int& CID, const double& Val) {
115  if (IsOut) {
116  AddComOut(NID, CID, Val);
117  } else {
118  AddComIn(NID, CID, Val);
119  }
120  }
121  void inline AddComOut(const int& NID, const int& CID, const double& Val) {
122  if (F[NID].IsKey(CID)) {
123  SumFV[CID] -= F[NID].GetDat(CID);
124  }
125  F[NID].AddDat(CID) = Val;
126  SumFV[CID] += Val;
127  }
128  void inline AddComIn(const int& NID, const int& CID, const double& Val) {
129  if (H[NID].IsKey(CID)) {
130  SumHV[CID] -= H[NID].GetDat(CID);
131  }
132  H[NID].AddDat(CID) = Val;
133  SumHV[CID] += Val;
134  }
135  void inline DelCom(const bool IsOut, const int& NID, const int& CID) {
136  if (IsOut) {
137  return DelComOut(NID, CID);
138  } else {
139  return DelComIn(NID, CID);
140  }
141  }
142  void inline DelComOut(const int& NID, const int& CID) {
143  if (F[NID].IsKey(CID)) {
144  SumFV[CID] -= F[NID].GetDat(CID);
145  F[NID].DelKey(CID);
146  }
147  }
148  void inline DelComIn(const int& NID, const int& CID) {
149  if (H[NID].IsKey(CID)) {
150  SumHV[CID] -= H[NID].GetDat(CID);
151  H[NID].DelKey(CID);
152  }
153  }
154  double inline DotProduct(const TIntFltH& UV, const TIntFltH& VV) {
155  double DP = 0;
156  if (UV.Len() > VV.Len()) {
157  for (TIntFltH::TIter HI = UV.BegI(); HI < UV.EndI(); HI++) {
158  if (VV.IsKey(HI.GetKey())) {
159  DP += VV.GetDat(HI.GetKey()) * HI.GetDat();
160  }
161  }
162  } else {
163  for (TIntFltH::TIter HI = VV.BegI(); HI < VV.EndI(); HI++) {
164  if (UV.IsKey(HI.GetKey())) {
165  DP += UV.GetDat(HI.GetKey()) * HI.GetDat();
166  }
167  }
168  }
169  return DP;
170  }
171  double inline DotProductUtoV(const int& UID, const int& VID) {
172  return DotProduct(F[UID], H[VID]);
173  }
174  double inline Prediction(const TIntFltH& FU, const TIntFltH& HV) {
175  double DP = log (1.0 / (1.0 - PNoCom)) + DotProduct(FU, HV);
176  IAssertR(DP > 0.0, TStr::Fmt("DP: %f", DP));
177  return exp(- DP);
178  }
179  double inline Prediction(const int& UID, const int& VID) {
180  return Prediction(F[UID], H[VID]);
181  }
182  double inline Sum(const TIntFltH& UV) {
183  double N = 0.0;
184  for (TIntFltH::TIter HI = UV.BegI(); HI < UV.EndI(); HI++) {
185  N += HI.GetDat();
186  }
187  return N;
188  }
189  double inline Norm2(const TIntFltH& UV) {
190  double N = 0.0;
191  for (TIntFltH::TIter HI = UV.BegI(); HI < UV.EndI(); HI++) {
192  N += HI.GetDat() * HI.GetDat();
193  }
194  return N;
195  }
196 };
197 
199 public:
205  TCodaAnalyzer(TCoda& Coda, const double MemThres = -1.0) {
206  G = Coda.GetGraphRawNID();
207  printf("graph copied (%d nodes %d edges)\n", G->GetNodes(), G->GetEdges());
208  TIntV CIdV;
209  Coda.GetTopCIDs(CIdV, Coda.GetNumComs());
210  double Delta = MemThres == -1.0 ? sqrt(Coda.PNoCom): MemThres;
211  for (int c = 0; c < CIdV.Len(); c++) {
212  int CID = CIdV[c];
213  TIntFltH InMemH, OutMemH, InOutMemH;
214  Coda.GetNIDValH(InOutMemH, OutMemH, InMemH, CID, Delta);
215  InCmtyValHV.Add(InMemH);
216  OutCmtyValHV.Add(OutMemH);
217  InOutCmtyValHV.Add(InOutMemH);
218  }
219  printf("Communities copied (%d communities)\n", InCmtyValHV.Len());
220  }
221  void GetAllCmtyVV(TVec<TIntV>& CmtyVV, const int MinSz) {
222  for (int c = 0; c < InCmtyValHV.Len(); c++) {
223  TIntV CmtyVIn, CmtyVOut, CmtyVInOut;
224  if (InCmtyValHV[c].Len() < MinSz || OutCmtyValHV[c].Len() < MinSz) { continue; }
225  InOutCmtyValHV[c].GetKeyV(CmtyVInOut);
226  InCmtyValHV[c].GetKeyV(CmtyVIn);
227  OutCmtyValHV[c].GetKeyV(CmtyVOut);
228  CmtyVV.Add(CmtyVInOut);
229  CmtyVV.Add(CmtyVOut);
230  CmtyVV.Add(CmtyVIn);
231  }
232  }
233 
234  double GetFrac2Mode(const double Thres2Mode = 0.2, const int MinSzEach = 2) {
235  int Cnt2Mode = 0;
236  int CntAll = 0;
237  for (int c = 0; c < InCmtyValHV.Len(); c++) {
238  double Jacc = (double) InOutCmtyValHV[c].Len() / (double) (InCmtyValHV[c].Len() + OutCmtyValHV[c].Len() - InOutCmtyValHV[c].Len());
239  if (InCmtyValHV[c].Len() < MinSzEach || OutCmtyValHV[c].Len() < MinSzEach) { continue; }
240  if (Jacc <= Thres2Mode) { Cnt2Mode++; }
241  CntAll++;
242  }
243  return (double) Cnt2Mode / (double) CntAll;
244  }
245 
246  void Summary(const int TopK = 10, const double Thres2Mode = 0.2) {
247  int Cnt2Mode = 0;
248  double SumJacc = 0.0;
249  for (int c = 0; c < InCmtyValHV.Len(); c++) {
250  double Jacc = (double) InOutCmtyValHV[c].Len() / (double) (InCmtyValHV[c].Len() + OutCmtyValHV[c].Len() - InOutCmtyValHV[c].Len());
251  if (Jacc <= Thres2Mode) { Cnt2Mode++; }
252  SumJacc += Jacc;
253  if (c < TopK) {
254  printf("Cmty %d: InOut: %d, In:%d, Out:%d, Jacc;%.3f\n", c, InCmtyValHV[c].Len(), InCmtyValHV[c].Len(), OutCmtyValHV[c].Len(), Jacc);
255  }
256  }
257  double AvgJacc = SumJacc / (double) InCmtyValHV.Len();
258  printf("Average jaccard similarity = %.3f. (%d / %d communities are 2-mode)\n", AvgJacc, Cnt2Mode, InCmtyValHV.Len());
259  }
260  int GetNumComs() { return InCmtyValHV.Len(); }
262 
263  void GetCmtyVAll(TIntV& CmtyVAll, const int CID) {
264  TIntV CmtyVIn, CmtyVOut;
265  InCmtyValHV[CID].GetKeyV(CmtyVIn);
266  OutCmtyValHV[CID].GetKeyV(CmtyVOut);
267  CmtyVIn.Sort();
268  CmtyVOut.Sort();
269  CmtyVAll.Gen(CmtyVIn.Len() + CmtyVOut.Len(), 0);
270  CmtyVIn.Union(CmtyVOut, CmtyVAll);
271  }
272 
273  PNGraph Net2ModeCommunities(const double MaxJac, const double JacEdge, const bool GetWcc = true) {
274  //if In(A) is similar to Out(B), create an edge A->B between 2 communities A, B
275  int Coms = InCmtyValHV.Len();
276  PNGraph ComG = TNGraph::New(Coms, -1);
277  for (int c = 0; c < InCmtyValHV.Len(); c++) {
278  double Jacc = (double) InOutCmtyValHV[c].Len() / (double) (InCmtyValHV[c].Len() + OutCmtyValHV[c].Len() - InOutCmtyValHV[c].Len());
279  if (Jacc > MaxJac) { continue; }
280  ComG->AddNode(c);
281  }
282  TVec<TIntSet> CmtySVIn, CmtySVOut;
283  for (int c = 0; c < Coms; c++) {
284  TIntV CmtyVIn, CmtyVOut;
285  InCmtyValHV[c].GetKeyV(CmtyVIn);
286  OutCmtyValHV[c].GetKeyV(CmtyVOut);
287  TIntSet CmtySIn(CmtyVIn), CmtySOut(CmtyVOut);
288  CmtySVIn.Add(CmtySIn);
289  CmtySVOut.Add(CmtySOut);
290  }
291  for (int c1 = 0; c1 < Coms; c1++) {
292  if (! ComG->IsNode(c1)) { continue; }
293  for (int c2 = 0; c2 < Coms; c2++) {
294  if (! ComG->IsNode(c2)) { continue; }
295  int IntC1C2 = TAGMUtil::Intersection(CmtySVIn[c1], CmtySVOut[c2]);
296  double Jac = (double) IntC1C2 / (CmtySVIn[c1].Len() + CmtySVOut[c2].Len() - IntC1C2);
297  if (Jac >= JacEdge) {
298  ComG->AddEdge(c1, c2);
299  }
300  }
301  }
302  //PNGraph Wcc = TSnap::GetMxWcc(ComG);
303  TIntV NIDV;
304  ComG->GetNIdV(NIDV);
305  for (int u = 0; u < NIDV.Len(); u++) {
306  int NID = NIDV[u];
307  TNGraph::TNodeI NI = ComG->GetNI(NID);
308  if (NI.GetDeg() == 0) { ComG->DelNode(NID); }
309  if (NI.GetInDeg() == 1 && NI.GetOutDeg() == 1 && NI.GetOutNId(0) == NID) { ComG->DelNode(NID); }
310  }
311  printf("Community graph made (Jaccard similarity for edges: %f, %d nodes, %d edges)\n", JacEdge, ComG->GetNodes(), ComG->GetEdges());
312  return ComG;
313  }
314 
315  // RS:2014/03/11 default parameter values do not compile on OS X with g++-4.2
316  //void Dump2ModeCommunities(const TStr& OutFNm, const double MaxJac, const TIntStrH& NIDNameH = THash<TInt, TStr>()) {
317  void Dump2ModeCommunities(const TStr& OutFNm, const double MaxJac, const TIntStrH& NIDNameH) {
318  FILE* F = fopen(OutFNm.CStr(), "wt");
319  for (int c = 0; c < InCmtyValHV.Len(); c++) {
320  double Jacc = (double) InOutCmtyValHV[c].Len() / (double) (InCmtyValHV[c].Len() + OutCmtyValHV[c].Len() - InOutCmtyValHV[c].Len());
321  if (Jacc > MaxJac) { continue; }
322  TIntV CmtyVIn, CmtyVOut, CmtyVAll;
323  InCmtyValHV[c].GetKeyV(CmtyVIn);
324  OutCmtyValHV[c].GetKeyV(CmtyVOut);
325  GetCmtyVAll(CmtyVAll, c);
326  //adjust for the nodes who belong to both cmtyvin and cmtyvout
327  for (int u = 0; u < InOutCmtyValHV[c].Len(); u++) {
328  int UID = InOutCmtyValHV[c].GetKey(u);
329  if (CmtyVIn.Len() >= CmtyVOut.Len()) {
330  CmtyVIn.DelIfIn(UID);
331  } else {
332  CmtyVOut.DelIfIn(UID);
333  }
334  }
335  if (CmtyVAll.Len() == 0) { continue; }
336  fprintf(F, "Com %d\n", c);
337  for (int u = 0; u < CmtyVOut.Len(); u++) {
338  int NID = CmtyVOut[u];
339  TStr Label = NIDNameH.IsKey(NID)? NIDNameH.GetDat(NID): TStr::Fmt("Concept %d", NID);
340  fprintf(F, "%s:%f\n", Label.CStr(), OutCmtyValHV[c].GetDat(NID).Val);
341  }
342  fprintf(F, "||==>||\n");
343  for (int u = 0; u < CmtyVIn.Len(); u++) {
344  int NID = CmtyVIn[u];
345  TStr Label = NIDNameH.IsKey(NID)? NIDNameH.GetDat(NID): TStr::Fmt("Concept %d", NID);
346  fprintf(F, "%s:%f\n", Label.CStr(), InCmtyValHV[c].GetDat(NID).Val);
347  }
348  fprintf(F, "\n");
349  }
350  fclose(F);
351  }
352 
353  // RS:2014/03/11 default parameter values do not compile on OS X with g++-4.2
354  //void Draw2ModeCommunity(const int CID, const TStr& OutFNm, const TIntStrH& NIDNameH = THash<TInt, TStr>(), const THash<TInt, TIntTr>& NIDColorH = THash<TInt, TIntTr>() ) {
355  void Draw2ModeCommunity(const int CID, const TStr& OutFNm, const TIntStrH& NIDNameH, const THash<TInt, TIntTr>& NIDColorH) {
356  TIntV CmtyVIn, CmtyVOut, CmtyVAll;
357  InCmtyValHV[CID].GetKeyV(CmtyVIn);
358  OutCmtyValHV[CID].GetKeyV(CmtyVOut);
359  GetCmtyVAll(CmtyVAll, CID);
360 
361  //adjust for the nodes who belong to both cmtyvin and cmtyvout
362  for (int u = 0; u < InOutCmtyValHV[CID].Len(); u++) {
363  int UID = InOutCmtyValHV[CID].GetKey(u);
364  if (CmtyVIn.Len() >= CmtyVOut.Len()) {
365  CmtyVIn.DelIfIn(UID);
366  } else {
367  CmtyVOut.DelIfIn(UID);
368  }
369  }
370 
371  PNGraph SG = TSnap::GetSubGraph(G, CmtyVAll);
373  if (CmtyVAll.Len() == 0) { return; }
374  double OXMin = 0.1, YMin = 0.1, OXMax = 2500.00, YMax = 1000.0, IXMin = 0.1, IXMax = 2500.00;
375  double OStep = (OXMax - OXMin) / (double) CmtyVOut.Len(), IStep = (IXMax - IXMin) / (double) CmtyVIn.Len();
376 
377  FILE* F = fopen(OutFNm.CStr(), "wt");
378  fprintf(F, "<?xml version='1.0' encoding='UTF-8'?>\n");
379  fprintf(F, "<gexf xmlns='http://www.gexf.net/1.2draft' xmlns:viz='http://www.gexf.net/1.1draft/viz' xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance' xsi:schemaLocation='http://www.gexf.net/1.2draft http://www.gexf.net/1.2draft/gexf.xsd' version='1.2'>\n");
380  fprintf(F, "\t<graph mode='static' defaultedgetype='directed'>\n");
381  fprintf(F, "\t\t<nodes>\n");
382  for (int c = 0; c < CmtyVOut.Len(); c++) {
383  int NID = CmtyVOut[c];
384  double XPos = c * OStep + OXMin;
385  TStr Label = NIDNameH.IsKey(NID)? NIDNameH.GetDat(NID): "";
386  Label.ChangeChAll('<', ' ');
387  Label.ChangeChAll('>', ' ');
388  Label.ChangeChAll('&', ' ');
389  Label.ChangeChAll('\'', ' ');
390  TIntTr Color = NIDColorH.IsKey(NID)? NIDColorH.GetDat(NID) : TIntTr(120, 120, 120);
391  fprintf(F, "\t\t\t<node id='%d' label='%s'>\n", NID, Label.CStr());
392  fprintf(F, "\t\t\t\t<viz:color r='%d' g='%d' b='%d'/>\n", Color.Val1.Val, Color.Val2.Val, Color.Val3.Val);
393  fprintf(F, "\t\t\t\t<viz:size value='4.0'/>\n");
394  fprintf(F, "\t\t\t\t<viz:shape value='square'/>\n");
395  fprintf(F, "\t\t\t\t<viz:position x='%f' y='%f' z='0.0'/>\n", XPos, YMax);
396  fprintf(F, "\t\t\t</node>\n");
397  }
398 
399  for (int u = 0; u < CmtyVIn.Len(); u++) {
400  int NID = CmtyVIn[u];
401  TStr Label = NIDNameH.IsKey(NID)? NIDNameH.GetDat(NID): "";
402  Label.ChangeChAll('<', ' ');
403  Label.ChangeChAll('>', ' ');
404  Label.ChangeChAll('&', ' ');
405  Label.ChangeChAll('\'', ' ');
406  double XPos = IXMin + u * IStep;
407  TIntTr Color = NIDColorH.IsKey(NID)? NIDColorH.GetDat(NID) : TIntTr(120, 120, 120);
408  double Alpha = 1.0;
409  fprintf(F, "\t\t\t<node id='%d' label='%s'>\n", NID, Label.CStr());
410  fprintf(F, "\t\t\t\t<viz:color r='%d' g='%d' b='%d' a='%.1f'/>\n", Color.Val1.Val, Color.Val2.Val, Color.Val3.Val, Alpha);
411  fprintf(F, "\t\t\t\t<viz:size value='4.0'/>\n");
412  fprintf(F, "\t\t\t\t<viz:shape value='square'/>\n");
413  fprintf(F, "\t\t\t\t<viz:position x='%f' y='%f' z='0.0'/>\n", XPos, YMin);
414  fprintf(F, "\t\t\t</node>\n");
415  }
416  fprintf(F, "\t\t</nodes>\n");
417  //plot edges
418  int EID = 0;
419  fprintf(F, "\t\t<edges>\n");
420  for (TNGraph::TNodeI NI = SG->BegNI(); NI < SG->EndNI(); NI++) {
421  if (NI.GetOutDeg() == 0 && NI.GetInDeg() == 0 ) { continue; }
422  for (int e = 0; e < NI.GetOutDeg(); e++) {
423  fprintf(F, "\t\t\t<edge id='%d' source='%d' target='%d'/>\n", EID++, NI.GetId(), NI.GetOutNId(e));
424  }
425  }
426  fprintf(F, "\t\t</edges>\n");
427  fprintf(F, "\t</graph>\n");
428  fprintf(F, "</gexf>\n");
429  fclose(F);
430  }
431 
432 };
433 
434 #endif
PNGraph GetGraphRawNID()
bool DelIfIn(const TVal &Val)
Removes the first occurrence of element Val.
Definition: ds.h:1212
void GetNIdV(TIntV &NIdV) const
Gets a vector IDs of all nodes in the graph.
Definition: graph.cpp:376
#define IAssertR(Cond, Reason)
Definition: bd.h:265
TNodeI BegNI() const
Returns an iterator referring to the first node in the graph.
Definition: graph.h:548
double LikelihoodForNode(const bool IsRow, const int UID)
void AddComIn(const int &NID, const int &CID, const double &Val)
Definition: agmdirected.h:128
TFlt NegWgt
Definition: agmdirected.h:23
void DelComOut(const int &NID, const int &CID)
Definition: agmdirected.h:142
void Dump2ModeCommunities(const TStr &OutFNm, const double MaxJac, const TIntStrH &NIDNameH)
Definition: agmdirected.h:317
double GetRegCoef()
Definition: agmdirected.h:34
Definition: dt.h:11
Definition: ds.h:130
void DumpMemberships(const TStr &OutFNm)
Definition: agmdirected.h:49
int Val
Definition: dt.h:1139
static PNGraph New()
Static constructor that returns a pointer to the graph. Call: PNGraph Graph = TNGraph::New().
Definition: graph.h:481
TFlt MinVal
Definition: agmdirected.h:21
TNodeI GetNI(const int &NId) const
Returns an iterator referring to the node of ID NId in the graph.
Definition: graph.h:552
void SetGraph(const PNGraph &GraphPt)
void GetNIDValH(TIntFltH &NIdValInOutH, TIntFltH &NIdValOutH, TIntFltH &NIdValInH, const int CID, const double Thres)
void GetAllCmtyVV(TVec< TIntV > &CmtyVV, const int MinSz)
Definition: agmdirected.h:221
double Likelihood(const bool DoParallel=false)
void NeighborComInit(const int InitComs)
Definition: agmdirected.cpp:83
TVec< TIntFltH > H
Definition: agmdirected.h:11
TIter BegI() const
Definition: hash.h:213
int GetEdges() const
Returns the number of edges in the graph.
Definition: graph.cpp:313
TSizeTy Len() const
Returns the number of elements in the vector.
Definition: ds.h:575
void DelComIn(const int &NID, const int &CID)
Definition: agmdirected.h:148
static int Intersection(const TIntV &C1, const TIntV &C2)
Definition: agm.cpp:399
TVec< TIntFltH > OutCmtyValHV
Definition: agmdirected.h:202
double GetComOut(const int &NID, const int &CID)
Definition: agmdirected.h:100
void SetCmtyVV(const TVec< TIntV > &CmtyVVOut, const TVec< TIntV > &CmtyVVIn)
int GetNodes() const
Returns the number of nodes in the graph.
Definition: graph.h:503
TFlt & GetSumVal(const bool IsOut, const int CID)
Definition: agmdirected.h:86
int AddNode(int NId=-1)
Adds a node of ID NId to the graph.
Definition: graph.cpp:236
void GetCmtyS(TIntSet &CmtySOut, TIntSet &CmtySIn, const int CID, const double Thres)
TVal1 Val1
Definition: ds.h:132
int GetNumComs()
Definition: agmdirected.h:260
int FindComsByCV(TIntV &ComsV, const double HOFrac=0.2, const int NumThreads=20, const TStr PlotLFNm=TStr(), const int EdgesForCV=100, const double StepAlpha=0.3, const double StepBeta=0.1)
const TDat & GetDat(const TKey &Key) const
Definition: hash.h:262
TVec< TIntFltH > InCmtyValHV
Definition: agmdirected.h:201
TIter EndI() const
Definition: hash.h:218
void GetTopCIDs(TIntV &CIdV, const int TopK, const int IsAverage=1, const int MinSz=1)
void SetRegCoef(const double _RegCoef)
Definition: agmdirected.h:33
TBool DoParallel
Definition: agmdirected.h:25
TFlt RegCoef
Definition: agmdirected.h:14
int ChangeChAll(const char &SrcCh, const char &DstCh)
Definition: dt.cpp:1113
double DotProduct(const TIntFltH &UV, const TIntFltH &VV)
Definition: agmdirected.h:154
Definition: dt.h:1386
Definition: fl.h:58
int MLEGradAscentParallel(const double &Thres, const int &MaxIter, const int ChunkNum, const TStr PlotNm=TStr(), const double StepAlpha=0.3, const double StepBeta=0.1)
Definition: agmdirected.h:74
void GetCmtyVVUnSorted(const bool IsOut, TVec< TIntV > &CmtyVV, const double Thres, const int MinSz=3)
double GetFrac2Mode(const double Thres2Mode=0.2, const int MinSzEach=2)
Definition: agmdirected.h:234
void DelNode(const int &NId)
Deletes node of ID NId from the graph.
Definition: graph.cpp:294
int MLEGradAscentParallel(const double &Thres, const int &MaxIter, const int ChunkNum, const int ChunkSize, const TStr PlotNm, const double StepAlpha=0.3, const double StepBeta=0.1)
TIntV NIDV
Definition: agmdirected.h:13
double Prediction(const int &UID, const int &VID)
Definition: agmdirected.h:179
double GetStepSizeByLineSearch(const bool IsRow, const int UID, const TIntFltH &DeltaV, const TIntFltH &GradV, const double &Alpha, const double &Beta, const int MaxIter=10)
void AddComOut(const int &NID, const int &CID, const double &Val)
Definition: agmdirected.h:121
TVal2 Val2
Definition: ds.h:133
void GetCmtyVV(TVec< TIntV > &CmtyVVOut, TVec< TIntV > &CmtyVVIn, const int MinSz=3)
PNGraph GetGraph()
Definition: agmdirected.h:31
int GetNumComs()
Definition: agmdirected.h:36
TVec< TIntSet > HOVIDSV
Definition: agmdirected.h:19
double LikelihoodHoldOut(const bool DoParallel=false)
void Sort(const bool &Asc=true)
Sorts the elements of the vector.
Definition: ds.h:1318
TFltV SumFV
Definition: agmdirected.h:15
int AddEdge(const int &SrcNId, const int &DstNId)
Adds an edge from node SrcNId to node DstNId to the graph.
Definition: graph.cpp:321
TCoda(const PNGraph &GraphPt, const int &InitComs, const int RndSeed=0)
Definition: agmdirected.h:27
void Summary(const int TopK=10, const double Thres2Mode=0.2)
Definition: agmdirected.h:246
void Save(TSOut &SOut)
Definition: agmdirected.cpp:8
const TVal & GetDat(const TVal &Val) const
Returns reference to the first occurrence of element Val.
Definition: ds.h:838
PUNGraph GetSubGraph(const PUNGraph &Graph, const TIntV &NIdV, const bool &RenumberNodes)
Returns an induced subgraph of an undirected graph Graph with NIdV nodes with an optional node renumb...
Definition: subgraph.cpp:7
int MLEGradAscent(const double &Thres, const int &MaxIter, const TStr PlotNm, const double StepAlpha=0.3, const double StepBeta=0.1)
TVec< TIntFltH > F
Definition: agmdirected.h:10
bool IsNode(const int &NId) const
Tests whether ID NId is a node.
Definition: graph.h:546
void Load(TSIn &SIn, const int &RndSeed=0)
Definition: agmdirected.cpp:25
TCodaAnalyzer(TCoda &Coda, const double MemThres=-1.0)
Definition: agmdirected.h:205
double Prediction(const TIntFltH &FU, const TIntFltH &HV)
Definition: agmdirected.h:174
TFlt PNoCom
Definition: agmdirected.h:24
PNGraph G
Definition: agmdirected.h:9
double GetComIn(const int &NID, const int &CID)
Definition: agmdirected.h:107
int GetDeg() const
Returns degree of the current node, the sum of in-degree and out-degree.
Definition: graph.h:402
Definition: fl.h:128
TInt NumComs
Definition: agmdirected.h:18
TBool NodesOk
Definition: agmdirected.h:17
Definition: dt.h:1137
void DelCom(const bool IsOut, const int &NID, const int &CID)
Definition: agmdirected.h:135
Definition: hash.h:781
void GetNonEdgePairScores(TFltIntIntTrV &ScoreV)
void RandomInit(const int InitComs)
Definition: agmdirected.cpp:43
PNGraph Net2ModeCommunities(const double MaxJac, const double JacEdge, const bool GetWcc=true)
Definition: agmdirected.h:273
TCoda()
Definition: agmdirected.h:29
double Norm2(const TIntFltH &UV)
Definition: agmdirected.h:189
void SetHoldOut(const double HOFrac)
Definition: agmdirected.h:54
TNodeI EndNI() const
Returns an iterator referring to the past-the-end node in the graph.
Definition: graph.h:550
double DotProductUtoV(const int &UID, const int &VID)
Definition: agmdirected.h:171
void Union(const TVec< TVal, TSizeTy > &ValV)
Sets this vector to its union with ValV. Assumes the vectors are sorted!
Definition: ds.h:1418
int GetOutDeg() const
Returns out-degree of the current node.
Definition: graph.h:406
Definition: dt.h:412
static TStr Fmt(const char *FmtStr,...)
Definition: dt.cpp:1599
TFltV SumHV
Definition: agmdirected.h:16
Node iterator. Only forward iteration (operator++) is supported.
Definition: graph.h:383
void DumpMemberships(const TStr &OutFNm, const TStrHash< TInt > &NodeNameH)
Definition: agmdirected.h:45
static void GenHoldOutPairs(const PGraph &G, TVec< TIntSet > &HoldOutSet, double HOFrac, TRnd &Rnd)
Definition: agmfast.h:159
void AddCom(const bool IsOut, const int &NID, const int &CID, const double &Val)
Definition: agmdirected.h:114
TTriple< TInt, TInt, TInt > TIntTr
Definition: ds.h:171
void Gen(const TSizeTy &_Vals)
Constructs a vector (an array) of _Vals elements.
Definition: ds.h:523
void GetCommunity(TIntV &CmtyVIn, TIntV &CmtyVOut, const int CID)
Definition: agmdirected.h:50
void GetCmtyVAll(TIntV &CmtyVAll, const int CID)
save bipartite community affiliation into gexf file
Definition: agmdirected.h:263
int GetInDeg() const
Returns in-degree of the current node.
Definition: graph.h:404
TRnd Rnd
Definition: agmdirected.h:12
char * CStr()
Definition: dt.h:479
bool IsKey(const TKey &Key) const
Definition: hash.h:258
TFlt MaxVal
Definition: agmdirected.h:22
TSizeTy Add()
Adds a new element at the end of the vector, after its current last element.
Definition: ds.h:602
Definition: dt.h:974
double Sum(const TIntFltH &UV)
Definition: agmdirected.h:182
double GetCom(const bool IsOut, const int &NID, const int &CID)
Definition: agmdirected.h:93
int Len() const
Definition: hash.h:228
TVec< TIntFltH > InOutCmtyValHV
Definition: agmdirected.h:203
void GradientForNode(const bool IsRow, const int UID, TIntFltH &GradU, const TIntSet &CIDSet)
int GetOutNId(const int &NodeN) const
Returns ID of NodeN-th out-node (the node the current node points to).
Definition: graph.h:416
TVal3 Val3
Definition: ds.h:134
void Draw2ModeCommunity(const int CID, const TStr &OutFNm, const TIntStrH &NIDNameH, const THash< TInt, TIntTr > &NIDColorH)
Definition: agmdirected.h:355
Vector is a sequence TVal objects representing an array that can change in size.
Definition: ds.h:430