SNAP Library, Developer Reference
2012-10-02 12:56:23
SNAP, a general purpose network analysis and graph mining library
|
00001 00002 00003 class TGraphKey { 00004 public: 00005 static const int RoundTo; 00006 private: 00007 public: 00008 TInt Nodes; 00009 TIntPrV EdgeV; // renumbers the graph (node ids 0..nodes-1) 00010 TFltV SigV; // signature (for hashing) 00011 TInt VariantId; // if graphs have same signature but are different 00012 public: 00013 TGraphKey() : Nodes(-1), EdgeV(), SigV(), VariantId(0) { } 00014 TGraphKey(const TSFltV& GraphSigV); 00015 TGraphKey(const TIntV& GraphSigV); 00016 TGraphKey(const TFltV& GraphSigV); 00017 TGraphKey(const TGraphKey& GraphKey); 00018 TGraphKey(TSIn& SIn); 00019 void Save(TSOut& SOut) const; 00020 TGraphKey& operator = (const TGraphKey& GraphKey); 00021 bool operator == (const TGraphKey& GraphKey) const { return SigV==GraphKey.SigV && VariantId==GraphKey.VariantId; } 00022 00023 int GetPrimHashCd() const { return abs(SigV.GetPrimHashCd() ^ VariantId); } 00024 int GetSecHashCd() const { return abs(SigV.GetSecHashCd() ^ VariantId<<8); } 00025 00026 int GetNodes() const { return Nodes; } 00027 int GetEdges() const { return EdgeV.Len(); } 00028 int GetSigLen() const { return SigV.Len(); } 00029 int GetVariant() const { return VariantId; } 00030 void SetVariant(const int& Variant) { VariantId = Variant; } 00031 void SetEdgeV(const TIntPrV& EdgeIdV) { EdgeV = EdgeIdV; } 00032 00033 PNGraph GetNGraph() const; 00034 void TakeGraph(const PNGraph& Graph); // renumbers the nodes 00035 void TakeGraph(const PNGraph& Graph, TIntPrV& NodeMap); // renumbers the nodes 00036 void TakeSig(const PNGraph& Graph, const int& MnSvdGraph, const int& MxSvdGraph); // create graph signature 00037 00038 void SaveTxt(FILE *F) const; 00039 void SaveGViz(const TStr& OutFNm, const TStr& Desc = TStr(), const TStr& NodeAttrs="", const int& Size=-1) const; 00040 void DrawGViz(const TStr& OutFNm, const TStr& Desc = TStr(), const TStr& NodeAttrs="", const int& Size=-1) const; 00041 static bool IsIsomorph(const TGraphKey& Key1, const TGraphKey& Key2, const TIntV& NodeIdMap); 00042 static bool IsIsomorph(const TGraphKey& Key1, const TGraphKey& Key2, const TVec<TIntV>& NodeIdMapV); 00043 static bool IsIsomorph(const TGraphKey& Key1, const TGraphKey& Key2, const TVec<TIntV>& NodeIdMapV, int& IsoPermId); 00044 }; 00045 00049 template <class TDat> 00050 class TGHash { 00051 public: 00052 typedef typename THash<TGraphKey, TDat>::TIter TIter; 00053 private: 00054 TInt MxIsoCheck; // brute force graph isomorphism check 00055 TInt MxSvdGraph; // SVD isomorphism check 00056 THash<TInt, TVec<TIntV> > GSzToPermH; // node permutations up to MxIsoCkeck nodes 00057 TBool HashOnlyTrees; // hashing only trees (exact isomorphism test) 00058 THash<TGraphKey, TDat> GraphH; 00059 private: 00060 void InitPermutations(); 00061 int IsGetKeyId(const PNGraph& Graph) const; 00062 int IsGetKeyId(const PNGraph& Graph, TGraphKey& GKey) const; 00063 int IsGetKeyId(const PNGraph& Graph, TGraphKey& GKey, TIntPrV& NodeMap) const; 00064 public: 00065 TGHash(const bool& HashTrees, const int& MaxIsoCheck=8, const int& MaxSvdGraph=500); 00066 TGHash(TSIn& SIn); 00067 void Save(TSOut& SOut) const; 00068 00069 const TDat& operator [] (const int& KeyId) const { return GraphH[KeyId]; } 00070 TDat& operator [] (const int& KeyId) { return GraphH[KeyId]; } 00071 const TDat& operator () (const TGraphKey& Key) const { return GraphH.GetDat(Key); } 00072 TDat& operator () (const TGraphKey& Key) { return GraphH.GetDat(Key); } 00073 TIter BegI() const { return GraphH.BegI(); } 00074 TIter EndI() const { return GraphH.EndI(); } 00075 TIter GetI(const int& KeyId) const { return GraphH.GetI(KeyId); } 00076 00077 bool HashTrees() const { return HashOnlyTrees; } 00078 00079 void Gen(const int& Ports) { GraphH.Gen(Ports); } 00080 void Clr(const bool& DoDel=true, const int& NoDelLim=-1) { GraphH.Clr(DoDel, NoDelLim); } 00081 bool Empty() const { return GraphH.Empty(); } 00082 int Len() const { return GraphH.Len(); } 00083 int GetPorts() const { return GraphH.GetPorts(); } 00084 bool IsAutoSize() const { return GraphH.IsAutoSize(); } 00085 int GetMxKeyIds() const { return GraphH.GetMxKeyIds(); } 00086 bool IsKeyIdEqKeyN() const { return GraphH.IsKeyIdEqKeyN(); } 00087 00088 int AddKey(const PNGraph& Graph); 00089 TDat& AddDat(const PNGraph& Graph) { return GraphH[AddKey(Graph)]; } 00090 TDat& AddDat(const PNGraph& Graph, const TDat& Dat) { return GraphH[AddKey(Graph)] = Dat; } 00091 00092 bool IsKey(const PNGraph& Graph) const { int k=IsGetKeyId(Graph); return k!=-1; } 00093 int GetKeyId(const PNGraph& Graph) const { int k=IsGetKeyId(Graph); IAssert(k!=-1); return k; } 00094 const TDat& GetDat(const PNGraph& Graph) const { return GraphH[GetKeyId(Graph)]; } 00095 TDat& GetDat(const PNGraph& Graph) { return GraphH[GetKeyId(Graph)]; } 00096 00097 const TGraphKey& GetKey(const int& KeyId) const { return GraphH.GetKey(KeyId); } 00098 int GetKeyId(const TGraphKey& Key) const { return GraphH.GetKeyId(Key); } 00099 bool IsKey(const TGraphKey& Key) const { return GraphH.IsKey(Key); } 00100 bool IsKey(const TGraphKey& Key, int& KeyId) const { return GraphH.IsKey(Key, KeyId); } 00101 bool IsKeyId(const int& KeyId) const { return GraphH.IsKeyId(KeyId); } 00102 const TDat& GetDat(const TGraphKey& Key) const { return GraphH.GetDat(Key); } 00103 TDat& GetDat(const TGraphKey& Key) { return GraphH.GetDat(Key); } 00104 const TDat& GetDatId(const int& KeyId) const { return GraphH[KeyId]; } 00105 TDat& GetDatId(const int& KeyId) { return GraphH[KeyId]; } 00106 00107 void GetKeyDat(const int& KeyId, TGraphKey& Key, TDat& Dat) const { GraphH.GetKeyDat(KeyId, Key, Dat); } 00108 bool IsKeyGetDat(const TGraphKey& Key, TDat& Dat) const { return GraphH.IsKeyGetDat(Key, Dat); } 00109 00110 bool GetNodeMap(const PNGraph& Graph, TIntPrV& NodeMapV) const; 00111 bool GetNodeMap(const PNGraph& Graph, TIntPrV& NodeMapV, int& KeyId) const; 00112 00113 int FFirstKeyId() const { return 0-1; } 00114 bool FNextKeyId(int& KeyId) const { return GraphH.FNextKeyId(KeyId); } 00115 void GetKeyV(TVec<TGraphKey>& KeyV) const { GraphH.GetKeyV(KeyV); } 00116 void GetDatV(TVec<TDat>& DatV) const { GraphH.GetDatV(DatV); } 00117 void GetKeyIdByDat(TIntV& KeyIdV, const bool& Asc = true) const; // order keyIds by data 00118 void GetKeyIdByGSz(TIntV& KeyIdV, const bool& Asc = true) const; // order keyIds by graph size 00119 void GetKeyDatPrV(TVec<TPair<TGraphKey, TDat> >& KeyDatPrV) const { GraphH.GetKeyDatPrV(KeyDatPrV); } 00120 void GetDatKeyPrV(TVec<TPair<TDat, TGraphKey> >& DatKeyPrV) const { GraphH.GetDatKeyPrV(DatKeyPrV); } 00121 00122 void Defrag() { GraphH.Defrag(); } 00123 void Pack() { GraphH.Pack(); } 00124 00125 void DrawGViz(const int& KeyId, const TStr& OutFNmPref, const TStr& OutputType = "gif", TStr Desc="") const; 00126 void DrawGViz(const TIntV& KeyIdV, const TStr& OutFNmPref, const TStr& OutputType = "gif") const; 00127 void SaveTxt(const TStr& OutFNm, const TStr& Desc, const TStr& DatColNm, const bool& SortByKeyVal=true) const; 00128 void SaveDetailTxt(const TStr& OutFNm, const TStr& Desc, const TStr& DatColNm) const; 00129 }; 00130 00131 template <class TDat> 00132 void TGHash<TDat>::InitPermutations() { 00133 GSzToPermH.Clr(); 00134 for (int nodes = 2; nodes <= MxIsoCheck; nodes++) { 00135 TVec<TIntV> NodePermutationV; 00136 TIntV NodeIdV(nodes, 0); 00137 for (int i = 0; i < nodes; i++) NodeIdV.Add(i); 00138 NodeIdV.Pack(); 00139 NodePermutationV.Add(NodeIdV); 00140 while (NodeIdV.NextPerm()) { 00141 NodePermutationV.Add(NodeIdV); 00142 } 00143 NodePermutationV.Pack(); 00144 GSzToPermH.AddDat(nodes, NodePermutationV); 00145 } 00146 } 00147 00148 template <class TDat> 00149 TGHash<TDat>::TGHash(const bool& HashTrees, const int& MaxIsoCheck, const int& MaxSvdGraph) : 00150 MxIsoCheck(MaxIsoCheck), MxSvdGraph(MaxSvdGraph), GSzToPermH(), HashOnlyTrees(HashTrees), GraphH() { 00151 if (! HashTrees) { 00152 InitPermutations(); 00153 } 00154 } 00155 00156 template <class TDat> 00157 TGHash<TDat>::TGHash(TSIn& SIn) : MxIsoCheck(SIn), MxSvdGraph(SIn), GSzToPermH(), HashOnlyTrees(SIn), GraphH(SIn) { 00158 if (! HashOnlyTrees) { 00159 InitPermutations(); 00160 } 00161 } 00162 00163 template <class TDat> 00164 void TGHash<TDat>::Save(TSOut& SOut) const { 00165 MxIsoCheck.Save(SOut); 00166 MxSvdGraph.Save(SOut); 00167 HashOnlyTrees.Save(SOut); 00168 GraphH.Save(SOut); 00169 } 00170 00171 template <class TDat> 00172 int TGHash<TDat>::AddKey(const PNGraph& Graph) { 00173 if (HashOnlyTrees) { 00174 int RootNId; IAssert(TSnap::IsTree(Graph, RootNId)); 00175 TIntV TreeSig; TSnap::GetTreeSig(Graph, RootNId, TreeSig); 00176 TGraphKey GKey(TreeSig); 00177 const int KeyId = GraphH.GetKeyId(GKey); 00178 if (KeyId == -1) { 00179 GKey.TakeGraph(Graph); 00180 return GraphH.AddKey(GKey); 00181 } 00182 return KeyId; 00183 } else { 00184 TGraphKey GKey; 00185 GKey.TakeSig(Graph, MxIsoCheck+1, MxSvdGraph); // get signature 00186 const int Nodes = GKey.GetNodes(); 00187 if (Nodes > 2 && Nodes <= MxIsoCheck) { 00188 GKey.TakeGraph(Graph); 00189 // check all variants with same signature 00190 for (int variant = 1; ; variant++) { 00191 GKey.SetVariant(variant); 00192 int KeyId = GraphH.GetKeyId(GKey); 00193 if (KeyId == -1) { 00194 KeyId = GraphH.AddKey(GKey); 00195 //printf(" new variant: %d (%d)\n", KeyId, variant); 00196 return KeyId; 00197 00198 } 00199 if (TGraphKey::IsIsomorph(GKey, GraphH.GetKey(KeyId), GSzToPermH.GetDat(Nodes))) { 00200 //printf(" isomorphic to: %d\n", KeyId); 00201 return KeyId; 00202 } // found isomorphic graph 00203 } 00204 } else { 00205 const int KeyId = GraphH.GetKeyId(GKey); 00206 if (KeyId == -1) { 00207 GKey.TakeGraph(Graph); 00208 return GraphH.AddKey(GKey); 00209 } 00210 return KeyId; 00211 } 00212 } 00213 Fail; 00214 return -1; 00215 } 00216 00217 template <class TDat> 00218 int TGHash<TDat>::IsGetKeyId(const PNGraph& Graph) const { 00219 TGraphKey GKey; 00220 return IsGetKeyId(Graph, GKey); 00221 } 00222 00223 template <class TDat> 00224 int TGHash<TDat>::IsGetKeyId(const PNGraph& Graph, TGraphKey& GKey) const { 00225 if (HashOnlyTrees) { 00226 int RootNId; IAssert(TSnap::IsTree(Graph, RootNId)); 00227 TIntV TreeSig; TSnap::GetTreeSig(Graph, RootNId, TreeSig); 00228 GKey = TGraphKey(TreeSig); 00229 const int KeyId = GraphH.GetKeyId(GKey); 00230 //IAssert(KeyId != -1); 00231 return KeyId; 00232 } else { 00233 GKey.TakeSig(Graph, MxIsoCheck+1, MxSvdGraph); 00234 const int Nodes = GKey.GetNodes(); 00235 if (Nodes > 2 && Nodes <= MxIsoCheck) { 00236 GKey.TakeGraph(Graph); 00237 for (int variant = 1; ; variant++) { 00238 GKey.SetVariant(variant); 00239 int KeyId = GraphH.GetKeyId(GKey); 00240 //IAssert(KeyId != -1); 00241 if (TGraphKey::IsIsomorph(GKey, GraphH.GetKey(KeyId), GSzToPermH.GetDat(Nodes))) { return KeyId; } 00242 } 00243 return -1; 00244 } else { 00245 const int KeyId = GraphH.GetKeyId(GKey); 00246 //IAssert(KeyId != -1); 00247 return KeyId; 00248 } 00249 } 00250 Fail; 00251 return -1; 00252 } 00253 00254 template <class TDat> 00255 bool TGHash<TDat>::GetNodeMap(const PNGraph& Graph, TIntPrV& NodeMapV) const { 00256 int KeyId; 00257 return GetNodeMap(Graph, NodeMapV, KeyId); 00258 } 00259 00260 template <class TDat> 00261 bool TGHash<TDat>::GetNodeMap(const PNGraph& Graph, TIntPrV& NodeMapV, int& KeyId) const { 00262 NodeMapV.Clr(false); 00263 if (HashOnlyTrees) { 00264 int RootNId; IAssert(TSnap::IsTree(Graph, RootNId)); 00265 TIntV TreeSig; TSnap::GetTreeSig(Graph, RootNId, TreeSig, NodeMapV); 00266 TGraphKey GKey(TreeSig); 00267 KeyId = GraphH.GetKeyId(GKey); 00268 return KeyId != -1; 00269 } else { 00270 const int Nodes = Graph->GetNodes(); 00271 int IsoPermId = -1; 00272 NodeMapV.Clr(false); 00273 if (Nodes == 0) { return true; } 00274 else if (Nodes == 1) { 00275 NodeMapV.Add(TIntPr(Graph->BegNI().GetId(), 0)); return true; } 00276 else if (Nodes <= MxIsoCheck) { 00277 TGraphKey GKey; 00278 GKey.TakeSig(Graph, MxIsoCheck+1, MxSvdGraph); 00279 GKey.TakeGraph(Graph, NodeMapV); 00280 for (int variant = 1; ; variant++) { 00281 GKey.SetVariant(variant); 00282 KeyId = GraphH.GetKeyId(GKey); 00283 if (KeyId == -1) return false; 00284 if (TGraphKey::IsIsomorph(GKey, GraphH.GetKey(KeyId), GSzToPermH.GetDat(Nodes), IsoPermId)) { 00285 const TIntV& K1K2Perm = GSzToPermH.GetDat(Nodes)[IsoPermId]; 00286 // map from graph to key1 to key2 00287 for (int i = 0; i < NodeMapV.Len(); i++) { 00288 NodeMapV[i].Val2 = K1K2Perm[NodeMapV[i].Val2]; 00289 } 00290 return true; 00291 } 00292 } 00293 return false; 00294 } else { 00295 return false; // graph too big to find the mapping 00296 } 00297 } 00298 Fail; return false; 00299 } 00300 00301 template <class TDat> 00302 void TGHash<TDat>::GetKeyIdByDat(TIntV& KeyIdV, const bool& Asc) const { 00303 TVec<TQuad<TDat, TInt,TInt, TInt> > DatKeyIdV(Len(), 0); // <TDat,Nodes,Edges,KeyId> 00304 for (int i = FFirstKeyId(); FNextKeyId(i); ) { 00305 DatKeyIdV.Add(TQuad<TDat, TInt,TInt, TInt>(GetDatId(i), GetKey(i).GetNodes(), GetKey(i).GetEdges(), i)); 00306 } 00307 DatKeyIdV.Sort(Asc); 00308 KeyIdV.Gen(Len(), 0); 00309 for (int i = 0; i < Len(); i++) { 00310 KeyIdV.Add(DatKeyIdV[i].Val4); 00311 } 00312 } 00313 00314 template <class TDat> 00315 void TGHash<TDat>::GetKeyIdByGSz(TIntV& KeyIdV, const bool& Asc) const { 00316 TVec<TQuad<TInt,TInt, TDat, TInt> > DatKeyIdV(Len(), 0); // <Nodes,Edges,TDat,KeyId> 00317 for (int i = FFirstKeyId(); FNextKeyId(i); ) { 00318 DatKeyIdV.Add(TQuad< TInt,TInt, TDat, TInt>(GetKey(i).GetNodes(), GetKey(i).GetEdges(), GetDatId(i), i)); 00319 } 00320 DatKeyIdV.Sort(Asc); 00321 KeyIdV.Gen(Len(), 0); 00322 for (int i = 0; i < Len(); i++) { 00323 KeyIdV.Add(DatKeyIdV[i].Val4); 00324 } 00325 } 00326 00327 template <class TDat> 00328 void TGHash<TDat>::DrawGViz(const int& KeyId, const TStr& OutFNmPref, const TStr& OutputType, TStr Desc) const { 00329 IAssert(OutputType == "ps" || OutputType == "gif" || OutputType == "png"); 00330 const TGraphKey& GKey = GetKey(KeyId); 00331 const TStr Desc1 = TStr::Fmt("%s (%d, %d)", Desc.CStr(), GKey.GetNodes(), GKey.GetEdges()); 00332 GKey.SaveGViz(OutFNmPref+".dot", Desc1); 00333 TSnap::TSnapDetail::GVizDoLayout(OutFNmPref+".dot", OutFNmPref+"."+OutputType, gvlDot); 00334 } 00335 00336 template <class TDat> 00337 void TGHash<TDat>::DrawGViz(const TIntV& KeyIdV, const TStr& OutFNmPref, const TStr& OutputType) const { 00338 IAssert(OutputType == "ps" || OutputType == "gif" || OutputType == "png"); 00339 TExeTm ExeTm; 00340 printf("Plotting %d graphs\n", KeyIdV.Len()); 00341 for (int i = 0; i < KeyIdV.Len(); i++) { 00342 const TStr FNm = TStr::Fmt("%s.%03d.key%d.", OutFNmPref.CStr(), i+1, KeyIdV[i]()); 00343 const TStr Desc = TStr::Fmt("KeyId:%d", KeyIdV[i]()); 00344 const TGraphKey& GKey = GetKey(KeyIdV[i]); 00345 printf("\r %d g(%d, %d) ", i, GKey.GetNodes(), GKey.GetEdges()); 00346 GKey.SaveGViz(FNm+"dot", Desc); 00347 TSnap::TSnapDetail::GVizDoLayout(FNm+"dot", FNm+OutputType, gvlDot); 00348 } 00349 printf("done [%s].\n", ExeTm.GetTmStr()); 00350 } 00351 00352 template <class TDat> 00353 void TGHash<TDat>::SaveTxt(const TStr& OutFNm, const TStr& Desc, const TStr& DatColNm, const bool& SortByKeyVal) const { 00354 TIntV KeyIdV; 00355 if (SortByKeyVal) GetKeyIdByDat(KeyIdV, false); 00356 else GetKeyIdByGSz(KeyIdV, true); 00357 FILE *F = fopen(OutFNm.CStr(), "wt"); 00358 fprintf(F, "Graph-Hash-Table"); 00359 fprintf(F, "%s\n", Desc.CStr()); 00360 fprintf(F, "%d graphs\n", KeyIdV.Len()); 00361 fprintf(F, "Rank\tKeyId\tNodes\tEdges\t%s\n", DatColNm.CStr()); 00362 for (int i = 0; i < KeyIdV.Len(); i++) { 00363 const TGraphKey& Key = GetKey(KeyIdV[i]); 00364 fprintf(F, "%d\t%d\t%d\t%d\t%s\n", i+1, KeyIdV[i](), Key.GetNodes(), Key.GetEdges(), 00365 GetDatId(KeyIdV[i]).GetStr().CStr()); 00366 } 00367 fclose(F); 00368 } 00369 00370 template <class TDat> 00371 void TGHash<TDat>::SaveDetailTxt(const TStr& OutFNm, const TStr& Desc, const TStr& DatColNm) const { 00372 TIntV KeyIdV; GetKeyIdByDat(KeyIdV, false); 00373 FILE *F = fopen(OutFNm.CStr(), "wt"); 00374 fprintf(F, "Graph-Hash-Table\n"); 00375 fprintf(F, "%s\n", Desc.CStr()); 00376 fprintf(F, "%d graphs", KeyIdV.Len()); 00377 for (int i = 0; i < KeyIdV.Len(); i++) { 00378 fprintf(F, "\n\n[%5d]\tRank: %d\n", KeyIdV[i](), i+1); 00379 fprintf(F, "Dat: %s\n", GetDat(KeyIdV[i]).GetStr().CStr()); 00380 GetDatId(KeyIdV[i]).SaveTxt(F); 00381 } 00382 fclose(F); 00383 } 00384 00387 class TSimpleGraph { 00388 private: 00389 TIntPrV EdgeV; 00390 public: 00391 TSimpleGraph() { } 00392 TSimpleGraph(const TIntPrV& GEdgeV) : EdgeV(GEdgeV) { } 00393 bool operator == (const TSimpleGraph& Graph) const { return EdgeV == Graph.EdgeV; } 00394 bool operator < (const TSimpleGraph& Graph) const { return EdgeV < Graph.EdgeV; } 00395 00396 int GetEdges() const { return EdgeV.Len(); } 00397 void AddEdge(const int& SrcNId, const int& DstNId) { EdgeV.Add(TIntPr(SrcNId, DstNId)); } 00398 bool Join(const TSimpleGraph& G1, const TSimpleGraph& G2); 00399 TIntPrV& GetEdgeV() { return EdgeV; } 00400 TIntPrV& operator () () { return EdgeV; } 00401 00402 void Dump(const TStr& Desc = TStr()) const; 00403 }; 00404 typedef TVec<TSimpleGraph> TSimpleGraphV; 00405 00408 class TSubGraphsEnum { 00409 private: 00410 TSimpleGraphV SgV, NextSgV; 00411 THash<TIntPr, TIntH> EdgeH; 00412 PNGraph NGraph; 00413 public: 00414 TSubGraphsEnum(PNGraph Graph) : NGraph(Graph) { } 00415 00416 void Gen2Graphs(); 00417 void EnumSubGraphs(const int& MaxEdges); 00418 void RecurBfs(const int& MxDepth); 00419 void RecurBfs(const int& NId, const int& Depth, TSimpleGraph& PrevG); 00420 void RecurBfs1(const int& MxDepth); 00421 void RecurBfs1(const int& NId, const int& Depth); 00422 //void RecurBfs(const int& NId, const int& Depth, const THash<TIntPr, TInt>& EdgeH); 00423 }; 00424 00425