SNAP Library, Developer Reference
2012-10-02 12:56:23
SNAP, a general purpose network analysis and graph mining library
|
00001 00002 // Graph Hash Table Key 00003 const int TGraphKey::RoundTo = 4; // round to 4 decimal places 00004 00005 TGraphKey::TGraphKey(const TSFltV& GraphSigV) : Nodes(-1), EdgeV(), SigV(), VariantId(0) { 00006 SigV.Gen(GraphSigV.Len()); 00007 for (int i = 0; i < GraphSigV.Len(); i++) { 00008 SigV[i] = TFlt(TMath::Round(GraphSigV[i], RoundTo)); 00009 } 00010 } 00011 00012 TGraphKey::TGraphKey(const TIntV& GraphSigV) : Nodes(-1), EdgeV(), SigV(), VariantId(0) { 00013 SigV.Gen(GraphSigV.Len()); 00014 for (int i = 0; i < GraphSigV.Len(); i++) { 00015 SigV[i] = TFlt(GraphSigV[i]()); 00016 } 00017 } 00018 00019 TGraphKey::TGraphKey(const TFltV& GraphSigV) : Nodes(-1), EdgeV(), SigV(), VariantId(0) { 00020 SigV.Gen(GraphSigV.Len()); 00021 for (int i = 0; i < GraphSigV.Len(); i++) { 00022 SigV[i] = TFlt(TMath::Round(GraphSigV[i], RoundTo)); 00023 } 00024 } 00025 00026 TGraphKey::TGraphKey(const TGraphKey& GraphKey) : Nodes(GraphKey.Nodes), 00027 EdgeV(GraphKey.EdgeV), SigV(GraphKey.SigV), VariantId(GraphKey.VariantId) { 00028 } 00029 00030 TGraphKey::TGraphKey(TSIn& SIn) : Nodes(SIn), EdgeV(SIn), SigV(SIn), VariantId(SIn) { } 00031 00032 void TGraphKey::Save(TSOut& SOut) const { 00033 Nodes.Save(SOut); EdgeV.Save(SOut); 00034 SigV.Save(SOut); VariantId.Save(SOut); 00035 } 00036 00037 TGraphKey& TGraphKey::operator = (const TGraphKey& GraphKey) { 00038 if (this != &GraphKey) { 00039 Nodes = GraphKey.Nodes; 00040 EdgeV = GraphKey.EdgeV; 00041 SigV = GraphKey.SigV; 00042 VariantId = GraphKey.VariantId; 00043 } 00044 return *this; 00045 } 00046 00047 PNGraph TGraphKey::GetNGraph() const { 00048 PNGraph G = TNGraph::New(); 00049 for (int i = 0; i < GetNodes(); i++) G->AddNode(i); 00050 for (int e = 0; e < GetEdges(); e++) { 00051 G->AddEdge(EdgeV[e].Val1, EdgeV[e].Val2); 00052 } 00053 G->Defrag(); 00054 return G; 00055 } 00056 00057 // renumbers nodes 00058 void TGraphKey::TakeGraph(const PNGraph& Graph) { 00059 TIntH NodeIdH; 00060 for (TNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) { 00061 NodeIdH.AddKey(NI.GetId()); } 00062 Nodes = Graph->GetNodes(); 00063 EdgeV.Gen(Nodes, 0); 00064 for (TNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) { 00065 const int NewNId = NodeIdH.GetKeyId(NI.GetId()); 00066 for (int i = 0; i < NI.GetOutDeg(); i++) { 00067 EdgeV.Add(TIntPr(NewNId, NodeIdH.GetKeyId(NI.GetOutNId(i)))); 00068 } 00069 } 00070 EdgeV.Sort(true); 00071 EdgeV.Pack(); 00072 } 00073 00074 void TGraphKey::TakeGraph(const PNGraph& Graph, TIntPrV& NodeMap) { 00075 TIntSet NodeIdH; 00076 int n = 0; 00077 NodeMap.Gen(Graph->GetNodes(), 0); 00078 for (TNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++, n++) { 00079 NodeIdH.AddKey(NI.GetId()); 00080 NodeMap.Add(TIntPr(NI.GetId(), n)); 00081 } 00082 Nodes = Graph->GetNodes(); 00083 EdgeV.Gen(Nodes, 0); 00084 for (TNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) { 00085 const int NewNId = NodeIdH.GetKeyId(NI.GetId()); 00086 for (int i = 0; i < NI.GetOutDeg(); i++) { 00087 EdgeV.Add(TIntPr(NewNId, NodeIdH.GetKeyId(NI.GetOutNId(i)))); 00088 } 00089 } 00090 EdgeV.Sort(true); 00091 EdgeV.Pack(); 00092 } 00093 00094 void TGraphKey::TakeSig(const PNGraph& Graph, const int& MnSvdGraph, const int& MxSvdGraph) { 00095 const int Edges = Graph->GetEdges(); 00096 Nodes = Graph->GetNodes(); 00097 VariantId = 0; 00098 SigV.Gen(2+Nodes, 0); 00099 // degree sequence 00100 TIntPrV DegV(Nodes, 0); 00101 for (TNGraph::TNodeI NodeI = Graph->BegNI(); NodeI < Graph->EndNI(); NodeI++) { 00102 DegV.Add(TIntPr(NodeI.GetInDeg(), NodeI.GetOutDeg())); 00103 } 00104 DegV.Sort(false); 00105 SigV.Add(TFlt(Nodes)); 00106 SigV.Add(TFlt(Edges)); 00107 for (int i = 0; i < DegV.Len(); i++) { 00108 SigV.Add(DegV[i].Val1()); 00109 SigV.Add(DegV[i].Val2()); 00110 } 00111 // singular values signature 00112 // it turns out that it is cheaper to do brute force isomorphism 00113 // checking than to calculate SVD and then check isomorphism 00114 if (Nodes >= MnSvdGraph && Nodes < MxSvdGraph) { 00115 // perform full SVD 00116 TFltVV AdjMtx(Nodes+1, Nodes+1); 00117 TFltV SngValV; 00118 TFltVV LSingV, RSingV; 00119 TIntH NodeIdH; 00120 // create adjecency matrix 00121 for (TNGraph::TNodeI NodeI = Graph->BegNI(); NodeI < Graph->EndNI(); NodeI++) { 00122 NodeIdH.AddKey(NodeI.GetId()); 00123 } 00124 for (TNGraph::TNodeI NodeI = Graph->BegNI(); NodeI < Graph->EndNI(); NodeI++) { 00125 const int NodeId = NodeIdH.GetKeyId(NodeI.GetId()) + 1; 00126 for (int e = 0; e < NodeI.GetOutDeg(); e++) { 00127 const int DstNId = NodeIdH.GetKeyId(NodeI.GetOutNId(e)) + 1; // no self edges 00128 if (NodeId != DstNId) AdjMtx.At(NodeId, DstNId) = 1; 00129 } 00130 } 00131 try { // can fail to converge but results seem to be good 00132 TSvd::Svd(AdjMtx, LSingV, SngValV, RSingV); 00133 } catch(...) { 00134 printf("\n***No SVD convergence: G(%d, %d): SngValV.Len():%d\n", Nodes(), Graph->GetEdges(), SngValV.Len()); 00135 } 00136 // round singular values 00137 SngValV.Sort(false); 00138 for (int i = 0; i < SngValV.Len(); i++) { 00139 SigV.Add(TMath::Round(SngValV[i], RoundTo)); 00140 } 00141 } 00142 //printf("SIG:\n"); for (int i = 0; i < SigV.Len(); i++) { printf("\t%f\n", SigV[i]); } 00143 SigV.Pack(); 00144 } 00145 00146 void TGraphKey::SaveTxt(FILE *F) const { 00147 fprintf(F, "nodes: %d. edges: %d\n", GetNodes(), GetEdges()); 00148 for (int i = 0; i < EdgeV.Len(); i++) { 00149 fprintf(F," %d\t%d\n", EdgeV[i].Val1(), EdgeV[i].Val2()); 00150 } 00151 } 00152 00153 void TGraphKey::SaveGViz(const TStr& OutFNm, const TStr& Desc, const TStr& NodeAttrs, const int& Size) const { 00154 FILE *F = fopen(OutFNm.CStr(), "wt"); 00155 fprintf(F, "/*****\n"); 00156 fprintf(F, " Graph (%d, %d)\n", GetNodes(), GetEdges()); 00157 //if (! Desc.Empty()) fprintf(F, " %s\n", Desc.CStr()); 00158 fprintf(F, "*****/\n\n"); 00159 fprintf(F, "digraph G {\n"); 00160 if (Size != -1) fprintf(F, " size=\"%d,%d\";\n", Size, Size); 00161 fprintf(F, " graph [splines=true overlap=false]\n"); //size=\"12,10\" ratio=fill 00162 if (NodeAttrs.Empty()) fprintf(F, " node [shape=ellipse, width=0.3, height=0.3]\n"); 00163 else fprintf(F, " node [shape=ellipse, %s]\n", NodeAttrs.CStr()); 00164 if (! EdgeV.Empty()) { 00165 for (int e = 0; e < EdgeV.Len(); e++) { 00166 fprintf(F, " %d -> %d;\n", EdgeV[e].Val1(), EdgeV[e].Val2()); } 00167 } else { 00168 for (int n = 0; n < Nodes; n++) { fprintf(F, " %d;\n", n); } 00169 } 00170 if (! Desc.Empty()) { 00171 fprintf(F, " label = \"\\n%s\\n\";", Desc.CStr()); 00172 fprintf(F, " fontsize=24;\n"); 00173 } 00174 fprintf(F, "}\n"); 00175 fclose(F); 00176 } 00177 00178 // width=0.3, height=0.3, label=\"\", style=filled, color=black 00179 void TGraphKey::DrawGViz(const TStr& OutFNm, const TStr& Desc, const TStr& NodeAttrs, const int& Size) const { 00180 const TStr DotFNm = OutFNm.GetFMid()+".dot"; 00181 SaveGViz(DotFNm, Desc, NodeAttrs, Size); 00182 TSnap::TSnapDetail::GVizDoLayout(DotFNm, OutFNm, gvlDot); 00183 } 00184 00185 bool TGraphKey::IsIsomorph(const TGraphKey& Key1, const TGraphKey& Key2, const TIntV& NodeIdMap) { 00186 const TIntPrV& EdgeV1 = Key1.EdgeV; 00187 const TIntPrV& EdgeV2 = Key2.EdgeV; 00188 if (Key1.Nodes != Key2.Nodes || EdgeV1.Len() != EdgeV2.Len()) return false; 00189 for (int e1 = 0; e1 < EdgeV1.Len(); e1++) { 00190 const TIntPr Edge2(NodeIdMap[EdgeV1[e1].Val1], NodeIdMap[EdgeV1[e1].Val2]); 00191 if (EdgeV2.SearchBin(Edge2) == -1) return false; 00192 } 00193 return true; 00194 } 00195 00196 bool TGraphKey::IsIsomorph(const TGraphKey& Key1, const TGraphKey& Key2, const TVec<TIntV>& NodeIdMapV) { 00197 int IsoPermId; 00198 return IsIsomorph(Key1, Key2, NodeIdMapV, IsoPermId); 00199 } 00200 00201 bool TGraphKey::IsIsomorph(const TGraphKey& Key1, const TGraphKey& Key2, const TVec<TIntV>& NodeIdMapV, int& IsoPermId) { 00202 const TIntPrV& EdgeV1 = Key1.EdgeV; 00203 const TIntPrV& EdgeV2 = Key2.EdgeV; 00204 //for (int i = 0; i < EdgeV1.Len(); i++) printf("\t%d - %d\n", EdgeV1[i].Val1, EdgeV1[i].Val2); printf("\n"); 00205 //for (int i = 0; i < EdgeV2.Len(); i++) printf("\t%d - %d\n", EdgeV2[i].Val1, EdgeV2[i].Val2); 00206 if (Key1.Nodes != Key2.Nodes || EdgeV1.Len() != EdgeV2.Len()) return false; 00207 const int Nodes = NodeIdMapV[0].Len(); 00208 // fast adjecency matrix 00209 TIntV AdjMtx2(Nodes*Nodes); 00210 for (int i = 0; i < EdgeV2.Len(); i++) { 00211 AdjMtx2[EdgeV2[i].Val1*Nodes + EdgeV2[i].Val2] = 1; 00212 } 00213 for (int perm = 0; perm < NodeIdMapV.Len(); perm++) { 00214 const TIntV& NodeIdMap = NodeIdMapV[perm]; 00215 bool IsIso = true; 00216 for (int e1 = 0; e1 < EdgeV1.Len(); e1++) { 00217 const int NId1 = NodeIdMap[EdgeV1[e1].Val1]; 00218 const int NId2 = NodeIdMap[EdgeV1[e1].Val2]; 00219 if (AdjMtx2[NId1*Nodes + NId2] != 1) { 00220 IsIso = false; break; } 00221 } 00222 if (IsIso) { 00223 IsoPermId = perm; 00224 return true; } 00225 } 00226 IsoPermId = -1; 00227 return false; 00228 } 00229 00231 // Simple Edge Graph 00232 bool TSimpleGraph::Join(const TSimpleGraph& G1, const TSimpleGraph& G2) { 00233 const int Edges1 = G1.GetEdges(); 00234 const int Edges2 = G2.GetEdges(); 00235 const TIntPrV& EdgeV1 = G1.EdgeV; 00236 const TIntPrV& EdgeV2 = G2.EdgeV; 00237 const int MxEdges = Edges1+1; 00238 if (GetEdges() != MxEdges) EdgeV.Gen(MxEdges); 00239 IAssert(Edges1 == Edges2); 00240 00241 int e=0, g1=0, g2=0; 00242 while (g1 < Edges1 && g2 < Edges2) { 00243 if (e == MxEdges) return false; 00244 if (abs(g1 - g2) > 1) return false; 00245 if (EdgeV1[g1] == EdgeV2[g2]) { e++; g1++; g2++; } 00246 else if (EdgeV1[g1] < EdgeV2[g2]) { e++; g1++; } 00247 else { e++; g2++; } 00248 } 00249 00250 e=0; g1=0; g2=0; 00251 while (g1 < Edges1 && g2 < Edges2) { 00252 if (EdgeV1[g1] == EdgeV2[g2]) { 00253 EdgeV[e] = EdgeV1[g1]; e++; g1++; g2++; } 00254 else if (EdgeV1[g1] < EdgeV2[g2]) { 00255 EdgeV[e] = EdgeV1[g1]; e++; g1++; } 00256 else { 00257 EdgeV[e] = EdgeV2[g2]; e++; g2++; } 00258 } 00259 if (g1 == Edges1 && g2 == Edges2 && e == MxEdges) return true; 00260 if (e+1 == MxEdges) { 00261 if (g1+1 == Edges1 && g2 == Edges2) { 00262 EdgeV[e] = EdgeV1.Last(); 00263 return true; 00264 } 00265 if (g1 == Edges1 && g2+1 == Edges2) { 00266 EdgeV[e] = EdgeV2.Last(); 00267 return true; 00268 } 00269 } 00270 return false; 00271 } 00272 00273 void TSimpleGraph::Dump(const TStr& Desc) const { 00274 if (! Desc.Empty()) printf("%s. Edges: %d\n", Desc.CStr(), EdgeV.Len()); 00275 else printf("Edges: %d\n", EdgeV.Len()); 00276 for (int i = 0; i < EdgeV.Len(); i++) { 00277 printf("\t%d\t%d\n", EdgeV[i].Val1(), EdgeV[i].Val2()); 00278 } 00279 } 00280 00281 void TSubGraphsEnum::Gen2Graphs() { 00282 // singe edge sub-graphs 00283 SgV.Gen(NGraph->GetEdges(), 0); 00284 TSimpleGraph SimpleG; 00285 TIntPrV& EdgeV = SimpleG.GetEdgeV(); 00286 EdgeV.Gen(1); 00287 for (TNGraph::TNodeI NI = NGraph->BegNI(); NI < NGraph->EndNI(); NI++) { 00288 for (int e = 0; e < NI.GetOutDeg(); e++) { 00289 EdgeV[0] = TIntPr(NI.GetId(), NI.GetOutNId(e)); 00290 SgV.Add(SimpleG); 00291 } 00292 } 00293 SgV.Sort(); 00294 // two edge sub-graphs 00295 EdgeV.Gen(2); 00296 for (int g1 = 0; g1 < SgV.Len()-1; g1++) { 00297 const TIntPr& E1 = SgV[g1].GetEdgeV()[0]; 00298 for (int g2 = g1+1; g2 < SgV.Len(); g2++) { 00299 const TIntPr& E2 = SgV[g2].GetEdgeV()[0]; 00300 if (E1.Val2 == E2.Val1 || E1.Val1 == E2.Val2 || E1.Val1 == E2.Val1 || E1.Val2 == E2.Val2) { 00301 EdgeV[0] = TMath::Mn(E1, E2); 00302 EdgeV[1] = TMath::Mx(E1, E2); 00303 SimpleG.Dump(); 00304 NextSgV.Add(SimpleG); 00305 } 00306 } 00307 } 00308 SgV.MoveFrom(NextSgV); 00309 } 00310 00311 void TSubGraphsEnum::EnumSubGraphs(const int& MaxEdges) { 00312 TExeTm ExeTm; 00313 Gen2Graphs(); 00314 printf(" %2d edge graphs: %d\t[%s]\n", 2, SgV.Len(), ExeTm.GetTmStr()); ExeTm.Tick(); 00315 //for (int i = 0; i < SgV.Len(); i++) { SgV[i].Dump(TStr::Fmt(" %d", i+1)); } 00316 //printf("**************************************************************\n"); 00317 TSimpleGraph SimpleG; 00318 TIntPrV& EdgeV = SimpleG.GetEdgeV(); 00319 // multiple edge sub-graphs 00320 for (int edges = 3; edges <= MaxEdges; edges++) { 00321 EdgeV.Clr(); 00322 printf(" %2d edge graphs:", edges); 00323 for (int g1 = 0; g1 < SgV.Len()-1; g1++) { 00324 for (int g2 = g1+1; g2 < SgV.Len(); g2++) { 00325 if (SimpleG.Join(SgV[g1], SgV[g2])) { NextSgV.Add(SimpleG); } 00326 } 00327 } 00328 printf(" candidates: %8d [%s]", NextSgV.Len(), ExeTm.GetTmStr()); ExeTm.Tick(); 00329 NextSgV.Sort(); 00330 SgV.Gen(NextSgV.Len(), 0); 00331 SgV.Add(NextSgV[0]); 00332 for (int i = 1; i < NextSgV.Len(); i++) { 00333 if (SgV.Last() != NextSgV[i]) { 00334 SgV.Add(NextSgV[i]); 00335 } 00336 } 00337 NextSgV.Clr(false); 00338 printf(" total: %8d [%s]\n", SgV.Len(), ExeTm.GetTmStr()); ExeTm.Tick(); 00339 //for (int i = 0; i < SgV.Len(); i++) { SgV[i].Dump(TStr::Fmt(" %d", i+1)); } 00340 //printf("**************************************************************\n"); 00341 } 00342 } 00343 00344 void TSubGraphsEnum::RecurBfs(const int& MxDepth) { 00345 TExeTm ExeTm; 00346 SgV.Clr(true); 00347 for (TNGraph::TNodeI NI = NGraph->BegNI(); NI < NGraph->EndNI(); NI++) { 00348 TSimpleGraph SimpleG; 00349 RecurBfs(NI.GetId(), MxDepth, SimpleG); 00350 //NGraph->DelNode(NI.GetId()); 00351 printf("."); 00352 } 00353 printf("\ncandidates: %d\n", SgV.Len()); 00354 SgV.Sort(); 00355 int Cnt = 1; 00356 for (int i = 1; i < SgV.Len(); i++) { 00357 if (SgV[i-1] != SgV[i]) Cnt++; 00358 } 00359 printf("distinct: %d\t[%s]\n", Cnt, ExeTm.GetTmStr()); 00360 } 00361 00362 void TSubGraphsEnum::RecurBfs(const int& NId, const int& Depth, TSimpleGraph& PrevG) { 00363 if (Depth == 0) { 00364 TIntPrV& EdgeV = PrevG(); 00365 EdgeV.Sort(); 00366 for (int i = 1; i < EdgeV.Len(); i++) { 00367 if (EdgeV[i-1] == EdgeV[i]) { return; } 00368 } 00369 SgV.Add(PrevG); 00370 return; 00371 } 00372 const TNGraph::TNodeI NI = NGraph ->GetNI(NId); 00373 for (int e = 0; e < NI.GetOutDeg(); e++) { 00374 TSimpleGraph CurG = PrevG; 00375 CurG.AddEdge(NI.GetId(), NI.GetOutNId(e)); 00376 RecurBfs(NI.GetOutNId(e), Depth-1, CurG); 00377 } 00378 for (int e = 0; e < NI.GetInDeg(); e++) { 00379 TSimpleGraph CurG = PrevG; 00380 CurG.AddEdge(NI.GetInNId(e), NI.GetId()); 00381 RecurBfs(NI.GetInNId(e), Depth-1, CurG); 00382 } 00383 } 00384 00385 void TSubGraphsEnum::RecurBfs1(const int& MxDepth) { 00386 TExeTm ExeTm; 00387 SgV.Clr(true); 00388 for (TNGraph::TNodeI NI = NGraph->BegNI(); NI < NGraph->EndNI(); NI++) { 00389 TSimpleGraph SimpleG; 00390 RecurBfs1(NI.GetId(), MxDepth); 00391 //NGraph->DelNode(NI.GetId()); 00392 printf("."); 00393 } 00394 printf("candidates: %d\n", SgV.Len()); 00395 SgV.Sort(); 00396 int Cnt = 1; 00397 for (int i = 1; i < SgV.Len(); i++) { 00398 if (SgV[i-1]!=SgV[i]) { 00399 //SgV[i].Dump(); 00400 Cnt++; 00401 } 00402 } 00403 printf("distinct: %d\t[%s]\n", Cnt, ExeTm.GetTmStr()); 00404 } 00405 00406 void TSubGraphsEnum::RecurBfs1(const int& NId, const int& Depth) { 00407 if (Depth == 0) { 00408 TIntPrV EdgeV; 00409 EdgeH.GetKeyV(EdgeV); 00410 EdgeV.Sort(); 00411 SgV.Add(EdgeV); 00412 return; 00413 } 00414 const TNGraph::TNodeI NI = NGraph ->GetNI(NId); 00415 for (int e = 0; e < NI.GetOutDeg(); e++) { 00416 const TIntPr Edge(NId, NI.GetOutNId(e)); 00417 if (! EdgeH.IsKey(Edge)) { 00418 EdgeH.AddKey(Edge); 00419 RecurBfs1(NI.GetOutNId(e), Depth-1); 00420 EdgeH.DelKey(Edge); 00421 } 00422 } 00423 for (int e = 0; e < NI.GetInDeg(); e++) { 00424 const TIntPr Edge(NI.GetInNId(e), NId); 00425 if (! EdgeH.IsKey(Edge)) { 00426 EdgeH.AddKey(Edge); 00427 RecurBfs1(NI.GetInNId(e), Depth-1); 00428 EdgeH.DelKey(Edge); 00429 } 00430 } 00431 }