SNAP Library, Developer Reference
2012-10-02 12:56:23
SNAP, a general purpose network analysis and graph mining library
|
00001 // TODO ROK, Jure included basic documentation, finalize reference doc 00002 00004 // Loading and saving graphs from/to various file formats. 00005 namespace TSnap { 00006 00008 template <class PGraph> PGraph LoadEdgeList(const TStr& InFNm, const int& SrcColId=0, const int& DstColId=1); 00010 template <class PGraph> PGraph LoadEdgeList(const TStr& InFNm, const int& SrcColId, const int& DstColId, const char& Separator); 00012 template <class PGraph> PGraph LoadEdgeListStr(const TStr& InFNm, const int& SrcColId=0, const int& DstColId=1); 00014 template <class PGraph> PGraph LoadEdgeListStr(const TStr& InFNm, const int& SrcColId, const int& DstColId, TStrHash<TInt>& StrToNIdH); 00016 template <class PGraph> PGraph LoadConnList(const TStr& InFNm); 00018 template <class PGraph> PGraph LoadConnListStr(const TStr& InFNm, TStrHash<TInt>& StrToNIdH); 00019 00023 template <class PGraph> PGraph LoadPajek(const TStr& InFNm); 00025 PNGraph LoadDyNet(const TStr& FNm); 00027 TVec<PNGraph> LoadDyNetGraphV(const TStr& FNm); 00028 00029 //TODO: Sparse/Dense adjacency matrix which values we threshold at Thresh to obtain an adjacency matrix. 00030 //template <class PGraph> PGraph LoadAdjMtx(const TStr& FNm, const int Thresh); 00031 //TODO: Load from a GML file format (http://en.wikipedia.org/wiki/Graph_Modelling_Language) 00032 //template <class PGraph> PGraph LoadGml(const TStr& FNm, const int Thresh); 00033 00034 00036 template <class PGraph> void SaveEdgeList(const PGraph& Graph, const TStr& OutFNm, const TStr& Desc=TStr()); 00038 template <class PGraph> void SavePajek(const PGraph& Graph, const TStr& OutFNm); 00040 template <class PGraph> void SavePajek(const PGraph& Graph, const TStr& OutFNm, const TIntStrH& NIdColorH); 00042 template <class PGraph> void SavePajek(const PGraph& Graph, const TStr& OutFNm, const TIntStrH& NIdColorH, const TIntStrH& NIdLabelH); 00044 template <class PGraph> void SavePajek(const PGraph& Graph, const TStr& OutFNm, const TIntStrH& NIdColorH, const TIntStrH& NIdLabelH, const TIntStrH& EIdColorH); 00046 template <class PGraph> void SaveMatlabSparseMtx(const PGraph& Graph, const TStr& OutFNm); 00049 template<class PGraph> void SaveGViz(const PGraph& Graph, const TStr& OutFNm, const TStr& Desc=TStr(), const bool& NodeLabels=false, const TIntStrH& NIdColorH=TIntStrH()); 00052 template<class PGraph> void SaveGViz(const PGraph& Graph, const TStr& OutFNm, const TStr& Desc, const TIntStrH& NIdLabelH); 00053 00054 //TODO: Save to a GML file format (http://en.wikipedia.org/wiki/Graph_Modelling_Language) 00055 //template <class PGraph> SaveGml(const PGraph& Graph, const TStr& OutFNm, const TStr& Desc); 00056 00058 // Implementation 00059 00064 template <class PGraph> 00065 PGraph LoadEdgeList(const TStr& InFNm, const int& SrcColId, const int& DstColId) { 00066 TSsParser Ss(InFNm, ssfWhiteSep, true, true, true); 00067 PGraph Graph = PGraph::TObj::New(); 00068 int SrcNId, DstNId; 00069 while (Ss.Next()) { 00070 if (! Ss.GetInt(SrcColId, SrcNId) || ! Ss.GetInt(DstColId, DstNId)) { continue; } 00071 if (! Graph->IsNode(SrcNId)) { Graph->AddNode(SrcNId); } 00072 if (! Graph->IsNode(DstNId)) { Graph->AddNode(DstNId); } 00073 Graph->AddEdge(SrcNId, DstNId); 00074 } 00075 Graph->Defrag(); 00076 return Graph; 00077 } 00078 00083 template <class PGraph> 00084 PGraph LoadEdgeList(const TStr& InFNm, const int& SrcColId, const int& DstColId, const char& Separator) { 00085 TSsParser Ss(InFNm, Separator); 00086 PGraph Graph = PGraph::TObj::New(); 00087 int SrcNId, DstNId; 00088 while (Ss.Next()) { 00089 if (! Ss.GetInt(SrcColId, SrcNId) || ! Ss.GetInt(DstColId, DstNId)) { continue; } 00090 if (! Graph->IsNode(SrcNId)) { Graph->AddNode(SrcNId); } 00091 if (! Graph->IsNode(DstNId)) { Graph->AddNode(DstNId); } 00092 Graph->AddEdge(SrcNId, DstNId); 00093 } 00094 Graph->Defrag(); 00095 return Graph; 00096 } 00097 00102 template <class PGraph> 00103 PGraph LoadEdgeListStr(const TStr& InFNm, const int& SrcColId, const int& DstColId) { 00104 TSsParser Ss(InFNm, ssfWhiteSep); 00105 PGraph Graph = PGraph::TObj::New(); 00106 TStrHash<TInt> StrToNIdH(Mega(1), true); // hash-table mapping strings to integer node ids 00107 while (Ss.Next()) { 00108 const int SrcNId = StrToNIdH.AddKey(Ss[SrcColId]); 00109 const int DstNId = StrToNIdH.AddKey(Ss[DstColId]); 00110 if (! Graph->IsNode(SrcNId)) { Graph->AddNode(SrcNId); } 00111 if (! Graph->IsNode(DstNId)) { Graph->AddNode(DstNId); } 00112 Graph->AddEdge(SrcNId, DstNId); 00113 } 00114 Graph->Defrag(); 00115 return Graph; 00116 } 00117 00123 template <class PGraph> 00124 PGraph LoadEdgeListStr(const TStr& InFNm, const int& SrcColId, const int& DstColId, TStrHash<TInt>& StrToNIdH) { 00125 TSsParser Ss(InFNm, ssfWhiteSep); 00126 PGraph Graph = PGraph::TObj::New(); 00127 while (Ss.Next()) { 00128 const int SrcNId = StrToNIdH.AddKey(Ss[SrcColId]); 00129 const int DstNId = StrToNIdH.AddKey(Ss[DstColId]); 00130 if (! Graph->IsNode(SrcNId)) { Graph->AddNode(SrcNId); } 00131 if (! Graph->IsNode(DstNId)) { Graph->AddNode(DstNId); } 00132 Graph->AddEdge(SrcNId, DstNId); 00133 } 00134 Graph->Defrag(); 00135 return Graph; 00136 } 00137 00141 template <class PGraph> 00142 PGraph LoadConnList(const TStr& InFNm) { 00143 TSsParser Ss(InFNm, ssfWhiteSep, true, true, true); 00144 PGraph Graph = PGraph::TObj::New(); 00145 while (Ss.Next()) { 00146 if (! Ss.IsInt(0)) { continue; } 00147 const int SrcNId = Ss.GetInt(0); 00148 if (! Graph->IsNode(SrcNId)) { Graph->AddNode(SrcNId); } 00149 for (int dst = 1; dst < Ss.Len(); dst++) { 00150 const int DstNId = Ss.GetInt(dst); 00151 if (! Graph->IsNode(DstNId)) { Graph->AddNode(DstNId); } 00152 Graph->AddEdge(SrcNId, DstNId); 00153 } 00154 } 00155 Graph->Defrag(); 00156 return Graph; 00157 } 00158 00163 template <class PGraph> 00164 PGraph LoadConnListStr(const TStr& InFNm, TStrHash<TInt>& StrToNIdH) { 00165 TSsParser Ss(InFNm, ssfWhiteSep, true, true, true); 00166 PGraph Graph = PGraph::TObj::New(); 00167 while (Ss.Next()) { 00168 const int SrcNId = StrToNIdH.AddDatId(Ss[0]); 00169 if (! Graph->IsNode(SrcNId)) { Graph->AddNode(SrcNId); } 00170 for (int dst = 1; dst < Ss.Len(); dst++) { 00171 const int DstNId = StrToNIdH.AddDatId(Ss[dst]); 00172 if (! Graph->IsNode(DstNId)) { Graph->AddNode(DstNId); } 00173 Graph->AddEdge(SrcNId, DstNId); 00174 } 00175 } 00176 Graph->Defrag(); 00177 return Graph; 00178 } 00179 00180 template <class PGraph> 00181 PGraph LoadPajek(const TStr& InFNm) { 00182 PGraph Graph = PGraph::TObj::New(); 00183 TSsParser Ss(InFNm, ssfSpaceSep, true, true, true); 00184 while ((Ss.Len()==0 || strstr(Ss[0], "*vertices") == NULL) && ! Ss.Eof()) { 00185 Ss.Next(); Ss.ToLc(); } 00186 // nodes 00187 bool EdgeList = true; 00188 EAssert(strstr(Ss[0], "*vertices") != NULL); 00189 while (Ss.Next()) { 00190 Ss.ToLc(); 00191 if (Ss.Len()>0 && Ss[0][0] == '%') { continue; } // comment 00192 if (strstr(Ss[0], "*arcslist")!=NULL || strstr(Ss[0],"*edgeslist")!=NULL) { EdgeList=false; break; } 00193 if (strstr(Ss[0], "*arcs")!=NULL || strstr(Ss[0],"*edges")!=NULL) { break; } // arcs are directed, edges are undirected 00194 Graph->AddNode(Ss.GetInt(0)); 00195 } 00196 // edges 00197 while (Ss.Next()) { 00198 if (Ss.Len()>0 && Ss[0][0] == '%') { continue; } // comment 00199 if (Ss.Len()>0 && Ss[0][0] == '*') { break; } 00200 if (EdgeList) { 00201 // <source> <destination> <weight> 00202 if (Ss.Len() == 3 && Ss.IsInt(0) && Ss.IsInt(1)) { 00203 Graph->AddEdge(Ss.GetInt(0), Ss.GetInt(1)); } 00204 } else { 00205 // <source> <destination1> <destination2> <destination3> ... 00206 const int SrcNId = Ss.GetInt(0); 00207 for (int i = 1; i < Ss.Len(); i++) { 00208 Graph->AddEdge(SrcNId, Ss.GetInt(i)); } 00209 } 00210 } 00211 return Graph; 00212 } 00213 00214 template <class PGraph> 00215 void SaveEdgeList(const PGraph& Graph, const TStr& OutFNm, const TStr& Desc) { 00216 FILE *F = fopen(OutFNm.CStr(), "wt"); 00217 if (HasGraphFlag(typename PGraph::TObj, gfDirected)) { fprintf(F, "# Directed graph: %s \n", OutFNm.CStr()); } 00218 else { fprintf(F, "# Undirected graph (each unordered pair of nodes is saved once): %s\n", OutFNm.CStr()); } 00219 if (! Desc.Empty()) { fprintf(F, "# %s\n", Desc.CStr()); } 00220 fprintf(F, "# Nodes: %d Edges: %d\n", Graph->GetNodes(), Graph->GetEdges()); 00221 if (HasGraphFlag(typename PGraph::TObj, gfDirected)) { fprintf(F, "# FromNodeId\tToNodeId\n"); } 00222 else { fprintf(F, "# NodeId\tNodeId\n"); } 00223 for (typename PGraph::TObj::TEdgeI ei = Graph->BegEI(); ei < Graph->EndEI(); ei++) { 00224 fprintf(F, "%d\t%d\n", ei.GetSrcNId(), ei.GetDstNId()); 00225 } 00226 fclose(F); 00227 } 00228 00229 template <class PGraph> 00230 void SavePajek(const PGraph& Graph, const TStr& OutFNm) { 00231 TIntH NIdToIdH(Graph->GetNodes(), true); 00232 FILE *F = fopen(OutFNm.CStr(), "wt"); 00233 fprintf(F, "*Vertices %d\n", Graph->GetNodes()); 00234 int i = 0; 00235 for (typename PGraph::TObj::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++, i++) { 00236 fprintf(F, "%d \"%d\" ic Red fos 10\n", i+1, NI.GetId()); // ic: internal color, fos: font size 00237 NIdToIdH.AddDat(NI.GetId(), i+1); 00238 } 00239 if (HasGraphFlag(typename PGraph::TObj, gfDirected)) { 00240 fprintf(F, "*Arcs %d\n", Graph->GetEdges()); } // arcs are directed, edges are undirected 00241 else { 00242 fprintf(F, "*Edges %d\n", Graph->GetEdges()); 00243 } 00244 for (typename PGraph::TObj::TEdgeI EI = Graph->BegEI(); EI < Graph->EndEI(); EI++) { 00245 const int SrcNId = NIdToIdH.GetDat(EI.GetSrcNId()); 00246 const int DstNId = NIdToIdH.GetDat(EI.GetDstNId()); 00247 fprintf(F, "%d %d %d c Black\n", SrcNId, DstNId, 1); // width=1 00248 } 00249 fclose(F); 00250 } 00251 00254 template <class PGraph> 00255 void SavePajek(const PGraph& Graph, const TStr& OutFNm, const TIntStrH& NIdColorH) { 00256 TIntH NIdToIdH(Graph->GetNodes(), true); 00257 FILE *F = fopen(OutFNm.CStr(), "wt"); 00258 fprintf(F, "*Vertices %d\n", Graph->GetNodes()); 00259 int i = 0; 00260 for (typename PGraph::TObj::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++, i++) { 00261 fprintf(F, "%d \"%d\" ic %s fos 10\n", i+1, NI.GetId(), 00262 NIdColorH.IsKey(NI.GetId()) ? NIdColorH.GetDat(NI.GetId()).CStr() : "Red"); 00263 NIdToIdH.AddDat(NI.GetId(), i+1); 00264 } 00265 if (HasGraphFlag(typename PGraph::TObj, gfDirected)) { 00266 fprintf(F, "*Arcs %d\n", Graph->GetEdges()); } // arcs are directed, edges are undirected 00267 else { 00268 fprintf(F, "*Edges %d\n", Graph->GetEdges()); 00269 } 00270 for (typename PGraph::TObj::TEdgeI EI = Graph->BegEI(); EI < Graph->EndEI(); EI++) { 00271 const int SrcNId = NIdToIdH.GetDat(EI.GetSrcNId()); 00272 const int DstNId = NIdToIdH.GetDat(EI.GetDstNId()); 00273 fprintf(F, "%d %d %d c Black\n", SrcNId, DstNId, 1); 00274 } 00275 fclose(F); 00276 } 00277 00281 template <class PGraph> 00282 void SavePajek(const PGraph& Graph, const TStr& OutFNm, const TIntStrH& NIdColorH, const TIntStrH& NIdLabelH) { 00283 TIntH NIdToIdH(Graph->GetNodes(), true); 00284 FILE *F = fopen(OutFNm.CStr(), "wt"); 00285 fprintf(F, "*Vertices %d\n", Graph->GetNodes()); 00286 int i = 0; 00287 for (typename PGraph::TObj::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++, i++) { 00288 fprintf(F, "%d \"%s\" ic %s fos 10\n", i+1, 00289 NIdLabelH.IsKey(NI.GetId()) ? NIdLabelH.GetDat(NI.GetId()).CStr() : TStr::Fmt("%d", NI.GetId()).CStr(), 00290 NIdColorH.IsKey(NI.GetId()) ? NIdColorH.GetDat(NI.GetId()).CStr() : "Red"); 00291 NIdToIdH.AddDat(NI.GetId(), i+1); 00292 } 00293 if (HasGraphFlag(typename PGraph::TObj, gfDirected)) { 00294 fprintf(F, "*Arcs %d\n", Graph->GetEdges()); } // arcs are directed, edges are undirected 00295 else { 00296 fprintf(F, "*Edges %d\n", Graph->GetEdges()); 00297 } 00298 for (typename PGraph::TObj::TEdgeI EI = Graph->BegEI(); EI < Graph->EndEI(); EI++) { 00299 const int SrcNId = NIdToIdH.GetDat(EI.GetSrcNId()); 00300 const int DstNId = NIdToIdH.GetDat(EI.GetDstNId()); 00301 fprintf(F, "%d %d %d c Black\n", SrcNId, DstNId, 1); 00302 } 00303 fclose(F); 00304 } 00305 00310 template <class PGraph> 00311 void SavePajek(const PGraph& Graph, const TStr& OutFNm, const TIntStrH& NIdColorH, const TIntStrH& NIdLabelH, const TIntStrH& EIdColorH) { 00312 CAssert(HasGraphFlag(typename PGraph::TObj, gfMultiGraph)); // network needs to have edge ids 00313 TIntH NIdToIdH(Graph->GetNodes(), true); 00314 FILE *F = fopen(OutFNm.CStr(), "wt"); 00315 fprintf(F, "*Vertices %d\n", Graph->GetNodes()); 00316 int i = 0; 00317 for (typename PGraph::TObj::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++, i++) { 00318 fprintf(F, "%d \"%s\" ic %s fos 10\n", i+1, 00319 NIdLabelH.IsKey(NI.GetId()) ? NIdLabelH.GetDat(NI.GetId()).CStr() : TStr::Fmt("%d", NI.GetId()).CStr(), 00320 NIdColorH.IsKey(NI.GetId()) ? NIdColorH.GetDat(NI.GetId()).CStr() : "Red"); 00321 NIdToIdH.AddDat(NI.GetId(), i+1); 00322 } 00323 if (HasGraphFlag(typename PGraph::TObj, gfDirected)) { 00324 fprintf(F, "*Arcs %d\n", Graph->GetEdges()); } // arcs are directed, edges are undirected 00325 else { 00326 fprintf(F, "*Edges %d\n", Graph->GetEdges()); 00327 } 00328 for (typename PGraph::TObj::TEdgeI EI = Graph->BegEI(); EI < Graph->EndEI(); EI++) { 00329 const int SrcNId = NIdToIdH.GetDat(EI.GetSrcNId()); 00330 const int DstNId = NIdToIdH.GetDat(EI.GetDstNId()); 00331 fprintf(F, "%d %d 1 c %s\n", SrcNId, DstNId, 1, 00332 EIdColorH.IsKey(EI.GetId()) ? EIdColorH.GetDat(EI.GetId()).CStr() : "Black"); 00333 } 00334 fclose(F); 00335 } 00336 00338 template <class PGraph> 00339 void SaveMatlabSparseMtx(const PGraph& Graph, const TStr& OutFNm) { 00340 FILE *F = fopen(OutFNm.CStr(), "wt"); 00341 TIntSet NIdSet(Graph->GetNodes()); // so that 00342 for (typename PGraph::TObj::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) { 00343 NIdSet.AddKey(NI.GetId()); 00344 } 00345 for (typename PGraph::TObj::TEdgeI EI = Graph->BegEI(); EI < Graph->EndEI(); EI++) { 00346 const int Src = NIdSet.GetKeyId(EI.GetSrcNId())+1; 00347 const int Dst = NIdSet.GetKeyId(EI.GetDstNId())+1; 00348 fprintf(F, "%d\t%d\t1\n", Src, Dst); 00349 if (! HasGraphFlag(typename PGraph::TObj, gfDirected) && Src!=Dst) { 00350 fprintf(F, "%d\t%d\t1\n", Dst, Src); 00351 } 00352 } 00353 fclose(F); 00354 } 00355 00356 template<class PGraph> 00357 void SaveGViz(const PGraph& Graph, const TStr& OutFNm, const TStr& Desc, const bool& NodeLabels, const TIntStrH& NIdColorH) { 00358 const bool IsDir = HasGraphFlag(typename PGraph::TObj, gfDirected); 00359 FILE *F = fopen(OutFNm.CStr(), "wt"); 00360 if (! Desc.Empty()) fprintf(F, "/*****\n%s\n*****/\n\n", Desc.CStr()); 00361 if (IsDir) { fprintf(F, "digraph G {\n"); } else { fprintf(F, "graph G {\n"); } 00362 fprintf(F, " graph [splines=false overlap=false]\n"); //size=\"12,10\" ratio=fill 00363 // node [width=0.3, height=0.3, label=\"\", style=filled, color=black] 00364 // node [shape=box, width=0.3, height=0.3, label=\"\", style=filled, fillcolor=red] 00365 fprintf(F, " node [shape=ellipse, width=0.3, height=0.3%s]\n", NodeLabels?"":", label=\"\""); 00366 // node colors 00367 //for (int i = 0; i < NIdColorH.Len(); i++) { 00368 for (typename PGraph::TObj::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) { 00369 if (NIdColorH.IsKey(NI.GetId())) { 00370 fprintf(F, " %d [style=filled, fillcolor=\"%s\"];\n", NI.GetId(), NIdColorH.GetDat(NI.GetId()).CStr()); } 00371 else { 00372 fprintf(F, " %d ;\n", NI.GetId()); 00373 } 00374 } 00375 // edges 00376 for (typename PGraph::TObj::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) { 00377 if (NI.GetOutDeg()==0 && NI.GetInDeg()==0 && !NIdColorH.IsKey(NI.GetId())) { 00378 fprintf(F, "%d;\n", NI.GetId()); } 00379 else { 00380 for (int e = 0; e < NI.GetOutDeg(); e++) { 00381 if (! IsDir && NI.GetId() > NI.GetOutNId(e)) { continue; } 00382 fprintf(F, " %d %s %d;\n", NI.GetId(), IsDir?"->":"--", NI.GetOutNId(e)); 00383 } 00384 } 00385 } 00386 if (! Desc.Empty()) { 00387 fprintf(F, " label = \"\\n%s\\n\";", Desc.CStr()); 00388 fprintf(F, " fontsize=24;\n"); 00389 } 00390 fprintf(F, "}\n"); 00391 fclose(F); 00392 } 00393 00394 template<class PGraph> 00395 void SaveGViz(const PGraph& Graph, const TStr& OutFNm, const TStr& Desc, const TIntStrH& NIdLabelH) { 00396 const bool IsDir = Graph->HasFlag(gfDirected); 00397 FILE *F = fopen(OutFNm.CStr(), "wt"); 00398 if (! Desc.Empty()) fprintf(F, "/*****\n%s\n*****/\n\n", Desc.CStr()); 00399 if (IsDir) { fprintf(F, "digraph G {\n"); } else { fprintf(F, "graph G {\n"); } 00400 fprintf(F, " graph [splines=true overlap=false]\n"); //size=\"12,10\" ratio=fill 00401 fprintf(F, " node [shape=ellipse, width=0.3, height=0.3]\n"); 00402 // node colors 00403 //for (int i = 0; i < NodeLabelH.Len(); i++) { 00404 for (typename PGraph::TObj::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) { 00405 fprintf(F, " %d [label=\"%s\"];\n", NI.GetId(), NIdLabelH.GetDat(NI.GetId()).CStr()); 00406 } 00407 // edges 00408 for (typename PGraph::TObj::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) { 00409 if (NI.GetOutDeg()==0 && NI.GetInDeg()==0 && ! NIdLabelH.IsKey(NI.GetId())) { 00410 fprintf(F, "%d;\n", NI.GetId()); } 00411 else { 00412 for (int e = 0; e < NI.GetOutDeg(); e++) { 00413 if (! IsDir && NI.GetId() > NI.GetOutNId(e)) { continue; } 00414 fprintf(F, " %d %s %d;\n", NI.GetId(), IsDir?"->":"--", NI.GetOutNId(e)); 00415 } 00416 } 00417 } 00418 if (! Desc.Empty()) { 00419 fprintf(F, " label = \"\\n%s\\n\";", Desc.CStr()); 00420 fprintf(F, " fontsize=24;\n"); 00421 } 00422 fprintf(F, "}\n"); 00423 fclose(F); 00424 } 00425 00426 } // namespace TSnap