SNAP Library, Developer Reference
2012-10-02 12:56:23
SNAP, a general purpose network analysis and graph mining library
|
00001 namespace TSnap { 00002 00004 // BFS and DFS 00008 template <class PGraph> PNGraph GetBfsTree(const PGraph& Graph, const int& StartNId, const bool& FollowOut, const bool& FollowIn); 00010 template <class PGraph> int GetSubTreeSz(const PGraph& Graph, const int& StartNId, const bool& FollowOut, const bool& FollowIn, int& TreeSz, int& TreeDepth); 00013 template <class PGraph> int GetNodesAtHop(const PGraph& Graph, const int& StartNId, const int& Hop, TIntV& NIdV, const bool& IsDir=false); 00016 template <class PGraph> int GetNodesAtHops(const PGraph& Graph, const int& StartNId, TIntPrV& HopCntV, const bool& IsDir=false); 00017 00019 // Shortest paths 00022 template <class PGraph> int GetShortPath(const PGraph& Graph, const int& SrcNId, const int& DstNId, const bool& IsDir=false); 00027 template <class PGraph> int GetShortPath(const PGraph& Graph, const int& SrcNId, TIntH& NIdToDistH, const bool& IsDir=false, const int& MaxDist=TInt::Mx); 00028 00030 // Diameter 00031 00034 template <class PGraph> int GetBfsFullDiam(const PGraph& Graph, const int& NTestNodes, const bool& IsDir=false); 00037 template <class PGraph> double GetBfsEffDiam(const PGraph& Graph, const int& NTestNodes, const bool& IsDir=false); 00040 template <class PGraph> double GetBfsEffDiam(const PGraph& Graph, const int& NTestNodes, const bool& IsDir, double& EffDiam, int& FullDiam); 00043 template <class PGraph> double GetBfsEffDiam(const PGraph& Graph, const int& NTestNodes, const bool& IsDir, double& EffDiam, int& FullDiam, double& AvgDiam); 00046 template <class PGraph> double GetBfsEffDiam(const PGraph& Graph, const int& NTestNodes, const TIntV& SubGraphNIdV, const bool& IsDir, double& EffDiam, int& FullDiam); 00047 00048 // TODO: Implement in the future 00049 //template <class PGraph> int GetRangeDist(const PGraph& Graph, const int& SrcNId, const int& DstNId, const bool& IsDir=false); 00050 //template <class PGraph> int GetShortPath(const PGraph& Graph, const int& SrcNId, TIntH& NIdToDistH, const bool& IsDir=false, const int& MaxDist=1000); 00051 //template <class PGraph> int GetShortPath(const PGraph& Graph, const int& SrcNId, const TIntSet& TargetSet, const bool& IsDir, TIntV& PathNIdV); 00052 //template <class PGraph> int GetShortPath(TIntH& NIdPrnH, TCcQueue<int>& NIdQ, const PGraph& Graph, const int& SrcNId, const TIntSet& TargetSet, const bool& IsDir, TIntV& PathNIdV); 00053 //template <class PGraph> int GetMxShortDist(const PGraph& Graph, const int& SrcNId, const bool& IsDir=false); 00054 //template <class PGraph> int GetMxShortDist(const PGraph& Graph, const int& SrcNId, const bool& IsDir, int& MxDistNId); 00055 //template <class PGraph> int GetMxShortDist(const PGraph& Graph, const int& SrcNId, const bool& IsDir, int& MxDistNId, TCcQueue<int>& NIdQ, TCcQueue<int>& DistQ, TIntSet& VisitedH); 00056 //template <class PGraph> int GetMxGreedyDist(const PGraph& Graph, const int& SrcNId, const bool& IsDir=false); 00057 //template <class PGraph> int GetMxGreedyDist(const PGraph& Graph, const int& SrcNId, const bool& IsDir, TCcQueue<int>& NIdQ, TCcQueue<int>& DistQ, TIntSet& VisitedH); 00058 //template <class PGraph> PNGraph GetShortPathsSubGraph(const PGraph& Graph, const TIntV& SubGraphNIdV); 00059 //template <class PGraph> PGraph GetWccPathsSubGraph(const PGraph& Graph, const TIntV& NIdV); 00060 //template <class PGraph> void GetSubTreeSz(const PGraph& Graph, const int& StartNId, const bool& FollowOutEdges, int& TreeSz, int& TreeDepth); 00061 00062 } // namespace TSnap 00063 00067 template<class PGraph> 00068 class TBreathFS { 00069 public: 00070 PGraph Graph; 00071 TSnapQueue<int> Queue; 00072 TInt StartNId; 00073 TIntH NIdDistH; 00074 public: 00075 TBreathFS(const PGraph& GraphPt, const bool& InitBigQ=true) : 00076 Graph(GraphPt), Queue(InitBigQ?Graph->GetNodes():1024), NIdDistH(InitBigQ?Graph->GetNodes():1024) { } 00078 void SetGraph(const PGraph& GraphPt); 00080 int DoBfs(const int& StartNode, const bool& FollowOut, const bool& FollowIn, const int& TargetNId=-1, const int& MxDist=TInt::Mx); 00082 int GetNVisited() const { return NIdDistH.Len(); } 00084 void GetVisitedNIdV(TIntV& NIdV) const { NIdDistH.GetKeyV(NIdV); } 00087 int GetHops(const int& SrcNId, const int& DstNId) const; 00090 int GetRndPath(const int& SrcNId, const int& DstNId, TIntV& PathNIdV) const; 00091 }; 00092 00093 template<class PGraph> 00094 void TBreathFS<PGraph>::SetGraph(const PGraph& GraphPt) { 00095 Graph=GraphPt; 00096 const int N=GraphPt->GetNodes(); 00097 if (Queue.Reserved() < N) { Queue.Gen(N); } 00098 if (NIdDistH.GetReservedKeyIds() < N) { NIdDistH.Gen(N); } 00099 } 00100 00101 template<class PGraph> 00102 int TBreathFS<PGraph>::DoBfs(const int& StartNode, const bool& FollowOut, const bool& FollowIn, const int& TargetNId, const int& MxDist) { 00103 StartNId = StartNode; 00104 IAssert(Graph->IsNode(StartNId)); 00105 NIdDistH.Clr(false); NIdDistH.AddDat(StartNId, 0); 00106 Queue.Clr(false); Queue.Push(StartNId); 00107 int v, MaxDist = 0; 00108 while (! Queue.Empty()) { 00109 const int NId = Queue.Top(); Queue.Pop(); 00110 const int Dist = NIdDistH.GetDat(NId); 00111 if (Dist == MxDist) { break; } // max distance limit reached 00112 const typename PGraph::TObj::TNodeI NodeI = Graph->GetNI(NId); 00113 if (FollowOut) { // out-links 00114 for (v = 0; v < NodeI.GetOutDeg(); v++) { // out-links 00115 const int DstNId = NodeI.GetOutNId(v); 00116 if (! NIdDistH.IsKey(DstNId)) { 00117 NIdDistH.AddDat(DstNId, Dist+1); 00118 MaxDist = TMath::Mx(MaxDist, Dist+1); 00119 if (DstNId == TargetNId) { return MaxDist; } 00120 Queue.Push(DstNId); 00121 } 00122 } 00123 } 00124 if (FollowIn) { // in-links 00125 for (v = 0; v < NodeI.GetInDeg(); v++) { 00126 const int DstNId = NodeI.GetInNId(v); 00127 if (! NIdDistH.IsKey(DstNId)) { 00128 NIdDistH.AddDat(DstNId, Dist+1); 00129 MaxDist = TMath::Mx(MaxDist, Dist+1); 00130 if (DstNId == TargetNId) { return MaxDist; } 00131 Queue.Push(DstNId); 00132 } 00133 } 00134 } 00135 } 00136 return MaxDist; 00137 } 00138 00139 template<class PGraph> 00140 int TBreathFS<PGraph>::GetHops(const int& SrcNId, const int& DstNId) const { 00141 TInt Dist; 00142 if (SrcNId!=StartNId) { return -1; } 00143 if (! NIdDistH.IsKeyGetDat(DstNId, Dist)) { return -1; } 00144 return Dist.Val; 00145 } 00146 00147 template<class PGraph> 00148 int TBreathFS<PGraph>::GetRndPath(const int& SrcNId, const int& DstNId, TIntV& PathNIdV) const { 00149 PathNIdV.Clr(false); 00150 if (SrcNId!=StartNId || ! NIdDistH.IsKey(DstNId)) { return -1; } 00151 PathNIdV.Add(DstNId); 00152 TIntV CloserNIdV; 00153 int CurNId = DstNId; 00154 TInt CurDist, NextDist; 00155 while (CurNId != SrcNId) { 00156 typename PGraph::TObj::TNodeI NI = Graph->GetNI(CurNId); 00157 IAssert(NIdDistH.IsKeyGetDat(CurNId, CurDist)); 00158 CloserNIdV.Clr(false); 00159 for (int e = 0; e < NI.GetDeg(); e++) { 00160 const int Next = NI.GetNbrNId(e); 00161 IAssert(NIdDistH.IsKeyGetDat(Next, NextDist)); 00162 if (NextDist == CurDist-1) { CloserNIdV.Add(Next); } 00163 } 00164 IAssert(! CloserNIdV.Empty()); 00165 CurNId = CloserNIdV[TInt::Rnd.GetUniDevInt(CloserNIdV.Len())]; 00166 PathNIdV.Add(CurNId); 00167 } 00168 PathNIdV.Reverse(); 00169 return PathNIdV.Len()-1; 00170 } 00171 00173 // Implementation 00174 namespace TSnap { 00175 00176 template <class PGraph> 00177 PNGraph GetBfsTree(const PGraph& Graph, const int& StartNId, const bool& FollowOut, const bool& FollowIn) { 00178 TBreathFS<PGraph> BFS(Graph, false); 00179 BFS.DoBfs(StartNId, FollowOut, FollowIn, -1, TInt::Mx); 00180 PNGraph Tree = TNGraph::New(); 00181 BFS.NIdDistH.SortByDat(); 00182 Tree->AddNode(StartNId); 00183 for (int i = 0; i < BFS.NIdDistH.Len(); i++) { 00184 const int NId = BFS.NIdDistH.GetKey(i); 00185 const int Dist = BFS.NIdDistH[i]; 00186 typename PGraph::TObj::TNodeI NI = Graph->GetNI(NId); 00187 Tree->AddNode(NId); 00188 if (FollowOut) { 00189 for (int e = 0; e < NI.GetInDeg(); e++) { 00190 const int Prev = NI.GetInNId(e); 00191 if (Tree->IsNode(Prev) && BFS.NIdDistH.GetDat(Prev)==Dist-1) { 00192 Tree->AddEdge(Prev, NId); } 00193 } 00194 } 00195 if (FollowIn) { 00196 for (int e = 0; e < NI.GetOutDeg(); e++) { 00197 const int Prev = NI.GetOutNId(e); 00198 if (Tree->IsNode(Prev) && BFS.NIdDistH.GetDat(Prev)==Dist-1) { 00199 Tree->AddEdge(Prev, NId); } 00200 } 00201 } 00202 } 00203 return Tree; 00204 } 00205 00206 template <class PGraph> 00207 int GetSubTreeSz(const PGraph& Graph, const int& StartNId, const bool& FollowOut, const bool& FollowIn, int& TreeSz, int& TreeDepth) { 00208 TBreathFS<PGraph> BFS(Graph); 00209 BFS.DoBfs(StartNId, FollowOut, FollowIn, -1, TInt::Mx); 00210 TreeSz = BFS.NIdDistH.Len(); 00211 TreeDepth = 0; 00212 for (int i = 0; i < BFS.NIdDistH.Len(); i++) { 00213 TreeDepth = TMath::Mx(TreeDepth, BFS.NIdDistH[i].Val); 00214 } 00215 return TreeSz; 00216 } 00217 00218 template <class PGraph> 00219 int GetNodesAtHop(const PGraph& Graph, const int& StartNId, const int& Hop, TIntV& NIdV, const bool& IsDir) { 00220 TBreathFS<PGraph> BFS(Graph); 00221 BFS.DoBfs(StartNId, true, !IsDir, -1, Hop); 00222 NIdV.Clr(false); 00223 for (int i = 0; i < BFS.NIdDistH.Len(); i++) { 00224 if (BFS.NIdDistH[i] == Hop) { 00225 NIdV.Add(BFS.NIdDistH.GetKey(i)); } 00226 } 00227 return NIdV.Len(); 00228 } 00229 00230 template <class PGraph> 00231 int GetNodesAtHops(const PGraph& Graph, const int& StartNId, TIntPrV& HopCntV, const bool& IsDir) { 00232 TBreathFS<PGraph> BFS(Graph); 00233 BFS.DoBfs(StartNId, true, !IsDir, -1, TInt::Mx); 00234 TIntH HopCntH; 00235 for (int i = 0; i < BFS.NIdDistH.Len(); i++) { 00236 HopCntH.AddDat(BFS.NIdDistH[i]) += 1; 00237 } 00238 HopCntH.GetKeyDatPrV(HopCntV); 00239 HopCntV.Sort(); 00240 return HopCntV.Len(); 00241 } 00242 00243 template <class PGraph> 00244 int GetShortPath(const PGraph& Graph, const int& SrcNId, TIntH& NIdToDistH, const bool& IsDir, const int& MaxDist) { 00245 TBreathFS<PGraph> BFS(Graph); 00246 BFS.DoBfs(SrcNId, true, ! IsDir, -1, MaxDist); 00247 NIdToDistH.Clr(); 00248 NIdToDistH.Swap(BFS.NIdDistH); 00249 return NIdToDistH[NIdToDistH.Len()-1]; 00250 } 00251 00252 template <class PGraph> 00253 int GetShortPath(const PGraph& Graph, const int& SrcNId, const int& DstNId, const bool& IsDir) { 00254 TBreathFS<PGraph> BFS(Graph); 00255 BFS.DoBfs(SrcNId, true, ! IsDir, DstNId, TInt::Mx); 00256 return BFS.GetHops(SrcNId, DstNId); 00257 } 00258 00259 template <class PGraph> 00260 int GetBfsFullDiam(const PGraph& Graph, const int& NTestNodes, const bool& IsDir) { 00261 int FullDiam; 00262 double EffDiam; 00263 GetBfsEffDiam(Graph, NTestNodes, IsDir, EffDiam, FullDiam); 00264 return FullDiam; 00265 } 00266 00267 template <class PGraph> 00268 double GetBfsEffDiam(const PGraph& Graph, const int& NTestNodes, const bool& IsDir) { 00269 int FullDiam; 00270 double EffDiam; 00271 GetBfsEffDiam(Graph, NTestNodes, IsDir, EffDiam, FullDiam); 00272 return EffDiam; 00273 } 00274 00275 template <class PGraph> 00276 double GetBfsEffDiam(const PGraph& Graph, const int& NTestNodes, const bool& IsDir, double& EffDiam, int& FullDiam) { 00277 double AvgDiam; 00278 EffDiam = -1; FullDiam = -1; 00279 return GetBfsEffDiam(Graph, NTestNodes, IsDir, EffDiam, FullDiam, AvgDiam); 00280 } 00281 00282 template <class PGraph> 00283 double GetBfsEffDiam(const PGraph& Graph, const int& NTestNodes, const bool& IsDir, double& EffDiam, int& FullDiam, double& AvgDiam) { 00284 EffDiam = -1; FullDiam = -1; AvgDiam = -1; 00285 TIntFltH DistToCntH; 00286 TBreathFS<PGraph> BFS(Graph); 00287 // shotest paths 00288 TIntV NodeIdV; 00289 Graph->GetNIdV(NodeIdV); NodeIdV.Shuffle(TInt::Rnd); 00290 for (int tries = 0; tries < TMath::Mn(NTestNodes, Graph->GetNodes()); tries++) { 00291 const int NId = NodeIdV[tries]; 00292 BFS.DoBfs(NId, true, ! IsDir, -1, TInt::Mx); 00293 for (int i = 0; i < BFS.NIdDistH.Len(); i++) { 00294 DistToCntH.AddDat(BFS.NIdDistH[i]) += 1; } 00295 } 00296 TIntFltKdV DistNbrsPdfV; 00297 double SumPathL=0, PathCnt=0; 00298 for (int i = 0; i < DistToCntH.Len(); i++) { 00299 DistNbrsPdfV.Add(TIntFltKd(DistToCntH.GetKey(i), DistToCntH[i])); 00300 SumPathL += DistToCntH.GetKey(i) * DistToCntH[i]; 00301 PathCnt += DistToCntH[i]; 00302 } 00303 DistNbrsPdfV.Sort(); 00304 EffDiam = TSnap::TSnapDetail::CalcEffDiamPdf(DistNbrsPdfV, 0.9); // effective diameter (90-th percentile) 00305 FullDiam = DistNbrsPdfV.Last().Key; // approximate full diameter (max shortest path length over the sampled nodes) 00306 AvgDiam = SumPathL/PathCnt; // average shortest path length 00307 return EffDiam; 00308 } 00309 00310 template <class PGraph> 00311 double GetBfsEffDiam(const PGraph& Graph, const int& NTestNodes, const TIntV& SubGraphNIdV, const bool& IsDir, double& EffDiam, int& FullDiam) { 00312 EffDiam = -1; 00313 FullDiam = -1; 00314 TIntFltH DistToCntH; 00315 TBreathFS<PGraph> BFS(Graph); 00316 // shotest paths 00317 TIntV NodeIdV(SubGraphNIdV); NodeIdV.Shuffle(TInt::Rnd); 00318 TInt Dist; 00319 for (int tries = 0; tries < TMath::Mn(NTestNodes, Graph->GetNodes()); tries++) { 00320 const int NId = NodeIdV[tries]; 00321 BFS.DoBfs(NId, true, ! IsDir, -1, TInt::Mx); 00322 for (int i = 0; i < SubGraphNIdV.Len(); i++) { 00323 if (BFS.NIdDistH.IsKeyGetDat(SubGraphNIdV[i], Dist)) { 00324 DistToCntH.AddDat(Dist) += 1; } 00325 } 00326 } 00327 TIntFltKdV DistNbrsPdfV; 00328 for (int i = 0; i < DistToCntH.Len(); i++) { 00329 DistNbrsPdfV.Add(TIntFltKd(DistToCntH.GetKey(i), DistToCntH[i])); 00330 } 00331 DistNbrsPdfV.Sort(); 00332 EffDiam = TSnap::TSnapDetail::CalcEffDiamPdf(DistNbrsPdfV, 0.9); // effective diameter (90-th percentile) 00333 FullDiam = DistNbrsPdfV.Last().Key; // approximate full diameter (max shortest path length over the sampled nodes) 00334 return EffDiam; // average shortest path length 00335 } 00336 00337 } // namespace TSnap