SNAP Library 2.2, User Reference
2014-03-11 19:15:55
SNAP, a general purpose, high performance system for analysis and manipulation of large networks
|
00001 //#////////////////////////////////////////////// 00003 00010 template <class TNodeData, bool IsDir> 00011 class TBigNet { 00012 public: 00013 typedef TNodeData TNodeDat; 00014 typedef TBigNet<TNodeData, IsDir> TNet; 00015 typedef TPt<TNet> PNet; 00016 public: 00017 class TNode; 00018 typedef THash<TInt, TNode> TNodeH; // use SaveToDisk to convert between the two hash table types 00019 typedef TPt<TBigNet<TNodeData, IsDir> > PBigNet; 00020 typedef TVecPool<TInt> TVPool; 00021 typedef TPt<TVPool> PVPool; 00022 00024 00026 class TNode { 00027 public: 00029 TInt InVId; 00031 00033 TInt OutVId; 00035 TNodeDat Dat; 00036 public: 00037 TNode() : InVId(-1), OutVId(-1), Dat() { } 00038 TNode(const int& InVecId, const int& OutVecId) : InVId(InVecId), OutVId(OutVecId), Dat() { } 00039 TNode(const int& InVecId, const int& OutVecId, const TNodeDat& NodeDat) : 00040 InVId(InVecId), OutVId(OutVecId), Dat(NodeDat) { } 00041 TNode(const TNode& Node) : InVId(Node.InVId), OutVId(Node.OutVId), Dat(Node.Dat) { } 00042 TNode(TSIn& SIn) : InVId(SIn), OutVId(SIn), Dat(SIn) { } 00043 void Save(TSOut& SOut) const { SOut.Save(InVId); SOut.Save(OutVId); Dat.Save(SOut); } 00045 bool IsUnused() const { return InVId==-1 && OutVId==-1; } 00046 }; 00048 00050 class TNodeI { 00051 protected: 00052 typedef typename TNodeH::TIter THashIter; 00053 THashIter NodeHI; 00054 TVPool *Pool; 00055 int InDeg, OutDeg, *InNIdV, *OutNIdV; // if undirected, InNIdV==OutNIdV 00056 public: 00057 inline void GetInOutNIdV(); 00058 int GetInVId() const { return NodeHI->Dat.InVId; } 00059 int GetOutVId() const { return NodeHI->Dat.OutVId; } 00060 public: 00061 TNodeI() : NodeHI(), Pool(NULL), InDeg(0), OutDeg(0), InNIdV(NULL), OutNIdV(NULL) { } 00062 TNodeI(const THashIter& NodeHIter, TVPool *PoolPt) : NodeHI(NodeHIter), Pool(PoolPt) { GetInOutNIdV(); } 00063 TNodeI(const TNodeI& NodeI) : NodeHI(NodeI.NodeHI), Pool(NodeI.Pool) { GetInOutNIdV(); } 00064 TNodeI& operator = (const TNodeI& NI) { NodeHI=NI.NodeHI; Pool=NI.Pool; GetInOutNIdV(); return *this; } 00066 TNodeI& operator++ (int) { NodeHI++; GetInOutNIdV(); return *this; } 00067 bool operator < (const TNodeI& NI) const { return NodeHI < NI.NodeHI; } 00068 bool operator == (const TNodeI& NI) const { return NodeHI == NI.NodeHI; } 00069 int GetId() const { return NodeHI->Key(); } 00070 int GetDeg() const { return GetInDeg()+(InNIdV!=OutNIdV?GetOutDeg():0); } 00071 int GetInDeg() const { return InDeg; } 00072 int GetOutDeg() const { return OutDeg; } 00073 int GetInNId(const int& NodeN) const { return InNIdV[NodeN]; } 00074 int GetOutNId(const int& NodeN) const { return OutNIdV[NodeN]; } 00075 int GetOutNbrId(const int& NodeN) const { return NodeN<OutDeg ? OutNIdV[NodeN]:InNIdV[NodeN-OutDeg]; } 00076 bool IsInNId(const int& NId) const { return BinSearch(InNIdV, InNIdV+InDeg, NId)!=NULL; } 00077 bool IsOutNId(const int& NId) const { return BinSearch(OutNIdV, OutNIdV+OutDeg, NId)!=NULL; } 00078 bool IsNbrNId(const int& NId) const { return IsOutNId(NId) || IsInNId(NId); } 00079 const TNodeData& operator () () const { return GetDat(); } 00080 TNodeData& operator () () { return GetDat(); } 00081 const TNodeData& GetDat() const { return NodeHI->Dat.Dat; } 00082 TNodeData& GetDat() { return NodeHI->Dat.Dat; } 00083 // requires pointer back to the network (see as in TNodeNet) 00084 //const TNodeData& GetInNDat(const int& NodeN) const { return Net->GetNDat(GetInNId(NodeN)); } 00085 //TNodeData& GetInNDat(const int& NodeN) { return Net->GetNDat(GetInNId(NodeN)); } 00086 //const TNodeData& GetOutNDat(const int& NodeN) const { return Net->GetNDat(GetOutNId(NodeN)); } 00087 //TNodeData& GetOutNDat(const int& NodeN) { return Net->GetNDat(GetOutNId(NodeN)); } 00088 //const TNodeData& GetNbrNDat(const int& NodeN) const { return Net->GetNDat(GetNbrNId(NodeN)); } 00089 //TNodeData& GetNbrNDat(const int& NodeN) { return Net->GetNDat(GetNbrNId(NodeN)); } 00090 void Dump() const; 00091 friend class TBigNet<TNodeData, IsDir>; 00092 }; 00093 00095 00097 class TEdgeI { 00098 private: 00099 TNodeI CurNode, EndNode; 00100 int CurEdge; 00101 public: 00102 TEdgeI() : CurNode(), EndNode(), CurEdge(0) { } 00103 TEdgeI(const TNodeI& NodeI, const TNodeI& EndNodeI, const int& EdgeN=0) : CurNode(NodeI), EndNode(EndNodeI), CurEdge(0) { } 00104 TEdgeI(const TEdgeI& EdgeI) : CurNode(EdgeI.CurNode), EndNode(EdgeI.EndNode), CurEdge(EdgeI.CurEdge) { } 00105 TEdgeI& operator = (const TEdgeI& EdgeI) { if (this!=&EdgeI) { CurNode=EdgeI.CurNode; EndNode=EdgeI.EndNode; CurEdge=EdgeI.CurEdge; } return *this; } 00106 TEdgeI& operator++ (int) { CurEdge++; if (CurEdge >= CurNode.GetOutDeg()) { CurEdge=0; CurNode++; 00107 while (CurNode < EndNode && CurNode.GetOutDeg()==0) { CurNode++; } } return *this; } 00108 bool operator < (const TEdgeI& EdgeI) const { return CurNode<EdgeI.CurNode || (CurNode==EdgeI.CurNode && CurEdge<EdgeI.CurEdge); } 00109 bool operator == (const TEdgeI& EdgeI) const { return CurNode == EdgeI.CurNode && CurEdge == EdgeI.CurEdge; } 00110 int GetId() const { return -1; } 00111 int GetSrcNId() const { return CurNode.GetId(); } 00112 int GetDstNId() const { return CurNode.GetOutNId(CurEdge); } 00113 const TNodeData& GetSrcNDat() const { return CurNode.GetNDat(); } 00114 TNodeData& GetDstNDat() { return CurNode.GetOutNDat(CurEdge); } 00115 friend class TNodeNet<TNodeData>; 00116 }; 00117 protected: 00118 bool IsNode(const int& NId, TNode& Node) const { return NodeH.IsKeyGetDat(NId, Node); } 00119 int* GetInNIdVPt(const int& NId) const { return (int *) Pool.GetValVPt(GetNode(NId).InVId); } 00120 int* GetOutNIdVPt(const int& NId) const { return (int *) Pool.GetValVPt(GetNode(NId).OutVId); } 00121 static void AddSorted(int* Beg, int* End, const int& Val); 00122 static const int* BinSearch(const int* Beg, const int* End, const int& Val); 00123 static const int* BinSearch2(const int* Beg, const int* End, const int& Val); 00124 const TNode& GetNode(const int& NId) const { return NodeH.GetDat(NId); } 00125 TNode& GetNode(const int& NId) { return NodeH.GetDat(NId); } 00126 public: 00127 enum { DelNId = INT_MAX }; // id of a deleted node 00128 protected: 00129 TCRef CRef; 00130 TInt MxNId; 00131 TB32Set Flags; 00132 TVPool Pool; 00133 TNodeH NodeH; 00134 public: 00135 TBigNet(const int& Nodes, const TSize& Edges, const bool& Sources=false); 00136 TBigNet(TSIn& SIn) : MxNId(SIn), Flags(SIn), Pool(SIn), NodeH(SIn) { TBool Dir(SIn); IAssert(Dir()==IsDir); } 00137 virtual ~TBigNet() { } 00138 virtual void Save(TSOut& SOut) const; 00139 static PBigNet New(const int& Nodes, const TSize& Edges, const bool& Sources=false) { 00140 return PBigNet(new TBigNet(Nodes, Edges, Sources)); } 00141 static PBigNet Load(TSIn& SIn) { return PBigNet(new TBigNet(SIn)); } 00142 TBigNet& operator = (const TBigNet& Net) { if (this!=&Net) { 00143 MxNId=Net.MxNId; Flags=Net.Flags; Pool=Net.Pool; NodeH=Net.NodeH; } return *this; } 00144 00145 bool OnlySources() const { return Flags.In(gfSources); } 00146 bool HasFlag(const TGraphFlag& Flag) const { 00147 return HasGraphFlag(typename TBigNet, Flag) || (Flag==gfSources && OnlySources()); } 00148 void DumpFlags() const; 00149 00150 // vertices 00151 int GetNodes() const { return NodeH.Len(); } 00153 int GetMxNId() const { return MxNId; } 00154 int AddNode(int NId, const int& InDeg, const int& OutDeg); 00155 int AddNode(int NId, const int& InDeg, const int& OutDeg, const TNodeDat& NodeDat); 00156 int AddNode(int NId, const TIntV& InNIdV, const TIntV& OutNIdV); 00157 int AddNode(int NId, const TIntV& InNIdV, const TIntV& OutNIdV, const TNodeDat& NodeDat); 00158 int AddUndirNode(int NId, const int& Deg); 00159 int AddUndirNode(int NId, const int& Deg, const TNodeDat& NodeDat); 00160 int AddUndirNode(int NId, const TIntV& EdgeNIdV); 00161 int AddUndirNode(int NId, const TIntV& EdgeNIdV, const TNodeDat& NodeDat); 00162 void SetInNIdV(int NId, const TIntV& InNIdV); 00163 void SetOutNIdV(int NId, const TIntV& OutNIdV); 00164 void GetInNIdV(int NId, TIntV& OutNIdV) const; 00165 void GetOutNIdV(int NId, TIntV& OutNIdV) const; 00166 bool IsNode(const int& NId) const { return NodeH.IsKey(NId); } 00167 TNodeI BegNI() const { return TNodeI(NodeH.BegI(), (TVPool *)&Pool); } 00168 TNodeI EndNI() const { return TNodeI(NodeH.EndI(), (TVPool *)&Pool); } 00169 TNodeI GetNI(const int& NId) const { return TNodeI(NodeH.GetI(NId), (TVPool *)&Pool); } 00170 TNodeDat& GetNDat(const int& NId) { return NodeH.GetDat(NId).Dat; } 00171 const TNodeDat& GetNDat(const int& NId) const { return NodeH.GetDat(NId).Dat; } 00172 // edges 00173 TEdgeI BegEI() const { TNodeI NI=BegNI(); while(NI<EndNI() && NI.GetOutDeg()==0) NI++; return TEdgeI(NI, EndNI()); } 00174 TEdgeI EndEI() const { return TEdgeI(EndNI(), EndNI()); } 00175 TEdgeI GetEI(const int& EId) const; // not supported 00176 00177 // delete nodes 00178 int IsolateNode(int NId); // isolate the node by setting edge endpoints to point to node id DelNId, IsNode(DelNId)==false) 00179 int DelNode(int NId); // set neighbors to point to DelNId, delete node from the node table 00180 bool IsIsoNode(const int& NId) const; 00181 uint GetDelEdges(); // the number deleted edges 00182 void CompactEdgePool(); // after nodes are isolated and deleted, we compact the empty space 00183 00184 // edges 00185 ::TSize GetEdges() const { return Pool.GetVals(); } 00186 int AddEdge(const int& SrcNId, const int& DstNId); 00187 bool IsEdge(const int& SrcNId, const int& DstNId, const bool& Dir=true) const; 00188 00189 void SortEdgeV(); 00190 void InvertFromSources(uint ExpectNodes=0); // add missing nodes and in-links 00191 void Rewire(TRnd& Rnd = TInt::Rnd); // create a random network with same degree distribution (configuration model) 00192 00193 // manipulation 00194 PNGraph GetNGraph(const bool& RenumberNodes=false) const; 00195 PNGraph GetSubNGraph(const TIntV& NIdV) const; 00196 PBigNet GetSubGraph(const TIntV& NIdV, const bool& RenumberNodes=false) const; 00197 void GetSubGraph(const TIntV& NIdV, TBigNet* NewNet, const bool& RenumberNodes=false) const; 00198 00199 int GetRndNId(TRnd& Rnd=TInt::Rnd) const { return NodeH.GetKey(NodeH.GetRndKeyId(Rnd)); } 00200 TNodeI GetRndNI(TRnd& Rnd=TInt::Rnd) const { return GetNI(GetRndNId(Rnd)); } 00201 void GetNIdV(TIntV& NIdV) const; 00202 00203 bool Empty() const { return GetNodes()==0; } 00204 void Clr(const bool& DoDel = true) { MxNId = 0; NodeH.Clr(DoDel); Pool.Clr(DoDel); } 00205 void Reserve(const int& Nodes, const TSize& Edges); 00206 void Defrag(const bool& OnlyNodeLinks = false) { } 00207 bool IsOk() const; 00208 void Dump(const TStr& Desc = TStr()) const; 00209 void SaveForDisk(const TStr& OutFNm) const; 00210 00211 static void LoadNodeDatH(const TStr& InFNm, TNodeH& NodeH); 00212 static void SaveToDisk(const TStr& InFNm, const TStr& OutFNm, const bool& SaveSparseHash); 00213 friend class TPt<TBigNet>; 00214 }; 00215 00216 // set flags 00217 namespace TSnap { 00218 template <class TNodeData, bool IsDir> struct IsDirected<TBigNet<TNodeData, IsDir> > { enum { Val = 0 }; }; 00219 template <class TNodeData> struct IsDirected<TBigNet<TNodeData, true> > { enum { Val = 1 }; }; 00220 template <class TNodeData, bool IsDir> struct IsNodeDat<TBigNet<TNodeData, IsDir> > { enum { Val = 1 }; }; 00221 } 00222 00223 template <class TNodeData, bool IsDir> 00224 inline void TBigNet<TNodeData, IsDir>::TNodeI::GetInOutNIdV() { 00225 if (NodeHI.IsEnd()) return; 00226 const TNode& N = NodeHI->Dat; 00227 if (! Pool->IsVId(N.InVId)) { 00228 InDeg=0; OutDeg=0; } 00229 else { 00230 InDeg=Pool->GetVLen(N.InVId); 00231 OutDeg=Pool->GetVLen(N.OutVId); 00232 InNIdV=(int *)Pool->GetValVPt(N.InVId); 00233 OutNIdV=(int *)Pool->GetValVPt(N.OutVId); 00234 } 00235 } 00236 00237 template <class TNodeData, bool IsDir> 00238 void TBigNet<TNodeData, IsDir>::TNodeI::Dump() const { 00239 printf("NodeId: %d\n", GetId()); 00240 printf(" I:%4d]", GetInDeg()); 00241 for (int i = 0; i < GetInDeg(); i++) { printf(" %d", GetInNId(i)); } 00242 printf("\n O:%4d]", GetOutDeg()); 00243 for (int i = 0; i < GetOutDeg(); i++) { printf(" %d", GetOutNId(i)); } 00244 printf("\n"); 00245 } 00246 00247 template <class TNodeData, bool IsDir> 00248 void TBigNet<TNodeData, IsDir>::AddSorted(int* Beg, int* End, const int& Val) { 00249 // there is at least one free slot available for the Val 00250 IAssertR(*(End-1)==TInt::Mx, "Edge can not be added: no free space"); 00251 // find position to insert 00252 int Half, Len = int(End-Beg); 00253 while (Len > 0) { 00254 Half = Len/2; 00255 int* Mid=Beg+Half; 00256 if (*Mid < Val) { Beg=Mid+1; Len=Len-Half-1; } else { Len=Half; } } 00257 IAssertR(*Beg != Val, "Value already existis"); 00258 // insert 00259 memmove(Beg+1, Beg, sizeof(int)*(End-Beg-1)); 00260 *Beg = Val; 00261 } 00262 00263 template <class TNodeData, bool IsDir> 00264 const int* TBigNet<TNodeData, IsDir>::BinSearch(const int* Beg, const int* End, const int& Val) { 00265 End--; const int *Mid; 00266 while (Beg <= End) { Mid = Beg+(End-Beg)/2; 00267 if (*Mid == Val) { return Mid; } 00268 else if (Val < *Mid) { End=Mid-1; } else { Beg=Mid+1; } 00269 } 00270 return NULL; 00271 } 00272 00273 template <class TNodeData, bool IsDir> 00274 const int* TBigNet<TNodeData, IsDir>::BinSearch2(const int* Beg, const int* End, const int& Val) { 00275 const int* First = Beg; 00276 const int* Last = End; 00277 const int* Mid; 00278 TSize len = End-Beg, half; 00279 while (len > 0) { 00280 half = len>>1; Mid=First+half; 00281 if (*Mid < Val) { First = Mid; First++; len=len-half-1; } 00282 else { len=half; } 00283 } 00284 return First==Last ? Last-1 : First; 00285 } 00286 00287 template <class TNodeData, bool IsDir> 00288 TBigNet<TNodeData, IsDir>::TBigNet(const int& Nodes, const TSize& Edges, const bool& Sources) : 00289 CRef(), MxNId(0), Flags(), Pool(IsDir ? 2*Edges:Edges, 10000000, true, TInt::Mx), NodeH(Nodes) { 00290 //Flags.Incl(gfNodeGraph); 00291 //Flags.Incl(gfNetwork); 00292 //if (IsDir) { Flags.Incl(gfDirected); } 00293 if (Sources) { 00294 Flags.Incl(gfSources); 00295 IAssertR(! IsDir, "Jure: not clear what happens is graph is Undirected and has only sources."); 00296 } 00297 //DumpFlags(); 00298 } 00299 00300 template <class TNodeData, bool IsDir> 00301 void TBigNet<TNodeData, IsDir>::Save(TSOut& SOut) const { 00302 MxNId.Save(SOut); 00303 Flags.Save(SOut); 00304 Pool.Save(SOut); 00305 NodeH.Save(SOut); 00306 TBool(IsDir).Save(SOut); 00307 } 00308 00309 template <class TNodeData, bool IsDir> 00310 void TBigNet<TNodeData, IsDir>::DumpFlags() const { 00311 for (int i = 1; i <int(gfMx); i++) { 00312 if (HasFlag(TGraphFlag(i))) { printf(" +"); } 00313 else { printf(" -"); } 00314 printf("%s", TSnap::GetFlagStr(TGraphFlag(i)).CStr()); 00315 } 00316 printf("\n"); 00317 } 00318 00319 template <class TNodeData, bool IsDir> 00320 int TBigNet<TNodeData, IsDir>::AddNode(int NId, const int& InDeg, const int& OutDeg) { 00321 CAssert(IsDir); 00322 if (NId == -1) { NId = MxNId; MxNId.Val++; } 00323 else { MxNId = TMath::Mx(NId+1, MxNId()); } 00324 TNode& Node = NodeH.AddDat(NId); 00325 IAssertR(Node.IsUnused(), TStr::Fmt("NodeId %d already exists", NId)); 00326 Node.InVId = Pool.AddEmptyV(InDeg); 00327 Node.OutVId = Pool.AddEmptyV(OutDeg); 00328 return NId; 00329 } 00330 00331 template <class TNodeData, bool IsDir> 00332 int TBigNet<TNodeData, IsDir>::AddNode(int NId, const int& InDeg, const int& OutDeg, const TNodeData& NodeDat) { 00333 CAssert(IsDir); 00334 if (NId == -1) { NId = MxNId; MxNId.Val++; } 00335 else { MxNId = TMath::Mx(NId+1, MxNId()); } 00336 TNode& Node = NodeH.AddDat(NId); 00337 IAssertR(Node.IsUnused(), TStr::Fmt("NodeId %d already exists", NId)); 00338 Node.InVId = Pool.AddEmptyV(InDeg); 00339 Node.OutVId = Pool.AddEmptyV(OutDeg); 00340 Node.Dat = NodeDat; 00341 return NId; 00342 } 00343 00344 template <class TNodeData, bool IsDir> 00345 int TBigNet<TNodeData, IsDir>::AddUndirNode(int NId, const int& Deg) { 00346 CAssert(! IsDir); 00347 if (NId == -1) { NId = MxNId; MxNId.Val++; } 00348 else { MxNId = TMath::Mx(NId+1, MxNId()); } 00349 TNode& Node = NodeH.AddDat(NId); 00350 IAssertR(Node.IsUnused(), TStr::Fmt("NodeId %d already exists", NId)); 00351 Node.InVId = Pool.AddEmptyV(Deg); 00352 Node.OutVId = Node.InVId; // same vector 00353 return NId; 00354 } 00355 00356 template <class TNodeData, bool IsDir> 00357 int TBigNet<TNodeData, IsDir>::AddUndirNode(int NId, const int& Deg, const TNodeData& NodeDat) { 00358 CAssert(! IsDir); 00359 if (NId == -1) { NId = MxNId; MxNId.Val++; } 00360 else { MxNId = TMath::Mx(NId+1, MxNId()); } 00361 TNode& Node = NodeH.AddDat(NId); 00362 IAssertR(Node.IsUnused(), TStr::Fmt("NodeId %d already exists", NId)); 00363 Node.InVId = Pool.AddEmptyV(Deg); 00364 Node.OutVId = Node.InVId; // same vector 00365 Node.Dat = NodeDat; 00366 return NId; 00367 } 00368 00369 template <class TNodeData, bool IsDir> 00370 int TBigNet<TNodeData, IsDir>::AddNode(int NId, const TIntV& InNIdV, const TIntV& OutNIdV) { 00371 CAssert(IsDir); 00372 IAssert(InNIdV.IsSorted() && OutNIdV.IsSorted()); 00373 if (NId == -1) { NId = MxNId; MxNId.Val++; } 00374 else { MxNId = TMath::Mx(NId+1, MxNId()); } 00375 TNode& Node = NodeH.AddDat(NId); 00376 IAssertR(Node.IsUnused(), TStr::Fmt("NodeId %d already exists", NId)); 00377 Node.InVId = Pool.AddV(InNIdV); 00378 Node.OutVId = Pool.AddV(OutNIdV); 00379 return NId; 00380 } 00381 00382 template <class TNodeData, bool IsDir> 00383 int TBigNet<TNodeData, IsDir>::AddNode(int NId, const TIntV& InNIdV, const TIntV& OutNIdV, const TNodeData& NodeDat) { 00384 CAssert(IsDir); 00385 IAssert(InNIdV.IsSorted() && OutNIdV.IsSorted()); 00386 if (NId == -1) { NId = MxNId; MxNId.Val++; } 00387 else { MxNId = TMath::Mx(NId+1, MxNId()); } 00388 TNode& Node = NodeH.AddDat(NId); 00389 IAssertR(Node.IsUnused(), TStr::Fmt("NodeId %d already exists", NId)); 00390 Node.InVId = Pool.AddV(InNIdV); 00391 Node.OutVId = Pool.AddV(OutNIdV); 00392 Node.Dat = NodeDat; 00393 return NId; 00394 } 00395 00396 template <class TNodeData, bool IsDir> 00397 int TBigNet<TNodeData, IsDir>::AddUndirNode(int NId, const TIntV& EdgeNIdV) { 00398 CAssert(! IsDir); 00399 IAssert(EdgeNIdV.IsSorted()); 00400 if (NId == -1) { NId = MxNId; MxNId.Val++; } 00401 else { MxNId = TMath::Mx(NId+1, MxNId()); } 00402 TNode& Node = NodeH.AddDat(NId); 00403 IAssertR(Node.IsUnused(), TStr::Fmt("NodeId %d already exists", NId)); 00404 Node.InVId = Pool.AddV(EdgeNIdV); 00405 Node.OutVId = Node.InVId; 00406 return NId; 00407 } 00408 00409 template <class TNodeData, bool IsDir> 00410 int TBigNet<TNodeData, IsDir>::AddUndirNode(int NId, const TIntV& EdgeNIdV, const TNodeData& NodeDat) { 00411 CAssert(! IsDir); 00412 IAssert(EdgeNIdV.IsSorted()); 00413 if (NId == -1) { NId = MxNId; MxNId.Val++; } 00414 else { MxNId = TMath::Mx(NId+1, MxNId()); } 00415 TNode& Node = NodeH.AddDat(NId); 00416 IAssertR(Node.IsUnused(), TStr::Fmt("NodeId %d already exists", NId)); 00417 Node.InVId = Pool.AddV(EdgeNIdV); 00418 Node.OutVId = Node.InVId; 00419 Node.Dat = NodeDat; 00420 return NId; 00421 } 00422 00423 template <class TNodeData, bool IsDir> 00424 void TBigNet<TNodeData, IsDir>::SetInNIdV(int NId, const TIntV& InNIdV) { 00425 TNode Node; EAssert(IsNode(NId, Node)); 00426 TIntV InNodesV; Pool.GetV(Node.InVId, InNodesV); 00427 EAssert(InNIdV.Len() == InNodesV.Len()); 00428 memcpy(InNodesV.BegI(), InNIdV.BegI(), sizeof(TInt)*InNIdV.Len()); 00429 } 00430 00431 template <class TNodeData, bool IsDir> 00432 void TBigNet<TNodeData, IsDir>::SetOutNIdV(int NId, const TIntV& OutNIdV) { 00433 TNode Node; EAssert(IsNode(NId, Node)); 00434 TIntV OutNodesV; Pool.GetV(Node.OutVId, OutNodesV); 00435 EAssert(OutNIdV.Len() == OutNodesV.Len()); 00436 memcpy(OutNodesV.BegI(), OutNIdV.BegI(), sizeof(TInt)*OutNIdV.Len()); 00437 } 00438 00439 template <class TNodeData, bool IsDir> 00440 void TBigNet<TNodeData, IsDir>::GetInNIdV(int NId, TIntV& InNIdV) const { 00441 TNode Node; EAssertR(IsNode(NId, Node), TStr::Fmt("Not node: NId=%d\n", NId).CStr()); 00442 Pool.GetV(Node.InVId, InNIdV); 00443 } 00444 00445 template <class TNodeData, bool IsDir> 00446 void TBigNet<TNodeData, IsDir>::GetOutNIdV(int NId, TIntV& OutNIdV) const { 00447 TNode Node; EAssert(IsNode(NId, Node)); 00448 Pool.GetV(Node.OutVId, OutNIdV); 00449 } 00450 00451 // Node is deleted by setting edge endpoints to point to node id -1 (DelNId) 00452 // No memory is freed 00453 template <class TNodeData, bool IsDir> 00454 int TBigNet<TNodeData, IsDir>::IsolateNode(int NId) { 00455 TIntV OutV; 00456 int NDel = 0; 00457 // out-edges 00458 GetOutNIdV(NId, OutV); 00459 for (int i = 0; i < OutV.Len(); i++) { 00460 if (OutV[i] == DelNId) { break; } // because they are sorted 00461 // fast implementation 00462 const TNode& N = NodeH.GetDat(OutV[i]); 00463 int *InNIdV = (int *) Pool.GetValVPt(N.InVId); 00464 const int Deg = Pool.GetVLen(N.InVId); 00465 int* Val = (int *) BinSearch(InNIdV, InNIdV+Deg, NId); 00466 if (Val == NULL) { 00467 printf("BAD: Can't find: OUT: NId: %d -- OutNbrId: %d\n", NId, OutV[i]()); 00468 continue; 00469 } 00470 IAssert(Val != 0); 00471 memcpy(Val, Val+1, sizeof(int)*int(InNIdV+Deg-Val)); 00472 *(InNIdV+Deg-1) = DelNId; NDel++; 00473 } 00474 OutV.PutAll(DelNId); 00475 // in-edges 00476 if (IsDir) { 00477 TIntV InV; 00478 GetInNIdV(NId, InV); 00479 for (int i = 0; i < InV.Len(); i++) { 00480 if (InV[i] == DelNId) { break; } 00481 // fast implementation 00482 const TNode& N = NodeH.GetDat(InV[i]); 00483 int *OutNIdV = (int *) Pool.GetValVPt(N.OutVId); 00484 const int Deg = Pool.GetVLen(N.OutVId); 00485 int* Val = (int *) BinSearch(OutNIdV, OutNIdV+Deg, NId); 00486 if (Val == NULL) { 00487 printf("IN: NId: %d -- InNbrId: %d\n", NId, OutV[i]()); 00488 continue; 00489 } 00490 IAssert(Val != 0); 00491 memcpy(Val, Val+1, sizeof(int)*int(OutNIdV+Deg-Val)); 00492 *(OutNIdV+Deg-1) = DelNId; NDel++; 00493 } 00494 InV.PutAll(DelNId); 00495 } 00496 return NDel; 00497 } 00498 00499 // set neigbors to point to DelNId, delete node from the node table 00500 template <class TNodeData, bool IsDir> 00501 int TBigNet<TNodeData, IsDir>::DelNode(int NId) { 00502 const int DelEdges = IsolateNode(NId); 00503 NodeH.DelKey(NId); 00504 return DelEdges; 00505 } 00506 00507 template <class TNodeData, bool IsDir> 00508 bool TBigNet<TNodeData, IsDir>::IsIsoNode(const int& NId) const { 00509 if (NId == DelNId) { return true; } 00510 TIntV OutV; 00511 GetOutNIdV(NId, OutV); 00512 if (OutV.Empty() || OutV[0] == DelNId) { return true; } 00513 return false; 00514 } 00515 00516 // the number deleted edges 00517 template <class TNodeData, bool IsDir> 00518 uint TBigNet<TNodeData, IsDir>::GetDelEdges() { 00519 uint DelEdges = 0; 00520 TIntV OutV; 00521 for (TNodeI NI = BegNI(); NI < EndNI(); NI++) { 00522 const int NId = NI.GetId(); 00523 GetOutNIdV(NId, OutV); 00524 for (int i = 0; i < OutV.Len(); i++) { 00525 if (OutV[i] == DelNId) { DelEdges++; } 00526 } 00527 } 00528 return DelEdges; 00529 } 00530 00531 template <class TNodeData, bool IsDir> 00532 void TBigNet<TNodeData, IsDir>::CompactEdgePool() { 00533 Pool.CompactPool(DelNId); 00534 } 00535 00536 template <class TNodeData, bool IsDir> 00537 int TBigNet<TNodeData, IsDir>::AddEdge(const int& SrcNId, const int& DstNId) { 00538 TNode Src; IAssert(IsNode(SrcNId, Src)); 00539 int* OutV = (int *)Pool.GetValVPt(Src.OutVId); 00540 const int OutVLen = Pool.GetVLen(Src.OutVId); 00541 Assert(BinSearch(OutV, OutV+OutVLen, DstNId)==NULL); // no edge yet 00542 AddSorted(OutV, OutV+OutVLen, DstNId); 00543 if (! OnlySources()) { 00544 TNode Dst; IAssert(IsNode(DstNId, Dst)); 00545 int* InV = (int *)Pool.GetValVPt(Dst.InVId); 00546 const int InVLen = Pool.GetVLen(Dst.InVId); 00547 AddSorted(InV, InV+InVLen, SrcNId); 00548 } 00549 return -1; // edge id 00550 } 00551 00552 template <class TNodeData, bool IsDir> 00553 bool TBigNet<TNodeData, IsDir>::IsEdge(const int& SrcNId, const int& DstNId, const bool& Dir) const { 00554 TNode Src, Dst; 00555 if (! IsNode(SrcNId, Src)) { return false; } // no source node 00556 int* OutV1 = (int *)Pool.GetValVPt(Src.OutVId); 00557 const bool IsEdge = BinSearch(OutV1, OutV1+Pool.GetVLen(Src.OutVId), DstNId) != NULL; 00558 if (IsEdge && OnlySources()) { return true; } 00559 const bool IsDstNode = IsNode(DstNId, Dst); 00560 if (! IsDstNode) { return false; } // no destination node 00561 else if (IsEdge) { return true; } // destination and link found 00562 else if (! Dir) { // check for the undirected version of the edge 00563 int* OutV2 = (int *)Pool.GetValVPt(Dst.OutVId); 00564 return BinSearch(OutV2, OutV2+Pool.GetVLen(Dst.OutVId), SrcNId) != NULL; } 00565 else { return false; } 00566 } 00567 00568 template <class TNodeData, bool IsDir> 00569 void TBigNet<TNodeData, IsDir>::GetNIdV(TIntV& NIdV) const { 00570 NIdV.Reserve(GetNodes(), 0); 00571 for (typename TNodeH::TIter I = NodeH.BegI(); I < NodeH.EndI(); I++) { 00572 NIdV.Add(I->Key); } 00573 } 00574 00575 template <class TNodeData, bool IsDir> 00576 void TBigNet<TNodeData, IsDir>::SortEdgeV() { 00577 printf("Sorting Edges... "); 00578 TExeTm ExeTm; 00579 TIntV OutV, InV; 00580 int cnt = 0; 00581 for (TNodeI NI = BegNI(); NI < EndNI(); NI++) { 00582 const int NId = NI.GetId(); 00583 GetOutNIdV(NId, OutV); 00584 OutV.Sort(); 00585 if (IsDir) { 00586 GetInNIdV(NId, InV); 00587 InV.Sort(); 00588 } 00589 if (++cnt % Mega(1) == 0) { printf("\r sort:%dm %s", cnt/Mega(1), ExeTm.GetStr()); } 00590 } 00591 for (TNodeI NI = BegNI(); NI < EndNI(); NI++) { 00592 const int NId = NI.GetId(); 00593 GetOutNIdV(NId, OutV); 00594 IAssert(OutV.IsSorted()); 00595 GetInNIdV(NId, InV); 00596 IAssert(InV.IsSorted()); 00597 if (++cnt % Mega(1) == 0) { printf("\r check sorted:%dm %s", cnt/Mega(1), ExeTm.GetStr()); } 00598 } 00599 printf("done. [%s]\n", ExeTm.GetStr()); 00600 } 00601 00602 // add missing nodes and in-links from a network of sources 00603 template <class TNodeData, bool IsDir> 00604 void TBigNet<TNodeData, IsDir>::InvertFromSources(uint ExpectNodes) { 00605 typedef THash<TInt, TInt> TDegHash; 00606 typedef int* TIntPt; 00607 if (ExpectNodes == 0) ExpectNodes=4*GetNodes(); 00608 printf("\nInverting BigNet: reserved for %s nodes\n", TInt::GetMegaStr(ExpectNodes).CStr()); 00609 CAssert(IsDir); 00610 IAssert(OnlySources()); 00611 TDegHash InDegH(ExpectNodes); 00612 TSize NDest=0; 00613 // get node in-degree 00614 uint c=0; TExeTm ExeTm; 00615 for (TNodeI NI = BegNI(); NI < EndNI(); NI++, c++) { 00616 for (int e = 0; e < NI.GetOutDeg(); e++) { 00617 InDegH.AddDat(NI.GetOutNId(e)) += 1; } 00618 if (c%100000==0) printf("\r%s h:%d [%g] ", TInt::GetMegaStr(c).CStr(), InDegH.Len(), ExeTm.GetSecs()); 00619 } 00620 printf("\n Resizing NodePool: %lld -> %lld\n", uint64(Pool.Reserved()), uint64(GetEdges())); 00621 if (2*GetEdges() > Pool.Reserved()) { 00622 Pool.Reserve(2*GetEdges()); } 00623 // add nodes 00624 printf("NodeH: %s nodes, InDeg: %s nodes, Reserve: %s\n", TInt::GetMegaStr(NodeH.Len()).CStr(), 00625 TInt::GetMegaStr(InDegH.Len()).CStr(), TInt::GetMegaStr(ExpectNodes).CStr()); 00626 NodeH.Reserve(ExpectNodes); 00627 for (TDegHash::TIter I = InDegH.BegI(); I < InDegH.EndI(); I++) { 00628 const int NId = I->Key, InDeg = I->Dat; 00629 if (! IsNode(NId)) { 00630 AddNode(NId, InDeg, 0); NDest++; } // new destination node, no out-links 00631 else { 00632 TNode& Node = GetNode(NId); 00633 IAssert(Node.InVId == 0); // no in-links 00634 Node.InVId = Pool.AddEmptyV(InDeg); } 00635 } 00636 InDegH.Clr(true); 00637 printf("Added: %lld destination nodes\n", uint64(NDest)); 00638 printf("Graph nodes: %lld nodes\n", uint64(GetNodes())); 00639 // pointer to in-links vector 00640 THash<TInt, int*> NIdToPtH(GetNodes()); 00641 for (TNodeI NI = BegNI(); NI < EndNI(); NI++, c++) 00642 NIdToPtH.AddDat(NI.GetId(), (int *)Pool.GetValVPt(NI.GetInVId())); 00643 // add in-edges 00644 printf("Adding edges...\n"); 00645 c = 0; 00646 for (TNodeI NI = BegNI(); NI < EndNI(); NI++, c++) { 00647 const int SrcNId = NI.GetId(); 00648 for (int e = 0; e < NI.GetOutDeg(); e++) { 00649 TIntPt& InV = NIdToPtH.GetDat(NI.GetOutNId(e)); 00650 IAssert(*InV == TInt::Mx); 00651 *InV = SrcNId; InV++; 00652 } 00653 if (c%100000==0) printf("\r%s [%g] ", TInt::GetMegaStr(c).CStr(), ExeTm.GetSecs()); 00654 } 00655 // sort in-links 00656 printf("\nSorting in-links...\n"); 00657 TIntV InNIdV; c = 0; 00658 for (TNodeI NI = BegNI(); NI < EndNI(); NI++, c++) { 00659 Pool.GetV(NI.GetInVId(), InNIdV); 00660 InNIdV.Sort(); 00661 if (c%100000==0) printf("\r%s [%g] ", TInt::GetMegaStr(c).CStr(), ExeTm.GetSecs()); 00662 } 00663 printf("\r...done [%g]\n", ExeTm.GetSecs()); 00664 Flags.Excl(gfSources); 00665 } 00666 00667 template <class TNodeData, bool IsDir> 00668 void TBigNet<TNodeData, IsDir>::Rewire(TRnd& Rnd) { 00669 uint64 NDup=0, NResolve=0, NAddit=0, cnt = 0; 00670 TExeTm ExeTm; 00671 IAssertR(! IsDir, "Only undirected networks are supported"); 00672 printf("Rewiring the network... 1"); 00673 Pool.ShuffleAll(Rnd); printf("[%s]\n", ExeTm.GetStr()); 00674 //Pool.ShuffleAll(Rnd); printf(" done [%s]\n", ExeTm.GetStr()); 00675 printf(" sorting edges...\n"); 00676 SortEdgeV(); // sort individual edge vectors 00677 printf(" done [%s]\n", ExeTm.GetStr()); 00678 // remove duplicates and self edges 00679 printf(" removing duplicates...\n"); 00680 for (TNodeI NI = BegNI(); NI < EndNI(); NI++, cnt++) { 00681 const int VId = NI.GetOutVId(); 00682 int Len = Pool.GetVLen(VId); 00683 int* V = (int *)Pool.GetValVPt(VId); 00684 for (int i = 0; i < Len-1 && V[i]!=DelNId; i++) { 00685 if (V[i] == V[i+1] || V[i]==NI.GetId()) { 00686 memcpy(V+i, V+i+1, sizeof(int)*(Len-i-1)); i--; 00687 V[Len-1] = DelNId; NDup++; 00688 } 00689 } 00690 if (Len > 0 && V[Len-1]==NI.GetId()) { V[Len-1] = DelNId; NDup++; } 00691 if (cnt % Mega(10) == 0) { printf("."); fflush(stdout); } 00692 } 00693 printf("\n %llu duplicate edges removed [%s]\n", NDup, ExeTm.GetStr()); 00694 if (OnlySources()) { return; } 00695 // resolve one way edges 00696 printf(" resolving one way edges...\n"); cnt=0; fflush(stdout); 00697 for (TNodeI NI = BegNI(); NI < EndNI(); NI++, cnt++) { 00698 for (int e = 0; e < NI.GetOutDeg(); e++) { 00699 if (NI.GetOutNId(e) == DelNId) { continue; } // skip deleted nodes 00700 const int InVId = GetNode(NI.GetOutNId(e)).InVId; 00701 const int InVLen = Pool.GetVLen(InVId); IAssert(InVLen>=0 && InVLen < Mega(3)); 00702 int* InV = (int *) Pool.GetValVPt(InVId); 00703 int* Val = (int *) BinSearch2(InV, InV+InVLen, NI.GetId()); 00704 if (*Val == NI.GetId()) { continue; } // points back, everything is ok 00705 if (InVLen>0 && InV[InVLen-1] == DelNId) { // there is space at the end, insert 00706 IAssert((InVLen-(Val-InV)-1) >= 0); 00707 memmove(Val+1, Val, sizeof(int)*(InVLen-(Val-InV)-1)); 00708 *Val = NI.GetId(); 00709 } else { // the other end could point at us but it doesn't 00710 memmove(NI.OutNIdV+e, NI.OutNIdV+e+1, sizeof(int)*(NI.GetOutDeg()-e-1)); 00711 NI.OutNIdV[NI.GetOutDeg()-1]=DelNId; e--; 00712 } 00713 NResolve++; 00714 } 00715 if (cnt % Mega(10) == 0) { printf("."); fflush(stdout); } 00716 } 00717 printf("\n %llu resolved edges [%s]\n", NResolve, ExeTm.GetStr()); 00718 // check if there are two nodes that still have space and are not yet connected and connect them 00719 printf(" filling empty edge slots...\n"); 00720 TIntPrV SlotNIdV; 00721 THash<TInt, TInt> SlotNIdH; 00722 int CumSlots=0; 00723 for (TNodeI NI = BegNI(); NI < EndNI(); NI++) { 00724 if (NI.GetOutNId(NI.GetOutDeg()-1) == DelNId) { 00725 int NSlots = 0; 00726 for (int s = NI.GetOutDeg()-1; s >= 0; s--) { 00727 if (NI.GetOutNId(s) == DelNId) { NSlots++; } 00728 else { break; } 00729 } 00730 SlotNIdV.Add(TIntPr(NI.GetId(), NSlots)); 00731 SlotNIdH.AddDat(NI.GetId(), NSlots); 00732 CumSlots+=NSlots; 00733 } 00734 } 00735 printf(" %d nodes, with %d spokes available, %d edges\n", SlotNIdH.Len(), CumSlots, CumSlots/2); 00736 TIntV NIdV; SlotNIdH.GetKeyV(NIdV); 00737 for (int ni1 = 0; ni1 < NIdV.Len(); ni1++) { 00738 const int nid = NIdV[ni1]; 00739 if (! SlotNIdH.IsKey(nid) || SlotNIdH.GetDat(nid) == 0) { continue; } 00740 const int NI1Slots = SlotNIdH.GetDat(nid); 00741 if ((SlotNIdH.GetMxKeyIds() - SlotNIdH.Len())/double(SlotNIdH.GetMxKeyIds()) > 0.5) { SlotNIdH.Defrag(); } 00742 TNodeI NI = GetNI(nid); 00743 for (int NTries = 0; NTries < 4*NI1Slots && NI.GetOutNId(NI.GetOutDeg()-1) == DelNId; NTries++) { 00744 const int nid2 = SlotNIdH.GetKey(SlotNIdH.GetRndKeyId(Rnd)); 00745 if (nid == nid2) { continue; } 00746 TNodeI NI2 = GetNI(nid2); 00747 // insert the edge 00748 if (NI2.GetOutNId(NI2.GetOutDeg()-1)==DelNId && ! NI2.IsInNId(NI.GetId())) { 00749 int *V1 = (int *) BinSearch2(NI.OutNIdV, NI.OutNIdV+NI.GetOutDeg(), NI2.GetId()); 00750 int *V2 = (int *) BinSearch2(NI2.InNIdV, NI2.InNIdV+NI2.GetInDeg(), NI.GetId()); 00751 if (*V1 != DelNId) { memmove(V1+1, V1, sizeof(int)*(NI.GetOutDeg()-(V1-NI.OutNIdV)-1)); } 00752 if (*V2 != DelNId) { memmove(V2+1, V2, sizeof(int)*(NI2.GetInDeg()-(V2-NI2.InNIdV)-1)); } 00753 *V1 = NI2.GetId(); *V2 = NI.GetId(); 00754 NAddit++; 00755 SlotNIdH.GetDat(nid)--; SlotNIdH.GetDat(nid2)--; 00756 } 00757 if (SlotNIdH.GetDat(nid2) == 0) { SlotNIdH.DelKey(nid2); continue; } 00758 if (SlotNIdH.GetDat(nid) == 0) { SlotNIdH.DelKey(nid); break; } 00759 } 00760 if (ni1 % Mega(1) == 0) { printf("."); fflush(stdout); } 00761 } 00762 printf(" %llu edges added.\nDONE. TOTAL REWIRE TIME [%s]\n\n", NAddit, ExeTm.GetStr()); 00763 } 00764 00765 template <class TNodeData, bool IsDir> 00766 PNGraph TBigNet<TNodeData, IsDir>::GetNGraph(const bool& RenumberNodes) const { 00767 IAssert(RenumberNodes == false); 00768 PNGraph Graph = TNGraph::New(); 00769 Graph->Reserve(GetNodes(), 0); 00770 for (TNodeI NI = BegNI(); NI < EndNI(); NI++) { 00771 Graph->AddNode(NI.GetId(), Pool, NI.GetInVId(), NI.GetOutVId()); 00772 } 00773 return Graph; 00774 } 00775 00776 template <class TNodeData, bool IsDir> 00777 PNGraph TBigNet<TNodeData, IsDir>::GetSubNGraph(const TIntV& NIdV) const { 00778 PNGraph Graph = TNGraph::New(NIdV.Len(), 0); 00779 // add nodes 00780 for (int i = 0; i < NIdV.Len(); i++) { 00781 Graph->AddNode(NIdV[i]); } 00782 // reserve node in- and out-degree 00783 for (int i = 0; i < NIdV.Len(); i++) { 00784 const int SrcNId = NIdV[i]; 00785 const TNodeI NI = GetNI(SrcNId); 00786 int InDeg=0, OutDeg=0; 00787 for (int e = 0; e < NI.GetInDeg(); e++) { if (Graph->IsNode(NI.GetInNId(e))) InDeg++; } 00788 for (int e = 0; e < NI.GetOutDeg(); e++) { if (Graph->IsNode(NI.GetOutNId(e))) OutDeg++; } 00789 Graph->ReserveNIdInDeg(SrcNId, InDeg); 00790 Graph->ReserveNIdOutDeg(SrcNId, OutDeg); 00791 } 00792 // add edges 00793 for (int i = 0; i < NIdV.Len(); i++) { 00794 const int SrcNId = NIdV[i]; 00795 const TNodeI NI = GetNI(SrcNId); 00796 for (int e = 0; e < NI.GetOutDeg(); e++) { 00797 const int DstNId = NI.GetOutNId(e); 00798 if (Graph->IsNode(DstNId)) { 00799 Graph->AddEdge(SrcNId, DstNId); } 00800 } 00801 } 00802 return Graph; 00803 } 00804 00805 template <class TNodeData, bool IsDir> 00806 TPt<TBigNet<TNodeData, IsDir> > TBigNet<TNodeData, IsDir>::GetSubGraph(const TIntV& NIdV, const bool& RenumberNodes) const { 00807 const bool isDir = IsDir, onlySources = HasFlag(gfSources); 00808 TSize Edges=0; 00809 // find in- and out-degree 00810 TSparseHash<TInt, TIntPr> NIdDegH(NIdV.Len()); 00811 for (int i = 0; i < NIdV.Len(); i++) { NIdDegH.AddDat(NIdV[i]); } 00812 for (int i = 0; i < NIdV.Len(); i++) { 00813 const TNodeI NI = GetNI(NIdV[i]); 00814 int InDeg=0, OutDeg=0; 00815 for (int n = 0; n < NI.GetOutDeg(); n++) { 00816 if (NIdDegH.IsKey(NI.GetOutNId(n))) OutDeg++; } 00817 if (! onlySources && isDir) { 00818 for (int n = 0; n < NI.GetInDeg(); n++) { 00819 if (NIdDegH.IsKey(NI.GetInNId(n))) InDeg++; } 00820 } 00821 Edges += OutDeg; 00822 NIdDegH.GetDat(NIdV[i]) = TIntPr(InDeg, OutDeg); 00823 } 00824 // make network 00825 typedef TBigNet<TNodeData, IsDir> TBNet; 00826 TPt<TBNet> NewNetPt = TBNet::New(NIdV.Len(), Edges, HasFlag(gfDirected)); 00827 TBNet& NewNet = *NewNetPt; 00828 NewNet.Flags = Flags; 00829 // add nodes 00830 if (isDir || onlySources) { 00831 for (int i = 0; i < NIdV.Len(); i++) { 00832 const TIntPr& Deg = NIdDegH.GetDat(NIdV[i]); 00833 NewNet.AddNode(NIdV[i], Deg.Val1, Deg.Val2, GetNDat(NIdV[i])); } // also copy the node data 00834 } else { 00835 for (int i = 0; i < NIdV.Len(); i++) { 00836 const TIntPr& Deg = NIdDegH.GetDat(NIdV[i]); 00837 NewNet.AddUndirNode(NIdV[i], Deg.Val2, GetNDat(NIdV[i])); } // also copy the node data 00838 } 00839 // add edges 00840 for (int i = 0; i < NIdV.Len(); i++) { 00841 int NId = NIdV[i]; 00842 const TNodeI NI = GetNI(NId); 00843 int *NIdVPt = (int *) NewNet.GetOutNIdVPt(NId); 00844 int deg = 0; 00845 for (int e = 0; e < NI.GetOutDeg(); e++) { 00846 const int Dst = NI.GetOutNId(e); 00847 if (NewNet.IsNode(Dst)) { 00848 *NIdVPt = Dst; NIdVPt++; deg++; } 00849 } 00850 EAssert(deg == NIdDegH.GetDat(NId).Val2); 00851 if (isDir && ! onlySources) { 00852 EAssert((NI.GetInVId()==NI.GetOutVId() && NI.GetInVId()==0) 00853 || (NI.GetInVId() != NI.GetOutVId())); 00854 int * NIdVPt = (int *) NewNet.GetInNIdVPt(NId); 00855 int deg = 0; 00856 for (int e = 0; e < NI.GetInDeg(); e++) { 00857 const int Dst = NI.GetInNId(e); 00858 if (NewNet.IsNode(Dst)) { 00859 *NIdVPt = Dst; NIdVPt++; deg++; } 00860 } 00861 EAssert(deg == NIdDegH.GetDat(NId).Val1); 00862 } 00863 } 00864 return NewNetPt; 00865 } 00866 00867 template <class TNodeData, bool IsDir> 00868 void TBigNet<TNodeData, IsDir>::GetSubGraph(const TIntV& NIdV, TBigNet* NewNetPt, const bool& RenumberNodes) const { 00869 printf("Set subgraph on %d nodes\n", NIdV.Len()); TExeTm ExeTm; 00870 const bool isDir = IsDir, onlySources = HasFlag(gfSources); 00871 TSize Edges=0; 00872 // find in- and out-degree 00873 THash<TInt, TIntPr> NIdDegH(NIdV.Len()); 00874 for (int i = 0; i < NIdV.Len(); i++) { NIdDegH.AddDat(NIdV[i]); } 00875 for (int i = 0; i < NIdV.Len(); i++) { 00876 const TNodeI NI = GetNI(NIdV[i]); 00877 int InDeg=0, OutDeg=0; 00878 for (int n = 0; n < NI.GetOutDeg(); n++) { 00879 if (NIdDegH.IsKey(NI.GetOutNId(n))) OutDeg++; } 00880 if (! onlySources && isDir) { 00881 for (int n = 0; n < NI.GetInDeg(); n++) { 00882 if (NIdDegH.IsKey(NI.GetInNId(n))) InDeg++; } 00883 } 00884 Edges += OutDeg; 00885 NIdDegH.GetDat(NIdV[i]) = TIntPr(InDeg, OutDeg); 00886 } 00887 // make network 00888 //typedef TBigNet<TNodeData, IsDir> TBNet; 00889 //TPt<TBNet> NewNetPt = TBNet::New(NIdV.Len(), Edges, HasFlag(gfDirected)); 00890 NewNetPt->Reserve(NIdV.Len(), Edges); 00891 TBigNet<TNodeData, IsDir>& NewNet = *NewNetPt; 00892 NewNet.Flags = Flags; 00893 TIntSet NIdMap; 00894 // add nodes 00895 if (! RenumberNodes) { 00896 if (isDir || onlySources) { 00897 for (int i = 0; i < NIdV.Len(); i++) { 00898 const TIntPr& Deg = NIdDegH.GetDat(NIdV[i]); 00899 NewNet.AddNode(NIdV[i], Deg.Val1, Deg.Val2, GetNDat(NIdV[i])); } // also copy the node data 00900 } else { 00901 for (int i = 0; i < NIdV.Len(); i++) { 00902 const TIntPr& Deg = NIdDegH.GetDat(NIdV[i]); 00903 NewNet.AddUndirNode(NIdV[i], Deg.Val2, GetNDat(NIdV[i])); } // also copy the node data 00904 } 00905 } else { // renumber nodes 00906 NIdMap.Gen(NIdV.Len()); 00907 for (int i = 0; i < NIdV.Len(); i++) { NIdMap.AddKey(NIdV[i]); } 00908 if (isDir || onlySources) { 00909 for (int i = 0; i < NIdV.Len(); i++) { 00910 const TIntPr& Deg = NIdDegH.GetDat(NIdV[i]); 00911 NewNet.AddNode(NIdMap.GetKeyId(NIdV[i]), Deg.Val1, Deg.Val2, GetNDat(NIdV[i])); } // also copy the node data 00912 } else { 00913 for (int i = 0; i < NIdV.Len(); i++) { 00914 const TIntPr& Deg = NIdDegH.GetDat(NIdV[i]); 00915 NewNet.AddUndirNode(NIdMap.GetKeyId(NIdV[i]), Deg.Val2, GetNDat(NIdV[i])); } // also copy the node data 00916 } 00917 } 00918 // add edges 00919 for (int i = 0; i < NIdV.Len(); i++) { 00920 int NId = NIdV[i]; 00921 const TNodeI NI = GetNI(NId); 00922 int *NIdVPt = (int *) NewNet.GetOutNIdVPt(RenumberNodes?NIdMap.GetKeyId(NId):NId); 00923 int deg = 0; 00924 for (int e = 0; e < NI.GetOutDeg(); e++) { 00925 const int Dst = RenumberNodes?NIdMap.GetKeyId(NI.GetOutNId(e)):NI.GetOutNId(e); 00926 if (NewNet.IsNode(Dst)) { 00927 *NIdVPt = Dst; NIdVPt++; deg++; } 00928 } 00929 EAssert(deg == NIdDegH.GetDat(NId).Val2); 00930 if (isDir && ! onlySources) { 00931 EAssert((NI.GetInVId()==NI.GetOutVId() && NI.GetInVId()==0) 00932 || (NI.GetInVId() != NI.GetOutVId())); 00933 int * NIdVPt = (int *) NewNet.GetInNIdVPt(RenumberNodes?NIdMap.GetKeyId(NId):NId); 00934 int deg = 0; 00935 for (int e = 0; e < NI.GetInDeg(); e++) { 00936 const int Dst = RenumberNodes?NIdMap.GetKeyId(NI.GetInNId(e)):NI.GetInNId(e); 00937 if (NewNet.IsNode(Dst)) { 00938 *NIdVPt = Dst; NIdVPt++; deg++; } 00939 } 00940 EAssert(deg == NIdDegH.GetDat(NId).Val1); 00941 } 00942 } 00943 printf(" %u edges [%s]\n", uint(Edges), ExeTm.GetStr()); 00944 } 00945 00946 template <class TNodeData, bool IsDir> 00947 void TBigNet<TNodeData, IsDir>::Reserve(const int& Nodes, const TSize& Edges) { 00948 NodeH.Gen(TMath::Mx(int(1.1*Nodes), 100)); 00949 Pool = TVPool(TMath::Mx(Edges, (TSize) Mega(1)), Mega(100), true); 00950 } 00951 00952 // check that in- and out-edges matchs, the table is sorted and so on 00953 template <class TNodeData, bool IsDir> 00954 bool TBigNet<TNodeData, IsDir>::IsOk() const { 00955 // check the node pool 00956 TIntV ValV; TExeTm ExeTm; 00957 printf("Is ok network:\n Check Vec..."); 00958 for (uint id = 1; id < Pool.GetVecs(); id++) { 00959 Pool.GetV(id, ValV); 00960 if (! ValV.Empty()) { 00961 EAssert((0<=ValV[0] && ValV[0]<MxNId) || ValV[0]==DelNId); 00962 try { 00963 for (int i = 1; i < ValV.Len(); i++) { 00964 //if (ValV[i]==DelNId) { continue; } 00965 // sorted & no duplicates & no empty values (can have deleted nodes) 00966 EAssertR((ValV[i-1]<ValV[i]) || (ValV[i-1]==ValV[i] && ValV[i]==DelNId), 00967 TStr::Fmt("NId1: %d NId2:%d", ValV[i-1](),ValV[i]())); 00968 EAssertR((0<=ValV[i] && ValV[i]<MxNId) || ValV[i]==DelNId, 00969 TStr::Fmt("NId1: %dm MxNId: %d", ValV[i](), MxNId)); 00970 if (! OnlySources()) { 00971 EAssertR(IsNode(ValV[i]) || ValV[i]==DelNId, 00972 TStr::Fmt("NId1: %dm MxNId: %d", ValV[i](), MxNId)); } // every link is a node 00973 } 00974 } catch (PExcept Except){ 00975 printf(" %s\n", Except->GetStr().CStr()); 00976 printf(" vec id: %d, lenght: %d\n", id, ValV.Len()); 00977 for (int i = 1; i < ValV.Len(); i++) { 00978 printf(" %d", ValV[i]()); 00979 if (!((ValV[i-1]<ValV[i]) || (ValV[i-1]==ValV[i] && ValV[i]==DelNId))) { printf("*"); } 00980 } 00981 printf("\n"); 00982 return false; 00983 } 00984 } 00985 if (id % 10000 == 0) { 00986 printf("\r %dk / %dk [%s]", id/1000, Pool.GetVecs()/1000, ExeTm.GetStr()); } 00987 } 00988 printf("[%s]\n Check Edges...\n", ExeTm.GetStr()); 00989 // check edges 00990 int ErrCnt = 0; 00991 if (! OnlySources()) { 00992 int Cnt=0; 00993 for (TNodeI NI = BegNI(); NI < EndNI(); NI++, Cnt++) { 00994 // nodes that point to NI, have it on out-list 00995 for (int e = 0; e < NI.GetInDeg(); e++) { 00996 if (NI.GetInNId(e) == DelNId) { continue; } // skip deleted nodes 00997 if (! IsEdge(NI.GetInNId(e), NI.GetId())) { 00998 printf("\nno edge: %d --> %d", NI.GetInNId(e), NI.GetId()); 00999 printf("NId: %d deg %d,%d\n", NI.GetId(), NI.GetOutDeg(), NI.GetInDeg()); 01000 for (int i = 0; i < NI.GetInDeg(); i++) { printf(" %d", NI.GetInNId(i)); } 01001 TNodeI NI2 = GetNI(NI.GetInNId(e)); 01002 printf("\nNId2: %d deg %d,%d\n", NI2.GetId(), NI2.GetOutDeg(), NI2.GetInDeg()); 01003 for (int j = 0; j < NI2.GetOutDeg(); j++) { printf(" %d", NI2.GetOutNId(j)); } 01004 printf("\n"); 01005 ErrCnt++; 01006 } 01007 //EAssertR(IsEdge(NI.GetInNId(e), NI.GetId()), 01008 // TStr::Fmt("no edge: %d --> %d", NI.GetInNId(e), NI.GetId())); 01009 } 01010 // nodes NI points to, have it on in-list 01011 for (int e = 0; e < NI.GetOutDeg(); e++) { 01012 if (NI.GetOutNId(e) == DelNId) { continue; } 01013 const int InVId = GetNode(NI.GetOutNId(e)).InVId; 01014 int* DstInV = (int *)Pool.GetValVPt(InVId); 01015 if (BinSearch(DstInV, DstInV+Pool.GetVLen(InVId), NI.GetId()) == NULL) { 01016 printf("no edge: %d --> %d", NI.GetId(), NI.GetInNId(e)); 01017 printf("NId: %d deg %d,%d\n", NI.GetId(), NI.GetOutDeg(), NI.GetInDeg()); 01018 for (int i = 0; i < NI.GetOutDeg(); i++) { printf(" %d", NI.GetOutNId(i)); } 01019 TNodeI NI2 = GetNI(NI.GetOutNId(e)); 01020 printf("\nNId2: %d deg %d,%d\n", NI2.GetId(), NI2.GetOutDeg(), NI2.GetInDeg()); 01021 for (int j = 0; j < NI2.GetInDeg(); j++) { printf(" %d", NI2.GetInNId(j)); } 01022 printf("\n"); ErrCnt++; 01023 } 01024 //EAssertR(BinSearch(DstInV, DstInV+Pool.GetVLen(InVId), NI.GetId()) != NULL, 01025 // TStr::Fmt("no edge: %d --> %d", NI.GetId(), NI.GetInNId(e))); 01026 } 01027 if (ErrCnt > 5) { FailR("error count too large!"); } 01028 if (Cnt % 100000 == 0) { 01029 printf("\r%s [%s]", TInt::GetMegaStr(Cnt).CStr(), ExeTm.GetStr()); } 01030 } 01031 printf("\r%s [%s]\n", TInt::GetMegaStr(Cnt).CStr(), ExeTm.GetStr()); 01032 } 01033 return true; 01034 } 01035 01036 template <class TNodeData, bool IsDir> 01037 void TBigNet<TNodeData, IsDir>::Dump(const TStr& Desc) const { 01038 if (! Desc.Empty()) { printf("\n%s (%d, %u):\n", Desc.CStr(), GetNodes(), GetEdges()); } 01039 else { printf("\nNodes: %d, Edges: %u\n", GetNodes(), GetEdges()); } 01040 for (TNodeI NI = BegNI(); NI < EndNI(); NI++) { 01041 printf("%d]\n IN %d]", NI.GetId(), NI.GetInDeg()); 01042 for (int i=0; i<NI.GetInDeg(); i++) { printf(" %d", NI.GetInNId(i)); } 01043 if (IsDir) { 01044 printf("\n OUT %d]", NI.GetOutDeg()); 01045 for (int i=0; i<NI.GetOutDeg(); i++) { printf(" %d", NI.GetOutNId(i)); } 01046 } 01047 printf("\n"); 01048 } 01049 } 01050 01051 // header: <Nodes, Edges, IsDir> 01052 // format: undirected <NId, Dat, OutDeg, OutNodeV> 01053 // format: directed <NId, Dat, OutDeg, OutNodeV, InDeg, InNodeV> 01054 template <class TNodeData, bool IsDir> 01055 void TBigNet<TNodeData, IsDir>::SaveForDisk(const TStr& OutFNm) const { 01056 const bool IsDirected = IsDir; 01057 TFOut FOut(OutFNm); 01058 FOut.Save(GetNodes()); 01059 FOut.Save(GetEdges()); 01060 FOut.Save(IsDirected); 01061 for (TNodeI NI = BegNI(); NI < EndNI(); NI++) { 01062 FOut.Save(NI.GetId()); 01063 NI.GetDat().Save(FOut); 01064 FOut.Save(NI.GetOutDeg()); 01065 for (int i = 0; i < NI.GetOutDeg(); i++) { 01066 FOut.Save(NI.GetOutNId(i)); } 01067 if (IsDirected) { 01068 FOut.Save(NI.GetInDeg()); 01069 for (int i = 0; i < NI.GetInDeg(); i++) { 01070 FOut.Save(NI.GetInNId(i)); } 01071 } 01072 } 01073 } 01074 01075 // skip the edge pool and only load the node data hash table 01076 template <class TNodeData, bool IsDir> 01077 void TBigNet<TNodeData, IsDir>::LoadNodeDatH(const TStr& InFNm, TNodeH& NodeH) { 01078 TFIn SIn(InFNm); 01079 TInt MxNId(SIn); 01080 TB32Set Flags(SIn); 01081 printf("skipping Pool...\n"); 01082 TBool FastCopy(SIn); 01083 uint64 _GrowBy, _MxVals, _Vals; 01084 SIn.Load(_GrowBy); SIn.Load(_MxVals); SIn.Load(_Vals); 01085 TInt EmptyVal(SIn); 01086 int Tmp; 01087 for (TSize ValN = 0; ValN < _Vals; ValN++) { SIn.Load(Tmp); } 01088 TInt MxVals(SIn), Vals(SIn); 01089 uint64 Offset; 01090 for (int ValN = 0; ValN < Vals; ValN++) { SIn.Load(Offset); } 01091 printf("loading Hode hash table...\n"); 01092 NodeH.Load(SIn); 01093 } 01094 01095 // Save the network without loading it. Save the node hash table as THash or TSparseHash 01096 template <class TNodeData, bool IsDir> 01097 void TBigNet<TNodeData, IsDir>::SaveToDisk(const TStr& InFNm, const TStr& OutFNm, const bool& SaveSparseHash) { 01098 TFIn FIn(InFNm); 01099 TFOut FOut(OutFNm); 01100 { TInt MxNId(FIn); MxNId.Save(FOut); 01101 TB32Set Flags(FIn); Flags.Save(FOut); 01102 TVPool Pool(FIn); Pool.Save(FOut); } 01103 { TNodeH NodeH(FIn); 01104 if (! SaveSparseHash) { 01105 THash<TInt, TNode> NewH(NodeH.Len(), true); 01106 for (typename TNodeH::TIter I = NodeH.BegI(); I < NodeH.EndI(); I++) { 01107 NewH.AddDat(I->Key, I->Dat); 01108 } 01109 NewH.Save(FOut); 01110 } else { 01111 FailR("Not implemented"); 01112 } } 01113 }