SNAP Library, Developer Reference
2012-10-15 15:06:59
SNAP, a general purpose network analysis and graph mining library
|
00001 00002 // Approximate Neighborhood Function. 00003 namespace TSnap { 00010 template <class PGraph> void GetAnf(const PGraph& Graph, const int& SrcNId, TIntFltKdV& DistNbrsV, const int& MxDist, const bool& IsDir, const int& NApprox=32); 00017 template <class PGraph> void GetAnf(const PGraph& Graph, TIntFltKdV& DistNbrsV, const int& MxDist, const bool& IsDir, const int& NApprox=32); 00020 template <class PGraph> double GetAnfEffDiam(const PGraph& Graph, const bool& IsDir, const double& Percentile, const int& NApprox); 00023 template <class PGraph> double GetAnfEffDiam(const PGraph& Graph, const int NRuns=1, int NApprox=-1); 00024 } // namespace TSnap 00025 00032 template <class PGraph> 00033 class TGraphAnf { 00034 private: 00035 typedef TVec<uint64> TAnfBitV; 00036 THash<TInt, uint64> NIdToBitPosH; // NId to byte(!) offset in BitV 00037 TInt NApprox; // maintain N parallel approximations (multiple of 8) 00038 TInt NBits, MoreBits, ApproxBytes; // NBits=logNodes+MoreBits; MoreBits: additional R bits; ApproxBytes: Approx/8; 00039 PGraph Graph; 00040 TRnd Rnd; 00041 private: 00042 UndefDefaultCopyAssign(TGraphAnf); 00043 public: 00044 TGraphAnf(const PGraph& GraphPt, const int& Approx=32, const int& moreBits=5, const int& RndSeed=0) : 00045 NApprox(Approx), MoreBits(moreBits), Graph(GraphPt), Rnd(RndSeed) { IAssert(NApprox%8==0); IAssert(sizeof(uint)==4); } 00046 uint64 GetNIdOffset(const int& NId) const { return NIdToBitPosH.GetDat(NId); } 00047 void InitAnfBits(TAnfBitV& BitV); 00048 void Union(TAnfBitV& BitV1, const uint64& NId1Offset, TAnfBitV& BitV2, const uint64& NId2Offset) const; 00049 double AvgLstZero(const TAnfBitV& BitV, const uint64& NIdOffset) const; 00050 double GetCount(const TAnfBitV& BitV, const uint64& NIdOffset) const { 00051 return pow(2.0, AvgLstZero(BitV, NIdOffset)) / 0.77351; } 00057 void GetNodeAnf(const int& SrcNId, TIntFltKdV& DistNbrsV, const int& MxDist, const bool& IsDir); 00063 void GetGraphAnf(TIntFltKdV& DistNbrsV, const int& MxDist, const bool& IsDir); 00064 }; 00065 00066 template <class PGraph> 00067 void TGraphAnf<PGraph>::InitAnfBits(TAnfBitV& BitV) { 00068 const int NNodes = Graph->GetNodes(); 00069 const int LogNodes = int(ceil(TMath::Log2(NNodes))); 00070 ApproxBytes = NApprox / 8; 00071 NBits = LogNodes + MoreBits; // bits per node 00072 const int BytesPerNd = ApproxBytes * NBits; // total bytes per node 00073 BitV.Gen((NNodes * BytesPerNd)/sizeof(uint)+1); IAssert(BitV.BegI() != NULL); 00074 BitV.PutAll(0); 00075 int SetBit = 0; 00076 uint64 NodeOff = 0; 00077 uchar* BitVPt = (uchar *) BitV.BegI(); 00078 // for each node: 1st bits of all approximations are at BitV[Offset+0], 2nd bits at BitV[Offset+NApprox/32], ... 00079 for (typename PGraph::TObj::TNodeI NI = Graph->BegNI(); NI != Graph->EndNI(); NI++) { 00080 NIdToBitPosH.AddDat(NI.GetId(), NodeOff); 00081 // init vertex bits 00082 for (int approx = 0; approx < NApprox; approx++) { 00083 const int RndNum = Rnd.GetUniDevInt(); 00084 for (SetBit = 0; (RndNum & (1<<SetBit))==0 && SetBit < NBits; SetBit++) { } 00085 if (SetBit >= NBits) SetBit = NBits-1; 00086 const int BitPos = ApproxBytes * SetBit + approx / 8; 00087 *(BitVPt + NodeOff + BitPos) |= (1<<(approx % 8)); // magically works better than code below (see anf.c) 00088 } 00089 NodeOff += BytesPerNd; 00090 } 00091 } 00092 00093 template <class PGraph> 00094 void TGraphAnf<PGraph>::Union(TAnfBitV& BitV1, const uint64& NId1Offset, TAnfBitV& BitV2, const uint64& NId2Offset) const { 00095 uchar* DstI = (uchar *) BitV1.BegI() + NId1Offset; 00096 uchar* SrcI = (uchar *) BitV2.BegI() + NId2Offset; 00097 for (int b=0; b < ApproxBytes*NBits; b++, DstI++, SrcI++) { *DstI |= *SrcI; } 00098 } 00099 00100 // Average least zero bit position (least significant zero) 00101 template <class PGraph> 00102 double TGraphAnf<PGraph>::AvgLstZero(const TAnfBitV& BitV, const uint64& NIdOffset) const { //average least zero bit position (least significant zero) 00103 int approx, bit, AvgBitPos = 0; 00104 uchar* BitVPt = (uchar *) BitV.BegI(); 00105 for (approx = 0; approx < NApprox; approx++) { 00106 for (bit = 0; bit < NBits; bit++) { 00107 if ((*(BitVPt+NIdOffset + ApproxBytes*bit + approx/8) & (1<<(approx%8))) == 0) break; } // found zero 00108 if (bit > NBits) bit = NBits; 00109 AvgBitPos += bit; 00110 } 00111 return AvgBitPos/double(NApprox) ; 00112 } 00113 00114 template <class PGraph> 00115 void TGraphAnf<PGraph>::GetNodeAnf(const int& SrcNId, TIntFltKdV& DistNbrsV, const int& MxDist, const bool& IsDir) { 00116 //const int NNodes = Graph->GetNodes(); 00117 TAnfBitV CurBitsV, LastBitsV; 00118 InitAnfBits(CurBitsV); IAssert(CurBitsV.BegI() != NULL); 00119 LastBitsV.Gen(CurBitsV.Len()); IAssert(LastBitsV.BegI() != NULL); 00120 DistNbrsV.Clr(); 00121 DistNbrsV.Add(TIntFltKd(1, Graph->GetNI(SrcNId).GetOutDeg())); 00122 for (int dist = 1; dist < (MxDist==-1 ? TInt::Mx : MxDist); dist++) { 00123 memcpy(LastBitsV.BegI(), CurBitsV.BegI(), sizeof(uint)*CurBitsV.Len()); //LastBitsV = CurBitsV; 00124 for (typename PGraph::TObj::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) { 00125 const uint64 NIdOffset = GetNIdOffset(NI.GetId()); 00126 for (int e = 0; e < NI.GetOutDeg(); e++) { 00127 const uint64 NId2Offset = GetNIdOffset(NI.GetOutNId(e)); 00128 Union(CurBitsV, NIdOffset, LastBitsV, NId2Offset); 00129 } 00130 if (! IsDir) { 00131 for (int e = 0; e < NI.GetInDeg(); e++) { 00132 const uint64 NId2Offset = GetNIdOffset(NI.GetInNId(e)); 00133 Union(CurBitsV, NIdOffset, LastBitsV, NId2Offset); 00134 } 00135 } 00136 } 00137 DistNbrsV.Add(TIntFltKd(dist, GetCount(CurBitsV, GetNIdOffset(SrcNId)))); 00138 if (DistNbrsV.Len() > 1 && DistNbrsV.Last().Dat < 1.001*DistNbrsV[DistNbrsV.Len()-2].Dat) break; // 0.1% change 00139 } 00140 } 00141 00142 template <class PGraph> 00143 void TGraphAnf<PGraph>::GetGraphAnf(TIntFltKdV& DistNbrsV, const int& MxDist, const bool& IsDir) { 00144 TAnfBitV CurBitsV, LastBitsV; 00145 InitAnfBits(CurBitsV); IAssert(CurBitsV.BegI() != NULL); 00146 LastBitsV.Gen(CurBitsV.Len()); IAssert(LastBitsV.BegI() != NULL); 00147 int v, e; 00148 double NPairs = 0.0; 00149 DistNbrsV.Clr(); 00150 DistNbrsV.Add(TIntFltKd(0, Graph->GetNodes())); 00151 DistNbrsV.Add(TIntFltKd(1, Graph->GetEdges())); 00152 //TExeTm ExeTm; 00153 for (int dist = 2; dist < (MxDist==-1 ? TInt::Mx : MxDist); dist++) { 00154 //printf("ANF dist %d...", dist); ExeTm.Tick(); 00155 memcpy(LastBitsV.BegI(), CurBitsV.BegI(), sizeof(uint)*CurBitsV.Len()); //LastBitsV = CurBitsV; 00156 for (typename PGraph::TObj::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) { 00157 const uint64 NIdOffset = GetNIdOffset(NI.GetId()); 00158 for (e = 0; e < NI.GetOutDeg(); e++) { 00159 const uint64 NId2Offset = GetNIdOffset(NI.GetOutNId(e)); 00160 Union(CurBitsV, NIdOffset, LastBitsV, NId2Offset); 00161 } 00162 if (! IsDir) { 00163 for (e = 0; e < NI.GetInDeg(); e++) { 00164 const uint64 NId2Offset = GetNIdOffset(NI.GetInNId(e)); 00165 Union(CurBitsV, NIdOffset, LastBitsV, NId2Offset); 00166 } 00167 } 00168 } 00169 NPairs = 0.0; 00170 for (v = NIdToBitPosH.FFirstKeyId(); NIdToBitPosH.FNextKeyId(v); ) { 00171 NPairs += GetCount(CurBitsV, NIdToBitPosH[v]); 00172 } 00173 DistNbrsV.Add(TIntFltKd(dist, NPairs)); 00174 //printf("pairs: %g %s\n", NPairs, ExeTm.GetTmStr()); 00175 if (NPairs == 0) { break; } 00176 if (DistNbrsV.Len() > 1 && NPairs < 1.001*DistNbrsV.LastLast().Dat) { break; } // 0.1% change 00177 //TGnuPlot::SaveTs(DistNbrsV, "hops.tab", "HOPS, REACHABLE PAIRS"); 00178 } 00179 } 00181 // Approximate Neighborhood Function 00182 namespace TSnap { 00183 00184 namespace TSnapDetail { 00186 double CalcEffDiam(const TIntFltKdV& DistNbrsCdfV, const double& Percentile=0.9); 00188 double CalcEffDiam(const TFltPrV& DistNbrsCdfV, const double& Percentile=0.9); 00190 double CalcEffDiamPdf(const TIntFltKdV& DistNbrsPdfV, const double& Percentile=0.9); 00192 double CalcEffDiamPdf(const TFltPrV& DistNbrsPdfV, const double& Percentile=0.9); 00194 double CalcAvgDiamPdf(const TIntFltKdV& DistNbrsPdfV); 00196 double CalcAvgDiamPdf(const TFltPrV& DistNbrsPdfV); 00197 } // TSnapDetail 00198 00199 template <class PGraph> 00200 void GetAnf(const PGraph& Graph, const int& SrcNId, TIntFltKdV& DistNbrsV, const int& MxDist, const bool& IsDir, const int& NApprox) { 00201 TGraphAnf<PGraph> Anf(Graph, NApprox, 5, 0); 00202 Anf.GetNodeAnf(SrcNId, DistNbrsV, MxDist, IsDir); 00203 } 00204 00205 template <class PGraph> 00206 void GetAnf(const PGraph& Graph, TIntFltKdV& DistNbrsV, const int& MxDist, const bool& IsDir, const int& NApprox) { 00207 TGraphAnf<PGraph> Anf(Graph, NApprox, 5, 0); 00208 Anf.GetGraphAnf(DistNbrsV, MxDist, IsDir); 00209 } 00210 00211 template <class PGraph> 00212 double GetAnfEffDiam(const PGraph& Graph, const bool& IsDir, const double& Percentile, const int& NApprox) { 00213 TIntFltKdV DistNbrsV; 00214 TGraphAnf<PGraph> Anf(Graph, NApprox, 5, 0); 00215 Anf.GetGraphAnf(DistNbrsV, -1, IsDir); 00216 return TSnap::TSnapDetail::CalcEffDiam(DistNbrsV, Percentile); 00217 } 00218 00219 template<class PGraph> 00220 double GetAnfEffDiam(const PGraph& Graph, const int NRuns, int NApprox) { 00221 //return TSnap::GetEffDiam(Graph, IsDir, 0.9, 32); 00222 TMom Mom; 00223 if (NApprox == -1) { 00224 if (Graph->GetNodes() < 100000) { NApprox = 64; } 00225 else if (Graph->GetNodes() < 1000000) { NApprox = 32; } 00226 else { NApprox = 16; } 00227 } 00228 const bool IsDir = false; 00229 for (int r = 0; r < NRuns; r++) { 00230 Mom.Add(TSnap::GetAnfEffDiam(Graph, IsDir, 0.9, NApprox)); 00231 } 00232 Mom.Def(); 00233 return Mom.GetMean(); 00234 } 00235 00236 template <class PGraph> void TestAnf() { 00237 PGraph Graph = PGraph::TObj::New(); 00238 //Graph: 00239 // 0 2 ----> 3 00240 // ^ | 00241 // | | 00242 // | ^ 00243 // 1 5 <---- 4 00244 for (int v = 0; v < 6; v++) { Graph->AddNode(v); } 00245 Graph->AddEdge(2, 3); 00246 Graph->AddEdge(3, 4); 00247 Graph->AddEdge(4, 5); 00248 Graph->AddEdge(5, 2); 00249 TFltV AnfV; 00250 for (int t = 0; t < 10; t++) { 00251 TGraphAnf<PGraph> Anf(Graph, 128, 5, t+1); 00252 TIntFltKdV DistToNbrsV; 00253 Anf.GetGraphAnf(DistToNbrsV, 5, true); 00254 printf("\n--seed: %d---------------------\n", t+1); 00255 for (int i = 0; i < DistToNbrsV.Len(); i++) { 00256 printf("dist: %d\t hops:%f\n", DistToNbrsV[i].Key(), DistToNbrsV[i].Dat()); 00257 } 00258 AnfV.Add(DistToNbrsV.Last().Dat); 00259 } 00260 TMom Mom(AnfV); 00261 printf("-----------\nAvgAnf: %f StDev: %f\n", Mom.GetMean(), Mom.GetSDev());//*/ 00262 // const int NApprox = 32; 00263 /*printf("\nANF vs. SAMPLE diam test (10 runs of ANF, NApprox=%d):\n", NApprox); 00264 //Graph = TGGen<PGraph>::GenGrid(20, 20); 00265 Graph = TGAlg::GetMxWcc(TGGen<PGraph>::GenRnd(1000, 10000)); 00266 TFltV FullAnf, EffAnf; 00267 for (int tryn = 0; tryn < 10; tryn++) { 00268 FullAnf.Add(GetEffDiam(Graph, false, 1.0, NApprox)); 00269 EffAnf.Add(GetEffDiam(Graph, false, 0.9, NApprox)); 00270 } 00271 TMom FullMom(FullAnf); 00272 TMom AnfMom(EffAnf); 00273 printf(" Sample FullDiam: %d\n", TGAlg::GetBfsFullDiam(Graph, 100, false)); 00274 printf(" Anf FullDiam: %f [%f]\n", FullMom.GetMean(), FullMom.GetSDev()); 00275 printf(" Sample EffDiam [90%%]: %f\n", TGAlg::GetBfsEffDiam(Graph, 100, false)); 00276 printf(" Anf EffDiam [90%%]: %f [%f]\n", AnfMom.GetMean(), AnfMom.GetSDev()); 00277 // epinions 00278 printf("\nEpinions graph:\n"); 00279 { typedef PNGraph PGraph; 00280 PGraph G = TGGen<PGraph>::GenEpinions(); 00281 TIntFltKdV DistToPairsV; 00282 GetAnf(G, DistToPairsV, 50, true); 00283 for(int i = 0; i < DistToPairsV.Len(); i++) { 00284 printf("\t%d\t%f\n", DistToPairsV[i].Key, DistToPairsV[i].Dat); } 00285 printf("\nUndir\n"); 00286 TAnf<PGraph>::GetAnf(G, DistToPairsV, 50, false); 00287 for(int j = 0; j < DistToPairsV.Len(); j++) { 00288 printf("\t%d\t%f\n", DistToPairsV[j].Key, DistToPairsV[j].Dat); } 00289 }//*/ 00290 } 00291 00292 } // namespace TSnap