SNAP Library 3.0, User Reference  2016-07-20 17:56:49
SNAP, a general purpose, high performance system for analysis and manipulation of large networks
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
TGraphAnf< PGraph > Class Template Reference

#include <anf.h>

Public Member Functions

 TGraphAnf (const PGraph &GraphPt, const int &Approx=32, const int &moreBits=5, const int &RndSeed=0)
 
uint64 GetNIdOffset (const int &NId) const
 
void InitAnfBits (TAnfBitV &BitV)
 
void Union (TAnfBitV &BitV1, const uint64 &NId1Offset, TAnfBitV &BitV2, const uint64 &NId2Offset) const
 
double AvgLstZero (const TAnfBitV &BitV, const uint64 &NIdOffset) const
 
double GetCount (const TAnfBitV &BitV, const uint64 &NIdOffset) const
 
void GetNodeAnf (const int &SrcNId, TIntFltKdV &DistNbrsV, const int &MxDist, const bool &IsDir)
 
void GetGraphAnf (TIntFltKdV &DistNbrsV, const int &MxDist, const bool &IsDir)
 

Private Types

typedef TVec< uint64TAnfBitV
 

Private Member Functions

 UndefDefaultCopyAssign (TGraphAnf)
 

Private Attributes

THash< TInt, uint64NIdToBitPosH
 
TInt NApprox
 
TInt NBits
 
TInt MoreBits
 
TInt ApproxBytes
 
PGraph Graph
 
TRnd Rnd
 

Detailed Description

template<class PGraph>
class TGraphAnf< PGraph >

Approximate Neighborhood Function. Implements the algorithm for computing the diameter of a given graph. The method is based on the approximate counting argument by Flajolet-Martin. For more details see C. R. Palmer, P. B. Gibbons and C. Faloutsos, ANF: A Fast and Scalable Tool for Data Mining in Massive Graphs, KDD 2002 (http://www.cs.cmu.edu/~christos/PUBLICATIONS/kdd02-anf.ps.gz) See TSnap::TestAnf() for example of how to use the class.

Definition at line 33 of file anf.h.

Member Typedef Documentation

template<class PGraph>
typedef TVec<uint64> TGraphAnf< PGraph >::TAnfBitV
private

Definition at line 35 of file anf.h.

Constructor & Destructor Documentation

template<class PGraph>
TGraphAnf< PGraph >::TGraphAnf ( const PGraph &  GraphPt,
const int &  Approx = 32,
const int &  moreBits = 5,
const int &  RndSeed = 0 
)
inline

Definition at line 44 of file anf.h.

44  :
45  NApprox(Approx), MoreBits(moreBits), Graph(GraphPt), Rnd(RndSeed) { IAssert(NApprox%8==0); IAssert(sizeof(uint)==4); }
TRnd Rnd
Definition: anf.h:40
#define IAssert(Cond)
Definition: bd.h:262
PGraph Graph
Definition: anf.h:39
unsigned int uint
Definition: bd.h:11
TInt MoreBits
Definition: anf.h:38
TInt NApprox
Definition: anf.h:37

Member Function Documentation

template<class PGraph >
double TGraphAnf< PGraph >::AvgLstZero ( const TAnfBitV BitV,
const uint64 NIdOffset 
) const

Definition at line 107 of file anf.h.

107  { //average least zero bit position (least significant zero)
108  int approx, bit, AvgBitPos = 0;
109  uchar* BitVPt = (uchar *) BitV.BegI();
110  for (approx = 0; approx < NApprox; approx++) {
111  for (bit = 0; bit < NBits; bit++) {
112  if ((*(BitVPt+NIdOffset + ApproxBytes*bit + approx/8) & (1<<(approx%8))) == 0) break; } // found zero
113  if (bit > NBits) bit = NBits;
114  AvgBitPos += bit;
115  }
116  return AvgBitPos/double(NApprox) ;
117 }
TInt ApproxBytes
Definition: anf.h:38
TInt NBits
Definition: anf.h:38
unsigned char uchar
Definition: bd.h:10
TInt NApprox
Definition: anf.h:37
template<class PGraph>
double TGraphAnf< PGraph >::GetCount ( const TAnfBitV BitV,
const uint64 NIdOffset 
) const
inline

Definition at line 50 of file anf.h.

50  {
51  return pow(2.0, AvgLstZero(BitV, NIdOffset)) / 0.77351; }
double AvgLstZero(const TAnfBitV &BitV, const uint64 &NIdOffset) const
Definition: anf.h:107
template<class PGraph >
void TGraphAnf< PGraph >::GetGraphAnf ( TIntFltKdV DistNbrsV,
const int &  MxDist,
const bool &  IsDir 
)

Returns the number of pairs of nodes reachable in less than H hops. For example, DistNbrsV.GetDat(0) is the number of nodes in the graph, DistNbrsV.GetDat(1) is the number of nodes+edges and so on.

Parameters
DistNbrsVMaps between the distance H (in hops) and the number of nodes reachable in <=H hops.
MxDistMaximum number of hops the algorithm spreads from SrcNId.
IsDirfalse: consider links as undirected (drop link directions).

Definition at line 148 of file anf.h.

148  {
149  TAnfBitV CurBitsV, LastBitsV;
150  InitAnfBits(CurBitsV); IAssert(CurBitsV.BegI() != NULL);
151  LastBitsV.Gen(CurBitsV.Len()); IAssert(LastBitsV.BegI() != NULL);
152  int v, e;
153  double NPairs = 0.0;
154  DistNbrsV.Clr();
155  DistNbrsV.Add(TIntFltKd(0, Graph->GetNodes()));
156  DistNbrsV.Add(TIntFltKd(1, Graph->GetEdges()));
157  //TExeTm ExeTm;
158  for (int dist = 2; dist < (MxDist==-1 ? TInt::Mx : MxDist); dist++) {
159  //printf("ANF dist %d...", dist); ExeTm.Tick();
160  memcpy(LastBitsV.BegI(), CurBitsV.BegI(), sizeof(uint)*CurBitsV.Len()); //LastBitsV = CurBitsV;
161  for (typename PGraph::TObj::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) {
162  const uint64 NIdOffset = GetNIdOffset(NI.GetId());
163  for (e = 0; e < NI.GetOutDeg(); e++) {
164  const uint64 NId2Offset = GetNIdOffset(NI.GetOutNId(e));
165  Union(CurBitsV, NIdOffset, LastBitsV, NId2Offset);
166  }
167  if (! IsDir) {
168  for (e = 0; e < NI.GetInDeg(); e++) {
169  const uint64 NId2Offset = GetNIdOffset(NI.GetInNId(e));
170  Union(CurBitsV, NIdOffset, LastBitsV, NId2Offset);
171  }
172  }
173  }
174  NPairs = 0.0;
175  for (v = NIdToBitPosH.FFirstKeyId(); NIdToBitPosH.FNextKeyId(v); ) {
176  NPairs += GetCount(CurBitsV, NIdToBitPosH[v]);
177  }
178  DistNbrsV.Add(TIntFltKd(dist, NPairs));
179  //printf("pairs: %g %s\n", NPairs, ExeTm.GetTmStr());
180  if (NPairs == 0) { break; }
181  if (DistNbrsV.Len() > 1 && NPairs < 1.001*DistNbrsV.LastLast().Dat) { break; } // 0.1% change
182  //TGnuPlot::SaveTs(DistNbrsV, "hops.tab", "HOPS, REACHABLE PAIRS");
183  }
184 }
#define IAssert(Cond)
Definition: bd.h:262
PGraph Graph
Definition: anf.h:39
unsigned int uint
Definition: bd.h:11
static const int Mx
Definition: dt.h:1049
TSizeTy Len() const
Returns the number of elements in the vector.
Definition: ds.h:547
TKeyDat< TInt, TFlt > TIntFltKd
Definition: ds.h:380
void Union(TAnfBitV &BitV1, const uint64 &NId1Offset, TAnfBitV &BitV2, const uint64 &NId2Offset) const
Definition: anf.h:99
void Clr(const bool &DoDel=true, const TSizeTy &NoDelLim=-1)
Clears the contents of the vector.
Definition: ds.h:971
unsigned long long uint64
Definition: bd.h:38
const TVal & LastLast() const
Returns a reference to the one before last element of the vector.
Definition: ds.h:557
bool FNextKeyId(int &KeyId) const
Definition: hash.h:436
void InitAnfBits(TAnfBitV &BitV)
Definition: anf.h:67
int FFirstKeyId() const
Definition: hash.h:236
uint64 GetNIdOffset(const int &NId) const
Definition: anf.h:46
THash< TInt, uint64 > NIdToBitPosH
Definition: anf.h:36
TVec< uint64 > TAnfBitV
Definition: anf.h:35
TSizeTy Add()
Adds a new element at the end of the vector, after its current last element.
Definition: ds.h:574
double GetCount(const TAnfBitV &BitV, const uint64 &NIdOffset) const
Definition: anf.h:50
template<class PGraph>
uint64 TGraphAnf< PGraph >::GetNIdOffset ( const int &  NId) const
inline

Definition at line 46 of file anf.h.

46 { return NIdToBitPosH.GetDat(NId); }
const TDat & GetDat(const TKey &Key) const
Definition: hash.h:220
THash< TInt, uint64 > NIdToBitPosH
Definition: anf.h:36
template<class PGraph >
void TGraphAnf< PGraph >::GetNodeAnf ( const int &  SrcNId,
TIntFltKdV DistNbrsV,
const int &  MxDist,
const bool &  IsDir 
)

Returns the number of nodes reachable from SrcNId in less than H hops.

Parameters
SrcNIdStarting node.
DistNbrsVMaps between the distance H (in hops) and the number of nodes reachable in <=H hops.
MxDistMaximum number of hops the algorithm spreads from SrcNId.
IsDirfalse: consider links as undirected (drop link directions).

Definition at line 120 of file anf.h.

120  {
121  //const int NNodes = Graph->GetNodes();
122  TAnfBitV CurBitsV, LastBitsV;
123  InitAnfBits(CurBitsV); IAssert(CurBitsV.BegI() != NULL);
124  LastBitsV.Gen(CurBitsV.Len()); IAssert(LastBitsV.BegI() != NULL);
125  DistNbrsV.Clr();
126  DistNbrsV.Add(TIntFltKd(1, Graph->GetNI(SrcNId).GetOutDeg()));
127  for (int dist = 1; dist < (MxDist==-1 ? TInt::Mx : MxDist); dist++) {
128  memcpy(LastBitsV.BegI(), CurBitsV.BegI(), sizeof(uint)*CurBitsV.Len()); //LastBitsV = CurBitsV;
129  for (typename PGraph::TObj::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) {
130  const uint64 NIdOffset = GetNIdOffset(NI.GetId());
131  for (int e = 0; e < NI.GetOutDeg(); e++) {
132  const uint64 NId2Offset = GetNIdOffset(NI.GetOutNId(e));
133  Union(CurBitsV, NIdOffset, LastBitsV, NId2Offset);
134  }
135  if (! IsDir) {
136  for (int e = 0; e < NI.GetInDeg(); e++) {
137  const uint64 NId2Offset = GetNIdOffset(NI.GetInNId(e));
138  Union(CurBitsV, NIdOffset, LastBitsV, NId2Offset);
139  }
140  }
141  }
142  DistNbrsV.Add(TIntFltKd(dist, GetCount(CurBitsV, GetNIdOffset(SrcNId))));
143  if (DistNbrsV.Len() > 1 && DistNbrsV.Last().Dat < 1.001*DistNbrsV[DistNbrsV.Len()-2].Dat) break; // 0.1% change
144  }
145 }
#define IAssert(Cond)
Definition: bd.h:262
PGraph Graph
Definition: anf.h:39
unsigned int uint
Definition: bd.h:11
static const int Mx
Definition: dt.h:1049
TSizeTy Len() const
Returns the number of elements in the vector.
Definition: ds.h:547
TKeyDat< TInt, TFlt > TIntFltKd
Definition: ds.h:380
void Union(TAnfBitV &BitV1, const uint64 &NId1Offset, TAnfBitV &BitV2, const uint64 &NId2Offset) const
Definition: anf.h:99
void Clr(const bool &DoDel=true, const TSizeTy &NoDelLim=-1)
Clears the contents of the vector.
Definition: ds.h:971
unsigned long long uint64
Definition: bd.h:38
void InitAnfBits(TAnfBitV &BitV)
Definition: anf.h:67
const TVal & Last() const
Returns a reference to the last element of the vector.
Definition: ds.h:551
uint64 GetNIdOffset(const int &NId) const
Definition: anf.h:46
TVec< uint64 > TAnfBitV
Definition: anf.h:35
TSizeTy Add()
Adds a new element at the end of the vector, after its current last element.
Definition: ds.h:574
double GetCount(const TAnfBitV &BitV, const uint64 &NIdOffset) const
Definition: anf.h:50
template<class PGraph >
void TGraphAnf< PGraph >::InitAnfBits ( TAnfBitV BitV)

Definition at line 67 of file anf.h.

67  {
68  const int NNodes = Graph->GetNodes();
69  const int LogNodes = int(ceil(TMath::Log2(NNodes)));
70  ApproxBytes = NApprox / 8;
71  NBits = LogNodes + MoreBits; // bits per node
72  const int BytesPerNd = ApproxBytes * NBits; // total bytes per node
73  int64 VSize = ((static_cast<int64>(NNodes) * static_cast<int64>(BytesPerNd))/sizeof(uint)) + 1;
74  IAssertR(VSize <= TInt::Mx,
75  TStr::Fmt("Your graph is too large for Approximate Neighborhood Function, %s is larger than %d",
76  TUInt64::GetStr(VSize).CStr(),TInt::Mx));
77  printf("size %d\n", static_cast<int>(VSize));
78  BitV.Gen((const int)VSize); IAssert(BitV.BegI() != NULL);
79  BitV.PutAll(0);
80  int SetBit = 0;
81  uint64 NodeOff = 0;
82  uchar* BitVPt = (uchar *) BitV.BegI();
83  // for each node: 1st bits of all approximations are at BitV[Offset+0], 2nd bits at BitV[Offset+NApprox/32], ...
84  for (typename PGraph::TObj::TNodeI NI = Graph->BegNI(); NI != Graph->EndNI(); NI++) {
85  NIdToBitPosH.AddDat(NI.GetId(), NodeOff);
86  // init vertex bits
87  for (int approx = 0; approx < NApprox; approx++) {
88  const int RndNum = Rnd.GetUniDevInt();
89  for (SetBit = 0; (RndNum & (1<<SetBit))==0 && SetBit < NBits; SetBit++) { }
90  if (SetBit >= NBits) SetBit = NBits-1;
91  const int BitPos = ApproxBytes * SetBit + approx / 8;
92  *(BitVPt + NodeOff + BitPos) |= (1<<(approx % 8)); // magically works better than code below (see anf.c)
93  }
94  NodeOff += BytesPerNd;
95  }
96 }
TRnd Rnd
Definition: anf.h:40
#define IAssert(Cond)
Definition: bd.h:262
PGraph Graph
Definition: anf.h:39
#define IAssertR(Cond, Reason)
Definition: bd.h:265
unsigned int uint
Definition: bd.h:11
static const int Mx
Definition: dt.h:1049
TInt ApproxBytes
Definition: anf.h:38
TInt NBits
Definition: anf.h:38
unsigned long long uint64
Definition: bd.h:38
unsigned char uchar
Definition: bd.h:10
TStr GetStr() const
Definition: dt.h:1270
long long int64
Definition: bd.h:27
static TStr Fmt(const char *FmtStr,...)
Definition: dt.cpp:1599
static double Log2(const double &Val)
Definition: xmath.h:15
THash< TInt, uint64 > NIdToBitPosH
Definition: anf.h:36
int GetUniDevInt(const int &Range=0)
Definition: dt.cpp:39
TDat & AddDat(const TKey &Key)
Definition: hash.h:196
TInt MoreBits
Definition: anf.h:38
TInt NApprox
Definition: anf.h:37
template<class PGraph>
TGraphAnf< PGraph >::UndefDefaultCopyAssign ( TGraphAnf< PGraph >  )
private
template<class PGraph >
void TGraphAnf< PGraph >::Union ( TAnfBitV BitV1,
const uint64 NId1Offset,
TAnfBitV BitV2,
const uint64 NId2Offset 
) const

Definition at line 99 of file anf.h.

99  {
100  uchar* DstI = (uchar *) BitV1.BegI() + NId1Offset;
101  uchar* SrcI = (uchar *) BitV2.BegI() + NId2Offset;
102  for (int b=0; b < ApproxBytes*NBits; b++, DstI++, SrcI++) { *DstI |= *SrcI; }
103 }
TInt ApproxBytes
Definition: anf.h:38
TInt NBits
Definition: anf.h:38
unsigned char uchar
Definition: bd.h:10

Member Data Documentation

template<class PGraph>
TInt TGraphAnf< PGraph >::ApproxBytes
private

Definition at line 38 of file anf.h.

template<class PGraph>
PGraph TGraphAnf< PGraph >::Graph
private

Definition at line 39 of file anf.h.

template<class PGraph>
TInt TGraphAnf< PGraph >::MoreBits
private

Definition at line 38 of file anf.h.

template<class PGraph>
TInt TGraphAnf< PGraph >::NApprox
private

Definition at line 37 of file anf.h.

template<class PGraph>
TInt TGraphAnf< PGraph >::NBits
private

Definition at line 38 of file anf.h.

template<class PGraph>
THash<TInt, uint64> TGraphAnf< PGraph >::NIdToBitPosH
private

Definition at line 36 of file anf.h.

template<class PGraph>
TRnd TGraphAnf< PGraph >::Rnd
private

Definition at line 40 of file anf.h.


The documentation for this class was generated from the following file: