SNAP Library 6.0, Developer Reference  2020-12-09 16:24:20
SNAP, a general purpose, high performance system for analysis and manipulation of large networks
sim.cpp
Go to the documentation of this file.
1 
3  int lenA = NI1.GetOutDeg();
4  int lenB = NI2.GetOutDeg();
5  int ct = 0;
6  int j = 0;
7  int i = 0;
8  while (i < lenA && j < lenB) {
9  if (NI1.GetOutNId(i) == NI2.GetOutNId(j)) {
10  ct++; i++; j++;
11  } else if (NI1.GetOutNId(i) > NI2.GetOutNId(j)) {
12  j++;
13  } else {
14  i++;
15  }
16  }
17  return ct*1.0/(lenA+lenB-ct);
18 
19 }
20 
21 void MergeNbrs(TIntV* NeighbourV, TIntV* list1, TNGraph::TNodeI NI2) {
22  int j = 0;
23  int k = 0;
24  int prev = -1;
25  int lenA = list1->Len();
26 
27  int lenB = NI2.GetInDeg();
28  if (lenA > 0 && lenB > 0) {
29  int v1 = (*list1)[j];
30  int v2 = NI2.GetInNId(k);
31  while (1) {
32  if (v1 <= v2) {
33  if (prev != v1) {
34  NeighbourV->Add(v1);
35  prev = v1;
36  }
37  j += 1;
38  if (j >= lenA) {
39  break;
40  }
41  v1 = (*list1)[j];
42  } else {
43  if (prev != v2) {
44  NeighbourV->Add(v2);
45  prev = v2;
46  }
47  k += 1;
48  if (k >= lenB) {
49  break;
50  }
51  v2 = NI2.GetInNId(k);
52  }
53  }
54  }
55  while (j < lenA) {
56  int v = (*list1)[j];
57  if (prev != v) {
58  NeighbourV->Add(v);
59  prev = v;
60  }
61  j += 1;
62  }
63  while (k < lenB) {
64  int v = NI2.GetInNId(k);
65  if (prev != v) {
66  NeighbourV->Add(v);
67  prev = v;
68  }
69  k += 1;
70  }
71 }
72 
73 PNGraph GetBiGraph(PTable P, int index_col_1, int index_col_2) {
74  TVec<TPair<TStr, TAttrType>, int > S = P->GetSchema();
75  PNGraph Graph = TSnap::ToGraph<PNGraph>(P, S[index_col_1].GetVal1(), S[index_col_2].GetVal1(), aaFirst);
76  return Graph;
77 }
78 
79 #ifdef GCC_ATOMIC
81  PNEANet KNN = TNEANet::New();
82  TIntV NIdV;
83  Graph->GetNIdV (NIdV);
84  int size = NIdV.Len();
85  for (int ind = 0; ind < size; ind++) {
86  KNN->AddNode(NIdV[ind]);
87  }
88  KNN->AddFltAttrE("sim");
89  TVec<TVec<TPair<TFlt, TInt>, int >, int > TopKList;
90  TVec<TVec<TPair<TFlt, TInt>, int >, int > ThTopK; // for each thread
91  TIntV NodeList;
92  TIntV ThNodeList;// for each thread
93  int NumThreads = omp_get_max_threads();
94  omp_set_num_threads(NumThreads);
95  #pragma omp parallel private(ThNodeList, ThTopK)
96  {
97  TIntV* Neighbors_old = new TIntV();
98  TIntV* Neighbors = new TIntV();
99  TIntV* temp;
100 
101  #pragma omp for schedule(dynamic,1000)
102  for (int ind = 0; ind < size; ind++) {
103  TNGraph::TNodeI NI = Graph->GetNI(NIdV[ind]);
104  if (NI.GetInDeg() > 0) {
105  continue;
106  }
107  if (NI.GetOutDeg() == 0) {
108  continue;
109  }
110 
111  TVec<TPair<TFlt, TInt>, int > TopK;
112  for (int i = 0; i < K; i++) {
113  TopK.Add(TPair<TFlt,TInt>(0.0, -1));
114  }
115 
116  Neighbors->Clr(false);
117  Neighbors_old->Clr(false);
118 
119  for (int i = 0; i < NI.GetOutDeg(); i++) {
120  TNGraph::TNodeI Inst_NI = Graph->GetNI(NI.GetOutNId(i));
121  MergeNbrs(Neighbors, Neighbors_old, Inst_NI);
122 
123  temp = Neighbors_old;
124  temp->Clr(false);
125  Neighbors_old = Neighbors;
126  Neighbors = temp;
127  }
128 
129  // Swap neighbors and Neighbors_old
130 
131  temp = Neighbors_old;
132  Neighbors_old = Neighbors;
133  Neighbors = temp;
134  for(int j = 0; j< Neighbors->Len(); j++) {
135 
136  TNGraph::TNodeI Auth_NI = Graph->GetNI((*Neighbors)[j]);
137 
138  float similarity = JaccardSim(NI, Auth_NI);
139  if (TopK[K-1].GetVal1() < similarity) {
140  int index = 0;
141  for (int i = K-2; i >= 0; i--)
142  if (TopK[i].GetVal1() < similarity) {
143  TopK.SetVal(i+1, TopK[i]);
144  } else {
145  index = i+1;
146  break;
147  }
148  TopK.SetVal(index, TPair<TFlt, TInt>(similarity, (*Neighbors)[j]));
149  }
150  }
151 
152  ThTopK.Add(TopK);
153  ThNodeList.Add(NIdV[ind]);
154 
155 // if (ct%10000 == 0)
156 // cout<<ct<<" avg neighbor degree = "<<sum_neighbors*1.0/ct<<" "<<currentDateTime()<<endl;
157 
158  }
159  #pragma omp critical
160  {
161  for (int j = 0; j < ThTopK.Len(); j++) {
162  TopKList.Add(ThTopK[j]);
163  NodeList.Add(ThNodeList[j]);
164  }
165  }
166  }
167 
168  int size2 = NodeList.Len();
169  for (int i= 0; i < size2 ; i++) {
170 
171  for (int j = 0; j < K; j++) {
172  if (TopKList[i][j].GetVal2() <= -1) {
173  break;
174  }
175  int EId = KNN->AddEdge(NodeList[i], TopKList[i][j].GetVal2());
176  KNN->AddFltAttrDatE(EId, TopKList[i][j].GetVal1(), "sim");
177  }
178  }
179  return KNN;
180 }
181 #endif
182 
183 PNEANet KNNJaccard(PNGraph Graph, int K) {
184  PNEANet KNN = TNEANet::New();
185 
186  int sum_neighbors = 0;
187  int ct = 0;
188  int end;
189  end = Graph->GetNodes();
190  TIntV* Neighbors_old = new TIntV();
191  TIntV* Neighbors = new TIntV();
192  TIntV* temp;
193  TIntV NIdV;
194  Graph->GetNIdV (NIdV);
195  int size = NIdV.Len();
196  for (int ind = 0; ind < size; ind++) {
197  KNN->AddNode(NIdV[ind]);
198  }
199  KNN->AddFltAttrE("sim");
200 
201  for (int ind = 0; ind < size; ind++) {
202  TNGraph::TNodeI NI = Graph->GetNI(NIdV[ind]);
203  if (NI.GetInDeg() > 0) {
204  continue;
205  }
206  if (NI.GetOutDeg() == 0) {
207  continue;
208  }
209  ct ++;
210 
211  TVec<TPair<TFlt, TInt> > TopK;
212  for (int i = 0; i < K; i++) {
213  TopK.Add(TPair<TFlt,TInt>(0.0, -1));
214  }
215 
216  Neighbors->Clr(false);
217  Neighbors_old->Clr(false);
218 
219  for (int i = 0; i < NI.GetOutDeg(); i++) {
220  TNGraph::TNodeI Inst_NI = Graph->GetNI(NI.GetOutNId(i));
221  MergeNbrs(Neighbors, Neighbors_old, Inst_NI);
222 
223  temp = Neighbors_old;
224  temp->Clr(false);
225  Neighbors_old = Neighbors;
226  Neighbors = temp;
227  }
228  int num = Neighbors_old->Len();
229  sum_neighbors += num;
230 
231  //Swap neighbors and Neighbors_old
232 
233  temp = Neighbors_old;
234  Neighbors_old = Neighbors;
235  Neighbors = temp;
236  for (int j = 0; j< Neighbors->Len(); j++) {
237 
238  TNGraph::TNodeI Auth_NI = Graph->GetNI((*Neighbors)[j]);
239 
240  float similarity = JaccardSim(NI, Auth_NI);
241  if (TopK[K-1].GetVal1() < similarity) {
242  int index = 0;
243  for (int i = K-2; i >= 0; i--)
244  if (TopK[i].GetVal1() < similarity) {
245  TopK.SetVal(i+1, TopK[i]);
246  } else {
247  index = i+1;
248  break;
249  }
250  TopK.SetVal(index, TPair<TFlt, TInt>(similarity, (*Neighbors)[j]));
251  }
252  }
253 
254  for (int i = 0; i < K; i++) {
255  int EId = KNN->AddEdge(NI.GetId(), TopK[i].GetVal2());
256  KNN->AddFltAttrDatE(EId, TopK[i].GetVal1(), "sim");
257  }
258 
259 // if (ct%10000 == 0)
260 // cout<<ct<<" avg neighbor degree = "<<sum_neighbors*1.0/ct<<" "<<currentDateTime()<<endl;
261 
262  }
263 
264  return KNN;
265 }
266 
float JaccardSim(TNGraph::TNodeI NI1, TNGraph::TNodeI NI2)
Definition: sim.cpp:2
PNEANet KNNJaccard(PNGraph Graph, int K)
Definition: sim.cpp:183
TSizeTy Len() const
Returns the number of elements in the vector.
Definition: ds.h:575
void MergeNbrs(TIntV *NeighbourV, TIntV *list1, TNGraph::TNodeI NI2)
Definition: sim.cpp:21
PNGraph GetBiGraph(PTable P, int index_col_1, int index_col_2)
Definition: sim.cpp:73
Definition: table.h:257
void Clr(const bool &DoDel=true, const TSizeTy &NoDelLim=-1)
Clears the contents of the vector.
Definition: ds.h:1022
PNEANet KNNJaccardParallel(PNGraph Graph, int K)
Definition: sim.cpp:80
void SetVal(const TSizeTy &ValN, const TVal &Val)
Sets the value of element at position ValN to Val.
Definition: ds.h:653
Definition: ds.h:32
int GetId() const
Returns ID of the current node.
Definition: graph.h:400
int GetOutDeg() const
Returns out-degree of the current node.
Definition: graph.h:406
Node iterator. Only forward iteration (operator++) is supported.
Definition: graph.h:383
TVec< TInt > TIntV
Definition: ds.h:1594
Definition: bd.h:196
int GetInDeg() const
Returns in-degree of the current node.
Definition: graph.h:404
int GetInNId(const int &NodeN) const
Returns ID of NodeN-th in-node (the node pointing to the current node).
Definition: graph.h:412
TSizeTy Add()
Adds a new element at the end of the vector, after its current last element.
Definition: ds.h:602
static PNEANet New()
Static cons returns pointer to graph. Ex: PNEANet Graph=TNEANet::New().
Definition: network.h:2226
int GetOutNId(const int &NodeN) const
Returns ID of NodeN-th out-node (the node the current node points to).
Definition: graph.h:416