SNAP Library 2.0, Developer Reference
2013-05-13 16:33:57
SNAP, a general purpose, high performance system for analysis and manipulation of large networks
|
#include <circles.h>
Public Member Functions | |
TCluster (PGraphAttributes GraphAttributes, TInt K, TFlt Lambda) | |
~TCluster () | |
void | Train (TInt OuterReps, TInt GradientReps, TInt MCMCReps) |
Train the model to predict K Clusters. | |
TVec< TIntSet > | GetCircles (void) |
Public Attributes | |
TCRef | CRef |
Private Member Functions | |
TFlt | LogLikelihood () |
Compute the log-likelihood of Parameters and cluster assignments. | |
TIntSet | MCMC (TInt k, TInt MCMCReps) |
Optimize the cluster assignments for the k'th cluster. | |
void | Gradient () |
Update partial derivatives of log-likelihood. | |
Private Attributes | |
TFlt * | Theta |
TFlt * | Derivative |
TVec< TIntSet > | CHat |
PGraphAttributes | GraphAttributes |
TInt | K |
TFlt | Lambda |
TCluster::TCluster | ( | PGraphAttributes | GraphAttributes, |
TInt | K, | ||
TFlt | Lambda | ||
) | [inline] |
GraphAttributes | attributed graph object with attributes |
K | number of communities to detect |
Lambda | regularization parameter |
Definition at line 35 of file circles.h.
References TVec< TVal, TSizeTy >::Add(), CHat, Derivative, K, TGraphAttributes::NFeatures, and Theta.
: GraphAttributes(GraphAttributes), K(K), Lambda(Lambda) { Theta = new TFlt[K * GraphAttributes->NFeatures]; Derivative = new TFlt[K * GraphAttributes->NFeatures]; for (int k = 0; k < K; k++) { for (int f = 0; f < GraphAttributes->NFeatures; f++) { Theta[k * GraphAttributes->NFeatures + f] = 0; Derivative[k * GraphAttributes->NFeatures + f] = 0; } // Clusters are initially empty. CHat.Add(TIntSet()); } }
TCluster::~TCluster | ( | ) | [inline] |
Definition at line 49 of file circles.h.
References Derivative, and Theta.
{ delete[] Theta; delete[] Derivative; }
TVec<TIntSet> TCluster::GetCircles | ( | void | ) | [inline] |
void TCluster::Gradient | ( | void | ) | [private] |
Update partial derivatives of log-likelihood.
Definition at line 456 of file circles.h.
References THash< TKey, TDat, THashFunc >::BegI(), CHat, Derivative, TGraphAttributes::EdgeFeatures, TGraphAttributes::G, THashKeyDatI< TKey, TDat >::GetDat(), GraphAttributes, Inner(), TUNGraph::IsEdge(), THashKeyDatI< TKey, TDat >::IsEnd(), K, Lambda, TGraphAttributes::NFeatures, Theta, TPair< TVal1, TVal2 >::Val1, and TPair< TVal1, TVal2 >::Val2.
Referenced by Train().
{ for (int i = 0; i < K * GraphAttributes->NFeatures; i++) { if (Theta[i] > 0) { Derivative[i] = -Lambda * Theta[i]; } else { Derivative[i] = Lambda * Theta[i]; } } for (THashKeyDatI<TIntPr, TIntIntH> it = GraphAttributes->EdgeFeatures.BegI(); not it.IsEnd(); it++) { TFlt InnerProduct = 0; TIntPr Edge = it.GetKey(); TInt Src = Edge.Val1; TInt Dst = Edge.Val2; TBool Exists = GraphAttributes->G->IsEdge(Src, Dst); for (int k = 0; k < K; k++) { TFlt d = CHat[k].IsKey(Src) and CHat[k].IsKey(Dst) ? 1 : -1; InnerProduct += d * Inner(it.GetDat(), Theta + k * GraphAttributes->NFeatures); } TFlt expinp = exp(InnerProduct); TFlt q = expinp / (1 + expinp); if (q != q) { q = 1; // Test for nan in case of overflow. } for (int k = 0; k < K; k++) { TBool d_ = CHat[k].IsKey(Src) and CHat[k].IsKey(Dst); TFlt d = d_ ? 1 : -1; for (THashKeyDatI<TInt, TInt> itf = it.GetDat().BegI(); not itf.IsEnd(); itf++) { TInt i = itf.GetKey(); TInt f = itf.GetDat(); if (Exists) { Derivative[k * GraphAttributes->NFeatures + i] += d * f; } Derivative[k * GraphAttributes->NFeatures + i] += -d * f * q; } } } }
TFlt TCluster::LogLikelihood | ( | void | ) | [private] |
Compute the log-likelihood of Parameters and cluster assignments.
Definition at line 499 of file circles.h.
References THash< TKey, TDat, THashFunc >::BegI(), CHat, TGraphAttributes::EdgeFeatures, TGraphAttributes::G, GraphAttributes, Inner(), TUNGraph::IsEdge(), THashKeyDatI< TKey, TDat >::IsEnd(), K, TGraphAttributes::NFeatures, Theta, TPair< TVal1, TVal2 >::Val1, and TPair< TVal1, TVal2 >::Val2.
Referenced by Train().
{ TFlt ll = 0; for (THashKeyDatI<TIntPr, TIntIntH> it = GraphAttributes->EdgeFeatures.BegI(); not it.IsEnd(); it++) { TFlt InnerProduct = 0; TIntPr Edge = it.GetKey(); TInt Src = Edge.Val1; TInt Dst = Edge.Val2; TBool Exists = GraphAttributes->G->IsEdge(Src, Dst); for (int k = 0; k < K; k++) { TFlt d = CHat[k].IsKey(Src) and CHat[k].IsKey(Dst) ? 1 : -1; InnerProduct += d * Inner(it.GetDat(), Theta + k * GraphAttributes->NFeatures); } if (Exists) { ll += InnerProduct; } TFlt ll_ = log(1 + exp(InnerProduct)); ll += -ll_; } if (ll != ll) { printf("ll isnan\n"); exit(1); } return ll; }
TIntSet TCluster::MCMC | ( | TInt | k, |
TInt | MCMCReps | ||
) | [private] |
Optimize the cluster assignments for the k'th cluster.
k | community index on which to run MCMC |
MCMCReps | number of iterations of MCMC |
Definition at line 358 of file circles.h.
References TVec< TVal, TSizeTy >::Add(), THash< TKey, TDat, THashFunc >::AddDat(), THashSet< TKey, THashFunc >::AddKey(), THash< TKey, TDat, THashFunc >::BegI(), CHat, TGraphAttributes::EdgeFeatures, TGraphAttributes::G, THash< TKey, TDat, THashFunc >::GetDat(), THash< TKey, TDat, THashFunc >::GetKeyId(), TRnd::GetUniDev(), GraphAttributes, Inner(), TUNGraph::IsEdge(), THashKeyDatI< TKey, TDat >::IsEnd(), K, TVec< TVal, TSizeTy >::Len(), TGraphAttributes::NFeatures, TGraphAttributes::NodeIDs, Theta, TPair< TVal1, TVal2 >::Val1, and TPair< TVal1, TVal2 >::Val2.
Referenced by Train().
{ TRnd t; THash<TInt, TFlt> CostNotIncludeHash; THash<TInt, TFlt> CostIncludeHash; TVec<TInt> NewLabel; int csize = 0; for (int i = 0; i < GraphAttributes->NodeIDs.Len(); i++) { if (CHat[k].IsKey(GraphAttributes->NodeIDs[i])) { NewLabel.Add(0); } else { NewLabel.Add(1); } if (CHat[k].IsKey(GraphAttributes->NodeIDs[i])) { csize++; } } // Compute edge log-probabilities. for (THashKeyDatI<TIntPr, TIntIntH> it = GraphAttributes->EdgeFeatures.BegI(); not it.IsEnd(); it++) { TIntPr e = it.GetKey(); TInt kv = GraphAttributes->EdgeFeatures.GetKeyId(e); TInt Src = e.Val1; TInt Dst = e.Val2; TBool Exists = GraphAttributes->G->IsEdge(Src, Dst); TFlt InnerProduct = Inner(it.GetDat(), Theta + k * GraphAttributes->NFeatures); TFlt Other = 0; for (int l = 0; l < K; l++) { if (l == k) { continue; } TFlt d = (CHat[l].IsKey(Src) and CHat[l].IsKey(Dst)) ? 1 : -1; Other += d * Inner(it.GetDat(), Theta + l * GraphAttributes->NFeatures); } TFlt CostNotInclude; TFlt CostInclude; if (Exists) { CostNotInclude = -Other + InnerProduct + log(1 + exp(Other - InnerProduct)); CostInclude = -Other - InnerProduct + log(1 + exp(Other + InnerProduct)); } else { CostNotInclude = log(1 + exp(Other - InnerProduct)); CostInclude = log(1 + exp(Other + InnerProduct)); } CostNotIncludeHash.AddDat(kv) = -CostNotInclude; CostIncludeHash.AddDat(kv) = -CostInclude; } // Run MCMC using precomputed probabilities TFlt InitialTemperature = 1.0; // Initial temperature for (int r = 2; r < MCMCReps + 2; r++) { TFlt Temperature = InitialTemperature / log(r); for (int n = 0; n < GraphAttributes->NodeIDs.Len(); n++) { TFlt l0 = 0; TFlt l1 = 0; for (int np = 0; np < GraphAttributes->NodeIDs.Len(); np++) { if (n == np) { continue; } TIntPr ed(GraphAttributes->NodeIDs[n], GraphAttributes->NodeIDs[np]); if (ed.Val1 > ed.Val2) { ed = TIntPr(ed.Val2, ed.Val1); } TInt kv = GraphAttributes->EdgeFeatures.GetKeyId(ed); TFlt m0 = CostNotIncludeHash.GetDat(kv); if (NewLabel[np] == 0) { l0 += m0; l1 += m0; } else { l0 += m0; l1 += CostIncludeHash.GetDat(kv); } } TFlt LogLikelihoodDiff = exp(l1 - l0); TFlt AcceptProb = pow(LogLikelihoodDiff, 1.0 / Temperature); if (t.GetUniDev() < AcceptProb) { NewLabel[n] = 1; } else { NewLabel[n] = 0; } } } TIntSet Result; for (int i = 0; i < GraphAttributes->NodeIDs.Len(); i++) { if (NewLabel[i]) { Result.AddKey(GraphAttributes->NodeIDs[i]); } } return Result; }
void TCluster::Train | ( | TInt | OuterReps, |
TInt | GradientReps, | ||
TInt | MCMCReps | ||
) |
Train the model to predict K Clusters.
OuterReps | number of coordinate ascent iterations |
GradientReps | number of iterations of gradient ascent |
MCMCReps | number of iterations of MCMC |
Definition at line 292 of file circles.h.
References CHat, TVec< TVal, TSizeTy >::Clr(), Derivative, TRnd::GetUniDevInt(), Gradient(), GraphAttributes, K, TVec< TVal, TSizeTy >::Len(), LogLikelihood(), MCMC(), TGraphAttributes::NFeatures, TGraphAttributes::NodeIDs, and Theta.
{ // Learning rate TFlt Increment = 1.0 / (1.0 * GraphAttributes->NodeIDs.Len() * GraphAttributes->NodeIDs.Len()); TRnd t; for (int OuterRep = 0; OuterRep < OuterReps; OuterRep++) { // If it's the first iteration or the solution is degenerate, // randomly initialize the weights and Clusters for (int k = 0; k < K; k++) { if (OuterRep == 0 or CHat[k].Empty() or CHat[k].Len() == GraphAttributes->NodeIDs.Len()) { CHat[k].Clr(); for (int i = 0; i < GraphAttributes->NodeIDs.Len(); i++) { if (t.GetUniDevInt(2) == 0) { CHat[k].AddKey(GraphAttributes->NodeIDs[i]); } } for (int i = 0; i < GraphAttributes->NFeatures; i++) { Theta[k * GraphAttributes->NFeatures + i] = 0; } // Just set a single Feature to 1 as a random initialization. Theta[k * GraphAttributes->NFeatures + t.GetUniDevInt(GraphAttributes->NFeatures)] = 1.0; Theta[k * GraphAttributes->NFeatures] = 1; } } for (int k = 0; k < K; k++) { CHat[k] = MCMC(k, MCMCReps); } TFlt llPrevious = LogLikelihood(); // Perform gradient ascent TFlt ll = 0; for (int gradientRep = 0; gradientRep < GradientReps; gradientRep++) { Gradient(); for (int i = 0; i < K * GraphAttributes->NFeatures; i++) { Theta[i] += Increment * Derivative[i]; } printf("."); fflush( stdout); ll = LogLikelihood(); // If we reduced the objective, undo the update and stop. if (ll < llPrevious) { for (int i = 0; i < K * GraphAttributes->NFeatures; i++) { Theta[i] -= Increment * Derivative[i]; } ll = llPrevious; break; } llPrevious = ll; } printf("\nIteration %d, ll = %f\n", OuterRep + 1, (double) ll); } }
TVec<TIntSet> TCluster::CHat [private] |
Definition at line 73 of file circles.h.
Referenced by GetCircles(), Gradient(), LogLikelihood(), MCMC(), TCluster(), and Train().
TFlt* TCluster::Derivative [private] |
Definition at line 71 of file circles.h.
Referenced by Gradient(), TCluster(), Train(), and ~TCluster().
PGraphAttributes TCluster::GraphAttributes [private] |
Definition at line 74 of file circles.h.
Referenced by Gradient(), LogLikelihood(), MCMC(), and Train().
TInt TCluster::K [private] |
Definition at line 76 of file circles.h.
Referenced by Gradient(), LogLikelihood(), MCMC(), TCluster(), and Train().
TFlt TCluster::Lambda [private] |
Definition at line 77 of file circles.h.
Referenced by Gradient().
TFlt* TCluster::Theta [private] |
Definition at line 70 of file circles.h.
Referenced by Gradient(), LogLikelihood(), MCMC(), TCluster(), Train(), and ~TCluster().