SNAP Library 2.0, Developer Reference
2013-05-13 16:33:57
SNAP, a general purpose, high performance system for analysis and manipulation of large networks
|
#include <agmfast.h>
Public Member Functions | |
TAGMFast (const PUNGraph &GraphPt, const int &InitComs, const int RndSeed=0) | |
void | SetGraph (const PUNGraph &GraphPt) |
void | SetRegCoef (const double _RegCoef) |
double | GetRegCoef () |
void | RandomInit (const int InitComs) |
void | NeighborComInit (const int InitComs) |
void | SetCmtyVV (const TVec< TIntV > &CmtyVV) |
double | Likelihood (const bool DoParallel=false) |
double | LikelihoodForRow (const int UID) |
double | LikelihoodForRow (const int UID, const TIntFltH &FU) |
int | MLENewton (const double &Thres, const int &MaxIter, const TStr PlotNm=TStr()) |
Newton method: DEPRECATED. | |
void | GradientForRow (const int UID, TIntFltH &GradU, const TIntSet &CIDSet) |
double | GradientForOneVar (const TFltV &AlphaKV, const int UID, const int CID, const double &Val) |
double | HessianForOneVar (const TFltV &AlphaKV, const int UID, const int CID, const double &Val) |
double | LikelihoodForOneVar (const TFltV &AlphaKV, const int UID, const int CID, const double &Val) |
void | GetCmtyVV (TVec< TIntV > &CmtyVV) |
void | GetCmtyVV (TVec< TIntV > &CmtyVV, const double Thres, const int MinSz=3) |
extract community affiliation from F_uc | |
int | FindComsByCV (TIntV &ComsV, const double HOFrac=0.2, const int NumThreads=20, const TStr PlotLFNm=TStr(), const double StepAlpha=0.3, const double StepBeta=0.1) |
int | FindComsByCV (const int NumThreads, const int MaxComs, const int MinComs, const int DivComs, const TStr OutFNm, const double StepAlpha=0.3, const double StepBeta=0.3) |
estimate number of communities using cross validation | |
double | LikelihoodHoldOut (const bool DoParallel=false) |
double | GetStepSizeByLineSearch (const int UID, const TIntFltH &DeltaV, const TIntFltH &GradV, const double &Alpha, const double &Beta, const int MaxIter=10) |
int | MLEGradAscent (const double &Thres, const int &MaxIter, const TStr PlotNm, const double StepAlpha=0.3, const double StepBeta=0.1) |
int | MLEGradAscentParallel (const double &Thres, const int &MaxIter, const int ChunkNum, const int ChunkSize, const TStr PlotNm, const double StepAlpha=0.3, const double StepBeta=0.1) |
int | MLEGradAscentParallel (const double &Thres, const int &MaxIter, const int ChunkNum, const TStr PlotNm=TStr(), const double StepAlpha=0.3, const double StepBeta=0.1) |
void | Save (TSOut &SOut) |
void | Load (TSIn &SIn, const int &RndSeed=0) |
double | GetCom (const int &NID, const int &CID) |
void | AddCom (const int &NID, const int &CID, const double &Val) |
void | DelCom (const int &NID, const int &CID) |
double | DotProduct (const TIntFltH &UV, const TIntFltH &VV) |
double | DotProduct (const int &UID, const int &VID) |
double | Prediction (const TIntFltH &FU, const TIntFltH &FV) |
double | Prediction (const int &UID, const int &VID) |
double | Sum (const TIntFltH &UV) |
double | Norm2 (const TIntFltH &UV) |
Public Attributes | |
TVec< TIntSet > | HOVIDSV |
TFlt | MinVal |
TFlt | MaxVal |
TFlt | NegWgt |
TFlt | PNoCom |
TBool | DoParallel |
Private Attributes | |
PUNGraph | G |
TVec< TIntFltH > | F |
TRnd | Rnd |
TIntV | NIDV |
TFlt | RegCoef |
TFltV | SumFV |
TBool | NodesOk |
TInt | NumComs |
Community detection with AGM. Sparse AGM-fast with coordinate ascent.
TAGMFast::TAGMFast | ( | const PUNGraph & | GraphPt, |
const int & | InitComs, | ||
const int | RndSeed = 0 |
||
) | [inline] |
Definition at line 25 of file agmfast.h.
References RandomInit(), and SetGraph().
: Rnd(RndSeed), RegCoef(0), NodesOk(true), MinVal(0.0), MaxVal(1000.0), NegWgt(1.0) { SetGraph(GraphPt); RandomInit(InitComs); }
void TAGMFast::AddCom | ( | const int & | NID, |
const int & | CID, | ||
const double & | Val | ||
) | [inline] |
Definition at line 64 of file agmfast.h.
References F, TVec< TVal, TSizeTy >::GetDat(), and SumFV.
Referenced by MLEGradAscent(), MLENewton(), NeighborComInit(), RandomInit(), and SetCmtyVV().
{ if (F[NID].IsKey(CID)) { SumFV[CID] -= F[NID].GetDat(CID); } F[NID].AddDat(CID) = Val; SumFV[CID] += Val; }
void TAGMFast::DelCom | ( | const int & | NID, |
const int & | CID | ||
) | [inline] |
Definition at line 72 of file agmfast.h.
References F, TVec< TVal, TSizeTy >::GetDat(), and SumFV.
Referenced by MLEGradAscent(), and MLENewton().
double TAGMFast::DotProduct | ( | const TIntFltH & | UV, |
const TIntFltH & | VV | ||
) | [inline] |
Definition at line 78 of file agmfast.h.
References THash< TKey, TDat, THashFunc >::BegI(), THash< TKey, TDat, THashFunc >::EndI(), THash< TKey, TDat, THashFunc >::GetDat(), THash< TKey, TDat, THashFunc >::IsKey(), and THash< TKey, TDat, THashFunc >::Len().
Referenced by DotProduct(), GetStepSizeByLineSearch(), LikelihoodForRow(), MLENewton(), and Prediction().
{ double DP = 0; if (UV.Len() > VV.Len()) { for (TIntFltH::TIter HI = UV.BegI(); HI < UV.EndI(); HI++) { if (VV.IsKey(HI.GetKey())) { DP += VV.GetDat(HI.GetKey()) * HI.GetDat(); } } } else { for (TIntFltH::TIter HI = VV.BegI(); HI < VV.EndI(); HI++) { if (UV.IsKey(HI.GetKey())) { DP += UV.GetDat(HI.GetKey()) * HI.GetDat(); } } } return DP; }
double TAGMFast::DotProduct | ( | const int & | UID, |
const int & | VID | ||
) | [inline] |
Definition at line 95 of file agmfast.h.
References DotProduct(), and F.
{ return DotProduct(F[UID], F[VID]); }
int TAGMFast::FindComsByCV | ( | TIntV & | ComsV, |
const double | HOFrac = 0.2 , |
||
const int | NumThreads = 20 , |
||
const TStr | PlotLFNm = TStr() , |
||
const double | StepAlpha = 0.3 , |
||
const double | StepBeta = 0.1 |
||
) |
Definition at line 552 of file agmfast.cpp.
References TVec< TVal, TSizeTy >::Add(), TUNGraph::BegEI(), TStr::Empty(), TUNGraph::EndEI(), G, TVec< TVal, TSizeTy >::Gen(), TUNGraph::GetEdges(), TUNGraph::GetNIdV(), TUNGraph::GetNodes(), TRnd::GetUniDevInt(), HOVIDSV, TVec< TVal, TSizeTy >::Last(), TVec< TVal, TSizeTy >::Len(), Likelihood(), LikelihoodHoldOut(), MLEGradAscent(), MLEGradAscentParallel(), TFlt::Mn, NeighborComInit(), TGnuPlot::PlotValV(), RandomInit(), Rnd, and TMath::Round().
Referenced by FindComsByCV().
{ if (ComsV.Len() == 0) { int MaxComs = G->GetNodes() / 5; ComsV.Add(2); while(ComsV.Last() < MaxComs) { ComsV.Add(ComsV.Last() * 2); } } TIntPrV EdgeV(G->GetEdges(), 0); for (TUNGraph::TEdgeI EI = G->BegEI(); EI < G->EndEI(); EI++) { EdgeV.Add(TIntPr(EI.GetSrcNId(), EI.GetDstNId())); } EdgeV.Shuffle(Rnd); int MaxIterCV = 3; TVec<TVec<TIntSet> > HoldOutSets(MaxIterCV); if (EdgeV.Len() > 50) { //if edges are many enough, use CV printf("generating hold out set\n"); TIntV NIdV1, NIdV2; G->GetNIdV(NIdV1); G->GetNIdV(NIdV2); for (int IterCV = 0; IterCV < MaxIterCV; IterCV++) { // generate holdout sets HoldOutSets[IterCV].Gen(G->GetNodes()); const int HOTotal = int(HOFrac * G->GetNodes() * (G->GetNodes() - 1) / 2.0); int HOCnt = 0; int HOEdges = (int) TMath::Round(HOFrac * G->GetEdges()); printf("holding out %d edges...\n", HOEdges); for (int he = 0; he < (int) HOEdges; he++) { HoldOutSets[IterCV][EdgeV[he].Val1].AddKey(EdgeV[he].Val2); HoldOutSets[IterCV][EdgeV[he].Val2].AddKey(EdgeV[he].Val1); HOCnt++; } printf("%d Edges hold out\n", HOCnt); while(HOCnt++ < HOTotal) { int SrcNID = Rnd.GetUniDevInt(G->GetNodes()); int DstNID = Rnd.GetUniDevInt(G->GetNodes()); HoldOutSets[IterCV][SrcNID].AddKey(DstNID); HoldOutSets[IterCV][DstNID].AddKey(SrcNID); } } printf("hold out set generated\n"); } TFltV HOLV(ComsV.Len()); TIntFltPrV ComsLV; for (int c = 0; c < ComsV.Len(); c++) { const int Coms = ComsV[c]; printf("Try number of Coms:%d\n", Coms); NeighborComInit(Coms); printf("Initialized\n"); if (EdgeV.Len() > 50) { //if edges are many enough, use CV for (int IterCV = 0; IterCV < MaxIterCV; IterCV++) { HOVIDSV = HoldOutSets[IterCV]; if (NumThreads == 1) { printf("MLE without parallelization begins\n"); MLEGradAscent(0.05, 10 * G->GetNodes(), "", StepAlpha, StepBeta); } else { printf("MLE with parallelization begins\n"); MLEGradAscentParallel(0.05, 100, NumThreads, "", StepAlpha, StepBeta); } double HOL = LikelihoodHoldOut(); HOL = HOL < 0? HOL: TFlt::Mn; HOLV[c] += HOL; } } else { HOVIDSV.Gen(G->GetNodes()); MLEGradAscent(0.0001, 100 * G->GetNodes(), ""); double BIC = 2 * Likelihood() - (double) G->GetNodes() * Coms * 2.0 * log ( (double) G->GetNodes()); HOLV[c] = BIC; } } int EstComs = 2; double MaxL = TFlt::Mn; printf("\n"); for (int c = 0; c < ComsV.Len(); c++) { ComsLV.Add(TIntFltPr(ComsV[c].Val, HOLV[c].Val)); printf("%d(%f)\t", ComsV[c].Val, HOLV[c].Val); if (MaxL < HOLV[c]) { MaxL = HOLV[c]; EstComs = ComsV[c]; } } printf("\n"); RandomInit(EstComs); HOVIDSV.Gen(G->GetNodes()); if (! PlotLFNm.Empty()) { TGnuPlot::PlotValV(ComsLV, PlotLFNm, "hold-out likelihood", "communities", "likelihood"); } return EstComs; }
int TAGMFast::FindComsByCV | ( | const int | NumThreads, |
const int | MaxComs, | ||
const int | MinComs, | ||
const int | DivComs, | ||
const TStr | OutFNm, | ||
const double | StepAlpha = 0.3 , |
||
const double | StepBeta = 0.3 |
||
) |
estimate number of communities using cross validation
Definition at line 539 of file agmfast.cpp.
References TVec< TVal, TSizeTy >::Add(), FindComsByCV(), TVec< TVal, TSizeTy >::Last(), TVec< TVal, TSizeTy >::Len(), TMath::Log(), and TInt::Val.
{ double ComsGap = exp(TMath::Log((double) MaxComs / (double) MinComs) / (double) DivComs); TIntV ComsV; ComsV.Add(MinComs); while (ComsV.Len() < DivComs) { int NewComs = int(ComsV.Last() * ComsGap); if (NewComs == ComsV.Last().Val) { NewComs++; } ComsV.Add(NewComs); } if (ComsV.Last() < MaxComs) { ComsV.Add(MaxComs); } return FindComsByCV(ComsV, 0.1, NumThreads, OutFNm + ".CV.likelihood", StepAlpha, StepBeta); }
void TAGMFast::GetCmtyVV | ( | TVec< TIntV > & | CmtyVV | ) |
Definition at line 506 of file agmfast.cpp.
References G, TUNGraph::GetEdges(), and TUNGraph::GetNodes().
void TAGMFast::GetCmtyVV | ( | TVec< TIntV > & | CmtyVV, |
const double | Thres, | ||
const int | MinSz = 3 |
||
) |
extract community affiliation from F_uc
Definition at line 511 of file agmfast.cpp.
References TVec< TVal, TSizeTy >::Add(), THash< TKey, TDat, THashFunc >::AddDat(), F, TVec< TVal, TSizeTy >::Gen(), GetCom(), THash< TKey, TDat, THashFunc >::GetDat(), THash< TKey, TDat, THashFunc >::GetKey(), IAssert, TVec< TVal, TSizeTy >::Len(), NIDV, NodesOk, NumComs, THash< TKey, TDat, THashFunc >::SortByDat(), and SumFV.
{ CmtyVV.Gen(NumComs, 0); TIntFltH CIDSumFH(NumComs); for (int c = 0; c < SumFV.Len(); c++) { CIDSumFH.AddDat(c, SumFV[c]); } CIDSumFH.SortByDat(false); for (int c = 0; c < NumComs; c++) { int CID = CIDSumFH.GetKey(c); TIntFltH NIDFucH(F.Len() / 10); TIntV CmtyV; IAssert(SumFV[CID] == CIDSumFH.GetDat(CID)); if (SumFV[CID] < Thres) { continue; } for (int u = 0; u < F.Len(); u++) { int NID = u; if (! NodesOk) { NID = NIDV[u]; } if (GetCom(u, CID) >= Thres) { NIDFucH.AddDat(NID, GetCom(u, CID)); } } NIDFucH.SortByDat(false); NIDFucH.GetKeyV(CmtyV); if (CmtyV.Len() >= MinSz) { CmtyVV.Add(CmtyV); } } if ( NumComs != CmtyVV.Len()) { printf("Community vector generated. %d communities are ommitted\n", NumComs.Val - CmtyVV.Len()); } }
double TAGMFast::GetCom | ( | const int & | NID, |
const int & | CID | ||
) | [inline] |
Definition at line 57 of file agmfast.h.
References F, and TVec< TVal, TSizeTy >::GetDat().
Referenced by GetCmtyVV(), GetStepSizeByLineSearch(), GradientForOneVar(), GradientForRow(), LikelihoodForOneVar(), LikelihoodForRow(), MLEGradAscent(), and MLENewton().
double TAGMFast::GetRegCoef | ( | ) | [inline] |
double TAGMFast::GetStepSizeByLineSearch | ( | const int | UID, |
const TIntFltH & | DeltaV, | ||
const TIntFltH & | GradV, | ||
const double & | Alpha, | ||
const double & | Beta, | ||
const int | MaxIter = 10 |
||
) |
Definition at line 663 of file agmfast.cpp.
References DotProduct(), GetCom(), THash< TKey, TDat, THashFunc >::GetDat(), THash< TKey, TDat, THashFunc >::GetKey(), THash< TKey, TDat, THashFunc >::Len(), LikelihoodForRow(), MaxVal, and MinVal.
Referenced by MLEGradAscent(), and MLEGradAscentParallel().
{ double StepSize = 1.0; double InitLikelihood = LikelihoodForRow(UID); TIntFltH NewVarV(DeltaV.Len()); for(int iter = 0; iter < MaxIter; iter++) { for (int i = 0; i < DeltaV.Len(); i++){ int CID = DeltaV.GetKey(i); double NewVal = GetCom(UID, CID) + StepSize * DeltaV.GetDat(CID); if (NewVal < MinVal) { NewVal = MinVal; } if (NewVal > MaxVal) { NewVal = MaxVal; } NewVarV.AddDat(CID, NewVal); } if (LikelihoodForRow(UID, NewVarV) < InitLikelihood + Alpha * StepSize * DotProduct(GradV, DeltaV)) { StepSize *= Beta; } else { break; } if (iter == MaxIter - 1) { StepSize = 0.0; break; } } return StepSize; }
double TAGMFast::GradientForOneVar | ( | const TFltV & | AlphaKV, |
const int | UID, | ||
const int | CID, | ||
const double & | Val | ||
) |
Definition at line 339 of file agmfast.cpp.
References F, G, GetCom(), TVec< TVal, TSizeTy >::GetDat(), TUNGraph::TNodeI::GetDeg(), TUNGraph::TNodeI::GetNbrNId(), TUNGraph::GetNI(), HOVIDSV, IAssert, NegWgt, RegCoef, and SumFV.
Referenced by MLENewton().
{ TUNGraph::TNodeI UI = G->GetNI(UID); double Grad = 0.0, PNoEdge; int VID = 0; for (int e = 0; e < UI.GetDeg(); e++) { VID = UI.GetNbrNId(e); if (HOVIDSV[UID].IsKey(UI.GetNbrNId(e))) { continue; } if (! F[VID].IsKey(CID)) { continue; } PNoEdge = AlphaKV[e] * exp (- F[VID].GetDat(CID) * Val); IAssert(PNoEdge <= 1.0 && PNoEdge >= 0.0); //PNoEdge = PNoEdge >= 1.0 - PNoCom? 1 - PNoCom: PNoEdge; Grad += ((PNoEdge * F[VID].GetDat(CID)) / (1.0 - PNoEdge) + NegWgt * F[VID].GetDat(CID)); } Grad -= NegWgt * (SumFV[CID] - GetCom(UID, CID)); //add regularization if (RegCoef > 0.0) { //L1 Grad -= RegCoef; } if (RegCoef < 0.0) { //L2 Grad += 2 * RegCoef * Val; } return Grad; }
void TAGMFast::GradientForRow | ( | const int | UID, |
TIntFltH & | GradU, | ||
const TIntSet & | CIDSet | ||
) |
Definition at line 250 of file agmfast.cpp.
References THash< TKey, TDat, THashFunc >::AddDat(), DoParallel, G, THash< TKey, TDat, THashFunc >::Gen(), TVec< TVal, TSizeTy >::Gen(), GetCom(), TUNGraph::TNodeI::GetDeg(), THashSet< TKey, THashFunc >::GetKey(), TUNGraph::TNodeI::GetNbrNId(), TUNGraph::GetNI(), HOVIDSV, IAssert, THash< TKey, TDat, THashFunc >::Len(), TVec< TVal, TSizeTy >::Len(), THashSet< TKey, THashFunc >::Len(), NegWgt, Prediction(), RegCoef, and SumFV.
Referenced by MLEGradAscent(), and MLEGradAscentParallel().
{ GradU.Gen(CIDSet.Len()); TFltV HOSumFV; //adjust for Fv of v hold out if (HOVIDSV[UID].Len() > 0) { HOSumFV.Gen(SumFV.Len()); for (int e = 0; e < HOVIDSV[UID].Len(); e++) { for (int c = 0; c < SumFV.Len(); c++) { HOSumFV[c] += GetCom(HOVIDSV[UID][e], c); } } } TUNGraph::TNodeI NI = G->GetNI(UID); int Deg = NI.GetDeg(); TFltV PredV(Deg), GradV(CIDSet.Len()); TIntV CIDV(CIDSet.Len()); if (DoParallel && Deg + CIDSet.Len() > 10) { #pragma omp parallel for schedule(static, 1) for (int e = 0; e < Deg; e++) { if (NI.GetNbrNId(e) == UID) { continue; } if (HOVIDSV[UID].IsKey(NI.GetNbrNId(e))) { continue; } PredV[e] = Prediction(UID, NI.GetNbrNId(e)); } #pragma omp parallel for schedule(static, 1) for (int c = 0; c < CIDSet.Len(); c++) { int CID = CIDSet.GetKey(c); double Val = 0.0; for (int e = 0; e < Deg; e++) { int VID = NI.GetNbrNId(e); if (VID == UID) { continue; } if (HOVIDSV[UID].IsKey(VID)) { continue; } Val += PredV[e] * GetCom(VID, CID) / (1.0 - PredV[e]) + NegWgt * GetCom(VID, CID); } double HOSum = HOVIDSV[UID].Len() > 0? HOSumFV[CID].Val: 0.0;//subtract Hold out pairs only if hold out pairs exist Val -= NegWgt * (SumFV[CID] - HOSum - GetCom(UID, CID)); CIDV[c] = CID; GradV[c] = Val; } } else { for (int e = 0; e < Deg; e++) { if (NI.GetNbrNId(e) == UID) { continue; } if (HOVIDSV[UID].IsKey(NI.GetNbrNId(e))) { continue; } PredV[e] = Prediction(UID, NI.GetNbrNId(e)); } for (int c = 0; c < CIDSet.Len(); c++) { int CID = CIDSet.GetKey(c); double Val = 0.0; for (int e = 0; e < Deg; e++) { int VID = NI.GetNbrNId(e); if (VID == UID) { continue; } if (HOVIDSV[UID].IsKey(VID)) { continue; } Val += PredV[e] * GetCom(VID, CID) / (1.0 - PredV[e]) + NegWgt * GetCom(VID, CID); } double HOSum = HOVIDSV[UID].Len() > 0? HOSumFV[CID].Val: 0.0;//subtract Hold out pairs only if hold out pairs exist Val -= NegWgt * (SumFV[CID] - HOSum - GetCom(UID, CID)); CIDV[c] = CID; GradV[c] = Val; } } //add regularization if (RegCoef > 0.0) { //L1 for (int c = 0; c < GradV.Len(); c++) { GradV[c] -= RegCoef; } } if (RegCoef < 0.0) { //L2 for (int c = 0; c < GradV.Len(); c++) { GradV[c] += 2 * RegCoef * GetCom(UID, CIDV[c]); } } for (int c = 0; c < GradV.Len(); c++) { if (GetCom(UID, CIDV[c]) == 0.0 && GradV[c] < 0.0) { continue; } if (fabs(GradV[c]) < 0.0001) { continue; } GradU.AddDat(CIDV[c], GradV[c]); } for (int c = 0; c < GradU.Len(); c++) { if (GradU[c] >= 10) { GradU[c] = 10; } if (GradU[c] <= -10) { GradU[c] = -10; } IAssert(GradU[c] >= -10); } }
double TAGMFast::HessianForOneVar | ( | const TFltV & | AlphaKV, |
const int | UID, | ||
const int | CID, | ||
const double & | Val | ||
) |
Definition at line 394 of file agmfast.cpp.
References F, G, TVec< TVal, TSizeTy >::GetDat(), TUNGraph::TNodeI::GetDeg(), TUNGraph::TNodeI::GetNbrNId(), TUNGraph::GetNI(), HOVIDSV, IAssert, and RegCoef.
Referenced by MLENewton().
{ TUNGraph::TNodeI UI = G->GetNI(UID); double H = 0.0, PNoEdge; int VID = 0; for (int e = 0; e < UI.GetDeg(); e++) { VID = UI.GetNbrNId(e); if (HOVIDSV[UID].IsKey(UI.GetNbrNId(e))) { continue; } if (! F[VID].IsKey(CID)) { continue; } PNoEdge = AlphaKV[e] * exp (- F[VID].GetDat(CID) * Val); IAssert(PNoEdge <= 1.0 && PNoEdge >= 0.0); //PNoEdge = PNoEdge == 1.0? 1 - PNoCom: PNoEdge; H += (- PNoEdge * F[VID].GetDat(CID) * F[VID].GetDat(CID)) / (1.0 - PNoEdge) / (1.0 - PNoEdge); } //add regularization if (RegCoef < 0.0) { //L2 H += 2 * RegCoef; } IAssert (H <= 0.0); return H; }
double TAGMFast::Likelihood | ( | const bool | DoParallel = false | ) |
Definition at line 173 of file agmfast.cpp.
References F, TVec< TVal, TSizeTy >::Len(), and LikelihoodForRow().
Referenced by FindComsByCV(), MLEGradAscent(), MLEGradAscentParallel(), and MLENewton().
{ TExeTm ExeTm; double L = 0.0; if (_DoParallel) { #pragma omp parallel for for (int u = 0; u < F.Len(); u++) { double LU = LikelihoodForRow(u); #pragma omp atomic L += LU; } } else { for (int u = 0; u < F.Len(); u++) { double LU = LikelihoodForRow(u); L += LU; } } return L; }
double TAGMFast::LikelihoodForOneVar | ( | const TFltV & | AlphaKV, |
const int | UID, | ||
const int | CID, | ||
const double & | Val | ||
) |
Definition at line 364 of file agmfast.cpp.
References F, G, GetCom(), TUNGraph::TNodeI::GetDeg(), TUNGraph::TNodeI::GetNbrNId(), TUNGraph::GetNI(), HOVIDSV, IAssert, NegWgt, RegCoef, and SumFV.
{ TUNGraph::TNodeI UI = G->GetNI(UID); double L = 0.0, PNoEdge; int VID = 0; for (int e = 0; e < UI.GetDeg(); e++) { VID = UI.GetNbrNId(e); if (HOVIDSV[UID].IsKey(UI.GetNbrNId(e))) { continue; } if (! F[VID].IsKey(CID)) { PNoEdge = AlphaKV[e]; } else { PNoEdge = AlphaKV[e] * exp (- F[VID].GetDat(CID) * Val); } IAssert(PNoEdge <= 1.0 && PNoEdge >= 0.0); //PNoEdge = PNoEdge >= 1.0 - PNoCom? 1 - PNoCom: PNoEdge; L += log(1.0 - PNoEdge) + NegWgt * GetCom(VID, CID) * Val; // += ((PNoEdge * F[VID].GetDat(CID)) / (1.0 - PNoEdge) + NegWgt * F[VID].GetDat(CID)); } L -= NegWgt * (SumFV[CID] - GetCom(UID, CID)) * Val; //add regularization if (RegCoef > 0.0) { //L1 L -= RegCoef * Val; } if (RegCoef < 0.0) { //L2 L += RegCoef * Val * Val; } return L; }
double TAGMFast::LikelihoodForRow | ( | const int | UID | ) |
Definition at line 193 of file agmfast.cpp.
References F.
Referenced by GetStepSizeByLineSearch(), and Likelihood().
{ return LikelihoodForRow(UID, F[UID]); }
double TAGMFast::LikelihoodForRow | ( | const int | UID, |
const TIntFltH & | FU | ||
) |
Definition at line 198 of file agmfast.cpp.
References THash< TKey, TDat, THashFunc >::BegI(), DoParallel, DotProduct(), THash< TKey, TDat, THashFunc >::EndI(), F, G, TVec< TVal, TSizeTy >::Gen(), GetCom(), TUNGraph::TNodeI::GetDeg(), TUNGraph::TNodeI::GetNbrNId(), TUNGraph::GetNI(), HOVIDSV, TVec< TVal, TSizeTy >::Len(), NegWgt, Norm2(), Prediction(), RegCoef, Sum(), and SumFV.
{ double L = 0.0; TFltV HOSumFV; //adjust for Fv of v hold out if (HOVIDSV[UID].Len() > 0) { HOSumFV.Gen(SumFV.Len()); for (int e = 0; e < HOVIDSV[UID].Len(); e++) { for (int c = 0; c < SumFV.Len(); c++) { HOSumFV[c] += GetCom(HOVIDSV[UID][e], c); } } } TUNGraph::TNodeI NI = G->GetNI(UID); if (DoParallel && NI.GetDeg() > 10) { #pragma omp parallel for schedule(static, 1) for (int e = 0; e < NI.GetDeg(); e++) { int v = NI.GetNbrNId(e); if (v == UID) { continue; } if (HOVIDSV[UID].IsKey(v)) { continue; } double LU = log (1.0 - Prediction(FU, F[v])) + NegWgt * DotProduct(FU, F[v]); #pragma omp atomic L += LU; } for (TIntFltH::TIter HI = FU.BegI(); HI < FU.EndI(); HI++) { double HOSum = HOVIDSV[UID].Len() > 0? HOSumFV[HI.GetKey()].Val: 0.0;//subtract Hold out pairs only if hold out pairs exist double LU = NegWgt * (SumFV[HI.GetKey()] - HOSum - GetCom(UID, HI.GetKey())) * HI.GetDat(); L -= LU; } } else { for (int e = 0; e < NI.GetDeg(); e++) { int v = NI.GetNbrNId(e); if (v == UID) { continue; } if (HOVIDSV[UID].IsKey(v)) { continue; } L += log (1.0 - Prediction(FU, F[v])) + NegWgt * DotProduct(FU, F[v]); } for (TIntFltH::TIter HI = FU.BegI(); HI < FU.EndI(); HI++) { double HOSum = HOVIDSV[UID].Len() > 0? HOSumFV[HI.GetKey()].Val: 0.0;//subtract Hold out pairs only if hold out pairs exist L -= NegWgt * (SumFV[HI.GetKey()] - HOSum - GetCom(UID, HI.GetKey())) * HI.GetDat(); } } //add regularization if (RegCoef > 0.0) { //L1 L -= RegCoef * Sum(FU); } if (RegCoef < 0.0) { //L2 L += RegCoef * Norm2(FU); } return L; }
double TAGMFast::LikelihoodHoldOut | ( | const bool | DoParallel = false | ) |
Definition at line 645 of file agmfast.cpp.
References G, HOVIDSV, TUNGraph::IsEdge(), TVec< TVal, TSizeTy >::Len(), NegWgt, and Prediction().
Referenced by FindComsByCV().
{ double L = 0.0; for (int u = 0; u < HOVIDSV.Len(); u++) { for (int e = 0; e < HOVIDSV[u].Len(); e++) { int VID = HOVIDSV[u][e]; if (VID == u) { continue; } double Pred = Prediction(u, VID); if (G->IsEdge(u, VID)) { L += log(1.0 - Pred); } else { L += NegWgt * log(Pred); } } } return L; }
void TAGMFast::Load | ( | TSIn & | SIn, |
const int & | RndSeed = 0 |
||
) |
Definition at line 22 of file agmfast.cpp.
References F, G, HOVIDSV, TUNGraph::Load(), TVec< TVal, TSizeTy >::Load(), TBool::Load(), TInt::Load(), TFlt::Load(), MaxVal, MinVal, NegWgt, NIDV, NodesOk, NumComs, PNoCom, TRnd::PutSeed(), RegCoef, Rnd, and SumFV.
{ G->Load(SIn); F.Load(SIn); NIDV.Load(SIn); RegCoef.Load(SIn); SumFV.Load(SIn); NodesOk.Load(SIn); MinVal.Load(SIn); MaxVal.Load(SIn); NegWgt.Load(SIn); NumComs.Load(SIn); HOVIDSV.Load(SIn); PNoCom.Load(SIn); Rnd.PutSeed(RndSeed); }
int TAGMFast::MLEGradAscent | ( | const double & | Thres, |
const int & | MaxIter, | ||
const TStr | PlotNm, | ||
const double | StepAlpha = 0.3 , |
||
const double | StepBeta = 0.1 |
||
) |
Definition at line 688 of file agmfast.cpp.
References TVec< TVal, TSizeTy >::Add(), AddCom(), THash< TKey, TDat, THashFunc >::BegI(), DelCom(), TStr::Empty(), THash< TKey, TDat, THashFunc >::EndI(), TVec< TVal, TSizeTy >::EndI(), F, G, GetCom(), TUNGraph::TNodeI::GetDeg(), TUNGraph::TNodeI::GetNbrNId(), TUNGraph::GetNI(), TUNGraph::GetNodes(), GetStepSizeByLineSearch(), TExeTm::GetTmStr(), GradientForRow(), HOVIDSV, IAssert, TVec< TVal, TSizeTy >::Len(), Likelihood(), TFlt::Mn, Norm2(), TGnuPlot::PlotValV(), and Rnd.
Referenced by FindComsByCV().
{ time_t InitTime = time(NULL); TExeTm ExeTm, CheckTm; int iter = 0, PrevIter = 0; TIntFltPrV IterLV; TUNGraph::TNodeI UI; double PrevL = TFlt::Mn, CurL = 0.0; TIntV NIdxV(F.Len(), 0); for (int i = 0; i < F.Len(); i++) { NIdxV.Add(i); } IAssert(NIdxV.Len() == F.Len()); TIntFltH GradV; while(iter < MaxIter) { NIdxV.Shuffle(Rnd); for (int ui = 0; ui < F.Len(); ui++, iter++) { int u = NIdxV[ui]; // //find set of candidate c (we only need to consider c to which a neighbor of u belongs to) UI = G->GetNI(u); TIntSet CIDSet(5 * UI.GetDeg()); for (int e = 0; e < UI.GetDeg(); e++) { if (HOVIDSV[u].IsKey(UI.GetNbrNId(e))) { continue; } TIntFltH& NbhCIDH = F[UI.GetNbrNId(e)]; for (TIntFltH::TIter CI = NbhCIDH.BegI(); CI < NbhCIDH.EndI(); CI++) { CIDSet.AddKey(CI.GetKey()); } } for (TIntFltH::TIter CI = F[u].BegI(); CI < F[u].EndI(); CI++) { //remove the community membership which U does not share with its neighbors if (! CIDSet.IsKey(CI.GetKey())) { DelCom(u, CI.GetKey()); } } if (CIDSet.Empty()) { continue; } GradientForRow(u, GradV, CIDSet); if (Norm2(GradV) < 1e-4) { continue; } double LearnRate = GetStepSizeByLineSearch(u, GradV, GradV, StepAlpha, StepBeta); if (LearnRate == 0.0) { continue; } for (int ci = 0; ci < GradV.Len(); ci++) { int CID = GradV.GetKey(ci); double Change = LearnRate * GradV.GetDat(CID); double NewFuc = GetCom(u, CID) + Change; if (NewFuc <= 0.0) { DelCom(u, CID); } else { AddCom(u, CID, NewFuc); } } if (! PlotNm.Empty() && (iter + 1) % G->GetNodes() == 0) { IterLV.Add(TIntFltPr(iter, Likelihood(false))); } } printf("\r%d iterations (%f) [%lu sec]", iter, CurL, time(NULL) - InitTime); fflush(stdout); if (iter - PrevIter >= 2 * G->GetNodes() && iter > 10000) { PrevIter = iter; CurL = Likelihood(); if (PrevL > TFlt::Mn && ! PlotNm.Empty()) { printf("\r%d iterations, Likelihood: %f, Diff: %f", iter, CurL, CurL - PrevL); } fflush(stdout); if (CurL - PrevL <= Thres * fabs(PrevL)) { break; } else { PrevL = CurL; } } } printf("\n"); printf("MLE for Lambda completed with %d iterations(%s)\n", iter, ExeTm.GetTmStr()); if (! PlotNm.Empty()) { TGnuPlot::PlotValV(IterLV, PlotNm + ".likelihood_Q"); } return iter; }
int TAGMFast::MLEGradAscentParallel | ( | const double & | Thres, |
const int & | MaxIter, | ||
const int | ChunkNum, | ||
const int | ChunkSize, | ||
const TStr | PlotNm, | ||
const double | StepAlpha = 0.3 , |
||
const double | StepBeta = 0.1 |
||
) |
Definition at line 759 of file agmfast.cpp.
References TVec< TVal, TSizeTy >::Add(), THash< TKey, TDat, THashFunc >::BegI(), TVec< TVal, TSizeTy >::Clr(), TStr::Empty(), THashKeyDatI< TKey, TDat >::EndI, THash< TKey, TDat, THashFunc >::EndI(), TVec< TVal, TSizeTy >::EndI(), F, G, TSecTm::GetAbsSecs(), TSecTm::GetCurTm(), THash< TKey, TDat, THashFunc >::GetDat(), TVec< TVal, TSizeTy >::GetDat(), TUNGraph::TNodeI::GetDeg(), THash< TKey, TDat, THashFunc >::GetKey(), TUNGraph::TNodeI::GetNbrNId(), TUNGraph::GetNI(), TUNGraph::GetNodes(), GetStepSizeByLineSearch(), TExeTm::GetTmStr(), GradientForRow(), HOVIDSV, IAssert, THash< TKey, TDat, THashFunc >::Len(), TVec< TVal, TSizeTy >::Len(), Likelihood(), TFlt::Mn, Norm2(), TGnuPlot::PlotValV(), TVec< TVal, TSizeTy >::PutAll(), Rnd, and SumFV.
Referenced by FindComsByCV(), and MLEGradAscentParallel().
{ //parallel time_t InitTime = time(NULL); uint64 StartTm = TSecTm::GetCurTm().GetAbsSecs(); TExeTm ExeTm, CheckTm; double PrevL = Likelihood(true); TIntFltPrV IterLV; int PrevIter = 0; int iter = 0; TIntV NIdxV(F.Len(), 0); for (int i = 0; i < F.Len(); i++) { NIdxV.Add(i); } TIntV NIDOPTV(F.Len()); //check if a node needs optimization or not 1: does not require optimization NIDOPTV.PutAll(0); TVec<TIntFltH> NewF(ChunkNum * ChunkSize); TIntV NewNIDV(ChunkNum * ChunkSize); for (iter = 0; iter < MaxIter; iter++) { NIdxV.Clr(false); for (int i = 0; i < F.Len(); i++) { if (NIDOPTV[i] == 0) { NIdxV.Add(i); } } IAssert (NIdxV.Len() <= F.Len()); NIdxV.Shuffle(Rnd); // compute gradient for chunk of nodes #pragma omp parallel for schedule(static, 1) for (int TIdx = 0; TIdx < ChunkNum; TIdx++) { TIntFltH GradV; for (int ui = TIdx * ChunkSize; ui < (TIdx + 1) * ChunkSize; ui++) { NewNIDV[ui] = -1; if (ui > NIdxV.Len()) { continue; } int u = NIdxV[ui]; // //find set of candidate c (we only need to consider c to which a neighbor of u belongs to) TUNGraph::TNodeI UI = G->GetNI(u); TIntSet CIDSet(5 * UI.GetDeg()); TIntFltH CurFU = F[u]; for (int e = 0; e < UI.GetDeg(); e++) { if (HOVIDSV[u].IsKey(UI.GetNbrNId(e))) { continue; } TIntFltH& NbhCIDH = F[UI.GetNbrNId(e)]; for (TIntFltH::TIter CI = NbhCIDH.BegI(); CI < NbhCIDH.EndI(); CI++) { CIDSet.AddKey(CI.GetKey()); } } if (CIDSet.Empty()) { CurFU.Clr(); } else { for (TIntFltH::TIter CI = CurFU.BegI(); CI < CurFU.EndI(); CI++) { //remove the community membership which U does not share with its neighbors if (! CIDSet.IsKey(CI.GetKey())) { CurFU.DelIfKey(CI.GetKey()); } } GradientForRow(u, GradV, CIDSet); if (Norm2(GradV) < 1e-4) { NIDOPTV[u] = 1; continue; } double LearnRate = GetStepSizeByLineSearch(u, GradV, GradV, StepAlpha, StepBeta, 5); if (LearnRate <= 1e-5) { NewNIDV[ui] = -2; continue; } for (int ci = 0; ci < GradV.Len(); ci++) { int CID = GradV.GetKey(ci); double Change = LearnRate * GradV.GetDat(CID); double NewFuc = CurFU.IsKey(CID)? CurFU.GetDat(CID) + Change : Change; if (NewFuc <= 0.0) { CurFU.DelIfKey(CID); } else { CurFU.AddDat(CID) = NewFuc; } } CurFU.Defrag(); } //store changes NewF[ui] = CurFU; NewNIDV[ui] = u; } } int NumNoChangeGrad = 0; int NumNoChangeStepSize = 0; for (int ui = 0; ui < NewNIDV.Len(); ui++) { int NewNID = NewNIDV[ui]; if (NewNID == -1) { NumNoChangeGrad++; continue; } if (NewNID == -2) { NumNoChangeStepSize++; continue; } for (TIntFltH::TIter CI = F[NewNID].BegI(); CI < F[NewNID].EndI(); CI++) { SumFV[CI.GetKey()] -= CI.GetDat(); } } #pragma omp parallel for for (int ui = 0; ui < NewNIDV.Len(); ui++) { int NewNID = NewNIDV[ui]; if (NewNID < 0) { continue; } F[NewNID] = NewF[ui]; } for (int ui = 0; ui < NewNIDV.Len(); ui++) { int NewNID = NewNIDV[ui]; if (NewNID < 0) { continue; } for (TIntFltH::TIter CI = F[NewNID].BegI(); CI < F[NewNID].EndI(); CI++) { SumFV[CI.GetKey()] += CI.GetDat(); } } // update the nodes who are optimal for (int ui = 0; ui < NewNIDV.Len(); ui++) { int NewNID = NewNIDV[ui]; if (NewNID < 0) { continue; } TUNGraph::TNodeI UI = G->GetNI(NewNID); NIDOPTV[NewNID] = 0; for (int e = 0; e < UI.GetDeg(); e++) { NIDOPTV[UI.GetNbrNId(e)] = 0; } } int OPTCnt = 0; for (int i = 0; i < NIDOPTV.Len(); i++) { if (NIDOPTV[i] == 1) { OPTCnt++; } } if (! PlotNm.Empty()) { printf("\r%d iterations [%s] %d secs", iter * ChunkSize * ChunkNum, ExeTm.GetTmStr(), int(TSecTm::GetCurTm().GetAbsSecs() - StartTm)); if (PrevL > TFlt::Mn) { printf(" (%f) %d g %d s %d OPT", PrevL, NumNoChangeGrad, NumNoChangeStepSize, OPTCnt); } fflush(stdout); } if ((iter - PrevIter) * ChunkSize * ChunkNum >= G->GetNodes()) { PrevIter = iter; double CurL = Likelihood(true); IterLV.Add(TIntFltPr(iter * ChunkSize * ChunkNum, CurL)); printf("\r%d iterations, Likelihood: %f, Diff: %f [%d secs]", iter, CurL, CurL - PrevL, int(time(NULL) - InitTime)); fflush(stdout); if (CurL - PrevL <= Thres * fabs(PrevL)) { break; } else { PrevL = CurL; } } } if (! PlotNm.Empty()) { printf("\nMLE completed with %d iterations(%d secs)\n", iter, int(TSecTm::GetCurTm().GetAbsSecs() - StartTm)); TGnuPlot::PlotValV(IterLV, PlotNm + ".likelihood_Q"); } else { printf("\rMLE completed with %d iterations(%d secs)", iter, int(time(NULL) - InitTime)); fflush(stdout); } return iter; }
int TAGMFast::MLEGradAscentParallel | ( | const double & | Thres, |
const int & | MaxIter, | ||
const int | ChunkNum, | ||
const TStr | PlotNm = TStr() , |
||
const double | StepAlpha = 0.3 , |
||
const double | StepBeta = 0.1 |
||
) | [inline] |
Definition at line 49 of file agmfast.h.
References G, TUNGraph::GetNodes(), and MLEGradAscentParallel().
{ int ChunkSize = G->GetNodes() / 10 / ChunkNum; if (ChunkSize == 0) { ChunkSize = 1; } return MLEGradAscentParallel(Thres, MaxIter, ChunkNum, ChunkSize, PlotNm, StepAlpha, StepBeta); }
int TAGMFast::MLENewton | ( | const double & | Thres, |
const int & | MaxIter, | ||
const TStr | PlotNm = TStr() |
||
) |
Newton method: DEPRECATED.
Definition at line 416 of file agmfast.cpp.
References TVec< TVal, TSizeTy >::Add(), AddCom(), THashSet< TKey, THashFunc >::AddKey(), THash< TKey, TDat, THashFunc >::BegI(), THashSet< TKey, THashFunc >::BegI(), TVec< TVal, TSizeTy >::Clr(), DelCom(), DotProduct(), TStr::Empty(), TPt< TRec >::Empty(), THashSet< TKey, THashFunc >::Empty(), THash< TKey, TDat, THashFunc >::EndI(), TVec< TVal, TSizeTy >::EndI(), THashSet< TKey, THashFunc >::EndI(), F, TStr::Fmt(), G, GetCom(), TUNGraph::TNodeI::GetDeg(), TUNGraph::TNodeI::GetNbrNId(), TUNGraph::GetNI(), TUNGraph::GetNIdV(), TUNGraph::GetNodes(), TExeTm::GetTmStr(), GradientForOneVar(), HessianForOneVar(), HOVIDSV, IAssertR, THashSet< TKey, THashFunc >::IsKey(), TVec< TVal, TSizeTy >::Len(), Likelihood(), TFlt::Mn, TGnuPlot::PlotValV(), PNoCom, Rnd, TVec< TVal, TSizeTy >::Shuffle(), and TFlt::Val.
{ TExeTm ExeTm; int iter = 0, PrevIter = 0; TIntFltPrV IterLV; double PrevL = TFlt::Mn, CurL; TUNGraph::TNodeI UI; TIntV NIdxV; G->GetNIdV(NIdxV); int CID, UID, NewtonIter; double Fuc, PrevFuc, Grad, H; while(iter < MaxIter) { NIdxV.Shuffle(Rnd); for (int ui = 0; ui < F.Len(); ui++, iter++) { if (! PlotNm.Empty() && iter % G->GetNodes() == 0) { IterLV.Add(TIntFltPr(iter, Likelihood(false))); } UID = NIdxV[ui]; //find set of candidate c (we only need to consider c to which a neighbor of u belongs to) TIntSet CIDSet; UI = G->GetNI(UID); if (UI.GetDeg() == 0) { //if the node is isolated, clear its membership and skip if (! F[UID].Empty()) { F[UID].Clr(); } continue; } for (int e = 0; e < UI.GetDeg(); e++) { if (HOVIDSV[UID].IsKey(UI.GetNbrNId(e))) { continue; } TIntFltH& NbhCIDH = F[UI.GetNbrNId(e)]; for (TIntFltH::TIter CI = NbhCIDH.BegI(); CI < NbhCIDH.EndI(); CI++) { CIDSet.AddKey(CI.GetKey()); } } for (TIntFltH::TIter CI = F[UID].BegI(); CI < F[UID].EndI(); CI++) { //remove the community membership which U does not share with its neighbors if (! CIDSet.IsKey(CI.GetKey())) { DelCom(UID, CI.GetKey()); } } if (CIDSet.Empty()) { continue; } for (TIntSet::TIter CI = CIDSet.BegI(); CI < CIDSet.EndI(); CI++) { CID = CI.GetKey(); //optimize for UID, CID //compute constants TFltV AlphaKV(UI.GetDeg()); for (int e = 0; e < UI.GetDeg(); e++) { if (HOVIDSV[UID].IsKey(UI.GetNbrNId(e))) { continue; } AlphaKV[e] = (1 - PNoCom) * exp(- DotProduct(UID, UI.GetNbrNId(e)) + GetCom(UI.GetNbrNId(e), CID) * GetCom(UID, CID)); IAssertR(AlphaKV[e] <= 1.0, TStr::Fmt("AlphaKV=%f, %f, %f", AlphaKV[e].Val, PNoCom.Val, GetCom(UI.GetNbrNId(e), CID))); } Fuc = GetCom(UID, CID); PrevFuc = Fuc; Grad = GradientForOneVar(AlphaKV, UID, CID, Fuc), H = 0.0; if (Grad <= 1e-3 && Grad >= -0.1) { continue; } NewtonIter = 0; while (NewtonIter++ < 10) { Grad = GradientForOneVar(AlphaKV, UID, CID, Fuc), H = 0.0; H = HessianForOneVar(AlphaKV, UID, CID, Fuc); if (Fuc == 0.0 && Grad <= 0.0) { Grad = 0.0; } if (fabs(Grad) < 1e-3) { break; } if (H == 0.0) { Fuc = 0.0; break; } double NewtonStep = - Grad / H; if (NewtonStep < -0.5) { NewtonStep = - 0.5; } Fuc += NewtonStep; if (Fuc < 0.0) { Fuc = 0.0; } } if (Fuc == 0.0) { DelCom(UID, CID); } else { AddCom(UID, CID, Fuc); } } } if (iter - PrevIter >= 2 * G->GetNodes() && iter > 10000) { PrevIter = iter; CurL = Likelihood(); if (PrevL > TFlt::Mn && ! PlotNm.Empty()) { printf("\r%d iterations, Likelihood: %f, Diff: %f", iter, CurL, CurL - PrevL); } fflush(stdout); if (CurL - PrevL <= Thres * fabs(PrevL)) { break; } else { PrevL = CurL; } } } if (! PlotNm.Empty()) { printf("\nMLE for Lambda completed with %d iterations(%s)\n", iter, ExeTm.GetTmStr()); TGnuPlot::PlotValV(IterLV, PlotNm + ".likelihood_Q"); } return iter; }
void TAGMFast::NeighborComInit | ( | const int | InitComs | ) |
Definition at line 60 of file agmfast.cpp.
References AddCom(), F, G, TVec< TVal, TSizeTy >::Gen(), TAGMUtil::GetConductance(), TUNGraph::TNodeI::GetDeg(), TUNGraph::GetEdges(), TAGMUtil::GetNbhCom(), TUNGraph::TNodeI::GetNbrNId(), TUNGraph::GetNI(), TUNGraph::GetNodes(), TExeTm::GetTmStr(), TRnd::GetUniDev(), TRnd::GetUniDevInt(), IAssert, TVec< TVal, TSizeTy >::Len(), NumComs, Rnd, and SumFV.
Referenced by FindComsByCV().
{ //initialize with best neighborhood communities (Gleich et.al. KDD'12) F.Gen(G->GetNodes()); SumFV.Gen(InitComs); NumComs = InitComs; const int Edges = G->GetEdges(); //TIntFltH NCPhiH(F.Len()); TFltIntPrV NIdPhiV(F.Len(), 0); TIntSet InvalidNIDS(F.Len()); TIntV ChosenNIDV(InitComs, 0); //FOR DEBUG TExeTm RunTm; //compute conductance of neighborhood community for (int u = 0; u < F.Len(); u++) { TIntSet NBCmty(G->GetNI(u).GetDeg() + 1); double Phi; if (G->GetNI(u).GetDeg() < 5) { //do not include nodes with too few degree Phi = 1.0; } else { TAGMUtil::GetNbhCom(G, u, NBCmty); IAssert(NBCmty.Len() == G->GetNI(u).GetDeg() + 1); Phi = TAGMUtil::GetConductance(G, NBCmty, Edges); } //NCPhiH.AddDat(u, Phi); NIdPhiV.Add(TFltIntPr(Phi, u)); } NIdPhiV.Sort(true); printf("conductance computation completed [%s]\n", RunTm.GetTmStr()); fflush(stdout); //choose nodes with local minimum in conductance int CurCID = 0; for (int ui = 0; ui < NIdPhiV.Len(); ui++) { int UID = NIdPhiV[ui].Val2; fflush(stdout); if (InvalidNIDS.IsKey(UID)) { continue; } ChosenNIDV.Add(UID); //FOR DEBUG //add the node and its neighbors to the current community AddCom(UID, CurCID, 1.0); TUNGraph::TNodeI NI = G->GetNI(UID); fflush(stdout); for (int e = 0; e < NI.GetDeg(); e++) { AddCom(NI.GetNbrNId(e), CurCID, 1.0); } //exclude its neighbors from the next considerations for (int e = 0; e < NI.GetDeg(); e++) { InvalidNIDS.AddKey(NI.GetNbrNId(e)); } CurCID++; fflush(stdout); if (CurCID >= NumComs) { break; } } if (NumComs > CurCID) { printf("%d communities needed to fill randomly\n", NumComs - CurCID); } //assign a member to zero-member community (if any) for (int c = 0; c < SumFV.Len(); c++) { if (SumFV[c] == 0.0) { int ComSz = 10; for (int u = 0; u < ComSz; u++) { int UID = Rnd.GetUniDevInt(G->GetNodes()); AddCom(UID, c, Rnd.GetUniDev()); } } } }
double TAGMFast::Norm2 | ( | const TIntFltH & | UV | ) | [inline] |
Definition at line 113 of file agmfast.h.
References THash< TKey, TDat, THashFunc >::BegI(), and THash< TKey, TDat, THashFunc >::EndI().
Referenced by LikelihoodForRow(), MLEGradAscent(), and MLEGradAscentParallel().
{ double N = 0.0; for (TIntFltH::TIter HI = UV.BegI(); HI < UV.EndI(); HI++) { N += HI.GetDat() * HI.GetDat(); } return N; }
double TAGMFast::Prediction | ( | const TIntFltH & | FU, |
const TIntFltH & | FV | ||
) | [inline] |
Definition at line 98 of file agmfast.h.
References DotProduct(), TStr::Fmt(), IAssertR, and PNoCom.
Referenced by GradientForRow(), LikelihoodForRow(), LikelihoodHoldOut(), and Prediction().
{ double DP = log (1.0 / (1.0 - PNoCom)) + DotProduct(FU, FV); IAssertR(DP > 0.0, TStr::Fmt("DP: %f", DP)); return exp(- DP); }
double TAGMFast::Prediction | ( | const int & | UID, |
const int & | VID | ||
) | [inline] |
Definition at line 103 of file agmfast.h.
References F, and Prediction().
{ return Prediction(F[UID], F[VID]); }
void TAGMFast::RandomInit | ( | const int | InitComs | ) |
Definition at line 38 of file agmfast.cpp.
References AddCom(), F, G, TVec< TVal, TSizeTy >::Gen(), TUNGraph::TNodeI::GetDeg(), TUNGraph::GetNI(), TUNGraph::GetNodes(), TRnd::GetUniDev(), TRnd::GetUniDevInt(), TVec< TVal, TSizeTy >::Len(), NumComs, Rnd, and SumFV.
Referenced by FindComsByCV(), and TAGMFast().
{ F.Gen(G->GetNodes()); SumFV.Gen(InitComs); NumComs = InitComs; for (int u = 0; u < F.Len(); u++) { //assign to just one community int Mem = G->GetNI(u).GetDeg(); if (Mem > 10) { Mem = 10; } for (int c = 0; c < Mem; c++) { int CID = Rnd.GetUniDevInt(InitComs); AddCom(u, CID, Rnd.GetUniDev()); } } //assign a member to zero-member community (if any) for (int c = 0; c < SumFV.Len(); c++) { if (SumFV[c] == 0.0) { int UID = Rnd.GetUniDevInt(G->GetNodes()); AddCom(UID, c, Rnd.GetUniDev()); } } }
void TAGMFast::Save | ( | TSOut & | SOut | ) |
Definition at line 7 of file agmfast.cpp.
References F, G, HOVIDSV, MaxVal, MinVal, NegWgt, NIDV, NodesOk, NumComs, PNoCom, RegCoef, TUNGraph::Save(), TVec< TVal, TSizeTy >::Save(), TBool::Save(), TInt::Save(), TFlt::Save(), and SumFV.
{ G->Save(SOut); F.Save(SOut); NIDV.Save(SOut); RegCoef.Save(SOut); SumFV.Save(SOut); NodesOk.Save(SOut); MinVal.Save(SOut); MaxVal.Save(SOut); NegWgt.Save(SOut); NumComs.Save(SOut); HOVIDSV.Save(SOut); PNoCom.Save(SOut); }
void TAGMFast::SetCmtyVV | ( | const TVec< TIntV > & | CmtyVV | ) |
Definition at line 125 of file agmfast.cpp.
References AddCom(), THash< TKey, TDat, THashFunc >::AddDat(), F, G, TVec< TVal, TSizeTy >::Gen(), TVec< TVal, TSizeTy >::GetDat(), TUNGraph::GetNodes(), TUNGraph::IsNode(), TVec< TVal, TSizeTy >::Len(), NIDV, NodesOk, NumComs, and SumFV.
{ F.Gen(G->GetNodes()); SumFV.Gen(CmtyVV.Len()); NumComs = CmtyVV.Len(); TIntH NIDIdxH(NIDV.Len()); if (! NodesOk) { for (int u = 0; u < NIDV.Len(); u++) { NIDIdxH.AddDat(NIDV[u], u); } } for (int c = 0; c < CmtyVV.Len(); c++) { for (int u = 0; u < CmtyVV[c].Len(); u++) { int UID = CmtyVV[c][u]; if (! NodesOk) { UID = NIDIdxH.GetDat(UID); } if (G->IsNode(UID)) { AddCom(UID, c, 1.0); } } } }
void TAGMFast::SetGraph | ( | const PUNGraph & | GraphPt | ) |
Definition at line 146 of file agmfast.cpp.
References TSnap::DelSelfEdges(), DoParallel, G, TVec< TVal, TSizeTy >::Gen(), TUNGraph::GetNIdV(), TUNGraph::GetNodes(), TSnap::GetSubGraph(), HOVIDSV, IAssert, TUNGraph::IsNode(), TFlt::Mx, NegWgt, NIDV, NodesOk, and PNoCom.
Referenced by TAGMFast().
{ G = GraphPt; HOVIDSV.Gen(G->GetNodes()); NodesOk = true; GraphPt->GetNIdV(NIDV); // check that nodes IDs are {0,1,..,Nodes-1} for (int nid = 0; nid < GraphPt->GetNodes(); nid++) { if (! GraphPt->IsNode(nid)) { NodesOk = false; break; } } if (! NodesOk) { printf("rearrage nodes\n"); G = TSnap::GetSubGraph(GraphPt, NIDV, true); for (int nid = 0; nid < G->GetNodes(); nid++) { IAssert(G->IsNode(nid)); } } TSnap::DelSelfEdges(G); PNoCom = 1.0 / (double) G->GetNodes(); DoParallel = false; if (1.0 / PNoCom > sqrt(TFlt::Mx)) { PNoCom = 0.99 / sqrt(TFlt::Mx); } // to prevent overflow NegWgt = 1.0; }
void TAGMFast::SetRegCoef | ( | const double | _RegCoef | ) | [inline] |
double TAGMFast::Sum | ( | const TIntFltH & | UV | ) | [inline] |
Definition at line 106 of file agmfast.h.
References THash< TKey, TDat, THashFunc >::BegI(), and THash< TKey, TDat, THashFunc >::EndI().
Referenced by LikelihoodForRow().
{ double N = 0.0; for (TIntFltH::TIter HI = UV.BegI(); HI < UV.EndI(); HI++) { N += HI.GetDat(); } return N; }
Definition at line 23 of file agmfast.h.
Referenced by GradientForRow(), LikelihoodForRow(), and SetGraph().
TVec<TIntFltH> TAGMFast::F [private] |
Definition at line 10 of file agmfast.h.
Referenced by AddCom(), DelCom(), DotProduct(), GetCmtyVV(), GetCom(), GradientForOneVar(), HessianForOneVar(), Likelihood(), LikelihoodForOneVar(), LikelihoodForRow(), Load(), MLEGradAscent(), MLEGradAscentParallel(), MLENewton(), NeighborComInit(), Prediction(), RandomInit(), Save(), and SetCmtyVV().
PUNGraph TAGMFast::G [private] |
Definition at line 9 of file agmfast.h.
Referenced by FindComsByCV(), GetCmtyVV(), GradientForOneVar(), GradientForRow(), HessianForOneVar(), LikelihoodForOneVar(), LikelihoodForRow(), LikelihoodHoldOut(), Load(), MLEGradAscent(), MLEGradAscentParallel(), MLENewton(), NeighborComInit(), RandomInit(), Save(), SetCmtyVV(), and SetGraph().
Definition at line 18 of file agmfast.h.
Referenced by FindComsByCV(), GradientForOneVar(), GradientForRow(), HessianForOneVar(), LikelihoodForOneVar(), LikelihoodForRow(), LikelihoodHoldOut(), Load(), MLEGradAscent(), MLEGradAscentParallel(), MLENewton(), Save(), and SetGraph().
Definition at line 20 of file agmfast.h.
Referenced by GetStepSizeByLineSearch(), Load(), and Save().
Definition at line 19 of file agmfast.h.
Referenced by GetStepSizeByLineSearch(), Load(), and Save().
Definition at line 21 of file agmfast.h.
Referenced by GradientForOneVar(), GradientForRow(), LikelihoodForOneVar(), LikelihoodForRow(), LikelihoodHoldOut(), Load(), Save(), and SetGraph().
TIntV TAGMFast::NIDV [private] |
Definition at line 12 of file agmfast.h.
Referenced by GetCmtyVV(), Load(), Save(), SetCmtyVV(), and SetGraph().
TBool TAGMFast::NodesOk [private] |
Definition at line 15 of file agmfast.h.
Referenced by GetCmtyVV(), Load(), Save(), SetCmtyVV(), and SetGraph().
TInt TAGMFast::NumComs [private] |
Definition at line 16 of file agmfast.h.
Referenced by GetCmtyVV(), Load(), NeighborComInit(), RandomInit(), Save(), and SetCmtyVV().
Definition at line 22 of file agmfast.h.
Referenced by Load(), MLENewton(), Prediction(), Save(), and SetGraph().
TFlt TAGMFast::RegCoef [private] |
Definition at line 13 of file agmfast.h.
Referenced by GetRegCoef(), GradientForOneVar(), GradientForRow(), HessianForOneVar(), LikelihoodForOneVar(), LikelihoodForRow(), Load(), Save(), and SetRegCoef().
TRnd TAGMFast::Rnd [private] |
Definition at line 11 of file agmfast.h.
Referenced by FindComsByCV(), Load(), MLEGradAscent(), MLEGradAscentParallel(), MLENewton(), NeighborComInit(), and RandomInit().
TFltV TAGMFast::SumFV [private] |
Definition at line 14 of file agmfast.h.
Referenced by AddCom(), DelCom(), GetCmtyVV(), GradientForOneVar(), GradientForRow(), LikelihoodForOneVar(), LikelihoodForRow(), Load(), MLEGradAscentParallel(), NeighborComInit(), RandomInit(), Save(), and SetCmtyVV().