42 for (
int u = 0; u <
F.Len(); u++) {
45 if (Mem > 10) { Mem = 10; }
46 for (
int c = 0; c < Mem; c++) {
52 for (
int c = 0; c <
SumFV.
Len(); c++) {
53 if (
SumFV[c] == 0.0) {
69 TIntV ChosenNIDV(InitComs, 0);
72 for (
int u = 0; u <
F.Len(); u++) {
86 printf(
"conductance computation completed [%s]\n", RunTm.
GetTmStr());
90 for (
int ui = 0; ui < NIdPhiV.Len(); ui++) {
91 int UID = NIdPhiV[ui].Val2;
93 if (InvalidNIDS.IsKey(UID)) {
continue; }
99 for (
int e = 0; e < NI.
GetDeg(); e++) {
103 for (
int e = 0; e < NI.
GetDeg(); e++) {
108 if (CurCID >=
NumComs) {
break; }
111 printf(
"%d communities needed to fill randomly\n",
NumComs - CurCID);
114 for (
int c = 0; c <
SumFV.
Len(); c++) {
115 if (
SumFV[c] == 0.0) {
117 for (
int u = 0; u < ComSz; u++) {
131 for (
int u = 0; u <
NIDV.
Len(); u++) {
135 for (
int c = 0; c < CmtyVV.
Len(); c++) {
136 for (
int u = 0; u < CmtyVV[c].
Len(); u++) {
137 int UID = CmtyVV[c][u];
152 for (
int nid = 0; nid < GraphPt->
GetNodes(); nid++) {
153 if (! GraphPt->
IsNode(nid)) {
159 printf(
"rearrage nodes\n");
161 for (
int nid = 0; nid <
G->
GetNodes(); nid++) {
177 #pragma omp parallel for
178 for (
int u = 0; u <
F.Len(); u++) {
185 for (
int u = 0; u <
F.Len(); u++) {
204 for (
int e = 0; e <
HOVIDSV[UID].Len(); e++) {
205 for (
int c = 0; c <
SumFV.
Len(); c++) {
213 #pragma omp parallel for schedule(static, 1)
214 for (
int e = 0; e < NI.
GetDeg(); e++) {
216 if (v == UID) {
continue; }
217 if (
HOVIDSV[UID].IsKey(v)) {
continue; }
223 double HOSum =
HOVIDSV[UID].Len() > 0? HOSumFV[HI.GetKey()].Val: 0.0;
224 double LU =
NegWgt * (
SumFV[HI.GetKey()] - HOSum -
GetCom(UID, HI.GetKey())) * HI.GetDat();
228 for (
int e = 0; e < NI.
GetDeg(); e++) {
230 if (v == UID) {
continue; }
231 if (
HOVIDSV[UID].IsKey(v)) {
continue; }
235 double HOSum =
HOVIDSV[UID].Len() > 0? HOSumFV[HI.GetKey()].Val: 0.0;
236 L -=
NegWgt * (
SumFV[HI.GetKey()] - HOSum -
GetCom(UID, HI.GetKey())) * HI.GetDat();
257 for (
int e = 0; e <
HOVIDSV[UID].Len(); e++) {
258 for (
int c = 0; c <
SumFV.
Len(); c++) {
266 TFltV PredV(Deg), GradV(CIDSet.
Len());
269 #pragma omp parallel for schedule(static, 1)
270 for (
int e = 0; e < Deg; e++) {
271 if (NI.
GetNbrNId(e) == UID) {
continue; }
276 #pragma omp parallel for schedule(static, 1)
277 for (
int c = 0; c < CIDSet.
Len(); c++) {
278 int CID = CIDSet.
GetKey(c);
280 for (
int e = 0; e < Deg; e++) {
282 if (VID == UID) {
continue; }
283 if (
HOVIDSV[UID].IsKey(VID)) {
continue; }
286 double HOSum =
HOVIDSV[UID].Len() > 0? HOSumFV[CID].Val: 0.0;
293 for (
int e = 0; e < Deg; e++) {
294 if (NI.
GetNbrNId(e) == UID) {
continue; }
299 for (
int c = 0; c < CIDSet.
Len(); c++) {
300 int CID = CIDSet.
GetKey(c);
302 for (
int e = 0; e < Deg; e++) {
304 if (VID == UID) {
continue; }
305 if (
HOVIDSV[UID].IsKey(VID)) {
continue; }
308 double HOSum =
HOVIDSV[UID].Len() > 0? HOSumFV[CID].Val: 0.0;
316 for (
int c = 0; c < GradV.Len(); c++) {
321 for (
int c = 0; c < GradV.Len(); c++) {
327 for (
int c = 0; c < GradV.Len(); c++) {
328 if (
GetCom(UID, CIDV[c]) == 0.0 && GradV[c] < 0.0) {
continue; }
329 if (fabs(GradV[c]) < 0.0001) {
continue; }
330 GradU.
AddDat(CIDV[c], GradV[c]);
332 for (
int c = 0; c < GradU.
Len(); c++) {
333 if (GradU[c] >= 10) { GradU[c] = 10; }
334 if (GradU[c] <= -10) { GradU[c] = -10; }
341 double Grad = 0.0, PNoEdge;
343 for (
int e = 0; e < UI.
GetDeg(); e++) {
346 if (!
F[VID].IsKey(CID)) {
continue; }
347 PNoEdge = AlphaKV[e] * exp (-
F[VID].GetDat(CID) * Val);
348 IAssert(PNoEdge <= 1.0 && PNoEdge >= 0.0);
350 Grad += ((PNoEdge *
F[VID].GetDat(CID)) / (1.0 - PNoEdge) +
NegWgt *
F[VID].GetDat(CID));
366 double L = 0.0, PNoEdge;
368 for (
int e = 0; e < UI.
GetDeg(); e++) {
371 if (!
F[VID].IsKey(CID)) {
372 PNoEdge = AlphaKV[e];
374 PNoEdge = AlphaKV[e] * exp (-
F[VID].GetDat(CID) * Val);
376 IAssert(PNoEdge <= 1.0 && PNoEdge >= 0.0);
378 L += log(1.0 - PNoEdge) +
NegWgt *
GetCom(VID, CID) * Val;
396 double H = 0.0, PNoEdge;
398 for (
int e = 0; e < UI.
GetDeg(); e++) {
401 if (!
F[VID].IsKey(CID)) {
continue; }
402 PNoEdge = AlphaKV[e] * exp (-
F[VID].GetDat(CID) * Val);
403 IAssert(PNoEdge <= 1.0 && PNoEdge >= 0.0);
405 H += (- PNoEdge *
F[VID].GetDat(CID) *
F[VID].GetDat(CID)) / (1.0 - PNoEdge) / (1.0 - PNoEdge);
418 int iter = 0, PrevIter = 0;
424 int CID, UID, NewtonIter;
428 while(iter < MaxIter) {
430 for (
int ui = 0; ui <
F.Len(); ui++, iter++) {
439 if (!
F[UID].Empty()) {
F[UID].Clr(); }
442 for (
int e = 0; e < UI.
GetDeg(); e++) {
446 CIDSet.
AddKey(CI.GetKey());
450 if (! CIDSet.
IsKey(CI.GetKey())) {
454 if (CIDSet.
Empty()) {
continue; }
460 for (
int e = 0; e < UI.
GetDeg(); e++) {
468 if (Grad <= 1e-3 && Grad >= -0.1) {
continue; }
470 while (NewtonIter++ < 10) {
473 if (Fuc == 0.0 && Grad <= 0.0) { Grad = 0.0; }
474 if (fabs(Grad) < 1e-3) {
break; }
475 if (H == 0.0) { Fuc = 0.0;
break; }
476 double NewtonStep = - Grad / H;
477 if (NewtonStep < -0.5) { NewtonStep = - 0.5; }
479 if (Fuc < 0.0) { Fuc = 0.0; }
489 if (iter - PrevIter >= 2 *
G->
GetNodes() && iter > 10000) {
493 printf(
"\r%d iterations, Likelihood: %f, Diff: %f", iter, CurL, CurL - PrevL);
496 if (CurL - PrevL <= Thres * fabs(PrevL)) {
break; }
497 else { PrevL = CurL; }
501 if (! PlotNm.
Empty()) {
502 printf(
"\nMLE for Lambda completed with %d iterations(%s)\n", iter, ExeTm.
GetTmStr());
516 for (
int c = 0; c <
SumFV.
Len(); c++) {
520 for (
int c = 0; c <
NumComs; c++) {
521 int CID = CIDSumFH.
GetKey(c);
525 if (
SumFV[CID] < Thres) {
continue; }
526 for (
int u = 0; u <
F.Len(); u++) {
529 if (
GetCom(u, CID) >= Thres) { NIDFucH.AddDat(NID,
GetCom(u, CID)); }
531 NIDFucH.SortByDat(
false);
532 NIDFucH.GetKeyV(CmtyV);
533 if (CmtyV.Len() >= MinSz) { CmtyVV.
Add(CmtyV); }
535 if ( NumComs != CmtyVV.
Len()) {
536 printf(
"Community vector generated. %d communities are ommitted\n", NumComs.Val - CmtyVV.
Len());
541 int TAGMFast::FindComsByCV(
const int NumThreads,
const int MaxComs,
const int MinComs,
const int DivComs,
const TStr& OutFNm,
const double StepAlpha,
const double StepBeta) {
542 double ComsGap = exp(
TMath::Log((
double) MaxComs / (
double) MinComs) / (
double) DivComs);
545 while (ComsV.
Len() < DivComs) {
546 int NewComs = int(ComsV.
Last() * ComsGap);
547 if (NewComs == ComsV.
Last().
Val) { NewComs++; }
550 if (ComsV.
Last() < MaxComs) { ComsV.
Add(MaxComs); }
551 return FindComsByCV(ComsV, 0.1, NumThreads, OutFNm +
".CV.likelihood", StepAlpha, StepBeta);
555 if (ComsV.
Len() == 0) {
558 while(ComsV.
Last() < MaxComs) { ComsV.
Add(ComsV.
Last() * 2); }
562 EdgeV.
Add(
TIntPr(EI.GetSrcNId(), EI.GetDstNId()));
568 if (EdgeV.Len() > 50) {
569 printf(
"generating hold out set\n");
573 for (
int IterCV = 0; IterCV < MaxIterCV; IterCV++) {
579 printf(
"holding out %d edges...\n", HOEdges);
580 for (
int he = 0; he < (int) HOEdges; he++) {
581 HoldOutSets[IterCV][EdgeV[he].Val1].AddKey(EdgeV[he].Val2);
582 HoldOutSets[IterCV][EdgeV[he].Val2].AddKey(EdgeV[he].Val1);
585 printf(
"%d Edges hold out\n", HOCnt);
586 while(HOCnt++ < HOTotal) {
589 HoldOutSets[IterCV][SrcNID].AddKey(DstNID);
590 HoldOutSets[IterCV][DstNID].AddKey(SrcNID);
593 printf(
"hold out set generated\n");
598 for (
int c = 0; c < ComsV.
Len(); c++) {
599 const int Coms = ComsV[c];
600 printf(
"Try number of Coms:%d\n", Coms);
602 printf(
"Initialized\n");
604 if (EdgeV.Len() > 50) {
605 for (
int IterCV = 0; IterCV < MaxIterCV; IterCV++) {
608 if (NumThreads == 1) {
609 printf(
"MLE without parallelization begins\n");
612 printf(
"MLE with parallelization begins\n");
630 for (
int c = 0; c < ComsV.
Len(); c++) {
631 ComsLV.Add(
TIntFltPr(ComsV[c].Val, HOLV[c].Val));
632 printf(
"%d(%f)\t", ComsV[c].Val, HOLV[c].Val);
633 if (MaxL < HOLV[c]) {
641 if (! PlotLFNm.
Empty()) {
642 TGnuPlot::PlotValV(ComsLV, PlotLFNm,
"hold-out likelihood",
"communities",
"likelihood");
649 for (
int u = 0; u <
HOVIDSV.Len(); u++) {
650 for (
int e = 0; e <
HOVIDSV[u].Len(); e++) {
652 if (VID == u) {
continue; }
655 L += log(1.0 - Pred);
666 double StepSize = 1.0;
669 for(
int iter = 0; iter < MaxIter; iter++) {
670 for (
int i = 0; i < DeltaV.
Len(); i++){
671 int CID = DeltaV.
GetKey(i);
672 double NewVal =
GetCom(UID, CID) + StepSize * DeltaV.
GetDat(CID);
675 NewVarV.AddDat(CID, NewVal);
682 if (iter == MaxIter - 1) {
691 time_t InitTime = time(NULL);
693 int iter = 0, PrevIter = 0;
696 double PrevL =
TFlt::Mn, CurL = 0.0;
698 for (
int i = 0; i <
F.Len(); i++) { NIdxV.
Add(i); }
701 while(iter < MaxIter) {
703 for (
int ui = 0; ui <
F.Len(); ui++, iter++) {
708 for (
int e = 0; e < UI.
GetDeg(); e++) {
712 CIDSet.AddKey(CI.GetKey());
716 if (! CIDSet.IsKey(CI.GetKey())) {
720 if (CIDSet.Empty()) {
continue; }
722 if (
Norm2(GradV) < 1e-4) {
continue; }
724 if (LearnRate == 0.0) {
continue; }
725 for (
int ci = 0; ci < GradV.Len(); ci++) {
726 int CID = GradV.GetKey(ci);
727 double Change = LearnRate * GradV.GetDat(CID);
728 double NewFuc =
GetCom(u, CID) + Change;
739 printf(
"\r%d iterations (%f) [%lu sec]", iter, CurL, time(NULL) - InitTime);
741 if (iter - PrevIter >= 2 *
G->
GetNodes() && iter > 10000) {
745 printf(
"\r%d iterations, Likelihood: %f, Diff: %f", iter, CurL, CurL - PrevL);
748 if (CurL - PrevL <= Thres * fabs(PrevL)) {
break; }
749 else { PrevL = CurL; }
754 printf(
"MLE for Lambda completed with %d iterations(%s)\n", iter, ExeTm.
GetTmStr());
755 if (! PlotNm.
Empty()) {
761 int TAGMFast::MLEGradAscentParallel(
const double& Thres,
const int& MaxIter,
const int ChunkNum,
const int ChunkSize,
const TStr& PlotNm,
const double StepAlpha,
const double StepBeta) {
763 time_t InitTime = time(NULL);
771 for (
int i = 0; i <
F.Len(); i++) { NIdxV.
Add(i); }
775 TIntV NewNIDV(ChunkNum * ChunkSize);
776 for (iter = 0; iter < MaxIter; iter++) {
778 for (
int i = 0; i <
F.Len(); i++) {
779 if (NIDOPTV[i] == 0) { NIdxV.Add(i); }
784 #pragma omp parallel for schedule(static, 1)
785 for (
int TIdx = 0; TIdx < ChunkNum; TIdx++) {
787 for (
int ui = TIdx * ChunkSize; ui < (TIdx + 1) * ChunkSize; ui++) {
789 if (ui > NIdxV.Len()) {
continue; }
795 for (
int e = 0; e < UI.
GetDeg(); e++) {
799 CIDSet.AddKey(CI.GetKey());
802 if (CIDSet.Empty()) {
807 if (! CIDSet.IsKey(CI.GetKey())) {
808 CurFU.DelIfKey(CI.GetKey());
812 if (
Norm2(GradV) < 1e-4) { NIDOPTV[u] = 1;
continue; }
814 if (LearnRate == 0.0) { NewNIDV[ui] = -2;
continue; }
815 for (
int ci = 0; ci < GradV.
Len(); ci++) {
816 int CID = GradV.
GetKey(ci);
817 double Change = LearnRate * GradV.
GetDat(CID);
818 double NewFuc = CurFU.IsKey(CID)? CurFU.GetDat(CID) + Change : Change;
822 CurFU.AddDat(CID) = NewFuc;
832 int NumNoChangeGrad = 0;
833 int NumNoChangeStepSize = 0;
834 for (
int ui = 0; ui < NewNIDV.
Len(); ui++) {
835 int NewNID = NewNIDV[ui];
836 if (NewNID == -1) { NumNoChangeGrad++;
continue; }
837 if (NewNID == -2) { NumNoChangeStepSize++;
continue; }
842 #pragma omp parallel for
843 for (
int ui = 0; ui < NewNIDV.
Len(); ui++) {
844 int NewNID = NewNIDV[ui];
845 if (NewNID < 0) {
continue; }
846 F[NewNID] = NewF[ui];
848 for (
int ui = 0; ui < NewNIDV.
Len(); ui++) {
849 int NewNID = NewNIDV[ui];
850 if (NewNID < 0) {
continue; }
856 for (
int ui = 0; ui < NewNIDV.
Len(); ui++) {
857 int NewNID = NewNIDV[ui];
858 if (NewNID < 0) {
continue; }
861 for (
int e = 0; e < UI.
GetDeg(); e++) {
866 for (
int i = 0; i < NIDOPTV.Len(); i++) {
if (NIDOPTV[i] == 1) { OPTCnt++; } }
867 if (! PlotNm.
Empty()) {
868 printf(
"\r%d iterations [%s] %d secs", iter * ChunkSize * ChunkNum, ExeTm.
GetTmStr(), int(
TSecTm::GetCurTm().GetAbsSecs() - StartTm));
869 if (PrevL >
TFlt::Mn) { printf(
" (%f) %d g %d s %d OPT", PrevL, NumNoChangeGrad, NumNoChangeStepSize, OPTCnt); }
872 if ((iter - PrevIter) * ChunkSize * ChunkNum >=
G->
GetNodes()) {
875 IterLV.
Add(
TIntFltPr(iter * ChunkSize * ChunkNum, CurL));
876 printf(
"\r%d iterations, Likelihood: %f, Diff: %f [%d secs]", iter, CurL, CurL - PrevL,
int(time(NULL) - InitTime));
878 if (CurL - PrevL <= Thres * fabs(PrevL)) {
886 if (! PlotNm.
Empty()) {
887 printf(
"\nMLE completed with %d iterations(%d secs)\n", iter,
int(
TSecTm::GetCurTm().GetAbsSecs() - StartTm));
890 printf(
"\rMLE completed with %d iterations(%d secs)", iter,
int(time(NULL) - InitTime));
double Norm2(const TIntFltH &UV)
Edge iterator. Only forward iteration (operator++) is supported.
TPair< TInt, TInt > TIntPr
void SetCmtyVV(const TVec< TIntV > &CmtyVV)
static void GetNbhCom(const PUNGraph &Graph, const int NID, TIntSet &NBCmtyS)
int MLENewton(const double &Thres, const int &MaxIter, const TStr &PlotNm=TStr())
Newton method: DEPRECATED.
#define IAssertR(Cond, Reason)
TPair< TFlt, TInt > TFltIntPr
TEdgeI EndEI() const
Returns an iterator referring to the past-the-end edge in the graph.
int MLEGradAscentParallel(const double &Thres, const int &MaxIter, const int ChunkNum, const int ChunkSize, const TStr &PlotNm, const double StepAlpha=0.3, const double StepBeta=0.1)
double LikelihoodHoldOut(const bool DoParallel=false)
double Prediction(const TIntFltH &FU, const TIntFltH &FV)
void NeighborComInit(const int InitComs)
double GradientForOneVar(const TFltV &AlphaKV, const int UID, const int CID, const double &Val)
void Save(TSOut &SOut) const
void GetNIdV(TIntV &NIdV) const
Gets a vector IDs of all nodes in the graph.
void DelCom(const int &NID, const int &CID)
int FindComsByCV(TIntV &ComsV, const double HOFrac=0.2, const int NumThreads=20, const TStr &PlotLFNm=TStr(), const double StepAlpha=0.3, const double StepBeta=0.1)
int GetEdges() const
Returns the number of edges in the graph.
TSizeTy Len() const
Returns the number of elements in the vector.
Node iterator. Only forward iteration (operator++) is supported.
void Save(TSOut &SOut) const
const TDat & GetDat(const TKey &Key) const
void Save(TSOut &SOut) const
Saves the graph to a (binary) stream SOut.
static double GetConductance(const PUNGraph &Graph, const TIntSet &CmtyS, const int Edges)
bool IsKey(const TKey &Key) const
int MLEGradAscent(const double &Thres, const int &MaxIter, const TStr &PlotNm, const double StepAlpha=0.3, const double StepBeta=0.1)
const TKey & GetKey(const int &KeyId) const
int GetNodes() const
Returns the number of nodes in the graph.
double GetCom(const int &NID, const int &CID)
double LikelihoodForOneVar(const TFltV &AlphaKV, const int UID, const int CID, const double &Val)
void Save(TSOut &SOut) const
TPair< TInt, TFlt > TIntFltPr
int GetDeg() const
Returns degree of the current node.
const char * GetTmStr() const
void Gen(const int &ExpectVals)
double GetStepSizeByLineSearch(const int UID, const TIntFltH &DeltaV, const TIntFltH &GradV, const double &Alpha, const double &Beta, const int MaxIter=10)
void PutAll(const TVal &Val)
Sets all elements of the vector to value Val.
unsigned long long uint64
const TVal & GetDat(const TVal &Val) const
Returns reference to the first occurrence of element Val.
PUNGraph GetSubGraph(const PUNGraph &Graph, const TIntV &NIdV, const bool &RenumberNodes)
Returns an induced subgraph of an undirected graph Graph with NIdV nodes with an optional node renumb...
static double Round(const double &Val)
TNodeI GetNI(const int &NId) const
Returns an iterator referring to the node of ID NId in the graph.
void GetCmtyVV(TVec< TIntV > &CmtyVV)
TEdgeI BegEI() const
Returns an iterator referring to the first edge in the graph.
int AddKey(const TKey &Key)
const TVal & Last() const
Returns a reference to the last element of the vector.
void SetGraph(const PUNGraph &GraphPt)
static PUNGraph Load(TSIn &SIn)
Static constructor that loads the graph from a stream SIn and returns a pointer to it...
void Load(TSIn &SIn, const int &RndSeed=0)
double LikelihoodForRow(const int UID)
void PutSeed(const int &_Seed)
double HessianForOneVar(const TFltV &AlphaKV, const int UID, const int CID, const double &Val)
static TStr Fmt(const char *FmtStr,...)
void Shuffle(TRnd &Rnd)
Randomly shuffles the elements of the vector.
double Likelihood(const bool DoParallel=false)
double Sum(const TIntFltH &UV)
int GetNbrNId(const int &NodeN) const
Returns ID of NodeN-th neighboring node.
bool IsNode(const int &NId) const
Tests whether ID NId is a node.
void Gen(const TSizeTy &_Vals)
Constructs a vector (an array) of _Vals elements.
void AddCom(const int &NID, const int &CID, const double &Val)
static double Log(const double &Val)
int GetUniDevInt(const int &Range=0)
void RandomInit(const int InitComs)
bool IsEdge(const int &SrcNId, const int &DstNId) const
Tests whether an edge between node IDs SrcNId and DstNId exists in the graph.
TSizeTy Add()
Adds a new element at the end of the vector, after its current last element.
void DelSelfEdges(const PGraph &Graph)
Removes all the self-edges from the graph.
double DotProduct(const TIntFltH &UV, const TIntFltH &VV)
TDat & AddDat(const TKey &Key)
void GradientForRow(const int UID, TIntFltH &GradU, const TIntSet &CIDSet)
static void PlotValV(const TVec< TVal1 > &ValV, const TStr &OutFNmPref, const TStr &Desc="", const TStr &XLabel="", const TStr &YLabel="", const TGpScaleTy &ScaleTy=gpsAuto, const bool &PowerFit=false, const TGpSeriesTy &SeriesTy=gpwLinesPoints)
void Save(TSOut &SOut) const
const TKey & GetKey(const int &KeyId) const
Vector is a sequence TVal objects representing an array that can change in size.
void SortByDat(const bool &Asc=true)