13 for (
int u = 0; u <
F.Len(); u++) {
16 if (Mem > 10) { Mem = 10; }
17 for (
int c = 0; c < Mem; c++) {
23 for (
int c = 0; c <
SumFV.
Len(); c++) {
24 if (
SumFV[c] == 0.0) {
35 TCesnaUtil::GetNIdPhiV<PUNGraph>(
G, NIdPhiV);
46 TIntV ChosenNIDV(InitComs, 0);
49 for (
int ui = 0; ui < NIdPhiV.
Len(); ui++) {
50 int UID = NIdPhiV[ui].Val2;
52 if (InvalidNIDS.IsKey(UID)) {
continue; }
58 for (
int e = 0; e < NI.
GetDeg(); e++) {
62 for (
int e = 0; e < NI.
GetDeg(); e++) {
67 if (CurCID >=
NumComs) {
break; }
70 printf(
"%d communities needed to fill randomly\n",
NumComs - CurCID);
73 for (
int c = 0; c <
SumFV.
Len(); c++) {
74 if (
SumFV[c] == 0.0) {
76 for (
int u = 0; u < ComSz; u++) {
90 for (
int c = 0; c < CmtyVV.
Len(); c++) {
91 for (
int u = 0; u < CmtyVV[c].
Len(); u++) {
92 int UID = CmtyVV[c][u];
108 printf(
"rearrage nodes\n");
110 for (
int nid = 0; nid <
G->
GetNodes(); nid++) {
122 for (
int u = 0; u < NIDAttrH.
Len(); u++) {
123 int UID = NIDAttrH.
GetKey(u);
126 for (
int k = 0; k < NIDAttrH[u].
Len(); k++) {
127 int KID = NIDAttrH[u][k];
130 if (NumAttr < KID + 1) { NumAttr = KID + 1; }
141 #pragma omp parallel for
142 for (
int u = 0; u <
F.Len(); u++) {
149 for (
int u = 0; u <
F.Len(); u++) {
167 for (
int e = 0; e <
HOVIDSV[UID].Len(); e++) {
168 for (
int c = 0; c <
SumFV.
Len(); c++) {
175 for (
int e = 0; e < NI.
GetDeg(); e++) {
177 if (v == UID) {
continue; }
178 if (
HOVIDSV[UID].IsKey(v)) {
continue; }
182 double HOSum =
HOVIDSV[UID].Len() > 0? HOSumFV[HI.GetKey()].Val: 0.0;
183 L -=
NegWgt * (
SumFV[HI.GetKey()] - HOSum -
GetCom(UID, HI.GetKey())) * HI.GetDat();
194 for (
int k = 0; k <
Attrs; k++) {
195 if (
HOKIDSV[UID].IsKey(k)) {
continue; }
206 L = Prob == 0.0? -100.0: log(Prob);
208 L = Prob == 1.0? -100.0: log(1.0 - Prob);
219 for (
int e = 0; e <
HOVIDSV[UID].Len(); e++) {
220 for (
int c = 0; c <
SumFV.
Len(); c++) {
227 TFltV PredV(Deg), GradV(CIDSet.
Len());
229 for (
int e = 0; e < Deg; e++) {
230 if (NI.
GetNbrNId(e) == UID) {
continue; }
235 for (
int c = 0; c < CIDSet.
Len(); c++) {
236 int CID = CIDSet.
GetKey(c);
238 for (
int e = 0; e < Deg; e++) {
240 if (VID == UID) {
continue; }
241 if (
HOVIDSV[UID].IsKey(VID)) {
continue; }
244 double HOSum =
HOVIDSV[UID].Len() > 0? HOSumFV[CID].Val: 0.0;
251 for (
int c = 0; c < GradV.Len(); c++) {
256 for (
int c = 0; c < GradV.Len(); c++) {
260 for (
int c = 0; c < GradV.Len(); c++) {
265 for (
int k = 0; k <
Attrs; k++) {
266 if (
HOKIDSV[UID].IsKey(k)) {
continue; }
269 for (
int c = 0; c < GradV.Len(); c++) {
270 for (
int k = 0; k <
Attrs; k++) {
271 if (
HOKIDSV[UID].IsKey(k)) {
continue; }
277 for (
int c = 0; c < GradV.Len(); c++) {
278 if (
GetCom(UID, CIDV[c]) == 0.0 && GradV[c] < 0.0) {
continue; }
279 if (fabs(GradV[c]) < 0.0001) {
continue; }
280 GradU.
AddDat(CIDV[c], GradV[c]);
282 for (
int c = 0; c < GradU.
Len(); c++) {
283 if (GradU[c] >= 10) { GradU[c] = 10; }
284 if (GradU[c] <= -10) { GradU[c] = -10; }
300 for (
int c = 0; c <
SumFV.
Len(); c++) {
304 for (
int c = 0; c <
NumComs; c++) {
305 int CID = CIDSumFH.
GetKey(c);
309 if (
SumFV[CID] < Thres) {
continue; }
310 for (
int u = 0; u <
F.Len(); u++) {
312 if (
GetCom(u, CID) >= Thres) { NIDFucH.AddDat(NID,
GetCom(u, CID)); }
314 NIDFucH.SortByDat(
false);
316 if (CmtyV.Len() < MinSz) {
continue; }
319 for (
int k = 0; k <
Attrs; k++) {
320 WV[k] =
GetW(CID, k);
324 if ( NumComs != CmtyVV.
Len()) {
325 printf(
"Community vector generated. %d communities are ommitted\n", NumComs.Val - CmtyVV.
Len());
335 for (
int c = 0; c <
NumComs; c++) {
337 for (
int u = 0; u <
G->
GetNodes(); u++) {
340 if (CmtyV.
Len() >= MinSz) { CmtyVV.
Add(CmtyV); }
342 if ( NumComs != CmtyVV.
Len()) {
343 printf(
"Community vector generated. %d communities are ommitted\n", NumComs.Val - CmtyVV.
Len());
348 int TCesna::FindComs(
const int NumThreads,
const int MaxComs,
const int MinComs,
const int DivComs,
const TStr OutFNm,
const bool UseBIC,
const double HOFrac,
const double StepAlpha,
const double StepBeta) {
349 double ComsGap = exp(
TMath::Log((
double) MaxComs / (
double) MinComs) / (
double) DivComs);
352 while (ComsV.
Len() < DivComs) {
353 int NewComs = int(ComsV.
Last() * ComsGap);
354 if (NewComs == ComsV.
Last().
Val) { NewComs++; }
357 if (ComsV.
Last() < MaxComs) { ComsV.
Add(MaxComs); }
358 return FindComs(ComsV, UseBIC, HOFrac, NumThreads, OutFNm, StepAlpha, StepBeta);
361 int TCesna::FindComs(
TIntV& ComsV,
const bool UseBIC,
const double HOFrac,
const int NumThreads,
const TStr PlotLFNm,
const double StepAlpha,
const double StepBeta) {
362 if (ComsV.
Len() == 0) {
365 while(ComsV.
Last() < MaxComs) { ComsV.
Add(ComsV.
Last() * 2); }
371 TCesnaUtil::GetNIdPhiV<PUNGraph>(
G, NIdPhiV);
377 for (
int IterCV = 0; IterCV < MaxIterCV; IterCV++) {
387 for (
int c = 0; c < ComsV.
Len(); c++) {
388 const int Coms = ComsV[c];
392 for (
int IterCV = 0; IterCV < MaxIterCV; IterCV++) {
394 HOKIDSV = HoldOutSetsAttr[IterCV];
398 if (NumThreads == 1) {
400 MLEGradAscent(0.01, 100 * G->GetNodes(),
"", StepAlpha, StepBeta);
414 if (NumThreads == 1) {
415 MLEGradAscent(0.005, 100 * G->GetNodes(),
"", StepAlpha, StepBeta);
423 double BIC = 2 *
Likelihood() - NumParams * log (Observations);
430 printf(
"Number of communities vs likelihood (criterion: BIC)\n");
432 printf(
"Number of communities vs likelihood (criterion: Cross validation)\n");
434 for (
int c = 0; c < ComsV.
Len(); c++) {
435 ComsLV.Add(
TIntFltPr(ComsV[c].Val, HOLV[c].Val));
436 printf(
"%d(%f)\t", ComsV[c].Val, HOLV[c].Val);
437 if (MaxL < HOLV[c]) {
446 if (! PlotLFNm.
Empty()) {
447 TGnuPlot::PlotValV(ComsLV, PlotLFNm,
"hold-out likelihood",
"communities",
"likelihood");
454 for (
int u = 0; u <
HOVIDSV.Len(); u++) {
455 for (
int e = 0; e <
HOVIDSV[u].Len(); e++) {
457 if (VID == u) {
continue; }
460 L += log(1.0 - Pred);
470 for (
int u = 0; u <
HOKIDSV.Len(); u++) {
471 for (
int e = 0; e <
HOKIDSV[u].Len(); e++) {
481 double StepSize = 1.0;
484 for(
int iter = 0; iter < MaxIter; iter++) {
485 for (
int i = 0; i < DeltaV.
Len(); i++){
486 int CID = DeltaV.
GetKey(i);
487 double NewVal =
GetCom(UID, CID) + StepSize * DeltaV.
GetDat(CID);
490 NewVarV.AddDat(CID, NewVal);
497 if (iter == MaxIter - 1) {
506 time_t InitTime = time(NULL);
508 int iter = 0, PrevIter = 0;
511 double PrevL =
TFlt::Mn, CurL = 0.0;
513 for (
int i = 0; i <
F.Len(); i++) { NIdxV.
Add(i); }
518 while(iter < MaxIter) {
520 for (
int ui = 0; ui <
F.Len(); ui++, iter++) {
542 if (
Norm2(GradV) < 1e-4) {
continue; }
544 if (LearnRate == 0.0) {
continue; }
545 for (
int ci = 0; ci < GradV.
Len(); ci++) {
546 int CID = GradV.
GetKey(ci);
547 double Change = LearnRate * GradV.
GetDat(CID);
548 double NewFuc =
GetCom(u, CID) + Change;
560 for (
int k = 0; k <
Attrs; k++) {
561 TFltV GradWV(NumComs);
565 if (LearnRate == 0.0) {
continue; }
566 for (
int c = 0; c < GradWV.
Len(); c++){
567 W[k][c] += LearnRate * GradWV[c];
572 printf(
"\r%d iterations (%f) [%lu sec]", iter, CurL, time(NULL) - InitTime);
574 if (iter - PrevIter >= 2 *
G->
GetNodes() && iter > 10000) {
578 printf(
"\r%d iterations, Likelihood: %f, Diff: %f", iter, CurL, CurL - PrevL);
581 if (CurL - PrevL <= Thres * fabs(PrevL)) {
break; }
582 else { PrevL = CurL; }
587 printf(
"MLE for Lambda completed with %d iterations(%s)\n", iter, ExeTm.
GetTmStr());
588 if (! PlotNm.
Empty()) {
594 int TCesna::MLEGradAscentParallel(
const double& Thres,
const int& MaxIter,
const int ChunkNum,
const int ChunkSize,
const TStr PlotNm,
const double StepAlpha,
const double StepBeta) {
596 time_t InitTime = time(NULL);
604 for (
int i = 0; i <
F.Len(); i++) { NIdxV.
Add(i); }
608 TIntV NewNIDV(ChunkNum * ChunkSize);
609 for (iter = 0; iter < MaxIter; iter++) {
611 for (
int i = 0; i <
F.Len(); i++) {
612 if (NIDOPTV[i] == 0) { NIdxV.Add(i); }
617 #pragma omp parallel for schedule(static, 1)
618 for (
int TIdx = 0; TIdx < ChunkNum; TIdx++) {
620 for (
int ui = TIdx * ChunkSize; ui < (TIdx + 1) * ChunkSize; ui++) {
622 if (ui >= NIdxV.Len()) {
continue; }
628 for (
int e = 0; e < UI.
GetDeg(); e++) {
632 CIDSet.AddKey(CI.GetKey());
635 if (CIDSet.Empty()) {
640 if (! CIDSet.IsKey(CI.GetKey())) {
641 CurFU.DelIfKey(CI.GetKey());
645 if (
Norm2(GradV) < 1e-4) { NIDOPTV[u] = 1;
continue; }
647 if (LearnRate == 0.0) { NewNIDV[ui] = -2;
continue; }
648 for (
int ci = 0; ci < GradV.
Len(); ci++) {
649 int CID = GradV.
GetKey(ci);
650 double Change = LearnRate * GradV.
GetDat(CID);
651 double NewFuc = CurFU.IsKey(CID)? CurFU.GetDat(CID) + Change : Change;
655 CurFU.AddDat(CID) = NewFuc;
665 int NumNoChangeGrad = 0;
666 int NumNoChangeStepSize = 0;
667 for (
int ui = 0; ui < NewNIDV.
Len(); ui++) {
668 int NewNID = NewNIDV[ui];
669 if (NewNID == -1) { NumNoChangeGrad++;
continue; }
670 if (NewNID == -2) { NumNoChangeStepSize++;
continue; }
675 #pragma omp parallel for
676 for (
int ui = 0; ui < NewNIDV.
Len(); ui++) {
677 int NewNID = NewNIDV[ui];
678 if (NewNID < 0) {
continue; }
679 F[NewNID] = NewF[ui];
681 for (
int ui = 0; ui < NewNIDV.
Len(); ui++) {
682 int NewNID = NewNIDV[ui];
683 if (NewNID < 0) {
continue; }
689 for (
int ui = 0; ui < NewNIDV.
Len(); ui++) {
690 int NewNID = NewNIDV[ui];
691 if (NewNID < 0) {
continue; }
694 for (
int e = 0; e < UI.
GetDeg(); e++) {
699 for (
int i = 0; i < NIDOPTV.Len(); i++) {
if (NIDOPTV[i] == 1) { OPTCnt++; } }
701 if (! PlotNm.
Empty()) {
703 if (PrevL >
TFlt::Mn) { printf(
" (%f) %d g %d s %d OPT", PrevL, NumNoChangeGrad, NumNoChangeStepSize, OPTCnt); }
706 if (iter == 0 || (iter - PrevIter) * ChunkSize * ChunkNum >=
G->
GetNodes()) {
707 #pragma omp parallel for
708 for (
int k = 0; k <
Attrs; k++) {
713 if (LearnRate == 0.0) {
continue; }
714 for (
int c = 0; c < GradWV.
Len(); c++){
715 W[k][c] += LearnRate * GradWV[c];
722 IterLV.
Add(
TIntFltPr(iter * ChunkSize * ChunkNum, CurL));
723 printf(
"\r%d iterations, Likelihood: %f, Diff: %f [%lu secs]", iter, CurL, CurL - PrevL, time(NULL) - InitTime);
725 if (CurL - PrevL <= Thres * fabs(PrevL)) {
733 if (! PlotNm.
Empty()) {
737 printf(
"\rMLE completed with %d iterations(%lu secs)", iter, time(NULL) - InitTime);
int MLEGradAscentParallel(const double &Thres, const int &MaxIter, const int ChunkNum, const int ChunkSize, const TStr PlotNm, const double StepAlpha=0.3, const double StepBeta=0.1)
void GenHoldOutAttr(const double HOFrac, TVec< TIntSet > &HOSetV)
double Sum(const TIntFltH &UV)
double GetCom(const int &NID, const int &CID)
void AddKeyV(const TVec< TKey > &KeyV)
int MLEGradAscent(const double &Thres, const int &MaxIter, const TStr PlotNm, const double StepAlpha=0.3, const double StepBeta=0.1)
double Prediction(const TIntFltH &FU, const TIntFltH &FV)
int GetKeyId(const TKey &Key) const
double DotProduct(const TIntFltH &UV, const TIntFltH &VV)
void GetNIdV(TIntV &NIdV) const
Gets a vector IDs of all nodes in the graph.
int GetEdges() const
Returns the number of edges in the graph.
TSizeTy Len() const
Returns the number of elements in the vector.
Node iterator. Only forward iteration (operator++) is supported.
void GetKeyV(TVec< TKey > &KeyV) const
void Gen(const int &ExpectVals)
double GetAttr(const int &NID, const int &K)
const TDat & GetDat(const TKey &Key) const
void GetW(TVec< TFltV > &_W)
double GetStepSizeByLineSearch(const int UID, const TIntFltH &DeltaV, const TIntFltH &GradV, const double &Alpha, const double &Beta, const int MaxIter=10)
double PredictAttrK(const TIntFltH &FU, const TFltV &WK)
bool IsKey(const TKey &Key) const
double Likelihood(const bool DoParallel=false)
const TKey & GetKey(const int &KeyId) const
int GetNodes() const
Returns the number of nodes in the graph.
double LikelihoodAttrKForRow(const int UID, const int K)
TPair< TInt, TFlt > TIntFltPr
int GetDeg() const
Returns degree of the current node.
const char * GetTmStr() const
void Sort(const bool &Asc=true)
Sorts the elements of the vector.
void Gen(const int &ExpectVals)
static double Norm2(const TFltV &x)
void RandomInit(const int InitComs)
void PutAll(const TVal &Val)
Sets all elements of the vector to value Val.
double LikelihoodHoldOut()
unsigned long long uint64
const TVal & GetDat(const TVal &Val) const
Returns reference to the first occurrence of element Val.
PUNGraph GetSubGraph(const PUNGraph &Graph, const TIntV &NIdV, const bool &RenumberNodes)
Returns an induced subgraph of an undirected graph Graph with NIdV nodes with an optional node renumb...
double Norm2(const TIntFltH &UV)
TNodeI GetNI(const int &NId) const
Returns an iterator referring to the node of ID NId in the graph.
int AddKey(const TKey &Key)
const TVal & Last() const
Returns a reference to the last element of the vector.
void AddCom(const int &NID, const int &CID, const double &Val)
void GetCmtyVVUnSorted(TVec< TIntV > &CmtyVV)
static void GenHoldOutPairs(const PGraph &G, TVec< TIntSet > &HoldOutSet, double HOFrac, TRnd &Rnd)
void GetCmtyVV(TVec< TIntV > &CmtyVV)
void SetCmtyVV(const TVec< TIntV > &CmtyVV)
void NeighborComInit(const int InitComs)
double GetStepSizeByLineSearchForWK(const int K, const TFltV &DeltaV, const TFltV &GradV, const double &Alpha, const double &Beta, const int MaxIter=10)
double LikelihoodForRow(const int UID)
void GradientForRow(const int UID, TIntFltH &GradU, const TIntSet &CIDSet)
void SetGraph(const PUNGraph &GraphPt, const THash< TInt, TIntV > &NIDAttrH)
int GetNbrNId(const int &NodeN) const
Returns ID of NodeN-th neighboring node.
bool IsNode(const int &NId) const
Tests whether ID NId is a node.
void GradientForWK(TFltV &GradV, const int K)
void Gen(const TSizeTy &_Vals)
Constructs a vector (an array) of _Vals elements.
static double Log(const double &Val)
int GetUniDevInt(const int &Range=0)
int FindComs(TIntV &ComsV, const bool UseBIC=false, const double HOFrac=0.2, const int NumThreads=20, const TStr PlotLFNm=TStr(), const double StepAlpha=0.3, const double StepBeta=0.1)
bool IsEdge(const int &SrcNId, const int &DstNId) const
Tests whether an edge between node IDs SrcNId and DstNId exists in the graph.
TSizeTy Add()
Adds a new element at the end of the vector, after its current last element.
void DelSelfEdges(const PGraph &Graph)
Removes all the self-edges from the graph.
TDat & AddDat(const TKey &Key)
void DelCom(const int &NID, const int &CID)
static void PlotValV(const TVec< TVal1 > &ValV, const TStr &OutFNmPref, const TStr &Desc="", const TStr &XLabel="", const TStr &YLabel="", const TGpScaleTy &ScaleTy=gpsAuto, const bool &PowerFit=false, const TGpSeriesTy &SeriesTy=gpwLinesPoints)
const TKey & GetKey(const int &KeyId) const
Vector is a sequence TVal objects representing an array that can change in size.
void SortByDat(const bool &Asc=true)