42 void TAGMFit::RandomInitCmtyVV(
const int InitComs,
const double ComSzAlpha,
const double MemAlpha,
const int MinComSz,
const int MaxComSz,
const int MinMem,
const int MaxMem) {
54 int SrcNID = SrcNI.GetId();
55 for (
int v = 0; v < SrcNI.GetDeg(); v++) {
56 int DstNID = SrcNI.GetNbrNId(v);
57 if (SrcNID >= DstNID) {
continue; }
63 for (
int k = 0; k < JointCom.
Len(); k++) {
79 LEdges = 0.0; LNoEdges = 0.0;
80 for (
int e = 0; e <
EdgeComVH.Len(); e++) {
83 double Puv = 1 - exp(- LambdaSum);
84 if (JointCom.
Len() == 0) { Puv =
PNoCom; }
88 for (
int k = 0; k < NewLambdaV.
Len(); k++) {
91 if(NotEdgesInCom > 0) {
92 if (LNoEdges >=
TFlt::Mn +
double(NotEdgesInCom) * NewLambdaV[k]) {
93 LNoEdges -= double(NotEdgesInCom) * NewLambdaV[k];
101 return LEdges + LNoEdges + LReg;
110 double StepSize = 1.0;
114 for (
int iter = 0; ; iter++) {
116 NewLambdaV[i] =
LambdaV[i] + StepSize * DeltaV[i];
136 double GradCutOff = 1000;
137 for (iter = 0; iter < MaxIter; iter++) {
140 if (GradV[i] < -GradCutOff) { GradV[i] = -GradCutOff; }
141 if (GradV[i] > GradCutOff) { GradV[i] = GradCutOff; }
145 double Alpha = 0.15, Beta = 0.2;
146 if (Edges >
Kilo(100)) { Alpha = 0.00015; Beta = 0.3;}
150 double Change = LearnRate * GradV[i];
155 if (! PlotNm.
Empty()) {
161 if (! PlotNm.
Empty()) {
164 printf(
"MLE for Lambda completed with %d iterations(%s)\n",iter,ExeTm.
GetTmStr());
171 for (
int c = 0; c < MaxK; c++) {
176 for (
int v = 0; v < NC; v++) {
191 TIntV ChosenNIDV(InitComs, 0);
196 for (
int u = 0; u < NIdV.
Len(); u++) {
209 printf(
"conductance computation completed [%s]\n", RunTm.
GetTmStr());
213 for (
int ui = 0; ui < NIdPhiV.Len(); ui++) {
214 int UID = NIdPhiV[ui].Val2;
216 if (InvalidNIDS.IsKey(UID)) {
continue; }
222 for (
int e = 0; e < NI.
GetDeg(); e++) {
226 for (
int e = 0; e < NI.
GetDeg(); e++) {
231 if (CurCID >= InitComs) {
break; }
233 if (InitComs > CurCID) {
234 printf(
"%d communities needed to fill randomly\n", InitComs - CurCID);
237 for (
int c = 0; c <
CIDNSetV.Len(); c++) {
240 for (
int u = 0; u < ComSz; u++) {
254 TIntV TmpV = CmtyVV[0];
273 for (
int c = 0; c <
CIDNSetV.Len(); c++) {
285 for (
int c = 0; c <
CIDNSetV.Len(); c++) {
295 for (
int e = 0; e < NI.
GetDeg(); e++) {
297 if (
NIDComVH.GetDat(VID).IsKey(CID)) {
299 EdgeComVH.GetDat(SrcDstNIDPr).DelKey(CID);
311 for (
int e = 0; e < NI.
GetDeg(); e++) {
313 if (
NIDComVH.GetDat(VID).IsKey(JoinCID)) {
315 EdgeComVH.GetDat(SrcDstNIDPr).AddKey(JoinCID);
320 NIDComVH.GetDat(NID).AddKey(JoinCID);
336 if (TryCnt < MaxTryCnt) {
340 else if (Option == 1) {
344 LeaveCID = NIDCIDPr.
Val2;
345 }
while (TryCnt++ < MaxTryCnt && LeaveCID ==
BaseCID);
346 if (TryCnt < MaxTryCnt) {
354 LeaveCID = NIDCIDPr.
Val2;
359 if (TryCnt < MaxTryCnt) {
369 double BestL = PrevL;
370 printf(
"initial likelihood = %f\n",PrevL);
371 TIntFltPrV IterTrueLV, IterJoinV, IterLeaveV, IterAcceptV, IterSwitchV, IterLBV;
376 int SwitchCnt = 0, LeaveCnt = 0, JoinCnt = 0, AcceptCnt = 0, ProbBinSz;
382 for (
int iter = 0; iter < MaxIter; iter++) {
385 int JoinCID = -1, LeaveCID = -1;
391 if (JoinCID > -1 && JoinCID !=
BaseCID) {
JoinCom(NID, JoinCID); }
392 if (LeaveCID > -1 && JoinCID > -1 && JoinCID !=
BaseCID && LeaveCID !=
BaseCID) { SwitchCnt++; }
393 else if (LeaveCID > -1 && LeaveCID !=
BaseCID) { LeaveCnt++;}
394 else if (JoinCID > -1 && JoinCID !=
BaseCID) { JoinCnt++;}
396 if ((iter + 1) % EvalLambdaIter == 0) {
402 OptL = PrevL + DeltaL;
404 if (BestL <= OptL &&
CIDNSetV.Len() > 0) {
410 if (iter > 0 && (iter % ProbBinSz == 0) && PlotFPrx.
Len() > 0) {
412 IterSwitchV.
Add(
TIntFltPr(iter, (
double) SwitchCnt / (
double) AcceptCnt));
413 IterLeaveV.
Add(
TIntFltPr(iter, (
double) LeaveCnt / (
double) AcceptCnt));
414 IterJoinV.
Add(
TIntFltPr(iter, (
double) JoinCnt / (
double) AcceptCnt));
415 IterAcceptV.
Add(
TIntFltPr(iter, (
double) AcceptCnt / (
double) ProbBinSz));
416 SwitchCnt = JoinCnt = LeaveCnt = AcceptCnt = 0;
419 if ((iter + 1) % 10000 == 0) {
420 printf(
"\r%d iterations completed [%.2f]", iter, (
double) iter / (
double) MaxIter);
425 if (PlotFPrx.
Len() > 0) {
428 GP1.
SetDataPlotFNm(PlotFPrx +
".likelihood.tab", PlotFPrx +
".likelihood.plt");
430 GP1.
SetTitle(PlotFPrx +
".likelihood" + TitleStr);
431 GP1.
SavePng(PlotFPrx +
".likelihood.png");
438 GP2.
SetTitle(PlotFPrx +
".transition");
439 GP2.
SetDataPlotFNm(PlotFPrx +
"transition_prob.tab", PlotFPrx +
"transition_prob.plt");
440 GP2.
SavePng(PlotFPrx +
"transition_prob.png");
447 printf(
"\nMCMC completed (best likelihood: %.2f) [%s]\n", BestL, TotalTm.
GetTmStr());
461 for (
int c = 0; c <
CIDNSetV.Len(); c++) {
481 for (
int e = 0; e < NI.
GetDeg(); e++) {
483 if (!
NIDComVH.GetDat(VID).IsKey(CID)) {
continue; }
487 CurPuv = 1 - exp(- LambdaSum);
488 NewPuv = 1 - exp(- LambdaSum +
LambdaV[CID]);
490 if (JointCom.
Len() == 1) {
493 Delta += (log(NewPuv) - log(CurPuv));
507 for (
int e = 0; e < NI.
GetDeg(); e++) {
509 if (!
NIDComVH.GetDat(VID).IsKey(CID)) {
continue; }
513 CurPuv = 1 - exp(- LambdaSum);
514 if (JointCom.
Len() == 0) { CurPuv =
PNoCom; }
515 NewPuv = 1 - exp(- LambdaSum -
LambdaV[CID]);
516 Delta += (log(NewPuv) - log(CurPuv));
531 for (
int e = 0; e < NI.
GetDeg(); e++) {
533 if (!
NIDComVH.GetDat(VID).IsKey(CurCID) || !
NIDComVH.GetDat(VID).IsKey(NewCID)) {
continue;}
536 double CurPuv, NewPuvAfterJoin, NewPuvAfterLeave, NewPuvAfterSwitch, LambdaSum =
SelectLambdaSum(JointCom);
537 CurPuv = 1 - exp(- LambdaSum);
538 NewPuvAfterLeave = 1 - exp(- LambdaSum +
LambdaV[CurCID]);
539 NewPuvAfterJoin = 1 - exp(- LambdaSum -
LambdaV[NewCID]);
540 NewPuvAfterSwitch = 1 - exp(- LambdaSum -
LambdaV[NewCID] +
LambdaV[CurCID]);
541 if (JointCom.
Len() == 1 || NewPuvAfterLeave == 0.0) {
542 NewPuvAfterLeave =
PNoCom;
544 Delta += (log(NewPuvAfterSwitch) + log(CurPuv) - log(NewPuvAfterLeave) - log(NewPuvAfterJoin));
546 printf(
"NS:%f C:%f NL:%f NJ:%f PNoCom:%f", NewPuvAfterSwitch, CurPuv, NewPuvAfterLeave, NewPuvAfterJoin,
PNoCom.
Val);
563 for (
int c = 0; c <
CIDNSetV.Len(); c++) {
566 CIDLambdaH.SortByDat(
false);
567 for (
int c = 0; c <
CIDNSetV.Len(); c++) {
568 int CID = CIDLambdaH.GetKey(c);
570 double Q = exp( - (
double)
LambdaV[CID]);
571 if (Q > QMax) {
continue; }
574 if (CmtyV.
Len() == 0) {
continue; }
586 for (
int c = 0; c <
CIDNSetV.Len(); c++) {
598 for (
int e = 0; e <
EdgeComVH.Len(); e++) {
601 double Puv = 1 - exp(- LambdaSum);
602 if (JointCom.
Len() == 0) { Puv =
PNoCom; }
604 SumEdgeProbsV[SI.GetKey()] += (1 - Puv) / Puv;
609 int NotEdgesInCom = MaxEk -
ComEdgesV[k];
610 GradV[k] = SumEdgeProbsV[k] - (double) NotEdgesInCom;
625 IAssert(NewLambdaV[SI.GetKey()] >= 0);
626 Result += NewLambdaV[SI.GetKey()];
635 uint64 PairNoCom = 0, EdgesNoCom = 0;
636 for (
int u = 0; u < NIdV.
Len(); u++) {
637 for (
int v = u + 1; v < NIdV.
Len(); v++) {
638 int SrcNID = NIdV[u], DstNID = NIdV[v];
641 if(JointCom.
Len() == 0) {
643 if (
G->
IsEdge(SrcNID, DstNID)) { EdgesNoCom++; }
644 if (SamplePairs > 0 && PairNoCom >= (
uint64) SamplePairs) {
break; }
647 if (SamplePairs > 0 && PairNoCom >= (
uint64) SamplePairs) {
break; }
650 if (EdgesNoCom > 0) {
651 PNoCom = (double) EdgesNoCom / (
double) PairNoCom;
661 for (
int c = 0; c <
CIDNSetV.Len(); c++) {
664 CIDLambdaH.SortByDat(
false);
667 int CID = CIDLambdaH.GetKey(i);
668 if (
LambdaV[CID] <= 0.0001) {
continue; }
669 printf(
"P_c : %.3f Com Sz: %d, Total Edges inside: %d \n", 1.0 - exp(-
LambdaV[CID]),
CIDNSetV[CID].Len(), (
int)
ComEdgesV[CID]);
static const T & Mn(const T &LVal, const T &RVal)
void Load(TSIn &SIn, const int &RndSeed=0)
TPair< TInt, TInt > TIntPr
static void GetNbhCom(const PUNGraph &Graph, const int NID, TIntSet &NBCmtyS)
static double Norm(const TFltV &x)
TPair< TFlt, TInt > TFltIntPr
double SeekSwitch(const int &UID, const int &CurCID, const int &NewCID)
static double SumVec(const TFltV &x)
static void GetNodeMembership(THash< TInt, TIntSet > &NIDComVH, const TVec< TIntV > &CmtyVV)
get hash table of
void SavePng(const int &SizeX=1000, const int &SizeY=800, const TStr &Comment=TStr())
static const T & Mx(const T &LVal, const T &RVal)
void SetDefaultPNoCom()
Set Epsilon (the probability that two nodes sharing no communities connect) to the default value...
void Save(TSOut &SOut) const
THash< TIntPr, TInt > NIDCIDPrS
pairs (for sampling MCMC moves).
void GetNIdV(TIntV &NIdV) const
Gets a vector IDs of all nodes in the graph.
void SetTitle(const TStr &PlotTitle)
double GetStepSizeByLineSearchForLambda(const TFltV &DeltaV, const TFltV &GradV, const double &Alpha, const double &Beta)
Step size search for updating P_c (which is parametarized by regularization parameter lambda)...
double Likelihood()
COMMENT.
int GetEdges() const
Returns the number of edges in the graph.
TSizeTy Len() const
Returns the number of elements in the vector.
Node iterator. Only forward iteration (operator++) is supported.
TFltV LambdaV
Parametrization of P_c (edge probability in community c), P_c = 1 - exp(-lambda). ...
void Save(TSOut &SOut) const
void PrintSummary()
COMMENT.
TInt BaseCID
ID of the Epsilon-community (in case we fit P_c of the epsilon community). We do not fit for the Epsi...
static void GetIntersection(const THashSet< TInt > &A, const THashSet< TInt > &B, THashSet< TInt > &C)
void Save(TSOut &SOut) const
Saves the graph to a (binary) stream SOut.
void SetDataPlotFNm(const TStr &DatFNm, const TStr &PltFNm)
THash< TIntPr, TIntSet > EdgeComVH
Edge -> Shared Community ID Set.
static double GetConductance(const PUNGraph &Graph, const TIntSet &CmtyS, const int Edges)
void GetEdgeJointCom()
For each (u, v) in edges, precompute C_uv (the set of communities nodes u and v share).
int GetNodes() const
Returns the number of nodes in the graph.
void Save(TSOut &SOut) const
int GetRndNId(TRnd &Rnd=TInt::Rnd)
Returns an ID of a random node in the graph.
TPair< TInt, TFlt > TIntFltPr
int GetDeg() const
Returns degree of the current node.
const char * GetTmStr() const
void GradLogLForLambda(TFltV &GradV)
void DelKey(const TKey &Key)
double CalcPNoComByCmtyVV(const int &SamplePairs=-1)
Compute the empirical edge probability between a pair of nodes who share no community (epsilon)...
void LeaveCom(const int &NID, const int &CID)
After MCMC, NID leaves community CID.
TFlt MinLambda
Minimum value of regularization parameter lambda (default = 1e-5).
void GetCmtyVV(TVec< TIntV > &CmtyVV, const double QMax=2.0)
Get communities whose p_c is higher than 1 - QMax.
int RemoveEmptyCom()
Remove all communities with no members.
int MLEGradAscentGivenCAG(const double &Thres=0.001, const int &MaxIter=10000, const TStr PlotNm=TStr())
Gradient descent for p_c while keeping the community affiliation graph (CAG) fixed.
void Gen(const int &ExpectVals)
THash< TInt, TIntSet > NIDComVH
Node ID -> Communitie IDs the node belongs to.
unsigned long long uint64
double SelectLambdaSum(const TIntSet &ComK)
Compute sum of lambda_c (which is log (1 - p_c)) over C_uv (ComK). The function is used to compute ed...
static void GenCmtyVVFromPL(TVec< TIntV > &CmtyVV, const PUNGraph &Graph, const int &Nodes, const int &Coms, const double &ComSzAlpha, const double &MemAlpha, const int &MinSz, const int &MaxSz, const int &MinK, const int &MaxK, TRnd &Rnd)
Generate bipartite community affiliation from given power law coefficients for membership distributio...
TFlt RegCoef
Regularization parameter when we fit for P_c (for finding # communities).
TNodeI GetNI(const int &NId) const
Returns an iterator referring to the node of ID NId in the graph.
const TVal & Last() const
Returns a reference to the last element of the vector.
void NeighborComInit(const int InitComs)
Initialize node community memberships using best neighborhood communities (see D. Gleich et al...
void GetQV(TFltV &OutV)
Returns QV, a vector of (1 - p_c) for each community c.
TNodeI GetRndNI(TRnd &Rnd=TInt::Rnd)
Returns an interator referring to a random node in the graph.
static PUNGraph Load(TSIn &SIn)
Static constructor that loads the graph from a stream SIn and returns a pointer to it...
void PutSeed(const int &_Seed)
int AddKey(const TKey &Key)
double SeekJoin(const int &UID, const int &CID)
Compute the change in likelihood (Delta) if node UID joins community CID.
void RunMCMC(const int &MaxIter, const int &EvalLambdaIter, const TStr &PlotFPrx=TStr())
Main procedure for fitting the AGM to a given graph using MCMC.
void InitNodeData()
COMMENT.
static TStr Fmt(const char *FmtStr,...)
double SeekLeave(const int &UID, const int &CID)
Compute the change in likelihood (Delta) if node UID leaves community CID.
void JoinCom(const int &NID, const int &JoinCID)
void RandomInitCmtyVV(const int InitComs, const double ComSzAlpha=1.3, const double MemAlpha=1.8, const int MinComSz=8, const int MaxComSz=200, const int MinMem=1, const int MaxMem=10)
Randomly initialize bipartite community affiliation graph.
TFlt MaxLambda
Maximum value of regularization parameter lambda (default = 10).
TVec< TIntSet > CIDNSetV
Community ID -> Member Node ID Sets.
TNodeI EndNI() const
Returns an iterator referring to the past-the-end node in the graph.
void AddBaseCmty()
Add Epsilon community (base community which includes all nodes) into community affiliation graph...
void SetCmtyVV(const TVec< TIntV > &CmtyVV)
COMMENT.
int AddPlot(const TIntV &YValV, const TGpSeriesTy &SeriesTy=gpwLinesPoints, const TStr &Label=TStr(), const TStr &Style=TStr())
int GetNbrNId(const int &NodeN) const
Returns ID of NodeN-th neighboring node.
bool IsNode(const int &NId) const
Tests whether ID NId is a node.
static double DotProduct(const TFltV &x, const TFltV &y)
int GetRndKeyId(TRnd &Rnd) const
Get an index of a random element. If the hash table has many deleted keys, this may take a long time...
void Gen(const TSizeTy &_Vals)
Constructs a vector (an array) of _Vals elements.
int GetUniDevInt(const int &Range=0)
int GetId() const
Returns ID of the current node.
bool IsEdge(const int &SrcNId, const int &DstNId) const
Tests whether an edge between node IDs SrcNId and DstNId exists in the graph.
bool IsKey(const TKey &Key) const
TSizeTy Add()
Adds a new element at the end of the vector, after its current last element.
TNodeI BegNI() const
Returns an iterator referring to the first node in the graph.
void DelSelfEdges(const PGraph &Graph)
Removes all the self-edges from the graph.
void DelLast()
Removes the last element of the vector.
TDat & AddDat(const TKey &Key)
static void PlotValV(const TVec< TVal1 > &ValV, const TStr &OutFNmPref, const TStr &Desc="", const TStr &XLabel="", const TStr &YLabel="", const TGpScaleTy &ScaleTy=gpsAuto, const bool &PowerFit=false, const TGpSeriesTy &SeriesTy=gpwLinesPoints)
TIntV ComEdgesV
The number of edges in each community.
void SampleTransition(int &NID, int &JoinCID, int &LeaveCID, double &DeltaL)
Sample MMCM transitions: Choose among (join, leave, switch), and then sample (NID, CID).
void Save(TSOut &SOut) const
const TKey & GetKey(const int &KeyId) const
TFlt PNoCom
Probability of edge when two nodes share no community (epsilon in the paper).
void RandomInit(const int &MaxK)
COMMENT.
THash< TIntPr, TFlt > NIDCIDPrH
pairs (for sampling MCMC moves).