SNAP Library 3.0, User Reference  2016-07-20 17:56:49
SNAP, a general purpose, high performance system for analysis and manipulation of large networks
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
TCesna Class Reference

#include <agmattr.h>

Public Member Functions

 TCesna ()
 
 TCesna (const PUNGraph &GraphPt, const THash< TInt, TIntV > &NIDAttrH, const int &InitComs, const int RndSeed=0)
 
void Save (TSOut &SOut)
 
void Load (TSIn &SIn, const int &RndSeed=0)
 
void SetGraph (const PUNGraph &GraphPt, const THash< TInt, TIntV > &NIDAttrH)
 
void SetRegCoef (const double _RegCoef)
 
double GetRegCoef ()
 
void SetWeightAttr (const double _WeightAttr)
 
double GetWeightAttr ()
 
void SetLassoCoef (const double _LassoCoef)
 
int GetAttrs ()
 
double GetComFromNID (const int &NID, const int &CID)
 
double GetLassoCoef ()
 
void InitW ()
 
void SetAttrHoldOut (const int NID, const int KID)
 
void SetAttrHoldOutForOneNode (const int NID)
 
void GetW (TVec< TFltV > &_W)
 
void SetW (TVec< TFltV > &_W)
 
void RandomInit (const int InitComs)
 
void NeighborComInit (const int InitComs)
 
void NeighborComInit (TFltIntPrV &NIdPhiV, const int InitComs)
 
int GetNumComs ()
 
void SetCmtyVV (const TVec< TIntV > &CmtyVV)
 
double Likelihood (const bool DoParallel=false)
 
double LikelihoodForRow (const int UID)
 
double LikelihoodForRow (const int UID, const TIntFltH &FU)
 
double LikelihoodAttrKForRow (const int UID, const int K)
 
double LikelihoodAttrKForRow (const int UID, const int K, const TIntFltH &FU)
 
double LikelihoodAttrKForRow (const int UID, const int K, const TIntFltH &FU, const TFltV &WK)
 
double LikelihoodForWK (const int K, const TFltV &WK)
 
double LikelihoodForWK (const int K)
 
double LikelihoodAttr ()
 
double LikelihoodGraph ()
 
void GenHoldOutAttr (const double HOFrac, TVec< TIntSet > &HOSetV)
 
void SetHoldOut (const double HOFrac)
 
void GradientForRow (const int UID, TIntFltH &GradU, const TIntSet &CIDSet)
 
void GradientForWK (TFltV &GradV, const int K)
 
void GetCmtyVV (TVec< TIntV > &CmtyVV)
 
void GetCmtyVV (TVec< TIntV > &CmtyVV, TVec< TFltV > &Wck, const double Thres, const int MinSz=3)
 extract community affiliation from F_uc Wck[c][k] = W_c for k-th attribute More...
 
void GetCmtyVV (TVec< TIntV > &CmtyVV, const double Thres, const int MinSz=3)
 
void GetCmtyVV (TVec< TIntV > &CmtyVV, TVec< TFltV > &Wck)
 
void GetCmtyVVUnSorted (TVec< TIntV > &CmtyVV)
 
void GetCmtyVVUnSorted (TVec< TIntV > &CmtyVV, const double Thres, const int MinSz=3)
 
int FindComs (TIntV &ComsV, const bool UseBIC=false, const double HOFrac=0.2, const int NumThreads=20, const TStr PlotLFNm=TStr(), const double StepAlpha=0.3, const double StepBeta=0.1)
 
int FindComs (const int NumThreads, const int MaxComs, const int MinComs, const int DivComs, const TStr OutFNm, const bool UseBIC=false, const double HOFrac=0.1, const double StepAlpha=0.3, const double StepBeta=0.3)
 estimate number of communities using cross validation More...
 
void DisplayAttrs (const int TopK, const TStrHash< TInt > &NodeNameH)
 
double LikelihoodHoldOut ()
 
double GetStepSizeByLineSearch (const int UID, const TIntFltH &DeltaV, const TIntFltH &GradV, const double &Alpha, const double &Beta, const int MaxIter=10)
 
double GetStepSizeByLineSearchForWK (const int K, const TFltV &DeltaV, const TFltV &GradV, const double &Alpha, const double &Beta, const int MaxIter=10)
 
int GetPositiveW ()
 
int MLEGradAscent (const double &Thres, const int &MaxIter, const TStr PlotNm, const double StepAlpha=0.3, const double StepBeta=0.1)
 
int MLEGradAscentParallel (const double &Thres, const int &MaxIter, const int ChunkNum, const int ChunkSize, const TStr PlotNm, const double StepAlpha=0.3, const double StepBeta=0.1)
 
int MLEGradAscentParallel (const double &Thres, const int &MaxIter, const int ChunkNum, const TStr PlotNm=TStr(), const double StepAlpha=0.3, const double StepBeta=0.1)
 
double GetCom (const int &NID, const int &CID)
 
double GetAttr (const int &NID, const int &K)
 
void AddCom (const int &NID, const int &CID, const double &Val)
 
void DelCom (const int &NID, const int &CID)
 
double DotProduct (const TIntFltH &UV, const TIntFltH &VV)
 
double DotProduct (const int &UID, const int &VID)
 
double Prediction (const TIntFltH &FU, const TIntFltH &FV)
 
double PredictAttrK (const TIntFltH &FU, const TFltV &WK)
 
double PredictAttrK (const TIntFltH &FU, const int K)
 
double PredictAttrK (const int UID, const int K)
 
double GetW (const int CID, const int K)
 
double Prediction (const int &UID, const int &VID)
 
double Sum (const TIntFltH &UV)
 
double Norm2 (const TIntFltH &UV)
 
double Sigmoid (const double X)
 

Public Attributes

TFlt MinVal
 
TFlt MaxVal
 
TFlt MinValW
 
TFlt MaxValW
 
TFlt NegWgt
 
TFlt LassoCoef
 
TFlt WeightAttr
 
TFlt PNoCom
 
TBool DoParallel
 

Private Attributes

PUNGraph G
 
TVec< TIntSetX
 
TVec< TIntFltHF
 
TVec< TFltVW
 
TInt Attrs
 
TRnd Rnd
 
TIntSet NIDToIdx
 
TFlt RegCoef
 
TFltV SumFV
 
TInt NumComs
 
TVec< TIntSetHOVIDSV
 
TVec< TIntSetHOKIDSV
 

Detailed Description

Definition at line 246 of file agmattr.h.

Constructor & Destructor Documentation

TCesna::TCesna ( )
inline

Definition at line 271 of file agmattr.h.

271 { G = TUNGraph::New(10, -1); }
PUNGraph G
Definition: agmattr.h:248
static PUNGraph New()
Static constructor that returns a pointer to the graph. Call: PUNGraph Graph = TUNGraph::New().
Definition: graph.h:155
TCesna::TCesna ( const PUNGraph GraphPt,
const THash< TInt, TIntV > &  NIDAttrH,
const int &  InitComs,
const int  RndSeed = 0 
)
inline

Definition at line 272 of file agmattr.h.

272  : Rnd(RndSeed), RegCoef(0),
273  MinVal(0.0), MaxVal(10.0), MinValW(-10.0), MaxValW(10.0), NegWgt(1.0), LassoCoef(1.0), WeightAttr(1.0) { SetGraph(GraphPt, NIDAttrH); NeighborComInit(InitComs); }
TFlt MaxVal
Definition: agmattr.h:262
TFlt LassoCoef
Definition: agmattr.h:266
TFlt NegWgt
Definition: agmattr.h:265
TFlt MinVal
Definition: agmattr.h:261
TFlt MaxValW
Definition: agmattr.h:264
TFlt MinValW
Definition: agmattr.h:263
TFlt RegCoef
Definition: agmattr.h:255
TRnd Rnd
Definition: agmattr.h:253
void NeighborComInit(const int InitComs)
Definition: agmattr.cpp:32
void SetGraph(const PUNGraph &GraphPt, const THash< TInt, TIntV > &NIDAttrH)
Definition: agmattr.cpp:99
TFlt WeightAttr
Definition: agmattr.h:267

Member Function Documentation

void TCesna::AddCom ( const int &  NID,
const int &  CID,
const double &  Val 
)
inline

Definition at line 541 of file agmattr.h.

541  {
542  if (F[NID].IsKey(CID)) {
543  SumFV[CID] -= F[NID].GetDat(CID);
544  }
545  F[NID].AddDat(CID) = Val;
546  SumFV[CID] += Val;
547  }
TFltV SumFV
Definition: agmattr.h:256
TVec< TIntFltH > F
Definition: agmattr.h:250
void TCesna::DelCom ( const int &  NID,
const int &  CID 
)
inline

Definition at line 549 of file agmattr.h.

549  {
550  if (F[NID].IsKey(CID)) {
551  SumFV[CID] -= F[NID].GetDat(CID);
552  F[NID].DelKey(CID);
553  }
554  }
TFltV SumFV
Definition: agmattr.h:256
TVec< TIntFltH > F
Definition: agmattr.h:250
void TCesna::DisplayAttrs ( const int  TopK,
const TStrHash< TInt > &  NodeNameH 
)
inline

Definition at line 470 of file agmattr.h.

470  {
471  for (int u = 0; u < X.Len(); u++) {
472  if (NodeNameH.Len() > 0) {
473  printf("NID: %s\t Attrs: ", NodeNameH.GetKey(NIDToIdx[u]));
474  } else {
475  printf("NID: %d\t Attrs: ", NIDToIdx[u].Val);
476  }
477  for (int k = 0; k < X[u].Len(); k++) {
478  printf("%d, ", X[u][k].Val);
479  }
480  printf("\n");
481  if (u >= TopK) { break; }
482  }
483  }
int Len() const
Definition: hash.h:770
TIntSet NIDToIdx
Definition: agmattr.h:254
const char * GetKey(const int &KeyId) const
Definition: hash.h:821
TVec< TIntSet > X
Definition: agmattr.h:249
double TCesna::DotProduct ( const TIntFltH UV,
const TIntFltH VV 
)
inline

Definition at line 564 of file agmattr.h.

564  {
565  double DP = 0;
566  if (UV.Len() > VV.Len()) {
567  for (TIntFltH::TIter HI = UV.BegI(); HI < UV.EndI(); HI++) {
568  if (VV.IsKey(HI.GetKey())) {
569  DP += VV.GetDat(HI.GetKey()) * HI.GetDat();
570  }
571  }
572  } else {
573  for (TIntFltH::TIter HI = VV.BegI(); HI < VV.EndI(); HI++) {
574  if (UV.IsKey(HI.GetKey())) {
575  DP += UV.GetDat(HI.GetKey()) * HI.GetDat();
576  }
577  }
578  }
579  return DP;
580  }
TIter BegI() const
Definition: hash.h:171
const TDat & GetDat(const TKey &Key) const
Definition: hash.h:220
TIter EndI() const
Definition: hash.h:176
bool IsKey(const TKey &Key) const
Definition: hash.h:216
int Len() const
Definition: hash.h:186
double TCesna::DotProduct ( const int &  UID,
const int &  VID 
)
inline

Definition at line 581 of file agmattr.h.

581  {
582  return DotProduct(F[UID], F[VID]);
583  }
double DotProduct(const TIntFltH &UV, const TIntFltH &VV)
Definition: agmattr.h:564
TVec< TIntFltH > F
Definition: agmattr.h:250
int TCesna::FindComs ( TIntV ComsV,
const bool  UseBIC = false,
const double  HOFrac = 0.2,
const int  NumThreads = 20,
const TStr  PlotLFNm = TStr(),
const double  StepAlpha = 0.3,
const double  StepBeta = 0.1 
)

Definition at line 361 of file agmattr.cpp.

361  {
362  if (ComsV.Len() == 0) {
363  int MaxComs = G->GetNodes() / 5;
364  ComsV.Add(2);
365  while(ComsV.Last() < MaxComs) { ComsV.Add(ComsV.Last() * 2); }
366  }
367  int MaxIterCV = 3;
368 
369  TVec<TVec<TIntSet> > HoldOutSets(MaxIterCV), HoldOutSetsAttr(MaxIterCV);
370  TFltIntPrV NIdPhiV;
371  TCesnaUtil::GetNIdPhiV<PUNGraph>(G, NIdPhiV);
372  if (! UseBIC) { //if edges are many enough, use CV
373  //printf("generating hold out set\n");
374  TIntV NIdV1, NIdV2;
375  G->GetNIdV(NIdV1);
376  G->GetNIdV(NIdV2);
377  for (int IterCV = 0; IterCV < MaxIterCV; IterCV++) {
378  // generate holdout sets
379  TCesnaUtil::GenHoldOutPairs(G, HoldOutSets[IterCV], HOFrac, Rnd);
380  GenHoldOutAttr(HOFrac, HoldOutSetsAttr[IterCV]);
381  }
382  //printf("hold out set generated\n");
383  }
384 
385  TFltV HOLV(ComsV.Len());
386  TIntFltPrV ComsLV;
387  for (int c = 0; c < ComsV.Len(); c++) {
388  const int Coms = ComsV[c];
389  //printf("Try number of Coms:%d\n", Coms);
390 
391  if (! UseBIC) { //if edges are many enough, use CV
392  for (int IterCV = 0; IterCV < MaxIterCV; IterCV++) {
393  HOVIDSV = HoldOutSets[IterCV];
394  HOKIDSV = HoldOutSetsAttr[IterCV];
395  NeighborComInit(NIdPhiV, Coms);
396  //printf("Initialized\n");
397 
398  if (NumThreads == 1) {
399  //printf("MLE without parallelization begins\n");
400  MLEGradAscent(0.01, 100 * G->GetNodes(), "", StepAlpha, StepBeta);
401  //printf("likelihood: train:%f, attr:%f, hold:%f\n", Likelihood(), LikelihoodAttr(), LikelihoodHoldOut());
402  } else {
403  //printf("MLE with parallelization begins\n");
404  MLEGradAscentParallel(0.01, 100, NumThreads, "", StepAlpha, StepBeta);
405  }
406  double HOL = LikelihoodHoldOut();
407  HOL = HOL < 0? HOL: TFlt::Mn;
408  HOLV[c] += HOL;
409  }
410  }
411  else {
412  HOVIDSV.Gen(G->GetNodes());
413  HOKIDSV.Gen(G->GetNodes());
414  if (NumThreads == 1) {
415  MLEGradAscent(0.005, 100 * G->GetNodes(), "", StepAlpha, StepBeta);
416  printf("likelihood: train:%f, attr:%f, hold:%f\n", Likelihood(), LikelihoodAttr(), LikelihoodHoldOut());
417  } else {
418  MLEGradAscentParallel(0.005, 100, NumThreads, "", StepAlpha, StepBeta);
419  }
420  //int NumParams = G->GetNodes() * Coms + Coms * Attrs;
421  double NumParams = (1.0 - WeightAttr) * Coms + WeightAttr * Coms * Attrs;
422  double Observations = (1.0 - WeightAttr) * G->GetNodes() * (G->GetNodes() - 1) / 2 + WeightAttr * G->GetNodes() * Attrs;
423  double BIC = 2 * Likelihood() - NumParams * log (Observations);
424  HOLV[c] = BIC;
425  }
426  }
427  int EstComs = 2;
428  double MaxL = TFlt::Mn;
429  if (UseBIC) {
430  printf("Number of communities vs likelihood (criterion: BIC)\n");
431  } else {
432  printf("Number of communities vs likelihood (criterion: Cross validation)\n");
433  }
434  for (int c = 0; c < ComsV.Len(); c++) {
435  ComsLV.Add(TIntFltPr(ComsV[c].Val, HOLV[c].Val));
436  printf("%d(%f)\t", ComsV[c].Val, HOLV[c].Val);
437  if (MaxL < HOLV[c]) {
438  MaxL = HOLV[c];
439  EstComs = ComsV[c];
440  }
441  }
442  printf("\n");
443  RandomInit(EstComs);
444  HOVIDSV.Gen(G->GetNodes());
445  HOKIDSV.Gen(G->GetNodes());
446  if (! PlotLFNm.Empty()) {
447  TGnuPlot::PlotValV(ComsLV, PlotLFNm, "hold-out likelihood", "communities", "likelihood");
448  }
449  return EstComs;
450 }
int MLEGradAscentParallel(const double &Thres, const int &MaxIter, const int ChunkNum, const int ChunkSize, const TStr PlotNm, const double StepAlpha=0.3, const double StepBeta=0.1)
Definition: agmattr.cpp:594
void GenHoldOutAttr(const double HOFrac, TVec< TIntSet > &HOSetV)
Definition: agmattr.h:397
PUNGraph G
Definition: agmattr.h:248
int MLEGradAscent(const double &Thres, const int &MaxIter, const TStr PlotNm, const double StepAlpha=0.3, const double StepBeta=0.1)
Definition: agmattr.cpp:505
TSizeTy Len() const
Returns the number of elements in the vector.
Definition: ds.h:547
double LikelihoodAttr()
Definition: agmattr.h:371
double Likelihood(const bool DoParallel=false)
Definition: agmattr.cpp:137
int GetNodes() const
Returns the number of nodes in the graph.
Definition: graph.h:168
TPair< TInt, TFlt > TIntFltPr
Definition: ds.h:87
TInt Attrs
Definition: agmattr.h:252
void RandomInit(const int InitComs)
Definition: agmattr.cpp:9
double LikelihoodHoldOut()
Definition: agmattr.cpp:452
TVec< TIntSet > HOKIDSV
Definition: agmattr.h:259
TVec< TIntSet > HOVIDSV
Definition: agmattr.h:258
const TVal & Last() const
Returns a reference to the last element of the vector.
Definition: ds.h:551
static void GenHoldOutPairs(const PGraph &G, TVec< TIntSet > &HoldOutSet, double HOFrac, TRnd &Rnd)
Definition: agmattr.h:38
TRnd Rnd
Definition: agmattr.h:253
void NeighborComInit(const int InitComs)
Definition: agmattr.cpp:32
bool Empty() const
Definition: dt.h:488
TSizeTy Add()
Adds a new element at the end of the vector, after its current last element.
Definition: ds.h:574
TFlt WeightAttr
Definition: agmattr.h:267
static void PlotValV(const TVec< TVal1 > &ValV, const TStr &OutFNmPref, const TStr &Desc="", const TStr &XLabel="", const TStr &YLabel="", const TGpScaleTy &ScaleTy=gpsAuto, const bool &PowerFit=false, const TGpSeriesTy &SeriesTy=gpwLinesPoints)
Definition: gnuplot.h:398
static const double Mn
Definition: dt.h:1297
Vector is a sequence TVal objects representing an array that can change in size.
Definition: ds.h:429
int TCesna::FindComs ( const int  NumThreads,
const int  MaxComs,
const int  MinComs,
const int  DivComs,
const TStr  OutFNm,
const bool  UseBIC = false,
const double  HOFrac = 0.1,
const double  StepAlpha = 0.3,
const double  StepBeta = 0.3 
)

estimate number of communities using cross validation

Definition at line 348 of file agmattr.cpp.

348  {
349  double ComsGap = exp(TMath::Log((double) MaxComs / (double) MinComs) / (double) DivComs);
350  TIntV ComsV;
351  ComsV.Add(MinComs);
352  while (ComsV.Len() < DivComs) {
353  int NewComs = int(ComsV.Last() * ComsGap);
354  if (NewComs == ComsV.Last().Val) { NewComs++; }
355  ComsV.Add(NewComs);
356  }
357  if (ComsV.Last() < MaxComs) { ComsV.Add(MaxComs); }
358  return FindComs(ComsV, UseBIC, HOFrac, NumThreads, OutFNm, StepAlpha, StepBeta);
359 }
int Val
Definition: dt.h:1046
TSizeTy Len() const
Returns the number of elements in the vector.
Definition: ds.h:547
const TVal & Last() const
Returns a reference to the last element of the vector.
Definition: ds.h:551
static double Log(const double &Val)
Definition: xmath.h:14
int FindComs(TIntV &ComsV, const bool UseBIC=false, const double HOFrac=0.2, const int NumThreads=20, const TStr PlotLFNm=TStr(), const double StepAlpha=0.3, const double StepBeta=0.1)
Definition: agmattr.cpp:361
TSizeTy Add()
Adds a new element at the end of the vector, after its current last element.
Definition: ds.h:574
void TCesna::GenHoldOutAttr ( const double  HOFrac,
TVec< TIntSet > &  HOSetV 
)
inline

Definition at line 397 of file agmattr.h.

397  {
398  HOSetV.Gen(F.Len());
399  int HoldOutCnt = (int) ceil(HOFrac * G->GetNodes() * Attrs);
400  TIntPrSet NIDKIDSet(HoldOutCnt);
401  int Cnt = 0;
402  for (int h = 0; h < 10 * HoldOutCnt; h++) {
403  int UID = Rnd.GetUniDevInt(F.Len());
404  int KID = Rnd.GetUniDevInt(Attrs);
405  if (! NIDKIDSet.IsKey(TIntPr(UID, KID))) {
406  NIDKIDSet.AddKey(TIntPr(UID, KID));
407  HOSetV[UID].AddKey(KID);
408  Cnt++;
409  }
410  if (Cnt >= HoldOutCnt) { break; }
411  }
412  printf("%d hold out pairs generated for attributes\n", Cnt);
413  }
TPair< TInt, TInt > TIntPr
Definition: ds.h:83
PUNGraph G
Definition: agmattr.h:248
int GetNodes() const
Returns the number of nodes in the graph.
Definition: graph.h:168
TVec< TIntFltH > F
Definition: agmattr.h:250
TInt Attrs
Definition: agmattr.h:252
TRnd Rnd
Definition: agmattr.h:253
Definition: ds.h:32
void Gen(const TSizeTy &_Vals)
Constructs a vector (an array) of _Vals elements.
Definition: ds.h:495
int GetUniDevInt(const int &Range=0)
Definition: dt.cpp:39
double TCesna::GetAttr ( const int &  NID,
const int &  K 
)
inline

Definition at line 534 of file agmattr.h.

534  {
535  if (X[NID].IsKey(K)) {
536  return 1.0;
537  } else {
538  return 0.0;
539  }
540  }
TVec< TIntSet > X
Definition: agmattr.h:249
int TCesna::GetAttrs ( )
inline

Definition at line 321 of file agmattr.h.

321 { return Attrs; }
TInt Attrs
Definition: agmattr.h:252
void TCesna::GetCmtyVV ( TVec< TIntV > &  CmtyVV)

Definition at line 289 of file agmattr.cpp.

289  {
290  TVec<TFltV> TmpV;
291  GetCmtyVV(CmtyVV, TmpV, sqrt(2.0 * (double) G->GetEdges() / G->GetNodes() / G->GetNodes()), 3);
292 }
PUNGraph G
Definition: agmattr.h:248
int GetEdges() const
Returns the number of edges in the graph.
Definition: graph.cpp:82
int GetNodes() const
Returns the number of nodes in the graph.
Definition: graph.h:168
void GetCmtyVV(TVec< TIntV > &CmtyVV)
Definition: agmattr.cpp:289
Vector is a sequence TVal objects representing an array that can change in size.
Definition: ds.h:429
void TCesna::GetCmtyVV ( TVec< TIntV > &  CmtyVV,
TVec< TFltV > &  Wck,
const double  Thres,
const int  MinSz = 3 
)

extract community affiliation from F_uc Wck[c][k] = W_c for k-th attribute

Definition at line 296 of file agmattr.cpp.

296  {
297  CmtyVV.Gen(NumComs, 0);
298  Wck.Gen(NumComs, 0);
299  TIntFltH CIDSumFH(NumComs);
300  for (int c = 0; c < SumFV.Len(); c++) {
301  CIDSumFH.AddDat(c, SumFV[c]);
302  }
303  CIDSumFH.SortByDat(false);
304  for (int c = 0; c < NumComs; c++) {
305  int CID = CIDSumFH.GetKey(c);
306  TIntFltH NIDFucH(F.Len() / 10);
307  TIntV CmtyV;
308  IAssert(SumFV[CID] == CIDSumFH.GetDat(CID));
309  if (SumFV[CID] < Thres) { continue; }
310  for (int u = 0; u < F.Len(); u++) {
311  int NID = NIDToIdx[u];
312  if (GetCom(u, CID) >= Thres) { NIDFucH.AddDat(NID, GetCom(u, CID)); }
313  }
314  NIDFucH.SortByDat(false);
315  NIDFucH.GetKeyV(CmtyV);
316  if (CmtyV.Len() < MinSz) { continue; }
317  CmtyVV.Add(CmtyV);
318  TFltV WV(Attrs);
319  for (int k = 0; k < Attrs; k++) {
320  WV[k] = GetW(CID, k);
321  }
322  Wck.Add(WV);
323  }
324  if ( NumComs != CmtyVV.Len()) {
325  printf("Community vector generated. %d communities are ommitted\n", NumComs.Val - CmtyVV.Len());
326  }
327 }
#define IAssert(Cond)
Definition: bd.h:262
TFltV SumFV
Definition: agmattr.h:256
double GetCom(const int &NID, const int &CID)
Definition: agmattr.h:527
TSizeTy Len() const
Returns the number of elements in the vector.
Definition: ds.h:547
void GetKeyV(TVec< TKey > &KeyV) const
Definition: shash.h:1347
TIntSet NIDToIdx
Definition: agmattr.h:254
void GetW(TVec< TFltV > &_W)
Definition: agmattr.h:346
TVec< TIntFltH > F
Definition: agmattr.h:250
TInt Attrs
Definition: agmattr.h:252
const TVal & GetDat(const TVal &Val) const
Returns reference to the first occurrence of element Val.
Definition: ds.h:807
TInt NumComs
Definition: agmattr.h:257
void Gen(const TSizeTy &_Vals)
Constructs a vector (an array) of _Vals elements.
Definition: ds.h:495
TSizeTy Add()
Adds a new element at the end of the vector, after its current last element.
Definition: ds.h:574
void TCesna::GetCmtyVV ( TVec< TIntV > &  CmtyVV,
const double  Thres,
const int  MinSz = 3 
)
inline

Definition at line 438 of file agmattr.h.

438  {
439  TVec<TFltV> TmpV;
440  GetCmtyVV(CmtyVV, TmpV, Thres, MinSz);
441  }
void GetCmtyVV(TVec< TIntV > &CmtyVV)
Definition: agmattr.cpp:289
Vector is a sequence TVal objects representing an array that can change in size.
Definition: ds.h:429
void TCesna::GetCmtyVV ( TVec< TIntV > &  CmtyVV,
TVec< TFltV > &  Wck 
)
inline

Definition at line 442 of file agmattr.h.

442  {
443  GetCmtyVV(CmtyVV, Wck, sqrt(2.0 * (double) G->GetEdges() / G->GetNodes() / G->GetNodes()), 3);
444  }
PUNGraph G
Definition: agmattr.h:248
int GetEdges() const
Returns the number of edges in the graph.
Definition: graph.cpp:82
int GetNodes() const
Returns the number of nodes in the graph.
Definition: graph.h:168
void GetCmtyVV(TVec< TIntV > &CmtyVV)
Definition: agmattr.cpp:289
void TCesna::GetCmtyVVUnSorted ( TVec< TIntV > &  CmtyVV)

Definition at line 329 of file agmattr.cpp.

329  {
330  GetCmtyVVUnSorted(CmtyVV, sqrt(2.0 * (double) G->GetEdges() / G->GetNodes() / G->GetNodes()), 3);
331 }
PUNGraph G
Definition: agmattr.h:248
int GetEdges() const
Returns the number of edges in the graph.
Definition: graph.cpp:82
int GetNodes() const
Returns the number of nodes in the graph.
Definition: graph.h:168
void GetCmtyVVUnSorted(TVec< TIntV > &CmtyVV)
Definition: agmattr.cpp:329
void TCesna::GetCmtyVVUnSorted ( TVec< TIntV > &  CmtyVV,
const double  Thres,
const int  MinSz = 3 
)

Definition at line 333 of file agmattr.cpp.

333  {
334  CmtyVV.Gen(NumComs, 0);
335  for (int c = 0; c < NumComs; c++) {
336  TIntV CmtyV;
337  for (int u = 0; u < G->GetNodes(); u++) {
338  if (GetCom(u, c) > Thres) { CmtyV.Add(NIDToIdx[u]); }
339  }
340  if (CmtyV.Len() >= MinSz) { CmtyVV.Add(CmtyV); }
341  }
342  if ( NumComs != CmtyVV.Len()) {
343  printf("Community vector generated. %d communities are ommitted\n", NumComs.Val - CmtyVV.Len());
344  }
345 }
PUNGraph G
Definition: agmattr.h:248
double GetCom(const int &NID, const int &CID)
Definition: agmattr.h:527
TSizeTy Len() const
Returns the number of elements in the vector.
Definition: ds.h:547
TIntSet NIDToIdx
Definition: agmattr.h:254
int GetNodes() const
Returns the number of nodes in the graph.
Definition: graph.h:168
TInt NumComs
Definition: agmattr.h:257
void Gen(const TSizeTy &_Vals)
Constructs a vector (an array) of _Vals elements.
Definition: ds.h:495
TSizeTy Add()
Adds a new element at the end of the vector, after its current last element.
Definition: ds.h:574
double TCesna::GetCom ( const int &  NID,
const int &  CID 
)
inline

Definition at line 527 of file agmattr.h.

527  {
528  if (F[NID].IsKey(CID)) {
529  return F[NID].GetDat(CID);
530  } else {
531  return 0.0;
532  }
533  }
TVec< TIntFltH > F
Definition: agmattr.h:250
double TCesna::GetComFromNID ( const int &  NID,
const int &  CID 
)
inline

Definition at line 322 of file agmattr.h.

322  {
323  int NIdx = NIDToIdx.GetKeyId(NID);
324  if (F[NIdx].IsKey(CID)) {
325  return F[NIdx].GetDat(CID);
326  } else {
327  return 0.0;
328  }
329  }
int GetKeyId(const TKey &Key) const
Definition: shash.h:1328
TIntSet NIDToIdx
Definition: agmattr.h:254
TVec< TIntFltH > F
Definition: agmattr.h:250
double TCesna::GetLassoCoef ( )
inline

Definition at line 330 of file agmattr.h.

330 { return LassoCoef; }
TFlt LassoCoef
Definition: agmattr.h:266
int TCesna::GetNumComs ( )
inline

Definition at line 351 of file agmattr.h.

351 { return NumComs; }
TInt NumComs
Definition: agmattr.h:257
int TCesna::GetPositiveW ( )
inline

Definition at line 510 of file agmattr.h.

510  {
511  int PosCnt = 0;
512  for (int c = 0; c < NumComs; c++) {
513  for (int k = 0; k < Attrs; k++) {
514  if (GetW(c, k) > 0.0) { PosCnt++; }
515  }
516  }
517  return PosCnt;
518  }
void GetW(TVec< TFltV > &_W)
Definition: agmattr.h:346
TInt Attrs
Definition: agmattr.h:252
TInt NumComs
Definition: agmattr.h:257
double TCesna::GetRegCoef ( )
inline

Definition at line 317 of file agmattr.h.

317 { return RegCoef; }
TFlt RegCoef
Definition: agmattr.h:255
double TCesna::GetStepSizeByLineSearch ( const int  UID,
const TIntFltH DeltaV,
const TIntFltH GradV,
const double &  Alpha,
const double &  Beta,
const int  MaxIter = 10 
)

Definition at line 480 of file agmattr.cpp.

480  {
481  double StepSize = 1.0;
482  double InitLikelihood = LikelihoodForRow(UID);
483  TIntFltH NewVarV(DeltaV.Len());
484  for(int iter = 0; iter < MaxIter; iter++) {
485  for (int i = 0; i < DeltaV.Len(); i++){
486  int CID = DeltaV.GetKey(i);
487  double NewVal = GetCom(UID, CID) + StepSize * DeltaV.GetDat(CID);
488  if (NewVal < MinVal) { NewVal = MinVal; }
489  if (NewVal > MaxVal) { NewVal = MaxVal; }
490  NewVarV.AddDat(CID, NewVal);
491  }
492  if (LikelihoodForRow(UID, NewVarV) < InitLikelihood + Alpha * StepSize * DotProduct(GradV, DeltaV)) {
493  StepSize *= Beta;
494  } else {
495  break;
496  }
497  if (iter == MaxIter - 1) {
498  StepSize = 0.0;
499  break;
500  }
501  }
502  return StepSize;
503 }
TFlt MaxVal
Definition: agmattr.h:262
double GetCom(const int &NID, const int &CID)
Definition: agmattr.h:527
double DotProduct(const TIntFltH &UV, const TIntFltH &VV)
Definition: agmattr.h:564
TFlt MinVal
Definition: agmattr.h:261
const TDat & GetDat(const TKey &Key) const
Definition: hash.h:220
double LikelihoodForRow(const int UID)
Definition: agmattr.cpp:157
int Len() const
Definition: hash.h:186
const TKey & GetKey(const int &KeyId) const
Definition: hash.h:210
double TCesna::GetStepSizeByLineSearchForWK ( const int  K,
const TFltV DeltaV,
const TFltV GradV,
const double &  Alpha,
const double &  Beta,
const int  MaxIter = 10 
)
inline

Definition at line 486 of file agmattr.h.

486  {
487  double StepSize = 1.0;
488  double InitLikelihood = LikelihoodForWK(K);
489  TFltV NewVarV(DeltaV.Len());
490  IAssert(DeltaV.Len() == NumComs + 1);
491  for(int iter = 0; iter < MaxIter; iter++) {
492  for (int c = 0; c < DeltaV.Len(); c++){
493  double NewVal = W[K][c] + StepSize * DeltaV[c];
494  if (NewVal < MinValW) { NewVal = MinValW; }
495  if (NewVal > MaxValW) { NewVal = MaxValW; }
496  NewVarV[c] = NewVal;
497  }
498  if (LikelihoodForWK(K, NewVarV) < InitLikelihood + Alpha * StepSize * TLinAlg::DotProduct(GradV, DeltaV)) {
499  StepSize *= Beta;
500  } else {
501  break;
502  }
503  if (iter == MaxIter - 1) {
504  StepSize = 0.0;
505  break;
506  }
507  }
508  return StepSize;
509  }
#define IAssert(Cond)
Definition: bd.h:262
TVec< TFltV > W
Definition: agmattr.h:251
TSizeTy Len() const
Returns the number of elements in the vector.
Definition: ds.h:547
double LikelihoodForWK(const int K, const TFltV &WK)
Definition: agmattr.h:359
TFlt MaxValW
Definition: agmattr.h:264
TFlt MinValW
Definition: agmattr.h:263
TInt NumComs
Definition: agmattr.h:257
static double DotProduct(const TFltV &x, const TFltV &y)
Definition: linalg.cpp:165
void TCesna::GetW ( TVec< TFltV > &  _W)
inline

Definition at line 346 of file agmattr.h.

346 { _W = W; }
TVec< TFltV > W
Definition: agmattr.h:251
double TCesna::GetW ( const int  CID,
const int  K 
)
inline

Definition at line 603 of file agmattr.h.

603  {
604  return W[K][CID];
605  }
TVec< TFltV > W
Definition: agmattr.h:251
double TCesna::GetWeightAttr ( )
inline

Definition at line 319 of file agmattr.h.

319 { return WeightAttr; }
TFlt WeightAttr
Definition: agmattr.h:267
void TCesna::GradientForRow ( const int  UID,
TIntFltH GradU,
const TIntSet CIDSet 
)

Definition at line 213 of file agmattr.cpp.

213  {
214  GradU.Gen(CIDSet.Len());
215  TFltV HOSumFV; //adjust for Fv of v hold out
216  if (HOVIDSV[UID].Len() > 0) {
217  HOSumFV.Gen(SumFV.Len());
218 
219  for (int e = 0; e < HOVIDSV[UID].Len(); e++) {
220  for (int c = 0; c < SumFV.Len(); c++) {
221  HOSumFV[c] += GetCom(HOVIDSV[UID][e], c);
222  }
223  }
224  }
225  TUNGraph::TNodeI NI = G->GetNI(UID);
226  int Deg = NI.GetDeg();
227  TFltV PredV(Deg), GradV(CIDSet.Len());
228  TIntV CIDV(CIDSet.Len());
229  for (int e = 0; e < Deg; e++) {
230  if (NI.GetNbrNId(e) == UID) { continue; }
231  if (HOVIDSV[UID].IsKey(NI.GetNbrNId(e))) { continue; }
232  PredV[e] = Prediction(UID, NI.GetNbrNId(e));
233  }
234 
235  for (int c = 0; c < CIDSet.Len(); c++) {
236  int CID = CIDSet.GetKey(c);
237  double Val = 0.0;
238  for (int e = 0; e < Deg; e++) {
239  int VID = NI.GetNbrNId(e);
240  if (VID == UID) { continue; }
241  if (HOVIDSV[UID].IsKey(VID)) { continue; }
242  Val += PredV[e] * GetCom(VID, CID) / (1.0 - PredV[e]) + NegWgt * GetCom(VID, CID);
243  }
244  double HOSum = HOVIDSV[UID].Len() > 0? HOSumFV[CID].Val: 0.0;//subtract Hold out pairs only if hold out pairs exist
245  Val -= NegWgt * (SumFV[CID] - HOSum - GetCom(UID, CID));
246  CIDV[c] = CID;
247  GradV[c] = Val;
248  }
249  //add regularization
250  if (RegCoef > 0.0) { //L1
251  for (int c = 0; c < GradV.Len(); c++) {
252  GradV[c] -= RegCoef;
253  }
254  }
255  if (RegCoef < 0.0) { //L2
256  for (int c = 0; c < GradV.Len(); c++) {
257  GradV[c] += 2 * RegCoef * GetCom(UID, CIDV[c]);
258  }
259  }
260  for (int c = 0; c < GradV.Len(); c++) {
261  GradV[c] *= (1.0 - WeightAttr);
262  }
263  //add attribute part
264  TFltV AttrPredV(Attrs);
265  for (int k = 0; k < Attrs; k++) {
266  if (HOKIDSV[UID].IsKey(k)) { continue; }
267  AttrPredV[k] = PredictAttrK(F[UID], W[k]);
268  }
269  for (int c = 0; c < GradV.Len(); c++) {
270  for (int k = 0; k < Attrs; k++) {
271  if (HOKIDSV[UID].IsKey(k)) { continue; }
272  GradV[c] += WeightAttr * (GetAttr(UID, k) - AttrPredV[k]) * GetW(CIDV[c], k);
273  }
274  }
275 
276 
277  for (int c = 0; c < GradV.Len(); c++) {
278  if (GetCom(UID, CIDV[c]) == 0.0 && GradV[c] < 0.0) { continue; }
279  if (fabs(GradV[c]) < 0.0001) { continue; }
280  GradU.AddDat(CIDV[c], GradV[c]);
281  }
282  for (int c = 0; c < GradU.Len(); c++) {
283  if (GradU[c] >= 10) { GradU[c] = 10; }
284  if (GradU[c] <= -10) { GradU[c] = -10; }
285  IAssert(GradU[c] >= -10);
286  }
287 }
#define IAssert(Cond)
Definition: bd.h:262
PUNGraph G
Definition: agmattr.h:248
TFltV SumFV
Definition: agmattr.h:256
double GetCom(const int &NID, const int &CID)
Definition: agmattr.h:527
TFlt NegWgt
Definition: agmattr.h:265
TVec< TFltV > W
Definition: agmattr.h:251
double Prediction(const TIntFltH &FU, const TIntFltH &FV)
Definition: agmattr.h:584
TSizeTy Len() const
Returns the number of elements in the vector.
Definition: ds.h:547
Node iterator. Only forward iteration (operator++) is supported.
Definition: graph.h:64
double GetAttr(const int &NID, const int &K)
Definition: agmattr.h:534
void GetW(TVec< TFltV > &_W)
Definition: agmattr.h:346
double PredictAttrK(const TIntFltH &FU, const TFltV &WK)
Definition: agmattr.h:589
const TKey & GetKey(const int &KeyId) const
Definition: shash.h:1141
TVec< TIntFltH > F
Definition: agmattr.h:250
int GetDeg() const
Returns degree of the current node.
Definition: graph.h:86
TInt Attrs
Definition: agmattr.h:252
void Gen(const int &ExpectVals)
Definition: hash.h:180
TNodeI GetNI(const int &NId) const
Returns an iterator referring to the node of ID NId in the graph.
Definition: graph.h:213
TFlt RegCoef
Definition: agmattr.h:255
TVec< TIntSet > HOKIDSV
Definition: agmattr.h:259
TVec< TIntSet > HOVIDSV
Definition: agmattr.h:258
int Len() const
Definition: shash.h:1121
int GetNbrNId(const int &NodeN) const
Returns ID of NodeN-th neighboring node.
Definition: graph.h:107
void Gen(const TSizeTy &_Vals)
Constructs a vector (an array) of _Vals elements.
Definition: ds.h:495
int Len() const
Definition: hash.h:186
TDat & AddDat(const TKey &Key)
Definition: hash.h:196
TFlt WeightAttr
Definition: agmattr.h:267
void TCesna::GradientForWK ( TFltV GradV,
const int  K 
)
inline

Definition at line 421 of file agmattr.h.

421  {
422  GradV.Gen(NumComs + 1);
423  for (int u = 0; u < F.Len(); u++) {
424  if (HOKIDSV[u].IsKey(K)) { continue; }
425  double Pred = PredictAttrK(u, K);
426  for (TIntFltH::TIter CI = F[u].BegI(); CI < F[u].EndI(); CI++) {
427  GradV[CI.GetKey()] += (GetAttr(u, K) - Pred) * GetCom(u, CI.GetKey());
428  }
429  GradV[NumComs] += (GetAttr(u, K) - Pred);
430  }
431 
432  for (int c = 0; c < GradV.Len() - 1; c++) {
433  GradV[c] -= LassoCoef * TMath::Sign(GetW(c, K));
434  }
435  }
TFlt LassoCoef
Definition: agmattr.h:266
double GetCom(const int &NID, const int &CID)
Definition: agmattr.h:527
TSizeTy Len() const
Returns the number of elements in the vector.
Definition: ds.h:547
double GetAttr(const int &NID, const int &K)
Definition: agmattr.h:534
void GetW(TVec< TFltV > &_W)
Definition: agmattr.h:346
double PredictAttrK(const TIntFltH &FU, const TFltV &WK)
Definition: agmattr.h:589
TVec< TIntFltH > F
Definition: agmattr.h:250
TVec< TIntSet > HOKIDSV
Definition: agmattr.h:259
TInt NumComs
Definition: agmattr.h:257
static int Sign(const T &Val)
Definition: xmath.h:29
void Gen(const TSizeTy &_Vals)
Constructs a vector (an array) of _Vals elements.
Definition: ds.h:495
void TCesna::InitW ( )
inline

Definition at line 331 of file agmattr.h.

331  { // initialize W
332  W.Gen(Attrs);
333  for (int k = 0; k < Attrs; k++) {
334  W[k].Gen(NumComs + 1);
335  }
336  }
TVec< TFltV > W
Definition: agmattr.h:251
TInt Attrs
Definition: agmattr.h:252
TInt NumComs
Definition: agmattr.h:257
double TCesna::Likelihood ( const bool  DoParallel = false)

Definition at line 137 of file agmattr.cpp.

137  {
138  TExeTm ExeTm;
139  double L = 0.0;
140  if (_DoParallel) {
141  #pragma omp parallel for
142  for (int u = 0; u < F.Len(); u++) {
143  double LU = LikelihoodForRow(u);
144  #pragma omp atomic
145  L += LU;
146  }
147  }
148  else {
149  for (int u = 0; u < F.Len(); u++) {
150  double LU = LikelihoodForRow(u);
151  L += LU;
152  }
153  }
154  return L;
155 }
Definition: tm.h:355
TVec< TIntFltH > F
Definition: agmattr.h:250
double LikelihoodForRow(const int UID)
Definition: agmattr.cpp:157
double TCesna::LikelihoodAttr ( )
inline

Definition at line 371 of file agmattr.h.

371  {
372  double L = 0.0;
373  for (int k = 0; k < Attrs; k++) {
374  for (int u = 0; u < F.Len(); u++) {
375  if (HOKIDSV[u].IsKey(k)) { continue; }
376  L += LikelihoodAttrKForRow(u, k, F[u], W[k]);
377  }
378  }
379  return L;
380  }
TVec< TFltV > W
Definition: agmattr.h:251
double LikelihoodAttrKForRow(const int UID, const int K)
Definition: agmattr.h:356
TVec< TIntFltH > F
Definition: agmattr.h:250
TInt Attrs
Definition: agmattr.h:252
TVec< TIntSet > HOKIDSV
Definition: agmattr.h:259
double TCesna::LikelihoodAttrKForRow ( const int  UID,
const int  K 
)
inline

Definition at line 356 of file agmattr.h.

356 { return LikelihoodAttrKForRow(UID, K, F[UID]); }
double LikelihoodAttrKForRow(const int UID, const int K)
Definition: agmattr.h:356
TVec< TIntFltH > F
Definition: agmattr.h:250
double TCesna::LikelihoodAttrKForRow ( const int  UID,
const int  K,
const TIntFltH FU 
)
inline

Definition at line 357 of file agmattr.h.

357 { return LikelihoodAttrKForRow(UID, K, FU, W[K]); }
TVec< TFltV > W
Definition: agmattr.h:251
double LikelihoodAttrKForRow(const int UID, const int K)
Definition: agmattr.h:356
double TCesna::LikelihoodAttrKForRow ( const int  UID,
const int  K,
const TIntFltH FU,
const TFltV WK 
)

Definition at line 202 of file agmattr.cpp.

202  {
203  double Prob = PredictAttrK(FU, WK);
204  double L = 0.0;
205  if (GetAttr(UID, K)) {
206  L = Prob == 0.0? -100.0: log(Prob);
207  } else {
208  L = Prob == 1.0? -100.0: log(1.0 - Prob);
209  }
210  return L;
211 }
double GetAttr(const int &NID, const int &K)
Definition: agmattr.h:534
double PredictAttrK(const TIntFltH &FU, const TFltV &WK)
Definition: agmattr.h:589
double TCesna::LikelihoodForRow ( const int  UID)

Definition at line 157 of file agmattr.cpp.

157  {
158  return LikelihoodForRow(UID, F[UID]);
159 }
TVec< TIntFltH > F
Definition: agmattr.h:250
double LikelihoodForRow(const int UID)
Definition: agmattr.cpp:157
double TCesna::LikelihoodForRow ( const int  UID,
const TIntFltH FU 
)

Definition at line 161 of file agmattr.cpp.

161  {
162  double L = 0.0;
163  TFltV HOSumFV; //adjust for Fv of v hold out
164  if (HOVIDSV[UID].Len() > 0) {
165  HOSumFV.Gen(SumFV.Len());
166 
167  for (int e = 0; e < HOVIDSV[UID].Len(); e++) {
168  for (int c = 0; c < SumFV.Len(); c++) {
169  HOSumFV[c] += GetCom(HOVIDSV[UID][e], c);
170  }
171  }
172  }
173 
174  TUNGraph::TNodeI NI = G->GetNI(UID);
175  for (int e = 0; e < NI.GetDeg(); e++) {
176  int v = NI.GetNbrNId(e);
177  if (v == UID) { continue; }
178  if (HOVIDSV[UID].IsKey(v)) { continue; }
179  L += log (1.0 - Prediction(FU, F[v])) + NegWgt * DotProduct(FU, F[v]);
180  }
181  for (TIntFltH::TIter HI = FU.BegI(); HI < FU.EndI(); HI++) {
182  double HOSum = HOVIDSV[UID].Len() > 0? HOSumFV[HI.GetKey()].Val: 0.0;//subtract Hold out pairs only if hold out pairs exist
183  L -= NegWgt * (SumFV[HI.GetKey()] - HOSum - GetCom(UID, HI.GetKey())) * HI.GetDat();
184  }
185  //add regularization
186  if (RegCoef > 0.0) { //L1
187  L -= RegCoef * Sum(FU);
188  }
189  if (RegCoef < 0.0) { //L2
190  L += RegCoef * Norm2(FU);
191  }
192  L *= (1.0 - WeightAttr);
193  // add attribute part
194  for (int k = 0; k < Attrs; k++) {
195  if (HOKIDSV[UID].IsKey(k)) { continue; }
196  L += WeightAttr * LikelihoodAttrKForRow(UID, k, FU);
197  }
198  return L;
199 }
PUNGraph G
Definition: agmattr.h:248
TFltV SumFV
Definition: agmattr.h:256
double Sum(const TIntFltH &UV)
Definition: agmattr.h:609
double GetCom(const int &NID, const int &CID)
Definition: agmattr.h:527
TFlt NegWgt
Definition: agmattr.h:265
double Prediction(const TIntFltH &FU, const TIntFltH &FV)
Definition: agmattr.h:584
double DotProduct(const TIntFltH &UV, const TIntFltH &VV)
Definition: agmattr.h:564
TIter BegI() const
Definition: hash.h:171
TSizeTy Len() const
Returns the number of elements in the vector.
Definition: ds.h:547
Node iterator. Only forward iteration (operator++) is supported.
Definition: graph.h:64
TIter EndI() const
Definition: hash.h:176
double LikelihoodAttrKForRow(const int UID, const int K)
Definition: agmattr.h:356
TVec< TIntFltH > F
Definition: agmattr.h:250
int GetDeg() const
Returns degree of the current node.
Definition: graph.h:86
TInt Attrs
Definition: agmattr.h:252
double Norm2(const TIntFltH &UV)
Definition: agmattr.h:616
TNodeI GetNI(const int &NId) const
Returns an iterator referring to the node of ID NId in the graph.
Definition: graph.h:213
TFlt RegCoef
Definition: agmattr.h:255
TVec< TIntSet > HOKIDSV
Definition: agmattr.h:259
TVec< TIntSet > HOVIDSV
Definition: agmattr.h:258
int GetNbrNId(const int &NodeN) const
Returns ID of NodeN-th neighboring node.
Definition: graph.h:107
void Gen(const TSizeTy &_Vals)
Constructs a vector (an array) of _Vals elements.
Definition: ds.h:495
TFlt WeightAttr
Definition: agmattr.h:267
double TCesna::LikelihoodForWK ( const int  K,
const TFltV WK 
)
inline

Definition at line 359 of file agmattr.h.

359  {
360  double L = 0.0;
361  for (int u = 0; u < F.Len(); u++) {
362  if (HOKIDSV[u].IsKey(K)) { continue; }
363  L += LikelihoodAttrKForRow(u, K, F[u], WK);
364  }
365  for (int c = 0; c < WK.Len() - 1; c++) {
366  L -= LassoCoef * fabs(WK[c]);
367  }
368  return L;
369  }
TFlt LassoCoef
Definition: agmattr.h:266
TSizeTy Len() const
Returns the number of elements in the vector.
Definition: ds.h:547
double LikelihoodAttrKForRow(const int UID, const int K)
Definition: agmattr.h:356
TVec< TIntFltH > F
Definition: agmattr.h:250
TVec< TIntSet > HOKIDSV
Definition: agmattr.h:259
double TCesna::LikelihoodForWK ( const int  K)
inline

Definition at line 370 of file agmattr.h.

370 { return LikelihoodForWK(K, W[K]); }
TVec< TFltV > W
Definition: agmattr.h:251
double LikelihoodForWK(const int K, const TFltV &WK)
Definition: agmattr.h:359
double TCesna::LikelihoodGraph ( )
inline

Definition at line 381 of file agmattr.h.

381  {
382  double L = Likelihood();
383  //add regularization
384  if (RegCoef > 0.0) { //L1
385  for (int u = 0; u < F.Len(); u++) {
386  L += RegCoef * Sum(F[u]);
387  }
388  }
389  if (RegCoef < 0.0) { //L2
390  for (int u = 0; u < F.Len(); u++) {
391  L -= RegCoef * Norm2(F[u]);
392  }
393  }
394 
395  return L - WeightAttr * LikelihoodAttr();
396  }
double Sum(const TIntFltH &UV)
Definition: agmattr.h:609
double LikelihoodAttr()
Definition: agmattr.h:371
double Likelihood(const bool DoParallel=false)
Definition: agmattr.cpp:137
TVec< TIntFltH > F
Definition: agmattr.h:250
double Norm2(const TIntFltH &UV)
Definition: agmattr.h:616
TFlt RegCoef
Definition: agmattr.h:255
TFlt WeightAttr
Definition: agmattr.h:267
double TCesna::LikelihoodHoldOut ( )

Definition at line 452 of file agmattr.cpp.

452  {
453  double L = 0.0;
454  for (int u = 0; u < HOVIDSV.Len(); u++) {
455  for (int e = 0; e < HOVIDSV[u].Len(); e++) {
456  int VID = HOVIDSV[u][e];
457  if (VID == u) { continue; }
458  double Pred = Prediction(u, VID);
459  if (G->IsEdge(u, VID)) {
460  L += log(1.0 - Pred);
461  }
462  else {
463  L += NegWgt * log(Pred);
464  }
465  //printf("NODE %d, %d: Dot Prod: %f, Prediction: %f L:%f\n", u, VID, DotProduct(F[u], F[VID]), Pred, L);
466  }
467  }
468  L *= (1.0 - WeightAttr);
469  //printf("likelihood for hold out network:%f\n", L);
470  for (int u = 0; u < HOKIDSV.Len(); u++) {
471  for (int e = 0; e < HOKIDSV[u].Len(); e++) {
472  IAssert(HOKIDSV[u][e] < Attrs);
473  L += WeightAttr * LikelihoodAttrKForRow(u, HOKIDSV[u][e]);
474  //printf("asbefsafd ATTRIBUTE %d, NODE %d:%f\n", u, HOKIDSV[u][e].Val, LikelihoodAttrKForRow(u, HOKIDSV[u][e]));
475  }
476  }
477  return L;
478 }
#define IAssert(Cond)
Definition: bd.h:262
PUNGraph G
Definition: agmattr.h:248
TFlt NegWgt
Definition: agmattr.h:265
double Prediction(const TIntFltH &FU, const TIntFltH &FV)
Definition: agmattr.h:584
double LikelihoodAttrKForRow(const int UID, const int K)
Definition: agmattr.h:356
TInt Attrs
Definition: agmattr.h:252
TVec< TIntSet > HOKIDSV
Definition: agmattr.h:259
TVec< TIntSet > HOVIDSV
Definition: agmattr.h:258
bool IsEdge(const int &SrcNId, const int &DstNId) const
Tests whether an edge between node IDs SrcNId and DstNId exists in the graph.
Definition: graph.cpp:137
TFlt WeightAttr
Definition: agmattr.h:267
void TCesna::Load ( TSIn SIn,
const int &  RndSeed = 0 
)
inline

Definition at line 294 of file agmattr.h.

294  {
295  G->Load(SIn);
296  X.Load(SIn);
297  F.Load(SIn);
298  W.Load(SIn);
299  Attrs.Load(SIn);
300  NIDToIdx.Load(SIn);
301  RegCoef.Load(SIn);
302  LassoCoef.Load(SIn);
303  SumFV.Load(SIn);
304  NumComs.Load(SIn);
305  HOVIDSV.Load(SIn);
306  HOKIDSV.Load(SIn);
307  MinVal.Load(SIn);
308  MaxVal.Load(SIn);
309  MinValW.Load(SIn);
310  MaxValW.Load(SIn);
311  NegWgt.Load(SIn);
312  PNoCom.Load(SIn);
313  }
PUNGraph G
Definition: agmattr.h:248
TFltV SumFV
Definition: agmattr.h:256
TFlt MaxVal
Definition: agmattr.h:262
TFlt LassoCoef
Definition: agmattr.h:266
TFlt NegWgt
Definition: agmattr.h:265
TVec< TFltV > W
Definition: agmattr.h:251
TFlt PNoCom
Definition: agmattr.h:268
TFlt MinVal
Definition: agmattr.h:261
TIntSet NIDToIdx
Definition: agmattr.h:254
void Load(TSIn &SIn)
Definition: ds.h:895
TVec< TIntFltH > F
Definition: agmattr.h:250
TFlt MaxValW
Definition: agmattr.h:264
TFlt MinValW
Definition: agmattr.h:263
TInt Attrs
Definition: agmattr.h:252
void Load(TSIn &SIn)
Definition: dt.h:1059
TVec< TIntSet > X
Definition: agmattr.h:249
TFlt RegCoef
Definition: agmattr.h:255
TVec< TIntSet > HOKIDSV
Definition: agmattr.h:259
TVec< TIntSet > HOVIDSV
Definition: agmattr.h:258
static PUNGraph Load(TSIn &SIn)
Static constructor that loads the graph from a stream SIn and returns a pointer to it...
Definition: graph.h:161
TInt NumComs
Definition: agmattr.h:257
void Load(TSIn &SIn)
Definition: dt.h:1312
void Load(TSIn &SIn)
Definition: shash.h:1078
int TCesna::MLEGradAscent ( const double &  Thres,
const int &  MaxIter,
const TStr  PlotNm,
const double  StepAlpha = 0.3,
const double  StepBeta = 0.1 
)

Definition at line 505 of file agmattr.cpp.

505  {
506  time_t InitTime = time(NULL);
507  TExeTm ExeTm, CheckTm;
508  int iter = 0, PrevIter = 0;
509  TIntFltPrV IterLV;
510  TUNGraph::TNodeI UI;
511  double PrevL = TFlt::Mn, CurL = 0.0;
512  TIntV NIdxV(F.Len(), 0);
513  for (int i = 0; i < F.Len(); i++) { NIdxV.Add(i); }
514  TIntFltH GradV;
515  //consider all C
516  TIntSet CIDSet(NumComs);
517  for (int c = 0; c < NumComs; c++) { CIDSet.AddKey(c); }
518  while(iter < MaxIter) {
519  NIdxV.Shuffle(Rnd);
520  for (int ui = 0; ui < F.Len(); ui++, iter++) {
521  int u = NIdxV[ui]; //
522  /*
523  //find set of candidate c (we only need to consider c to which a neighbor of u belongs to)
524  UI = G->GetNI(u);
525  TIntSet CIDSet(20 * UI.GetDeg());
526  for (int e = 0; e < UI.GetDeg(); e++) {
527  if (HOVIDSV[u].IsKey(UI.GetNbrNId(e))) { continue; }
528  TIntFltH& NbhCIDH = F[UI.GetNbrNId(e)];
529  for (TIntFltH::TIter CI = NbhCIDH.BegI(); CI < NbhCIDH.EndI(); CI++) {
530  CIDSet.AddKey(CI.GetKey());
531  }
532  }
533  for (TIntFltH::TIter CI = F[u].BegI(); CI < F[u].EndI(); CI++) { //remove the community membership which U does not share with its neighbors
534  if (! CIDSet.IsKey(CI.GetKey())) {
535  DelCom(u, CI.GetKey());
536  }
537  }
538  if (CIDSet.Empty()) { continue; }
539  */
540 
541  GradientForRow(u, GradV, CIDSet);
542  if (Norm2(GradV) < 1e-4) { continue; }
543  double LearnRate = GetStepSizeByLineSearch(u, GradV, GradV, StepAlpha, StepBeta);
544  if (LearnRate == 0.0) { continue; }
545  for (int ci = 0; ci < GradV.Len(); ci++) {
546  int CID = GradV.GetKey(ci);
547  double Change = LearnRate * GradV.GetDat(CID);
548  double NewFuc = GetCom(u, CID) + Change;
549  if (NewFuc <= 0.0) {
550  DelCom(u, CID);
551  } else {
552  AddCom(u, CID, NewFuc);
553  }
554  }
555  if (! PlotNm.Empty() && (iter + 1) % G->GetNodes() == 0) {
556  IterLV.Add(TIntFltPr(iter, Likelihood(false)));
557  }
558  }
559  // fit W (logistic regression)
560  for (int k = 0; k < Attrs; k++) {
561  TFltV GradWV(NumComs);
562  GradientForWK(GradWV, k);
563  if (TLinAlg::Norm2(GradWV) < 1e-4) { continue; }
564  double LearnRate = GetStepSizeByLineSearchForWK(k, GradWV, GradWV, StepAlpha, StepBeta);
565  if (LearnRate == 0.0) { continue; }
566  for (int c = 0; c < GradWV.Len(); c++){
567  W[k][c] += LearnRate * GradWV[c];
568  if (W[k][c] < MinValW) { W[k][c] = MinValW; }
569  if (W[k][c] > MaxValW) { W[k][c] = MaxValW; }
570  }
571  }
572  printf("\r%d iterations (%f) [%lu sec]", iter, CurL, time(NULL) - InitTime);
573  fflush(stdout);
574  if (iter - PrevIter >= 2 * G->GetNodes() && iter > 10000) {
575  PrevIter = iter;
576  CurL = Likelihood();
577  if (PrevL > TFlt::Mn && ! PlotNm.Empty()) {
578  printf("\r%d iterations, Likelihood: %f, Diff: %f", iter, CurL, CurL - PrevL);
579  }
580  fflush(stdout);
581  if (CurL - PrevL <= Thres * fabs(PrevL)) { break; }
582  else { PrevL = CurL; }
583  }
584 
585  }
586  printf("\n");
587  printf("MLE for Lambda completed with %d iterations(%s)\n", iter, ExeTm.GetTmStr());
588  if (! PlotNm.Empty()) {
589  TGnuPlot::PlotValV(IterLV, PlotNm + ".likelihood_Q");
590  }
591  return iter;
592 }
PUNGraph G
Definition: agmattr.h:248
Definition: tm.h:355
double GetCom(const int &NID, const int &CID)
Definition: agmattr.h:527
TVec< TFltV > W
Definition: agmattr.h:251
Node iterator. Only forward iteration (operator++) is supported.
Definition: graph.h:64
const TDat & GetDat(const TKey &Key) const
Definition: hash.h:220
double GetStepSizeByLineSearch(const int UID, const TIntFltH &DeltaV, const TIntFltH &GradV, const double &Alpha, const double &Beta, const int MaxIter=10)
Definition: agmattr.cpp:480
double Likelihood(const bool DoParallel=false)
Definition: agmattr.cpp:137
int GetNodes() const
Returns the number of nodes in the graph.
Definition: graph.h:168
TVec< TIntFltH > F
Definition: agmattr.h:250
TFlt MaxValW
Definition: agmattr.h:264
TPair< TInt, TFlt > TIntFltPr
Definition: ds.h:87
TFlt MinValW
Definition: agmattr.h:263
const char * GetTmStr() const
Definition: tm.h:370
TInt Attrs
Definition: agmattr.h:252
static double Norm2(const TFltV &x)
Definition: linalg.cpp:320
double Norm2(const TIntFltH &UV)
Definition: agmattr.h:616
void AddCom(const int &NID, const int &CID, const double &Val)
Definition: agmattr.h:541
TRnd Rnd
Definition: agmattr.h:253
double GetStepSizeByLineSearchForWK(const int K, const TFltV &DeltaV, const TFltV &GradV, const double &Alpha, const double &Beta, const int MaxIter=10)
Definition: agmattr.h:486
bool Empty() const
Definition: dt.h:488
TInt NumComs
Definition: agmattr.h:257
void GradientForRow(const int UID, TIntFltH &GradU, const TIntSet &CIDSet)
Definition: agmattr.cpp:213
void GradientForWK(TFltV &GradV, const int K)
Definition: agmattr.h:421
TSizeTy Add()
Adds a new element at the end of the vector, after its current last element.
Definition: ds.h:574
int Len() const
Definition: hash.h:186
void DelCom(const int &NID, const int &CID)
Definition: agmattr.h:549
static void PlotValV(const TVec< TVal1 > &ValV, const TStr &OutFNmPref, const TStr &Desc="", const TStr &XLabel="", const TStr &YLabel="", const TGpScaleTy &ScaleTy=gpsAuto, const bool &PowerFit=false, const TGpSeriesTy &SeriesTy=gpwLinesPoints)
Definition: gnuplot.h:398
const TKey & GetKey(const int &KeyId) const
Definition: hash.h:210
static const double Mn
Definition: dt.h:1297
Vector is a sequence TVal objects representing an array that can change in size.
Definition: ds.h:429
int TCesna::MLEGradAscentParallel ( const double &  Thres,
const int &  MaxIter,
const int  ChunkNum,
const int  ChunkSize,
const TStr  PlotNm,
const double  StepAlpha = 0.3,
const double  StepBeta = 0.1 
)

Definition at line 594 of file agmattr.cpp.

594  {
595  //parallel
596  time_t InitTime = time(NULL);
597  uint64 StartTm = TSecTm::GetCurTm().GetAbsSecs();
598  TExeTm ExeTm, CheckTm;
599  double PrevL = Likelihood(true);
600  TIntFltPrV IterLV;
601  int PrevIter = 0;
602  int iter = 0;
603  TIntV NIdxV(F.Len(), 0);
604  for (int i = 0; i < F.Len(); i++) { NIdxV.Add(i); }
605  TIntV NIDOPTV(F.Len()); //check if a node needs optimization or not 1: does not require optimization
606  NIDOPTV.PutAll(0);
607  TVec<TIntFltH> NewF(ChunkNum * ChunkSize);
608  TIntV NewNIDV(ChunkNum * ChunkSize);
609  for (iter = 0; iter < MaxIter; iter++) {
610  NIdxV.Clr(false);
611  for (int i = 0; i < F.Len(); i++) {
612  if (NIDOPTV[i] == 0) { NIdxV.Add(i); }
613  }
614  IAssert (NIdxV.Len() <= F.Len());
615  NIdxV.Shuffle(Rnd);
616  // compute gradient for chunk of nodes
617 #pragma omp parallel for schedule(static, 1)
618  for (int TIdx = 0; TIdx < ChunkNum; TIdx++) {
619  TIntFltH GradV;
620  for (int ui = TIdx * ChunkSize; ui < (TIdx + 1) * ChunkSize; ui++) {
621  NewNIDV[ui] = -1;
622  if (ui >= NIdxV.Len()) { continue; }
623  int u = NIdxV[ui]; //
624  //find set of candidate c (we only need to consider c to which a neighbor of u belongs to)
625  TUNGraph::TNodeI UI = G->GetNI(u);
626  TIntSet CIDSet(5 * UI.GetDeg());
627  TIntFltH CurFU = F[u];
628  for (int e = 0; e < UI.GetDeg(); e++) {
629  if (HOVIDSV[u].IsKey(UI.GetNbrNId(e))) { continue; }
630  TIntFltH& NbhCIDH = F[UI.GetNbrNId(e)];
631  for (TIntFltH::TIter CI = NbhCIDH.BegI(); CI < NbhCIDH.EndI(); CI++) {
632  CIDSet.AddKey(CI.GetKey());
633  }
634  }
635  if (CIDSet.Empty()) {
636  CurFU.Clr();
637  }
638  else {
639  for (TIntFltH::TIter CI = CurFU.BegI(); CI < CurFU.EndI(); CI++) { //remove the community membership which U does not share with its neighbors
640  if (! CIDSet.IsKey(CI.GetKey())) {
641  CurFU.DelIfKey(CI.GetKey());
642  }
643  }
644  GradientForRow(u, GradV, CIDSet);
645  if (Norm2(GradV) < 1e-4) { NIDOPTV[u] = 1; continue; }
646  double LearnRate = GetStepSizeByLineSearch(u, GradV, GradV, StepAlpha, StepBeta);
647  if (LearnRate == 0.0) { NewNIDV[ui] = -2; continue; }
648  for (int ci = 0; ci < GradV.Len(); ci++) {
649  int CID = GradV.GetKey(ci);
650  double Change = LearnRate * GradV.GetDat(CID);
651  double NewFuc = CurFU.IsKey(CID)? CurFU.GetDat(CID) + Change : Change;
652  if (NewFuc <= 0.0) {
653  CurFU.DelIfKey(CID);
654  } else {
655  CurFU.AddDat(CID) = NewFuc;
656  }
657  }
658  CurFU.Defrag();
659  }
660  //store changes
661  NewF[ui] = CurFU;
662  NewNIDV[ui] = u;
663  }
664  }
665  int NumNoChangeGrad = 0;
666  int NumNoChangeStepSize = 0;
667  for (int ui = 0; ui < NewNIDV.Len(); ui++) {
668  int NewNID = NewNIDV[ui];
669  if (NewNID == -1) { NumNoChangeGrad++; continue; }
670  if (NewNID == -2) { NumNoChangeStepSize++; continue; }
671  for (TIntFltH::TIter CI = F[NewNID].BegI(); CI < F[NewNID].EndI(); CI++) {
672  SumFV[CI.GetKey()] -= CI.GetDat();
673  }
674  }
675 #pragma omp parallel for
676  for (int ui = 0; ui < NewNIDV.Len(); ui++) {
677  int NewNID = NewNIDV[ui];
678  if (NewNID < 0) { continue; }
679  F[NewNID] = NewF[ui];
680  }
681  for (int ui = 0; ui < NewNIDV.Len(); ui++) {
682  int NewNID = NewNIDV[ui];
683  if (NewNID < 0) { continue; }
684  for (TIntFltH::TIter CI = F[NewNID].BegI(); CI < F[NewNID].EndI(); CI++) {
685  SumFV[CI.GetKey()] += CI.GetDat();
686  }
687  }
688  // update the nodes who are optimal
689  for (int ui = 0; ui < NewNIDV.Len(); ui++) {
690  int NewNID = NewNIDV[ui];
691  if (NewNID < 0) { continue; }
692  TUNGraph::TNodeI UI = G->GetNI(NewNID);
693  NIDOPTV[NewNID] = 0;
694  for (int e = 0; e < UI.GetDeg(); e++) {
695  NIDOPTV[UI.GetNbrNId(e)] = 0;
696  }
697  }
698  int OPTCnt = 0;
699  for (int i = 0; i < NIDOPTV.Len(); i++) { if (NIDOPTV[i] == 1) { OPTCnt++; } }
700  // fit W (logistic regression)
701  if (! PlotNm.Empty()) {
702  printf("\r%d iterations [%s] %s secs", iter * ChunkSize * ChunkNum, ExeTm.GetTmStr(), TUInt64::GetStr(TSecTm::GetCurTm().GetAbsSecs()-StartTm).CStr());
703  if (PrevL > TFlt::Mn) { printf(" (%f) %d g %d s %d OPT", PrevL, NumNoChangeGrad, NumNoChangeStepSize, OPTCnt); }
704  fflush(stdout);
705  }
706  if (iter == 0 || (iter - PrevIter) * ChunkSize * ChunkNum >= G->GetNodes()) {
707  #pragma omp parallel for
708  for (int k = 0; k < Attrs; k++) {
709  TFltV GradWV(NumComs);
710  GradientForWK(GradWV, k);
711  if (TLinAlg::Norm2(GradWV) < 1e-4) { continue; }
712  double LearnRate = GetStepSizeByLineSearchForWK(k, GradWV, GradWV, StepAlpha, StepBeta);
713  if (LearnRate == 0.0) { continue; }
714  for (int c = 0; c < GradWV.Len(); c++){
715  W[k][c] += LearnRate * GradWV[c];
716  if (W[k][c] < MinValW) { W[k][c] = MinValW; }
717  if (W[k][c] > MaxValW) { W[k][c] = MaxValW; }
718  }
719  }
720  PrevIter = iter;
721  double CurL = Likelihood(true);
722  IterLV.Add(TIntFltPr(iter * ChunkSize * ChunkNum, CurL));
723  printf("\r%d iterations, Likelihood: %f, Diff: %f [%lu secs]", iter, CurL, CurL - PrevL, time(NULL) - InitTime);
724  fflush(stdout);
725  if (CurL - PrevL <= Thres * fabs(PrevL)) {
726  break;
727  }
728  else {
729  PrevL = CurL;
730  }
731  }
732  }
733  if (! PlotNm.Empty()) {
734  printf("\nMLE completed with %d iterations(%s secs)\n", iter, TUInt64::GetStr(TSecTm::GetCurTm().GetAbsSecs()-StartTm).CStr());
735  TGnuPlot::PlotValV(IterLV, PlotNm + ".likelihood_Q");
736  } else {
737  printf("\rMLE completed with %d iterations(%lu secs)", iter, time(NULL) - InitTime);
738  fflush(stdout);
739  }
740  return iter;
741 }
#define IAssert(Cond)
Definition: bd.h:262
PUNGraph G
Definition: agmattr.h:248
TFltV SumFV
Definition: agmattr.h:256
Definition: tm.h:355
TVec< TFltV > W
Definition: agmattr.h:251
TIter BegI() const
Definition: hash.h:171
Node iterator. Only forward iteration (operator++) is supported.
Definition: graph.h:64
const TDat & GetDat(const TKey &Key) const
Definition: hash.h:220
TIter EndI() const
Definition: hash.h:176
double GetStepSizeByLineSearch(const int UID, const TIntFltH &DeltaV, const TIntFltH &GradV, const double &Alpha, const double &Beta, const int MaxIter=10)
Definition: agmattr.cpp:480
double Likelihood(const bool DoParallel=false)
Definition: agmattr.cpp:137
int GetNodes() const
Returns the number of nodes in the graph.
Definition: graph.h:168
TVec< TIntFltH > F
Definition: agmattr.h:250
TFlt MaxValW
Definition: agmattr.h:264
TPair< TInt, TFlt > TIntFltPr
Definition: ds.h:87
int GetDeg() const
Returns degree of the current node.
Definition: graph.h:86
TFlt MinValW
Definition: agmattr.h:263
const char * GetTmStr() const
Definition: tm.h:370
TInt Attrs
Definition: agmattr.h:252
static double Norm2(const TFltV &x)
Definition: linalg.cpp:320
void PutAll(const TVal &Val)
Sets all elements of the vector to value Val.
Definition: ds.h:1166
unsigned long long uint64
Definition: bd.h:38
const TVal & GetDat(const TVal &Val) const
Returns reference to the first occurrence of element Val.
Definition: ds.h:807
double Norm2(const TIntFltH &UV)
Definition: agmattr.h:616
uint GetAbsSecs() const
Definition: tm.h:150
TNodeI GetNI(const int &NId) const
Returns an iterator referring to the node of ID NId in the graph.
Definition: graph.h:213
TVec< TIntSet > HOVIDSV
Definition: agmattr.h:258
TRnd Rnd
Definition: agmattr.h:253
TStr GetStr() const
Definition: dt.h:1270
double GetStepSizeByLineSearchForWK(const int K, const TFltV &DeltaV, const TFltV &GradV, const double &Alpha, const double &Beta, const int MaxIter=10)
Definition: agmattr.h:486
bool Empty() const
Definition: dt.h:488
TInt NumComs
Definition: agmattr.h:257
void GradientForRow(const int UID, TIntFltH &GradU, const TIntSet &CIDSet)
Definition: agmattr.cpp:213
static TSecTm GetCurTm()
Definition: tm.cpp:697
int GetNbrNId(const int &NodeN) const
Returns ID of NodeN-th neighboring node.
Definition: graph.h:107
void GradientForWK(TFltV &GradV, const int K)
Definition: agmattr.h:421
char * CStr()
Definition: dt.h:476
TSizeTy Add()
Adds a new element at the end of the vector, after its current last element.
Definition: ds.h:574
int Len() const
Definition: hash.h:186
THKeyDat * EndI
Definition: hash.h:45
static void PlotValV(const TVec< TVal1 > &ValV, const TStr &OutFNmPref, const TStr &Desc="", const TStr &XLabel="", const TStr &YLabel="", const TGpScaleTy &ScaleTy=gpsAuto, const bool &PowerFit=false, const TGpSeriesTy &SeriesTy=gpwLinesPoints)
Definition: gnuplot.h:398
const TKey & GetKey(const int &KeyId) const
Definition: hash.h:210
static const double Mn
Definition: dt.h:1297
Vector is a sequence TVal objects representing an array that can change in size.
Definition: ds.h:429
int TCesna::MLEGradAscentParallel ( const double &  Thres,
const int &  MaxIter,
const int  ChunkNum,
const TStr  PlotNm = TStr(),
const double  StepAlpha = 0.3,
const double  StepBeta = 0.1 
)
inline

Definition at line 521 of file agmattr.h.

521  {
522  int ChunkSize = G->GetNodes() / 10 / ChunkNum;
523  if (ChunkSize == 0) { ChunkSize = 1; }
524  return MLEGradAscentParallel(Thres, MaxIter, ChunkNum, ChunkSize, PlotNm, StepAlpha, StepBeta);
525  }
int MLEGradAscentParallel(const double &Thres, const int &MaxIter, const int ChunkNum, const int ChunkSize, const TStr PlotNm, const double StepAlpha=0.3, const double StepBeta=0.1)
Definition: agmattr.cpp:594
PUNGraph G
Definition: agmattr.h:248
int GetNodes() const
Returns the number of nodes in the graph.
Definition: graph.h:168
void TCesna::NeighborComInit ( const int  InitComs)

Definition at line 32 of file agmattr.cpp.

32  {
33  //initialize with best neighborhood communities (Gleich et.al. KDD'12)
34  TFltIntPrV NIdPhiV(F.Len(), 0);
35  TCesnaUtil::GetNIdPhiV<PUNGraph>(G, NIdPhiV);
36  NeighborComInit(NIdPhiV, InitComs);
37 }
PUNGraph G
Definition: agmattr.h:248
TVec< TIntFltH > F
Definition: agmattr.h:250
void NeighborComInit(const int InitComs)
Definition: agmattr.cpp:32
Vector is a sequence TVal objects representing an array that can change in size.
Definition: ds.h:429
void TCesna::NeighborComInit ( TFltIntPrV NIdPhiV,
const int  InitComs 
)

Definition at line 39 of file agmattr.cpp.

39  {
40  //initialize with best neighborhood communities (Gleich et.al. KDD'12)
41  NIdPhiV.Sort(true);
42  F.Gen(G->GetNodes());
43  SumFV.Gen(InitComs);
44  NumComs = InitComs;
45  TIntSet InvalidNIDS(F.Len());
46  TIntV ChosenNIDV(InitComs, 0); //FOR DEBUG
47  //choose nodes with local minimum in conductance
48  int CurCID = 0;
49  for (int ui = 0; ui < NIdPhiV.Len(); ui++) {
50  int UID = NIdPhiV[ui].Val2;
51  fflush(stdout);
52  if (InvalidNIDS.IsKey(UID)) { continue; }
53  ChosenNIDV.Add(UID); //FOR DEBUG
54  //add the node and its neighbors to the current community
55  AddCom(UID, CurCID, 1.0);
56  TUNGraph::TNodeI NI = G->GetNI(UID);
57  fflush(stdout);
58  for (int e = 0; e < NI.GetDeg(); e++) {
59  AddCom(NI.GetNbrNId(e), CurCID, 1.0);
60  }
61  //exclude its neighbors from the next considerations
62  for (int e = 0; e < NI.GetDeg(); e++) {
63  InvalidNIDS.AddKey(NI.GetNbrNId(e));
64  }
65  CurCID++;
66  fflush(stdout);
67  if (CurCID >= NumComs) { break; }
68  }
69  if (NumComs > CurCID) {
70  printf("%d communities needed to fill randomly\n", NumComs - CurCID);
71  }
72  //assign a member to zero-member community (if any)
73  for (int c = 0; c < SumFV.Len(); c++) {
74  if (SumFV[c] == 0.0) {
75  int ComSz = 10;
76  for (int u = 0; u < ComSz; u++) {
77  int UID = Rnd.GetUniDevInt(G->GetNodes());
78  AddCom(UID, c, Rnd.GetUniDev());
79  }
80  }
81  }
82  InitW();
83 }
PUNGraph G
Definition: agmattr.h:248
TFltV SumFV
Definition: agmattr.h:256
TSizeTy Len() const
Returns the number of elements in the vector.
Definition: ds.h:547
Node iterator. Only forward iteration (operator++) is supported.
Definition: graph.h:64
int GetNodes() const
Returns the number of nodes in the graph.
Definition: graph.h:168
TVec< TIntFltH > F
Definition: agmattr.h:250
int GetDeg() const
Returns degree of the current node.
Definition: graph.h:86
void Sort(const bool &Asc=true)
Sorts the elements of the vector.
Definition: ds.h:1254
TNodeI GetNI(const int &NId) const
Returns an iterator referring to the node of ID NId in the graph.
Definition: graph.h:213
void AddCom(const int &NID, const int &CID, const double &Val)
Definition: agmattr.h:541
void InitW()
Definition: agmattr.h:331
TRnd Rnd
Definition: agmattr.h:253
TInt NumComs
Definition: agmattr.h:257
double GetUniDev()
Definition: dt.h:30
int GetNbrNId(const int &NodeN) const
Returns ID of NodeN-th neighboring node.
Definition: graph.h:107
void Gen(const TSizeTy &_Vals)
Constructs a vector (an array) of _Vals elements.
Definition: ds.h:495
int GetUniDevInt(const int &Range=0)
Definition: dt.cpp:39
double TCesna::Norm2 ( const TIntFltH UV)
inline

Definition at line 616 of file agmattr.h.

616  {
617  double N = 0.0;
618  for (TIntFltH::TIter HI = UV.BegI(); HI < UV.EndI(); HI++) {
619  N += HI.GetDat() * HI.GetDat();
620  }
621  return N;
622  }
TIter BegI() const
Definition: hash.h:171
TIter EndI() const
Definition: hash.h:176
double TCesna::PredictAttrK ( const TIntFltH FU,
const TFltV WK 
)
inline

Definition at line 589 of file agmattr.h.

589  {
590  double DP = 0.0;
591  for (TIntFltH::TIter FI = FU.BegI(); FI < FU.EndI(); FI++) {
592  DP += FI.GetDat() * WK[FI.GetKey()];
593  }
594  DP += WK.Last();
595  return Sigmoid(DP);
596  }
double Sigmoid(const double X)
Definition: agmattr.h:632
TIter BegI() const
Definition: hash.h:171
TIter EndI() const
Definition: hash.h:176
const TVal & Last() const
Returns a reference to the last element of the vector.
Definition: ds.h:551
double TCesna::PredictAttrK ( const TIntFltH FU,
const int  K 
)
inline

Definition at line 597 of file agmattr.h.

597  {
598  return PredictAttrK(FU, W[K]);
599  }
TVec< TFltV > W
Definition: agmattr.h:251
double PredictAttrK(const TIntFltH &FU, const TFltV &WK)
Definition: agmattr.h:589
double TCesna::PredictAttrK ( const int  UID,
const int  K 
)
inline

Definition at line 600 of file agmattr.h.

600  {
601  return PredictAttrK(F[UID], W[K]);
602  }
TVec< TFltV > W
Definition: agmattr.h:251
double PredictAttrK(const TIntFltH &FU, const TFltV &WK)
Definition: agmattr.h:589
TVec< TIntFltH > F
Definition: agmattr.h:250
double TCesna::Prediction ( const TIntFltH FU,
const TIntFltH FV 
)
inline

Definition at line 584 of file agmattr.h.

584  {
585  double DP = log (1.0 / (1.0 - PNoCom)) + DotProduct(FU, FV);
586  IAssertR(DP > 0.0, TStr::Fmt("DP: %f", DP));
587  return exp(- DP);
588  }
#define IAssertR(Cond, Reason)
Definition: bd.h:265
double DotProduct(const TIntFltH &UV, const TIntFltH &VV)
Definition: agmattr.h:564
TFlt PNoCom
Definition: agmattr.h:268
static TStr Fmt(const char *FmtStr,...)
Definition: dt.cpp:1599
double TCesna::Prediction ( const int &  UID,
const int &  VID 
)
inline

Definition at line 606 of file agmattr.h.

606  {
607  return Prediction(F[UID], F[VID]);
608  }
double Prediction(const TIntFltH &FU, const TIntFltH &FV)
Definition: agmattr.h:584
TVec< TIntFltH > F
Definition: agmattr.h:250
void TCesna::RandomInit ( const int  InitComs)

Definition at line 9 of file agmattr.cpp.

9  {
10  F.Gen(G->GetNodes());
11  SumFV.Gen(InitComs);
12  NumComs = InitComs;
13  for (int u = 0; u < F.Len(); u++) {
14  //assign to just one community
15  int Mem = G->GetNI(u).GetDeg();
16  if (Mem > 10) { Mem = 10; }
17  for (int c = 0; c < Mem; c++) {
18  int CID = Rnd.GetUniDevInt(InitComs);
19  AddCom(u, CID, Rnd.GetUniDev());
20  }
21  }
22  //assign a member to zero-member community (if any)
23  for (int c = 0; c < SumFV.Len(); c++) {
24  if (SumFV[c] == 0.0) {
25  int UID = Rnd.GetUniDevInt(G->GetNodes());
26  AddCom(UID, c, Rnd.GetUniDev());
27  }
28  }
29  InitW();
30 }
PUNGraph G
Definition: agmattr.h:248
TFltV SumFV
Definition: agmattr.h:256
TSizeTy Len() const
Returns the number of elements in the vector.
Definition: ds.h:547
int GetNodes() const
Returns the number of nodes in the graph.
Definition: graph.h:168
TVec< TIntFltH > F
Definition: agmattr.h:250
int GetDeg() const
Returns degree of the current node.
Definition: graph.h:86
TNodeI GetNI(const int &NId) const
Returns an iterator referring to the node of ID NId in the graph.
Definition: graph.h:213
void AddCom(const int &NID, const int &CID, const double &Val)
Definition: agmattr.h:541
void InitW()
Definition: agmattr.h:331
TRnd Rnd
Definition: agmattr.h:253
TInt NumComs
Definition: agmattr.h:257
double GetUniDev()
Definition: dt.h:30
void Gen(const TSizeTy &_Vals)
Constructs a vector (an array) of _Vals elements.
Definition: ds.h:495
int GetUniDevInt(const int &Range=0)
Definition: dt.cpp:39
void TCesna::Save ( TSOut SOut)
inline

Definition at line 274 of file agmattr.h.

274  {
275  G->Save(SOut);
276  X.Save(SOut);
277  F.Save(SOut);
278  W.Save(SOut);
279  Attrs.Save(SOut);
280  NIDToIdx.Save(SOut);
281  RegCoef.Save(SOut);
282  LassoCoef.Save(SOut);
283  SumFV.Save(SOut);
284  NumComs.Save(SOut);
285  HOVIDSV.Save(SOut);
286  HOKIDSV.Save(SOut);
287  MinVal.Save(SOut);
288  MaxVal.Save(SOut);
289  MinValW.Save(SOut);
290  MaxValW.Save(SOut);
291  NegWgt.Save(SOut);
292  PNoCom.Save(SOut);
293  }
PUNGraph G
Definition: agmattr.h:248
TFltV SumFV
Definition: agmattr.h:256
TFlt MaxVal
Definition: agmattr.h:262
TFlt LassoCoef
Definition: agmattr.h:266
TFlt NegWgt
Definition: agmattr.h:265
TVec< TFltV > W
Definition: agmattr.h:251
void Save(TSOut &SOut) const
Definition: dt.h:1060
TFlt PNoCom
Definition: agmattr.h:268
TFlt MinVal
Definition: agmattr.h:261
TIntSet NIDToIdx
Definition: agmattr.h:254
void Save(TSOut &SOut) const
Saves the graph to a (binary) stream SOut.
Definition: graph.h:153
TVec< TIntFltH > F
Definition: agmattr.h:250
void Save(TSOut &SOut) const
Definition: ds.h:903
TFlt MaxValW
Definition: agmattr.h:264
TFlt MinValW
Definition: agmattr.h:263
TInt Attrs
Definition: agmattr.h:252
TVec< TIntSet > X
Definition: agmattr.h:249
TFlt RegCoef
Definition: agmattr.h:255
TVec< TIntSet > HOKIDSV
Definition: agmattr.h:259
TVec< TIntSet > HOVIDSV
Definition: agmattr.h:258
TInt NumComs
Definition: agmattr.h:257
void Save(TSOut &SOut) const
Definition: shash.h:1082
void Save(TSOut &SOut) const
Definition: dt.h:1309
void TCesna::SetAttrHoldOut ( const int  NID,
const int  KID 
)
inline

Definition at line 337 of file agmattr.h.

337  {
338  int NIdx = NIDToIdx.GetKeyId(NID);
339  HOKIDSV[NIdx].AddKey(KID);
340  }
int GetKeyId(const TKey &Key) const
Definition: shash.h:1328
TIntSet NIDToIdx
Definition: agmattr.h:254
TVec< TIntSet > HOKIDSV
Definition: agmattr.h:259
void TCesna::SetAttrHoldOutForOneNode ( const int  NID)
inline

Definition at line 341 of file agmattr.h.

341  {
342  for (int k = 0; k < Attrs; k++) {
343  SetAttrHoldOut(NID, k);
344  }
345  }
TInt Attrs
Definition: agmattr.h:252
void SetAttrHoldOut(const int NID, const int KID)
Definition: agmattr.h:337
void TCesna::SetCmtyVV ( const TVec< TIntV > &  CmtyVV)

Definition at line 85 of file agmattr.cpp.

85  {
86  F.Gen(G->GetNodes());
87  SumFV.Gen(CmtyVV.Len());
88  NumComs = CmtyVV.Len();
89  InitW();
90  for (int c = 0; c < CmtyVV.Len(); c++) {
91  for (int u = 0; u < CmtyVV[c].Len(); u++) {
92  int UID = CmtyVV[c][u];
93  if (! NIDToIdx.IsKey(UID)) { continue; }
94  AddCom(NIDToIdx.GetKeyId(UID), c, 1.0);
95  }
96  }
97 }
PUNGraph G
Definition: agmattr.h:248
TFltV SumFV
Definition: agmattr.h:256
int GetKeyId(const TKey &Key) const
Definition: shash.h:1328
TSizeTy Len() const
Returns the number of elements in the vector.
Definition: ds.h:547
TIntSet NIDToIdx
Definition: agmattr.h:254
bool IsKey(const TKey &Key) const
Definition: shash.h:1148
int GetNodes() const
Returns the number of nodes in the graph.
Definition: graph.h:168
TVec< TIntFltH > F
Definition: agmattr.h:250
void AddCom(const int &NID, const int &CID, const double &Val)
Definition: agmattr.h:541
void InitW()
Definition: agmattr.h:331
TInt NumComs
Definition: agmattr.h:257
void Gen(const TSizeTy &_Vals)
Constructs a vector (an array) of _Vals elements.
Definition: ds.h:495
void TCesna::SetGraph ( const PUNGraph GraphPt,
const THash< TInt, TIntV > &  NIDAttrH 
)

Definition at line 99 of file agmattr.cpp.

99  {
100  HOVIDSV.Gen(GraphPt->GetNodes());
101  HOKIDSV.Gen(GraphPt->GetNodes());
102  X.Gen(GraphPt->GetNodes());
103  TIntV NIDV;
104  GraphPt->GetNIdV(NIDV);
105  NIDToIdx.Gen(NIDV.Len());
106  NIDToIdx.AddKeyV(NIDV);
107  // check that nodes IDs are {0,1,..,Nodes-1}
108  printf("rearrage nodes\n");
109  G = TSnap::GetSubGraph(GraphPt, NIDV, true);
110  for (int nid = 0; nid < G->GetNodes(); nid++) {
111  IAssert(G->IsNode(nid));
112  }
114 
115  PNoCom = 1.0 / (double) G->GetNodes();
116  DoParallel = false;
117  if (1.0 / PNoCom > sqrt(TFlt::Mx)) { PNoCom = 0.99 / sqrt(TFlt::Mx); } // to prevent overflow
118  NegWgt = 1.0;
119 
120  // add node attributes, find number of attributes
121  int NumAttr = 0;
122  for (int u = 0; u < NIDAttrH.Len(); u++) {
123  int UID = NIDAttrH.GetKey(u);
124  if (! NIDToIdx.IsKey(UID)) { continue; }
125  X[NIDToIdx.GetKeyId(UID)].Gen(NIDAttrH[u].Len());
126  for (int k = 0; k < NIDAttrH[u].Len(); k++) {
127  int KID = NIDAttrH[u][k];
128  IAssert (KID >= 0);
129  X[NIDToIdx.GetKeyId(UID)].AddKey(KID);
130  if (NumAttr < KID + 1) { NumAttr = KID + 1; }
131  }
132  }
133  Attrs = NumAttr;
134  InitW();
135 }
#define IAssert(Cond)
Definition: bd.h:262
PUNGraph G
Definition: agmattr.h:248
TBool DoParallel
Definition: agmattr.h:269
void AddKeyV(const TVec< TKey > &KeyV)
Definition: shash.h:1284
TFlt NegWgt
Definition: agmattr.h:265
int GetKeyId(const TKey &Key) const
Definition: shash.h:1328
void GetNIdV(TIntV &NIdV) const
Gets a vector IDs of all nodes in the graph.
Definition: graph.cpp:153
TFlt PNoCom
Definition: agmattr.h:268
TSizeTy Len() const
Returns the number of elements in the vector.
Definition: ds.h:547
void Gen(const int &ExpectVals)
Definition: shash.h:1115
TIntSet NIDToIdx
Definition: agmattr.h:254
static const double Mx
Definition: dt.h:1298
bool IsKey(const TKey &Key) const
Definition: shash.h:1148
int GetNodes() const
Returns the number of nodes in the graph.
Definition: graph.h:168
TInt Attrs
Definition: agmattr.h:252
PUNGraph GetSubGraph(const PUNGraph &Graph, const TIntV &NIdV, const bool &RenumberNodes)
Returns an induced subgraph of an undirected graph Graph with NIdV nodes with an optional node renumb...
Definition: subgraph.cpp:7
TVec< TIntSet > X
Definition: agmattr.h:249
TVec< TIntSet > HOKIDSV
Definition: agmattr.h:259
TVec< TIntSet > HOVIDSV
Definition: agmattr.h:258
void InitW()
Definition: agmattr.h:331
bool IsNode(const int &NId) const
Tests whether ID NId is a node.
Definition: graph.h:207
void DelSelfEdges(const PGraph &Graph)
Removes all the self-edges from the graph.
Definition: alg.h:419
int Len() const
Definition: hash.h:186
const TKey & GetKey(const int &KeyId) const
Definition: hash.h:210
void TCesna::SetHoldOut ( const double  HOFrac)
inline

Definition at line 414 of file agmattr.h.

414  {
415  TVec<TIntSet> HoldOut;
416  TCesnaUtil::GenHoldOutPairs(G, HoldOut, HOFrac, Rnd);
417  GenHoldOutAttr(HOFrac, HOKIDSV);
418  HOVIDSV = HoldOut;
419  }
void GenHoldOutAttr(const double HOFrac, TVec< TIntSet > &HOSetV)
Definition: agmattr.h:397
PUNGraph G
Definition: agmattr.h:248
TVec< TIntSet > HOKIDSV
Definition: agmattr.h:259
TVec< TIntSet > HOVIDSV
Definition: agmattr.h:258
static void GenHoldOutPairs(const PGraph &G, TVec< TIntSet > &HoldOutSet, double HOFrac, TRnd &Rnd)
Definition: agmattr.h:38
TRnd Rnd
Definition: agmattr.h:253
Vector is a sequence TVal objects representing an array that can change in size.
Definition: ds.h:429
void TCesna::SetLassoCoef ( const double  _LassoCoef)
inline

Definition at line 320 of file agmattr.h.

320 { LassoCoef = _LassoCoef; }
TFlt LassoCoef
Definition: agmattr.h:266
void TCesna::SetRegCoef ( const double  _RegCoef)
inline

Definition at line 316 of file agmattr.h.

316 { RegCoef = _RegCoef; }
TFlt RegCoef
Definition: agmattr.h:255
void TCesna::SetW ( TVec< TFltV > &  _W)
inline

Definition at line 347 of file agmattr.h.

347 { W = _W; }
TVec< TFltV > W
Definition: agmattr.h:251
void TCesna::SetWeightAttr ( const double  _WeightAttr)
inline

Definition at line 318 of file agmattr.h.

318 { IAssert (_WeightAttr <= 1.0 && _WeightAttr >= 0.0); WeightAttr = _WeightAttr; }
#define IAssert(Cond)
Definition: bd.h:262
TFlt WeightAttr
Definition: agmattr.h:267
double TCesna::Sigmoid ( const double  X)
inline

Definition at line 632 of file agmattr.h.

632  {
633  return 1.0 / ( 1.0 + exp(-X));
634  }
TVec< TIntSet > X
Definition: agmattr.h:249
double TCesna::Sum ( const TIntFltH UV)
inline

Definition at line 609 of file agmattr.h.

609  {
610  double N = 0.0;
611  for (TIntFltH::TIter HI = UV.BegI(); HI < UV.EndI(); HI++) {
612  N += HI.GetDat();
613  }
614  return N;
615  }
TIter BegI() const
Definition: hash.h:171
TIter EndI() const
Definition: hash.h:176

Member Data Documentation

TInt TCesna::Attrs
private

Definition at line 252 of file agmattr.h.

TBool TCesna::DoParallel

Definition at line 269 of file agmattr.h.

TVec<TIntFltH> TCesna::F
private

Definition at line 250 of file agmattr.h.

PUNGraph TCesna::G
private

Definition at line 248 of file agmattr.h.

TVec<TIntSet> TCesna::HOKIDSV
private

Definition at line 259 of file agmattr.h.

TVec<TIntSet> TCesna::HOVIDSV
private

Definition at line 258 of file agmattr.h.

TFlt TCesna::LassoCoef

Definition at line 266 of file agmattr.h.

TFlt TCesna::MaxVal

Definition at line 262 of file agmattr.h.

TFlt TCesna::MaxValW

Definition at line 264 of file agmattr.h.

TFlt TCesna::MinVal

Definition at line 261 of file agmattr.h.

TFlt TCesna::MinValW

Definition at line 263 of file agmattr.h.

TFlt TCesna::NegWgt

Definition at line 265 of file agmattr.h.

TIntSet TCesna::NIDToIdx
private

Definition at line 254 of file agmattr.h.

TInt TCesna::NumComs
private

Definition at line 257 of file agmattr.h.

TFlt TCesna::PNoCom

Definition at line 268 of file agmattr.h.

TFlt TCesna::RegCoef
private

Definition at line 255 of file agmattr.h.

TRnd TCesna::Rnd
private

Definition at line 253 of file agmattr.h.

TFltV TCesna::SumFV
private

Definition at line 256 of file agmattr.h.

TVec<TFltV> TCesna::W
private

Definition at line 251 of file agmattr.h.

TFlt TCesna::WeightAttr

Definition at line 267 of file agmattr.h.

TVec<TIntSet> TCesna::X
private

Definition at line 249 of file agmattr.h.


The documentation for this class was generated from the following files: