SNAP Library 2.2, Developer Reference
2014-03-11 19:15:55
SNAP, a general purpose, high performance system for analysis and manipulation of large networks
|
00001 #ifndef snap_agm_h 00002 #define snap_agm_h 00003 #include "Snap.h" 00004 00007 class TAGM { 00008 public: 00009 static void RndConnectInsideCommunity(PUNGraph& Graph, const TIntV& CmtyV, const double& Prob, TRnd& Rnd); 00010 static PUNGraph GenAGM(const TIntV& NIdV , THash<TInt,TIntV >& CmtyVH, const TStr& AGMNm, const double& PiCoef, const double& ProbBase, TRnd& Rnd=TInt::Rnd); 00011 static PUNGraph GenAGM(TVec<TIntV >& CmtyVV, const double& DensityCoef, const double& ScaleCoef, TRnd& Rnd=TInt::Rnd); 00012 static PUNGraph GenAGM(TVec<TIntV>& CmtyVV, const double& DensityCoef, const int TargetEdges, TRnd& Rnd); 00013 static PUNGraph GenAGM(TVec<TIntV>& CmtyVV, const TFltV& CProbV, TRnd& Rnd, const double PNoCom = -1.0); 00014 }; 00015 00018 class TAGMUtil { 00019 public: 00020 static void GenPLSeq(TIntV& SzSeq,const int& SeqLen, const double& Alpha, TRnd& Rnd, const int& Min, const int& Max); 00021 static void ConnectCmtyVV(TVec<TIntV>& CmtyVV, const TIntPrV& CIDSzPrV, const TIntPrV& NIDMemPrV, TRnd& Rnd); 00022 static void GenCmtyVVFromPL(TVec<TIntV>& CmtyVV, const PUNGraph& Graph, const int& Nodes, const int& Coms, const double& ComSzAlpha, const double& MemAlpha, const int& MinSz, const int& MaxSz, const int& MinK, const int& MaxK, TRnd& Rnd); 00023 static void GenCmtyVVFromPL(TVec<TIntV>& CmtyVV, const TIntV& NIDV, const int& Nodes, const int& Coms, const double& ComSzAlpha, const double& MemAlpha, const int& MinSz, const int& MaxSz, const int& MinK, const int& MaxK, TRnd& Rnd); 00024 static void GenCmtyVVFromPL(TVec<TIntV>& CmtyVV, const int& Nodes, const int& Coms, const double& ComSzAlpha, const double& MemAlpha, const int& MinSz, const int& MaxSz, const int& MinK, const int& MaxK, TRnd& Rnd); 00025 static void RndConnectInsideCommunity(PUNGraph& Graph, const TIntV& CmtyV, const double& Prob, TRnd& Rnd); 00026 static PUNGraph GenAGM(TVec<TIntV >& CmtyVV, const double& DensityCoef, const double& ScaleCoef, TRnd& Rnd=TInt::Rnd); 00027 static PUNGraph GenAGM(TVec<TIntV>& CmtyVV, const double& DensityCoef, const int TargetEdges, TRnd& Rnd); 00028 static PUNGraph GenAGM(TVec<TIntV>& CmtyVV, const TFltV& CProbV, TRnd& Rnd, const double PNoCom = -1.0); 00029 static void GetNodeMembership(THash<TInt,TIntSet >& NIDComVH, const TVec<TIntV>& CmtyVV); 00030 static void GetNodeMembership(THash<TInt,TIntSet >& NIDComVH, const TVec<TIntV>& CmtyVV, const TIntV& NIDV); 00031 static void GetNodeMembership(TIntH& NIDComVH, const THash<TInt,TIntV >& CmtyVH); 00032 static void GetNodeMembership(THash<TInt,TIntSet >& NIDComVH, const TVec<TIntSet>& CmtyVV); 00033 static void GetNodeMembership(THash<TInt,TIntSet >& NIDComVH, const THash<TInt,TIntV >& CmtyVH); 00034 static void GetNodeMembership(THash<TInt,TIntV >& NIDComVH, const THash<TInt,TIntV >& CmtyVH); 00035 static void GetNodeMembership(THash<TInt,TIntV >& NIDComVH, const TVec<TIntV >& CmtyVV); 00036 static void GetNodeMembership(TIntH& NIDComVH, const TVec<TIntV >& CmtyVV); 00037 static void LoadCmtyVV(const TStr& InFNm, TVec<TIntV>& CmtyVV); 00038 static void LoadCmtyVV(const TStr& InFNm, TVec<TIntV>& CmtyVV, TStrHash<TInt>& StrToNIdH, const int BeginCol, const int MinSz = 3, const TSsFmt Sep = ssfTabSep); 00039 static void DumpCmtyVV(const TStr& OutFNm, const TVec<TIntV>& CmtyVV); 00040 static void DumpCmtyVV(const TStr OutFNm, TVec<TIntV>& CmtyVV, TIntStrH& NIDNmH); 00041 static int TotalMemberships(const TVec<TIntV>& CmtyVV); 00042 static void RewireCmtyVV(const TVec<TIntV>& CmtyVVIn, TVec<TIntV>& CmtyVVOut, TRnd& Rnd); 00043 static void RewireCmtyNID(THash<TInt,TIntV >& CmtyVH, TRnd& Rnd); 00044 static int Intersection(const TIntV& C1, const TIntV& C2); 00045 static void GetIntersection(const THashSet<TInt>& A, const THashSet<TInt>& B, THashSet<TInt>& C); 00046 static int Intersection(const THashSet<TInt>& A, const THashSet<TInt>& B); 00047 static double GetConductance(const PUNGraph& Graph, const TIntSet& CmtyS, const int Edges); 00048 static void GetNbhCom(const PUNGraph& Graph, const int NID, TIntSet& NBCmtyS); 00049 static void SaveGephi(const TStr& OutFNm, const PUNGraph& G, const TVec<TIntV >& CmtyVVAtr, const double MaxSz, const double MinSz) { 00050 THash<TInt, TStr> TmpH; 00051 SaveGephi(OutFNm, G, CmtyVVAtr, MaxSz, MinSz, TmpH); 00052 } 00053 static void SaveGephi(const TStr& OutFNm, const PUNGraph& G, const TVec<TIntV >& CmtyVVAtr, const double MaxSz, const double MinSz, const THash<TInt, TStr>& NIDNameH) { 00054 THash<TInt, TIntTr> TmpH; 00055 SaveGephi(OutFNm, G, CmtyVVAtr, MaxSz, MinSz, NIDNameH, TmpH); 00056 } 00057 static void SaveGephi(const TStr& OutFNm, const PUNGraph& G, const TVec<TIntV >& CmtyVVAtr, const double MaxSz, const double MinSz, const THash<TInt, TStr>& NIDNameH, const THash<TInt, TIntTr >& NIDColorH); 00058 static void SaveBipartiteGephi(const TStr& OutFNm, const TIntV& NIDV, const TVec<TIntV>& CmtyVV, const double MaxSz, const double MinSz, const TIntStrH& NIDNameH, const THash<TInt, TIntTr >& NIDColorH, const THash<TInt, TIntTr >& CIDColorH); 00059 static int FindComsByAGM(const PUNGraph& Graph, const int InitComs, const int MaxIter, const int RndSeed, const double RegGap, const double PNoCom = 0.0, const TStr PltFPrx = TStr()); 00060 template <class PGraph> 00061 static PGraph LoadEdgeListStr(const TStr& InFNm, TIntStrH& NIDNameH, const int& SrcColId = 0, const int& DstColId = 1, const TSsFmt SsFmt = ssfTabSep) { 00062 TSsParser Ss(InFNm, SsFmt); 00063 PGraph Graph = PGraph::TObj::New(); 00064 TStrHash<TInt> StrSet(Mega(1), true); 00065 while (Ss.Next()) { 00066 const int SrcNId = StrSet.AddKey(Ss[SrcColId]); 00067 const int DstNId = StrSet.AddKey(Ss[DstColId]); 00068 if (! Graph->IsNode(SrcNId)) { Graph->AddNode(SrcNId); } 00069 if (! Graph->IsNode(DstNId)) { Graph->AddNode(DstNId); } 00070 Graph->AddEdge(SrcNId, DstNId); 00071 } 00072 NIDNameH.Gen(StrSet.Len()); 00073 for (int s = 0; s < StrSet.Len(); s++) { NIDNameH.AddDat(s, StrSet.GetKey(s)); } 00074 IAssert(NIDNameH.Len() == Graph->GetNodes()); 00075 00076 Graph->Defrag(); 00077 return Graph; 00078 } 00079 00080 template<class PGraph> 00081 static void GVizComGraph(const PGraph& Graph,const TVec<TIntV >& CmtyVV, const TStr& OutFNm, const TStr& Desc = TStr()){ 00082 TStrV Colors = TStrV::GetV("red","blue","green","pink","cyan"); 00083 TStrV Shapes = TStrV::GetV("ellipse","triangle","square","pentagon","hexagon"); 00084 THash<TInt,TIntV> NIDComVH; 00085 GetNodeMembership(NIDComVH,CmtyVV); 00086 00087 const TStr Ext = OutFNm.GetFExt(); 00088 const TStr GraphFNm = OutFNm.GetSubStr(0, OutFNm.Len() - Ext.Len()) + "dot"; 00089 const bool IsDir = HasGraphFlag(typename PGraph::TObj, gfDirected); 00090 FILE *F = fopen(GraphFNm.CStr(), "wt"); 00091 if (! Desc.Empty()) fprintf(F, "/*****\n%s\n*****/\n\n", Desc.CStr()); 00092 if (IsDir) { fprintf(F, "digraph G {\n"); } else { fprintf(F, "graph G {\n"); } 00093 fprintf(F, " graph [splines=false overlap=false]\n"); //size=\"12,10\" ratio=fill 00094 fprintf(F, " node [width=0.3, height=0.3]\n"); 00095 //nodes 00096 for (typename PGraph::TObj::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) { 00097 int NID = NI.GetId(); 00098 TIntV& CIDV = NIDComVH.GetDat(NID); 00099 IAssert(CIDV.Len()>0); 00100 TStr ShapeNm = Shapes[(CIDV.Len()-1) % Shapes.Len()]; 00101 TStr ColorNm = Colors[CIDV[0] % Colors.Len()]; 00102 TStr NodeComLabel = TStr::Fmt("%d(",NID); 00103 for(int i=0;i<CIDV.Len();i++) { 00104 TStr TmpStr = TStr::Fmt("%d",int(CIDV[i])); 00105 NodeComLabel += TmpStr; 00106 if(i<CIDV.Len()-1){NodeComLabel+=",";} 00107 } 00108 NodeComLabel += ")"; 00109 fprintf(F, " %d [style=filled, shape=\"%s\" fillcolor=\"%s\" label=\"%s\"];\n", NI.GetId(), ShapeNm.CStr(),ColorNm.CStr(), NodeComLabel.CStr()); 00110 } 00111 00112 // edges 00113 for (typename PGraph::TObj::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) { 00114 if (NI.GetOutDeg()==0 && NI.GetInDeg()==0 ) { 00115 fprintf(F, "%d;\n", NI.GetId()); } 00116 else { 00117 for (int e = 0; e < NI.GetOutDeg(); e++) { 00118 if (! IsDir && NI.GetId() > NI.GetOutNId(e)) { continue; } 00119 fprintf(F, " %d %s %d;\n", NI.GetId(), IsDir?"->":"--", NI.GetOutNId(e)); 00120 } 00121 } 00122 } 00123 if (! Desc.Empty()) { 00124 fprintf(F, " label = \"\\n%s\\n\";", Desc.CStr()); 00125 fprintf(F, " fontsize=24;\n"); 00126 } 00127 fprintf(F, "}\n"); 00128 fclose(F); 00129 TSnap::TSnapDetail::GVizDoLayout(GraphFNm, OutFNm, gvlNeato); 00130 } 00131 }; 00132 00133 //Light version of logistic regression (originally in Snap)-- Jaewon Yang, 04/07/12 00134 // forward 00135 class TLogRegPredict; 00136 typedef TPt<TLogRegPredict> PLogRegPredict; 00137 00139 // Logistic Regression using gradient descent 00140 // X: N * M matrix where N = number of examples and M = number of features. 00141 //J: JAEWON THIS HAS NOTHING TO DO WITH AGM. THIS NEEDS TO BE MOVED SOMEWHERE ELSE (glib/xmath.h) 00142 //J: BUT I THINK THERE IS ALREADY A LOGISTIC REGRESSION CLASS THERE!! 00143 class TLogRegFit { 00144 private: 00145 TVec<TFltV> X; 00146 TFltV Y; 00147 TFltV Theta; 00148 int M; // number of features 00149 public: 00150 TLogRegFit() {} 00151 ~TLogRegFit() {} 00152 // returns model for logistic regression 00153 PLogRegPredict CalcLogRegGradient(const TVec<TFltV>& XPt, const TFltV& yPt, const TStr& PlotNm = TStr(), const double& ChangeEps = 0.01, const int& MaxStep = 200, const bool InterceptPt = false); 00154 PLogRegPredict CalcLogRegNewton(const TVec<TFltV>& XPt, const TFltV& yPt, const TStr& PlotNm = TStr(), const double& ChangeEps = 0.01, const int& MaxStep = 200, const bool InterceptPt = false); 00155 int MLEGradient(const double& ChangeEps, const int& MaxStep, const TStr PlotNm); 00156 int MLENewton(const double& ChangeEps, const int& MaxStep, const TStr PlotNm); 00157 double GetStepSizeByLineSearch(const TFltV& DeltaV, const TFltV& GradV, const double& Alpha, const double& Beta); 00158 double Likelihood(const TFltV& NewTheta); 00159 double Likelihood() { return Likelihood(Theta); } 00160 void Gradient(TFltV& GradV); 00161 void Hessian(TFltVV& HVV); 00162 void GetNewtonStep(TFltVV& HVV, const TFltV& GradV, TFltV& DeltaLV); 00163 }; 00164 00165 00167 // Logistic-Regression-Model 00168 //J: JAEWON THIS HAS NOTHING TO DO WITH AGM. THIS NEEDS TO BE MOVED SOMEWHERE ELSE (glib/xmath.h) 00169 //J: BUT I THINK THERE IS ALREADY A LOGISTIC REGRESSION CLASS THERE!! 00170 class TLogRegPredict { 00171 private: 00172 TCRef CRef; 00173 private: 00174 TFltV Theta; 00175 public: 00176 TLogRegPredict(const TFltV& _bb): Theta(_bb) { }; 00177 //static PLogRegMd New(PSparseTrainSet Set) { return TLogReg::CalcLogReg(Set); }; //TODO 00178 00179 TLogRegPredict(TSIn& SIn) { Theta.Load(SIn); }; 00180 static PLogRegPredict Load(TSIn& SIn) { return new TLogRegPredict(SIn); }; 00181 void Save(TSOut& SOut) const { Theta.Save(SOut); }; 00182 00183 UndefDefaultCopyAssign(TLogRegPredict); 00184 public: 00185 // classifies vector, returns probability that AttrV is positive 00186 static void GetCfy(const TVec<TFltV>& X, TFltV& OutV, const TFltV& NewTheta); 00187 static double GetCfy(const TFltV& AttrV, const TFltV& NewTheta); 00188 double GetCfy(const TFltV& AttrV) { return GetCfy(AttrV, Theta); } 00189 void GetTheta(TFltV& _Theta) { _Theta = Theta; } 00190 void GetCfy(const TVec<TFltV>& X, TFltV& OutV) { GetCfy(X, OutV, Theta); } 00191 void PrintTheta() { for (int t = 0; t < Theta.Len(); t++) { printf("Theta[%d] = %f\n", t, Theta[t].Val); } } 00192 friend class TPt<TLogRegPredict>; 00193 }; 00194 00195 #endif