SNAP Library 2.2, Developer Reference
2014-03-11 19:15:55
SNAP, a general purpose, high performance system for analysis and manipulation of large networks
|
00001 #include "stdafx.h" 00002 #include "Snap.h" 00003 #include "agm.h" 00004 #include "agmfit.h" 00005 00007 // AGM graph generation. 00008 00010 void TAGM::RndConnectInsideCommunity(PUNGraph& Graph, const TIntV& CmtyV, const double& Prob, TRnd& Rnd){ 00011 int CNodes = CmtyV.Len(), CEdges; 00012 if (CNodes < 20) { 00013 CEdges = (int) Rnd.GetBinomialDev(Prob, CNodes * (CNodes-1) / 2); 00014 } else { 00015 CEdges = (int) (Prob * CNodes * (CNodes - 1) / 2); 00016 } 00017 THashSet<TIntPr> NewEdgeSet(CEdges); 00018 for (int edge = 0; edge < CEdges; ) { 00019 int SrcNId = CmtyV[Rnd.GetUniDevInt(CNodes)]; 00020 int DstNId = CmtyV[Rnd.GetUniDevInt(CNodes)]; 00021 if (SrcNId > DstNId) { Swap(SrcNId,DstNId); } 00022 if (SrcNId != DstNId && ! NewEdgeSet.IsKey(TIntPr(SrcNId, DstNId))) { // is new edge 00023 NewEdgeSet.AddKey(TIntPr(SrcNId, DstNId)); 00024 Graph->AddEdge(SrcNId, DstNId); 00025 edge++; 00026 } 00027 } 00028 } 00029 00030 00031 PUNGraph TAGM::GenAGM(TVec<TIntV>& CmtyVV, const double& DensityCoef, const int TargetEdges, TRnd& Rnd){ 00032 PUNGraph TryG = TAGM::GenAGM(CmtyVV, DensityCoef, 1.0, Rnd); 00033 const double ScaleCoef = (double) TargetEdges / (double) TryG->GetEdges(); 00034 return TAGM::GenAGM(CmtyVV, DensityCoef, ScaleCoef, Rnd); 00035 } 00036 00037 PUNGraph TAGM::GenAGM(TVec<TIntV>& CmtyVV, const double& DensityCoef, const double& ScaleCoef, TRnd& Rnd){ 00038 TFltV CProbV; 00039 double Prob; 00040 for (int i = 0; i < CmtyVV.Len(); i++) { 00041 Prob = ScaleCoef*pow( double( CmtyVV[i].Len()), - DensityCoef); 00042 if (Prob > 1.0) { Prob = 1; } 00043 CProbV.Add(Prob); 00044 } 00045 return TAGM::GenAGM(CmtyVV, CProbV, Rnd); 00046 } 00047 00049 PUNGraph TAGM::GenAGM(TVec<TIntV>& CmtyVV, const TFltV& CProbV, TRnd& Rnd, const double PNoCom){ 00050 PUNGraph G = TUNGraph::New(100 * CmtyVV.Len(), -1); 00051 printf("AGM begins\n"); 00052 for (int i = 0; i < CmtyVV.Len(); i++) { 00053 TIntV& CmtyV = CmtyVV[i]; 00054 for (int u = 0; u < CmtyV.Len(); u++) { 00055 if ( G->IsNode(CmtyV[u])) { continue; } 00056 G->AddNode(CmtyV[u]); 00057 } 00058 double Prob = CProbV[i]; 00059 RndConnectInsideCommunity(G, CmtyV, Prob, Rnd); 00060 } 00061 if (PNoCom > 0.0) { //if we want to connect nodes that do not share any community 00062 TIntSet NIDS; 00063 for (int c = 0; c < CmtyVV.Len(); c++) { 00064 for (int u = 0; u < CmtyVV[c].Len(); u++) { 00065 NIDS.AddKey(CmtyVV[c][u]); 00066 } 00067 } 00068 TIntV NIDV; 00069 NIDS.GetKeyV(NIDV); 00070 RndConnectInsideCommunity(G,NIDV,PNoCom,Rnd); 00071 } 00072 printf("AGM completed (%d nodes %d edges)\n",G->GetNodes(),G->GetEdges()); 00073 G->Defrag(); 00074 return G; 00075 } 00076 00079 00081 void TAGMUtil::GenPLSeq(TIntV& SzSeq, const int& SeqLen, const double& Alpha, TRnd& Rnd, const int& Min, const int& Max) { 00082 SzSeq.Gen(SeqLen, 0); 00083 while (SzSeq.Len() < SeqLen) { 00084 int Sz = (int) TMath::Round(Rnd.GetPowerDev(Alpha)); 00085 if (Sz >= Min && Sz <= Max) { 00086 SzSeq.Add(Sz); 00087 } 00088 } 00089 } 00090 00092 void TAGMUtil::GenCmtyVVFromPL(TVec<TIntV>& CmtyVV, const int& Nodes, const int& Coms, const double& ComSzAlpha, const double& MemAlpha, const int& MinSz, const int& MaxSz, const int& MinK, const int& MaxK, TRnd& Rnd){ 00093 TIntV NIDV(Nodes, 0); 00094 for (int i = 0; i < Nodes; i++) { 00095 NIDV.Add(i); 00096 } 00097 GenCmtyVVFromPL(CmtyVV, NIDV, Nodes, Coms, ComSzAlpha, MemAlpha, MinSz, MaxSz, MinK, MaxK, Rnd); 00098 } 00099 00101 void TAGMUtil::GenCmtyVVFromPL(TVec<TIntV>& CmtyVV, const PUNGraph& Graph, const int& Nodes, const int& Coms, const double& ComSzAlpha, const double& MemAlpha, const int& MinSz, const int& MaxSz, const int& MinK, const int& MaxK, TRnd& Rnd){ 00102 if (Coms == 0 || Nodes == 0) { 00103 CmtyVV.Clr(); 00104 return; 00105 } 00106 TIntV NIDV; 00107 Graph->GetNIdV(NIDV); 00108 GenCmtyVVFromPL(CmtyVV, NIDV, Nodes, Coms, ComSzAlpha, MemAlpha, MinSz, MaxSz, MinK, MaxK, Rnd); 00109 } 00110 00112 void TAGMUtil::GenCmtyVVFromPL(TVec<TIntV>& CmtyVV, const TIntV& NIDV, const int& Nodes, const int& Coms, const double& ComSzAlpha, const double& MemAlpha, const int& MinSz, const int& MaxSz, const int& MinK, const int& MaxK, TRnd& Rnd){ 00113 if (Coms == 0 || Nodes == 0) { 00114 CmtyVV.Clr(); 00115 return; 00116 } 00117 TIntV ComSzSeq, MemSeq; 00118 TAGMUtil::GenPLSeq(ComSzSeq,Coms,ComSzAlpha,Rnd,MinSz,MaxSz); 00119 TAGMUtil::GenPLSeq(MemSeq,Nodes,MemAlpha,Rnd,MinK,MaxK); 00120 TIntPrV CIDSzPrV, NIDMemPrV; 00121 for (int i = 0; i < ComSzSeq.Len(); i++) { 00122 CIDSzPrV.Add(TIntPr(i, ComSzSeq[i])); 00123 } 00124 for (int i = 0; i < MemSeq.Len(); i++) { 00125 NIDMemPrV.Add(TIntPr(NIDV[i], MemSeq[i])); 00126 } 00127 TAGMUtil::ConnectCmtyVV(CmtyVV, CIDSzPrV, NIDMemPrV, Rnd); 00128 } 00129 00131 void TAGMUtil::ConnectCmtyVV(TVec<TIntV>& CmtyVV, const TIntPrV& CIDSzPrV, const TIntPrV& NIDMemPrV, TRnd& Rnd) { 00132 const int Nodes = NIDMemPrV.Len(), Coms = CIDSzPrV.Len(); 00133 TIntV NDegV,CDegV; 00134 TIntPrSet CNIDSet; 00135 TIntSet HitNodes(Nodes); 00136 THash<TInt,TIntV> CmtyVH; 00137 for (int i = 0;i < CIDSzPrV.Len(); i++) { 00138 for (int j = 0; j < CIDSzPrV[i].Val2; j++) { 00139 CDegV.Add(CIDSzPrV[i].Val1); 00140 } 00141 } 00142 for (int i = 0; i < NIDMemPrV.Len(); i++) { 00143 for (int j = 0; j < NIDMemPrV[i].Val2; j++) { 00144 NDegV.Add(NIDMemPrV[i].Val1); 00145 } 00146 } 00147 while (CDegV.Len() < (int) (1.2 * Nodes)) { 00148 CDegV.Add(CIDSzPrV[Rnd.GetUniDevInt(Coms)].Val1); 00149 } 00150 while (NDegV.Len() < CDegV.Len()) { 00151 NDegV.Add(NIDMemPrV[Rnd.GetUniDevInt(Nodes)].Val1); 00152 } 00153 printf("Total Mem: %d, Total Sz: %d\n",NDegV.Len(), CDegV.Len()); 00154 int c=0; 00155 while (c++ < 15 && CDegV.Len() > 1) { 00156 for (int i = 0; i < CDegV.Len(); i++) { 00157 int u = Rnd.GetUniDevInt(CDegV.Len()); 00158 int v = Rnd.GetUniDevInt(NDegV.Len()); 00159 if (CNIDSet.IsKey(TIntPr(CDegV[u], NDegV[v]))) { continue; } 00160 CNIDSet.AddKey(TIntPr(CDegV[u], NDegV[v])); 00161 HitNodes.AddKey(NDegV[v]); 00162 if (u == CDegV.Len() - 1) { CDegV.DelLast(); } 00163 else { 00164 CDegV[u] = CDegV.Last(); 00165 CDegV.DelLast(); 00166 } 00167 if (v == NDegV.Len() - 1) { NDegV.DelLast(); } 00168 else { 00169 NDegV[v] = NDegV.Last(); 00170 NDegV.DelLast(); 00171 } 00172 } 00173 } 00174 //make sure that every node belongs to at least one community 00175 for (int i = 0; i < Nodes; i++) { 00176 int NID = NIDMemPrV[i].Val1; 00177 if (! HitNodes.IsKey(NID)) { 00178 CNIDSet.AddKey(TIntPr(CIDSzPrV[Rnd.GetUniDevInt(Coms)].Val1, NID)); 00179 HitNodes.AddKey(NID); 00180 } 00181 } 00182 IAssert(HitNodes.Len() == Nodes); 00183 for (int i = 0; i < CNIDSet.Len(); i++) { 00184 TIntPr CNIDPr = CNIDSet[i]; 00185 CmtyVH.AddDat(CNIDPr.Val1); 00186 CmtyVH.GetDat(CNIDPr.Val1).Add(CNIDPr.Val2); 00187 } 00188 CmtyVH.GetDatV(CmtyVV); 00189 } 00190 00192 void TAGMUtil::RewireCmtyVV(const TVec<TIntV>& CmtyVVIn, TVec<TIntV>& CmtyVVOut, TRnd& Rnd){ 00193 THash<TInt,TIntV> CmtyVH; 00194 for (int i = 0; i < CmtyVVIn.Len(); i++) { 00195 CmtyVH.AddDat(i, CmtyVVIn[i]); 00196 } 00197 TAGMUtil::RewireCmtyNID(CmtyVH, Rnd); 00198 CmtyVH.GetDatV(CmtyVVOut); 00199 } 00200 00202 void TAGMUtil::RewireCmtyNID(THash<TInt,TIntV >& CmtyVH, TRnd& Rnd) { 00203 THash<TInt,TIntV > NewCmtyVH(CmtyVH.Len()); 00204 TIntV NDegV; 00205 TIntV CDegV; 00206 for (int i = 0; i < CmtyVH.Len(); i++) { 00207 int CID = CmtyVH.GetKey(i); 00208 for (int j = 0; j < CmtyVH[i].Len(); j++) { 00209 int NID = CmtyVH[i][j]; 00210 NDegV.Add(NID); 00211 CDegV.Add(CID); 00212 } 00213 } 00214 TIntPrSet CNIDSet(CDegV.Len()); 00215 int c=0; 00216 while (c++ < 15 && CDegV.Len() > 1){ 00217 for (int i = 0; i < CDegV.Len(); i++) { 00218 int u = Rnd.GetUniDevInt(CDegV.Len()); 00219 int v = Rnd.GetUniDevInt(NDegV.Len()); 00220 if (CNIDSet.IsKey(TIntPr(CDegV[u], NDegV[v]))) { continue; } 00221 CNIDSet.AddKey(TIntPr(CDegV[u], NDegV[v])); 00222 if (u == CDegV.Len() - 1) { 00223 CDegV.DelLast(); 00224 } else { 00225 CDegV[u] = CDegV.Last(); 00226 CDegV.DelLast(); 00227 } 00228 if ( v == NDegV.Len() - 1) { 00229 NDegV.DelLast(); 00230 } else{ 00231 NDegV[v] = NDegV.Last(); 00232 NDegV.DelLast(); 00233 } 00234 } 00235 } 00236 for (int i = 0; i < CNIDSet.Len(); i++) { 00237 TIntPr CNIDPr = CNIDSet[i]; 00238 IAssert(CmtyVH.IsKey(CNIDPr.Val1)); 00239 NewCmtyVH.AddDat(CNIDPr.Val1); 00240 NewCmtyVH.GetDat(CNIDPr.Val1).Add(CNIDPr.Val2); 00241 } 00242 CmtyVH = NewCmtyVH; 00243 } 00244 00246 void TAGMUtil::LoadCmtyVV(const TStr& InFNm, TVec<TIntV>& CmtyVV) { 00247 CmtyVV.Gen(Kilo(100), 0); 00248 TSsParser Ss(InFNm, ssfWhiteSep); 00249 while (Ss.Next()) { 00250 if(Ss.GetFlds() > 0) { 00251 TIntV CmtyV; 00252 for (int i = 0; i < Ss.GetFlds(); i++) { 00253 if (Ss.IsInt(i)) { 00254 CmtyV.Add(Ss.GetInt(i)); 00255 } 00256 } 00257 CmtyVV.Add(CmtyV); 00258 } 00259 } 00260 CmtyVV.Pack(); 00261 printf("community loading completed (%d communities)\n",CmtyVV.Len()); 00262 00263 } 00264 00266 void TAGMUtil::LoadCmtyVV(const TStr& InFNm, TVec<TIntV>& CmtyVV, TStrHash<TInt>& StrToNIdH, const int BeginCol, const int MinSz, const TSsFmt Sep) { 00267 CmtyVV.Gen(Kilo(100), 0); 00268 TSsParser Ss(InFNm, Sep); 00269 while (Ss.Next()) { 00270 if(Ss.GetFlds() > BeginCol) { 00271 TIntV CmtyV; 00272 for (int i = BeginCol; i < Ss.GetFlds(); i++) { 00273 if (StrToNIdH.IsKey(Ss.GetFld(i))) { 00274 CmtyV.Add(StrToNIdH.GetKeyId(Ss.GetFld(i))); 00275 } 00276 } 00277 if (CmtyV.Len() < MinSz) { continue; } 00278 CmtyVV.Add(CmtyV); 00279 } 00280 } 00281 CmtyVV.Pack(); 00282 printf("community loading completed (%d communities)\n",CmtyVV.Len()); 00283 } 00284 00286 void TAGMUtil::DumpCmtyVV(const TStr& OutFNm, const TVec<TIntV>& CmtyVV) { 00287 FILE* F = fopen(OutFNm.CStr(),"wt"); 00288 for (int i = 0; i < CmtyVV.Len(); i++) { 00289 for (int j = 0; j < CmtyVV[i].Len(); j++) { 00290 fprintf(F,"%d\t", (int) CmtyVV[i][j]); 00291 } 00292 fprintf(F,"\n"); 00293 } 00294 fclose(F); 00295 } 00296 00298 void TAGMUtil::DumpCmtyVV(const TStr OutFNm, TVec<TIntV>& CmtyVV, TIntStrH& NIDNmH) { 00299 FILE* F = fopen(OutFNm.CStr(), "wt"); 00300 for (int c = 0; c < CmtyVV.Len(); c++) { 00301 for (int u = 0; u < CmtyVV[c].Len(); u++) { 00302 if (NIDNmH.IsKey(CmtyVV[c][u])){ 00303 fprintf(F, "%s\t", NIDNmH.GetDat(CmtyVV[c][u]).CStr()); 00304 } 00305 else { 00306 fprintf(F, "%d\t", (int) CmtyVV[c][u]); 00307 } 00308 } 00309 fprintf(F, "\n"); 00310 } 00311 fclose(F); 00312 } 00313 00315 int TAGMUtil::TotalMemberships(const TVec<TIntV>& CmtyVV){ 00316 int M = 0; 00317 for (int i = 0; i < CmtyVV.Len(); i++) { 00318 M += CmtyVV[i].Len(); 00319 } 00320 return M; 00321 } 00322 00324 void TAGMUtil::GetNodeMembership(TIntH& NIDComVH, const THash<TInt,TIntV >& CmtyVH) { 00325 NIDComVH.Clr(); 00326 for (THash<TInt,TIntV>::TIter HI = CmtyVH.BegI(); HI < CmtyVH.EndI(); HI++){ 00327 for (int j = 0;j < HI.GetDat().Len(); j++) { 00328 int NID = HI.GetDat()[j]; 00329 NIDComVH.AddDat(NID)++; 00330 } 00331 } 00332 } 00333 00335 void TAGMUtil::GetNodeMembership(THash<TInt,TIntSet >& NIDComVH, const TVec<TIntV>& CmtyVV) { 00336 NIDComVH.Clr(); 00337 for (int i = 0; i < CmtyVV.Len(); i++){ 00338 int CID = i; 00339 for (int j = 0; j < CmtyVV[i].Len(); j++) { 00340 int NID = CmtyVV[i][j]; 00341 NIDComVH.AddDat(NID).AddKey(CID); 00342 } 00343 } 00344 } 00345 00347 void TAGMUtil::GetNodeMembership(THash<TInt,TIntSet >& NIDComVH, const TVec<TIntV>& CmtyVV, const TIntV& NIDV) { 00348 NIDComVH.Clr(); 00349 for (int u = 0; u < NIDV.Len(); u++) { 00350 NIDComVH.AddDat(NIDV[u]); 00351 } 00352 for (int i = 0; i < CmtyVV.Len(); i++){ 00353 int CID = i; 00354 for (int j = 0; j < CmtyVV[i].Len(); j++) { 00355 int NID = CmtyVV[i][j]; 00356 NIDComVH.AddDat(NID).AddKey(CID); 00357 } 00358 } 00359 } 00360 00361 00362 void TAGMUtil::GetNodeMembership(THash<TInt,TIntSet >& NIDComVH, const TVec<TIntSet>& CmtyVV) { 00363 for (int i = 0; i < CmtyVV.Len(); i++){ 00364 int CID = i; 00365 for (TIntSet::TIter SI = CmtyVV[i].BegI(); SI < CmtyVV[i].EndI(); SI++) { 00366 int NID = SI.GetKey(); 00367 NIDComVH.AddDat(NID).AddKey(CID); 00368 } 00369 } 00370 } 00371 void TAGMUtil::GetNodeMembership(THash<TInt,TIntSet >& NIDComVH, const THash<TInt,TIntV>& CmtyVH) { 00372 for (THash<TInt,TIntV>::TIter HI = CmtyVH.BegI(); HI < CmtyVH.EndI(); HI++){ 00373 int CID = HI.GetKey(); 00374 for (int j = 0; j < HI.GetDat().Len(); j++) { 00375 int NID = HI.GetDat()[j]; 00376 NIDComVH.AddDat(NID).AddKey(CID); 00377 } 00378 } 00379 } 00380 00381 void TAGMUtil::GetNodeMembership(THash<TInt,TIntV >& NIDComVH, const THash<TInt,TIntV>& CmtyVH) { 00382 for (int i = 0; i < CmtyVH.Len(); i++){ 00383 int CID = CmtyVH.GetKey(i); 00384 for (int j = 0; j < CmtyVH[i].Len(); j++) { 00385 int NID = CmtyVH[i][j]; 00386 NIDComVH.AddDat(NID).Add(CID); 00387 } 00388 } 00389 } 00390 00391 void TAGMUtil::GetNodeMembership(THash<TInt,TIntV >& NIDComVH, const TVec<TIntV>& CmtyVV) { 00392 THash<TInt,TIntV> CmtyVH; 00393 for (int i = 0; i < CmtyVV.Len(); i++) { 00394 CmtyVH.AddDat(i, CmtyVV[i]); 00395 } 00396 GetNodeMembership(NIDComVH, CmtyVH); 00397 } 00398 00399 int TAGMUtil::Intersection(const TIntV& C1, const TIntV& C2) { 00400 TIntSet S1(C1), S2(C2); 00401 return TAGMUtil::Intersection(S1, S2); 00402 } 00403 00404 void TAGMUtil::GetIntersection(const THashSet<TInt>& A, const THashSet<TInt>& B, THashSet<TInt>& C) { 00405 C.Gen(A.Len()); 00406 if (A.Len() < B.Len()) { 00407 for (THashSetKeyI<TInt> it = A.BegI(); it < A.EndI(); it++) 00408 if (B.IsKey(it.GetKey())) C.AddKey(it.GetKey()); 00409 } else { 00410 for (THashSetKeyI<TInt> it = B.BegI(); it < B.EndI(); it++) 00411 if (A.IsKey(it.GetKey())) C.AddKey(it.GetKey()); 00412 } 00413 } 00414 00415 int TAGMUtil::Intersection(const THashSet<TInt>& A, const THashSet<TInt>& B) { 00416 int n = 0; 00417 if (A.Len() < B.Len()) { 00418 for (THashSetKeyI<TInt> it = A.BegI(); it < A.EndI(); it++) 00419 if (B.IsKey(it.GetKey())) n++; 00420 } else { 00421 for (THashSetKeyI<TInt> it = B.BegI(); it < B.EndI(); it++) 00422 if (A.IsKey(it.GetKey())) n++; 00423 } 00424 return n; 00425 } 00426 00428 void TAGMUtil::SaveGephi(const TStr& OutFNm, const PUNGraph& G, const TVec<TIntV>& CmtyVVAtr, const double MaxSz, const double MinSz, const TIntStrH& NIDNameH, const THash<TInt, TIntTr>& NIDColorH ) { 00429 THash<TInt,TIntV> NIDComVHAtr; 00430 TAGMUtil::GetNodeMembership(NIDComVHAtr, CmtyVVAtr); 00431 00432 FILE* F = fopen(OutFNm.CStr(), "wt"); 00433 fprintf(F, "<?xml version='1.0' encoding='UTF-8'?>\n"); 00434 fprintf(F, "<gexf xmlns='http://www.gexf.net/1.2draft' xmlns:viz='http://www.gexf.net/1.1draft/viz' xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance' xsi:schemaLocation='http://www.gexf.net/1.2draft http://www.gexf.net/1.2draft/gexf.xsd' version='1.2'>\n"); 00435 fprintf(F, "\t<graph mode='static' defaultedgetype='undirected'>\n"); 00436 if (CmtyVVAtr.Len() > 0) { 00437 fprintf(F, "\t<attributes class='node'>\n"); 00438 for (int c = 0; c < CmtyVVAtr.Len(); c++) { 00439 fprintf(F, "\t\t<attribute id='%d' title='c%d' type='boolean'>", c, c); 00440 fprintf(F, "\t\t<default>false</default>\n"); 00441 fprintf(F, "\t\t</attribute>\n"); 00442 } 00443 fprintf(F, "\t</attributes>\n"); 00444 } 00445 fprintf(F, "\t\t<nodes>\n"); 00446 for (TUNGraph::TNodeI NI = G->BegNI(); NI < G->EndNI(); NI++) { 00447 int NID = NI.GetId(); 00448 TStr Label = NIDNameH.IsKey(NID)? NIDNameH.GetDat(NID): ""; 00449 TIntTr Color = NIDColorH.IsKey(NID)? NIDColorH.GetDat(NID) : TIntTr(120, 120, 120); 00450 00451 double Size = MinSz; 00452 double SizeStep = (MaxSz - MinSz) / (double) CmtyVVAtr.Len(); 00453 if (NIDComVHAtr.IsKey(NID)) { 00454 Size = MinSz + SizeStep * (double) NIDComVHAtr.GetDat(NID).Len(); 00455 } 00456 double Alpha = 1.0; 00457 fprintf(F, "\t\t\t<node id='%d' label='%s'>\n", NID, Label.CStr()); 00458 fprintf(F, "\t\t\t\t<viz:color r='%d' g='%d' b='%d' a='%.1f'/>\n", Color.Val1.Val, Color.Val2.Val, Color.Val3.Val, Alpha); 00459 fprintf(F, "\t\t\t\t<viz:size value='%.3f'/>\n", Size); 00460 //specify attributes 00461 if (NIDComVHAtr.IsKey(NID)) { 00462 fprintf(F, "\t\t\t\t<attvalues>\n"); 00463 for (int c = 0; c < NIDComVHAtr.GetDat(NID).Len(); c++) { 00464 int CID = NIDComVHAtr.GetDat(NID)[c]; 00465 fprintf(F, "\t\t\t\t\t<attvalue for='%d' value='true'/>\n", CID); 00466 } 00467 fprintf(F, "\t\t\t\t</attvalues>\n"); 00468 } 00469 00470 fprintf(F, "\t\t\t</node>\n"); 00471 } 00472 fprintf(F, "\t\t</nodes>\n"); 00473 //plot edges 00474 int EID = 0; 00475 fprintf(F, "\t\t<edges>\n"); 00476 for (TUNGraph::TEdgeI EI = G->BegEI(); EI < G->EndEI(); EI++) { 00477 fprintf(F, "\t\t\t<edge id='%d' source='%d' target='%d'/>\n", EID++, EI.GetSrcNId(), EI.GetDstNId()); 00478 } 00479 fprintf(F, "\t\t</edges>\n"); 00480 fprintf(F, "\t</graph>\n"); 00481 fprintf(F, "</gexf>\n"); 00482 fclose(F); 00483 } 00484 00486 void TAGMUtil::SaveBipartiteGephi(const TStr& OutFNm, const TIntV& NIDV, const TVec<TIntV>& CmtyVV, const double MaxSz, const double MinSz, const TIntStrH& NIDNameH, const THash<TInt, TIntTr>& NIDColorH, const THash<TInt, TIntTr>& CIDColorH ) { 00488 if (CmtyVV.Len() == 0) { return; } 00489 double NXMin = 0.1, YMin = 0.1, NXMax = 250.00, YMax = 30.0; 00490 double CXMin = 0.3 * NXMax, CXMax = 0.7 * NXMax; 00491 double CStep = (CXMax - CXMin) / (double) CmtyVV.Len(), NStep = (NXMax - NXMin) / (double) NIDV.Len(); 00492 THash<TInt,TIntV> NIDComVH; 00493 TAGMUtil::GetNodeMembership(NIDComVH, CmtyVV); 00494 00495 FILE* F = fopen(OutFNm.CStr(), "wt"); 00496 fprintf(F, "<?xml version='1.0' encoding='UTF-8'?>\n"); 00497 fprintf(F, "<gexf xmlns='http://www.gexf.net/1.2draft' xmlns:viz='http://www.gexf.net/1.1draft/viz' xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance' xsi:schemaLocation='http://www.gexf.net/1.2draft http://www.gexf.net/1.2draft/gexf.xsd' version='1.2'>\n"); 00498 fprintf(F, "\t<graph mode='static' defaultedgetype='directed'>\n"); 00499 fprintf(F, "\t\t<nodes>\n"); 00500 for (int c = 0; c < CmtyVV.Len(); c++) { 00501 int CID = c; 00502 double XPos = c * CStep + CXMin; 00503 TIntTr Color = CIDColorH.IsKey(CID)? CIDColorH.GetDat(CID) : TIntTr(120, 120, 120); 00504 fprintf(F, "\t\t\t<node id='C%d' label='C%d'>\n", CID, CID); 00505 fprintf(F, "\t\t\t\t<viz:color r='%d' g='%d' b='%d'/>\n", Color.Val1.Val, Color.Val2.Val, Color.Val3.Val); 00506 fprintf(F, "\t\t\t\t<viz:size value='%.3f'/>\n", MaxSz); 00507 fprintf(F, "\t\t\t\t<viz:shape value='square'/>\n"); 00508 fprintf(F, "\t\t\t\t<viz:position x='%f' y='%f' z='0.0'/>\n", XPos, YMax); 00509 fprintf(F, "\t\t\t</node>\n"); 00510 } 00511 00512 for (int u = 0;u < NIDV.Len(); u++) { 00513 int NID = NIDV[u]; 00514 TStr Label = NIDNameH.IsKey(NID)? NIDNameH.GetDat(NID): ""; 00515 double Size = MinSz; 00516 double XPos = NXMin + u * NStep; 00517 TIntTr Color = NIDColorH.IsKey(NID)? NIDColorH.GetDat(NID) : TIntTr(120, 120, 120); 00518 double Alpha = 1.0; 00519 fprintf(F, "\t\t\t<node id='%d' label='%s'>\n", NID, Label.CStr()); 00520 fprintf(F, "\t\t\t\t<viz:color r='%d' g='%d' b='%d' a='%.1f'/>\n", Color.Val1.Val, Color.Val2.Val, Color.Val3.Val, Alpha); 00521 fprintf(F, "\t\t\t\t<viz:size value='%.3f'/>\n", Size); 00522 fprintf(F, "\t\t\t\t<viz:shape value='square'/>\n"); 00523 fprintf(F, "\t\t\t\t<viz:position x='%f' y='%f' z='0.0'/>\n", XPos, YMin); 00524 fprintf(F, "\t\t\t</node>\n"); 00525 } 00526 fprintf(F, "\t\t</nodes>\n"); 00527 fprintf(F, "\t\t<edges>\n"); 00528 int EID = 0; 00529 for (int u = 0;u < NIDV.Len(); u++) { 00530 int NID = NIDV[u]; 00531 if (NIDComVH.IsKey(NID)) { 00532 for (int c = 0; c < NIDComVH.GetDat(NID).Len(); c++) { 00533 int CID = NIDComVH.GetDat(NID)[c]; 00534 fprintf(F, "\t\t\t<edge id='%d' source='C%d' target='%d'/>\n", EID++, CID, NID); 00535 } 00536 } 00537 } 00538 fprintf(F, "\t\t</edges>\n"); 00539 fprintf(F, "\t</graph>\n"); 00540 fprintf(F, "</gexf>\n"); 00541 } 00542 00544 int TAGMUtil::FindComsByAGM(const PUNGraph& Graph, const int InitComs, const int MaxIter, const int RndSeed, const double RegGap, const double PNoCom, const TStr PltFPrx) { 00545 TRnd Rnd(RndSeed); 00546 int LambdaIter = 100; 00547 if (Graph->GetNodes() < 200) { LambdaIter = 1; } 00548 if (Graph->GetNodes() < 200 && Graph->GetEdges() > 2000) { LambdaIter = 100; } 00549 00550 //Find coms with large C 00551 TAGMFit AGMFitM(Graph, InitComs, RndSeed); 00552 if (PNoCom > 0.0) { AGMFitM.SetPNoCom(PNoCom); } 00553 AGMFitM.RunMCMC(MaxIter, LambdaIter, ""); 00554 00555 int TE = Graph->GetEdges(); 00556 TFltV RegV; 00557 RegV.Add(0.3 * TE); 00558 for (int r = 0; r < 25; r++) { 00559 RegV.Add(RegV.Last() * RegGap); 00560 } 00561 TFltPrV RegComsV, RegLV, RegBICV; 00562 TFltV LV, BICV; 00563 //record likelihood and number of communities with nonzero P_c 00564 for (int r = 0; r < RegV.Len(); r++) { 00565 double RegCoef = RegV[r]; 00566 AGMFitM.SetRegCoef(RegCoef); 00567 AGMFitM.MLEGradAscentGivenCAG(0.01, 1000); 00568 AGMFitM.SetRegCoef(0.0); 00569 00570 TVec<TIntV> EstCmtyVV; 00571 AGMFitM.GetCmtyVV(EstCmtyVV, 0.99); 00572 int NumLowQ = EstCmtyVV.Len(); 00573 RegComsV.Add(TFltPr(RegCoef, (double) NumLowQ)); 00574 00575 if (EstCmtyVV.Len() > 0) { 00576 TAGMFit AFTemp(Graph, EstCmtyVV, Rnd); 00577 AFTemp.MLEGradAscentGivenCAG(0.001, 1000); 00578 double CurL = AFTemp.Likelihood(); 00579 LV.Add(CurL); 00580 BICV.Add(-2.0 * CurL + (double) EstCmtyVV.Len() * log((double) Graph->GetNodes() * (Graph->GetNodes() - 1) / 2.0)); 00581 } 00582 else { 00583 break; 00584 } 00585 } 00586 // if likelihood does not exist or does not change at all, report the smallest number of communities or 2 00587 if (LV.Len() == 0) { return 2; } 00588 else if (LV[0] == LV.Last()) { return (int) TMath::Mx<TFlt>(2.0, RegComsV[LV.Len() - 1].Val2); } 00589 00590 00591 //normalize likelihood and BIC to 0~100 00592 int MaxL = 100; 00593 { 00594 TFltV& ValueV = LV; 00595 TFltPrV& RegValueV = RegLV; 00596 double MinValue = TFlt::Mx, MaxValue = TFlt::Mn; 00597 for (int l = 0; l < ValueV.Len(); l++) { 00598 if (ValueV[l] < MinValue) { MinValue = ValueV[l]; } 00599 if (ValueV[l] > MaxValue) { MaxValue = ValueV[l]; } 00600 } 00601 while (ValueV.Len() < RegV.Len()) { ValueV.Add(MinValue); } 00602 double RangeVal = MaxValue - MinValue; 00603 for (int l = 0; l < ValueV.Len(); l++) { 00604 RegValueV.Add(TFltPr(RegV[l], double(MaxL) * (ValueV[l] - MinValue) / RangeVal)); 00605 } 00606 00607 } 00608 { 00609 TFltV& ValueV = BICV; 00610 TFltPrV& RegValueV = RegBICV; 00611 double MinValue = TFlt::Mx, MaxValue = TFlt::Mn; 00612 for (int l = 0; l < ValueV.Len(); l++) { 00613 if (ValueV[l] < MinValue) { MinValue = ValueV[l]; } 00614 if (ValueV[l] > MaxValue) { MaxValue = ValueV[l]; } 00615 } 00616 while (ValueV.Len() < RegV.Len()) { ValueV.Add(MaxValue); } 00617 double RangeVal = MaxValue - MinValue; 00618 for (int l = 0; l < ValueV.Len(); l++) { 00619 RegValueV.Add(TFltPr(RegV[l], double(MaxL) * (ValueV[l] - MinValue) / RangeVal)); 00620 } 00621 } 00622 00623 //fit logistic regression to normalized likelihood. 00624 TVec<TFltV> XV(RegLV.Len()); 00625 TFltV YV (RegLV.Len()); 00626 for (int l = 0; l < RegLV.Len(); l++) { 00627 XV[l] = TFltV::GetV(log(RegLV[l].Val1)); 00628 YV[l] = RegLV[l].Val2 / (double) MaxL; 00629 } 00630 TFltPrV LRVScaled, LRV; 00631 TLogRegFit LRFit; 00632 PLogRegPredict LRMd = LRFit.CalcLogRegNewton(XV, YV, PltFPrx); 00633 for (int l = 0; l < RegLV.Len(); l++) { 00634 LRV.Add(TFltPr(RegV[l], LRMd->GetCfy(XV[l]))); 00635 LRVScaled.Add(TFltPr(RegV[l], double(MaxL) * LRV.Last().Val2)); 00636 } 00637 00638 //estimate # communities from fitted logistic regression 00639 int NumComs = 0, IdxRegDrop = 0; 00640 double LRThres = 1.1, RegDrop; // 1 / (1 + exp(1.1)) = 0.25 00641 double LeftReg = 0.0, RightReg = 0.0; 00642 TFltV Theta; 00643 LRMd->GetTheta(Theta); 00644 RegDrop = (- Theta[1] - LRThres) / Theta[0]; 00645 if (RegDrop <= XV[0][0]) { NumComs = (int) RegComsV[0].Val2; } 00646 else if (RegDrop >= XV.Last()[0]) { NumComs = (int) RegComsV.Last().Val2; } 00647 else { //interpolate for RegDrop 00648 for (int i = 0; i < XV.Len(); i++) { 00649 if (XV[i][0] > RegDrop) { IdxRegDrop = i; break; } 00650 } 00651 00652 if (IdxRegDrop == 0) { 00653 printf("Error!! RegDrop:%f, Theta[0]:%f, Theta[1]:%f\n", RegDrop, Theta[0].Val, Theta[1].Val); 00654 for (int l = 0; l < RegLV.Len(); l++) { 00655 printf("X[%d]:%f, Y[%d]:%f\n", l, XV[l][0].Val, l, YV[l].Val); 00656 } 00657 } 00658 IAssert(IdxRegDrop > 0); 00659 LeftReg = RegDrop - XV[IdxRegDrop - 1][0]; 00660 RightReg = XV[IdxRegDrop][0] - RegDrop; 00661 NumComs = (int) TMath::Round( (RightReg * RegComsV[IdxRegDrop - 1].Val2 + LeftReg * RegComsV[IdxRegDrop].Val2) / (LeftReg + RightReg)); 00662 00663 } 00664 //printf("Interpolation coeff: %f, %f, index at drop:%d (%f), Left-Right Vals: %f, %f\n", LeftReg, RightReg, IdxRegDrop, RegDrop, RegComsV[IdxRegDrop - 1].Val2, RegComsV[IdxRegDrop].Val2); 00665 printf("Num Coms:%d\n", NumComs); 00666 if (NumComs < 2) { NumComs = 2; } 00667 00668 if (PltFPrx.Len() > 0) { 00669 TStr PlotTitle = TStr::Fmt("N:%d, E:%d ", Graph->GetNodes(), TE); 00670 TGnuPlot GPC(PltFPrx + ".l"); 00671 GPC.AddPlot(RegComsV, gpwLinesPoints, "C"); 00672 GPC.AddPlot(RegLV, gpwLinesPoints, "likelihood"); 00673 GPC.AddPlot(RegBICV, gpwLinesPoints, "BIC"); 00674 GPC.AddPlot(LRVScaled, gpwLinesPoints, "Sigmoid (scaled)"); 00675 GPC.SetScale(gpsLog10X); 00676 GPC.SetTitle(PlotTitle); 00677 GPC.SavePng(PltFPrx + ".l.png"); 00678 } 00679 00680 return NumComs; 00681 } 00682 00683 double TAGMUtil::GetConductance(const PUNGraph& Graph, const TIntSet& CmtyS, const int Edges) { 00684 const int Edges2 = Edges >= 0 ? 2*Edges : Graph->GetEdges(); 00685 int Vol = 0, Cut = 0; 00686 double Phi = 0.0; 00687 for (int i = 0; i < CmtyS.Len(); i++) { 00688 if (! Graph->IsNode(CmtyS[i])) { continue; } 00689 TUNGraph::TNodeI NI = Graph->GetNI(CmtyS[i]); 00690 for (int e = 0; e < NI.GetOutDeg(); e++) { 00691 if (! CmtyS.IsKey(NI.GetOutNId(e))) { Cut += 1; } 00692 } 00693 Vol += NI.GetOutDeg(); 00694 } 00695 // get conductance 00696 if (Vol != Edges2) { 00697 if (2 * Vol > Edges2) { Phi = Cut / double (Edges2 - Vol); } 00698 else if (Vol == 0) { Phi = 0.0; } 00699 else { Phi = Cut / double(Vol); } 00700 } else { 00701 if (Vol == Edges2) { Phi = 1.0; } 00702 } 00703 return Phi; 00704 } 00705 00706 void TAGMUtil::GetNbhCom(const PUNGraph& Graph, const int NID, TIntSet& NBCmtyS) { 00707 TUNGraph::TNodeI NI = Graph->GetNI(NID); 00708 NBCmtyS.Gen(NI.GetDeg()); 00709 NBCmtyS.AddKey(NID); 00710 for (int e = 0; e < NI.GetDeg(); e++) { 00711 NBCmtyS.AddKey(NI.GetNbrNId(e)); 00712 } 00713 } 00714 00716 // Logistic regression by gradient ascent 00717 00718 void TLogRegFit::GetNewtonStep(TFltVV& HVV, const TFltV& GradV, TFltV& DeltaLV){ 00719 bool HSingular = false; 00720 for (int i = 0; i < HVV.GetXDim(); i++) { 00721 if (HVV(i,i) == 0.0) { 00722 HVV(i,i) = 0.001; 00723 HSingular = true; 00724 } 00725 DeltaLV[i] = GradV[i] / HVV(i, i); 00726 } 00727 if (! HSingular) { 00728 if (HVV(0, 0) < 0) { // if Hessian is negative definite, convert it to positive definite 00729 for (int r = 0; r < Theta.Len(); r++) { 00730 for (int c = 0; c < Theta.Len(); c++) { 00731 HVV(r, c) = - HVV(r, c); 00732 } 00733 } 00734 TNumericalStuff::SolveSymetricSystem(HVV, GradV, DeltaLV); 00735 } 00736 else { 00737 TNumericalStuff::SolveSymetricSystem(HVV, GradV, DeltaLV); 00738 for (int i = 0; i < DeltaLV.Len(); i++) { 00739 DeltaLV[i] = - DeltaLV[i]; 00740 } 00741 } 00742 00743 } 00744 } 00745 00746 void TLogRegFit::Hessian(TFltVV& HVV) { 00747 HVV.Gen(Theta.Len(), Theta.Len()); 00748 TFltV OutV; 00749 TLogRegPredict::GetCfy(X, OutV, Theta); 00750 for (int i = 0; i < X.Len(); i++) { 00751 for (int r = 0; r < Theta.Len(); r++) { 00752 HVV.At(r, r) += - (X[i][r] * OutV[i] * (1 - OutV[i]) * X[i][r]); 00753 for (int c = r + 1; c < Theta.Len(); c++) { 00754 HVV.At(r, c) += - (X[i][r] * OutV[i] * (1 - OutV[i]) * X[i][c]); 00755 HVV.At(c, r) += - (X[i][r] * OutV[i] * (1 - OutV[i]) * X[i][c]); 00756 } 00757 } 00758 } 00759 /* 00760 printf("\n"); 00761 for (int r = 0; r < Theta.Len(); r++) { 00762 for (int c = 0; c < Theta.Len(); c++) { 00763 printf("%f\t", HVV.At(r, c).Val); 00764 } 00765 printf("\n"); 00766 } 00767 */ 00768 } 00769 00770 int TLogRegFit::MLENewton(const double& ChangeEps, const int& MaxStep, const TStr PlotNm) { 00771 TExeTm ExeTm; 00772 TFltV GradV(Theta.Len()), DeltaLV(Theta.Len()); 00773 TFltVV HVV(Theta.Len(), Theta.Len()); 00774 int iter = 0; 00775 double MinVal = -1e10, MaxVal = 1e10; 00776 for(iter = 0; iter < MaxStep; iter++) { 00777 Gradient(GradV); 00778 Hessian(HVV); 00779 GetNewtonStep(HVV, GradV, DeltaLV); 00780 double Increment = TLinAlg::DotProduct(GradV, DeltaLV); 00781 if (Increment <= ChangeEps) {break;} 00782 double LearnRate = GetStepSizeByLineSearch(DeltaLV, GradV, 0.15, 0.5);//InitLearnRate/double(0.01*(double)iter + 1); 00783 for(int i = 0; i < Theta.Len(); i++) { 00784 double Change = LearnRate * DeltaLV[i]; 00785 Theta[i] += Change; 00786 if(Theta[i] < MinVal) { Theta[i] = MinVal;} 00787 if(Theta[i] > MaxVal) { Theta[i] = MaxVal;} 00788 } 00789 } 00790 if (! PlotNm.Empty()) { 00791 printf("MLE with Newton method completed with %d iterations(%s)\n",iter,ExeTm.GetTmStr()); 00792 } 00793 00794 return iter; 00795 } 00796 00797 int TLogRegFit::MLEGradient(const double& ChangeEps, const int& MaxStep, const TStr PlotNm) { 00798 TExeTm ExeTm; 00799 TFltV GradV(Theta.Len()); 00800 int iter = 0; 00801 TIntFltPrV IterLV, IterGradNormV; 00802 double MinVal = -1e10, MaxVal = 1e10; 00803 double GradCutOff = 100000; 00804 for(iter = 0; iter < MaxStep; iter++) { 00805 Gradient(GradV); //if gradient is going out of the boundary, cut off 00806 for(int i = 0; i < Theta.Len(); i++) { 00807 if (GradV[i] < -GradCutOff) { GradV[i] = -GradCutOff; } 00808 if (GradV[i] > GradCutOff) { GradV[i] = GradCutOff; } 00809 if (Theta[i] <= MinVal && GradV[i] < 0) { GradV[i] = 0.0; } 00810 if (Theta[i] >= MaxVal && GradV[i] > 0) { GradV[i] = 0.0; } 00811 } 00812 double Alpha = 0.15, Beta = 0.9; 00813 //double LearnRate = 0.1 / (0.1 * iter + 1); //GetStepSizeByLineSearch(GradV, GradV, Alpha, Beta); 00814 double LearnRate = GetStepSizeByLineSearch(GradV, GradV, Alpha, Beta); 00815 if (TLinAlg::Norm(GradV) < ChangeEps) { break; } 00816 for(int i = 0; i < Theta.Len(); i++) { 00817 double Change = LearnRate * GradV[i]; 00818 Theta[i] += Change; 00819 if(Theta[i] < MinVal) { Theta[i] = MinVal;} 00820 if(Theta[i] > MaxVal) { Theta[i] = MaxVal;} 00821 } 00822 if (! PlotNm.Empty()) { 00823 double L = Likelihood(); 00824 IterLV.Add(TIntFltPr(iter, L)); 00825 IterGradNormV.Add(TIntFltPr(iter, TLinAlg::Norm(GradV))); 00826 } 00827 00828 } 00829 if (! PlotNm.Empty()) { 00830 TGnuPlot::PlotValV(IterLV, PlotNm + ".likelihood_Q"); 00831 TGnuPlot::PlotValV(IterGradNormV, PlotNm + ".gradnorm_Q"); 00832 printf("MLE for Lambda completed with %d iterations(%s)\n",iter,ExeTm.GetTmStr()); 00833 } 00834 return iter; 00835 } 00836 00837 double TLogRegFit::GetStepSizeByLineSearch(const TFltV& DeltaV, const TFltV& GradV, const double& Alpha, const double& Beta) { 00838 double StepSize = 1.0; 00839 double InitLikelihood = Likelihood(); 00840 IAssert(Theta.Len() == DeltaV.Len()); 00841 TFltV NewThetaV(Theta.Len()); 00842 double MinVal = -1e10, MaxVal = 1e10; 00843 for(int iter = 0; ; iter++) { 00844 for (int i = 0; i < Theta.Len(); i++){ 00845 NewThetaV[i] = Theta[i] + StepSize * DeltaV[i]; 00846 if (NewThetaV[i] < MinVal) { NewThetaV[i] = MinVal; } 00847 if (NewThetaV[i] > MaxVal) { NewThetaV[i] = MaxVal; } 00848 } 00849 if (Likelihood(NewThetaV) < InitLikelihood + Alpha * StepSize * TLinAlg::DotProduct(GradV, DeltaV)) { 00850 StepSize *= Beta; 00851 } else { 00852 break; 00853 } 00854 } 00855 return StepSize; 00856 } 00857 00858 double TLogRegFit::Likelihood(const TFltV& NewTheta) { 00859 TFltV OutV; 00860 TLogRegPredict::GetCfy(X, OutV, NewTheta); 00861 double L = 0; 00862 for (int r = 0; r < OutV.Len(); r++) { 00863 L += Y[r] * log(OutV[r]); 00864 L += (1 - Y[r]) * log(1 - OutV[r]); 00865 } 00866 return L; 00867 } 00868 00869 void TLogRegFit::Gradient(TFltV& GradV) { 00870 TFltV OutV; 00871 TLogRegPredict::GetCfy(X, OutV, Theta); 00872 GradV.Gen(M); 00873 for (int r = 0; r < X.Len(); r++) { 00874 //printf("Y[%d] = %f, Out[%d] = %f\n", r, Y[r].Val, r, OutV[r].Val); 00875 for (int m = 0; m < M; m++) { 00876 GradV[m] += (Y[r] - OutV[r]) * X[r][m]; 00877 } 00878 } 00879 //for (int m = 0; m < M; m++) { printf("Theta[%d] = %f, GradV[%d] = %f\n", m, Theta[m].Val, m, GradV[m].Val); } 00880 } 00881 00882 PLogRegPredict TLogRegFit::CalcLogRegNewton(const TVec<TFltV>& XPt, const TFltV& yPt, const TStr& PlotNm, const double& ChangeEps, const int& MaxStep, const bool Intercept) { 00883 00884 X = XPt; 00885 Y = yPt; 00886 IAssert(X.Len() == Y.Len()); 00887 if (Intercept == false) { // if intercept is not included, add it 00888 for (int r = 0; r < X.Len(); r++) { X[r].Add(1); } 00889 } 00890 M = X[0].Len(); 00891 for (int r = 0; r < X.Len(); r++) { IAssert(X[r].Len() == M); } 00892 for (int r = 0; r < Y.Len(); r++) { 00893 if (Y[r] >= 0.99999) { Y[r] = 0.99999; } 00894 if (Y[r] <= 0.00001) { Y[r] = 0.00001; } 00895 } 00896 Theta.Gen(M); 00897 MLENewton(ChangeEps, MaxStep, PlotNm); 00898 return new TLogRegPredict(Theta); 00899 }; 00900 00901 PLogRegPredict TLogRegFit::CalcLogRegGradient(const TVec<TFltV>& XPt, const TFltV& yPt, const TStr& PlotNm, const double& ChangeEps, const int& MaxStep, const bool Intercept) { 00902 X = XPt; 00903 Y = yPt; 00904 IAssert(X.Len() == Y.Len()); 00905 if (Intercept == false) { // if intercept is not included, add it 00906 for (int r = 0; r < X.Len(); r++) { X[r].Add(1); } 00907 } 00908 M = X[0].Len(); 00909 for (int r = 0; r < X.Len(); r++) { IAssert(X[r].Len() == M); } 00910 for (int r = 0; r < Y.Len(); r++) { 00911 if (Y[r] >= 0.99999) { Y[r] = 0.99999; } 00912 if (Y[r] <= 0.00001) { Y[r] = 0.00001; } 00913 } 00914 Theta.Gen(M); 00915 MLEGradient(ChangeEps, MaxStep, PlotNm); 00916 return new TLogRegPredict(Theta); 00917 }; 00918 00920 // Logistic-Regression-Model 00921 00922 double TLogRegPredict::GetCfy(const TFltV& AttrV, const TFltV& NewTheta) { 00923 int len = AttrV.Len(); 00924 double res = 0; 00925 if (len < NewTheta.Len()) { res = NewTheta.Last(); } //if feature vector is shorter, add an intercept 00926 for (int i = 0; i < len; i++) { 00927 if (i < NewTheta.Len()) { res += AttrV[i] * NewTheta[i]; } 00928 } 00929 double mu = 1 / (1 + exp(-res)); 00930 return mu; 00931 } 00932 00933 void TLogRegPredict::GetCfy(const TVec<TFltV>& X, TFltV& OutV, const TFltV& NewTheta) { 00934 OutV.Gen(X.Len()); 00935 for (int r = 0; r < X.Len(); r++) { 00936 OutV[r] = GetCfy(X[r], NewTheta); 00937 } 00938 }