1 #ifndef yanglib_agmattr1_h
2 #define yanglib_agmattr1_h
13 if (GraphType) { Edges2 = Edges >= 0 ? Edges : Graph->GetEdges(); }
14 else { Edges2 = Edges >= 0 ? 2 * Edges : Graph->GetEdges(); }
17 for (
int i = 0; i < CmtyS.
Len(); i++) {
18 if (! Graph->IsNode(CmtyS[i])) {
continue; }
19 typename PGraph::TObj::TNodeI NI = Graph->GetNI(CmtyS[i]);
20 for (
int e = 0; e < NI.GetOutDeg(); e++) {
21 if (! CmtyS.
IsKey(NI.GetOutNId(e))) { Cut += 1; }
23 Vol += NI.GetOutDeg();
27 if (2 * Vol > Edges2) { Phi = Cut / double (Edges2 - Vol); }
28 else if (Vol == 0) { Phi = 0.0; }
29 else { Phi = Cut / double(Vol); }
31 if (Vol == Edges2) { Phi = 1.0; }
37 template<
class PGraph>
39 TIntPrV EdgeV(G->GetEdges(), 0);
40 for (
typename PGraph::TObj::TEdgeI EI = G->BegEI(); EI < G->EndEI(); EI++) {
41 EdgeV.
Add(
TIntPr(EI.GetSrcNId(), EI.GetDstNId()));
46 HoldOutSet.
Gen(G->GetNodes());
47 int HOTotal = int(HOFrac * G->GetNodes() * (G->GetNodes() - 1) / 2.0);
48 if (GraphType) { HOTotal *= 2;}
50 int HOEdges = (int)
TMath::Round(HOFrac * G->GetEdges());
51 printf(
"holding out %d edges...\n", HOEdges);
52 for (
int he = 0; he < (int) HOEdges; he++) {
53 HoldOutSet[EdgeV[he].Val1].AddKey(EdgeV[he].Val2);
54 if (! GraphType) { HoldOutSet[EdgeV[he].Val2].AddKey(EdgeV[he].Val1); }
57 printf(
"%d Edges hold out\n", HOCnt);
58 while(HOCnt++ < HOTotal) {
61 if (SrcNID == DstNID) {
continue; }
62 HoldOutSet[SrcNID].AddKey(DstNID);
63 if (! GraphType) { HoldOutSet[DstNID].AddKey(SrcNID); }
67 template<
class PGraph>
69 typename PGraph::TObj::TNodeI NI = Graph->GetNI(NID);
70 NBCmtyS.
Gen(NI.GetDeg());
72 for (
int e = 0; e < NI.GetDeg(); e++) {
73 NBCmtyS.
AddKey(NI.GetNbrNId(e));
76 template<
class PGraph>
78 NIdPhiV.
Gen(G->GetNodes(), 0);
79 const int Edges = G->GetEdges();
82 for (
typename PGraph::TObj::TNodeI NI = G->BegNI(); NI < G->EndNI(); NI++) {
83 TIntSet NBCmty(NI.GetDeg() + 1);
85 if (NI.GetDeg() < 5) {
88 TCesnaUtil::GetNbhCom<PGraph>(G, NI.GetId(), NBCmty);
96 printf(
"conductance computation completed [%s]\n", RunTm.
GetTmStr());
103 printf(
"nodes in the graph:%d\n", NIDV.
Len());
104 for (
int u = 0; u < NIDV.
Len(); u++) { NIDAttrH.
AddDat(NIDV[u]).
Gen(0, 0); }
109 if (NodeNameH.
Len() > 0 && ! NodeNameH.
IsKey(NodeName)) {
continue; }
110 if (NodeNameH.
Len() > 0) {
114 if (! NIDAttrH.
IsKey(NID)) {
128 FILE* F = fopen(FNm.
CStr(),
"wt");
129 for (
int u = 0; u < NIDAttrH.
Len(); u++) {
130 int NID = NIDAttrH.
GetKey(u);
132 for (
int k = 0; k < NIDAttrH[u].
Len(); k++) {
133 int KID = NIDAttrH[u][k];
135 fprintf(F,
"%s\t%s\n", NodeName.
CStr(), FeatName.
CStr());
149 FILE* F = fopen(FNm.
CStr(),
"wt");
150 for (
int u = 0; u < NIDAttrH.
Len(); u++) {
151 int NID = NIDAttrH.
GetKey(u);
153 for (
int k = 0; k < NIDAttrH[u].
Len(); k++) {
154 int KID = NIDAttrH[u][k];
156 fprintf(F,
"%s\t%s\n", NodeName.
CStr(), FeatName.
CStr());
171 for (
int u = 0; u < NIDAttrH.
Len(); u++) {
172 for (
int k = 0; k < NIDAttrH[u].
Len(); k++) {
173 if (NIDAttrH[u][k] >= Attrs) { Attrs = NIDAttrH[u][k] + 1; }
181 for (
int u = 1; u < NIDV.
Len(); u++) {
182 if (! NIDAttrH.
IsKey(NIDV[u])) {
continue; }
183 AttrCnt += NIDAttrH.
GetDat(NIDV[u]).
Len();
186 FILE* F = fopen(FNm.
CStr(),
"wt");
187 fprintf(F,
"%d %d\n", NIDV.
Len() - 1, AttrCnt);
189 for (
int u = 1; u < NIDV.
Len(); u++) {
190 if (NIDAttrH.
IsKey(NIDV[u])) {
191 for (
int k = 0; k < NIDAttrH.
GetDat(NIDV[u]).
Len(); k++) {
192 if (k > 0) { fprintf(F,
" "); }
193 fprintf(F,
"%d", NIDAttrH.
GetDat(NIDV[u])[k].Val + 1);
205 for (
int u = 0; u < OldNIDAttrH.
Len(); u++) {
206 for (
int k = 0; k < OldNIDAttrH[u].
Len(); k++) {
207 KIDCntH.
AddDat(OldNIDAttrH[u][k])++;
213 for (
int c = 0; c < KIDCntH.
Len(); c++) {
214 double Frac = (double) KIDCntH[c].Val / (
double) OldNIDAttrH.
Len();
215 if (KIDCntH[c].Val < MinCnt) {
continue; }
216 if (Frac > MaxFrac || Frac < MinFrac) {
continue; }
217 SelectedK.AddKey(KIDCntH.
GetKey(c));
219 printf(
"%d attributes selected from %d\n", SelectedK.Len(), KIDCntH.
Len());
220 NewNIDAttrH.
Gen(OldNIDAttrH.
Len());
221 for (
int u = 0; u < OldNIDAttrH.
Len(); u++) {
222 int NID = OldNIDAttrH.
GetKey(u);
224 for (
int k = 0; k < OldNIDAttrH[u].
Len(); k++) {
225 if (! SelectedK.IsKey(OldNIDAttrH[u][k])) {
continue; }
226 AttrV.
Add(SelectedK.GetKeyId(OldNIDAttrH[u][k]));
230 if (! OldNameH.
Empty()) {
231 NewNameH.
Gen(SelectedK.Len());
232 for (
int k = 0; k < SelectedK.Len(); k++) {
233 int OldKID = SelectedK.GetKey(k);
234 if (OldNameH.
IsKey(OldKID)) {
238 printf(
"%d attributes names copied\n", NewNameH.
Len());
243 FilterLowEntropy(OldNIDAttrH, NewNIDAttrH, TmpH1, TmpH2, MinFrac, MaxFrac, MinCnt);
324 if (
F[NIdx].IsKey(CID)) {
325 return F[NIdx].GetDat(CID);
333 for (
int k = 0; k <
Attrs; k++) {
342 for (
int k = 0; k <
Attrs; k++) {
361 for (
int u = 0; u <
F.Len(); u++) {
362 if (
HOKIDSV[u].IsKey(K)) {
continue; }
365 for (
int c = 0; c < WK.
Len() - 1; c++) {
373 for (
int k = 0; k <
Attrs; k++) {
374 for (
int u = 0; u <
F.Len(); u++) {
375 if (
HOKIDSV[u].IsKey(k)) {
continue; }
385 for (
int u = 0; u <
F.Len(); u++) {
390 for (
int u = 0; u <
F.Len(); u++) {
402 for (
int h = 0; h < 10 * HoldOutCnt; h++) {
407 HOSetV[UID].AddKey(KID);
410 if (Cnt >= HoldOutCnt) {
break; }
412 printf(
"%d hold out pairs generated for attributes\n", Cnt);
423 for (
int u = 0; u <
F.Len(); u++) {
424 if (
HOKIDSV[u].IsKey(K)) {
continue; }
427 GradV[CI.GetKey()] += (
GetAttr(u, K) - Pred) *
GetCom(u, CI.GetKey());
432 for (
int c = 0; c < GradV.
Len() - 1; c++) {
468 int FindComs(
TIntV& ComsV,
const bool UseBIC =
false,
const double HOFrac = 0.2,
const int NumThreads = 20,
const TStr PlotLFNm =
TStr(),
const double StepAlpha = 0.3,
const double StepBeta = 0.1);
469 int FindComs(
const int NumThreads,
const int MaxComs,
const int MinComs,
const int DivComs,
const TStr OutFNm,
const bool UseBIC =
false,
const double HOFrac = 0.1,
const double StepAlpha = 0.3,
const double StepBeta = 0.3);
471 for (
int u = 0; u <
X.Len(); u++) {
472 if (NodeNameH.
Len() > 0) {
475 printf(
"NID: %d\t Attrs: ",
NIDToIdx[u].Val);
477 for (
int k = 0; k <
X[u].Len(); k++) {
478 printf(
"%d, ",
X[u][k].Val);
481 if (u >= TopK) {
break; }
487 double StepSize = 1.0;
491 for(
int iter = 0; iter < MaxIter; iter++) {
492 for (
int c = 0; c < DeltaV.
Len(); c++){
493 double NewVal =
W[K][c] + StepSize * DeltaV[c];
503 if (iter == MaxIter - 1) {
512 for (
int c = 0; c <
NumComs; c++) {
513 for (
int k = 0; k <
Attrs; k++) {
514 if (
GetW(c, k) > 0.0) { PosCnt++; }
519 int MLEGradAscent(
const double& Thres,
const int& MaxIter,
const TStr PlotNm,
const double StepAlpha = 0.3,
const double StepBeta = 0.1);
520 int MLEGradAscentParallel(
const double& Thres,
const int& MaxIter,
const int ChunkNum,
const int ChunkSize,
const TStr PlotNm,
const double StepAlpha = 0.3,
const double StepBeta = 0.1);
521 int MLEGradAscentParallel(
const double& Thres,
const int& MaxIter,
const int ChunkNum,
const TStr PlotNm =
TStr(),
const double StepAlpha = 0.3,
const double StepBeta = 0.1) {
522 int ChunkSize =
G->
GetNodes() / 10 / ChunkNum;
523 if (ChunkSize == 0) { ChunkSize = 1; }
527 double inline GetCom(
const int& NID,
const int& CID) {
528 if (
F[NID].IsKey(CID)) {
529 return F[NID].GetDat(CID);
534 double inline GetAttr(
const int& NID,
const int& K) {
535 if (
X[NID].IsKey(K)) {
541 void inline AddCom(
const int& NID,
const int& CID,
const double& Val) {
542 if (
F[NID].IsKey(CID)) {
543 SumFV[CID] -=
F[NID].GetDat(CID);
545 F[NID].AddDat(CID) = Val;
549 void inline DelCom(
const int& NID,
const int& CID) {
550 if (
F[NID].IsKey(CID)) {
551 SumFV[CID] -=
F[NID].GetDat(CID);
566 if (UV.
Len() > VV.
Len()) {
568 if (VV.
IsKey(HI.GetKey())) {
569 DP += VV.
GetDat(HI.GetKey()) * HI.GetDat();
574 if (UV.
IsKey(HI.GetKey())) {
575 DP += UV.
GetDat(HI.GetKey()) * HI.GetDat();
592 DP += FI.GetDat() * WK[FI.GetKey()];
603 double inline GetW(
const int CID,
const int K) {
619 N += HI.GetDat() * HI.GetDat();
633 return 1.0 / ( 1.0 + exp(-X));
double Sigmoid(const double X)
int MLEGradAscentParallel(const double &Thres, const int &MaxIter, const int ChunkNum, const int ChunkSize, const TStr PlotNm, const double StepAlpha=0.3, const double StepBeta=0.1)
void GenHoldOutAttr(const double HOFrac, TVec< TIntSet > &HOSetV)
TPair< TInt, TInt > TIntPr
double LikelihoodForWK(const int K)
#define IAssertR(Cond, Reason)
static void GetNIdPhiV(const PGraph &G, TFltIntPrV &NIdPhiV)
TPair< TFlt, TInt > TFltIntPr
void SetWeightAttr(const double _WeightAttr)
double PredictAttrK(const int UID, const int K)
double Sum(const TIntFltH &UV)
double GetCom(const int &NID, const int &CID)
static void DumpNIDAttrHToMetis(const TStr &FNm, const THash< TInt, TIntV > &NIDAttrH, const TIntV &NIDV)
int MLEGradAscent(const double &Thres, const int &MaxIter, const TStr PlotNm, const double StepAlpha=0.3, const double StepBeta=0.1)
void Save(TSOut &SOut) const
bool IsIn(const TVal &Val) const
Checks whether element Val is a member of the vector.
double Prediction(const TIntFltH &FU, const TIntFltH &FV)
int GetKeyId(const TKey &Key) const
double DotProduct(const TIntFltH &UV, const TIntFltH &VV)
void SetRegCoef(const double _RegCoef)
static void GetNbhCom(const PGraph &Graph, const int NID, TIntSet &NBCmtyS)
int GetEdges() const
Returns the number of edges in the graph.
TSizeTy Len() const
Returns the number of elements in the vector.
void Gen(const int &ExpectVals)
double LikelihoodForWK(const int K, const TFltV &WK)
double GetAttr(const int &NID, const int &K)
bool GetInt(const int &FldN, int &Val) const
If the field FldN is an integer its value is returned in Val and the function returns true...
const TDat & GetDat(const TKey &Key) const
void Load(TSIn &SIn, const int &RndSeed=0)
void Save(TSOut &SOut) const
Saves the graph to a (binary) stream SOut.
void GetW(TVec< TFltV > &_W)
double GetStepSizeByLineSearch(const int UID, const TIntFltH &DeltaV, const TIntFltH &GradV, const double &Alpha, const double &Beta, const int MaxIter=10)
double PredictAttrK(const TIntFltH &FU, const TFltV &WK)
bool IsKey(const TKey &Key) const
double Likelihood(const bool DoParallel=false)
int GetNodes() const
Returns the number of nodes in the graph.
const char * GetFld(const int &FldN) const
Returns the contents of the field at index FldN.
double LikelihoodAttrKForRow(const int UID, const int K)
static void FilterLowEntropy(const THash< TInt, TIntV > &OldNIDAttrH, THash< TInt, TIntV > &NewNIDAttrH, const TIntStrH &OldNameH, TIntStrH &NewNameH, const double MinFrac=0.00001, const double MaxFrac=0.95, const int MinCnt=3)
static void LoadNIDAttrHFromNIDKH(const TIntV &NIDV, const TStr &InFNm, THash< TInt, TIntV > &NIDAttrH)
void Save(TSOut &SOut) const
TSsFmt
Spread-Sheet Separator Format.
static void DumpNIDAttrHToNIDK(const TStr &FNm, const THash< TInt, TIntSet > &NIDAttrH, const TStrHash< TInt > &FeatNameH, const TStrHash< TInt > &NodeNameH)
void DisplayAttrs(const int TopK, const TStrHash< TInt > &NodeNameH)
const char * GetTmStr() const
void SetW(TVec< TFltV > &_W)
void Gen(const int &ExpectVals)
bool IsKey(const char *Key) const
#define HasGraphFlag(TGraph, Flag)
For quick testing of the properties of the graph/network object (see TGraphFlag). ...
double GetComFromNID(const int &NID, const int &CID)
void RandomInit(const int InitComs)
void SetAttrHoldOut(const int NID, const int KID)
double LikelihoodHoldOut()
void GetCmtyVV(TVec< TIntV > &CmtyVV, TVec< TFltV > &Wck)
const char * GetKey(const int &KeyId) const
static double Round(const double &Val)
double Norm2(const TIntFltH &UV)
int MLEGradAscentParallel(const double &Thres, const int &MaxIter, const int ChunkNum, const TStr PlotNm=TStr(), const double StepAlpha=0.3, const double StepBeta=0.1)
static PUNGraph New()
Static constructor that returns a pointer to the graph. Call: PUNGraph Graph = TUNGraph::New().
int AddKey(const TKey &Key)
const TVal & Last() const
Returns a reference to the last element of the vector.
void AddCom(const int &NID, const int &CID, const double &Val)
void GetCmtyVVUnSorted(TVec< TIntV > &CmtyVV)
double PredictAttrK(const TIntFltH &FU, const int K)
static void GenHoldOutPairs(const PGraph &G, TVec< TIntSet > &HoldOutSet, double HOFrac, TRnd &Rnd)
TCesna(const PUNGraph &GraphPt, const THash< TInt, TIntV > &NIDAttrH, const int &InitComs, const int RndSeed=0)
void GetCmtyVV(TVec< TIntV > &CmtyVV)
static void DumpNIDAttrHToNIDK(const TStr &FNm, const THash< TInt, TIntV > &NIDAttrH)
void SetCmtyVV(const TVec< TIntV > &CmtyVV)
static PUNGraph Load(TSIn &SIn)
Static constructor that loads the graph from a stream SIn and returns a pointer to it...
directed graph (TNGraph, TNEGraph), else graph is undirected TUNGraph
static void LoadNIDAttrHFromNIDKH(const TIntV &NIDV, const TStr &InFNm, THash< TInt, TIntV > &NIDAttrH, const TStrHash< TInt > &NodeNameH, const TSsFmt Sep=ssfTabSep)
void NeighborComInit(const int InitComs)
double DotProduct(const int &UID, const int &VID)
static double GetConductance(const PGraph &Graph, const TIntSet &CmtyS, const int Edges)
double GetStepSizeByLineSearchForWK(const int K, const TFltV &DeltaV, const TFltV &GradV, const double &Alpha, const double &Beta, const int MaxIter=10)
double LikelihoodForRow(const int UID)
static TStr Fmt(const char *FmtStr,...)
static void DumpNIDAttrHToNIDK(const TStr &FNm, const THash< TInt, TIntV > &NIDAttrH, const TStrHash< TInt > &FeatNameH)
uint64 GetLineNo() const
Returns the line number of the current line.
static void DumpNIDAttrHToNIDK(const TStr &FNm, const THash< TInt, TIntV > &NIDAttrH, const TStrHash< TInt > &FeatNameH, const TStrHash< TInt > &NodeNameH)
static void DumpNIDAttrHToNIDK(const TStr &FNm, const THash< TInt, TIntSet > &NIDAttrH, const TStrHash< TInt > &FeatNameH)
static int Sign(const T &Val)
void GradientForRow(const int UID, TIntFltH &GradU, const TIntSet &CIDSet)
void SetLassoCoef(const double _LassoCoef)
void SetGraph(const PUNGraph &GraphPt, const THash< TInt, TIntV > &NIDAttrH)
void Clr(const bool &DoDel=true, const int &NoDelLim=-1, const bool &ResetDat=true)
bool Next()
Loads next line from the input file.
void GradientForWK(TFltV &GradV, const int K)
static double DotProduct(const TFltV &x, const TFltV &y)
void Save(TSOut &SOut) const
void GetCmtyVV(TVec< TIntV > &CmtyVV, const double Thres, const int MinSz=3)
void Gen(const TSizeTy &_Vals)
Constructs a vector (an array) of _Vals elements.
double Prediction(const int &UID, const int &VID)
int GetUniDevInt(const int &Range=0)
static int GetAttrs(const THash< TInt, TIntV > &NIDAttrH)
int FindComs(TIntV &ComsV, const bool UseBIC=false, const double HOFrac=0.2, const int NumThreads=20, const TStr PlotLFNm=TStr(), const double StepAlpha=0.3, const double StepBeta=0.1)
double LikelihoodAttrKForRow(const int UID, const int K, const TIntFltH &FU)
bool IsKey(const TKey &Key) const
TSizeTy Add()
Adds a new element at the end of the vector, after its current last element.
void SetHoldOut(const double HOFrac)
TDat & AddDat(const TKey &Key)
void DelCom(const int &NID, const int &CID)
void SetAttrHoldOutForOneNode(const int NID)
double GetW(const int CID, const int K)
static void DumpNIDAttrHToNIDK(const TStr &FNm, const THash< TInt, TIntSet > &NIDAttrH)
void Save(TSOut &SOut) const
const TKey & GetKey(const int &KeyId) const
static void FilterLowEntropy(const THash< TInt, TIntV > &OldNIDAttrH, THash< TInt, TIntV > &NewNIDAttrH, const double MinFrac=0.00001, const double MaxFrac=0.95, const int MinCnt=3)
int GetKeyId(const char *Key) const
Vector is a sequence TVal objects representing an array that can change in size.
bool IsKeyId(const int &KeyId) const
void SortByDat(const bool &Asc=true)