SNAP Library 6.0, Developer Reference  2020-12-09 16:24:20
SNAP, a general purpose, high performance system for analysis and manipulation of large networks
cascdynetinf.h
Go to the documentation of this file.
1 #ifndef snap_cascdynetinf_h
2 #define snap_cascdynetinf_h
3 
4 #include "Snap.h"
5 
6 // pairwise transmission models
7 typedef enum {
8  EXP, // exponential
9  POW, // powerlaw
10  RAY, // rayleigh
11  WEI // weibull
12 } TModel;
13 
14 // tx rates trends over time for synthetic experiments
15 typedef enum {
16  CONSTANT, // constant
17  LINEAR, // linear trend up/down
18  EXPONENTIAL, // exponential trend
19  RAYLEIGH, // rayleigh trend
20  SLAB, // slab
21  SQUARE, // square
22  CHAINSAW, // chainsaw
23  RANDOM // random noise around alpha value
24 } TVarying;
25 
26 // optimization methods
27 typedef enum {
28  OSG, // stochastic gradient
29  OWSG, // windowed stochastic gradient
30  OESG, // exponential decay stochastic gradient
31  OWESG, // windowed exponential decay stochastic gradient
32  ORSG, // rayleigh decay stochastic gradient
33  OBSG, // no decay batch stochastic gradient
34  OWBSG, // windowed batch stochastic gradient
35  OEBSG, // exponential decay batch stochastic gradient
36  ORBSG, // rayleigh decay batch stochastic gradient
38 } TOptMethod;
39 
40 typedef enum {
46 } TSampling;
47 
48 // l2 regularizer on/off
49 typedef enum {
50  NONE, // no regularizer
51  L2REG // L2 regularizer
52 } TRegularizer;
53 
54 typedef enum {
55  TIME_STEP, // run inference every time step
56  INFECTION_STEP, // run inference every # number of infections
57  CASCADE_STEP, // run inference every time a cascade "finishes"
59 } TRunningMode;
60 
63 
66 
67 // Hit info (node id, timestamp) about a node in a cascade
68 class THitInfo {
69 public:
73 public:
74  THitInfo(const int& NodeId=-1, const double& HitTime=0) : NId(NodeId), Tm(HitTime) { }
75  THitInfo(TSIn& SIn) : NId(SIn), Tm(SIn), Keywords(SIn) { }
76  void AddKeyword(const int& KId) { Keywords.AddUnique(KId); }
77  void DelKeywords() { Keywords.Clr(); }
78  void Save(TSOut& SOut) const { NId.Save(SOut); Tm.Save(SOut); Keywords.Save(SOut); }
79  bool operator < (const THitInfo& Hit) const {
80  return Tm < Hit.Tm; }
81 };
82 
83 // Cascade
84 class TCascade {
85 public:
86  TInt CId; // cascade id
87  THash<TInt, THitInfo> NIdHitH; // infected nodes
88  TInt Model; // pairwise transmission model
89 public:
90  TCascade() : CId(0), NIdHitH(), Model(0) { }
91  TCascade(const int &model) : NIdHitH() { Model = model; }
92  TCascade(const int &cid, const int& model) : NIdHitH() { CId = cid; Model = model; }
93  TCascade(TSIn& SIn) : CId(SIn), NIdHitH(SIn), Model(SIn) { }
94  void Save(TSOut& SOut) const { CId.Save(SOut); NIdHitH.Save(SOut); Model.Save(SOut); }
95  void Clr() { NIdHitH.Clr(); }
96  int GetId() { return CId; }
97  int Len() const { return NIdHitH.Len(); }
98  int LenBeforeT(const double& T) { int len = 0; while (len < NIdHitH.Len() && NIdHitH[len].Tm <= T) { len++; } return len; }
99  int LenAfterT(const double& T) { int len = 0; while (len < NIdHitH.Len() && NIdHitH[NIdHitH.Len()-1-len].Tm >= T) { len++; } return len; }
100  int GetNode(const int& i) const { return NIdHitH.GetKey(i); }
101  THash<TInt, THitInfo>::TIter BegI() const { return NIdHitH.BegI(); }
102  THash<TInt, THitInfo>::TIter EndI() const { return NIdHitH.EndI(); }
103  int GetModel() const { return Model; }
104  double GetTm(const int& NId) const { return NIdHitH.GetDat(NId).Tm; }
105  double GetMaxTm() const { return NIdHitH[NIdHitH.Len()-1].Tm; } // we assume the cascade is sorted
106  double GetMinTm() const { return NIdHitH[0].Tm; } // we assume the cascade is sorted
107  void Add(const int& NId, const double& HitTm) { NIdHitH.AddDat(NId, THitInfo(NId, HitTm)); }
108  void Del(const int& NId) { NIdHitH.DelKey(NId); }
109  bool IsNode(const int& NId) const { return NIdHitH.IsKey(NId); }
110  void Sort() { NIdHitH.SortByDat(true); }
111  bool operator < (const TCascade& Cascade) const {
112  return Len() < Cascade.Len(); }
113 };
114 
115 // Node info (name and number of cascades)
116 class TNodeInfo {
117 public:
120 public:
121  TNodeInfo() { }
122  TNodeInfo(const TStr& NodeNm, const int& Volume) : Name(NodeNm), Vol(Volume) { }
123  TNodeInfo(TSIn& SIn) : Name(SIn), Vol(SIn) { }
124  void Save(TSOut& SOut) const { Name.Save(SOut); Vol.Save(SOut); }
125  bool operator < (const TNodeInfo& NodeInfo) const {
126  return Vol < NodeInfo.Vol; }
127 };
128 
129 // Stochastic gradient network inference class
130 class TNIBs {
131 public:
132  THash<TInt, TCascade> CascH; // cascades, indexed by id
133  THash<TInt, TNodeInfo> NodeNmH; // node info (name, volume), indexed by node id
134  TStrIntH DomainsIdH; // domain, DomainId hash table
135  TStrIntH CascadeIdH; // quote, CascadeId hash table, QuoteId is equivalent to cascadeId
136 
137  // cascades per edge
139 
140  // network
142 
143  // pairwise transmission model
145 
146  // time horizon per cascade (if it is fixed), and totaltime
148 
149  // delta for power-law and k for weibull
151 
152  // step (gamma), regularizer (mu), tolerance, and min/max alpha for stochastic gradient descend
156 
157  // inferred network
160 
161  // gradients (per alpha & cascade)
164 
165  // sampled cascades
167 
168  // performance measures
171 
172 public:
173  TNIBs( ) { }
174  TNIBs(TSIn& SIn) : CascH(SIn), NodeNmH(SIn), CascPerEdge(SIn), InferredNetwork(SIn) { Model = EXP; }
175  void Save(TSOut& SOut) const { CascH.Save(SOut); NodeNmH.Save(SOut); CascPerEdge.Save(SOut); InferredNetwork.Save(SOut); }
176 
177  // functions to load text cascades & network files
178  void LoadCascadesTxt(TSIn& SIn);
179  void LoadGroundTruthTxt(TSIn& SIn);
180  void LoadGroundTruthNodesTxt(TSIn& SIn);
181  void LoadInferredTxt(TSIn& SIn);
182  void LoadInferredNodesTxt(TSIn& SIn);
183 
184  // maximum time for synthetic generation, tx model & window per cascade (if any)
185  void SetTotalTime(const float& tt) { TotalTime = tt; }
186  void SetModel(const TModel& model) { Model = model; }
187  void SetWindow(const double& window) { Window = window; }
188 
189  // delta for power law & k for weibull
190  void SetDelta(const double& delta) { Delta = delta; }
191  void SetK(const double& k) { K = k; }
192 
193  // optimization parameters
194  void SetGamma(const double& gamma) { Gamma = gamma; }
195  void SetAging(const double& aging) { Aging = aging; }
196  void SetRegularizer(const TRegularizer& reg) { Regularizer = reg; }
197  void SetMu(const double& mu) { Mu = mu; }
198  void SetTolerance(const double& tol) { Tol = tol; }
199  void SetMaxAlpha(const double& ma) { MaxAlpha = ma; }
200  void SetMinAlpha(const double& ma) { MinAlpha = ma; }
201  void SetInitAlpha(const double& ia) { InitAlpha = ia; }
202 
203  // processing cascades
204  void AddCasc(const TStr& CascStr, const TModel& Model=EXP);
205  void AddCasc(const TCascade& Cascade) { CascH.AddDat(Cascade.CId) = Cascade; }
206  void AddCasc(const TIntFltH& Cascade, const int& CId=-1, const TModel& Model=EXP);
207  void GenCascade(TCascade& C);
208  bool IsCascade(int c) { return CascH.IsKey(c); }
209  TCascade & GetCasc(int c) { return CascH.GetDat(c); }
210  int GetCascs() { return CascH.Len(); }
211  int GetCascadeId(const TStr& Cascade) { return CascadeIdH.GetDat(Cascade); }
212 
213  // node info
214  int GetNodes() { return InferredNetwork.GetNodes(); }
215  void AddNodeNm(const int& NId, const TNodeInfo& Info) { NodeNmH.AddDat(NId, Info); }
216  TStr GetNodeNm(const int& NId) const { return NodeNmH.GetDat(NId).Name; }
217  TNodeInfo GetNodeInfo(const int& NId) const { return NodeNmH.GetDat(NId); }
218  bool IsNodeNm(const int& NId) const { return NodeNmH.IsKey(NId); }
219  void SortNodeNmByVol(const bool& asc=false) { NodeNmH.SortByDat(asc); }
220 
221  // domains
222  void AddDomainNm(const TStr& Domain, const int& DomainId=-1) { DomainsIdH.AddDat(Domain) = TInt(DomainId==-1? DomainsIdH.Len() : DomainId); }
223  bool IsDomainNm(const TStr& Domain) const { return DomainsIdH.IsKey(Domain); }
224  int GetDomainId(const TStr& Domain) { return DomainsIdH.GetDat(Domain); }
225 
226  // get network or graph at a given time
227  void GetGroundTruthGraphAtT(const double& Step, PNGraph &GraphAtT);
228  void GetGroundTruthNetworkAtT(const double& Step, PStrFltNEDNet& NetworkAtT);
229  void GetInferredGraphAtT(const double& Step, PNGraph &GraphAtT);
230  void GetInferredNetworkAtT(const double& Step, PStrFltNEDNet& NetworkAtT);
231 
232  // reset/init for optimization
233  void Reset();
234  void Init(const TFltV& Steps);
235 
236  // optimization methods
237  void SG(const int& NId, const int& Iters, const TFltV& Steps, const TSampling& Sampling, const TStr& ParamSampling=TStr(""), const bool& PlotPerformance=false);
238  void BSG(const int& NId, const int& Iters, const TFltV& Steps, const int& BatchLen, const TSampling& Sampling, const TStr& ParamSampling=TStr(""), const bool& PlotPerformance=false);
239  void FG(const int& NId, const int& Iters, const TFltV& Steps);
240 
241  // auxiliary function for optimization
242  void UpdateDiff(const TOptMethod& OptMethod, const int& NId, TCascade& Cascade, TIntPrV& AlphasToUpdate, const double& CurrentTime=TFlt::Mx);
243 
244  // functions to compute burstiness
245  void find_C( int t, TFltV &x, TFltVV &C, const int& k, const double& s, const double& gamma, const double& T );
246  void find_min_state( TFltVV &C, TIntV &states, const int& k, const double& s, const double& gamma, const double& T );
247  void LabelBurstAutomaton(const int& SrcId, const int& DstId, TIntV &state_labels, TFltV &state_times, const bool& inferred=false, const int& k = 5, const double& s = 2.0, const double& gamma = 1.0, const TSecTm& MinTime=TSecTm(), const TSecTm& MaxTime=TSecTm() );
248 
249  // function to compute performance for a particular time step and node given groundtruth + inferred network
250  void ComputePerformanceNId(const int& NId, const int& Step, const TFltV& Steps);
251 
252  // storing ground truth and inferred network in pajek and text format
253  void SaveInferredPajek(const TStr& OutFNm, const double& Step, const TIntV& NIdV=TIntV());
254  void SaveInferred(const TStr& OutFNm, const TIntV& NIdV=TIntV());
255  void SaveInferred(const TStr& OutFNm, const double& Step, const TIntV& NIdV=TIntV());
256  void SaveInferredEdges(const TStr& OutFNm);
257 
258  // store network
259  void SaveGroundTruthPajek(const TStr& OutFNm, const double& Step);
260  void SaveGroundTruth(const TStr& OutFNm);
261 
262  // storing NodeId, site name
263  void SaveSites(const TStr& OutFNm, const TIntFltVH& CascadesPerNode=TIntFltVH());
264 
265  // storing cascades in text format
266  void SaveCascades(const TStr& OutFNm);
267 };
268 
269 #endif
TSizeTy AddUnique(const TVal &Val)
Adds element Val to a vector only if the element Val is not already in the vector.
Definition: ds.h:1162
TIntV Keywords
Definition: cascdynetinf.h:72
int GetCascs()
Definition: cascdynetinf.h:210
TNIBs(TSIn &SIn)
Definition: cascdynetinf.h:174
void BSG(const int &NId, const int &Iters, const TFltV &Steps, const int &BatchLen, const TSampling &Sampling, const TStr &ParamSampling=TStr(""), const bool &PlotPerformance=false)
void SaveGroundTruth(const TStr &OutFNm)
void Save(TSOut &SOut) const
Definition: cascdynetinf.h:94
void SaveInferred(const TStr &OutFNm, const TIntV &NIdV=TIntV())
void SortNodeNmByVol(const bool &asc=false)
Definition: cascdynetinf.h:219
void AddCasc(const TCascade &Cascade)
Definition: cascdynetinf.h:205
TNodeInfo GetNodeInfo(const int &NId) const
Definition: cascdynetinf.h:217
THitInfo(const int &NodeId=-1, const double &HitTime=0)
Definition: cascdynetinf.h:74
void SetK(const double &k)
Definition: cascdynetinf.h:191
bool operator<(const TCascade &Cascade) const
Definition: cascdynetinf.h:111
TIntIntPrH SampledCascadesH
Definition: cascdynetinf.h:166
virtual void Save(TSOut &SOut) const
Saves the network to a (binary) stream SOut.
Definition: network.h:659
void SaveGroundTruthPajek(const TStr &OutFNm, const double &Step)
void SetInitAlpha(const double &ia)
Definition: cascdynetinf.h:201
int GetId()
Definition: cascdynetinf.h:96
TInt NId
Definition: cascdynetinf.h:70
TFlt Mu
Definition: cascdynetinf.h:153
TCascade(TSIn &SIn)
Definition: cascdynetinf.h:93
TCascade(const int &model)
Definition: cascdynetinf.h:91
void DelKeywords()
Definition: cascdynetinf.h:77
TFlt TotalTime
Definition: cascdynetinf.h:147
void Save(TSOut &SOut) const
Definition: dt.h:1153
void Init(const TFltV &Steps)
THash< TInt, TCascade > CascH
Definition: cascdynetinf.h:132
THash< TInt, THitInfo >::TIter EndI() const
Definition: cascdynetinf.h:102
int Len() const
Definition: cascdynetinf.h:97
TStrFltFltHNEDNet Network
Definition: cascdynetinf.h:141
void SetMu(const double &mu)
Definition: cascdynetinf.h:197
TStrFltFltHNEDNet InferredNetwork
Definition: cascdynetinf.h:158
TCascade & GetCasc(int c)
Definition: cascdynetinf.h:209
TFlt MaxAlpha
Definition: cascdynetinf.h:155
void AddDomainNm(const TStr &Domain, const int &DomainId=-1)
Definition: cascdynetinf.h:222
void LoadInferredNodesTxt(TSIn &SIn)
TIter BegI() const
Definition: hash.h:213
void LoadCascadesTxt(TSIn &SIn)
Definition: cascdynetinf.cpp:4
int GetNodes()
Definition: cascdynetinf.h:214
TRunningMode
Definition: cascdynetinf.h:54
THash< TInt, TNodeInfo > NodeNmH
Definition: cascdynetinf.h:133
bool IsDomainNm(const TStr &Domain) const
Definition: cascdynetinf.h:223
void Save(TSOut &SOut) const
Definition: hash.h:183
TModel
Definition: cascdynetinf.h:7
TNodeInfo(const TStr &NodeNm, const int &Volume)
Definition: cascdynetinf.h:122
void AddCasc(const TStr &CascStr, const TModel &Model=EXP)
TFlt Window
Definition: cascdynetinf.h:147
TNodeInfo(TSIn &SIn)
Definition: cascdynetinf.h:123
void LoadGroundTruthTxt(TSIn &SIn)
TPt< TStrFltFltHNEDNet > PStrFltFltHNEDNet
Definition: cascdynetinf.h:62
void AddKeyword(const int &KId)
Definition: cascdynetinf.h:76
THash< TInt, THitInfo > NIdHitH
Definition: cascdynetinf.h:87
void SetRegularizer(const TRegularizer &reg)
Definition: cascdynetinf.h:196
void SetMinAlpha(const double &ma)
Definition: cascdynetinf.h:200
const TDat & GetDat(const TKey &Key) const
Definition: hash.h:262
TIter EndI() const
Definition: hash.h:218
int LenAfterT(const double &T)
Definition: cascdynetinf.h:99
void SetDelta(const double &delta)
Definition: cascdynetinf.h:190
void GetInferredGraphAtT(const double &Step, PNGraph &GraphAtT)
TFlt K
Definition: cascdynetinf.h:150
bool operator<(const THitInfo &Hit) const
Definition: cascdynetinf.h:79
static const double Mx
Definition: dt.h:1391
TIntFltH AveDiffAlphas
Definition: cascdynetinf.h:162
void SaveInferredPajek(const TStr &OutFNm, const double &Step, const TIntV &NIdV=TIntV())
double GetMinTm() const
Definition: cascdynetinf.h:106
Definition: dt.h:1386
TFlt Delta
Definition: cascdynetinf.h:150
TNodeEDatNet< TStr, TFltFltH > TStrFltFltHNEDNet
Definition: cascdynetinf.h:61
Definition: fl.h:58
void Save(TSOut &SOut) const
Definition: ds.h:954
TCascade(const int &cid, const int &model)
Definition: cascdynetinf.h:92
TInt Model
Definition: cascdynetinf.h:88
TFltPrV PrecisionRecall
Definition: cascdynetinf.h:169
double GetTm(const int &NId) const
Definition: cascdynetinf.h:104
TFlt MinAlpha
Definition: cascdynetinf.h:155
THitInfo(TSIn &SIn)
Definition: cascdynetinf.h:75
void DelKey(const TKey &Key)
Definition: hash.h:404
TFlt Gamma
Definition: cascdynetinf.h:153
void Clr(const bool &DoDel=true, const TSizeTy &NoDelLim=-1)
Clears the contents of the vector.
Definition: ds.h:1022
THash< TInt, TFltV > TIntFltVH
Definition: hash.h:618
TOptMethod
Definition: cascdynetinf.h:27
void Save(TSOut &SOut, const bool &IsSmall=false) const
Definition: dt.h:440
TFltPrV MSE
Definition: cascdynetinf.h:170
void SG(const int &NId, const int &Iters, const TFltV &Steps, const TSampling &Sampling, const TStr &ParamSampling=TStr(""), const bool &PlotPerformance=false)
TInt CId
Definition: cascdynetinf.h:86
void Del(const int &NId)
Definition: cascdynetinf.h:108
void LoadInferredTxt(TSIn &SIn)
void SetWindow(const double &window)
Definition: cascdynetinf.h:187
void SetAging(const double &aging)
Definition: cascdynetinf.h:195
void GenCascade(TCascade &C)
void Save(TSOut &SOut) const
Definition: cascdynetinf.h:175
void Save(TSOut &SOut) const
Definition: cascdynetinf.h:78
TPt< TStrFltNEDNet > PStrFltNEDNet
Definition: cascdynetinf.h:65
bool operator<(const TNodeInfo &NodeInfo) const
Definition: cascdynetinf.h:125
void ComputePerformanceNId(const int &NId, const int &Step, const TFltV &Steps)
void FG(const int &NId, const int &Iters, const TFltV &Steps)
TStrIntH DomainsIdH
Definition: cascdynetinf.h:134
TFlt InitAlpha
Definition: cascdynetinf.h:155
void SaveSites(const TStr &OutFNm, const TIntFltVH &CascadesPerNode=TIntFltVH())
Definition: fl.h:128
TFltPrV Accuracy
Definition: cascdynetinf.h:170
void SaveInferredEdges(const TStr &OutFNm)
void GetInferredNetworkAtT(const double &Step, PStrFltNEDNet &NetworkAtT)
bool IsNodeNm(const int &NId) const
Definition: cascdynetinf.h:218
void Clr()
Definition: cascdynetinf.h:95
void GetGroundTruthNetworkAtT(const double &Step, PStrFltNEDNet &NetworkAtT)
void Reset()
int GetModel() const
Definition: cascdynetinf.h:103
Definition: dt.h:1137
int GetCascadeId(const TStr &Cascade)
Definition: cascdynetinf.h:211
void SetTolerance(const double &tol)
Definition: cascdynetinf.h:198
TStrIntH CascadeIdH
Definition: cascdynetinf.h:135
TNodeEDatNet< TStr, TFlt > TStrFltNEDNet
Definition: cascdynetinf.h:64
Definition: tm.h:81
TFlt Aging
Definition: cascdynetinf.h:153
void UpdateDiff(const TOptMethod &OptMethod, const int &NId, TCascade &Cascade, TIntPrV &AlphasToUpdate, const double &CurrentTime=TFlt::Mx)
void find_C(int t, TFltV &x, TFltVV &C, const int &k, const double &s, const double &gamma, const double &T)
THash< TInt, THitInfo >::TIter BegI() const
Definition: cascdynetinf.h:101
double GetMaxTm() const
Definition: cascdynetinf.h:105
Definition: dt.h:412
void LoadGroundTruthNodesTxt(TSIn &SIn)
TFltPrV MAE
Definition: cascdynetinf.h:170
void Add(const int &NId, const double &HitTm)
Definition: cascdynetinf.h:107
bool IsCascade(int c)
Definition: cascdynetinf.h:208
TRegularizer Regularizer
Definition: cascdynetinf.h:154
void LabelBurstAutomaton(const int &SrcId, const int &DstId, TIntV &state_labels, TFltV &state_times, const bool &inferred=false, const int &k=5, const double &s=2.0, const double &gamma=1.0, const TSecTm &MinTime=TSecTm(), const TSecTm &MaxTime=TSecTm())
void SetMaxAlpha(const double &ma)
Definition: cascdynetinf.h:199
void SetModel(const TModel &model)
Definition: cascdynetinf.h:186
TVec< TInt > TIntV
Definition: ds.h:1594
void Clr(const bool &DoDel=true, const int &NoDelLim=-1, const bool &ResetDat=true)
Definition: hash.h:361
Definition: bd.h:196
int LenBeforeT(const double &T)
Definition: cascdynetinf.h:98
void SetTotalTime(const float &tt)
Definition: cascdynetinf.h:185
void SaveCascades(const TStr &OutFNm)
TSampling
Definition: cascdynetinf.h:40
int GetNode(const int &i) const
Definition: cascdynetinf.h:100
void Save(TSOut &SOut) const
Definition: cascdynetinf.h:124
THash< TIntPr, TIntV > CascPerEdge
Definition: cascdynetinf.h:138
bool IsKey(const TKey &Key) const
Definition: hash.h:258
TStr GetNodeNm(const int &NId) const
Definition: cascdynetinf.h:216
int Len() const
Definition: hash.h:228
int GetDomainId(const TStr &Domain)
Definition: cascdynetinf.h:224
TVarying
Definition: cascdynetinf.h:15
void Sort()
Definition: cascdynetinf.h:110
TDat & AddDat(const TKey &Key)
Definition: hash.h:238
void SetGamma(const double &gamma)
Definition: cascdynetinf.h:194
void AddNodeNm(const int &NId, const TNodeInfo &Info)
Definition: cascdynetinf.h:215
THash< TInt, TIntFltH > DiffAlphas
Definition: cascdynetinf.h:163
void Save(TSOut &SOut) const
Definition: dt.h:1402
const TKey & GetKey(const int &KeyId) const
Definition: hash.h:252
TRegularizer
Definition: cascdynetinf.h:49
void GetGroundTruthGraphAtT(const double &Step, PNGraph &GraphAtT)
bool IsNode(const int &NId) const
Definition: cascdynetinf.h:109
void find_min_state(TFltVV &C, TIntV &states, const int &k, const double &s, const double &gamma, const double &T)
TModel Model
Definition: cascdynetinf.h:144
TFlt Tol
Definition: cascdynetinf.h:155
int GetNodes() const
Returns the number of nodes in the network.
Definition: network.h:680
TIntFltH TotalCascadesAlpha
Definition: cascdynetinf.h:159
void SortByDat(const bool &Asc=true)
Definition: hash.h:292