SNAP Library, Developer Reference
2012-10-15 15:06:59
SNAP, a general purpose network analysis and graph mining library
|
00001 00002 // Mathematics-Utilities 00003 class TMath{ 00004 public: 00005 static double E; 00006 static double Pi; 00007 static double LogOf2; 00008 00009 static double Inv(const double& x){IAssert(x!=0.0); return (1.0/x);} 00010 static double Sqr(const double& x){return x*x;} 00011 static double Sqrt(const double& x){IAssert(!(x<0.0)); return sqrt(x);} 00012 static double Log(const double& Val){return log(Val);} 00013 static double Log2(const double& Val){return log(Val)/LogOf2;} 00014 static double Round(const double& Val){ 00015 return (Val>0)?floor(Val+0.5):ceil(Val-0.5);} 00016 static double Round(const double & Val, int Decs){ 00017 const double pwr=pow(10.0, Decs); return Round(Val * pwr) / pwr;} 00018 static int Fac(const int& Val){ 00019 if (Val<=1){return 1;} else {return Val*Fac(Val-1);}} 00020 static int Choose(const int& N, const int& K){ // binomial coefficient 00021 return Fac(N)/(Fac(K)*Fac(N-K)); } 00022 static uint Pow2(const int& pow){return uint(1u<<pow);} 00023 static double Power(const double& Base, const double& Exponent){ 00024 return exp(log(Base)*Exponent);} 00025 00026 template <typename T> 00027 static int Sign(const T& Val){return Val<0?-1:(Val>0?1:0);} 00028 00029 template <class T> 00030 static const T& Mx(const T& LVal, const T& RVal) { 00031 return LVal > RVal ? LVal : RVal;} 00032 00033 template <class T> 00034 static const T& Mn(const T& LVal, const T& RVal){ 00035 return LVal < RVal ? LVal : RVal;} 00036 00037 template <class T> 00038 static const T& Mx(const T& Val1, const T& Val2, const T& Val3) { 00039 if (Val1 > Val2) { 00040 if (Val1 > Val3) return Val1; 00041 else return Val3; 00042 } else { 00043 if (Val2 > Val3) return Val2; 00044 else return Val3; 00045 } 00046 } 00047 00048 template <class T> 00049 static const T& Mn(const T& Val1, const T& Val2, const T& Val3) { 00050 if(Val1 < Val2) { 00051 if (Val1 < Val3) return Val1; 00052 else return Val3; 00053 } else { 00054 if (Val2 < Val3) return Val2; 00055 else return Val3; 00056 } 00057 } 00058 00059 template <class T> 00060 static const T& Median(const T& Val1, const T& Val2, const T& Val3) { 00061 if (Val1 < Val2) { 00062 if (Val2 < Val3) return Val2; 00063 else if (Val3 < Val1) return Val1; 00064 else return Val3; 00065 } else { 00066 if (Val1 < Val3) return Val1; 00067 else if (Val3 < Val2) return Val2; 00068 else return Val3; 00069 } 00070 } 00071 00072 template <class T> 00073 static const T& InRange(const T& Val, const T& Mn, const T& Mx) { 00074 IAssert(Mn <= Mx); return Val < Mn ? Mn : (Val > Mx ? Mx : Val);} 00075 00076 template <class T> 00077 static bool IsInRange(const T& Val, const T& Mn, const T& Mx) { 00078 IAssert(Mn <= Mx); return Val >= Mn && Val <= Mx;} 00079 00080 template <class T> 00081 static bool IsInEps(const T& Val, const T& Eps) { 00082 return Val >= -Eps && Val <= Eps;} 00083 }; 00084 00086 // Special-Functions 00087 class TSpecFunc{ 00088 public: 00089 static void GammaPSeries/*gser*/( 00090 double& gamser, const double& a, const double& x, double& gln); 00091 static void GammaQContFrac/*gcf*/( 00092 double& gammcf, const double& a, const double& x, double& gln); 00093 static double GammaQ/*gammq*/(const double& a, const double& x); 00094 static double LnGamma/*gammln*/(const double& xx); 00095 static double BetaCf/*betacf*/( 00096 const double& a, const double& b, const double& x); 00097 static double BetaI(const double& a, const double& b, const double& x); 00098 00099 static void LinearFit( // Y = A + B*X 00100 const TVec<TFltPr>& XY, double& A, double& B, 00101 double& SigA, double& SigB, double& Chi2, double& R2); 00102 static void PowerFit( // Y = A * X^B 00103 const TVec<TFltPr>& XY, double& A, double& B, 00104 double& SigA, double& SigB, double& Chi2, double& R2); 00105 static void LogFit( // Y = A + B*log(X) 00106 const TVec<TFltPr>& XY, double& A, double& B, 00107 double& SigA, double& SigB, double& Chi2, double& R2); 00108 static void ExpFit( // Y = A * exp(B*X) 00109 const TVec<TFltPr>& XY, double& A, double& B, 00110 double& SigA, double& SigB, double& Chi2, double& R2); 00111 public: 00112 static double LnComb(const int& n, const int& k); 00113 public: 00114 static double Entropy(const TIntV& ValV); 00115 static double Entropy(const TFltV& ValV); 00116 static void EntropyFracDim(const TIntV& ValV, TFltV& EntropyV); 00117 static void EntropyFracDim(const TFltV& ValV, TFltV& EntropyV); 00118 public: 00119 static double EntropyBias(const double& B); // solves for p: B = p*log2(p)+(1-p)log2(1-p) 00120 //MLE of the power-law coefficient 00121 static double GetPowerCoef(const TFltV& XValV, double MinX=-1.0); // values (sampled from the distribution) 00122 static double GetPowerCoef(const TFltPrV& XValCntV, double MinX=-1.0); // (value, count) pairs 00123 }; 00124 00126 // Statistical-Moments 00127 ClassTPV(TMom, PMom, TMomV)//{ 00128 private: 00129 TBool DefP; 00130 TFltPrV ValWgtV; 00131 TFlt SumW, ValSumW; 00132 TInt Vals; 00133 TBool UsableP; 00134 TFlt UnusableVal; 00135 TFlt Mn, Mx; 00136 TFlt Mean, Vari, SDev, SErr; 00137 TFlt Median, Quart1, Quart3; 00138 TFltV DecileV; // 0=min 1=1.decile, ..., 9=9.decile, 10=max 00139 TFltV PercentileV; // 0=min 1=1.percentile, ..., 9=9.percentile, 10=max 00140 public: 00141 TMom(): 00142 DefP(false), ValWgtV(), 00143 SumW(), ValSumW(), Vals(), 00144 UsableP(false), UnusableVal(-1), 00145 Mn(), Mx(), 00146 Mean(), Vari(), SDev(), SErr(), 00147 Median(), Quart1(), Quart3(), 00148 DecileV(), PercentileV(){} 00149 TMom(const TMom& Mom): 00150 DefP(Mom.DefP), ValWgtV(Mom.ValWgtV), 00151 SumW(Mom.SumW), ValSumW(Mom.ValSumW), Vals(Mom.Vals), 00152 UsableP(Mom.UsableP), UnusableVal(Mom.UnusableVal), 00153 Mn(Mom.Mn), Mx(Mom.Mx), 00154 Mean(Mom.Mean), Vari(Mom.Vari), SDev(Mom.SDev), SErr(Mom.SErr), 00155 Median(Mom.Median), Quart1(Mom.Quart1), Quart3(Mom.Quart3), 00156 DecileV(Mom.DecileV), PercentileV(Mom.PercentileV){} 00157 static PMom New(){return PMom(new TMom());} 00158 static void NewV(TMomV& MomV, const int& Moms){ 00159 MomV.Gen(Moms); for (int MomN=0; MomN<Moms; MomN++){MomV[MomN]=New();}} 00160 static void NewVV(TVVec<PMom>& MomVV, const int& XMoms, const int& YMoms){ 00161 MomVV.Gen(XMoms, YMoms); 00162 for (int XMomN=0; XMomN<XMoms; XMomN++){ 00163 for (int YMomN=0; YMomN<YMoms; YMomN++){ 00164 MomVV.At(XMomN, YMomN)=New();}}} 00165 TMom(const TFltV& _ValV); 00166 static PMom New(const TFltV& ValV){ 00167 return PMom(new TMom(ValV));} 00168 TMom(TSIn& SIn): 00169 DefP(SIn), 00170 ValWgtV(SIn), 00171 SumW(SIn), ValSumW(SIn), Vals(SIn), 00172 UsableP(SIn), UnusableVal(SIn), 00173 Mn(SIn), Mx(SIn), 00174 Mean(SIn), Vari(SIn), SDev(SIn), SErr(SIn), 00175 Median(SIn), Quart1(SIn), Quart3(SIn), 00176 DecileV(SIn), PercentileV(SIn){} 00177 static PMom Load(TSIn& SIn){return new TMom(SIn);} 00178 void Save(TSOut& SOut) const { 00179 DefP.Save(SOut); 00180 ValWgtV.Save(SOut); 00181 SumW.Save(SOut); ValSumW.Save(SOut); Vals.Save(SOut); 00182 UsableP.Save(SOut); UnusableVal.Save(SOut); 00183 Mn.Save(SOut); Mx.Save(SOut); 00184 Mean.Save(SOut); Vari.Save(SOut); SDev.Save(SOut); SErr.Save(SOut); 00185 Median.Save(SOut); Quart1.Save(SOut); Quart3.Save(SOut); 00186 DecileV.Save(SOut); PercentileV.Save(SOut);} 00187 00188 TMom& operator=(const TMom& Mom){ 00189 Assert(!DefP); DefP=Mom.DefP; 00190 ValWgtV=Mom.ValWgtV; 00191 SumW=Mom.SumW; ValSumW=Mom.ValSumW; Vals=Mom.Vals; 00192 UsableP=Mom.UsableP; UnusableVal=Mom.UnusableVal; 00193 Mn=Mom.Mn; Mx=Mom.Mx; 00194 Mean=Mom.Mean; Vari=Mom.Vari; SDev=Mom.SDev; SErr=Mom.SErr; 00195 Median=Mom.Median; Quart1=Mom.Quart1; Quart3=Mom.Quart3; 00196 DecileV=Mom.DecileV; PercentileV=Mom.PercentileV; 00197 return *this;} 00198 bool operator==(const TMom& Mom) const { 00199 return Vals==Mom.Vals;} 00200 bool operator<(const TMom& Mom) const { 00201 return Vals<Mom.Vals;} 00202 00203 // define 00204 void Def(); 00205 static void DefV(TMomV& MomV){ 00206 for (int MomN=0; MomN<MomV.Len(); MomN++){MomV[MomN]->Def();}} 00207 static void DefVV(TVVec<PMom>& MomVV){ 00208 for (int XMomN=0; XMomN<MomVV.GetXDim(); XMomN++){ 00209 for (int YMomN=0; YMomN<MomVV.GetYDim(); YMomN++){ 00210 MomVV.At(XMomN, YMomN)->Def();}}} 00211 bool IsDef() const {return DefP;} 00212 00213 // values 00214 void Add(const TFlt& Val, const TFlt& Wgt=1){Assert(!DefP); 00215 ValWgtV.Add(TFltPr(Val, Wgt)); SumW+=Wgt; ValSumW+=Wgt*Val; Vals++;} 00216 double GetWgt() const {return SumW;} 00217 int GetVals() const {return Vals;} 00218 TFlt GetVal(const int& ValN) const {IAssert(!IsDef()); return ValWgtV[ValN].Val1;} 00219 //const TFltV& GetValV() const {IAssert(!IsDef()); return ValV;} //J: 00220 00221 // usability 00222 bool IsUsable() const {Assert(DefP); return UsableP;} 00223 static bool IsUsableV(const TMomV& MomV){ 00224 for (int MomN=0; MomN<MomV.Len(); MomN++){ 00225 if (!MomV[MomN]->IsUsable()){return false;}} 00226 return true;} 00227 static bool IsUsableVV(const TVVec<PMom>& MomVV){ 00228 for (int XMomN=0; XMomN<MomVV.GetXDim(); XMomN++){ 00229 for (int YMomN=0; YMomN<MomVV.GetYDim(); YMomN++){ 00230 if (!MomVV.At(XMomN, YMomN)->IsUsable()){return false;}}} 00231 return true;} 00232 00233 // moments 00234 double GetMn() const {Assert(DefP&&UsableP); return Mn;} 00235 double GetMx() const {Assert(DefP&&UsableP); return Mx;} 00236 double GetExtent() const {Assert(DefP&&UsableP); return Mx-Mn;} 00237 double GetMean() const {Assert(DefP&&UsableP); return Mean;} 00238 double GetVari() const {Assert(DefP&&UsableP); return Vari;} 00239 double GetSDev() const {Assert(DefP&&UsableP); return SDev;} 00240 double GetSErr() const {Assert(DefP&&UsableP); return SErr;} 00241 double GetMedian() const {Assert(DefP&&UsableP); return Median;} 00242 double GetQuart1() const {Assert(DefP&&UsableP); return Quart1;} 00243 double GetQuart3() const {Assert(DefP&&UsableP); return Quart3;} 00244 double GetDecile(const int& DecileN) const { 00245 Assert(DefP&&UsableP); return DecileV[DecileN];} 00246 double GetPercentile(const int& PercentileN) const { 00247 Assert(DefP&&UsableP); return PercentileV[PercentileN];} 00248 double GetByNm(const TStr& MomNm) const; 00249 TStr GetStrByNm(const TStr& MomNm, char* FmtStr=NULL) const; 00250 00251 // strings 00252 TStr GetStr(const char& SepCh=' ', const char& DelimCh=':', 00253 const bool& DecileP=true, const bool& PercentileP=true, const TStr& FmtStr="%g") const; 00254 static TStr GetNmVStr(const TStr& VarPfx, 00255 const char& SepCh='\t', const bool& DecileP=true, const bool& PercentileP=true); 00256 TStr GetValVStr(const char& SepCh='\t', const bool& DecileP=true, const bool& PercentileP=true) const; 00257 }; 00258 typedef TVVec<PMom> TMomVV; 00259 typedef THash<TInt, PMom> TIntMomH; 00260 typedef THash<TInt, TMomV> TIntMomVH; 00261 typedef THash<TInt, TMomVV> TIntMomVVH; 00262 00264 // Correlation 00265 ClassTP(TCorr, PCorr)//{ 00266 private: 00267 int ValVLen; 00268 double CorrCf; 00269 double CorrCfPrb; 00270 double FisherZ; 00271 public: 00272 TCorr(){} 00273 TCorr(const TFltV& ValV1, const TFltV& ValV2); 00274 static PCorr New(const TFltV& ValV1, const TFltV& ValV2){ 00275 return PCorr(new TCorr(ValV1, ValV2));} 00276 TCorr(TSIn&){Fail;} 00277 static PCorr Load(TSIn& SIn){return new TCorr(SIn);} 00278 void Save(TSOut&){Fail;} 00279 00280 TCorr& operator=(const TCorr&){Fail; return *this;} 00281 00282 double GetCorrCf() const {return CorrCf;} 00283 double GetCorrCfPrb() const {return CorrCfPrb;} 00284 00285 TStr GetStr() const; 00286 }; 00287 00289 // Statistical Tests 00290 class TStatTest { 00291 private: 00292 static void AveVar(const TFltV& ValV, double& Ave, double& Var); 00293 static double KsProb(const double& Alam); 00294 public: 00295 static void ChiSquareOne( 00296 const TFltV& ObservedBinV, const TFltV& ExpectedBinV, 00297 double& ChiSquareVal, double& SignificancePrb); 00298 static void ChiSquareTwo( 00299 const TFltV& ObservedBin1V, const TFltV& ObservedBin2V, 00300 double& ChiSquareVal, double& SignificancePrb); 00301 00302 static void TTest( 00303 const TFltV& ValV1, const TFltV& ValV2, double& TTestVal, double& TTestPrb); 00304 00305 // Kolmogorov-Smirnov (are two distributions the same) 00306 static void KsTest(const TFltV& ValV1, const TFltV& ValV2, double& DStat, double& PVal); 00307 static void KsTest(const TFltPrV& ValCntV1, const TFltPrV& ValCntV2, double& DStat, double& PVal); 00308 }; 00309 00311 // Combinations 00312 ClassTP(TComb, PComb)//{ 00313 public: 00314 int Items; 00315 int Order; 00316 int CombN; 00317 TIntV ItemV; 00318 public: 00319 TComb(): Items(-1), Order(-1), CombN(-1), ItemV(){} 00320 TComb(const int& _Items, const int& _Order): 00321 Items(_Items), Order(_Order), CombN(0), ItemV(){ 00322 IAssert((Order>0)&&(Order<=Items));} 00323 static PComb New(const int& Items, const int& Order){ 00324 return PComb(new TComb(Items, Order));} 00325 ~TComb(){} 00326 TComb(TSIn&){Fail;} 00327 static PComb Load(TSIn& SIn){return new TComb(SIn);} 00328 void Save(TSOut&){Fail;} 00329 00330 TComb& operator=(const TComb&){Fail; return *this;} 00331 00332 bool GetNext(); 00333 TIntV& GetItemV(){return ItemV;} 00334 int GetCombN() const {return CombN;} 00335 int GetCombs() const; 00336 void Wr(); 00337 }; 00338 00340 // Linear-Regression 00341 ClassTP(TLinReg, PLinReg)//{ 00342 public: 00343 TFltVV XVV; 00344 TFltV YV; 00345 TFltV SigV; 00346 int Recs, Vars; 00347 TFltVV CovarVV; // 1 based 00348 TFltV CfV; // 1 based 00349 double ChiSq; 00350 void GetXV(const int RecN, TFltV& VarV) const { 00351 VarV.Gen(Vars+1); 00352 for (int VarN=0; VarN<Vars; VarN++){VarV[VarN+1]=XVV.At(RecN-1, VarN);} 00353 } 00354 double GetY(const int RecN) const {return YV[RecN-1];} 00355 double GetSig(const int RecN) const {return SigV[RecN-1];} 00356 void NR_covsrt(TFltVV& CovarVV, const int& Vars, const TIntV& ia, const int& mfit); 00357 void NR_gaussj(TFltVV& a, const int& n, TFltVV& b, const int& m); 00358 void NR_lfit(); 00359 public: 00360 TLinReg(){} 00361 static PLinReg New( 00362 const TFltVV& XVV, const TFltV& YV, const TFltV& SigV=TFltV()); 00363 ~TLinReg(){} 00364 TLinReg(TSIn&){Fail;} 00365 static PLinReg Load(TSIn& SIn){return new TLinReg(SIn);} 00366 void Save(TSOut&){Fail;} 00367 00368 TLinReg& operator=(const TLinReg&){Fail; return *this;} 00369 00370 int GetRecs() const {return Recs;} 00371 int GetVars() const {return Vars;} 00372 00373 double GetCf(const int& VarN) const {return CfV[VarN+1];} 00374 double GetCfUncer(const int& VarN) const { 00375 return sqrt(double(CovarVV.At(VarN+1, VarN+1)));} 00376 double GetCovar(const int& VarN1, const int& VarN2) const { 00377 return CovarVV.At(VarN1+1, VarN2+1);} 00378 00379 double GetChiSq() const {return ChiSq;} 00380 00381 static double LinInterp(const double& x1, const double& y1, 00382 const double& x2, const double& y2, const double& AtX) _CMPWARN{ 00383 if (x1 == x2) return (y1+y2)/2.0; 00384 const double k = (y2 - y1) / (x2 - x1); 00385 return k*(AtX - x1) + y1; 00386 } 00387 00388 void Wr() const; 00389 }; 00390 00392 // Singular-Value-Decomposition 00393 ClassTP(TSvd, PSvd)//{ 00394 public: 00395 TFltVV XVV; 00396 TFltV YV; 00397 TFltV SigV; 00398 int Recs, Vars; 00399 TFltVV CovarVV; // 1 based 00400 TFltV CfV; // 1 based 00401 double ChiSq; 00402 void GetXV(const int RecN, TFltV& VarV) const { 00403 VarV.Gen(Vars+1); 00404 for (int VarN=0; VarN<Vars; VarN++){VarV[VarN+1]=XVV.At(RecN-1, VarN);} 00405 } 00406 double GetY(const int RecN) const {return YV[RecN-1];} 00407 double GetSig(const int RecN) const {return SigV[RecN-1];} 00408 static double NR_SIGN(double a, double b){return b >= 0.0 ? fabs(a) : -fabs(a);} 00409 static double NR_FMAX(double maxarg1, double maxarg2){ 00410 return maxarg1 > maxarg2 ? maxarg1 : maxarg2;} 00411 static int NR_IMIN(int iminarg1, int iminarg2){ 00412 return iminarg1 < iminarg2 ? iminarg1 : iminarg2;} 00413 static double NR_pythag(double a, double b); 00414 static void NR_svdcmp(TFltVV& a, int m, int n, TFltV& w, TFltVV& v); 00415 void NR_svbksb( 00416 TFltVV& u, TFltV& w, TFltVV& v, int m, int n, TFltV& b, TFltV& x); 00417 void NR_svdvar(TFltVV& v, int ma, TFltV& w, TFltVV& cvm); 00418 void NR_svdfit(); 00419 public: 00420 TSvd(){} 00421 static PSvd New( 00422 const TFltVV& XVV, const TFltV& YV, const TFltV& SigV=TFltV()); 00423 ~TSvd(){} 00424 TSvd(TSIn&){Fail;} 00425 static PSvd Load(TSIn& SIn){return new TSvd(SIn);} 00426 void Save(TSOut&){Fail;} 00427 00428 TSvd& operator=(const TSvd&){Fail; return *this;} 00429 00430 int GetRecs() const {return Recs;} 00431 int GetVars() const {return Vars;} 00432 00433 double GetCf(const int& VarN) const {return CfV[VarN+1];} 00434 double GetCfUncer(const int& VarN) const { 00435 return sqrt(double(CovarVV.At(VarN+1, VarN+1)));} 00436 double GetCovar(const int& VarN1, const int& VarN2) const { 00437 return CovarVV.At(VarN1+1, VarN2+1);} 00438 00439 double GetChiSq() const {return ChiSq;} 00440 00441 void GetCfV(TFltV& _CfV); 00442 void GetCfUncerV(TFltV& CfUncerV); 00443 00444 static void Svd(const TFltVV& InMtx, TFltVV& LSingV, TFltV& SingValV, TFltVV& RSingV); 00445 static void Svd1Based(const TFltVV& InMtx1, TFltVV& LSingV, TFltV& SingValV, TFltVV& RSingV); 00446 00447 void Wr() const; 00448 }; 00449 00451 // Histogram 00452 class THist { 00453 private: 00454 TFlt MnVal; 00455 TFlt MxVal; 00456 TIntV BucketV; 00457 TFlt BucketSize; 00458 TInt Vals; 00459 public: 00460 THist() { } 00461 THist(const double& _MnVal, const double& _MxVal, const int& Buckets): 00462 MnVal(_MnVal), MxVal(_MxVal), BucketV(Buckets), 00463 BucketSize(1.01 * double(MxVal - MnVal) / double(Buckets)) { } 00464 00465 void Add(const double& Val, const bool& OnlyInP); 00466 00467 int GetVals() const { return Vals; } 00468 int GetBuckets() const { return BucketV.Len(); } 00469 double GetBucketMn(const int& BucketN) const { return MnVal + BucketN * BucketSize; } 00470 double GetBucketMx(const int& BucketN) const { return MnVal + (BucketN + 1) * BucketSize; } 00471 int GetBucketVal(const int& BucketN) const { return BucketV[BucketN]; } 00472 double GetBucketValPerc(const int& BucketN) const { 00473 return (Vals > 0) ? (double(BucketV[BucketN]) / double(Vals)) : 0.0; } 00474 00475 void SaveStat(const TStr& ValNm, TSOut& FOut) const; 00476 void SaveTxt(const TStr& ValNm, const TStr& FNm) const { 00477 TFOut FOut(FNm); SaveStat(ValNm, FOut); } 00478 };