SNAP Library 2.2, Developer Reference
2014-03-11 19:15:55
SNAP, a general purpose, high performance system for analysis and manipulation of large networks
|
00001 #include "bd.h" 00002 00004 // Mathematics-Utilities 00005 class TMath{ 00006 public: 00007 static double E; 00008 static double Pi; 00009 static double LogOf2; 00010 00011 static double Inv(const double& x){IAssert(x!=0.0); return (1.0/x);} 00012 static double Sqr(const double& x){return x*x;} 00013 static double Sqrt(const double& x){IAssert(!(x<0.0)); return sqrt(x);} 00014 static double Log(const double& Val){return log(Val);} 00015 static double Log2(const double& Val){return log(Val)/LogOf2;} 00016 static double Round(const double& Val){ 00017 return (Val>0)?floor(Val+0.5):ceil(Val-0.5);} 00018 static double Round(const double & Val, int Decs){ 00019 const double pwr=pow(10.0, Decs); return Round(Val * pwr) / pwr;} 00020 static int Fac(const int& Val){ 00021 if (Val<=1){return 1;} else {return Val*Fac(Val-1);}} 00022 static int Choose(const int& N, const int& K){ // binomial coefficient 00023 return Fac(N)/(Fac(K)*Fac(N-K)); } 00024 static uint Pow2(const int& pow){return uint(1u<<pow);} 00025 static double Power(const double& Base, const double& Exponent){ 00026 return exp(log(Base)*Exponent);} 00027 00028 template <typename T> 00029 static int Sign(const T& Val){return Val<0?-1:(Val>0?1:0);} 00030 00031 template <class T> 00032 static const T& Mx(const T& LVal, const T& RVal) { 00033 return LVal > RVal ? LVal : RVal;} 00034 00035 template <class T> 00036 static const T& Mn(const T& LVal, const T& RVal){ 00037 return LVal < RVal ? LVal : RVal;} 00038 00039 template <class T> 00040 static const T& Mx(const T& Val1, const T& Val2, const T& Val3) { 00041 if (Val1 > Val2) { 00042 if (Val1 > Val3) return Val1; 00043 else return Val3; 00044 } else { 00045 if (Val2 > Val3) return Val2; 00046 else return Val3; 00047 } 00048 } 00049 00050 template <class T> 00051 static const T& Mn(const T& Val1, const T& Val2, const T& Val3) { 00052 if(Val1 < Val2) { 00053 if (Val1 < Val3) return Val1; 00054 else return Val3; 00055 } else { 00056 if (Val2 < Val3) return Val2; 00057 else return Val3; 00058 } 00059 } 00060 00061 template <class T> 00062 static const T& Median(const T& Val1, const T& Val2, const T& Val3) { 00063 if (Val1 < Val2) { 00064 if (Val2 < Val3) return Val2; 00065 else if (Val3 < Val1) return Val1; 00066 else return Val3; 00067 } else { 00068 if (Val1 < Val3) return Val1; 00069 else if (Val3 < Val2) return Val2; 00070 else return Val3; 00071 } 00072 } 00073 00074 template <class T> 00075 static const T& InRange(const T& Val, const T& Mn, const T& Mx) { 00076 IAssert(Mn <= Mx); return Val < Mn ? Mn : (Val > Mx ? Mx : Val);} 00077 00078 template <class T> 00079 static bool IsInRange(const T& Val, const T& Mn, const T& Mx) { 00080 IAssert(Mn <= Mx); return Val >= Mn && Val <= Mx;} 00081 00082 template <class T> 00083 static bool IsInEps(const T& Val, const T& Eps) { 00084 return Val >= -Eps && Val <= Eps;} 00085 }; 00086 00088 // Special-Functions 00089 class TSpecFunc{ 00090 public: 00091 static void GammaPSeries/*gser*/( 00092 double& gamser, const double& a, const double& x, double& gln); 00093 static void GammaQContFrac/*gcf*/( 00094 double& gammcf, const double& a, const double& x, double& gln); 00095 static double GammaQ/*gammq*/(const double& a, const double& x); 00096 static double LnGamma/*gammln*/(const double& xx); 00097 static double BetaCf/*betacf*/( 00098 const double& a, const double& b, const double& x); 00099 static double BetaI(const double& a, const double& b, const double& x); 00100 00101 static void LinearFit( // Y = A + B*X 00102 const TVec<TFltPr>& XY, double& A, double& B, 00103 double& SigA, double& SigB, double& Chi2, double& R2); 00104 static void PowerFit( // Y = A * X^B 00105 const TVec<TFltPr>& XY, double& A, double& B, 00106 double& SigA, double& SigB, double& Chi2, double& R2); 00107 static void LogFit( // Y = A + B*log(X) 00108 const TVec<TFltPr>& XY, double& A, double& B, 00109 double& SigA, double& SigB, double& Chi2, double& R2); 00110 static void ExpFit( // Y = A * exp(B*X) 00111 const TVec<TFltPr>& XY, double& A, double& B, 00112 double& SigA, double& SigB, double& Chi2, double& R2); 00113 public: 00114 static double LnComb(const int& n, const int& k); 00115 public: 00116 static double Entropy(const TIntV& ValV); 00117 static double Entropy(const TFltV& ValV); 00118 static void EntropyFracDim(const TIntV& ValV, TFltV& EntropyV); 00119 static void EntropyFracDim(const TFltV& ValV, TFltV& EntropyV); 00120 public: 00121 static double EntropyBias(const double& B); // solves for p: B = p*log2(p)+(1-p)log2(1-p) 00122 //MLE of the power-law coefficient 00123 static double GetPowerCoef(const TFltV& XValV, double MinX=-1.0); // values (sampled from the distribution) 00124 static double GetPowerCoef(const TFltPrV& XValCntV, double MinX=-1.0); // (value, count) pairs 00125 }; 00126 00128 // Statistical-Moments 00129 ClassTPV(TMom, PMom, TMomV)//{ 00130 private: 00131 TBool DefP; 00132 TFltPrV ValWgtV; 00133 TFlt SumW, ValSumW; 00134 TInt Vals; 00135 TBool UsableP; 00136 TFlt UnusableVal; 00137 TFlt Mn, Mx; 00138 TFlt Mean, Vari, SDev, SErr; 00139 TFlt Median, Quart1, Quart3; 00140 TFlt Mode; 00141 TFltV DecileV; // 0=min 1=1.decile, ..., 9=9.decile, 10=max 00142 TFltV PercentileV; // 0=min 1=1.percentile, ..., 9=9.percentile, 10=max 00143 public: 00144 TMom(): 00145 DefP(false), ValWgtV(), 00146 SumW(), ValSumW(), Vals(), 00147 UsableP(false), UnusableVal(-1), 00148 Mn(), Mx(), 00149 Mean(), Vari(), SDev(), SErr(), 00150 Median(), Quart1(), Quart3(), Mode(), 00151 DecileV(), PercentileV(){} 00152 TMom(const TMom& Mom): 00153 DefP(Mom.DefP), ValWgtV(Mom.ValWgtV), 00154 SumW(Mom.SumW), ValSumW(Mom.ValSumW), Vals(Mom.Vals), 00155 UsableP(Mom.UsableP), UnusableVal(Mom.UnusableVal), 00156 Mn(Mom.Mn), Mx(Mom.Mx), 00157 Mean(Mom.Mean), Vari(Mom.Vari), SDev(Mom.SDev), SErr(Mom.SErr), 00158 Median(Mom.Median), Quart1(Mom.Quart1), Quart3(Mom.Quart3), Mode(Mom.Mode), 00159 DecileV(Mom.DecileV), PercentileV(Mom.PercentileV){} 00160 static PMom New(){return PMom(new TMom());} 00161 static void NewV(TMomV& MomV, const int& Moms){ 00162 MomV.Gen(Moms); for (int MomN=0; MomN<Moms; MomN++){MomV[MomN]=New();}} 00163 static void NewVV(TVVec<PMom>& MomVV, const int& XMoms, const int& YMoms){ 00164 MomVV.Gen(XMoms, YMoms); 00165 for (int XMomN=0; XMomN<XMoms; XMomN++){ 00166 for (int YMomN=0; YMomN<YMoms; YMomN++){ 00167 MomVV.At(XMomN, YMomN)=New();}}} 00168 TMom(const TFltV& _ValV); 00169 static PMom New(const TFltV& ValV){ 00170 return PMom(new TMom(ValV));} 00171 TMom(TSIn& SIn): 00172 DefP(SIn), 00173 ValWgtV(SIn), 00174 SumW(SIn), ValSumW(SIn), Vals(SIn), 00175 UsableP(SIn), UnusableVal(SIn), 00176 Mn(SIn), Mx(SIn), 00177 Mean(SIn), Vari(SIn), SDev(SIn), SErr(SIn), 00178 Median(SIn), Quart1(SIn), Quart3(SIn), Mode(SIn), 00179 DecileV(SIn), PercentileV(SIn){} 00180 static PMom Load(TSIn& SIn){return new TMom(SIn);} 00181 void Save(TSOut& SOut) const { 00182 DefP.Save(SOut); 00183 ValWgtV.Save(SOut); 00184 SumW.Save(SOut); ValSumW.Save(SOut); Vals.Save(SOut); 00185 UsableP.Save(SOut); UnusableVal.Save(SOut); 00186 Mn.Save(SOut); Mx.Save(SOut); 00187 Mean.Save(SOut); Vari.Save(SOut); SDev.Save(SOut); SErr.Save(SOut); 00188 Median.Save(SOut); Quart1.Save(SOut); Quart3.Save(SOut); Mode.Save(SOut); 00189 DecileV.Save(SOut); PercentileV.Save(SOut);} 00190 00191 TMom& operator=(const TMom& Mom){ 00192 Assert(!DefP); DefP=Mom.DefP; 00193 ValWgtV=Mom.ValWgtV; 00194 SumW=Mom.SumW; ValSumW=Mom.ValSumW; Vals=Mom.Vals; 00195 UsableP=Mom.UsableP; UnusableVal=Mom.UnusableVal; 00196 Mn=Mom.Mn; Mx=Mom.Mx; 00197 Mean=Mom.Mean; Vari=Mom.Vari; SDev=Mom.SDev; SErr=Mom.SErr; 00198 Median=Mom.Median; Quart1=Mom.Quart1; Quart3=Mom.Quart3; Mode=Mom.Mode; 00199 DecileV=Mom.DecileV; PercentileV=Mom.PercentileV; 00200 return *this;} 00201 bool operator==(const TMom& Mom) const { 00202 return Vals==Mom.Vals;} 00203 bool operator<(const TMom& Mom) const { 00204 return Vals<Mom.Vals;} 00205 00206 // define 00207 void Def(); 00208 static void DefV(TMomV& MomV){ 00209 for (int MomN=0; MomN<MomV.Len(); MomN++){MomV[MomN]->Def();}} 00210 static void DefVV(TVVec<PMom>& MomVV){ 00211 for (int XMomN=0; XMomN<MomVV.GetXDim(); XMomN++){ 00212 for (int YMomN=0; YMomN<MomVV.GetYDim(); YMomN++){ 00213 MomVV.At(XMomN, YMomN)->Def();}}} 00214 bool IsDef() const {return DefP;} 00215 00216 // values 00217 void Add(const TFlt& Val, const TFlt& Wgt=1){Assert(!DefP); 00218 ValWgtV.Add(TFltPr(Val, Wgt)); SumW+=Wgt; ValSumW+=Wgt*Val; Vals++;} 00219 double GetWgt() const {return SumW;} 00220 int GetVals() const {return Vals;} 00221 TFlt GetVal(const int& ValN) const {IAssert(!IsDef()); return ValWgtV[ValN].Val1;} 00222 //const TFltV& GetValV() const {IAssert(!IsDef()); return ValV;} //J: 00223 00224 // usability 00225 bool IsUsable() const {Assert(DefP); return UsableP;} 00226 static bool IsUsableV(const TMomV& MomV){ 00227 for (int MomN=0; MomN<MomV.Len(); MomN++){ 00228 if (!MomV[MomN]->IsUsable()){return false;}} 00229 return true;} 00230 static bool IsUsableVV(const TVVec<PMom>& MomVV){ 00231 for (int XMomN=0; XMomN<MomVV.GetXDim(); XMomN++){ 00232 for (int YMomN=0; YMomN<MomVV.GetYDim(); YMomN++){ 00233 if (!MomVV.At(XMomN, YMomN)->IsUsable()){return false;}}} 00234 return true;} 00235 00236 // moments 00237 double GetMn() const {Assert(DefP&&UsableP); return Mn;} 00238 double GetMx() const {Assert(DefP&&UsableP); return Mx;} 00239 double GetExtent() const {Assert(DefP&&UsableP); return Mx-Mn;} 00240 double GetMean() const {Assert(DefP&&UsableP); return Mean;} 00241 double GetVari() const {Assert(DefP&&UsableP); return Vari;} 00242 double GetSDev() const {Assert(DefP&&UsableP); return SDev;} 00243 double GetSErr() const {Assert(DefP&&UsableP); return SErr;} 00244 double GetMedian() const {Assert(DefP&&UsableP); return Median;} 00245 double GetQuart1() const {Assert(DefP&&UsableP); return Quart1;} 00246 double GetQuart3() const {Assert(DefP&&UsableP); return Quart3;} 00247 double GetMode() const {Assert(DefP&&UsableP); return Mode;} 00248 double GetDecile(const int& DecileN) const { 00249 Assert(DefP&&UsableP); return DecileV[DecileN];} 00250 double GetPercentile(const int& PercentileN) const { 00251 Assert(DefP&&UsableP); return PercentileV[PercentileN];} 00252 double GetByNm(const TStr& MomNm) const; 00253 TStr GetStrByNm(const TStr& MomNm, char* FmtStr=NULL) const; 00254 00255 // strings 00256 TStr GetStr(const char& SepCh=' ', const char& DelimCh=':', 00257 const bool& DecileP=true, const bool& PercentileP=true, const TStr& FmtStr="%g") const; 00258 static TStr GetNmVStr(const TStr& VarPfx, 00259 const char& SepCh='\t', const bool& DecileP=true, const bool& PercentileP=true); 00260 TStr GetValVStr(const char& SepCh='\t', const bool& DecileP=true, const bool& PercentileP=true) const; 00261 }; 00262 typedef TVVec<PMom> TMomVV; 00263 typedef THash<TInt, PMom> TIntMomH; 00264 typedef THash<TInt, TMomV> TIntMomVH; 00265 typedef THash<TInt, TMomVV> TIntMomVVH; 00266 00268 // Correlation 00269 ClassTP(TCorr, PCorr)//{ 00270 private: 00271 int ValVLen; 00272 double CorrCf; 00273 double CorrCfPrb; 00274 double FisherZ; 00275 public: 00276 TCorr(){} 00277 TCorr(const TFltV& ValV1, const TFltV& ValV2); 00278 static PCorr New(const TFltV& ValV1, const TFltV& ValV2){ 00279 return PCorr(new TCorr(ValV1, ValV2));} 00280 TCorr(TSIn&){Fail;} 00281 static PCorr Load(TSIn& SIn){return new TCorr(SIn);} 00282 void Save(TSOut&){Fail;} 00283 00284 TCorr& operator=(const TCorr&){Fail; return *this;} 00285 00286 double GetCorrCf() const {return CorrCf;} 00287 double GetCorrCfPrb() const {return CorrCfPrb;} 00288 00289 TStr GetStr() const; 00290 }; 00291 00293 // Statistical Tests 00294 class TStatTest { 00295 private: 00296 static void AveVar(const TFltV& ValV, double& Ave, double& Var); 00297 static double KsProb(const double& Alam); 00298 public: 00299 static void ChiSquareOne( 00300 const TFltV& ObservedBinV, const TFltV& ExpectedBinV, 00301 double& ChiSquareVal, double& SignificancePrb); 00302 static void ChiSquareTwo( 00303 const TFltV& ObservedBin1V, const TFltV& ObservedBin2V, 00304 double& ChiSquareVal, double& SignificancePrb); 00305 00306 static void TTest( 00307 const TFltV& ValV1, const TFltV& ValV2, double& TTestVal, double& TTestPrb); 00308 00309 // Kolmogorov-Smirnov (are two distributions the same) 00310 static void KsTest(const TFltV& ValV1, const TFltV& ValV2, double& DStat, double& PVal); 00311 static void KsTest(const TFltPrV& ValCntV1, const TFltPrV& ValCntV2, double& DStat, double& PVal); 00312 }; 00313 00315 // Combinations 00316 ClassTP(TComb, PComb)//{ 00317 public: 00318 int Items; 00319 int Order; 00320 int CombN; 00321 TIntV ItemV; 00322 public: 00323 TComb(): Items(-1), Order(-1), CombN(-1), ItemV(){} 00324 TComb(const int& _Items, const int& _Order): 00325 Items(_Items), Order(_Order), CombN(0), ItemV(){ 00326 IAssert((Order>0)&&(Order<=Items));} 00327 static PComb New(const int& Items, const int& Order){ 00328 return PComb(new TComb(Items, Order));} 00329 ~TComb(){} 00330 TComb(TSIn&){Fail;} 00331 static PComb Load(TSIn& SIn){return new TComb(SIn);} 00332 void Save(TSOut&){Fail;} 00333 00334 TComb& operator=(const TComb&){Fail; return *this;} 00335 00336 bool GetNext(); 00337 TIntV& GetItemV(){return ItemV;} 00338 int GetCombN() const {return CombN;} 00339 int GetCombs() const; 00340 void Wr(); 00341 }; 00342 00344 // Linear-Regression 00345 ClassTP(TLinReg, PLinReg)//{ 00346 public: 00347 TFltVV XVV; 00348 TFltV YV; 00349 TFltV SigV; 00350 int Recs, Vars; 00351 TFltVV CovarVV; // 1 based 00352 TFltV CfV; // 1 based 00353 double ChiSq; 00354 void GetXV(const int RecN, TFltV& VarV) const { 00355 VarV.Gen(Vars+1); 00356 for (int VarN=0; VarN<Vars; VarN++){VarV[VarN+1]=XVV.At(RecN-1, VarN);} 00357 } 00358 double GetY(const int RecN) const {return YV[RecN-1];} 00359 double GetSig(const int RecN) const {return SigV[RecN-1];} 00360 void NR_covsrt(TFltVV& CovarVV, const int& Vars, const TIntV& ia, const int& mfit); 00361 void NR_gaussj(TFltVV& a, const int& n, TFltVV& b, const int& m); 00362 void NR_lfit(); 00363 public: 00364 TLinReg(){} 00365 static PLinReg New( 00366 const TFltVV& XVV, const TFltV& YV, const TFltV& SigV=TFltV()); 00367 ~TLinReg(){} 00368 TLinReg(TSIn&){Fail;} 00369 static PLinReg Load(TSIn& SIn){return new TLinReg(SIn);} 00370 void Save(TSOut&){Fail;} 00371 00372 TLinReg& operator=(const TLinReg&){Fail; return *this;} 00373 00374 int GetRecs() const {return Recs;} 00375 int GetVars() const {return Vars;} 00376 00377 double GetCf(const int& VarN) const {return CfV[VarN+1];} 00378 double GetCfUncer(const int& VarN) const { 00379 return sqrt(double(CovarVV.At(VarN+1, VarN+1)));} 00380 double GetCovar(const int& VarN1, const int& VarN2) const { 00381 return CovarVV.At(VarN1+1, VarN2+1);} 00382 00383 double GetChiSq() const {return ChiSq;} 00384 00385 static double LinInterp(const double& x1, const double& y1, 00386 const double& x2, const double& y2, const double& AtX) _CMPWARN{ 00387 if (x1 == x2) return (y1+y2)/2.0; 00388 const double k = (y2 - y1) / (x2 - x1); 00389 return k*(AtX - x1) + y1; 00390 } 00391 00392 void Wr() const; 00393 }; 00394 00396 // Singular-Value-Decomposition 00397 ClassTP(TSvd, PSvd)//{ 00398 public: 00399 TFltVV XVV; 00400 TFltV YV; 00401 TFltV SigV; 00402 int Recs, Vars; 00403 TFltVV CovarVV; // 1 based 00404 TFltV CfV; // 1 based 00405 double ChiSq; 00406 void GetXV(const int RecN, TFltV& VarV) const { 00407 VarV.Gen(Vars+1); 00408 for (int VarN=0; VarN<Vars; VarN++){VarV[VarN+1]=XVV.At(RecN-1, VarN);} 00409 } 00410 double GetY(const int RecN) const {return YV[RecN-1];} 00411 double GetSig(const int RecN) const {return SigV[RecN-1];} 00412 static double NR_SIGN(double a, double b){return b >= 0.0 ? fabs(a) : -fabs(a);} 00413 static double NR_FMAX(double maxarg1, double maxarg2){ 00414 return maxarg1 > maxarg2 ? maxarg1 : maxarg2;} 00415 static int NR_IMIN(int iminarg1, int iminarg2){ 00416 return iminarg1 < iminarg2 ? iminarg1 : iminarg2;} 00417 static double NR_pythag(double a, double b); 00418 static void NR_svdcmp(TFltVV& a, int m, int n, TFltV& w, TFltVV& v); 00419 void NR_svbksb( 00420 TFltVV& u, TFltV& w, TFltVV& v, int m, int n, TFltV& b, TFltV& x); 00421 void NR_svdvar(TFltVV& v, int ma, TFltV& w, TFltVV& cvm); 00422 void NR_svdfit(); 00423 public: 00424 TSvd(){} 00425 static PSvd New( 00426 const TFltVV& XVV, const TFltV& YV, const TFltV& SigV=TFltV()); 00427 ~TSvd(){} 00428 TSvd(TSIn&){Fail;} 00429 static PSvd Load(TSIn& SIn){return new TSvd(SIn);} 00430 void Save(TSOut&){Fail;} 00431 00432 TSvd& operator=(const TSvd&){Fail; return *this;} 00433 00434 int GetRecs() const {return Recs;} 00435 int GetVars() const {return Vars;} 00436 00437 double GetCf(const int& VarN) const {return CfV[VarN+1];} 00438 double GetCfUncer(const int& VarN) const { 00439 return sqrt(double(CovarVV.At(VarN+1, VarN+1)));} 00440 double GetCovar(const int& VarN1, const int& VarN2) const { 00441 return CovarVV.At(VarN1+1, VarN2+1);} 00442 00443 double GetChiSq() const {return ChiSq;} 00444 00445 void GetCfV(TFltV& _CfV); 00446 void GetCfUncerV(TFltV& CfUncerV); 00447 00448 static void Svd(const TFltVV& InMtx, TFltVV& LSingV, TFltV& SingValV, TFltVV& RSingV); 00449 static void Svd1Based(const TFltVV& InMtx1, TFltVV& LSingV, TFltV& SingValV, TFltVV& RSingV); 00450 00451 void Wr() const; 00452 }; 00453 00455 // Histogram 00456 class THist { 00457 private: 00458 TFlt MnVal; 00459 TFlt MxVal; 00460 TIntV BucketV; 00461 TFlt BucketSize; 00462 TInt Vals; 00463 public: 00464 THist() { } 00465 THist(const double& _MnVal, const double& _MxVal, const int& Buckets): 00466 MnVal(_MnVal), MxVal(_MxVal), BucketV(Buckets), 00467 BucketSize(1.01 * double(MxVal - MnVal) / double(Buckets)) { } 00468 00469 void Add(const double& Val, const bool& OnlyInP); 00470 00471 int GetVals() const { return Vals; } 00472 int GetBuckets() const { return BucketV.Len(); } 00473 double GetBucketMn(const int& BucketN) const { return MnVal + BucketN * BucketSize; } 00474 double GetBucketMx(const int& BucketN) const { return MnVal + (BucketN + 1) * BucketSize; } 00475 int GetBucketVal(const int& BucketN) const { return BucketV[BucketN]; } 00476 double GetBucketValPerc(const int& BucketN) const { 00477 return (Vals > 0) ? (double(BucketV[BucketN]) / double(Vals)) : 0.0; } 00478 00479 void SaveStat(const TStr& ValNm, TSOut& FOut) const; 00480 void SaveTxt(const TStr& ValNm, const TStr& FNm) const { 00481 TFOut FOut(FNm); SaveStat(ValNm, FOut); } 00482 };