SNAP Library 2.2, Developer Reference
2014-03-11 19:15:55
SNAP, a general purpose, high performance system for analysis and manipulation of large networks
|
00001 //#////////////////////////////////////////////// 00003 class TGUtil { 00004 public: 00005 static void GetCdf(const TIntPrV& PdfV, TIntPrV& CdfV); 00006 static void GetCdf(const TFltPrV& PdfV, TFltPrV& CdfV); 00007 static void GetCdf(const TIntFltKdV& PdfV, TIntFltKdV& CdfV); 00008 static TIntPrV GetCdf(const TIntPrV& PdfV); 00009 static TFltPrV GetCdf(const TFltPrV& PdfV); 00010 00011 static void GetCCdf(const TIntPrV& PdfV, TIntPrV& CCdfV); 00012 static void GetCCdf(const TFltPrV& PdfV, TFltPrV& CCdfV); 00013 static void GetCCdf(const TIntFltKdV& PdfV, TIntFltKdV& CCdfV); 00014 static TIntPrV GetCCdf(const TIntPrV& PdfV); 00015 static TFltPrV GetCCdf(const TFltPrV& PdfV); 00016 00017 static void GetPdf(const TIntPrV& CdfV, TIntPrV& PdfV); 00018 static void GetPdf(const TFltPrV& CdfV, TFltPrV& PdfV); 00019 static void GetPdf(const TIntFltKdV& CdfV, TIntFltKdV& PdfV); 00020 00021 static void Normalize(TFltPrV& PdfV); 00022 static void Normalize(TIntFltKdV& PdfV); 00023 00024 static void MakeExpBins(const TFltPrV& XYValV, TFltPrV& ExpXYValV, 00025 const double& BinFactor = 2, const double& MinYVal = 1); 00026 static void MakeExpBins(const TFltKdV& XYValV, TFltKdV& ExpXYValV, 00027 const double& BinFactor = 2, const double& MinYVal = 1); 00028 static void MakeExpBins(const TFltV& YValV, TFltV& ExpYValV, const double& BinFactor = 1.01); 00029 static void MakeExpBins(const TIntV& YValV, TIntV& ExpYValV, const double& BinFactor = 1.01); 00030 }; 00031 00032 //#////////////////////////////////////////////// 00034 class TStrUtil { 00035 public: 00036 static TChA& GetXmlTagVal(TXmlLx& XmlLx, const TChA& TagNm); 00037 static void GetXmlTagNmVal(TXmlLx& XmlLx, TChA& TagNm, TChA& TagVal); 00038 static bool GetXmlTagNmVal2(TXmlLx& XmlLx, TChA& TagNm, TChA& TagVal, const bool& TakeTagNms); 00039 static TChA GetDomNm(const TChA& UrlChA); // www.cs.cmu.edu 00040 static TChA GetDomNm2(const TChA& UrlChA); // also strip starting www. 00041 static TChA GetWebsiteNm(const TChA& UrlChA); // get website (GetDomNm2 or blog url) 00042 static bool GetNormalizedUrl(const TChA& UrlIn, const TChA& BaseUrl, TChA& UrlOut); 00043 static bool StripEnd(const TChA& Str, const TChA& SearchStr, TChA& NewStr); 00044 00045 static TChA GetShorStr(const TChA& LongStr, const int MaxLen=50); 00046 static TChA GetCleanStr(const TChA& ChA); 00047 static TChA GetCleanWrdStr(const TChA& ChA); 00048 static int CountWords(const char* CStr); 00049 static int CountWords(const TChA& ChA); 00050 static int CountWords(const TChA& ChA, const TStrHash<TInt>& StopWordH); 00051 static int SplitWords(TChA& ChA, TVec<char *>& WrdV, const bool& SplitOnWs=true); 00052 static int SplitOnCh(TChA& ChA, TVec<char *>& WrdV, const char& Ch, const bool& SkipEmpty=false); 00053 static int SplitLines(TChA& ChA, TVec<char *>& LineV, const bool& SkipEmpty=false); 00054 static int SplitSentences(TChA& ChA, TVec<char *>& SentenceV); 00055 static void RemoveHtmlTags(const TChA& HtmlStr, TChA& TextStr); 00056 static bool IsLatinStr(const TChA& Str, const double& MinAlFrac); 00057 static void GetWIdV(const TStrHash<TInt>& StrH, const char *CStr, TIntV& WIdV); 00058 static void GetAddWIdV(TStrHash<TInt>& StrH, const char *CStr, TIntV& WIdV); 00060 static bool GetTmFromStr(const char* TmStr, TSecTm& Tm); 00061 00063 static TStr GetStdName(TStr AuthorName); 00065 static void GetStdNameV(TStr AuthorNames, TStrV& StdNameV); 00066 }; 00067 00068 //#////////////////////////////////////////////// 00070 00071 #if defined(SW_WRITEN) 00072 00074 extern int WriteN(int fd, char *ptr, int nbytes); 00075 00078 template <class TVal, class TSizeTy> 00079 int SendVec(const TVec<TVal, TSizeTy>& V, int FileDesc) { 00080 int l = 0; 00081 int n; 00082 int r; 00083 TSizeTy Vals = V.Len(); 00084 int ChunkSize = 25600; 00085 00086 r = WriteN(FileDesc, (char *) &Vals, (int) sizeof(TSizeTy)); 00087 if (r < 0) { 00088 return r; 00089 } 00090 l += r; 00091 00092 r = WriteN(FileDesc, (char *) &Vals, (int) sizeof(TSizeTy)); 00093 if (r < 0) { 00094 return r; 00095 } 00096 l += r; 00097 00098 for (TSizeTy ValN = 0; ValN < Vals; ValN += ChunkSize) { 00099 n = ChunkSize; 00100 if ((Vals - ValN) < ChunkSize) { 00101 n = Vals - ValN; 00102 } 00103 r = WriteN(FileDesc, (char *) &V[ValN], (int) (n*sizeof(TVal))); 00104 if (r < 0) { 00105 return r; 00106 } 00107 l += r; 00108 } 00109 return l; 00110 } 00111 #endif 00112