SNAP Library, Developer Reference
2012-10-02 12:56:23
SNAP, a general purpose network analysis and graph mining library
|
00001 00002 // Url 00003 typedef enum {usUndef, usHttp, usOther} TUrlScheme; 00004 00005 ClassTPV(TUrl, PUrl, TUrlV)//{ 00006 private: 00007 static const TStr UrlHttpPrefixStr; 00008 static const TStr UrlHttpAbsPrefixStr; 00009 TUrlScheme Scheme; 00010 TStr UrlStr, RelUrlStr, BaseUrlStr; 00011 TStr SchemeNm, HostNm; 00012 TStr PortStr, PathStr, SearchStr, FragIdStr; 00013 int PortN; 00014 TStrV PathSegV; 00015 TStr IpNum; 00016 TStr FinalUrlStr, FinalHostNm; 00017 TStr HttpRqStr; 00018 void GetAbs(const TStr& AbsUrlStr); 00019 void GetAbsFromBase(const TStr& RelUrlStr, const TStr& BaseUrlStr); 00020 UndefDefaultCopyAssign(TUrl); 00021 public: 00022 TUrl(const TStr& _RelUrlStr, const TStr& _BaseUrlStr=TStr()); 00023 static PUrl New(const TStr& RelUrlStr, const TStr& BaseUrlStr=TStr()){ 00024 return PUrl(new TUrl(RelUrlStr, BaseUrlStr));} 00025 ~TUrl(){} 00026 TUrl(TSIn&){Fail;} 00027 static PUrl Load(TSIn&){Fail; return NULL;} 00028 void Save(TSOut&){Fail;} 00029 00030 bool IsOk(const TUrlScheme _Scheme=usUndef) const { 00031 if (_Scheme==usUndef){return Scheme!=usUndef;} 00032 else {return Scheme==_Scheme;}} 00033 TUrlScheme GetScheme(){return Scheme;} 00034 TStr GetUrlStr() const {return UrlStr;} 00035 TStr GetRelUrlStr() const {return RelUrlStr;} 00036 bool IsBaseUrl(){return !BaseUrlStr.Empty();} 00037 TStr GetBaseUrlStr() const {return BaseUrlStr;} 00038 TStr GetSchemeNm() const {EAssert(IsOk()); return SchemeNm;} 00039 TStr GetHostNm() const {EAssert(IsOk()); return HostNm;} 00040 TStr GetDmNm(const int& MxDmSegs=-1) const; 00041 bool IsPortOk() const { EAssert(IsOk()); return (PortN > 0); } 00042 TStr GetPortStr() const {EAssert(IsOk()); return PortStr;} 00043 int GetPortN() const {EAssert(IsOk()&&(PortN!=-1)); return PortN;} 00044 TStr GetPathStr() const {EAssert(IsOk()); return PathStr;} 00045 int GetPathSegs() const {return PathSegV.Len();} 00046 TStr GetPathSeg(const int& PathSegN) const {return PathSegV[PathSegN];} 00047 TStr GetSearchStr() const {EAssert(IsOk()); return SearchStr;} 00048 TStr GetFragIdStr() const {EAssert(IsOk()); return FragIdStr;} 00049 00050 bool IsIpNum() const {return !IpNum.Empty();} 00051 void PutIpNum(const TStr& _IpNum){IpNum=_IpNum;} 00052 TStr GetIpNum() const {EAssert(IsIpNum()); return IpNum;} 00053 TStr GetIpNumOrHostNm() const {return IsIpNum() ? GetIpNum() : GetHostNm();} 00054 00055 bool IsDefFinalUrl() const { 00056 EAssert(IsOk(usHttp)); return !FinalUrlStr.Empty();} 00057 TStr GetFinalUrlStr() const { 00058 EAssert(IsDefFinalUrl()); return FinalUrlStr;} 00059 TStr GetAsFinalUrlStr() const { 00060 if (IsDefFinalUrl()){return FinalUrlStr;} else {return UrlStr;}} 00061 TStr GetFinalHostNm() const { 00062 EAssert(IsDefFinalUrl()); return FinalHostNm;} 00063 TStr GetAsFinalHostNm() const { 00064 if (IsDefFinalUrl()){return FinalHostNm;} else {return HostNm;}} 00065 void DefUrlAsFinal(){ 00066 EAssert(IsOk(usHttp)); EAssert(!IsDefFinalUrl()); 00067 FinalUrlStr=UrlStr; FinalHostNm=HostNm;} 00068 void DefFinalUrl(const TStr& _FinalHostNm); 00069 00070 void PutHttpRqStr(const TStr& _HttpRqStr){HttpRqStr=_HttpRqStr;} 00071 TStr GetHttpRqStr() const {return HttpRqStr;} 00072 bool IsHttpRqStr() const {return !HttpRqStr.Empty();} 00073 void ChangeHttpRqStr(const TStr& SrcStr, const TStr& DstStr){ 00074 HttpRqStr.ChangeStr(SrcStr, DstStr);} 00075 00076 bool IsInHost(const TStr& _HostNm) const { 00077 EAssert(IsOk()); return HostNm.GetUc().IsSuffix(_HostNm.GetUc());} 00078 bool IsInPath(const TStr& _PathStr) const { 00079 EAssert(IsOk()); return PathStr.GetUc().IsPrefix(_PathStr.GetUc());} 00080 void ToLcPath(); 00081 00082 static bool IsAbs(const TStr& UrlStr); 00083 static bool IsScript(const TStr& UrlStr); 00084 static bool IsSite(const TStr& UrlStr); 00085 00086 static PUrl GetUrlFromShortcut(const TStr& ShortcutUrlStr, 00087 const TStr& DfHostNmPrefix, const TStr& DfHostNmSufix); 00088 static TStr GetUrlSearchStr(const TStr& Str); 00089 static TStr DecodeUrlStr(const TStr& UrlStr); 00090 static TStr GetDocStrFromUrlStr(const TStr& UrlStr, const int& Copies=1); 00091 static TStr GetTopDownDocNm( 00092 const TStr& UrlStr, const int& MxLen=-1, const bool& HostOnlyP=false); 00093 }; 00094 typedef TPair<TInt, PUrl> TIdUrlPr; 00095 typedef TQQueue<TIdUrlPr> TIdUrlPrQ; 00096 typedef THash<TInt, PUrl> TIdToUrlH; 00097 00099 // Url-Environment 00100 ClassTP(TUrlEnv, PUrlEnv)//{ 00101 private: 00102 TStr BaseUrlStr; 00103 TStrV KeyNmV; 00104 TStrStrVH KeyNmToValH; 00105 public: 00106 TUrlEnv(): 00107 KeyNmV(), KeyNmToValH(10){} 00108 TUrlEnv(const TUrlEnv& UrlEnv): 00109 KeyNmV(UrlEnv.KeyNmV), KeyNmToValH(UrlEnv.KeyNmToValH){} 00110 static PUrlEnv New(){return new TUrlEnv();} 00111 static PUrlEnv New(const TStr& BaseUrlStr, 00112 const TStr& KeyNm1=TStr(), const TStr& ValStr1=TStr(), 00113 const TStr& KeyNm2=TStr(), const TStr& ValStr2=TStr(), 00114 const TStr& KeyNm3=TStr(), const TStr& ValStr3=TStr(), 00115 const TStr& KeyNm4=TStr(), const TStr& ValStr4=TStr()){ 00116 PUrlEnv UrlEnv=New(); 00117 UrlEnv->PutBaseUrlStr(BaseUrlStr); 00118 if (!KeyNm1.Empty()){UrlEnv->AddKeyVal(KeyNm1, ValStr1);} 00119 if (!KeyNm2.Empty()){UrlEnv->AddKeyVal(KeyNm2, ValStr2);} 00120 if (!KeyNm3.Empty()){UrlEnv->AddKeyVal(KeyNm3, ValStr3);} 00121 if (!KeyNm4.Empty()){UrlEnv->AddKeyVal(KeyNm4, ValStr4);} 00122 return UrlEnv;} 00123 ~TUrlEnv(){} 00124 TUrlEnv(TSIn& SIn): KeyNmV(SIn), KeyNmToValH(SIn){} 00125 static PUrlEnv Load(TSIn& SIn){return new TUrlEnv(SIn);} 00126 void Save(TSOut& SOut){KeyNmV.Save(SOut); KeyNmToValH.Save(SOut);} 00127 00128 TUrlEnv& operator=(const TUrlEnv& Env){ 00129 if (this!=&Env){KeyNmV=Env.KeyNmV; KeyNmToValH=Env.KeyNmToValH;} 00130 return *this;} 00131 00132 // base url 00133 void PutBaseUrlStr(const TStr& _BaseUrlStr){BaseUrlStr=_BaseUrlStr;} 00134 TStr GetBaseUrlStr() const {return BaseUrlStr;} 00135 00136 // adding key-value 00137 void AddKeyVal(const TStr& KeyNm, const TStr& ValStr){ 00138 if (!IsKey(KeyNm)){KeyNmV.Add(KeyNm); KeyNmToValH.AddKey(KeyNm);} 00139 KeyNmToValH.GetDat(KeyNm).Clr(); 00140 KeyNmToValH.GetDat(KeyNm).Add(ValStr);} 00141 void AddToKeyVal(const TStr& KeyNm, const TStr& ValStr){ 00142 if (!IsKey(KeyNm)){KeyNmV.Add(KeyNm); KeyNmToValH.AddKey(KeyNm);} 00143 KeyNmToValH.GetDat(KeyNm).Add(ValStr);} 00144 00145 // key retrieval 00146 bool Empty() const {return KeyNmV.Empty();} 00147 int GetKeys() const {return KeyNmV.Len();} 00148 bool IsKey(const TStr& KeyNm) const {return KeyNmV.SearchForw(KeyNm)!=-1;} 00149 int GetKeyN(const TStr& KeyNm) const {return KeyNmV.SearchForw(KeyNm);} 00150 TStr GetKeyNm(const int& KeyN) const {return KeyNmV[KeyN];} 00151 00152 // value retrieval 00153 int GetVals(const int& KeyN) const { 00154 return KeyNmToValH.GetDat(KeyNmV[KeyN]).Len();} 00155 int GetVals(const TStr& KeyNm) const { 00156 return KeyNmToValH.GetDat(KeyNm).Len();} 00157 TStr GetVal(const int& KeyN, const int& ValN=0) const { 00158 return KeyNmToValH.GetDat(KeyNmV[KeyN])[ValN];} 00159 TStr GetVal(const TStr& KeyNm, const int& ValN=0, const TStr& DfVal="") const { 00160 if (KeyNmToValH.IsKey(KeyNm)){ 00161 return KeyNmToValH.GetDat(KeyNm)[ValN];} 00162 else {return DfVal;}} 00163 00164 // full-url-string 00165 TStr GetFullUrlStr() const; 00166 00167 static PUrlEnv MkClone(const PUrlEnv& UrlEnv); 00168 }; 00169