SNAP Library, User Reference
2012-10-15 15:06:59
SNAP, a general purpose network analysis and graph mining library
|
Go to the source code of this file.
Typedefs | |
typedef TPair< TInt, PUrl > | TIdUrlPr |
typedef TQQueue< TIdUrlPr > | TIdUrlPrQ |
typedef THash< TInt, PUrl > | TIdToUrlH |
Enumerations | |
enum | TUrlScheme { usUndef, usHttp, usOther } |
Functions | |
void | GetAbs (const TStr &AbsUrlStr) |
void | GetAbsFromBase (const TStr &RelUrlStr, const TStr &BaseUrlStr) |
UndefDefaultCopyAssign (TUrl) | |
TUrl (const TStr &_RelUrlStr, const TStr &_BaseUrlStr=TStr()) | |
static PUrl | New (const TStr &RelUrlStr, const TStr &BaseUrlStr=TStr()) |
~TUrl () | |
TUrl (TSIn &) | |
static PUrl | Load (TSIn &) |
void | Save (TSOut &) |
bool | IsOk (const TUrlScheme _Scheme=usUndef) const |
TUrlScheme | GetScheme () |
TStr | GetUrlStr () const |
TStr | GetRelUrlStr () const |
bool | IsBaseUrl () |
TStr | GetBaseUrlStr () const |
TStr | GetSchemeNm () const |
TStr | GetHostNm () const |
TStr | GetDmNm (const int &MxDmSegs=-1) const |
bool | IsPortOk () const |
TStr | GetPortStr () const |
int | GetPortN () const |
TStr | GetPathStr () const |
int | GetPathSegs () const |
TStr | GetPathSeg (const int &PathSegN) const |
TStr | GetSearchStr () const |
TStr | GetFragIdStr () const |
bool | IsIpNum () const |
void | PutIpNum (const TStr &_IpNum) |
TStr | GetIpNum () const |
TStr | GetIpNumOrHostNm () const |
bool | IsDefFinalUrl () const |
TStr | GetFinalUrlStr () const |
TStr | GetAsFinalUrlStr () const |
TStr | GetFinalHostNm () const |
TStr | GetAsFinalHostNm () const |
void | DefUrlAsFinal () |
void | DefFinalUrl (const TStr &_FinalHostNm) |
void | PutHttpRqStr (const TStr &_HttpRqStr) |
TStr | GetHttpRqStr () const |
bool | IsHttpRqStr () const |
void | ChangeHttpRqStr (const TStr &SrcStr, const TStr &DstStr) |
bool | IsInHost (const TStr &_HostNm) const |
bool | IsInPath (const TStr &_PathStr) const |
void | ToLcPath () |
static bool | IsAbs (const TStr &UrlStr) |
static bool | IsScript (const TStr &UrlStr) |
static bool | IsSite (const TStr &UrlStr) |
static PUrl | GetUrlFromShortcut (const TStr &ShortcutUrlStr, const TStr &DfHostNmPrefix, const TStr &DfHostNmSufix) |
static TStr | GetUrlSearchStr (const TStr &Str) |
static TStr | DecodeUrlStr (const TStr &UrlStr) |
static TStr | GetDocStrFromUrlStr (const TStr &UrlStr, const int &Copies=1) |
static TStr | GetTopDownDocNm (const TStr &UrlStr, const int &MxLen=-1, const bool &HostOnlyP=false) |
TUrlEnv () | |
TUrlEnv (const TUrlEnv &UrlEnv) | |
static PUrlEnv | New () |
static PUrlEnv | New (const TStr &BaseUrlStr, const TStr &KeyNm1=TStr(), const TStr &ValStr1=TStr(), const TStr &KeyNm2=TStr(), const TStr &ValStr2=TStr(), const TStr &KeyNm3=TStr(), const TStr &ValStr3=TStr(), const TStr &KeyNm4=TStr(), const TStr &ValStr4=TStr()) |
~TUrlEnv () | |
TUrlEnv (TSIn &SIn) | |
TUrlEnv & | operator= (const TUrlEnv &Env) |
void | PutBaseUrlStr (const TStr &_BaseUrlStr) |
void | AddKeyVal (const TStr &KeyNm, const TStr &ValStr) |
void | AddToKeyVal (const TStr &KeyNm, const TStr &ValStr) |
bool | Empty () const |
int | GetKeys () const |
bool | IsKey (const TStr &KeyNm) const |
int | GetKeyN (const TStr &KeyNm) const |
TStr | GetKeyNm (const int &KeyN) const |
int | GetVals (const int &KeyN) const |
int | GetVals (const TStr &KeyNm) const |
TStr | GetVal (const int &KeyN, const int &ValN=0) const |
TStr | GetVal (const TStr &KeyNm, const int &ValN=0, const TStr &DfVal="") const |
TStr | GetFullUrlStr () const |
static PUrlEnv | MkClone (const PUrlEnv &UrlEnv) |
Variables | |
static ClassTPV(TUrl, PUrl, TUrlV) private const TStr | UrlHttpAbsPrefixStr = "http://" |
TUrlScheme | Scheme |
TStr | UrlStr |
TStr | RelUrlStr |
TStr | BaseUrlStr |
TStr | SchemeNm |
TStr | HostNm |
TStr | PortStr |
TStr | PathStr |
TStr | SearchStr |
TStr | FragIdStr |
int | PortN |
TStrV | PathSegV |
TStr | IpNum |
TStr | FinalUrlStr |
TStr | FinalHostNm |
TStr | HttpRqStr |
ClassTP(TUrlEnv, PUrlEnv) private TStrV | KeyNmV |
TStrStrVH | KeyNmToValH |
enum TUrlScheme
void AddKeyVal | ( | const TStr & | KeyNm, |
const TStr & | ValStr | ||
) |
Definition at line 137 of file url.h.
{
  // Sets ValStr as the only value stored under KeyNm.
  // An unknown key is first registered in both the key list and the hash.
  if (!IsKey(KeyNm)){
    KeyNmV.Add(KeyNm);
    KeyNmToValH.AddKey(KeyNm);
  }
  // Drop whatever values were stored before, then store the new one.
  TStrV& ValStrV=KeyNmToValH.GetDat(KeyNm);
  ValStrV.Clr();
  ValStrV.Add(ValStr);
}
void AddToKeyVal | ( | const TStr & | KeyNm, |
const TStr & | ValStr | ||
) |
Definition at line 141 of file url.h.
{
  // Appends ValStr to the values already stored under KeyNm.
  // An unknown key is first registered in both the key list and the hash.
  if (!IsKey(KeyNm)){
    KeyNmV.Add(KeyNm);
    KeyNmToValH.AddKey(KeyNm);
  }
  TStrV& ValStrV=KeyNmToValH.GetDat(KeyNm);
  ValStrV.Add(ValStr);
}
void ChangeHttpRqStr | ( | const TStr & | SrcStr, |
const TStr & | DstStr | ||
) |
TStr TUrl::DecodeUrlStr | ( | const TStr & | UrlStr | ) | [static] |
Definition at line 386 of file url.cpp.
{
  // Decodes a URL-encoded string: '+' becomes a space and "%XY" becomes the
  // character with hex code XY. Decoding stops at the first '%' that is not
  // followed by two hex digits; the text decoded up to that point is returned.
  TChA InChA=UrlStr;
  TChA OutChA;
  const int InLen=InChA.Len();
  for (int ChN=0; ChN<InLen; ChN++){
    char Ch=InChA[ChN];
    if (Ch=='+'){OutChA+=' '; continue;}
    if (Ch!='%'){OutChA+=Ch; continue;}
    // escape sequence: both hex digits must be present and valid
    if (ChN+1==InLen){break;}
    char HiCh=InChA[ChN+1];
    if (!TCh::IsHex(HiCh)){break;}
    if (ChN+2==InLen){break;}
    char LoCh=InChA[ChN+2];
    if (!TCh::IsHex(LoCh)){break;}
    OutChA+=char(TCh::GetHex(HiCh)*16 + TCh::GetHex(LoCh));
    // skip the two hex digits just consumed
    ChN+=2;
  }
  return OutChA;
}
void TUrl::DefFinalUrl | ( | const TStr & | _FinalHostNm | ) |
Definition at line 284 of file url.cpp.
{
  // Records the final host name (lower-cased) and the matching final URL.
  // Requires a valid http URL whose final URL has not been defined yet.
  EAssert(IsOk(usHttp));
  EAssert(!IsDefFinalUrl());
  FinalHostNm=_FinalHostNm.GetLc();
  // Same host: the final URL is simply the current URL.
  if (HostNm==FinalHostNm){
    FinalUrlStr=UrlStr;
    return;
  }
  // Different host: rebuild the URL around the final host name.
  TChA FinalUrlChA;
  FinalUrlChA+=SchemeNm;
  FinalUrlChA+="://";
  FinalUrlChA+=FinalHostNm;
  if (!PortStr.Empty()){
    FinalUrlChA+=":";
    FinalUrlChA+=PortStr;
  }
  FinalUrlChA+=PathStr;
  FinalUrlChA+=SearchStr;
  FinalUrlStr=FinalUrlChA;
}
void DefUrlAsFinal | ( | ) |
Definition at line 65 of file url.h.
{
  // Marks this URL as its own final URL.
  // Requires a valid http URL whose final URL has not been defined yet.
  EAssert(IsOk(usHttp));
  EAssert(!IsDefFinalUrl());
  FinalHostNm=HostNm;
  FinalUrlStr=UrlStr;
}
void TUrl::GetAbs | ( | const TStr & | AbsUrlStr | ) |
Definition at line 154 of file url.cpp.
{
  // Parses the absolute URL AbsUrlStr with the TUrlLx lexer and fills the
  // scheme/host/port/path/search/fragment members. The normalized URL
  // (without the fragment) is stored in UrlStr.
  // Fails an EAssertR if AbsUrlStr is not absolute or if input remains
  // after parsing.
  EAssertR(IsAbs(AbsUrlStr), AbsUrlStr);
  TUrlLx Lx(AbsUrlStr); TChA Str;
  Str+=SchemeNm=Lx.GetScheme();
  Str+=Lx.GetCh(':');
  if (SchemeNm=="http"){
    Scheme=usHttp;
    const char *DbSlashStr="//";
    Str+=Lx.GetStr(DbSlashStr);
    Str+=Lx.GetHostPort(HostNm, PortStr, PortN);
    // A missing port (-1) falls back to the http default; the port string is
    // cleared whenever the effective port is the default one.
    if (PortN==-1){PortN=THttp::DfPortN; PortStr.Clr();}
    else if (PortN==THttp::DfPortN){PortStr.Clr();}
    //**if (!PortStr.Empty()){Str+=':'; Str+=PortStr;}
    if (Lx.PeekCh()=='/'){
      PathStr=Lx.GetCh('/'); PathStr+=Lx.GetHPath(PathSegV); Str+=PathStr;}
    // An absent path is treated as the root path.
    if (PathStr.Empty()){PathStr="/"; Str+=PathStr;}
    if (Lx.PeekCh()=='?'){
      SearchStr=Lx.GetCh('?'); SearchStr+=Lx.GetSearch(); Str+=SearchStr;}
  } else {
    // Non-http schemes are not decomposed; the remainder is copied as is.
    Scheme=usOther;
    Str+=Lx.GetToCh();
  }
  // Skip blanks before an optional fragment identifier.
  while (Lx.PeekCh()==' '){Lx.GetCh();}
  if (Lx.PeekCh()=='#'){
    FragIdStr=Lx.GetCh('#'); FragIdStr+=Lx.GetToCh();
  }
  EAssertR(Lx.Eof(), "");
  UrlStr=Str;
}
void TUrl::GetAbsFromBase | ( | const TStr & | RelUrlStr, |
const TStr & | BaseUrlStr | ||
) |
Definition at line 182 of file url.cpp.
{
  // Combines RelUrlStr with the absolute BaseUrlStr into an absolute URL
  // string and hands the result to GetAbs().
  // The base must be non-empty, parse as a valid URL, and be absolute.
  EAssertR(!BaseUrlStr.Empty(), "");
  PUrl Url=TUrl::New(BaseUrlStr); EAssertR(Url->IsOk(), "");
  EAssertR(IsAbs(BaseUrlStr), "");
  TStr AbsUrlStr=BaseUrlStr;
  TStr NrRelUrlStr=RelUrlStr;
  // Remove a leading UrlHttpPrefixStr (compared in lower case) from the
  // relative string. NOTE(review): UrlHttpPrefixStr is declared elsewhere.
  if (NrRelUrlStr.GetLc().IsPrefix(UrlHttpPrefixStr)){
    NrRelUrlStr.DelSubStr(0, UrlHttpPrefixStr.Len()-1);}
  if (NrRelUrlStr.Len()>0){
    if (NrRelUrlStr[0]=='/'){
      // Relative string starts with one or more slashes: build a string of
      // that many slashes ...
      TStr SlashStr; int SlashChN=0;
      while ((SlashChN<NrRelUrlStr.Len())&&(NrRelUrlStr[SlashChN]=='/')){
        SlashChN++; SlashStr+="/";}
      // ... and look in the base for a run of exactly that many slashes
      // (not preceded or followed by another slash).
      int ChN=0; bool Found=false;
      while ((!Found)&&((ChN=AbsUrlStr.SearchStr(SlashStr, ChN))!=-1)){
        TStr Str=AbsUrlStr.GetSubStr(ChN-1, ChN+SlashStr.Len()-1+1);
        Found=((ChN==0)||(Str[0]!='/'))&& ((ChN+SlashStr.Len()-1==AbsUrlStr.Len()-1)||(Str[Str.Len()-1]!='/'));
        if (!Found){ChN++;}
      }
      // Replace the base from that run onwards with the relative string.
      if (Found){
        AbsUrlStr.DelSubStr(ChN, AbsUrlStr.Len()-1);
        AbsUrlStr+=NrRelUrlStr;
      }
    } else {
      // No leading slash: replace everything after the last '/' of the base.
      int ChN=AbsUrlStr.Len()-1;
      while ((ChN>=0)&&(AbsUrlStr[ChN]!='/')){ChN--;}
      AbsUrlStr.DelSubStr(ChN+1, AbsUrlStr.Len()-1);
      AbsUrlStr+=NrRelUrlStr;
    }
  }
  // Resolve each "/../" by deleting it together with the text back to the
  // preceding '/'.
  const char *PrevDirStr="/../";
  {int ChN;
  while ((ChN=AbsUrlStr.SearchStr(PrevDirStr))!=-1){
    int BChN=ChN; int EChN=ChN+(int) strlen(PrevDirStr)-1;
    while ((BChN-1>=0)&&(AbsUrlStr[BChN-1]!='/')){BChN--;}
    AbsUrlStr.DelSubStr(BChN, EChN);
  }}
  // Delete occurrences of "/." until DelStr reports nothing left to delete.
  const char *CurDirStr="/.";
  while (AbsUrlStr.DelStr(CurDirStr)){}
  GetAbs(AbsUrlStr);
}
TStr GetAsFinalHostNm | ( | ) | const |
Definition at line 63 of file url.h.
{
  // Final host name when one has been defined, otherwise the plain host name.
  return IsDefFinalUrl() ? FinalHostNm : HostNm;
}
TStr GetAsFinalUrlStr | ( | ) | const |
Definition at line 59 of file url.h.
{
  // Final URL when one has been defined, otherwise the plain URL.
  return IsDefFinalUrl() ? FinalUrlStr : UrlStr;
}
TStr GetBaseUrlStr | ( | ) | const |
Definition at line 37 of file url.h.
{
  // Returns a copy of the stored base URL string.
  return BaseUrlStr;
}
TStr TUrl::GetDmNm | ( | const int & | MxDmSegs = -1 | ) | const |
TStr TUrl::GetDocStrFromUrlStr | ( | const TStr & | UrlStr, |
const int & | Copies = 1 | ||
) | [static] |
Definition at line 407 of file url.cpp.
{
  // Builds a space-separated word string from the alphanumeric pieces of
  // UrlStr. Pieces of three characters or fewer and the words HTTP, HTML,
  // INDEX and DEFAULT (case-insensitive) are skipped; every kept piece is
  // emitted Copies times in its original case.
  TStrV TokV;
  UrlStr.SplitOnNonAlNum(TokV);
  TChA DocChA;
  for (int TokN=0; TokN<TokV.Len(); TokN++){
    TStr UcTok=TokV[TokN].GetUc();
    if (UcTok.Len()<=3){continue;}
    if (!((UcTok!="HTTP")&&(UcTok!="HTML")&&(UcTok!="INDEX")&&(UcTok!="DEFAULT"))){
      continue;}
    for (int CopyN=0; CopyN<Copies; CopyN++){
      if (!DocChA.Empty()){DocChA+=' ';}
      DocChA+=TokV[TokN];
    }
  }
  return DocChA;
}
TStr GetFinalHostNm | ( | ) | const |
Definition at line 61 of file url.h.
{
  // Only valid once a final URL has been defined.
  EAssert(IsDefFinalUrl());
  return FinalHostNm;
}
TStr GetFinalUrlStr | ( | ) | const |
Definition at line 57 of file url.h.
{
  // Only valid once a final URL has been defined.
  EAssert(IsDefFinalUrl());
  return FinalUrlStr;
}
TStr GetFragIdStr | ( | ) | const |
TStr TUrlEnv::GetFullUrlStr | ( | ) | const |
Definition at line 445 of file url.cpp.
{
  // Builds "<BaseUrlStr>?k1=v1&k2=v2..." from all stored key/value pairs,
  // encoding keys and values with TUrl::GetUrlSearchStr. Keys are emitted in
  // KeyNmV order and each value of a multi-valued key gets its own pair.
  // Returns an empty string when no keys are stored.
  if (GetKeys()==0){return TStr();}
  TChA SearchChA;
  SearchChA+=BaseUrlStr;
  SearchChA+="?";
  int KeyVals=0;
  for (int KeyN=0; KeyN<GetKeys(); KeyN++){
    TStr KeyNm=GetKeyNm(KeyN);
    // The values are only read, so bind by const reference instead of
    // copying the whole value vector for every key.
    const TStrV& ValStrV=KeyNmToValH.GetDat(KeyNm);
    for (int ValStrN=0; ValStrN<ValStrV.Len(); ValStrN++){
      // '&' separates pairs, so it goes before every pair except the first.
      if (KeyVals>0){SearchChA+="&";}
      SearchChA+=TUrl::GetUrlSearchStr(KeyNm);
      SearchChA+='=';
      SearchChA+=TUrl::GetUrlSearchStr(ValStrV[ValStrN]);
      KeyVals++;
    }
  }
  return SearchChA;
}
TStr GetHttpRqStr | ( | ) | const |
TStr GetIpNumOrHostNm | ( | ) | const |
int GetKeyN | ( | const TStr & | KeyNm | ) | const |
Definition at line 149 of file url.h.
{
  // Position of KeyNm in the key list, as reported by a forward search.
  const int KeyN=KeyNmV.SearchForw(KeyNm);
  return KeyN;
}
TStr GetPathSeg | ( | const int & | PathSegN | ) | const |
int GetPathSegs | ( | ) | const |
int GetPortN | ( | ) | const |
TStr GetRelUrlStr | ( | ) | const |
TUrlScheme GetScheme | ( | ) |
TStr GetSearchStr | ( | ) | const |
TStr TUrl::GetTopDownDocNm | ( | const TStr & | UrlStr, |
const int & | MxLen = -1, | ||
const bool & | HostOnlyP = false | ||
) | [static] |
Definition at line 421 of file url.cpp.
{
  // Builds a document name with the host segments in reversed order
  // (e.g. "www.a.com" -> "com.a.www"), followed by the lower-cased path
  // unless HostOnlyP is set. If UrlStr is not a valid URL, the lower-cased
  // input is used instead. A MxLen other than -1 truncates the result.
  PUrl Url=TUrl::New(UrlStr);
  TChA DocNm;
  if (!Url->IsOk()){
    DocNm=UrlStr.GetLc();
  } else {
    TStr LcHostNm=Url->GetHostNm().GetLc();
    TStrV SegV;
    LcHostNm.SplitOnAllCh('.', SegV, false);
    // walk the host segments from last to first
    const int Segs=SegV.Len();
    for (int SegN=Segs-1; SegN>=0; SegN--){
      if (SegN<Segs-1){DocNm+='.';}
      DocNm+=SegV[SegN];
    }
    if (!HostOnlyP){
      DocNm+=Url->GetPathStr().GetLc();
    }
  }
  if (MxLen!=-1){
    DocNm.Trunc(MxLen);
  }
  return DocNm;
}
PUrl TUrl::GetUrlFromShortcut | ( | const TStr & | ShortcutUrlStr, |
const TStr & | DfHostNmPrefix, | ||
const TStr & | DfHostNmSufix | ||
) | [static] |
Definition at line 343 of file url.cpp.
{
  // Tries progressively more decorated forms of ShortcutUrlStr and returns
  // the first one that parses as a valid URL. The last attempt is returned
  // as is, so the caller still has to check IsOk() on the result.

  // shortcut is already correct url
  TStr UrlStr=ShortcutUrlStr;
  PUrl Url=TUrl::New(UrlStr);
  if (Url->IsOk()){return Url;}
  // the next two attempts apply only if the shortcut has several segments
  if (ShortcutUrlStr.IsChIn('.')){
    // add 'http://' to shortcut
    UrlStr=TUrl::UrlHttpAbsPrefixStr+ShortcutUrlStr;
    Url=TUrl::New(UrlStr);
    if (Url->IsOk()){return Url;}
    // add 'http://' and '/' to shortcut
    UrlStr=TUrl::UrlHttpAbsPrefixStr+ShortcutUrlStr+"/";
    Url=TUrl::New(UrlStr);
    if (Url->IsOk()){return Url;}
  }
  // add 'http://', prefix, postfix and '/' to shortcut
  UrlStr=UrlHttpAbsPrefixStr+
   DfHostNmPrefix+"."+ShortcutUrlStr+"."+DfHostNmSufix+"/";
  Url=TUrl::New(UrlStr);
  return Url;
}
TStr TUrl::GetUrlSearchStr | ( | const TStr & | Str | ) | [static] |
Definition at line 368 of file url.cpp.
{
  // Encodes Str for use in a URL search part: a space becomes '+', characters
  // in the range ('!'..'~') other than '+', '&' and '%' are copied, and every
  // other character is written as '%' followed by its two hex digits.
  TChA InChA=Str;
  TChA OutChA;
  const int InLen=InChA.Len();
  for (int ChN=0; ChN<InLen; ChN++){
    char Ch=InChA[ChN];
    if (Ch==' '){
      OutChA+='+';
      continue;
    }
    const bool CopyP=(' '<Ch)&&(Ch<='~')&&(Ch!='+')&&(Ch!='&')&&(Ch!='%');
    if (CopyP){
      OutChA+=Ch;
    } else {
      // high nibble first, then low nibble
      OutChA+='%';
      OutChA+=TInt::GetHexStr(uchar(Ch)/16);
      OutChA+=TInt::GetHexStr(uchar(Ch)%16);
    }
  }
  return OutChA;
}
TStr GetVal | ( | const int & | KeyN, |
const int & | ValN = 0 | ||
) | const |
Definition at line 157 of file url.h.
{
  // Value number ValN of the key stored at position KeyN in the key list.
  const TStr& KeyNm=KeyNmV[KeyN];
  return KeyNmToValH.GetDat(KeyNm)[ValN];
}
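TStr GetVal | ( | const TStr & | KeyNm, |
const int & | ValN = 0, | ||
const TStr & | DfVal = "" | ||
) | const |
Definition at line 159 of file url.h.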
{
  // Value number ValN stored under KeyNm, or DfVal when the key is unknown.
  if (!KeyNmToValH.IsKey(KeyNm)){
    return DfVal;
  }
  return KeyNmToValH.GetDat(KeyNm)[ValN];
}
int GetVals | ( | const int & | KeyN | ) | const |
Definition at line 153 of file url.h.
{
  // Number of values stored for the key at position KeyN in the key list.
  const TStr& KeyNm=KeyNmV[KeyN];
  return KeyNmToValH.GetDat(KeyNm).Len();
}
int GetVals | ( | const TStr & | KeyNm | ) | const |
Definition at line 155 of file url.h.
{
  // Number of values stored under KeyNm.
  const int Vals=KeyNmToValH.GetDat(KeyNm).Len();
  return Vals;
}
bool TUrl::IsAbs | ( | const TStr & | UrlStr | ) | [static] |
bool IsBaseUrl | ( | ) |
Definition at line 36 of file url.h.
{
  // True when a non-empty base URL string is stored.
  const bool EmptyP=BaseUrlStr.Empty();
  return !EmptyP;
}
bool IsDefFinalUrl | ( | ) | const |
bool IsHttpRqStr | ( | ) | const |
bool IsKey | ( | const TStr & | KeyNm | ) | const |
Definition at line 148 of file url.h.
{
  // A key is known when the forward search over the key list finds it
  // (the search reports -1 otherwise).
  const int KeyN=KeyNmV.SearchForw(KeyNm);
  return KeyN!=-1;
}
bool IsOk | ( | const TUrlScheme | _Scheme = usUndef | ) | const |
bool IsPortOk | ( | ) | const |
bool TUrl::IsScript | ( | const TStr & | UrlStr | ) | [static] |
bool TUrl::IsSite | ( | const TStr & | UrlStr | ) | [static] |
static PUrlEnv MkClone | ( | const PUrlEnv & | UrlEnv | ) | [static] |
static PUrlEnv New | ( | const TStr & | BaseUrlStr, |
const TStr & | KeyNm1 = TStr(), | ||
const TStr & | ValStr1 = TStr(), | ||
const TStr & | KeyNm2 = TStr(), | ||
const TStr & | ValStr2 = TStr(), | ||
const TStr & | KeyNm3 = TStr(), | ||
const TStr & | ValStr3 = TStr(), | ||
const TStr & | KeyNm4 = TStr(), | ||
const TStr & | ValStr4 = TStr() | ||
) | [static] |
Definition at line 111 of file url.h.
{
  // Creates an environment with the given base URL and up to four key/value
  // pairs. A pair is added only when its key name is non-empty.
  PUrlEnv Env=New();
  Env->PutBaseUrlStr(BaseUrlStr);
  if (!KeyNm1.Empty()){
    Env->AddKeyVal(KeyNm1, ValStr1);
  }
  if (!KeyNm2.Empty()){
    Env->AddKeyVal(KeyNm2, ValStr2);
  }
  if (!KeyNm3.Empty()){
    Env->AddKeyVal(KeyNm3, ValStr3);
  }
  if (!KeyNm4.Empty()){
    Env->AddKeyVal(KeyNm4, ValStr4);
  }
  return Env;
}
void PutBaseUrlStr | ( | const TStr & | _BaseUrlStr | ) |
Definition at line 133 of file url.h.
{
  // Stores the base URL string that GetFullUrlStr() later prepends.
  BaseUrlStr=_BaseUrlStr;
}
void PutHttpRqStr | ( | const TStr & | _HttpRqStr | ) |
void TUrl::ToLcPath | ( | ) |
Definition at line 302 of file url.cpp.
{
  // Lower-cases the path (PathStr and every entry of PathSegV) and rebuilds
  // UrlStr and, when defined, the final URL. Does nothing if the path is
  // already lower-case.

  // test if the conversion is needed
  if (!PathStr.IsLc()){
    // convert path strings to lower-case
    PathStr.ToLc();
    for (int PathSegN=0; PathSegN<PathSegV.Len(); PathSegN++){
      PathSegV[PathSegN].ToLc();}
    // recompose url
    TChA UrlChA;
    UrlChA+=SchemeNm; UrlChA+="://";
    UrlChA+=HostNm;
    if (!PortStr.Empty()){
      UrlChA+=":"; UrlChA+=PortStr;}
    UrlChA+=PathStr; UrlChA+=SearchStr;
    UrlStr=UrlChA;
    // recompose final-url
    // (the final URL is cleared first because DefFinalUrl asserts that no
    // final URL is currently defined)
    if (IsDefFinalUrl()){
      FinalUrlStr.Clr(); DefFinalUrl(FinalHostNm);}
  }
}
TUrl::TUrl | ( | const TStr & | _RelUrlStr, |
const TStr & | _BaseUrlStr = TStr() | ||
) |
Definition at line 228 of file url.cpp.
: Scheme(usUndef),
  UrlStr(), RelUrlStr(_RelUrlStr), BaseUrlStr(_BaseUrlStr),
  SchemeNm(), HostNm(), PortStr(), PathStr(), SearchStr(), FragIdStr(),
  PortN(-1), PathSegV(),
  IpNum(),
  FinalUrlStr(), FinalHostNm(),
  HttpRqStr(){
  // Builds a URL from a relative (or absolute) string and an optional base.
  // Any parsing failure leaves the object with Scheme==usUndef instead of
  // propagating the exception.
  RelUrlStr.ToTrunc();
  RelUrlStr.ChangeStrAll(" ", "%20");
  try {
    if (IsAbs(RelUrlStr)){
      // the "relative" string is already absolute; the base is ignored
      GetAbs(RelUrlStr);
    } else
    if (IsAbs(BaseUrlStr)){
      GetAbsFromBase(RelUrlStr, BaseUrlStr);
    } else {
      // neither string is absolute: nothing to resolve against
      Scheme=usUndef;
    }
  }
  catch (PExcept&){Scheme=usUndef;}

  //** old version
  /*
  PUrl BaseUrl;
  if (!BaseUrlStr.Empty()){ // must be outside try-block (CBuilder3.0 bug)
    BaseUrl=TUrl::New(BaseUrlStr);}
  try {
    if (!BaseUrlStr.Empty()){
      EAssertR(BaseUrl->IsOk(), "");}
    if (IsAbs(RelUrlStr)){
      GetAbs(RelUrlStr);
    } else {
      GetAbsFromBase(RelUrlStr, BaseUrlStr);
    }
  }
  catch (PExcept&){Scheme=usUndef;}
  */
}
TUrlEnv | ( | ) |
Definition at line 106 of file url.h.
: KeyNmV(),
  KeyNmToValH(10)  // hash is constructed with the argument 10
{
}
const TStr TUrl::UrlHttpAbsPrefixStr = "http://" [static] |