|
SNAP Library, User Reference
2012-10-15 15:06:59
SNAP, a general purpose network analysis and graph mining library
|
Go to the source code of this file.
Typedefs | |
| typedef TPair< TInt, PUrl > | TIdUrlPr |
| typedef TQQueue< TIdUrlPr > | TIdUrlPrQ |
| typedef THash< TInt, PUrl > | TIdToUrlH |
Enumerations | |
| enum | TUrlScheme { usUndef, usHttp, usOther } |
Functions | |
| void | GetAbs (const TStr &AbsUrlStr) |
| void | GetAbsFromBase (const TStr &RelUrlStr, const TStr &BaseUrlStr) |
| UndefDefaultCopyAssign (TUrl) | |
| TUrl (const TStr &_RelUrlStr, const TStr &_BaseUrlStr=TStr()) | |
| static PUrl | New (const TStr &RelUrlStr, const TStr &BaseUrlStr=TStr()) |
| ~TUrl () | |
| TUrl (TSIn &) | |
| static PUrl | Load (TSIn &) |
| void | Save (TSOut &) |
| bool | IsOk (const TUrlScheme _Scheme=usUndef) const |
| TUrlScheme | GetScheme () |
| TStr | GetUrlStr () const |
| TStr | GetRelUrlStr () const |
| bool | IsBaseUrl () |
| TStr | GetBaseUrlStr () const |
| TStr | GetSchemeNm () const |
| TStr | GetHostNm () const |
| TStr | GetDmNm (const int &MxDmSegs=-1) const |
| bool | IsPortOk () const |
| TStr | GetPortStr () const |
| int | GetPortN () const |
| TStr | GetPathStr () const |
| int | GetPathSegs () const |
| TStr | GetPathSeg (const int &PathSegN) const |
| TStr | GetSearchStr () const |
| TStr | GetFragIdStr () const |
| bool | IsIpNum () const |
| void | PutIpNum (const TStr &_IpNum) |
| TStr | GetIpNum () const |
| TStr | GetIpNumOrHostNm () const |
| bool | IsDefFinalUrl () const |
| TStr | GetFinalUrlStr () const |
| TStr | GetAsFinalUrlStr () const |
| TStr | GetFinalHostNm () const |
| TStr | GetAsFinalHostNm () const |
| void | DefUrlAsFinal () |
| void | DefFinalUrl (const TStr &_FinalHostNm) |
| void | PutHttpRqStr (const TStr &_HttpRqStr) |
| TStr | GetHttpRqStr () const |
| bool | IsHttpRqStr () const |
| void | ChangeHttpRqStr (const TStr &SrcStr, const TStr &DstStr) |
| bool | IsInHost (const TStr &_HostNm) const |
| bool | IsInPath (const TStr &_PathStr) const |
| void | ToLcPath () |
| static bool | IsAbs (const TStr &UrlStr) |
| static bool | IsScript (const TStr &UrlStr) |
| static bool | IsSite (const TStr &UrlStr) |
| static PUrl | GetUrlFromShortcut (const TStr &ShortcutUrlStr, const TStr &DfHostNmPrefix, const TStr &DfHostNmSufix) |
| static TStr | GetUrlSearchStr (const TStr &Str) |
| static TStr | DecodeUrlStr (const TStr &UrlStr) |
| static TStr | GetDocStrFromUrlStr (const TStr &UrlStr, const int &Copies=1) |
| static TStr | GetTopDownDocNm (const TStr &UrlStr, const int &MxLen=-1, const bool &HostOnlyP=false) |
| TUrlEnv () | |
| TUrlEnv (const TUrlEnv &UrlEnv) | |
| static PUrlEnv | New () |
| static PUrlEnv | New (const TStr &BaseUrlStr, const TStr &KeyNm1=TStr(), const TStr &ValStr1=TStr(), const TStr &KeyNm2=TStr(), const TStr &ValStr2=TStr(), const TStr &KeyNm3=TStr(), const TStr &ValStr3=TStr(), const TStr &KeyNm4=TStr(), const TStr &ValStr4=TStr()) |
| ~TUrlEnv () | |
| TUrlEnv (TSIn &SIn) | |
| TUrlEnv & | operator= (const TUrlEnv &Env) |
| void | PutBaseUrlStr (const TStr &_BaseUrlStr) |
| void | AddKeyVal (const TStr &KeyNm, const TStr &ValStr) |
| void | AddToKeyVal (const TStr &KeyNm, const TStr &ValStr) |
| bool | Empty () const |
| int | GetKeys () const |
| bool | IsKey (const TStr &KeyNm) const |
| int | GetKeyN (const TStr &KeyNm) const |
| TStr | GetKeyNm (const int &KeyN) const |
| int | GetVals (const int &KeyN) const |
| int | GetVals (const TStr &KeyNm) const |
| TStr | GetVal (const int &KeyN, const int &ValN=0) const |
| TStr | GetVal (const TStr &KeyNm, const int &ValN=0, const TStr &DfVal="") const |
| TStr | GetFullUrlStr () const |
| static PUrlEnv | MkClone (const PUrlEnv &UrlEnv) |
Variables | |
| static ClassTPV(TUrl, PUrl, TUrlV) private const TStr | UrlHttpAbsPrefixStr = "http://" |
| TUrlScheme | Scheme |
| TStr | UrlStr |
| TStr | RelUrlStr |
| TStr | BaseUrlStr |
| TStr | SchemeNm |
| TStr | HostNm |
| TStr | PortStr |
| TStr | PathStr |
| TStr | SearchStr |
| TStr | FragIdStr |
| int | PortN |
| TStrV | PathSegV |
| TStr | IpNum |
| TStr | FinalUrlStr |
| TStr | FinalHostNm |
| TStr | HttpRqStr |
| ClassTP(TUrlEnv, PUrlEnv) private TStrV | KeyNmV |
| TStrStrVH | KeyNmToValH |
| enum TUrlScheme |
| void AddKeyVal | ( | const TStr & | KeyNm, |
| const TStr & | ValStr | ||
| ) |
Definition at line 137 of file url.h.
{
  // Register the key in both the key list and the hash the first time it is seen.
  if (!IsKey(KeyNm)){
    KeyNmV.Add(KeyNm);
    KeyNmToValH.AddKey(KeyNm);
  }
  // Replace whatever values were stored for this key with the single value ValStr.
  TStrV& ValV=KeyNmToValH.GetDat(KeyNm);
  ValV.Clr();
  ValV.Add(ValStr);
}
| void AddToKeyVal | ( | const TStr & | KeyNm, |
| const TStr & | ValStr | ||
| ) |
Definition at line 141 of file url.h.
{
  // Unknown keys are registered first; known keys keep the values they already have.
  const bool KnownKey=IsKey(KeyNm);
  if (!KnownKey){
    KeyNmV.Add(KeyNm);
    KeyNmToValH.AddKey(KeyNm);
  }
  // Append ValStr to the key's value list.
  TStrV& ValV=KeyNmToValH.GetDat(KeyNm);
  ValV.Add(ValStr);
}
| void ChangeHttpRqStr | ( | const TStr & | SrcStr, |
| const TStr & | DstStr | ||
| ) |
| TStr TUrl::DecodeUrlStr | ( | const TStr & | UrlStr | ) | [static] |
Definition at line 386 of file url.cpp.
{
  // Decodes a URL-encoded string: '+' becomes a space and "%XY" (two hex
  // digits) becomes the character with that code; other characters are copied.
  // Decoding stops at the first escape that is cut short or contains a
  // non-hex character; the text decoded up to that point is returned.
  TChA SrcChA=UrlStr; TChA DstChA;
  const int SrcLen=SrcChA.Len();
  int Pos=0;
  while (Pos<SrcLen){
    const char CurCh=SrcChA[Pos];
    if (CurCh=='%'){
      // an escape needs two more characters after the '%'
      if (Pos+2>=SrcLen){break;}
      const char HiCh=SrcChA[Pos+1];
      const char LoCh=SrcChA[Pos+2];
      if (!TCh::IsHex(HiCh)){break;}
      if (!TCh::IsHex(LoCh)){break;}
      DstChA+=char(TCh::GetHex(HiCh)*16 + TCh::GetHex(LoCh));
      Pos+=3;
    } else {
      if (CurCh=='+'){DstChA+=' ';} else {DstChA+=CurCh;}
      Pos++;
    }
  }
  return DstChA;
}
| void TUrl::DefFinalUrl | ( | const TStr & | _FinalHostNm | ) |
Definition at line 284 of file url.cpp.
{
  // Records the final host name (lower-cased) for an http URL and derives the
  // matching final URL string. Requires that no final URL is defined yet.
  EAssert(IsOk(usHttp));
  EAssert(!IsDefFinalUrl());
  FinalHostNm=_FinalHostNm.GetLc();
  // Same host: the final URL is simply the URL itself.
  if (HostNm==FinalHostNm){
    FinalUrlStr=UrlStr;
    return;
  }
  // Different host: rebuild the URL text with the final host substituted.
  TChA ComposedChA;
  ComposedChA+=SchemeNm;
  ComposedChA+="://";
  ComposedChA+=FinalHostNm;
  if (!PortStr.Empty()){
    ComposedChA+=":";
    ComposedChA+=PortStr;
  }
  ComposedChA+=PathStr;
  ComposedChA+=SearchStr;
  FinalUrlStr=ComposedChA;
}
| void DefUrlAsFinal | ( | ) |
Definition at line 65 of file url.h.
{
  // Declares this URL to be its own final URL; only valid for an http URL
  // that has no final URL defined yet.
  EAssert(IsOk(usHttp));
  EAssert(!IsDefFinalUrl());
  FinalHostNm=HostNm;
  FinalUrlStr=UrlStr;
}
| void TUrl::GetAbs | ( | const TStr & | AbsUrlStr | ) |
Definition at line 154 of file url.cpp.
{
// Parses the absolute URL AbsUrlStr with a TUrlLx lexer and fills this object's
// component fields (SchemeNm, Scheme, HostNm, PortStr, PortN, PathStr, PathSegV,
// SearchStr, FragIdStr) and the recomposed UrlStr.
// AbsUrlStr must satisfy IsAbs() and must be consumed completely by the lexer;
// otherwise an EAssertR check fails (presumably by throwing -- confirm against
// the EAssertR definition).
EAssertR(IsAbs(AbsUrlStr), AbsUrlStr);
// Str accumulates the URL text that is stored in UrlStr at the end.
TUrlLx Lx(AbsUrlStr); TChA Str;
Str+=SchemeNm=Lx.GetScheme(); Str+=Lx.GetCh(':');
if (SchemeNm=="http"){
Scheme=usHttp;
const char *DbSlashStr="//";
Str+=Lx.GetStr(DbSlashStr);
// GetHostPort fills HostNm/PortStr/PortN; its return value is appended to Str.
Str+=Lx.GetHostPort(HostNm, PortStr, PortN);
// PortN==-1 (presumably: no port in the URL) falls back to THttp::DfPortN;
// an explicitly given default port is likewise dropped from PortStr.
if (PortN==-1){PortN=THttp::DfPortN; PortStr.Clr();}
else if (PortN==THttp::DfPortN){PortStr.Clr();}
//**if (!PortStr.Empty()){Str+=':'; Str+=PortStr;}
if (Lx.PeekCh()=='/'){
PathStr=Lx.GetCh('/'); PathStr+=Lx.GetHPath(PathSegV); Str+=PathStr;}
// an http URL without a path gets "/" as its path
if (PathStr.Empty()){PathStr="/"; Str+=PathStr;}
if (Lx.PeekCh()=='?'){
SearchStr=Lx.GetCh('?'); SearchStr+=Lx.GetSearch(); Str+=SearchStr;}
} else {
// any other scheme: whatever Lx.GetToCh() returns is appended without further parsing
Scheme=usOther; Str+=Lx.GetToCh();
}
// skip spaces before an optional fragment
while (Lx.PeekCh()==' '){Lx.GetCh();}
// the fragment is stored in FragIdStr only; it is not appended to Str,
// so UrlStr does not contain it
if (Lx.PeekCh()=='#'){
FragIdStr=Lx.GetCh('#'); FragIdStr+=Lx.GetToCh();
}
// the whole input must have been consumed
EAssertR(Lx.Eof(), "");
UrlStr=Str;
}
| void TUrl::GetAbsFromBase | ( | const TStr & | RelUrlStr, |
| const TStr & | BaseUrlStr | ||
| ) |
Definition at line 182 of file url.cpp.
{
// Resolves RelUrlStr against BaseUrlStr by string manipulation and then parses
// the resulting absolute URL with GetAbs().
// The base must be non-empty, must parse to an OK URL and must be absolute.
EAssertR(!BaseUrlStr.Empty(), "");
PUrl Url=TUrl::New(BaseUrlStr); EAssertR(Url->IsOk(), "");
EAssertR(IsAbs(BaseUrlStr), "");
TStr AbsUrlStr=BaseUrlStr;
TStr NrRelUrlStr=RelUrlStr;
// If the relative URL begins (compared in lower case) with UrlHttpPrefixStr,
// that prefix is removed. UrlHttpPrefixStr is defined outside this block.
if (NrRelUrlStr.GetLc().IsPrefix(UrlHttpPrefixStr)){
NrRelUrlStr.DelSubStr(0, UrlHttpPrefixStr.Len()-1);}
if (NrRelUrlStr.Len()>0){
if (NrRelUrlStr[0]=='/'){
// Relative URL starts with one or more slashes: SlashStr holds that run.
TStr SlashStr; int SlashChN=0;
while ((SlashChN<NrRelUrlStr.Len())&&(NrRelUrlStr[SlashChN]=='/')){
SlashChN++; SlashStr+="/";}
// Search the base for a slash run of the same length that is not adjacent to
// another '/' on either side.
int ChN=0; bool Found=false;
while ((!Found)&&((ChN=AbsUrlStr.SearchStr(SlashStr, ChN))!=-1)){
// Str is the match plus one character on each side.
// NOTE(review): for ChN==0 the start index is -1; this relies on how
// GetSubStr treats out-of-range indices -- confirm.
TStr Str=AbsUrlStr.GetSubStr(ChN-1, ChN+SlashStr.Len()-1+1);
Found=((ChN==0)||(Str[0]!='/'))&&
((ChN+SlashStr.Len()-1==AbsUrlStr.Len()-1)||(Str[Str.Len()-1]!='/'));
if (!Found){ChN++;}
}
// Cut the base at the matching run and append the relative URL.
// If no such run exists the base is left unchanged and the relative URL is not appended.
if (Found){
AbsUrlStr.DelSubStr(ChN, AbsUrlStr.Len()-1);
AbsUrlStr+=NrRelUrlStr;
}
} else {
// No leading slash: drop everything after the last '/' of the base and append.
int ChN=AbsUrlStr.Len()-1;
while ((ChN>=0)&&(AbsUrlStr[ChN]!='/')){ChN--;}
AbsUrlStr.DelSubStr(ChN+1, AbsUrlStr.Len()-1);
AbsUrlStr+=NrRelUrlStr;
}
}
// Collapse each "/../" together with the path segment in front of it:
// BChN walks back to the character after the previous '/', EChN is the last
// character of "/../".
const char *PrevDirStr="/../";
{int ChN;
while ((ChN=AbsUrlStr.SearchStr(PrevDirStr))!=-1){
int BChN=ChN; int EChN=ChN+(int) strlen(PrevDirStr)-1;
while ((BChN-1>=0)&&(AbsUrlStr[BChN-1]!='/')){BChN--;}
AbsUrlStr.DelSubStr(BChN, EChN);
}}
// Remove "/." until DelStr reports nothing left to delete.
// NOTE(review): this matches the two characters "/." anywhere, so it would also
// alter segments such as "/.hidden" -- confirm this is intended.
const char *CurDirStr="/.";
while (AbsUrlStr.DelStr(CurDirStr)){}
GetAbs(AbsUrlStr);
}
| TStr GetAsFinalHostNm | ( | ) | const |
Definition at line 63 of file url.h.
{
  // Final host name when one has been defined, otherwise the URL's own host name.
  return IsDefFinalUrl() ? FinalHostNm : HostNm;
}
| TStr GetAsFinalUrlStr | ( | ) | const |
Definition at line 59 of file url.h.
{
  // Final URL string when one has been defined, otherwise the URL string itself.
  return IsDefFinalUrl() ? FinalUrlStr : UrlStr;
}
| TStr GetBaseUrlStr | ( | ) | const |
Definition at line 37 of file url.h.
{
  // Returns a copy of the stored base URL string.
  return BaseUrlStr;
}
| TStr TUrl::GetDmNm | ( | const int & | MxDmSegs = -1 | ) | const |
| TStr TUrl::GetDocStrFromUrlStr | ( | const TStr & | UrlStr, |
| const int & | Copies = 1 |
||
| ) | [static] |
Definition at line 407 of file url.cpp.
{
  // Turns a URL into a space-separated word string: the URL is split on
  // non-alphanumeric characters, tokens of three or fewer characters and the
  // tokens HTTP/HTML/INDEX/DEFAULT (compared in upper case) are dropped, and
  // every remaining token is emitted Copies times in its original spelling.
  TStrV TokenV; UrlStr.SplitOnNonAlNum(TokenV);
  TChA DocChA;
  const int Tokens=TokenV.Len();
  for (int TokenN=0; TokenN<Tokens; TokenN++){
    const TStr& Token=TokenV[TokenN];
    const TStr UcToken=Token.GetUc();
    if (UcToken.Len()<=3){continue;}
    if ((UcToken=="HTTP")||(UcToken=="HTML")||(UcToken=="INDEX")||(UcToken=="DEFAULT")){continue;}
    for (int CopyN=0; CopyN<Copies; CopyN++){
      // separate words with a single space
      if (!DocChA.Empty()){DocChA+=' ';}
      DocChA+=Token;
    }
  }
  return DocChA;
}
| TStr GetFinalHostNm | ( | ) | const |
Definition at line 61 of file url.h.
{
  // Only valid once a final URL has been defined.
  EAssert(IsDefFinalUrl());
  return FinalHostNm;
}
| TStr GetFinalUrlStr | ( | ) | const |
Definition at line 57 of file url.h.
{
  // Only valid once a final URL has been defined.
  EAssert(IsDefFinalUrl());
  return FinalUrlStr;
}
| TStr GetFragIdStr | ( | ) | const |
| TStr TUrlEnv::GetFullUrlStr | ( | ) | const |
Definition at line 445 of file url.cpp.
{
  // Builds "<BaseUrlStr>?<key>=<value>&<key>=<value>..." from every stored
  // key/value pair, encoding keys and values with TUrl::GetUrlSearchStr().
  // Returns an empty string when no keys are stored.
  if (GetKeys()==0){return TStr();}
  TChA SearchChA;
  SearchChA+=BaseUrlStr;
  SearchChA+="?";
  int KeyVals=0; // number of key=value pairs written so far
  for (int KeyN=0; KeyN<GetKeys(); KeyN++){
    const TStr KeyNm=GetKeyNm(KeyN);
    // The value list is only read here, so bind it by reference instead of
    // copying the whole vector for every key.
    const TStrV& ValStrV=KeyNmToValH.GetDat(KeyNm);
    // The encoded key is identical for all values of this key; encode it once.
    const TStr EncKeyNm=TUrl::GetUrlSearchStr(KeyNm);
    for (int ValStrN=0; ValStrN<ValStrV.Len(); ValStrN++){
      if (KeyVals>0){SearchChA+="&";}
      SearchChA+=EncKeyNm;
      SearchChA+='=';
      SearchChA+=TUrl::GetUrlSearchStr(ValStrV[ValStrN]);
      KeyVals++;
    }
  }
  return SearchChA;
}
| TStr GetHttpRqStr | ( | ) | const |
| TStr GetIpNumOrHostNm | ( | ) | const |
| int GetKeyN | ( | const TStr & | KeyNm | ) | const |
Definition at line 149 of file url.h.
{
  // Position of KeyNm in the key list, exactly as reported by SearchForw().
  const int KeyN=KeyNmV.SearchForw(KeyNm);
  return KeyN;
}
| TStr GetPathSeg | ( | const int & | PathSegN | ) | const |
| int GetPathSegs | ( | ) | const |
| int GetPortN | ( | ) | const |
| TStr GetRelUrlStr | ( | ) | const |
| TUrlScheme GetScheme | ( | ) |
| TStr GetSearchStr | ( | ) | const |
| TStr TUrl::GetTopDownDocNm | ( | const TStr & | UrlStr, |
| const int & | MxLen = -1, |
||
| const bool & | HostOnlyP = false |
||
| ) | [static] |
Definition at line 421 of file url.cpp.
{
  // Produces a lower-case document name with the host segments in reversed
  // order (e.g. "www.a.com" -> "com.a.www"), followed by the lower-cased path
  // unless HostOnlyP is set. A URL that does not parse as OK is simply
  // lower-cased. The result is cut to MxLen characters when MxLen is not -1.
  PUrl Url=TUrl::New(UrlStr);
  TChA DocNm;
  if (!Url->IsOk()){
    DocNm=UrlStr.GetLc();
  } else {
    TStr LcHostNm=Url->GetHostNm().GetLc();
    TStrV SegV; LcHostNm.SplitOnAllCh('.', SegV, false);
    const int LastSegN=SegV.Len()-1;
    // walk the host segments from last to first
    for (int SegN=LastSegN; SegN>=0; SegN--){
      if (SegN<LastSegN){DocNm+='.';}
      DocNm+=SegV[SegN];
    }
    if (!HostOnlyP){
      DocNm+=Url->GetPathStr().GetLc();
    }
  }
  if (MxLen!=-1){
    DocNm.Trunc(MxLen);
  }
  return DocNm;
}
| PUrl TUrl::GetUrlFromShortcut | ( | const TStr & | ShortcutUrlStr, |
| const TStr & | DfHostNmPrefix, | ||
| const TStr & | DfHostNmSufix | ||
| ) | [static] |
Definition at line 343 of file url.cpp.
{
  // Tries progressively more generous expansions of a shortcut and returns the
  // first one that parses as an OK URL; the last attempt is returned as is,
  // whether or not it is OK.

  // 1) the shortcut is already a correct url
  PUrl Url=TUrl::New(ShortcutUrlStr);
  if (Url->IsOk()){return Url;}

  TStr UrlStr;
  // 2) shortcuts consisting of several segments (contain a '.'):
  //    prepend 'http://', then additionally append '/'
  if (ShortcutUrlStr.IsChIn('.')){
    UrlStr=TUrl::UrlHttpAbsPrefixStr+ShortcutUrlStr;
    Url=TUrl::New(UrlStr);
    if (Url->IsOk()){return Url;}

    UrlStr=TUrl::UrlHttpAbsPrefixStr+ShortcutUrlStr+"/";
    Url=TUrl::New(UrlStr);
    if (Url->IsOk()){return Url;}
  }

  // 3) wrap the shortcut in 'http://', the default host prefix and suffix, and '/'
  UrlStr=UrlHttpAbsPrefixStr+
   DfHostNmPrefix+"."+ShortcutUrlStr+"."+DfHostNmSufix+"/";
  return TUrl::New(UrlStr);
}
| TStr TUrl::GetUrlSearchStr | ( | const TStr & | Str | ) | [static] |
Definition at line 368 of file url.cpp.
{
  // Encodes a string for use in a URL search part: a space becomes '+',
  // characters strictly between ' ' and '~' inclusive of '~' are copied unless
  // they are '+', '&' or '%', and everything else is written as '%' followed by
  // two hex digits of the character's unsigned value.
  TChA SrcChA=Str; TChA DstChA;
  const int SrcLen=SrcChA.Len();
  for (int Pos=0; Pos<SrcLen; Pos++){
    const char CurCh=SrcChA[Pos];
    if (CurCh==' '){
      DstChA+='+';
      continue;
    }
    const bool CopyAsIs=
     (' '<CurCh)&&(CurCh<='~')&&(CurCh!='+')&&(CurCh!='&')&&(CurCh!='%');
    if (CopyAsIs){
      DstChA+=CurCh;
    } else {
      const uchar Code=uchar(CurCh);
      DstChA+='%';
      DstChA+=TInt::GetHexStr(Code/16);
      DstChA+=TInt::GetHexStr(Code%16);
    }
  }
  return DstChA;
}
| TStr GetVal | ( | const int & | KeyN, |
| const int & | ValN = 0 |
||
| ) | const |
Definition at line 157 of file url.h.
{
  // Resolve the key name stored at position KeyN, then index into its value list.
  const TStr& KeyNm=KeyNmV[KeyN];
  const TStrV& ValV=KeyNmToValH.GetDat(KeyNm);
  return ValV[ValN];
}
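| TStr GetVal | ( | const TStr & | KeyNm, |
| const int & | ValN = 0, |
||
| const TStr & | DfVal = "" |
||
| ) | const |
Definition at line 159 of file url.h.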
{
  // Unknown key: hand back the caller-supplied default.
  if (!KeyNmToValH.IsKey(KeyNm)){
    return DfVal;
  }
  // Known key: return the ValN-th value stored for it.
  const TStrV& ValV=KeyNmToValH.GetDat(KeyNm);
  return ValV[ValN];
}
| int GetVals | ( | const int & | KeyN | ) | const |
Definition at line 153 of file url.h.
{
  // Number of values stored for the key at position KeyN.
  const TStr& KeyNm=KeyNmV[KeyN];
  const TStrV& ValV=KeyNmToValH.GetDat(KeyNm);
  return ValV.Len();
}
| int GetVals | ( | const TStr & | KeyNm | ) | const |
Definition at line 155 of file url.h.
{
  // Number of values stored for the key named KeyNm.
  const TStrV& ValV=KeyNmToValH.GetDat(KeyNm);
  return ValV.Len();
}
| bool TUrl::IsAbs | ( | const TStr & | UrlStr | ) | [static] |
| bool IsBaseUrl | ( | ) |
Definition at line 36 of file url.h.
{
  // True when a non-empty base URL string is stored.
  const bool HasBaseUrl=!BaseUrlStr.Empty();
  return HasBaseUrl;
}
| bool IsDefFinalUrl | ( | ) | const |
| bool IsHttpRqStr | ( | ) | const |
| bool IsKey | ( | const TStr & | KeyNm | ) | const |
Definition at line 148 of file url.h.
{
  // The key exists when the forward search over the key list does not return -1.
  const int KeyN=KeyNmV.SearchForw(KeyNm);
  return KeyN!=-1;
}
| bool IsOk | ( | const TUrlScheme | _Scheme = usUndef | ) | const |
| bool IsPortOk | ( | ) | const |
| bool TUrl::IsScript | ( | const TStr & | UrlStr | ) | [static] |
| bool TUrl::IsSite | ( | const TStr & | UrlStr | ) | [static] |
| static PUrlEnv MkClone | ( | const PUrlEnv & | UrlEnv | ) | [static] |
| static PUrlEnv New | ( | const TStr & | BaseUrlStr, |
| const TStr & | KeyNm1 = TStr(), |
||
| const TStr & | ValStr1 = TStr(), |
||
| const TStr & | KeyNm2 = TStr(), |
||
| const TStr & | ValStr2 = TStr(), |
||
| const TStr & | KeyNm3 = TStr(), |
||
| const TStr & | ValStr3 = TStr(), |
||
| const TStr & | KeyNm4 = TStr(), |
||
| const TStr & | ValStr4 = TStr() |
||
| ) | [static] |
Definition at line 111 of file url.h.
{
  // Creates an environment with the given base URL and up to four key/value
  // pairs; a pair is skipped when its key name is empty.
  PUrlEnv UrlEnv=New();
  UrlEnv->PutBaseUrlStr(BaseUrlStr);
  const TStr* KeyNmPtV[]={&KeyNm1, &KeyNm2, &KeyNm3, &KeyNm4};
  const TStr* ValStrPtV[]={&ValStr1, &ValStr2, &ValStr3, &ValStr4};
  for (int PairN=0; PairN<4; PairN++){
    if (KeyNmPtV[PairN]->Empty()){continue;}
    UrlEnv->AddKeyVal(*KeyNmPtV[PairN], *ValStrPtV[PairN]);
  }
  return UrlEnv;
}
| void PutBaseUrlStr | ( | const TStr & | _BaseUrlStr | ) |
Definition at line 133 of file url.h.
{
  // Store the given string as the base URL.
  BaseUrlStr=_BaseUrlStr;
}
| void PutHttpRqStr | ( | const TStr & | _HttpRqStr | ) |
| void TUrl::ToLcPath | ( | ) |
Definition at line 302 of file url.cpp.
{
  // Lower-cases the path part of the URL and rebuilds the dependent strings.
  // Nothing happens when the path is already lower-case.
  if (PathStr.IsLc()){return;}

  // lower-case the path string and every stored path segment
  PathStr.ToLc();
  const int PathSegs=PathSegV.Len();
  for (int SegN=0; SegN<PathSegs; SegN++){
    PathSegV[SegN].ToLc();
  }

  // rebuild the url text from its components
  TChA NewUrlChA;
  NewUrlChA+=SchemeNm;
  NewUrlChA+="://";
  NewUrlChA+=HostNm;
  if (!PortStr.Empty()){
    NewUrlChA+=":";
    NewUrlChA+=PortStr;
  }
  NewUrlChA+=PathStr;
  NewUrlChA+=SearchStr;
  UrlStr=NewUrlChA;

  // rebuild the final url, if one was defined; FinalUrlStr is cleared first,
  // presumably so that DefFinalUrl's !IsDefFinalUrl() assertion holds
  if (IsDefFinalUrl()){
    FinalUrlStr.Clr();
    DefFinalUrl(FinalHostNm);
  }
}
| TUrl::TUrl | ( | const TStr & | _RelUrlStr, |
| const TStr & | _BaseUrlStr = TStr() |
||
| ) |
Definition at line 228 of file url.cpp.
:
  Scheme(usUndef),
  UrlStr(), RelUrlStr(_RelUrlStr), BaseUrlStr(_BaseUrlStr),
  SchemeNm(), HostNm(),
  PortStr(), PathStr(), SearchStr(), FragIdStr(),
  PortN(-1), PathSegV(),
  IpNum(),
  FinalUrlStr(), FinalHostNm(),
  HttpRqStr(){
  // Builds a URL from a (possibly relative) URL string and an optional base.
  // The relative string is normalized with ToTrunc() and every space is
  // replaced by "%20" before parsing.
  RelUrlStr.ToTrunc();
  RelUrlStr.ChangeStrAll(" ", "%20");
  // Parsing failures are reported through PExcept; any such failure, or having
  // neither an absolute URL nor an absolute base, leaves Scheme at usUndef.
  try {
    if (IsAbs(RelUrlStr)){
      GetAbs(RelUrlStr);
    } else
    if (IsAbs(BaseUrlStr)){
      GetAbsFromBase(RelUrlStr, BaseUrlStr);
    } else {
      Scheme=usUndef;
    }
  }
  catch (PExcept&){Scheme=usUndef;}

  //** old version
  /*
  PUrl BaseUrl;
  if (!BaseUrlStr.Empty()){ // must be outside try-block (CBuilder3.0 bug)
    BaseUrl=TUrl::New(BaseUrlStr);}
  try {
    if (!BaseUrlStr.Empty()){
      EAssertR(BaseUrl->IsOk(), "");}
    if (IsAbs(RelUrlStr)){
      GetAbs(RelUrlStr);
    } else {
      GetAbsFromBase(RelUrlStr, BaseUrlStr);
    }
  }
  catch (PExcept&){Scheme=usUndef;}
  */
}
| TUrlEnv | ( | ) |
Definition at line 106 of file url.h.
: KeyNmV(), KeyNmToValH(10)
{
  // Default construction: empty key list; the hash is constructed with argument 10.
}
const TStr TUrl::UrlHttpAbsPrefixStr = "http://" [static] |