|
SNAP Library, User Reference
2012-10-02 12:56:23
SNAP, a general purpose network analysis and graph mining library
|
Go to the source code of this file.
Classes | |
| class | THtmlLx |
Enumerations | |
| enum | THtmlLxChTy { hlctSpace, hlctAlpha, hlctNum, hlctSym, hlctLTag, hlctRTag, hlctEof } |
| enum | THtmlLxSym { hsyUndef, hsyStr, hsyNum, hsySSym, hsyUrl, hsyBTag, hsyETag, hsyMTag, hsyEof } |
| enum | THtmlDocType { hdtAll, hdtStr, hdtStrNum, hdtTag, hdtA, hdtHRef, hdtUL } |
Functions | |
| ClassHdTP (THtmlTok, PHtmlTok) ClassHdTP(THtmlDoc, PHtmlDoc) | |
| void | SetUcCh (const char &UcCh, const char &LcCh) |
| void | SetUcCh (const TStr &Str) |
| void | SetChTy (const THtmlLxChTy &ChTy, const TStr &Str) |
| void | SetEscStr (const TStr &SrcStr, const TStr &DstStr) |
| THtmlLxChDef () | |
| THtmlLxChDef (TSIn &SIn) | |
| static PHtmlLxChDef | Load (TSIn &SIn) |
| void | Save (TSOut &SOut) |
| THtmlLxChDef & | operator= (const THtmlLxChDef &) |
| int | GetChTy (const char &Ch) const |
| bool | IsEoln (const char &Ch) const |
| bool | IsWs (const char &Ch) const |
| bool | IsSpace (const char &Ch) const |
| bool | IsAlpha (const char &Ch) const |
| bool | IsNum (const char &Ch) const |
| bool | IsAlNum (const char &Ch) const |
| bool | IsSym (const char &Ch) const |
| bool | IsUrl (const char &Ch) const |
| bool | IsUc (const char &Ch) const |
| bool | IsLc (const char &Ch) const |
| char | GetUc (const char &Ch) const |
| char | GetLc (const char &Ch) const |
| void | GetUcChA (TChA &ChA) const |
| void | GetLcChA (TChA &ChA) const |
| TStr | GetUcStr (const TStr &Str) const |
| TStr | GetLcStr (const TStr &Str) const |
| TStr | GetEscStr (const TStr &Str) const |
| static PHtmlLxChDef | GetChDef () |
| static THtmlLxChDef & | GetChDefRef () |
| static TStr | GetCSZFromYuascii (const TChA &ChA) |
| static TStr | GetCSZFromWin1250 (const TChA &ChA) |
| static TStr | GetWin1250FromYuascii (const TChA &ChA) |
| static TStr | GetIsoCeFromYuascii (const TChA &ChA) |
| THtmlTok () | |
| THtmlTok (const THtmlLxSym &_Sym) | |
| THtmlTok (const THtmlLxSym &_Sym, const TStr &_Str) | |
| THtmlTok (const THtmlLxSym &_Sym, const TStr &_Str, const THtmlLx::TArgNmValV &_ArgNmValV) | |
| THtmlTok (TSIn &) | |
| THtmlTok & | operator= (const THtmlTok &) |
| THtmlLxSym | GetSym () const |
| TStr | GetStr () const |
| TStr | GetFullStr () const |
| bool | IsArg (const TStr &ArgNm) const |
| TStr | GetArg (const TStr &ArgNm) const |
| TStr | GetArg (const TStr &ArgNm, const TStr &DfArgVal) const |
| bool | IsUrlTok (TStr &RelUrlStr) const |
| bool | IsRedirUrlTok () const |
| void | SaveTxt (const PSOut &SOut, const bool &TxtMode=true) |
| static bool | IsBreakTag (const TStr &TagNm) |
| static bool | IsBreakTok (const PHtmlTok &Tok) |
| static bool | IsHTag (const TStr &TagNm, int &HTagN) |
| static PHtmlTok | GetHTok (const bool &IsBTag, const int &HTagN) |
| THtmlDoc () | |
| THtmlDoc (const PSIn &SIn, const THtmlDocType &Type=hdtAll, const bool &DoUc=true) | |
| static PHtmlDoc | New (const PSIn &SIn, const THtmlDocType &Type=hdtAll, const bool &DoUc=true) |
| THtmlDoc (TSIn &) | |
| THtmlDoc & | operator= (const THtmlDoc &) |
| int | GetToks () const |
| PHtmlTok | GetTok (const int &TokN) const |
| PHtmlTok | GetTok (const int &TokN, THtmlLxSym &Sym, TStr &Str) const |
| void | AddTokV (const THtmlTokV &_TokV) |
| static TStr | GetTxtLnDoc (const TStr &HtmlStr) |
| static TStr | GetTxtLnDoc (const TStr &HtmlStr, const TStr &BaseUrlStr, const bool &OutUrlP, const bool &OutTagsP) |
| static PHtmlDoc | LoadTxt (const TStr &FNm, const THtmlDocType &Type=hdtAll, const bool &DoUc=true) |
| static void | SaveHtmlToTxt (const TStr &HtmlStr, const PSOut &TxtSOut, const TStr &BaseUrlStr, const bool &OutUrlP, const bool &OutToksP) |
| static void | SaveHtmlToTxt (const TStr &HtmlStr, const TStr &TxtFNm, const TStr &BaseUrlStr, const bool &OutUrlP, const bool &OutToksP) |
| static void | SaveHtmlToXml (const TStr &HtmlStr, const PSOut &XmlSOut, const TStr &BaseUrlStr, const bool &OutTextP, const bool &OutUrlP, const bool &OutToksP, const bool &OutTagsP, const bool &OutArgsP) |
| static void | SaveHtmlToXml (const TStr &HtmlStr, const TStr &XmlFNm, const TStr &BaseUrlStr, const bool &OutTextP, const bool &OutUrlP, const bool &OutToksP, const bool &OutTagsP, const bool &OutArgsP) |
| static TLxSym | GetLxSym (const THtmlLxSym &HtmlLxSym, const TChA &ChA) |
| static bool | _IsTagRedir (const TStr &TagStr, const TStr &ArgNm, THtmlLx &Lx, const TStr &BaseUrlStr, const TStr &RedirUrlStr) |
| static TStr | GetRedirHtmlDocStr (const TStr &HtmlStr, const TStr &BaseUrlStr, const TStr &RedirUrlStr) |
| THtmlHldV (const PHtmlDoc &_RefHtmlDoc, const int &HldWnLen=10) | |
| THtmlHldV (TSIn &) | |
| THtmlHldV & | operator= (const THtmlHldV &) |
| PHtmlDoc | GetRefHtmlDoc () |
| int | GetHlds () |
| PHtmlDoc | GetHld (const int &HldN) |
| TWebPg () | |
| TWebPg (const TStrV &_UrlStrV, const TStrV &_IpNumV, const PHttpResp &_HttpResp) | |
| static PWebPg | New (const TStrV &UrlStrV, const TStrV &IpNumV, const PHttpResp &HttpResp) |
| static PWebPg | New (const TStrV &UrlStrV, const PHttpResp &HttpResp) |
| static PWebPg | New (const TStr &UrlStr, const PHttpResp &HttpResp) |
| ~TWebPg () | |
| TWebPg (TSIn &) | |
| TWebPg & | operator= (const TWebPg &) |
| int | GetUrls () const |
| TStr | GetUrlStr (const int &UrlN=-1) const |
| PUrl | GetUrl (const int &UrlN=-1) const |
| int | GetIps () const |
| TStr | GetIpNum (const int &IpN=-1) const |
| PHttpResp | GetHttpResp () const |
| TStr | GetHttpHdStr () const |
| TStr | GetHttpBodyAsStr () const |
| void | GetOutUrlV (TUrlV &OutUrlV, TUrlV &OutRedirUrlV) const |
| void | GetOutUrlV (TUrlV &OutUrlV) const |
| void | GetOutDescUrlStrKdV (TStrKdV &OutDescUrlStrKdV) const |
| void | PutFetchMSecs (const uint64 &_FetchMSecs) |
| uint64 | GetFetchMSecs () const |
| void | SaveAsHttpBody (const TStr &FNm) const |
| void | SaveAsHttp (const TStr &FNm) const |
| bool | IsTxt () const |
Variables | |
| ClassTP(THtmlLxChDef, PHtmlLxChDef) private TChV | UcChV |
| TChV | LcChV |
| TStrStrH | EscStrH |
| static PHtmlLxChDef | ChDef |
| ClassTPV(THtmlTok, PHtmlTok, THtmlTokV) private TStr | Str |
| THtmlLx::TArgNmValV | ArgNmValV |
| static const TStr | ATagNm = "<A>" |
| static const TStr | AreaTagNm = "<AREA>" |
| static const TStr | BrTagNm = "<BR>" |
| static const TStr | CardTagNm = "<CARD>" |
| static const TStr | CenterTagNm = "<CENTER>" |
| static const TStr | FrameTagNm = "<FRAME>" |
| static const TStr | H1TagNm = "<H1>" |
| static const TStr | H2TagNm = "<H2>" |
| static const TStr | H3TagNm = "<H3>" |
| static const TStr | H4TagNm = "<H4>" |
| static const TStr | H5TagNm = "<H5>" |
| static const TStr | H6TagNm = "<H6>" |
| static const TStr | ImgTagNm = "<IMG>" |
| static const TStr | LiTagNm = "<LI>" |
| static const TStr | MetaTagNm = "<META>" |
| static const TStr | PTagNm = "<P>" |
| static const TStr | UlTagNm = "<UL>" |
| static const TStr | TitleTagNm = "<TITLE>" |
| static const TStr | TitleETagNm = "</TITLE>" |
| static const TStr | AltArgNm = "ALT" |
| static const TStr | HRefArgNm = "HREF" |
| static const TStr | SrcArgNm = "SRC" |
| static const TStr | TitleArgNm = "TITLE" |
| static const TStr | HttpEquivArgNm = "HTTP-EQUIV" |
| ClassTP(THtmlHldV, PHtmlHldV) private THtmlDocV | HldV |
| ClassTPV(TWebPg, PWebPg, TWebPgV) private TStrV | IpNumV |
| PHttpResp | HttpResp |
| uint64 | FetchMSecs |
| enum THtmlDocType |
| enum THtmlLxChTy |
| enum THtmlLxSym |
| bool THtmlDoc::_IsTagRedir | ( | const TStr & | TagStr, |
| const TStr & | ArgNm, | ||
| THtmlLx & | Lx, | ||
| const TStr & | BaseUrlStr, | ||
| const TStr & | RedirUrlStr | ||
| ) | [static] |
| void AddTokV | ( | const THtmlTokV & | _TokV | ) |
| static PHtmlLxChDef GetChDef | ( | ) | [static] |
| static THtmlLxChDef& GetChDefRef | ( | ) | [static] |
| int GetChTy | ( | const char & | Ch | ) | const |
| TStr THtmlLxChDef::GetCSZFromWin1250 | ( | const TChA & | ChA | ) | [static] |
| TStr THtmlLxChDef::GetCSZFromYuascii | ( | const TChA & | ChA | ) | [static] |
| TStr THtmlLxChDef::GetEscStr | ( | const TStr & | Str | ) | const |
| uint64 GetFetchMSecs | ( | ) | const |
| TStr THtmlTok::GetFullStr | ( | ) | const |
| PHtmlDoc GetHld | ( | const int & | HldN | ) |
| int GetHlds | ( | ) |
| PHtmlTok THtmlTok::GetHTok | ( | const bool & | IsBTag, |
| const int & | HTagN | ||
| ) | [static] |
| TStr GetHttpBodyAsStr | ( | ) | const |
| TStr GetHttpHdStr | ( | ) | const |
| PHttpResp GetHttpResp | ( | ) | const |
| int GetIps | ( | ) | const |
| TStr THtmlLxChDef::GetIsoCeFromYuascii | ( | const TChA & | ChA | ) | [static] |
| char GetLc | ( | const char & | Ch | ) | const |
| TLxSym THtmlDoc::GetLxSym | ( | const THtmlLxSym & | HtmlLxSym, |
| const TChA & | ChA | ||
| ) | [static] |
| void TWebPg::GetOutDescUrlStrKdV | ( | TStrKdV & | OutDescUrlStrKdV | ) | const |
| void TWebPg::GetOutUrlV | ( | TUrlV & | OutUrlV, |
| TUrlV & | OutRedirUrlV | ||
| ) | const |
| void GetOutUrlV | ( | TUrlV & | OutUrlV | ) | const |
| TStr THtmlDoc::GetRedirHtmlDocStr | ( | const TStr & | HtmlStr, |
| const TStr & | BaseUrlStr, | ||
| const TStr & | RedirUrlStr | ||
| ) | [static] |
| PHtmlDoc GetRefHtmlDoc | ( | ) |
| THtmlLxSym GetSym | ( | ) | const |
| PHtmlTok GetTok | ( | const int & | TokN | ) | const |
| PHtmlTok GetTok | ( | const int & | TokN, |
| THtmlLxSym & | Sym, | ||
| TStr & | Str | ||
| ) | const |
| int GetToks | ( | ) | const |
| static TStr GetTxtLnDoc | ( | const TStr & | HtmlStr | ) | [static] |
| static TStr GetTxtLnDoc | ( | const TStr & | HtmlStr, |
| const TStr & | BaseUrlStr, | ||
| const bool & | OutUrlP, | ||
| const bool & | OutTagsP | ||
| ) | [static] |
| char GetUc | ( | const char & | Ch | ) | const |
| TStr THtmlLxChDef::GetUcStr | ( | const TStr & | Str | ) | const |
| PUrl GetUrl | ( | const int & | UrlN = -1 | ) | const |
| int GetUrls | ( | ) | const |
| TStr THtmlLxChDef::GetWin1250FromYuascii | ( | const TChA & | ChA | ) | [static] |
| bool IsAlNum | ( | const char & | Ch | ) | const |
| bool IsAlpha | ( | const char & | Ch | ) | const |
| bool THtmlTok::IsBreakTag | ( | const TStr & | TagNm | ) | [static] |
| bool THtmlTok::IsBreakTok | ( | const PHtmlTok & | Tok | ) | [static] |
| bool IsEoln | ( | const char & | Ch | ) | const |
| bool THtmlTok::IsHTag | ( | const TStr & | TagNm, |
| int & | HTagN | ||
| ) | [static] |
| bool IsLc | ( | const char & | Ch | ) | const |
| bool IsNum | ( | const char & | Ch | ) | const |
| bool THtmlTok::IsRedirUrlTok | ( | ) | const |
| bool IsSpace | ( | const char & | Ch | ) | const |
| bool IsSym | ( | const char & | Ch | ) | const |
| bool TWebPg::IsTxt | ( | ) | const |
| bool IsUc | ( | const char & | Ch | ) | const |
| bool IsUrl | ( | const char & | Ch | ) | const |
| bool THtmlTok::IsUrlTok | ( | TStr & | RelUrlStr | ) | const |
| bool IsWs | ( | const char & | Ch | ) | const |
| static PHtmlDoc LoadTxt | ( | const TStr & | FNm, |
| const THtmlDocType & | Type = hdtAll, | ||
| const bool & | DoUc = true | ||
| ) | [static] |
| static PHtmlDoc New | ( | const PSIn & | SIn, |
| const THtmlDocType & | Type = hdtAll, | ||
| const bool & | DoUc = true | ||
| ) | [static] |
| static PWebPg New | ( | const TStrV & | UrlStrV, |
| const TStrV & | IpNumV, | ||
| const PHttpResp & | HttpResp | ||
| ) | [static] |
| THtmlLxChDef& operator= | ( | const THtmlLxChDef & | ) |
| void PutFetchMSecs | ( | const uint64 & | _FetchMSecs | ) |
| void TWebPg::SaveAsHttp | ( | const TStr & | FNm | ) | const |
| void TWebPg::SaveAsHttpBody | ( | const TStr & | FNm | ) | const |
| static void SaveHtmlToTxt | ( | const TStr & | HtmlStr, |
| const PSOut & | TxtSOut, | ||
| const TStr & | BaseUrlStr, | ||
| const bool & | OutUrlP, | ||
| const bool & | OutToksP | ||
| ) | [static] |
| static void SaveHtmlToTxt | ( | const TStr & | HtmlStr, |
| const TStr & | TxtFNm, | ||
| const TStr & | BaseUrlStr, | ||
| const bool & | OutUrlP, | ||
| const bool & | OutToksP | ||
| ) | [static] |
| static void SaveHtmlToXml | ( | const TStr & | HtmlStr, |
| const PSOut & | XmlSOut, | ||
| const TStr & | BaseUrlStr, | ||
| const bool & | OutTextP, | ||
| const bool & | OutUrlP, | ||
| const bool & | OutToksP, | ||
| const bool & | OutTagsP, | ||
| const bool & | OutArgsP | ||
| ) | [static] |
| static void SaveHtmlToXml | ( | const TStr & | HtmlStr, |
| const TStr & | XmlFNm, | ||
| const TStr & | BaseUrlStr, | ||
| const bool & | OutTextP, | ||
| const bool & | OutUrlP, | ||
| const bool & | OutToksP, | ||
| const bool & | OutTagsP, | ||
| const bool & | OutArgsP | ||
| ) | [static] |
| void SetChTy | ( | const THtmlLxChTy & | ChTy, |
| const TStr & | Str | ||
| ) |
| void THtmlLxChDef::SetEscStr | ( | const TStr & | SrcStr, |
| const TStr & | DstStr | ||
| ) |
| void THtmlLxChDef::SetUcCh | ( | const char & | UcCh, |
| const char & | LcCh | ||
| ) |
| void THtmlLxChDef::SetUcCh | ( | const TStr & | Str | ) |
| THtmlDoc | ( | ) |
| THtmlDoc::THtmlDoc | ( | const PSIn & | SIn, |
| const THtmlDocType & | Type = hdtAll, | ||
| const bool & | DoUc = true | ||
| ) |
| THtmlHldV::THtmlHldV | ( | const PHtmlDoc & | _RefHtmlDoc, |
| const int & | HldWnLen = 10 | ||
| ) |
| THtmlLxChDef | ( | TSIn & | SIn | ) |
| THtmlTok | ( | ) |
| THtmlTok | ( | const THtmlLxSym & | _Sym | ) |
| THtmlTok | ( | const THtmlLxSym & | _Sym, |
| const TStr & | _Str | ||
| ) |
| THtmlTok | ( | const THtmlLxSym & | _Sym, |
| const TStr & | _Str, | ||
| const THtmlLx::TArgNmValV & | _ArgNmValV | ||
| ) |
| TWebPg | ( | ) |
| ~TWebPg | ( | ) |
const TStr THtmlTok::AltArgNm = "ALT" [static] |
const TStr THtmlTok::AreaTagNm = "<AREA>" [static] |
const TStr THtmlTok::ATagNm = "<A>" [static] |
const TStr THtmlTok::BrTagNm = "<BR>" [static] |
const TStr THtmlTok::CardTagNm = "<CARD>" [static] |
const TStr THtmlTok::CenterTagNm = "<CENTER>" [static] |
PHtmlLxChDef ChDef [static] |
const TStr THtmlTok::FrameTagNm = "<FRAME>" [static] |
const TStr THtmlTok::H1TagNm = "<H1>" [static] |
const TStr THtmlTok::H2TagNm = "<H2>" [static] |
const TStr THtmlTok::H3TagNm = "<H3>" [static] |
const TStr THtmlTok::H4TagNm = "<H4>" [static] |
const TStr THtmlTok::H5TagNm = "<H5>" [static] |
const TStr THtmlTok::H6TagNm = "<H6>" [static] |
const TStr THtmlTok::HRefArgNm = "HREF" [static] |
const TStr THtmlTok::HttpEquivArgNm = "HTTP-EQUIV" [static] |
| PHttpResp HttpResp |
const TStr THtmlTok::ImgTagNm = "<IMG>" [static] |
const TStr THtmlTok::LiTagNm = "<LI>" [static] |
const TStr THtmlTok::MetaTagNm = "<META>" [static] |
const TStr THtmlTok::PTagNm = "<P>" [static] |
const TStr THtmlTok::SrcArgNm = "SRC" [static] |
const TStr THtmlTok::TitleArgNm = "TITLE" [static] |
const TStr THtmlTok::TitleETagNm = "</TITLE>" [static] |
const TStr THtmlTok::TitleTagNm = "<TITLE>" [static] |
| ClassTP (THtmlLxChDef, PHtmlLxChDef) private TChV UcChV |
const TStr THtmlTok::UlTagNm = "<UL>" [static] |