SNAP Library, User Reference
2012-10-02 12:56:23
SNAP, a general purpose network analysis and graph mining library
|
Go to the source code of this file.
Classes | |
class | THtmlLx |
Enumerations | |
enum | THtmlLxChTy { hlctSpace, hlctAlpha, hlctNum, hlctSym, hlctLTag, hlctRTag, hlctEof } |
enum | THtmlLxSym { hsyUndef, hsyStr, hsyNum, hsySSym, hsyUrl, hsyBTag, hsyETag, hsyMTag, hsyEof } |
enum | THtmlDocType { hdtAll, hdtStr, hdtStrNum, hdtTag, hdtA, hdtHRef, hdtUL } |
Functions | |
ClassHdTP (THtmlTok, PHtmlTok) ClassHdTP(THtmlDoc | |
void | SetUcCh (const char &UcCh, const char &LcCh) |
void | SetUcCh (const TStr &Str) |
void | SetChTy (const THtmlLxChTy &ChTy, const TStr &Str) |
void | SetEscStr (const TStr &SrcStr, const TStr &DstStr) |
THtmlLxChDef () | |
THtmlLxChDef (TSIn &SIn) | |
static PHtmlLxChDef | Load (TSIn &SIn) |
void | Save (TSOut &SOut) |
THtmlLxChDef & | operator= (const THtmlLxChDef &) |
int | GetChTy (const char &Ch) const |
bool | IsEoln (const char &Ch) const |
bool | IsWs (const char &Ch) const |
bool | IsSpace (const char &Ch) const |
bool | IsAlpha (const char &Ch) const |
bool | IsNum (const char &Ch) const |
bool | IsAlNum (const char &Ch) const |
bool | IsSym (const char &Ch) const |
bool | IsUrl (const char &Ch) const |
bool | IsUc (const char &Ch) const |
bool | IsLc (const char &Ch) const |
char | GetUc (const char &Ch) const |
char | GetLc (const char &Ch) const |
void | GetUcChA (TChA &ChA) const |
void | GetLcChA (TChA &ChA) const |
TStr | GetUcStr (const TStr &Str) const |
TStr | GetLcStr (const TStr &Str) const |
TStr | GetEscStr (const TStr &Str) const |
static PHtmlLxChDef | GetChDef () |
static THtmlLxChDef & | GetChDefRef () |
static TStr | GetCSZFromYuascii (const TChA &ChA) |
static TStr | GetCSZFromWin1250 (const TChA &ChA) |
static TStr | GetWin1250FromYuascii (const TChA &ChA) |
static TStr | GetIsoCeFromYuascii (const TChA &ChA) |
THtmlTok () | |
THtmlTok (const THtmlLxSym &_Sym) | |
THtmlTok (const THtmlLxSym &_Sym, const TStr &_Str) | |
THtmlTok (const THtmlLxSym &_Sym, const TStr &_Str, const THtmlLx::TArgNmValV &_ArgNmValV) | |
THtmlTok (TSIn &) | |
THtmlTok & | operator= (const THtmlTok &) |
THtmlLxSym | GetSym () const |
TStr | GetStr () const |
TStr | GetFullStr () const |
bool | IsArg (const TStr &ArgNm) const |
TStr | GetArg (const TStr &ArgNm) const |
TStr | GetArg (const TStr &ArgNm, const TStr &DfArgVal) const |
bool | IsUrlTok (TStr &RelUrlStr) const |
bool | IsRedirUrlTok () const |
void | SaveTxt (const PSOut &SOut, const bool &TxtMode=true) |
static bool | IsBreakTag (const TStr &TagNm) |
static bool | IsBreakTok (const PHtmlTok &Tok) |
static bool | IsHTag (const TStr &TagNm, int &HTagN) |
static PHtmlTok | GetHTok (const bool &IsBTag, const int &HTagN) |
THtmlDoc () | |
THtmlDoc (const PSIn &SIn, const THtmlDocType &Type=hdtAll, const bool &DoUc=true) | |
static PHtmlDoc | New (const PSIn &SIn, const THtmlDocType &Type=hdtAll, const bool &DoUc=true) |
THtmlDoc (TSIn &) | |
THtmlDoc & | operator= (const THtmlDoc &) |
int | GetToks () const |
PHtmlTok | GetTok (const int &TokN) const |
PHtmlTok | GetTok (const int &TokN, THtmlLxSym &Sym, TStr &Str) const |
void | AddTokV (const THtmlTokV &_TokV) |
static TStr | GetTxtLnDoc (const TStr &HtmlStr) |
static TStr | GetTxtLnDoc (const TStr &HtmlStr, const TStr &BaseUrlStr, const bool &OutUrlP, const bool &OutTagsP) |
static PHtmlDoc | LoadTxt (const TStr &FNm, const THtmlDocType &Type=hdtAll, const bool &DoUc=true) |
static void | SaveHtmlToTxt (const TStr &HtmlStr, const PSOut &TxtSOut, const TStr &BaseUrlStr, const bool &OutUrlP, const bool &OutToksP) |
static void | SaveHtmlToTxt (const TStr &HtmlStr, const TStr &TxtFNm, const TStr &BaseUrlStr, const bool &OutUrlP, const bool &OutToksP) |
static void | SaveHtmlToXml (const TStr &HtmlStr, const PSOut &XmlSOut, const TStr &BaseUrlStr, const bool &OutTextP, const bool &OutUrlP, const bool &OutToksP, const bool &OutTagsP, const bool &OutArgsP) |
static void | SaveHtmlToXml (const TStr &HtmlStr, const TStr &XmlFNm, const TStr &BaseUrlStr, const bool &OutTextP, const bool &OutUrlP, const bool &OutToksP, const bool &OutTagsP, const bool &OutArgsP) |
static TLxSym | GetLxSym (const THtmlLxSym &HtmlLxSym, const TChA &ChA) |
static bool | _IsTagRedir (const TStr &TagStr, const TStr &ArgNm, THtmlLx &Lx, const TStr &BaseUrlStr, const TStr &RedirUrlStr) |
static TStr | GetRedirHtmlDocStr (const TStr &HtmlStr, const TStr &BaseUrlStr, const TStr &RedirUrlStr) |
THtmlHldV (const PHtmlDoc &_RefHtmlDoc, const int &HldWnLen=10) | |
THtmlHldV (TSIn &) | |
THtmlHldV & | operator= (const THtmlHldV &) |
PHtmlDoc | GetRefHtmlDoc () |
int | GetHlds () |
PHtmlDoc | GetHld (const int &HldN) |
TWebPg () | |
TWebPg (const TStrV &_UrlStrV, const TStrV &_IpNumV, const PHttpResp &_HttpResp) | |
static PWebPg | New (const TStrV &UrlStrV, const TStrV &IpNumV, const PHttpResp &HttpResp) |
static PWebPg | New (const TStrV &UrlStrV, const PHttpResp &HttpResp) |
static PWebPg | New (const TStr &UrlStr, const PHttpResp &HttpResp) |
~TWebPg () | |
TWebPg (TSIn &) | |
TWebPg & | operator= (const TWebPg &) |
int | GetUrls () const |
TStr | GetUrlStr (const int &UrlN=-1) const |
PUrl | GetUrl (const int &UrlN=-1) const |
int | GetIps () const |
TStr | GetIpNum (const int &IpN=-1) const |
PHttpResp | GetHttpResp () const |
TStr | GetHttpHdStr () const |
TStr | GetHttpBodyAsStr () const |
void | GetOutUrlV (TUrlV &OutUrlV, TUrlV &OutRedirUrlV) const |
void | GetOutUrlV (TUrlV &OutUrlV) const |
void | GetOutDescUrlStrKdV (TStrKdV &OutDescUrlStrKdV) const |
void | PutFetchMSecs (const uint64 &_FetchMSecs) |
uint64 | GetFetchMSecs () const |
void | SaveAsHttpBody (const TStr &FNm) const |
void | SaveAsHttp (const TStr &FNm) const |
bool | IsTxt () const |
Variables | |
ClassTP(THtmlLxChDef, PHtmlLxChDef) private TChV | UcChV |
TChV | LcChV |
TStrStrH | EscStrH |
static PHtmlLxChDef | ChDef |
ClassTPV(THtmlTok, PHtmlTok, THtmlTokV) private TStr | Str |
THtmlLx::TArgNmValV | ArgNmValV |
static const TStr | ATagNm = "<A>" |
static const TStr | AreaTagNm = "<AREA>" |
static const TStr | BrTagNm = "<BR>" |
static const TStr | CardTagNm = "<CARD>" |
static const TStr | CenterTagNm = "<CENTER>" |
static const TStr | FrameTagNm = "<FRAME>" |
static const TStr | H1TagNm = "<H1>" |
static const TStr | H2TagNm = "<H2>" |
static const TStr | H3TagNm = "<H3>" |
static const TStr | H4TagNm = "<H4>" |
static const TStr | H5TagNm = "<H5>" |
static const TStr | H6TagNm = "<H6>" |
static const TStr | ImgTagNm = "<IMG>" |
static const TStr | LiTagNm = "<LI>" |
static const TStr | MetaTagNm = "<META>" |
static const TStr | PTagNm = "<P>" |
static const TStr | UlTagNm = "<UL>" |
static const TStr | TitleTagNm = "<TITLE>" |
static const TStr | TitleETagNm = "</TITLE>" |
static const TStr | AltArgNm = "ALT" |
static const TStr | HRefArgNm = "HREF" |
static const TStr | SrcArgNm = "SRC" |
static const TStr | TitleArgNm = "TITLE" |
static const TStr | HttpEquivArgNm = "HTTP-EQUIV" |
ClassTP(THtmlHldV, PHtmlHldV) private THtmlDocV | HldV |
ClassTPV(TWebPg, PWebPg, TWebPgV) private TStrV | IpNumV |
PHttpResp | HttpResp |
uint64 | FetchMSecs |
enum THtmlDocType |
enum THtmlLxChTy |
enum THtmlLxSym |
bool THtmlDoc::_IsTagRedir | ( | const TStr & | TagStr, |
const TStr & | ArgNm, | ||
THtmlLx & | Lx, | ||
const TStr & | BaseUrlStr, | ||
const TStr & | RedirUrlStr | ||
) | [static] |
void AddTokV | ( | const THtmlTokV & | _TokV | ) |
static PHtmlLxChDef GetChDef | ( | ) | [static] |
static THtmlLxChDef& GetChDefRef | ( | ) | [static] |
int GetChTy | ( | const char & | Ch | ) | const |
TStr THtmlLxChDef::GetCSZFromWin1250 | ( | const TChA & | ChA | ) | [static] |
TStr THtmlLxChDef::GetCSZFromYuascii | ( | const TChA & | ChA | ) | [static] |
TStr THtmlLxChDef::GetEscStr | ( | const TStr & | Str | ) | const |
uint64 GetFetchMSecs | ( | ) | const |
TStr THtmlTok::GetFullStr | ( | ) | const |
PHtmlDoc GetHld | ( | const int & | HldN | ) |
int GetHlds | ( | ) |
PHtmlTok THtmlTok::GetHTok | ( | const bool & | IsBTag, |
const int & | HTagN | ||
) | [static] |
TStr GetHttpBodyAsStr | ( | ) | const |
TStr GetHttpHdStr | ( | ) | const |
PHttpResp GetHttpResp | ( | ) | const |
int GetIps | ( | ) | const |
TStr THtmlLxChDef::GetIsoCeFromYuascii | ( | const TChA & | ChA | ) | [static] |
char GetLc | ( | const char & | Ch | ) | const |
TLxSym THtmlDoc::GetLxSym | ( | const THtmlLxSym & | HtmlLxSym, |
const TChA & | ChA | ||
) | [static] |
void TWebPg::GetOutDescUrlStrKdV | ( | TStrKdV & | OutDescUrlStrKdV | ) | const |
void TWebPg::GetOutUrlV | ( | TUrlV & | OutUrlV, |
TUrlV & | OutRedirUrlV | ||
) | const |
void GetOutUrlV | ( | TUrlV & | OutUrlV | ) | const |
TStr THtmlDoc::GetRedirHtmlDocStr | ( | const TStr & | HtmlStr, |
const TStr & | BaseUrlStr, | ||
const TStr & | RedirUrlStr | ||
) | [static] |
PHtmlDoc GetRefHtmlDoc | ( | ) |
THtmlLxSym GetSym | ( | ) | const |
PHtmlTok GetTok | ( | const int & | TokN | ) | const |
PHtmlTok GetTok | ( | const int & | TokN, |
THtmlLxSym & | Sym, | ||
TStr & | Str | ||
) | const |
int GetToks | ( | ) | const |
static TStr GetTxtLnDoc | ( | const TStr & | HtmlStr | ) | [static] |
static TStr GetTxtLnDoc | ( | const TStr & | HtmlStr, |
const TStr & | BaseUrlStr, | ||
const bool & | OutUrlP, | ||
const bool & | OutTagsP | ||
) | [static] |
char GetUc | ( | const char & | Ch | ) | const |
TStr THtmlLxChDef::GetUcStr | ( | const TStr & | Str | ) | const |
PUrl GetUrl | ( | const int & | UrlN = -1 | ) | const |
int GetUrls | ( | ) | const |
TStr THtmlLxChDef::GetWin1250FromYuascii | ( | const TChA & | ChA | ) | [static] |
bool IsAlNum | ( | const char & | Ch | ) | const |
bool IsAlpha | ( | const char & | Ch | ) | const |
bool THtmlTok::IsBreakTag | ( | const TStr & | TagNm | ) | [static] |
bool THtmlTok::IsBreakTok | ( | const PHtmlTok & | Tok | ) | [static] |
bool IsEoln | ( | const char & | Ch | ) | const |
bool THtmlTok::IsHTag | ( | const TStr & | TagNm, |
int & | HTagN | ||
) | [static] |
bool IsLc | ( | const char & | Ch | ) | const |
bool IsNum | ( | const char & | Ch | ) | const |
bool THtmlTok::IsRedirUrlTok | ( | ) | const |
bool IsSpace | ( | const char & | Ch | ) | const |
bool IsSym | ( | const char & | Ch | ) | const |
bool TWebPg::IsTxt | ( | ) | const |
bool IsUc | ( | const char & | Ch | ) | const |
bool IsUrl | ( | const char & | Ch | ) | const |
bool THtmlTok::IsUrlTok | ( | TStr & | RelUrlStr | ) | const |
bool IsWs | ( | const char & | Ch | ) | const |
static PHtmlDoc LoadTxt | ( | const TStr & | FNm, |
const THtmlDocType & | Type = hdtAll, | ||
const bool & | DoUc = true | ||
) | [static] |
static PHtmlDoc New | ( | const PSIn & | SIn, |
const THtmlDocType & | Type = hdtAll, | ||
const bool & | DoUc = true | ||
) | [static] |
static PWebPg New | ( | const TStrV & | UrlStrV, |
const TStrV & | IpNumV, | ||
const PHttpResp & | HttpResp | ||
) | [static] |
THtmlLxChDef& operator= | ( | const THtmlLxChDef & | ) |
void PutFetchMSecs | ( | const uint64 & | _FetchMSecs | ) |
void TWebPg::SaveAsHttp | ( | const TStr & | FNm | ) | const |
void TWebPg::SaveAsHttpBody | ( | const TStr & | FNm | ) | const |
static void SaveHtmlToTxt | ( | const TStr & | HtmlStr, |
const PSOut & | TxtSOut, | ||
const TStr & | BaseUrlStr, | ||
const bool & | OutUrlP, | ||
const bool & | OutToksP | ||
) | [static] |
static void SaveHtmlToTxt | ( | const TStr & | HtmlStr, |
const TStr & | TxtFNm, | ||
const TStr & | BaseUrlStr, | ||
const bool & | OutUrlP, | ||
const bool & | OutToksP | ||
) | [static] |
static void SaveHtmlToXml | ( | const TStr & | HtmlStr, |
const PSOut & | XmlSOut, | ||
const TStr & | BaseUrlStr, | ||
const bool & | OutTextP, | ||
const bool & | OutUrlP, | ||
const bool & | OutToksP, | ||
const bool & | OutTagsP, | ||
const bool & | OutArgsP | ||
) | [static] |
static void SaveHtmlToXml | ( | const TStr & | HtmlStr, |
const TStr & | XmlFNm, | ||
const TStr & | BaseUrlStr, | ||
const bool & | OutTextP, | ||
const bool & | OutUrlP, | ||
const bool & | OutToksP, | ||
const bool & | OutTagsP, | ||
const bool & | OutArgsP | ||
) | [static] |
void SetChTy | ( | const THtmlLxChTy & | ChTy, |
const TStr & | Str | ||
) |
void THtmlLxChDef::SetEscStr | ( | const TStr & | SrcStr, |
const TStr & | DstStr | ||
) |
void THtmlLxChDef::SetUcCh | ( | const char & | UcCh, |
const char & | LcCh | ||
) |
void THtmlLxChDef::SetUcCh | ( | const TStr & | Str | ) |
THtmlDoc | ( | ) |
THtmlDoc::THtmlDoc | ( | const PSIn & | SIn, |
const THtmlDocType & | Type = hdtAll, | ||
const bool & | DoUc = true | ||
) |
THtmlHldV::THtmlHldV | ( | const PHtmlDoc & | _RefHtmlDoc, |
const int & | HldWnLen = 10 | ||
) |
THtmlLxChDef | ( | TSIn & | SIn | ) |
THtmlTok | ( | ) |
THtmlTok | ( | const THtmlLxSym & | _Sym | ) |
THtmlTok | ( | const THtmlLxSym & | _Sym, |
const TStr & | _Str | ||
) |
THtmlTok | ( | const THtmlLxSym & | _Sym, |
const TStr & | _Str, | ||
const THtmlLx::TArgNmValV & | _ArgNmValV | ||
) |
TWebPg | ( | ) |
~TWebPg | ( | ) |
const TStr THtmlTok::AltArgNm = "ALT" [static] |
const TStr THtmlTok::AreaTagNm = "<AREA>" [static] |
const TStr THtmlTok::ATagNm = "<A>" [static] |
const TStr THtmlTok::BrTagNm = "<BR>" [static] |
const TStr THtmlTok::CardTagNm = "<CARD>" [static] |
const TStr THtmlTok::CenterTagNm = "<CENTER>" [static] |
PHtmlLxChDef ChDef [static] |
const TStr THtmlTok::FrameTagNm = "<FRAME>" [static] |
const TStr THtmlTok::H1TagNm = "<H1>" [static] |
const TStr THtmlTok::H2TagNm = "<H2>" [static] |
const TStr THtmlTok::H3TagNm = "<H3>" [static] |
const TStr THtmlTok::H4TagNm = "<H4>" [static] |
const TStr THtmlTok::H5TagNm = "<H5>" [static] |
const TStr THtmlTok::H6TagNm = "<H6>" [static] |
const TStr THtmlTok::HRefArgNm = "HREF" [static] |
const TStr THtmlTok::HttpEquivArgNm = "HTTP-EQUIV" [static] |
PHttpResp HttpResp |
const TStr THtmlTok::ImgTagNm = "<IMG>" [static] |
const TStr THtmlTok::LiTagNm = "<LI>" [static] |
const TStr THtmlTok::MetaTagNm = "<META>" [static] |
const TStr THtmlTok::PTagNm = "<P>" [static] |
const TStr THtmlTok::SrcArgNm = "SRC" [static] |
const TStr THtmlTok::TitleArgNm = "TITLE" [static] |
const TStr THtmlTok::TitleETagNm = "</TITLE>" [static] |
const TStr THtmlTok::TitleTagNm = "<TITLE>" [static] |
ClassTP (THtmlLxChDef, PHtmlLxChDef) private TChV UcChV |
const TStr THtmlTok::UlTagNm = "<UL>" [static] |