SNAP Library 6.0, Developer Reference
2020-12-09 16:24:20
SNAP, a general purpose, high performance system for analysis and manipulation of large networks
|
#include <html.h>
Public Member Functions | |
THtmlDoc () | |
THtmlDoc (const PSIn &SIn, const THtmlDocType &Type=hdtAll, const bool &DoUc=true) | |
THtmlDoc (TSIn &) | |
void | Save (TSOut &) |
THtmlDoc & | operator= (const THtmlDoc &) |
int | GetToks () const |
PHtmlTok | GetTok (const int &TokN) const |
PHtmlTok | GetTok (const int &TokN, THtmlLxSym &Sym, TStr &Str) const |
void | AddTokV (const THtmlTokV &_TokV) |
void | SaveTxt (const PSOut &SOut, const bool &TxtMode=true) const |
Static Public Member Functions | |
static PHtmlDoc | New (const PSIn &SIn, const THtmlDocType &Type=hdtAll, const bool &DoUc=true) |
static PHtmlDoc | Load (TSIn &) |
static TStr | GetTxtLnDoc (const TStr &HtmlStr) |
static TStr | GetTxtLnDoc (const TStr &HtmlStr, const TStr &BaseUrlStr, const bool &OutUrlP, const bool &OutTagsP) |
static PHtmlDoc | LoadTxt (const TStr &FNm, const THtmlDocType &Type=hdtAll, const bool &DoUc=true) |
static void | SaveHtmlToTxt (const TStr &HtmlStr, const PSOut &TxtSOut, const TStr &BaseUrlStr, const bool &OutUrlP, const bool &OutToksP) |
static void | SaveHtmlToTxt (const TStr &HtmlStr, const TStr &TxtFNm, const TStr &BaseUrlStr, const bool &OutUrlP, const bool &OutToksP) |
static void | SaveHtmlToXml (const TStr &HtmlStr, const PSOut &XmlSOut, const TStr &BaseUrlStr, const bool &OutTextP, const bool &OutUrlP, const bool &OutToksP, const bool &OutTagsP, const bool &OutArgsP) |
static void | SaveHtmlToXml (const TStr &HtmlStr, const TStr &XmlFNm, const TStr &BaseUrlStr, const bool &OutTextP, const bool &OutUrlP, const bool &OutToksP, const bool &OutTagsP, const bool &OutArgsP) |
static TLxSym | GetLxSym (const THtmlLxSym &HtmlLxSym, const TChA &ChA) |
static bool | _IsTagRedir (const TStr &TagStr, const TStr &ArgNm, THtmlLx &Lx, const TStr &BaseUrlStr, const TStr &RedirUrlStr) |
static TStr | GetRedirHtmlDocStr (const TStr &HtmlStr, const TStr &BaseUrlStr, const TStr &RedirUrlStr) |
Private Attributes | |
TCRef | CRef |
THtmlTokV | TokV |
Friends | |
class | TPt< THtmlDoc > |
THtmlDoc::THtmlDoc | ( | const PSIn & | SIn, |
const THtmlDocType & | Type = hdtAll , |
||
const bool & | DoUc = true |
||
) |
Definition at line 779 of file html.cpp.
References TVec< TVal, TSizeTy >::Add(), THtmlTok::AreaTagNm, Fail, THtmlLx::GetSym(), THtmlLx::GetTok(), hdtA, hdtAll, hdtHRef, hdtStr, hdtStrNum, hdtTag, hdtUL, hsyBTag, hsyEof, hsyETag, hsyNum, hsyStr, THtmlTok::ImgTagNm, THtmlLx::Sym, TokV, THtmlLx::UcChA, and THtmlTok::UlTagNm.
|
inline |
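bool THtmlDoc::_IsTagRedir (const TStr &TagStr, const TStr &ArgNm, THtmlLx &Lx, const TStr &BaseUrlStr, const TStr &RedirUrlStr) |
static |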
Definition at line 1106 of file html.cpp.
References THtmlLx::ChA, THtmlLx::GetArg(), TUrlEnv::GetFullUrlStr(), TUrl::GetUrlStr(), hsyBTag, IAssert, THtmlLx::IsArg(), TUrl::IsOk(), TUrl::New(), TUrlEnv::New(), THtmlLx::PutArg(), THtmlLx::Sym, and usHttp.
Referenced by GetRedirHtmlDocStr().
void THtmlDoc::AddTokV (const THtmlTokV &_TokV) |
inline |
Definition at line 274 of file html.h.
Referenced by THtmlHldV::THtmlHldV().
|
static |
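TStr THtmlDoc::GetRedirHtmlDocStr (const TStr &HtmlStr, const TStr &BaseUrlStr, const TStr &RedirUrlStr) |
static |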
Definition at line 1126 of file html.cpp.
References _IsTagRedir(), TMOut::GetAsStr(), THtmlLx::GetFullBTagStr(), THtmlLx::GetSym(), hsyBTag, hsyEof, TStrIn::New(), THtmlLx::PreSpaceChA, TSOut::PutStr(), THtmlLx::Sym, and THtmlLx::SymChA.
PHtmlTok THtmlDoc::GetTok (const int &TokN) const |
inline |
Definition at line 271 of file html.h.
Referenced by TWebPg::GetOutDescUrlStrKdV(), TWebPg::GetOutUrlV(), and THtmlHldV::THtmlHldV().
PHtmlTok THtmlDoc::GetTok (const int &TokN, THtmlLxSym &Sym, TStr &Str) const |
inline |
Definition at line 272 of file html.h.
References TStr::GetStr().
int THtmlDoc::GetToks () const |
inline |
Definition at line 270 of file html.h.
Referenced by TWebPg::GetOutDescUrlStrKdV(), TWebPg::GetOutUrlV(), and THtmlHldV::THtmlHldV().
TStr THtmlDoc::GetTxtLnDoc (const TStr &HtmlStr) |
static |
Definition at line 808 of file html.cpp.
References THtmlLx::ChA, TStr::CStr(), TChA::Empty(), THtmlLx::GetSym(), hsyBTag, hsyEof, hsyETag, hsyNum, hsySSym, hsyStr, TChA::LastCh(), TStrIn::New(), THtmlLx::PreSpaces, and THtmlLx::Sym.
Referenced by SaveHtmlToTxt().
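TStr THtmlDoc::GetTxtLnDoc (const TStr &HtmlStr, const TStr &BaseUrlStr, const bool &OutUrlP, const bool &OutTagsP) |
static |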
Definition at line 840 of file html.cpp.
References THtmlLx::ChA, Fail, TStr::GetSubStr(), THtmlLx::GetSym(), THtmlLx::GetTok(), TUrl::GetUrlStr(), TXmlLx::GetXmlStrFromPlainStr(), hsyBTag, hsyEof, hsyETag, hsyMTag, hsyNum, hsySSym, hsyStr, hsyUndef, hsyUrl, TUrl::IsOk(), TChA::LastCh(), TStr::Len(), TUrl::New(), TStrIn::New(), THtmlLx::PreSpaces, and THtmlLx::Sym.
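PHtmlDoc THtmlDoc::LoadTxt (const TStr &FNm, const THtmlDocType &Type=hdtAll, const bool &DoUc=true) |
inline static |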
Definition at line 280 of file html.h.
References TFIn::New().
|
inline static |
Definition at line 261 of file html.h.
Referenced by TWebPg::GetOutDescUrlStrKdV(), and TWebPg::GetOutUrlV().
|
inline |
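void THtmlDoc::SaveHtmlToTxt (const TStr &HtmlStr, const PSOut &TxtSOut, const TStr &BaseUrlStr, const bool &OutUrlP, const bool &OutToksP) |
static |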
Definition at line 928 of file html.cpp.
References GetTxtLnDoc(), and TStr::SaveTxt().
Referenced by SaveHtmlToTxt().
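void THtmlDoc::SaveHtmlToTxt (const TStr &HtmlStr, const TStr &TxtFNm, const TStr &BaseUrlStr, const bool &OutUrlP, const bool &OutToksP) |
static |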
Definition at line 937 of file html.cpp.
References TFOut::New(), and SaveHtmlToTxt().
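void THtmlDoc::SaveHtmlToXml (const TStr &HtmlStr, const PSOut &XmlSOut, const TStr &BaseUrlStr, const bool &OutTextP, const bool &OutUrlP, const bool &OutToksP, const bool &OutTagsP, const bool &OutArgsP) |
static |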
Definition at line 946 of file html.cpp.
References TVec< TVal, TSizeTy >::Add(), THtmlLx::ChA, TChA::Clr(), TChA::CStr(), TStr::CStr(), TChA::Empty(), TStr::Empty(), Fail, THtmlLx::GetArgNm(), THtmlLx::GetArgs(), THtmlLx::GetArgVal(), TSOut::GetFileId(), TStr::GetSubStr(), THtmlLx::GetSym(), THtmlLx::GetTok(), TUrl::GetUrlStr(), TXmlLx::GetXmlStrFromPlainStr(), hsyBTag, hsyEof, hsyETag, hsyMTag, hsyNum, hsySSym, hsyStr, hsyUndef, hsyUrl, TUrl::IsOk(), TStr::Len(), TVec< TVal, TSizeTy >::Len(), TUrl::New(), TStrIn::New(), and THtmlLx::Sym.
Referenced by SaveHtmlToXml().
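void THtmlDoc::SaveHtmlToXml (const TStr &HtmlStr, const TStr &XmlFNm, const TStr &BaseUrlStr, const bool &OutTextP, const bool &OutUrlP, const bool &OutToksP, const bool &OutTagsP, const bool &OutArgsP) |
static |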
Definition at line 1081 of file html.cpp.
References TFOut::New(), and SaveHtmlToXml().
void THtmlDoc::SaveTxt | ( | const PSOut & | SOut, |
const bool & | TxtMode = true |
||
) | const |
Definition at line 915 of file html.cpp.
References TInt::GetStr(), TVec< TVal, TSizeTy >::Len(), TSOut::PutLn(), TSOut::PutStr(), and TokV.
Referenced by THtmlHldV::THtmlHldV().
THtmlTokV THtmlDoc::TokV |
private |
Definition at line 256 of file html.h.
Referenced by SaveTxt(), and THtmlDoc().