SNAP Library 6.0, Developer Reference  2020-12-09 16:24:20
SNAP, a general purpose, high performance system for analysis and manipulation of large networks
THtmlTok Class Reference

#include <html.h>

Collaboration diagram for THtmlTok:

Public Member Functions

 THtmlTok ()
 
 THtmlTok (const THtmlLxSym &_Sym)
 
 THtmlTok (const THtmlLxSym &_Sym, const TStr &_Str)
 
 THtmlTok (const THtmlLxSym &_Sym, const TStr &_Str, const THtmlLx::TArgNmValV &_ArgNmValV)
 
 THtmlTok (TSIn &)
 
void Save (TSOut &)
 
THtmlTok & operator= (const THtmlTok &)
 
THtmlLxSym GetSym () const
 
TStr GetStr () const
 
TStr GetFullStr () const
 
bool IsArg (const TStr &ArgNm) const
 
TStr GetArg (const TStr &ArgNm) const
 
TStr GetArg (const TStr &ArgNm, const TStr &DfArgVal) const
 
bool IsUrlTok (TStr &RelUrlStr) const
 
bool IsRedirUrlTok () const
 
void SaveTxt (const PSOut &SOut, const bool &TxtMode=true)
 

Static Public Member Functions

static PHtmlTok Load (TSIn &)
 
static bool IsBreakTag (const TStr &TagNm)
 
static bool IsBreakTok (const PHtmlTok &Tok)
 
static bool IsHTag (const TStr &TagNm, int &HTagN)
 
static PHtmlTok GetHTok (const bool &IsBTag, const int &HTagN)
 

Static Public Attributes

static const TStr ATagNm ="<A>"
 
static const TStr AreaTagNm ="<AREA>"
 
static const TStr BrTagNm ="<BR>"
 
static const TStr CardTagNm ="<CARD>"
 
static const TStr CenterTagNm ="<CENTER>"
 
static const TStr FrameTagNm ="<FRAME>"
 
static const TStr H1TagNm ="<H1>"
 
static const TStr H2TagNm ="<H2>"
 
static const TStr H3TagNm ="<H3>"
 
static const TStr H4TagNm ="<H4>"
 
static const TStr H5TagNm ="<H5>"
 
static const TStr H6TagNm ="<H6>"
 
static const TStr ImgTagNm ="<IMG>"
 
static const TStr LiTagNm ="<LI>"
 
static const TStr MetaTagNm ="<META>"
 
static const TStr PTagNm ="<P>"
 
static const TStr UlTagNm ="<UL>"
 
static const TStr TitleTagNm ="<TITLE>"
 
static const TStr TitleETagNm ="</TITLE>"
 
static const TStr AltArgNm ="ALT"
 
static const TStr HRefArgNm ="HREF"
 
static const TStr SrcArgNm ="SRC"
 
static const TStr TitleArgNm ="TITLE"
 
static const TStr HttpEquivArgNm ="HTTP-EQUIV"
 

Private Attributes

TCRef CRef
 
THtmlLxSym Sym
 
TStr Str
 
THtmlLx::TArgNmValV ArgNmValV
 

Friends

class TPt< THtmlTok >
 

Detailed Description

Definition at line 182 of file html.h.

Constructor & Destructor Documentation

THtmlTok::THtmlTok ( )
inline

Definition at line 188 of file html.h.

Referenced by GetHTok().

188 : Sym(hsyUndef), Str(), ArgNmValV(){}
TStr Str
Definition: html.h:185
Definition: html.h:79
THtmlLx::TArgNmValV ArgNmValV
Definition: html.h:186
THtmlLxSym Sym
Definition: html.h:184

Here is the caller graph for this function:

THtmlTok::THtmlTok ( const THtmlLxSym & _Sym)
inline

Definition at line 189 of file html.h.

189  :
190  Sym(_Sym), Str(), ArgNmValV(){}
TStr Str
Definition: html.h:185
THtmlLx::TArgNmValV ArgNmValV
Definition: html.h:186
THtmlLxSym Sym
Definition: html.h:184
THtmlTok::THtmlTok ( const THtmlLxSym & _Sym,
const TStr & _Str 
)
inline

Definition at line 191 of file html.h.

191  :
192  Sym(_Sym), Str(_Str), ArgNmValV(){}
TStr Str
Definition: html.h:185
THtmlLx::TArgNmValV ArgNmValV
Definition: html.h:186
THtmlLxSym Sym
Definition: html.h:184
THtmlTok::THtmlTok ( const THtmlLxSym & _Sym,
const TStr & _Str,
const THtmlLx::TArgNmValV & _ArgNmValV 
)
inline

Definition at line 193 of file html.h.

194  :
195  Sym(_Sym), Str(_Str), ArgNmValV(_ArgNmValV){}
TStr Str
Definition: html.h:185
THtmlLx::TArgNmValV ArgNmValV
Definition: html.h:186
THtmlLxSym Sym
Definition: html.h:184
THtmlTok::THtmlTok ( TSIn & )
inline

Definition at line 196 of file html.h.

References Fail.

196 {Fail;}
#define Fail
Definition: bd.h:238

Member Function Documentation

TStr THtmlTok::GetArg ( const TStr & ArgNm) const
inline

Definition at line 207 of file html.h.

References TVec< TVal, TSizeTy >::SearchForw().

Referenced by IsRedirUrlTok(), and IsUrlTok().

207  {
208  return ArgNmValV[ArgNmValV.SearchForw(TStrKd(ArgNm))].Dat;}
TSizeTy SearchForw(const TVal &Val, const TSizeTy &BValN=0) const
Returns the position of an element with value Val.
Definition: ds.h:1552
THtmlLx::TArgNmValV ArgNmValV
Definition: html.h:186
TKeyDat< TStr, TStr > TStrKd
Definition: ds.h:405

Here is the call graph for this function:

Here is the caller graph for this function:

TStr THtmlTok::GetArg ( const TStr & ArgNm,
const TStr & DfArgVal 
) const
inline

Definition at line 209 of file html.h.

References TVec< TVal, TSizeTy >::SearchForw().

209  {
210  int ArgN=ArgNmValV.SearchForw(TStrKd(ArgNm));
211  if (ArgN==-1){return DfArgVal;} else {return ArgNmValV[ArgN].Dat;}}
TSizeTy SearchForw(const TVal &Val, const TSizeTy &BValN=0) const
Returns the position of an element with value Val.
Definition: ds.h:1552
THtmlLx::TArgNmValV ArgNmValV
Definition: html.h:186
TKeyDat< TStr, TStr > TStrKd
Definition: ds.h:405

Here is the call graph for this function:

TStr THtmlTok::GetFullStr ( ) const

Definition at line 628 of file html.cpp.

References ArgNmValV, GetStr(), TStr::GetSubStr(), hsyBTag, hsyETag, TStr::Len(), TVec< TVal, TSizeTy >::Len(), Str, and Sym.

Referenced by SaveTxt().

628  {
629  if ((Sym==hsyBTag)&&(ArgNmValV.Len()>0)){
630  TChA FullChA;
631  FullChA+=Str.GetSubStr(0, Str.Len()-2);
632  for (int ArgNmValN=0; ArgNmValN<ArgNmValV.Len(); ArgNmValN++){
633  FullChA+=' '; FullChA+=ArgNmValV[ArgNmValN].Key; FullChA+='=';
634  FullChA+='"'; FullChA+=ArgNmValV[ArgNmValN].Dat; FullChA+='"';
635  }
636  FullChA+='>';
637  return FullChA;
638  } else
639  if (Sym==hsyETag){
640  TChA FullChA;
641  FullChA+='<'; FullChA+='/'; FullChA+=Str.GetSubStr(1, Str.Len()-1);
642  return FullChA;
643  } else {
644  return GetStr();
645  }
646 }
int Len() const
Definition: dt.h:490
TSizeTy Len() const
Returns the number of elements in the vector.
Definition: ds.h:575
TStr GetSubStr(const int &BChN, const int &EChN) const
Definition: dt.cpp:811
TStr GetStr() const
Definition: html.h:203
TStr Str
Definition: html.h:185
Definition: dt.h:201
Definition: html.h:80
Definition: html.h:80
THtmlLx::TArgNmValV ArgNmValV
Definition: html.h:186
THtmlLxSym Sym
Definition: html.h:184

Here is the call graph for this function:

Here is the caller graph for this function:

PHtmlTok THtmlTok::GetHTok ( const bool &  IsBTag,
const int &  HTagN 
)
static

Definition at line 762 of file html.cpp.

References Fail, H1TagNm, H2TagNm, H3TagNm, H4TagNm, H5TagNm, H6TagNm, hsyBTag, hsyETag, and THtmlTok().

Referenced by THtmlHldV::THtmlHldV().

762  {
763  THtmlLxSym HTagSym=IsBTag?hsyBTag:hsyETag;
764  TStr HTagNm;
765  switch (HTagN){
766  case 1: HTagNm=H1TagNm; break;
767  case 2: HTagNm=H2TagNm; break;
768  case 3: HTagNm=H3TagNm; break;
769  case 4: HTagNm=H4TagNm; break;
770  case 5: HTagNm=H5TagNm; break;
771  case 6: HTagNm=H6TagNm; break;
772  default: Fail;
773  }
774  return PHtmlTok(new THtmlTok(HTagSym, HTagNm));
775 }
static const TStr H5TagNm
Definition: html.h:227
THtmlLxSym
Definition: html.h:78
static const TStr H4TagNm
Definition: html.h:226
THtmlTok()
Definition: html.h:188
static const TStr H3TagNm
Definition: html.h:225
static const TStr H1TagNm
Definition: html.h:223
#define Fail
Definition: bd.h:238
static const TStr H2TagNm
Definition: html.h:224
static const TStr H6TagNm
Definition: html.h:228
Definition: html.h:80
Definition: html.h:80
Definition: dt.h:412
TPt< THtmlTok > PHtmlTok
Definition: html.h:5

Here is the call graph for this function:

Here is the caller graph for this function:

TStr THtmlTok::GetStr ( ) const
inline

Definition at line 203 of file html.h.

Referenced by GetFullStr(), IsRedirUrlTok(), and IsUrlTok().

203 {return Str;}
TStr Str
Definition: html.h:185

Here is the caller graph for this function:

THtmlLxSym THtmlTok::GetSym ( ) const
inline

Definition at line 202 of file html.h.

Referenced by IsRedirUrlTok(), and IsUrlTok().

202 {return Sym;}
THtmlLxSym Sym
Definition: html.h:184

Here is the caller graph for this function:

bool THtmlTok::IsArg ( const TStr & ArgNm) const
inline

Definition at line 205 of file html.h.

References TVec< TVal, TSizeTy >::SearchForw().

Referenced by IsRedirUrlTok(), and IsUrlTok().

205  {
206  return ArgNmValV.SearchForw(TStrKd(ArgNm))!=-1;}
TSizeTy SearchForw(const TVal &Val, const TSizeTy &BValN=0) const
Returns the position of an element with value Val.
Definition: ds.h:1552
THtmlLx::TArgNmValV ArgNmValV
Definition: html.h:186
TKeyDat< TStr, TStr > TStrKd
Definition: ds.h:405

Here is the call graph for this function:

Here is the caller graph for this function:

bool THtmlTok::IsBreakTag ( const TStr & TagNm)
static

Definition at line 726 of file html.cpp.

References THash< TKey, TDat, THashFunc >::AddKey(), THash< TKey, TDat, THashFunc >::IsKey(), and THash< TKey, TDat, THashFunc >::Len().

Referenced by IsBreakTok().

726  {
727  static TStrH BreakTagNmH(50);
728  if (BreakTagNmH.Len()==0){
729  BreakTagNmH.AddKey(TStr("<H1>")); BreakTagNmH.AddKey(TStr("<H2>"));
730  BreakTagNmH.AddKey(TStr("<H3>")); BreakTagNmH.AddKey(TStr("<H4>"));
731  BreakTagNmH.AddKey(TStr("<H5>")); BreakTagNmH.AddKey(TStr("<H6>"));
732  BreakTagNmH.AddKey(TStr("<BR>")); BreakTagNmH.AddKey(TStr("<HR>"));
733  BreakTagNmH.AddKey(TStr("<P>")); BreakTagNmH.AddKey(TStr("<DL>"));
734  BreakTagNmH.AddKey(TStr("<UL>")); BreakTagNmH.AddKey(TStr("<OL>"));
735  BreakTagNmH.AddKey(TStr("<LI>")); BreakTagNmH.AddKey(TStr("<DT>"));
736  BreakTagNmH.AddKey(TStr("<DD>")); BreakTagNmH.AddKey(TStr("<HEAD>"));
737  BreakTagNmH.AddKey(TStr("<TITLE>")); BreakTagNmH.AddKey(TStr("<META>"));
738  BreakTagNmH.AddKey(TStr("<SCRIPT>"));
739  BreakTagNmH.AddKey(TStr("<HEAD>")); BreakTagNmH.AddKey(TStr("<BODY>"));
740  }
741  return BreakTagNmH.IsKey(TagNm);
742 }
Definition: dt.h:412

Here is the call graph for this function:

Here is the caller graph for this function:

bool THtmlTok::IsBreakTok ( const PHtmlTok & Tok)
static

Definition at line 744 of file html.cpp.

References hsyBTag, hsyETag, and IsBreakTag().

Referenced by THtmlHldV::THtmlHldV().

744  {
745  if ((Tok->GetSym()==hsyBTag)||(Tok->GetSym()==hsyETag)){
746  return IsBreakTag(Tok->GetStr());
747  } else {
748  return false;
749  }
750 }
static bool IsBreakTag(const TStr &TagNm)
Definition: html.cpp:726
Definition: html.h:80
Definition: html.h:80

Here is the call graph for this function:

Here is the caller graph for this function:

bool THtmlTok::IsHTag ( const TStr & TagNm,
int &  HTagN 
)
static

Definition at line 752 of file html.cpp.

References TStr::Len().

Referenced by THtmlHldV::THtmlHldV().

752  {
753  if ((TagNm.Len()==4)&&(TagNm[0]=='<')&&(TagNm[1]=='H')&&(TagNm[3]=='>')){
754  char Ch=TagNm[2];
755  if (('1'<=Ch)&&(Ch<='6')){HTagN=Ch-'0'; return true;}
756  else {HTagN=-1; return false;}
757  } else {
758  HTagN=-1; return false;
759  }
760 }
int Len() const
Definition: dt.h:490

Here is the call graph for this function:

Here is the caller graph for this function:

bool THtmlTok::IsRedirUrlTok ( ) const

Definition at line 676 of file html.cpp.

References GetArg(), GetStr(), GetSym(), TStr::GetUc(), hsyBTag, HttpEquivArgNm, IsArg(), and MetaTagNm.

676  {
677  if (GetSym()==hsyBTag){
678  TStr TagNm=GetStr();
679  if ((TagNm==MetaTagNm)&&(IsArg(HttpEquivArgNm))){
680  TStr HttpEquivArgVal=GetArg(HttpEquivArgNm).GetUc();
681  if ((HttpEquivArgVal=="REFRESH")&&IsArg("CONTENT")){
682  return true;
683  } else {
684  return false;
685  }
686  }
687  }
688  return false;
689 }
TStr GetUc() const
Definition: dt.h:496
TStr GetArg(const TStr &ArgNm) const
Definition: html.h:207
static const TStr MetaTagNm
Definition: html.h:231
TStr GetStr() const
Definition: html.h:203
bool IsArg(const TStr &ArgNm) const
Definition: html.h:205
THtmlLxSym GetSym() const
Definition: html.h:202
static const TStr HttpEquivArgNm
Definition: html.h:241
Definition: html.h:80
Definition: dt.h:412

Here is the call graph for this function:

bool THtmlTok::IsUrlTok ( TStr & RelUrlStr) const

Definition at line 648 of file html.cpp.

References AreaTagNm, ATagNm, TStr::Empty(), FrameTagNm, GetArg(), GetStr(), TStr::GetSubStr(), GetSym(), TStr::GetUc(), HRefArgNm, hsyBTag, HttpEquivArgNm, ImgTagNm, IsArg(), TStr::Len(), MetaTagNm, TStr::SplitOnStr(), and SrcArgNm.

648  {
649  if (GetSym()==hsyBTag){
650  TStr TagNm=GetStr();
651  if ((TagNm==ATagNm)&&(IsArg(HRefArgNm))){
652  RelUrlStr=GetArg(HRefArgNm); return true;}
653  else if ((TagNm==AreaTagNm)&&(IsArg(HRefArgNm))){
654  RelUrlStr=GetArg(HRefArgNm); return true;}
655  else if ((TagNm==FrameTagNm)&&(IsArg(SrcArgNm))){
656  RelUrlStr=GetArg(SrcArgNm); return true;}
657  else if ((TagNm==ImgTagNm)&&(IsArg(SrcArgNm))){
658  RelUrlStr=GetArg(SrcArgNm); return true;}
659  else if ((TagNm==MetaTagNm)&&(IsArg(HttpEquivArgNm))){
660  TStr HttpEquivArgVal=GetArg(HttpEquivArgNm).GetUc();
661  if ((HttpEquivArgVal=="REFRESH")&&IsArg("CONTENT")){
662  TStr ContentStr=GetArg("CONTENT");
663  TStr LeftStr; TStr RightStr; TStr UrlEqStr="URL=";
664  ContentStr.GetUc().SplitOnStr(LeftStr, UrlEqStr, RightStr);
665  RelUrlStr=ContentStr.GetSubStr(
666  LeftStr.Len()+UrlEqStr.Len(), ContentStr.Len());
667  return !RelUrlStr.Empty();
668  } else {
669  return false;
670  }
671  }
672  }
673  return false;
674 }
static const TStr FrameTagNm
Definition: html.h:222
int Len() const
Definition: dt.h:490
static const TStr HRefArgNm
Definition: html.h:238
TStr GetUc() const
Definition: dt.h:496
TStr GetArg(const TStr &ArgNm) const
Definition: html.h:207
static const TStr MetaTagNm
Definition: html.h:231
TStr GetSubStr(const int &BChN, const int &EChN) const
Definition: dt.cpp:811
TStr GetStr() const
Definition: html.h:203
bool IsArg(const TStr &ArgNm) const
Definition: html.h:205
static const TStr AreaTagNm
Definition: html.h:218
THtmlLxSym GetSym() const
Definition: html.h:202
static const TStr HttpEquivArgNm
Definition: html.h:241
static const TStr ATagNm
Definition: html.h:217
static const TStr ImgTagNm
Definition: html.h:229
Definition: html.h:80
Definition: dt.h:412
bool Empty() const
Definition: dt.h:491
void SplitOnStr(const TStr &SplitStr, TStrV &StrV) const
Definition: dt.cpp:1008
static const TStr SrcArgNm
Definition: html.h:239

Here is the call graph for this function:

static PHtmlTok THtmlTok::Load ( TSIn & )
inline static

Definition at line 197 of file html.h.

References Fail.

197 {Fail; return NULL;}
#define Fail
Definition: bd.h:238
THtmlTok& THtmlTok::operator= ( const THtmlTok & )
inline

Definition at line 200 of file html.h.

References Fail.

200 {Fail; return *this;}
#define Fail
Definition: bd.h:238
void THtmlTok::Save ( TSOut & )
inline

Definition at line 198 of file html.h.

References Fail.

198 {Fail;}
#define Fail
Definition: bd.h:238
void THtmlTok::SaveTxt ( const PSOut & SOut,
const bool &  TxtMode = true 
)

Definition at line 691 of file html.cpp.

References GetFullStr(), THtmlLx::GetSymStr(), TSOut::PutStr(), and Sym.

691  {
692  if (TxtMode){
693  SOut->PutStr(GetFullStr()); SOut->PutStr(" ");
694  } else {
695  SOut->PutStr(THtmlLx::GetSymStr(Sym)); SOut->PutStr(" ");
696  SOut->PutStr(GetFullStr()); SOut->PutStr(" ");
697  }
698 }
TStr GetFullStr() const
Definition: html.cpp:628
THtmlLxSym Sym
Definition: html.h:184
static TStr GetSymStr(const THtmlLxSym &Sym)
Definition: html.cpp:553

Here is the call graph for this function:

Friends And Related Function Documentation

friend class TPt< THtmlTok >
friend

Definition at line 182 of file html.h.

Member Data Documentation

const TStr THtmlTok::AltArgNm ="ALT"
static

Definition at line 237 of file html.h.

const TStr THtmlTok::AreaTagNm ="<AREA>"
static

Definition at line 218 of file html.h.

Referenced by IsUrlTok(), and THtmlDoc::THtmlDoc().

THtmlLx::TArgNmValV THtmlTok::ArgNmValV
private

Definition at line 186 of file html.h.

Referenced by GetFullStr().

const TStr THtmlTok::ATagNm ="<A>"
static

Definition at line 217 of file html.h.

Referenced by IsUrlTok().

const TStr THtmlTok::BrTagNm ="<BR>"
static

Definition at line 219 of file html.h.

const TStr THtmlTok::CardTagNm ="<CARD>"
static

Definition at line 220 of file html.h.

const TStr THtmlTok::CenterTagNm ="<CENTER>"
static

Definition at line 221 of file html.h.

TCRef THtmlTok::CRef
private

Definition at line 182 of file html.h.

const TStr THtmlTok::FrameTagNm ="<FRAME>"
static

Definition at line 222 of file html.h.

Referenced by IsUrlTok().

const TStr THtmlTok::H1TagNm ="<H1>"
static

Definition at line 223 of file html.h.

Referenced by GetHTok().

const TStr THtmlTok::H2TagNm ="<H2>"
static

Definition at line 224 of file html.h.

Referenced by GetHTok().

const TStr THtmlTok::H3TagNm ="<H3>"
static

Definition at line 225 of file html.h.

Referenced by GetHTok().

const TStr THtmlTok::H4TagNm ="<H4>"
static

Definition at line 226 of file html.h.

Referenced by GetHTok().

const TStr THtmlTok::H5TagNm ="<H5>"
static

Definition at line 227 of file html.h.

Referenced by GetHTok().

const TStr THtmlTok::H6TagNm ="<H6>"
static

Definition at line 228 of file html.h.

Referenced by GetHTok().

const TStr THtmlTok::HRefArgNm ="HREF"
static

Definition at line 238 of file html.h.

Referenced by IsUrlTok().

const TStr THtmlTok::HttpEquivArgNm ="HTTP-EQUIV"
static

Definition at line 241 of file html.h.

Referenced by IsRedirUrlTok(), and IsUrlTok().

const TStr THtmlTok::ImgTagNm ="<IMG>"
static

Definition at line 229 of file html.h.

Referenced by IsUrlTok(), and THtmlDoc::THtmlDoc().

const TStr THtmlTok::LiTagNm ="<LI>"
static

Definition at line 230 of file html.h.

const TStr THtmlTok::MetaTagNm ="<META>"
static

Definition at line 231 of file html.h.

Referenced by IsRedirUrlTok(), and IsUrlTok().

const TStr THtmlTok::PTagNm ="<P>"
static

Definition at line 232 of file html.h.

const TStr THtmlTok::SrcArgNm ="SRC"
static

Definition at line 239 of file html.h.

Referenced by IsUrlTok().

TStr THtmlTok::Str
private

Definition at line 185 of file html.h.

Referenced by GetFullStr().

THtmlLxSym THtmlTok::Sym
private

Definition at line 184 of file html.h.

Referenced by GetFullStr(), and SaveTxt().

const TStr THtmlTok::TitleArgNm ="TITLE"
static

Definition at line 240 of file html.h.

const TStr THtmlTok::TitleETagNm ="</TITLE>"
static

Definition at line 235 of file html.h.

const TStr THtmlTok::TitleTagNm ="<TITLE>"
static

Definition at line 234 of file html.h.

const TStr THtmlTok::UlTagNm ="<UL>"
static

Definition at line 233 of file html.h.

Referenced by THtmlDoc::THtmlDoc().


The documentation for this class was generated from the following files: