SNAP Library 6.0, User Reference  2020-12-09 16:24:20
SNAP, a general purpose, high performance system for analysis and manipulation of large networks
THtmlLx Class Reference

#include <html.h>

Public Types

typedef TStrKdV TArgNmValV
 

Public Member Functions

 THtmlLx (const PSIn &_SIn, const bool &_DoParseArg=true)
 
THtmlLx & operator= (const THtmlLx &)
 
void PutCh (const char &_Ch)
 
void PutStr (const TStr &Str)
 
THtmlLxSym GetSym ()
 
PHtmlTok GetTok (const bool &DoUc=true)
 
TStr GetPreSpaceStr () const
 
int GetArgs () const
 
TStr GetArgNm (const int &ArgN) const
 
TStr GetArgVal (const int &ArgN) const
 
bool IsArg (const TStr &ArgNm) const
 
TStr GetArg (const TStr &ArgNm, const TStr &DfArgVal=TStr()) const
 
void PutArg (const TStr &ArgNm, const TStr &ArgVal)
 
TStr GetFullBTagStr () const
 
void MoveToStrOrEof (const TStr &Str)
 
void MoveToBTagOrEof (const TStr &TagNm)
 
void MoveToBTag2OrEof (const TStr &TagNm1, const TStr &TagNm2)
 
void MoveToBTag3OrEof (const TStr &TagNm1, const TStr &TagNm2, const TStr &TagNm3)
 
void MoveToBTagOrETagOrEof (const TStr &BTagNm, const TStr &ETagNm)
 
void MoveToBTagArgOrEof (const TStr &TagNm, const TStr &ArgNm, const TStr &ArgVal)
 
void MoveToBTagArg2OrEof (const TStr &TagNm, const TStr &ArgNm1, const TStr &ArgVal1, const TStr &ArgNm2, const TStr &ArgVal2, const bool &AndOpP=true)
 
void MoveToBTagOrEof (const TStr &TagNm1, const TStr &ArgNm1, const TStr &ArgVal1, const TStr &TagNm2, const TStr &ArgNm2, const TStr &ArgVal2)
 
void MoveToETagOrEof (const TStr &TagNm)
 
TStr GetTextOnlyStrToEof ()
 
TStr GetStrToBTag (const TStr &TagNm, const bool &TxtOnlyP=false)
 
TStr GetStrToBTag (const TStr &TagNm, const TStr &ArgNm, const TStr &ArgVal, const bool &TxtOnlyP=false)
 
TStr GetStrToETag (const TStr &TagNm, const bool &TxtOnlyP=false)
 
TStr GetStrToETag2 (const TStr &TagNm1, const TStr &TagNm2, const bool &TxtOnlyP=false)
 
TStr GetStrInTag (const TStr &TagNm, const bool &TxtOnlyP=false)
 
TStr GetHRefBeforeStr (const TStr &Str)
 
bool IsGetBTag (const TStr &TagNm)
 
bool IsGetETag (const TStr &TagNm)
 

Static Public Member Functions

static TStr GetSymStr (const THtmlLxSym &Sym)
 
static TStr GetEscapedStr (const TChA &ChA)
 
static TStr GetAsciiStr (const TChA &ChA, const char &GenericCh='_')
 
static void GetTokStrV (const TStr &Str, TStrV &TokStrV)
 
static TStr GetNoTag (const TStr &Str)
 

Public Attributes

THtmlLxSym Sym
 
int SymBChX
 
int SymEChX
 
TChA ChA
 
TChA UcChA
 
TChA SymChA
 
int PreSpaces
 
TChA PreSpaceChA
 
TArgNmValV ArgNmValV
 

Private Member Functions

void GetCh ()
 
void GetEscCh ()
 
void GetMetaTag ()
 
void GetTag ()
 

Private Attributes

PSIn SIn
 
TSIn & RSIn
 
bool DoParseArg
 
TChA ChStack
 
char Ch
 
int ChX
 
bool EscCh
 
TChA EscChA
 
TChA ArgNm
 
TChA ArgVal
 

Static Private Attributes

static THtmlLxChDef ChDef
 

Detailed Description

Definition at line 82 of file html.h.

Member Typedef Documentation

typedef TStrKdV THtmlLx::TArgNmValV

Definition at line 114 of file html.h.

Constructor & Destructor Documentation

THtmlLx::THtmlLx ( const PSIn &  _SIn,
const bool &  _DoParseArg = true 
)
inline

Definition at line 117 of file html.h.

117  :
118  SIn(_SIn), RSIn(*SIn), DoParseArg(_DoParseArg),
119  ChStack(), Ch(' '), ChX(0), EscCh(false),
120  EscChA(), ArgNm(), ArgVal(),
121  Sym(hsyUndef), SymBChX(0), SymEChX(0), ChA(), UcChA(),
122  PreSpaces(0), PreSpaceChA(), ArgNmValV(){}
TChA ArgNm
Definition: html.h:93
TSIn & RSIn
Definition: html.h:86
TChA EscChA
Definition: html.h:92
int SymBChX
Definition: html.h:108
bool DoParseArg
Definition: html.h:87
char Ch
Definition: html.h:89
TChA PreSpaceChA
Definition: html.h:113
TChA ChStack
Definition: html.h:88
bool EscCh
Definition: html.h:91
int SymEChX
Definition: html.h:108
TChA UcChA
Definition: html.h:110
TChA ChA
Definition: html.h:109
int ChX
Definition: html.h:90
PSIn SIn
Definition: html.h:85
THtmlLxSym Sym
Definition: html.h:107
Definition: html.h:79
TArgNmValV ArgNmValV
Definition: html.h:115
TChA ArgVal
Definition: html.h:94
int PreSpaces
Definition: html.h:112

Member Function Documentation

TStr THtmlLx::GetArg ( const TStr &  ArgNm,
const TStr &  DfArgVal = TStr() 
) const
inline

Definition at line 139 of file html.h.

139  {
140  int ArgN=ArgNmValV.SearchForw(TStrKd(ArgNm));
141  if (ArgN==-1){return DfArgVal;} else {return ArgNmValV[ArgN].Dat;}}
TSizeTy SearchForw(const TVal &Val, const TSizeTy &BValN=0) const
Returns the position of an element with value Val.
Definition: ds.h:1552
TArgNmValV ArgNmValV
Definition: html.h:115
TKeyDat< TStr, TStr > TStrKd
Definition: ds.h:405
TStr THtmlLx::GetArgNm ( const int &  ArgN) const
inline

Definition at line 136 of file html.h.

136 {return ArgNmValV[ArgN].Key;}
TArgNmValV ArgNmValV
Definition: html.h:115
int THtmlLx::GetArgs ( ) const
inline

Definition at line 135 of file html.h.

135 {return ArgNmValV.Len();}
TSizeTy Len() const
Returns the number of elements in the vector.
Definition: ds.h:575
TArgNmValV ArgNmValV
Definition: html.h:115
TStr THtmlLx::GetArgVal ( const int &  ArgN) const
inline

Definition at line 137 of file html.h.

137 {return ArgNmValV[ArgN].Dat;}
TArgNmValV ArgNmValV
Definition: html.h:115
TStr THtmlLx::GetAsciiStr ( const TChA &  ChA,
const char &  GenericCh = '_' 
)
static

Definition at line 584 of file html.cpp.

584  {
585  TChA AsciiChA;
586  for (int ChN=0; ChN<ChA.Len(); ChN++){
587  char Ch=ChA[ChN];
588  if ((Ch<' ')||('~'<Ch)){
589  Ch=GenericCh;}
590  AsciiChA+=Ch;
591  }
592  return AsciiChA;
593 }
int Len() const
Definition: dt.h:259
char Ch
Definition: html.h:89
Definition: dt.h:201
void THtmlLx::GetCh ( )
inline private

Definition at line 95 of file html.h.

95  {
96  if (ChStack.Empty()){
97  if (RSIn.Eof()){Ch=TCh::EofCh;} else {Ch=RSIn.GetCh(); ChX++;}
98  } else {
99  Ch=ChStack.Pop(); ChX++;
100  }
101  SymChA+=Ch;
102  }
TSIn & RSIn
Definition: html.h:86
bool Empty() const
Definition: dt.h:260
char Ch
Definition: html.h:89
TChA ChStack
Definition: html.h:88
static const char EofCh
Definition: dt.h:1040
virtual bool Eof()=0
int ChX
Definition: html.h:90
TChA SymChA
Definition: html.h:111
virtual char GetCh()=0
char Pop()
Definition: dt.h:265
TStr THtmlLx::GetEscapedStr ( const TChA &  ChA)
static

Definition at line 568 of file html.cpp.

568  {
569  TChA EscapedChA;
570  for (int ChN=0; ChN<ChA.Len(); ChN++){
571  char Ch=ChA[ChN];
572  switch (Ch){
573  case '"': EscapedChA+="&quot;"; break;
574  case '&': EscapedChA+="&amp;"; break;
575  case '\'': EscapedChA+="&apos;"; break;
576  case '<': EscapedChA+="&lt;"; break;
577  case '>': EscapedChA+="&gt;"; break;
578  default: EscapedChA+=Ch;
579  }
580  }
581  return EscapedChA;
582 }
int Len() const
Definition: dt.h:259
char Ch
Definition: html.h:89
Definition: dt.h:201
void THtmlLx::GetEscCh ( )
private

Definition at line 195 of file html.cpp.

195  {
196  GetCh();
197  EscCh=(Ch=='&');
198  if (EscCh){
199  EscChA.Clr(); EscChA.AddCh(Ch); GetCh();
200  if (Ch=='#'){
201  EscChA.AddCh(Ch); GetCh();
202  if (('0'<=Ch)&&(Ch<='9')){
203  do {EscChA.AddCh(Ch); GetCh();} while (('0'<=Ch)&&(Ch<='9'));
204  if (Ch==';'){GetCh();}
206  } else {
207  PutCh('#'); PutCh('&');
208  }
209  } else
210  if ((('a'<=Ch)&&(Ch<='z'))||(('A'<=Ch)&&(Ch<='Z'))){
211  do {
212  EscChA.AddCh(Ch); GetCh();
213  } while ((('A'<=Ch)&&(Ch<='Z'))||(('a'<=Ch)&&(Ch<='z'))||(('0'<=Ch)&&(Ch<='9')));
214  if (Ch==';'){
216  } else {
217  PutStr(EscChA);
218  }
219  } else {
220  PutCh('&');
221  }
222  }
223 }
static THtmlLxChDef ChDef
Definition: html.h:84
TChA EscChA
Definition: html.h:92
void PutStr(const TStr &Str)
Definition: html.h:128
TStr GetEscStr(const TStr &Str) const
Definition: html.cpp:33
void Clr()
Definition: dt.h:258
void AddCh(const char &Ch, const int &MxLen=-1)
Definition: dt.h:271
void PutCh(const char &_Ch)
Definition: html.h:126
char Ch
Definition: html.h:89
bool EscCh
Definition: html.h:91
void GetCh()
Definition: html.h:95
TStr THtmlLx::GetFullBTagStr ( ) const

Definition at line 358 of file html.cpp.

358  {
359  IAssert(Sym==hsyBTag);
360  TChA BTagChA;
361  BTagChA+=ChA; BTagChA.Pop();
362  for (int ArgN=0; ArgN<GetArgs(); ArgN++){
363  BTagChA+=' '; BTagChA+=GetArgNm(ArgN);
364  BTagChA+='='; BTagChA+='"'; BTagChA+=GetArgVal(ArgN); BTagChA+='"';
365  }
366  BTagChA+='>';
367  return BTagChA;
368 }
#define IAssert(Cond)
Definition: bd.h:262
TStr GetArgVal(const int &ArgN) const
Definition: html.h:137
TStr GetArgNm(const int &ArgN) const
Definition: html.h:136
TChA ChA
Definition: html.h:109
Definition: dt.h:201
Definition: html.h:80
THtmlLxSym Sym
Definition: html.h:107
int GetArgs() const
Definition: html.h:135
char Pop()
Definition: dt.h:265
TStr THtmlLx::GetHRefBeforeStr ( const TStr &  Str)

Definition at line 530 of file html.cpp.

530  {
531  TStr HRefStr;
532  forever {
533  GetSym();
534  if (Sym==hsyEof){HRefStr=""; break;}
535  if ((Sym==hsyBTag)&&(UcChA=="<A>")){HRefStr=GetArg("HREF");}
536  if ((Sym==hsyStr)&&(ChA==Str)){break;}
537  }
538  return HRefStr;
539 }
#define forever
Definition: bd.h:6
Definition: html.h:79
THtmlLxSym GetSym()
Definition: html.cpp:277
TStr GetArg(const TStr &ArgNm, const TStr &DfArgVal=TStr()) const
Definition: html.h:139
TChA UcChA
Definition: html.h:110
TChA ChA
Definition: html.h:109
Definition: html.h:80
Definition: dt.h:412
THtmlLxSym Sym
Definition: html.h:107
Definition: html.h:80
void THtmlLx::GetMetaTag ( )
private

Definition at line 225 of file html.cpp.

225  {
226  Sym=hsyMTag;
227  if (Ch=='-'){
228  char PCh=' ';
229  while ((Ch!=TCh::EofCh) && ((PCh!='-')||(Ch!='>'))){PCh=Ch; GetCh();}
230  } else {
231  while ((Ch!=TCh::EofCh) && (Ch!='>')){GetCh();}
232  }
233  if (Ch!=TCh::EofCh){GetEscCh();}
234 }
char Ch
Definition: html.h:89
static const char EofCh
Definition: dt.h:1040
Definition: html.h:80
void GetCh()
Definition: html.h:95
void GetEscCh()
Definition: html.cpp:195
THtmlLxSym Sym
Definition: html.h:107
TStr THtmlLx::GetNoTag ( const TStr &  Str)
static

Definition at line 606 of file html.cpp.

606  {
607  PSIn SIn=TStrIn::New(Str);
608  THtmlLx Lx(SIn);
609  Lx.GetSym();
610  TChA ChA;
611  while (Lx.Sym!=hsyEof){
612  switch (Lx.Sym){
613  case hsyUndef:
614  case hsyStr:
615  case hsyNum:
616  case hsySSym:
617  if (Lx.PreSpaces > 0) { ChA += ' '; }
618  ChA += Lx.ChA;
619  default: break;
620  }
621  Lx.GetSym();
622  }
623  return ChA;
624 }
Definition: html.h:79
Definition: html.h:79
static PSIn New(const TStr &Str)
Definition: dt.h:711
TChA ChA
Definition: html.h:109
Definition: dt.h:201
Definition: html.h:79
PSIn SIn
Definition: html.h:85
Definition: html.h:79
Definition: html.h:80
Definition: html.h:82
TStr THtmlLx::GetPreSpaceStr ( ) const
inline

Definition at line 132 of file html.h.

132  {
133  return TStr::GetSpaceStr(PreSpaces);}
static TStr GetSpaceStr(const int &Spaces)
Definition: dt.cpp:1608
int PreSpaces
Definition: html.h:112
TStr THtmlLx::GetStrInTag ( const TStr &  TagNm,
const bool &  TxtOnlyP = false 
)

Definition at line 525 of file html.cpp.

525  {
526  MoveToBTagOrEof(TagNm);
527  return GetStrToETag(TagNm, TxtOnlyP);
528 }
TStr GetStrToETag(const TStr &TagNm, const bool &TxtOnlyP=false)
Definition: html.cpp:494
void MoveToBTagOrEof(const TStr &TagNm)
Definition: html.cpp:376
TStr THtmlLx::GetStrToBTag ( const TStr &  TagNm,
const bool &  TxtOnlyP = false 
)

Definition at line 462 of file html.cpp.

462  {
463  TChA OutChA;
464  forever {
465  GetSym();
466  if ((Sym==hsyEof)||((Sym==hsyBTag)&&(UcChA==TagNm))){
467  break;
468  } else {
469  if (PreSpaces>0){OutChA+=' ';}
470  if ((TxtOnlyP&&(Sym!=hsyBTag)&&(Sym!=hsyETag))||(!TxtOnlyP)){
471  OutChA+=ChA;}
472  }
473  }
474  return OutChA;
475 }
#define forever
Definition: bd.h:6
THtmlLxSym GetSym()
Definition: html.cpp:277
TChA UcChA
Definition: html.h:110
TChA ChA
Definition: html.h:109
Definition: dt.h:201
Definition: html.h:80
Definition: html.h:80
THtmlLxSym Sym
Definition: html.h:107
Definition: html.h:80
int PreSpaces
Definition: html.h:112
TStr THtmlLx::GetStrToBTag ( const TStr &  TagNm,
const TStr &  ArgNm,
const TStr &  ArgVal,
const bool &  TxtOnlyP = false 
)

Definition at line 477 of file html.cpp.

478  {
479  TChA OutChA;
480  forever {
481  GetSym();
482  if ((Sym==hsyEof)||((Sym==hsyBTag)&&(UcChA==TagNm)&&
483  (IsArg(ArgNm))&&(GetArg(ArgNm)==ArgVal))){
484  break;
485  } else {
486  if (PreSpaces>0){OutChA+=' ';}
487  if ((TxtOnlyP&&(Sym!=hsyBTag)&&(Sym!=hsyETag))||(!TxtOnlyP)){
488  OutChA+=ChA;}
489  }
490  }
491  return OutChA;
492 }
#define forever
Definition: bd.h:6
THtmlLxSym GetSym()
Definition: html.cpp:277
bool IsArg(const TStr &ArgNm) const
Definition: html.h:138
TStr GetArg(const TStr &ArgNm, const TStr &DfArgVal=TStr()) const
Definition: html.h:139
TChA UcChA
Definition: html.h:110
TChA ChA
Definition: html.h:109
Definition: dt.h:201
Definition: html.h:80
Definition: html.h:80
THtmlLxSym Sym
Definition: html.h:107
Definition: html.h:80
int PreSpaces
Definition: html.h:112
TStr THtmlLx::GetStrToETag ( const TStr &  TagNm,
const bool &  TxtOnlyP = false 
)

Definition at line 494 of file html.cpp.

494  {
495  TChA OutChA;
496  forever {
497  GetSym();
498  if ((Sym==hsyEof)||((Sym==hsyETag)&&(UcChA==TagNm))){
499  break;
500  } else {
501  if (PreSpaces>0){OutChA+=' ';}
502  if ((TxtOnlyP&&(Sym!=hsyBTag)&&(Sym!=hsyETag))||(!TxtOnlyP)){
503  OutChA+=ChA;}
504  }
505  }
506  return OutChA;
507 }
#define forever
Definition: bd.h:6
THtmlLxSym GetSym()
Definition: html.cpp:277
TChA UcChA
Definition: html.h:110
TChA ChA
Definition: html.h:109
Definition: dt.h:201
Definition: html.h:80
Definition: html.h:80
THtmlLxSym Sym
Definition: html.h:107
Definition: html.h:80
int PreSpaces
Definition: html.h:112
TStr THtmlLx::GetStrToETag2 ( const TStr &  TagNm1,
const TStr &  TagNm2,
const bool &  TxtOnlyP = false 
)

Definition at line 509 of file html.cpp.

510  {
511  TChA OutChA;
512  forever {
513  GetSym();
514  if ((Sym==hsyEof)||((Sym==hsyETag)&&(UcChA==TagNm1))||((Sym==hsyETag)&&(UcChA==TagNm2))){
515  break;
516  } else {
517  if (PreSpaces>0){OutChA+=' ';}
518  if ((TxtOnlyP&&(Sym!=hsyBTag)&&(Sym!=hsyETag))||(!TxtOnlyP)){
519  OutChA+=ChA;}
520  }
521  }
522  return OutChA;
523 }
#define forever
Definition: bd.h:6
THtmlLxSym GetSym()
Definition: html.cpp:277
TChA UcChA
Definition: html.h:110
TChA ChA
Definition: html.h:109
Definition: dt.h:201
Definition: html.h:80
Definition: html.h:80
THtmlLxSym Sym
Definition: html.h:107
Definition: html.h:80
int PreSpaces
Definition: html.h:112
THtmlLxSym THtmlLx::GetSym ( )

Definition at line 277 of file html.cpp.

277  {
278  // prepare symbol descriptions
279  ChA.Clr(); UcChA.Clr();
281  ArgNmValV.Clr();
282  // skip white-space
283  while (ChDef.IsSpace(Ch)){
284  if (ChX>0){PreSpaceChA+=Ch; PreSpaces++;} GetEscCh();}
285  // parse symbol
286  SymChA.Clr(); SymChA+=Ch; SymBChX=ChX;
287  switch (ChDef.GetChTy(Ch)){
288  case hlctAlpha:
289  Sym=hsyStr;
290  forever{
291  do {
293  } while (ChDef.IsAlNum(Ch));
294  if (Ch=='.'){
295  GetCh();
296  if (ChDef.IsAlNum(Ch)){ChA.AddCh('.'); UcChA.AddCh('.');}
297  else {PutCh(Ch); Ch='.'; break;}
298  } else {break;}
299  }
300  break;
301  case hlctNum:
302  Sym=hsyNum;
303  forever{
304  do {
305  ChA.AddCh(Ch); UcChA.AddCh(Ch); GetEscCh();
306  } while (ChDef.IsNum(Ch));
307  if (Ch=='.'){
308  GetCh();
309  if (ChDef.IsAlNum(Ch)){ChA.AddCh('.'); UcChA.AddCh('.');}
310  else {PutCh(Ch); Ch='.'; break;}
311  } else if (ChDef.IsAlpha(Ch)){
312  Sym=hsyStr;
313  } else {
314  break;
315  }
316  }
317  break;
318  case hlctSym:
320  if ((ChA.LastCh()=='.')&&(ChDef.IsAlNum(Ch))){
321  Sym=hsyStr;
322  do {
324  } while (ChDef.IsAlNum(Ch));
325  }
326  break;
327  case hlctLTag:
328  if (EscCh){
330  } else {
331  GetCh();
332  if (Ch=='!'){GetCh(); GetMetaTag();} else {GetTag();}
333  }
334  break;
335  case hlctRTag:
336  if (EscCh){
338  } else {
340  }
341  break;
342  case hlctEof: Sym=hsyEof; break;
343  default: Sym=hsyUndef; GetEscCh();
344  }
345  // set symbol last-character-position
346  SymEChX=ChX-1;
347  // delete last character
348  if (!SymChA.Empty()){SymChA.Pop();}
349  // return symbol
350  return Sym;
351 }
static THtmlLxChDef ChDef
Definition: html.h:84
Definition: html.h:79
int SymBChX
Definition: html.h:108
bool IsNum(const char &Ch) const
Definition: html.h:40
#define forever
Definition: bd.h:6
bool Empty() const
Definition: dt.h:260
Definition: html.h:79
void Clr()
Definition: dt.h:258
void AddCh(const char &Ch, const int &MxLen=-1)
Definition: dt.h:271
void PutCh(const char &_Ch)
Definition: html.h:126
Definition: html.h:12
char Ch
Definition: html.h:89
void GetTag()
Definition: html.cpp:236
Definition: html.h:11
TChA PreSpaceChA
Definition: html.h:113
void Clr(const bool &DoDel=true, const TSizeTy &NoDelLim=-1)
Clears the contents of the vector.
Definition: ds.h:1022
char LastCh() const
Definition: dt.h:281
bool EscCh
Definition: html.h:91
int SymEChX
Definition: html.h:108
Definition: html.h:12
Definition: html.h:11
bool IsAlNum(const char &Ch) const
Definition: html.h:41
TChA UcChA
Definition: html.h:110
TChA ChA
Definition: html.h:109
int GetChTy(const char &Ch) const
Definition: html.h:34
void GetMetaTag()
Definition: html.cpp:225
int ChX
Definition: html.h:90
bool IsSpace(const char &Ch) const
Definition: html.h:38
Definition: html.h:79
void GetCh()
Definition: html.h:95
Definition: html.h:12
bool IsAlpha(const char &Ch) const
Definition: html.h:39
void GetEscCh()
Definition: html.cpp:195
TChA SymChA
Definition: html.h:111
char GetUc(const char &Ch) const
Definition: html.h:52
THtmlLxSym Sym
Definition: html.h:107
Definition: html.h:79
Definition: html.h:80
TArgNmValV ArgNmValV
Definition: html.h:115
Definition: html.h:11
int PreSpaces
Definition: html.h:112
char Pop()
Definition: dt.h:265
TStr THtmlLx::GetSymStr ( const THtmlLxSym &  Sym)
static

Definition at line 553 of file html.cpp.

553  {
554  switch (Sym){
555  case hsyUndef: return "Undef";
556  case hsyStr: return "Str";
557  case hsyNum: return "Num";
558  case hsySSym: return "SSym";
559  case hsyUrl: return "Url";
560  case hsyBTag: return "BTag";
561  case hsyETag: return "ETag";
562  case hsyMTag: return "MTag";
563  case hsyEof: return "Eof";
564  default: Fail; return TStr();
565  }
566 }
Definition: html.h:79
#define Fail
Definition: bd.h:238
Definition: html.h:79
Definition: html.h:80
Definition: html.h:79
Definition: html.h:79
Definition: html.h:80
Definition: html.h:80
Definition: dt.h:412
THtmlLxSym Sym
Definition: html.h:107
Definition: html.h:79
Definition: html.h:80
void THtmlLx::GetTag ( )
private

Definition at line 236 of file html.cpp.

236  {
237  if (Ch=='/'){Sym=hsyETag; GetCh();} else {Sym=hsyBTag;}
238  UcChA.AddCh('<');
239  while (ChDef.IsAlNum(Ch)||(Ch==':')){
240  UcChA.AddCh(ChDef.GetUc(Ch)); GetCh();}
241  UcChA.AddCh('>');
242  ChA=UcChA;
243 
244  if (DoParseArg){
245  while ((Ch!='>')&&(Ch!=TCh::EofCh)){
246  while ((!ChDef.IsAlpha(Ch))&&(Ch!='>')&&(Ch!=TCh::EofCh)){GetCh();}
247  if (ChDef.IsAlpha(Ch)){
248  ArgNm.Clr(); ArgVal.Clr();
249  while (ChDef.IsAlNum(Ch)||(Ch=='-')){ArgNm.AddCh(ChDef.GetUc(Ch)); GetCh();}
250  while (ChDef.IsWs(Ch)){GetCh();}
251  if (Ch=='='){
252  GetCh(); while (ChDef.IsWs(Ch)){GetCh();}
253  if (Ch=='"'){
254  GetCh();
255  while ((Ch!=TCh::EofCh)&&(Ch!='"')&&(Ch!='>')){
256  if (!ChDef.IsEoln(Ch)){ArgVal.AddCh(Ch);} GetCh();}
257  if (Ch=='"'){GetCh();}
258  } else if (Ch=='\''){
259  GetCh();
260  while ((Ch!=TCh::EofCh)&&(Ch!='\'')&&(Ch!='>')){
261  if (!ChDef.IsEoln(Ch)){ArgVal.AddCh(Ch);} GetCh();}
262  if (Ch=='\''){GetCh();}
263  } else {
264  while ((!ChDef.IsWs(Ch))&&(Ch!='>')&&(Ch!=TCh::EofCh)){
265  ArgVal.AddCh(Ch); GetCh();}
266  }
268  }
269  }
270  }
271  } else {
272  while ((Ch!='>')&&(Ch!=TCh::EofCh)){GetCh();}
273  }
274  if (Ch!=TCh::EofCh){GetEscCh();}
275 }
TChA ArgNm
Definition: html.h:93
static THtmlLxChDef ChDef
Definition: html.h:84
bool IsWs(const char &Ch) const
Definition: html.h:36
void Clr()
Definition: dt.h:258
void AddCh(const char &Ch, const int &MxLen=-1)
Definition: dt.h:271
bool IsEoln(const char &Ch) const
Definition: html.h:35
bool DoParseArg
Definition: html.h:87
char Ch
Definition: html.h:89
static const char EofCh
Definition: dt.h:1040
bool IsAlNum(const char &Ch) const
Definition: html.h:41
TChA UcChA
Definition: html.h:110
TChA ChA
Definition: html.h:109
Definition: html.h:80
void GetCh()
Definition: html.h:95
bool IsAlpha(const char &Ch) const
Definition: html.h:39
void GetEscCh()
Definition: html.cpp:195
Definition: html.h:80
char GetUc(const char &Ch) const
Definition: html.h:52
THtmlLxSym Sym
Definition: html.h:107
TArgNmValV ArgNmValV
Definition: html.h:115
TChA ArgVal
Definition: html.h:94
TSizeTy Add()
Adds a new element at the end of the vector, after its current last element.
Definition: ds.h:602
TKeyDat< TStr, TStr > TStrKd
Definition: ds.h:405
TStr THtmlLx::GetTextOnlyStrToEof ( )

Definition at line 447 of file html.cpp.

447  {
448  TChA OutChA;
449  forever {
450  GetSym();
451  if (Sym==hsyEof){
452  break;
453  } else {
454  if (PreSpaces>0){OutChA+=' ';}
455  if ((Sym!=hsyBTag)&&(Sym!=hsyETag)){
456  OutChA+=ChA;}
457  }
458  }
459  return OutChA;
460 }
#define forever
Definition: bd.h:6
THtmlLxSym GetSym()
Definition: html.cpp:277
TChA ChA
Definition: html.h:109
Definition: dt.h:201
Definition: html.h:80
Definition: html.h:80
THtmlLxSym Sym
Definition: html.h:107
Definition: html.h:80
int PreSpaces
Definition: html.h:112
PHtmlTok THtmlLx::GetTok ( const bool &  DoUc = true)

Definition at line 353 of file html.cpp.

353  {
354  if (DoUc){return PHtmlTok(new THtmlTok(Sym, UcChA, ArgNmValV));}
355  else {return PHtmlTok(new THtmlTok(Sym, ChA, ArgNmValV));}
356 }
Definition: html.h:182
TChA UcChA
Definition: html.h:110
TChA ChA
Definition: html.h:109
TPt< THtmlTok > PHtmlTok
Definition: html.h:5
THtmlLxSym Sym
Definition: html.h:107
TArgNmValV ArgNmValV
Definition: html.h:115
void THtmlLx::GetTokStrV ( const TStr &  Str,
TStrV &  TokStrV 
)
static

Definition at line 595 of file html.cpp.

595  {
596  PSIn SIn=TStrIn::New(Str);
597  THtmlLx Lx(SIn);
598  Lx.GetSym();
599  TokStrV.Clr();
600  while (Lx.Sym!=hsyEof){
601  TokStrV.Add(Lx.ChA);
602  Lx.GetSym();
603  }
604 }
void Clr(const bool &DoDel=true, const TSizeTy &NoDelLim=-1)
Clears the contents of the vector.
Definition: ds.h:1022
static PSIn New(const TStr &Str)
Definition: dt.h:711
PSIn SIn
Definition: html.h:85
Definition: html.h:80
Definition: html.h:82
TSizeTy Add()
Adds a new element at the end of the vector, after its current last element.
Definition: ds.h:602
bool THtmlLx::IsArg ( const TStr &  ArgNm) const
inline

Definition at line 138 of file html.h.

138 {return ArgNmValV.IsIn(TStrKd(ArgNm));}
bool IsIn(const TVal &Val) const
Checks whether element Val is a member of the vector.
Definition: ds.h:828
TArgNmValV ArgNmValV
Definition: html.h:115
TKeyDat< TStr, TStr > TStrKd
Definition: ds.h:405
bool THtmlLx::IsGetBTag ( const TStr &  TagNm)

Definition at line 541 of file html.cpp.

541  {
542  if (GetSym()==hsyBTag){
543  return ChA==TagNm;
544  } else {return false;}
545 }
THtmlLxSym GetSym()
Definition: html.cpp:277
TChA ChA
Definition: html.h:109
Definition: html.h:80
bool THtmlLx::IsGetETag ( const TStr &  TagNm)

Definition at line 547 of file html.cpp.

547  {
548  if (GetSym()==hsyETag){
549  return ChA==TagNm;
550  } else {return false;}
551 }
THtmlLxSym GetSym()
Definition: html.cpp:277
TChA ChA
Definition: html.h:109
Definition: html.h:80
void THtmlLx::MoveToBTag2OrEof ( const TStr &  TagNm1,
const TStr &  TagNm2 
)

Definition at line 382 of file html.cpp.

382  {
383  do {
384  GetSym();
385  } while ((Sym!=hsyEof)&&((Sym!=hsyBTag)||((UcChA!=TagNm1)&&(UcChA!=TagNm2))));
386 }
THtmlLxSym GetSym()
Definition: html.cpp:277
TChA UcChA
Definition: html.h:110
Definition: html.h:80
THtmlLxSym Sym
Definition: html.h:107
Definition: html.h:80
void THtmlLx::MoveToBTag3OrEof ( const TStr &  TagNm1,
const TStr &  TagNm2,
const TStr &  TagNm3 
)

Definition at line 388 of file html.cpp.

388  {
389  do {
390  GetSym();
391  } while ((Sym!=hsyEof)&&((Sym!=hsyBTag)||((UcChA!=TagNm1)&&(UcChA!=TagNm2)&&(UcChA!=TagNm3))));
392 }
THtmlLxSym GetSym()
Definition: html.cpp:277
TChA UcChA
Definition: html.h:110
Definition: html.h:80
THtmlLxSym Sym
Definition: html.h:107
Definition: html.h:80
void THtmlLx::MoveToBTagArg2OrEof ( const TStr &  TagNm,
const TStr &  ArgNm1,
const TStr &  ArgVal1,
const TStr &  ArgNm2,
const TStr &  ArgVal2,
const bool &  AndOpP = true 
)

Definition at line 410 of file html.cpp.

412  {
413  forever {
414  GetSym();
415  if (Sym==hsyEof){break;}
416  if (AndOpP){
417  if ((Sym==hsyBTag)&&(UcChA==TagNm)&&
418  (IsArg(ArgNm1))&&(GetArg(ArgNm1)==ArgVal1)&&
419  (IsArg(ArgNm2))&&(GetArg(ArgNm2)==ArgVal2)){break;}
420  } else {
421  if ((Sym==hsyBTag)&&(UcChA==TagNm)&&
422  (((IsArg(ArgNm1))&&(GetArg(ArgNm1)==ArgVal1))||
423  ((IsArg(ArgNm2))&&(GetArg(ArgNm2)==ArgVal2)))){break;}
424  }
425  }
426 }
#define forever
Definition: bd.h:6
THtmlLxSym GetSym()
Definition: html.cpp:277
bool IsArg(const TStr &ArgNm) const
Definition: html.h:138
TStr GetArg(const TStr &ArgNm, const TStr &DfArgVal=TStr()) const
Definition: html.h:139
TChA UcChA
Definition: html.h:110
Definition: html.h:80
THtmlLxSym Sym
Definition: html.h:107
Definition: html.h:80
void THtmlLx::MoveToBTagArgOrEof ( const TStr &  TagNm,
const TStr &  ArgNm,
const TStr &  ArgVal 
)

Definition at line 400 of file html.cpp.

401  {
402  forever {
403  GetSym();
404  if (Sym==hsyEof){break;}
405  if ((Sym==hsyBTag)&&(UcChA==TagNm)&&
406  (IsArg(ArgNm))&&(GetArg(ArgNm)==ArgVal)){break;}
407  }
408 }
#define forever
Definition: bd.h:6
THtmlLxSym GetSym()
Definition: html.cpp:277
bool IsArg(const TStr &ArgNm) const
Definition: html.h:138
TStr GetArg(const TStr &ArgNm, const TStr &DfArgVal=TStr()) const
Definition: html.h:139
TChA UcChA
Definition: html.h:110
Definition: html.h:80
THtmlLxSym Sym
Definition: html.h:107
Definition: html.h:80
void THtmlLx::MoveToBTagOrEof ( const TStr &  TagNm)

Definition at line 376 of file html.cpp.

376  {
377  do {
378  GetSym();
379  } while ((Sym!=hsyEof)&&((Sym!=hsyBTag)||(UcChA!=TagNm)));
380 }
THtmlLxSym GetSym()
Definition: html.cpp:277
TChA UcChA
Definition: html.h:110
Definition: html.h:80
THtmlLxSym Sym
Definition: html.h:107
Definition: html.h:80
void THtmlLx::MoveToBTagOrEof ( const TStr &  TagNm1,
const TStr &  ArgNm1,
const TStr &  ArgVal1,
const TStr &  TagNm2,
const TStr &  ArgNm2,
const TStr &  ArgVal2 
)

Definition at line 428 of file html.cpp.

430  {
431  forever {
432  GetSym();
433  if (Sym==hsyEof){break;}
434  if ((Sym==hsyBTag)&&(UcChA==TagNm1)&&
435  (IsArg(ArgNm1))&&(GetArg(ArgNm1)==ArgVal1)){break;}
436  if ((Sym==hsyBTag)&&(UcChA==TagNm2)&&
437  (IsArg(ArgNm2))&&(GetArg(ArgNm2)==ArgVal2)){break;}
438  }
439 }
#define forever
Definition: bd.h:6
THtmlLxSym GetSym()
Definition: html.cpp:277
bool IsArg(const TStr &ArgNm) const
Definition: html.h:138
TStr GetArg(const TStr &ArgNm, const TStr &DfArgVal=TStr()) const
Definition: html.h:139
TChA UcChA
Definition: html.h:110
Definition: html.h:80
THtmlLxSym Sym
Definition: html.h:107
Definition: html.h:80
void THtmlLx::MoveToBTagOrETagOrEof ( const TStr &  BTagNm,
const TStr &  ETagNm 
)

Definition at line 394 of file html.cpp.

394  {
395  do {
396  GetSym();
397  } while ((Sym!=hsyEof) && ((Sym!=hsyBTag)||(UcChA!=BTagNm)) && ((Sym!=hsyETag) || (UcChA!=ETagNm)));
398 }
THtmlLxSym GetSym()
Definition: html.cpp:277
TChA UcChA
Definition: html.h:110
Definition: html.h:80
Definition: html.h:80
THtmlLxSym Sym
Definition: html.h:107
Definition: html.h:80
void THtmlLx::MoveToETagOrEof ( const TStr &  TagNm)

Definition at line 441 of file html.cpp.

441  {
442  do {
443  GetSym();
444  } while ((Sym!=hsyEof)&&((Sym!=hsyETag)||(UcChA!=TagNm)));
445 }
THtmlLxSym GetSym()
Definition: html.cpp:277
TChA UcChA
Definition: html.h:110
Definition: html.h:80
THtmlLxSym Sym
Definition: html.h:107
Definition: html.h:80
void THtmlLx::MoveToStrOrEof ( const TStr &  Str)

Definition at line 370 of file html.cpp.

370  {
371  do {
372  GetSym();
373  } while ((Sym!=hsyEof)&&((Sym!=hsyStr)||(ChA!=Str)));
374 }
Definition: html.h:79
THtmlLxSym GetSym()
Definition: html.cpp:277
TChA ChA
Definition: html.h:109
THtmlLxSym Sym
Definition: html.h:107
Definition: html.h:80
THtmlLx& THtmlLx::operator= ( const THtmlLx & )
inline

Definition at line 124 of file html.h.

124 {Fail; return *this;}
#define Fail
Definition: bd.h:238
void THtmlLx::PutArg ( const TStr &  ArgNm,
const TStr &  ArgVal 
)
inline

Definition at line 142 of file html.h.

142  {
143  int ArgN=ArgNmValV.SearchForw(TStrKd(ArgNm));
144  if (ArgN==-1){ArgNmValV.Add(TStrKd(ArgNm, ArgVal));}
145  else {ArgNmValV[ArgN]=TStrKd(ArgNm, ArgVal);}}
TSizeTy SearchForw(const TVal &Val, const TSizeTy &BValN=0) const
Returns the position of an element with value Val.
Definition: ds.h:1552
TArgNmValV ArgNmValV
Definition: html.h:115
TSizeTy Add()
Adds a new element at the end of the vector, after its current last element.
Definition: ds.h:602
TKeyDat< TStr, TStr > TStrKd
Definition: ds.h:405
void THtmlLx::PutCh ( const char &  _Ch)
inline

Definition at line 126 of file html.h.

126  {
127  ChStack.Push(Ch); if (!SymChA.Empty()){SymChA.Pop();} Ch=_Ch; ChX--;}
bool Empty() const
Definition: dt.h:260
char Ch
Definition: html.h:89
TChA ChStack
Definition: html.h:88
int ChX
Definition: html.h:90
TChA SymChA
Definition: html.h:111
void Push(const char &Ch)
Definition: dt.h:264
char Pop()
Definition: dt.h:265
void THtmlLx::PutStr ( const TStr &  Str)
inline

Definition at line 128 of file html.h.

128  {
129  for (int ChN=Str.Len()-1; ChN>=0; ChN--){PutCh(Str[ChN]);}}
int Len() const
Definition: dt.h:490
void PutCh(const char &_Ch)
Definition: html.h:126

Member Data Documentation

TChA THtmlLx::ArgNm
private

Definition at line 93 of file html.h.

TArgNmValV THtmlLx::ArgNmValV

Definition at line 115 of file html.h.

TChA THtmlLx::ArgVal
private

Definition at line 94 of file html.h.

char THtmlLx::Ch
private

Definition at line 89 of file html.h.

TChA THtmlLx::ChA

Definition at line 109 of file html.h.

THtmlLxChDef THtmlLx::ChDef
static private

Definition at line 84 of file html.h.

TChA THtmlLx::ChStack
private

Definition at line 88 of file html.h.

int THtmlLx::ChX
private

Definition at line 90 of file html.h.

bool THtmlLx::DoParseArg
private

Definition at line 87 of file html.h.

bool THtmlLx::EscCh
private

Definition at line 91 of file html.h.

TChA THtmlLx::EscChA
private

Definition at line 92 of file html.h.

TChA THtmlLx::PreSpaceChA

Definition at line 113 of file html.h.

int THtmlLx::PreSpaces

Definition at line 112 of file html.h.

TSIn& THtmlLx::RSIn
private

Definition at line 86 of file html.h.

PSIn THtmlLx::SIn
private

Definition at line 85 of file html.h.

THtmlLxSym THtmlLx::Sym

Definition at line 107 of file html.h.

int THtmlLx::SymBChX

Definition at line 108 of file html.h.

TChA THtmlLx::SymChA

Definition at line 111 of file html.h.

int THtmlLx::SymEChX

Definition at line 108 of file html.h.

TChA THtmlLx::UcChA

Definition at line 110 of file html.h.


The documentation for this class was generated from the following files: