SNAP Library 3.0, User Reference  2016-07-20 17:56:49
SNAP, a general purpose, high performance system for analysis and manipulation of large networks
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
THtmlLxChDef Class Reference

#include <html.h>

Public Member Functions

 THtmlLxChDef ()
 
 THtmlLxChDef (TSIn &SIn)
 
void Save (TSOut &SOut)
 
THtmlLxChDef & operator= (const THtmlLxChDef &)
 
int GetChTy (const char &Ch) const
 
bool IsEoln (const char &Ch) const
 
bool IsWs (const char &Ch) const
 
bool IsSpace (const char &Ch) const
 
bool IsAlpha (const char &Ch) const
 
bool IsNum (const char &Ch) const
 
bool IsAlNum (const char &Ch) const
 
bool IsSym (const char &Ch) const
 
bool IsUrl (const char &Ch) const
 
bool IsUc (const char &Ch) const
 
bool IsLc (const char &Ch) const
 
char GetUc (const char &Ch) const
 
char GetLc (const char &Ch) const
 
void GetUcChA (TChA &ChA) const
 
void GetLcChA (TChA &ChA) const
 
TStr GetUcStr (const TStr &Str) const
 
TStr GetLcStr (const TStr &Str) const
 
TStr GetEscStr (const TStr &Str) const
 

Static Public Member Functions

static PHtmlLxChDef Load (TSIn &SIn)
 
static PHtmlLxChDef GetChDef ()
 
static THtmlLxChDef & GetChDefRef ()
 
static TStr GetCSZFromYuascii (const TChA &ChA)
 
static TStr GetCSZFromWin1250 (const TChA &ChA)
 
static TStr GetWin1250FromYuascii (const TChA &ChA)
 
static TStr GetIsoCeFromYuascii (const TChA &ChA)
 

Static Public Attributes

static PHtmlLxChDef ChDef =PHtmlLxChDef(new THtmlLxChDef())
 

Private Member Functions

void SetUcCh (const char &UcCh, const char &LcCh)
 
void SetUcCh (const TStr &Str)
 
void SetChTy (const THtmlLxChTy &ChTy, const TStr &Str)
 
void SetEscStr (const TStr &SrcStr, const TStr &DstStr)
 

Private Attributes

TCRef CRef
 
TIntV ChTyV
 
TChV UcChV
 
TChV LcChV
 
TStrStrH EscStrH
 

Friends

class TPt< THtmlLxChDef >
 

Detailed Description

Definition at line 14 of file html.h.

Constructor & Destructor Documentation

THtmlLxChDef::THtmlLxChDef ( )

Definition at line 48 of file html.cpp.

48  :
50 
51  // Character-Types
53  SetChTy(hlctAlpha, "ABCDEFGHIJKLMNOPQRSTUVWXYZ");
54  SetChTy(hlctAlpha, "abcdefghijklmnopqrstuvwxyz");
55  SetChTy(hlctAlpha, "@_");
56  SetChTy(hlctNum, "0123456789");
57  SetChTy(hlctSym, "`~!#$%^&*()-=+[{]}\\|;:'\",<.>/?");
58  SetChTy(hlctLTag, "<"); SetChTy(hlctRTag, ">");
60  for (int Ch=TCh::Mn; Ch<=TCh::Mx; Ch++){
61  if ((Ch<0)||(127<Ch)){SetChTy(hlctAlpha, TStr(TCh(char(Ch))));}}
62  //SetChTy(hlctSpace, TStr(TCh(char(160))));
63 
64  // Upper-Case
65  {for (int Ch=TCh::Mn; Ch<=TCh::Mx; Ch++){
66  SetUcCh(char(Ch), char(Ch));}}
67  SetUcCh("Aa"); SetUcCh("\xc0\xe0"); SetUcCh("\xc1\xe1"); SetUcCh("\xc2\xe2");
68  SetUcCh("\xc3\xe3"); SetUcCh("\xc4\xe4"); SetUcCh("\xc5\xe5"); SetUcCh("\xc6\xe6");
69  SetUcCh("Bb"); SetUcCh("Cc"); SetUcCh("\xc7\xe7"); SetUcCh("Dd");
70  SetUcCh("\xd0\xf0"); SetUcCh("Ee"); SetUcCh("\xc8\xe8"); SetUcCh("\xc9\xe9");
71  SetUcCh("\xca\xea"); SetUcCh("\xcb\xeb"); SetUcCh("Ff"); SetUcCh("Gg");
72  SetUcCh("Hh"); SetUcCh("Ii"); SetUcCh("\xcc\xec"); SetUcCh("\xcd\xed");
73  SetUcCh("\xce\xee"); SetUcCh("\xcf\xef"); SetUcCh("Jj"); SetUcCh("Kk");
74  SetUcCh("Ll"); SetUcCh("Mm"); SetUcCh("Nn"); SetUcCh("\xd1\xf1");
75  SetUcCh("Oo"); SetUcCh("\xd2\xf2"); SetUcCh("\xd3\xf3"); SetUcCh("\xd4\xf4");
76  SetUcCh("\xd5\xf5"); SetUcCh("\xd6\xf6"); SetUcCh("\xd8\xf8"); SetUcCh("Pp");
77  SetUcCh("Qq"); SetUcCh("Rr"); SetUcCh("Ss"); SetUcCh("\x8a\x9a");
78  SetUcCh("Tt"); SetUcCh("Uu"); SetUcCh("\xd9\xf9"); SetUcCh("\xda\xfa");
79  SetUcCh("\xdb\xfb"); SetUcCh("\xdc\xfc"); SetUcCh("Vv"); SetUcCh("Ww");
80  SetUcCh("Xx"); SetUcCh("Yy\xff"); SetUcCh("\xdd\xfd"); SetUcCh("Zz");
81  SetUcCh("\x8e\x9e");
82  // ISO-CE
83  //SetUcCh(uchar(169), uchar(185)); /*Sh - \xa9\xb9*/
84  //SetUcCh(uchar(174), uchar(190)); /*Zh - \xae\xbe*/
85  //SetUcCh(uchar(200), uchar(232)); /*Ch - \xc8\xe8*/
86  //SetUcCh(uchar(198), uchar(230)); /*Cs - \xc6\xe6*/
87  //SetUcCh(uchar(208), uchar(240)); /*Dz - \xd0\xf0*/
88 
89  // Annoying Unicode-characters
90  //SetChTy(hlctSpace, "\xc2\xef");
91 
92  // Escape-Sequences
93  SetEscStr("&quot", "\""); SetEscStr("&amp", "&");
94  SetEscStr("&lt", "<"); SetEscStr("&gt", ">");
95  SetEscStr("&nbsp", " ");
96 
97  SetEscStr("&auml", "\xe4"); SetEscStr("&Auml", "\xc4");
98  SetEscStr("&ouml", "\xf6"); SetEscStr("&Ouml", "\xd6");
99  SetEscStr("&uuml", "\xfc"); SetEscStr("&Uuml", "\xdc");
100  SetEscStr("&aring", "\xe5"); SetEscStr("&Aring", "\xc5");
101  SetEscStr("&oslash", "\xf8"); SetEscStr("&Oslash", "\xd8");
102  SetEscStr("&Aelig", "\xc6"); SetEscStr("&aelig", "\xe6");
103 
104  SetEscStr("&eacute", "e"); SetEscStr("&Eacute", "E");
105  SetEscStr("&egrave", "e"); SetEscStr("&Egrave", "E");
106  SetEscStr("&agrave", "a"); SetEscStr("&Agrave", "A");
107 }
TIntV ChTyV
Definition: html.h:16
Definition: html.h:12
Definition: html.h:11
Definition: html.h:11
void SetChTy(const THtmlLxChTy &ChTy, const TStr &Str)
Definition: html.cpp:24
static const char EofCh
Definition: dt.h:947
static const char Mx
Definition: dt.h:940
void PutAll(const TVal &Val)
Sets all elements of the vector to value Val.
Definition: ds.h:1166
Definition: dt.h:935
static const char Mn
Definition: dt.h:939
Definition: html.h:12
Definition: html.h:11
void SetEscStr(const TStr &SrcStr, const TStr &DstStr)
Definition: html.cpp:29
TStrStrH EscStrH
Definition: html.h:19
Definition: dt.h:1044
TChV LcChV
Definition: html.h:18
Definition: html.h:12
Definition: dt.h:412
TChV UcChV
Definition: html.h:17
static const int Vals
Definition: dt.h:941
Definition: html.h:11
void SetUcCh(const char &UcCh, const char &LcCh)
Definition: html.cpp:3
THtmlLxChDef::THtmlLxChDef ( TSIn & SIn)
inline

Definition at line 26 of file html.h.

26 : ChTyV(SIn), UcChV(SIn), LcChV(SIn), EscStrH(SIn){}
TIntV ChTyV
Definition: html.h:16
TStrStrH EscStrH
Definition: html.h:19
TChV LcChV
Definition: html.h:18
TChV UcChV
Definition: html.h:17

Member Function Documentation

static PHtmlLxChDef THtmlLxChDef::GetChDef ( )
inline static

Definition at line 66 of file html.h.

66 {IAssert(!ChDef.Empty()); return ChDef;}
#define IAssert(Cond)
Definition: bd.h:262
bool Empty() const
Definition: bd.h:501
static PHtmlLxChDef ChDef
Definition: html.h:65
static THtmlLxChDef& THtmlLxChDef::GetChDefRef ( )
inline static

Definition at line 67 of file html.h.

67 {IAssert(!ChDef.Empty()); return *ChDef;}
#define IAssert(Cond)
Definition: bd.h:262
bool Empty() const
Definition: bd.h:501
static PHtmlLxChDef ChDef
Definition: html.h:65
int THtmlLxChDef::GetChTy ( const char &  Ch) const
inline

Definition at line 34 of file html.h.

34 {return ChTyV[Ch-TCh::Mn];}
TIntV ChTyV
Definition: html.h:16
static const char Mn
Definition: dt.h:939
TStr THtmlLxChDef::GetCSZFromWin1250 ( const TChA & ChA)
static

Definition at line 132 of file html.cpp.

132  {
133  TChA DstChA;
134  for (int ChN=0; ChN<ChA.Len(); ChN++){
135  const uchar Ch=ChA[ChN];
136  switch (Ch){
137  case 232: DstChA+='c'; break;
138  case 200: DstChA+='C'; break;
139  case 154: DstChA+='s'; break;
140  case 138: DstChA+='S'; break;
141  case 158: DstChA+='z'; break;
142  case 142: DstChA+='Z'; break;
143  default: DstChA+=Ch;
144  }
145  }
146  return DstChA;
147 }
int Len() const
Definition: dt.h:259
unsigned char uchar
Definition: bd.h:10
Definition: dt.h:201
TStr THtmlLxChDef::GetCSZFromYuascii ( const TChA & ChA)
static

Definition at line 111 of file html.cpp.

111  {
112  TChA DstChA;
113  for (int ChN=0; ChN<ChA.Len(); ChN++){
114  char Ch=ChA[ChN];
115  switch (Ch){
116  case '~': DstChA+='c'; break;
117  case '^': DstChA+='C'; break;
118  case '}': DstChA+='c'; break;
119  case ']': DstChA+='C'; break;
120  case '|': DstChA+='d'; break;
121  case '\\': DstChA+='D'; break;
122  case '{': DstChA+='s'; break;
123  case '[': DstChA+='S'; break;
124  case '`': DstChA+='z'; break;
125  case '@': DstChA+='Z'; break;
126  default: DstChA+=Ch;
127  }
128  }
129  return DstChA;
130 }
int Len() const
Definition: dt.h:259
Definition: dt.h:201
TStr THtmlLxChDef::GetEscStr ( const TStr & Str) const

Definition at line 33 of file html.cpp.

33  {
34  int EscStrId;
35  if ((EscStrId=EscStrH.GetKeyId(Str))!=-1){
36  return EscStrH[EscStrId];
37  } else
38  if ((Str.Len()>=2)&&(Str[0]=='&')&&(Str[1]=='#')){
39  int ChCd=0;
40  for (int ChN=2; ChN<Str.Len(); ChN++){
41  if (ChCd<=0xFFFF){ChCd=ChCd*10+Str[ChN]-'0';}}
42  return TStr((char)ChCd);
43  } else {
44  return TStr(' ');
45  }
46 }
int Len() const
Definition: dt.h:487
TStrStrH EscStrH
Definition: html.h:19
int GetKeyId(const TKey &Key) const
Definition: hash.h:424
Definition: dt.h:412
TStr THtmlLxChDef::GetIsoCeFromYuascii ( const TChA & ChA)
static

Definition at line 170 of file html.cpp.

170  {
171  TChA DstChA;
172  for (int ChN=0; ChN<ChA.Len(); ChN++){
173  char Ch=ChA[ChN];
174  switch (Ch){
175  case '~': DstChA+=uchar(232); break;
176  case '^': DstChA+=uchar(200); break;
177  case '}': DstChA+=uchar(230); break;
178  case ']': DstChA+=uchar(198); break;
179  case '|': DstChA+=uchar(240); break;
180  case '\\': DstChA+=uchar(208); break;
181  case '{': DstChA+=uchar(185); break;
182  case '[': DstChA+=uchar(169); break;
183  case '`': DstChA+=uchar(190); break;
184  case '@': DstChA+=uchar(174); break;
185  default: DstChA+=Ch;
186  }
187  }
188  return DstChA;
189 }
int Len() const
Definition: dt.h:259
unsigned char uchar
Definition: bd.h:10
Definition: dt.h:201
char THtmlLxChDef::GetLc ( const char &  Ch) const
inline

Definition at line 53 of file html.h.

53 {return LcChV[Ch-TCh::Mn];}
static const char Mn
Definition: dt.h:939
TChV LcChV
Definition: html.h:18
void THtmlLxChDef::GetLcChA ( TChA & ChA) const
inline

Definition at line 56 of file html.h.

56  {
57  for (int ChN=0; ChN<ChA.Len(); ChN++){ChA.PutCh(ChN, GetLc(ChA[ChN]));}}
void PutCh(const int &ChN, const char &Ch)
Definition: dt.h:278
int Len() const
Definition: dt.h:259
char GetLc(const char &Ch) const
Definition: html.h:53
TStr THtmlLxChDef::GetLcStr ( const TStr & Str) const
inline

Definition at line 60 of file html.h.

60  {
61  TChA ChA(Str); GetLcChA(ChA); return ChA;}
void GetLcChA(TChA &ChA) const
Definition: html.h:56
Definition: dt.h:201
char THtmlLxChDef::GetUc ( const char &  Ch) const
inline

Definition at line 52 of file html.h.

52 {return UcChV[Ch-TCh::Mn];}
static const char Mn
Definition: dt.h:939
TChV UcChV
Definition: html.h:17
void THtmlLxChDef::GetUcChA ( TChA & ChA) const
inline

Definition at line 54 of file html.h.

54  {
55  for (int ChN=0; ChN<ChA.Len(); ChN++){ChA.PutCh(ChN, GetUc(ChA[ChN]));}}
void PutCh(const int &ChN, const char &Ch)
Definition: dt.h:278
int Len() const
Definition: dt.h:259
char GetUc(const char &Ch) const
Definition: html.h:52
TStr THtmlLxChDef::GetUcStr ( const TStr & Str) const
inline

Definition at line 58 of file html.h.

58  {
59  TChA ChA(Str); GetUcChA(ChA); return ChA;}
void GetUcChA(TChA &ChA) const
Definition: html.h:54
Definition: dt.h:201
TStr THtmlLxChDef::GetWin1250FromYuascii ( const TChA & ChA)
static

Definition at line 149 of file html.cpp.

149  {
150  TChA DstChA;
151  for (int ChN=0; ChN<ChA.Len(); ChN++){
152  char Ch=ChA[ChN];
153  switch (Ch){
154  case '~': DstChA+=uchar(232); break;
155  case '^': DstChA+=uchar(200); break;
156  case '}': DstChA+='c'; break;
157  case ']': DstChA+='C'; break;
158  case '|': DstChA+='d'; break;
159  case '\\': DstChA+='D'; break;
160  case '{': DstChA+=uchar(154); break;
161  case '[': DstChA+=uchar(138); break;
162  case '`': DstChA+=uchar(158); break;
163  case '@': DstChA+=uchar(142); break;
164  default: DstChA+=Ch;
165  }
166  }
167  return DstChA;
168 }
int Len() const
Definition: dt.h:259
unsigned char uchar
Definition: bd.h:10
Definition: dt.h:201
bool THtmlLxChDef::IsAlNum ( const char &  Ch) const
inline

Definition at line 41 of file html.h.

41  {
42  return (int(ChTyV[Ch-TCh::Mn])==hlctAlpha)||(int(ChTyV[Ch-TCh::Mn])==hlctNum);}
TIntV ChTyV
Definition: html.h:16
Definition: html.h:11
static const char Mn
Definition: dt.h:939
Definition: html.h:11
bool THtmlLxChDef::IsAlpha ( const char &  Ch) const
inline

Definition at line 39 of file html.h.

39 {return int(ChTyV[Ch-TCh::Mn])==hlctAlpha;}
TIntV ChTyV
Definition: html.h:16
static const char Mn
Definition: dt.h:939
Definition: html.h:11
bool THtmlLxChDef::IsEoln ( const char &  Ch) const
inline

Definition at line 35 of file html.h.

35 {return (Ch==TCh::CrCh)||(Ch==TCh::LfCh);}
static const char LfCh
Definition: dt.h:945
static const char CrCh
Definition: dt.h:946
bool THtmlLxChDef::IsLc ( const char &  Ch) const
inline

Definition at line 51 of file html.h.

51 {return Ch==LcChV[Ch-TCh::Mn];}
static const char Mn
Definition: dt.h:939
TChV LcChV
Definition: html.h:18
bool THtmlLxChDef::IsNum ( const char &  Ch) const
inline

Definition at line 40 of file html.h.

40 {return int(ChTyV[Ch-TCh::Mn])==hlctNum;}
TIntV ChTyV
Definition: html.h:16
Definition: html.h:11
static const char Mn
Definition: dt.h:939
bool THtmlLxChDef::IsSpace ( const char &  Ch) const
inline

Definition at line 38 of file html.h.

38 {return int(ChTyV[Ch-TCh::Mn])==hlctSpace;}
TIntV ChTyV
Definition: html.h:16
Definition: html.h:11
static const char Mn
Definition: dt.h:939
bool THtmlLxChDef::IsSym ( const char &  Ch) const
inline

Definition at line 43 of file html.h.

43 {return int(ChTyV[Ch-TCh::Mn])==hlctSym;}
TIntV ChTyV
Definition: html.h:16
static const char Mn
Definition: dt.h:939
Definition: html.h:11
bool THtmlLxChDef::IsUc ( const char &  Ch) const
inline

Definition at line 50 of file html.h.

50 {return Ch==UcChV[Ch-TCh::Mn];}
static const char Mn
Definition: dt.h:939
TChV UcChV
Definition: html.h:17
bool THtmlLxChDef::IsUrl ( const char &  Ch) const
inline

Definition at line 44 of file html.h.

44  {
45  int ChTy=ChTyV[Ch-TCh::Mn];
46  return (ChTy==hlctAlpha)||(ChTy==hlctNum)||
47  (Ch=='.')||(Ch=='-')||(Ch==':')||(Ch=='/')||(Ch=='~');}
TIntV ChTyV
Definition: html.h:16
Definition: html.h:11
static const char Mn
Definition: dt.h:939
Definition: html.h:11
bool THtmlLxChDef::IsWs ( const char &  Ch) const
inline

Definition at line 36 of file html.h.

36  {
37  return (Ch==' ')||(Ch==TCh::TabCh)||(Ch==TCh::CrCh)||(Ch==TCh::LfCh);}
static const char TabCh
Definition: dt.h:944
static const char LfCh
Definition: dt.h:945
static const char CrCh
Definition: dt.h:946
static PHtmlLxChDef THtmlLxChDef::Load ( TSIn & SIn)
inline static

Definition at line 27 of file html.h.

27 {return new THtmlLxChDef(SIn);}
THtmlLxChDef()
Definition: html.cpp:48
THtmlLxChDef& THtmlLxChDef::operator= ( const THtmlLxChDef & )
inline

Definition at line 31 of file html.h.

31 {Fail; return *this;}
#define Fail
Definition: bd.h:238
void THtmlLxChDef::Save ( TSOut & SOut)
inline

Definition at line 28 of file html.h.

28  {
29  ChTyV.Save(SOut); UcChV.Save(SOut); LcChV.Save(SOut); EscStrH.Save(SOut);}
TIntV ChTyV
Definition: html.h:16
void Save(TSOut &SOut) const
Definition: hash.h:141
void Save(TSOut &SOut) const
Definition: ds.h:903
TStrStrH EscStrH
Definition: html.h:19
TChV LcChV
Definition: html.h:18
TChV UcChV
Definition: html.h:17
void THtmlLxChDef::SetChTy ( const THtmlLxChTy & ChTy,
const TStr & Str 
)
private

Definition at line 24 of file html.cpp.

24  {
25  for (int ChN=0; ChN<Str.Len(); ChN++){
26  ChTyV[Str[ChN]-TCh::Mn]=TInt(ChTy);}
27 }
int Len() const
Definition: dt.h:487
TIntV ChTyV
Definition: html.h:16
static const char Mn
Definition: dt.h:939
Definition: dt.h:1044
void THtmlLxChDef::SetEscStr ( const TStr & SrcStr,
const TStr & DstStr 
)
private

Definition at line 29 of file html.cpp.

29  {
30  EscStrH.AddDat(SrcStr, DstStr);
31 }
TStrStrH EscStrH
Definition: html.h:19
TDat & AddDat(const TKey &Key)
Definition: hash.h:196
void THtmlLxChDef::SetUcCh ( const char &  UcCh,
const char &  LcCh 
)
private

Definition at line 3 of file html.cpp.

3  {
4  // update upper-case (more lower cases may have one upper case)
5  IAssert(
6  (UcChV[LcCh-TCh::Mn]==TCh(0))||
7  (UcChV[LcCh-TCh::Mn]==TCh(LcCh)));
8  UcChV[LcCh-TCh::Mn]=TCh(UcCh);
9  // update lower-case (one upper case may have only one lower case)
10  if ((LcChV[UcCh-TCh::Mn]==TCh(0))||(LcChV[UcCh-TCh::Mn]==TCh(UcCh))){
11  LcChV[UcCh-TCh::Mn]=TCh(LcCh);
12  }
13 }
#define IAssert(Cond)
Definition: bd.h:262
Definition: dt.h:935
static const char Mn
Definition: dt.h:939
TChV LcChV
Definition: html.h:18
TChV UcChV
Definition: html.h:17
void THtmlLxChDef::SetUcCh ( const TStr & Str)
private

Definition at line 15 of file html.cpp.

15  {
16  // set type of characters as letters
17  SetChTy(hlctAlpha, Str);
18  // first char in string is upper-case, rest are lower-case
19  for (int ChN=1; ChN<Str.Len(); ChN++){
20  SetUcCh(Str[0], Str[ChN]);
21  }
22 }
int Len() const
Definition: dt.h:487
void SetChTy(const THtmlLxChTy &ChTy, const TStr &Str)
Definition: html.cpp:24
Definition: html.h:11
void SetUcCh(const char &UcCh, const char &LcCh)
Definition: html.cpp:3

Friends And Related Function Documentation

friend class TPt< THtmlLxChDef >
friend

Definition at line 14 of file html.h.

Member Data Documentation

PHtmlLxChDef THtmlLxChDef::ChDef =PHtmlLxChDef(new THtmlLxChDef())
static

Definition at line 65 of file html.h.

TIntV THtmlLxChDef::ChTyV
private

Definition at line 16 of file html.h.

TCRef THtmlLxChDef::CRef
private

Definition at line 14 of file html.h.

TStrStrH THtmlLxChDef::EscStrH
private

Definition at line 19 of file html.h.

TChV THtmlLxChDef::LcChV
private

Definition at line 18 of file html.h.

TChV THtmlLxChDef::UcChV
private

Definition at line 17 of file html.h.


The documentation for this class was generated from the following files: