19 for (
int ChN=1; ChN<Str.
Len(); ChN++){
25 for (
int ChN=0; ChN<Str.
Len(); ChN++){
38 if ((Str.
Len()>=2)&&(Str[0]==
'&')&&(Str[1]==
'#')){
40 for (
int ChN=2; ChN<Str.
Len(); ChN++){
41 if (ChCd<=0xFFFF){ChCd=ChCd*10+Str[ChN]-
'0';}}
42 return TStr((
char)ChCd);
49 ChTyV(
TCh::Vals), UcChV(
TCh::Vals), LcChV(
TCh::Vals), EscStrH(100){
113 for (
int ChN=0; ChN<ChA.
Len(); ChN++){
116 case '~': DstChA+=
'c';
break;
117 case '^': DstChA+=
'C';
break;
118 case '}': DstChA+=
'c';
break;
119 case ']': DstChA+=
'C';
break;
120 case '|': DstChA+=
'd';
break;
121 case '\\': DstChA+=
'D';
break;
122 case '{': DstChA+=
's';
break;
123 case '[': DstChA+=
'S';
break;
124 case '`': DstChA+=
'z';
break;
125 case '@': DstChA+=
'Z';
break;
134 for (
int ChN=0; ChN<ChA.
Len(); ChN++){
135 const uchar Ch=ChA[ChN];
137 case 232: DstChA+=
'c';
break;
138 case 200: DstChA+=
'C';
break;
139 case 154: DstChA+=
's';
break;
140 case 138: DstChA+=
'S';
break;
141 case 158: DstChA+=
'z';
break;
142 case 142: DstChA+=
'Z';
break;
151 for (
int ChN=0; ChN<ChA.
Len(); ChN++){
154 case '~': DstChA+=
uchar(232);
break;
155 case '^': DstChA+=
uchar(200);
break;
156 case '}': DstChA+=
'c';
break;
157 case ']': DstChA+=
'C';
break;
158 case '|': DstChA+=
'd';
break;
159 case '\\': DstChA+=
'D';
break;
160 case '{': DstChA+=
uchar(154);
break;
161 case '[': DstChA+=
uchar(138);
break;
162 case '`': DstChA+=
uchar(158);
break;
163 case '@': DstChA+=
uchar(142);
break;
172 for (
int ChN=0; ChN<ChA.
Len(); ChN++){
175 case '~': DstChA+=
uchar(232);
break;
176 case '^': DstChA+=
uchar(200);
break;
177 case '}': DstChA+=
uchar(230);
break;
178 case ']': DstChA+=
uchar(198);
break;
179 case '|': DstChA+=
uchar(240);
break;
180 case '\\': DstChA+=
uchar(208);
break;
181 case '{': DstChA+=
uchar(185);
break;
182 case '[': DstChA+=
uchar(169);
break;
183 case '`': DstChA+=
uchar(190);
break;
184 case '@': DstChA+=
uchar(174);
break;
202 if ((
'0'<=
Ch)&&(
Ch<=
'9')){
210 if (((
'a'<=
Ch)&&(
Ch<=
'z'))||((
'A'<=
Ch)&&(
Ch<=
'Z'))){
213 }
while (((
'A'<=
Ch)&&(
Ch<=
'Z'))||((
'a'<=
Ch)&&(
Ch<=
'z'))||((
'0'<=
Ch)&&(
Ch<=
'9')));
258 }
else if (
Ch==
'\''){
361 BTagChA+=
ChA; BTagChA.
Pop();
362 for (
int ArgN=0; ArgN<
GetArgs(); ArgN++){
363 BTagChA+=
' '; BTagChA+=
GetArgNm(ArgN);
364 BTagChA+=
'='; BTagChA+=
'"'; BTagChA+=
GetArgVal(ArgN); BTagChA+=
'"';
401 const TStr& TagNm,
const TStr& ArgNm,
const TStr& ArgVal){
411 const TStr& ArgNm1,
const TStr& ArgVal1,
412 const TStr& ArgNm2,
const TStr& ArgVal2,
const bool& AndOpP){
419 (
IsArg(ArgNm2))&&(
GetArg(ArgNm2)==ArgVal2)){
break;}
423 ((
IsArg(ArgNm2))&&(
GetArg(ArgNm2)==ArgVal2)))){
break;}
429 const TStr& TagNm1,
const TStr& ArgNm1,
const TStr& ArgVal1,
430 const TStr& TagNm2,
const TStr& ArgNm2,
const TStr& ArgVal2){
435 (
IsArg(ArgNm1))&&(
GetArg(ArgNm1)==ArgVal1)){
break;}
437 (
IsArg(ArgNm2))&&(
GetArg(ArgNm2)==ArgVal2)){
break;}
478 const TStr& ArgVal,
const bool& TxtOnlyP){
510 const TStr& TagNm2,
const bool& TxtOnlyP){
544 }
else {
return false;}
550 }
else {
return false;}
556 case hsyStr:
return "Str";
557 case hsyNum:
return "Num";
559 case hsyUrl:
return "Url";
563 case hsyEof:
return "Eof";
570 for (
int ChN=0; ChN<ChA.
Len(); ChN++){
573 case '"': EscapedChA+=
""";
break;
574 case '&': EscapedChA+=
"&";
break;
575 case '\'': EscapedChA+=
"'";
break;
576 case '<': EscapedChA+=
"<";
break;
577 case '>': EscapedChA+=
">";
break;
578 default: EscapedChA+=
Ch;
586 for (
int ChN=0; ChN<ChA.
Len(); ChN++){
588 if ((Ch<
' ')||(
'~'<
Ch)){
632 for (
int ArgNmValN=0; ArgNmValN<
ArgNmValV.
Len(); ArgNmValN++){
633 FullChA+=
' '; FullChA+=
ArgNmValV[ArgNmValN].Key; FullChA+=
'=';
634 FullChA+=
'"'; FullChA+=
ArgNmValV[ArgNmValN].Dat; FullChA+=
'"';
661 if ((HttpEquivArgVal==
"REFRESH")&&
IsArg(
"CONTENT")){
666 LeftStr.
Len()+UrlEqStr.
Len(), ContentStr.
Len());
667 return !RelUrlStr.
Empty();
681 if ((HttpEquivArgVal==
"REFRESH")&&
IsArg(
"CONTENT")){
727 static TStrH BreakTagNmH(50);
728 if (BreakTagNmH.
Len()==0){
741 return BreakTagNmH.
IsKey(TagNm);
753 if ((TagNm.
Len()==4)&&(TagNm[0]==
'<')&&(TagNm[1]==
'H')&&(TagNm[3]==
'>')){
755 if ((
'1'<=Ch)&&(Ch<=
'6')){HTagN=Ch-
'0';
return true;}
756 else {HTagN=-1;
return false;}
758 HTagN=-1;
return false;
782 bool MkTok=
false;
bool InUL=
false;
785 case hdtAll: MkTok=
true;
break;
794 (Lx.
UcChA==THtmlTok::MetaTagNm));
821 if (InScript){
break;}
823 LnDocChA+=Str.
CStr();
826 if ((!LnDocChA.
Empty())&&(LnDocChA.
LastCh()!=
' ')){LnDocChA+=
' ';}
827 if ((!InScript)&&(Str==
"<SCRIPT>")){InScript=
true;}
830 if ((!LnDocChA.
Empty())&&(LnDocChA.
LastCh()!=
' ')){LnDocChA+=
' ';}
831 if ((InScript)&&(Str==
"<SCRIPT>")){InScript=
false;}
841 const TStr& BaseUrlStr,
const bool& OutUrlP,
const bool& OutTagsP){
843 TChA OutChA; OutChA+=
' ';
859 if (InScript){
break;}
870 InScript=
true;
break;}
873 OutChA+=
'<'; OutChA+=Str; OutChA+=
'>';
875 if (OutChA.
LastCh()!=
' '){OutChA+=
' ';}
880 if (Tok->IsUrlTok(RelUrlStr)){
885 OutChA+=
"<Url>"; OutChA+=XmlUrlStr; OutChA+=
"</Url>";
897 InScript=
false;
break;}
900 OutChA+=
"</"; OutChA+=Str; OutChA+=
'>';
902 if (OutChA.
LastCh()!=
' '){OutChA+=
' ';}
917 for (
int TokN=0; TokN<
TokV.
Len(); TokN++){
TokV[TokN]->SaveTxt(SOut);}
920 for (
int TokN=0; TokN<
TokV.
Len(); TokN++){
922 TokV[TokN]->SaveTxt(SOut);
929 const TStr& HtmlStr,
const PSOut& TxtSOut,
const TStr& BaseUrlStr,
930 const bool& OutUrlP,
const bool& OutTagsP){
938 const TStr& HtmlStr,
const TStr& TxtFNm,
const TStr& BaseUrlStr,
939 const bool& OutUrlP,
const bool& OutTagsP){
943 SaveHtmlToTxt(HtmlStr, TxtSOut, BaseUrlStr, OutUrlP, OutTagsP);
947 const TStr& HtmlStr,
const PSOut& XmlSOut,
const TStr& BaseUrlStr,
948 const bool& OutTextP,
const bool& OutUrlP,
const bool& OutToksP,
949 const bool& OutTagsP,
const bool& OutArgsP){
955 fprintf(fXml,
"<HtmlDoc>\n");
957 if (!BaseUrlStr.
Empty()){
959 fprintf(fXml,
"<BaseUrl>%s</BaseUrl>\n", XmlBaseUrlStr.
CStr());
964 TChA ContTextChA;
bool InScript=
false;
966 fprintf(fXml,
"<Body>\n");
975 if (InScript){
break;}
978 fprintf(fXml,
" <Str>%s</Str>\n", Str.
CStr());}
979 if (!ContTextChA.
Empty()){ContTextChA+=
' ';} ContTextChA+=Str;
982 if (InScript){
break;}
985 fprintf(fXml,
" <Num>%s</Num>\n", Str.
CStr());}
986 if (!ContTextChA.
Empty()){ContTextChA+=
' ';} ContTextChA+=Str;
989 if (InScript){
break;}
992 fprintf(fXml,
" <Sym>%s</Sym>\n", Str.
CStr());}
993 if (!ContTextChA.
Empty()){ContTextChA+=
' ';} ContTextChA+=Str;
997 if (!ContTextChA.
Empty()){
999 fprintf(fXml,
" <Text>%s</Text>\n", ContTextChA.
CStr());}
1009 InScript=
true;
break;}
1013 fprintf(fXml,
" <BTag Nm=\"%s\">\n", Str.
CStr());
1014 for (
int ArgN=0; ArgN<HtmlLx.
GetArgs(); ArgN++){
1017 fprintf(fXml,
" <Arg Nm=\"%s\" Val=\"%s\"/>", ArgNm.
CStr(), ArgVal.CStr());
1019 fprintf(fXml,
" </BTag>\n");
1021 fprintf(fXml,
" <BTag Nm=\"%s\"/>\n", Str.
CStr());
1027 if (Tok->IsUrlTok(RelUrlStr)){
1033 fprintf(fXml,
" <Url>%s</Url>\n", XmlUrlStr.
CStr());
1041 if (!ContTextChA.
Empty()){
1043 fprintf(fXml,
" <Text>%s</Text>\n", ContTextChA.
CStr());}
1052 InScript=
false;
break;}
1055 fprintf(fXml,
" <ETag Nm=\"%s\"/>\n", Str.
CStr());}
1063 if (!ContTextChA.
Empty()){
1065 fprintf(fXml,
" <Text>%s</Text>\n", ContTextChA.
CStr());}
1068 fprintf(fXml,
"</Body>\n");
1070 fprintf(fXml,
"<OutUrls>\n");
1071 for (
int UrlN=0; UrlN<OutUrlStrV.
Len(); UrlN++){
1073 fprintf(fXml,
" <Url N=\"%d\">%s</Url>\n", 1+UrlN, XmlUrlStr.
CStr());
1075 fprintf(fXml,
"</OutUrls>\n");
1078 fprintf(fXml,
"</HtmlDoc>\n");
1082 const TStr& HtmlStr,
const TStr& XmlFNm,
const TStr& BaseUrlStr,
1083 const bool& OutTextP,
const bool& OutUrlP,
const bool& OutToksP,
1084 const bool& OutTagsP,
const bool& OutArgsP){
1088 SaveHtmlToXml(HtmlStr, XmlSOut, BaseUrlStr, OutTextP, OutUrlP,
1089 OutToksP, OutTagsP, OutArgsP);
1108 const TStr& BaseUrlStr,
const TStr& RedirUrlStr){
1110 if ((Lx.
ChA==TagStr)&&(Lx.
IsArg(ArgNm))){
1127 const TStr& BaseUrlStr,
const TStr& RedirUrlStr){
1134 (
_IsTagRedir(THtmlTok::ATagNm, THtmlTok::HRefArgNm, Lx, BaseUrlStr, RedirUrlStr))||
1135 (
_IsTagRedir(THtmlTok::AreaTagNm, THtmlTok::HRefArgNm, Lx, BaseUrlStr, RedirUrlStr))||
1136 (
_IsTagRedir(THtmlTok::FrameTagNm, THtmlTok::SrcArgNm, Lx, BaseUrlStr, RedirUrlStr))||
1137 (
_IsTagRedir(THtmlTok::ImgTagNm, THtmlTok::SrcArgNm, Lx, BaseUrlStr, RedirUrlStr)))){
1149 RefHtmlDoc(_RefHtmlDoc), HldV(){
1150 bool IsTitleAct=
false;
THtmlTokV TitleTokV;
1151 bool IsHAct=
false;
int ActHTagN=-1;
1156 if ((TokSym==
hsyBTag)&&(TokStr==THtmlTok::ATagNm)){
1164 if ((ATokSym==
hsyETag)&&(ATokStr==THtmlTok::ATagNm)){
break;}
1168 int ETagATokN=ATokN+1;
1173 if (ATokN<0){
break;}
1177 if (ATokV.
Len()>=HldWnLen){
break;}
1187 if (ATokV.
Len()>=HldWnLen){
break;}
1192 for (
int HTagN=1; HTagN<=6; HTagN++){HtmlDoc->
AddTokV(HTokV[HTagN-1]);}
1201 if (TokStr==THtmlTok::TitleTagNm){
1202 IsTitleAct=
true; TitleTokV.
Clr(); TitleTokV.
Add(Tok);
1207 IsHAct=
true; ActHTagN=HTagN;
1208 {
for (
int HTagN=ActHTagN; HTagN<=6; HTagN++){HTokV[HTagN-1].
Clr();}}
1209 HTokV[ActHTagN-1].
Add(Tok);
1214 if (TokStr==THtmlTok::TitleTagNm){
1215 if (IsTitleAct){TitleTokV.
Add(Tok); IsTitleAct=
false;}
1218 if (IsHAct){HTokV[ActHTagN-1].
Add(Tok); IsHAct=
false;}
1222 if (IsTitleAct){TitleTokV.
Add(Tok);}
1223 if (IsHAct){HTokV[ActHTagN-1].
Add(Tok);}
1232 OutUrlV.
Clr(); OutRedirUrlV.
Clr();
1241 for (
int TokN=0; TokN<HtmlDoc->
GetToks(); TokN++){
1245 if (Tok->IsUrlTok(RelUrlStr)){
1249 if (Tok->IsRedirUrlTok()){
1250 OutRedirUrlV.
Add(Url);
1260 OutDescUrlStrKdV.
Clr();
1269 int TokN=0;
int Toks=HtmlDoc->
GetToks();
1271 Tok=HtmlDoc->
GetTok(TokN, TokSym, TokStr); TokN++;
1272 if ((TokSym==
hsyBTag)&&(TokStr==THtmlTok::ATagNm)){
1274 if (Tok->IsUrlTok(RelUrlStr)){
1279 Tok=HtmlDoc->
GetTok(TokN, TokSym, TokStr); TokN++;
1280 if ((TokSym==
hsyETag)&&(TokStr==THtmlTok::ATagNm)){
1284 if (!DescChA.
Empty()){DescChA+=
' ';}
1313 int StrLen=Str.
Len();
int ChN=0;
int PrintChs=0;
1314 while ((ChN<100)&&(ChN<StrLen)){
1319 double PrintPrb=double(PrintChs)/double(ChN+1);
1320 return PrintPrb>0.9;
static const TStr H5TagNm
bool IsGetETag(const TStr &TagNm)
static const TStr H4TagNm
static TStr GetCSZFromYuascii(const TChA &ChA)
TStr GetHRefBeforeStr(const TStr &Str)
static TStr GetWin1250FromYuascii(const TChA &ChA)
static const TStr FrameTagNm
static THtmlLxChDef ChDef
static const TStr H3TagNm
static PSOut New(const TStr &FNm, const bool &Append=false)
bool IsWs(const char &Ch) const
static const TStr H1TagNm
static const TStr TitleArgNm
static const TStr LiTagNm
void MoveToETagOrEof(const TStr &TagNm)
static const TStr HRefArgNm
TStr GetFullBTagStr() const
TStr GetUrlStr(const int &UrlN=-1) const
void GetOutDescUrlStrKdV(TStrKdV &OutDescUrlStrKdV) const
void PutStr(const TStr &Str)
bool IsNum(const char &Ch) const
static bool IsBreakTok(const PHtmlTok &Tok)
void MoveToBTagArgOrEof(const TStr &TagNm, const TStr &ArgNm, const TStr &ArgVal)
TStr GetEscStr(const TStr &Str) const
static void GetTokStrV(const TStr &Str, TStrV &TokStrV)
void SaveAsHttp(const TStr &FNm) const
void AddCh(const char &Ch, const int &MxLen=-1)
bool IsEoln(const char &Ch) const
TSizeTy Len() const
Returns the number of elements in the vector.
void SaveTxt(const PSOut &SOut) const
void PutCh(const char &_Ch)
TStr GetArg(const TStr &ArgNm) const
static const TStr MetaTagNm
static PUrl New(const TStr &RelUrlStr, const TStr &BaseUrlStr=TStr())
TStr GetSubStr(const int &BChN, const int &EChN) const
void MoveToBTagArg2OrEof(const TStr &TagNm, const TStr &ArgNm1, const TStr &ArgVal1, const TStr &ArgNm2, const TStr &ArgVal2, const bool &AndOpP=true)
static TStr GetEscapedStr(const TChA &ChA)
bool IsOk(const TUrlScheme _Scheme=usUndef) const
static bool IsBreakTag(const TStr &TagNm)
static PHtmlLxChDef ChDef
static const TStr TitleTagNm
int PutLn(const int &Lns=1)
bool IsArg(const TStr &ArgNm) const
bool IsGetBTag(const TStr &TagNm)
static const TStr AreaTagNm
static TLxSym GetLxSym(const THtmlLxSym &HtmlLxSym, const TChA &ChA)
THtmlLxSym GetSym() const
void SaveAsHttpBody(const TStr &FNm) const
static const TStr HttpEquivArgNm
TStr GetArgVal(const int &ArgN) const
void SetChTy(const THtmlLxChTy &ChTy, const TStr &Str)
static const TStr H2TagNm
static const TStr CenterTagNm
void Clr(const bool &DoDel=true, const TSizeTy &NoDelLim=-1)
Clears the contents of the vector.
TPt< THtmlLxChDef > PHtmlLxChDef
bool IsArg(const TStr &ArgNm) const
static const TStr UlTagNm
static TStr GetNoTag(const TStr &Str)
virtual TFileId GetFileId() const
void PutAll(const TVal &Val)
Sets all elements of the vector to value Val.
static const TStr CardTagNm
TStr GetArgNm(const int &ArgN) const
static PSIn New(const TStr &Str)
TStr GetStrToETag(const TStr &TagNm, const bool &TxtOnlyP=false)
void PutArg(const TStr &ArgNm, const TStr &ArgVal)
PHtmlTok GetTok(const bool &DoUc=true)
TStr GetArg(const TStr &ArgNm, const TStr &DfArgVal=TStr()) const
TStr GetBodyAsStr() const
bool IsAlNum(const char &Ch) const
TStr GetStrInTag(const TStr &TagNm, const bool &TxtOnlyP=false)
static PHtmlTok GetHTok(const bool &IsBTag, const int &HTagN)
void SaveTxt(const PSOut &SOut, const bool &TxtMode=true) const
void SetEscStr(const TStr &SrcStr, const TStr &DstStr)
static TStr GetCSZFromWin1250(const TChA &ChA)
static const TStr H6TagNm
static TStr GetIsoCeFromYuascii(const TChA &ChA)
void MoveToBTagOrETagOrEof(const TStr &BTagNm, const TStr &ETagNm)
static const TPt< TSOut > StdOut
bool IsRedirUrlTok() const
void MoveToBTag3OrEof(const TStr &TagNm1, const TStr &TagNm2, const TStr &TagNm3)
int GetChTy(const char &Ch) const
static void SaveHtmlToTxt(const TStr &HtmlStr, const PSOut &TxtSOut, const TStr &BaseUrlStr, const bool &OutUrlP, const bool &OutToksP)
bool IsSpace(const char &Ch) const
int GetKeyId(const TKey &Key) const
void AddTokV(const THtmlTokV &_TokV)
TStr GetTextOnlyStrToEof()
static bool IsHTag(const TStr &TagNm, int &HTagN)
int AddKey(const TKey &Key)
static const TStr ImgTagNm
TStr GetStrToETag2(const TStr &TagNm1, const TStr &TagNm2, const bool &TxtOnlyP=false)
static TLxSym GetSSym(const TStr &Str)
void MoveToStrOrEof(const TStr &Str)
void GetOutUrlV(TUrlV &OutUrlV, TUrlV &OutRedirUrlV) const
static void SaveHtmlToXml(const TStr &HtmlStr, const PSOut &XmlSOut, const TStr &BaseUrlStr, const bool &OutTextP, const bool &OutUrlP, const bool &OutToksP, const bool &OutTagsP, const bool &OutArgsP)
bool IsAlpha(const char &Ch) const
static TStr GetAsciiStr(const TChA &ChA, const char &GenericCh='_')
static const TStr AltArgNm
THtmlHldV(const PHtmlDoc &_RefHtmlDoc, const int &HldWnLen=10)
char GetUc(const char &Ch) const
bool IsUrlTok(TStr &RelUrlStr) const
int PutStr(const char *CStr)
void MoveToBTag2OrEof(const TStr &TagNm1, const TStr &TagNm2)
static const TStr TextFldVal
THtmlLx::TArgNmValV ArgNmValV
static const TStr BrTagNm
void SplitOnStr(const TStr &SplitStr, TStrV &StrV) const
static TStr GetRedirHtmlDocStr(const TStr &HtmlStr, const TStr &BaseUrlStr, const TStr &RedirUrlStr)
void SaveBody(const PSOut &SOut) const
static TStr GetTxtLnDoc(const TStr &HtmlStr)
void SaveTxt(const PSOut &SOut, const bool &TxtMode=true)
void MoveToBTagOrEof(const TStr &TagNm)
bool IsKey(const TKey &Key) const
static PHtmlDoc New(const PSIn &SIn, const THtmlDocType &Type=hdtAll, const bool &DoUc=true)
TSizeTy Add()
Adds a new element at the end of the vector, after its current last element.
static const TStr TitleETagNm
TDat & AddDat(const TKey &Key)
PHtmlTok GetTok(const int &TokN) const
static const TStr SrcArgNm
static bool _IsTagRedir(const TStr &TagStr, const TStr &ArgNm, THtmlLx &Lx, const TStr &BaseUrlStr, const TStr &RedirUrlStr)
void SetUcCh(const char &UcCh, const char &LcCh)
TKeyDat< TStr, TStr > TStrKd
static TStr GetSymStr(const THtmlLxSym &Sym)
TStr GetFullUrlStr() const
void SaveTxt(const PSOut &SOut) const
static TStr GetXmlStrFromPlainStr(const TChA &PlainChA)
TStr GetStrToBTag(const TStr &TagNm, const bool &TxtOnlyP=false)
TStr GetHttpBodyAsStr() const