SNAP Library 6.0, User Reference  2020-12-09 16:24:20
SNAP, a general purpose, high performance system for analysis and manipulation of large networks
url.h
Go to the documentation of this file.
1 #include "bd.h"
2 
4 // Url
5 typedef enum {usUndef, usHttp, usOther} TUrlScheme; // Scheme tag stored in a URL object; usUndef is the "no scheme" value.
6 
8 private:
9  static const TStr UrlHttpPrefixStr;
10  static const TStr UrlHttpAbsPrefixStr;
11  TUrlScheme Scheme;
12  TStr UrlStr, RelUrlStr, BaseUrlStr;
13  TStr SchemeNm, HostNm;
14  TStr PortStr, PathStr, SearchStr, FragIdStr;
15  int PortN;
16  TStrV PathSegV;
17  TStr IpNum;
18  TStr FinalUrlStr, FinalHostNm;
19  TStr HttpRqStr;
20  void GetAbs(const TStr& AbsUrlStr);
21  void GetAbsFromBase(const TStr& RelUrlStr, const TStr& BaseUrlStr);
23 public:
24  TUrl(const TStr& _RelUrlStr, const TStr& _BaseUrlStr=TStr());
25  static PUrl New(const TStr& RelUrlStr, const TStr& BaseUrlStr=TStr()){ // Factory; BaseUrlStr defaults to a default-constructed TStr.
26  return PUrl(new TUrl(RelUrlStr, BaseUrlStr));} // Wraps a newly allocated TUrl in a PUrl.
27  ~TUrl(){} // Destructor with an empty body.
29  static PUrl Load(TSIn&){Fail; return NULL;} // Stream loading is not implemented: invokes the Fail macro (bd.h); the NULL return follows it.
30  void Save(TSOut&){Fail;} // Stream saving is not implemented: the body only invokes the Fail macro (bd.h).
31 
32  bool IsOk(const TUrlScheme _Scheme=usUndef) const { // Scheme test; with the default argument it asks "is any scheme set?".
33  if (_Scheme==usUndef){return Scheme!=usUndef;} // No specific scheme requested: true when Scheme is anything but usUndef.
34  else {return Scheme==_Scheme;}} // Otherwise true only when Scheme equals the requested scheme.
35  TUrlScheme GetScheme() const {return Scheme;} // Returns the stored scheme tag; const-qualified so it is callable on const objects like the other getters.
36  TStr GetUrlStr() const {return UrlStr;} // Returns a copy of UrlStr; no IsOk() check is made.
37  TStr GetRelUrlStr() const {return RelUrlStr;} // Returns a copy of RelUrlStr; no IsOk() check is made.
38  bool IsBaseUrl() const {return !BaseUrlStr.Empty();} // True when BaseUrlStr is non-empty; const-qualified since it only reads state.
39  TStr GetBaseUrlStr() const {return BaseUrlStr;} // Returns a copy of BaseUrlStr; no IsOk() check is made.
40  TStr GetSchemeNm() const {EAssert(IsOk()); return SchemeNm;} // Checks IsOk() via EAssert, then returns SchemeNm.
41  TStr GetHostNm() const {EAssert(IsOk()); return HostNm;} // Checks IsOk() via EAssert, then returns HostNm.
42  TStr GetDmNm(const int& MxDmSegs=-1) const;
43  bool IsPortOk() const { EAssert(IsOk()); return (PortN > 0); } // Checks IsOk() via EAssert; true only for a strictly positive PortN.
44  TStr GetPortStr() const {EAssert(IsOk()); return PortStr;} // Checks IsOk() via EAssert, then returns PortStr.
45  int GetPortN() const {EAssert(IsOk()&&(PortN!=-1)); return PortN;} // Requires IsOk() and PortN!=-1. NOTE(review): IsPortOk() tests PortN>0, a different condition - confirm which is intended.
46  TStr GetPathStr() const {EAssert(IsOk()); return PathStr;} // Checks IsOk() via EAssert, then returns PathStr.
47  int GetPathSegs() const {return PathSegV.Len();} // Number of entries in PathSegV; no IsOk() check is made.
48  TStr GetPathSeg(const int& PathSegN) const {return PathSegV[PathSegN];} // Returns PathSegV[PathSegN]; this method adds no range or IsOk() check of its own.
49  TStr GetSearchStr() const {EAssert(IsOk()); return SearchStr;} // Checks IsOk() via EAssert, then returns SearchStr.
50  TStr GetFragIdStr() const {EAssert(IsOk()); return FragIdStr;} // Checks IsOk() via EAssert, then returns FragIdStr.
51 
52  bool IsIpNum() const {return !IpNum.Empty();} // True when an IpNum string has been stored (non-empty).
53  void PutIpNum(const TStr& _IpNum){IpNum=_IpNum;} // Stores _IpNum as given; no validation is done here.
54  TStr GetIpNum() const {EAssert(IsIpNum()); return IpNum;} // Checks IsIpNum() via EAssert, then returns IpNum.
55  TStr GetIpNumOrHostNm() const {return IsIpNum() ? GetIpNum() : GetHostNm();} // Prefers the stored IpNum; falls back to GetHostNm() when none is set.
56 
57  bool IsDefFinalUrl() const { // Reports whether a final URL has been recorded.
58  EAssert(IsOk(usHttp)); return !FinalUrlStr.Empty();} // Requires scheme usHttp (EAssert); true when FinalUrlStr is non-empty.
59  TStr GetFinalUrlStr() const { // Strict getter for the final URL string.
60  EAssert(IsDefFinalUrl()); return FinalUrlStr;} // Requires IsDefFinalUrl() (EAssert) before returning FinalUrlStr.
61  TStr GetAsFinalUrlStr() const { // Lenient getter: the final URL if one is defined, otherwise the original UrlStr.
62  if (IsDefFinalUrl()){return FinalUrlStr;} else {return UrlStr;}}
63  TStr GetFinalHostNm() const { // Strict getter for the final host name.
64  EAssert(IsDefFinalUrl()); return FinalHostNm;} // Requires IsDefFinalUrl() (EAssert) before returning FinalHostNm.
65  TStr GetAsFinalHostNm() const { // Lenient getter: the final host name if a final URL is defined, otherwise HostNm.
66  if (IsDefFinalUrl()){return FinalHostNm;} else {return HostNm;}}
67  void DefUrlAsFinal(){ // Marks the URL itself as its own final URL.
68  EAssert(IsOk(usHttp)); EAssert(!IsDefFinalUrl()); // Requires scheme usHttp and that no final URL has been defined yet.
69  FinalUrlStr=UrlStr; FinalHostNm=HostNm;} // Copies the current URL string and host name into the final-URL fields.
70  void DefFinalUrl(const TStr& _FinalHostNm);
71 
72  void PutHttpRqStr(const TStr& _HttpRqStr){HttpRqStr=_HttpRqStr;} // Stores _HttpRqStr, replacing any previous value.
73  TStr GetHttpRqStr() const {return HttpRqStr;} // Returns a copy of HttpRqStr; it may be empty (see no assertion here).
74  bool IsHttpRqStr() const {return !HttpRqStr.Empty();} // True when HttpRqStr is non-empty.
75  void ChangeHttpRqStr(const TStr& SrcStr, const TStr& DstStr){ // Edits HttpRqStr in place.
76  HttpRqStr.ChangeStr(SrcStr, DstStr);} // Delegates to TStr::ChangeStr(SrcStr, DstStr); presumably replaces SrcStr with DstStr - confirm whether first or all occurrences.
77 
78  bool IsInHost(const TStr& _HostNm) const { // Tests whether HostNm ends with _HostNm.
79  EAssert(IsOk()); return HostNm.GetUc().IsSuffix(_HostNm.GetUc());} // Both sides go through GetUc() first (presumably upper-casing, i.e. case-insensitive). NOTE(review): looks like a plain string-suffix test, not bounded at a '.' - confirm.
80  bool IsInPath(const TStr& _PathStr) const { // Tests whether PathStr starts with _PathStr.
81  EAssert(IsOk()); return PathStr.GetUc().IsPrefix(_PathStr.GetUc());} // Both sides go through GetUc() first (presumably upper-casing, i.e. case-insensitive - confirm).
82  void ToLcPath();
83 
84  static bool IsAbs(const TStr& UrlStr);
85  static bool IsScript(const TStr& UrlStr);
86  static bool IsSite(const TStr& UrlStr);
87 
88  static PUrl GetUrlFromShortcut(const TStr& ShortcutUrlStr,
89  const TStr& DfHostNmPrefix, const TStr& DfHostNmSufix);
90  static TStr GetUrlSearchStr(const TStr& Str);
91  static TStr EncodeUrlStr(const TStr& Str){return GetUrlSearchStr(Str);} // Alias: forwards unchanged to GetUrlSearchStr(Str).
92  static TStr DecodeUrlStr(const TStr& UrlStr);
93  static TStr GetDocStrFromUrlStr(const TStr& UrlStr, const int& Copies=1);
94  static TStr GetTopDownDocNm(
95  const TStr& UrlStr, const int& MxLen=-1, const bool& HostOnlyP=false);
96 };
100 
102 // Url-Environment
104 private:
105  TStr BaseUrlStr;
106  TStrV KeyNmV;
107  TStrStrVH KeyNmToValH;
108 public:
109  TUrlEnv(): // Default constructor: BaseUrlStr is left default-constructed.
110  KeyNmV(), KeyNmToValH(10){} // KeyNmToValH is built with argument 10 (presumably an initial size hint - confirm against THash).
111  TUrlEnv(const TUrlEnv& UrlEnv): BaseUrlStr(UrlEnv.BaseUrlStr), // Copy constructor; BaseUrlStr is copied too so the copy keeps its base URL.
112  KeyNmV(UrlEnv.KeyNmV), KeyNmToValH(UrlEnv.KeyNmToValH){} // Initializers follow member declaration order.
113  static PUrlEnv New(){return new TUrlEnv();} // Returns a newly allocated, default-constructed TUrlEnv as a PUrlEnv.
114  static PUrlEnv New(const TStr& BaseUrlStr, // Convenience factory: a base URL plus up to four key/value pairs.
115  const TStr& KeyNm1=TStr(), const TStr& ValStr1=TStr(),
116  const TStr& KeyNm2=TStr(), const TStr& ValStr2=TStr(),
117  const TStr& KeyNm3=TStr(), const TStr& ValStr3=TStr(),
118  const TStr& KeyNm4=TStr(), const TStr& ValStr4=TStr()){
119  PUrlEnv UrlEnv=New(); // Start from an empty environment.
120  UrlEnv->PutBaseUrlStr(BaseUrlStr);
121  if (!KeyNm1.Empty()){UrlEnv->AddKeyVal(KeyNm1, ValStr1);} // Each pair is tested independently: an empty key name skips only that pair.
122  if (!KeyNm2.Empty()){UrlEnv->AddKeyVal(KeyNm2, ValStr2);}
123  if (!KeyNm3.Empty()){UrlEnv->AddKeyVal(KeyNm3, ValStr3);}
124  if (!KeyNm4.Empty()){UrlEnv->AddKeyVal(KeyNm4, ValStr4);}
125  return UrlEnv;}
127  TUrlEnv(TSIn& SIn): KeyNmV(SIn), KeyNmToValH(SIn){} // Stream constructor: reads KeyNmV, then KeyNmToValH; BaseUrlStr is not read from SIn.
128  static PUrlEnv Load(TSIn& SIn){return new TUrlEnv(SIn);} // Returns a newly allocated TUrlEnv built by the TSIn constructor.
129  void Save(TSOut& SOut){KeyNmV.Save(SOut); KeyNmToValH.Save(SOut);} // Writes KeyNmV, then KeyNmToValH; BaseUrlStr is not written.
130 
131  TUrlEnv& operator=(const TUrlEnv& Env){ // Copy assignment; self-assignment is a no-op.
132  if (this!=&Env){BaseUrlStr=Env.BaseUrlStr; KeyNmV=Env.KeyNmV; KeyNmToValH=Env.KeyNmToValH;} // BaseUrlStr is assigned too so the target keeps the source's base URL.
133  return *this;}
134 
135  // base url
136  void PutBaseUrlStr(const TStr& _BaseUrlStr){BaseUrlStr=_BaseUrlStr;} // Stores _BaseUrlStr, replacing any previous value.
137  TStr GetBaseUrlStr() const {return BaseUrlStr;} // Returns a copy of BaseUrlStr.
138 
139  // adding key-value
140  void AddKeyVal(const TStr& KeyNm, const TStr& ValStr){ // Sets KeyNm to the single value ValStr.
141  if (!IsKey(KeyNm)){KeyNmV.Add(KeyNm); KeyNmToValH.AddKey(KeyNm);} // First use of the key: register it in both containers.
142  KeyNmToValH.GetDat(KeyNm).Clr(); // Discard any values stored earlier under this key.
143  KeyNmToValH.GetDat(KeyNm).Add(ValStr);} // ValStr becomes the key's only value.
144  void AddToKeyVal(const TStr& KeyNm, const TStr& ValStr){ // Appends ValStr to KeyNm's values, keeping existing ones.
145  if (!IsKey(KeyNm)){KeyNmV.Add(KeyNm); KeyNmToValH.AddKey(KeyNm);} // First use of the key: register it in both containers.
146  KeyNmToValH.GetDat(KeyNm).Add(ValStr);} // No Clr() here, so earlier values are retained.
147 
148  // key retrieval
149  bool Empty() const {return KeyNmV.Empty();} // True when no key has been added (KeyNmV is empty).
150  int GetKeys() const {return KeyNmV.Len();} // Number of keys, taken from KeyNmV.
151  bool IsKey(const TStr& KeyNm) const {return KeyNmV.SearchForw(KeyNm)!=-1;} // True when KeyNmV.SearchForw finds KeyNm (result other than -1).
152  int GetKeyN(const TStr& KeyNm) const {return KeyNmV.SearchForw(KeyNm);} // Returns the SearchForw result for KeyNm in KeyNmV; IsKey() treats -1 as "not found".
153  TStr GetKeyNm(const int& KeyN) const {return KeyNmV[KeyN];} // Returns KeyNmV[KeyN]; this method adds no range check of its own.
154 
155  // value retrieval
156  int GetVals(const int& KeyN) const { // Number of values stored for the key at position KeyN.
157  return KeyNmToValH.GetDat(KeyNmV[KeyN]).Len();} // Resolves the key name via KeyNmV[KeyN], then takes the length of its value vector.
158  int GetVals(const TStr& KeyNm) const { // Number of values stored for KeyNm.
159  return KeyNmToValH.GetDat(KeyNm).Len();} // No IsKey() check is made before GetDat here.
160  TStr GetVal(const int& KeyN, const int& ValN=0) const { // Value number ValN (default 0) of the key at position KeyN.
161  return KeyNmToValH.GetDat(KeyNmV[KeyN])[ValN];} // Neither KeyN nor ValN is range-checked in this method.
162  TStr GetVal(const TStr& KeyNm, const int& ValN=0, const TStr& DfVal="") const { // Value number ValN (default 0) of KeyNm, with a fallback.
163  if (KeyNmToValH.IsKey(KeyNm)){
164  return KeyNmToValH.GetDat(KeyNm)[ValN];} // Key present: index its value vector; ValN is not range-checked here.
165  else {return DfVal;}} // Key absent: return the caller-supplied default.
166 
167  // full-url-string
168  TStr GetFullUrlStr() const;
169 
170  static PUrlEnv MkClone(const PUrlEnv& UrlEnv);
171 };
172 
~TUrlEnv()
Definition: url.h:126
TStr GetIpNum() const
Definition: url.h:54
TStr GetRelUrlStr() const
Definition: url.h:37
TUrlScheme
Definition: url.h:5
TStr GetSchemeNm() const
Definition: url.h:40
#define UndefDefaultCopyAssign(TNm)
Definition: bd.h:203
static TStr EncodeUrlStr(const TStr &Str)
Definition: url.h:91
TStr GetVal(const TStr &KeyNm, const int &ValN=0, const TStr &DfVal="") const
Definition: url.h:162
TStr GetBaseUrlStr() const
Definition: url.h:39
static PUrlEnv New(const TStr &BaseUrlStr, const TStr &KeyNm1=TStr(), const TStr &ValStr1=TStr(), const TStr &KeyNm2=TStr(), const TStr &ValStr2=TStr(), const TStr &KeyNm3=TStr(), const TStr &ValStr3=TStr(), const TStr &KeyNm4=TStr(), const TStr &ValStr4=TStr())
Definition: url.h:114
Definition: url.h:7
Definition: url.h:5
TStr GetFinalHostNm() const
Definition: url.h:63
void AddKeyVal(const TStr &KeyNm, const TStr &ValStr)
Definition: url.h:140
#define Fail
Definition: bd.h:238
TUrlEnv(const TUrlEnv &UrlEnv)
Definition: url.h:111
TStr GetUc() const
Definition: dt.h:496
int GetPathSegs() const
Definition: url.h:47
void AddToKeyVal(const TStr &KeyNm, const TStr &ValStr)
Definition: url.h:144
bool IsHttpRqStr() const
Definition: url.h:74
void Save(TSOut &)
Definition: url.h:30
bool IsInPath(const TStr &_PathStr) const
Definition: url.h:80
bool IsOk(const TUrlScheme _Scheme=usUndef) const
Definition: url.h:32
THash< TInt, PUrl > TIdToUrlH
Definition: url.h:99
TStr GetAsFinalUrlStr() const
Definition: url.h:61
~TUrl()
Definition: url.h:27
bool IsBaseUrl()
Definition: url.h:38
int GetPortN() const
Definition: url.h:45
bool IsIpNum() const
Definition: url.h:52
TPt< TUrl > PUrl
Definition: url.h:7
bool Empty() const
Definition: url.h:149
void PutHttpRqStr(const TStr &_HttpRqStr)
Definition: url.h:72
Definition: url.h:5
static PUrl Load(TSIn &)
Definition: url.h:29
Definition: fl.h:58
static PUrlEnv Load(TSIn &SIn)
Definition: url.h:128
TStrV KeyNmV
Definition: url.h:106
bool IsPortOk() const
Definition: url.h:43
#define ClassTP(TNm, PNm)
Definition: bd.h:126
int GetKeys() const
Definition: url.h:150
TStr GetPathStr() const
Definition: url.h:46
Definition: url.h:103
#define ClassTPV(TNm, PNm, TNmV)
Definition: bd.h:162
TEnv Env
Definition: env.cpp:297
TStr GetHostNm() const
Definition: url.h:41
TStr GetIpNumOrHostNm() const
Definition: url.h:55
void ChangeHttpRqStr(const TStr &SrcStr, const TStr &DstStr)
Definition: url.h:75
bool IsInHost(const TStr &_HostNm) const
Definition: url.h:78
TStr GetVal(const int &KeyN, const int &ValN=0) const
Definition: url.h:160
int GetVals(const int &KeyN) const
Definition: url.h:156
TStr GetPathSeg(const int &PathSegN) const
Definition: url.h:48
TQQueue< TIdUrlPr > TIdUrlPrQ
Definition: url.h:98
static PUrlEnv New()
Definition: url.h:113
TStr GetUrlStr() const
Definition: url.h:36
TStr GetFragIdStr() const
Definition: url.h:50
Definition: ds.h:2610
Definition: fl.h:128
TStr GetKeyNm(const int &KeyN) const
Definition: url.h:153
void DefUrlAsFinal()
Definition: url.h:67
bool IsKey(const TStr &KeyNm) const
Definition: url.h:151
TStr GetSearchStr() const
Definition: url.h:49
#define EAssert(Cond)
Definition: bd.h:280
void PutBaseUrlStr(const TStr &_BaseUrlStr)
Definition: url.h:136
TStr GetAsFinalHostNm() const
Definition: url.h:65
Definition: ds.h:32
TUrlEnv(TSIn &SIn)
Definition: url.h:127
Definition: url.h:5
Definition: dt.h:412
Definition: hash.h:97
bool IsDefFinalUrl() const
Definition: url.h:57
int GetKeyN(const TStr &KeyNm) const
Definition: url.h:152
TStr GetPortStr() const
Definition: url.h:44
TUrl(TSIn &)
Definition: url.h:28
void PutIpNum(const TStr &_IpNum)
Definition: url.h:53
Definition: bd.h:196
TStrStrVH KeyNmToValH
Definition: url.h:107
TStr GetFinalUrlStr() const
Definition: url.h:59
TStr GetHttpRqStr() const
Definition: url.h:73
TPair< TInt, PUrl > TIdUrlPr
Definition: url.h:97
int GetVals(const TStr &KeyNm) const
Definition: url.h:158
void Save(TSOut &SOut)
Definition: url.h:129
TStr GetBaseUrlStr() const
Definition: url.h:137
TUrlEnv & operator=(const TUrlEnv &Env)
Definition: url.h:131
Vector is a sequence of TVal objects representing an array that can change in size.
Definition: ds.h:430
TUrlScheme GetScheme()
Definition: url.h:35