SNAP Library 3.0, User Reference  2016-07-20 17:56:49
SNAP, a general purpose, high performance system for analysis and manipulation of large networks
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
TUStr Class Reference

#include <unicodestring.h>

Public Member Functions

 TUStr ()
 
 TUStr (const TUStr &UStr)
 
 TUStr (const TIntV &_UniChV)
 
 TUStr (const TStr &Str)
 
 ~TUStr ()
 
 TUStr (TSIn &SIn)
 
void Save (TSOut &SOut) const
 
void LoadXml (const PXmlTok &XmlTok, const TStr &Nm)
 
void SaveXml (TSOut &SOut, const TStr &Nm) const
 
TUStr & operator= (const TUStr &UStr)
 
bool operator== (const TUStr &UStr) const
 
TUStr & operator+= (const TUStr &UStr)
 
int operator[] (const int &UniChN) const
 
void Clr ()
 
int Len () const
 
bool Empty () const
 
void ToLowerCase ()
 
void ToUpperCase ()
 
void ToStarterCase ()
 
void GetWordBoundPV (TBoolV &WordBoundPV)
 
void GetWordUStrV (TUStrV &UStrV)
 
TStr GetStr () const
 
TStr GetStarterStr () const
 
TStr GetStarterLowerCaseStr () const
 

Static Public Member Functions

static int GetScriptId (const TStr &ScriptNm)
 
static TStr GetScriptNm (const int &ScriptId)
 
static int GetChScriptId (const int &UniCh)
 
static TStr GetChScriptNm (const int &UniCh)
 
static TStr GetChNm (const int &UniCh)
 
static TStr GetChTypeStr (const int &UniCh)
 
static bool IsCase (const int &UniCh)
 
static bool IsUpperCase (const int &UniCh)
 
static bool IsLowerCase (const int &UniCh)
 
static bool IsAlphabetic (const int &UniCh)
 
static bool IsMath (const int &UniCh)
 
static TStr EncodeUtf8 (const int &UniCh)
 

Static Private Member Functions

static void AssertUnicodeDefOk ()
 

Private Attributes

TIntV UniChV
 

Detailed Description

Definition at line 32 of file unicodestring.h.

Constructor & Destructor Documentation

TUStr::TUStr ( )
inline

Definition at line 38 of file unicodestring.h.

TIntV UniChV
Definition: unicodestring.h:34
static void AssertUnicodeDefOk()
Definition: unicodestring.h:35
TUStr::TUStr ( const TUStr & UStr)
inline

Definition at line 39 of file unicodestring.h.

TIntV UniChV
Definition: unicodestring.h:34
static void AssertUnicodeDefOk()
Definition: unicodestring.h:35
TUStr::TUStr ( const TIntV & _UniChV)
inline

Definition at line 40 of file unicodestring.h.

40 : UniChV(_UniChV){AssertUnicodeDefOk();}
TIntV UniChV
Definition: unicodestring.h:34
static void AssertUnicodeDefOk()
Definition: unicodestring.h:35
TUStr::TUStr ( const TStr & Str)

Definition at line 12 of file unicodestring.cpp.

12  {
15  TIntV NfcUniChV; TUnicodeDef::GetDef()->Decompose(UniChV, NfcUniChV, true);
16  UniChV=NfcUniChV;
17 }
static TUnicode * GetDef()
Definition: unicodestring.h:23
int DecodeUtf8(const TIntV &src, TIntV &dest) const
Definition: unicode.h:1787
TIntV UniChV
Definition: unicodestring.h:34
void Decompose(const TIntV &src, TIntV &dest, bool compatibility) const
Definition: unicode.h:1934
static void AssertUnicodeDefOk()
Definition: unicodestring.h:35
TUStr::~TUStr ( )
inline

Definition at line 42 of file unicodestring.h.

42 {}
TUStr::TUStr ( TSIn & SIn)
inline

Definition at line 43 of file unicodestring.h.

43 : UniChV(SIn){AssertUnicodeDefOk();}
TIntV UniChV
Definition: unicodestring.h:34
static void AssertUnicodeDefOk()
Definition: unicodestring.h:35

Member Function Documentation

static void TUStr::AssertUnicodeDefOk ( )
inline static private

Definition at line 35 of file unicodestring.h.

35  {
36  EAssertR(TUnicodeDef::IsDef(), "Unicode-Definition-File not loaded!");}
static bool IsDef()
Definition: unicodestring.h:21
#define EAssertR(Cond, MsgStr)
Definition: bd.h:283
void TUStr::Clr ( )
inline

Definition at line 56 of file unicodestring.h.

56 {UniChV.Clr();}
void Clr(const bool &DoDel=true, const TSizeTy &NoDelLim=-1)
Clears the contents of the vector.
Definition: ds.h:971
TIntV UniChV
Definition: unicodestring.h:34
bool TUStr::Empty ( ) const
inline

Definition at line 58 of file unicodestring.h.

58 {return UniChV.Empty();}
bool Empty() const
Tests whether the vector is empty.
Definition: ds.h:542
TIntV UniChV
Definition: unicodestring.h:34
TStr TUStr::EncodeUtf8 ( const int &  UniCh)
static

Definition at line 157 of file unicodestring.cpp.

157  {
160 }
TStr EncodeUtf8Str(const TIntV &src) const
Definition: unicode.h:1796
static TUnicode * GetDef()
Definition: unicodestring.h:23
static TVec< TInt, TSizeTy > GetV(const TInt &Val1)
Returns a vector containing the single element Val1.
Definition: ds.h:817
static void AssertUnicodeDefOk()
Definition: unicodestring.h:35
TStr TUStr::GetChNm ( const int &  UniCh)
static

Definition at line 104 of file unicodestring.cpp.

104  {
105  TStr UniChNm(TUnicodeDef::GetDef()->ucd.GetCharNameS(UniCh));
106  return UniChNm;
107 }
static TUnicode * GetDef()
Definition: unicodestring.h:23
TStr GetCharNameS(const int cp) const
Definition: unicode.h:2025
Definition: dt.h:412
int TUStr::GetChScriptId ( const int &  UniCh)
static

Definition at line 96 of file unicodestring.cpp.

96  {
97  return TUnicodeDef::GetDef()->ucd.GetScript(UniCh);
98 }
int GetScript(const TUniChInfo &ci) const
Definition: unicode.h:1323
TUniChDb ucd
Definition: unicode.h:1775
static TUnicode * GetDef()
Definition: unicodestring.h:23
TStr TUStr::GetChScriptNm ( const int &  UniCh)
static

Definition at line 100 of file unicodestring.cpp.

100  {
101  return GetScriptNm(GetChScriptId(UniCh));
102 }
static int GetChScriptId(const int &UniCh)
static TStr GetScriptNm(const int &ScriptId)
TStr TUStr::GetChTypeStr ( const int &  UniCh)
static

Definition at line 109 of file unicodestring.cpp.

109  {
110  TChA ChTypeChA;
111  ChTypeChA+='[';
112  if (IsCase(UniCh)){ChTypeChA+="Case,";}
113  if (IsUpperCase(UniCh)){ChTypeChA+="UpperCase,";}
114  if (IsLowerCase(UniCh)){ChTypeChA+="LowerCase,";}
115  if (IsAlphabetic(UniCh)){ChTypeChA+="Alphabetic,";}
116  if (IsMath(UniCh)){ChTypeChA+="Math,";}
117  if (ChTypeChA.LastCh()=='['){ChTypeChA+=']';}
118  else {ChTypeChA[ChTypeChA.Len()-1]=']';}
119  return ChTypeChA;
120 }
static bool IsAlphabetic(const int &UniCh)
int Len() const
Definition: dt.h:259
static bool IsLowerCase(const int &UniCh)
static bool IsMath(const int &UniCh)
char LastCh() const
Definition: dt.h:281
static bool IsUpperCase(const int &UniCh)
Definition: dt.h:201
static bool IsCase(const int &UniCh)
int TUStr::GetScriptId ( const TStr & ScriptNm)
static

Definition at line 88 of file unicodestring.cpp.

88  {
89  return TUnicodeDef::GetDef()->ucd.GetScriptByName(ScriptNm);
90 }
int GetScriptByName(const TStr &scriptName) const
Definition: unicode.h:1322
TUniChDb ucd
Definition: unicode.h:1775
static TUnicode * GetDef()
Definition: unicodestring.h:23
TStr TUStr::GetScriptNm ( const int &  ScriptId)
static

Definition at line 92 of file unicodestring.cpp.

92  {
93  return TUnicodeDef::GetDef()->ucd.GetScriptName(ScriptId);
94 }
const TStr & GetScriptName(const int scriptId) const
Definition: unicode.h:1321
TUniChDb ucd
Definition: unicode.h:1775
static TUnicode * GetDef()
Definition: unicodestring.h:23
TStr TUStr::GetStarterLowerCaseStr ( ) const

Definition at line 79 of file unicodestring.cpp.

79  {
80  TIntV UniChV1; TIntV UniChV2; TIntV UniChV3;
82  TUnicodeDef::GetDef()->ExtractStarters(UniChV1, UniChV2);
83  TUnicodeDef::GetDef()->Decompose(UniChV2, UniChV3, true);
84  TStr Str=TUnicodeDef::GetDef()->EncodeUtf8Str(UniChV3);
85  return Str;
86 }
TStr EncodeUtf8Str(const TIntV &src) const
Definition: unicode.h:1796
static TUnicode * GetDef()
Definition: unicodestring.h:23
void GetSimpleLowerCase(const TIntV &src, TIntV &dest) const
Definition: unicode.h:1972
int ExtractStarters(const TIntV &src, TIntV &dest) const
Definition: unicode.h:1951
Definition: dt.h:412
TIntV UniChV
Definition: unicodestring.h:34
void Decompose(const TIntV &src, TIntV &dest, bool compatibility) const
Definition: unicode.h:1934
TStr TUStr::GetStarterStr ( ) const

Definition at line 71 of file unicodestring.cpp.

71  {
72  TIntV UniChV1; TIntV UniChV2;
74  TUnicodeDef::GetDef()->Decompose(UniChV1, UniChV2, true);
75  TStr Str=TUnicodeDef::GetDef()->EncodeUtf8Str(UniChV2);
76  return Str;
77 }
TStr EncodeUtf8Str(const TIntV &src) const
Definition: unicode.h:1796
static TUnicode * GetDef()
Definition: unicodestring.h:23
int ExtractStarters(const TIntV &src, TIntV &dest) const
Definition: unicode.h:1951
Definition: dt.h:412
TIntV UniChV
Definition: unicodestring.h:34
void Decompose(const TIntV &src, TIntV &dest, bool compatibility) const
Definition: unicode.h:1934
TStr TUStr::GetStr ( ) const

Definition at line 66 of file unicodestring.cpp.

66  {
68  return Str;
69 }
TStr EncodeUtf8Str(const TIntV &src) const
Definition: unicode.h:1796
static TUnicode * GetDef()
Definition: unicodestring.h:23
Definition: dt.h:412
TIntV UniChV
Definition: unicodestring.h:34
void TUStr::GetWordBoundPV ( TBoolV & WordBoundPV)

Definition at line 33 of file unicodestring.cpp.

33  {
35 }
static TUnicode * GetDef()
Definition: unicodestring.h:23
void FindWordBoundaries(const TIntV &src, TBoolV &dest) const
Definition: unicode.h:1907
TIntV UniChV
Definition: unicodestring.h:34
void TUStr::GetWordUStrV ( TUStrV & UStrV)

Definition at line 37 of file unicodestring.cpp.

37  {
38  // clear word vector
39  WordUStrV.Clr();
40  // create boundaries
41  TBoolV WordBoundPV; GetWordBoundPV(WordBoundPV);
42  IAssert(Len()==WordBoundPV.Len()-1);
43  IAssert((WordBoundPV.Len()>0)&&(WordBoundPV.Last()));
44  // traverse characters and bounds
45  int UniChs=Len(); TIntV WordUniChV;
46  for (int UniChN=0; UniChN<=UniChs; UniChN++){
47  if ((UniChN==UniChs)||(WordBoundPV[UniChN+1])){ // finish or word-boundary
48  if (UniChN<UniChs){ // if not finish
49  // if last-word-char or single-alphabetic-char
50  if ((!WordUniChV.Empty())||(IsAlphabetic(UniChV[UniChN]))){
51  WordUniChV.Add(UniChV[UniChN]); // add char
52  }
53  }
54  if (!WordUniChV.Empty()){ // add current word to vector
55  TUStr WordUStr(WordUniChV); // construct word from char-vector
56  WordUStrV.Add(WordUStr); // add word to word-vector
57  WordUniChV.Clr(false); // clear char-vector
58  }
59  } else {
60  // add character to char-vector
61  WordUniChV.Add(UniChV[UniChN]);
62  }
63  }
64 }
#define IAssert(Cond)
Definition: bd.h:262
static bool IsAlphabetic(const int &UniCh)
TSizeTy Len() const
Returns the number of elements in the vector.
Definition: ds.h:547
bool Empty() const
Tests whether the vector is empty.
Definition: ds.h:542
void Clr(const bool &DoDel=true, const TSizeTy &NoDelLim=-1)
Clears the contents of the vector.
Definition: ds.h:971
const TVal & Last() const
Returns a reference to the last element of the vector.
Definition: ds.h:551
void GetWordBoundPV(TBoolV &WordBoundPV)
TIntV UniChV
Definition: unicodestring.h:34
TSizeTy Add()
Adds a new element at the end of the vector, after its current last element.
Definition: ds.h:574
int Len() const
Definition: unicodestring.h:57
bool TUStr::IsAlphabetic ( const int &  UniCh)
static

Definition at line 143 of file unicodestring.cpp.

143  {
144  TUniChInfo ChInfo;
145  if (TUnicodeDef::GetDef()->ucd.IsGetChInfo(UniCh, ChInfo)){
146  return ChInfo.IsAlphabetic();}
147  else {return false;}
148 }
bool IsAlphabetic() const
Definition: unicode.h:1071
static TUnicode * GetDef()
Definition: unicodestring.h:23
bool TUStr::IsCase ( const int &  UniCh)
static

Definition at line 122 of file unicodestring.cpp.

122  {
123  TUniChInfo ChInfo;
124  if (TUnicodeDef::GetDef()->ucd.IsGetChInfo(UniCh, ChInfo)){
125  return ChInfo.IsCased();}
126  else {return false;}
127 }
static TUnicode * GetDef()
Definition: unicodestring.h:23
bool IsCased() const
Definition: unicode.h:1142
bool TUStr::IsLowerCase ( const int &  UniCh)
static

Definition at line 136 of file unicodestring.cpp.

136  {
137  TUniChInfo ChInfo;
138  if (TUnicodeDef::GetDef()->ucd.IsGetChInfo(UniCh, ChInfo)){
139  return ChInfo.IsLowercase();}
140  else {return false;}
141 }
static TUnicode * GetDef()
Definition: unicodestring.h:23
bool IsLowercase() const
Definition: unicode.h:1073
bool TUStr::IsMath ( const int &  UniCh)
static

Definition at line 150 of file unicodestring.cpp.

150  {
151  TUniChInfo ChInfo;
152  if (TUnicodeDef::GetDef()->ucd.IsGetChInfo(UniCh, ChInfo)){
153  return ChInfo.IsMath();}
154  else {return false;}
155 }
static TUnicode * GetDef()
Definition: unicodestring.h:23
bool IsMath() const
Definition: unicode.h:1074
bool TUStr::IsUpperCase ( const int &  UniCh)
static

Definition at line 129 of file unicodestring.cpp.

129  {
130  TUniChInfo ChInfo;
131  if (TUnicodeDef::GetDef()->ucd.IsGetChInfo(UniCh, ChInfo)){
132  return ChInfo.IsUppercase();}
133  else {return false;}
134 }
static TUnicode * GetDef()
Definition: unicodestring.h:23
bool IsUppercase() const
Definition: unicode.h:1072
int TUStr::Len ( ) const
inline

Definition at line 57 of file unicodestring.h.

57 {return UniChV.Len();}
TSizeTy Len() const
Returns the number of elements in the vector.
Definition: ds.h:547
TIntV UniChV
Definition: unicodestring.h:34
void TUStr::LoadXml ( const PXmlTok & XmlTok,
const TStr & Nm 
)
TUStr& TUStr::operator+= ( const TUStr & UStr)
inline

Definition at line 52 of file unicodestring.h.

52 {UniChV.AddV(UStr.UniChV); return *this;}
TIntV UniChV
Definition: unicodestring.h:34
TSizeTy AddV(const TVec< TVal, TSizeTy > &ValV)
Adds the elements of the vector ValV to the end of the vector.
Definition: ds.h:1056
TUStr& TUStr::operator= ( const TUStr & UStr)
inline

Definition at line 48 of file unicodestring.h.

48  {
49  if (this!=&UStr){UniChV=UStr.UniChV;} return *this;}
TIntV UniChV
Definition: unicodestring.h:34
bool TUStr::operator== ( const TUStr & UStr) const
inline

Definition at line 50 of file unicodestring.h.

50 {return UniChV==UStr.UniChV;}
TIntV UniChV
Definition: unicodestring.h:34
int TUStr::operator[] ( const int &  UniChN) const
inline

Definition at line 53 of file unicodestring.h.

53 {return UniChV[UniChN];}
TIntV UniChV
Definition: unicodestring.h:34
void TUStr::Save ( TSOut & SOut) const
inline

Definition at line 44 of file unicodestring.h.

44 {UniChV.Save(SOut);}
void Save(TSOut &SOut) const
Definition: ds.h:903
TIntV UniChV
Definition: unicodestring.h:34
void TUStr::SaveXml ( TSOut & SOut,
const TStr & Nm 
) const
void TUStr::ToLowerCase ( )

Definition at line 19 of file unicodestring.cpp.

19  {
21 }
static TUnicode * GetDef()
Definition: unicodestring.h:23
void ToSimpleLowerCase(TIntV &src) const
Definition: unicode.h:1978
TIntV UniChV
Definition: unicodestring.h:34
void TUStr::ToStarterCase ( )

Definition at line 27 of file unicodestring.cpp.

27  {
28  TIntV StarterUniChV;
29  TUnicodeDef::GetDef()->ExtractStarters(UniChV, StarterUniChV);
30  TUnicodeDef::GetDef()->Decompose(StarterUniChV, UniChV, true);
31 }
static TUnicode * GetDef()
Definition: unicodestring.h:23
int ExtractStarters(const TIntV &src, TIntV &dest) const
Definition: unicode.h:1951
TIntV UniChV
Definition: unicodestring.h:34
void Decompose(const TIntV &src, TIntV &dest, bool compatibility) const
Definition: unicode.h:1934
void TUStr::ToUpperCase ( )

Definition at line 23 of file unicodestring.cpp.

23  {
25 }
void ToSimpleUpperCase(TIntV &src) const
Definition: unicode.h:1977
static TUnicode * GetDef()
Definition: unicodestring.h:23
TIntV UniChV
Definition: unicodestring.h:34

Member Data Documentation

TIntV TUStr::UniChV
private

Definition at line 34 of file unicodestring.h.


The documentation for this class was generated from the following files: