SNAP Library, User Reference
2012-10-02 12:56:23
SNAP, a general purpose network analysis and graph mining library
|
Public Types | |
typedef TUniChDb::TCaseConversion | TCaseConversion |
Public Member Functions | |
TUnicode () | |
TUnicode (const TStr &fnBinUcd) | |
void | Init () |
int | DecodeUtf8 (const TIntV &src, TIntV &dest) const |
int | DecodeUtf8 (const TStr &src, TIntV &dest) const |
int | EncodeUtf8 (const TIntV &src, TIntV &dest) const |
TStr | EncodeUtf8Str (const TIntV &src) const |
int | DecodeUtf16FromBytes (const TIntV &src, TIntV &dest, const TUtf16BomHandling bomHandling=bomAllowed, const TUniByteOrder defaultByteOrder=boMachineEndian) const |
int | DecodeUtf16FromWords (const TIntV &src, TIntV &dest, const TUtf16BomHandling bomHandling=bomAllowed, const TUniByteOrder defaultByteOrder=boMachineEndian) const |
int | EncodeUtf16ToWords (const TIntV &src, TIntV &dest, const bool insertBom, const TUniByteOrder destByteOrder=boMachineEndian) const |
int | EncodeUtf16ToBytes (const TIntV &src, TIntV &dest, const bool insertBom, const TUniByteOrder destByteOrder=boMachineEndian) const |
void | RegisterCodec (const TStr &nameList, const PCodecBase &codec) |
void | UnregisterCodec (const TStr &nameList) |
void | ClrCodecs () |
void | InitCodecs () |
PCodecBase | GetCodec (const TStr &name) const |
void | GetAllCodecs (TCodecBaseV &dest) const |
bool | FindNextWordBoundary (const TIntV &src, int &position) const |
void | FindWordBoundaries (const TIntV &src, TBoolV &dest) const |
bool | FindNextSentenceBoundary (const TIntV &src, int &position) const |
void | FindSentenceBoundaries (const TIntV &src, TBoolV &dest) const |
void | ClrSentenceBoundaryExceptions () |
void | UseEnglishSentenceBoundaryExceptions () |
void | Decompose (const TIntV &src, TIntV &dest, bool compatibility) const |
void | Compose (const TIntV &src, TIntV &dest) const |
void | DecomposeAndCompose (const TIntV &src, TIntV &dest, bool compatibility) const |
int | ExtractStarters (const TIntV &src, TIntV &dest) const |
int | ExtractStarters (TIntV &src) const |
void | GetLowerCase (const TIntV &src, TIntV &dest) const |
void | GetUpperCase (const TIntV &src, TIntV &dest) const |
void | GetTitleCase (const TIntV &src, TIntV &dest) const |
void | GetSimpleLowerCase (const TIntV &src, TIntV &dest) const |
void | GetSimpleUpperCase (const TIntV &src, TIntV &dest) const |
void | GetSimpleTitleCase (const TIntV &src, TIntV &dest) const |
void | ToSimpleUpperCase (TIntV &src) const |
void | ToSimpleLowerCase (TIntV &src) const |
void | ToSimpleTitleCase (TIntV &src) const |
void | GetCaseFolded (const TIntV &src, TIntV &dest, const bool full=true) const |
void | ToCaseFolded (TIntV &src) const |
TStr | GetUtf8CaseFolded (const TStr &s) const |
DECLARE_FORWARDED_PROPERTY_METHODS | ___UniFwd2 (IsPrivateUse, IsSurrogate) |
TUniChCategory | GetCat (const int cp) const |
TUniChSubCategory | GetSubCat (const int cp) const |
const char * | GetCharName (const int cp) const |
TStr | GetCharNameS (const int cp) const |
Public Attributes | |
TUniCodec | codec |
TUniChDb | ucd |
T8BitCodec< TEncoding_ISO8859_1 > | iso8859_1 |
T8BitCodec< TEncoding_ISO8859_2 > | iso8859_2 |
T8BitCodec< TEncoding_ISO8859_3 > | iso8859_3 |
T8BitCodec< TEncoding_ISO8859_4 > | iso8859_4 |
T8BitCodec< TEncoding_YuAscii > | yuAscii |
T8BitCodec< TEncoding_CP1250 > | cp1250 |
T8BitCodec< TEncoding_CP852 > | cp852 |
T8BitCodec< TEncoding_CP437 > | cp437 |
Static Protected Member Functions | |
static TStr | NormalizeCodecName (const TStr &name) |
Protected Attributes | |
THash< TStr, PCodecBase > | codecs |
TUnicode::TUnicode | ( | ) | [inline] |
TUnicode::TUnicode | ( | const TStr & | fnBinUcd | ) | [inline, explicit] |
DECLARE_FORWARDED_PROPERTY_METHODS TUnicode::___UniFwd2 | ( | IsPrivateUse | , |
IsSurrogate | |||
) | const [inline] |
void TUnicode::ClrCodecs | ( | ) | [inline] |
void TUnicode::ClrSentenceBoundaryExceptions | ( | ) | [inline] |
void TUnicode::Compose | ( | const TIntV & | src, |
TIntV & | dest | ||
) | const [inline] |
int TUnicode::DecodeUtf16FromBytes | ( | const TIntV & | src, |
TIntV & | dest, | ||
const TUtf16BomHandling | bomHandling = bomAllowed, | ||
const TUniByteOrder | defaultByteOrder = boMachineEndian | ||
) | const [inline] |
int TUnicode::DecodeUtf16FromWords | ( | const TIntV & | src, |
TIntV & | dest, | ||
const TUtf16BomHandling | bomHandling = bomAllowed, | ||
const TUniByteOrder | defaultByteOrder = boMachineEndian | ||
) | const [inline] |
int TUnicode::DecodeUtf8 | ( | const TIntV & | src, |
TIntV & | dest | ||
) | const [inline] |
int TUnicode::DecodeUtf8 | ( | const TStr & | src, |
TIntV & | dest | ||
) | const [inline] |
void TUnicode::Decompose | ( | const TIntV & | src, |
TIntV & | dest, | ||
bool | compatibility | ||
) | const [inline] |
void TUnicode::DecomposeAndCompose | ( | const TIntV & | src, |
TIntV & | dest, | ||
bool | compatibility | ||
) | const [inline] |
int TUnicode::EncodeUtf16ToBytes | ( | const TIntV & | src, |
TIntV & | dest, | ||
const bool | insertBom, | ||
const TUniByteOrder | destByteOrder = boMachineEndian | ||
) | const [inline] |
int TUnicode::EncodeUtf16ToWords | ( | const TIntV & | src, |
TIntV & | dest, | ||
const bool | insertBom, | ||
const TUniByteOrder | destByteOrder = boMachineEndian | ||
) | const [inline] |
int TUnicode::EncodeUtf8 | ( | const TIntV & | src, |
TIntV & | dest | ||
) | const [inline] |
TStr TUnicode::EncodeUtf8Str | ( | const TIntV & | src | ) | const [inline] |
int TUnicode::ExtractStarters | ( | const TIntV & | src, |
TIntV & | dest | ||
) | const [inline] |
int TUnicode::ExtractStarters | ( | TIntV & | src | ) | const [inline] |
bool TUnicode::FindNextSentenceBoundary | ( | const TIntV & | src, |
int & | position | ||
) | const [inline] |
bool TUnicode::FindNextWordBoundary | ( | const TIntV & | src, |
int & | position | ||
) | const [inline] |
void TUnicode::FindSentenceBoundaries | ( | const TIntV & | src, |
TBoolV & | dest | ||
) | const [inline] |
void TUnicode::FindWordBoundaries | ( | const TIntV & | src, |
TBoolV & | dest | ||
) | const [inline] |
void TUnicode::GetAllCodecs | ( | TCodecBaseV & | dest | ) | const [inline] |
void TUnicode::GetCaseFolded | ( | const TIntV & | src, |
TIntV & | dest, | ||
const bool | full = true | ||
) | const [inline] |
const char* TUnicode::GetCharName | ( | const int | cp | ) | const [inline] |
TStr TUnicode::GetCharNameS | ( | const int | cp | ) | const [inline] |
PCodecBase TUnicode::GetCodec | ( | const TStr & | name | ) | const [inline] |
void TUnicode::GetLowerCase | ( | const TIntV & | src, |
TIntV & | dest | ||
) | const [inline] |
void TUnicode::GetSimpleLowerCase | ( | const TIntV & | src, |
TIntV & | dest | ||
) | const [inline] |
void TUnicode::GetSimpleTitleCase | ( | const TIntV & | src, |
TIntV & | dest | ||
) | const [inline] |
void TUnicode::GetSimpleUpperCase | ( | const TIntV & | src, |
TIntV & | dest | ||
) | const [inline] |
TUniChSubCategory TUnicode::GetSubCat | ( | const int | cp | ) | const [inline] |
void TUnicode::GetTitleCase | ( | const TIntV & | src, |
TIntV & | dest | ||
) | const [inline] |
void TUnicode::GetUpperCase | ( | const TIntV & | src, |
TIntV & | dest | ||
) | const [inline] |
TStr TUnicode::GetUtf8CaseFolded | ( | const TStr & | s | ) | const [inline] |
void TUnicode::Init | ( | ) | [inline] |
void TUnicode::InitCodecs | ( | ) |
static TStr TUnicode::NormalizeCodecName | ( | const TStr & | name | ) | [inline, static, protected] |
void TUnicode::RegisterCodec | ( | const TStr & | nameList, |
const PCodecBase & | codec | ||
) | [inline] |
void TUnicode::ToCaseFolded | ( | TIntV & | src | ) | const [inline] |
void TUnicode::ToSimpleLowerCase | ( | TIntV & | src | ) | const [inline] |
void TUnicode::ToSimpleTitleCase | ( | TIntV & | src | ) | const [inline] |
void TUnicode::ToSimpleUpperCase | ( | TIntV & | src | ) | const [inline] |
void TUnicode::UnregisterCodec | ( | const TStr & | nameList | ) | [inline] |
void TUnicode::UseEnglishSentenceBoundaryExceptions | ( | ) | [inline] |
THash<TStr, PCodecBase> TUnicode::codecs [protected] |