SNAP Library 3.0, User Reference  2016-07-20 17:56:49
SNAP, a general purpose, high performance system for analysis and manipulation of large networks
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
TUniCaseFolding Class Reference

#include <unicode.h>

Public Member Functions

 TUniCaseFolding ()
 
 TUniCaseFolding (TSIn &SIn)
 
void Load (TSIn &SIn)
 
void Save (TSOut &SOut) const
 
void Clr ()
 
void LoadTxt (const TStr &fileName)
 
template<typename TSrcVec , typename TDestCh >
void Fold (const TSrcVec &src, size_t srcIdx, const size_t srcCount, TVec< TDestCh > &dest, const bool clrDest, const bool full, const bool turkic) const
 
template<typename TSrcVec >
void FoldInPlace (TSrcVec &src, size_t srcIdx, const size_t srcCount, const bool turkic) const
 
void Test ()
 

Protected Types

typedef TUniVecIdx TVecIdx
 

Protected Member Functions

void Test (const TIntV &src, const TIntV &expectedDest, const bool full, const bool turkic, FILE *f)
 

Static Protected Member Functions

template<typename TSrcDat , typename TDestDat >
static void AppendVector (const TVec< TSrcDat > &src, TVec< TDestDat > &dest)
 

Protected Attributes

TIntH cfCommon
 
TIntH cfSimple
 
TIntH cfTurkic
 
TIntIntVH cfFull
 

Friends

class TUniChDb
 

Detailed Description

Definition at line 271 of file unicode.h.

Member Typedef Documentation

typedef TUniVecIdx TUniCaseFolding::TVecIdx
protected

Definition at line 281 of file unicode.h.

Constructor & Destructor Documentation

TUniCaseFolding::TUniCaseFolding ( )
inline

Definition at line 284 of file unicode.h.

284 { }
TUniCaseFolding::TUniCaseFolding ( TSIn &  SIn)
inline explicit

Definition at line 285 of file unicode.h.

285 : cfCommon(SIn), cfSimple(SIn), cfTurkic(SIn), cfFull(SIn) { SIn.LoadCs(); }
TIntIntVH cfFull
Definition: unicode.h:275
void LoadCs()
Definition: fl.cpp:28
TIntH cfTurkic
Definition: unicode.h:274
TIntH cfCommon
Definition: unicode.h:274
TIntH cfSimple
Definition: unicode.h:274

Member Function Documentation

template<typename TSrcDat , typename TDestDat >
static void TUniCaseFolding::AppendVector ( const TVec< TSrcDat > &  src,
TVec< TDestDat > &  dest 
)
inline static protected

Definition at line 278 of file unicode.h.

278  {
279  for (int i = 0; i < src.Len(); i++) dest.Add(src[i]); }
TSizeTy Len() const
Returns the number of elements in the vector.
Definition: ds.h:547
TSizeTy Add()
Adds a new element at the end of the vector, after its current last element.
Definition: ds.h:574
void TUniCaseFolding::Clr ( )
inline

Definition at line 288 of file unicode.h.

288 { cfCommon.Clr(); cfSimple.Clr(); cfFull.Clr(); cfTurkic.Clr(); }
TIntIntVH cfFull
Definition: unicode.h:275
TIntH cfTurkic
Definition: unicode.h:274
TIntH cfCommon
Definition: unicode.h:274
TIntH cfSimple
Definition: unicode.h:274
void Clr(const bool &DoDel=true, const int &NoDelLim=-1, const bool &ResetDat=true)
Definition: hash.h:319
template<typename TSrcVec , typename TDestCh >
void TUniCaseFolding::Fold ( const TSrcVec &  src,
size_t  srcIdx,
const size_t  srcCount,
TVec< TDestCh > &  dest,
const bool  clrDest,
const bool  full,
const bool  turkic 
) const
inline

Definition at line 293 of file unicode.h.

295  {
296  for (const size_t srcEnd = srcIdx + srcCount; srcIdx < srcEnd; )
297  {
298  int c = src[TVecIdx(srcIdx)], i; srcIdx++;
299  if (turkic && ((i = cfTurkic.GetKeyId(c)) >= 0)) { dest.Add(cfTurkic[i]); continue; }
300  if (full && ((i = cfFull.GetKeyId(c)) >= 0)) { AppendVector(cfFull[i], dest); continue; }
301  if ((! full) && ((i = cfSimple.GetKeyId(c)) >= 0)) { dest.Add(cfSimple[i]); continue; }
302  i = cfCommon.GetKeyId(c); if (i >= 0) dest.Add(cfCommon[i]); else dest.Add(c);
303  }
304  }
static void AppendVector(const TVec< TSrcDat > &src, TVec< TDestDat > &dest)
Definition: unicode.h:278
TUniVecIdx TVecIdx
Definition: unicode.h:281
TIntIntVH cfFull
Definition: unicode.h:275
TIntH cfTurkic
Definition: unicode.h:274
TIntH cfCommon
Definition: unicode.h:274
int GetKeyId(const TKey &Key) const
Definition: hash.h:424
TIntH cfSimple
Definition: unicode.h:274
TSizeTy Add()
Adds a new element at the end of the vector, after its current last element.
Definition: ds.h:574
template<typename TSrcVec >
void TUniCaseFolding::FoldInPlace ( TSrcVec &  src,
size_t  srcIdx,
const size_t  srcCount,
const bool  turkic 
) const
inline

Definition at line 307 of file unicode.h.

308  {
309  for (const size_t srcEnd = srcIdx + srcCount; srcIdx < srcEnd; srcIdx++)
310  {
311  int c = src[TVecIdx(srcIdx)], i;
312  if (turkic && ((i = cfTurkic.GetKeyId(c)) >= 0)) { src[TVecIdx(srcIdx)] = cfTurkic[i]; continue; }
313  if ((i = cfSimple.GetKeyId(c)) >= 0) { src[TVecIdx(srcIdx)] = cfSimple[i]; continue; }
314  i = cfCommon.GetKeyId(c); if (i >= 0) src[TVecIdx(srcIdx)] = cfCommon[i];
315  }
316  }
TUniVecIdx TVecIdx
Definition: unicode.h:281
TIntH cfTurkic
Definition: unicode.h:274
TIntH cfCommon
Definition: unicode.h:274
int GetKeyId(const TKey &Key) const
Definition: hash.h:424
TIntH cfSimple
Definition: unicode.h:274
void TUniCaseFolding::Load ( TSIn &  SIn)
inline

Definition at line 286 of file unicode.h.

286 { cfCommon.Load(SIn); cfSimple.Load(SIn); cfFull.Load(SIn); cfTurkic.Load(SIn); SIn.LoadCs(); }
TIntIntVH cfFull
Definition: unicode.h:275
void LoadCs()
Definition: fl.cpp:28
void Load(TSIn &SIn)
Definition: hash.h:137
TIntH cfTurkic
Definition: unicode.h:274
TIntH cfCommon
Definition: unicode.h:274
TIntH cfSimple
Definition: unicode.h:274
void TUniCaseFolding::LoadTxt ( const TStr &  fileName)

Definition at line 505 of file unicode.cpp.

506 {
507  Clr();
508  TUniChDb::TUcdFileReader reader; reader.Open(fileName);
509  TStrV fields;
510  while (reader.GetNextLine(fields))
511  {
512  int cp = reader.ParseCodePoint(fields[0]);
513  const TStr status = fields[1], mapsTo = fields[2];
514  if (status == "C" || status == "S" || status == "T") {
515  TIntH &dest = (status == "C" ? cfCommon : status == "S" ? cfSimple : cfTurkic);
516  IAssert(! dest.IsKey(cp));
517  int cp2 = reader.ParseCodePoint(mapsTo);
518  dest.AddDat(cp, cp2); }
519  else if (status == "F") {
520  TIntIntVH &dest = cfFull;
521  IAssert(! dest.IsKey(cp));
522  TIntV cps; reader.ParseCodePointList(mapsTo, cps); IAssert(cps.Len() > 0);
523  dest.AddDat(cp, cps); }
524  else
525  FailR(status.CStr());
526  }
527  printf("TUniCaseFolding(\"%s\"): %d common, %d simple, %d full, %d Turkic.\n",
528  fileName.CStr(), cfCommon.Len(), cfSimple.Len(), cfFull.Len(), cfTurkic.Len());
529 }
#define IAssert(Cond)
Definition: bd.h:262
TIntIntVH cfFull
Definition: unicode.h:275
void Clr()
Definition: unicode.h:288
bool GetNextLine(TStrV &dest)
Definition: unicode.h:1686
void Open(const TStr &fileName)
Definition: unicode.h:1683
#define FailR(Reason)
Definition: bd.h:240
TIntH cfTurkic
Definition: unicode.h:274
static void ParseCodePointList(const TStr &s, TIntV &dest, bool ClrDestP=true)
Definition: unicode.h:1697
TIntH cfCommon
Definition: unicode.h:274
Definition: dt.h:412
TIntH cfSimple
Definition: unicode.h:274
char * CStr()
Definition: dt.h:476
bool IsKey(const TKey &Key) const
Definition: hash.h:216
int Len() const
Definition: hash.h:186
TDat & AddDat(const TKey &Key)
Definition: hash.h:196
static int ParseCodePoint(const TStr &s)
Definition: unicode.h:1695
Vector is a sequence of TVal objects representing an array that can change in size.
Definition: ds.h:429
void TUniCaseFolding::Save ( TSOut &  SOut) const
inline

Definition at line 287 of file unicode.h.

287 { cfCommon.Save(SOut); cfSimple.Save(SOut); cfFull.Save(SOut); cfTurkic.Save(SOut); SOut.SaveCs(); }
void Save(TSOut &SOut) const
Definition: hash.h:141
TIntIntVH cfFull
Definition: unicode.h:275
void SaveCs()
Definition: fl.h:171
TIntH cfTurkic
Definition: unicode.h:274
TIntH cfCommon
Definition: unicode.h:274
TIntH cfSimple
Definition: unicode.h:274
void TUniCaseFolding::Test ( const TIntV &  src,
const TIntV &  expectedDest,
const bool  full,
const bool  turkic,
FILE *  f 
)
protected

Definition at line 531 of file unicode.cpp.

532 {
533  fprintf(f, "TUniCaseFolding(%s%s): ", (full ? "full" : "simple"), (turkic ? ", turkic" : ""));
534  for (int i = 0; i < src.Len(); i++) fprintf(f, " %04x", int(src[i]));
535  TIntV dest; Fold(src, 0, src.Len(), dest, true, full, turkic);
536  fprintf(f, "\n -> ");
537  for (int i = 0; i < dest.Len(); i++) fprintf(f, " %04x", int(dest[i]));
538  fprintf(f, "\n");
539  IAssert(dest.Len() == expectedDest.Len());
540  for (int i = 0; i < dest.Len(); i++) IAssert(dest[i] == expectedDest[i]);
541 }
#define IAssert(Cond)
Definition: bd.h:262
TSizeTy Len() const
Returns the number of elements in the vector.
Definition: ds.h:547
void Fold(const TSrcVec &src, size_t srcIdx, const size_t srcCount, TVec< TDestCh > &dest, const bool clrDest, const bool full, const bool turkic) const
Definition: unicode.h:293
void TUniCaseFolding::Test ( )

Definition at line 549 of file unicode.cpp.

550 {
551  FILE *f = stderr;
552  TVectorBuilder VB;
553  // simple
554  Test((VB, 0x41, 0x62, 0x49, 0x43, 0xdf), (VB, 0x61, 0x62, 0x69, 0x63, 0xdf), false, false, f);
555  // simple + turkic
556  Test((VB, 0x41, 0x62, 0x49, 0x43, 0xdf), (VB, 0x61, 0x62, 0x131, 0x63, 0xdf), false, true, f);
557  // full
558  Test((VB, 0x41, 0x62, 0x49, 0x43, 0xdf), (VB, 0x61, 0x62, 0x69, 0x63, 0x73, 0x73), true, false, f);
559  // full + turkic
560  Test((VB, 0x41, 0x62, 0x49, 0x43, 0xdf), (VB, 0x61, 0x62, 0x131, 0x63, 0x73, 0x73), true, true, f);
561 }

Friends And Related Function Documentation

friend class TUniChDb
friend

Definition at line 280 of file unicode.h.

Member Data Documentation

TIntH TUniCaseFolding::cfCommon
protected

Definition at line 274 of file unicode.h.

TIntIntVH TUniCaseFolding::cfFull
protected

Definition at line 275 of file unicode.h.

TIntH TUniCaseFolding::cfSimple
protected

Definition at line 274 of file unicode.h.

TIntH TUniCaseFolding::cfTurkic
protected

Definition at line 274 of file unicode.h.


The documentation for this class was generated from the following files: