19 operator TIntV()
const {
return v; }
34 int n = s.
Len();
if (n <= 0)
return t;
35 if (s[n - 1] ==
'\\' || s[n - 1] ==
'/' || s[n - 1] ==
':')
return s + t;
41 const int n = v1.
Len();
42 bool ok = (n == v2.
Len());
43 if (ok)
for (
int i = 0; i < n && ok; i++) ok = ok && (v1[i] == v2[i]);
47 fprintf(f,
"%s: [", explanation.
CStr());
48 for (
int i = 0; i < v1.
Len(); i++) fprintf(f,
"%s%04x", (i == 0 ?
"" :
" "), int(v1[i]));
50 for (
int i = 0; i < v2.
Len(); i++) fprintf(f,
"%s%04x", (i == 0 ?
"" :
" "), int(v2[i]));
74 uint range = maxVal - minVal + 1;
75 if (range > (
uint(1) << (8 *
sizeof(
uint) - 1)))
76 while (
true) {
uint u =
GetRndUint(rnd);
if (u < range)
return minVal + u; }
78 while (mask < range) mask <<= 1;
80 while (
true) {
uint u =
GetRndUint(rnd) & mask;
if (u < range)
return minVal + u; }
85 static bool isLE, initialized =
false;
86 if (initialized)
return isLE;
88 if(*(
char *)&i == 1) isLE =
true;
103 fprintf(f,
"Settings: %s %s %s replacementChar = %x\n",
106 fprintf(f,
"src: ");
for (
int i = 0; i < src.
Len(); i++) fprintf(f, (decode ?
" %02x" :
" %x"),
uint(src[i])); }
111 fprintf(f,
"\n -> dest: ");
for (
int i = 0; i < dest.
Len(); i++) fprintf(f, (decode ?
" %x" :
" %02x"),
uint(dest[i]));
112 fprintf(f,
"\n expDest ");
for (
int i = 0; i < expectedDest.
Len(); i++) fprintf(f, (decode ?
" %x" :
" %02x"),
uint(expectedDest[i]));
113 fprintf(f,
"\n retVal = %llu (expected %llu)\n", static_cast<long long unsigned int> (retVal), static_cast<long long unsigned int> (expectedRetVal)); }
114 if (retVal != expectedRetVal)
117 if (dest.
Len() != expectedDest.
Len())
120 for (
int i = 0; i < dest.
Len(); i++)
IAssert(dest[i] == expectedDest[i]);
125 fprintf(f,
"\n -> expDest ");
for (
int i = 0; i < expectedDest.
Len(); i++) fprintf(f,
" %x",
uint(expectedDest[i]));
135 TIntV src;
TIntV expectedDest;
int expectedRetVal = 0;
136 bool expectedAbort =
false;
143 for (
int i = 0; i < testCaseDesc.
Len(); )
146 const char c = testCaseDesc[i], d = testCaseDesc[i + 1]; i += 2;
147 uint cp = 0;
int nBytes = -1, minBytes = -1;
bool eighties =
false;
148 IAssert(
'1' <= d && d <=
'6'); nBytes = d -
'0';
149 if (c ==
'A') { cp =
GetRndUint(rnd, 0u, 0x7fu); minBytes = 1; }
150 else if (c ==
'B') { cp =
GetRndUint(rnd, 0x80u, 0x7ffu); minBytes = 2; }
151 else if (c ==
'C') { cp =
GetRndUint(rnd, 0x800u, 0xffffu); minBytes = 3; }
152 else if (c ==
'D') { cp =
GetRndUint(rnd, 0x10000u, 0x10ffffu); minBytes = 4; }
153 else if (c ==
'E') { cp =
GetRndUint(rnd, 0x110000u, 0x1fffffu); minBytes = 4; }
154 else if (c ==
'F') { cp =
GetRndUint(rnd, 0x200000u, 0x3ffffffu); minBytes = 5; }
155 else if (c ==
'G') { cp =
GetRndUint(rnd, 0x4000000u, 0x7fffffffu); minBytes = 6; }
156 else if (c ==
'H') { cp =
GetRndUint(rnd, 0x80000000u, 0xffffffffu); minBytes = 6; }
157 else if (c ==
'X') { cp = 0xfffe; minBytes = 3; }
158 else if (c ==
'Y') { cp = 0xfeff; minBytes = 3; }
159 else if (c ==
'Z') { eighties =
true; minBytes = 1; }
164 if (i < testCaseDesc.
Len()) {
165 const char e = testCaseDesc[i];
166 if (e >=
'a' && e <=
'e') { i += 1; nToDel = e -
'a' + 1; }}
169 bool errHere =
false;
170 if (eighties) errHere =
true;
171 else if (nToDel > 0) errHere =
true;
172 else if (
strict && (cp >= 0x10ffff || nBytes > minBytes)) errHere =
true;
174 if (! expectedAbort) {
176 if (src.
Len() == 0 && (cp == 0xfffe || cp == 0xfeff) &&
skipBom) { }
177 else { expectedDest.
Add(cp); expectedRetVal += 1; } }
183 if (eighties)
for (
int j = 0; j < nBytes; j++) src.
Add(
GetRndUint(rnd, 0x80, 0xff));
184 else if (nBytes == 1) src.
Add(cp);
186 int mask = (1 << nBytes) - 1; mask <<= (8 - nBytes);
187 src.
Add(mask | (
uint(cp) >> (6 * (nBytes - 1))));
188 for (
int j = 1; j < nBytes - nToDel; j++) src.
Add(0x80 | ((cp >> (6 * (nBytes - j - 1))) & _0011_1111)); }
190 if (f) fprintf(f,
"Test case: \"%s\"\n", testCaseDesc.
CStr());
197 for (
int skipBom_ = 0; skipBom_ < 2; skipBom_++)
198 for (
int strict_ = 0; strict_ < 2; strict_++)
199 for (
int errMode_ = 0; errMode_ < 4; errMode_++)
204 for (
int i = 0; i < 10; i++)
206 TestDecodeUtf8(rnd,
"X3A1A2A3A4A5A6B2B3B4B5B6C3C4C5C6D4D5D6E5E6F6G6");
219 TestDecodeUtf8(rnd,
"X3A2aA3aA4aA5aA6aB2aB3aB4aB5aB6aC3aC4aC5aC6aD4aD5aD6aE5aE6aF6aG6a");
220 TestDecodeUtf8(rnd,
"X3A3bA4bA5bA6aB3bB4bB5bB6bC3bC4bC5bC6bD4bD5bD6bE5bE6bF6bG6b");
221 TestDecodeUtf8(rnd,
"X3A4cA5cA6cB4cB5cB6cC4cC5cC6cD4cD5cD6cE5cE6cF6cG6c");
227 TIntV src, expectedDest, src2;
229 for (
int pow = 8; pow <= 32; pow++)
232 if (pow == 8) uFrom = 0, uTo = 1u << pow;
234 else uFrom = (1u << pow) - (1u << 8), uTo = (1u << pow) + (1u << 8);
235 printf(
"%u..%u \r", uFrom, uTo);
236 for (
uint u = uFrom; ; u++)
239 if (u < (1u << 7)) nBytes = 1;
240 else if (u < (1u << 11)) nBytes = 2;
241 else if (u < (1u << 16)) nBytes = 3;
242 else if (u < (1u << 21)) nBytes = 4;
243 else if (u < (1u << 26)) nBytes = 5;
246 if (nBytes == 1) src[0] = u;
248 src[0] = (((1 << nBytes) - 1) << (8 - nBytes)) | (u >> (6 * (nBytes - 1)));
249 for (
int i = 1; i < nBytes; i++) src[i] = 0x80 | ((u >> (6 * (nBytes - i - 1))) & _0011_1111); }
250 bool err = (
strict && u > 0x10ffff);
253 else if (! err) expectedDest.
Add(u);
254 int erv = (err ? 0 : 1);
255 if (
skipBom && (u == 0xfeff || u == 0xfffe)) expectedDest.
Clr(), erv = 0;
261 else src.
Clr(
false); }
278 for (
int i = 0; i < src.
Len(); i++) {
279 int c = src[i] & 0xffff;
280 if (isLE) { dest.
Add(c & 0xff); dest.
Add((c >> 8) & 0xff); }
281 else { dest.
Add((c >> 8) & 0xff); dest.
Add(c & 0xff); } }
288 TIntV srcBytes, expectedDestBytes;
292 fprintf(f,
"Settings: %s %s %s %s %s replacementChar = %x \n",
294 (
strict ?
"STRICT" :
""), (decode ? (
skipBom ?
"skipBom" :
"") : (insertBom ?
"insrtBom" :
"")),
295 (bomHandling ==
bomAllowed ?
"bomAllowed" : bomHandling ==
bomRequired ?
"bomRequired" :
"bomIgnored"),
296 (defaultByteOrder ==
boBigEndian ?
"boBigEndian" : defaultByteOrder ==
boLittleEndian ?
"boLittleEndian" :
"boMachineEndian"),
298 fprintf(f,
"src: ");
for (
int i = 0; i < src.
Len(); i++) fprintf(f, (decode ?
" %04x" :
" %x"),
uint(src[i])); }
299 for (
int useBytes = 0; useBytes < 2; useBytes++)
301 const char *fmt = (useBytes ?
" %02x" :
" %04x");
310 if (decode) retVal =
DecodeUtf16FromBytes(srcBytes, 0, srcBytes.
Len(), dest,
true, bomHandling, defaultByteOrder);
312 const TIntV& ed = (useBytes && ! decode ? expectedDestBytes : expectedDest);
314 fprintf(f,
"\n -> dest: ");
for (
int i = 0; i < dest.
Len(); i++) fprintf(f, (decode ?
" %x" : fmt),
uint(dest[i]));
315 fprintf(f,
"\n expDest ");
for (
int i = 0; i < ed.
Len(); i++) fprintf(f, (decode ?
" %x" : fmt),
uint(ed[i]));
316 fprintf(f,
"\n retVal = %llu (expected %llu)\n", static_cast<long long unsigned int> (retVal), static_cast<long long unsigned int> (expectedRetVal)); }
318 if (retVal != expectedRetVal) ok =
false;
319 if (dest.
Len() != ed.
Len()) ok =
false;
320 if (ok)
for (
int i = 0; i < dest.
Len(); i++)
if (dest[i] != ed[i]) ok =
false;
327 for (
int i = 0; i < dest.
Len(); i++)
IAssert(dest[i] == ed[i]);
332 fprintf(f,
"\n -> expDest ");
for (
int i = 0; i < expectedDest.
Len(); i++) fprintf(f, (decode ?
" %x" : fmt),
uint(expectedDest[i]));
344 const bool insertBom)
346 TIntV src;
TIntV expectedDest;
int expectedRetVal = 0;
347 bool expectedAbort =
false;
351 bool swap = (isMachineLe != isDefaultLe);
353 src.
Add(swap ? 0xfffe : 0xfeff);
354 if (!
skipBom) { expectedRetVal += 1; expectedDest.
Add(0xfeff); } }
356 expectedAbort =
true; expectedRetVal = -1; }
361 for (
int i = 0; i < testCaseDesc.
Len(); )
363 const char c = testCaseDesc[i++];
364 uint cp = 0;
int nWords = -1;
365 if (c ==
'X' || c ==
'Y')
IAssert(i > 1);
370 else if (c ==
'E') { cp =
GetRndUint(rnd, 0x10000u, 0x10ffffu); nWords = 2; }
371 else if (c ==
'X') { cp = 0xfffe; nWords = 1; }
372 else if (c ==
'Y') { cp = 0xfeff; nWords = 1; }
374 if (c ==
'B' && i < testCaseDesc.
Len())
IAssert(testCaseDesc[i] !=
'C');
377 if (i < testCaseDesc.
Len()) {
378 const char e = testCaseDesc[i];
379 if (e >=
'a') { i += 1; nToDel = 1; }}
380 IAssert((nWords == 1 && nToDel == 0) || (nWords == 2 && (nToDel == 0 || nToDel == 1)));
381 if (nWords == 2 && nToDel == 1 && i < testCaseDesc.
Len())
IAssert(testCaseDesc[i] !=
'C');
383 bool errHere =
false;
385 else if (cp > 0x10ffff) {
Fail; errHere =
true; }
386 else if (nToDel > 0) errHere =
true;
389 if (! expectedAbort) {
391 if (src.
Len() == 0 && (cp == 0xfffe || cp == 0xfeff) &&
skipBom) { }
392 else { expectedDest.
Add(cp); expectedRetVal += 1; } }
404 if (f) fprintf(f,
"Test case: \"%s\"\n", testCaseDesc.
CStr());
411 for (
int skipBom_ = 0; skipBom_ < 2; skipBom_++)
412 for (
int strict_ = 0; strict_ < 2; strict_++)
413 for (
int errMode_ = 0; errMode_ < 4; errMode_++)
414 for (
int bomHandling_ = 0; bomHandling_ < 3; bomHandling_++)
415 for (
int byteOrder_ = 0; byteOrder_ < 3; byteOrder_++)
416 for (
int insertBom_ = 0; insertBom_ < 2; insertBom_++)
419 bool insertBom = (insertBom_ == 1);
424 for (
int i = 0; i < 10; i++)
429 TestDecodeUtf16(rnd,
"DDAADADAAADDDAA", bomHandling, byteOrder, insertBom);
430 TestDecodeUtf16(rnd,
"DEEEDAAEEDADEEAAEEADEEDDAA", bomHandling, byteOrder, insertBom);
431 TestDecodeUtf16(rnd,
"DEaEaEDAAEaEDADEaEAAEEADEEDDAA", bomHandling, byteOrder, insertBom);
432 TestDecodeUtf16(rnd,
"CABDEBACCEaB", bomHandling, byteOrder, insertBom);
433 TestDecodeUtf16(rnd,
"EaEEEEaBBACABXABYXXEaYDDXBDCEA", bomHandling, byteOrder, insertBom);
434 TestDecodeUtf16(rnd,
"EaEEEEaBDCAAXADYXXEaYDDXDCEA", bomHandling, byteOrder, insertBom);
439 TIntV src, expectedDest, src2;
441 for (
int pow = 8; pow <= 32; pow++)
444 if (pow == 8) uFrom = 0, uTo = 1u << pow;
446 else uFrom = (1u << pow) - (1u << 8), uTo = (1u << pow) + (1u << 8);
447 printf(
"%u..%u \r", uFrom, uTo);
448 for (
uint u = uFrom; ; u++)
451 if (u < 0x10000) nWords = 1;
454 bool swap = (isMachineLe != isDestLe);
456 src.
Gen(3, (err ? 0 : nWords) + (insertBom ? 1 : 0));
457 if (insertBom) src[0] = (swap ? 0xfffe : 0xfeff);
461 if (nWords == 1) src[insertBom ? 1 : 0] = (swap ?
SwapBytes(u) : u);
464 int u2 = Utf16SecondSurrogate + ((u - 0x10000) & 1023);
465 src[insertBom ? 1 : 0] = (swap ?
SwapBytes(u1) : u1);
466 src[insertBom ? 2 : 1] = (swap ?
SwapBytes(u2) : u2); }
467 if (! ((u == 0xfffe || u == 0xfeff) && bomHandling ==
bomAllowed && ! insertBom))
470 if (insertBom && !
skipBom) expectedDest.
Add(0xfeff);
472 else if (! err) expectedDest.
Add(u);
473 int erv = (err ? 0 : expectedDest.
Len());
474 if (
skipBom && (u == 0xfeff || u == 0xfffe) && ! insertBom) expectedDest.
Clr(), erv = 0;
477 expectedDest.
Clr(
false);
478 if (u == 0xfeff || u == 0xfffe) { erv = (
skipBom ? 0 : 1);
if (!
skipBom) expectedDest.
Add(0xfeff); }
479 else { erv = -1; errD =
true;
487 src.
Clr(
false);
if (insertBom) src.
Add(swap ? 0xfffe : 0xfeff);
513 const TStr status = fields[1], mapsTo = fields[2];
514 if (status ==
"C" || status ==
"S" || status ==
"T") {
519 else if (status ==
"F") {
527 printf(
"TUniCaseFolding(\"%s\"): %d common, %d simple, %d full, %d Turkic.\n",
533 fprintf(f,
"TUniCaseFolding(%s%s): ", (full ?
"full" :
"simple"), (turkic ?
", turkic" :
""));
534 for (
int i = 0; i < src.
Len(); i++) fprintf(f,
" %04x",
int(src[i]));
535 TIntV dest;
Fold(src, 0, src.
Len(), dest,
true, full, turkic);
536 fprintf(f,
"\n -> ");
537 for (
int i = 0; i < dest.
Len(); i++) fprintf(f,
" %04x",
int(dest[i]));
540 for (
int i = 0; i < dest.
Len(); i++)
IAssert(dest[i] == expectedDest[i]);
554 Test((VB, 0x41, 0x62, 0x49, 0x43, 0xdf), (VB, 0x61, 0x62, 0x69, 0x63, 0xdf),
false,
false, f);
556 Test((VB, 0x41, 0x62, 0x49, 0x43, 0xdf), (VB, 0x61, 0x62, 0x131, 0x63, 0xdf),
false,
true, f);
558 Test((VB, 0x41, 0x62, 0x49, 0x43, 0xdf), (VB, 0x61, 0x62, 0x69, 0x63, 0x73, 0x73),
true,
false, f);
560 Test((VB, 0x41, 0x62, 0x49, 0x43, 0xdf), (VB, 0x61, 0x62, 0x131, 0x63, 0x73, 0x73),
true,
true, f);
583 for (
int i = 0; i < n; i++) isIgnored[i] =
IsWbIgnored(src[i]);
584 TIntV prevNonIgnored, nextNonIgnored, curOrNextNonIgnored;
585 prevNonIgnored.
Gen(n); nextNonIgnored.
Gen(n); curOrNextNonIgnored.
Gen(n);
587 for (
int srcIdx = 0; srcIdx < n; srcIdx++)
for (
int srcLen = 1; srcLen < n - srcIdx; srcLen++)
590 for (
int i = 0; i < srcLen; i++) {
591 prevNonIgnored[i] = prev;
592 if (! isIgnored[srcIdx + i]) prev = srcIdx + i; }
593 int next = srcIdx + srcLen;
594 for (
int i = srcLen - 1; i >= 0; i--) {
595 nextNonIgnored[i] = next;
596 if (! isIgnored[srcIdx + i]) next = srcIdx + i;
597 curOrNextNonIgnored[i] = next; }
599 fprintf(f,
"\nIndex: ");
for (
int i = 0; i < srcLen; i++) fprintf(f,
" %2d", srcIdx + i);
600 fprintf(f,
"\nNonIgn: ");
for (
int i = 0; i < srcLen; i++) fprintf(f,
" %s", (isIgnored[srcIdx + i] ?
" ." :
" Y"));
601 fprintf(f,
"\nPrevNI: ");
for (
int i = 0; i < srcLen; i++) fprintf(f,
" %2d",
int(prevNonIgnored[i]));
602 fprintf(f,
"\nNextNI: ");
for (
int i = 0; i < srcLen; i++) fprintf(f,
" %2d",
int(nextNonIgnored[i]));
603 fprintf(f,
"\nCurNextNI: ");
for (
int i = 0; i < srcLen; i++) fprintf(f,
" %2d",
int(curOrNextNonIgnored[i]));
605 for (
int i = 0; i < srcLen; i++)
609 IAssert(s ==
size_t(nextNonIgnored[i]));
611 IAssert(s ==
size_t(curOrNextNonIgnored[i]));
613 if (prevNonIgnored[i] < 0) {
IAssert(! ok);
IAssert(s ==
size_t(srcIdx)); }
621 TIntV chIgnored, chNonIgnored;
625 if (f) fprintf(f,
"%04x: flags %08x props %08x %08x script \"%s\"\n", cp,
629 chIgnored.
Sort(); chNonIgnored.
Sort();
630 printf(
"TUniChDb::TestWbNonIgnored: %d ignored, %d nonignored chars.\n", chIgnored.
Len(), chNonIgnored.
Len());
632 for (
int iter = 0; iter <= 50; iter++)
634 int percIgnored = 2 * iter;
635 for (
int n = 0; n <= 20; n++)
639 for (
int i = 0; i < n; i++) {
640 TIntV& chars = (rnd.GetUniDevInt(100) < percIgnored) ? chIgnored : chNonIgnored;
641 int j = rnd.GetUniDevInt(chars.
Len());
653 int nLines = 0;
TRnd rnd =
TRnd(123);
658 TStrV parts; fields[0].SplitOnWs(parts);
659 const int n = parts.
Len();
IAssert((n % 2) == 1);
660 TIntV chars;
TBoolV isBreak, isPredicted, isPredicted2;
664 for (
int i = 0; i < n; i++)
666 const TStr& s = parts[i];
670 else if (s ==
"\xc3\xb7")
677 isPredicted.
Gen(m + 1); isPredicted.
PutAll(
false);
678 if (
AlwaysFalse()) { printf(
"%3d", nLines);
for (
int i = 0; i < m; i++) printf(
" %04x",
int(chars[i])); printf(
"\n"); }
681 for (
int nBefore = 0; nBefore < 5; nBefore++)
686 size_t position = (nBefore > 0 ? nBefore - 1 : nBefore);
size_t prevPosition = position;
689 IAssert(prevPosition < position);
690 IAssert(position <=
size_t(nBefore + m));
691 isPredicted[int(position) - nBefore] =
true;
692 prevPosition = position;
694 IAssert(position ==
size_t(nBefore + m));
701 if (nBefore == 0) isPredicted[0] =
true;
703 for (
int i = 0; i <= m; i++) {
704 if (isBreak[i] != isPredicted[i]) ok =
false;
705 IAssert(isPredicted2[i] == isPredicted[i]); }
709 fprintf(f,
"\nError in line %d:\n", nLines);
710 fprintf(f,
"True: ");
711 for (
int i = 0; i <= m; i++) {
712 fprintf(f,
"%s ", (isBreak[i] ?
"|" :
"."));
713 if (i < m) fprintf(f,
"%04x ",
int(chars[i + nBefore])); }
714 fprintf(f,
"\nPredicted: ");
715 for (
int i = 0; i <= m; i++) {
716 fprintf(f,
"%s ", (isPredicted[i] ?
"|" :
"."));
718 const int cp = chars[i + nBefore];
721 fprintf(f,
"%4s ", s.
CStr()); }}
727 for (
int i = 0; i < m; i++) {
730 IAssert(
size_t(i + nBefore) < position);
IAssert(position <=
size_t(nBefore + m));
732 for (
int j = i + 1; j < int(position); j++)
734 IAssert(isBreak[
int(position)]); }
738 printf(
"TUniChDb::TestFindNext%sBoundary: %d lines processed.\n", (sentence ?
"Sentence" :
"Word"), nLines);
749 bool inPart1 =
false;
TIntH testedInPart1;
753 if (fields.
Len() == 1) {
754 IAssert(fields[0].IsPrefix(
"@Part"));
755 inPart1 = (fields[0] ==
"@Part1");
continue; }
758 TIntV c1, c2, c3, c4, c5;
765 #define AssE_(v1, v2, expl) AssertEq(v1, v2, TStr(expl) + " (line " + TInt::GetStr(nLines) + ")", 0)
766 #define NFC_(cmpWith, operand) DecomposeAndCompose(operand, 0, operand.Len(), v, false); AssE_(cmpWith, v, #cmpWith " == NFC(" #operand ")")
767 #define NFD_(cmpWith, operand) Decompose(operand, 0, operand.Len(), v, false); AssE_(cmpWith, v, #cmpWith " == NFD(" #operand ")")
768 #define NFKC_(cmpWith, operand) DecomposeAndCompose(operand, 0, operand.Len(), v, true); AssE_(cmpWith, v, #cmpWith " == NFKC(" #operand ")")
769 #define NFKD_(cmpWith, operand) Decompose(operand, 0, operand.Len(), v, true); AssE_(cmpWith, v, #cmpWith " == NFKD(" #operand ")")
797 testedInPart1.
AddKey(c1[0]); }
804 const int cp =
h.
GetKey(i), nLines = -1;
805 if (testedInPart1.
IsKey(cp))
continue;
818 printf(
"TUniChDb::TestComposition: %d lines processed + %d other individual codepoints.\n", nLines, nOther);
826 const TStr& trueTc,
const TStr& trueUc,
827 bool turkic,
bool lithuanian)
832 for (
int i = 0; i < 3; i++)
839 bool ok = (dest.
Len() == trueDest.
Len());
840 if (ok)
for (
int i = 0; i < dest.
Len() && ok; i++) ok = ok && (dest[i] == trueDest[i]);
842 fprintf(f,
"%s(", (how ==
ccLower ?
"toLowercase" : how ==
ccTitle ?
"toTitlecase" :
"toUppercase"));
843 for (
int i = 0; i < src.
Len(); i++) fprintf(f,
"%s%04x", (i == 0 ?
"" :
" "), int(src[i]));
844 fprintf(f,
")\nCorrect: (");
845 for (
int i = 0; i < trueDest.
Len(); i++) fprintf(f,
"%s%04x", (i == 0 ?
"" :
" "), int(trueDest[i]));
846 fprintf(f,
")\nOur output:(");
847 for (
int i = 0; i < dest.
Len(); i++) fprintf(f,
"%s%04x", (i == 0 ?
"" :
" "), int(dest[i]));
858 const TStr F =
"0046 ", L =
"004C ", S =
"0053 ", T =
"0054 ", W =
"0057 ";
859 const TStr f =
"0066 ", l =
"006c ", s =
"0073 ", t =
"0074 ", w =
"0077 ";
860 const TStr ss =
"00df ", ffl =
"fb04 ", longs =
"017f ", longst =
"fb05 ", wRing =
"1e98 ", Ring =
"030a ";
861 const TStr DZ =
"01c4 ", Dz =
"01c5 ", dz =
"01c6 ";
862 const TStr space =
"0020 ", Grave =
"0300 ";
864 F + L + s + t + space + Dz + w + T + ss + wRing + space + longs + DZ + space + dz + longst,
865 f + l + s + t + space + dz + w + t + ss + wRing + space + longs + dz + space + dz + longst,
866 F + l + s + t + space + Dz + w + t + ss + wRing + space + S + dz + space + Dz + longst,
867 F + L + S + T + space + DZ + W + T + S + S + W + Ring + space + S + DZ + space + DZ + S + T,
870 const TStr I =
"0049 ", J =
"004a ", i =
"0069 ", j =
"006a ", iDotless =
"0131 ", IDot =
"0130 ", DotA =
"0307 ";
872 s + I + t + i + w + iDotless + f + IDot + l + space + iDotless + DotA + f + I + DotA + s,
873 s + i + t + i + w + iDotless + f + i + DotA + l + space + iDotless + DotA + f + i + DotA + s,
874 S + i + t + i + w + iDotless + f + i + DotA + l + space + I + DotA + f + i + DotA + s,
875 S + I + T + I + W + I + F + IDot + L + space + I + DotA + F + I + DotA + S,
878 const TStr Sigma =
"03a3 ", sigma =
"03c3 ", fsigma =
"03c2 ";
880 Sigma + s + space + s + Sigma + space + s + Sigma + s + space + Sigma + S + Sigma + space + Sigma,
881 sigma + s + space + s + fsigma + space + s + sigma + s + space + sigma + s + fsigma + space + sigma,
882 Sigma + s + space + S + fsigma + space + S + sigma + s + space + Sigma + s + fsigma + space + Sigma,
883 Sigma + S + space + S + Sigma + space + S + Sigma + S + space + Sigma + S + Sigma + space + Sigma,
886 sigma + s + space + s + sigma + space + s + sigma + s + space + sigma + S + sigma + space + sigma,
887 sigma + s + space + s + sigma + space + s + sigma + s + space + sigma + s + sigma + space + sigma,
888 Sigma + s + space + S + sigma + space + S + sigma + s + space + Sigma + s + sigma + space + Sigma,
889 Sigma + S + space + S + Sigma + space + S + Sigma + S + space + Sigma + S + Sigma + space + Sigma,
892 fsigma + s + space + s + fsigma + space + s + fsigma + s + space + fsigma + S + fsigma + space + fsigma,
893 fsigma + s + space + s + fsigma + space + s + fsigma + s + space + fsigma + s + fsigma + space + fsigma,
894 Sigma + s + space + S + fsigma + space + S + fsigma + s + space + Sigma + s + fsigma + space + Sigma,
895 Sigma + S + space + S + Sigma + space + S + Sigma + S + space + Sigma + S + Sigma + space + Sigma,
897 const TStr nonSA =
"0315 0321 0322 ";
901 s + I + t + i + w + iDotless + f + IDot + l + space + iDotless + DotA + f + I + DotA + J + DotA + I + Grave + DotA + I + DotA + DotA + I + nonSA + DotA + s,
902 s + iDotless + t + i + w + iDotless + f + i + l + space + iDotless + DotA + f + i + j + DotA + iDotless + Grave + DotA + i + DotA + i + nonSA + s,
903 S + iDotless + t + i + w + iDotless + f + i + l + space + I + DotA + f + i + j + DotA + iDotless + Grave + DotA + i + DotA + i + nonSA + s,
904 S + I + T + IDot + W + I + F + IDot + L + space + I + DotA + F + I + DotA + J + DotA + I + Grave + DotA + I + DotA + DotA + I + nonSA + DotA + S,
908 I + Grave + t + I + DotA + f + I + nonSA + DotA + j + space + I + nonSA + DotA + space + I + Grave + t,
909 iDotless + Grave + t + i + f + i + nonSA + j + space + i + nonSA + space + iDotless + Grave + t,
910 I + Grave + t + i + f + i + nonSA + j + space + I + nonSA + DotA + space + I + Grave + t,
911 I + Grave + T + I + DotA + F + I + nonSA + DotA + J + space + I + nonSA + DotA + space + I + Grave + T,
916 i + DotA + t + i + Grave + DotA + f + i + DotA + DotA + f + i + nonSA + DotA + I + DotA + t + DotA + i + DotA + Grave,
917 i + DotA + t + i + Grave + DotA + f + i + DotA + DotA + f + i + nonSA + DotA + i + DotA + DotA + t + DotA + i + DotA + Grave,
918 I + t + i + Grave + DotA + f + i + DotA + DotA + f + i + nonSA + DotA + i + DotA + DotA + t + DotA + i + DotA + Grave,
919 I + T + I + Grave + DotA + F + I + DotA + F + I + nonSA + I + DotA + T + DotA + I + Grave,
923 J + Grave + space + J + nonSA + DotA + space + j + Grave + space + j + DotA + space + J + nonSA + J + nonSA + Grave + space + j + nonSA,
924 j + DotA + Grave + space + j + DotA + nonSA + DotA + space + j + Grave + space + j + DotA + space + j + nonSA + j + DotA + nonSA + Grave + space + j + nonSA,
925 J + Grave + space + J + nonSA + DotA + space + J + Grave + space + J + space + J + nonSA + j + DotA + nonSA + Grave + space + J + nonSA,
926 J + Grave + space + J + nonSA + DotA + space + J + Grave + space + J + space + J + nonSA + J + nonSA + Grave + space + J + nonSA,
939 if (s.
Empty())
return;
995 for (
int cp = from; cp <= to; cp++) {
1004 printf(
"TUniChDb::InitPropList: %d lines, %d code points.\n", nLines, nCps);
1029 else if (s ==
"Grapheme_Link")
continue;
1035 for (
int cp = from; cp <= to; cp++) {
1043 printf(
"TUniChDb::InitDerivedCoreProperties: %d lines, %d code points.\n", nLines, nCps);
1054 int nLines = 0, nCps = 0;
1061 if (us == xx)
continue;
1062 for (
int cp = from; cp <= to; cp++) {
1064 printf(
"TUniChDb::InitLineBreaks: warning, adding codepoint %d, its category will remain unknown.\n", cp); }
1066 h[i].lineBreak = us; nCps++; }
1070 printf(
"TUniChDb::InitLineBreaks: %d lines, %d codepoints processed (excluding \'xx\' values).\n", nLines, nCps);
1081 TStr scriptName = fields[1];
1084 IAssert(scriptNo >= 0 && scriptNo < SCHAR_MAX);
1087 for (
int cp = from; cp <= to; cp++) {
1094 printf(
"TUniChDb::InitScripts: %d scripts: ",
scripts.
Len());
1118 if (cp == 0xa || cp == 0xd || cp == 0x85 || cp == 0x2028 || cp == 0x2029) ci.
SetSbFlag(
ucfSbSep);
1134 TIntV v = (
VB, 0x3031, 0x3032, 0x3033, 0x3034, 0x3035, 0x309b, 0x309c, 0x30a0, 0x30fc, 0xff70, 0xff9e, 0xff9f);
1136 v = (
VB, 0x27, 0xb7, 0x5f4, 0x2019, 0x2027, 0x3a);
1164 for (
int c = from; c <= to; c++) {
1166 else hh[i].Val |= flag; }
1172 for (
int i = 0; i < cps.
Len(); i++)
1176 int flags2 = 0;
if (hh.
IsKey(cp)) flags2 = hh.
GetDat(cp);
1178 if (flags1 != flags2) {
1179 printf(
"cp = %04x: flags1 = %08x flags2 = %08x xor = %08x\n", cp, flags1, flags2, flags1 ^ flags2);
1194 else if (s ==
"Sp") flag =
ucfSbSp;
1203 for (
int c = from; c <= to; c++) {
1205 else hh[i].Val |= flag; }
1211 for (
int i = 0; i < cps.
Len(); i++)
1215 int flags2 = 0;
if (hh.
IsKey(cp)) flags2 = hh.
GetDat(cp);
1216 if (flags1 != flags2) {
1217 printf(
"cp = %04x: flags1 = %08x [%s] flags2 = %08x [%s] xor = %08x\n", cp,
1234 TStr conditions =
"";
1235 if (fields.
Len() == 6) conditions = fields[4];
1236 conditions.ToTrunc();
if (! conditions.Empty())
continue;
1308 int nExclusionTable = 0;
1327 if (n != 2)
continue;
1333 printf(
"TUniChDb(%s): %d chars in h, %d in decomp inverse index; %d in decomp vector; %d in exclusion table\n",
1341 int j =
h.
GetKeyId(cp);
if (j < 0)
continue;
1347 const int oldHLen =
h.
Len();
1416 0x00a0, 0x0104, 0x02d8, 0x0141, 0x00a4, 0x013d, 0x015a, 0x00a7, 0x00a8, 0x0160, 0x015e, 0x0164, 0x0179, 0x00ad, 0x017d, 0x017b,
1417 0x00b0, 0x0105, 0x02db, 0x0142, 0x00b4, 0x013e, 0x015b, 0x02c7, 0x00b8, 0x0161, 0x015f, 0x0165, 0x017a, 0x02dd, 0x017e, 0x017c,
1418 0x0154, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0139, 0x0106, 0x00c7, 0x010c, 0x00c9, 0x0118, 0x00cb, 0x011a, 0x00cd, 0x00ce, 0x010e,
1419 0x0110, 0x0143, 0x0147, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x00d7, 0x0158, 0x016e, 0x00da, 0x0170, 0x00dc, 0x00dd, 0x0162, 0x00df,
1420 0x0155, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x013a, 0x0107, 0x00e7, 0x010d, 0x00e9, 0x0119, 0x00eb, 0x011b, 0x00ed, 0x00ee, 0x010f,
1421 0x0111, 0x0144, 0x0148, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x00f7, 0x0159, 0x016f, 0x00fa, 0x0171, 0x00fc, 0x00fd, 0x0163, 0x02d9
1425 0x00a0, -1, -1, -1, 0x00a4, -1, -1, 0x00a7, 0x00a8, -1, -1, -1, -1, 0x00ad, -1, -1,
1426 0x00b0, -1, -1, -1, 0x00b4, -1, -1, -1, 0x00b8, -1, -1, -1, -1, -1, -1, -1,
1427 -1, 0x00c1, 0x00c2, -1, 0x00c4, -1, -1, 0x00c7, -1, 0x00c9, -1, 0x00cb, -1, 0x00cd, 0x00ce, -1,
1428 -1, -1, -1, 0x00d3, 0x00d4, -1, 0x00d6, 0x00d7, -1, -1, 0x00da, -1, 0x00dc, 0x00dd, -1, 0x00df,
1429 -1, 0x00e1, 0x00e2, -1, 0x00e4, -1, -1, 0x00e7, -1, 0x00e9, -1, 0x00eb, -1, 0x00ed, 0x00ee, -1,
1430 -1, -1, -1, 0x00f3, 0x00f4, -1, 0x00f6, 0x00f7, -1, -1, 0x00fa, -1, 0x00fc, 0x00fd, -1, -1,
1431 -1, -1, 0x00c3, 0x00e3, 0x00a1, 0x00b1, 0x00c6, 0x00e6, -1, -1, -1, -1, 0x00c8, 0x00e8, 0x00cf, 0x00ef,
1432 0x00d0, 0x00f0, -1, -1, -1, -1, -1, -1, 0x00ca, 0x00ea, 0x00cc, 0x00ec, -1, -1, -1, -1,
1433 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 ,
1434 -1, -1, -1, -1, -1, -1, -1, -1, -1, 0x00c5, 0x00e5, -1, -1, 0x00a5, 0x00b5, -1,
1435 -1, 0x00a3, 0x00b3, 0x00d1, 0x00f1, -1, -1, 0x00d2, 0x00f2, -1, -1, -1, -1, -1, -1, -1,
1436 0x00d5, 0x00f5, -1, -1, 0x00c0, 0x00e0, -1, -1, 0x00d8, 0x00f8, 0x00a6, 0x00b6, -1, -1, 0x00aa, 0x00ba,
1437 0x00a9, 0x00b9, 0x00de, 0x00fe, 0x00ab, 0x00bb, -1, -1, -1, -1, -1, -1, -1, -1, 0x00d9, 0x00f9,
1438 0x00db, 0x00fb, -1, -1, -1, -1, -1, -1, -1, 0x00ac, 0x00bc, 0x00af, 0x00bf, 0x00ae, 0x00be, -1
1442 -1, -1, -1, -1, -1, -1, -1, 0x00b7, -1, -1, -1, -1, -1, -1, -1, -1,
1443 -1, -1, -1, -1, -1, -1, -1, -1, 0x00a2, 0x00ff, -1, 0x00b2, -1, 0x00bd, -1, -1
1451 0x00a0, 0x0126, 0x02d8, 0x00a3, 0x00a4, -1, 0x0124, 0x00a7, 0x00a8, 0x0130, 0x015e, 0x011e, 0x0134, 0x00ad, -1, 0x017b,
1452 0x00b0, 0x0127, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x0125, 0x00b7, 0x00b8, 0x0131, 0x015f, 0x011f, 0x0135, 0x00bd, -1, 0x017c,
1453 0x00c0, 0x00c1, 0x00c2, -1, 0x00c4, 0x010a, 0x0108, 0x00c7, 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
1454 -1, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x0120, 0x00d6, 0x00d7, 0x011c, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x016c, 0x015c, 0x00df,
1455 0x00e0, 0x00e1, 0x00e2, -1, 0x00e4, 0x010b, 0x0109, 0x00e7, 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
1456 -1, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x0121, 0x00f6, 0x00f7, 0x011d, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x016d, 0x015d, 0x02d9
1460 0x00a0, -1, -1, 0x00a3, 0x00a4, -1, -1, 0x00a7, 0x00a8, -1, -1, -1, -1, 0x00ad, -1, -1,
1461 0x00b0, -1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, -1, 0x00b7, 0x00b8, -1, -1, -1, -1, 0x00bd, -1, -1,
1462 0x00c0, 0x00c1, 0x00c2, -1, 0x00c4, -1, -1, 0x00c7, 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
1463 -1, 0x00d1, 0x00d2, 0x00d3, 0x00d4, -1, 0x00d6, 0x00d7, -1, 0x00d9, 0x00da, 0x00db, 0x00dc, -1, -1, 0x00df,
1464 0x00e0, 0x00e1, 0x00e2, -1, 0x00e4, -1, -1, 0x00e7, 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
1465 -1, 0x00f1, 0x00f2, 0x00f3, 0x00f4, -1, 0x00f6, 0x00f7, -1, 0x00f9, 0x00fa, 0x00fb, 0x00fc, -1, -1, -1,
1466 -1, -1, -1, -1, -1, -1, -1, -1, 0x00c6, 0x00e6, 0x00c5, 0x00e5, -1, -1, -1, -1,
1467 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0x00d8, 0x00f8, 0x00ab, 0x00bb,
1468 0x00d5, 0x00f5, -1, -1, 0x00a6, 0x00b6, 0x00a1, 0x00b1, -1, -1, -1, -1, -1, -1, -1, -1,
1469 0x00a9, 0x00b9, -1, -1, 0x00ac, 0x00bc, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
1470 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 ,
1471 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0x00de, 0x00fe, 0x00aa, 0x00ba,
1472 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0x00dd, 0x00fd, -1, -1,
1473 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0x00af, 0x00bf, -1, -1, -1,
1484 0x00a0, 0x0104, 0x0138, 0x0156, 0x00a4, 0x0128, 0x013b, 0x00a7, 0x00a8, 0x0160, 0x0112, 0x0122, 0x0166, 0x00ad, 0x017d, 0x00af,
1485 0x00b0, 0x0105, 0x02db, 0x0157, 0x00b4, 0x0129, 0x013c, 0x02c7, 0x00b8, 0x0161, 0x0113, 0x0123, 0x0167, 0x014a, 0x017e, 0x014b,
1486 0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e, 0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x012a,
1487 0x0110, 0x0145, 0x014c, 0x0136, 0x00d4, 0x00d5, 0x00d6, 0x00d7, 0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x0168, 0x016a, 0x00df,
1488 0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f, 0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x012b,
1489 0x0111, 0x0146, 0x014d, 0x0137, 0x00f4, 0x00f5, 0x00f6, 0x00f7, 0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x0169, 0x016b, 0x02d9
1493 0x00a0, -1, -1, -1, 0x00a4, -1, -1, 0x00a7, 0x00a8, -1, -1, -1, -1, 0x00ad, -1, 0x00af,
1494 0x00b0, -1, -1, -1, 0x00b4, -1, -1, -1, 0x00b8, -1, -1, -1, -1, -1, -1, -1,
1495 -1, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, -1, -1, 0x00c9, -1, 0x00cb, -1, 0x00cd, 0x00ce, -1,
1496 -1, -1, -1, -1, 0x00d4, 0x00d5, 0x00d6, 0x00d7, 0x00d8, -1, 0x00da, 0x00db, 0x00dc, -1, -1, 0x00df,
1497 -1, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, -1, -1, 0x00e9, -1, 0x00eb, -1, 0x00ed, 0x00ee, -1,
1498 -1, -1, -1, -1, 0x00f4, 0x00f5, 0x00f6, 0x00f7, 0x00f8, -1, 0x00fa, 0x00fb, 0x00fc, -1, -1, -1,
1499 0x00c0, 0x00e0, -1, -1, 0x00a1, 0x00b1, -1, -1, -1, -1, -1, -1, 0x00c8, 0x00e8, -1, -1,
1500 0x00d0, 0x00f0, 0x00aa, 0x00ba, -1, -1, 0x00cc, 0x00ec, 0x00ca, 0x00ea, -1, -1, -1, -1, -1, -1,
1501 -1, -1, 0x00ab, 0x00bb, -1, -1, -1, -1, 0x00a5, 0x00b5, 0x00cf, 0x00ef, -1, -1, 0x00c7, 0x00e7,
1502 -1, -1, -1, -1, -1, -1, 0x00d3, 0x00f3, 0x00a2, -1, -1, 0x00a6, 0x00b6, -1, -1, -1,
1503 -1, -1, -1, -1, -1, 0x00d1, 0x00f1, -1, -1, -1, 0x00bd, 0x00bf, 0x00d2, 0x00f2, -1, -1,
1504 -1, -1, -1, -1, -1, -1, 0x00a3, 0x00b3, -1, -1, -1, -1, -1, -1, -1, -1,
1505 0x00a9, 0x00b9, -1, -1, -1, -1, 0x00ac, 0x00bc, 0x00dd, 0x00fd, 0x00de, 0x00fe, -1, -1, -1, -1,
1506 -1, -1, 0x00d9, 0x00f9, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0x00ae, 0x00be, -1,
1510 -1, -1, -1, -1, -1, -1, -1, 0x00b7, -1, -1, -1, -1, -1, -1, -1, -1,
1511 -1, -1, -1, -1, -1, -1, -1, -1, -1, 0x00ff, -1, 0x00b2, -1, -1, -1, -1
1519 0x00c7, 0x00fc, 0x00e9, 0x00e2, 0x00e4, 0x00e0, 0x00e5, 0x00e7, 0x00ea, 0x00eb, 0x00e8, 0x00ef, 0x00ee, 0x00ec, 0x00c4, 0x00c5,
1520 0x00c9, 0x00e6, 0x00c6, 0x00f4, 0x00f6, 0x00f2, 0x00fb, 0x00f9, 0x00ff, 0x00d6, 0x00dc, 0x00a2, 0x00a3, 0x00a5, 0x20a7, 0x0192,
1521 0x00e1, 0x00ed, 0x00f3, 0x00fa, 0x00f1, 0x00d1, 0x00aa, 0x00ba, 0x00bf, 0x2310, 0x00ac, 0x00bd, 0x00bc, 0x00a1, 0x00ab, 0x00bb,
1522 0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x2561, 0x2562, 0x2556, 0x2555, 0x2563, 0x2551, 0x2557, 0x255d, 0x255c, 0x255b, 0x2510,
1523 0x2514, 0x2534, 0x252c, 0x251c, 0x2500, 0x253c, 0x255e, 0x255f, 0x255a, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550, 0x256c, 0x2567,
1524 0x2568, 0x2564, 0x2565, 0x2559, 0x2558, 0x2552, 0x2553, 0x256b, 0x256a, 0x2518, 0x250c, 0x2588, 0x2584, 0x258c, 0x2590, 0x2580,
1525 0x03b1, 0x00df, 0x0393, 0x03c0, 0x03a3, 0x03c3, 0x00b5, 0x03c4, 0x03a6, 0x0398, 0x03a9, 0x03b4, 0x221e, 0x03c6, 0x03b5, 0x2229,
1526 0x2261, 0x00b1, 0x2265, 0x2264, 0x2320, 0x2321, 0x00f7, 0x2248, 0x00b0, 0x2219, 0x00b7, 0x221a, 0x207f, 0x00b2, 0x25a0, 0x00a0
1530 0x00ff, 0x00ad, 0x009b, 0x009c, -1, 0x009d, -1, -1, -1, -1, 0x00a6, 0x00ae, 0x00aa, -1, -1, -1,
1531 0x00f8, 0x00f1, 0x00fd, -1, -1, 0x00e6, -1, 0x00fa, -1, -1, 0x00a7, 0x00af, 0x00ac, 0x00ab, -1, 0x00a8,
1532 -1, -1, -1, -1, 0x008e, 0x008f, 0x0092, 0x0080, -1, 0x0090, -1, -1, -1, -1, -1, -1,
1533 -1, 0x00a5, -1, -1, -1, -1, 0x0099, -1, -1, -1, -1, -1, 0x009a, -1, -1, 0x00e1,
1534 0x0085, 0x00a0, 0x0083, -1, 0x0084, 0x0086, 0x0091, 0x0087, 0x008a, 0x0082, 0x0088, 0x0089, 0x008d, 0x00a1, 0x008c, 0x008b,
1535 -1, 0x00a4, 0x0095, 0x00a2, 0x0093, -1, 0x0094, 0x00f6, -1, 0x0097, 0x00a3, 0x0096, 0x0081, -1, -1, 0x0098,
1539 -1, -1, -1, 0x00e2, -1, -1, -1, -1, 0x00e9, -1, -1, -1, -1, -1, -1, -1,
1540 -1, -1, -1, 0x00e4, -1, -1, 0x00e8, -1, -1, 0x00ea, -1, -1, -1, -1, -1, -1,
1541 -1, 0x00e0, -1, -1, 0x00eb, 0x00ee, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
1542 0x00e3, -1, -1, 0x00e5, 0x00e7, -1, 0x00ed, -1, -1, -1, -1, -1, -1, -1, -1, -1,
1546 -1, -1, -1, -1, -1, -1, -1, -1, -1, 0x00f9, 0x00fb, -1, -1, -1, 0x00ec, -1,
1547 -1, -1, -1, -1, -1, -1, -1, -1, -1, 0x00ef, -1, -1, -1, -1, -1, -1,
1548 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 ,
1549 -1, -1, -1, -1, -1, -1, -1, -1, 0x00f7, -1, -1, -1, -1, -1, -1, -1,
1550 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 ,
1551 -1, 0x00f0, -1, -1, 0x00f3, 0x00f2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
1555 0x00c4, -1, 0x00b3, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0x00da, -1, -1, -1,
1556 0x00bf, -1, -1, -1, 0x00c0, -1, -1, -1, 0x00d9, -1, -1, -1, 0x00c3, -1, -1, -1,
1557 -1, -1, -1, -1, 0x00b4, -1, -1, -1, -1, -1, -1, -1, 0x00c2, -1, -1, -1,
1558 -1, -1, -1, -1, 0x00c1, -1, -1, -1, -1, -1, -1, -1, 0x00c5, -1, -1, -1,
1559 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 ,
1560 0x00cd, 0x00ba, 0x00d5, 0x00d6, 0x00c9, 0x00b8, 0x00b7, 0x00bb, 0x00d4, 0x00d3, 0x00c8, 0x00be, 0x00bd, 0x00bc, 0x00c6, 0x00c7,
1561 0x00cc, 0x00b5, 0x00b6, 0x00b9, 0x00d1, 0x00d2, 0x00cb, 0x00cf, 0x00d0, 0x00ca, 0x00d8, 0x00d7, 0x00ce, -1, -1, -1,
1562 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 ,
1563 0x00df, -1, -1, -1, 0x00dc, -1, -1, -1, 0x00db, -1, -1, -1, 0x00dd, -1, -1, -1,
1564 0x00de, 0x00b0, 0x00b1, 0x00b2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
1565 0x00fe, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
1578 0x00c7, 0x00fc, 0x00e9, 0x00e2, 0x00e4, 0x016f, 0x0107, 0x00e7, 0x0142, 0x00eb, 0x0150, 0x0151, 0x00ee, 0x0179, 0x00c4, 0x0106,
1579 0x00c9, 0x0139, 0x013a, 0x00f4, 0x00f6, 0x013d, 0x013e, 0x015a, 0x015b, 0x00d6, 0x00dc, 0x0164, 0x0165, 0x0141, 0x00d7, 0x010d,
1580 0x00e1, 0x00ed, 0x00f3, 0x00fa, 0x0104, 0x0105, 0x017d, 0x017e, 0x0118, 0x0119, 0x00ac, 0x017a, 0x010c, 0x015f, 0x00ab, 0x00bb,
1581 0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x00c1, 0x00c2, 0x011a, 0x015e, 0x2563, 0x2551, 0x2557, 0x255d, 0x017b, 0x017c, 0x2510,
1582 0x2514, 0x2534, 0x252c, 0x251c, 0x2500, 0x253c, 0x0102, 0x0103, 0x255a, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550, 0x256c, 0x00a4,
1583 0x0111, 0x0110, 0x010e, 0x00cb, 0x010f, 0x0147, 0x00cd, 0x00ce, 0x011b, 0x2518, 0x250c, 0x2588, 0x2584, 0x0162, 0x016e, 0x2580,
1584 0x00d3, 0x00df, 0x00d4, 0x0143, 0x0144, 0x0148, 0x0160, 0x0161, 0x0154, 0x00da, 0x0155, 0x0170, 0x00fd, 0x00dd, 0x0163, 0x00b4,
1585 0x00ad, 0x02dd, 0x02db, 0x02c7, 0x02d8, 0x00a7, 0x00f7, 0x00b8, 0x00b0, 0x00a8, 0x02d9, 0x0171, 0x0158, 0x0159, 0x25a0, 0x00a0
1589 0x00ff, -1, -1, -1, 0x00cf, -1, -1, 0x00f5, 0x00f9, -1, -1, 0x00ae, 0x00aa, 0x00f0, -1, -1,
1590 0x00f8, -1, -1, -1, 0x00ef, -1, -1, -1, 0x00f7, -1, -1, 0x00af, -1, -1, -1, -1,
1591 -1, 0x00b5, 0x00b6, -1, 0x008e, -1, -1, 0x0080, -1, 0x0090, -1, 0x00d3, -1, 0x00d6, 0x00d7, -1,
1592 -1, -1, -1, 0x00e0, 0x00e2, -1, 0x0099, 0x009e, -1, -1, 0x00e9, -1, 0x009a, 0x00ed, -1, 0x00e1,
1593 -1, 0x00a0, 0x0083, -1, 0x0084, -1, -1, 0x0087, -1, 0x0082, -1, 0x0089, -1, 0x00a1, 0x008c, -1,
1594 -1, -1, -1, 0x00a2, 0x0093, -1, 0x0094, 0x00f6, -1, -1, 0x00a3, -1, 0x0081, 0x00ec, -1, -1,
1595 -1, -1, 0x00c6, 0x00c7, 0x00a4, 0x00a5, 0x008f, 0x0086, -1, -1, -1, -1, 0x00ac, 0x009f, 0x00d2, 0x00d4,
1596 0x00d1, 0x00d0, -1, -1, -1, -1, -1, -1, 0x00a8, 0x00a9, 0x00b7, 0x00d8, -1, -1, -1, -1,
1597 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 ,
1598 -1, -1, -1, -1, -1, -1, -1, -1, -1, 0x0091, 0x0092, -1, -1, 0x0095, 0x0096, -1,
1599 -1, 0x009d, 0x0088, 0x00e3, 0x00e4, -1, -1, 0x00d5, 0x00e5, -1, -1, -1, -1, -1, -1, -1,
1600 0x008a, 0x008b, -1, -1, 0x00e8, 0x00ea, -1, -1, 0x00fc, 0x00fd, 0x0097, 0x0098, -1, -1, 0x00b8, 0x00ad,
1601 0x00e6, 0x00e7, 0x00dd, 0x00ee, 0x009b, 0x009c, -1, -1, -1, -1, -1, -1, -1, -1, 0x00de, 0x0085,
1602 0x00eb, 0x00fb, -1, -1, -1, -1, -1, -1, -1, 0x008d, 0x00ab, 0x00bd, 0x00be, 0x00a6, 0x00a7, -1
1606 -1, -1, -1, -1, -1, -1, -1, 0x00f3, -1, -1, -1, -1, -1, -1, -1, -1,
1607 -1, -1, -1, -1, -1, -1, -1, -1, 0x00f4, 0x00fa, -1, 0x00f2, -1, 0x00f1, -1, -1
1611 0x00c4, -1, 0x00b3, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0x00da, -1, -1, -1,
1612 0x00bf, -1, -1, -1, 0x00c0, -1, -1, -1, 0x00d9, -1, -1, -1, 0x00c3, -1, -1, -1,
1613 -1, -1, -1, -1, 0x00b4, -1, -1, -1, -1, -1, -1, -1, 0x00c2, -1, -1, -1,
1614 -1, -1, -1, -1, 0x00c1, -1, -1, -1, -1, -1, -1, -1, 0x00c5, -1, -1, -1,
1615 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 ,
1616 0x00cd, 0x00ba, -1, -1, 0x00c9, -1, -1, 0x00bb, -1, -1, 0x00c8, -1, -1, 0x00bc, -1, -1,
1617 0x00cc, -1, -1, 0x00b9, -1, -1, 0x00cb, -1, -1, 0x00ca, -1, -1, 0x00ce, -1, -1, -1,
1618 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 ,
1619 0x00df, -1, -1, -1, 0x00dc, -1, -1, -1, 0x00db, -1, -1, -1, -1, -1, -1, -1,
1620 -1, 0x00b0, 0x00b1, 0x00b2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
1621 0x00fe, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
1629 0x20ac, -1, 0x201a, -1, 0x201e, 0x2026, 0x2020, 0x2021, -1, 0x2030, 0x0160, 0x2039, 0x015a, 0x0164, 0x017d, 0x0179,
1630 -1, 0x2018, 0x2019, 0x201c, 0x201d, 0x2022, 0x2013, 0x2014, -1, 0x2122, 0x0161, 0x203a, 0x015b, 0x0165, 0x017e, 0x017a,
1631 0x00a0, 0x02c7, 0x02d8, 0x0141, 0x00a4, 0x0104, 0x00a6, 0x00a7, 0x00a8, 0x00a9, 0x015e, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x017b,
1632 0x00b0, 0x00b1, 0x02db, 0x0142, 0x00b4, 0x00b5, 0x00b6, 0x00b7, 0x00b8, 0x0105, 0x015f, 0x00bb, 0x013d, 0x02dd, 0x013e, 0x017c,
1633 0x0154, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0139, 0x0106, 0x00c7, 0x010c, 0x00c9, 0x0118, 0x00cb, 0x011a, 0x00cd, 0x00ce, 0x010e,
1634 0x0110, 0x0143, 0x0147, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x00d7, 0x0158, 0x016e, 0x00da, 0x0170, 0x00dc, 0x00dd, 0x0162, 0x00df,
1635 0x0155, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x013a, 0x0107, 0x00e7, 0x010d, 0x00e9, 0x0119, 0x00eb, 0x011b, 0x00ed, 0x00ee, 0x010f,
1636 0x0111, 0x0144, 0x0148, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x00f7, 0x0159, 0x016f, 0x00fa, 0x0171, 0x00fc, 0x00fd, 0x0163, 0x02d9
1640 0x00a0, -1, -1, -1, 0x00a4, -1, 0x00a6, 0x00a7, 0x00a8, 0x00a9, -1, 0x00ab, 0x00ac, 0x00ad, 0x00ae, -1,
1641 0x00b0, 0x00b1, -1, -1, 0x00b4, 0x00b5, 0x00b6, 0x00b7, 0x00b8, -1, -1, 0x00bb, -1, -1, -1, -1,
1642 -1, 0x00c1, 0x00c2, -1, 0x00c4, -1, -1, 0x00c7, -1, 0x00c9, -1, 0x00cb, -1, 0x00cd, 0x00ce, -1,
1643 -1, -1, -1, 0x00d3, 0x00d4, -1, 0x00d6, 0x00d7, -1, -1, 0x00da, -1, 0x00dc, 0x00dd, -1, 0x00df,
1644 -1, 0x00e1, 0x00e2, -1, 0x00e4, -1, -1, 0x00e7, -1, 0x00e9, -1, 0x00eb, -1, 0x00ed, 0x00ee, -1,
1645 -1, -1, -1, 0x00f3, 0x00f4, -1, 0x00f6, 0x00f7, -1, -1, 0x00fa, -1, 0x00fc, 0x00fd, -1, -1,
1646 -1, -1, 0x00c3, 0x00e3, 0x00a5, 0x00b9, 0x00c6, 0x00e6, -1, -1, -1, -1, 0x00c8, 0x00e8, 0x00cf, 0x00ef,
1647 0x00d0, 0x00f0, -1, -1, -1, -1, -1, -1, 0x00ca, 0x00ea, 0x00cc, 0x00ec, -1, -1, -1, -1,
1648 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 ,
1649 -1, -1, -1, -1, -1, -1, -1, -1, -1, 0x00c5, 0x00e5, -1, -1, 0x00bc, 0x00be, -1,
1650 -1, 0x00a3, 0x00b3, 0x00d1, 0x00f1, -1, -1, 0x00d2, 0x00f2, -1, -1, -1, -1, -1, -1, -1,
1651 0x00d5, 0x00f5, -1, -1, 0x00c0, 0x00e0, -1, -1, 0x00d8, 0x00f8, 0x008c, 0x009c, -1, -1, 0x00aa, 0x00ba,
1652 0x008a, 0x009a, 0x00de, 0x00fe, 0x008d, 0x009d, -1, -1, -1, -1, -1, -1, -1, -1, 0x00d9, 0x00f9,
1653 0x00db, 0x00fb, -1, -1, -1, -1, -1, -1, -1, 0x008f, 0x009f, 0x00af, 0x00bf, 0x008e, 0x009e, -1,
1657 -1, -1, -1, -1, -1, -1, -1, 0x00a1, -1, -1, -1, -1, -1, -1, -1, -1,
1658 -1, -1, -1, -1, -1, -1, -1, -1, 0x00a2, 0x00ff, -1, 0x00b2, -1, 0x00bd, -1, -1,
1662 -1, -1, -1, 0x0096, 0x0097, -1, -1, -1, 0x0091, 0x0092, 0x0082, -1, 0x0093, 0x0094, 0x0084, -1,
1663 0x0086, 0x0087, 0x0095, -1, -1, -1, 0x0085, -1, -1, -1, -1, -1, -1, -1, -1, -1,
1664 0x0089, -1, -1, -1, -1, -1, -1, -1, -1, 0x008b, 0x009b, -1, -1, -1, -1, -1,
1686 RegisterCodec(
"ISO-8859-1 ISO_8859-1 ISO_8859-1:1987 ISO-IR-100 CP819 IBM819 LATIN1 L1 csISOLatin1 ISO8859-1 ISO8859_1 CP28591", TCodecBase::New<TCodec_ISO8859_1>());
1687 RegisterCodec(
"ISO-8859-2 ISO_8859-2 ISO_8859-2:1987 ISO-IR-101 LATIN2 L2 csISOLatin2 ISO8859-2 ISO8859_2 CP28592", TCodecBase::New<TCodec_ISO8859_2>());
1688 RegisterCodec(
"ISO-8859-3 ISO_8859-3 ISO_8859-3:1988 ISO-IR-109 LATIN3 L3 csISOLatin3 ISO8859-3 ISO8859_3 CP28593", TCodecBase::New<TCodec_ISO8859_3>());
1689 RegisterCodec(
"ISO-8859-4 ISO_8859-4 ISO_8859-4:1988 ISO-IR-110 LATIN4 L4 csISOLatin4 ISO8859-4 ISO8859_4 CP28594", TCodecBase::New<TCodec_ISO8859_4>());
1690 RegisterCodec(
"YUASCII YU-ASCII YU_ASCII", TCodecBase::New<TCodec_YuAscii>());
1691 RegisterCodec(
"CP1250 Windows-1250 MS-EE", TCodecBase::New<TCodec_CP1250>());
1692 RegisterCodec(
"CP852 cp852_DOSLatin2 DOSLatin2", TCodecBase::New<TCodec_CP852>());
1693 RegisterCodec(
"CP437 cp437_DOSLatinUS DOSLatinUS", TCodecBase::New<TCodec_CP437>());
1700 dest.
AddCh(
char(c & 0xffu));
1701 else if (c < 0x800u) {
1702 dest.
AddCh(
char(TUniCodec::_1100_0000 | ((c >> 6) & TUniCodec::_0001_1111)));
1703 dest.
AddCh(
char(TUniCodec::_1000_0000 | (c & TUniCodec::_0011_1111))); }
1704 else if (c < 0x10000u) {
1705 dest.
AddCh(
char(TUniCodec::_1110_0000 | ((c >> 12) & TUniCodec::_0000_1111)));
1706 dest.
AddCh(
char(TUniCodec::_1000_0000 | ((c >> 6) & TUniCodec::_0011_1111)));
1707 dest.
AddCh(
char(TUniCodec::_1000_0000 | (c & TUniCodec::_0011_1111))); }
1708 else if (c < 0x200000u) {
1709 dest.
AddCh(
char(TUniCodec::_1111_0000 | ((c >> 18) & TUniCodec::_0000_0111)));
1710 dest.
AddCh(
char(TUniCodec::_1000_0000 | ((c >> 12) & TUniCodec::_0011_1111)));
1711 dest.
AddCh(
char(TUniCodec::_1000_0000 | ((c >> 6) & TUniCodec::_0011_1111)));
1712 dest.
AddCh(
char(TUniCodec::_1000_0000 | (c & TUniCodec::_0011_1111))); }
1713 else if (c < 0x4000000u) {
1714 dest.
AddCh(
char(TUniCodec::_1111_1000 | ((c >> 24) & TUniCodec::_0000_0011)));
1715 dest.
AddCh(
char(TUniCodec::_1000_0000 | ((c >> 18) & TUniCodec::_0011_1111)));
1716 dest.
AddCh(
char(TUniCodec::_1000_0000 | ((c >> 12) & TUniCodec::_0011_1111)));
1717 dest.
AddCh(
char(TUniCodec::_1000_0000 | ((c >> 6) & TUniCodec::_0011_1111)));
1718 dest.
AddCh(
char(TUniCodec::_1000_0000 | (c & TUniCodec::_0011_1111))); }
1720 dest.
AddCh(
char(TUniCodec::_1111_1100 | ((c >> 30) & TUniCodec::_0000_0011)));
1721 dest.
AddCh(
char(TUniCodec::_1000_0000 | ((c >> 24) & TUniCodec::_0011_1111)));
1722 dest.
AddCh(
char(TUniCodec::_1000_0000 | ((c >> 18) & TUniCodec::_0011_1111)));
1723 dest.
AddCh(
char(TUniCodec::_1000_0000 | ((c >> 12) & TUniCodec::_0011_1111)));
1724 dest.
AddCh(
char(TUniCodec::_1000_0000 | ((c >> 6) & TUniCodec::_0011_1111)));
1725 dest.
AddCh(
char(TUniCodec::_1000_0000 | (c & TUniCodec::_0011_1111))); }
static int SwapBytes(int x)
TPair< TInt, TInt > TIntPr
int SearchCh(const char &Ch, const int &BChN=0) const
static PExcept New(const TStr &MsgStr, const TStr &LocStr=TStr())
void TestDecodeUtf16(TRnd &rnd, const TStr &testCaseDesc, const TUtf16BomHandling bomHandling, const TUniByteOrder defaultByteOrder, const bool insertBom)
static const int fromUnicodeTable1[6 *16]
enum TUniChProperties_ TUniChProperties
#define IAssertR(Cond, Reason)
static PSOut New(const TStr &FNm, const bool &Append=false)
void SetPropertyX(const TUniChPropertiesX flag)
bool IsInt(const bool &Check, const int &MnVal, const int &MxVal, int &Val) const
int GetScriptByName(const TStr &scriptName) const
void Merge()
Sorts the vector and only keeps a single element of each value.
void Test(const TIntV &src, const TIntV &expectedDest, const bool full, const bool turkic, FILE *f)
enum TUniChFlags_ TUniChFlags
bool IsCompositionExclusion() const
#define NFC_(cmpWith, operand)
void SaveBin(const TStr &fnBinUcd)
bool IsDcpFlag(const TUniChFlags flag) const
static const ushort LineBreak_Quotation
void SetProperty(const TUniChProperties flag)
bool IsGraphemeExtend() const
void SetSbFlag(const TUniChFlags flag)
static const int fromUnicodeTable1[14 *16]
static bool Exists(const TStr &FNm)
static TStr GetSpecialCasingFn()
int GetWbFlags(const int cp) const
void AssertEq(const TIntV &v1, const TIntV &v2, const TStr &explanation, FILE *f)
void SetDcpFlag(const TUniChFlags flag)
void SetWbFlag(const TUniChFlags flag)
enum TUnicodeErrorHandling_ TUnicodeErrorHandling
static TStr GetScriptNameKatakana()
static const ushort LineBreak_InfixNumeric
#define NFD_(cmpWith, operand)
static uint GetRndUint(TRnd &rnd)
void AddCh(const char &Ch, const int &MxLen=-1)
TSizeTy Len() const
Returns the number of elements in the vector.
void InitPropList(const TStr &basePath)
static const int toUnicodeTable[8 *16]
enum TUniChDb::TCaseConversion_ TCaseConversion
bool IsAlphabetic() const
int GetSbFlags(const int cp) const
void WbFindCurOrNextNonIgnored(const TSrcVec &src, size_t &position, const size_t srcEnd) const
static const ushort LineBreak_ComplexContext
const TStr & GetScriptName(const int scriptId) const
TIntIntVH specialCasingUpper
static const int yuAsciiChars[10]
TStr GetSubStr(const int &BChN, const int &EChN) const
void RegisterCodec(const TStr &nameList, const PCodecBase &codec)
void InitDerivedCoreProperties(const TStr &basePath)
bool IsWhiteSpace() const
void InitLineBreaks(const TStr &basePath)
static const int uniChars[10]
void WbFindNextNonIgnored(const TSrcVec &src, size_t &position, const size_t srcEnd) const
bool WbFindPrevNonIgnored(const TSrcVec &src, const size_t srcStart, size_t &position) const
const TDat & GetDat(const TKey &Key) const
static TStr GetNormalizationTestFn()
enum TUniChPropertiesX_ TUniChPropertiesX
TUnicodeErrorHandling errorHandling
void Test(const TStr &basePath)
static const int fromUnicodeTable2[2 *16]
static const int fromUnicodeTable2[4 *16]
static void ParseCodePointRange(const TStr &s, int &from, int &to)
TIntIntVH specialCasingLower
int simpleUpperCaseMapping
void TestCaseConversion(const TStr &source, const TStr &trueLc, const TStr &trueTc, const TStr &trueUc, bool turkic, bool lithuanian)
static TStr GetUnicodeDataFn()
THash< TIntPr, TInt > inverseDec
bool IsPropertyX(const TUniChPropertiesX flag) const
bool FindNextSentenceBoundary(const TSrcVec &src, const size_t srcIdx, const size_t srcCount, size_t &position) const
size_t DecodeUtf16FromWords(const TSrcVec &src, size_t srcIdx, const size_t srcCount, TVec< TDestCh > &dest, bool clrDest, const TUtf16BomHandling bomHandling=bomAllowed, const TUniByteOrder defaultByteOrder=boMachineEndian) const
TStr GetWbFlagsStr() const
static TStr GetScriptsFn()
static const int fromUnicodeTable3[6 *16]
void TestCaseConversions()
int simpleTitleCaseMapping
static PSIn New(const TStr &FNm)
void Clr(const bool &DoDel=true, const TSizeTy &NoDelLim=-1)
Clears the contents of the vector.
static const int fromUnicodeTable2[2 *16]
void TestDecodeUtf8(TRnd &rnd, const TStr &testCaseDesc)
void Sort(const bool &Asc=true)
Sorts the elements of the vector.
bool IsCompatibilityDecomposition() const
static TStr GetScriptNameUnknown()
void PutAll(const TVal &Val)
Sets all elements of the vector to value Val.
static TStr GetSentenceBreakTestFn()
size_t EncodeUtf16ToBytes(const TSrcVec &src, size_t srcIdx, const size_t srcCount, TVec< TDestCh > &dest, const bool clrDest, const bool insertBom, const TUniByteOrder destByteOrder=boMachineEndian) const
void Save(TSOut &SOut) const
uint GetUniDevUInt(const uint &Range=0)
enum TUniByteOrder_ TUniByteOrder
bool FNextKeyId(int &KeyId) const
static const int fromUnicodeTable3[3 *16]
TStr GetSbFlagsStr() const
void LoadTxt_ProcessDecomposition(TUniChInfo &ci, TStr s)
bool FindNextWordBoundary(const TSrcVec &src, const size_t srcIdx, const size_t srcCount, size_t &position) const
static const int toUnicodeTable[6 *16]
void InitSpecialCasing(const TStr &basePath)
const TVal & Last() const
Returns a reference to the last element of the vector.
static TStr GetDerivedCorePropsFn()
static TStr GetWordBreakPropertyFn()
static const int fromUnicodeTable3[11 *16]
THash< TInt, TUniChInfo > h
bool GetNextLine(TStrV &dest)
void Open(const TStr &fileName)
static const int fromUnicodeTable1[14 *16]
bool IsSbFlag(const TUniChFlags flag) const
static const ushort LineBreak_Numeric
void InitScripts(const TStr &basePath)
void TestComposition(const TStr &basePath)
enum TUtf16BomHandling_ TUtf16BomHandling
static TStr GetLineBreakFn()
void Fold(const TSrcVec &src, size_t srcIdx, const size_t srcCount, TVec< TDestCh > &dest, const bool clrDest, const bool full, const bool turkic) const
static const int fromUnicodeTable1[14 *16]
static void ParseCodePointList(const TStr &s, TIntV &dest, bool ClrDestP=true)
static TStr GetWordBreakTestFn()
size_t DecodeUtf16FromBytes(const TSrcVec &src, size_t srcIdx, const size_t srcCount, TVec< TDestCh > &dest, const bool clrDest, const TUtf16BomHandling bomHandling=bomAllowed, const TUniByteOrder defaultByteOrder=boMachineEndian) const
static bool IsWbIgnored(const TUniChInfo &ci)
static const int toUnicodeTable[8 *16]
int GetKeyId(const TKey &Key) const
void SetCat(const int cp)
static const int toUnicodeTable[8 *16]
static TStr GetSentenceBreakPropertyFn()
void LoadTxt(const TStr &fileName)
static bool IsMachineLittleEndian()
int AddKey(const TKey &Key)
void InitWordAndSentenceBoundaryFlags(const TStr &basePath)
void TestFindNextWordOrSentenceBoundary(const TStr &basePath, bool sentence)
static const int toUnicodeTable[6 *16]
int simpleLowerCaseMapping
TStr CombinePath(const TStr &s, const TStr &t)
static const ushort LineBreak_Unknown
static TStr GetCompositionExclusionsFn()
void LoadTxt(const TStr &basePath)
size_t EncodeUtf16ToWords(const TSrcVec &src, size_t srcIdx, const size_t srcCount, TVec< TDestCh > &dest, const bool clrDest, const bool insertBom, const TUniByteOrder destByteOrder=boMachineEndian) const
void ProcessComment(TUniChDb::TUcdFileReader &reader)
void FindWordBoundaries(const TSrcVec &src, const size_t srcIdx, const size_t srcCount, TBoolV &dest) const
static TStr Fmt(const char *FmtStr,...)
static TStr GetPropListFn()
void TestCat(const int cp)
#define NFKC_(cmpWith, operand)
size_t DecodeUtf8(const TSrcVec &src, size_t srcIdx, const size_t srcCount, TVec< TDestCh > &dest, const bool clrDest=true) const
void Gen(const TSizeTy &_Vals)
Constructs a vector (an array) of _Vals elements.
void FindSentenceBoundaries(const TSrcVec &src, const size_t srcIdx, const size_t srcCount, TBoolV &dest) const
#define NFKD_(cmpWith, operand)
int EncodeUtf8(const TIntV &src, TIntV &dest) const
int GetUniDevInt(const int &Range=0)
void Reserve(const TSizeTy &_MxVals)
Reserves enough memory for the vector to store _MxVals elements.
static const int toUnicodeTable[6 *16]
bool IsProperty(const TUniChProperties flag) const
bool IsWbFlag(const TUniChFlags flag) const
bool IsKey(const TKey &Key) const
TSizeTy Add()
Adds a new element at the end of the vector, after its current last element.
static const int fromUnicodeTable2[2 *16]
TUniCaseFolding caseFolding
uint AddStr(const char *Str, const uint &Len)
static const int fromUnicodeTable2[2 *16]
bool IsIdeographic() const
TIntIntVH specialCasingTitle
TDat & AddDat(const TKey &Key)
static const int fromUnicodeTable1[14 *16]
static const int fromUnicodeTable1[14 *16]
static const int fromUnicodeTable4[11 *16]
const TKey & GetKey(const int &KeyId) const
static const int fromUnicodeTable2[2]
void TestWbFindNonIgnored() const
static int ParseCodePoint(const TStr &s)
static TStr GetCaseFoldingFn()
size_t EncodeUtf8(const TSrcVec &src, size_t srcIdx, const size_t srcCount, TVec< TDestCh > &dest, const bool clrDest=true) const
static TStr GetAuxiliaryDir()
static TStr GetScriptNameHiragana()
static ushort GetLineBreakCode(char c1, char c2)
TSizeTy AddV(const TVec< TVal, TSizeTy > &ValV)
Adds the elements of the vector ValV to the to end of the vector.
void GetCaseConverted(const TSrcVec &src, size_t srcIdx, const size_t srcCount, TVec< TDestCh > &dest, const bool clrDest, const TCaseConversion how, const bool turkic, const bool lithuanian) const
void WordsToBytes(const TIntV &src, TIntV &dest)