Main Page | Modules | Class Hierarchy | Alphabetical List | Class List | File List | Class Members | File Members | Related Pages
csuctransform.h
Go to the documentation of this file.00001 /* 00002 Copyright (C) 2003 by Frank Richter 00003 00004 This library is free software; you can redistribute it and/or 00005 modify it under the terms of the GNU Library General Public 00006 License as published by the Free Software Foundation; either 00007 version 2 of the License, or (at your option) any later version. 00008 00009 This library is distributed in the hope that it will be useful, 00010 but WITHOUT ANY WARRANTY; without even the implied warranty of 00011 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00012 Library General Public License for more details. 00013 00014 You should have received a copy of the GNU Library General Public 00015 License along with this library; if not, write to the Free 00016 Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 00017 */ 00018 00019 #ifndef __CS_CSUCTRANSFORM_H__ 00020 #define __CS_CSUCTRANSFORM_H__ 00021 00022 #include "csunicode.h" 00023 00031 00032 #define CS_UC_MAX_UTF8_ENCODED 6 00033 00034 #define CS_UC_MAX_UTF16_ENCODED 2 00035 00036 #define CS_UC_MAX_UTF32_ENCODED 1 00037 00041 #define CS_UC_MAX_MAPPED 3 00042 00046 class csUnicodeTransform 00047 { 00048 public: 00049 #define FAIL(ret) \ 00050 { \ 00051 if (isValid) *isValid = false; \ 00052 ch = CS_UC_CHAR_REPLACER; \ 00053 return ret; \ 00054 } 00055 00056 #define SUCCEED \ 00057 if (isValid) *isValid = true; \ 00058 return chUsed; 00059 00060 #define GET_NEXT(next) \ 00061 if ((size_t)chUsed == strlen) \ 00062 { \ 00063 FAIL(chUsed); \ 00064 } \ 00065 next = *str++; \ 00066 if (next == 0) \ 00067 { \ 00068 FAIL(chUsed); \ 00069 } \ 00070 chUsed++; 00071 00090 inline static int UTF8Decode (const utf8_char* str, size_t strlen, 00091 utf32_char& ch, bool* isValid = 0, bool returnNonChar = false) 00092 { 00093 if (str == 0) 00094 { 00095 FAIL(0); 00096 } 00097 int chUsed = 0; 00098 00099 utf8_char curCh; 00100 GET_NEXT(curCh); 00101 if ((curCh & 0x80) == 0) 00102 { 00103 // easy case 00104 ch = curCh; 00105 SUCCEED; 00106 } 00107 else 00108 { 00109 // Count with how many bytes this char is encoded. 00110 int n = 0; 00111 while ((n < 7) && ((curCh & (1 << (7 - n))) != 0)) { n++; } 00112 00113 if ((n < 2) || (n > 6)) 00114 { 00115 // Invalid code: first char of a "sequence" must have 00116 // at least two and at most six MSBs set 00117 FAIL(1); 00118 } 00119 00120 ch = (curCh & ((1 << (8 - n)) - 1)); 00121 00122 for (int i = 1; i < n; i++) 00123 { 00124 GET_NEXT(curCh); 00125 if ((curCh & 0xc0) != 0x80) 00126 { 00127 FAIL(chUsed); 00128 } 00129 else 00130 { 00131 ch <<= 6; 00132 ch |= (curCh & 0x3f); 00133 } 00134 } 00135 00136 // Check if in Unicode range. 00137 if (ch > CS_UC_LAST_CHAR) 00138 { 00139 FAIL(chUsed); 00140 } 00141 00142 // Check for "overlong" codes. 00143 if ((ch < 0x80) && (n > 0)) 00144 { 00145 FAIL(chUsed); 00146 } 00147 else if ((ch < 0x800) && (n > 2)) 00148 { 00149 FAIL(chUsed); 00150 } 00151 else if ((ch < 0x10000) && (n > 3)) 00152 { 00153 FAIL(chUsed); 00154 } 00155 else if ((ch < 0x200000) && (n > 4)) 00156 { 00157 FAIL(chUsed); 00158 } 00159 /* 00160 else if ((ch < 0x4000000) && (n > 5)) 00161 { 00162 FAIL(chUsed); 00163 } 00164 else if ((ch < 0x80000000) && (n > 6)) 00165 { 00166 FAIL(chUsed); 00167 } 00168 */ 00169 00170 if (!returnNonChar && (CS_UC_IS_NONCHARACTER(ch) 00171 || CS_UC_IS_SURROGATE(ch))) 00172 FAIL(chUsed); 00173 SUCCEED; 00174 } 00175 } 00176 00181 inline static int UTF16Decode (const utf16_char* str, size_t strlen, 00182 utf32_char& ch, bool* isValid = 0, bool returnNonChar = false) 00183 { 00184 if (str == 0) 00185 { 00186 FAIL(0); 00187 } 00188 int chUsed = 0; 00189 00190 utf16_char curCh; 00191 GET_NEXT(curCh); 00192 // Decode surrogate 00193 if (CS_UC_IS_SURROGATE (curCh)) 00194 { 00195 // Invalid code 00196 if (!CS_UC_IS_HIGH_SURROGATE (curCh)) 00197 { 00198 FAIL(chUsed); 00199 } 00200 ch = 0x10000 + ((curCh & 0x03ff) << 10); 00201 GET_NEXT(curCh); 00202 // Invalid code 00203 if (!CS_UC_IS_LOW_SURROGATE (curCh)) 00204 { 00205 // Fail with 1 so the char is handled upon the next Decode. 00206 FAIL(1); 00207 } 00208 ch |= (curCh & 0x3ff); 00209 } 00210 else 00211 { 00212 ch = curCh; 00213 } 00214 if (!returnNonChar && (CS_UC_IS_NONCHARACTER(ch) 00215 || CS_UC_IS_SURROGATE(ch))) 00216 FAIL(chUsed); 00217 SUCCEED; 00218 } 00219 00224 inline static int UTF32Decode (const utf32_char* str, size_t strlen, 00225 utf32_char& ch, bool* isValid = 0, bool returnNonChar = false) 00226 { 00227 if (str == 0) 00228 { 00229 FAIL(0); 00230 } 00231 int chUsed = 0; 00232 00233 GET_NEXT(ch); 00234 if ((!returnNonChar && (CS_UC_IS_NONCHARACTER(ch) 00235 || CS_UC_IS_SURROGATE(ch))) || (ch > CS_UC_LAST_CHAR)) 00236 FAIL(chUsed); 00237 SUCCEED; 00238 } 00239 00244 inline static int Decode (const utf8_char* str, size_t strlen, 00245 utf32_char& ch, bool* isValid = 0, bool returnNonChar = false) 00246 { 00247 return UTF8Decode (str, strlen, ch, isValid, returnNonChar); 00248 } 00253 inline static int Decode (const utf16_char* str, size_t strlen, 00254 utf32_char& ch, bool* isValid = 0, bool returnNonChar = false) 00255 { 00256 return UTF16Decode (str, strlen, ch, isValid, returnNonChar); 00257 } 00262 inline static int Decode (const utf32_char* str, size_t strlen, 00263 utf32_char& ch, bool* isValid = 0, bool returnNonChar = false) 00264 { 00265 return UTF32Decode (str, strlen, ch, isValid, returnNonChar); 00266 } 00267 00269 #undef FAIL 00270 #undef SUCCEED 00271 #undef GET_NEXT 00272 00275 #define _OUTPUT_CHAR(buf, chr) \ 00276 if (bufRemaining > 0) \ 00277 { \ 00278 if(buf) *buf++ = chr; \ 00279 bufRemaining--; \ 00280 } \ 00281 encodedLen++; 00282 00283 #define OUTPUT_CHAR(chr) _OUTPUT_CHAR(buf, chr) 00284 00298 inline static int EncodeUTF8 (const utf32_char ch, utf8_char* buf, 00299 size_t bufsize, bool allowNonchars = false) 00300 { 00301 if ((!allowNonchars && ((CS_UC_IS_NONCHARACTER(ch)) 00302 || (CS_UC_IS_SURROGATE(ch)))) || (ch > CS_UC_LAST_CHAR)) 00303 return 0; 00304 size_t bufRemaining = bufsize; 00305 int encodedLen = 0; 00306 00307 if (ch < 0x80) 00308 { 00309 OUTPUT_CHAR ((utf8_char)ch); 00310 } 00311 else if (ch < 0x800) 00312 { 00313 OUTPUT_CHAR ((utf8_char)(0xc0 | (ch >> 6))); 00314 OUTPUT_CHAR ((utf8_char)(0x80 | (ch & 0x3f))); 00315 } 00316 else if (ch < 0x10000) 00317 { 00318 OUTPUT_CHAR ((utf8_char)(0xe0 | (ch >> 12))); 00319 OUTPUT_CHAR ((utf8_char)(0x80 | ((ch >> 6) & 0x3f))); 00320 OUTPUT_CHAR ((utf8_char)(0x80 | (ch & 0x3f))); 00321 } 00322 else if (ch < 0x200000) 00323 { 00324 OUTPUT_CHAR ((utf8_char)(0xf0 | (ch >> 18))); 00325 OUTPUT_CHAR ((utf8_char)(0x80 | ((ch >> 12) & 0x3f))); 00326 OUTPUT_CHAR ((utf8_char)(0x80 | ((ch >> 6) & 0x3f))); 00327 OUTPUT_CHAR ((utf8_char)(0x80 | (ch & 0x3f))); 00328 } 00329 /* 00330 else if (ch < 0x4000000) 00331 { 00332 OUTPUT_CHAR ((utf8_char)(0xf8 | (ch >> 24))); 00333 OUTPUT_CHAR ((utf8_char)(0x80 | ((ch >> 18) & 0x3f))); 00334 OUTPUT_CHAR ((utf8_char)(0x80 | ((ch >> 12) & 0x3f))); 00335 OUTPUT_CHAR ((utf8_char)(0x80 | ((ch >> 6) & 0x3f))); 00336 OUTPUT_CHAR ((utf8_char)(0x80 | (ch & 0x3f))); 00337 } 00338 else if (ch < 0x80000000) 00339 { 00340 OUTPUT_CHAR ((utf8_char)(0xfc | (ch >> 30))); 00341 OUTPUT_CHAR ((utf8_char)(0x80 | ((ch >> 24) & 0x3f))); 00342 OUTPUT_CHAR ((utf8_char)(0x80 | ((ch >> 18) & 0x3f))); 00343 OUTPUT_CHAR ((utf8_char)(0x80 | ((ch >> 12) & 0x3f))); 00344 OUTPUT_CHAR ((utf8_char)(0x80 | ((ch >> 6) & 0x3f))); 00345 OUTPUT_CHAR ((utf8_char)(0x80 | (ch & 0x3f))); 00346 } 00347 */ 00348 return encodedLen; 00349 } 00350 00355 inline static int EncodeUTF16 (const utf32_char ch, utf16_char* buf, 00356 size_t bufsize, bool allowNonchars = false) 00357 { 00358 if ((!allowNonchars && ((CS_UC_IS_NONCHARACTER(ch)) 00359 || (CS_UC_IS_SURROGATE(ch)))) || (ch > CS_UC_LAST_CHAR)) 00360 return 0; 00361 size_t bufRemaining = bufsize; 00362 int encodedLen = 0; 00363 00364 if (ch < 0x10000) 00365 { 00366 OUTPUT_CHAR((utf16_char)ch); 00367 } 00368 else if (ch < 0x100000) 00369 { 00370 utf32_char ch_shifted = ch - 0x10000; 00371 OUTPUT_CHAR((utf16_char)((ch_shifted >> 10) 00372 | CS_UC_CHAR_HIGH_SURROGATE_FIRST)); 00373 OUTPUT_CHAR((utf16_char)((ch_shifted & 0x3ff) 00374 | CS_UC_CHAR_LOW_SURROGATE_FIRST)); 00375 } 00376 else 00377 return 0; 00378 00379 return encodedLen; 00380 } 00381 00386 inline static int EncodeUTF32 (const utf32_char ch, utf32_char* buf, 00387 size_t bufsize, bool allowNonchars = false) 00388 { 00389 if ((!allowNonchars && ((CS_UC_IS_NONCHARACTER(ch)) 00390 || (CS_UC_IS_SURROGATE(ch)))) || (ch > CS_UC_LAST_CHAR)) 00391 return 0; 00392 size_t bufRemaining = bufsize; 00393 int encodedLen = 0; 00394 00395 OUTPUT_CHAR(ch); 00396 00397 return encodedLen; 00398 } 00399 00404 inline static int Encode (const utf32_char ch, utf8_char* buf, 00405 size_t bufsize, bool allowNonchars = false) 00406 { 00407 return EncodeUTF8 (ch, buf, bufsize, allowNonchars); 00408 } 00413 inline static int Encode (const utf32_char ch, utf16_char* buf, 00414 size_t bufsize, bool allowNonchars = false) 00415 { 00416 return EncodeUTF16 (ch, buf, bufsize, allowNonchars); 00417 } 00422 inline static int Encode (const utf32_char ch, utf32_char* buf, 00423 size_t bufsize, bool allowNonchars = false) 00424 { 00425 return EncodeUTF32 (ch, buf, bufsize, allowNonchars); 00426 } 00428 #undef OUTPUT_CHAR 00429 00432 #define OUTPUT_CHAR(chr) _OUTPUT_CHAR(dest, chr) 00433 00434 #define UCTF_CONVERTER(funcName, fromType, decoder, toType, encoder) \ 00435 inline static size_t funcName (toType* dest, size_t destSize, \ 00436 const fromType* source, size_t srcSize = (size_t)-1) \ 00437 { \ 00438 if ((srcSize == 0) || (source == 0)) \ 00439 return 0; \ 00440 \ 00441 size_t bufRemaining = (destSize > 0) ? destSize - 1 : 0; \ 00442 size_t encodedLen = 0; \ 00443 \ 00444 size_t srcChars = srcSize; \ 00445 \ 00446 if (srcSize == (size_t)-1) \ 00447 { \ 00448 srcChars = 0; \ 00449 const fromType* sptr = source; \ 00450 while (*sptr++ != 0) srcChars++; \ 00451 } \ 00452 \ 00453 while (srcChars > 0) \ 00454 { \ 00455 utf32_char ch; \ 00456 int scnt = decoder (source, srcChars, ch, 0); \ 00457 if (scnt == 0) break; \ 00458 int dcnt = encoder (ch, dest, bufRemaining); \ 00459 if (dcnt == 0) \ 00460 { \ 00461 dcnt = encoder (CS_UC_CHAR_REPLACER, dest, bufRemaining); \ 00462 } \ 00463 \ 00464 if ((size_t)dcnt >= bufRemaining) \ 00465 { \ 00466 if (dest && (destSize > 0)) dest += bufRemaining; \ 00467 bufRemaining = 0; \ 00468 } \ 00469 else \ 00470 { \ 00471 bufRemaining -= dcnt; \ 00472 if (dest && (destSize > 0)) dest += dcnt; \ 00473 } \ 00474 encodedLen += dcnt; \ 00475 if ((size_t)scnt >= srcChars) break; \ 00476 srcChars -= scnt; \ 00477 source += scnt; \ 00478 } \ 00479 \ 00480 if (dest) *dest = 0; \ 00481 \ 00482 return encodedLen + 1; \ 00483 } 00484 00500 UCTF_CONVERTER (UTF8to16, utf8_char, UTF8Decode, utf16_char, EncodeUTF16); 00505 UCTF_CONVERTER (UTF8to32, utf8_char, UTF8Decode, utf32_char, EncodeUTF32); 00506 00511 UCTF_CONVERTER (UTF16to8, utf16_char, UTF16Decode, utf8_char, EncodeUTF8); 00516 UCTF_CONVERTER (UTF16to32, utf16_char, UTF16Decode, utf32_char, EncodeUTF32); 00517 00522 UCTF_CONVERTER (UTF32to8, utf32_char, UTF32Decode, utf8_char, EncodeUTF8); 00527 UCTF_CONVERTER (UTF32to16, utf32_char, UTF32Decode, utf16_char, EncodeUTF16); 00530 #undef UCTF_CONVERTER 00531 #undef OUTPUT_CHAR 00532 #undef _OUTPUT_CHAR 00533 00534 #if (CS_WCHAR_T_SIZE == 1) 00535 inline static size_t UTF8toWC (wchar_t* dest, size_t destSize, 00536 const utf8_char* source, size_t srcSize) 00537 { 00538 size_t srcChars = srcSize; 00539 if (srcSize == (size_t)-1) 00540 { 00541 srcChars = 0; 00542 const utf8_char* sptr = source; 00543 while (*sptr++ != 0) srcChars++; 00544 } 00545 if ((dest != 0) && (destSize != 0)) 00546 { 00547 size_t len = MIN (destSize - 1, srcChars); 00548 memcpy (dest, source, size * sizeof (wchar_t)); 00549 *(dest + len) = 0; 00550 } 00551 return srcChars + 1; 00552 }; 00553 00554 inline static size_t UTF16toWC (wchar_t* dest, size_t destSize, 00555 const utf16_char* source, size_t srcSize) 00556 { 00557 return UTF16to8 ((utf8_char*)dest, destSize, source, srcSize); 00558 }; 00559 00560 inline static size_t UTF32toWC (wchar_t* dest, size_t destSize, 00561 const utf32_char* source, size_t srcSize) 00562 { 00563 return UTF32to8 ((utf8_char*)dest, destSize, source, srcSize); 00564 }; 00565 00566 inline static size_t WCtoUTF8 (utf8_char* dest, size_t destSize, 00567 const wchar_t* source, size_t srcSize) 00568 { 00569 size_t srcChars = srcSize; 00570 if (srcSize == (size_t)-1) 00571 { 00572 srcChars = 0; 00573 const wchar_t* sptr = source; 00574 while (*sptr++ != 0) srcChars++; 00575 } 00576 if ((dest != 0) && (destSize != 0)) 00577 { 00578 size_t len = MIN (destSize - 1, srcChars); 00579 memcpy (dest, source, len * sizeof (wchar_t)); 00580 *(dest + len) = 0; 00581 } 00582 return srcChars + 1; 00583 }; 00584 00585 inline static size_t WCtoUTF16 (utf16_char* dest, size_t destSize, 00586 const wchar_t* source, size_t srcSize) 00587 { 00588 return UTF8to16 (dest, destSize, source, srcSize); 00589 }; 00590 00591 inline static size_t WCtoUTF32 (utf32_char* dest, size_t destSize, 00592 const wchar_t* source, size_t srcSize) 00593 { 00594 return UTF8to32 (dest, destSize, source, srcSize); 00595 }; 00596 00597 inline static int Decode (const wchar_t* str, size_t strlen, 00598 utf32_char& ch, bool* isValid = 0, bool returnNonChar = false) 00599 { 00600 return UTF8Decode ((utf8_char*)str, strlen, ch, isValid, returnNonChar); 00601 } 00602 inline static int Encode (const utf32_char ch, wchar_t* buf, 00603 size_t bufsize, bool allowNonchars = false) 00604 { 00605 return EncodeUTF8 (ch, (utf8_char*)buf, bufsize, allowNonchars); 00606 } 00607 #elif (CS_WCHAR_T_SIZE == 2) 00608 // Methods below for doxygen documentation are here as the size '2' is 00609 // default. 00610 00617 inline static size_t UTF8toWC (wchar_t* dest, size_t destSize, 00618 const utf8_char* source, size_t srcSize) 00619 { 00620 return UTF8to16 ((utf16_char*)dest, destSize, source, srcSize); 00621 }; 00622 00627 inline static size_t UTF16toWC (wchar_t* dest, size_t destSize, 00628 const utf16_char* source, size_t srcSize) 00629 { 00630 size_t srcChars = srcSize; 00631 if (srcSize == (size_t)-1) 00632 { 00633 srcChars = 0; 00634 const utf16_char* sptr = source; 00635 while (*sptr++ != 0) srcChars++; 00636 } 00637 if ((dest != 0) && (destSize != 0)) 00638 { 00639 size_t len = MIN (destSize - 1, srcChars); 00640 memcpy (dest, source, len * sizeof (wchar_t)); 00641 *(dest + len) = 0; 00642 } 00643 return srcChars + 1; 00644 }; 00645 00650 inline static size_t UTF32toWC (wchar_t* dest, size_t destSize, 00651 const utf32_char* source, size_t srcSize) 00652 { 00653 return UTF32to16 ((utf16_char*)dest, destSize, source, srcSize); 00654 }; 00655 00660 inline static size_t WCtoUTF8 (utf8_char* dest, size_t destSize, 00661 const wchar_t* source, size_t srcSize) 00662 { 00663 return UTF16to8 (dest, destSize, (utf16_char*)source, srcSize); 00664 }; 00665 00670 inline static size_t WCtoUTF16 (utf16_char* dest, size_t destSize, 00671 const wchar_t* source, size_t srcSize) 00672 { 00673 size_t srcChars = srcSize; 00674 if (srcSize == (size_t)-1) 00675 { 00676 srcChars = 0; 00677 const wchar_t* sptr = source; 00678 while (*sptr++ != 0) srcChars++; 00679 } 00680 if ((dest != 0) && (destSize != 0)) 00681 { 00682 size_t len = MIN (destSize - 1, srcChars); 00683 memcpy (dest, source, len * sizeof (wchar_t)); 00684 *(dest + len) = 0; 00685 } 00686 return srcChars + 1; 00687 }; 00688 00693 inline static size_t WCtoUTF32 (utf32_char* dest, size_t destSize, 00694 const wchar_t* source, size_t srcSize) 00695 { 00696 return UTF16to32 (dest, destSize, (utf16_char*)source, srcSize); 00697 }; 00698 00699 #if !defined(CS_COMPILER_MSVC) || (_MSC_VER >= 1300) 00700 /* @@@ For VC6, utf16_char == wchar_t, complains below. (Can be avoided on 00701 * VC7 with "Builtin wchar_t") */ 00706 inline static int Decode (const wchar_t* str, size_t strlen, 00707 utf32_char& ch, bool* isValid = 0, bool returnNonChar = false) 00708 { 00709 return UTF16Decode ((utf16_char*)str, strlen, ch, isValid, returnNonChar); 00710 } 00715 inline static int Encode (const utf32_char ch, wchar_t* buf, 00716 size_t bufsize, bool allowNonchars = false) 00717 { 00718 return EncodeUTF16 (ch, (utf16_char*)buf, bufsize, allowNonchars); 00719 } 00720 #endif 00721 00722 #elif (CS_WCHAR_T_SIZE == 4) 00723 inline static size_t UTF8toWC (wchar_t* dest, size_t destSize, 00724 const utf8_char* source, size_t srcSize) 00725 { 00726 return UTF8to32 ((utf32_char*)dest, destSize, source, srcSize); 00727 }; 00728 00729 inline static size_t UTF16toWC (wchar_t* dest, size_t destSize, 00730 const utf16_char* source, size_t srcSize) 00731 { 00732 return UTF16to32 ((utf32_char*)dest, destSize, source, srcSize); 00733 }; 00734 00735 inline static size_t UTF32toWC (wchar_t* dest, size_t destSize, 00736 const utf32_char* source, size_t srcSize) 00737 { 00738 size_t srcChars = srcSize; 00739 if (srcSize == (size_t)-1) 00740 { 00741 srcChars = 0; 00742 const utf32_char* sptr = source; 00743 while (*sptr++ != 0) srcChars++; 00744 } 00745 if ((dest != 0) && (destSize != 0)) 00746 { 00747 size_t len = MIN (destSize - 1, srcChars); 00748 memcpy (dest, source, len * sizeof (wchar_t)); 00749 *(dest + len) = 0; 00750 } 00751 return srcChars + 1; 00752 }; 00753 00754 inline static size_t WCtoUTF8 (utf8_char* dest, size_t destSize, 00755 const wchar_t* source, size_t srcSize) 00756 { 00757 return UTF32to8 (dest, destSize, (utf32_char*)source, srcSize); 00758 }; 00759 00760 inline static size_t WCtoUTF16 (utf16_char* dest, size_t destSize, 00761 const wchar_t* source, size_t srcSize) 00762 { 00763 return UTF32to16 (dest, destSize, (utf32_char*)source, srcSize); 00764 }; 00765 00766 inline static size_t WCtoUTF32 (utf32_char* dest, size_t destSize, 00767 const wchar_t* source, size_t srcSize) 00768 { 00769 size_t srcChars = srcSize; 00770 if (srcSize == (size_t)-1) 00771 { 00772 srcChars = 0; 00773 const wchar_t* sptr = source; 00774 while (*sptr++ != 0) srcChars++; 00775 } 00776 if ((dest != 0) && (destSize != 0)) 00777 { 00778 size_t len = MIN (destSize - 1, srcChars); 00779 memcpy (dest, source, len * sizeof (wchar_t)); 00780 *(dest + len) = 0; 00781 } 00782 return srcChars + 1; 00783 }; 00784 00785 inline static int Decode (const wchar_t* str, size_t strlen, 00786 utf32_char& ch, bool* isValid = 0, bool returnNonChar = false) 00787 { 00788 return UTF32Decode ((utf32_char*)str, strlen, ch, isValid, returnNonChar); 00789 } 00790 inline static int Encode (const utf32_char ch, wchar_t* buf, 00791 size_t bufsize, bool allowNonchars = false) 00792 { 00793 return EncodeUTF32 (ch, (utf32_char*)buf, bufsize, allowNonchars); 00794 } 00795 #else 00796 #error Odd-sized, unsupported wchar_t! 00797 #endif 00798 00811 inline static int UTF8Skip (const utf8_char* str, size_t maxSkip) 00812 { 00813 if (maxSkip < 1) return 0; 00814 00815 if ((*str & 0x80) == 0) 00816 { 00817 return 1; 00818 } 00819 else 00820 { 00821 int n = 0; 00822 while ((n < 7) && ((*str & (1 << (7 - n))) != 0)) { n++; } 00823 00824 if ((n < 2) || (n > 6)) 00825 { 00826 return 1; 00827 } 00828 00829 int skip = 1; 00830 00831 for (; skip < n; skip++) 00832 { 00833 if (((str[skip] & 0xc0) != 0x80) || ((size_t)skip > maxSkip)) 00834 { 00835 break; 00836 } 00837 } 00838 return skip; 00839 } 00840 } 00841 00852 inline static int UTF8Rewind (const utf8_char* str, size_t maxRew) 00853 { 00854 if (maxRew < 1) return 0; 00855 00856 const utf8_char* pos = str - 1; 00857 00858 if ((*pos & 0x80) == 0) 00859 { 00860 return 1; 00861 } 00862 00863 // Skip backward to the first byte of the sequence. 00864 int skip = 1; 00865 while (((*pos & 0xc0) == 0x80) && ((size_t)skip < maxRew)) 00866 { 00867 skip++; 00868 pos--; 00869 } 00870 00871 return skip; 00872 } 00873 00879 inline static int UTF16Skip (const utf16_char* str, size_t maxSkip) 00880 { 00881 if (CS_UC_IS_HIGH_SURROGATE (*str)) 00882 return (int)(MIN(maxSkip, 2)); 00883 else 00884 return (int)(MIN(maxSkip, 1)); 00885 } 00886 00892 inline static int UTF16Rewind (const utf16_char* str, size_t maxRew) 00893 { 00894 if (maxRew < 1) return 0; 00895 00896 const utf16_char* pos = str - 1; 00897 if (!CS_UC_IS_SURROGATE(*pos)) 00898 return 1; 00899 else 00900 { 00901 if ((maxRew > 1) && (CS_UC_IS_HIGH_SURROGATE(*(pos - 1)))) 00902 return 2; 00903 else 00904 return 1; 00905 } 00906 } 00907 00913 inline static int UTF32Skip (const utf32_char* str, size_t maxSkip) 00914 { 00915 return (int)(MIN(maxSkip, 1)); 00916 } 00917 00923 inline static int UTF32Rewind (const utf32_char* str, size_t maxRew) 00924 { 00925 if (maxRew < 1) return 0; 00926 return 1; 00927 } 00940 static size_t MapToUpper (const utf32_char ch, utf32_char* dest, 00941 size_t destSize); 00946 static size_t MapToLower (const utf32_char ch, utf32_char* dest, 00947 size_t destSize); 00953 static size_t MapToFold (const utf32_char ch, utf32_char* dest, 00954 size_t destSize); 00956 }; 00957 00960 #endif 00961
Generated for Crystal Space by doxygen 1.3.9.1