CrystalSpace

Public API Reference

Main Page | Modules | Class Hierarchy | Alphabetical List | Class List | File List | Class Members | File Members | Related Pages

csuctransform.h

Go to the documentation of this file.
00001 /*
00002     Copyright (C) 2003 by Frank Richter
00003 
00004     This library is free software; you can redistribute it and/or
00005     modify it under the terms of the GNU Library General Public
00006     License as published by the Free Software Foundation; either
00007     version 2 of the License, or (at your option) any later version.
00008 
00009     This library is distributed in the hope that it will be useful,
00010     but WITHOUT ANY WARRANTY; without even the implied warranty of
00011     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00012     Library General Public License for more details.
00013 
00014     You should have received a copy of the GNU Library General Public
00015     License along with this library; if not, write to the Free
00016     Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
00017 */
00018 
00019 #ifndef __CS_CSUCTRANSFORM_H__
00020 #define __CS_CSUCTRANSFORM_H__
00021 
00022 #include "csunicode.h"
00023 
00031 
00032 #define CS_UC_MAX_UTF8_ENCODED          6  /* presumably max. utf8_char units per code point -- TODO confirm; EncodeUTF8() below emits at most 4 */
00033 
00034 #define CS_UC_MAX_UTF16_ENCODED         2  /* EncodeUTF16() below emits at most 2 utf16_char units */
00035 
00036 #define CS_UC_MAX_UTF32_ENCODED         1  /* EncodeUTF32() below emits exactly 1 utf32_char unit */
00037 
00041 #define CS_UC_MAX_MAPPED                3  /* presumably max. code points produced by one MapTo*() call -- TODO confirm */
00042  
00046 class csUnicodeTransform
00047 {
00048 public:
00049 #define FAIL(ret)   /* Failure exit for the decoders; needs 'isValid' and 'ch' in scope. */ \
00050   {                                             \
00051     if (isValid) *isValid = false;  /* report the failure if the caller asked for it */ \
00052     ch = CS_UC_CHAR_REPLACER;       /* the caller receives the replacement character */ \
00053     return ret;                     /* ret: number of code units consumed */ \
00054   }
00055 
00056 #define SUCCEED   /* Success exit for the decoders. Expands to two statements: not usable as an unbraced if/else body. */ \
00057     if (isValid) *isValid = true;               \
00058     return chUsed;  /* number of code units consumed */
00059   
00060 #define GET_NEXT(next)  /* Read the next unit of 'str' into 'next', or fail. 'strlen' is the length parameter, not the C function. */ \
00061   if ((size_t)chUsed == strlen)   /* all available input has been consumed */ \
00062   {                                             \
00063     FAIL(chUsed);                               \
00064   }                                             \
00065   next = *str++;                                \
00066   if (next == 0)                  /* a zero unit also ends the input */ \
00067   {                                             \
00068     FAIL(chUsed);                 /* the zero itself is not counted as consumed */ \
00069   }                                             \
00070   chUsed++;                       /* count the unit that was just read */
00071   
00072   /**
00073    * Decode a single code point from a UTF-8 encoded buffer.
00074    * \param str Start of the input; a null pointer makes the call fail with 0.
00075    * \param strlen Number of code units available at \a str.
00076    * \param ch Receives the decoded code point, or CS_UC_CHAR_REPLACER on
00077    *   failure.
00078    * \param isValid If not null, receives whether a valid character was read.
00079    * \param returnNonChar If false, noncharacters and surrogate code points
00080    *   are treated as invalid.
00081    * \return Number of code units the caller should advance by. This is 0 for
00082    *   a null \a str, an empty buffer or a leading zero unit. When a sequence
00083    *   is cut short by a unit that is not a continuation byte, that unit is
00084    *   not counted, so the next call starts decoding at it (the same policy
00085    *   UTF16Decode() applies to an unpaired high surrogate).
00086    */
00090   inline static int UTF8Decode (const utf8_char* str, size_t strlen, 
00091     utf32_char& ch, bool* isValid = 0, bool returnNonChar = false)
00092   {
00093     if (str == 0)
00094     {
00095       FAIL(0);
00096     }
00097     int chUsed = 0;
00098     
00099     utf8_char curCh;
00100     GET_NEXT(curCh);
00101     if ((curCh & 0x80) == 0)
00102     {
00103       // Single-byte (ASCII) character.
00104       ch = curCh;
00105       SUCCEED;
00106     }
00107     else
00108     {
00109       // The number of leading 1 bits is the length of the sequence.
00110       int n = 0;
00111       while ((n < 7) && ((curCh & (1 << (7 - n))) != 0)) { n++; }
00112 
00113       if ((n < 2) || (n > 6))
00114       {
00115         // Invalid code: first char of a "sequence" must have
00116         // at least two and at most six MSBs set
00117         FAIL(1);
00118       }
00119 
00120       // Payload bits of the lead byte; each continuation byte adds 6 more.
00121       ch = (curCh & ((1 << (8 - n)) - 1));
00122       for (int i = 1; i < n; i++)
00123       {
00124         GET_NEXT(curCh);
00125         if ((curCh & 0xc0) != 0x80)
00126         {
00127           // Not a continuation byte: do not consume it, so that the
00128           // next call can decode it as the start of a new character.
00129           FAIL(chUsed - 1);
00130         }
00131         ch <<= 6;
00132         ch |= (curCh & 0x3f);
00133       }
00134       
00135       // Check if in Unicode range.
00136       if (ch > CS_UC_LAST_CHAR)
00137       {
00138         FAIL(chUsed);
00139       }
00140 
00141       // Check for "overlong" codes, i.e. values encoded with more bytes
00142       // than necessary. (n is at least 2 in this branch.)
00143       if ((ch < 0x80) && (n > 1))
00144       {
00145         FAIL(chUsed);
00146       }
00147       else if ((ch < 0x800) && (n > 2))
00148       {
00149         FAIL(chUsed);
00150       }
00151       else if ((ch < 0x10000) && (n > 3))
00152       {
00153         FAIL(chUsed);
00154       }
00155       else if ((ch < 0x200000) && (n > 4))
00156       {
00157         FAIL(chUsed);
00158       }
00159       /* 
00160       else if ((ch < 0x4000000) && (n > 5))
00161       {
00162         FAIL(chUsed);
00163       }
00164       else if ((ch < 0x80000000) && (n > 6))
00165       {
00166         FAIL(chUsed);
00167       }
00168       */
00169       
00170       if (!returnNonChar && (CS_UC_IS_NONCHARACTER(ch) 
00171         || CS_UC_IS_SURROGATE(ch)))
00172         FAIL(chUsed);
00173       SUCCEED;
00174     }
00175   }
00176   
00181   inline static int UTF16Decode (const utf16_char* str, size_t strlen,  // Decode one code point from UTF-16 input.
00182     utf32_char& ch, bool* isValid = 0, bool returnNonChar = false)      // Returns the number of utf16_char units to skip.
00183   {
00184     if (str == 0)
00185     {
00186       FAIL(0);  // no input at all
00187     }
00188     int chUsed = 0;  // units consumed so far; incremented by GET_NEXT
00189     
00190     utf16_char curCh;
00191     GET_NEXT(curCh);
00192     // Decode surrogate
00193     if (CS_UC_IS_SURROGATE (curCh))
00194     {
00195       // Invalid code
00196       if (!CS_UC_IS_HIGH_SURROGATE (curCh))
00197       {
00198         FAIL(chUsed);  // a surrogate that is not a high surrogate cannot start a pair
00199       }
00200       ch = 0x10000 + ((curCh & 0x03ff) << 10);  // upper 10 bits of the offset above 0x10000
00201       GET_NEXT(curCh);
00202       // Invalid code
00203       if (!CS_UC_IS_LOW_SURROGATE (curCh))
00204       {
00205         // Fail with 1 so the char is handled upon the next Decode.
00206         FAIL(1);
00207       }
00208       ch |= (curCh & 0x3ff);  // lower 10 bits of the offset
00209     }
00210     else
00211     {
00212       ch = curCh;  // a single unit is the code point itself
00213     }
00214     if (!returnNonChar && (CS_UC_IS_NONCHARACTER(ch) 
00215       || CS_UC_IS_SURROGATE(ch)))
00216       FAIL(chUsed);  // decoded fine, but the caller does not want such values
00217     SUCCEED;
00218   }
00219   
00224   inline static int UTF32Decode (const utf32_char* str, size_t strlen,  // Read one code point from UTF-32 input.
00225     utf32_char& ch, bool* isValid = 0, bool returnNonChar = false)      // Returns the number of units consumed (0 or 1).
00226   {
00227     if (str == 0)
00228     {
00229       FAIL(0);  // no input at all
00230     }
00231     int chUsed = 0;  // incremented by GET_NEXT
00232     
00233     GET_NEXT(ch);  // the unit is stored directly in the output parameter
00234     if ((!returnNonChar && (CS_UC_IS_NONCHARACTER(ch) 
00235       || CS_UC_IS_SURROGATE(ch))) || (ch > CS_UC_LAST_CHAR))
00236       FAIL(chUsed);  // unit consumed, but replaced by CS_UC_CHAR_REPLACER
00237     SUCCEED;
00238   }
00239 
00240   /**
00241    * Overload for UTF-8 input: hands all arguments unchanged to
00242    * UTF8Decode() and returns its result.
00243    */
00244   inline static int Decode (const utf8_char* str, size_t strlen, 
00245     utf32_char& ch, bool* isValid = 0, bool returnNonChar = false)
00246   {
00247     return csUnicodeTransform::UTF8Decode (str, strlen, ch, isValid, returnNonChar);
00248   }
00253   inline static int Decode (const utf16_char* str, size_t strlen, 
00254     utf32_char& ch, bool* isValid = 0, bool returnNonChar = false)
00255   {
00256     return UTF16Decode (str, strlen, ch, isValid, returnNonChar);
00257   }
00262   inline static int Decode (const utf32_char* str, size_t strlen, 
00263     utf32_char& ch, bool* isValid = 0, bool returnNonChar = false)
00264   {
00265     return UTF32Decode (str, strlen, ch, isValid, returnNonChar);
00266   }
00267 
00269 #undef FAIL
00270 #undef SUCCEED
00271 #undef GET_NEXT
00272 
00275 #define _OUTPUT_CHAR(buf, chr)   /* Emit one code unit; needs 'bufRemaining' and 'encodedLen' in scope. */ \
00276   if (bufRemaining > 0)                /* room left in the buffer */ \
00277   {                                                     \
00278     if(buf) *buf++ = chr;              /* a null buffer means: only count */ \
00279     bufRemaining--;                                     \
00280   }                                                     \
00281   encodedLen++;  /* counts every unit, including those that did not fit */
00282 
00283 #define OUTPUT_CHAR(chr) _OUTPUT_CHAR(buf, chr)  /* shorthand for the Encode*() functions below, whose buffer parameter is named 'buf' */
00284   
00298   inline static int EncodeUTF8 (const utf32_char ch, utf8_char* buf,   // Encode one code point as UTF-8 into buf.
00299     size_t bufsize, bool allowNonchars = false)                        // Returns the encoded length in units; 0 if ch is rejected.
00300   {
00301     if ((!allowNonchars && ((CS_UC_IS_NONCHARACTER(ch)) 
00302       || (CS_UC_IS_SURROGATE(ch)))) || (ch > CS_UC_LAST_CHAR))
00303       return 0;  // rejected: nothing is written
00304     size_t bufRemaining = bufsize;  // decremented by OUTPUT_CHAR
00305     int encodedLen = 0;             // incremented by OUTPUT_CHAR, also for units that did not fit
00306     
00307     if (ch < 0x80)
00308     {
00309       OUTPUT_CHAR ((utf8_char)ch);  // 1 unit: 0xxxxxxx
00310     }
00311     else if (ch < 0x800)
00312     {
00313       OUTPUT_CHAR ((utf8_char)(0xc0 | (ch >> 6)));  // 2 units: 110xxxxx 10xxxxxx
00314       OUTPUT_CHAR ((utf8_char)(0x80 | (ch & 0x3f)));
00315     }
00316     else if (ch < 0x10000)
00317     {
00318       OUTPUT_CHAR ((utf8_char)(0xe0 | (ch >> 12)));  // 3 units: 1110xxxx + 2 continuation bytes
00319       OUTPUT_CHAR ((utf8_char)(0x80 | ((ch >> 6) & 0x3f)));
00320       OUTPUT_CHAR ((utf8_char)(0x80 | (ch & 0x3f)));
00321     }
00322     else if (ch < 0x200000)
00323     {
00324       OUTPUT_CHAR ((utf8_char)(0xf0 | (ch >> 18)));  // 4 units: 11110xxx + 3 continuation bytes
00325       OUTPUT_CHAR ((utf8_char)(0x80 | ((ch >> 12) & 0x3f)));
00326       OUTPUT_CHAR ((utf8_char)(0x80 | ((ch >> 6) & 0x3f)));
00327       OUTPUT_CHAR ((utf8_char)(0x80 | (ch & 0x3f)));
00328     }
00329     /*
00330     else if (ch < 0x4000000)
00331     {
00332       OUTPUT_CHAR ((utf8_char)(0xf8 | (ch >> 24)));
00333       OUTPUT_CHAR ((utf8_char)(0x80 | ((ch >> 18) & 0x3f)));
00334       OUTPUT_CHAR ((utf8_char)(0x80 | ((ch >> 12) & 0x3f)));
00335       OUTPUT_CHAR ((utf8_char)(0x80 | ((ch >> 6) & 0x3f)));
00336       OUTPUT_CHAR ((utf8_char)(0x80 | (ch & 0x3f)));
00337     }
00338     else if (ch < 0x80000000)
00339     {
00340       OUTPUT_CHAR ((utf8_char)(0xfc | (ch >> 30)));
00341       OUTPUT_CHAR ((utf8_char)(0x80 | ((ch >> 24) & 0x3f)));
00342       OUTPUT_CHAR ((utf8_char)(0x80 | ((ch >> 18) & 0x3f)));
00343       OUTPUT_CHAR ((utf8_char)(0x80 | ((ch >> 12) & 0x3f)));
00344       OUTPUT_CHAR ((utf8_char)(0x80 | ((ch >> 6) & 0x3f)));
00345       OUTPUT_CHAR ((utf8_char)(0x80 | (ch & 0x3f)));
00346     }
00347     */
00348     return encodedLen;  // may exceed bufsize when the buffer was too small
00349   }
00350     
00351   /**
00352    * Encode a code point as UTF-16: one unit up to 0xFFFF, a surrogate pair
00353    * above. Returns the number of units needed, or 0 if \a ch is rejected.
00354    */
00355   inline static int EncodeUTF16 (const utf32_char ch, utf16_char* buf, 
00356     size_t bufsize, bool allowNonchars = false)
00357   {
00358     if ((!allowNonchars && ((CS_UC_IS_NONCHARACTER(ch)) 
00359       || (CS_UC_IS_SURROGATE(ch)))) || (ch > CS_UC_LAST_CHAR))
00360       return 0;
00361     size_t bufRemaining = bufsize;
00362     int encodedLen = 0;
00363     
00364     if (ch < 0x10000)
00365     {
00366       OUTPUT_CHAR((utf16_char)ch);
00367     }
00368     else if (ch < 0x110000)
00369     {
00370       // Surrogate pairs cover all of 0x10000..0x10FFFF (planes 1 to 16).
00371       utf32_char ch_shifted = ch - 0x10000;
00372       OUTPUT_CHAR((utf16_char)((ch_shifted >> 10) 
00373         | CS_UC_CHAR_HIGH_SURROGATE_FIRST));
00374       OUTPUT_CHAR((utf16_char)((ch_shifted & 0x3ff) 
00375         | CS_UC_CHAR_LOW_SURROGATE_FIRST));
00376     }
00377     else
00378       return 0;
00379     return encodedLen;
00380   }
00381 
00386   inline static int EncodeUTF32 (const utf32_char ch, utf32_char* buf,   // Store one code point as a single UTF-32 unit.
00387     size_t bufsize, bool allowNonchars = false)                          // Returns 1, or 0 if ch is rejected.
00388   {
00389     if ((!allowNonchars && ((CS_UC_IS_NONCHARACTER(ch)) 
00390       || (CS_UC_IS_SURROGATE(ch)))) || (ch > CS_UC_LAST_CHAR))
00391       return 0;  // rejected: nothing is written
00392     size_t bufRemaining = bufsize;  // decremented by OUTPUT_CHAR
00393     int encodedLen = 0;             // incremented by OUTPUT_CHAR
00394     
00395     OUTPUT_CHAR(ch);  // written only if buf is not null and bufsize > 0
00396     
00397     return encodedLen;
00398   }
00399 
00400   /**
00401    * Overload for UTF-8 output: hands all arguments unchanged to
00402    * EncodeUTF8() and returns its result.
00403    */
00404   inline static int Encode (const utf32_char ch, utf8_char* buf, 
00405     size_t bufsize, bool allowNonchars = false)
00406   {
00407     return csUnicodeTransform::EncodeUTF8 (ch, buf, bufsize, allowNonchars);
00408   }
00413   inline static int Encode (const utf32_char ch, utf16_char* buf, 
00414     size_t bufsize, bool allowNonchars = false)
00415   {
00416     return EncodeUTF16 (ch, buf, bufsize, allowNonchars);
00417   }
00422   inline static int Encode (const utf32_char ch, utf32_char* buf, 
00423     size_t bufsize, bool allowNonchars = false)
00424   {
00425     return EncodeUTF32 (ch, buf, bufsize, allowNonchars);
00426   }
00428 #undef OUTPUT_CHAR
00429   
00432 #define OUTPUT_CHAR(chr) _OUTPUT_CHAR(dest, chr)  /* NOTE(review): not referenced by UCTF_CONVERTER below */
00433   /* UCTF_CONVERTER generates a string converter: 'decoder' reads code points from 'source', 'encoder' writes them to 'dest'. */
00434 #define UCTF_CONVERTER(funcName, fromType, decoder, toType, encoder)    \
00435   inline static size_t funcName (toType* dest, size_t destSize,         \
00436     const fromType* source, size_t srcSize = (size_t)-1)                \
00437   {                                                                     \
00438     if ((srcSize == 0) || (source == 0))                                \
00439       return 0;                                                         \
00440     /* One unit of the buffer is kept back for the terminator. */       \
00441     size_t bufRemaining = (destSize > 0) ? destSize - 1 : 0;            \
00442     size_t encodedLen = 0;                                              \
00443                                                                         \
00444     size_t srcChars = srcSize;                                          \
00445     /* (size_t)-1: the source is zero-terminated, so measure it. */     \
00446     if (srcSize == (size_t)-1)                                          \
00447     {                                                                   \
00448       srcChars = 0;                                                     \
00449       const fromType* sptr = source;                                    \
00450       while (*sptr++ != 0) srcChars++;                                  \
00451     }                                                                   \
00452                                                                         \
00453     while (srcChars > 0)                                                \
00454     {                                                                   \
00455       utf32_char ch;                                                    \
00456       int scnt = decoder (source, srcChars, ch, 0);                     \
00457       if (scnt == 0) break;                                             \
00458       int dcnt = encoder (ch, dest, bufRemaining);                      \
00459       if (dcnt == 0)                                                    \
00460       {                                                                 \
00461         dcnt = encoder (CS_UC_CHAR_REPLACER, dest, bufRemaining);       \
00462       }                                                                 \
00463       /* dcnt is the full encoded size, even if not all of it fit. */   \
00464       if ((size_t)dcnt >= bufRemaining)                                 \
00465       {                                                                 \
00466         if (dest && (destSize > 0)) dest += bufRemaining;               \
00467         bufRemaining = 0;                                               \
00468       }                                                                 \
00469       else                                                              \
00470       {                                                                 \
00471         bufRemaining -= dcnt;                                           \
00472         if (dest && (destSize > 0)) dest += dcnt;                       \
00473       }                                                                 \
00474       encodedLen += dcnt;                                               \
00475       if ((size_t)scnt >= srcChars) break;                              \
00476       srcChars -= scnt;                                                 \
00477       source += scnt;                                                   \
00478     }                                                                   \
00479     /* Terminate only if the buffer has room for the terminator. */     \
00480     if (dest && (destSize > 0)) *dest = 0;                              \
00481     /* Units needed for the whole string, terminator included. */       \
00482     return encodedLen + 1;                                              \
00483   }
00484 
00500   UCTF_CONVERTER (UTF8to16, utf8_char, UTF8Decode, utf16_char, EncodeUTF16);  // defines UTF8to16(): UTF-8 string -> UTF-16 string
00505   UCTF_CONVERTER (UTF8to32, utf8_char, UTF8Decode, utf32_char, EncodeUTF32);  // defines UTF8to32(): UTF-8 string -> UTF-32 string
00506 
00511   UCTF_CONVERTER (UTF16to8, utf16_char, UTF16Decode, utf8_char, EncodeUTF8);  // defines UTF16to8(): UTF-16 string -> UTF-8 string
00516   UCTF_CONVERTER (UTF16to32, utf16_char, UTF16Decode, utf32_char, EncodeUTF32);  // defines UTF16to32(): UTF-16 string -> UTF-32 string
00517   
00522   UCTF_CONVERTER (UTF32to8, utf32_char, UTF32Decode, utf8_char, EncodeUTF8);  // defines UTF32to8(): UTF-32 string -> UTF-8 string
00527   UCTF_CONVERTER (UTF32to16, utf32_char, UTF32Decode, utf16_char, EncodeUTF16);  // defines UTF32to16(): UTF-32 string -> UTF-16 string
00530 #undef UCTF_CONVERTER
00531 #undef OUTPUT_CHAR
00532 #undef _OUTPUT_CHAR
00533 
00534 #if (CS_WCHAR_T_SIZE == 1)
00535   inline static size_t UTF8toWC (wchar_t* dest, size_t destSize, 
00536     const utf8_char* source, size_t srcSize)
00537   {
00538     // CS_WCHAR_T_SIZE == 1: units are copied verbatim, truncated to fit.
00539     size_t srcChars = srcSize;
00540     if (srcSize == (size_t)-1)
00541     {
00542       // No length given: count the units before the terminating zero.
00543       for (srcChars = 0; source[srcChars] != 0; srcChars++) { }
00544     }
00545     if ((dest != 0) && (destSize != 0))
00546     {
00547       size_t len = MIN (destSize - 1, srcChars);
00548       memcpy (dest, source, len * sizeof (wchar_t));
00549       *(dest + len) = 0;
00550     }
00551     return srcChars + 1;  // source length plus terminator
00552   }
00553 
00554   inline static size_t UTF16toWC (wchar_t* dest, size_t destSize, 
00555     const utf16_char* source, size_t srcSize)
00556   { // CS_WCHAR_T_SIZE == 1: the wchar_t buffer is filled as UTF-8.
00557     return csUnicodeTransform::UTF16to8 ((utf8_char*)dest, destSize, source, srcSize);
00558   }
00559 
00560   inline static size_t UTF32toWC (wchar_t* dest, size_t destSize, 
00561     const utf32_char* source, size_t srcSize)
00562   { // CS_WCHAR_T_SIZE == 1: the wchar_t buffer is filled as UTF-8.
00563     return csUnicodeTransform::UTF32to8 ((utf8_char*)dest, destSize, source, srcSize);
00564   }
00565   
00566   inline static size_t WCtoUTF8 (utf8_char* dest, size_t destSize, 
00567     const wchar_t* source, size_t srcSize)
00568   {
00569     // CS_WCHAR_T_SIZE == 1: units are copied verbatim, truncated to fit.
00570     size_t unitCount = srcSize;
00571     if (unitCount == (size_t)-1)
00572     {
00573       // No length given: count the units before the terminating zero.
00574       for (unitCount = 0; source[unitCount] != 0; unitCount++) { }
00575     }
00576     if (dest && destSize)
00577     {
00578       const size_t copyCount = MIN (destSize - 1, unitCount);
00579       memcpy (dest, source, copyCount * sizeof (wchar_t));
00580       dest[copyCount] = 0;
00581     }
00582     return unitCount + 1;  // source length plus terminator
00583   }
00584 
00585   inline static size_t WCtoUTF16 (utf16_char* dest, size_t destSize, 
00586     const wchar_t* source, size_t srcSize)
00587   { // The 1-byte wchar_t string is read as UTF-8; cast as in the sibling WC functions.
00588     return UTF8to16 (dest, destSize, (const utf8_char*)source, srcSize);
00589   }
00590 
00591   inline static size_t WCtoUTF32 (utf32_char* dest, size_t destSize, 
00592     const wchar_t* source, size_t srcSize)
00593   { // The 1-byte wchar_t string is read as UTF-8; cast as in the sibling WC functions.
00594     return UTF8to32 (dest, destSize, (const utf8_char*)source, srcSize);
00595   }
00596 
00597   inline static int Decode (const wchar_t* str, size_t strlen, 
00598     utf32_char& ch, bool* isValid = 0, bool returnNonChar = false)
00599   { // CS_WCHAR_T_SIZE == 1: the wchar_t input is decoded as UTF-8.
00600     return UTF8Decode ((const utf8_char*)str, strlen, ch, isValid, returnNonChar);
00601   }
00602   inline static int Encode (const utf32_char ch, wchar_t* buf, 
00603     size_t bufsize, bool allowNonchars = false)
00604   { // CS_WCHAR_T_SIZE == 1: the wchar_t buffer is filled as UTF-8.
00605     return csUnicodeTransform::EncodeUTF8 (ch, (utf8_char*)buf, bufsize, allowNonchars);
00606   }
00607 #elif (CS_WCHAR_T_SIZE == 2)
00608   // The doxygen documentation is attached to the methods of this branch
00609   // because 2 is the default size of wchar_t.
00610   
00611   /**
00612    * Convert a UTF-8 string to a wchar_t string. CS_WCHAR_T_SIZE is 2 in
00613    * this branch, so the wchar_t buffer is filled by UTF8to16(), whose
00614    * result is returned unchanged.
00615    */
00617   inline static size_t UTF8toWC (wchar_t* dest, size_t destSize, 
00618     const utf8_char* source, size_t srcSize)
00619   {
00620     return csUnicodeTransform::UTF8to16 ((utf16_char*)dest, destSize, source, srcSize);
00621   }
00622 
00623   /**
00624    * Copy a UTF-16 string into a wchar_t buffer (CS_WCHAR_T_SIZE is 2 here).
00625    * Returns the source length plus one for the terminator.
00626    */
00627   inline static size_t UTF16toWC (wchar_t* dest, size_t destSize, 
00628     const utf16_char* source, size_t srcSize)
00629   {
00630     size_t unitCount = srcSize;
00631     if (unitCount == (size_t)-1)
00632     {
00633       // No length given: count the units before the terminating zero.
00634       for (unitCount = 0; source[unitCount] != 0; unitCount++) { }
00635     }
00636     if (dest && destSize)
00637     {
00638       // Truncate to the buffer, always leaving room for the terminator.
00639       const size_t copyCount = MIN (destSize - 1, unitCount);
00640       memcpy (dest, source, copyCount * sizeof (wchar_t));
00641       dest[copyCount] = 0;
00642     }
00643     return unitCount + 1;
00644   }
00645 
00646   /**
00647    * Convert a UTF-32 string to a wchar_t string; with CS_WCHAR_T_SIZE == 2
00648    * the buffer is filled by UTF32to16(), whose result is returned.
00649    */
00650   inline static size_t UTF32toWC (wchar_t* dest, size_t destSize, 
00651     const utf32_char* source, size_t srcSize)
00652   {
00653     return csUnicodeTransform::UTF32to16 ((utf16_char*)dest, destSize, source, srcSize);
00654   }
00655   
00656   /**
00657    * Convert a wchar_t string to UTF-8; with CS_WCHAR_T_SIZE == 2 the
00658    * source is read as UTF-16 by UTF16to8(), whose result is returned.
00659    */
00660   inline static size_t WCtoUTF8 (utf8_char* dest, size_t destSize, 
00661     const wchar_t* source, size_t srcSize)
00662   {
00663     return UTF16to8 (dest, destSize, (const utf16_char*)source, srcSize);
00664   }
00665 
00666   /**
00667    * Copy a wchar_t string into a UTF-16 buffer (CS_WCHAR_T_SIZE is 2 here).
00668    * Returns the source length plus one for the terminator.
00669    */
00670   inline static size_t WCtoUTF16 (utf16_char* dest, size_t destSize, 
00671     const wchar_t* source, size_t srcSize)
00672   {
00673     size_t unitCount = srcSize;
00674     if (unitCount == (size_t)-1)
00675     {
00676       // No length given: count the units before the terminating zero.
00677       for (unitCount = 0; source[unitCount] != 0; unitCount++) { }
00678     }
00679     if (dest && destSize)
00680     {
00681       // Truncate to the buffer, always leaving room for the terminator.
00682       const size_t copyCount = MIN (destSize - 1, unitCount);
00683       memcpy (dest, source, copyCount * sizeof (wchar_t));
00684       dest[copyCount] = 0;
00685     }
00686     return unitCount + 1;
00687   }
00688 
00689   /**
00690    * Convert a wchar_t string to UTF-32; with CS_WCHAR_T_SIZE == 2 the
00691    * source is read as UTF-16 by UTF16to32(), whose result is returned.
00692    */
00693   inline static size_t WCtoUTF32 (utf32_char* dest, size_t destSize, 
00694     const wchar_t* source, size_t srcSize)
00695   {
00696     return UTF16to32 (dest, destSize, (const utf16_char*)source, srcSize);
00697   }
00698 
00699 #if !defined(CS_COMPILER_MSVC) || (_MSC_VER >= 1300)
00700   /* @@@ On VC6 utf16_char == wchar_t, so the compiler complains about the
00701    * overloads below. (VC7 avoids this with the "Builtin wchar_t" option.) */
00702   /**
00703    * Overload for wchar_t input; with CS_WCHAR_T_SIZE == 2 the input is
00704    * decoded as UTF-16 by UTF16Decode(), whose result is returned.
00705    */
00706   inline static int Decode (const wchar_t* str, size_t strlen, 
00707     utf32_char& ch, bool* isValid = 0, bool returnNonChar = false)
00708   {
00709     return UTF16Decode ((const utf16_char*)str, strlen, ch, isValid, returnNonChar);
00710   }
00711   /**
00712    * Overload for wchar_t output; with CS_WCHAR_T_SIZE == 2 the buffer is
00713    * filled as UTF-16 by EncodeUTF16(), whose result is returned.
00714    */
00715   inline static int Encode (const utf32_char ch, wchar_t* buf, 
00716     size_t bufsize, bool allowNonchars = false)
00717   {
00718     return csUnicodeTransform::EncodeUTF16 (ch, (utf16_char*)buf, bufsize, allowNonchars);
00719   }
00720 #endif
00721 
00722 #elif (CS_WCHAR_T_SIZE == 4)
00723   inline static size_t UTF8toWC (wchar_t* dest, size_t destSize, 
00724     const utf8_char* source, size_t srcSize)
00725   { // CS_WCHAR_T_SIZE == 4: the wchar_t buffer is filled as UTF-32.
00726     return csUnicodeTransform::UTF8to32 ((utf32_char*)dest, destSize, source, srcSize);
00727   }
00728 
00729   inline static size_t UTF16toWC (wchar_t* dest, size_t destSize, 
00730     const utf16_char* source, size_t srcSize)
00731   { // CS_WCHAR_T_SIZE == 4: the wchar_t buffer is filled as UTF-32.
00732     return csUnicodeTransform::UTF16to32 ((utf32_char*)dest, destSize, source, srcSize);
00733   }
00734 
00735   inline static size_t UTF32toWC (wchar_t* dest, size_t destSize, 
00736     const utf32_char* source,  size_t srcSize)
00737   {
00738     // CS_WCHAR_T_SIZE == 4: units are copied verbatim, truncated to fit.
00739     size_t unitCount = srcSize;
00740     if (unitCount == (size_t)-1)
00741     {
00742       // No length given: count the units before the terminating zero.
00743       for (unitCount = 0; source[unitCount] != 0; unitCount++) { }
00744     }
00745     if (dest && destSize)
00746     {
00747       const size_t copyCount = MIN (destSize - 1, unitCount);
00748       memcpy (dest, source, copyCount * sizeof (wchar_t));
00749       dest[copyCount] = 0;
00750     }
00751     return unitCount + 1;  // source length plus terminator
00752   }
00753   
00754   inline static size_t WCtoUTF8 (utf8_char* dest, size_t destSize, 
00755     const wchar_t* source, size_t srcSize)
00756   { // CS_WCHAR_T_SIZE == 4: the wchar_t source is read as UTF-32.
00757     return UTF32to8 (dest, destSize, (const utf32_char*)source, srcSize);
00758   }
00759 
00760   inline static size_t WCtoUTF16 (utf16_char* dest, size_t destSize, 
00761     const wchar_t* source, size_t srcSize)
00762   { // CS_WCHAR_T_SIZE == 4: the wchar_t source is read as UTF-32.
00763     return UTF32to16 (dest, destSize, (const utf32_char*)source, srcSize);
00764   }
00765 
00766   inline static size_t WCtoUTF32 (utf32_char* dest, size_t destSize, 
00767     const wchar_t* source, size_t srcSize)
00768   {
00769     // CS_WCHAR_T_SIZE == 4: units are copied verbatim, truncated to fit.
00770     size_t unitCount = srcSize;
00771     if (unitCount == (size_t)-1)
00772     {
00773       // No length given: count the units before the terminating zero.
00774       for (unitCount = 0; source[unitCount] != 0; unitCount++) { }
00775     }
00776     if (dest && destSize)
00777     {
00778       const size_t copyCount = MIN (destSize - 1, unitCount);
00779       memcpy (dest, source, copyCount * sizeof (wchar_t));
00780       dest[copyCount] = 0;
00781     }
00782     return unitCount + 1;  // source length plus terminator
00783   }
00784 
00785   inline static int Decode (const wchar_t* str, size_t strlen, 
00786     utf32_char& ch, bool* isValid = 0, bool returnNonChar = false)
00787   { // CS_WCHAR_T_SIZE == 4: the wchar_t input is decoded as UTF-32.
00788     return UTF32Decode ((const utf32_char*)str, strlen, ch, isValid, returnNonChar);
00789   }
00790   inline static int Encode (const utf32_char ch, wchar_t* buf, 
00791     size_t bufsize, bool allowNonchars = false)
00792   { // CS_WCHAR_T_SIZE == 4: the wchar_t buffer is filled as UTF-32.
00793     return csUnicodeTransform::EncodeUTF32 (ch, (utf32_char*)buf, bufsize, allowNonchars);
00794   }
00795 #else
00796   #error Odd-sized, unsupported wchar_t!
00797 #endif
00798 
00799   /**
00800    * Determine how many code units the UTF-8 character at \a str occupies.
00801    * \param str Pointer to the first code unit of the character.
00802    * \param maxSkip Number of code units readable at \a str; the result
00803    *   never exceeds it and no unit beyond it is examined.
00804    * \return Number of units to skip: 0 if \a maxSkip is 0, and 1 for an
00805    *   invalid lead byte. A sequence that is cut short is skipped up to
00806    *   the first unit that is not a continuation byte.
00807    */
00811   inline static int UTF8Skip (const utf8_char* str, size_t maxSkip)
00812   {
00813     if (maxSkip < 1) return 0;
00814   
00815     // Single-byte (ASCII) character.
00816     if ((*str & 0x80) == 0) return 1;
00817 
00818     // The number of leading 1 bits of the lead byte is the sequence length.
00819     int n = 0;
00820     while ((n < 7) && ((*str & (1 << (7 - n))) != 0)) { n++; }
00821 
00822     // Invalid lead byte: step over just this unit.
00823     if ((n < 2) || (n > 6)) return 1;
00824 
00825     int skip = 1;
00826     for (; skip < n; skip++)
00827     {
00828       // Test the bound first so that str[skip] is only read when it
00829       // lies inside the maxSkip units the caller made available.
00830       if (((size_t)skip >= maxSkip) || ((str[skip] & 0xc0) != 0x80))
00831       {
00832         break;
00833       }
00834     }
00835     return skip;
00836   }
00841   
00842   /**
00843    * Count the code units to step back from \a str to reach the start of
00844    * the preceding UTF-8 character.
00845    * \param str Position just behind the character to rewind over.
00846    * \param maxRew Upper limit for the result.
00847    * \return 0 if \a maxRew is 0; otherwise 1 plus the number of
00848    *   continuation bytes passed, limited to \a maxRew.
00849    */
00852   inline static int UTF8Rewind (const utf8_char* str, size_t maxRew)
00853   {
00854     if (maxRew < 1) return 0;
00855 
00856     int stepsBack = 1;
00857     // Continuation bytes look like 10xxxxxx; keep moving towards the start
00858     // until a different kind of byte is found or the limit is reached.
00859     for (const utf8_char* cur = str - 1;
00860       ((*cur & 0xc0) == 0x80) && ((size_t)stepsBack < maxRew); cur--)
00861     {
00862       stepsBack++;
00863     }
00864     return stepsBack;
00865   }
00873   
00874   /**
00875    * Number of code units taken by the UTF-16 character at \a str: 2 if it
00876    * starts with a high surrogate, otherwise 1; never more than \a maxSkip.
00877    */
00879   inline static int UTF16Skip (const utf16_char* str, size_t maxSkip)
00880   {
00881     const size_t width = CS_UC_IS_HIGH_SURROGATE (*str) ? 2 : 1;
00882     return (int)(MIN (maxSkip, width));
00883   }
00886   
00887   /**
00888    * Number of code units to step back from \a str to reach the start of
00889    * the preceding UTF-16 character (0, 1 or 2), limited to \a maxRew.
00890    */
00892   inline static int UTF16Rewind (const utf16_char* str, size_t maxRew)
00893   {
00894     if (maxRew < 1) return 0;
00895 
00896     const utf16_char* prev = str - 1;
00897     if (!CS_UC_IS_SURROGATE(*prev)) return 1;
00898 
00899     // Two units are rewound only if the limit permits it and the unit
00900     // before the surrogate is a high surrogate.
00901     return ((maxRew > 1) && (CS_UC_IS_HIGH_SURROGATE(*(prev - 1)))) ? 2 : 1;
00902   }
00907   
00908   /**
00909    * Number of code units taken by the UTF-32 character at \a str: always
00910    * 1, unless \a maxSkip is 0. \a str itself is not examined.
00911    */
00913   inline static int UTF32Skip (const utf32_char* str, size_t maxSkip)
00914   {
00915     return static_cast<int> (MIN (maxSkip, 1));
00916   }
00917 
00918   /**
00919    * Number of code units to step back to the preceding UTF-32 character:
00920    * always 1, unless \a maxRew is 0. \a str itself is not examined.
00921    */
00923   inline static int UTF32Rewind (const utf32_char* str, size_t maxRew)
00924   {
00925     return (maxRew < 1) ? 0 : 1;
00926   }
00940   static size_t MapToUpper (const utf32_char ch, utf32_char* dest,   // presumably the upper-case mapping of ch, written to dest -- defined outside this file, TODO confirm
00941     size_t destSize);
00946   static size_t MapToLower (const utf32_char ch, utf32_char* dest,   // presumably the lower-case mapping of ch -- defined outside this file, TODO confirm
00947     size_t destSize);
00953   static size_t MapToFold (const utf32_char ch, utf32_char* dest,   // presumably the case-folded form of ch -- defined outside this file, TODO confirm
00954     size_t destSize);
00956 };
00957 
00960 #endif
00961 

Generated for Crystal Space by doxygen 1.3.9.1