00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027 #ifndef __MYGUI_U_STRING_H__
00028 #define __MYGUI_U_STRING_H__
00029
00030 #include "MyGUI_Prerequest.h"
00031 #include "MyGUI_Types.h"
00032 #include "MyGUI_Diagnostic.h"
00033 #include "MyGUI_LogManager.h"
00034
00035
00036 #include <iterator>
00037 #include <string>
00038 #include <stdexcept>
00039
00040
00041 #if MYGUI_COMPILER == MYGUI_COMPILER_GNUC
00042 #pragma GCC system_header
00043 #endif
00044
00045
00046
00047
00048
00049
00050
00051
// Explicit instantiations of std::basic_string for MSVC builds whose MYGUI_COMP_VER is in
// the range 1300..1310 and that define _DLL_CPPLIB. Both the `unsigned short` and the
// `__wchar_t` specialisations are declared through _CRTIMP2.
// NOTE(review): presumably this prevents duplicate instantiations when linking against the
// DLL C++ runtime -- confirm against the MSVC 7.x runtime headers.
#if MYGUI_COMPILER == MYGUI_COMPILER_MSVC && MYGUI_COMP_VER >= 1300 && MYGUI_COMP_VER <= 1310 && defined(_DLL_CPPLIB)

namespace std
{
    template class _CRTIMP2 basic_string<unsigned short, char_traits<unsigned short>,
        allocator<unsigned short> >;

    template class _CRTIMP2 basic_string<__wchar_t, char_traits<__wchar_t>,
        allocator<__wchar_t> >;
}

#endif // MYGUI_COMPILER == MYGUI_COMPILER_MSVC && 1300 <= MYGUI_COMP_VER <= 1310 && defined(_DLL_CPPLIB)
00068
00069
00070 namespace MyGUI
00071 {
00072
00073
00074
00075
00076
00077
00078
00079
00080
00081
00082
00083
00084
00085
00086
00087
00088
00089
00090
00091
00092
00093
00094
00095
00096
00097
00098
00099
00100
00101
00102
00103
00104
00105
// Decide whether wchar_t is treated as carrying UTF-16 code units (WCHAR_UTF16).
// Nothing is defined when the implementation advertises __STDC_ISO_10646__.
// Otherwise WCHAR_UTF16 is defined on Windows targets (all currently known Windows
// platforms use UTF-16 in wchar_t) or, as a last-resort fallback, whenever WCHAR_MAX
// (from <wchar.h>) shows that wchar_t is no wider than 16 bits.
#ifndef __STDC_ISO_10646__
#   if defined( __WIN32__ ) || defined( _WIN32 )
#       define WCHAR_UTF16
#   elif WCHAR_MAX <= 0xFFFF
#       define WCHAR_UTF16
#   endif
#endif // #ifndef __STDC_ISO_10646__
00118
00119
00120
00121
// MYGUI_IS_NATIVE_WCHAR_T is 1 when wchar_t is a distinct built-in type and 0 otherwise.
// Only MSVC can make it 0: there the value follows _NATIVE_WCHAR_T_DEFINED. Every other
// compiler is assumed to provide a native wchar_t. The wchar_t overloads of UString are
// compiled only when this macro is non-zero.
#if MYGUI_COMPILER != MYGUI_COMPILER_MSVC
#   define MYGUI_IS_NATIVE_WCHAR_T 1
#elif defined(_NATIVE_WCHAR_T_DEFINED)
#   define MYGUI_IS_NATIVE_WCHAR_T 1
#else
#   define MYGUI_IS_NATIVE_WCHAR_T 0
#endif // MYGUI_COMPILER != MYGUI_COMPILER_MSVC
00140
00142
00167 class UString
00168 {
00169
// Byte patterns and payload masks for UTF-8 handling. Judging by the names and values,
// _leadN is the marker pattern of a lead byte that is followed by N continuation bytes and
// _leadN_mask selects that lead byte's payload bits; _cont / _cont_mask play the same role
// for continuation bytes.
// NOTE(review): the helpers that consume these constants are not visible in this part of
// the file -- confirm the exact usage there.
00170 static const unsigned char _lead1 = 0xC0;
00171 static const unsigned char _lead1_mask = 0x1F;
00172 static const unsigned char _lead2 = 0xE0;
00173 static const unsigned char _lead2_mask = 0x0F;
00174 static const unsigned char _lead3 = 0xF0;
00175 static const unsigned char _lead3_mask = 0x07;
00176 static const unsigned char _lead4 = 0xF8;
00177 static const unsigned char _lead4_mask = 0x03;
00178 static const unsigned char _lead5 = 0xFC;
00179 static const unsigned char _lead5_mask = 0x01;
00180 static const unsigned char _cont = 0x80;
00181 static const unsigned char _cont_mask = 0x3F;
00182
00183 public:
// Unsigned type used for every size and index in this class (counted in code_point units).
00185 typedef size_t size_type;
// Sentinel with all bits set; used as the "until the end" default for length arguments.
00187 static const size_type npos = ~(size_t)0;
00188
// Holds one complete Unicode character value (uint32 is presumably 32 bits wide).
00190 typedef uint32 unicode_char;
00191
// Holds one UTF-16 code unit; a character may need one or two of these.
00193 typedef uint16 code_point;
00194
// Element type exposed by the iterators.
00196 typedef code_point value_type;
00197
// Internal storage type: a std::basic_string of code_point.
00198 typedef std::basic_string<code_point> dstring;
00199
// String type returned by the UTF-32 conversion accessors.
00201 typedef std::basic_string<unicode_char> utf32string;
00202
//! Exception type for invalid string data; a thin wrapper over std::runtime_error.
//! NOTE(review): the throw sites are not visible in this part of the file.
class invalid_data: public std::runtime_error
{
public:
    //! Constructs the exception carrying the explanatory text \a message.
    // The parameter was previously spelled `_Message`; identifiers that start with an
    // underscore followed by an uppercase letter are reserved for the implementation.
    explicit invalid_data( const std::string& message ): std::runtime_error( message )
    {
    }
};
00213
00214
00216 class _base_iterator: public std::iterator<std::random_access_iterator_tag, value_type>
00217 {
00218 friend class UString;
00219 protected:
00220 _base_iterator()
00221 {
00222 mString = 0;
00223 }
00224
00225 void _seekFwd( size_type c )
00226 {
00227 mIter += c;
00228 }
00229 void _seekRev( size_type c )
00230 {
00231 mIter -= c;
00232 }
00233 void _become( const _base_iterator& i )
00234 {
00235 mIter = i.mIter;
00236 mString = i.mString;
00237 }
00238 bool _test_begin() const
00239 {
00240 return mIter == mString->mData.begin();
00241 }
00242 bool _test_end() const
00243 {
00244 return mIter == mString->mData.end();
00245 }
00246 size_type _get_index() const
00247 {
00248 return mIter - mString->mData.begin();
00249 }
00250 void _jump_to( size_type index )
00251 {
00252 mIter = mString->mData.begin() + index;
00253 }
00254
00255 unicode_char _getCharacter() const
00256 {
00257 size_type current_index = _get_index();
00258 return mString->getChar( current_index );
00259 }
00260 int _setCharacter( unicode_char uc )
00261 {
00262 size_type current_index = _get_index();
00263 int change = mString->setChar( current_index, uc );
00264 _jump_to( current_index );
00265 return change;
00266 }
00267
00268 void _moveNext()
00269 {
00270 _seekFwd( 1 );
00271 if ( _test_end() ) return;
00272 if ( _utf16_surrogate_follow( mIter[0] ) )
00273 {
00274
00275
00276 code_point lead_half = 0;
00277
00278 lead_half = mIter[-1];
00279 if ( _utf16_surrogate_lead( lead_half ) )
00280 {
00281 _seekFwd( 1 );
00282 }
00283 }
00284 }
00285 void _movePrev()
00286 {
00287 _seekRev( 1 );
00288 if ( _test_begin() ) return;
00289 if ( _utf16_surrogate_follow( mIter[0] ) )
00290 {
00291
00292
00293 code_point lead_half = 0;
00294 lead_half = mIter[-1];
00295 if ( _utf16_surrogate_lead( lead_half ) )
00296 {
00297 _seekRev( 1 );
00298 }
00299 }
00300 }
00301
00302 dstring::iterator mIter;
00303 UString* mString;
00304 };
00305
00306
00307
00308
00309 class _const_fwd_iterator;
00310
00312 class _fwd_iterator: public _base_iterator
00313 {
00314 friend class _const_fwd_iterator;
00315 public:
00316 _fwd_iterator() { }
00317 _fwd_iterator( const _fwd_iterator& i )
00318 {
00319 _become( i );
00320 }
00321
00323 _fwd_iterator& operator++()
00324 {
00325 _seekFwd( 1 );
00326 return *this;
00327 }
00329 _fwd_iterator operator++( int )
00330 {
00331 _fwd_iterator tmp( *this );
00332 _seekFwd( 1 );
00333 return tmp;
00334 }
00335
00337 _fwd_iterator& operator--()
00338 {
00339 _seekRev( 1 );
00340 return *this;
00341 }
00343 _fwd_iterator operator--( int )
00344 {
00345 _fwd_iterator tmp( *this );
00346 _seekRev( 1 );
00347 return tmp;
00348 }
00349
00351 _fwd_iterator operator+( size_type n )
00352 {
00353 _fwd_iterator tmp( *this );
00354 tmp._seekFwd( n );
00355 return tmp;
00356 }
00358 _fwd_iterator operator+( difference_type n )
00359 {
00360 _fwd_iterator tmp( *this );
00361 if ( n < 0 )
00362 tmp._seekRev( -n );
00363 else
00364 tmp._seekFwd( n );
00365 return tmp;
00366 }
00368 _fwd_iterator operator-( size_type n )
00369 {
00370 _fwd_iterator tmp( *this );
00371 tmp._seekRev( n );
00372 return tmp;
00373 }
00375 _fwd_iterator operator-( difference_type n )
00376 {
00377 _fwd_iterator tmp( *this );
00378 if ( n < 0 )
00379 tmp._seekFwd( -n );
00380 else
00381 tmp._seekRev( n );
00382 return tmp;
00383 }
00384
00386 _fwd_iterator& operator+=( size_type n )
00387 {
00388 _seekFwd( n );
00389 return *this;
00390 }
00392 _fwd_iterator& operator+=( difference_type n )
00393 {
00394 if ( n < 0 )
00395 _seekRev( -n );
00396 else
00397 _seekFwd( n );
00398 return *this;
00399 }
00401 _fwd_iterator& operator-=( size_type n )
00402 {
00403 _seekRev( n );
00404 return *this;
00405 }
00407 _fwd_iterator& operator-=( difference_type n )
00408 {
00409 if ( n < 0 )
00410 _seekFwd( -n );
00411 else
00412 _seekRev( n );
00413 return *this;
00414 }
00415
00417 value_type& operator*() const
00418 {
00419 return *mIter;
00420 }
00421
00423 value_type& operator[]( size_type n ) const
00424 {
00425 _fwd_iterator tmp( *this );
00426 tmp += n;
00427 return *tmp;
00428 }
00430 value_type& operator[]( difference_type n ) const
00431 {
00432 _fwd_iterator tmp( *this );
00433 tmp += n;
00434 return *tmp;
00435 }
00436
00438 _fwd_iterator& moveNext()
00439 {
00440 _moveNext();
00441 return *this;
00442 }
00444 _fwd_iterator& movePrev()
00445 {
00446 _movePrev();
00447 return *this;
00448 }
00450 unicode_char getCharacter() const
00451 {
00452 return _getCharacter();
00453 }
00455 int setCharacter( unicode_char uc )
00456 {
00457 return _setCharacter( uc );
00458 }
00459 };
00460
00461
00462
00464 class _const_fwd_iterator: public _base_iterator
00465 {
00466 public:
00467 _const_fwd_iterator() { }
00468 _const_fwd_iterator( const _const_fwd_iterator& i )
00469 {
00470 _become( i );
00471 }
00472 _const_fwd_iterator( const _fwd_iterator& i )
00473 {
00474 _become( i );
00475 }
00476
00478 _const_fwd_iterator& operator++()
00479 {
00480 _seekFwd( 1 );
00481 return *this;
00482 }
00484 _const_fwd_iterator operator++( int )
00485 {
00486 _const_fwd_iterator tmp( *this );
00487 _seekFwd( 1 );
00488 return tmp;
00489 }
00490
00492 _const_fwd_iterator& operator--()
00493 {
00494 _seekRev( 1 );
00495 return *this;
00496 }
00498 _const_fwd_iterator operator--( int )
00499 {
00500 _const_fwd_iterator tmp( *this );
00501 _seekRev( 1 );
00502 return tmp;
00503 }
00504
00506 _const_fwd_iterator operator+( size_type n )
00507 {
00508 _const_fwd_iterator tmp( *this );
00509 tmp._seekFwd( n );
00510 return tmp;
00511 }
00513 _const_fwd_iterator operator+( difference_type n )
00514 {
00515 _const_fwd_iterator tmp( *this );
00516 if ( n < 0 )
00517 tmp._seekRev( -n );
00518 else
00519 tmp._seekFwd( n );
00520 return tmp;
00521 }
00523 _const_fwd_iterator operator-( size_type n )
00524 {
00525 _const_fwd_iterator tmp( *this );
00526 tmp._seekRev( n );
00527 return tmp;
00528 }
00530 _const_fwd_iterator operator-( difference_type n )
00531 {
00532 _const_fwd_iterator tmp( *this );
00533 if ( n < 0 )
00534 tmp._seekFwd( -n );
00535 else
00536 tmp._seekRev( n );
00537 return tmp;
00538 }
00539
00541 _const_fwd_iterator& operator+=( size_type n )
00542 {
00543 _seekFwd( n );
00544 return *this;
00545 }
00547 _const_fwd_iterator& operator+=( difference_type n )
00548 {
00549 if ( n < 0 )
00550 _seekRev( -n );
00551 else
00552 _seekFwd( n );
00553 return *this;
00554 }
00556 _const_fwd_iterator& operator-=( size_type n )
00557 {
00558 _seekRev( n );
00559 return *this;
00560 }
00562 _const_fwd_iterator& operator-=( difference_type n )
00563 {
00564 if ( n < 0 )
00565 _seekFwd( -n );
00566 else
00567 _seekRev( n );
00568 return *this;
00569 }
00570
00572 const value_type& operator*() const
00573 {
00574 return *mIter;
00575 }
00576
00578 const value_type& operator[]( size_type n ) const
00579 {
00580 _const_fwd_iterator tmp( *this );
00581 tmp += n;
00582 return *tmp;
00583 }
00585 const value_type& operator[]( difference_type n ) const
00586 {
00587 _const_fwd_iterator tmp( *this );
00588 tmp += n;
00589 return *tmp;
00590 }
00591
00593 _const_fwd_iterator& moveNext()
00594 {
00595 _moveNext();
00596 return *this;
00597 }
00599 _const_fwd_iterator& movePrev()
00600 {
00601 _movePrev();
00602 return *this;
00603 }
00605 unicode_char getCharacter() const
00606 {
00607 return _getCharacter();
00608 }
00609
00611 friend size_type operator-( const _const_fwd_iterator& left, const _const_fwd_iterator& right );
00613 friend bool operator==( const _const_fwd_iterator& left, const _const_fwd_iterator& right );
00615 friend bool operator!=( const _const_fwd_iterator& left, const _const_fwd_iterator& right );
00617 friend bool operator<( const _const_fwd_iterator& left, const _const_fwd_iterator& right );
00619 friend bool operator<=( const _const_fwd_iterator& left, const _const_fwd_iterator& right );
00621 friend bool operator>( const _const_fwd_iterator& left, const _const_fwd_iterator& right );
00623 friend bool operator>=( const _const_fwd_iterator& left, const _const_fwd_iterator& right );
00624
00625 };
00626
00627
00628
00629
00630 class _const_rev_iterator;
00632 class _rev_iterator: public _base_iterator
00633 {
00634 friend class _const_rev_iterator;
00635 public:
00636 _rev_iterator() { }
00637 _rev_iterator( const _rev_iterator& i )
00638 {
00639 _become( i );
00640 }
00641
00643 _rev_iterator& operator++()
00644 {
00645 _seekRev( 1 );
00646 return *this;
00647 }
00649 _rev_iterator operator++( int )
00650 {
00651 _rev_iterator tmp( *this );
00652 _seekRev( 1 );
00653 return tmp;
00654 }
00655
00657 _rev_iterator& operator--()
00658 {
00659 _seekFwd( 1 );
00660 return *this;
00661 }
00663 _rev_iterator operator--( int )
00664 {
00665 _rev_iterator tmp( *this );
00666 _seekFwd( 1 );
00667 return tmp;
00668 }
00669
00671 _rev_iterator operator+( size_type n )
00672 {
00673 _rev_iterator tmp( *this );
00674 tmp._seekRev( n );
00675 return tmp;
00676 }
00678 _rev_iterator operator+( difference_type n )
00679 {
00680 _rev_iterator tmp( *this );
00681 if ( n < 0 )
00682 tmp._seekFwd( -n );
00683 else
00684 tmp._seekRev( n );
00685 return tmp;
00686 }
00688 _rev_iterator operator-( size_type n )
00689 {
00690 _rev_iterator tmp( *this );
00691 tmp._seekFwd( n );
00692 return tmp;
00693 }
00695 _rev_iterator operator-( difference_type n )
00696 {
00697 _rev_iterator tmp( *this );
00698 if ( n < 0 )
00699 tmp._seekRev( -n );
00700 else
00701 tmp._seekFwd( n );
00702 return tmp;
00703 }
00704
00706 _rev_iterator& operator+=( size_type n )
00707 {
00708 _seekRev( n );
00709 return *this;
00710 }
00712 _rev_iterator& operator+=( difference_type n )
00713 {
00714 if ( n < 0 )
00715 _seekFwd( -n );
00716 else
00717 _seekRev( n );
00718 return *this;
00719 }
00721 _rev_iterator& operator-=( size_type n )
00722 {
00723 _seekFwd( n );
00724 return *this;
00725 }
00727 _rev_iterator& operator-=( difference_type n )
00728 {
00729 if ( n < 0 )
00730 _seekRev( -n );
00731 else
00732 _seekFwd( n );
00733 return *this;
00734 }
00735
00737 value_type& operator*() const
00738 {
00739 return mIter[-1];
00740 }
00741
00743 value_type& operator[]( size_type n ) const
00744 {
00745 _rev_iterator tmp( *this );
00746 tmp -= n;
00747 return *tmp;
00748 }
00750 value_type& operator[]( difference_type n ) const
00751 {
00752 _rev_iterator tmp( *this );
00753 tmp -= n;
00754 return *tmp;
00755 }
00756 };
00757
00759 class _const_rev_iterator: public _base_iterator
00760 {
00761 public:
00762 _const_rev_iterator() { }
00763 _const_rev_iterator( const _const_rev_iterator& i )
00764 {
00765 _become( i );
00766 }
00767 _const_rev_iterator( const _rev_iterator& i )
00768 {
00769 _become( i );
00770 }
00772 _const_rev_iterator& operator++()
00773 {
00774 _seekRev( 1 );
00775 return *this;
00776 }
00778 _const_rev_iterator operator++( int )
00779 {
00780 _const_rev_iterator tmp( *this );
00781 _seekRev( 1 );
00782 return tmp;
00783 }
00784
00786 _const_rev_iterator& operator--()
00787 {
00788 _seekFwd( 1 );
00789 return *this;
00790 }
00792 _const_rev_iterator operator--( int )
00793 {
00794 _const_rev_iterator tmp( *this );
00795 _seekFwd( 1 );
00796 return tmp;
00797 }
00798
00800 _const_rev_iterator operator+( size_type n )
00801 {
00802 _const_rev_iterator tmp( *this );
00803 tmp._seekRev( n );
00804 return tmp;
00805 }
00807 _const_rev_iterator operator+( difference_type n )
00808 {
00809 _const_rev_iterator tmp( *this );
00810 if ( n < 0 )
00811 tmp._seekFwd( -n );
00812 else
00813 tmp._seekRev( n );
00814 return tmp;
00815 }
00817 _const_rev_iterator operator-( size_type n )
00818 {
00819 _const_rev_iterator tmp( *this );
00820 tmp._seekFwd( n );
00821 return tmp;
00822 }
00824 _const_rev_iterator operator-( difference_type n )
00825 {
00826 _const_rev_iterator tmp( *this );
00827 if ( n < 0 )
00828 tmp._seekRev( -n );
00829 else
00830 tmp._seekFwd( n );
00831 return tmp;
00832 }
00833
00835 _const_rev_iterator& operator+=( size_type n )
00836 {
00837 _seekRev( n );
00838 return *this;
00839 }
00841 _const_rev_iterator& operator+=( difference_type n )
00842 {
00843 if ( n < 0 )
00844 _seekFwd( -n );
00845 else
00846 _seekRev( n );
00847 return *this;
00848 }
00850 _const_rev_iterator& operator-=( size_type n )
00851 {
00852 _seekFwd( n );
00853 return *this;
00854 }
00856 _const_rev_iterator& operator-=( difference_type n )
00857 {
00858 if ( n < 0 )
00859 _seekRev( -n );
00860 else
00861 _seekFwd( n );
00862 return *this;
00863 }
00864
00866 const value_type& operator*() const
00867 {
00868 return mIter[-1];
00869 }
00870
00872 const value_type& operator[]( size_type n ) const
00873 {
00874 _const_rev_iterator tmp( *this );
00875 tmp -= n;
00876 return *tmp;
00877 }
00879 const value_type& operator[]( difference_type n ) const
00880 {
00881 _const_rev_iterator tmp( *this );
00882 tmp -= n;
00883 return *tmp;
00884 }
00885
00887 friend size_type operator-( const _const_rev_iterator& left, const _const_rev_iterator& right );
00889 friend bool operator==( const _const_rev_iterator& left, const _const_rev_iterator& right );
00891 friend bool operator!=( const _const_rev_iterator& left, const _const_rev_iterator& right );
00893 friend bool operator<( const _const_rev_iterator& left, const _const_rev_iterator& right );
00895 friend bool operator<=( const _const_rev_iterator& left, const _const_rev_iterator& right );
00897 friend bool operator>( const _const_rev_iterator& left, const _const_rev_iterator& right );
00899 friend bool operator>=( const _const_rev_iterator& left, const _const_rev_iterator& right );
00900 };
00901
00902
// Standard-container style names for the four iterator classes declared above.
00903 typedef _fwd_iterator iterator;
00904 typedef _rev_iterator reverse_iterator;
00905 typedef _const_fwd_iterator const_iterator;
00906 typedef _const_rev_iterator const_reverse_iterator;
00907
00908
00910
00911
00912 UString()
00913 {
00914 _init();
00915 }
00917 UString( const UString& copy )
00918 {
00919 _init();
00920 mData = copy.mData;
00921 }
00923 UString( size_type length, const code_point& ch )
00924 {
00925 _init();
00926 assign( length, ch );
00927 }
00929 UString( const code_point* str )
00930 {
00931 _init();
00932 assign( str );
00933 }
00935 UString( const code_point* str, size_type length )
00936 {
00937 _init();
00938 assign( str, length );
00939 }
00941 UString( const UString& str, size_type index, size_type length )
00942 {
00943 _init();
00944 assign( str, index, length );
00945 }
00946 #if MYGUI_IS_NATIVE_WCHAR_T
00947
00948 UString( const wchar_t* w_str )
00949 {
00950 _init();
00951 assign( w_str );
00952 }
00954 UString( const wchar_t* w_str, size_type length )
00955 {
00956 _init();
00957 assign( w_str, length );
00958 }
00959 #endif
00960
00961 UString( const std::wstring& wstr )
00962 {
00963 _init();
00964 assign( wstr );
00965 }
00967 UString( const char* c_str )
00968 {
00969 _init();
00970 assign( c_str );
00971 }
00973 UString( const char* c_str, size_type length )
00974 {
00975 _init();
00976 assign( c_str, length );
00977 }
00979 UString( const std::string& str )
00980 {
00981 _init();
00982 assign( str );
00983 }
00985 ~UString()
00986 {
00987 _cleanBuffer();
00988 }
00990
00992
00994
00995
00996 size_type size() const
00997 {
00998 return mData.size();
00999 }
01001 size_type length() const
01002 {
01003 return size();
01004 }
01006
01007 size_type length_Characters() const
01008 {
01009 const_iterator i = begin(), ie = end();
01010 size_type c = 0;
01011 while ( i != ie )
01012 {
01013 i.moveNext();
01014 ++c;
01015 }
01016 return c;
01017 }
01019 size_type max_size() const
01020 {
01021 return mData.max_size();
01022 }
01024 void reserve( size_type size )
01025 {
01026 mData.reserve( size );
01027 }
01029 void resize( size_type num, const code_point& val = 0 )
01030 {
01031 mData.resize( num, val );
01032 }
01034 void swap( UString& from )
01035 {
01036 mData.swap( from.mData );
01037 }
01039 bool empty() const
01040 {
01041 return mData.empty();
01042 }
01044 const code_point* c_str() const
01045 {
01046 return mData.c_str();
01047 }
01049 const code_point* data() const
01050 {
01051 return c_str();
01052 }
01054 size_type capacity() const
01055 {
01056 return mData.capacity();
01057 }
01059 void clear()
01060 {
01061 mData.clear();
01062 }
01064
01065 UString substr( size_type index, size_type num = npos ) const
01066 {
01067
01068 dstring data = mData.substr( index, num );
01069 UString tmp;
01070 tmp.mData.swap( data );
01071 return tmp;
01072 }
01074 void push_back( unicode_char val )
01075 {
01076 code_point cp[2];
01077 size_t c = _utf32_to_utf16( val, cp );
01078 if ( c > 0 ) push_back( cp[0] );
01079 if ( c > 1 ) push_back( cp[1] );
01080 }
01081 #if MYGUI_IS_NATIVE_WCHAR_T
01082
01083 void push_back( wchar_t val )
01084 {
01085
01086 mData.push_back( static_cast<unicode_char>( val ) );
01087 }
01088 #endif
01089
01090
01092 void push_back( code_point val )
01093 {
01094 mData.push_back( val );
01095 }
01097
01098 void push_back( char val )
01099 {
01100 mData.push_back( static_cast<code_point>( val ) );
01101 }
01103 bool inString( unicode_char ch ) const
01104 {
01105 const_iterator i, ie = end();
01106 for ( i = begin(); i != ie; i.moveNext() )
01107 {
01108 if ( i.getCharacter() == ch )
01109 return true;
01110 }
01111 return false;
01112 }
01114
01116
01118
01119
01120 const std::string& asUTF8() const
01121 {
01122 _load_buffer_UTF8();
01123 return *m_buffer.mStrBuffer;
01124 }
01126 const char* asUTF8_c_str() const
01127 {
01128 _load_buffer_UTF8();
01129 return m_buffer.mStrBuffer->c_str();
01130 }
01132 const utf32string& asUTF32() const
01133 {
01134 _load_buffer_UTF32();
01135 return *m_buffer.mUTF32StrBuffer;
01136 }
01138 const unicode_char* asUTF32_c_str() const
01139 {
01140 _load_buffer_UTF32();
01141 return m_buffer.mUTF32StrBuffer->c_str();
01142 }
01144 const std::wstring& asWStr() const
01145 {
01146 _load_buffer_WStr();
01147 return *m_buffer.mWStrBuffer;
01148 }
01150 const wchar_t* asWStr_c_str() const
01151 {
01152 _load_buffer_WStr();
01153 return m_buffer.mWStrBuffer->c_str();
01154 }
01156
01158
01160
01161
01162 code_point& at( size_type loc )
01163 {
01164 return mData.at( loc );
01165 }
01167 const code_point& at( size_type loc ) const
01168 {
01169 return mData.at( loc );
01170 }
01172
01176 unicode_char getChar( size_type loc ) const
01177 {
01178 const code_point* ptr = c_str();
01179 unicode_char uc;
01180 size_t len = _utf16_char_length( ptr[loc] );
01181 code_point cp[2] = { 0, 0 };
01182 cp[0] = ptr[loc];
01183
01184 if ( len == 2 && ( loc + 1 ) < mData.length() )
01185 {
01186 cp[1] = ptr[loc+1];
01187 }
01188 _utf16_to_utf32( cp, uc );
01189 return uc;
01190 }
01192
01200 int setChar( size_type loc, unicode_char ch )
01201 {
01202 code_point cp[2] = { 0, 0 };
01203 size_t lc = _utf32_to_utf16( ch, cp );
01204 unicode_char existingChar = getChar( loc );
01205 size_t existingSize = _utf16_char_length( existingChar );
01206 size_t newSize = _utf16_char_length( ch );
01207
01208 if ( newSize > existingSize )
01209 {
01210 at( loc ) = cp[0];
01211 insert( loc + 1, 1, cp[1] );
01212 return 1;
01213 }
01214 if ( newSize < existingSize )
01215 {
01216 erase( loc, 1 );
01217 at( loc ) = cp[0];
01218 return -1;
01219 }
01220
01221
01222 at( loc ) = cp[0];
01223 if ( lc == 2 ) at( loc + 1 ) = cp[1];
01224 return 0;
01225 }
01227
01229
01231
01232
01233 iterator begin()
01234 {
01235 iterator i;
01236 i.mIter = mData.begin();
01237 i.mString = this;
01238 return i;
01239 }
01241 const_iterator begin() const
01242 {
01243 const_iterator i;
01244 i.mIter = const_cast<UString*>( this )->mData.begin();
01245 i.mString = const_cast<UString*>( this );
01246 return i;
01247 }
01249 iterator end()
01250 {
01251 iterator i;
01252 i.mIter = mData.end();
01253 i.mString = this;
01254 return i;
01255 }
01257 const_iterator end() const
01258 {
01259 const_iterator i;
01260 i.mIter = const_cast<UString*>( this )->mData.end();
01261 i.mString = const_cast<UString*>( this );
01262 return i;
01263 }
01265 reverse_iterator rbegin()
01266 {
01267 reverse_iterator i;
01268 i.mIter = mData.end();
01269 i.mString = this;
01270 return i;
01271 }
01273 const_reverse_iterator rbegin() const
01274 {
01275 const_reverse_iterator i;
01276 i.mIter = const_cast<UString*>( this )->mData.end();
01277 i.mString = const_cast<UString*>( this );
01278 return i;
01279 }
01281 reverse_iterator rend()
01282 {
01283 reverse_iterator i;
01284 i.mIter = mData.begin();
01285 i.mString = this;
01286 return i;
01287 }
01289 const_reverse_iterator rend() const
01290 {
01291 const_reverse_iterator i;
01292 i.mIter = const_cast<UString*>( this )->mData.begin();
01293 i.mString = const_cast<UString*>( this );
01294 return i;
01295 }
01297
01299
01301
01302
01303 UString& assign( iterator start, iterator end )
01304 {
01305 mData.assign( start.mIter, end.mIter );
01306 return *this;
01307 }
01309 UString& assign( const UString& str )
01310 {
01311 mData.assign( str.mData );
01312 return *this;
01313 }
01315 UString& assign( const code_point* str )
01316 {
01317 mData.assign( str );
01318 return *this;
01319 }
01321 UString& assign( const code_point* str, size_type num )
01322 {
01323 mData.assign( str, num );
01324 return *this;
01325 }
01327 UString& assign( const UString& str, size_type index, size_type len )
01328 {
01329 mData.assign( str.mData, index, len );
01330 return *this;
01331 }
01333 UString& assign( size_type num, const code_point& ch )
01334 {
01335 mData.assign( num, ch );
01336 return *this;
01337 }
01339 UString& assign( const std::wstring& wstr )
01340 {
01341 mData.clear();
01342 mData.reserve( wstr.length() );
01343 #ifdef WCHAR_UTF16 // if we're already working in UTF-16, this is easy
01344 code_point tmp;
01345 std::wstring::const_iterator i, ie = wstr.end();
01346 for ( i = wstr.begin(); i != ie; ++i )
01347 {
01348 tmp = static_cast<code_point>( *i );
01349 mData.push_back( tmp );
01350 }
01351 #else // otherwise we do it the safe way (which is still 100% safe to pass UTF-16 through, just slower)
01352 code_point cp[3] = { 0, 0, 0 };
01353 unicode_char tmp;
01354 std::wstring::const_iterator i, ie = wstr.end();
01355 for ( i = wstr.begin(); i != ie; i++ )
01356 {
01357 tmp = static_cast<unicode_char>( *i );
01358 size_t lc = _utf32_to_utf16( tmp, cp );
01359 if ( lc > 0 ) mData.push_back( cp[0] );
01360 if ( lc > 1 ) mData.push_back( cp[1] );
01361 }
01362 #endif
01363 return *this;
01364 }
01365 #if MYGUI_IS_NATIVE_WCHAR_T
01366
01367 UString& assign( const wchar_t* w_str )
01368 {
01369 std::wstring tmp;
01370 tmp.assign( w_str );
01371 return assign( tmp );
01372 }
01374 UString& assign( const wchar_t* w_str, size_type num )
01375 {
01376 std::wstring tmp;
01377 tmp.assign( w_str, num );
01378 return assign( tmp );
01379 }
01380 #endif
01381
01382 UString& assign( const std::string& str )
01383 {
01384 size_type len = _verifyUTF8( str );
01385 clear();
01386 reserve( len );
01387
01388
01389
01390
01391 unicode_char uc;
01392 unsigned char utf8buf[7];
01393 utf8buf[6] = 0;
01394 size_t utf8len;
01395 code_point utf16buff[3];
01396 utf16buff[2] = 0;
01397 size_t utf16len;
01398
01399 std::string::const_iterator i, ie = str.end();
01400 for ( i = str.begin(); i != ie; ++i )
01401 {
01402 utf8len = _utf8_char_length( static_cast<unsigned char>( *i ) );
01403 for ( size_t j = 0; j < utf8len; j++ )
01404 {
01405 utf8buf[j] = ( static_cast<unsigned char>( *( i + j ) ) );
01406 }
01407 utf8buf[utf8len] = 0;
01408 utf8len = _utf8_to_utf32( utf8buf, uc );
01409 i += utf8len - 1;
01410
01411 utf16len = _utf32_to_utf16( uc, utf16buff );
01412 append( utf16buff, utf16len );
01413 }
01414 return *this;
01415 }
01417 UString& assign( const char* c_str )
01418 {
01419 std::string tmp( c_str );
01420 return assign( tmp );
01421 }
01423 UString& assign( const char* c_str, size_type num )
01424 {
01425 std::string tmp;
01426 tmp.assign( c_str, num );
01427 return assign( tmp );
01428 }
01430
01432
01434
01435
01436 UString& append( const UString& str )
01437 {
01438 mData.append( str.mData );
01439 return *this;
01440 }
01442 UString& append( const code_point* str )
01443 {
01444 mData.append( str );
01445 return *this;
01446 }
01448 UString& append( const UString& str, size_type index, size_type len )
01449 {
01450 mData.append( str.mData, index, len );
01451 return *this;
01452 }
01454 UString& append( const code_point* str, size_type num )
01455 {
01456 mData.append( str, num );
01457 return *this;
01458 }
01460 UString& append( size_type num, code_point ch )
01461 {
01462 mData.append( num, ch );
01463 return *this;
01464 }
01466 UString& append( iterator start, iterator end )
01467 {
01468 mData.append( start.mIter, end.mIter );
01469 return *this;
01470 }
01471 #if MYGUI_IS_NATIVE_WCHAR_T
01472
01473 UString& append( const wchar_t* w_str, size_type num )
01474 {
01475 std::wstring tmp( w_str, num );
01476 return append( tmp );
01477 }
01479 UString& append( size_type num, wchar_t ch )
01480 {
01481 return append( num, static_cast<unicode_char>( ch ) );
01482 }
01483 #endif
01484
01485 UString& append( const char* c_str, size_type num )
01486 {
01487 UString tmp( c_str, num );
01488 append( tmp );
01489 return *this;
01490 }
01492 UString& append( size_type num, char ch )
01493 {
01494 append( num, static_cast<code_point>( ch ) );
01495 return *this;
01496 }
01498 UString& append( size_type num, unicode_char ch )
01499 {
01500 code_point cp[2] = { 0, 0 };
01501 if ( _utf32_to_utf16( ch, cp ) == 2 )
01502 {
01503 for ( size_type i = 0; i < num; i++ )
01504 {
01505 append( 1, cp[0] );
01506 append( 1, cp[1] );
01507 }
01508 }
01509 else
01510 {
01511 for ( size_type i = 0; i < num; i++ )
01512 {
01513 append( 1, cp[0] );
01514 }
01515 }
01516 return *this;
01517 }
01519
01521
01523
01524
01525 iterator insert( iterator i, const code_point& ch )
01526 {
01527 iterator ret;
01528 ret.mIter = mData.insert( i.mIter, ch );
01529 ret.mString = this;
01530 return ret;
01531 }
01533 UString& insert( size_type index, const UString& str )
01534 {
01535 mData.insert( index, str.mData );
01536 return *this;
01537 }
01539 UString& insert( size_type index, const code_point* str )
01540 {
01541 mData.insert( index, str );
01542 return *this;
01543 }
01545 UString& insert( size_type index1, const UString& str, size_type index2, size_type num )
01546 {
01547 mData.insert( index1, str.mData, index2, num );
01548 return *this;
01549 }
01551 void insert( iterator i, iterator start, iterator end )
01552 {
01553 mData.insert( i.mIter, start.mIter, end.mIter );
01554 }
01556 UString& insert( size_type index, const code_point* str, size_type num )
01557 {
01558 mData.insert( index, str, num );
01559 return *this;
01560 }
01561 #if MYGUI_IS_NATIVE_WCHAR_T
01562
01563 UString& insert( size_type index, const wchar_t* w_str, size_type num )
01564 {
01565 UString tmp( w_str, num );
01566 insert( index, tmp );
01567 return *this;
01568 }
01569 #endif
01570
01571 UString& insert( size_type index, const char* c_str, size_type num )
01572 {
01573 UString tmp( c_str, num );
01574 insert( index, tmp );
01575 return *this;
01576 }
01578 UString& insert( size_type index, size_type num, code_point ch )
01579 {
01580 mData.insert( index, num, ch );
01581 return *this;
01582 }
01583 #if MYGUI_IS_NATIVE_WCHAR_T
01584
01585 UString& insert( size_type index, size_type num, wchar_t ch )
01586 {
01587 insert( index, num, static_cast<unicode_char>( ch ) );
01588 return *this;
01589 }
01590 #endif
01591
01592 UString& insert( size_type index, size_type num, char ch )
01593 {
01594 insert( index, num, static_cast<code_point>( ch ) );
01595 return *this;
01596 }
01598 UString& insert( size_type index, size_type num, unicode_char ch )
01599 {
01600 code_point cp[3] = { 0, 0, 0 };
01601 size_t lc = _utf32_to_utf16( ch, cp );
01602 if ( lc == 1 )
01603 {
01604 return insert( index, num, cp[0] );
01605 }
01606 for ( size_type c = 0; c < num; c++ )
01607 {
01608
01609 insert( index, 1, cp[1] );
01610 insert( index, 1, cp[0] );
01611 }
01612 return *this;
01613 }
01615 void insert( iterator i, size_type num, const code_point& ch )
01616 {
01617 mData.insert( i.mIter, num, ch );
01618 }
01619 #if MYGUI_IS_NATIVE_WCHAR_T
01620
01621 void insert( iterator i, size_type num, const wchar_t& ch )
01622 {
01623 insert( i, num, static_cast<unicode_char>( ch ) );
01624 }
01625 #endif
01626
01627 void insert( iterator i, size_type num, const char& ch )
01628 {
01629 insert( i, num, static_cast<code_point>( ch ) );
01630 }
01632 void insert( iterator i, size_type num, const unicode_char& ch )
01633 {
01634 code_point cp[3] = { 0, 0, 0 };
01635 size_t lc = _utf32_to_utf16( ch, cp );
01636 if ( lc == 1 )
01637 {
01638 insert( i, num, cp[0] );
01639 }
01640 else
01641 {
01642 for ( size_type c = 0; c < num; c++ )
01643 {
01644
01645 insert( i, 1, cp[1] );
01646 insert( i, 1, cp[0] );
01647 }
01648 }
01649 }
01651
01653
01655
01656
01657 iterator erase( iterator loc )
01658 {
01659 iterator ret;
01660 ret.mIter = mData.erase( loc.mIter );
01661 ret.mString = this;
01662 return ret;
01663 }
01665 iterator erase( iterator start, iterator end )
01666 {
01667 iterator ret;
01668 ret.mIter = mData.erase( start.mIter, end.mIter );
01669 ret.mString = this;
01670 return ret;
01671 }
01673 UString& erase( size_type index = 0, size_type num = npos )
01674 {
01675 if ( num == npos )
01676 mData.erase( index );
01677 else
01678 mData.erase( index, num );
01679 return *this;
01680 }
01682
01684
01686
01687
01688 UString& replace( size_type index1, size_type num1, const UString& str )
01689 {
01690 mData.replace( index1, num1, str.mData, 0, npos );
01691 return *this;
01692 }
01694 UString& replace( size_type index1, size_type num1, const UString& str, size_type num2 )
01695 {
01696 mData.replace( index1, num1, str.mData, 0, num2 );
01697 return *this;
01698 }
01700 UString& replace( size_type index1, size_type num1, const UString& str, size_type index2, size_type num2 )
01701 {
01702 mData.replace( index1, num1, str.mData, index2, num2 );
01703 return *this;
01704 }
01706 UString& replace( iterator start, iterator end, const UString& str, size_type num = npos )
01707 {
01708 _const_fwd_iterator st(start);
01709
01710 size_type index1 = begin() - st;
01711 size_type num1 = end - st;
01712 return replace( index1, num1, str, 0, num );
01713 }
01715 UString& replace( size_type index, size_type num1, size_type num2, code_point ch )
01716 {
01717 mData.replace( index, num1, num2, ch );
01718 return *this;
01719 }
01721 UString& replace( iterator start, iterator end, size_type num, code_point ch )
01722 {
01723 _const_fwd_iterator st(start);
01724
01725 size_type index1 = begin() - st;
01726 size_type num1 = end - st;
01727 return replace( index1, num1, num, ch );
01728 }
01730
01732
01734
01735
01736 int compare( const UString& str ) const
01737 {
01738 return mData.compare( str.mData );
01739 }
01741 int compare( const code_point* str ) const
01742 {
01743 return mData.compare( str );
01744 }
01746 int compare( size_type index, size_type length, const UString& str ) const
01747 {
01748 return mData.compare( index, length, str.mData );
01749 }
01751 int compare( size_type index, size_type length, const UString& str, size_type index2, size_type length2 ) const
01752 {
01753 return mData.compare( index, length, str.mData, index2, length2 );
01754 }
01756 int compare( size_type index, size_type length, const code_point* str, size_type length2 ) const
01757 {
01758 return mData.compare( index, length, str, length2 );
01759 }
01760 #if MYGUI_IS_NATIVE_WCHAR_T
01761
01762 int compare( size_type index, size_type length, const wchar_t* w_str, size_type length2 ) const
01763 {
01764 UString tmp( w_str, length2 );
01765 return compare( index, length, tmp );
01766 }
01767 #endif
01768
01769 int compare( size_type index, size_type length, const char* c_str, size_type length2 ) const
01770 {
01771 UString tmp( c_str, length2 );
01772 return compare( index, length, tmp );
01773 }
01775
01777
01779
01780
01781
01782 size_type find( const UString& str, size_type index = 0 ) const
01783 {
01784 return mData.find( str.c_str(), index );
01785 }
01787
01788 size_type find( const code_point* cp_str, size_type index, size_type length ) const
01789 {
01790 UString tmp( cp_str );
01791 return mData.find( tmp.c_str(), index, length );
01792 }
01794
01795 size_type find( const char* c_str, size_type index, size_type length ) const
01796 {
01797 UString tmp( c_str );
01798 return mData.find( tmp.c_str(), index, length );
01799 }
01800 #if MYGUI_IS_NATIVE_WCHAR_T
01801
01802
01803 size_type find( const wchar_t* w_str, size_type index, size_type length ) const
01804 {
01805 UString tmp( w_str );
01806 return mData.find( tmp.c_str(), index, length );
01807 }
01808 #endif
01809
01810
01811 size_type find( char ch, size_type index = 0 ) const
01812 {
01813 return find( static_cast<code_point>( ch ), index );
01814 }
01816
01817 size_type find( code_point ch, size_type index = 0 ) const
01818 {
01819 return mData.find( ch, index );
01820 }
01821 #if MYGUI_IS_NATIVE_WCHAR_T
01822
01823
01824 size_type find( wchar_t ch, size_type index = 0 ) const
01825 {
01826 return find( static_cast<unicode_char>( ch ), index );
01827 }
01828 #endif
01829
01830
01831 size_type find( unicode_char ch, size_type index = 0 ) const
01832 {
01833 code_point cp[3] = { 0, 0, 0 };
01834 size_t lc = _utf32_to_utf16( ch, cp );
01835 return find( UString( cp, lc ), index );
01836 }
01837
01839 size_type rfind( const UString& str, size_type index = 0 ) const
01840 {
01841 return mData.rfind( str.c_str(), index );
01842 }
01844 size_type rfind( const code_point* cp_str, size_type index, size_type num ) const
01845 {
01846 UString tmp( cp_str );
01847 return mData.rfind( tmp.c_str(), index, num );
01848 }
01850 size_type rfind( const char* c_str, size_type index, size_type num ) const
01851 {
01852 UString tmp( c_str );
01853 return mData.rfind( tmp.c_str(), index, num );
01854 }
01855 #if MYGUI_IS_NATIVE_WCHAR_T
01856
01857 size_type rfind( const wchar_t* w_str, size_type index, size_type num ) const
01858 {
01859 UString tmp( w_str );
01860 return mData.rfind( tmp.c_str(), index, num );
01861 }
01862 #endif
01863
01864 size_type rfind( char ch, size_type index = 0 ) const
01865 {
01866 return rfind( static_cast<code_point>( ch ), index );
01867 }
01869 size_type rfind( code_point ch, size_type index ) const
01870 {
01871 return mData.rfind( ch, index );
01872 }
01873 #if MYGUI_IS_NATIVE_WCHAR_T
01874
01875 size_type rfind( wchar_t ch, size_type index = 0 ) const
01876 {
01877 return rfind( static_cast<unicode_char>( ch ), index );
01878 }
01879 #endif
01880
01881 size_type rfind( unicode_char ch, size_type index = 0 ) const
01882 {
01883 code_point cp[3] = { 0, 0, 0 };
01884 size_t lc = _utf32_to_utf16( ch, cp );
01885 return rfind( UString( cp, lc ), index );
01886 }
01888
01890
01892
01893
01894 size_type find_first_of( const UString &str, size_type index = 0, size_type num = npos ) const
01895 {
01896 size_type i = 0;
01897 const size_type len = length();
01898 while ( i < num && ( index + i ) < len )
01899 {
01900 unicode_char ch = getChar( index + i );
01901 if ( str.inString( ch ) )
01902 return index + i;
01903 i += _utf16_char_length( ch );
01904 }
01905 return npos;
01906 }
01908 size_type find_first_of( code_point ch, size_type index = 0 ) const
01909 {
01910 UString tmp;
01911 tmp.assign( 1, ch );
01912 return find_first_of( tmp, index );
01913 }
01915 size_type find_first_of( char ch, size_type index = 0 ) const
01916 {
01917 return find_first_of( static_cast<code_point>( ch ), index );
01918 }
01919 #if MYGUI_IS_NATIVE_WCHAR_T
01920
01921 size_type find_first_of( wchar_t ch, size_type index = 0 ) const
01922 {
01923 return find_first_of( static_cast<unicode_char>( ch ), index );
01924 }
01925 #endif
01926
01927 size_type find_first_of( unicode_char ch, size_type index = 0 ) const
01928 {
01929 code_point cp[3] = { 0, 0, 0 };
01930 size_t lc = _utf32_to_utf16( ch, cp );
01931 return find_first_of( UString( cp, lc ), index );
01932 }
01933
01935 size_type find_first_not_of( const UString& str, size_type index = 0, size_type num = npos ) const
01936 {
01937 size_type i = 0;
01938 const size_type len = length();
01939 while ( i < num && ( index + i ) < len )
01940 {
01941 unicode_char ch = getChar( index + i );
01942 if ( !str.inString( ch ) )
01943 return index + i;
01944 i += _utf16_char_length( ch );
01945 }
01946 return npos;
01947 }
01949 size_type find_first_not_of( code_point ch, size_type index = 0 ) const
01950 {
01951 UString tmp;
01952 tmp.assign( 1, ch );
01953 return find_first_not_of( tmp, index );
01954 }
01956 size_type find_first_not_of( char ch, size_type index = 0 ) const
01957 {
01958 return find_first_not_of( static_cast<code_point>( ch ), index );
01959 }
01960 #if MYGUI_IS_NATIVE_WCHAR_T
01961
01962 size_type find_first_not_of( wchar_t ch, size_type index = 0 ) const
01963 {
01964 return find_first_not_of( static_cast<unicode_char>( ch ), index );
01965 }
01966 #endif
01967
01968 size_type find_first_not_of( unicode_char ch, size_type index = 0 ) const
01969 {
01970 code_point cp[3] = { 0, 0, 0 };
01971 size_t lc = _utf32_to_utf16( ch, cp );
01972 return find_first_not_of( UString( cp, lc ), index );
01973 }
01974
01976 size_type find_last_of( const UString& str, size_type index = npos, size_type num = npos ) const
01977 {
01978 size_type i = 0;
01979 const size_type len = length();
01980 if ( index > len ) index = len - 1;
01981
01982 while ( i < num && ( index - i ) != npos )
01983 {
01984 size_type j = index - i;
01985
01986 if ( j != 0 && _utf16_surrogate_follow( at( j ) ) && _utf16_surrogate_lead( at( j - 1 ) ) )
01987 {
01988 j = index - ++i;
01989 }
01990
01991 unicode_char ch = getChar( j );
01992 if ( str.inString( ch ) )
01993 return j;
01994 i++;
01995 }
01996 return npos;
01997 }
01999 size_type find_last_of( code_point ch, size_type index = npos ) const
02000 {
02001 UString tmp;
02002 tmp.assign( 1, ch );
02003 return find_last_of( tmp, index );
02004 }
02006 size_type find_last_of( char ch, size_type index = npos ) const
02007 {
02008 return find_last_of( static_cast<code_point>( ch ), index );
02009 }
02010 #if MYGUI_IS_NATIVE_WCHAR_T
02011
02012 size_type find_last_of( wchar_t ch, size_type index = npos ) const
02013 {
02014 return find_last_of( static_cast<unicode_char>( ch ), index );
02015 }
02016 #endif
02017
02018 size_type find_last_of( unicode_char ch, size_type index = npos ) const
02019 {
02020 code_point cp[3] = { 0, 0, 0 };
02021 size_t lc = _utf32_to_utf16( ch, cp );
02022 return find_last_of( UString( cp, lc ), index );
02023 }
02024
02026 size_type find_last_not_of( const UString& str, size_type index = npos, size_type num = npos ) const
02027 {
02028 size_type i = 0;
02029 const size_type len = length();
02030 if ( index > len ) index = len - 1;
02031
02032 while ( i < num && ( index - i ) != npos )
02033 {
02034 size_type j = index - i;
02035
02036 if ( j != 0 && _utf16_surrogate_follow( at( j ) ) && _utf16_surrogate_lead( at( j - 1 ) ) )
02037 {
02038 j = index - ++i;
02039 }
02040
02041 unicode_char ch = getChar( j );
02042 if ( !str.inString( ch ) )
02043 return j;
02044 i++;
02045 }
02046 return npos;
02047 }
02049 size_type find_last_not_of( code_point ch, size_type index = npos ) const
02050 {
02051 UString tmp;
02052 tmp.assign( 1, ch );
02053 return find_last_not_of( tmp, index );
02054 }
02056 size_type find_last_not_of( char ch, size_type index = npos ) const
02057 {
02058 return find_last_not_of( static_cast<code_point>( ch ), index );
02059 }
02060 #if MYGUI_IS_NATIVE_WCHAR_T
02061
02062 size_type find_last_not_of( wchar_t ch, size_type index = npos ) const
02063 {
02064 return find_last_not_of( static_cast<unicode_char>( ch ), index );
02065 }
02066 #endif
02067
02068 size_type find_last_not_of( unicode_char ch, size_type index = npos ) const
02069 {
02070 code_point cp[3] = { 0, 0, 0 };
02071 size_t lc = _utf32_to_utf16( ch, cp );
02072 return find_last_not_of( UString( cp, lc ), index );
02073 }
02075
02077
02079
02080
02081 bool operator<( const UString& right ) const
02082 {
02083 return compare( right ) < 0;
02084 }
02086 bool operator<=( const UString& right ) const
02087 {
02088 return compare( right ) <= 0;
02089 }
02091 bool operator>( const UString& right ) const
02092 {
02093 return compare( right ) > 0;
02094 }
02096 bool operator>=( const UString& right ) const
02097 {
02098 return compare( right ) >= 0;
02099 }
02101 bool operator==( const UString& right ) const
02102 {
02103 return compare( right ) == 0;
02104 }
02106 bool operator!=( const UString& right ) const
02107 {
02108 return !operator==( right );
02109 }
02111 UString& operator=( const UString& s )
02112 {
02113 return assign( s );
02114 }
02116 UString& operator=( code_point ch )
02117 {
02118 clear();
02119 return append( 1, ch );
02120 }
02122 UString& operator=( char ch )
02123 {
02124 clear();
02125 return append( 1, ch );
02126 }
02127 #if MYGUI_IS_NATIVE_WCHAR_T
02128
02129 UString& operator=( wchar_t ch )
02130 {
02131 clear();
02132 return append( 1, ch );
02133 }
02134 #endif
02135
02136 UString& operator=( unicode_char ch )
02137 {
02138 clear();
02139 return append( 1, ch );
02140 }
02142 code_point& operator[]( size_type index )
02143 {
02144 return at( index );
02145 }
02147 const code_point& operator[]( size_type index ) const
02148 {
02149 return at( index );
02150 }
02152
02154
02156
02157
02158 operator std::string() const
02159 {
02160 return std::string( asUTF8() );
02161 }
02163 operator std::wstring() const
02164 {
02165 return std::wstring( asWStr() );
02166 }
02168
02170
02172
02173
02174 static bool _utf16_independent_char( code_point cp )
02175 {
02176 if ( 0xD800 <= cp && cp <= 0xDFFF )
02177 return false;
02178 return true;
02179 }
02181 static bool _utf16_surrogate_lead( code_point cp )
02182 {
02183 if ( 0xD800 <= cp && cp <= 0xDBFF )
02184 return true;
02185 return false;
02186 }
02188 static bool _utf16_surrogate_follow( code_point cp )
02189 {
02190 if ( 0xDC00 <= cp && cp <= 0xDFFF )
02191 return true;
02192 return false;
02193 }
02195 static size_t _utf16_char_length( code_point cp )
02196 {
02197 if ( 0xD800 <= cp && cp <= 0xDBFF )
02198 return 2;
02199 return 1;
02200 }
02202 static size_t _utf16_char_length( unicode_char uc )
02203 {
02204 if ( uc > 0xFFFF )
02205 return 2;
02206 return 1;
02207 }
02209
02213 static size_t _utf16_to_utf32( const code_point in_cp[2], unicode_char& out_uc )
02214 {
02215 const code_point& cp1 = in_cp[0];
02216 const code_point& cp2 = in_cp[1];
02217 bool wordPair = false;
02218
02219
02220 if ( 0xD800 <= cp1 && cp1 <= 0xDBFF )
02221 {
02222
02223 if ( 0xDC00 <= cp2 && cp2 <= 0xDFFF )
02224 wordPair = true;
02225 }
02226
02227 if ( !wordPair )
02228 {
02229 out_uc = cp1;
02230 return 1;
02231 }
02232
02233 unsigned short cU = cp1, cL = cp2;
02234 cU -= 0xD800;
02235 cL -= 0xDC00;
02236
02237 out_uc = ( cU & 0x03FF ) << 10;
02238 out_uc |= ( cL & 0x03FF );
02239 out_uc += 0x10000;
02240
02241 return 2;
02242 }
02244
02249 static size_t _utf32_to_utf16( const unicode_char& in_uc, code_point out_cp[2] )
02250 {
02251 if ( in_uc <= 0xFFFF )
02252 {
02253 out_cp[0] = in_uc;
02254 return 1;
02255 }
02256 unicode_char uc = in_uc;
02257 unsigned short tmp;
02258 uc -= 0x10000;
02259
02260
02261 tmp = ( uc >> 10 ) & 0x03FF;
02262 tmp += 0xD800;
02263 out_cp[0] = tmp;
02264
02265
02266 tmp = uc & 0x03FF;
02267 tmp += 0xDC00;
02268 out_cp[1] = tmp;
02269
02270 return 2;
02271 }
02273
02275
02277
02278
02279 static bool _utf8_start_char( unsigned char cp )
02280 {
02281 return ( cp & ~_cont_mask ) != _cont;
02282 }
02284 static size_t _utf8_char_length( unsigned char cp )
02285 {
02286 if ( !( cp & 0x80 ) ) return 1;
02287 if (( cp & ~_lead1_mask ) == _lead1 ) return 2;
02288 if (( cp & ~_lead2_mask ) == _lead2 ) return 3;
02289 if (( cp & ~_lead3_mask ) == _lead3 ) return 4;
02290 if (( cp & ~_lead4_mask ) == _lead4 ) return 5;
02291 if (( cp & ~_lead5_mask ) == _lead5 ) return 6;
02292 throw invalid_data( "invalid UTF-8 sequence header value" );
02293 }
02295 static size_t _utf8_char_length( unicode_char uc )
02296 {
02297
02298
02299
02300
02301
02302
02303
02304
02305 if ( !( uc & ~0x0000007F ) ) return 1;
02306 if ( !( uc & ~0x000007FF ) ) return 2;
02307 if ( !( uc & ~0x0000FFFF ) ) return 3;
02308 if ( !( uc & ~0x001FFFFF ) ) return 4;
02309 if ( !( uc & ~0x03FFFFFF ) ) return 5;
02310 if ( !( uc & ~0x7FFFFFFF ) ) return 6;
02311 throw invalid_data( "invalid UTF-32 value" );
02312 }
02313
02315 static size_t _utf8_to_utf32( const unsigned char in_cp[6], unicode_char& out_uc )
02316 {
02317 size_t len = _utf8_char_length( in_cp[0] );
02318 if ( len == 1 )
02319 {
02320 out_uc = in_cp[0];
02321 return 1;
02322 }
02323
02324 unicode_char c = 0;
02325 size_t i = 0;
02326 switch ( len )
02327 {
02328 case 6:
02329 c = in_cp[i] & _lead5_mask;
02330 break;
02331 case 5:
02332 c = in_cp[i] & _lead4_mask;
02333 break;
02334 case 4:
02335 c = in_cp[i] & _lead3_mask;
02336 break;
02337 case 3:
02338 c = in_cp[i] & _lead2_mask;
02339 break;
02340 case 2:
02341 c = in_cp[i] & _lead1_mask;
02342 break;
02343 }
02344
02345 for ( ++i; i < len; i++ )
02346 {
02347 if (( in_cp[i] & ~_cont_mask ) != _cont )
02348 throw invalid_data( "bad UTF-8 continuation byte" );
02349 c <<= 6;
02350 c |= ( in_cp[i] & _cont_mask );
02351 }
02352
02353 out_uc = c;
02354 return len;
02355 }
02357 static size_t _utf32_to_utf8( const unicode_char& in_uc, unsigned char out_cp[6] )
02358 {
02359 size_t len = _utf8_char_length( in_uc );
02360 unicode_char c = in_uc;
02361
02362
02363 for ( size_t i = len - 1; i > 0; i-- )
02364 {
02365 out_cp[i] = (( c ) & _cont_mask ) | _cont;
02366 c >>= 6;
02367 }
02368
02369
02370 switch ( len )
02371 {
02372 case 6:
02373 out_cp[0] = (( c ) & _lead5_mask ) | _lead5;
02374 break;
02375 case 5:
02376 out_cp[0] = (( c ) & _lead4_mask ) | _lead4;
02377 break;
02378 case 4:
02379 out_cp[0] = (( c ) & _lead3_mask ) | _lead3;
02380 break;
02381 case 3:
02382 out_cp[0] = (( c ) & _lead2_mask ) | _lead2;
02383 break;
02384 case 2:
02385 out_cp[0] = (( c ) & _lead1_mask ) | _lead1;
02386 break;
02387 case 1:
02388 default:
02389 out_cp[0] = ( c ) & 0x7F;
02390 break;
02391 }
02392
02393
02394 return len;
02395 }
02396
02398 static size_type _verifyUTF8( const unsigned char* c_str )
02399 {
02400 std::string tmp( reinterpret_cast<const char*>( c_str ) );
02401 return _verifyUTF8( tmp );
02402 }
02404 static size_type _verifyUTF8( const std::string& str )
02405 {
02406 std::string::const_iterator i, ie = str.end();
02407 i = str.begin();
02408 size_type length = 0;
02409
02410 while ( i != ie )
02411 {
02412
02413 if (( *i ) & 0x80 )
02414 {
02415 unsigned char c = ( *i );
02416 size_t contBytes = 0;
02417
02418
02419 if (( c & ~_lead1_mask ) == _lead1 )
02420 {
02421 if ( c == _lead1 ) throw invalid_data( "overlong UTF-8 sequence" );
02422 contBytes = 1;
02423
02424 }
02425 else if (( c & ~_lead2_mask ) == _lead2 )
02426 {
02427 contBytes = 2;
02428 if ( c == _lead2 )
02429 {
02430 c = ( *( i + 1 ) );
02431 if (( c & _lead2 ) == _cont ) throw invalid_data( "overlong UTF-8 sequence" );
02432 }
02433
02434 }
02435 else if (( c & ~_lead3_mask ) == _lead3 )
02436 {
02437 contBytes = 3;
02438 if ( c == _lead3 )
02439 {
02440 c = ( *( i + 1 ) );
02441 if (( c & _lead3 ) == _cont ) throw invalid_data( "overlong UTF-8 sequence" );
02442 }
02443
02444 }
02445 else if (( c & ~_lead4_mask ) == _lead4 )
02446 {
02447 contBytes = 4;
02448 if ( c == _lead4 )
02449 {
02450 c = ( *( i + 1 ) );
02451 if (( c & _lead4 ) == _cont ) throw invalid_data( "overlong UTF-8 sequence" );
02452 }
02453
02454 }
02455 else if (( c & ~_lead5_mask ) == _lead5 )
02456 {
02457 contBytes = 5;
02458 if ( c == _lead5 )
02459 {
02460 c = ( *( i + 1 ) );
02461 if (( c & _lead5 ) == _cont ) throw invalid_data( "overlong UTF-8 sequence" );
02462 }
02463 }
02464
02465
02466 while ( contBytes-- )
02467 {
02468 c = ( *( ++i ) );
02469 if (( c & ~_cont_mask ) != _cont )
02470 throw invalid_data( "bad UTF-8 continuation byte" );
02471 }
02472 }
02473 length++;
02474 i++;
02475 }
02476 return length;
02477 }
02479
02480 private:
02481
02482 dstring mData;
02483
//! Tags the kind of object currently referenced by the conversion buffer.
enum BufferType
{
    bt_none,       //!< no buffer type recorded
    bt_string,     //!< buffer is a std::string
    bt_wstring,    //!< buffer is a std::wstring
    bt_utf32string //!< buffer is a utf32string
};
02492
02494 void _init()
02495 {
02496 m_buffer.mVoidBuffer = 0;
02497 m_bufferType = bt_none;
02498 m_bufferSize = 0;
02499 }
02500
02502
02504 void _cleanBuffer() const
02505 {
02506 if ( m_buffer.mVoidBuffer != 0 )
02507 {
02508 switch ( m_bufferType )
02509 {
02510 case bt_string:
02511 delete m_buffer.mStrBuffer;
02512 break;
02513 case bt_wstring:
02514 delete m_buffer.mWStrBuffer;
02515 break;
02516 case bt_utf32string:
02517 delete m_buffer.mUTF32StrBuffer;
02518 break;
02519 case bt_none:
02520 default:
02521
02522
02523 MYGUI_ASSERT(false, "This should never happen - mVoidBuffer should never contain something if we "
02524 "don't know the type");
02525 break;
02526 }
02527 m_buffer.mVoidBuffer = 0;
02528 m_bufferSize = 0;
02529 }
02530 }
02531
02533 void _getBufferStr() const
02534 {
02535 if ( m_bufferType != bt_string )
02536 {
02537 _cleanBuffer();
02538 m_buffer.mStrBuffer = new std::string();
02539 m_bufferType = bt_string;
02540 }
02541 m_buffer.mStrBuffer->clear();
02542 }
02544 void _getBufferWStr() const
02545 {
02546 if ( m_bufferType != bt_wstring )
02547 {
02548 _cleanBuffer();
02549 m_buffer.mWStrBuffer = new std::wstring();
02550 m_bufferType = bt_wstring;
02551 }
02552 m_buffer.mWStrBuffer->clear();
02553 }
02555 void _getBufferUTF32Str() const
02556 {
02557 if ( m_bufferType != bt_utf32string )
02558 {
02559 _cleanBuffer();
02560 m_buffer.mUTF32StrBuffer = new utf32string();
02561 m_bufferType = bt_utf32string;
02562 }
02563 m_buffer.mUTF32StrBuffer->clear();
02564 }
02565
02566 void _load_buffer_UTF8() const
02567 {
02568 _getBufferStr();
02569 std::string& buffer = ( *m_buffer.mStrBuffer );
02570 buffer.reserve( length() );
02571
02572 unsigned char utf8buf[6];
02573 char* charbuf = ( char* )utf8buf;
02574 unicode_char c;
02575 size_t len;
02576
02577 const_iterator i, ie = end();
02578 for ( i = begin(); i != ie; i.moveNext() )
02579 {
02580 c = i.getCharacter();
02581 len = _utf32_to_utf8( c, utf8buf );
02582 size_t j = 0;
02583 while ( j < len )
02584 buffer.push_back( charbuf[j++] );
02585 }
02586 }
02587 void _load_buffer_WStr() const
02588 {
02589 _getBufferWStr();
02590 std::wstring& buffer = ( *m_buffer.mWStrBuffer );
02591 buffer.reserve( length() );
02592 #ifdef WCHAR_UTF16 // wchar_t matches UTF-16
02593 const_iterator i, ie = end();
02594 for ( i = begin(); i != ie; ++i )
02595 {
02596 buffer.push_back(( wchar_t )( *i ) );
02597 }
02598 #else // wchar_t fits UTF-32
02599 unicode_char c;
02600 const_iterator i, ie = end();
02601 for ( i = begin(); i != ie; i.moveNext() )
02602 {
02603 c = i.getCharacter();
02604 buffer.push_back(( wchar_t )c );
02605 }
02606 #endif
02607 }
02608 void _load_buffer_UTF32() const
02609 {
02610 _getBufferUTF32Str();
02611 utf32string& buffer = ( *m_buffer.mUTF32StrBuffer );
02612 buffer.reserve( length() );
02613
02614 unicode_char c;
02615
02616 const_iterator i, ie = end();
02617 for ( i = begin(); i != ie; i.moveNext() )
02618 {
02619 c = i.getCharacter();
02620 buffer.push_back( c );
02621 }
02622 }
02623
02624 mutable BufferType m_bufferType;
02625 mutable size_t m_bufferSize;
02626
02627
02628 union Buffer
02629 {
02630 mutable void* mVoidBuffer;
02631 mutable std::string* mStrBuffer;
02632 mutable std::wstring* mWStrBuffer;
02633 mutable utf32string* mUTF32StrBuffer;
02634 }
02635 m_buffer;
02636 };
02637
02639 inline UString operator+( const UString& s1, const UString& s2 )
02640 {
02641 return UString( s1 ).append( s2 );
02642 }
02644 inline UString operator+( const UString& s1, UString::code_point c )
02645 {
02646 return UString( s1 ).append( 1, c );
02647 }
02649 inline UString operator+( const UString& s1, UString::unicode_char c )
02650 {
02651 return UString( s1 ).append( 1, c );
02652 }
02654 inline UString operator+( const UString& s1, char c )
02655 {
02656 return UString( s1 ).append( 1, c );
02657 }
02658 #if MYGUI_IS_NATIVE_WCHAR_T
02659
02660 inline UString operator+( const UString& s1, wchar_t c )
02661 {
02662 return UString( s1 ).append( 1, c );
02663 }
02664 #endif
02665
02666 inline UString operator+( UString::code_point c, const UString& s2 )
02667 {
02668 return UString().append( 1, c ).append( s2 );
02669 }
02671 inline UString operator+( UString::unicode_char c, const UString& s2 )
02672 {
02673 return UString().append( 1, c ).append( s2 );
02674 }
02676 inline UString operator+( char c, const UString& s2 )
02677 {
02678 return UString().append( 1, c ).append( s2 );
02679 }
02680 #if MYGUI_IS_NATIVE_WCHAR_T
02681
02682 inline UString operator+( wchar_t c, const UString& s2 )
02683 {
02684 return UString().append( 1, c ).append( s2 );
02685 }
02686 #endif
02687
02688
02689 inline UString::size_type operator-( const UString::_const_fwd_iterator& left, const UString::_const_fwd_iterator& right )
02690 {
02691 return ( left.mIter - right.mIter );
02692 }
02693 inline bool operator==( const UString::_const_fwd_iterator& left, const UString::_const_fwd_iterator& right )
02694 {
02695 return left.mIter == right.mIter;
02696 }
02697 inline bool operator!=( const UString::_const_fwd_iterator& left, const UString::_const_fwd_iterator& right )
02698 {
02699 return left.mIter != right.mIter;
02700 }
02701 inline bool operator<( const UString::_const_fwd_iterator& left, const UString::_const_fwd_iterator& right )
02702 {
02703 return left.mIter < right.mIter;
02704 }
02705 inline bool operator<=( const UString::_const_fwd_iterator& left, const UString::_const_fwd_iterator& right )
02706 {
02707 return left.mIter <= right.mIter;
02708 }
02709 inline bool operator>( const UString::_const_fwd_iterator& left, const UString::_const_fwd_iterator& right )
02710 {
02711 return left.mIter > right.mIter;
02712 }
02713 inline bool operator>=( const UString::_const_fwd_iterator& left, const UString::_const_fwd_iterator& right )
02714 {
02715 return left.mIter >= right.mIter;
02716 }
02717
02718
02719
02720 inline UString::size_type operator-( const UString::_const_rev_iterator& left, const UString::_const_rev_iterator& right )
02721 {
02722 return ( right.mIter - left.mIter );
02723 }
02724 inline bool operator==( const UString::_const_rev_iterator& left, const UString::_const_rev_iterator& right )
02725 {
02726 return left.mIter == right.mIter;
02727 }
02728 inline bool operator!=( const UString::_const_rev_iterator& left, const UString::_const_rev_iterator& right )
02729 {
02730 return left.mIter != right.mIter;
02731 }
02732 inline bool operator<( const UString::_const_rev_iterator& left, const UString::_const_rev_iterator& right )
02733 {
02734 return right.mIter < left.mIter;
02735 }
02736 inline bool operator<=( const UString::_const_rev_iterator& left, const UString::_const_rev_iterator& right )
02737 {
02738 return right.mIter <= left.mIter;
02739 }
02740 inline bool operator>( const UString::_const_rev_iterator& left, const UString::_const_rev_iterator& right )
02741 {
02742 return right.mIter > left.mIter;
02743 }
02744 inline bool operator>=( const UString::_const_rev_iterator& left, const UString::_const_rev_iterator& right )
02745 {
02746 return right.mIter >= left.mIter;
02747 }
02748
02750 inline std::ostream& operator << ( std::ostream& os, const UString& s )
02751 {
02752 return os << s.asUTF8();
02753 }
02754
02756
02757
02758
02759
02760
02761
02762
02763 }
02764
02765 #endif // __MYGUI_U_STRING_H__