|
ICU 51.2
51.2
|
00001 /* 00002 ********************************************************************** 00003 * Copyright (C) 1998-2013, International Business Machines 00004 * Corporation and others. All Rights Reserved. 00005 ********************************************************************** 00006 * 00007 * File unistr.h 00008 * 00009 * Modification History: 00010 * 00011 * Date Name Description 00012 * 09/25/98 stephen Creation. 00013 * 11/11/98 stephen Changed per 11/9 code review. 00014 * 04/20/99 stephen Overhauled per 4/16 code review. 00015 * 11/18/99 aliu Made to inherit from Replaceable. Added method 00016 * handleReplaceBetween(); other methods unchanged. 00017 * 06/25/01 grhoten Remove dependency on iostream. 00018 ****************************************************************************** 00019 */ 00020 00021 #ifndef UNISTR_H 00022 #define UNISTR_H 00023 00029 #include "unicode/utypes.h" 00030 #include "unicode/rep.h" 00031 #include "unicode/std_string.h" 00032 #include "unicode/stringpiece.h" 00033 #include "unicode/bytestream.h" 00034 #include "unicode/ucasemap.h" 00035 00036 struct UConverter; // unicode/ucnv.h 00037 class StringThreadTest; 00038 00039 #ifndef U_COMPARE_CODE_POINT_ORDER 00040 /* see also ustring.h and unorm.h */ 00046 #define U_COMPARE_CODE_POINT_ORDER 0x8000 00047 #endif 00048 00049 #ifndef USTRING_H 00050 00053 U_STABLE int32_t U_EXPORT2 00054 u_strlen(const UChar *s); 00055 #endif 00056 00057 #ifndef U_HIDE_INTERNAL_API 00058 00063 #ifndef U_STRING_CASE_MAPPER_DEFINED 00064 #define U_STRING_CASE_MAPPER_DEFINED 00065 00070 typedef int32_t U_CALLCONV 00071 UStringCaseMapper(const UCaseMap *csm, 00072 UChar *dest, int32_t destCapacity, 00073 const UChar *src, int32_t srcLength, 00074 UErrorCode *pErrorCode); 00075 00076 #endif 00077 #endif /* U_HIDE_INTERNAL_API */ 00078 00079 U_NAMESPACE_BEGIN 00080 00081 class BreakIterator; // unicode/brkiter.h 00082 class Locale; // unicode/locid.h 00083 class StringCharacterIterator; 00084 class 
UnicodeStringAppendable; // unicode/appendable.h 00085 00086 /* The <iostream> include has been moved to unicode/ustream.h */ 00087 00098 #define US_INV icu::UnicodeString::kInvariant 00099 00117 #if defined(U_DECLARE_UTF16) 00118 # define UNICODE_STRING(cs, _length) icu::UnicodeString(TRUE, (const UChar *)U_DECLARE_UTF16(cs), _length) 00119 #elif U_SIZEOF_WCHAR_T==U_SIZEOF_UCHAR && (U_CHARSET_FAMILY==U_ASCII_FAMILY || (U_SIZEOF_UCHAR == 2 && defined(U_WCHAR_IS_UTF16))) 00120 # define UNICODE_STRING(cs, _length) icu::UnicodeString(TRUE, (const UChar *)L ## cs, _length) 00121 #elif U_SIZEOF_UCHAR==1 && U_CHARSET_FAMILY==U_ASCII_FAMILY 00122 # define UNICODE_STRING(cs, _length) icu::UnicodeString(TRUE, (const UChar *)cs, _length) 00123 #else 00124 # define UNICODE_STRING(cs, _length) icu::UnicodeString(cs, _length, US_INV) 00125 #endif 00126 00140 #define UNICODE_STRING_SIMPLE(cs) UNICODE_STRING(cs, -1) 00141 00149 #ifndef UNISTR_FROM_CHAR_EXPLICIT 00150 # if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION) 00151 // Auto-"explicit" in ICU library code. 00152 # define UNISTR_FROM_CHAR_EXPLICIT explicit 00153 # else 00154 // Empty by default for source code compatibility. 00155 # define UNISTR_FROM_CHAR_EXPLICIT 00156 # endif 00157 #endif 00158 00169 #ifndef UNISTR_FROM_STRING_EXPLICIT 00170 # if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION) 00171 // Auto-"explicit" in ICU library code. 00172 # define UNISTR_FROM_STRING_EXPLICIT explicit 00173 # else 00174 // Empty by default for source code compatibility. 
00175 # define UNISTR_FROM_STRING_EXPLICIT 00176 # endif 00177 #endif 00178 00248 class U_COMMON_API UnicodeString : public Replaceable 00249 { 00250 public: 00251 00260 enum EInvariant { 00265 kInvariant 00266 }; 00267 00268 //======================================== 00269 // Read-only operations 00270 //======================================== 00271 00272 /* Comparison - bitwise only - for international comparison use collation */ 00273 00281 inline UBool operator== (const UnicodeString& text) const; 00282 00290 inline UBool operator!= (const UnicodeString& text) const; 00291 00299 inline UBool operator> (const UnicodeString& text) const; 00300 00308 inline UBool operator< (const UnicodeString& text) const; 00309 00317 inline UBool operator>= (const UnicodeString& text) const; 00318 00326 inline UBool operator<= (const UnicodeString& text) const; 00327 00339 inline int8_t compare(const UnicodeString& text) const; 00340 00355 inline int8_t compare(int32_t start, 00356 int32_t length, 00357 const UnicodeString& text) const; 00358 00376 inline int8_t compare(int32_t start, 00377 int32_t length, 00378 const UnicodeString& srcText, 00379 int32_t srcStart, 00380 int32_t srcLength) const; 00381 00394 inline int8_t compare(const UChar *srcChars, 00395 int32_t srcLength) const; 00396 00411 inline int8_t compare(int32_t start, 00412 int32_t length, 00413 const UChar *srcChars) const; 00414 00432 inline int8_t compare(int32_t start, 00433 int32_t length, 00434 const UChar *srcChars, 00435 int32_t srcStart, 00436 int32_t srcLength) const; 00437 00455 inline int8_t compareBetween(int32_t start, 00456 int32_t limit, 00457 const UnicodeString& srcText, 00458 int32_t srcStart, 00459 int32_t srcLimit) const; 00460 00478 inline int8_t compareCodePointOrder(const UnicodeString& text) const; 00479 00499 inline int8_t compareCodePointOrder(int32_t start, 00500 int32_t length, 00501 const UnicodeString& srcText) const; 00502 00524 inline int8_t compareCodePointOrder(int32_t start, 
00525 int32_t length, 00526 const UnicodeString& srcText, 00527 int32_t srcStart, 00528 int32_t srcLength) const; 00529 00548 inline int8_t compareCodePointOrder(const UChar *srcChars, 00549 int32_t srcLength) const; 00550 00570 inline int8_t compareCodePointOrder(int32_t start, 00571 int32_t length, 00572 const UChar *srcChars) const; 00573 00595 inline int8_t compareCodePointOrder(int32_t start, 00596 int32_t length, 00597 const UChar *srcChars, 00598 int32_t srcStart, 00599 int32_t srcLength) const; 00600 00622 inline int8_t compareCodePointOrderBetween(int32_t start, 00623 int32_t limit, 00624 const UnicodeString& srcText, 00625 int32_t srcStart, 00626 int32_t srcLimit) const; 00627 00646 inline int8_t caseCompare(const UnicodeString& text, uint32_t options) const; 00647 00668 inline int8_t caseCompare(int32_t start, 00669 int32_t length, 00670 const UnicodeString& srcText, 00671 uint32_t options) const; 00672 00695 inline int8_t caseCompare(int32_t start, 00696 int32_t length, 00697 const UnicodeString& srcText, 00698 int32_t srcStart, 00699 int32_t srcLength, 00700 uint32_t options) const; 00701 00721 inline int8_t caseCompare(const UChar *srcChars, 00722 int32_t srcLength, 00723 uint32_t options) const; 00724 00745 inline int8_t caseCompare(int32_t start, 00746 int32_t length, 00747 const UChar *srcChars, 00748 uint32_t options) const; 00749 00772 inline int8_t caseCompare(int32_t start, 00773 int32_t length, 00774 const UChar *srcChars, 00775 int32_t srcStart, 00776 int32_t srcLength, 00777 uint32_t options) const; 00778 00801 inline int8_t caseCompareBetween(int32_t start, 00802 int32_t limit, 00803 const UnicodeString& srcText, 00804 int32_t srcStart, 00805 int32_t srcLimit, 00806 uint32_t options) const; 00807 00815 inline UBool startsWith(const UnicodeString& text) const; 00816 00827 inline UBool startsWith(const UnicodeString& srcText, 00828 int32_t srcStart, 00829 int32_t srcLength) const; 00830 00839 inline UBool startsWith(const UChar *srcChars, 
00840 int32_t srcLength) const; 00841 00851 inline UBool startsWith(const UChar *srcChars, 00852 int32_t srcStart, 00853 int32_t srcLength) const; 00854 00862 inline UBool endsWith(const UnicodeString& text) const; 00863 00874 inline UBool endsWith(const UnicodeString& srcText, 00875 int32_t srcStart, 00876 int32_t srcLength) const; 00877 00886 inline UBool endsWith(const UChar *srcChars, 00887 int32_t srcLength) const; 00888 00899 inline UBool endsWith(const UChar *srcChars, 00900 int32_t srcStart, 00901 int32_t srcLength) const; 00902 00903 00904 /* Searching - bitwise only */ 00905 00914 inline int32_t indexOf(const UnicodeString& text) const; 00915 00925 inline int32_t indexOf(const UnicodeString& text, 00926 int32_t start) const; 00927 00939 inline int32_t indexOf(const UnicodeString& text, 00940 int32_t start, 00941 int32_t length) const; 00942 00959 inline int32_t indexOf(const UnicodeString& srcText, 00960 int32_t srcStart, 00961 int32_t srcLength, 00962 int32_t start, 00963 int32_t length) const; 00964 00976 inline int32_t indexOf(const UChar *srcChars, 00977 int32_t srcLength, 00978 int32_t start) const; 00979 00992 inline int32_t indexOf(const UChar *srcChars, 00993 int32_t srcLength, 00994 int32_t start, 00995 int32_t length) const; 00996 01013 int32_t indexOf(const UChar *srcChars, 01014 int32_t srcStart, 01015 int32_t srcLength, 01016 int32_t start, 01017 int32_t length) const; 01018 01026 inline int32_t indexOf(UChar c) const; 01027 01036 inline int32_t indexOf(UChar32 c) const; 01037 01046 inline int32_t indexOf(UChar c, 01047 int32_t start) const; 01048 01058 inline int32_t indexOf(UChar32 c, 01059 int32_t start) const; 01060 01071 inline int32_t indexOf(UChar c, 01072 int32_t start, 01073 int32_t length) const; 01074 01086 inline int32_t indexOf(UChar32 c, 01087 int32_t start, 01088 int32_t length) const; 01089 01098 inline int32_t lastIndexOf(const UnicodeString& text) const; 01099 01109 inline int32_t lastIndexOf(const UnicodeString& text, 01110 
int32_t start) const; 01111 01123 inline int32_t lastIndexOf(const UnicodeString& text, 01124 int32_t start, 01125 int32_t length) const; 01126 01143 inline int32_t lastIndexOf(const UnicodeString& srcText, 01144 int32_t srcStart, 01145 int32_t srcLength, 01146 int32_t start, 01147 int32_t length) const; 01148 01159 inline int32_t lastIndexOf(const UChar *srcChars, 01160 int32_t srcLength, 01161 int32_t start) const; 01162 01175 inline int32_t lastIndexOf(const UChar *srcChars, 01176 int32_t srcLength, 01177 int32_t start, 01178 int32_t length) const; 01179 01196 int32_t lastIndexOf(const UChar *srcChars, 01197 int32_t srcStart, 01198 int32_t srcLength, 01199 int32_t start, 01200 int32_t length) const; 01201 01209 inline int32_t lastIndexOf(UChar c) const; 01210 01219 inline int32_t lastIndexOf(UChar32 c) const; 01220 01229 inline int32_t lastIndexOf(UChar c, 01230 int32_t start) const; 01231 01241 inline int32_t lastIndexOf(UChar32 c, 01242 int32_t start) const; 01243 01254 inline int32_t lastIndexOf(UChar c, 01255 int32_t start, 01256 int32_t length) const; 01257 01269 inline int32_t lastIndexOf(UChar32 c, 01270 int32_t start, 01271 int32_t length) const; 01272 01273 01274 /* Character access */ 01275 01284 inline UChar charAt(int32_t offset) const; 01285 01293 inline UChar operator[] (int32_t offset) const; 01294 01306 UChar32 char32At(int32_t offset) const; 01307 01323 int32_t getChar32Start(int32_t offset) const; 01324 01341 int32_t getChar32Limit(int32_t offset) const; 01342 01393 int32_t moveIndex32(int32_t index, int32_t delta) const; 01394 01395 /* Substring extraction */ 01396 01412 inline void extract(int32_t start, 01413 int32_t length, 01414 UChar *dst, 01415 int32_t dstStart = 0) const; 01416 01438 int32_t 01439 extract(UChar *dest, int32_t destCapacity, 01440 UErrorCode &errorCode) const; 01441 01452 inline void extract(int32_t start, 01453 int32_t length, 01454 UnicodeString& target) const; 01455 01467 inline void extractBetween(int32_t start, 01468 
int32_t limit, 01469 UChar *dst, 01470 int32_t dstStart = 0) const; 01471 01481 virtual void extractBetween(int32_t start, 01482 int32_t limit, 01483 UnicodeString& target) const; 01484 01506 int32_t extract(int32_t start, 01507 int32_t startLength, 01508 char *target, 01509 int32_t targetCapacity, 01510 enum EInvariant inv) const; 01511 01512 #if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION 01513 01533 int32_t extract(int32_t start, 01534 int32_t startLength, 01535 char *target, 01536 uint32_t targetLength) const; 01537 01538 #endif 01539 01540 #if !UCONFIG_NO_CONVERSION 01541 01567 inline int32_t extract(int32_t start, 01568 int32_t startLength, 01569 char *target, 01570 const char *codepage = 0) const; 01571 01601 int32_t extract(int32_t start, 01602 int32_t startLength, 01603 char *target, 01604 uint32_t targetLength, 01605 const char *codepage) const; 01606 01624 int32_t extract(char *dest, int32_t destCapacity, 01625 UConverter *cnv, 01626 UErrorCode &errorCode) const; 01627 01628 #endif 01629 01643 UnicodeString tempSubString(int32_t start=0, int32_t length=INT32_MAX) const; 01644 01655 inline UnicodeString tempSubStringBetween(int32_t start, int32_t limit=INT32_MAX) const; 01656 01668 void toUTF8(ByteSink &sink) const; 01669 01670 #if U_HAVE_STD_STRING 01671 01684 template<typename StringClass> 01685 StringClass &toUTF8String(StringClass &result) const { 01686 StringByteSink<StringClass> sbs(&result); 01687 toUTF8(sbs); 01688 return result; 01689 } 01690 01691 #endif 01692 01708 int32_t toUTF32(UChar32 *utf32, int32_t capacity, UErrorCode &errorCode) const; 01709 01710 /* Length operations */ 01711 01720 inline int32_t length(void) const; 01721 01735 int32_t 01736 countChar32(int32_t start=0, int32_t length=INT32_MAX) const; 01737 01761 UBool 01762 hasMoreChar32Than(int32_t start, int32_t length, int32_t number) const; 01763 01769 inline UBool isEmpty(void) const; 01770 01780 inline int32_t getCapacity(void) const; 01781 01782 /* Other operations */ 01783 
01789 inline int32_t hashCode(void) const; 01790 01803 inline UBool isBogus(void) const; 01804 01805 01806 //======================================== 01807 // Write operations 01808 //======================================== 01809 01810 /* Assignment operations */ 01811 01819 UnicodeString &operator=(const UnicodeString &srcText); 01820 01843 UnicodeString &fastCopyFrom(const UnicodeString &src); 01844 01852 inline UnicodeString& operator= (UChar ch); 01853 01861 inline UnicodeString& operator= (UChar32 ch); 01862 01874 inline UnicodeString& setTo(const UnicodeString& srcText, 01875 int32_t srcStart); 01876 01890 inline UnicodeString& setTo(const UnicodeString& srcText, 01891 int32_t srcStart, 01892 int32_t srcLength); 01893 01902 inline UnicodeString& setTo(const UnicodeString& srcText); 01903 01912 inline UnicodeString& setTo(const UChar *srcChars, 01913 int32_t srcLength); 01914 01923 UnicodeString& setTo(UChar srcChar); 01924 01933 UnicodeString& setTo(UChar32 srcChar); 01934 01958 UnicodeString &setTo(UBool isTerminated, 01959 const UChar *text, 01960 int32_t textLength); 01961 01981 UnicodeString &setTo(UChar *buffer, 01982 int32_t buffLength, 01983 int32_t buffCapacity); 01984 02025 void setToBogus(); 02026 02034 UnicodeString& setCharAt(int32_t offset, 02035 UChar ch); 02036 02037 02038 /* Append operations */ 02039 02047 inline UnicodeString& operator+= (UChar ch); 02048 02056 inline UnicodeString& operator+= (UChar32 ch); 02057 02065 inline UnicodeString& operator+= (const UnicodeString& srcText); 02066 02081 inline UnicodeString& append(const UnicodeString& srcText, 02082 int32_t srcStart, 02083 int32_t srcLength); 02084 02092 inline UnicodeString& append(const UnicodeString& srcText); 02093 02107 inline UnicodeString& append(const UChar *srcChars, 02108 int32_t srcStart, 02109 int32_t srcLength); 02110 02120 inline UnicodeString& append(const UChar *srcChars, 02121 int32_t srcLength); 02122 02129 inline UnicodeString& append(UChar srcChar); 02130 02137 
UnicodeString& append(UChar32 srcChar); 02138 02139 02140 /* Insert operations */ 02141 02155 inline UnicodeString& insert(int32_t start, 02156 const UnicodeString& srcText, 02157 int32_t srcStart, 02158 int32_t srcLength); 02159 02168 inline UnicodeString& insert(int32_t start, 02169 const UnicodeString& srcText); 02170 02184 inline UnicodeString& insert(int32_t start, 02185 const UChar *srcChars, 02186 int32_t srcStart, 02187 int32_t srcLength); 02188 02198 inline UnicodeString& insert(int32_t start, 02199 const UChar *srcChars, 02200 int32_t srcLength); 02201 02210 inline UnicodeString& insert(int32_t start, 02211 UChar srcChar); 02212 02221 inline UnicodeString& insert(int32_t start, 02222 UChar32 srcChar); 02223 02224 02225 /* Replace operations */ 02226 02244 UnicodeString& replace(int32_t start, 02245 int32_t length, 02246 const UnicodeString& srcText, 02247 int32_t srcStart, 02248 int32_t srcLength); 02249 02262 UnicodeString& replace(int32_t start, 02263 int32_t length, 02264 const UnicodeString& srcText); 02265 02283 UnicodeString& replace(int32_t start, 02284 int32_t length, 02285 const UChar *srcChars, 02286 int32_t srcStart, 02287 int32_t srcLength); 02288 02301 inline UnicodeString& replace(int32_t start, 02302 int32_t length, 02303 const UChar *srcChars, 02304 int32_t srcLength); 02305 02317 inline UnicodeString& replace(int32_t start, 02318 int32_t length, 02319 UChar srcChar); 02320 02332 UnicodeString& replace(int32_t start, int32_t length, UChar32 srcChar); 02333 02343 inline UnicodeString& replaceBetween(int32_t start, 02344 int32_t limit, 02345 const UnicodeString& srcText); 02346 02361 inline UnicodeString& replaceBetween(int32_t start, 02362 int32_t limit, 02363 const UnicodeString& srcText, 02364 int32_t srcStart, 02365 int32_t srcLimit); 02366 02377 virtual void handleReplaceBetween(int32_t start, 02378 int32_t limit, 02379 const UnicodeString& text); 02380 02386 virtual UBool hasMetaData() const; 02387 02403 virtual void copy(int32_t 
start, int32_t limit, int32_t dest); 02404 02405 /* Search and replace operations */ 02406 02415 inline UnicodeString& findAndReplace(const UnicodeString& oldText, 02416 const UnicodeString& newText); 02417 02429 inline UnicodeString& findAndReplace(int32_t start, 02430 int32_t length, 02431 const UnicodeString& oldText, 02432 const UnicodeString& newText); 02433 02451 UnicodeString& findAndReplace(int32_t start, 02452 int32_t length, 02453 const UnicodeString& oldText, 02454 int32_t oldStart, 02455 int32_t oldLength, 02456 const UnicodeString& newText, 02457 int32_t newStart, 02458 int32_t newLength); 02459 02460 02461 /* Remove operations */ 02462 02468 inline UnicodeString& remove(void); 02469 02478 inline UnicodeString& remove(int32_t start, 02479 int32_t length = (int32_t)INT32_MAX); 02480 02489 inline UnicodeString& removeBetween(int32_t start, 02490 int32_t limit = (int32_t)INT32_MAX); 02491 02501 inline UnicodeString &retainBetween(int32_t start, int32_t limit = INT32_MAX); 02502 02503 /* Length operations */ 02504 02516 UBool padLeading(int32_t targetLength, 02517 UChar padChar = 0x0020); 02518 02530 UBool padTrailing(int32_t targetLength, 02531 UChar padChar = 0x0020); 02532 02539 inline UBool truncate(int32_t targetLength); 02540 02546 UnicodeString& trim(void); 02547 02548 02549 /* Miscellaneous operations */ 02550 02556 inline UnicodeString& reverse(void); 02557 02566 inline UnicodeString& reverse(int32_t start, 02567 int32_t length); 02568 02575 UnicodeString& toUpper(void); 02576 02584 UnicodeString& toUpper(const Locale& locale); 02585 02592 UnicodeString& toLower(void); 02593 02601 UnicodeString& toLower(const Locale& locale); 02602 02603 #if !UCONFIG_NO_BREAK_ITERATION 02604 02631 UnicodeString &toTitle(BreakIterator *titleIter); 02632 02660 UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale); 02661 02693 UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale, uint32_t options); 02694 02695 #endif 02696 02710 
UnicodeString &foldCase(uint32_t options=0 /*U_FOLD_CASE_DEFAULT*/); 02711 02712 //======================================== 02713 // Access to the internal buffer 02714 //======================================== 02715 02759 UChar *getBuffer(int32_t minCapacity); 02760 02781 void releaseBuffer(int32_t newLength=-1); 02782 02813 inline const UChar *getBuffer() const; 02814 02848 inline const UChar *getTerminatedBuffer(); 02849 02850 //======================================== 02851 // Constructors 02852 //======================================== 02853 02857 inline UnicodeString(); 02858 02870 UnicodeString(int32_t capacity, UChar32 c, int32_t count); 02871 02881 UNISTR_FROM_CHAR_EXPLICIT UnicodeString(UChar ch); 02882 02892 UNISTR_FROM_CHAR_EXPLICIT UnicodeString(UChar32 ch); 02893 02904 UNISTR_FROM_STRING_EXPLICIT UnicodeString(const UChar *text); 02905 02913 UnicodeString(const UChar *text, 02914 int32_t textLength); 02915 02938 UnicodeString(UBool isTerminated, 02939 const UChar *text, 02940 int32_t textLength); 02941 02960 UnicodeString(UChar *buffer, int32_t buffLength, int32_t buffCapacity); 02961 02962 #if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION 02963 02983 UNISTR_FROM_STRING_EXPLICIT UnicodeString(const char *codepageData); 02984 02993 UnicodeString(const char *codepageData, int32_t dataLength); 02994 02995 #endif 02996 02997 #if !UCONFIG_NO_CONVERSION 02998 03016 UnicodeString(const char *codepageData, const char *codepage); 03017 03035 UnicodeString(const char *codepageData, int32_t dataLength, const char *codepage); 03036 03058 UnicodeString( 03059 const char *src, int32_t srcLength, 03060 UConverter *cnv, 03061 UErrorCode &errorCode); 03062 03063 #endif 03064 03089 UnicodeString(const char *src, int32_t length, enum EInvariant inv); 03090 03091 03097 UnicodeString(const UnicodeString& that); 03098 03105 UnicodeString(const UnicodeString& src, int32_t srcStart); 03106 03114 UnicodeString(const UnicodeString& src, int32_t srcStart, int32_t srcLength); 
03115 03132 virtual Replaceable *clone() const; 03133 03137 virtual ~UnicodeString(); 03138 03152 static UnicodeString fromUTF8(const StringPiece &utf8); 03153 03165 static UnicodeString fromUTF32(const UChar32 *utf32, int32_t length); 03166 03167 /* Miscellaneous operations */ 03168 03203 UnicodeString unescape() const; 03204 03224 UChar32 unescapeAt(int32_t &offset) const; 03225 03231 static UClassID U_EXPORT2 getStaticClassID(); 03232 03238 virtual UClassID getDynamicClassID() const; 03239 03240 //======================================== 03241 // Implementation methods 03242 //======================================== 03243 03244 protected: 03249 virtual int32_t getLength() const; 03250 03256 virtual UChar getCharAt(int32_t offset) const; 03257 03263 virtual UChar32 getChar32At(int32_t offset) const; 03264 03265 private: 03266 // For char* constructors. Could be made public. 03267 UnicodeString &setToUTF8(const StringPiece &utf8); 03268 // For extract(char*). 03269 // We could make a toUTF8(target, capacity, errorCode) public but not 03270 // this version: New API will be cleaner if we make callers create substrings 03271 // rather than having start+length on every method, 03272 // and it should take a UErrorCode&. 
03273 int32_t 03274 toUTF8(int32_t start, int32_t len, 03275 char *target, int32_t capacity) const; 03276 03281 UBool doEquals(const UnicodeString &text, int32_t len) const; 03282 03283 inline int8_t 03284 doCompare(int32_t start, 03285 int32_t length, 03286 const UnicodeString& srcText, 03287 int32_t srcStart, 03288 int32_t srcLength) const; 03289 03290 int8_t doCompare(int32_t start, 03291 int32_t length, 03292 const UChar *srcChars, 03293 int32_t srcStart, 03294 int32_t srcLength) const; 03295 03296 inline int8_t 03297 doCompareCodePointOrder(int32_t start, 03298 int32_t length, 03299 const UnicodeString& srcText, 03300 int32_t srcStart, 03301 int32_t srcLength) const; 03302 03303 int8_t doCompareCodePointOrder(int32_t start, 03304 int32_t length, 03305 const UChar *srcChars, 03306 int32_t srcStart, 03307 int32_t srcLength) const; 03308 03309 inline int8_t 03310 doCaseCompare(int32_t start, 03311 int32_t length, 03312 const UnicodeString &srcText, 03313 int32_t srcStart, 03314 int32_t srcLength, 03315 uint32_t options) const; 03316 03317 int8_t 03318 doCaseCompare(int32_t start, 03319 int32_t length, 03320 const UChar *srcChars, 03321 int32_t srcStart, 03322 int32_t srcLength, 03323 uint32_t options) const; 03324 03325 int32_t doIndexOf(UChar c, 03326 int32_t start, 03327 int32_t length) const; 03328 03329 int32_t doIndexOf(UChar32 c, 03330 int32_t start, 03331 int32_t length) const; 03332 03333 int32_t doLastIndexOf(UChar c, 03334 int32_t start, 03335 int32_t length) const; 03336 03337 int32_t doLastIndexOf(UChar32 c, 03338 int32_t start, 03339 int32_t length) const; 03340 03341 void doExtract(int32_t start, 03342 int32_t length, 03343 UChar *dst, 03344 int32_t dstStart) const; 03345 03346 inline void doExtract(int32_t start, 03347 int32_t length, 03348 UnicodeString& target) const; 03349 03350 inline UChar doCharAt(int32_t offset) const; 03351 03352 UnicodeString& doReplace(int32_t start, 03353 int32_t length, 03354 const UnicodeString& srcText, 03355 int32_t 
srcStart, 03356 int32_t srcLength); 03357 03358 UnicodeString& doReplace(int32_t start, 03359 int32_t length, 03360 const UChar *srcChars, 03361 int32_t srcStart, 03362 int32_t srcLength); 03363 03364 UnicodeString& doReverse(int32_t start, 03365 int32_t length); 03366 03367 // calculate hash code 03368 int32_t doHashCode(void) const; 03369 03370 // get pointer to start of array 03371 // these do not check for kOpenGetBuffer, unlike the public getBuffer() function 03372 inline UChar* getArrayStart(void); 03373 inline const UChar* getArrayStart(void) const; 03374 03375 // A UnicodeString object (not necessarily its current buffer) 03376 // is writable unless it isBogus() or it has an "open" getBuffer(minCapacity). 03377 inline UBool isWritable() const; 03378 03379 // Is the current buffer writable? 03380 inline UBool isBufferWritable() const; 03381 03382 // None of the following does releaseArray(). 03383 inline void setLength(int32_t len); // sets only fShortLength and fLength 03384 inline void setToEmpty(); // sets fFlags=kShortString 03385 inline void setArray(UChar *array, int32_t len, int32_t capacity); // does not set fFlags 03386 03387 // allocate the array; result may be fStackBuffer 03388 // sets refCount to 1 if appropriate 03389 // sets fArray, fCapacity, and fFlags 03390 // returns boolean for success or failure 03391 UBool allocate(int32_t capacity); 03392 03393 // release the array if owned 03394 void releaseArray(void); 03395 03396 // turn a bogus string into an empty one 03397 void unBogus(); 03398 03399 // implements assignment operator, copy constructor, and fastCopyFrom() 03400 UnicodeString &copyFrom(const UnicodeString &src, UBool fastCopy=FALSE); 03401 03402 // Pin start and limit to acceptable values. 03403 inline void pinIndex(int32_t& start) const; 03404 inline void pinIndices(int32_t& start, 03405 int32_t& length) const; 03406 03407 #if !UCONFIG_NO_CONVERSION 03408 03409 /* Internal extract() using UConverter. 
*/ 03410 int32_t doExtract(int32_t start, int32_t length, 03411 char *dest, int32_t destCapacity, 03412 UConverter *cnv, 03413 UErrorCode &errorCode) const; 03414 03415 /* 03416 * Real constructor for converting from codepage data. 03417 * It assumes that it is called with !fRefCounted. 03418 * 03419 * If <code>codepage==0</code>, then the default converter 03420 * is used for the platform encoding. 03421 * If <code>codepage</code> is an empty string (<code>""</code>), 03422 * then a simple conversion is performed on the codepage-invariant 03423 * subset ("invariant characters") of the platform encoding. See utypes.h. 03424 */ 03425 void doCodepageCreate(const char *codepageData, 03426 int32_t dataLength, 03427 const char *codepage); 03428 03429 /* 03430 * Worker function for creating a UnicodeString from 03431 * a codepage string using a UConverter. 03432 */ 03433 void 03434 doCodepageCreate(const char *codepageData, 03435 int32_t dataLength, 03436 UConverter *converter, 03437 UErrorCode &status); 03438 03439 #endif 03440 03441 /* 03442 * This function is called when write access to the array 03443 * is necessary. 03444 * 03445 * We need to make a copy of the array if 03446 * the buffer is read-only, or 03447 * the buffer is refCounted (shared), and refCount>1, or 03448 * the buffer is too small. 03449 * 03450 * Return FALSE if memory could not be allocated. 03451 */ 03452 UBool cloneArrayIfNeeded(int32_t newCapacity = -1, 03453 int32_t growCapacity = -1, 03454 UBool doCopyArray = TRUE, 03455 int32_t **pBufferToDelete = 0, 03456 UBool forceClone = FALSE); 03457 03463 UnicodeString & 03464 caseMap(const UCaseMap *csm, UStringCaseMapper *stringCaseMapper); 03465 03466 // ref counting 03467 void addRef(void); 03468 int32_t removeRef(void); 03469 int32_t refCount(void) const; 03470 03471 // constants 03472 enum { 03473 // Set the stack buffer size so that sizeof(UnicodeString) is, 03474 // naturally (without padding), a multiple of sizeof(pointer). 
03475 US_STACKBUF_SIZE= sizeof(void *)==4 ? 13 : 15, // Size of stack buffer for short strings 03476 kInvalidUChar=0xffff, // invalid UChar index 03477 kGrowSize=128, // grow size for this buffer 03478 kInvalidHashCode=0, // invalid hash code 03479 kEmptyHashCode=1, // hash code for empty string 03480 03481 // bit flag values for fFlags 03482 kIsBogus=1, // this string is bogus, i.e., not valid or NULL 03483 kUsingStackBuffer=2,// using fUnion.fStackBuffer instead of fUnion.fFields 03484 kRefCounted=4, // there is a refCount field before the characters in fArray 03485 kBufferIsReadonly=8,// do not write to this buffer 03486 kOpenGetBuffer=16, // getBuffer(minCapacity) was called (is "open"), 03487 // and releaseBuffer(newLength) must be called 03488 03489 // combined values for convenience 03490 kShortString=kUsingStackBuffer, 03491 kLongString=kRefCounted, 03492 kReadonlyAlias=kBufferIsReadonly, 03493 kWritableAlias=0 03494 }; 03495 03496 friend class StringThreadTest; 03497 friend class UnicodeStringAppendable; 03498 03499 union StackBufferOrFields; // forward declaration necessary before friend declaration 03500 friend union StackBufferOrFields; // make US_STACKBUF_SIZE visible inside fUnion 03501 03502 /* 03503 * The following are all the class fields that are stored 03504 * in each UnicodeString object. 03505 * Note that UnicodeString has virtual functions, 03506 * therefore there is an implicit vtable pointer 03507 * as the first real field. 03508 * The fields should be aligned such that no padding is necessary. 03509 * On 32-bit machines, the size should be 32 bytes, 03510 * on 64-bit machines (8-byte pointers), it should be 40 bytes. 03511 * 03512 * We use a hack to achieve this. 
03513 * 03514 * With at least some compilers, each of the following is forced to 03515 * a multiple of sizeof(pointer) [the largest field base unit here is a data pointer], 03516 * rounded up with additional padding if the fields do not already fit that requirement: 03517 * - sizeof(class UnicodeString) 03518 * - offsetof(UnicodeString, fUnion) 03519 * - sizeof(fUnion) 03520 * - sizeof(fFields) 03521 * 03522 * In order to avoid padding, we make sizeof(fStackBuffer)=16 (=8 UChars) 03523 * which is at least as large as sizeof(fFields) on 32-bit and 64-bit machines. 03524 * (Padding at the end of fFields is ok: 03525 * As long as there is no padding after fStackBuffer, it is not wasted space.) 03526 * 03527 * We further assume that the compiler does not reorder the fields, 03528 * so that fRestOfStackBuffer (which holds a few more UChars) immediately follows after fUnion, 03529 * with at most some padding (but no other field) in between. 03530 * (Padding there would be wasted space, but functionally harmless.) 03531 * 03532 * We use a few more sizeof(pointer)'s chunks of space with 03533 * fRestOfStackBuffer, fShortLength and fFlags, 03534 * to get up exactly to the intended sizeof(UnicodeString). 
03535 */ 03536 // (implicit) *vtable; /* Storage overlay: fStackBuffer and fFields occupy the same union, and the kUsingStackBuffer bit of fFlags says which member is in use. */ 03537 union StackBufferOrFields { 03538 // fStackBuffer is used iff (fFlags&kUsingStackBuffer) 03539 // else fFields is used 03540 UChar fStackBuffer[8]; // buffer for short strings, together with fRestOfStackBuffer 03541 struct { 03542 UChar *fArray; // the Unicode data 03543 int32_t fCapacity; // capacity of fArray (in UChars) 03544 int32_t fLength; // number of characters in fArray if >127; else undefined 03545 } fFields; 03546 } fUnion; 03547 UChar fRestOfStackBuffer[US_STACKBUF_SIZE-8]; 03548 int8_t fShortLength; // 0..127: length <0: real length is in fUnion.fFields.fLength 03549 uint8_t fFlags; // bit flags: see constants above 03550 }; 03551 /* Non-member operator+ taking two strings by const reference and returning a UnicodeString by value. Only the declaration is visible here; presumably it yields the concatenation of s1 and s2 (confirm against the definition). */ 03560 U_COMMON_API UnicodeString U_EXPORT2 03561 operator+ (const UnicodeString &s1, const UnicodeString &s2); 03562 03563 //======================================== 03564 // Inline members 03565 //======================================== 03566 03567 //======================================== 03568 // Privates 03569 //======================================== 03570 /* pinIndex: clamps start in place so that 0 <= start <= length(). */ 03571 inline void 03572 UnicodeString::pinIndex(int32_t& start) const 03573 { 03574 // pin index 03575 if(start < 0) { 03576 start = 0; 03577 } else if(start > length()) { 03578 start = length(); 03579 } 03580 } 03581 /* pinIndices: clamps start into [0, length()] first, then clamps _length into [0, length()-start], both in place; afterwards start+_length cannot exceed length(). */ 03582 inline void 03583 UnicodeString::pinIndices(int32_t& start, 03584 int32_t& _length) const 03585 { 03586 // pin indices 03587 int32_t len = length(); 03588 if(start < 0) { 03589 start = 0; 03590 } else if(start > len) { 03591 start = len; 03592 } 03593 if(_length < 0) { 03594 _length = 0; 03595 } else if(_length > (len - start)) { 03596 _length = (len - start); 03597 } 03598 } 03599 /* getArrayStart: returns fUnion.fStackBuffer when kUsingStackBuffer is set in fFlags, otherwise the fUnion.fFields.fArray pointer. No other flag is tested here. */ 03600 inline UChar* 03601 UnicodeString::getArrayStart() 03602 { return (fFlags&kUsingStackBuffer) ? fUnion.fStackBuffer : fUnion.fFields.fArray; } 03603 /* Const overload of getArrayStart(): same selection on kUsingStackBuffer, returning a pointer to const UChar. */ 03604 inline const UChar* 03605 UnicodeString::getArrayStart() const 03606 { return (fFlags&kUsingStackBuffer) ? 
fUnion.fStackBuffer : fUnion.fFields.fArray; } 03607 03608 //======================================== 03609 // Default constructor 03610 //======================================== 03611 03612 inline 03613 UnicodeString::UnicodeString() 03614 : fShortLength(0), 03615 fFlags(kShortString) 03616 {} 03617 03618 //======================================== 03619 // Read-only implementation methods 03620 //======================================== 03621 inline int32_t 03622 UnicodeString::length() const 03623 { return fShortLength>=0 ? fShortLength : fUnion.fFields.fLength; } 03624 03625 inline int32_t 03626 UnicodeString::getCapacity() const 03627 { return (fFlags&kUsingStackBuffer) ? US_STACKBUF_SIZE : fUnion.fFields.fCapacity; } 03628 03629 inline int32_t 03630 UnicodeString::hashCode() const 03631 { return doHashCode(); } 03632 03633 inline UBool 03634 UnicodeString::isBogus() const 03635 { return (UBool)(fFlags & kIsBogus); } 03636 03637 inline UBool 03638 UnicodeString::isWritable() const 03639 { return (UBool)!(fFlags&(kOpenGetBuffer|kIsBogus)); } 03640 03641 inline UBool 03642 UnicodeString::isBufferWritable() const 03643 { 03644 return (UBool)( 03645 !(fFlags&(kOpenGetBuffer|kIsBogus|kBufferIsReadonly)) && 03646 (!(fFlags&kRefCounted) || refCount()==1)); 03647 } 03648 03649 inline const UChar * 03650 UnicodeString::getBuffer() const { 03651 if(fFlags&(kIsBogus|kOpenGetBuffer)) { 03652 return 0; 03653 } else if(fFlags&kUsingStackBuffer) { 03654 return fUnion.fStackBuffer; 03655 } else { 03656 return fUnion.fFields.fArray; 03657 } 03658 } 03659 03660 //======================================== 03661 // Read-only alias methods 03662 //======================================== 03663 inline int8_t 03664 UnicodeString::doCompare(int32_t start, 03665 int32_t thisLength, 03666 const UnicodeString& srcText, 03667 int32_t srcStart, 03668 int32_t srcLength) const 03669 { 03670 if(srcText.isBogus()) { 03671 return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise 03672 } 
else { 03673 srcText.pinIndices(srcStart, srcLength); 03674 return doCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength); 03675 } 03676 } 03677 03678 inline UBool 03679 UnicodeString::operator== (const UnicodeString& text) const 03680 { 03681 if(isBogus()) { 03682 return text.isBogus(); 03683 } else { 03684 int32_t len = length(), textLength = text.length(); 03685 return !text.isBogus() && len == textLength && doEquals(text, len); 03686 } 03687 } 03688 03689 inline UBool 03690 UnicodeString::operator!= (const UnicodeString& text) const 03691 { return (! operator==(text)); } 03692 03693 inline UBool 03694 UnicodeString::operator> (const UnicodeString& text) const 03695 { return doCompare(0, length(), text, 0, text.length()) == 1; } 03696 03697 inline UBool 03698 UnicodeString::operator< (const UnicodeString& text) const 03699 { return doCompare(0, length(), text, 0, text.length()) == -1; } 03700 03701 inline UBool 03702 UnicodeString::operator>= (const UnicodeString& text) const 03703 { return doCompare(0, length(), text, 0, text.length()) != -1; } 03704 03705 inline UBool 03706 UnicodeString::operator<= (const UnicodeString& text) const 03707 { return doCompare(0, length(), text, 0, text.length()) != 1; } 03708 03709 inline int8_t 03710 UnicodeString::compare(const UnicodeString& text) const 03711 { return doCompare(0, length(), text, 0, text.length()); } 03712 03713 inline int8_t 03714 UnicodeString::compare(int32_t start, 03715 int32_t _length, 03716 const UnicodeString& srcText) const 03717 { return doCompare(start, _length, srcText, 0, srcText.length()); } 03718 03719 inline int8_t 03720 UnicodeString::compare(const UChar *srcChars, 03721 int32_t srcLength) const 03722 { return doCompare(0, length(), srcChars, 0, srcLength); } 03723 03724 inline int8_t 03725 UnicodeString::compare(int32_t start, 03726 int32_t _length, 03727 const UnicodeString& srcText, 03728 int32_t srcStart, 03729 int32_t srcLength) const 03730 { return doCompare(start, 
_length, srcText, srcStart, srcLength); } 03731 03732 inline int8_t 03733 UnicodeString::compare(int32_t start, 03734 int32_t _length, 03735 const UChar *srcChars) const 03736 { return doCompare(start, _length, srcChars, 0, _length); } 03737 03738 inline int8_t 03739 UnicodeString::compare(int32_t start, 03740 int32_t _length, 03741 const UChar *srcChars, 03742 int32_t srcStart, 03743 int32_t srcLength) const 03744 { return doCompare(start, _length, srcChars, srcStart, srcLength); } 03745 03746 inline int8_t 03747 UnicodeString::compareBetween(int32_t start, 03748 int32_t limit, 03749 const UnicodeString& srcText, 03750 int32_t srcStart, 03751 int32_t srcLimit) const 03752 { return doCompare(start, limit - start, 03753 srcText, srcStart, srcLimit - srcStart); } 03754 03755 inline int8_t 03756 UnicodeString::doCompareCodePointOrder(int32_t start, 03757 int32_t thisLength, 03758 const UnicodeString& srcText, 03759 int32_t srcStart, 03760 int32_t srcLength) const 03761 { 03762 if(srcText.isBogus()) { 03763 return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise 03764 } else { 03765 srcText.pinIndices(srcStart, srcLength); 03766 return doCompareCodePointOrder(start, thisLength, srcText.getArrayStart(), srcStart, srcLength); 03767 } 03768 } 03769 03770 inline int8_t 03771 UnicodeString::compareCodePointOrder(const UnicodeString& text) const 03772 { return doCompareCodePointOrder(0, length(), text, 0, text.length()); } 03773 03774 inline int8_t 03775 UnicodeString::compareCodePointOrder(int32_t start, 03776 int32_t _length, 03777 const UnicodeString& srcText) const 03778 { return doCompareCodePointOrder(start, _length, srcText, 0, srcText.length()); } 03779 03780 inline int8_t 03781 UnicodeString::compareCodePointOrder(const UChar *srcChars, 03782 int32_t srcLength) const 03783 { return doCompareCodePointOrder(0, length(), srcChars, 0, srcLength); } 03784 03785 inline int8_t 03786 UnicodeString::compareCodePointOrder(int32_t start, 03787 int32_t _length, 03788 
const UnicodeString& srcText, 03789 int32_t srcStart, 03790 int32_t srcLength) const 03791 { return doCompareCodePointOrder(start, _length, srcText, srcStart, srcLength); } 03792 03793 inline int8_t 03794 UnicodeString::compareCodePointOrder(int32_t start, 03795 int32_t _length, 03796 const UChar *srcChars) const 03797 { return doCompareCodePointOrder(start, _length, srcChars, 0, _length); } 03798 03799 inline int8_t 03800 UnicodeString::compareCodePointOrder(int32_t start, 03801 int32_t _length, 03802 const UChar *srcChars, 03803 int32_t srcStart, 03804 int32_t srcLength) const 03805 { return doCompareCodePointOrder(start, _length, srcChars, srcStart, srcLength); } 03806 03807 inline int8_t 03808 UnicodeString::compareCodePointOrderBetween(int32_t start, 03809 int32_t limit, 03810 const UnicodeString& srcText, 03811 int32_t srcStart, 03812 int32_t srcLimit) const 03813 { return doCompareCodePointOrder(start, limit - start, 03814 srcText, srcStart, srcLimit - srcStart); } 03815 03816 inline int8_t 03817 UnicodeString::doCaseCompare(int32_t start, 03818 int32_t thisLength, 03819 const UnicodeString &srcText, 03820 int32_t srcStart, 03821 int32_t srcLength, 03822 uint32_t options) const 03823 { 03824 if(srcText.isBogus()) { 03825 return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise 03826 } else { 03827 srcText.pinIndices(srcStart, srcLength); 03828 return doCaseCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength, options); 03829 } 03830 } 03831 03832 inline int8_t 03833 UnicodeString::caseCompare(const UnicodeString &text, uint32_t options) const { 03834 return doCaseCompare(0, length(), text, 0, text.length(), options); 03835 } 03836 03837 inline int8_t 03838 UnicodeString::caseCompare(int32_t start, 03839 int32_t _length, 03840 const UnicodeString &srcText, 03841 uint32_t options) const { 03842 return doCaseCompare(start, _length, srcText, 0, srcText.length(), options); 03843 } 03844 03845 inline int8_t 03846 
UnicodeString::caseCompare(const UChar *srcChars, 03847 int32_t srcLength, 03848 uint32_t options) const { 03849 return doCaseCompare(0, length(), srcChars, 0, srcLength, options); 03850 } 03851 03852 inline int8_t 03853 UnicodeString::caseCompare(int32_t start, 03854 int32_t _length, 03855 const UnicodeString &srcText, 03856 int32_t srcStart, 03857 int32_t srcLength, 03858 uint32_t options) const { 03859 return doCaseCompare(start, _length, srcText, srcStart, srcLength, options); 03860 } 03861 03862 inline int8_t 03863 UnicodeString::caseCompare(int32_t start, 03864 int32_t _length, 03865 const UChar *srcChars, 03866 uint32_t options) const { 03867 return doCaseCompare(start, _length, srcChars, 0, _length, options); 03868 } 03869 03870 inline int8_t 03871 UnicodeString::caseCompare(int32_t start, 03872 int32_t _length, 03873 const UChar *srcChars, 03874 int32_t srcStart, 03875 int32_t srcLength, 03876 uint32_t options) const { 03877 return doCaseCompare(start, _length, srcChars, srcStart, srcLength, options); 03878 } 03879 03880 inline int8_t 03881 UnicodeString::caseCompareBetween(int32_t start, 03882 int32_t limit, 03883 const UnicodeString &srcText, 03884 int32_t srcStart, 03885 int32_t srcLimit, 03886 uint32_t options) const { 03887 return doCaseCompare(start, limit - start, srcText, srcStart, srcLimit - srcStart, options); 03888 } 03889 03890 inline int32_t 03891 UnicodeString::indexOf(const UnicodeString& srcText, 03892 int32_t srcStart, 03893 int32_t srcLength, 03894 int32_t start, 03895 int32_t _length) const 03896 { 03897 if(!srcText.isBogus()) { 03898 srcText.pinIndices(srcStart, srcLength); 03899 if(srcLength > 0) { 03900 return indexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length); 03901 } 03902 } 03903 return -1; 03904 } 03905 03906 inline int32_t 03907 UnicodeString::indexOf(const UnicodeString& text) const 03908 { return indexOf(text, 0, text.length(), 0, length()); } 03909 03910 inline int32_t 03911 UnicodeString::indexOf(const 
UnicodeString& text, 03912 int32_t start) const { 03913 pinIndex(start); 03914 return indexOf(text, 0, text.length(), start, length() - start); 03915 } 03916 03917 inline int32_t 03918 UnicodeString::indexOf(const UnicodeString& text, 03919 int32_t start, 03920 int32_t _length) const 03921 { return indexOf(text, 0, text.length(), start, _length); } 03922 03923 inline int32_t 03924 UnicodeString::indexOf(const UChar *srcChars, 03925 int32_t srcLength, 03926 int32_t start) const { 03927 pinIndex(start); 03928 return indexOf(srcChars, 0, srcLength, start, length() - start); 03929 } 03930 03931 inline int32_t 03932 UnicodeString::indexOf(const UChar *srcChars, 03933 int32_t srcLength, 03934 int32_t start, 03935 int32_t _length) const 03936 { return indexOf(srcChars, 0, srcLength, start, _length); } 03937 03938 inline int32_t 03939 UnicodeString::indexOf(UChar c, 03940 int32_t start, 03941 int32_t _length) const 03942 { return doIndexOf(c, start, _length); } 03943 03944 inline int32_t 03945 UnicodeString::indexOf(UChar32 c, 03946 int32_t start, 03947 int32_t _length) const 03948 { return doIndexOf(c, start, _length); } 03949 03950 inline int32_t 03951 UnicodeString::indexOf(UChar c) const 03952 { return doIndexOf(c, 0, length()); } 03953 03954 inline int32_t 03955 UnicodeString::indexOf(UChar32 c) const 03956 { return indexOf(c, 0, length()); } 03957 03958 inline int32_t 03959 UnicodeString::indexOf(UChar c, 03960 int32_t start) const { 03961 pinIndex(start); 03962 return doIndexOf(c, start, length() - start); 03963 } 03964 03965 inline int32_t 03966 UnicodeString::indexOf(UChar32 c, 03967 int32_t start) const { 03968 pinIndex(start); 03969 return indexOf(c, start, length() - start); 03970 } 03971 03972 inline int32_t 03973 UnicodeString::lastIndexOf(const UChar *srcChars, 03974 int32_t srcLength, 03975 int32_t start, 03976 int32_t _length) const 03977 { return lastIndexOf(srcChars, 0, srcLength, start, _length); } 03978 03979 inline int32_t 03980 
UnicodeString::lastIndexOf(const UChar *srcChars, 03981 int32_t srcLength, 03982 int32_t start) const { 03983 pinIndex(start); 03984 return lastIndexOf(srcChars, 0, srcLength, start, length() - start); 03985 } 03986 03987 inline int32_t 03988 UnicodeString::lastIndexOf(const UnicodeString& srcText, 03989 int32_t srcStart, 03990 int32_t srcLength, 03991 int32_t start, 03992 int32_t _length) const 03993 { 03994 if(!srcText.isBogus()) { 03995 srcText.pinIndices(srcStart, srcLength); 03996 if(srcLength > 0) { 03997 return lastIndexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length); 03998 } 03999 } 04000 return -1; 04001 } 04002 04003 inline int32_t 04004 UnicodeString::lastIndexOf(const UnicodeString& text, 04005 int32_t start, 04006 int32_t _length) const 04007 { return lastIndexOf(text, 0, text.length(), start, _length); } 04008 04009 inline int32_t 04010 UnicodeString::lastIndexOf(const UnicodeString& text, 04011 int32_t start) const { 04012 pinIndex(start); 04013 return lastIndexOf(text, 0, text.length(), start, length() - start); 04014 } 04015 04016 inline int32_t 04017 UnicodeString::lastIndexOf(const UnicodeString& text) const 04018 { return lastIndexOf(text, 0, text.length(), 0, length()); } 04019 04020 inline int32_t 04021 UnicodeString::lastIndexOf(UChar c, 04022 int32_t start, 04023 int32_t _length) const 04024 { return doLastIndexOf(c, start, _length); } 04025 04026 inline int32_t 04027 UnicodeString::lastIndexOf(UChar32 c, 04028 int32_t start, 04029 int32_t _length) const { 04030 return doLastIndexOf(c, start, _length); 04031 } 04032 04033 inline int32_t 04034 UnicodeString::lastIndexOf(UChar c) const 04035 { return doLastIndexOf(c, 0, length()); } 04036 04037 inline int32_t 04038 UnicodeString::lastIndexOf(UChar32 c) const { 04039 return lastIndexOf(c, 0, length()); 04040 } 04041 04042 inline int32_t 04043 UnicodeString::lastIndexOf(UChar c, 04044 int32_t start) const { 04045 pinIndex(start); 04046 return doLastIndexOf(c, start, length() - 
start); 04047 } 04048 04049 inline int32_t 04050 UnicodeString::lastIndexOf(UChar32 c, 04051 int32_t start) const { 04052 pinIndex(start); 04053 return lastIndexOf(c, start, length() - start); 04054 } 04055 04056 inline UBool 04057 UnicodeString::startsWith(const UnicodeString& text) const 04058 { return compare(0, text.length(), text, 0, text.length()) == 0; } 04059 04060 inline UBool 04061 UnicodeString::startsWith(const UnicodeString& srcText, 04062 int32_t srcStart, 04063 int32_t srcLength) const 04064 { return doCompare(0, srcLength, srcText, srcStart, srcLength) == 0; } 04065 04066 inline UBool 04067 UnicodeString::startsWith(const UChar *srcChars, int32_t srcLength) const { 04068 if(srcLength < 0) { 04069 srcLength = u_strlen(srcChars); 04070 } 04071 return doCompare(0, srcLength, srcChars, 0, srcLength) == 0; 04072 } 04073 04074 inline UBool 04075 UnicodeString::startsWith(const UChar *srcChars, int32_t srcStart, int32_t srcLength) const { 04076 if(srcLength < 0) { 04077 srcLength = u_strlen(srcChars); 04078 } 04079 return doCompare(0, srcLength, srcChars, srcStart, srcLength) == 0; 04080 } 04081 04082 inline UBool 04083 UnicodeString::endsWith(const UnicodeString& text) const 04084 { return doCompare(length() - text.length(), text.length(), 04085 text, 0, text.length()) == 0; } 04086 04087 inline UBool 04088 UnicodeString::endsWith(const UnicodeString& srcText, 04089 int32_t srcStart, 04090 int32_t srcLength) const { 04091 srcText.pinIndices(srcStart, srcLength); 04092 return doCompare(length() - srcLength, srcLength, 04093 srcText, srcStart, srcLength) == 0; 04094 } 04095 04096 inline UBool 04097 UnicodeString::endsWith(const UChar *srcChars, 04098 int32_t srcLength) const { 04099 if(srcLength < 0) { 04100 srcLength = u_strlen(srcChars); 04101 } 04102 return doCompare(length() - srcLength, srcLength, 04103 srcChars, 0, srcLength) == 0; 04104 } 04105 04106 inline UBool 04107 UnicodeString::endsWith(const UChar *srcChars, 04108 int32_t srcStart, 04109 
int32_t srcLength) const { 04110 if(srcLength < 0) { 04111 srcLength = u_strlen(srcChars + srcStart); 04112 } 04113 return doCompare(length() - srcLength, srcLength, 04114 srcChars, srcStart, srcLength) == 0; 04115 } 04116 04117 //======================================== 04118 // replace 04119 //======================================== 04120 inline UnicodeString& 04121 UnicodeString::replace(int32_t start, 04122 int32_t _length, 04123 const UnicodeString& srcText) 04124 { return doReplace(start, _length, srcText, 0, srcText.length()); } 04125 04126 inline UnicodeString& 04127 UnicodeString::replace(int32_t start, 04128 int32_t _length, 04129 const UnicodeString& srcText, 04130 int32_t srcStart, 04131 int32_t srcLength) 04132 { return doReplace(start, _length, srcText, srcStart, srcLength); } 04133 04134 inline UnicodeString& 04135 UnicodeString::replace(int32_t start, 04136 int32_t _length, 04137 const UChar *srcChars, 04138 int32_t srcLength) 04139 { return doReplace(start, _length, srcChars, 0, srcLength); } 04140 04141 inline UnicodeString& 04142 UnicodeString::replace(int32_t start, 04143 int32_t _length, 04144 const UChar *srcChars, 04145 int32_t srcStart, 04146 int32_t srcLength) 04147 { return doReplace(start, _length, srcChars, srcStart, srcLength); } 04148 04149 inline UnicodeString& 04150 UnicodeString::replace(int32_t start, 04151 int32_t _length, 04152 UChar srcChar) 04153 { return doReplace(start, _length, &srcChar, 0, 1); } 04154 04155 inline UnicodeString& 04156 UnicodeString::replaceBetween(int32_t start, 04157 int32_t limit, 04158 const UnicodeString& srcText) 04159 { return doReplace(start, limit - start, srcText, 0, srcText.length()); } 04160 04161 inline UnicodeString& 04162 UnicodeString::replaceBetween(int32_t start, 04163 int32_t limit, 04164 const UnicodeString& srcText, 04165 int32_t srcStart, 04166 int32_t srcLimit) 04167 { return doReplace(start, limit - start, srcText, srcStart, srcLimit - srcStart); } 04168 04169 inline UnicodeString& 
04170 UnicodeString::findAndReplace(const UnicodeString& oldText, 04171 const UnicodeString& newText) 04172 { return findAndReplace(0, length(), oldText, 0, oldText.length(), 04173 newText, 0, newText.length()); } 04174 04175 inline UnicodeString& 04176 UnicodeString::findAndReplace(int32_t start, 04177 int32_t _length, 04178 const UnicodeString& oldText, 04179 const UnicodeString& newText) 04180 { return findAndReplace(start, _length, oldText, 0, oldText.length(), 04181 newText, 0, newText.length()); } 04182 04183 // ============================ 04184 // extract 04185 // ============================ 04186 inline void 04187 UnicodeString::doExtract(int32_t start, 04188 int32_t _length, 04189 UnicodeString& target) const 04190 { target.replace(0, target.length(), *this, start, _length); } 04191 04192 inline void 04193 UnicodeString::extract(int32_t start, 04194 int32_t _length, 04195 UChar *target, 04196 int32_t targetStart) const 04197 { doExtract(start, _length, target, targetStart); } 04198 04199 inline void 04200 UnicodeString::extract(int32_t start, 04201 int32_t _length, 04202 UnicodeString& target) const 04203 { doExtract(start, _length, target); } 04204 04205 #if !UCONFIG_NO_CONVERSION 04206 04207 inline int32_t 04208 UnicodeString::extract(int32_t start, 04209 int32_t _length, 04210 char *dst, 04211 const char *codepage) const 04212 04213 { 04214 // This dstSize value will be checked explicitly 04215 return extract(start, _length, dst, dst!=0 ? 
0xffffffff : 0, codepage); 04216 } 04217 04218 #endif 04219 04220 inline void 04221 UnicodeString::extractBetween(int32_t start, 04222 int32_t limit, 04223 UChar *dst, 04224 int32_t dstStart) const { 04225 pinIndex(start); 04226 pinIndex(limit); 04227 doExtract(start, limit - start, dst, dstStart); 04228 } 04229 04230 inline UnicodeString 04231 UnicodeString::tempSubStringBetween(int32_t start, int32_t limit) const { 04232 return tempSubString(start, limit - start); 04233 } 04234 04235 inline UChar 04236 UnicodeString::doCharAt(int32_t offset) const 04237 { 04238 if((uint32_t)offset < (uint32_t)length()) { 04239 return getArrayStart()[offset]; 04240 } else { 04241 return kInvalidUChar; 04242 } 04243 } 04244 04245 inline UChar 04246 UnicodeString::charAt(int32_t offset) const 04247 { return doCharAt(offset); } 04248 04249 inline UChar 04250 UnicodeString::operator[] (int32_t offset) const 04251 { return doCharAt(offset); } 04252 04253 inline UBool 04254 UnicodeString::isEmpty() const { 04255 return fShortLength == 0; 04256 } 04257 04258 //======================================== 04259 // Write implementation methods 04260 //======================================== 04261 inline void 04262 UnicodeString::setLength(int32_t len) { 04263 if(len <= 127) { 04264 fShortLength = (int8_t)len; 04265 } else { 04266 fShortLength = (int8_t)-1; 04267 fUnion.fFields.fLength = len; 04268 } 04269 } 04270 04271 inline void 04272 UnicodeString::setToEmpty() { 04273 fShortLength = 0; 04274 fFlags = kShortString; 04275 } 04276 04277 inline void 04278 UnicodeString::setArray(UChar *array, int32_t len, int32_t capacity) { 04279 setLength(len); 04280 fUnion.fFields.fArray = array; 04281 fUnion.fFields.fCapacity = capacity; 04282 } 04283 04284 inline const UChar * 04285 UnicodeString::getTerminatedBuffer() { 04286 if(!isWritable()) { 04287 return 0; 04288 } else { 04289 UChar *array = getArrayStart(); 04290 int32_t len = length(); 04291 if(len < getCapacity() && ((fFlags&kRefCounted) == 0 || 
refCount() == 1)) { 04292 /* 04293 * kRefCounted: Do not write the NUL if the buffer is shared. 04294 * That is mostly safe, except when the length of one copy was modified 04295 * without copy-on-write, e.g., via truncate(newLength) or remove(void). 04296 * Then the NUL would be written into the middle of another copy's string. 04297 */ 04298 if(!(fFlags&kBufferIsReadonly)) { 04299 /* 04300 * We must not write to a readonly buffer, but it is known to be 04301 * NUL-terminated if len<capacity. 04302 * A shared, allocated buffer (refCount()>1) must not have its contents 04303 * modified, but the NUL at [len] is beyond the string contents, 04304 * and multiple string objects and threads writing the same NUL into the 04305 * same location is harmless. 04306 * In all other cases, the buffer is fully writable and it is anyway safe 04307 * to write the NUL. 04308 * 04309 * Note: An earlier version of this code tested whether there is a NUL 04310 * at [len] already, but, while safe, it generated lots of warnings from 04311 * tools like valgrind and Purify. 
04312 */ 04313 array[len] = 0; 04314 } 04315 return array; 04316 } else if(cloneArrayIfNeeded(len+1)) { 04317 array = getArrayStart(); 04318 array[len] = 0; 04319 return array; 04320 } else { 04321 return 0; 04322 } 04323 } 04324 } 04325 04326 inline UnicodeString& 04327 UnicodeString::operator= (UChar ch) 04328 { return doReplace(0, length(), &ch, 0, 1); } 04329 04330 inline UnicodeString& 04331 UnicodeString::operator= (UChar32 ch) 04332 { return replace(0, length(), ch); } 04333 04334 inline UnicodeString& 04335 UnicodeString::setTo(const UnicodeString& srcText, 04336 int32_t srcStart, 04337 int32_t srcLength) 04338 { 04339 unBogus(); 04340 return doReplace(0, length(), srcText, srcStart, srcLength); 04341 } 04342 04343 inline UnicodeString& 04344 UnicodeString::setTo(const UnicodeString& srcText, 04345 int32_t srcStart) 04346 { 04347 unBogus(); 04348 srcText.pinIndex(srcStart); 04349 return doReplace(0, length(), srcText, srcStart, srcText.length() - srcStart); 04350 } 04351 04352 inline UnicodeString& 04353 UnicodeString::setTo(const UnicodeString& srcText) 04354 { 04355 return copyFrom(srcText); 04356 } 04357 04358 inline UnicodeString& 04359 UnicodeString::setTo(const UChar *srcChars, 04360 int32_t srcLength) 04361 { 04362 unBogus(); 04363 return doReplace(0, length(), srcChars, 0, srcLength); 04364 } 04365 04366 inline UnicodeString& 04367 UnicodeString::setTo(UChar srcChar) 04368 { 04369 unBogus(); 04370 return doReplace(0, length(), &srcChar, 0, 1); 04371 } 04372 04373 inline UnicodeString& 04374 UnicodeString::setTo(UChar32 srcChar) 04375 { 04376 unBogus(); 04377 return replace(0, length(), srcChar); 04378 } 04379 04380 inline UnicodeString& 04381 UnicodeString::append(const UnicodeString& srcText, 04382 int32_t srcStart, 04383 int32_t srcLength) 04384 { return doReplace(length(), 0, srcText, srcStart, srcLength); } 04385 04386 inline UnicodeString& 04387 UnicodeString::append(const UnicodeString& srcText) 04388 { return doReplace(length(), 0, srcText, 
0, srcText.length()); } 04389 04390 inline UnicodeString& 04391 UnicodeString::append(const UChar *srcChars, 04392 int32_t srcStart, 04393 int32_t srcLength) 04394 { return doReplace(length(), 0, srcChars, srcStart, srcLength); } 04395 04396 inline UnicodeString& 04397 UnicodeString::append(const UChar *srcChars, 04398 int32_t srcLength) 04399 { return doReplace(length(), 0, srcChars, 0, srcLength); } 04400 04401 inline UnicodeString& 04402 UnicodeString::append(UChar srcChar) 04403 { return doReplace(length(), 0, &srcChar, 0, 1); } 04404 04405 inline UnicodeString& 04406 UnicodeString::operator+= (UChar ch) 04407 { return doReplace(length(), 0, &ch, 0, 1); } 04408 04409 inline UnicodeString& 04410 UnicodeString::operator+= (UChar32 ch) { 04411 return append(ch); 04412 } 04413 04414 inline UnicodeString& 04415 UnicodeString::operator+= (const UnicodeString& srcText) 04416 { return doReplace(length(), 0, srcText, 0, srcText.length()); } 04417 04418 inline UnicodeString& 04419 UnicodeString::insert(int32_t start, 04420 const UnicodeString& srcText, 04421 int32_t srcStart, 04422 int32_t srcLength) 04423 { return doReplace(start, 0, srcText, srcStart, srcLength); } 04424 04425 inline UnicodeString& 04426 UnicodeString::insert(int32_t start, 04427 const UnicodeString& srcText) 04428 { return doReplace(start, 0, srcText, 0, srcText.length()); } 04429 04430 inline UnicodeString& 04431 UnicodeString::insert(int32_t start, 04432 const UChar *srcChars, 04433 int32_t srcStart, 04434 int32_t srcLength) 04435 { return doReplace(start, 0, srcChars, srcStart, srcLength); } 04436 04437 inline UnicodeString& 04438 UnicodeString::insert(int32_t start, 04439 const UChar *srcChars, 04440 int32_t srcLength) 04441 { return doReplace(start, 0, srcChars, 0, srcLength); } 04442 04443 inline UnicodeString& 04444 UnicodeString::insert(int32_t start, 04445 UChar srcChar) 04446 { return doReplace(start, 0, &srcChar, 0, 1); } 04447 04448 inline UnicodeString& 04449 UnicodeString::insert(int32_t 
start, 04450 UChar32 srcChar) 04451 { return replace(start, 0, srcChar); } 04452 04453 04454 inline UnicodeString& 04455 UnicodeString::remove() 04456 { 04457 // remove() of a bogus string makes the string empty and non-bogus 04458 // we also un-alias a read-only alias to deal with NUL-termination 04459 // issues with getTerminatedBuffer() 04460 if(fFlags & (kIsBogus|kBufferIsReadonly)) { 04461 setToEmpty(); 04462 } else { 04463 fShortLength = 0; 04464 } 04465 return *this; 04466 } 04467 04468 inline UnicodeString& 04469 UnicodeString::remove(int32_t start, 04470 int32_t _length) 04471 { 04472 if(start <= 0 && _length == INT32_MAX) { 04473 // remove(guaranteed everything) of a bogus string makes the string empty and non-bogus 04474 return remove(); 04475 } 04476 return doReplace(start, _length, NULL, 0, 0); 04477 } 04478 04479 inline UnicodeString& 04480 UnicodeString::removeBetween(int32_t start, 04481 int32_t limit) 04482 { return doReplace(start, limit - start, NULL, 0, 0); } 04483 04484 inline UnicodeString & 04485 UnicodeString::retainBetween(int32_t start, int32_t limit) { 04486 truncate(limit); 04487 return doReplace(0, start, NULL, 0, 0); 04488 } 04489 04490 inline UBool 04491 UnicodeString::truncate(int32_t targetLength) 04492 { 04493 if(isBogus() && targetLength == 0) { 04494 // truncate(0) of a bogus string makes the string empty and non-bogus 04495 unBogus(); 04496 return FALSE; 04497 } else if((uint32_t)targetLength < (uint32_t)length()) { 04498 setLength(targetLength); 04499 if(fFlags&kBufferIsReadonly) { 04500 fUnion.fFields.fCapacity = targetLength; // not NUL-terminated any more 04501 } 04502 return TRUE; 04503 } else { 04504 return FALSE; 04505 } 04506 } 04507 04508 inline UnicodeString& 04509 UnicodeString::reverse() 04510 { return doReverse(0, length()); } 04511 04512 inline UnicodeString& 04513 UnicodeString::reverse(int32_t start, 04514 int32_t _length) 04515 { return doReverse(start, _length); } 04516 04517 U_NAMESPACE_END 04518 04519 #endif
1.7.6.1