ICU 51.2  51.2
unistr.h
Go to the documentation of this file.
00001 /*
00002 **********************************************************************
00003 *   Copyright (C) 1998-2013, International Business Machines
00004 *   Corporation and others.  All Rights Reserved.
00005 **********************************************************************
00006 *
00007 * File unistr.h
00008 *
00009 * Modification History:
00010 *
00011 *   Date        Name        Description
00012 *   09/25/98    stephen     Creation.
00013 *   11/11/98    stephen     Changed per 11/9 code review.
00014 *   04/20/99    stephen     Overhauled per 4/16 code review.
00015 *   11/18/99    aliu        Made to inherit from Replaceable.  Added method
00016 *                           handleReplaceBetween(); other methods unchanged.
00017 *   06/25/01    grhoten     Remove dependency on iostream.
00018 ******************************************************************************
00019 */
00020 
00021 #ifndef UNISTR_H
00022 #define UNISTR_H
00023 
00029 #include "unicode/utypes.h"
00030 #include "unicode/rep.h"
00031 #include "unicode/std_string.h"
00032 #include "unicode/stringpiece.h"
00033 #include "unicode/bytestream.h"
00034 #include "unicode/ucasemap.h"
00035 
00036 struct UConverter;          // unicode/ucnv.h
00037 class  StringThreadTest;
00038 
00039 #ifndef U_COMPARE_CODE_POINT_ORDER
00040 /* see also ustring.h and unorm.h */
      /*
       * Guarded definition: the macro is defined here only if no previously
       * included header has already defined it, which avoids a redefinition
       * when the headers named above are included as well.
       * The value 0x8000 has exactly one bit set (bit 15).
       * NOTE(review): presumably an option flag for comparison functions that
       * take an options bit set -- confirm against ustring.h / unorm.h.
       */
00046 #define U_COMPARE_CODE_POINT_ORDER  0x8000
00047 #endif
00048 
00049 #ifndef USTRING_H
00050 
      /*
       * Forward declaration of u_strlen(), compiled only while USTRING_H is
       * not defined.
       * NOTE(review): USTRING_H looks like the include guard of
       * unicode/ustring.h, which would then supply the declaration itself --
       * confirm the guard name.
       * Signature: takes a pointer to const UChar and returns an int32_t.
       * The function is only declared here; its definition is not in this file.
       */
00053 U_STABLE int32_t U_EXPORT2
00054 u_strlen(const UChar *s);
00055 #endif
00056 
00057 #ifndef U_HIDE_INTERNAL_API
00058 
      /* Everything down to the matching #endif is removed when U_HIDE_INTERNAL_API is defined. */
00063 #ifndef U_STRING_CASE_MAPPER_DEFINED
00064 #define U_STRING_CASE_MAPPER_DEFINED
00065 
      /*
       * UStringCaseMapper names a function type (not a pointer-to-function
       * type) with the U_CALLCONV calling convention.
       *
       * Parameters, as declared:
       *   csm          - const UCaseMap * passed through to the mapper
       *   dest         - output UChar buffer
       *   destCapacity - int32_t capacity paired with dest
       *   src          - input UChar text (const)
       *   srcLength    - int32_t length paired with src
       *   pErrorCode   - in/out UErrorCode pointer
       * Returns an int32_t.
       * NOTE(review): presumably the return value is the length of the mapped
       * output -- confirm against the implementations of this function type.
       *
       * The U_STRING_CASE_MAPPER_DEFINED guard keeps this typedef from being
       * emitted again once the macro has been defined.
       */
00070 typedef int32_t U_CALLCONV
00071 UStringCaseMapper(const UCaseMap *csm,
00072                   UChar *dest, int32_t destCapacity,
00073                   const UChar *src, int32_t srcLength,
00074                   UErrorCode *pErrorCode);
00075 
00076 #endif
00077 #endif  /* U_HIDE_INTERNAL_API */
00078 
00079 U_NAMESPACE_BEGIN
00080 
00081 class BreakIterator;        // unicode/brkiter.h
00082 class Locale;               // unicode/locid.h
00083 class StringCharacterIterator;
00084 class UnicodeStringAppendable;  // unicode/appendable.h
00085 
00086 /* The <iostream> include has been moved to unicode/ustream.h */
00087 
00098 #define US_INV icu::UnicodeString::kInvariant
      /*
       * US_INV is shorthand for the fully qualified enumerator
       * icu::UnicodeString::kInvariant (the only value of
       * UnicodeString::EInvariant). It is what the fallback branch of
       * UNICODE_STRING passes as the third constructor argument.
       */
00099 
00117 #if defined(U_DECLARE_UTF16)
      /*
       * UNICODE_STRING(cs, _length) builds an icu::UnicodeString from the
       * literal cs and the length _length. Exactly one of the four expansions
       * below is selected at preprocessing time.
       *
       * Case 1: the platform provides U_DECLARE_UTF16. The literal is wrapped
       * in U_DECLARE_UTF16(cs), cast to const UChar *, and handed to the
       * (UBool, const UChar *, int32_t) constructor with TRUE as first argument.
       */
00118 #   define UNICODE_STRING(cs, _length) icu::UnicodeString(TRUE, (const UChar *)U_DECLARE_UTF16(cs), _length)
00119 #elif U_SIZEOF_WCHAR_T==U_SIZEOF_UCHAR && (U_CHARSET_FAMILY==U_ASCII_FAMILY || (U_SIZEOF_UCHAR == 2 && defined(U_WCHAR_IS_UTF16)))
      /*
       * Case 2: wchar_t and UChar have the same size, and either the charset
       * family is ASCII or UChar is 2 bytes with U_WCHAR_IS_UTF16 defined.
       * Token pasting (L ## cs) turns the literal into a wide literal, which
       * is cast to const UChar * and passed to the same constructor as case 1.
       */
00120 #   define UNICODE_STRING(cs, _length) icu::UnicodeString(TRUE, (const UChar *)L ## cs, _length)
00121 #elif U_SIZEOF_UCHAR==1 && U_CHARSET_FAMILY==U_ASCII_FAMILY
      /*
       * Case 3: UChar is a single byte and the charset family is ASCII. The
       * narrow literal is cast to const UChar * as-is.
       */
00122 #   define UNICODE_STRING(cs, _length) icu::UnicodeString(TRUE, (const UChar *)cs, _length)
00123 #else
      /*
       * Fallback: no direct reinterpretation of the literal is possible, so
       * the (const char *, int32_t, EInvariant) constructor is used with
       * US_INV as the tag argument.
       */
00124 #   define UNICODE_STRING(cs, _length) icu::UnicodeString(cs, _length, US_INV)
00125 #endif
00126 
00140 #define UNICODE_STRING_SIMPLE(cs) UNICODE_STRING(cs, -1)
      /*
       * UNICODE_STRING_SIMPLE(cs) is UNICODE_STRING with the length argument
       * fixed to -1, so callers only supply the literal.
       * NOTE(review): presumably -1 tells the constructor to determine the
       * length from the terminating NUL -- confirm against the constructor
       * documentation.
       */
00141 
00149 #ifndef UNISTR_FROM_CHAR_EXPLICIT
      /*
       * UNISTR_FROM_CHAR_EXPLICIT is placed in front of the UnicodeString(UChar)
       * and UnicodeString(UChar32) constructor declarations.
       * The outer #ifndef lets a build predefine the macro; the default below is
       * chosen only when it has not been defined already.
       * Default: "explicit" if any of the four ICU library *_IMPLEMENTATION
       * macros is defined, otherwise empty (the constructors stay implicit).
       */
00150 # if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION)
00151     // Auto-"explicit" in ICU library code.
00152 #   define UNISTR_FROM_CHAR_EXPLICIT explicit
00153 # else
00154     // Empty by default for source code compatibility.
00155 #   define UNISTR_FROM_CHAR_EXPLICIT
00156 # endif
00157 #endif
00158 
00169 #ifndef UNISTR_FROM_STRING_EXPLICIT
      /*
       * UNISTR_FROM_STRING_EXPLICIT is placed in front of the
       * UnicodeString(const UChar *) and UnicodeString(const char *)
       * constructor declarations.
       * The outer #ifndef lets a build predefine the macro; the default below is
       * chosen only when it has not been defined already.
       * Default: "explicit" if any of the four ICU library *_IMPLEMENTATION
       * macros is defined, otherwise empty (the constructors stay implicit).
       */
00170 # if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION)
00171     // Auto-"explicit" in ICU library code.
00172 #   define UNISTR_FROM_STRING_EXPLICIT explicit
00173 # else
00174     // Empty by default for source code compatibility.
00175 #   define UNISTR_FROM_STRING_EXPLICIT
00176 # endif
00177 #endif
00178 
00248 class U_COMMON_API UnicodeString : public Replaceable
00249 {
00250 public:
00251 
00260   enum EInvariant {
          // Sole enumerator. The enum acts as a tag parameter type: it appears
          // as the last parameter of extract(int32_t, int32_t, char *, int32_t,
          // enum EInvariant) and of the UnicodeString(const char *, int32_t,
          // enum EInvariant) constructor, which distinguishes those overloads
          // from the other char * based ones. The US_INV macro expands to this
          // enumerator.
00265     kInvariant
00266   };
00267 
00268   //========================================
00269   // Read-only operations
00270   //========================================
00271 
00272   /* Comparison - bitwise only - for international comparison use collation */
00273 
00281   inline UBool operator== (const UnicodeString& text) const;
00282 
00290   inline UBool operator!= (const UnicodeString& text) const;
00291 
00299   inline UBool operator> (const UnicodeString& text) const;
00300 
00308   inline UBool operator< (const UnicodeString& text) const;
00309 
00317   inline UBool operator>= (const UnicodeString& text) const;
00318 
00326   inline UBool operator<= (const UnicodeString& text) const;
00327 
00339   inline int8_t compare(const UnicodeString& text) const;
00340 
00355   inline int8_t compare(int32_t start,
00356          int32_t length,
00357          const UnicodeString& text) const;
00358 
00376    inline int8_t compare(int32_t start,
00377          int32_t length,
00378          const UnicodeString& srcText,
00379          int32_t srcStart,
00380          int32_t srcLength) const;
00381 
00394   inline int8_t compare(const UChar *srcChars,
00395          int32_t srcLength) const;
00396 
00411   inline int8_t compare(int32_t start,
00412          int32_t length,
00413          const UChar *srcChars) const;
00414 
00432   inline int8_t compare(int32_t start,
00433          int32_t length,
00434          const UChar *srcChars,
00435          int32_t srcStart,
00436          int32_t srcLength) const;
00437 
00455   inline int8_t compareBetween(int32_t start,
00456             int32_t limit,
00457             const UnicodeString& srcText,
00458             int32_t srcStart,
00459             int32_t srcLimit) const;
00460 
00478   inline int8_t compareCodePointOrder(const UnicodeString& text) const;
00479 
00499   inline int8_t compareCodePointOrder(int32_t start,
00500                                       int32_t length,
00501                                       const UnicodeString& srcText) const;
00502 
00524    inline int8_t compareCodePointOrder(int32_t start,
00525                                        int32_t length,
00526                                        const UnicodeString& srcText,
00527                                        int32_t srcStart,
00528                                        int32_t srcLength) const;
00529 
00548   inline int8_t compareCodePointOrder(const UChar *srcChars,
00549                                       int32_t srcLength) const;
00550 
00570   inline int8_t compareCodePointOrder(int32_t start,
00571                                       int32_t length,
00572                                       const UChar *srcChars) const;
00573 
00595   inline int8_t compareCodePointOrder(int32_t start,
00596                                       int32_t length,
00597                                       const UChar *srcChars,
00598                                       int32_t srcStart,
00599                                       int32_t srcLength) const;
00600 
00622   inline int8_t compareCodePointOrderBetween(int32_t start,
00623                                              int32_t limit,
00624                                              const UnicodeString& srcText,
00625                                              int32_t srcStart,
00626                                              int32_t srcLimit) const;
00627 
00646   inline int8_t caseCompare(const UnicodeString& text, uint32_t options) const;
00647 
00668   inline int8_t caseCompare(int32_t start,
00669          int32_t length,
00670          const UnicodeString& srcText,
00671          uint32_t options) const;
00672 
00695   inline int8_t caseCompare(int32_t start,
00696          int32_t length,
00697          const UnicodeString& srcText,
00698          int32_t srcStart,
00699          int32_t srcLength,
00700          uint32_t options) const;
00701 
00721   inline int8_t caseCompare(const UChar *srcChars,
00722          int32_t srcLength,
00723          uint32_t options) const;
00724 
00745   inline int8_t caseCompare(int32_t start,
00746          int32_t length,
00747          const UChar *srcChars,
00748          uint32_t options) const;
00749 
00772   inline int8_t caseCompare(int32_t start,
00773          int32_t length,
00774          const UChar *srcChars,
00775          int32_t srcStart,
00776          int32_t srcLength,
00777          uint32_t options) const;
00778 
00801   inline int8_t caseCompareBetween(int32_t start,
00802             int32_t limit,
00803             const UnicodeString& srcText,
00804             int32_t srcStart,
00805             int32_t srcLimit,
00806             uint32_t options) const;
00807 
00815   inline UBool startsWith(const UnicodeString& text) const;
00816 
00827   inline UBool startsWith(const UnicodeString& srcText,
00828             int32_t srcStart,
00829             int32_t srcLength) const;
00830 
00839   inline UBool startsWith(const UChar *srcChars,
00840             int32_t srcLength) const;
00841 
00851   inline UBool startsWith(const UChar *srcChars,
00852             int32_t srcStart,
00853             int32_t srcLength) const;
00854 
00862   inline UBool endsWith(const UnicodeString& text) const;
00863 
00874   inline UBool endsWith(const UnicodeString& srcText,
00875           int32_t srcStart,
00876           int32_t srcLength) const;
00877 
00886   inline UBool endsWith(const UChar *srcChars,
00887           int32_t srcLength) const;
00888 
00899   inline UBool endsWith(const UChar *srcChars,
00900           int32_t srcStart,
00901           int32_t srcLength) const;
00902 
00903 
00904   /* Searching - bitwise only */
00905 
00914   inline int32_t indexOf(const UnicodeString& text) const;
00915 
00925   inline int32_t indexOf(const UnicodeString& text,
00926               int32_t start) const;
00927 
00939   inline int32_t indexOf(const UnicodeString& text,
00940               int32_t start,
00941               int32_t length) const;
00942 
00959   inline int32_t indexOf(const UnicodeString& srcText,
00960               int32_t srcStart,
00961               int32_t srcLength,
00962               int32_t start,
00963               int32_t length) const;
00964 
00976   inline int32_t indexOf(const UChar *srcChars,
00977               int32_t srcLength,
00978               int32_t start) const;
00979 
00992   inline int32_t indexOf(const UChar *srcChars,
00993               int32_t srcLength,
00994               int32_t start,
00995               int32_t length) const;
00996 
01013   int32_t indexOf(const UChar *srcChars,
01014               int32_t srcStart,
01015               int32_t srcLength,
01016               int32_t start,
01017               int32_t length) const;
01018 
01026   inline int32_t indexOf(UChar c) const;
01027 
01036   inline int32_t indexOf(UChar32 c) const;
01037 
01046   inline int32_t indexOf(UChar c,
01047               int32_t start) const;
01048 
01058   inline int32_t indexOf(UChar32 c,
01059               int32_t start) const;
01060 
01071   inline int32_t indexOf(UChar c,
01072               int32_t start,
01073               int32_t length) const;
01074 
01086   inline int32_t indexOf(UChar32 c,
01087               int32_t start,
01088               int32_t length) const;
01089 
01098   inline int32_t lastIndexOf(const UnicodeString& text) const;
01099 
01109   inline int32_t lastIndexOf(const UnicodeString& text,
01110               int32_t start) const;
01111 
01123   inline int32_t lastIndexOf(const UnicodeString& text,
01124               int32_t start,
01125               int32_t length) const;
01126 
01143   inline int32_t lastIndexOf(const UnicodeString& srcText,
01144               int32_t srcStart,
01145               int32_t srcLength,
01146               int32_t start,
01147               int32_t length) const;
01148 
01159   inline int32_t lastIndexOf(const UChar *srcChars,
01160               int32_t srcLength,
01161               int32_t start) const;
01162 
01175   inline int32_t lastIndexOf(const UChar *srcChars,
01176               int32_t srcLength,
01177               int32_t start,
01178               int32_t length) const;
01179 
01196   int32_t lastIndexOf(const UChar *srcChars,
01197               int32_t srcStart,
01198               int32_t srcLength,
01199               int32_t start,
01200               int32_t length) const;
01201 
01209   inline int32_t lastIndexOf(UChar c) const;
01210 
01219   inline int32_t lastIndexOf(UChar32 c) const;
01220 
01229   inline int32_t lastIndexOf(UChar c,
01230               int32_t start) const;
01231 
01241   inline int32_t lastIndexOf(UChar32 c,
01242               int32_t start) const;
01243 
01254   inline int32_t lastIndexOf(UChar c,
01255               int32_t start,
01256               int32_t length) const;
01257 
01269   inline int32_t lastIndexOf(UChar32 c,
01270               int32_t start,
01271               int32_t length) const;
01272 
01273 
01274   /* Character access */
01275 
01284   inline UChar charAt(int32_t offset) const;
01285 
01293   inline UChar operator[] (int32_t offset) const;
01294 
01306   UChar32 char32At(int32_t offset) const;
01307 
01323   int32_t getChar32Start(int32_t offset) const;
01324 
01341   int32_t getChar32Limit(int32_t offset) const;
01342 
01393   int32_t moveIndex32(int32_t index, int32_t delta) const;
01394 
01395   /* Substring extraction */
01396 
01412   inline void extract(int32_t start,
01413            int32_t length,
01414            UChar *dst,
01415            int32_t dstStart = 0) const;
01416 
01438   int32_t
01439   extract(UChar *dest, int32_t destCapacity,
01440           UErrorCode &errorCode) const;
01441 
01452   inline void extract(int32_t start,
01453            int32_t length,
01454            UnicodeString& target) const;
01455 
01467   inline void extractBetween(int32_t start,
01468               int32_t limit,
01469               UChar *dst,
01470               int32_t dstStart = 0) const;
01471 
01481   virtual void extractBetween(int32_t start,
01482               int32_t limit,
01483               UnicodeString& target) const;
01484 
01506   int32_t extract(int32_t start,
01507            int32_t startLength,
01508            char *target,
01509            int32_t targetCapacity,
01510            enum EInvariant inv) const;
01511 
01512 #if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION
01513 
01533   int32_t extract(int32_t start,
01534            int32_t startLength,
01535            char *target,
01536            uint32_t targetLength) const;
01537 
01538 #endif
01539 
01540 #if !UCONFIG_NO_CONVERSION
01541 
01567   inline int32_t extract(int32_t start,
01568                  int32_t startLength,
01569                  char *target,
01570                  const char *codepage = 0) const;
01571 
01601   int32_t extract(int32_t start,
01602            int32_t startLength,
01603            char *target,
01604            uint32_t targetLength,
01605            const char *codepage) const;
01606 
01624   int32_t extract(char *dest, int32_t destCapacity,
01625                   UConverter *cnv,
01626                   UErrorCode &errorCode) const;
01627 
01628 #endif
01629 
01643   UnicodeString tempSubString(int32_t start=0, int32_t length=INT32_MAX) const;
01644 
01655   inline UnicodeString tempSubStringBetween(int32_t start, int32_t limit=INT32_MAX) const;
01656 
01668   void toUTF8(ByteSink &sink) const;
01669 
01670 #if U_HAVE_STD_STRING
01671 
        /*
         * Routes the output of toUTF8(ByteSink &) into a caller-supplied
         * string-like object.
         *
         * StringClass is whatever type StringByteSink<> can be instantiated
         * with; this header does not constrain it further.
         * NOTE(review): StringByteSink presumably comes from
         * unicode/bytestream.h (included above); whether bytes are appended to
         * or replace the existing contents of result is decided there, not
         * here -- confirm.
         *
         * @param result object that receives the bytes
         * @return a reference to the same object that was passed in as result
         *
         * Only available when U_HAVE_STD_STRING is non-zero.
         */
01684   template<typename StringClass>
01685   StringClass &toUTF8String(StringClass &result) const {
          // Adapt the caller's object to the ByteSink interface that
          // toUTF8(ByteSink &) writes to.
01686     StringByteSink<StringClass> utf8Sink(&result);
01687     toUTF8(utf8Sink);
          // Hand back the very object the caller supplied.
01688     return result;
01689   }
01690 
01691 #endif
01692 
01708   int32_t toUTF32(UChar32 *utf32, int32_t capacity, UErrorCode &errorCode) const;
01709 
01710   /* Length operations */
01711 
01720   inline int32_t length(void) const;
01721 
01735   int32_t
01736   countChar32(int32_t start=0, int32_t length=INT32_MAX) const;
01737 
01761   UBool
01762   hasMoreChar32Than(int32_t start, int32_t length, int32_t number) const;
01763 
01769   inline UBool isEmpty(void) const;
01770 
01780   inline int32_t getCapacity(void) const;
01781 
01782   /* Other operations */
01783 
01789   inline int32_t hashCode(void) const;
01790 
01803   inline UBool isBogus(void) const;
01804 
01805 
01806   //========================================
01807   // Write operations
01808   //========================================
01809 
01810   /* Assignment operations */
01811 
01819   UnicodeString &operator=(const UnicodeString &srcText);
01820 
01843   UnicodeString &fastCopyFrom(const UnicodeString &src);
01844 
01852   inline UnicodeString& operator= (UChar ch);
01853 
01861   inline UnicodeString& operator= (UChar32 ch);
01862 
01874   inline UnicodeString& setTo(const UnicodeString& srcText,
01875                int32_t srcStart);
01876 
01890   inline UnicodeString& setTo(const UnicodeString& srcText,
01891                int32_t srcStart,
01892                int32_t srcLength);
01893 
01902   inline UnicodeString& setTo(const UnicodeString& srcText);
01903 
01912   inline UnicodeString& setTo(const UChar *srcChars,
01913                int32_t srcLength);
01914 
01923   UnicodeString& setTo(UChar srcChar);
01924 
01933   UnicodeString& setTo(UChar32 srcChar);
01934 
01958   UnicodeString &setTo(UBool isTerminated,
01959                        const UChar *text,
01960                        int32_t textLength);
01961 
01981   UnicodeString &setTo(UChar *buffer,
01982                        int32_t buffLength,
01983                        int32_t buffCapacity);
01984 
02025   void setToBogus();
02026 
02034   UnicodeString& setCharAt(int32_t offset,
02035                UChar ch);
02036 
02037 
02038   /* Append operations */
02039 
02047  inline  UnicodeString& operator+= (UChar ch);
02048 
02056  inline  UnicodeString& operator+= (UChar32 ch);
02057 
02065   inline UnicodeString& operator+= (const UnicodeString& srcText);
02066 
02081   inline UnicodeString& append(const UnicodeString& srcText,
02082             int32_t srcStart,
02083             int32_t srcLength);
02084 
02092   inline UnicodeString& append(const UnicodeString& srcText);
02093 
02107   inline UnicodeString& append(const UChar *srcChars,
02108             int32_t srcStart,
02109             int32_t srcLength);
02110 
02120   inline UnicodeString& append(const UChar *srcChars,
02121             int32_t srcLength);
02122 
02129   inline UnicodeString& append(UChar srcChar);
02130 
02137   UnicodeString& append(UChar32 srcChar);
02138 
02139 
02140   /* Insert operations */
02141 
02155   inline UnicodeString& insert(int32_t start,
02156             const UnicodeString& srcText,
02157             int32_t srcStart,
02158             int32_t srcLength);
02159 
02168   inline UnicodeString& insert(int32_t start,
02169             const UnicodeString& srcText);
02170 
02184   inline UnicodeString& insert(int32_t start,
02185             const UChar *srcChars,
02186             int32_t srcStart,
02187             int32_t srcLength);
02188 
02198   inline UnicodeString& insert(int32_t start,
02199             const UChar *srcChars,
02200             int32_t srcLength);
02201 
02210   inline UnicodeString& insert(int32_t start,
02211             UChar srcChar);
02212 
02221   inline UnicodeString& insert(int32_t start,
02222             UChar32 srcChar);
02223 
02224 
02225   /* Replace operations */
02226 
02244   UnicodeString& replace(int32_t start,
02245              int32_t length,
02246              const UnicodeString& srcText,
02247              int32_t srcStart,
02248              int32_t srcLength);
02249 
02262   UnicodeString& replace(int32_t start,
02263              int32_t length,
02264              const UnicodeString& srcText);
02265 
02283   UnicodeString& replace(int32_t start,
02284              int32_t length,
02285              const UChar *srcChars,
02286              int32_t srcStart,
02287              int32_t srcLength);
02288 
02301   inline UnicodeString& replace(int32_t start,
02302              int32_t length,
02303              const UChar *srcChars,
02304              int32_t srcLength);
02305 
02317   inline UnicodeString& replace(int32_t start,
02318              int32_t length,
02319              UChar srcChar);
02320 
02332   UnicodeString& replace(int32_t start, int32_t length, UChar32 srcChar);
02333 
02343   inline UnicodeString& replaceBetween(int32_t start,
02344                 int32_t limit,
02345                 const UnicodeString& srcText);
02346 
02361   inline UnicodeString& replaceBetween(int32_t start,
02362                 int32_t limit,
02363                 const UnicodeString& srcText,
02364                 int32_t srcStart,
02365                 int32_t srcLimit);
02366 
02377   virtual void handleReplaceBetween(int32_t start,
02378                                     int32_t limit,
02379                                     const UnicodeString& text);
02380 
02386   virtual UBool hasMetaData() const;
02387 
02403   virtual void copy(int32_t start, int32_t limit, int32_t dest);
02404 
02405   /* Search and replace operations */
02406 
02415   inline UnicodeString& findAndReplace(const UnicodeString& oldText,
02416                 const UnicodeString& newText);
02417 
02429   inline UnicodeString& findAndReplace(int32_t start,
02430                 int32_t length,
02431                 const UnicodeString& oldText,
02432                 const UnicodeString& newText);
02433 
02451   UnicodeString& findAndReplace(int32_t start,
02452                 int32_t length,
02453                 const UnicodeString& oldText,
02454                 int32_t oldStart,
02455                 int32_t oldLength,
02456                 const UnicodeString& newText,
02457                 int32_t newStart,
02458                 int32_t newLength);
02459 
02460 
02461   /* Remove operations */
02462 
02468   inline UnicodeString& remove(void);
02469 
02478   inline UnicodeString& remove(int32_t start,
02479                                int32_t length = (int32_t)INT32_MAX);
02480 
02489   inline UnicodeString& removeBetween(int32_t start,
02490                                       int32_t limit = (int32_t)INT32_MAX);
02491 
02501   inline UnicodeString &retainBetween(int32_t start, int32_t limit = INT32_MAX);
02502 
02503   /* Length operations */
02504 
02516   UBool padLeading(int32_t targetLength,
02517                     UChar padChar = 0x0020);
02518 
02530   UBool padTrailing(int32_t targetLength,
02531                      UChar padChar = 0x0020);
02532 
02539   inline UBool truncate(int32_t targetLength);
02540 
02546   UnicodeString& trim(void);
02547 
02548 
02549   /* Miscellaneous operations */
02550 
02556   inline UnicodeString& reverse(void);
02557 
02566   inline UnicodeString& reverse(int32_t start,
02567              int32_t length);
02568 
02575   UnicodeString& toUpper(void);
02576 
02584   UnicodeString& toUpper(const Locale& locale);
02585 
02592   UnicodeString& toLower(void);
02593 
02601   UnicodeString& toLower(const Locale& locale);
02602 
02603 #if !UCONFIG_NO_BREAK_ITERATION
02604 
02631   UnicodeString &toTitle(BreakIterator *titleIter);
02632 
02660   UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale);
02661 
02693   UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale, uint32_t options);
02694 
02695 #endif
02696 
02710   UnicodeString &foldCase(uint32_t options=0 /*U_FOLD_CASE_DEFAULT*/);
02711 
02712   //========================================
02713   // Access to the internal buffer
02714   //========================================
02715 
02759   UChar *getBuffer(int32_t minCapacity);
02760 
02781   void releaseBuffer(int32_t newLength=-1);
02782 
02813   inline const UChar *getBuffer() const;
02814 
02848   inline const UChar *getTerminatedBuffer();
02849 
02850   //========================================
02851   // Constructors
02852   //========================================
02853 
02857   inline UnicodeString();
02858 
02870   UnicodeString(int32_t capacity, UChar32 c, int32_t count);
02871 
02881   UNISTR_FROM_CHAR_EXPLICIT UnicodeString(UChar ch);
02882 
02892   UNISTR_FROM_CHAR_EXPLICIT UnicodeString(UChar32 ch);
02893 
02904   UNISTR_FROM_STRING_EXPLICIT UnicodeString(const UChar *text);
02905 
02913   UnicodeString(const UChar *text,
02914         int32_t textLength);
02915 
02938   UnicodeString(UBool isTerminated,
02939                 const UChar *text,
02940                 int32_t textLength);
02941 
02960   UnicodeString(UChar *buffer, int32_t buffLength, int32_t buffCapacity);
02961 
02962 #if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION
02963 
02983   UNISTR_FROM_STRING_EXPLICIT UnicodeString(const char *codepageData);
02984 
02993   UnicodeString(const char *codepageData, int32_t dataLength);
02994 
02995 #endif
02996 
02997 #if !UCONFIG_NO_CONVERSION
02998 
03016   UnicodeString(const char *codepageData, const char *codepage);
03017 
03035   UnicodeString(const char *codepageData, int32_t dataLength, const char *codepage);
03036 
03058   UnicodeString(
03059         const char *src, int32_t srcLength,
03060         UConverter *cnv,
03061         UErrorCode &errorCode);
03062 
03063 #endif
03064 
03089   UnicodeString(const char *src, int32_t length, enum EInvariant inv);
03090 
03091 
03097   UnicodeString(const UnicodeString& that);
03098 
03105   UnicodeString(const UnicodeString& src, int32_t srcStart);
03106 
03114   UnicodeString(const UnicodeString& src, int32_t srcStart, int32_t srcLength);
03115 
03132   virtual Replaceable *clone() const;
03133 
03137   virtual ~UnicodeString();
03138 
03152   static UnicodeString fromUTF8(const StringPiece &utf8);
03153 
03165   static UnicodeString fromUTF32(const UChar32 *utf32, int32_t length);
03166 
03167   /* Miscellaneous operations */
03168 
03203   UnicodeString unescape() const;
03204 
03224   UChar32 unescapeAt(int32_t &offset) const;
03225 
03231   static UClassID U_EXPORT2 getStaticClassID();
03232 
03238   virtual UClassID getDynamicClassID() const;
03239 
03240   //========================================
03241   // Implementation methods
03242   //========================================
03243 
03244 protected:
03249   virtual int32_t getLength() const;
03250 
03256   virtual UChar getCharAt(int32_t offset) const;
03257 
03263   virtual UChar32 getChar32At(int32_t offset) const;
03264 
03265 private:
03266   // For char* constructors. Could be made public.
03267   UnicodeString &setToUTF8(const StringPiece &utf8);
03268   // For extract(char*).
03269   // We could make a toUTF8(target, capacity, errorCode) public but not
03270   // this version: New API will be cleaner if we make callers create substrings
03271   // rather than having start+length on every method,
03272   // and it should take a UErrorCode&.
03273   int32_t
03274   toUTF8(int32_t start, int32_t len,
03275          char *target, int32_t capacity) const;
03276 
03281   UBool doEquals(const UnicodeString &text, int32_t len) const;
03282 
03283   inline int8_t
03284   doCompare(int32_t start,
03285            int32_t length,
03286            const UnicodeString& srcText,
03287            int32_t srcStart,
03288            int32_t srcLength) const;
03289 
03290   int8_t doCompare(int32_t start,
03291            int32_t length,
03292            const UChar *srcChars,
03293            int32_t srcStart,
03294            int32_t srcLength) const;
03295 
03296   inline int8_t
03297   doCompareCodePointOrder(int32_t start,
03298                           int32_t length,
03299                           const UnicodeString& srcText,
03300                           int32_t srcStart,
03301                           int32_t srcLength) const;
03302 
03303   int8_t doCompareCodePointOrder(int32_t start,
03304                                  int32_t length,
03305                                  const UChar *srcChars,
03306                                  int32_t srcStart,
03307                                  int32_t srcLength) const;
03308 
03309   inline int8_t
03310   doCaseCompare(int32_t start,
03311                 int32_t length,
03312                 const UnicodeString &srcText,
03313                 int32_t srcStart,
03314                 int32_t srcLength,
03315                 uint32_t options) const;
03316 
03317   int8_t
03318   doCaseCompare(int32_t start,
03319                 int32_t length,
03320                 const UChar *srcChars,
03321                 int32_t srcStart,
03322                 int32_t srcLength,
03323                 uint32_t options) const;
03324 
03325   int32_t doIndexOf(UChar c,
03326             int32_t start,
03327             int32_t length) const;
03328 
03329   int32_t doIndexOf(UChar32 c,
03330                         int32_t start,
03331                         int32_t length) const;
03332 
03333   int32_t doLastIndexOf(UChar c,
03334                 int32_t start,
03335                 int32_t length) const;
03336 
03337   int32_t doLastIndexOf(UChar32 c,
03338                             int32_t start,
03339                             int32_t length) const;
03340 
03341   void doExtract(int32_t start,
03342          int32_t length,
03343          UChar *dst,
03344          int32_t dstStart) const;
03345 
03346   inline void doExtract(int32_t start,
03347          int32_t length,
03348          UnicodeString& target) const;
03349 
03350   inline UChar doCharAt(int32_t offset)  const;
03351 
03352   UnicodeString& doReplace(int32_t start,
03353                int32_t length,
03354                const UnicodeString& srcText,
03355                int32_t srcStart,
03356                int32_t srcLength);
03357 
03358   UnicodeString& doReplace(int32_t start,
03359                int32_t length,
03360                const UChar *srcChars,
03361                int32_t srcStart,
03362                int32_t srcLength);
03363 
03364   UnicodeString& doReverse(int32_t start,
03365                int32_t length);
03366 
03367   // calculate hash code
03368   int32_t doHashCode(void) const;
03369 
03370   // get pointer to start of array
03371   // these do not check for kOpenGetBuffer, unlike the public getBuffer() function
03372   inline UChar* getArrayStart(void);
03373   inline const UChar* getArrayStart(void) const;
03374 
03375   // A UnicodeString object (not necessarily its current buffer)
03376   // is writable unless it isBogus() or it has an "open" getBuffer(minCapacity).
03377   inline UBool isWritable() const;
03378 
03379   // Is the current buffer writable?
03380   inline UBool isBufferWritable() const;
03381 
03382   // None of the following does releaseArray().
03383   inline void setLength(int32_t len);        // sets only fShortLength and fLength
03384   inline void setToEmpty();                  // sets fFlags=kShortString
03385   inline void setArray(UChar *array, int32_t len, int32_t capacity); // does not set fFlags
03386 
03387   // allocate the array; result may be fStackBuffer
03388   // sets refCount to 1 if appropriate
03389   // sets fArray, fCapacity, and fFlags
03390   // returns boolean for success or failure
03391   UBool allocate(int32_t capacity);
03392 
03393   // release the array if owned
03394   void releaseArray(void);
03395 
03396   // turn a bogus string into an empty one
03397   void unBogus();
03398 
03399   // implements assignment operator, copy constructor, and fastCopyFrom()
03400   UnicodeString &copyFrom(const UnicodeString &src, UBool fastCopy=FALSE);
03401 
03402   // Pin start and limit to acceptable values.
03403   inline void pinIndex(int32_t& start) const;
03404   inline void pinIndices(int32_t& start,
03405                          int32_t& length) const;
03406 
03407 #if !UCONFIG_NO_CONVERSION
03408 
03409   /* Internal extract() using UConverter. */
03410   int32_t doExtract(int32_t start, int32_t length,
03411                     char *dest, int32_t destCapacity,
03412                     UConverter *cnv,
03413                     UErrorCode &errorCode) const;
03414 
03415   /*
03416    * Real constructor for converting from codepage data.
03417    * It assumes that it is called with !fRefCounted.
03418    *
03419    * If <code>codepage==0</code>, then the default converter
03420    * is used for the platform encoding.
03421    * If <code>codepage</code> is an empty string (<code>""</code>),
03422    * then a simple conversion is performed on the codepage-invariant
03423    * subset ("invariant characters") of the platform encoding. See utypes.h.
03424    */
03425   void doCodepageCreate(const char *codepageData,
03426                         int32_t dataLength,
03427                         const char *codepage);
03428 
03429   /*
03430    * Worker function for creating a UnicodeString from
03431    * a codepage string using a UConverter.
03432    */
03433   void
03434   doCodepageCreate(const char *codepageData,
03435                    int32_t dataLength,
03436                    UConverter *converter,
03437                    UErrorCode &status);
03438 
03439 #endif
03440 
03441   /*
03442    * This function is called when write access to the array
03443    * is necessary.
03444    *
03445    * We need to make a copy of the array if
03446    * the buffer is read-only, or
03447    * the buffer is refCounted (shared), and refCount>1, or
03448    * the buffer is too small.
03449    *
03450    * Return FALSE if memory could not be allocated.
03451    */
03452   UBool cloneArrayIfNeeded(int32_t newCapacity = -1,
03453                             int32_t growCapacity = -1,
03454                             UBool doCopyArray = TRUE,
03455                             int32_t **pBufferToDelete = 0,
03456                             UBool forceClone = FALSE);
03457 
03463   UnicodeString &
03464   caseMap(const UCaseMap *csm, UStringCaseMapper *stringCaseMapper);
03465 
03466   // ref counting
03467   void addRef(void);
03468   int32_t removeRef(void);
03469   int32_t refCount(void) const;
03470 
03471   // constants: buffer sizes, sentinel values, and the bit masks stored in fFlags
03472   enum {
03473     // Set the stack buffer size so that sizeof(UnicodeString) is,
03474     // naturally (without padding), a multiple of sizeof(pointer).
03475     US_STACKBUF_SIZE= sizeof(void *)==4 ? 13 : 15, // Size of stack buffer for short strings, in UChars (13 with 4-byte pointers, else 15)
03476     kInvalidUChar=0xffff, // value returned by doCharAt() for an out-of-range offset
03477     kGrowSize=128, // grow size for this buffer
03478     kInvalidHashCode=0, // invalid hash code
03479     kEmptyHashCode=1, // hash code for empty string
03480
03481     // bit flag values for fFlags
03482     kIsBogus=1,         // this string is bogus, i.e., not valid or NULL
03483     kUsingStackBuffer=2,// using fUnion.fStackBuffer instead of fUnion.fFields
03484     kRefCounted=4,      // there is a refCount field before the characters in fArray
03485     kBufferIsReadonly=8,// do not write to this buffer
03486     kOpenGetBuffer=16,  // getBuffer(minCapacity) was called (is "open"),
03487                         // and releaseBuffer(newLength) must be called
03488
03489     // combined values for convenience
03490     kShortString=kUsingStackBuffer,   // text is kept in the in-object stack buffer
03491     kLongString=kRefCounted,          // text is kept in a ref-counted fArray
03492     kReadonlyAlias=kBufferIsReadonly, // fArray must not be written to
03493     kWritableAlias=0                  // none of the flag bits is set
03494   };
03495 
03496   friend class StringThreadTest;
03497   friend class UnicodeStringAppendable;
03498 
03499   union StackBufferOrFields;        // forward declaration necessary before friend declaration
03500   friend union StackBufferOrFields; // make US_STACKBUF_SIZE visible inside fUnion
03501 
03502   /*
03503    * The following are all the class fields that are stored
03504    * in each UnicodeString object.
03505    * Note that UnicodeString has virtual functions,
03506    * therefore there is an implicit vtable pointer
03507    * as the first real field.
03508    * The fields should be aligned such that no padding is necessary.
03509    * On 32-bit machines, the size should be 32 bytes,
03510    * on 64-bit machines (8-byte pointers), it should be 40 bytes.
03511    *
03512    * We use a hack to achieve this.
03513    *
03514    * With at least some compilers, each of the following is forced to
03515    * a multiple of sizeof(pointer) [the largest field base unit here is a data pointer],
03516    * rounded up with additional padding if the fields do not already fit that requirement:
03517    * - sizeof(class UnicodeString)
03518    * - offsetof(UnicodeString, fUnion)
03519    * - sizeof(fUnion)
03520    * - sizeof(fFields)
03521    *
03522    * In order to avoid padding, we make sizeof(fStackBuffer)=16 (=8 UChars)
03523    * which is at least as large as sizeof(fFields) on 32-bit and 64-bit machines.
03524    * (Padding at the end of fFields is ok:
03525    * As long as there is no padding after fStackBuffer, it is not wasted space.)
03526    *
03527    * We further assume that the compiler does not reorder the fields,
03528    * so that fRestOfStackBuffer (which holds a few more UChars) immediately follows after fUnion,
03529    * with at most some padding (but no other field) in between.
03530    * (Padding there would be wasted space, but functionally harmless.)
03531    *
03532    * We use a few more sizeof(pointer)'s chunks of space with
03533    * fRestOfStackBuffer, fShortLength and fFlags,
03534    * to get up exactly to the intended sizeof(UnicodeString).
03535    */
03536   // (implicit) *vtable;
03537   union StackBufferOrFields {
03538     // fStackBuffer is used iff (fFlags&kUsingStackBuffer)
03539     // else fFields is used
03540     UChar fStackBuffer[8];  // buffer for short strings, together with fRestOfStackBuffer
03541     struct {
03542       UChar   *fArray;    // the Unicode data
03543       int32_t fCapacity;  // capacity of fArray (in UChars)
03544       int32_t fLength;    // number of characters in fArray if >127; else undefined (setLength() writes it only for len>127)
03545     } fFields;
03546   } fUnion;
03547   UChar fRestOfStackBuffer[US_STACKBUF_SIZE-8]; // remaining UChars of the short-string buffer; must directly follow fUnion
03548   int8_t fShortLength;  // 0..127: length  <0: real length is in fUnion.fFields.fLength
03549   uint8_t fFlags;       // bit flags: see constants above
03550 };
03551 
03560 U_COMMON_API UnicodeString U_EXPORT2
03561 operator+ (const UnicodeString &s1, const UnicodeString &s2);
03562 
03563 //========================================
03564 // Inline members
03565 //========================================
03566 
03567 //========================================
03568 // Privates
03569 //========================================
03570 
03571 // Clamp start in place to the valid index range [0, length()].
03572 inline void UnicodeString::pinIndex(int32_t& start) const {
03573   const int32_t len = length();
03574   if(start < 0) {
03575     start = 0;    // below the string: snap to the beginning
03576   } else if(len < start) {
03577     start = len;  // past the end: snap to length()
03578   }
03579 }
03581 
03582 // Clamp start to [0, length()] and then _length to [0, length() - start], both in place.
03583 inline void UnicodeString::pinIndices(int32_t& start, int32_t& _length) const {
03584   const int32_t total = length();
03585   if(start < 0) {
03586     start = 0;
03587   } else if(total < start) {
03588     start = total;
03589   }
03590   // what is left of the string after the (already pinned) start
03591   const int32_t available = total - start;
03592   if(_length < 0) {
03593     _length = 0;
03594   } else if(available < _length) {
03595     _length = available;
03596   }
03597 }
03599 
03600 // Writable start of the text: fUnion.fFields.fArray unless kUsingStackBuffer selects the stack buffer.
03601 inline UChar* UnicodeString::getArrayStart()
03602 { return (fFlags&kUsingStackBuffer)==0 ? fUnion.fFields.fArray : fUnion.fStackBuffer; }
03603 
03604 // Read-only start of the text: fUnion.fFields.fArray unless kUsingStackBuffer selects the stack buffer.
03605 inline const UChar* UnicodeString::getArrayStart() const
03606 { return (fFlags&kUsingStackBuffer)==0 ? fUnion.fFields.fArray : fUnion.fStackBuffer; }
03607 
03608 //========================================
03609 // Default constructor
03610 //========================================
03611 
03612 // Default constructor: starts out as an empty short string
03613 // (fShortLength=0, fFlags=kShortString); the body has nothing else to do.
03614 inline UnicodeString::UnicodeString()
03615   : fShortLength(0), fFlags(kShortString)
03616 {}
03617 
03618 //========================================
03619 // Read-only implementation methods
03620 //========================================
03621 // Length in UChars: fShortLength when it is non-negative, else fUnion.fFields.fLength.
03622 inline int32_t UnicodeString::length() const
03623 { return fShortLength<0 ? fUnion.fFields.fLength : fShortLength; }
03624 
03625 // Capacity in UChars: US_STACKBUF_SIZE for the stack buffer, else fUnion.fFields.fCapacity.
03626 inline int32_t UnicodeString::getCapacity() const
03627 { return (fFlags&kUsingStackBuffer)==0 ? fUnion.fFields.fCapacity : US_STACKBUF_SIZE; }
03628 
03629 // Public hash accessor; simply hands back the result of the private doHashCode().
03630 inline int32_t UnicodeString::hashCode() const
03631 { const int32_t code = doHashCode(); return code; }
03632 
03633 // Nonzero if the kIsBogus bit is set in fFlags.
03634 inline UBool UnicodeString::isBogus() const
03635 { const int bogusBit = fFlags & kIsBogus; return (UBool)bogusBit; }
03636 
03637 // TRUE unless the string is bogus or has an open getBuffer(minCapacity).
03638 inline UBool UnicodeString::isWritable() const
03639 { return (UBool)((fFlags&(kOpenGetBuffer|kIsBogus))==0); }
03640 
03641 // TRUE if the buffer can be modified in place: writable string, not read-only, and not shared.
03642 inline UBool UnicodeString::isBufferWritable() const {
03643   if(fFlags&(kOpenGetBuffer|kIsBogus|kBufferIsReadonly)) {
03644     return (UBool)FALSE;  // open getBuffer(), bogus, or read-only buffer
03645   }
03646   return (UBool)((fFlags&kRefCounted)==0 || refCount()==1);  // a ref-counted buffer must be unshared
03647 }
03648 
03649 // Read-only pointer to the text, or 0 while the string is bogus or has an open getBuffer().
03650 inline const UChar * UnicodeString::getBuffer() const {
03651   if(fFlags&(kIsBogus|kOpenGetBuffer)) {
03652     return 0;
03653   }
03654   // otherwise pick the stack buffer or the fArray pointer, depending on kUsingStackBuffer
03655   return (fFlags&kUsingStackBuffer) ? fUnion.fStackBuffer : fUnion.fFields.fArray;
03656 }
03659 
03660 //========================================
03661 // Read-only alias methods
03662 //========================================
03663 // UnicodeString flavor of doCompare(): pins the source range and delegates to the
03664 // UChar* overload. A bogus srcText yields 0 if this string is bogus too, and 1 otherwise.
03665 inline int8_t UnicodeString::doCompare(int32_t start, int32_t thisLength,
03666                          const UnicodeString& srcText,
03667                          int32_t srcStart, int32_t srcLength) const {
03668   if(!srcText.isBogus()) {
03669     srcText.pinIndices(srcStart, srcLength);
03670     const UChar *srcChars = srcText.getArrayStart();
03671     return doCompare(start, thisLength, srcChars, srcStart, srcLength);
03672   }
03673   return (int8_t)!isBogus();
03674 }
03677 
03678 // Two bogus strings are equal; otherwise both must be non-bogus, equally long, and doEquals().
03679 inline UBool UnicodeString::operator== (const UnicodeString& text) const {
03680   const UBool thisBogus = isBogus(), textBogus = text.isBogus();
03681   if(thisBogus || textBogus) {
03682     return thisBogus && textBogus;   // equal only if both are bogus
03683   }
03684   const int32_t len = length();
03685   return len == text.length() && doEquals(text, len);
03686 }
03688 
03689 // Inequality is the negation of operator==, including its treatment of bogus strings.
03690 inline UBool UnicodeString::operator!= (const UnicodeString& text) const
03691 { const UBool same = operator==(text); return !same; }
03692 
03693 // Greater-than: TRUE when the whole-string doCompare() against text reports 1.
03694 inline UBool UnicodeString::operator> (const UnicodeString& text) const
03695 { const int8_t order = doCompare(0, length(), text, 0, text.length()); return order == 1; }
03696 
03697 // Less-than: TRUE when the whole-string doCompare() against text reports -1.
03698 inline UBool UnicodeString::operator< (const UnicodeString& text) const
03699 { const int8_t order = doCompare(0, length(), text, 0, text.length()); return order == -1; }
03700 
03701 // Greater-or-equal: TRUE unless the whole-string doCompare() against text reports -1.
03702 inline UBool UnicodeString::operator>= (const UnicodeString& text) const
03703 { const int8_t order = doCompare(0, length(), text, 0, text.length()); return order != -1; }
03704 
03705 // Less-or-equal: TRUE unless the whole-string doCompare() against text reports 1.
03706 inline UBool UnicodeString::operator<= (const UnicodeString& text) const
03707 { const int8_t order = doCompare(0, length(), text, 0, text.length()); return order != 1; }
03708 
03709 // Compare this whole string with the whole of text via doCompare().
03710 inline int8_t UnicodeString::compare(const UnicodeString& text) const
03711 { const int32_t textLength = text.length(); return doCompare(0, length(), text, 0, textLength); }
03712 
03713 // Compare the range (start, _length) of this string with all of srcText.
03714 inline int8_t UnicodeString::compare(int32_t start, int32_t _length,
03715                        const UnicodeString& srcText) const {
03716   return doCompare(start, _length, srcText, 0, srcText.length());
03717 }
03718 
03719 // Compare this whole string with srcChars (source start 0, source length srcLength).
03720 inline int8_t UnicodeString::compare(const UChar *srcChars, int32_t srcLength) const {
03721   return doCompare(0, length(), srcChars, 0, srcLength);
03722 }
03723 
03724 // Compare the range (start, _length) of this string with the range
03725 // (srcStart, srcLength) of srcText; plain forwarder to doCompare().
03726 inline int8_t UnicodeString::compare(int32_t start, int32_t _length,
03727                        const UnicodeString& srcText,
03728                        int32_t srcStart, int32_t srcLength) const {
03729   return doCompare(start, _length, srcText, srcStart, srcLength);
03730 }
03731 
03732 // Compare this string's range with srcChars; _length also serves as the source length.
03733 inline int8_t UnicodeString::compare(int32_t start, int32_t _length,
03734                        const UChar *srcChars) const {
03735   return doCompare(start, _length, srcChars, 0, _length);
03736 }
03737 
03738 // Compare the range (start, _length) of this string with the range
03739 // (srcStart, srcLength) of srcChars; plain forwarder to doCompare().
03740 inline int8_t UnicodeString::compare(int32_t start, int32_t _length,
03741                        const UChar *srcChars,
03742                        int32_t srcStart, int32_t srcLength) const {
03743   return doCompare(start, _length, srcChars, srcStart, srcLength);
03744 }
03745 
03746 // Like compare(), but both ranges are given as [start, limit) index pairs,
03747 // which are converted to (start, length) before calling doCompare().
03748 inline int8_t UnicodeString::compareBetween(int32_t start, int32_t limit,
03749                               const UnicodeString& srcText,
03750                               int32_t srcStart, int32_t srcLimit) const {
03751   const int32_t thisLength = limit - start, srcLength = srcLimit - srcStart;
03752   return doCompare(start, thisLength, srcText, srcStart, srcLength);
03753 }
03754 
03755 // UnicodeString flavor of doCompareCodePointOrder(): pins the source range and
03756 // delegates to the UChar* overload. A bogus srcText gives 0 if this is bogus too, else 1.
03757 inline int8_t UnicodeString::doCompareCodePointOrder(int32_t start, int32_t thisLength,
03758                                        const UnicodeString& srcText,
03759                                        int32_t srcStart, int32_t srcLength) const {
03760   if(!srcText.isBogus()) {
03761     srcText.pinIndices(srcStart, srcLength);
03762     const UChar *srcChars = srcText.getArrayStart();
03763     return doCompareCodePointOrder(start, thisLength, srcChars, srcStart, srcLength);
03764   }
03765   return (int8_t)!isBogus();
03766 }
03769 
03770 // Compare this whole string with the whole of text via doCompareCodePointOrder().
03771 inline int8_t UnicodeString::compareCodePointOrder(const UnicodeString& text) const
03772 { const int32_t textLength = text.length(); return doCompareCodePointOrder(0, length(), text, 0, textLength); }
03773 
03774 // Compare the range (start, _length) of this string with all of srcText.
03775 inline int8_t UnicodeString::compareCodePointOrder(int32_t start, int32_t _length,
03776                                      const UnicodeString& srcText) const {
03777   return doCompareCodePointOrder(start, _length, srcText, 0, srcText.length());
03778 }
03779 
03780 // Compare this whole string with srcChars (source start 0, source length srcLength).
03781 inline int8_t UnicodeString::compareCodePointOrder(const UChar *srcChars, int32_t srcLength) const {
03782   return doCompareCodePointOrder(0, length(), srcChars, 0, srcLength);
03783 }
03784 
03785 // Compare the range (start, _length) of this string with the range
03786 // (srcStart, srcLength) of srcText; plain forwarder to doCompareCodePointOrder().
03787 inline int8_t UnicodeString::compareCodePointOrder(int32_t start, int32_t _length,
03788                                      const UnicodeString& srcText,
03789                                      int32_t srcStart, int32_t srcLength) const {
03790   return doCompareCodePointOrder(start, _length, srcText, srcStart, srcLength);
03791 }
03792 
03793 // Compare this string's range with srcChars; _length also serves as the source length.
03794 inline int8_t UnicodeString::compareCodePointOrder(int32_t start, int32_t _length,
03795                                      const UChar *srcChars) const {
03796   return doCompareCodePointOrder(start, _length, srcChars, 0, _length);
03797 }
03798 
03799 // Compare the range (start, _length) of this string with the range
03800 // (srcStart, srcLength) of srcChars; plain forwarder to doCompareCodePointOrder().
03801 inline int8_t UnicodeString::compareCodePointOrder(int32_t start, int32_t _length,
03802                                      const UChar *srcChars,
03803                                      int32_t srcStart, int32_t srcLength) const {
03804   return doCompareCodePointOrder(start, _length, srcChars, srcStart, srcLength);
03805 }
03806 
03807 // Like compareCodePointOrder(), but both ranges are given as [start, limit) index
03808 // pairs, which are converted to (start, length) before the call.
03809 inline int8_t UnicodeString::compareCodePointOrderBetween(int32_t start, int32_t limit,
03810                                             const UnicodeString& srcText,
03811                                             int32_t srcStart, int32_t srcLimit) const {
03812   const int32_t thisLength = limit - start, srcLength = srcLimit - srcStart;
03813   return doCompareCodePointOrder(start, thisLength, srcText, srcStart, srcLength);
03814 }
03815 
03816 // UnicodeString flavor of doCaseCompare(): pins the source range and delegates to the
03817 // UChar* overload, passing options through. A bogus srcText gives 0 if this is bogus too, else 1.
03818 inline int8_t UnicodeString::doCaseCompare(int32_t start, int32_t thisLength,
03819                              const UnicodeString &srcText,
03820                              int32_t srcStart, int32_t srcLength,
03821                              uint32_t options) const {
03822   if(!srcText.isBogus()) {
03823     srcText.pinIndices(srcStart, srcLength);
03824     const UChar *srcChars = srcText.getArrayStart();
03825     return doCaseCompare(start, thisLength, srcChars, srcStart, srcLength, options);
03826   }
03827   return (int8_t)!isBogus();
03828 }
03831 
03832 // Compare this whole string with the whole of text via doCaseCompare().
03833 inline int8_t UnicodeString::caseCompare(const UnicodeString &text, uint32_t options) const
03834 { const int32_t textLength = text.length();
03835   return doCaseCompare(0, length(), text, 0, textLength, options); }
03836 
03837 // Compare the range (start, _length) of this string with all of srcText.
03838 inline int8_t UnicodeString::caseCompare(int32_t start, int32_t _length,
03839                            const UnicodeString &srcText,
03840                            uint32_t options) const
03841 {
03842   return doCaseCompare(start, _length, srcText, 0, srcText.length(), options);
03843 }
03844 
03845 // Compare this whole string with srcChars (source start 0, source length srcLength).
03846 inline int8_t UnicodeString::caseCompare(const UChar *srcChars, int32_t srcLength,
03847                            uint32_t options) const
03848 {
03849   return doCaseCompare(0, length(), srcChars, 0, srcLength, options);
03850 }
03851 
03852 // Compare the range (start, _length) of this string with the range
03853 // (srcStart, srcLength) of srcText; plain forwarder to doCaseCompare().
03854 inline int8_t UnicodeString::caseCompare(int32_t start, int32_t _length,
03855                            const UnicodeString &srcText,
03856                            int32_t srcStart, int32_t srcLength,
03857                            uint32_t options) const
03858 {
03859   return doCaseCompare(start, _length, srcText, srcStart, srcLength, options);
03860 }
03861 
03862 // Compare this string's range with srcChars; _length also serves as the source length.
03863 inline int8_t UnicodeString::caseCompare(int32_t start, int32_t _length,
03864                            const UChar *srcChars,
03865                            uint32_t options) const
03866 {
03867   return doCaseCompare(start, _length, srcChars, 0, _length, options);
03868 }
03869 
03870 // Compare the range (start, _length) of this string with the range
03871 // (srcStart, srcLength) of srcChars; plain forwarder to doCaseCompare().
03872 inline int8_t UnicodeString::caseCompare(int32_t start, int32_t _length,
03873                            const UChar *srcChars,
03874                            int32_t srcStart, int32_t srcLength,
03875                            uint32_t options) const
03876 {
03877   return doCaseCompare(start, _length, srcChars, srcStart, srcLength, options);
03878 }
03879 
03880 // Range-by-limit variant of caseCompare(): [start, limit) against [srcStart, srcLimit) of srcText.
03881 inline int8_t UnicodeString::caseCompareBetween(int32_t start, int32_t limit,
03882                                   const UnicodeString &srcText,
03883                                   int32_t srcStart, int32_t srcLimit,
03884                                   uint32_t options) const {
03885   const int32_t thisLength = limit - start, srcLength = srcLimit - srcStart;
03886   return doCaseCompare(start, thisLength, srcText, srcStart, srcLength, options);
03887 }
03889 
03890 // Look for the range (srcStart, srcLength) of srcText within the range (start, _length)
03891 // of this string. Returns -1 without searching if srcText is bogus or the pinned source range is empty.
03892 inline int32_t UnicodeString::indexOf(const UnicodeString& srcText,
03893                        int32_t srcStart, int32_t srcLength,
03894                        int32_t start, int32_t _length) const {
03895   if(srcText.isBogus()) {
03896     return -1;
03897   }
03898   srcText.pinIndices(srcStart, srcLength);
03899   if(srcLength <= 0) {
03900     return -1;
03901   }
03902   return indexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length);
03903 }
03905 
03906 // Search the whole of this string for the whole of text.
03907 inline int32_t UnicodeString::indexOf(const UnicodeString& text) const
03908 { const int32_t textLength = text.length(); return indexOf(text, 0, textLength, 0, length()); }
03909 
03910 // Search for text from the pinned start index through the end of this string.
03911 inline int32_t UnicodeString::indexOf(const UnicodeString& text, int32_t start) const {
03912   pinIndex(start);
03913   const int32_t remaining = length() - start;
03914   return indexOf(text, 0, text.length(), start, remaining);
03915 }
03916 
03917 // Search the range (start, _length) of this string for the whole of text.
03918 inline int32_t UnicodeString::indexOf(const UnicodeString& text,
03919                        int32_t start, int32_t _length) const {
03920   return indexOf(text, 0, text.length(), start, _length);
03921 }
03922 
03923 // Search for srcChars (source start 0, source length srcLength) from the
03924 // pinned start index through the end of this string.
03925 inline int32_t UnicodeString::indexOf(const UChar *srcChars, int32_t srcLength, int32_t start) const {
03926   pinIndex(start);
03927   const int32_t remaining = length() - start;
03928   return indexOf(srcChars, 0, srcLength, start, remaining);
03929 }
03930 
03931 // Search the range (start, _length) of this string for srcChars, using source start 0.
03932 inline int32_t UnicodeString::indexOf(const UChar *srcChars, int32_t srcLength,
03933                        int32_t start, int32_t _length) const {
03934   return indexOf(srcChars, 0, srcLength, start, _length);
03935 }
03937 
03938 // Search the range (start, _length) for the code unit c; forwards to doIndexOf().
03939 inline int32_t UnicodeString::indexOf(UChar c, int32_t start, int32_t _length) const {
03940   return doIndexOf(c, start, _length);
03941 }
03943 
03944 // Search the range (start, _length) for the code point c; forwards to doIndexOf().
03945 inline int32_t UnicodeString::indexOf(UChar32 c, int32_t start, int32_t _length) const {
03946   return doIndexOf(c, start, _length);
03947 }
03949 
03950 // Search the whole string for the code unit c.
03951 inline int32_t UnicodeString::indexOf(UChar c) const
03952 { const int32_t len = length(); return doIndexOf(c, 0, len); }
03953 
03954 // Search the whole string for the code point c, via the three-argument overload.
03955 inline int32_t UnicodeString::indexOf(UChar32 c) const
03956 { const int32_t len = length(); return indexOf(c, 0, len); }
03957 
03958 // Search for the code unit c from the pinned start index through the end of the string.
03959 inline int32_t UnicodeString::indexOf(UChar c, int32_t start) const {
03960   pinIndex(start);
03961   const int32_t remaining = length() - start;
03962   return doIndexOf(c, start, remaining);
03963 }
03964 
03965 // Search for the code point c from the pinned start index through the end of the string.
03966 inline int32_t UnicodeString::indexOf(UChar32 c, int32_t start) const {
03967   pinIndex(start);
03968   const int32_t remaining = length() - start;
03969   return indexOf(c, start, remaining);
03970 }
03971 
03972 // Backward search of the range (start, _length) for srcChars, using source start 0.
03973 inline int32_t UnicodeString::lastIndexOf(const UChar *srcChars, int32_t srcLength,
03974                            int32_t start, int32_t _length) const {
03975   return lastIndexOf(srcChars, 0, srcLength, start, _length);
03976 }
03978 
03979 // Backward search for srcChars (source start 0, source length srcLength) in the
03980 // part of this string from the pinned start index through the end.
03981 inline int32_t UnicodeString::lastIndexOf(const UChar *srcChars, int32_t srcLength, int32_t start) const {
03982   pinIndex(start);
03983   const int32_t remaining = length() - start;
03984   return lastIndexOf(srcChars, 0, srcLength, start, remaining);
03985 }
03986 
03987 // Backward search for the range (srcStart, srcLength) of srcText within the range (start, _length)
03988 // of this string. Returns -1 without searching if srcText is bogus or the pinned source range is empty.
03989 inline int32_t UnicodeString::lastIndexOf(const UnicodeString& srcText,
03990                            int32_t srcStart, int32_t srcLength,
03991                            int32_t start, int32_t _length) const {
03992   if(srcText.isBogus()) {
03993     return -1;
03994   }
03995   srcText.pinIndices(srcStart, srcLength);
03996   if(srcLength <= 0) {
03997     return -1;
03998   }
03999   return lastIndexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length);
04000 }
04002 
04003 // Backward search of the range (start, _length) of this string for the whole of text.
04004 inline int32_t UnicodeString::lastIndexOf(const UnicodeString& text,
04005                            int32_t start, int32_t _length) const {
04006   return lastIndexOf(text, 0, text.length(), start, _length);
04007 }
04008 
04009 // Backward search for text in the part from the pinned start index through the end.
04010 inline int32_t UnicodeString::lastIndexOf(const UnicodeString& text, int32_t start) const {
04011   pinIndex(start);
04012   const int32_t remaining = length() - start;
04013   return lastIndexOf(text, 0, text.length(), start, remaining);
04014 }
04015 
04016 // Backward search of the whole of this string for the whole of text.
04017 inline int32_t UnicodeString::lastIndexOf(const UnicodeString& text) const
04018 { const int32_t textLength = text.length(); return lastIndexOf(text, 0, textLength, 0, length()); }
04019 
04020 // Backward search of the range (start, _length) for the code unit c; forwards to doLastIndexOf().
04021 inline int32_t UnicodeString::lastIndexOf(UChar c, int32_t start, int32_t _length) const {
04022   return doLastIndexOf(c, start, _length);
04023 }
04025 
04026 // Backward search of the range (start, _length) for the code point c; forwards to doLastIndexOf().
04027 inline int32_t UnicodeString::lastIndexOf(UChar32 c, int32_t start, int32_t _length) const
04028 {
04029   const int32_t found = doLastIndexOf(c, start, _length);
04030   return found;
04031 }
04032 
04033 // Backward search of the whole string for the code unit c.
04034 inline int32_t UnicodeString::lastIndexOf(UChar c) const
04035 { const int32_t len = length(); return doLastIndexOf(c, 0, len); }
04036 
04037 // Backward search of the whole string for the code point c, via the three-argument overload.
04038 inline int32_t UnicodeString::lastIndexOf(UChar32 c) const
04039 { const int32_t len = length();
04040   return lastIndexOf(c, 0, len); }
04041 
04042 // Backward search for the code unit c in the part from the pinned start index through the end.
04043 inline int32_t UnicodeString::lastIndexOf(UChar c, int32_t start) const {
04044   pinIndex(start);
04045   const int32_t remaining = length() - start;
04046   return doLastIndexOf(c, start, remaining);
04047 }
04048 
04049 // Backward search for the code point c in the part from the pinned start index through the end.
04050 inline int32_t UnicodeString::lastIndexOf(UChar32 c, int32_t start) const {
04051   pinIndex(start);
04052   const int32_t remaining = length() - start;
04053   return lastIndexOf(c, start, remaining);
04054 }
04055 
04056 // TRUE if compare() of the first text.length() UChars of this string with all of text returns 0.
04057 inline UBool UnicodeString::startsWith(const UnicodeString& text) const
04058 { const int32_t n = text.length(); return compare(0, n, text, 0, n) == 0; }
04059 
04060 // TRUE if doCompare() of this string's range (0, srcLength) with the range (srcStart, srcLength) of srcText returns 0.
04061 inline UBool UnicodeString::startsWith(const UnicodeString& srcText,
04062                           int32_t srcStart, int32_t srcLength) const {
04063   return 0 == doCompare(0, srcLength, srcText, srcStart, srcLength);
04064 }
04065 
04066 // TRUE if this string begins with srcChars; a negative srcLength is replaced
04067 // by u_strlen(srcChars) before the comparison.
04068 inline UBool UnicodeString::startsWith(const UChar *srcChars, int32_t srcLength) const {
04069   const int32_t prefixLength = srcLength < 0 ? u_strlen(srcChars) : srcLength;
04070   return doCompare(0, prefixLength, srcChars, 0, prefixLength) == 0;
04071 }
04073 
04074 // TRUE if this string begins with srcLength UChars of srcChars starting at srcStart.
04075 // A negative srcLength means NUL-terminated: measure from srcChars + srcStart, not from
04076 // srcChars, so that the compared range does not run past the terminator (as in endsWith()).
04077 inline UBool UnicodeString::startsWith(const UChar *srcChars, int32_t srcStart, int32_t srcLength) const {
04078   if(srcLength < 0) { srcLength = u_strlen(srcChars + srcStart); }
04079   return doCompare(0, srcLength, srcChars, srcStart, srcLength) == 0;
04080 }
04081 
04082 // Compare the tail of this string, starting at length() - text.length(), with all of text.
04083 inline UBool UnicodeString::endsWith(const UnicodeString& text) const {
04084   const int32_t n = text.length(); return doCompare(length() - n, n, text, 0, n) == 0;
04085 }
04086 
04087 // Compare the tail of this string with the range (srcStart, srcLength) of srcText;
04088 // the source range is pinned first, so the tail offset uses the pinned srcLength.
04089 inline UBool UnicodeString::endsWith(const UnicodeString& srcText,
04090                         int32_t srcStart, int32_t srcLength) const {
04091   srcText.pinIndices(srcStart, srcLength);
04092   const int32_t tailStart = length() - srcLength;
04093   return doCompare(tailStart, srcLength, srcText, srcStart, srcLength) == 0;
04094 }
04095 
04096 // Compare the tail of this string with srcChars; a negative srcLength is
04097 // replaced by u_strlen(srcChars) before the tail offset is computed.
04098 inline UBool UnicodeString::endsWith(const UChar *srcChars, int32_t srcLength) const {
04099   const int32_t suffixLength = srcLength < 0 ? u_strlen(srcChars) : srcLength;
04100   const int32_t tailStart = length() - suffixLength;
04101   return doCompare(tailStart, suffixLength, srcChars, 0, suffixLength) == 0;
04102 }
04105 
04106 // Compare the tail of this string with srcChars starting at srcStart; a negative
04107 // srcLength is replaced by u_strlen(srcChars + srcStart) before the tail offset is computed.
04108 inline UBool UnicodeString::endsWith(const UChar *srcChars,
04109                         int32_t srcStart, int32_t srcLength) const {
04110   const int32_t suffixLength = srcLength < 0 ? u_strlen(srcChars + srcStart) : srcLength;
04111   const int32_t tailStart = length() - suffixLength;
04112   return doCompare(tailStart, suffixLength, srcChars, srcStart, suffixLength) == 0;
04113 }
04116 
04117 //========================================
04118 // replace
04119 //========================================
04120 // Forward to doReplace() with the range (start, _length) and all of srcText.
04121 inline UnicodeString& UnicodeString::replace(int32_t start, int32_t _length,
04122                                const UnicodeString& srcText) {
04123   return doReplace(start, _length, srcText, 0, srcText.length());
04124 }
04125 
04126 // Forward to doReplace() with the range (start, _length) and
04127 // the range (srcStart, srcLength) of srcText.
04128 inline UnicodeString& UnicodeString::replace(int32_t start, int32_t _length,
04129                                const UnicodeString& srcText,
04130                                int32_t srcStart, int32_t srcLength) {
04131   return doReplace(start, _length, srcText, srcStart, srcLength);
04132 }
04133 
04134 // Forward to doReplace() with the range (start, _length) and srcChars (source start 0, length srcLength).
04135 inline UnicodeString& UnicodeString::replace(int32_t start, int32_t _length,
04136                                const UChar *srcChars, int32_t srcLength) {
04137   return doReplace(start, _length, srcChars, 0, srcLength);
04138 }
04140 
04141 // Forward to doReplace() with the range (start, _length) and
04142 // the range (srcStart, srcLength) of srcChars.
04143 inline UnicodeString& UnicodeString::replace(int32_t start, int32_t _length,
04144                                const UChar *srcChars,
04145                                int32_t srcStart, int32_t srcLength) {
04146   return doReplace(start, _length, srcChars, srcStart, srcLength);
04147 }
04148 
04149 // Forward to doReplace() with srcChar passed as a one-element UChar array.
04150 inline UnicodeString& UnicodeString::replace(int32_t start, int32_t _length, UChar srcChar) {
04151   // the by-value parameter provides the one-element source buffer
04152   return doReplace(start, _length, &srcChar, 0, 1);
04153 }
04154 
04155 // Like replace(), with the range given as [start, limit); the whole of srcText is the source.
04156 inline UnicodeString& UnicodeString::replaceBetween(int32_t start, int32_t limit,
04157                                       const UnicodeString& srcText) {
04158   return doReplace(start, limit - start, srcText, 0, srcText.length());
04159 }
04160 
04161 // Like replace(), with both ranges given as [start, limit) index pairs.
04162 inline UnicodeString& UnicodeString::replaceBetween(int32_t start, int32_t limit,
04163                                       const UnicodeString& srcText,
04164                                       int32_t srcStart, int32_t srcLimit) {
04165   const int32_t thisLength = limit - start, srcLength = srcLimit - srcStart;
04166   return doReplace(start, thisLength, srcText, srcStart, srcLength);
04167 }
04168 
04169 // Whole-string form: forwards the full ranges of this string, oldText and newText.
04170 inline UnicodeString& UnicodeString::findAndReplace(const UnicodeString& oldText,
04171                                       const UnicodeString& newText) {
04172   return findAndReplace(0, length(), oldText, 0, oldText.length(), newText, 0, newText.length());
04173 }
04174 
04175 // Range form: forwards (start, _length) of this string together with
04176 // the full ranges of oldText and newText.
04177 inline UnicodeString& UnicodeString::findAndReplace(int32_t start, int32_t _length,
04178                                       const UnicodeString& oldText,
04179                                       const UnicodeString& newText) {
04180   return findAndReplace(start, _length, oldText, 0, oldText.length(), newText, 0, newText.length());
04181 }
04182 
04183 // ============================
04184 // extract
04185 // ============================
04186 // Extract into another UnicodeString by replacing all of target
04187 // with the range (start, _length) of this string.
04188 inline void UnicodeString::doExtract(int32_t start, int32_t _length, UnicodeString& target) const {
04189   target.replace(0, target.length(), *this, start, _length);
04190 }
04191 
04192 // Public UChar* extract: forwards all arguments unchanged to doExtract().
04193 inline void UnicodeString::extract(int32_t start, int32_t _length,
04194                        UChar *target, int32_t targetStart) const {
04195   doExtract(start, _length, target, targetStart);
04196 }
04198 
04199 // Public UnicodeString extract: forwards all arguments unchanged to doExtract().
04200 inline void UnicodeString::extract(int32_t start, int32_t _length,
04201                        UnicodeString& target) const {
04202   doExtract(start, _length, target);
04203 }
04204 
04205 #if !UCONFIG_NO_CONVERSION
04206 
04207 // Forward to the extract() overload that takes an explicit destination size:
04208 // the size passed on is 0xffffffff, or 0 when dst is a null pointer.
04209 inline int32_t UnicodeString::extract(int32_t start, int32_t _length,
04210                        char *dst, const char *codepage) const
04211 {
04212   // This dstSize value will be checked explicitly
04213   const uint32_t dstSize = dst!=0 ? 0xffffffff : 0;
04214   return extract(start, _length, dst, dstSize, codepage);
04215 }
04217 
04218 #endif
04219 
04220 // Pin start and limit, then hand the range [start, limit) to doExtract()
04221 // together with dst and dstStart.
04222 inline void UnicodeString::extractBetween(int32_t start, int32_t limit,
04223                               UChar *dst, int32_t dstStart) const {
04224   pinIndex(start);
04225   pinIndex(limit);
04226   const int32_t count = limit - start;
04227   doExtract(start, count, dst, dstStart);
04228 }
04229 
04230 // Range-by-limit form of tempSubString(): converts [start, limit) to (start, length).
04231 inline UnicodeString UnicodeString::tempSubStringBetween(int32_t start, int32_t limit) const {
04232     const int32_t count = limit - start; return tempSubString(start, count);
04233 }
04234 
04235 // Return the UChar at offset, or kInvalidUChar (0xffff) if offset is out of range.
04236 inline UChar UnicodeString::doCharAt(int32_t offset) const {
04237   // one unsigned comparison rejects both negative offsets and offsets >= length()
04238   if((uint32_t)offset >= (uint32_t)length()) {
04239     return kInvalidUChar;
04240   }
04241   return getArrayStart()[offset];
04242 }
04244 
04245 // Bounds-checked code unit access; the result comes straight from doCharAt().
04246 inline UChar UnicodeString::charAt(int32_t offset) const
04247 { const UChar unit = doCharAt(offset); return unit; }
04248 
04249 // Read-only subscript; the result comes straight from doCharAt(), like charAt().
04250 inline UChar UnicodeString::operator[] (int32_t offset) const
04251 { const UChar unit = doCharAt(offset); return unit; }
04252 
04253 // TRUE if fShortLength is 0. setLength() stores lengths above 127 as
04254 // fShortLength=-1, so those never look empty here.
04255 inline UBool UnicodeString::isEmpty() const
04256 { return 0 == fShortLength; }
04257 
04258 //========================================
04259 // Write implementation methods
04260 //========================================
04261 // Lengths up to 127 go into fShortLength; larger ones into fFields.fLength, flagged by -1.
04262 inline void UnicodeString::setLength(int32_t len) {
04263   if(len > 127) {
04264     fUnion.fFields.fLength = len;
04265     fShortLength = (int8_t)-1;
04266   } else {
04267     fShortLength = (int8_t)len;
04268   }
04269 }
04270 
04271 inline void
04272 UnicodeString::setToEmpty() {
04273   fShortLength = 0;
04274   fFlags = kShortString;
04275 }
04276 
04277 inline void
04278 UnicodeString::setArray(UChar *array, int32_t len, int32_t capacity) {
04279   setLength(len);
04280   fUnion.fFields.fArray = array;
04281   fUnion.fFields.fCapacity = capacity;
04282 }
04283 
04284 inline const UChar *
04285 UnicodeString::getTerminatedBuffer() {
04286   if(!isWritable()) {
04287     return 0;
04288   } else {
04289     UChar *array = getArrayStart();
04290     int32_t len = length();
04291     if(len < getCapacity() && ((fFlags&kRefCounted) == 0 || refCount() == 1)) {
04292       /*
04293        * kRefCounted: Do not write the NUL if the buffer is shared.
04294        * That is mostly safe, except when the length of one copy was modified
04295        * without copy-on-write, e.g., via truncate(newLength) or remove(void).
04296        * Then the NUL would be written into the middle of another copy's string.
04297        */
04298       if(!(fFlags&kBufferIsReadonly)) {
04299         /*
04300          * We must not write to a readonly buffer, but it is known to be
04301          * NUL-terminated if len<capacity.
04302          * A shared, allocated buffer (refCount()>1) must not have its contents
04303          * modified, but the NUL at [len] is beyond the string contents,
04304          * and multiple string objects and threads writing the same NUL into the
04305          * same location is harmless.
04306          * In all other cases, the buffer is fully writable and it is anyway safe
04307          * to write the NUL.
04308          *
04309          * Note: An earlier version of this code tested whether there is a NUL
04310          * at [len] already, but, while safe, it generated lots of warnings from
04311          * tools like valgrind and Purify.
04312          */
04313         array[len] = 0;
04314       }
04315       return array;
04316     } else if(cloneArrayIfNeeded(len+1)) {
04317       array = getArrayStart();
04318       array[len] = 0;
04319       return array;
04320     } else {
04321       return 0;
04322     }
04323   }
04324 }
04325 
04326 inline UnicodeString&
04327 UnicodeString::operator= (UChar ch)
04328 { return doReplace(0, length(), &ch, 0, 1); }
04329 
04330 inline UnicodeString&
04331 UnicodeString::operator= (UChar32 ch)
04332 { return replace(0, length(), ch); }
04333 
04334 inline UnicodeString&
04335 UnicodeString::setTo(const UnicodeString& srcText,
04336              int32_t srcStart,
04337              int32_t srcLength)
04338 {
04339   unBogus();
04340   return doReplace(0, length(), srcText, srcStart, srcLength);
04341 }
04342 
04343 inline UnicodeString&
04344 UnicodeString::setTo(const UnicodeString& srcText,
04345              int32_t srcStart)
04346 {
04347   unBogus();
04348   srcText.pinIndex(srcStart);
04349   return doReplace(0, length(), srcText, srcStart, srcText.length() - srcStart);
04350 }
04351 
04352 inline UnicodeString&
04353 UnicodeString::setTo(const UnicodeString& srcText)
04354 {
04355   return copyFrom(srcText);
04356 }
04357 
04358 inline UnicodeString&
04359 UnicodeString::setTo(const UChar *srcChars,
04360              int32_t srcLength)
04361 {
04362   unBogus();
04363   return doReplace(0, length(), srcChars, 0, srcLength);
04364 }
04365 
04366 inline UnicodeString&
04367 UnicodeString::setTo(UChar srcChar)
04368 {
04369   unBogus();
04370   return doReplace(0, length(), &srcChar, 0, 1);
04371 }
04372 
04373 inline UnicodeString&
04374 UnicodeString::setTo(UChar32 srcChar)
04375 {
04376   unBogus();
04377   return replace(0, length(), srcChar);
04378 }
04379 
04380 inline UnicodeString&
04381 UnicodeString::append(const UnicodeString& srcText,
04382               int32_t srcStart,
04383               int32_t srcLength)
04384 { return doReplace(length(), 0, srcText, srcStart, srcLength); }
04385 
04386 inline UnicodeString&
04387 UnicodeString::append(const UnicodeString& srcText)
04388 { return doReplace(length(), 0, srcText, 0, srcText.length()); }
04389 
04390 inline UnicodeString&
04391 UnicodeString::append(const UChar *srcChars,
04392               int32_t srcStart,
04393               int32_t srcLength)
04394 { return doReplace(length(), 0, srcChars, srcStart, srcLength); }
04395 
04396 inline UnicodeString&
04397 UnicodeString::append(const UChar *srcChars,
04398               int32_t srcLength)
04399 { return doReplace(length(), 0, srcChars, 0, srcLength); }
04400 
04401 inline UnicodeString&
04402 UnicodeString::append(UChar srcChar)
04403 { return doReplace(length(), 0, &srcChar, 0, 1); }
04404 
04405 inline UnicodeString&
04406 UnicodeString::operator+= (UChar ch)
04407 { return doReplace(length(), 0, &ch, 0, 1); }
04408 
04409 inline UnicodeString&
04410 UnicodeString::operator+= (UChar32 ch) {
04411   return append(ch);
04412 }
04413 
04414 inline UnicodeString&
04415 UnicodeString::operator+= (const UnicodeString& srcText)
04416 { return doReplace(length(), 0, srcText, 0, srcText.length()); }
04417 
04418 inline UnicodeString&
04419 UnicodeString::insert(int32_t start,
04420               const UnicodeString& srcText,
04421               int32_t srcStart,
04422               int32_t srcLength)
04423 { return doReplace(start, 0, srcText, srcStart, srcLength); }
04424 
04425 inline UnicodeString&
04426 UnicodeString::insert(int32_t start,
04427               const UnicodeString& srcText)
04428 { return doReplace(start, 0, srcText, 0, srcText.length()); }
04429 
04430 inline UnicodeString&
04431 UnicodeString::insert(int32_t start,
04432               const UChar *srcChars,
04433               int32_t srcStart,
04434               int32_t srcLength)
04435 { return doReplace(start, 0, srcChars, srcStart, srcLength); }
04436 
04437 inline UnicodeString&
04438 UnicodeString::insert(int32_t start,
04439               const UChar *srcChars,
04440               int32_t srcLength)
04441 { return doReplace(start, 0, srcChars, 0, srcLength); }
04442 
04443 inline UnicodeString&
04444 UnicodeString::insert(int32_t start,
04445               UChar srcChar)
04446 { return doReplace(start, 0, &srcChar, 0, 1); }
04447 
04448 inline UnicodeString&
04449 UnicodeString::insert(int32_t start,
04450               UChar32 srcChar)
04451 { return replace(start, 0, srcChar); }
04452 
04453 
04454 inline UnicodeString&
04455 UnicodeString::remove()
04456 {
04457   // remove() of a bogus string makes the string empty and non-bogus
04458   // we also un-alias a read-only alias to deal with NUL-termination
04459   // issues with getTerminatedBuffer()
04460   if(fFlags & (kIsBogus|kBufferIsReadonly)) {
04461     setToEmpty();
04462   } else {
04463     fShortLength = 0;
04464   }
04465   return *this;
04466 }
04467 
04468 inline UnicodeString&
04469 UnicodeString::remove(int32_t start,
04470              int32_t _length)
04471 {
04472     if(start <= 0 && _length == INT32_MAX) {
04473         // remove(guaranteed everything) of a bogus string makes the string empty and non-bogus
04474         return remove();
04475     }
04476     return doReplace(start, _length, NULL, 0, 0);
04477 }
04478 
04479 inline UnicodeString&
04480 UnicodeString::removeBetween(int32_t start,
04481                 int32_t limit)
04482 { return doReplace(start, limit - start, NULL, 0, 0); }
04483 
04484 inline UnicodeString &
04485 UnicodeString::retainBetween(int32_t start, int32_t limit) {
04486   truncate(limit);
04487   return doReplace(0, start, NULL, 0, 0);
04488 }
04489 
04490 inline UBool
04491 UnicodeString::truncate(int32_t targetLength)
04492 {
04493   if(isBogus() && targetLength == 0) {
04494     // truncate(0) of a bogus string makes the string empty and non-bogus
04495     unBogus();
04496     return FALSE;
04497   } else if((uint32_t)targetLength < (uint32_t)length()) {
04498     setLength(targetLength);
04499     if(fFlags&kBufferIsReadonly) {
04500       fUnion.fFields.fCapacity = targetLength;  // not NUL-terminated any more
04501     }
04502     return TRUE;
04503   } else {
04504     return FALSE;
04505   }
04506 }
04507 
04508 inline UnicodeString&
04509 UnicodeString::reverse()
04510 { return doReverse(0, length()); }
04511 
04512 inline UnicodeString&
04513 UnicodeString::reverse(int32_t start,
04514                int32_t _length)
04515 { return doReverse(start, _length); }
04516 
04517 U_NAMESPACE_END
04518 
04519 #endif
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines