ICU 51.2
messagepattern.h
Go to the documentation of this file.
00001 /*
00002 *******************************************************************************
00003 *   Copyright (C) 2011-2013, International Business Machines
00004 *   Corporation and others.  All Rights Reserved.
00005 *******************************************************************************
00006 *   file name:  messagepattern.h
00007 *   encoding:   US-ASCII
00008 *   tab size:   8 (not used)
00009 *   indentation:4
00010 *
00011 *   created on: 2011mar14
00012 *   created by: Markus W. Scherer
00013 */
00014 
00015 #ifndef __MESSAGEPATTERN_H__
00016 #define __MESSAGEPATTERN_H__
00017 
00023 #include "unicode/utypes.h"
00024 
00025 #if !UCONFIG_NO_FORMATTING
00026 
00027 #include "unicode/parseerr.h"
00028 #include "unicode/unistr.h"
00029 
/**
 * Selects how apostrophes in a pattern string are interpreted by MessagePattern.
 * A value of this type is accepted by the MessagePattern(mode, errorCode)
 * constructor and by clearPatternAndSetApostropheMode(), and is returned by
 * getApostropheMode().
 * The enumerators have no explicit initializers, so they are numbered 0 and 1.
 *
 * NOTE(review): the per-enumerator descriptions below follow the ICU API
 * documentation; the original comments are elided from this listing -- confirm.
 */
00066 enum UMessagePatternApostropheMode {
          /**
           * Presumably: a literal apostrophe may be written as one or two
           * apostrophes; a single apostrophe starts quoted text only when it
           * directly precedes pattern syntax such as a curly brace.
           */
00078     UMSGPAT_APOS_DOUBLE_OPTIONAL,
          /**
           * Presumably: a literal apostrophe must be doubled; a single
           * apostrophe always starts quoted literal text (JDK-compatible).
           */
00087     UMSGPAT_APOS_DOUBLE_REQUIRED
00088 };
      /** Lets the enum be named without the `enum` keyword (e.g. from C code). */
00092 typedef enum UMessagePatternApostropheMode UMessagePatternApostropheMode;
00093 
/**
 * Kind tag of a MessagePattern::Part (stored in Part::type and returned by
 * Part::getType() / MessagePattern::getPartType()).
 * The enumerators have no explicit initializers, so they are numbered from 0
 * in declaration order.
 *
 * Visible in this header: Part::getArgType() reads Part::value as a
 * UMessagePatternArgType only for ARG_START and ARG_LIMIT parts, and
 * Part::hasNumericValue() is true only for ARG_INT and ARG_DOUBLE.
 *
 * NOTE(review): the remaining per-enumerator descriptions follow the ICU API
 * documentation; the original comments are elided from this listing -- confirm.
 */
00098 enum UMessagePatternPartType {
          /** Presumably: start of a (top-level or nested) message. */
00108     UMSGPAT_PART_TYPE_MSG_START,
          /** Presumably: end of a (top-level or nested) message. */
00117     UMSGPAT_PART_TYPE_MSG_LIMIT,
          /** Presumably: pattern text to skip when formatting (e.g. quoting apostrophes). */
00125     UMSGPAT_PART_TYPE_SKIP_SYNTAX,
          /** Presumably: a syntax character that auto-quoting needs to insert. */
00132     UMSGPAT_PART_TYPE_INSERT_CHAR,
          /** Presumably: the '#' in a plural message that is replaced by the number. */
00140     UMSGPAT_PART_TYPE_REPLACE_NUMBER,
          /** Start of an argument; Part::getArgType() returns its value as the argument type. */
00151     UMSGPAT_PART_TYPE_ARG_START,
          /** End of an argument; Part::getArgType() returns its value as the argument type. */
00158     UMSGPAT_PART_TYPE_ARG_LIMIT,
          /** Presumably: a numbered argument reference. */
00163     UMSGPAT_PART_TYPE_ARG_NUMBER,
          /** Presumably: a named argument reference. */
00169     UMSGPAT_PART_TYPE_ARG_NAME,
          /** Presumably: the argument type keyword of a simple argument. */
00175     UMSGPAT_PART_TYPE_ARG_TYPE,
          /** Presumably: the style text of a simple argument. */
00181     UMSGPAT_PART_TYPE_ARG_STYLE,
          /** Presumably: a selector/keyword inside a choice, plural or select argument. */
00187     UMSGPAT_PART_TYPE_ARG_SELECTOR,
          /** A numeric part (Part::hasNumericValue() is true); presumably an integer value. */
00194     UMSGPAT_PART_TYPE_ARG_INT,
          /** A numeric part (Part::hasNumericValue() is true); presumably a non-integer value. */
00202     UMSGPAT_PART_TYPE_ARG_DOUBLE
00203 };
      /** Lets the enum be named without the `enum` keyword (e.g. from C code). */
00207 typedef enum UMessagePatternPartType UMessagePatternPartType;
00208 
/**
 * Type of an argument in a message pattern. MessagePattern::Part::getArgType()
 * returns one of these for ARG_START/ARG_LIMIT parts and
 * UMSGPAT_ARG_TYPE_NONE for every other part type.
 * The enumerators have no explicit initializers, so they are numbered from 0
 * in declaration order.
 *
 * NOTE(review): when U_HIDE_DRAFT_API is defined, the enumerator list ends
 * with "UMSGPAT_ARG_TYPE_SELECT," -- a trailing comma, which C89 and C++03
 * compilers in pedantic mode reject.
 * NOTE(review): per-enumerator meanings are inferred from the names and the
 * ICU API documentation; the original comments are elided from this listing.
 */
00217 enum UMessagePatternArgType {
          /** Returned by Part::getArgType() for parts that are not ARG_START/ARG_LIMIT. */
00222     UMSGPAT_ARG_TYPE_NONE,
          /** Presumably: an argument with an optional type keyword and style. */
00228     UMSGPAT_ARG_TYPE_SIMPLE,
          /** Presumably: a ChoiceFormat-style argument. */
00234     UMSGPAT_ARG_TYPE_CHOICE,
          /** Counts as "plural style" for UMSGPAT_ARG_TYPE_HAS_PLURAL_STYLE(). */
00244     UMSGPAT_ARG_TYPE_PLURAL,
          /** Presumably: a SelectFormat-style argument. */
00249     UMSGPAT_ARG_TYPE_SELECT,
00250 #ifndef U_HIDE_DRAFT_API
00251 
          /** Draft API. Counts as "plural style" for UMSGPAT_ARG_TYPE_HAS_PLURAL_STYLE(). */
00256     UMSGPAT_ARG_TYPE_SELECTORDINAL
00257 #endif /* U_HIDE_DRAFT_API */
00258 };
      /** Lets the enum be named without the `enum` keyword (e.g. from C code). */
00262 typedef enum UMessagePatternArgType UMessagePatternArgType;
00263 
00264 #ifndef U_HIDE_DRAFT_API
00265 
      /**
       * Evaluates to true when argType is UMSGPAT_ARG_TYPE_PLURAL or
       * UMSGPAT_ARG_TYPE_SELECTORDINAL, false otherwise.
       * The macro argument appears twice in the expansion, so do not pass an
       * expression with side effects.
       * Only available when U_HIDE_DRAFT_API is not defined, because it refers
       * to the draft enumerator UMSGPAT_ARG_TYPE_SELECTORDINAL.
       */
00270 #define UMSGPAT_ARG_TYPE_HAS_PLURAL_STYLE(argType) \
00271     ((argType)==UMSGPAT_ARG_TYPE_PLURAL || (argType)==UMSGPAT_ARG_TYPE_SELECTORDINAL)
00272 #endif /* U_HIDE_DRAFT_API */
00273 
/**
 * Negative sentinel constants for argument-name checks.
 * NOTE(review): presumably these are the non-number results of
 * MessagePattern::validateArgumentName() (which returns int32_t); the
 * original comments are elided from this listing -- confirm.
 */
00274 enum {
          /** -1; presumably "the name is a valid identifier but not a number". */
00280     UMSGPAT_ARG_NAME_NOT_NUMBER=-1,
00281 
          /** -2; presumably "the name is neither a valid identifier nor a valid number". */
00289     UMSGPAT_ARG_NAME_NOT_VALID=-2
00290 };
00291 
/**
 * Sentinel of type double with the value -123456789.
 * NOTE(review): presumably what MessagePattern::getNumericValue() returns for
 * a part that has no numeric value; the original comment is elided from this
 * listing -- confirm against the implementation.
 */
00298 #define UMSGPAT_NO_NUMERIC_VALUE ((double)(-123456789))
00299 
00300 U_NAMESPACE_BEGIN
00301 
// Forward declarations only: MessagePattern refers to these types solely
// through pointer members (numericValuesList and partsList), so their
// definitions are not needed in this header.
00302 class MessagePatternDoubleList;
00303 class MessagePatternPartsList;
00304 
/**
 * Parsed form of an ICU message-format pattern string.
 *
 * The object stores the pattern text (msg), a flat array of Part records
 * (parts / partsLength) and a side table of double values
 * (numericValues / numericValuesLength). The inline accessors in this header
 * expose those fields read-only.
 *
 * NOTE(review): the original doc comments were elided from this listing.
 * Summaries of members that are only declared here (constructors, parse*(),
 * clear(), and the private helpers) follow the ICU API documentation and the
 * declarations themselves; confirm details against the implementation file.
 */
00361 class U_COMMON_API MessagePattern : public UObject {
00362 public:
          /**
           * Constructs an empty pattern object.
           * @param errorCode ICU in/out error code.
           */
00371     MessagePattern(UErrorCode &errorCode);
00372 
          /**
           * Constructs an empty pattern object using the given apostrophe mode.
           * @param mode      apostrophe-handling mode.
           * @param errorCode ICU in/out error code.
           */
00382     MessagePattern(UMessagePatternApostropheMode mode, UErrorCode &errorCode);
00383 
          /**
           * Constructs a pattern object and parses the given pattern string.
           * @param pattern    pattern text to parse.
           * @param parseError receives parse-error details; declared as a pointer
           *                   (presumably may be NULL -- confirm).
           * @param errorCode  ICU in/out error code.
           */
00402     MessagePattern(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode);
00403 
          /** Copy constructor. */
00409     MessagePattern(const MessagePattern &other);
00410 
          /** Copy assignment; returns a reference to allow chaining. */
00417     MessagePattern &operator=(const MessagePattern &other);
00418 
          /** Destructor (virtual, as this class derives from UObject). */
00423     virtual ~MessagePattern();
00424 
          /**
           * Parses a message-format pattern string.
           * @param pattern    pattern text to parse.
           * @param parseError receives parse-error details (pointer; presumably may be NULL).
           * @param errorCode  ICU in/out error code.
           * @return a MessagePattern reference (presumably *this, for chaining).
           */
00442     MessagePattern &parse(const UnicodeString &pattern,
00443                           UParseError *parseError, UErrorCode &errorCode);
00444 
          /**
           * Parses a choice-style pattern string.
           * Parameters and return value as for parse().
           */
00462     MessagePattern &parseChoiceStyle(const UnicodeString &pattern,
00463                                      UParseError *parseError, UErrorCode &errorCode);
00464 
          /**
           * Parses a plural-style pattern string.
           * Parameters and return value as for parse().
           */
00482     MessagePattern &parsePluralStyle(const UnicodeString &pattern,
00483                                      UParseError *parseError, UErrorCode &errorCode);
00484 
          /**
           * Parses a select-style pattern string.
           * Parameters and return value as for parse().
           */
00502     MessagePattern &parseSelectStyle(const UnicodeString &pattern,
00503                                      UParseError *parseError, UErrorCode &errorCode);
00504 
          /** Clears this pattern (presumably back to the empty state; defined out of line). */
00510     void clear();
00511 
          /**
           * Calls clear() and then stores the given apostrophe mode.
           * @param mode the new apostrophe-handling mode.
           */
00518     void clearPatternAndSetApostropheMode(UMessagePatternApostropheMode mode) {
00519         clear();
00520         aposMode=mode;
00521     }
00522 
          /** Equality comparison with another MessagePattern (defined out of line). */
00528     UBool operator==(const MessagePattern &other) const;
00529 
          /** Logical negation of operator==. */
00535     inline UBool operator!=(const MessagePattern &other) const {
00536         return !operator==(other);
00537     }
00538 
          /** @return a hash code for this object (defined out of line). */
00543     int32_t hashCode() const;
00544 
          /** @return the apostrophe mode currently stored in this object. */
00549     UMessagePatternApostropheMode getApostropheMode() const {
00550         return aposMode;
00551     }
00552 
00553     // Java has package-private jdkAposMode() here.
00554     // In C++, this is declared in the MessageImpl class.
00555 
          /** @return a reference to the stored pattern text (valid only while this object lives). */
00560     const UnicodeString &getPatternString() const {
00561         return msg;
00562     }
00563 
          /** @return the hasArgNames flag (presumably set while parsing when a named argument is seen). */
00569     UBool hasNamedArguments() const {
00570         return hasArgNames;
00571     }
00572 
          /** @return the hasArgNumbers flag (presumably set while parsing when a numbered argument is seen). */
00578     UBool hasNumberedArguments() const {
00579         return hasArgNumbers;
00580     }
00581 
          /**
           * Validates an argument name or number.
           * @param name the candidate argument name.
           * @return an int32_t result; presumably the argument number if >=0,
           *         else UMSGPAT_ARG_NAME_NOT_NUMBER or UMSGPAT_ARG_NAME_NOT_VALID
           *         -- confirm against the implementation.
           */
00593     static int32_t validateArgumentName(const UnicodeString &name);
00594 
          /**
           * @return a pattern string (by value); presumably the stored pattern
           *         with auto-quoting of apostrophes applied -- defined out of line.
           */
00605     UnicodeString autoQuoteApostropheDeep() const;
00606 
00607     class Part;
00608 
          /** @return the number of entries in the parts array. */
00615     int32_t countParts() const {
00616         return partsLength;
00617     }
00618 
          /**
           * @param i index into the parts array; no bounds check is performed
           *          here, so the caller must keep 0<=i<countParts().
           * @return a reference to the i-th Part.
           */
00625     const Part &getPart(int32_t i) const {
00626         return parts[i];
00627     }
00628 
          /**
           * Shortcut for getPart(i).getType().
           * @param i part index (unchecked, see getPart()).
           */
00636     UMessagePatternPartType getPartType(int32_t i) const {
00637         return getPart(i).type;
00638     }
00639 
          /**
           * Shortcut for getPart(partIndex).getIndex().
           * @param partIndex part index (unchecked, see getPart()).
           * @return the pattern-string index stored in that part.
           */
00647     int32_t getPatternIndex(int32_t partIndex) const {
00648         return getPart(partIndex).index;
00649     }
00650 
          /**
           * @param part a Part of this pattern.
           * @return the pattern text at [part.index, part.index+part.length).
           * NOTE(review): built with UnicodeString::tempSubString(); per the ICU
           * documentation that is a temporary read-only alias of msg's buffer,
           * so the result should not outlive or survive modification of this
           * object -- confirm.
           */
00658     UnicodeString getSubstring(const Part &part) const {
00659         return msg.tempSubString(part.index, part.length);
00660     }
00661 
          /**
           * Compares the pattern text covered by part with s.
           * @return TRUE when msg.compare(index, length, s) reports equality (0).
           */
00669     UBool partSubstringMatches(const Part &part, const UnicodeString &s) const {
00670         return 0==msg.compare(part.index, part.length, s);
00671     }
00672 
          /**
           * @param part a Part of this pattern.
           * @return the numeric value of the part as a double; presumably
           *         UMSGPAT_NO_NUMERIC_VALUE for non-numeric parts (defined out of line).
           */
00679     double getNumericValue(const Part &part) const;
00680 
          /**
           * @param pluralStart index of a part; presumably the first part of a
           *        plural-style argument's body.
           * @return the plural offset as a double (defined out of line).
           */
00687     double getPluralOffset(int32_t pluralStart) const;
00688 
          /**
           * @param start part index (unchecked, see getPart()).
           * @return the limitPartIndex stored in part `start`, or `start` itself
           *         when that stored value is smaller than `start`.
           */
00697     int32_t getLimitPartIndex(int32_t start) const {
00698         int32_t limit=getPart(start).limitPartIndex;
00699         if(limit<start) {
00700             return start;
00701         }
00702         return limit;
00703     }
00704 
          /**
           * One record of a parsed pattern: a type tag, the index and length of
           * the covered pattern text, a small integer value, and the index of a
           * matching "limit" part. Only MessagePattern (a friend) can write the
           * fields; clients read them through the getters.
           */
00712     class Part : public UMemory {
00713     public:
              /**
               * Default constructor. All fields are value-initialized so that a
               * default-constructed Part can be compared and hashed without
               * reading indeterminate memory.
               */
00718         Part() : type(UMSGPAT_PART_TYPE_MSG_START), index(0), length(0), value(0), limitPartIndex(0) {}
00719 
              /** @return the type tag of this part. */
00725         UMessagePatternPartType getType() const {
00726             return type;
00727         }
00728 
              /** @return the pattern-string index associated with this part. */
00734         int32_t getIndex() const {
00735             return index;
00736         }
00737 
              /** @return the length of the pattern substring covered by this part. */
00744         int32_t getLength() const {
00745             return length;
00746         }
00747 
              /** @return index+length, i.e. the exclusive end of the covered substring. */
00754         int32_t getLimit() const {
00755             return index+length;
00756         }
00757 
              /** @return the small integer value stored in this part (meaning depends on the type). */
00764         int32_t getValue() const {
00765             return value;
00766         }
00767 
              /**
               * @return the stored value converted to UMessagePatternArgType when
               *         this part is ARG_START or ARG_LIMIT, otherwise
               *         UMSGPAT_ARG_TYPE_NONE.
               */
00774         UMessagePatternArgType getArgType() const {
                  // Local deliberately not named "type": that would shadow the member.
00775             UMessagePatternPartType partType=getType();
00776             if(partType==UMSGPAT_PART_TYPE_ARG_START || partType==UMSGPAT_PART_TYPE_ARG_LIMIT) {
00777                 return (UMessagePatternArgType)value;
00778             } else {
00779                 return UMSGPAT_ARG_TYPE_NONE;
00780             }
00781         }
00782 
              /**
               * @param type a part type.
               * @return TRUE exactly for UMSGPAT_PART_TYPE_ARG_INT and
               *         UMSGPAT_PART_TYPE_ARG_DOUBLE.
               */
00790         static UBool hasNumericValue(UMessagePatternPartType type) {
00791             return type==UMSGPAT_PART_TYPE_ARG_INT || type==UMSGPAT_PART_TYPE_ARG_DOUBLE;
00792         }
00793 
              /** Equality comparison with another Part (defined out of line). */
00799         UBool operator==(const Part &other) const;
00800 
              /** Logical negation of operator==. */
00806         inline UBool operator!=(const Part &other) const {
00807             return !operator==(other);
00808         }
00809 
              /**
               * @return a hash combining type, index, length and value with
               *         multiplier 37.
               * The arithmetic is carried out in uint32_t so that wrap-around is
               * well-defined; the same expression in signed int overflows (which
               * is undefined behavior) once index is large enough. The result
               * has the same bit pattern a two's-complement wrap would produce.
               */
00814         int32_t hashCode() const {
00815             return (int32_t)((((uint32_t)type*37u+(uint32_t)index)*37u+(uint32_t)length)*37u+(uint32_t)value);
00816         }
00817 
00818     private:
00819         friend class MessagePattern;
00820 
              // Largest values representable in the length (uint16_t) and
              // value (int16_t) fields below.
00821         static const int32_t MAX_LENGTH=0xffff;
00822         static const int32_t MAX_VALUE=0x7fff;
00823 
00824         // Some fields are not final because they are modified during pattern parsing.
00825         // After pattern parsing, the parts are effectively immutable.
00826         UMessagePatternPartType type;
00827         int32_t index;
00828         uint16_t length;
00829         int16_t value;
00830         int32_t limitPartIndex;
00831     };
00832 
00833 private:
          // The private helpers below are defined out of line. Going by their
          // signatures, the int32_t-returning parse*/skip* functions take a
          // pattern index and return an index (presumably the index after the
          // consumed text); confirm against the implementation.
00834     void preParse(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode);
00835 
00836     void postParse();
00837 
00838     int32_t parseMessage(int32_t index, int32_t msgStartLength,
00839                          int32_t nestingLevel, UMessagePatternArgType parentType,
00840                          UParseError *parseError, UErrorCode &errorCode);
00841 
00842     int32_t parseArg(int32_t index, int32_t argStartLength, int32_t nestingLevel,
00843                      UParseError *parseError, UErrorCode &errorCode);
00844 
00845     int32_t parseSimpleStyle(int32_t index, UParseError *parseError, UErrorCode &errorCode);
00846 
00847     int32_t parseChoiceStyle(int32_t index, int32_t nestingLevel,
00848                              UParseError *parseError, UErrorCode &errorCode);
00849 
00850     int32_t parsePluralOrSelectStyle(UMessagePatternArgType argType, int32_t index, int32_t nestingLevel,
00851                                      UParseError *parseError, UErrorCode &errorCode);
00852 
          /**
           * Parses an argument number from s in the range [start, limit).
           * @return an int32_t; presumably the number, or a negative
           *         UMSGPAT_ARG_NAME_* constant -- confirm.
           */
00861     static int32_t parseArgNumber(const UnicodeString &s, int32_t start, int32_t limit);
00862 
          /** Convenience overload: forwards to the static parseArgNumber() using this object's msg. */
00863     int32_t parseArgNumber(int32_t start, int32_t limit) {
00864         return parseArgNumber(msg, start, limit);
00865     }
00866 
          /**
           * Parses a number from the pattern range [start, limit); returns
           * nothing, so the result is presumably recorded via addPart() /
           * addArgDoublePart().
           * @param allowInfinity presumably whether an infinity sign is accepted.
           */
00875     void parseDouble(int32_t start, int32_t limit, UBool allowInfinity,
00876                      UParseError *parseError, UErrorCode &errorCode);
00877 
00878     // Java has package-private appendReducedApostrophes() here.
00879     // In C++, this is declared in the MessageImpl class.
00880 
00881     int32_t skipWhiteSpace(int32_t index);
00882 
00883     int32_t skipIdentifier(int32_t index);
00884 
          /** Skips a numeric token starting at index (presumably returns the index after it). */
00889     int32_t skipDouble(int32_t index);
00890 
00891     static UBool isArgTypeChar(UChar32 c);
00892 
00893     UBool isChoice(int32_t index);
00894 
00895     UBool isPlural(int32_t index);
00896 
00897     UBool isSelect(int32_t index);
00898 
00899     UBool isOrdinal(int32_t index);
00900 
          /** Predicate on the nesting level; semantics defined out of line. */
00905     UBool inMessageFormatPattern(int32_t nestingLevel);
00906 
          /** Predicate on nesting level and parent argument type; semantics defined out of line. */
00911     UBool inTopLevelChoiceMessage(int32_t nestingLevel, UMessagePatternArgType parentType);
00912 
00913     void addPart(UMessagePatternPartType type, int32_t index, int32_t length,
00914                  int32_t value, UErrorCode &errorCode);
00915 
00916     void addLimitPart(int32_t start,
00917                       UMessagePatternPartType type, int32_t index, int32_t length,
00918                       int32_t value, UErrorCode &errorCode);
00919 
00920     void addArgDoublePart(double numericValue, int32_t start, int32_t length, UErrorCode &errorCode);
00921 
00922     void setParseError(UParseError *parseError, int32_t index);
00923 
00924     UBool init(UErrorCode &errorCode);
00925     UBool copyStorage(const MessagePattern &other, UErrorCode &errorCode);
00926 
          // Apostrophe mode; read by getApostropheMode(), written by
          // clearPatternAndSetApostropheMode().
00927     UMessagePatternApostropheMode aposMode;
          // Pattern text; returned by getPatternString().
00928     UnicodeString msg;
00929     // ArrayList<Part> parts=new ArrayList<Part>();
          // partsList: opaque list object (type only forward-declared here).
          // parts/partsLength: array view used by getPart()/countParts();
          // presumably points into partsList's storage -- confirm.
00930     MessagePatternPartsList *partsList;
00931     Part *parts;
00932     int32_t partsLength;
00933     // ArrayList<Double> numericValues;
          // Same arrangement for the double values (presumably those of
          // ARG_DOUBLE parts -- confirm).
00934     MessagePatternDoubleList *numericValuesList;
00935     double *numericValues;
00936     int32_t numericValuesLength;
          // Flags returned by hasNamedArguments() / hasNumberedArguments().
00937     UBool hasArgNames;
00938     UBool hasArgNumbers;
          // Presumably records whether autoQuoteApostropheDeep() has work to do -- confirm.
00939     UBool needsAutoQuoting;
00940 };
00941 
00942 U_NAMESPACE_END
00943 
00944 #endif  // !UCONFIG_NO_FORMATTING
00945 
00946 #endif  // __MESSAGEPATTERN_H__
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines