Rogue Wave banner
Previous file | Top of Document | Contents | Index page | Next file
Internationalization Module Reference Guide
Rogue Wave web site:  Home Page  |  Main Documentation Page

RWUCharTraits

Module:  Internationalization Module   Group:  Unicode String Processing


Does Not Inherit

Local Index

Members

Header File

#include <rw/i18n/RWUCharTraits.h> 

Description

RWUCharTraits provides methods for querying the properties of Unicode characters.

The Unicode Standard defines a comprehensive set of properties for each code point in the Unicode character set. The set of properties and the values of those properties are specified by the Unicode Character Database:

http://www.unicode.org/unicode/onlinedat/online.html

which is published as part of the Unicode Standard.

Unicode character properties may be either normative or informative, as defined in Chapter 3, "Conformance", of the Unicode Standard.

RWUCharTraits provides access to both normative and informative properties of Unicode characters.

Public Enums

// General category values for Unicode characters; this is the type
// returned by getGeneralCategory(). The comment on each enumerator gives
// its two-letter category code and whether the category is listed as
// normative or informative.
enum GeneralCategory {
    BeginGeneralCategory,
    Unassigned = BeginGeneralCategory,  // shares the Begin value
    UppercaseLetter,                    // Lu, normative
    LowercaseLetter,                    // Ll, normative
    TitlecaseLetter,                    // Lt, normative
    ModifierLetter,                     // Lm, informative
    OtherLetter,                        // Lo, informative
    NonSpacingMark,                     // Mn, normative
    EnclosingMark,                      // Me, normative
    CombiningSpacingMark,               // Mc, normative
    DecimalDigitNumber,                 // Nd, normative
    LetterNumber,                       // Nl, normative
    OtherNumber,                        // No, normative
    SpaceSeparator,                     // Zs, normative
    LineSeparator,                      // Zl, normative
    ParagraphSeparator,                 // Zp, normative
    ControlChar,                        // Cc, normative
    FormatChar,                         // Cf, normative
    PrivateUseChar,                     // Co, normative
    Surrogate,                          // Cs, normative
    DashPunctuation,                    // Pd, informative
    StartPunctuation,                   // Ps, informative
    EndPunctuation,                     // Pe, informative
    ConnectorPunctuation,               // Pc, informative
    OtherPunctuation,                   // Po, informative
    MathSymbol,                         // Sm, informative
    CurrencySymbol,                     // Sc, informative
    ModifierSymbol,                     // Sk, informative
    OtherSymbol,                        // So, informative
    InitialPunctuation,                 // Pi, informative
    FinalPunctuation,                   // Pf, informative
    GeneralOtherTypes,                  // Cn, normative
    EndGeneralCategory                  // one past the last category
};
// Bidirectional category values; this is the type returned by
// getBidirectionalCategory(). The comment on each enumerator gives the
// category's abbreviation.
enum BidirectionalCategory {
    NoBidirectionalCategory,                   // precedes the Begin marker
    BeginBidirectionalCategory,
    LeftToRight = BeginBidirectionalCategory,  // L
    RightToLeft,                               // R
    EuropeanNumber,                            // EN
    EuropeanNumberSeparator,                   // ES
    EuropeanNumberTerminator,                  // ET
    ArabicNumber,                              // AN
    CommonNumberSeparator,                     // CS
    BlockSeparator,                            // B
    SegmentSeparator,                          // S
    WhiteSpaceNeutral,                         // WS
    OtherNeutral,                              // ON
    LeftToRightEmbedding,                      // LRE
    LeftToRightOverride,                       // LRO
    RightToLeftArabic,                         // AL
    RightToLeftEmbedding,                      // RLE
    RightToLeftOverride,                       // RLO
    PopDirectionalFormat,                      // PDF
    DirNonSpacingMark,                         // NSM
    BoundaryNeutral,                           // BN
    EndBidirectionalCategory                   // one past the last category
};
// Unicode block identifiers; this is the type returned by getBlock().
// Enumerators are numbered consecutively starting from BeginBlock.
enum Block {
    BeginBlock,
    BasicLatinBlock = BeginBlock,  // first block shares the Begin value
    Latin1SupplementBlock,
    LatinExtendedABlock,
    LatinExtendedBBlock,
    IpaExtensionsBlock,
    SpacingModifierLettersBlock,
    CombiningDiacriticalMarksBlock,
    GreekAndCopticBlock,
    CyrillicBlock,
    ArmenianBlock,
    HebrewBlock,
    ArabicBlock,
    SyriacBlock,
    ThaanaBlock,
    DevanagariBlock,
    BengaliBlock,
    GurmukhiBlock,
    GujaratiBlock,
    OriyaBlock,
    TamilBlock,
    TeluguBlock,
    KannadaBlock,
    MalayalamBlock,
    SinhalaBlock,
    ThaiBlock,
    LaoBlock,
    TibetanBlock,
    MyanmarBlock,
    GeorgianBlock,
    HangulJamoBlock,
    EthiopicBlock,
    CherokeeBlock,
    UnifiedCanadianAboriginalSyllabicsBlock,
    OghamBlock,
    RunicBlock,
    TagalogBlock,
    HanunooBlock,
    BuhidBlock,
    TagbanwaBlock,
    KhmerBlock,
    MongolianBlock,
    LatinExtendedAdditionalBlock,
    GreekExtendedBlock,
    GeneralPunctuationBlock,
    SuperscriptsAndSubscriptsBlock,
    CurrencySymbolsBlock,
    CombiningDiacriticalMarksForSymbolsBlock,
    LetterlikeSymbolsBlock,
    NumberFormsBlock,
    ArrowsBlock,
    MathematicalOperatorsBlock,
    MiscellaneousTechnicalBlock,
    ControlPicturesBlock,
    OpticalCharacterRecognitionBlock,
    EnclosedAlphanumericsBlock,
    BoxDrawingBlock,
    BlockElementsBlock,
    GeometricShapesBlock,
    MiscellaneousSymbolsBlock,
    DingbatsBlock,
    MiscellaneousMathematicalSymbolsABlock,
    SupplementalArrowsABlock,
    BraillePatternsBlock,
    SupplementalArrowsBBlock,
    MiscellaneousMathematicalSymbolsBBlock,
    SupplementalMathematicalOperatorsBlock,
    CjkRadicalsSupplementBlock,
    KangxiRadicalsBlock,
    IdeographicDescriptionCharactersBlock,
    CjkSymbolsAndPunctuationBlock,
    HiraganaBlock,
    KatakanaBlock,
    BopomofoBlock,
    HangulCompatibilityJamoBlock,
    KanbunBlock,
    BopomofoExtendedBlock,
    KatakanaPhoneticExtensionsBlock,
    EnclosedCjkLettersAndMonthsBlock,
    CjkCompatibilityBlock,
    CjkUnifiedIdeographsExtensionABlock,
    CjkUnifiedIdeographsBlock,
    YiSyllablesBlock,
    YiRadicalsBlock,
    HangulSyllablesBlock,
    HighSurrogatesBlock,
    HighPrivateUseSurrogatesBlock,
    LowSurrogatesBlock,
    PrivateUseAreaBlock,
    CjkCompatibilityIdeographsBlock,
    AlphabeticPresentationFormsBlock,
    ArabicPresentationFormsABlock,
    CombiningHalfMarksBlock,
    CjkCompatibilityFormsBlock,
    SmallFormVariantsBlock,
    ArabicPresentationFormsBBlock,
    VariationSelectorsBlock,
    HalfwidthAndFullwidthFormsBlock,
    SpecialsBlock,
    OldItalicBlock,
    GothicBlock,
    DeseretBlock,
    ByzantineMusicalSymbolsBlock,
    MusicalSymbolsBlock,
    MathematicalAlphanumericSymbolsBlock,
    CjkUnifiedIdeographsExtensionBBlock,
    CjkCompatibilityIdeographsSupplementBlock,
    TagsBlock,
    PrivateUseBlock,
    SupplementaryPrivateUseAreaABlock,
    SupplementaryPrivateUseAreaBBlock,
    NoBlock,
    // NOTE(review): InvalidBlock takes the value after NoBlock, while
    // EndBlock aliases NoBlock -- confirm this matches the shipped header.
    InvalidBlock,
    EndBlock = NoBlock
};
// Script identifiers; this is the type returned by getScript().
// Enumerators are numbered consecutively starting from BeginScript.
enum Script {
    BeginScript,
    Latin = BeginScript,  // first script shares the Begin value
    Greek,
    Cyrillic,
    Armenian,
    Hebrew,
    Arabic,
    Syriac,
    Thaana,
    Devanagari,
    Bengali,
    Gurmukhi,
    Gujarati,
    Oriya,
    Tamil,
    Telugu,
    Kannada,
    Malayalam,
    Sinhala,
    Thai,
    Lao,
    Tibetan,
    Myanmar,
    Georgian,
    Hangul,
    Ethiopic,
    Cherokee,
    CanadianAboriginal,
    Ogham,
    Runic,
    Khmer,
    Mongolian,
    Hiragana,
    Katakana,
    Bopomofo,
    Han,
    Yi,
    OldItalic,
    Gothic,
    Deseret,
    Inherited,
    NoScript,
    InvalidScript = NoScript,  // alias of NoScript
    EndScript = NoScript       // alias of NoScript
};
// East Asian width values; this is the type returned by
// getEastAsianWidth(). Enumerators are numbered consecutively from zero.
enum EastAsianWidth {
    NeutralWidth,
    AmbiguousWidth,
    HalfWidth,
    FullWidth,
    NarrowWidth,
    WideWidth
};

NOTE -- The Internationalization Module adopts the set of values that is supported by the underlying ICU implementation.
// Combining class values; this is the type returned by
// getCombiningClass().
//
// Several enumerators are aliases that share one value: every
// "= BaseEquivalent" entry equals BeginCombiningClass, and the Thai, Lao
// and Tibetan entries with an initializer equal the enumerator they name.
// Enumerators without an initializer take the previous value plus one.
enum CombiningClass {
    BeginCombiningClass,
    BaseEquivalent = BeginCombiningClass,
    Spacing = BaseEquivalent,
    Nonspacing = BaseEquivalent,
    Split = BaseEquivalent,
    Enclosing = BaseEquivalent,
    Reordrant = BaseEquivalent,
    TibetanSubjoined = BaseEquivalent,
    OverlayInterior,
    Nuktas,
    KanaVoicingMarks,
    Viramas,

    // Fixed-position classes: StartOfFixedPositionClasses up to, but not
    // including, EndOfFixedPositionClasses.
    StartOfFixedPositionClasses,
    HebrewPointSheva = StartOfFixedPositionClasses,
    HebrewPointHatafSegol,
    HebrewPointHatafPatah,
    HebrewPointHatafQamats,
    HebrewPointHiriq,
    HebrewPointTsere,
    HebrewPointSegol,
    HebrewPointPatah,
    HebrewPointQamats,
    HebrewPointHolam,
    HebrewPointQubuts,
    HebrewPointDagishOrMapiq,
    HebrewPointMeteg,
    HebrewPointRafe,
    HebrewPointShinDot,
    HebrewPointSinDot,
    HebrewPointJudeoSpanishVarika,
    ArabicFathatan,
    ArabicDammatan,
    ArabicKasratan,
    ArabicFatha,
    ArabicDamma,
    ArabicKasra,
    ArabicShadda,
    ArabicSukun,
    ArabicLetterSuperscriptAlef,
    SyriacLetterSuperscriptAlaph,
    TeluguLengthMark,
    TeluguAiLengthMark,
    ThaiCharacterSaraU,
    ThaiCharacterSaraUu = ThaiCharacterSaraU,
    ThaiCharacterMaiEk,
    ThaiCharacterMaiTho = ThaiCharacterMaiEk,
    ThaiCharacterMaiTri = ThaiCharacterMaiEk,
    ThaiCharacterMaiChattawa = ThaiCharacterMaiEk,
    LaoVowelSignU,
    LaoVowelSignUu = LaoVowelSignU,
    LaoToneMaiEk,
    LaoToneMaiTho = LaoToneMaiEk,
    LaoToneMaiTi = LaoToneMaiEk,
    LaoToneMaiCatawa = LaoToneMaiEk,
    TibetanVowelSignAa,
    TibetanVowelSignI,
    TibetanVowelSignE = TibetanVowelSignI,
    TibetanVowelSignEe = TibetanVowelSignI,
    TibetanVowelSignO = TibetanVowelSignI,
    TibetanVowelSignOo = TibetanVowelSignI,
    TibetanVowelSignReversedI,  // separating comma was missing in the listing
    TibetanVowelSignU,
    EndOfFixedPositionClasses,

    BelowLeftAttached,
    BelowAttached,
    BelowRightAttached,
    LeftAttached,
    RightAttached,
    AboveLeftAttached,
    AboveAttached,
    AboveRightAttached,
    BelowLeft,
    Below,
    BelowRight,
    Left,
    Right,
    AboveLeft,
    Above,
    AboveRight,
    DoubleBelow,
    DoubleAbove,
    IotaSubscript,
    EndCombiningClass,
    NoCombiningClass  // follows the End marker
};

NOTE -- Some of the combining classes in this enumeration do not currently describe any Unicode characters but are specified here for completeness.

Static Member Functions

static BidirectionalCategory
getBidirectionalCategory(RWUChar32 cp);
static Block
getBlock(RWUChar32 cp);
static RWUChar32
getChar32(const char* name, bool isDeprecatedName = false);
static CombiningClass
getCombiningClass(RWUChar32 cp);
static int32_t
getDecimalValue(RWUChar32 cp);
static RWUChar32
getDigit(int32_t value, int8_t radix);
static EastAsianWidth
getEastAsianWidth(RWUChar32 cp);
static GeneralCategory
getGeneralCategory(RWUChar32 cp);
static RWUChar32
getMirror(RWUChar32 cp);
static RWCString
getName(RWUChar32 cp, bool getDeprecatedName = false);
static int32_t
getNumericValue(RWUChar32 cp, int8_t radix);
static Script
getScript(RWUChar32 cp);
static const RWUChar32*
getWhitespace();
static bool
isCharacter(RWUChar32 cp);
static bool
isControl(RWUChar32 cp);
static bool
isDecimalDigit(RWUChar32 cp);
static bool
isDefined(RWUChar32 cp);
static bool
isDigit(RWUChar32 cp);
static bool
isError(RWUChar32 cp);
static bool
isHighSurrogate(RWUChar16 cu);
static bool
isLetter(RWUChar32 cp);
static bool
isLower(RWUChar32 cp);
static bool
isLowSurrogate(RWUChar16 cu);
static bool
isMirrored(RWUChar32 cp);
static bool
isNumeric(RWUChar32 cp);
static bool
isPunctuation(RWUChar32 cp);
static bool
isSingle(RWUChar16 cu);
static bool
isSpace(RWUChar32 cp);
static bool
isSurrogate(RWUChar16 cu);
static bool
isTitle(RWUChar32 cp);
static bool
isUpper(RWUChar32 cp);
static bool
isWhitespace(RWUChar32 cp);
static bool
requiresSurrogatePair(RWUChar32 cp);

NOTE -- For efficiency, requiresSurrogatePair() does not check whether the given code point is valid.
static RWUChar32
toLower(RWUChar32 cp);
static RWUChar32
toTitle(RWUChar32 cp);

NOTE -- toTitle() can map a character only when its mapping is a single character. It cannot be used for mappings where the source character maps to more than one character, or where the mapping is context-dependent. The single-character mappings provided by this method are insufficient for languages such as German. For full case mappings, use RWUString::toTitle().
static RWUChar32
toUpper(RWUChar32 cp);


Previous file | Top of Document | Contents | Index page | Next file

© Copyright Rogue Wave Software, Inc. All Rights Reserved.
Rogue Wave and SourcePro are registered trademarks of Rogue Wave Software, Inc. in the United States and other countries. All other trademarks are the property of their respective owners.
Contact Rogue Wave about documentation or support issues.