![]() | InDesign SDK 20.5 |
#include <UnicodeClass.h>
Public Types | |
| enum | CharacterType { kCharacterType_Unknown = 0, kCharacterType_DoubleByte = 0x0001, kCharacterType_Roman = 0x0002, kCharacterType_Upper = 0x0004, kCharacterType_Numeric = 0x0008, kCharacterType_Hiragana = 0x0010, kCharacterType_Katakana = 0x0020, kCharacterType_SmallKana = 0x0040, kCharacterType_Nobashi = 0x0080, kCharacterType_Kanji = 0x0100, kCharacterType_Symbol = 0x0200, kCharacterType_OpenParenthesis = 0x0400, kCharacterType_CloseParenthesis = 0x0800, kCharacterType_Period = 0x1000, kCharacterType_Comma = 0x2000, kCharacterType_MiddlePunc = 0x4000, kCharacterType_Other = 0x8000 } |
| enum | IgnoreCharacterDetails { kIgnoreZeroWidthOnly = 0, kIgnoreDiscretionaryHyphens = 0x01, kIgnoreCalculatedText = 0x02, kIgnoreTableCharacters = 0x04, kIgnoreInlineGraphics = 0x08, kIgnoreNewLine = 0x010, kIgnoreSpecialGlyph = 0x020, kIgnoreUnicodeVariation = 0x040, kIgnoreNonLegal = kIgnoreTableCharacters + kIgnoreInlineGraphics + kIgnoreNewLine, kIgnoreSpellingIgnorable = 0x07F } |
Static Public Member Functions | |
| static bool | IsLetter (const UTF32TextChar &c) |
| static bool | IsCJKLetter (const UTF32TextChar &c) |
| static bool | IsNonCJKLetter (const UTF32TextChar &c) |
| static bool | IsHiragana (const UTF32TextChar &c) |
| static bool | IsKatakana (const UTF32TextChar &c) |
| static bool | IsCJKIdeograph (const UTF32TextChar &c) |
| static bool | IsBopomofo (const UTF32TextChar &c) |
| static bool | IsJamo (const UTF32TextChar &c) |
| static bool | IsHangul (const UTF32TextChar &c) |
| static bool | IsRomanDigit (const UTF32TextChar &c) |
| static bool | IsJapaneseNumber (const UTF32TextChar &c) |
| static bool | IsAnyNumber (const UTF32TextChar &c) |
| static bool | IsWhiteSpace (const UTF32TextChar &c) |
| static bool | IsCombiningMark (const UTF32TextChar &c) |
| static bool | IsDiacritic_WorldReady (const UTF32TextChar &c) |
| static bool | IsPunctuation (const UTF32TextChar &c) |
| static bool | IsPunctuationDash (const UTF32TextChar &c) |
| static bool | IsPunctuationOpen (const UTF32TextChar &c) |
| static bool | IsPunctuationClose (const UTF32TextChar &c) |
| static bool | IsPunctuationInitialQuote (const UTF32TextChar &c) |
| static bool | IsPunctuationFinalQuote (const UTF32TextChar &c) |
| static bool | IsMidWordPunctuation (const UTF32TextChar &c) |
| static bool | IsSymbol (const UTF32TextChar &c) |
| static bool | IsSymbolMath (const UTF32TextChar &c) |
| static bool | IsSymbolCurrency (const UTF32TextChar &c) |
| static bool | IsGreek (const UTF32TextChar &c) |
| static bool | IsCyrillic (const UTF32TextChar &c) |
| static bool | IsThai (const UTF32TextChar &c) |
| static int | GetLocale (const UTF32TextChar &c) |
| static UTF32TextChar | ToUpper (const UTF32TextChar &c) |
| static bool | IsUppercase (const UTF32TextChar &c) |
| static bool | IsUpper (const UTF32TextChar &c) |
| static bool | CanChangeToUppercase (const UTF32TextChar &c) |
| static UTF32TextChar | ToLower (const UTF32TextChar &c) |
| static bool | IsLowercase (const UTF32TextChar &c) |
| static bool | IsLower (const UTF32TextChar &c) |
| static bool | CanChangeToLowercase (const UTF32TextChar &c) |
| static UTF32TextChar | ToTitle (const UTF32TextChar &c) |
| static bool | IsTitlecase (const UTF32TextChar &c) |
| static bool | CanChangeToTitlecase (const UTF32TextChar &c) |
| static bool | StartsUppercase (const UTF32TextChar &c) |
| static bool | IsCJKFullWidth (const UTF32TextChar &c) |
| static UTF32TextChar | ToFullWidthVariant (const UTF32TextChar &c) |
| static bool | IsNarrowVariant (const UTF32TextChar &c) |
| static UTF32TextChar | ToNarrowVariant (const UTF32TextChar &c) |
| static UTF32TextChar | ToFirstBaseChar (const UTF32TextChar &c) |
| static UTF32TextChar | ToUltimateBaseChar (const UTF32TextChar &c) |
| static CharacterType | GetCharacterType (const UTF32TextChar &c) |
| static bool | IsHighSurrogate (UTF16TextChar c) |
| static bool | IsLowSurrogate (UTF16TextChar c) |
| static bool | IsSurrogate (UTF16TextChar c) |
| static bool | IsVariationSelector (const UTF32TextChar &c) |
| static bool | IsBasicLatin (const UTF32TextChar &c) |
| static bool | IsLatin1 (const UTF32TextChar &c) |
| static bool | IsLatinExtendedA (const UTF32TextChar &c) |
| static bool | IsLatinExtendedB (const UTF32TextChar &c) |
| static bool | IsSuperscriptOrSubscript (const UTF32TextChar &c) |
| static bool | IsIgnoredCharacter (const UTF32TextChar &n, IgnoreCharacterDetails ignoreDischy=kIgnoreDiscretionaryHyphens) |
| static bool | IsHebrewLetter (const UTF32TextChar &c) |
| static bool | IsArabicLetter (const UTF32TextChar &c) |
UnicodeClass is used for classification of Unicode characters used in InDesign. It is primarily a wrapper around the ICU library.
Character Type. Bit-field used to classify characters into various classes for processing.
IgnoreCharacterDetails. What type of characters should be "ignored" by IsIgnoredCharacter.
| static |
Get character type.
| static |
Locale.
| static |
Any numbers.
| inlinestatic |
Basic Latin (low ASCII). Returns true if Unicode character c is low ASCII.
| static |
Chinese bopomofo.
| static |
Full width (CJK). Returns true if Unicode character c is full width (1 em-box).
| static |
CJK unified ideographs.
| static |
CJK Letters. All CJK letters: ideograph, kana, hangul, half-width kana, etc.
| static |
Combining marks.
| static |
Cyrillic.
| static |
Diacritics.
| static |
Greek.
| static |
Korean hangul.
| inlinestatic |
High surrogate.
| static |
Japanese Hiragana.
| static |
The set of "characters" that occupy spots in the model that should be treated neither as whitespace nor as an actual character. These include markers in the text that the user would not consider as something that they had entered into the text. This set does NOT include inline graphic markers or table markers, which always take up a position in the visible text.
| static |
Korean jamo.
| static |
Japanese Kanji numbers.
| static |
Japanese Katakana.
| inlinestatic |
Latin 1. Returns true if Unicode character c is in the Latin 1 Unicode group.
| inlinestatic |
Latin extended A. Returns true if Unicode character c is in the Latin extended A Unicode group.
| inlinestatic |
Latin extended B. Returns true if Unicode character c is in the Latin extended B Unicode group.
| static |
Letters. Not symbols or punctuation, only letters, but ALL letters: ideograph, kana, latin, greek, devanagari, etc.
| static |
Is lowercase.
| inlinestatic |
Low surrogate.
| static |
Mid-word punctuation. Used for word counting mostly.
| inlinestatic |
Is narrow variant. NOT the same as !IsCJKFullWidth().
| static |
Letters, but not CJK letters. E.g. latin, greek, hebrew, etc.
| static |
Punctuation.
| static |
Closing punctuation.
| static |
Punctuation dash.
| static |
Final quotation mark.
| static |
Initial quotation mark.
| static |
Open punctuation.
| static |
Numbers 0-9.
| inlinestatic |
Superscript or subscript. Returns true if Unicode character c is a Unicode superscript or subscript.
| inlinestatic |
Is surrogate.
| static |
Symbol.
| static |
Currency symbol.
| static |
Math symbol.
| static |
Thai.
| static |
Is title case. For unicode characters that have the idea of 2 glyphs (u1CB = Nj, u1F2 = Dz, etc).
| static |
Is uppercase.
| static |
White Space.
| static |
Starts uppercase.
| static |
To first base character.
| static |
To full width (CJK). Returns the full-width variant (if one exists) of the Unicode character c.
| static |
Lowercase.
| static |
To narrow variant.
| static |
Title case. For unicode characters that have the idea of 2 glyphs (u1CB = Nj, u1F2 = Dz, etc). NOTE: for lowercase letters in general, ToTitle does NOT capitalize them.
| static |
To ultimate base character.
| static |
Uppercase.