InDesign SDK  20.5
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends Groups Pages
UnicodeClass Class Reference

#include <UnicodeClass.h>

Public Types

enum  CharacterType {
  kCharacterType_Unknown = 0, kCharacterType_DoubleByte = 0x0001, kCharacterType_Roman = 0x0002, kCharacterType_Upper = 0x0004,
  kCharacterType_Numeric = 0x0008, kCharacterType_Hiragana = 0x0010, kCharacterType_Katakana = 0x0020, kCharacterType_SmallKana = 0x0040,
  kCharacterType_Nobashi = 0x0080, kCharacterType_Kanji = 0x0100, kCharacterType_Symbol = 0x0200, kCharacterType_OpenParenthesis = 0x0400,
  kCharacterType_CloseParenthesis = 0x0800, kCharacterType_Period = 0x1000, kCharacterType_Comma = 0x2000, kCharacterType_MiddlePunc = 0x4000,
  kCharacterType_Other = 0x8000
}
 
enum  IgnoreCharacterDetails {
  kIgnoreZeroWidthOnly = 0, kIgnoreDiscretionaryHyphens = 0x01, kIgnoreCalculatedText = 0x02, kIgnoreTableCharacters = 0x04,
  kIgnoreInlineGraphics = 0x08, kIgnoreNewLine = 0x010, kIgnoreSpecialGlyph = 0x020, kIgnoreUnicodeVariation = 0x040,
  kIgnoreNonLegal = kIgnoreTableCharacters + kIgnoreInlineGraphics + kIgnoreNewLine, kIgnoreSpellingIgnorable = 0x07F
}
 

Static Public Member Functions

static bool IsLetter (const UTF32TextChar &c)
 
static bool IsCJKLetter (const UTF32TextChar &c)
 
static bool IsNonCJKLetter (const UTF32TextChar &c)
 
static bool IsHiragana (const UTF32TextChar &c)
 
static bool IsKatakana (const UTF32TextChar &c)
 
static bool IsCJKIdeograph (const UTF32TextChar &c)
 
static bool IsBopomofo (const UTF32TextChar &c)
 
static bool IsJamo (const UTF32TextChar &c)
 
static bool IsHangul (const UTF32TextChar &c)
 
static bool IsRomanDigit (const UTF32TextChar &c)
 
static bool IsJapaneseNumber (const UTF32TextChar &c)
 
static bool IsAnyNumber (const UTF32TextChar &c)
 
static bool IsWhiteSpace (const UTF32TextChar &c)
 
static bool IsCombiningMark (const UTF32TextChar &c)
 
static bool IsDiacritic_WorldReady (const UTF32TextChar &c)
 
static bool IsPunctuation (const UTF32TextChar &c)
 
static bool IsPunctuationDash (const UTF32TextChar &c)
 
static bool IsPunctuationOpen (const UTF32TextChar &c)
 
static bool IsPunctuationClose (const UTF32TextChar &c)
 
static bool IsPunctuationInitialQuote (const UTF32TextChar &c)
 
static bool IsPunctuationFinalQuote (const UTF32TextChar &c)
 
static bool IsMidWordPunctuation (const UTF32TextChar &c)
 
static bool IsSymbol (const UTF32TextChar &c)
 
static bool IsSymbolMath (const UTF32TextChar &c)
 
static bool IsSymbolCurrency (const UTF32TextChar &c)
 
static bool IsGreek (const UTF32TextChar &c)
 
static bool IsCyrillic (const UTF32TextChar &c)
 
static bool IsThai (const UTF32TextChar &c)
 
static int GetLocale (const UTF32TextChar &c)
 
static UTF32TextChar ToUpper (const UTF32TextChar &c)
 
static bool IsUppercase (const UTF32TextChar &c)
 
static bool IsUpper (const UTF32TextChar &c)
 
static bool CanChangeToUppercase (const UTF32TextChar &c)
 
static UTF32TextChar ToLower (const UTF32TextChar &c)
 
static bool IsLowercase (const UTF32TextChar &c)
 
static bool IsLower (const UTF32TextChar &c)
 
static bool CanChangeToLowercase (const UTF32TextChar &c)
 
static UTF32TextChar ToTitle (const UTF32TextChar &c)
 
static bool IsTitlecase (const UTF32TextChar &c)
 
static bool CanChangeToTitlecase (const UTF32TextChar &c)
 
static bool StartsUppercase (const UTF32TextChar &c)
 
static bool IsCJKFullWidth (const UTF32TextChar &c)
 
static UTF32TextChar ToFullWidthVariant (const UTF32TextChar &c)
 
static bool IsNarrowVariant (const UTF32TextChar &c)
 
static UTF32TextChar ToNarrowVariant (const UTF32TextChar &c)
 
static UTF32TextChar ToFirstBaseChar (const UTF32TextChar &c)
 
static UTF32TextChar ToUltimateBaseChar (const UTF32TextChar &c)
 
static CharacterType GetCharacterType (const UTF32TextChar &c)
 
static bool IsHighSurrogate (UTF16TextChar c)
 
static bool IsLowSurrogate (UTF16TextChar c)
 
static bool IsSurrogate (UTF16TextChar c)
 
static bool IsVariationSelector (const UTF32TextChar &c)
 
static bool IsBasicLatin (const UTF32TextChar &c)
 
static bool IsLatin1 (const UTF32TextChar &c)
 
static bool IsLatinExtendedA (const UTF32TextChar &c)
 
static bool IsLatinExtendedB (const UTF32TextChar &c)
 
static bool IsSuperscriptOrSubscript (const UTF32TextChar &c)
 
static bool IsIgnoredCharacter (const UTF32TextChar &n, IgnoreCharacterDetails ignoreDischy=kIgnoreDiscretionaryHyphens)
 
static bool IsHebrewLetter (const UTF32TextChar &c)
 
static bool IsArabicLetter (const UTF32TextChar &c)
 

Detailed Description

UnicodeClass is used for classification of Unicode characters used in InDesign. It is primarily a wrapper around the ICU library.

See Also
IGlyphUtils.h for GetUnicodeName.

Member Enumeration Documentation

Character Type. Bit-field used to classify characters into various classes for processing.

IgnoreCharacterDetails. What type of characters should be "ignored" by IsIgnoredCharacter.
Enumerator
kIgnoreZeroWidthOnly 

zero-width stuff, break run-in, indent-here

kIgnoreDiscretionaryHyphens 

discretionary hyphens

kIgnoreCalculatedText 

page number, section name, footnote

kIgnoreTableCharacters 

table, table continued

kIgnoreInlineGraphics 

inline graphic

kIgnoreNewLine 

CR, LF

kIgnoreSpecialGlyph 

roman & non-roman

kIgnoreUnicodeVariation 

unicode variation sequence characters

kIgnoreNonLegal 

combination of kIgnoreTableCharacters, kIgnoreInlineGraphics and kIgnoreNewLine

kIgnoreSpellingIgnorable 

all of the above flags combined (0x07F)

Member Function Documentation

static CharacterType UnicodeClass::GetCharacterType (const UTF32TextChar &c)
static

Get character type.

Returns
a classification of the unicode character c.
static int UnicodeClass::GetLocale (const UTF32TextChar &c)
static
Locale.

Returns
int corresponding to the locale to which the character belongs.
static bool UnicodeClass::IsAnyNumber (const UTF32TextChar &c)
static

Any numbers.

Returns
true if unicode character c is a number in any script.
static bool UnicodeClass::IsBasicLatin (const UTF32TextChar &c)
inline static

Basic Latin (low ASCII).

Returns
true if unicode character c is low ASCII.

static bool UnicodeClass::IsBopomofo (const UTF32TextChar &c)
static

Chinese bopomofo.

Returns
true if unicode character c is Chinese bopomofo.
static bool UnicodeClass::IsCJKFullWidth (const UTF32TextChar &c)
static

Full width (CJK).

Returns
true if unicode character c is full width (1 em-box).

static bool UnicodeClass::IsCJKIdeograph (const UTF32TextChar &c)
static

CJK unified ideographs.

Returns
true if unicode character c is a CJK unified ideograph.
static bool UnicodeClass::IsCJKLetter (const UTF32TextChar &c)
static

CJK Letters. All CJK letters: ideograph, kana, hangul, half-width kana, etc.

Returns
true if unicode character c is a CJK letter.
static bool UnicodeClass::IsCombiningMark (const UTF32TextChar &c)
static

Combining marks.

Returns
true if unicode character c is any kind of combining mark (mostly diacritics).
static bool UnicodeClass::IsCyrillic (const UTF32TextChar &c)
static

Cyrillic.

Returns
true if unicode character c is a Cyrillic character.
static bool UnicodeClass::IsDiacritic_WorldReady (const UTF32TextChar &c)
static

Diacritics.

Returns
true if the unicode character c is a diacritic according to WRUDGetCharacterProperty.
static bool UnicodeClass::IsGreek (const UTF32TextChar &c)
static

Greek.

Returns
true if unicode character c is a Greek character.
static bool UnicodeClass::IsHangul (const UTF32TextChar &c)
static

Korean hangul.

Returns
true if unicode character c is Korean hangul.
static bool UnicodeClass::IsHighSurrogate (UTF16TextChar c)
inline static

High surrogate.

Returns
true if the UTF16 character c is a high surrogate.
static bool UnicodeClass::IsHiragana (const UTF32TextChar &c)
static

Japanese Hiragana.

Returns
true if unicode character c is hiragana.
static bool UnicodeClass::IsIgnoredCharacter (const UTF32TextChar &n,
IgnoreCharacterDetails ignoreDischy = kIgnoreDiscretionaryHyphens
)
static

The set of "characters" that occupy spots in the model that should be treated neither as whitespace nor as an actual character. These include markers in the text that the user would not consider as something that they had entered into the text. This set does NOT include inline graphic markers or table markers, which always take up a position in the visible text. The ignoreDischy flags (see IgnoreCharacterDetails) select which types of characters are treated as ignored.

static bool UnicodeClass::IsJamo (const UTF32TextChar &c)
static

Korean jamo.

Returns
true if unicode character c is Korean jamo.
static bool UnicodeClass::IsJapaneseNumber (const UTF32TextChar &c)
static

Japanese Kanji numbers.

Returns
true if unicode character c is a Japanese Kanji numeral.
static bool UnicodeClass::IsKatakana (const UTF32TextChar &c)
static

Japanese Katakana.

Returns
true if unicode character c is Katakana.
static bool UnicodeClass::IsLatin1 (const UTF32TextChar &c)
inline static

Latin 1.

Returns
true if unicode character c is in the Latin 1 unicode group.

static bool UnicodeClass::IsLatinExtendedA (const UTF32TextChar &c)
inline static

Latin extended A.

Returns
true if unicode character c is in the Latin extended A unicode group.

static bool UnicodeClass::IsLatinExtendedB (const UTF32TextChar &c)
inline static

Latin extended B.

Returns
true if unicode character c is in the Latin extended B unicode group.

static bool UnicodeClass::IsLetter (const UTF32TextChar &c)
static

Letters. Not symbols or punctuation, only letters, but ALL letters: ideograph, kana, latin, greek, devanagari, etc.

Returns
true if unicode character c is a letter.
static bool UnicodeClass::IsLowercase (const UTF32TextChar &c)
static

Is lowercase.

Returns
true if the unicode character c is lowercase.
static bool UnicodeClass::IsLowSurrogate (UTF16TextChar c)
inline static

Low surrogate.

Returns
true if the UTF16 character c is a low surrogate.
static bool UnicodeClass::IsMidWordPunctuation (const UTF32TextChar &c)
static

Mid-word punctuation. Used for word counting mostly.

Returns
true if unicode character c is any kind of mid-word punctuation.
static bool UnicodeClass::IsNarrowVariant (const UTF32TextChar &c)
inline static

Is narrow variant. NOT the same as !IsCJKFullWidth().

Returns
true if the unicode character c is the narrow variant of another character.
static bool UnicodeClass::IsNonCJKLetter (const UTF32TextChar &c)
static

Letters, but not CJK letters. E.g. latin, greek, hebrew, etc.

Returns
true if unicode character c is a non-CJK letter.
static bool UnicodeClass::IsPunctuation (const UTF32TextChar &c)
static

Punctuation.

Returns
true if unicode character c is any kind of punctuation.
static bool UnicodeClass::IsPunctuationClose (const UTF32TextChar &c)
static

Closing punctuation.

Returns
true if unicode character c is any kind of closing punctuation mark.
static bool UnicodeClass::IsPunctuationDash (const UTF32TextChar &c)
static

Punctuation dash.

Returns
true if unicode character c is any kind of dash.
static bool UnicodeClass::IsPunctuationFinalQuote (const UTF32TextChar &c)
static

Final quotation mark.

Returns
true if unicode character c is any kind of final quotation punctuation.
static bool UnicodeClass::IsPunctuationInitialQuote (const UTF32TextChar &c)
static

Initial quotation mark.

Returns
true if unicode character c is any kind of initial quotation punctuation.
static bool UnicodeClass::IsPunctuationOpen (const UTF32TextChar &c)
static

Open punctuation.

Returns
true if unicode character c is any kind of opening punctuation mark.
static bool UnicodeClass::IsRomanDigit (const UTF32TextChar &c)
static

Numbers 0-9.

Returns
true if unicode character c is 0-9.
static bool UnicodeClass::IsSuperscriptOrSubscript (const UTF32TextChar &c)
inline static

Superscript, subscript.

Returns
true if unicode character c is a unicode superscript or subscript.

static bool UnicodeClass::IsSurrogate (UTF16TextChar c)
inline static

Is surrogate.

Returns
true if the UTF16 character c is a high or low surrogate.
static bool UnicodeClass::IsSymbol (const UTF32TextChar &c)
static

Symbol.

Returns
true if unicode character c is any kind of symbol character.
static bool UnicodeClass::IsSymbolCurrency (const UTF32TextChar &c)
static

Currency symbol.

Returns
true if unicode character c is any kind of currency symbol.
static bool UnicodeClass::IsSymbolMath (const UTF32TextChar &c)
static

Math symbol.

Returns
true if unicode character c is any kind of math symbol.
static bool UnicodeClass::IsThai (const UTF32TextChar &c)
static

Thai.

Returns
true if unicode character c is a Thai character.
static bool UnicodeClass::IsTitlecase (const UTF32TextChar &c)
static

Is title case. For unicode characters that have the idea of 2 glyphs (u1CB = Nj, u1F2 = Dz, etc).

Returns
whether the unicode character c is a 2-letter title-case unicode character.
static bool UnicodeClass::IsUppercase (const UTF32TextChar &c)
static

Is uppercase.

Returns
true if the unicode character c is uppercase.
static bool UnicodeClass::IsWhiteSpace (const UTF32TextChar &c)
static

White Space.

Returns
true if unicode character c is white space: spaces, tabs, etc.
static bool UnicodeClass::StartsUppercase (const UTF32TextChar &c)
static

Starts uppercase.

Returns
true if the unicode character c is uppercase or title case.
static UTF32TextChar UnicodeClass::ToFirstBaseChar (const UTF32TextChar &c)
static

To first base character.

Returns
the unicode character that results from stripping off the lowest priority diacritical mark.
static UTF32TextChar UnicodeClass::ToFullWidthVariant (const UTF32TextChar &c)
static

To full width (CJK).

Returns
the full-width variant (if one exists) of the unicode character c.

static UTF32TextChar UnicodeClass::ToLower (const UTF32TextChar &c)
static

Lowercase.

Returns
the lowercase variant of the unicode character c.
static UTF32TextChar UnicodeClass::ToNarrowVariant (const UTF32TextChar &c)
static

To narrow variant.

Returns
the narrow variant of the unicode character c.
static UTF32TextChar UnicodeClass::ToTitle (const UTF32TextChar &c)
static

Title case. For unicode characters that have the idea of 2 glyphs (u1CB = Nj, u1F2 = Dz, etc). NOTE: for lowercase letters in general, ToTitle does NOT capitalize them.

Returns
the title-case variant of 2-letter unicode character c.
static UTF32TextChar UnicodeClass::ToUltimateBaseChar (const UTF32TextChar &c)
static

To ultimate base character.

Returns
the unicode character that results from stripping off all diacritical marks.
static UTF32TextChar UnicodeClass::ToUpper (const UTF32TextChar &c)
static

Uppercase.

Returns
the uppercase variant of the unicode character c.