29 #ifndef __WIDESTRING__ 30 #define __WIDESTRING__ 33 #include "K2Iterator.h" 34 #include "K2TypeTraits.h" 35 #include "Invariant.h" 36 #include "UnicodeClass.h" 37 #include "K2SmartPtr.h" 41 #include "UnicodeSavvyString.h" 77 typedef uint32 UniCodePoint;
// Standard iterator trait typedefs for this iterator type.
// The category is bidirectional. Note that value_type is UniCodePoint
// while pointer and reference are expressed in terms of UTF16TextChar,
// i.e. the raw code-unit type rather than the code-point type.
93 typedef std::bidirectional_iterator_tag iterator_category;
94 typedef UniCodePoint value_type;
95 typedef std::ptrdiff_t difference_type;
96 typedef UTF16TextChar* pointer;
97 typedef UTF16TextChar& reference;
101 WideStringConstUTF32Iter(
const UTF16TextChar *buffer, bool16 hasSurrogates, int32 numChars) : fCurrent(buffer), fHasSurrogates(hasSurrogates)
104 fNumChars = numChars;
124 ASSERT(fPosition >= 0 && fPosition <= fNumChars);
126 return fHasSurrogates ? surro_operStar() : *fCurrent;
132 {
if (fHasSurrogates) surro_operPP();
136 ASSERT(fPosition <= fNumChars);
148 {
if (fHasSurrogates) surro_operMM();
152 ASSERT(fPosition >= 0);
162 {
if (fHasSurrogates) surro_operPE(n);
168 ASSERT(fPosition <= fNumChars);
178 {
if (fHasSurrogates) surro_operME(n);
184 ASSERT(fPosition >= 0);
194 {
return fHasSurrogates ? surro_operDiff(other) : fCurrent - other.fCurrent; }
203 {
return x.fCurrent == y.fCurrent; }
206 {
return x.fCurrent != y.fCurrent; }
209 {
return x.fCurrent < y.fCurrent; }
212 {
return x.fCurrent <= y.fCurrent; }
215 {
return x.fCurrent > y.fCurrent; }
218 {
return x.fCurrent >= y.fCurrent; }
223 int32 Position()
const 224 {
return fPosition; }
228 int32 NumChars()
const 229 {
return fNumChars; }
234 {
return fNumChars; }
239 const UTF16TextChar * operator->()
const {
return fCurrent; }
240 UniCodePoint surro_operStar()
const;
242 void surro_operPE(int32 n);
244 void surro_operME(int32 n);
248 const UTF16TextChar *fCurrent;
249 bool16 fHasSurrogates;
260 template <
typename T>
270 typedef UTF16TextChar& reference_raw;
271 typedef const UTF16TextChar& const_reference_raw;
272 typedef UTF16TextChar value_type_raw;
273 typedef std::ptrdiff_t difference_type;
274 typedef UTF16TextChar* pointer_raw;
275 typedef const UTF16TextChar* const_pointer_raw;
277 typedef UTF16TextChar* iterator_raw;
278 typedef const UTF16TextChar* const_iterator_raw;
296 explicit WideString(WideString::const_pointer_raw s, int32 len = kMaxInt32, int32 numChars = -1);
304 explicit WideString(ConstCString
string, int32 numChars = -1);
308 explicit WideString(
const wchar_t* s, int32 len = kMaxInt32);
318 template <
class IteratorType>
319 WideString(IteratorType b, IteratorType e, size_type nCodePoints = 0)
322 assign(b, e, nCodePoints);
345 void SetCString(ConstCString C, bool16 convertEmbeddedUnicode = kFalse, int32 numChars = -1);
347 void SetX16String(WideString::const_pointer_raw x, int32 len = kMaxInt32, int32 numChars = -1);
348 void SetX16String(
const wchar_t* s, int32 len = kMaxInt32, int32 numChars = -1);
367 iterator_raw end_raw();
369 const_iterator_raw end_raw()
const;
372 reverse_iterator_raw rbegin_raw()
373 { TRACE_IF(
HasMultiWordUnicode(),
"About to hand back non-const iterator on WideString's buffer! Be careful with surrogates!");
return reverse_iterator_raw(end_raw()); }
374 const_reverse_iterator_raw rbegin_raw()
const 375 {
return const_reverse_iterator_raw(end_raw()); }
377 reverse_iterator_raw rend_raw()
378 { TRACE_IF(
HasMultiWordUnicode(),
"About to hand back non-const iterator on WideString's buffer! Be careful with surrogates!");
return reverse_iterator_raw(
begin_raw()); }
379 const_reverse_iterator_raw rend_raw()
const 380 {
return const_reverse_iterator_raw(
begin_raw()); }
390 const_iterator end()
const 391 {
return const_iterator(
this,
CharCount()); }
393 const_reverse_iterator rbegin()
const 394 {
return const_reverse_iterator(end()); }
396 const_reverse_iterator rend()
const 397 {
return const_reverse_iterator(
begin()); }
402 int32 UTF16IndexToCodePointIndex(int32 index)
const;
407 {
return fUTF16BufferLength == 0; }
417 bool16 IsNull()
const 434 { Append(iter.
PtrAt(), (iter + numChars).PtrAt() - iter.
PtrAt(), numChars); }
436 void Append(WideString::const_pointer_raw buf, int32 n, int32 numChars = -1);
445 WideString&
append(WideString::const_pointer_raw s, size_type nCodeValues, size_type nCodePoints = 0)
489 { UnicodeSavvyString::AppendUTF32TextChar(c32); }
492 void Insert(
const WideString &s, int32 position = 0, int32 count = kMaxInt32);
493 void Insert(WideString::const_pointer_raw buf, int32 len, int32 pos = 0)
494 {
if (len > 0) UnicodeSavvyString::InsertUTF16String(buf, len, pos); }
496 { UnicodeSavvyString::InsertUTF32TextChar(c, pos); }
498 WideString* Substring(int32 position, int32 count = kMaxInt32)
const;
509 void remove_raw(int32 utf16Pos, int32 utf16Count);
510 void RemoveCodePoints(int32 startCodePointIndex, int32 numCodePoints);
512 int32 IndexOf(
const WideString& keyString, int32 position = 0)
const;
515 bool16 Contains(
const WideString& key, int32 pos = 0)
const 516 {
return IndexOf(key, pos) >= 0; }
520 void Shrink(bool16 maxshrink = kFalse);
527 UnicodeSavvyString::CopyFrom(other);
540 return assign(copy, copy + UTF16TextCharLength(copy));
546 {
return GetChar(index); }
560 template <
class IteratorType>
563 typedef typename std::iterator_traits<IteratorType>::value_type iterator_value_type;
566 BOOST_STATIC_ASSERT(
sizeof(iterator_value_type)*CHAR_BIT == 16 ||
567 sizeof(iterator_value_type)*CHAR_BIT == 32);
570 BOOST_STATIC_ASSERT(!std::numeric_limits<iterator_value_type>::is_signed);
573 EncodingSelector<sizeof(iterator_value_type)*CHAR_BIT> encodingSel;
575 WideString::assign_impl(b, e, nCodePoints, encodingSel);
585 inline WideString&
assign(WideString::const_pointer_raw src, size_type nCodeValues, size_type nCodePoints = 0)
588 ASSERT_MSG(nCodeValues != static_cast<size_type>(-1),
"-1 is not a valid value for nCodeValues");
589 ASSERT_MSG(nCodePoints != static_cast<size_type>(-1),
"-1 is not a valid value for nCodePoints");
591 return assign(src, src + nCodeValues, nCodePoints);
595 bool16 operator >= (
const WideString &s)
const 596 {
return compare(s) >= 0; }
598 {
return compare(s) > 0; }
599 bool16 operator <= (
const WideString &s)
const 600 {
return compare(s) <= 0; }
602 {
return compare(s) < 0; }
604 bool16 operator == (
const WideString &s)
const;
605 bool16 operator != (
const WideString &s)
const 606 {
return !(*
this == s); }
608 bool16 operator == (WideString::const_pointer_raw b)
const;
609 bool16 operator != (
const UTF16TextChar *b)
const 610 {
return !(*
this == b); }
613 { UnicodeSavvyString::AppendUTF32TextChar(c);
623 uint32
Hash(
void)
const;
630 {
return Strip_If(*
this, [c](
const auto& t){
return t==c;});}
640 void BuildFromSystemString(
const PMString& ss);
641 void GetSystemString(
PMString *ss )
const;
/** Empty tag type parameterized on an unsigned integer.
	The assign_impl overloads take EncodingSelector<16> and EncodingSelector<32>
	as a trailing parameter, so the instantiation chosen by the caller selects
	which overload runs.
*/
template <unsigned int BitWidth>
class EncodingSelector
{
};
653 template <
class IteratorType>
654 inline void assign_impl(IteratorType b, IteratorType e, size_type nCodePoints, EncodingSelector<16>)
662 template <
class IteratorType>
663 void assign_impl(IteratorType b, IteratorType e, size_type nCodePoints, EncodingSelector<32>)
674 std::copy(b, e, std::back_inserter(*
this));
680 mutable InvariantCount fInvariant;
682 void Invariant()
const;
684 static bool16 ts_ForceRoundtrip;
686 bool16 ParseForMultiWordUnicode()
const;
689 static const boost::shared_ptr<WideString> kNil_shared_ptr;
692 inline WideStringConstUTF32Iter::WideStringConstUTF32Iter(
const WideString *
string, int32 charIndex)
696 fCurrent =
string->GrabUTF16Buffer(nil) +
string->CodePointIndexToUTF16Index(charIndex);
697 fHasSurrogates =
string->HasMultiWordUnicode();
699 fPosition = charIndex;
700 fNumChars =
string->CharCount();
706 fHasSurrogates = kFalse;
717 swap(static_cast<UnicodeSavvyString&>(left), static_cast<UnicodeSavvyString&>(right));
722 template <
typename T>
725 const int32 originalNumChars =
string.CharCount();
728 for (int32 strIndex = originalNumChars - 1; strIndex >= 0; --strIndex)
730 if (f(
string.GetChar(strIndex)))
734 string.RemoveCodePoints(strIndex+1, num2Strip);
741 string.RemoveCodePoints(0, num2Strip);
743 return originalNumChars -
string.CharCount();
748 {
return iter.
PtrAt(); }
749 inline WideString::const_pointer_raw IterToPtr(
const WideString::const_iterator_raw& i)
752 {
return (val > 0xFFFF ? 2 : 1); }
767 ADOBE_NAME_TYPE_0(
"widestring:indesign:adobe",
WideString);