InDesign SDK  20.5
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends Groups Pages
UnicodeSavvyString.h
1 //========================================================================================
2 //
3 // $File$
4 //
5 // Owner:
6 //
7 // $Author$
8 //
9 // $DateTime$
10 //
11 // $Revision$
12 //
13 // $Change$
14 //
15 // Copyright 1997-2010 Adobe Systems Incorporated. All rights reserved.
16 //
17 // NOTICE: Adobe permits you to use, modify, and distribute this file in accordance
18 // with the terms of the Adobe license agreement accompanying it. If you have received
19 // this file from a source other than Adobe, then your use, modification, or
20 // distribution of it requires the prior written permission of Adobe.
21 //
22 //
23 // A 16-bit character string.
24 //
25 //========================================================================================
26 
27 #ifndef __UNICODESAVVYSTRING
28 #define __UNICODESAVVYSTRING
29 
30 #include "PlatformChar.h"
31 #include <cstddef>
32 #include <iterator> // for iterator_traits
33 #include <boost/static_assert.hpp>
34 #include <adobe/move.hpp>
35 
36  int32 CharOffsetToUTF16Offset(const UTF16TextChar *buffer, int32 length, int32 charOffset);
37  int UTF16TextCharCompare(const UTF16TextChar *utf16str1, const UTF16TextChar *utf16str2);
38  size_t UTF16TextCharLength(const UTF16TextChar *utf16str);
39 
40 //class StringStorage;
41 // We can't debug the StringStorage if we use a forward declare (bug in MSDev Studio 2003)
42 // This bug is fixed in MS Dev Studio 2005, so we should go back to a forward declare then.
43 #include "StringStorage.h"
44 
53 {
54  // Copy constructor and assignment disabled
56  UnicodeSavvyString& operator=(const UnicodeSavvyString& s);
57 
58 public:
59  // traits
60  typedef int32 size_type;
61  typedef std::ptrdiff_t difference_type;
62  typedef UTF16TextChar code_value;
63  typedef UTF32TextChar code_point;
64  typedef code_value* code_value_iterator;
65  typedef code_value const* const_code_value_iterator;
66  typedef const UnicodeSavvyString& const_reference;
67  typedef UTF16TextChar value_type;
68 
72  bool16 HasMultiWordUnicode() const
73  {
74  return fNumChars != fUTF16BufferLength;
75  }
76 
82  size_type CharCount() const
83  {
84  return fNumChars;
85  }
86 
91  size_type NumUTF16TextChars() const
92  {
93  return fUTF16BufferLength;
94  }
95 
101  size_type capacity(void) const
102  {
103  return (UnicodeBufferIsValid()) ? fStorage->capacity() : kMaxSmallString;
104  }
105 
116  void reserve(size_type newCapacity);
117 
127  void resize(size_type newSize, code_value fill = code_value());
128 
139  friend inline void swap(UnicodeSavvyString& lhs, UnicodeSavvyString& rhs) noexcept
140  {
141  std::swap_ranges(lhs.fSmallStorage, lhs.fSmallStorage + kMaxSmallString + 1, rhs.fSmallStorage);
142  std::swap(lhs.fStorage, rhs.fStorage);
143  std::swap(lhs.fUTF16BufferLength, rhs.fUTF16BufferLength);
144  std::swap(lhs.fNumChars, rhs.fNumChars);
145  }
146 
150  void clear();
151 
159  const UTF16TextChar* GrabUTF16Buffer(int32* numUTF16s) const;
160 
165  int32 CodePointIndexToUTF16Index(int32 index) const;
166 
170  void Truncate(CharCounter count);
171 
177  void Remove(int32 position, CharCounter count);
178 
179 
184  UTF32TextChar GetUTF32TextChar(int32 pos) const;
185 
186 
190  const_code_value_iterator begin() const
191  {
192  return ConstBuffer();
193  }
194 
198  const_code_value_iterator end() const
199  {
200  return ConstBuffer() + fUTF16BufferLength;
201  }
202 
203 
204 protected:
205  UnicodeSavvyString() : fStorage(nil), fUTF16BufferLength(0), fNumChars(0)
206  {
207  fSmallStorage[0] = UTF16TextChar();
208  }
209 
212  UnicodeSavvyString(adobe::move_from<UnicodeSavvyString> other)
213  : fStorage(nil)
214  {
215  move_from(other.source);
216  }
217 
218  UnicodeSavvyString(UnicodeSavvyString &&other) noexcept
219  : fStorage(nil)
220  {
221  move_from(other);
222  }
223 
226  void move_from(UnicodeSavvyString& other) noexcept
227  {
228  std::swap(fStorage, other.fStorage);
229  fUTF16BufferLength = other.fUTF16BufferLength;
230  fNumChars = other.fNumChars;
231 
232  if (!fStorage && fUTF16BufferLength)
233  std::copy(other.fSmallStorage, other.fSmallStorage + kMaxSmallString + 1, fSmallStorage);
234  else
235  fSmallStorage[0] = code_value();
236 
237  other.fUTF16BufferLength = other.fNumChars = 0;
238  }
239 
241 
250  template <class IteratorType>
251  UnicodeSavvyString(IteratorType b, IteratorType e, size_type nCodePoints = 0)
252  : fStorage(nil), fUTF16BufferLength(0), fNumChars(0)
253  {
254  assign(b, e, nCodePoints);
255  }
256 
257  int32 CountChars() const;
258  int32 CountCharsUtil(const UTF16TextChar* buffer, int32 bufferLength) const;
259 
260  // Buffer manipulation
261  void InsertGap(uint32 wordWiseIndex, size_type numberOfSpaces);
262  void RemoveGap(uint32 wordWiseIndex, size_type numberOfSpaces);
263 
264  void InsertUTF32TextChar(UTF32TextChar c, int32 pos = 0);
265  void InsertUTF16String(const UTF16TextChar* buf, int32 len, int32 position = 0);
266 
267  void AppendUTF32TextChar(UTF32TextChar c32);
268 
269  void CopyFrom(const UnicodeSavvyString& other);
270 
275  bool16 operator==(const UnicodeSavvyString& s) const;
276 
277 #ifdef DEBUG
278  void CheckPairedSurrogates(const UTF16TextChar* buffer, int32 utf16Count) const;
279 
282  void InvariantCheck() const;
283 
286  struct InvariantChecker
287  {
288  InvariantChecker(UnicodeSavvyString const& s) : fString(s)
289  {
290  fString.InvariantCheck();
291  }
292 
293  ~InvariantChecker()
294  {
295  fString.InvariantCheck();
296  }
297  private:
298  UnicodeSavvyString const& fString;
299  };
300 #endif
301 
310  template <class IteratorType>
311  UnicodeSavvyString& assign(IteratorType b, IteratorType e, size_type nCodePoints = 0);
312 
323  UnicodeSavvyString& replace(size_type pos, size_type n1,
324  code_value const* s, size_type n2);
325 
333  UnicodeSavvyString& append(code_value const* s, size_type nCodeValues, size_type nCodePoints = 0);
334 
335 protected:
336  UTF32TextChar surro_GetUTF32TextChar(int32 pos) const;
337  const UTF16TextChar* ConstBuffer() const
338  {
339  return UnicodeBufferIsValid() ? fStorage->ConstBuffer() : fSmallStorage;
340  }
341 
342  // Utility functions that are called under a "safe" circumstance.
343  // Should not be used directly
344  void insert_safe(code_value_iterator i, const_code_value_iterator sb, const_code_value_iterator se);
345  void erase_safe(code_value_iterator b, code_value_iterator e);
346  void replace_safe(code_value_iterator b, code_value_iterator e,
347  const_code_value_iterator sb, const_code_value_iterator se);
348 
349 
350  template <class InputIterator>
351  void assign_impl(InputIterator b, InputIterator e, size_type nCodePoints, std::input_iterator_tag);
352 
353  template <class FwdIterator>
354  void assign_impl(FwdIterator b, FwdIterator e, size_type nCodePoints, std::forward_iterator_tag);
355 
356  //fStorage may have two states:
357  // 1) It is allocated and terminated with a trailing zero
358  // 2) Is is nil, and the storage has already been released
359  // UnicodeBufferIsValid() distinguishes these states
360  bool16 UnicodeBufferIsValid() const
361  {
362  return (fStorage ? true : false);
363  }
364 
365  UTF16TextChar* GetBufferForWriting(size_type size);
366 
367 protected:
368  StringStorage* fStorage; // big storage
369 
370  enum {kMaxSmallString = 23}; // Threshold for the small buffer
371  UTF16TextChar fSmallStorage[kMaxSmallString + 1]; // Small string optimization
372 
373  size_type fUTF16BufferLength; // The number of UTF16s in here
374  size_type fNumChars; // The number of Unicode codepoints (2 surrogates == 1 char == 1 codepoint) in here
375 };
376 
377 
378 // Inline implementations below:
379 
380 // Assign with iterators
381 template <class IteratorType>
382 inline UnicodeSavvyString& UnicodeSavvyString::assign(IteratorType b, IteratorType e, size_type nCodePoints)
383 {
384  // The value from the iterator should be 16 bit
385  typedef typename std::iterator_traits<IteratorType>::value_type iterator_value_type;
386  BOOST_STATIC_ASSERT(sizeof(iterator_value_type)*CHAR_BIT == 16);
387  typedef typename std::iterator_traits<IteratorType>::iterator_category iterator_category;
388 
389 #ifdef DEBUG
390  InvariantChecker checkThis(*this);
391 #endif
392 
393  // Dispatch to the optimized version
394  assign_impl(b, e, nCodePoints, iterator_category());
395  return *this;
396 }
397 
398 // Input iterators assign implementation
399 template <class InputIterator>
400 inline void UnicodeSavvyString::assign_impl(InputIterator b, InputIterator e, size_type nCodePoints, std::input_iterator_tag)
401 {
402  UnicodeSavvyString temp;
403 
404  // Try to optimize allocations in case the client told us the number of code points.
405  if (nCodePoints)
406  {
407  temp.reserve(nCodePoints);
408  }
409 
410  std::copy(b, e, std::back_inserter(temp));
411  swap(*this, temp);
412 }
413 
414 template <class FwdIterator>
415 inline void UnicodeSavvyString::assign_impl(FwdIterator b, FwdIterator e, size_type nCodePoints, std::forward_iterator_tag)
416 {
417  const difference_type nCodeValues = std::distance(b, e);
418  ASSERT(nCodeValues >= 0);
419 
420  // Make room for the chars
421  code_value* buffer = GetBufferForWriting(nCodeValues);
422 
423  // Copy them into the buffer
424  std::copy(b, e, buffer);
425 
426  // Ensure terminating null
427  buffer[nCodeValues] = code_value();
428 
429  fUTF16BufferLength = nCodeValues;
430 
431  // Calculate how many code points we will have at the end
432  if (nCodePoints == 0 && nCodeValues)
433  {
434  fNumChars = CountCharsUtil(buffer, nCodeValues);
435  }
436  else
437  {
438  ASSERT((size_type)CountCharsUtil(buffer, nCodeValues) == nCodePoints);
439  fNumChars = nCodePoints;
440  }
441 }
442 
443 
444 
445 inline int32 UnicodeSavvyString::CodePointIndexToUTF16Index(int32 index) const
446 {
447  return HasMultiWordUnicode() ? CharOffsetToUTF16Offset(ConstBuffer(), fUTF16BufferLength, index) : index;
448 }
449 
450 inline const UTF16TextChar * UnicodeSavvyString::GrabUTF16Buffer(int32* numUTF16s) const
451 {
452  if (numUTF16s)
453  {
454  *numUTF16s = fUTF16BufferLength;
455  }
456  return ConstBuffer();
457 }
458 
460 {
461  return HasMultiWordUnicode() ? surro_GetUTF32TextChar(pos) : UTF32TextChar(ConstBuffer()[pos]);
462 }
463 
464 
465 inline int32 UnicodeSavvyString::CountChars() const
466 {
467  return CountCharsUtil(ConstBuffer(), fUTF16BufferLength);
468 }
469 
470 
471 
472 inline UTF16TextChar * UnicodeSavvyString::GetBufferForWriting(UnicodeSavvyString::size_type len)
473 {
474  reserve(len);
475  return fStorage ? fStorage->GetBuffer() : &fSmallStorage[0];
476 }
477 
478 
479 
480 
481 
482 
483 
484 
485 
486 #endif
487