StringBuilder.h 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439
  1. //-------------------------------------------------------------------------------------------------------
  2. // Copyright (C) Microsoft. All rights reserved.
  3. // Licensed under the MIT license. See LICENSE.txt file in the project root for full license information.
  4. //-------------------------------------------------------------------------------------------------------
  5. #pragma once
  6. namespace Js
  7. {
  8. template <typename TAllocator>
  9. class StringBuilder
  10. {
  11. private:
  12. struct Data
  13. {
  14. public:
  15. union {
  16. struct st_Single
  17. {
  18. char16 buffer[];
  19. } single;
  20. struct st_Chained
  21. {
  22. charcount_t length;
  23. Data *next;
  24. char16 buffer[];
  25. } chained;
  26. }u;
  27. };
  28. private:
  29. static const charcount_t MaxLength = INT_MAX - 1;
  30. const static charcount_t MaxRealloc = 64;
  31. TAllocator* alloc;
  32. // First chunk is just a buffer, and which can be detached without copying.
  33. Data *firstChunk;
  34. // Second chunk is a chained list of chunks. UnChain() needs to be called to copy the first chunk
  35. // and the list of chained chunks to a single buffer on calls to GetBuffer().
  36. Data *secondChunk;
  37. Data *lastChunk;
  38. char16 * appendPtr;
  39. charcount_t length; // Total capacity (allocated number of elements - 1), in all chunks. Note that we keep one allocated element which is not accounted in length for terminating '\0'.
  40. charcount_t count; // Total number of elements, in all chunks.
  41. charcount_t firstChunkLength;
  42. charcount_t initialSize;
  43. bool IsChained() { return this->secondChunk != NULL; }
  44. Data *NewChainedChunk(charcount_t bufLengthRequested)
  45. {
  46. CompileAssert(sizeof(charcount_t) == sizeof(uint32));
  47. // allocation = (bufLengthRequested * sizeof(char16) + sizeof(Data)
  48. charcount_t alloc32 = UInt32Math::MulAdd<sizeof(char16), sizeof(Data)>(bufLengthRequested);
  49. size_t allocation = TAllocator::GetAlignedSize(alloc32);
  50. size_t size_t_length = (allocation - sizeof(Data)) / sizeof(char16);
  51. charcount_t bufLength = (charcount_t)size_t_length;
  52. Assert(bufLength == size_t_length);
  53. Data *newChunk = AllocatorNewStructPlus(TAllocator, this->alloc, allocation, Data);
  54. newChunk->u.chained.length = bufLength;
  55. newChunk->u.chained.next = NULL;
  56. // Recycler gives zeroed memory, so rely on that instead of memsetting the tail
  57. #if 0
  58. // Align memset to machine register size for perf
  59. bufLengthRequested &= ~(sizeof(size_t) - 1);
  60. memset(newChunk->u.chained.buffer + bufLengthRequested, 0, (bufLength - bufLengthRequested) * sizeof(char16));
  61. #endif
  62. return newChunk;
  63. }
  64. Data *NewSingleChunk(charcount_t *pBufLengthRequested)
  65. {
  66. Assert(*pBufLengthRequested <= MaxLength);
  67. // Let's just grow the current chunk in place
  68. CompileAssert(sizeof(charcount_t) == sizeof(uint32));
  69. //// allocation = (bufLengthRequested+1) * sizeof(char16)
  70. charcount_t alloc32 = UInt32Math::AddMul< 1, sizeof(char16) >(*pBufLengthRequested);
  71. size_t allocation = HeapInfo::GetAlignedSize(alloc32);
  72. size_t size_t_newLength = allocation / sizeof(char16) - 1;
  73. charcount_t newLength = (charcount_t)size_t_newLength;
  74. Assert(newLength == size_t_newLength);
  75. Assert(newLength <= MaxLength + 1);
  76. if (newLength == MaxLength + 1)
  77. {
  78. // newLength could be MaxLength + 1 because of alignment.
  79. // In this case alloc size is 2 elements more than newLength (normally 1 elements more for NULL), that's fine.
  80. newLength = MaxLength;
  81. }
  82. Assert(newLength <= MaxLength);
  83. Data* newChunk = AllocatorNewStructPlus(TAllocator, this->alloc, allocation, Data);
  84. newChunk->u.single.buffer[newLength] = _u('\0');
  85. *pBufLengthRequested = newLength;
  86. return newChunk;
  87. }
  88. __declspec(noinline) void ExtendBuffer(charcount_t newLength)
  89. {
  90. Data *newChunk;
  91. // To maintain this->length under MaxLength, check it here/throw, this is the only place we grow the buffer.
  92. if (newLength > MaxLength)
  93. {
  94. Throw::OutOfMemory();
  95. }
  96. Assert(this->length <= MaxLength);
  97. charcount_t newLengthTryGrowPolicy = newLength + (this->length*2/3); // Note: this would never result in uint32 overflow.
  98. if (newLengthTryGrowPolicy <= MaxLength)
  99. {
  100. newLength = newLengthTryGrowPolicy;
  101. }
  102. Assert(newLength <= MaxLength);
  103. // We already have linked chunks
  104. if (this->IsChained() || (this->firstChunk != NULL && newLength - this->length > MaxRealloc))
  105. {
  106. newChunk = this->NewChainedChunk(newLength - this->count);
  107. if (this->IsChained())
  108. {
  109. this->lastChunk->u.chained.next = newChunk;
  110. // We're not going to use the extra space in the current chunk...
  111. Assert(this->lastChunk->u.chained.length > this->length - this->count);
  112. this->lastChunk->u.chained.length -= (this->length - this->count);
  113. }
  114. else
  115. {
  116. // Time to add our first linked chunk
  117. Assert(this->secondChunk == NULL);
  118. this->secondChunk = newChunk;
  119. // We're not going to use the extra space in the current chunk...
  120. this->firstChunkLength = this->count;
  121. }
  122. this->length = this->count + newChunk->u.chained.length;
  123. this->lastChunk = newChunk;
  124. this->appendPtr = newChunk->u.chained.buffer;
  125. }
  126. else
  127. {
  128. if (this->initialSize < MaxLength)
  129. {
  130. newLength = max(newLength, this->initialSize + 1);
  131. }
  132. else
  133. {
  134. newLength = MaxLength;
  135. }
  136. Assert(newLength <= MaxLength);
  137. // Let's just grow the current chunk in place
  138. newChunk = this->NewSingleChunk(&newLength);
  139. if (this->count)
  140. {
  141. js_memcpy_s(newChunk->u.single.buffer, newLength * sizeof(char16), this->firstChunk->u.single.buffer, sizeof(char16) * this->count);
  142. }
  143. this->firstChunk = this->lastChunk = newChunk;
  144. this->firstChunkLength = newLength;
  145. this->length = newLength;
  146. this->appendPtr = newChunk->u.single.buffer + this->count;
  147. }
  148. }
  149. void EnsureBuffer(charcount_t countNeeded)
  150. {
  151. if(countNeeded == 0) return;
  152. if (countNeeded >= this->length - this->count)
  153. {
  154. if (countNeeded > MaxLength)
  155. {
  156. // Check upfront to prevent potential uint32 overflow caused by (this->count + countNeeded + 1).
  157. Throw::OutOfMemory();
  158. }
  159. ExtendBuffer(this->count + countNeeded + 1);
  160. }
  161. }
  162. public:
  163. static StringBuilder<TAllocator> *
  164. New(TAllocator* alloc, charcount_t initialSize)
  165. {
  166. if (initialSize > MaxLength)
  167. {
  168. Throw::OutOfMemory();
  169. }
  170. return AllocatorNew(TAllocator, alloc, StringBuilder<TAllocator>, alloc, initialSize);
  171. }
  172. StringBuilder(TAllocator* alloc)
  173. {
  174. new (this) StringBuilder(alloc, 0);
  175. }
  176. StringBuilder(TAllocator* alloc, charcount_t initialSize) : alloc(alloc), length(0), count(0), firstChunk(NULL),
  177. secondChunk(NULL), appendPtr(NULL), initialSize(initialSize)
  178. {
  179. if (initialSize > MaxLength)
  180. {
  181. Throw::OutOfMemory();
  182. }
  183. }
  184. void UnChain(__out __ecount(bufLen) char16 *pBuf, charcount_t bufLen)
  185. {
  186. charcount_t lastChunkCount = this->count;
  187. Assert(this->IsChained());
  188. Assert(bufLen >= this->count);
  189. char16 *pSrcBuf = this->firstChunk->u.single.buffer;
  190. Data *next = this->secondChunk;
  191. charcount_t srcLength = this->firstChunkLength;
  192. for (Data *chunk = this->firstChunk; chunk != this->lastChunk; next = chunk->u.chained.next)
  193. {
  194. if (bufLen < srcLength)
  195. {
  196. Throw::FatalInternalError();
  197. }
  198. js_memcpy_s(pBuf, bufLen * sizeof(char16), pSrcBuf, sizeof(char16) * srcLength);
  199. bufLen -= srcLength;
  200. pBuf += srcLength;
  201. lastChunkCount -= srcLength;
  202. chunk = next;
  203. pSrcBuf = chunk->u.chained.buffer;
  204. srcLength = chunk->u.chained.length;
  205. }
  206. if (bufLen < lastChunkCount)
  207. {
  208. Throw::FatalInternalError();
  209. }
  210. js_memcpy_s(pBuf, bufLen * sizeof(char16), this->lastChunk->u.chained.buffer, sizeof(char16) * lastChunkCount);
  211. }
  212. void UnChain()
  213. {
  214. Assert(this->IsChained());
  215. charcount_t newLength = this->count;
  216. Data *newChunk = this->NewSingleChunk(&newLength);
  217. this->length = newLength;
  218. this->UnChain(newChunk->u.single.buffer, newLength);
  219. this->firstChunk = this->lastChunk = newChunk;
  220. this->secondChunk = NULL;
  221. this->appendPtr = newChunk->u.single.buffer + this->count;
  222. }
  223. void Copy(__out __ecount(bufLen) char16 *pBuf, charcount_t bufLen)
  224. {
  225. if (this->IsChained())
  226. {
  227. this->UnChain(pBuf, bufLen);
  228. }
  229. else
  230. {
  231. if (bufLen < this->count)
  232. {
  233. Throw::FatalInternalError();
  234. }
  235. js_memcpy_s(pBuf, bufLen * sizeof(char16), this->firstChunk->u.single.buffer, this->count * sizeof(char16));
  236. }
  237. }
  238. inline char16* Buffer()
  239. {
  240. if (this->IsChained())
  241. {
  242. this->UnChain();
  243. }
  244. if (this->firstChunk)
  245. {
  246. this->firstChunk->u.single.buffer[this->count] = _u('\0');
  247. return this->firstChunk->u.single.buffer;
  248. }
  249. else
  250. {
  251. return _u("");
  252. }
  253. }
  254. inline charcount_t Count() { return this->count; }
  255. void Append(char16 c)
  256. {
  257. if (this->count == this->length)
  258. {
  259. ExtendBuffer(this->length+1);
  260. }
  261. *(this->appendPtr++) = c;
  262. this->count++;
  263. }
  264. void AppendSz(const char16 * str)
  265. {
  266. // WARNING!!
  267. // Do not use this to append JavascriptStrings. They can have embedded
  268. // nulls which obviously won't be handled correctly here. Instead use
  269. // Append with a length, which will use memcpy and correctly include any
  270. // embedded null characters.
  271. // WARNING!!
  272. while (*str != _u('\0'))
  273. {
  274. Append(*str++);
  275. }
  276. }
  277. void Append(const char16 * str, charcount_t countNeeded)
  278. {
  279. EnsureBuffer(countNeeded);
  280. char16 *dst = this->appendPtr;
  281. JavascriptString::CopyHelper(dst, str, countNeeded);
  282. this->appendPtr += countNeeded;
  283. this->count += countNeeded;
  284. }
  285. template <size_t N>
  286. void AppendCppLiteral(const char16(&str)[N])
  287. {
  288. // Need to account for the terminating null character in C++ string literals, hence N > 2 and N - 1 below
  289. static_assert(N > 2, "Use Append(char16) for appending literal single characters and do not append empty string literal");
  290. Append(str, N - 1);
  291. }
  292. // If we expect str to be large - we should just use this version that uses memcpy directly instead of Append
  293. void AppendLarge(const char16 * str, charcount_t countNeeded)
  294. {
  295. EnsureBuffer(countNeeded);
  296. char16 *dst = this->appendPtr;
  297. js_memcpy_s(dst, sizeof(WCHAR) * countNeeded, str, sizeof(WCHAR) * countNeeded);
  298. this->appendPtr += countNeeded;
  299. this->count += countNeeded;
  300. }
  301. errno_t AppendUint64(unsigned __int64 value)
  302. {
  303. const int max_length = 20; // maximum length of 64-bit value converted to base 10 string
  304. const int radix = 10;
  305. WCHAR buf[max_length+1];
  306. errno_t result = _ui64tow_s(value, buf, max_length+1, radix);
  307. AssertMsg(result==0, "Failed to translate value to string");
  308. if (result == 0)
  309. {
  310. AppendSz(buf);
  311. }
  312. return result;
  313. }
  314. char16 *AllocBufferSpace(charcount_t countNeeded)
  315. {
  316. EnsureBuffer(countNeeded);
  317. return this->appendPtr;
  318. }
  319. void IncreaseCount(charcount_t countInc)
  320. {
  321. if(countInc == 0) return;
  322. this->count += countInc;
  323. this->appendPtr += countInc;
  324. Assert(this->count < this->length);
  325. }
  326. char16* Detach()
  327. {
  328. // NULL terminate the string
  329. Append(_u('\0'));
  330. // if there is a chain we need to account for that also, so that the new buffer will have the NULL at the end.
  331. if (this->IsChained())
  332. {
  333. this->UnChain();
  334. }
  335. // Now decrement the count to adjust according to number of chars
  336. this->count--;
  337. char16* result = this->firstChunk->u.single.buffer;
  338. this->firstChunk = this->lastChunk = NULL;
  339. return result;
  340. }
  341. void TrimTrailingNULL()
  342. {
  343. Assert(this->count);
  344. if (this->IsChained())
  345. {
  346. Assert(this->lastChunk->u.chained.buffer[this->count - (this->length - this->lastChunk->u.chained.length) - 1] == _u('\0'));
  347. }
  348. else
  349. {
  350. Assert(this->lastChunk->u.single.buffer[this->count - 1] == _u('\0'));
  351. }
  352. this->appendPtr--;
  353. this->count--;
  354. }
  355. void Reset()
  356. {
  357. this->length = 0;
  358. this->count = 0;
  359. this->firstChunk = NULL;
  360. this->secondChunk = NULL;
  361. this->lastChunk = NULL;
  362. }
  363. };
  364. }