LineOffsetCache.cpp 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248
  1. //-------------------------------------------------------------------------------------------------------
  2. // Copyright (C) Microsoft. All rights reserved.
  3. // Licensed under the MIT license. See LICENSE.txt file in the project root for full license information.
  4. //-------------------------------------------------------------------------------------------------------
  5. #include "RuntimeBasePch.h"
  6. namespace Js
  7. {
  8. int LineOffsetCache::FindLineForCharacterOffset(
  9. _In_z_ LPCUTF8 sourceStartCharacter,
  10. _In_z_ LPCUTF8 sourceEndCharacter,
  11. charcount_t &inOutLineCharOffset,
  12. charcount_t &inOutByteOffset,
  13. charcount_t characterOffset)
  14. {
  15. int lastLine = 0;
  16. while (FindNextLine(sourceStartCharacter, sourceEndCharacter, inOutLineCharOffset, inOutByteOffset, characterOffset))
  17. {
  18. lastLine++;
  19. }
  20. return lastLine;
  21. }
  22. LineOffsetCache::LineOffsetCache(Recycler* allocator,
  23. _In_z_ LPCUTF8 sourceStartCharacter,
  24. _In_z_ LPCUTF8 sourceEndCharacter,
  25. charcount_t startingCharacterOffset,
  26. charcount_t startingByteOffset) :
  27. lineCharacterOffsetCacheList(nullptr),
  28. lineByteOffsetCacheList(nullptr)
  29. {
  30. AssertMsg(allocator, "An allocator must be supplied to the cache for allocation of items.");
  31. this->BuildCache(allocator, sourceStartCharacter, sourceEndCharacter, startingCharacterOffset, startingByteOffset);
  32. }
  33. LineOffsetCache::LineOffsetCache(Recycler *allocator,
  34. _In_reads_(numberOfLines) const charcount_t *lineCharacterOffsets,
  35. _In_reads_opt_(numberOfLines) const charcount_t *lineByteOffsets,
  36. __in int numberOfLines)
  37. {
  38. this->lineCharacterOffsetCacheList = LineOffsetCacheReadOnlyList::New(allocator, (charcount_t *)lineCharacterOffsets, numberOfLines);
  39. if (lineByteOffsets)
  40. {
  41. this->lineByteOffsetCacheList = LineOffsetCacheReadOnlyList::New(allocator, (charcount_t *)lineByteOffsets, numberOfLines);
  42. }
  43. else
  44. {
  45. this->lineByteOffsetCacheList = nullptr;
  46. }
  47. }
  48. // outLineCharOffset - The character offset of the start of the line returned
  49. int LineOffsetCache::GetLineForCharacterOffset(charcount_t characterOffset, charcount_t *outLineCharOffset, charcount_t *outByteOffset)
  50. {
  51. Assert(this->lineCharacterOffsetCacheList->Count() > 0);
  52. // The list is sorted, so binary search to find the line info.
  53. int closestIndex = -1;
  54. int minRange = INT_MAX;
  55. this->lineCharacterOffsetCacheList->BinarySearch([&](const charcount_t item, int index)
  56. {
  57. int offsetRange = characterOffset - item;
  58. if (offsetRange >= 0)
  59. {
  60. if (offsetRange < minRange)
  61. {
  62. // There are potentially many lines with starting offsets greater than the one we're searching
  63. // for. As a result, we should track which index we've encountered so far that is the closest
  64. // to the offset we're looking for without going under. This will find the line that contains
  65. // the offset.
  66. closestIndex = index;
  67. minRange = offsetRange;
  68. }
  69. // Search lower to see if we can find a closer index.
  70. return -1;
  71. }
  72. else
  73. {
  74. // Search higher to get into a range that is greater than the offset.
  75. return 1;
  76. }
  77. // Note that we purposely don't return 0 (==) here. We want the search to end in failure (-1) because
  78. // we're searching for the closest element, not necessarily an exact element offset. Exact offsets
  79. // are possible when the offset we're searching for is the first character of the line, but that will
  80. // be handled by the if statement above.
  81. });
  82. if (closestIndex >= 0)
  83. {
  84. charcount_t lastItemCharacterOffset = GetCharacterOffsetForLine(closestIndex, outByteOffset);
  85. if (outLineCharOffset != nullptr)
  86. {
  87. *outLineCharOffset = lastItemCharacterOffset;
  88. }
  89. }
  90. return closestIndex;
  91. }
  92. charcount_t LineOffsetCache::GetCharacterOffsetForLine(charcount_t line, charcount_t *outByteOffset) const
  93. {
  94. AssertMsg(line < this->GetLineCount(), "Invalid line value passed in.");
  95. charcount_t characterOffset = this->lineCharacterOffsetCacheList->Item(line);
  96. if (outByteOffset != nullptr)
  97. {
  98. *outByteOffset = this->lineByteOffsetCacheList? this->lineByteOffsetCacheList->Item(line) : characterOffset;
  99. }
  100. return characterOffset;
  101. }
  102. uint32 LineOffsetCache::GetLineCount() const
  103. {
  104. AssertMsg(this->lineCharacterOffsetCacheList != nullptr, "The list was either not set from the ByteCode or not created.");
  105. return this->lineCharacterOffsetCacheList->Count();
  106. }
  107. const charcount_t * LineOffsetCache::GetLineCharacterOffsetBuffer() const
  108. {
  109. return this->lineCharacterOffsetCacheList->GetBuffer();
  110. }
  111. const charcount_t * LineOffsetCache::GetLineByteOffsetBuffer() const
  112. {
  113. return this->lineByteOffsetCacheList ? this->lineByteOffsetCacheList->GetBuffer() : nullptr;
  114. }
  115. bool LineOffsetCache::FindNextLine(_In_z_ LPCUTF8 &currentSourcePosition, _In_z_ LPCUTF8 sourceEndCharacter, charcount_t &inOutCharacterOffset, charcount_t &inOutByteOffset, charcount_t maxCharacterOffset)
  116. {
  117. charcount_t currentCharacterOffset = inOutCharacterOffset;
  118. charcount_t currentByteOffset = inOutByteOffset;
  119. utf8::DecodeOptions options = utf8::doAllowThreeByteSurrogates;
  120. while (currentSourcePosition < sourceEndCharacter)
  121. {
  122. LPCUTF8 previousCharacter = currentSourcePosition;
  123. // Decode from UTF8 to wide char. Note that Decode will advance the current character by 1 at least.
  124. char16 decodedCharacter = utf8::Decode(currentSourcePosition, sourceEndCharacter, options);
  125. bool wasLineEncountered = false;
  126. switch (decodedCharacter)
  127. {
  128. case _u('\r'):
  129. // Check if the next character is a '\n'. If so, consume that character as well
  130. // (consider as one line).
  131. if (*currentSourcePosition == '\n')
  132. {
  133. ++currentSourcePosition;
  134. ++currentCharacterOffset;
  135. }
  136. // Intentional fall-through.
  137. case _u('\n'):
  138. case 0x2028:
  139. case 0x2029:
  140. // Found a new line.
  141. wasLineEncountered = true;
  142. break;
  143. }
  144. // Move to the next character offset.
  145. ++currentCharacterOffset;
  146. // Count the current byte offset we're at in the UTF-8 buffer.
  147. // The character size can be > 1 for unicode characters.
  148. currentByteOffset += static_cast<int>(currentSourcePosition - previousCharacter);
  149. if (wasLineEncountered)
  150. {
  151. inOutCharacterOffset = currentCharacterOffset;
  152. inOutByteOffset = currentByteOffset;
  153. return true;
  154. }
  155. else if (currentCharacterOffset >= maxCharacterOffset)
  156. {
  157. return false;
  158. }
  159. }
  160. return false;
  161. }
  162. // Builds the cache of line offsets from the passed in source.
  163. void LineOffsetCache::BuildCache(Recycler * allocator, _In_z_ LPCUTF8 sourceStartCharacter,
  164. _In_z_ LPCUTF8 sourceEndCharacter,
  165. charcount_t startingCharacterOffset,
  166. charcount_t startingByteOffset)
  167. {
  168. AssertMsg(sourceStartCharacter, "The source start character passed in is null.");
  169. AssertMsg(sourceEndCharacter, "The source end character passed in is null.");
  170. AssertMsg(sourceStartCharacter <= sourceEndCharacter, "The source start character should not be beyond the source end character.");
  171. AssertMsg(!this->lineCharacterOffsetCacheList, "The cache is already built.");
  172. this->lineCharacterOffsetCacheList = RecyclerNew(allocator, LineOffsetCacheList, allocator);
  173. // Add the first line in the cache list.
  174. this->AddLine(allocator, startingCharacterOffset, startingByteOffset);
  175. while (FindNextLine(sourceStartCharacter, sourceEndCharacter, startingCharacterOffset, startingByteOffset))
  176. {
  177. this->AddLine(allocator, startingCharacterOffset, startingByteOffset);
  178. }
  179. }
  180. // Tracks a new line offset in the cache.
  181. void LineOffsetCache::AddLine(Recycler * allocator, charcount_t characterOffset, charcount_t byteOffset)
  182. {
  183. LineOffsetCacheList * characterOffsetList = (LineOffsetCacheList *)(LineOffsetCacheReadOnlyList*)this->lineCharacterOffsetCacheList;
  184. characterOffsetList->Add(characterOffset);
  185. LineOffsetCacheList * byteOffsetList = (LineOffsetCacheList *)(LineOffsetCacheReadOnlyList*)this->lineByteOffsetCacheList;
  186. if (byteOffsetList == nullptr && characterOffset != byteOffset)
  187. {
  188. byteOffsetList = RecyclerNew(allocator, LineOffsetCacheList, allocator);
  189. byteOffsetList->Copy(characterOffsetList);
  190. this->lineByteOffsetCacheList = byteOffsetList;
  191. }
  192. else if (byteOffsetList != nullptr)
  193. {
  194. byteOffsetList->Add(byteOffset);
  195. }
  196. #if DBG
  197. Assert(this->lineByteOffsetCacheList == nullptr || this->lineByteOffsetCacheList->Count() == this->lineCharacterOffsetCacheList->Count());
  198. if (this->lineCharacterOffsetCacheList->Count() > 1)
  199. {
  200. // Ensure that the list remains sorted during insertion.
  201. charcount_t previousCharacterOffset = this->lineCharacterOffsetCacheList->Item(this->lineCharacterOffsetCacheList->Count() - 2);
  202. AssertMsg(characterOffset > previousCharacterOffset, "The character offsets must be inserted in increasing order per line.");
  203. if (this->lineByteOffsetCacheList != nullptr)
  204. {
  205. charcount_t previousByteOffset = this->lineByteOffsetCacheList->Item(this->lineByteOffsetCacheList->Count() - 2);
  206. AssertMsg(byteOffset > previousByteOffset, "The byte offsets must be inserted in increasing order per line.");
  207. }
  208. }
  209. #endif // DBG
  210. }
  211. }