JSONScanner.cpp 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473
  1. //-------------------------------------------------------------------------------------------------------
  2. // Copyright (C) Microsoft. All rights reserved.
  3. // Licensed under the MIT license. See LICENSE.txt file in the project root for full license information.
  4. //-------------------------------------------------------------------------------------------------------
  5. #include "RuntimeLibraryPch.h"
  6. #include "JSONScanner.h"
  7. using namespace Js;
  8. namespace JSON
  9. {
  10. // -------- Scanner implementation ------------//
  11. JSONScanner::JSONScanner()
  12. : inputText(0), inputLen(0), pToken(0), stringBuffer(0), allocator(0), allocatorObject(0),
  13. currentRangeCharacterPairList(0), stringBufferLength(0), currentIndex(0)
  14. {
  15. }
  16. void JSONScanner::Finalizer()
  17. {
  18. // All dynamic memory allocated by this object is on the arena - either the one this object owns or by the
  19. // one shared with JSON parser - here we will deallocate ours. The others will be deallocated when JSONParser
  20. // goes away which should happen right after this.
  21. if (this->allocatorObject != nullptr)
  22. {
  23. // We created our own allocator, so we have to free it
  24. this->scriptContext->ReleaseTemporaryGuestAllocator(allocatorObject);
  25. }
  26. }
  27. void JSONScanner::Init(const char16* input, uint len, Token* pOutToken, Js::ScriptContext* sc, const char16* current, ArenaAllocator* allocator)
  28. {
  29. // Note that allocator could be nullptr from JSONParser, if we could not reuse an allocator, keep our own
  30. inputText = input;
  31. currentChar = current;
  32. inputLen = len;
  33. pToken = pOutToken;
  34. scriptContext = sc;
  35. this->allocator = allocator;
  36. }
  37. tokens JSONScanner::Scan()
  38. {
  39. pTokenString = currentChar;
  40. while (currentChar < inputText + inputLen)
  41. {
  42. switch(ReadNextChar())
  43. {
  44. case 0:
  45. //EOF
  46. currentChar--;
  47. return (pToken->tk = tkEOF);
  48. case '\t':
  49. case '\r':
  50. case '\n':
  51. case ' ':
  52. //WS - keep looping
  53. break;
  54. case '"':
  55. //check for string
  56. return ScanString();
  57. case '0':
  58. case '1':
  59. case '2':
  60. case '3':
  61. case '4':
  62. case '5':
  63. case '6':
  64. case '7':
  65. case '8':
  66. case '9':
  67. //decimal digit starts a number
  68. {
  69. currentChar--;
  70. // we use StrToDbl() here for compat with the rest of the engine. StrToDbl() accept a larger syntax.
  71. // Verify first the JSON grammar.
  72. const char16* saveCurrentChar = currentChar;
  73. if(!IsJSONNumber())
  74. {
  75. ThrowSyntaxError(JSERR_JsonBadNumber);
  76. }
  77. currentChar = saveCurrentChar;
  78. double val;
  79. const char16* end;
  80. val = Js::NumberUtilities::StrToDbl(currentChar, &end, scriptContext);
  81. if(currentChar == end)
  82. {
  83. ThrowSyntaxError(JSERR_JsonBadNumber);
  84. }
  85. AssertMsg(!Js::JavascriptNumber::IsNan(val), "Bad result from string to double conversion");
  86. pToken->tk = tkFltCon;
  87. pToken->SetDouble(val, false);
  88. currentChar = end;
  89. return tkFltCon;
  90. }
  91. case ',':
  92. return (pToken->tk = tkComma);
  93. case ':':
  94. return (pToken->tk = tkColon);
  95. case '[':
  96. return (pToken->tk = tkLBrack);
  97. case ']':
  98. return (pToken->tk = tkRBrack);
  99. case '-':
  100. return (pToken->tk = tkSub);
  101. case 'n':
  102. //check for 'null'
  103. if (currentChar + 2 < inputText + inputLen && currentChar[0] == 'u' && currentChar[1] == 'l' && currentChar[2] == 'l')
  104. {
  105. currentChar += 3;
  106. return (pToken->tk = tkNULL);
  107. }
  108. ThrowSyntaxError(JSERR_JsonIllegalChar);
  109. case 't':
  110. //check for 'true'
  111. if (currentChar + 2 < inputText + inputLen && currentChar[0] == 'r' && currentChar[1] == 'u' && currentChar[2] == 'e')
  112. {
  113. currentChar += 3;
  114. return (pToken->tk = tkTRUE);
  115. }
  116. ThrowSyntaxError(JSERR_JsonIllegalChar);
  117. case 'f':
  118. //check for 'false'
  119. if (currentChar + 3 < inputText + inputLen && currentChar[0] == 'a' && currentChar[1] == 'l' && currentChar[2] == 's' && currentChar[3] == 'e')
  120. {
  121. currentChar += 4;
  122. return (pToken->tk = tkFALSE);
  123. }
  124. ThrowSyntaxError(JSERR_JsonIllegalChar);
  125. case '{':
  126. return (pToken->tk = tkLCurly);
  127. case '}':
  128. return (pToken->tk = tkRCurly);
  129. default:
  130. ThrowSyntaxError(JSERR_JsonIllegalChar);
  131. }
  132. }
  133. return (pToken->tk = tkEOF);
  134. }
  135. bool JSONScanner::IsJSONNumber()
  136. {
  137. bool firstDigitIsAZero = false;
  138. if (PeekNextChar() == '0')
  139. {
  140. firstDigitIsAZero = true;
  141. currentChar++;
  142. }
  143. //partial verification of number JSON grammar.
  144. while (currentChar < inputText + inputLen)
  145. {
  146. switch(ReadNextChar())
  147. {
  148. case 0:
  149. return false;
  150. case '0':
  151. case '1':
  152. case '2':
  153. case '3':
  154. case '4':
  155. case '5':
  156. case '6':
  157. case '7':
  158. case '8':
  159. case '9':
  160. if (firstDigitIsAZero)
  161. {
  162. return false;
  163. }
  164. break;
  165. case '.':
  166. {
  167. // at least one digit after '.'
  168. if(currentChar < inputText + inputLen)
  169. {
  170. char16 nch = ReadNextChar();
  171. if('0' <= nch && nch <= '9')
  172. {
  173. return true;
  174. }
  175. else
  176. {
  177. return false;
  178. }
  179. }
  180. else
  181. {
  182. return false;
  183. }
  184. }
  185. //case 'E':
  186. //case 'e':
  187. // return true;
  188. default:
  189. return true;
  190. }
  191. firstDigitIsAZero = false;
  192. }
  193. return true;
  194. }
  195. tokens JSONScanner::ScanString()
  196. {
  197. char16 ch;
  198. this->currentIndex = 0;
  199. this->currentString = const_cast<char16*>(currentChar);
  200. bool endFound = false;
  201. bool isStringDirectInputTextMapped = true;
  202. LPCWSTR bulkStart = currentChar;
  203. uint bulkLength = 0;
  204. while (currentChar < inputText + inputLen)
  205. {
  206. ch = ReadNextChar();
  207. int tempHex;
  208. if (ch == '"')
  209. {
  210. //end of the string
  211. endFound = true;
  212. break;
  213. }
  214. else if (ch <= 0x1F)
  215. {
  216. //JSON doesn't accept \u0000 - \u001f range, LS(\u2028) and PS(\u2029) are ok
  217. ThrowSyntaxError(JSERR_JsonIllegalChar);
  218. }
  219. else if ( 0 == ch )
  220. {
  221. currentChar--;
  222. ThrowSyntaxError(JSERR_JsonNoStrEnd);
  223. }
  224. else if ('\\' == ch)
  225. {
  226. //JSON escape sequence in a string \", \/, \\, \b, \f, \n, \r, \t, unicode seq
  227. // unlikely V5.8 regular chars are not escaped, i.e '\g'' in a string is illegal not 'g'
  228. if (currentChar >= inputText + inputLen )
  229. {
  230. ThrowSyntaxError(JSERR_JsonNoStrEnd);
  231. }
  232. ch = ReadNextChar();
  233. switch (ch)
  234. {
  235. case 0:
  236. currentChar--;
  237. ThrowSyntaxError(JSERR_JsonNoStrEnd);
  238. case '"':
  239. case '/':
  240. case '\\':
  241. //keep ch
  242. break;
  243. case 'b':
  244. ch = 0x08;
  245. break;
  246. case 'f':
  247. ch = 0x0C;
  248. break;
  249. case 'n':
  250. ch = 0x0A;
  251. break;
  252. case 'r':
  253. ch = 0x0D;
  254. break;
  255. case 't':
  256. ch = 0x09;
  257. break;
  258. case 'u':
  259. {
  260. int chcode;
  261. // 4 hex digits
  262. if (currentChar + 3 >= inputText + inputLen)
  263. {
  264. //no room left for 4 hex chars
  265. ThrowSyntaxError(JSERR_JsonNoStrEnd);
  266. }
  267. if (!Js::NumberUtilities::FHexDigit((WCHAR)ReadNextChar(), &tempHex))
  268. {
  269. ThrowSyntaxError(JSERR_JsonBadHexDigit);
  270. }
  271. chcode = tempHex * 0x1000;
  272. if (!Js::NumberUtilities::FHexDigit((WCHAR)ReadNextChar(), &tempHex))
  273. {
  274. ThrowSyntaxError(JSERR_JsonBadHexDigit);
  275. }
  276. chcode += tempHex * 0x0100;
  277. if (!Js::NumberUtilities::FHexDigit((WCHAR)ReadNextChar(), &tempHex))
  278. {
  279. ThrowSyntaxError(JSERR_JsonBadHexDigit);
  280. }
  281. chcode += tempHex * 0x0010;
  282. if (!Js::NumberUtilities::FHexDigit((WCHAR)ReadNextChar(), &tempHex))
  283. {
  284. ThrowSyntaxError(JSERR_JsonBadHexDigit);
  285. }
  286. chcode += tempHex;
  287. AssertMsg(chcode == (chcode & 0xFFFF), "Bad unicode code");
  288. ch = (char16)chcode;
  289. }
  290. break;
  291. default:
  292. // Any other '\o' is an error in JSON
  293. ThrowSyntaxError(JSERR_JsonIllegalChar);
  294. }
  295. // flush
  296. this->GetCurrentRangeCharacterPairList()->Add(RangeCharacterPair((uint)(bulkStart - inputText), bulkLength, ch));
  297. uint oldIndex = currentIndex;
  298. currentIndex += bulkLength;
  299. currentIndex++;
  300. if (currentIndex < oldIndex)
  301. {
  302. // Overflow
  303. Js::Throw::OutOfMemory();
  304. }
  305. // mark the mode as 'string transformed' (no direct mapping in inputText possible)
  306. isStringDirectInputTextMapped = false;
  307. // reset (to next char)
  308. bulkStart = currentChar;
  309. bulkLength = 0;
  310. }
  311. else
  312. {
  313. // continue
  314. bulkLength++;
  315. }
  316. }
  317. if (!endFound)
  318. {
  319. // no ending '"' found
  320. ThrowSyntaxError(JSERR_JsonNoStrEnd);
  321. }
  322. if (isStringDirectInputTextMapped == false)
  323. {
  324. // If the last bulk is not ended with an escape character, make sure that is
  325. // not built into the final unescaped string
  326. bool shouldSkipLastCharacter = false;
  327. if (bulkLength > 0)
  328. {
  329. shouldSkipLastCharacter = true;
  330. this->GetCurrentRangeCharacterPairList()->Add(RangeCharacterPair((uint)(bulkStart - inputText), bulkLength, _u('\0')));
  331. uint oldIndex = currentIndex;
  332. currentIndex += bulkLength;
  333. if (currentIndex < oldIndex)
  334. {
  335. // Overflow
  336. Js::Throw::OutOfMemory();
  337. }
  338. }
  339. this->BuildUnescapedString(shouldSkipLastCharacter);
  340. this->GetCurrentRangeCharacterPairList()->Clear();
  341. this->currentString = this->stringBuffer;
  342. }
  343. else
  344. {
  345. // make currentIndex the length (w/o the \0)
  346. currentIndex = bulkLength;
  347. OUTPUT_TRACE_DEBUGONLY(Js::JSONPhase, _u("ScanString(): direct-mapped string as '%.*s'\n"),
  348. GetCurrentStringLen(), GetCurrentString());
  349. }
  350. return (pToken->tk = tkStrCon);
  351. }
  352. void JSONScanner::BuildUnescapedString(bool shouldSkipLastCharacter)
  353. {
  354. AssertMsg(this->allocator != nullptr, "We must have built the allocator");
  355. AssertMsg(this->currentRangeCharacterPairList != nullptr, "We must have built the currentRangeCharacterPairList");
  356. AssertMsg(this->currentRangeCharacterPairList->Count() > 0, "We need to build the current string only because we have escaped characters");
  357. // Step 1: Ensure the buffer has sufficient space
  358. int requiredSize = this->GetCurrentStringLen();
  359. if (requiredSize > this->stringBufferLength)
  360. {
  361. if (this->stringBuffer)
  362. {
  363. AdeleteArray(this->allocator, this->stringBufferLength, this->stringBuffer);
  364. this->stringBuffer = nullptr;
  365. }
  366. this->stringBuffer = AnewArray(this->allocator, char16, requiredSize);
  367. this->stringBufferLength = requiredSize;
  368. }
  369. // Step 2: Copy the data to the buffer
  370. int totalCopied = 0;
  371. char16* begin_copy = this->stringBuffer;
  372. int lastCharacterIndex = this->currentRangeCharacterPairList->Count() - 1;
  373. for (int i = 0; i <= lastCharacterIndex; i++)
  374. {
  375. RangeCharacterPair data = this->currentRangeCharacterPairList->Item(i);
  376. int charactersToCopy = data.m_rangeLength;
  377. js_wmemcpy_s(begin_copy, charactersToCopy, this->inputText + data.m_rangeStart, charactersToCopy);
  378. begin_copy += charactersToCopy;
  379. totalCopied += charactersToCopy;
  380. if (i == lastCharacterIndex && shouldSkipLastCharacter)
  381. {
  382. continue;
  383. }
  384. *begin_copy = data.m_char;
  385. begin_copy++;
  386. totalCopied++;
  387. }
  388. if (totalCopied != requiredSize)
  389. {
  390. OUTPUT_TRACE_DEBUGONLY(Js::JSONPhase, _u("BuildUnescapedString(): allocated size = %d != copying size %d\n"), requiredSize, totalCopied);
  391. AssertMsg(totalCopied == requiredSize, "BuildUnescapedString(): The allocated size and copying size should match.");
  392. }
  393. OUTPUT_TRACE_DEBUGONLY(Js::JSONPhase, _u("BuildUnescapedString(): unescaped string as '%.*s'\n"), GetCurrentStringLen(), this->stringBuffer);
  394. }
  395. JSONScanner::RangeCharacterPairList* JSONScanner::GetCurrentRangeCharacterPairList(void)
  396. {
  397. if (this->currentRangeCharacterPairList == nullptr)
  398. {
  399. if (this->allocator == nullptr)
  400. {
  401. this->allocatorObject = this->scriptContext->GetTemporaryGuestAllocator(_u("JSONScanner"));
  402. this->allocator = this->allocatorObject->GetAllocator();
  403. }
  404. this->currentRangeCharacterPairList = Anew(this->allocator, RangeCharacterPairList, this->allocator, 4);
  405. }
  406. return this->currentRangeCharacterPairList;
  407. }
  408. } // namespace JSON