| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473 |
- //-------------------------------------------------------------------------------------------------------
- // Copyright (C) Microsoft. All rights reserved.
- // Licensed under the MIT license. See LICENSE.txt file in the project root for full license information.
- //-------------------------------------------------------------------------------------------------------
- #include "RuntimeLibraryPch.h"
- #include "JSONScanner.h"
- using namespace Js;
- namespace JSON
- {
- // -------- Scanner implementation ------------//
- JSONScanner::JSONScanner()
- : inputText(0), inputLen(0), pToken(0), stringBuffer(0), allocator(0), allocatorObject(0),
- currentRangeCharacterPairList(0), stringBufferLength(0), currentIndex(0)
- {
- }
- void JSONScanner::Finalizer()
- {
- // All dynamic memory allocated by this object is on the arena - either the one this object owns or by the
- // one shared with JSON parser - here we will deallocate ours. The others will be deallocated when JSONParser
- // goes away which should happen right after this.
- if (this->allocatorObject != nullptr)
- {
- // We created our own allocator, so we have to free it
- this->scriptContext->ReleaseTemporaryGuestAllocator(allocatorObject);
- }
- }
- void JSONScanner::Init(const char16* input, uint len, Token* pOutToken, Js::ScriptContext* sc, const char16* current, ArenaAllocator* allocator)
- {
- // Note that allocator could be nullptr from JSONParser, if we could not reuse an allocator, keep our own
- inputText = input;
- currentChar = current;
- inputLen = len;
- pToken = pOutToken;
- scriptContext = sc;
- this->allocator = allocator;
- }
- tokens JSONScanner::Scan()
- {
- pTokenString = currentChar;
- while (currentChar < inputText + inputLen)
- {
- switch(ReadNextChar())
- {
- case 0:
- //EOF
- currentChar--;
- return (pToken->tk = tkEOF);
- case '\t':
- case '\r':
- case '\n':
- case ' ':
- //WS - keep looping
- break;
- case '"':
- //check for string
- return ScanString();
- case '0':
- case '1':
- case '2':
- case '3':
- case '4':
- case '5':
- case '6':
- case '7':
- case '8':
- case '9':
- //decimal digit starts a number
- {
- currentChar--;
- // we use StrToDbl() here for compat with the rest of the engine. StrToDbl() accept a larger syntax.
- // Verify first the JSON grammar.
- const char16* saveCurrentChar = currentChar;
- if(!IsJSONNumber())
- {
- ThrowSyntaxError(JSERR_JsonBadNumber);
- }
- currentChar = saveCurrentChar;
- double val;
- const char16* end;
- val = Js::NumberUtilities::StrToDbl(currentChar, &end, scriptContext);
- if(currentChar == end)
- {
- ThrowSyntaxError(JSERR_JsonBadNumber);
- }
- AssertMsg(!Js::JavascriptNumber::IsNan(val), "Bad result from string to double conversion");
- pToken->tk = tkFltCon;
- pToken->SetDouble(val, false);
- currentChar = end;
- return tkFltCon;
- }
- case ',':
- return (pToken->tk = tkComma);
- case ':':
- return (pToken->tk = tkColon);
- case '[':
- return (pToken->tk = tkLBrack);
- case ']':
- return (pToken->tk = tkRBrack);
- case '-':
- return (pToken->tk = tkSub);
- case 'n':
- //check for 'null'
- if (currentChar + 2 < inputText + inputLen && currentChar[0] == 'u' && currentChar[1] == 'l' && currentChar[2] == 'l')
- {
- currentChar += 3;
- return (pToken->tk = tkNULL);
- }
- ThrowSyntaxError(JSERR_JsonIllegalChar);
- case 't':
- //check for 'true'
- if (currentChar + 2 < inputText + inputLen && currentChar[0] == 'r' && currentChar[1] == 'u' && currentChar[2] == 'e')
- {
- currentChar += 3;
- return (pToken->tk = tkTRUE);
- }
- ThrowSyntaxError(JSERR_JsonIllegalChar);
- case 'f':
- //check for 'false'
- if (currentChar + 3 < inputText + inputLen && currentChar[0] == 'a' && currentChar[1] == 'l' && currentChar[2] == 's' && currentChar[3] == 'e')
- {
- currentChar += 4;
- return (pToken->tk = tkFALSE);
- }
- ThrowSyntaxError(JSERR_JsonIllegalChar);
- case '{':
- return (pToken->tk = tkLCurly);
- case '}':
- return (pToken->tk = tkRCurly);
- default:
- ThrowSyntaxError(JSERR_JsonIllegalChar);
- }
- }
- return (pToken->tk = tkEOF);
- }
- bool JSONScanner::IsJSONNumber()
- {
- bool firstDigitIsAZero = false;
- if (PeekNextChar() == '0')
- {
- firstDigitIsAZero = true;
- currentChar++;
- }
- //partial verification of number JSON grammar.
- while (currentChar < inputText + inputLen)
- {
- switch(ReadNextChar())
- {
- case 0:
- return false;
- case '0':
- case '1':
- case '2':
- case '3':
- case '4':
- case '5':
- case '6':
- case '7':
- case '8':
- case '9':
- if (firstDigitIsAZero)
- {
- return false;
- }
- break;
- case '.':
- {
- // at least one digit after '.'
- if(currentChar < inputText + inputLen)
- {
- char16 nch = ReadNextChar();
- if('0' <= nch && nch <= '9')
- {
- return true;
- }
- else
- {
- return false;
- }
- }
- else
- {
- return false;
- }
- }
- //case 'E':
- //case 'e':
- // return true;
- default:
- return true;
- }
- firstDigitIsAZero = false;
- }
- return true;
- }
- tokens JSONScanner::ScanString()
- {
- char16 ch;
- this->currentIndex = 0;
- this->currentString = const_cast<char16*>(currentChar);
- bool endFound = false;
- bool isStringDirectInputTextMapped = true;
- LPCWSTR bulkStart = currentChar;
- uint bulkLength = 0;
- while (currentChar < inputText + inputLen)
- {
- ch = ReadNextChar();
- int tempHex;
- if (ch == '"')
- {
- //end of the string
- endFound = true;
- break;
- }
- else if (ch <= 0x1F)
- {
- //JSON doesn't accept \u0000 - \u001f range, LS(\u2028) and PS(\u2029) are ok
- ThrowSyntaxError(JSERR_JsonIllegalChar);
- }
- else if ( 0 == ch )
- {
- currentChar--;
- ThrowSyntaxError(JSERR_JsonNoStrEnd);
- }
- else if ('\\' == ch)
- {
- //JSON escape sequence in a string \", \/, \\, \b, \f, \n, \r, \t, unicode seq
- // unlikely V5.8 regular chars are not escaped, i.e '\g'' in a string is illegal not 'g'
- if (currentChar >= inputText + inputLen )
- {
- ThrowSyntaxError(JSERR_JsonNoStrEnd);
- }
- ch = ReadNextChar();
- switch (ch)
- {
- case 0:
- currentChar--;
- ThrowSyntaxError(JSERR_JsonNoStrEnd);
- case '"':
- case '/':
- case '\\':
- //keep ch
- break;
- case 'b':
- ch = 0x08;
- break;
- case 'f':
- ch = 0x0C;
- break;
- case 'n':
- ch = 0x0A;
- break;
- case 'r':
- ch = 0x0D;
- break;
- case 't':
- ch = 0x09;
- break;
- case 'u':
- {
- int chcode;
- // 4 hex digits
- if (currentChar + 3 >= inputText + inputLen)
- {
- //no room left for 4 hex chars
- ThrowSyntaxError(JSERR_JsonNoStrEnd);
- }
- if (!Js::NumberUtilities::FHexDigit((WCHAR)ReadNextChar(), &tempHex))
- {
- ThrowSyntaxError(JSERR_JsonBadHexDigit);
- }
- chcode = tempHex * 0x1000;
- if (!Js::NumberUtilities::FHexDigit((WCHAR)ReadNextChar(), &tempHex))
- {
- ThrowSyntaxError(JSERR_JsonBadHexDigit);
- }
- chcode += tempHex * 0x0100;
- if (!Js::NumberUtilities::FHexDigit((WCHAR)ReadNextChar(), &tempHex))
- {
- ThrowSyntaxError(JSERR_JsonBadHexDigit);
- }
- chcode += tempHex * 0x0010;
- if (!Js::NumberUtilities::FHexDigit((WCHAR)ReadNextChar(), &tempHex))
- {
- ThrowSyntaxError(JSERR_JsonBadHexDigit);
- }
- chcode += tempHex;
- AssertMsg(chcode == (chcode & 0xFFFF), "Bad unicode code");
- ch = (char16)chcode;
- }
- break;
- default:
- // Any other '\o' is an error in JSON
- ThrowSyntaxError(JSERR_JsonIllegalChar);
- }
- // flush
- this->GetCurrentRangeCharacterPairList()->Add(RangeCharacterPair((uint)(bulkStart - inputText), bulkLength, ch));
- uint oldIndex = currentIndex;
- currentIndex += bulkLength;
- currentIndex++;
- if (currentIndex < oldIndex)
- {
- // Overflow
- Js::Throw::OutOfMemory();
- }
- // mark the mode as 'string transformed' (no direct mapping in inputText possible)
- isStringDirectInputTextMapped = false;
- // reset (to next char)
- bulkStart = currentChar;
- bulkLength = 0;
- }
- else
- {
- // continue
- bulkLength++;
- }
- }
- if (!endFound)
- {
- // no ending '"' found
- ThrowSyntaxError(JSERR_JsonNoStrEnd);
- }
- if (isStringDirectInputTextMapped == false)
- {
- // If the last bulk is not ended with an escape character, make sure that is
- // not built into the final unescaped string
- bool shouldSkipLastCharacter = false;
- if (bulkLength > 0)
- {
- shouldSkipLastCharacter = true;
- this->GetCurrentRangeCharacterPairList()->Add(RangeCharacterPair((uint)(bulkStart - inputText), bulkLength, _u('\0')));
- uint oldIndex = currentIndex;
- currentIndex += bulkLength;
- if (currentIndex < oldIndex)
- {
- // Overflow
- Js::Throw::OutOfMemory();
- }
- }
- this->BuildUnescapedString(shouldSkipLastCharacter);
- this->GetCurrentRangeCharacterPairList()->Clear();
- this->currentString = this->stringBuffer;
- }
- else
- {
- // make currentIndex the length (w/o the \0)
- currentIndex = bulkLength;
- OUTPUT_TRACE_DEBUGONLY(Js::JSONPhase, _u("ScanString(): direct-mapped string as '%.*s'\n"),
- GetCurrentStringLen(), GetCurrentString());
- }
- return (pToken->tk = tkStrCon);
- }
- void JSONScanner::BuildUnescapedString(bool shouldSkipLastCharacter)
- {
- AssertMsg(this->allocator != nullptr, "We must have built the allocator");
- AssertMsg(this->currentRangeCharacterPairList != nullptr, "We must have built the currentRangeCharacterPairList");
- AssertMsg(this->currentRangeCharacterPairList->Count() > 0, "We need to build the current string only because we have escaped characters");
- // Step 1: Ensure the buffer has sufficient space
- int requiredSize = this->GetCurrentStringLen();
- if (requiredSize > this->stringBufferLength)
- {
- if (this->stringBuffer)
- {
- AdeleteArray(this->allocator, this->stringBufferLength, this->stringBuffer);
- this->stringBuffer = nullptr;
- }
- this->stringBuffer = AnewArray(this->allocator, char16, requiredSize);
- this->stringBufferLength = requiredSize;
- }
- // Step 2: Copy the data to the buffer
- int totalCopied = 0;
- char16* begin_copy = this->stringBuffer;
- int lastCharacterIndex = this->currentRangeCharacterPairList->Count() - 1;
- for (int i = 0; i <= lastCharacterIndex; i++)
- {
- RangeCharacterPair data = this->currentRangeCharacterPairList->Item(i);
- int charactersToCopy = data.m_rangeLength;
- js_wmemcpy_s(begin_copy, charactersToCopy, this->inputText + data.m_rangeStart, charactersToCopy);
- begin_copy += charactersToCopy;
- totalCopied += charactersToCopy;
- if (i == lastCharacterIndex && shouldSkipLastCharacter)
- {
- continue;
- }
- *begin_copy = data.m_char;
- begin_copy++;
- totalCopied++;
- }
- if (totalCopied != requiredSize)
- {
- OUTPUT_TRACE_DEBUGONLY(Js::JSONPhase, _u("BuildUnescapedString(): allocated size = %d != copying size %d\n"), requiredSize, totalCopied);
- AssertMsg(totalCopied == requiredSize, "BuildUnescapedString(): The allocated size and copying size should match.");
- }
- OUTPUT_TRACE_DEBUGONLY(Js::JSONPhase, _u("BuildUnescapedString(): unescaped string as '%.*s'\n"), GetCurrentStringLen(), this->stringBuffer);
- }
- JSONScanner::RangeCharacterPairList* JSONScanner::GetCurrentRangeCharacterPairList(void)
- {
- if (this->currentRangeCharacterPairList == nullptr)
- {
- if (this->allocator == nullptr)
- {
- this->allocatorObject = this->scriptContext->GetTemporaryGuestAllocator(_u("JSONScanner"));
- this->allocator = this->allocatorObject->GetAllocator();
- }
- this->currentRangeCharacterPairList = Anew(this->allocator, RangeCharacterPairList, this->allocator, 4);
- }
- return this->currentRangeCharacterPairList;
- }
- } // namespace JSON
|