| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588 |
- //-------------------------------------------------------------------------------------------------------
- // Copyright (C) Microsoft. All rights reserved.
- // Licensed under the MIT license. See LICENSE.txt file in the project root for full license information.
- //-------------------------------------------------------------------------------------------------------
- #include "RuntimeLibraryPch.h"
- namespace Js
- {
- Var UriHelper::EncodeCoreURI(ScriptContext* scriptContext, Arguments& args, unsigned char flags )
- {
- AssertMsg(args.Info.Count > 0, "Should always have implicit 'this'");
- JavascriptString * strURI;
- //TODO make sure this string is pinned when the memory recycler is in
- if(args.Info.Count < 2)
- {
- strURI = scriptContext->GetLibrary()->GetUndefinedDisplayString();
- }
- else
- {
- if (JavascriptString::Is(args[1]))
- {
- strURI = JavascriptString::FromVar(args[1]);
- }
- else
- {
- strURI = JavascriptConversion::ToString(args[1], scriptContext);
- }
- }
- return Encode(strURI, flags, scriptContext);
- }
- unsigned char UriHelper::s_uriProps[128] =
- {
- //0x00 0x01 0x02 0x03 0x04 0x05 0x06 0x07 0x08 0x09 0x0a 0x0b 0x0c 0x0d 0x0e 0x0f
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- //0x10 0x11 0x12 0x13 0x14 0x15 0x16 0x17 0x18 0x19 0x1a 0x1b 0x1c 0x1d 0x1e 0x1f
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- // ! " # $ % & ' ( ) * + , - . /
- 0, 0x02, 0, 0x01, 0x01, 0, 0x01, 0x02, 0x02, 0x02, 0x02, 0x01, 0x01, 0x02, 0x02, 0x01,
- // 0 1 2 3 4 5 6 7 8 9 : ; < = > ?
- 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x01, 0x01, 0, 0x01, 0, 0x01,
- // @ A B C D E F G H I J K L M N O
- 0x01, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
- // P Q R S T U V W X Y Z [ \ ] ^ _
- 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0, 0, 0, 0, 0x02,
- // ` a b c d e f g h i j k l m n o
- 0, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
- // p q r s t u v w x y z { | } ~ 0x7f
- 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0, 0, 0, 0x02, 0,
- };
- // Convert 'uVal' to it's UTF-8 encoding in the array 'bUTF8'. Returns
- // the number of characters in the output array.
- // This routine assumes that it's input 'uVal' is a valid Unicode code-point value
- // and does no error checking.
- uint32 UriHelper::ToUTF8( uint32 uVal, BYTE bUTF8[MaxUTF8Len])
- {
- uint32 uRet;
- if( uVal <= 0x007F )
- {
- bUTF8[0] = (BYTE)uVal;
- uRet = 1;
- }
- else if( uVal <= 0x07FF )
- {
- uint32 z = uVal & 0x3F;
- uint32 y = uVal >> 6;
- bUTF8[0] = (BYTE) (0xC0 | y);
- bUTF8[1] = (BYTE) (0x80 | z);
- uRet = 2;
- }
- else if( uVal <= 0xFFFF )
- {
- Assert( uVal <= 0xD7FF || uVal >= 0xE000 );
- uint32 z = uVal & 0x3F;
- uint32 y = (uVal >> 6) & 0x3F;
- uint32 x = (uVal >> 12);
- bUTF8[0] = (BYTE) (0xE0 | x);
- bUTF8[1] = (BYTE) (0x80 | y);
- bUTF8[2] = (BYTE) (0x80 | z);
- uRet = 3;
- }
- else
- {
- uint32 z = uVal & 0x3F;
- uint32 y = (uVal >> 6) &0x3F;
- uint32 x = (uVal >> 12) &0x3F;
- uint32 w = (uVal >> 18);
- bUTF8[0] = (BYTE) (0xF0 | w);
- bUTF8[1] = (BYTE) (0x80 | x);
- bUTF8[2] = (BYTE) (0x80 | y);
- bUTF8[3] = (BYTE) (0x80 | z);
- uRet = 4;
- }
- return uRet;
- }
- // Return the Unicode code-point value of the UTF-8 encoding passed in as the
- // array 'bUTF8'. uLen is the number of characters in the UTF-8 encoding.
- // This routine assumes that a valid UTF-8 encoding of a character is passed in
- // and does no error checking.
- uint32 UriHelper::FromUTF8( BYTE bUTF8[MaxUTF8Len], uint32 uLen )
- {
- Assert( 1 <= uLen && uLen <= MaxUTF8Len );
- if( uLen == 1 )
- {
- return bUTF8[0];
- }
- else if( uLen == 2 )
- {
- return ((bUTF8[0] & 0x1F) << 6 ) | (bUTF8[1] & 0x3F);
- }
- else if( uLen == 3 )
- {
- return ((bUTF8[0] & 0x0F) << 12) | ((bUTF8[1] & 0x3F) << 6) | (bUTF8[2] & 0x3F);
- }
- else
- {
- Assert( uLen == 4 );
- return ((bUTF8[0] & 0x07) << 18) | ((bUTF8[1] & 0x3F) << 12) | ((bUTF8[2] & 0x3F) << 6 ) | (bUTF8[3] & 0x3F) ;
- }
- }
- // The Encode algorithm described in sec. 15.1.3 of the spec. The input string is
- // 'strURI' and the Unescaped set is described by the flags 'unescapedFlags'. The
- // output is a string var.
- Var UriHelper::Encode(JavascriptString* strURI, unsigned char unescapedFlags, ScriptContext* scriptContext )
- {
- charcount_t len = strURI->GetLength();
- __in_ecount(len) const char16* input = strURI->GetString();
- bool needsChanges = false;
- BYTE bUTF8[MaxUTF8Len];
- // pass 1 calculate output length and error check
- uint32 outputLen = 0;
- for( uint32 k = 0; k < len; k++ )
- {
- char16 c = input[k];
- uint32 uVal;
- if( InURISet(c, unescapedFlags) )
- {
- outputLen = UInt32Math::Add(outputLen, 1);
- }
- else
- {
- needsChanges = true;
- if( c >= 0xDC00 && c <= 0xDFFF )
- {
- JavascriptError::ThrowURIError(scriptContext, JSERR_URIEncodeError /* TODO-ERROR: _u("NEED MESSAGE") */);
- }
- else if( c < 0xD800 || c > 0xDBFF )
- {
- uVal = (uint32)c;
- }
- else
- {
- ++k;
- if(k == len)
- {
- JavascriptError::ThrowURIError(scriptContext, JSERR_URIEncodeError /* TODO-ERROR: _u("NEED MESSAGE") */);
- }
- __analysis_assume(k < len); // because we throw exception if k==len
- char16 c1 = input[k];
- if( c1 < 0xDC00 || c1 > 0xDFFF )
- {
- JavascriptError::ThrowURIError(scriptContext, JSERR_URIEncodeError /* TODO-ERROR: _u("NEED MESSAGE") */);
- }
- uVal = (c - 0xD800) * 0x400 + (c1 - 0xDC00) + 0x10000;
- }
- uint32 utfLen = ToUTF8(uVal, bUTF8);
- utfLen = UInt32Math::Mul(utfLen, 3);
- outputLen = UInt32Math::Add(outputLen, utfLen);
- }
- }
- // If nothing needs encoding, then avoid extra work
- if (!needsChanges)
- {
- AssertMsg(scriptContext == strURI->GetScriptContext(), "Should have already marshaled the string in cross site thunk");
- return strURI;
- }
- //pass 2 generate the encoded URI
- uint32 allocSize = UInt32Math::Add(outputLen, 1);
- char16* outURI = RecyclerNewArrayLeaf(scriptContext->GetRecycler(), char16, allocSize);
- char16* outCurrent = outURI;
- const char16 *hexStream = _u("0123456789ABCDEF");
- for( uint32 k = 0; k < len; k++ )
- {
- char16 c = input[k];
- uint32 uVal;
- if( InURISet(c, unescapedFlags) )
- {
- __analysis_assume(outCurrent < outURI + allocSize);
- *outCurrent++ = c;
- }
- else
- {
- #if DBG
- if( c >= 0xDC00 && c <= 0xDFFF )
- {
- JavascriptError::ThrowURIError(scriptContext, VBSERR_InternalError /* TODO-ERROR: _u("NEED MESSAGE") */);
- }
- #endif
- if( c < 0xD800 || c > 0xDBFF )
- {
- uVal = (uint32)c;
- }
- else
- {
- ++k;
- #if DBG
- if(k == len)
- {
- JavascriptError::ThrowURIError(scriptContext, VBSERR_InternalError /* TODO-ERROR: _u("NEED MESSAGE") */);
- }
- #endif
- __analysis_assume(k < len);// because we throw exception if k==len
- char16 c1 = input[k];
- #if DBG
- if( c1 < 0xDC00 || c1 > 0xDFFF )
- {
- JavascriptError::ThrowURIError(scriptContext, VBSERR_InternalError /* TODO-ERROR: _u("NEED MESSAGE") */);
- }
- #endif
- uVal = (c - 0xD800) * 0x400 + (c1 - 0xDC00) + 0x10000;
- }
- uint32 utfLen = ToUTF8(uVal, bUTF8);
- for( uint32 j = 0; j < utfLen; j++ )
- {
- #pragma prefast(disable: 26014, "buffer length was calculated earlier");
- BYTE val = bUTF8[j];
- *outCurrent++ = _u('%');
- *outCurrent++ = hexStream[(val >> 4)];
- *outCurrent++ = hexStream[(val & 0xF)];
- #pragma prefast(default: 26014);
- }
- }
- }
- AssertMsg(outURI + outputLen == outCurrent, " URI out buffer out of sync");
- __analysis_assume(outputLen + 1 == allocSize);
- outURI[outputLen] = _u('\0');
- return JavascriptString::NewWithBuffer(outURI, outputLen, scriptContext);
- }
- Var UriHelper::DecodeCoreURI(ScriptContext* scriptContext, Arguments& args, unsigned char reservedFlags )
- {
- AssertMsg(args.Info.Count > 0, "Should always have implicit 'this'");
- JavascriptString * strURI;
- //TODO make sure this string is pinned when the memory recycler is in
- if(args.Info.Count < 2)
- {
- strURI = scriptContext->GetLibrary()->GetUndefinedDisplayString();
- }
- else
- {
- if (JavascriptString::Is(args[1]))
- {
- strURI = JavascriptString::FromVar(args[1]);
- }
- else
- {
- strURI = JavascriptConversion::ToString(args[1], scriptContext);
- }
- }
- return Decode(strURI, reservedFlags, scriptContext);
- }
- // The Decode algorithm described in sec. 15.1.3 of the spec. The input string is
- // 'strURI' and the Reserved set is described by the flags 'reservedFlags'. The
- // output is a string var.
- Var UriHelper::Decode(JavascriptString* strURI, unsigned char reservedFlags, ScriptContext* scriptContext)
- {
- charcount_t len = strURI->GetLength();
- __in_ecount(len) const char16* input = strURI->GetString();
- bool needsChanges = false;
- char16 c1;
- char16 c;
- // pass 1 calculate output length and error check
- uint32 outputLen = 0;
- for( uint32 k = 0; k < len; k++ )
- {
- c = input[k];
- if( c == '%')
- {
- needsChanges = true;
- uint32 start = k;
- if( k + 2 >= len )
- {
- JavascriptError::ThrowURIError(scriptContext, JSERR_URIDecodeError /* TODO-ERROR: _u("NEED MESSAGE") */);
- }
- // %-encoded components in a URI may only contain hexadecimal digits from the ASCII character set. 'swscanf_s'
- // only supports those characters when decoding hexadecimal integers. 'iswxdigit' on the other hand, uses the
- // current locale to see if the specified character maps to a hexadecimal digit, which causes it to consider some
- // characters outside the ASCII character set to be hexadecimal digits, so we can't use that. 'swscanf_s' seems
- // to be overkill for this, so using a simple function that parses two hex digits and produces their value.
- BYTE b;
- if(!DecodeByteFromHex(input[k + 1], input[k + 2], b))
- {
- JavascriptError::ThrowURIError(scriptContext, JSERR_URIDecodeError);
- }
- k += 2;
- if( (b & 0x80) == 0)
- {
- c1 = b;
- }
- else
- {
- int n;
- for( n = 1; ((b << n) & 0x80) != 0; n++ )
- ;
- if( n == 1 || n > UriHelper::MaxUTF8Len )
- {
- JavascriptError::ThrowURIError(scriptContext, JSERR_URIDecodeError /* TODO-ERROR: _u("NEED MESSAGE") */);
- }
- BYTE bOctets[UriHelper::MaxUTF8Len];
- bOctets[0] = b;
- if( k + 3 * (n-1) >= len )
- {
- JavascriptError::ThrowURIError(scriptContext, JSERR_URIDecodeError /* TODO-ERROR: _u("NEED MESSAGE") */);
- }
- for( int j = 1; j < n; j++ )
- {
- if( input[++k] != '%' )
- {
- JavascriptError::ThrowURIError(scriptContext, JSERR_URIDecodeError /* TODO-ERROR: _u("NEED MESSAGE") */);
- }
- if(!DecodeByteFromHex(input[k + 1], input[k + 2], b))
- {
- JavascriptError::ThrowURIError(scriptContext, JSERR_URIDecodeError /* TODO-ERROR: _u("NEED MESSAGE") */);
- }
- // The two leading bits should be 10 for a valid UTF-8 encoding
- if( (b & 0xC0) != 0x80)
- {
- JavascriptError::ThrowURIError(scriptContext, JSERR_URIDecodeError /* TODO-ERROR: _u("NEED MESSAGE") */);
- }
- k += 2;
- bOctets[j] = b;
- }
- uint32 uVal = UriHelper::FromUTF8( bOctets, n );
- if( uVal >= 0xD800 && uVal <= 0xDFFF)
- {
- JavascriptError::ThrowURIError(scriptContext, JSERR_URIDecodeError /* TODO-ERROR: _u("NEED MESSAGE") */);
- }
- if( uVal < 0x10000 )
- {
- c1 = (char16)uVal;
- }
- else if( uVal > 0x10ffff )
- {
- JavascriptError::ThrowURIError(scriptContext, JSERR_URIDecodeError /* TODO-ERROR: _u("NEED MESSAGE") */);
- }
- else
- {
- outputLen +=2;
- continue;
- }
- }
- if( ! UriHelper::InURISet( c1, reservedFlags ))
- {
- outputLen++;
- }
- else
- {
- outputLen += k - start + 1;
- }
- }
- else // c is not '%'
- {
- outputLen++;
- }
- }
- // If nothing needs decoding, then avoid extra work
- if (!needsChanges)
- {
- AssertMsg(scriptContext == strURI->GetScriptContext(), "Should have already marshaled the string in cross site thunk");
- return strURI;
- }
- //pass 2 generate the decoded URI
- uint32 allocSize = UInt32Math::Add(outputLen, 1);
- char16* outURI = RecyclerNewArrayLeaf(scriptContext->GetRecycler(), char16, allocSize);
- char16* outCurrent = outURI;
- for( uint32 k = 0; k < len; k++ )
- {
- c = input[k];
- if( c == '%')
- {
- uint32 start = k;
- #if DBG
- Assert(!(k + 2 >= len));
- if( k + 2 >= len )
- {
- JavascriptError::ThrowURIError(scriptContext, VBSERR_InternalError /* TODO-ERROR: _u("NEED MESSAGE") */);
- }
- #endif
- // Let OACR know some things about 'k' that we checked just above, to let it know that we are not going to
- // overflow later. The same checks are done in the first pass in non-debug builds, and the conditions
- // checked upon in the first and second pass are the same.
- __analysis_assume(!(k + 2 >= len));
- BYTE b;
- if(!DecodeByteFromHex(input[k + 1], input[k + 2], b))
- {
- #if DBG
- AssertMsg(false, "!DecodeByteFromHex(input[k + 1], input[k + 2], b)");
- JavascriptError::ThrowURIError(scriptContext, VBSERR_InternalError /* TODO-ERROR: _u("NEED MESSAGE") */);
- #endif
- }
- k += 2;
- if( (b & 0x80) == 0)
- {
- c1 = b;
- }
- else
- {
- int n;
- for( n = 1; ((b << n) & 0x80) != 0; n++ )
- ;
- if( n == 1 || n > UriHelper::MaxUTF8Len )
- {
- JavascriptError::ThrowURIError(scriptContext, VBSERR_InternalError /* TODO-ERROR: _u("NEED MESSAGE") */);
- }
- BYTE bOctets[UriHelper::MaxUTF8Len];
- bOctets[0] = b;
- #if DBG
- Assert(!(k + 3 * (n-1) >= len));
- if( k + 3 * (n-1) >= len )
- {
- JavascriptError::ThrowURIError(scriptContext, VBSERR_InternalError /* TODO-ERROR: _u("NEED MESSAGE") */);
- }
- #endif
- // Let OACR know some things about 'k' that we checked just above, to let it know that we are not going to
- // overflow later. The same checks are done in the first pass in non-debug builds, and the conditions
- // checked upon in the first and second pass are the same.
- __analysis_assume(!(k + 3 * (n-1) >= len));
- for( int j = 1; j < n; j++ )
- {
- ++k;
- #if DBG
- Assert(!(input[k] != '%'));
- if( input[k] != '%' )
- {
- JavascriptError::ThrowURIError(scriptContext, VBSERR_InternalError /* TODO-ERROR: _u("NEED MESSAGE") */);
- }
- #endif
- if(!DecodeByteFromHex(input[k + 1], input[k + 2], b))
- {
- #if DBG
- AssertMsg(false, "!DecodeByteFromHex(input[k + 1], input[k + 2], b)");
- JavascriptError::ThrowURIError(scriptContext, VBSERR_InternalError /* TODO-ERROR: _u("NEED MESSAGE") */);
- #endif
- }
- #if DBG
- // The two leading bits should be 10 for a valid UTF-8 encoding
- Assert(!((b & 0xC0) != 0x80));
- if( (b & 0xC0) != 0x80)
- {
- JavascriptError::ThrowURIError(scriptContext, VBSERR_InternalError /* TODO-ERROR: _u("NEED MESSAGE") */);
- }
- #endif
- k += 2;
- bOctets[j] = b;
- }
- uint32 uVal = UriHelper::FromUTF8( bOctets, n );
- #if DBG
- Assert(!(uVal >= 0xD800 && uVal <= 0xDFFF));
- if( uVal >= 0xD800 && uVal <= 0xDFFF)
- {
- JavascriptError::ThrowURIError(scriptContext, VBSERR_InternalError /* TODO-ERROR: _u("NEED MESSAGE") */);
- }
- #endif
- if( uVal < 0x10000 )
- {
- c1 = (char16)uVal;
- }
- #if DBG
- else if( uVal > 0x10ffff )
- {
- AssertMsg(false, "uVal > 0x10ffff");
- JavascriptError::ThrowURIError(scriptContext, VBSERR_InternalError /* TODO-ERROR: _u("NEED MESSAGE") */);
- }
- #endif
- else
- {
- uint32 l = (( uVal - 0x10000) & 0x3ff) + 0xdc00;
- uint32 h = ((( uVal - 0x10000) >> 10) & 0x3ff) + 0xd800;
- __analysis_assume(outCurrent + 2 <= outURI + allocSize);
- *outCurrent++ = (char16)h;
- *outCurrent++ = (char16)l;
- continue;
- }
- }
- if( !UriHelper::InURISet( c1, reservedFlags ))
- {
- __analysis_assume(outCurrent < outURI + allocSize);
- *outCurrent++ = c1;
- }
- else
- {
- js_memcpy_s(outCurrent, (allocSize - (outCurrent - outURI)) * sizeof(char16), &input[start], (k - start + 1)*sizeof(char16));
- outCurrent += k - start + 1;
- }
- }
- else // c is not '%'
- {
- __analysis_assume(outCurrent < outURI + allocSize);
- *outCurrent++ = c;
- }
- }
- AssertMsg(outURI + outputLen == outCurrent, " URI out buffer out of sync");
- __analysis_assume(outputLen + 1 == allocSize);
- outURI[outputLen] = _u('\0');
- return JavascriptString::NewWithBuffer(outURI, outputLen, scriptContext);
- }
- // Decodes a two-hexadecimal-digit wide character pair into the byte value it represents
- bool UriHelper::DecodeByteFromHex(const char16 digit1, const char16 digit2, unsigned char &value)
- {
- int x;
- if(!Js::NumberUtilities::FHexDigit(digit1, &x))
- {
- return false;
- }
- Assert(static_cast<unsigned int>(x) <= 0xfU);
- value = static_cast<unsigned char>(x) << 4;
- if(!Js::NumberUtilities::FHexDigit(digit2, &x))
- {
- return false;
- }
- Assert(static_cast<unsigned int>(x) <= 0xfU);
- value += static_cast<unsigned char>(x);
- return true;
- }
- }
|