// SunnyMirror / ChakraCore - mirror of https://github.com/microsoft/ChakraCore.git
							//-------------------------------------------------------------------------------------------------------
// Copyright (C) Microsoft. All rights reserved.
// Licensed under the MIT license. See LICENSE.txt file in the project root for full license information.
//-------------------------------------------------------------------------------------------------------
#include "RuntimeLibraryPch.h"

namespace Js
{
    // Shared front end for URI encoding: selects the string to encode from the call arguments and
    // forwards it to Encode.
    //   scriptContext - context used for the string conversion and the library lookup.
    //   args          - call arguments; slot 0 is the implicit 'this', slot 1 (when present) is the URI.
    //   flags         - handed unchanged to Encode as its 'unescapedFlags' argument.
    // When no URI argument was supplied, the string returned by GetUndefinedDisplayString() is encoded.
    Var UriHelper::EncodeCoreURI(ScriptContext* scriptContext, Arguments& args, unsigned char flags )
    {
        AssertMsg(args.Info.Count > 0, "Should always have implicit 'this'");

        //TODO make sure this string is pinned when the memory recycler is in
        JavascriptString * uriString;
        if (args.Info.Count >= 2)
        {
            Var uriArg = args[1];
            if (!JavascriptString::Is(uriArg))
            {
                // Not a string yet: run the regular ToString conversion.
                uriString = JavascriptConversion::ToString(uriArg, scriptContext);
            }
            else
            {
                uriString = JavascriptString::FromVar(uriArg);
            }
        }
        else
        {
            uriString = scriptContext->GetLibrary()->GetUndefinedDisplayString();
        }

        return Encode(uriString, flags, scriptContext);
    }

    // Classification table for the 128 ASCII code points, indexed by character value.
    // As laid out below, each entry is a small bit mask:
    //   0x01 - set for  # $ & + , / : ; = ? @
    //   0x02 - set for ASCII letters, decimal digits and  - _ . ! ~ * ' ( )
    //   0    - every other character (controls, space, " % < > [ \ ] ^ ` { | } and 0x7f)
    // NOTE(review): presumably consulted by InURISet together with the caller's flag mask - confirm
    // against the declaration in the header.
    unsigned char UriHelper::s_uriProps[128] =
    {
        //0x00  0x01  0x02  0x03  0x04  0x05  0x06  0x07  0x08  0x09  0x0a  0x0b  0x0c  0x0d  0x0e  0x0f
        0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
        //0x10  0x11  0x12  0x13  0x14  0x15  0x16  0x17  0x18  0x19  0x1a  0x1b  0x1c  0x1d  0x1e  0x1f
        0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
        //         !     "     #     $     %     &     '     (     )     *     +     ,     -     .     /
        0, 0x02,    0, 0x01, 0x01,    0, 0x01, 0x02, 0x02, 0x02, 0x02, 0x01, 0x01, 0x02, 0x02, 0x01,
        //   0     1     2     3     4     5     6     7     8     9     :     ;     <     =     >     ?
        0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x01, 0x01,    0, 0x01,    0, 0x01,
        //   @     A     B     C     D     E     F     G     H     I     J     K     L     M     N     O
        0x01, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
        //   P     Q     R     S     T     U     V     W     X     Y     Z     [     \     ]     ^     _
        0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,    0,    0,    0,    0, 0x02,
        //   `     a     b     c     d     e     f     g     h     i     j     k     l     m     n     o
        0, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
        //   p     q     r     s     t     u     v     w     x     y     z     {     |     }     ~  0x7f
        0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,    0,    0,    0, 0x02,    0,
    };

    // Writes the UTF-8 encoding of the code point 'uVal' into 'bUTF8' and returns the number of
    // bytes written (1 to 4).
    // The caller must pass a valid Unicode code point; nothing is validated here apart from a
    // debug-only assert that a three-byte value is not a surrogate.
    uint32 UriHelper::ToUTF8( uint32 uVal, BYTE bUTF8[MaxUTF8Len])
    {
        // One byte: 0xxxxxxx
        if( uVal <= 0x007F )
        {
            bUTF8[0] = static_cast<BYTE>(uVal);
            return 1;
        }

        // Two bytes: 110yyyyy 10zzzzzz
        if( uVal <= 0x07FF )
        {
            bUTF8[0] = static_cast<BYTE>(0xC0 | (uVal >> 6));
            bUTF8[1] = static_cast<BYTE>(0x80 | (uVal & 0x3F));
            return 2;
        }

        // Three bytes: 1110xxxx 10yyyyyy 10zzzzzz
        if( uVal <= 0xFFFF )
        {
            Assert( uVal <= 0xD7FF || uVal >= 0xE000 );
            bUTF8[0] = static_cast<BYTE>(0xE0 | (uVal >> 12));
            bUTF8[1] = static_cast<BYTE>(0x80 | ((uVal >> 6) & 0x3F));
            bUTF8[2] = static_cast<BYTE>(0x80 | (uVal & 0x3F));
            return 3;
        }

        // Four bytes: 11110www 10xxxxxx 10yyyyyy 10zzzzzz
        bUTF8[0] = static_cast<BYTE>(0xF0 | (uVal >> 18));
        bUTF8[1] = static_cast<BYTE>(0x80 | ((uVal >> 12) & 0x3F));
        bUTF8[2] = static_cast<BYTE>(0x80 | ((uVal >> 6) & 0x3F));
        bUTF8[3] = static_cast<BYTE>(0x80 | (uVal & 0x3F));
        return 4;
    }

    // Reassembles a code point from the 'uLen' UTF-8 bytes stored in 'bUTF8' and returns it.
    // The payload bits of the lead byte and of each continuation byte are simply masked out and
    // concatenated; the bytes are trusted to form a well-formed sequence, so no validation
    // (tag bits, overlong forms, range) is performed here.
    uint32 UriHelper::FromUTF8( BYTE bUTF8[MaxUTF8Len], uint32 uLen )
    {
        Assert( 1 <= uLen && uLen <= MaxUTF8Len );

        switch( uLen )
        {
        case 1:
            return bUTF8[0];

        case 2:
            return ((bUTF8[0] & 0x1F) << 6 ) | (bUTF8[1] & 0x3F);

        case 3:
            return ((bUTF8[0] & 0x0F) << 12) | ((bUTF8[1] & 0x3F) << 6) | (bUTF8[2] & 0x3F);

        default:
            // Anything that is not 1, 2 or 3 is treated as a four-byte sequence.
            Assert( uLen == 4 );
            return ((bUTF8[0] & 0x07) << 18) | ((bUTF8[1] & 0x3F) << 12) | ((bUTF8[2] & 0x3F) << 6 ) | (bUTF8[3] & 0x3F) ;
        }
    }

    // Implements the Encode algorithm from sec. 15.1.3 of the spec.
    //   strURI         - the string to encode.
    //   unescapedFlags - flag mask handed to InURISet; code units for which InURISet returns true are
    //                    copied through verbatim, every other character is replaced by the %XX escapes
    //                    of its UTF-8 bytes (upper-case hex digits).
    //   scriptContext  - used to raise errors and to allocate the result buffer.
    // Returns 'strURI' itself when nothing had to be escaped, otherwise a new string var.
    // Throws a URIError (JSERR_URIEncodeError) when the input contains an unpaired surrogate.
    // The work is done in two passes: the first validates and sizes the output, the second fills it.
    Var UriHelper::Encode(JavascriptString* strURI, unsigned char unescapedFlags, ScriptContext* scriptContext )
    {
        charcount_t len = strURI->GetLength();
        __in_ecount(len) const char16* input = strURI->GetString();
        BYTE utf8Bytes[MaxUTF8Len];
        bool sawEscapedChar = false;

        // Pass 1: check surrogate pairing and compute the exact output length.
        uint32 outputLen = 0;
        for( uint32 pos = 0; pos < len; pos++ )
        {
            const char16 unit = input[pos];
            if( InURISet(unit, unescapedFlags) )
            {
                outputLen = UInt32Math::Add(outputLen, 1);
                continue;
            }

            sawEscapedChar = true;

            uint32 codePoint = (uint32)unit;
            if( unit >= 0xD800 && unit <= 0xDFFF )
            {
                if( unit >= 0xDC00 )
                {
                    // Trail surrogate that is not preceded by a lead surrogate.
                    JavascriptError::ThrowURIError(scriptContext, JSERR_URIEncodeError /* TODO-ERROR: _u("NEED MESSAGE") */);
                }

                // Lead surrogate: the next code unit must exist and must be a trail surrogate.
                ++pos;
                if( pos == len )
                {
                    JavascriptError::ThrowURIError(scriptContext, JSERR_URIEncodeError /* TODO-ERROR: _u("NEED MESSAGE") */);
                }
                __analysis_assume(pos < len); // because we throw exception if pos==len
                const char16 trail = input[pos];
                if( trail < 0xDC00 || trail > 0xDFFF )
                {
                    JavascriptError::ThrowURIError(scriptContext, JSERR_URIEncodeError /* TODO-ERROR: _u("NEED MESSAGE") */);
                }
                codePoint = (unit - 0xD800) * 0x400 + (trail - 0xDC00) + 0x10000;
            }

            // Each UTF-8 byte turns into three output characters: '%' and two hex digits.
            const uint32 escapedLen = UInt32Math::Mul(ToUTF8(codePoint, utf8Bytes), 3);
            outputLen = UInt32Math::Add(outputLen, escapedLen);
        }

        // Nothing to escape: hand the original string back untouched.
        if (!sawEscapedChar)
        {
            AssertMsg(scriptContext == strURI->GetScriptContext(), "Should have already marshaled the string in cross site thunk");
            return strURI;
        }

        // Pass 2: emit the encoded URI into a buffer of the size computed above (plus terminator).
        uint32 allocSize = UInt32Math::Add(outputLen, 1);
        char16* outURI = RecyclerNewArrayLeaf(scriptContext->GetRecycler(), char16, allocSize);
        char16* outCurrent = outURI;
        const char16 *hexDigits = _u("0123456789ABCDEF");

        for( uint32 pos = 0; pos < len; pos++ )
        {
            const char16 unit = input[pos];
            if( InURISet(unit, unescapedFlags) )
            {
                __analysis_assume(outCurrent < outURI + allocSize);
                *outCurrent++ = unit;
                continue;
            }

            // The surrogate checks below can only fire if the two passes disagree, so they are
            // compiled into debug builds only and report an internal error.
#if DBG
            if( unit >= 0xDC00 && unit <= 0xDFFF )
            {
                JavascriptError::ThrowURIError(scriptContext, VBSERR_InternalError /* TODO-ERROR: _u("NEED MESSAGE") */);
            }
#endif
            uint32 codePoint = (uint32)unit;
            if( unit >= 0xD800 && unit <= 0xDBFF )
            {
                ++pos;
#if DBG
                if( pos == len )
                {
                    JavascriptError::ThrowURIError(scriptContext, VBSERR_InternalError /* TODO-ERROR: _u("NEED MESSAGE") */);
                }
#endif
                __analysis_assume(pos < len);// because we throw exception if pos==len
                const char16 trail = input[pos];

#if DBG
                if( trail < 0xDC00 || trail > 0xDFFF )
                {
                    JavascriptError::ThrowURIError(scriptContext, VBSERR_InternalError /* TODO-ERROR: _u("NEED MESSAGE") */);
                }
#endif
                codePoint = (unit - 0xD800) * 0x400 + (trail - 0xDC00) + 0x10000;
            }

            const uint32 byteCount = ToUTF8(codePoint, utf8Bytes);
            for( uint32 i = 0; i < byteCount; i++ )
            {
#pragma prefast(disable: 26014, "buffer length was calculated earlier");
                const BYTE octet = utf8Bytes[i];
                *outCurrent++ = _u('%');
                *outCurrent++ = hexDigits[(octet >> 4)];
                *outCurrent++ = hexDigits[(octet & 0xF)];
#pragma prefast(default: 26014);
            }
        }
        AssertMsg(outURI + outputLen == outCurrent, " URI out buffer out of sync");
        __analysis_assume(outputLen + 1 == allocSize);
        outURI[outputLen] = _u('\0');

        return JavascriptString::NewWithBuffer(outURI, outputLen, scriptContext);
    }

    // Shared front end for URI decoding: selects the string to decode from the call arguments and
    // forwards it to Decode.
    //   scriptContext - context used for the string conversion and the library lookup.
    //   args          - call arguments; slot 0 is the implicit 'this', slot 1 (when present) is the URI.
    //   reservedFlags - handed unchanged to Decode as its 'reservedFlags' argument.
    // When no URI argument was supplied, the string returned by GetUndefinedDisplayString() is decoded.
    Var UriHelper::DecodeCoreURI(ScriptContext* scriptContext, Arguments& args, unsigned char reservedFlags )
    {
        AssertMsg(args.Info.Count > 0, "Should always have implicit 'this'");

        //TODO make sure this string is pinned when the memory recycler is in
        JavascriptString * uriString;
        if (args.Info.Count >= 2)
        {
            Var uriArg = args[1];
            if (!JavascriptString::Is(uriArg))
            {
                // Not a string yet: run the regular ToString conversion.
                uriString = JavascriptConversion::ToString(uriArg, scriptContext);
            }
            else
            {
                uriString = JavascriptString::FromVar(uriArg);
            }
        }
        else
        {
            uriString = scriptContext->GetLibrary()->GetUndefinedDisplayString();
        }

        return Decode(uriString, reservedFlags, scriptContext);
    }

    // The Decode algorithm described in sec. 15.1.3 of the spec. The input string is
    // 'strURI' and the Reserved set is described by the flags 'reservedFlags'. The
    // output is a string var.
    //
    //   strURI        - the string to decode.
    //   reservedFlags - flag mask handed to InURISet; an escape sequence whose decoded character is
    //                   in that set is NOT decoded but copied to the output as written.
    //   scriptContext - used to raise errors and to allocate the result buffer.
    //
    // Returns 'strURI' itself when the input contains no '%', otherwise a new string var.
    //
    // Throws a URIError (JSERR_URIDecodeError) when
    //   - a '%' is not followed by two ASCII hexadecimal digits,
    //   - a lead byte announces an impossible sequence length (1, or more than MaxUTF8Len),
    //   - a multi-byte sequence is truncated, or one of its continuation escapes is missing or
    //     does not carry the 10xxxxxx tag,
    //   - the sequence is an overlong form (a code point encoded with more bytes than needed,
    //     e.g. %C0%80), which RFC 3629 forbids,
    //   - the decoded value is a surrogate (0xD800-0xDFFF) or exceeds 0x10FFFF.
    //
    // The work is done in two passes: pass 1 validates the input and computes the output length,
    // pass 2 produces the output. Pass 2 repeats the validation in debug builds only, where a
    // failure indicates that the two passes are out of sync (reported as VBSERR_InternalError).
    Var UriHelper::Decode(JavascriptString* strURI, unsigned char reservedFlags, ScriptContext* scriptContext)
    {
        charcount_t len = strURI->GetLength();
        __in_ecount(len) const char16* input = strURI->GetString();
        bool needsChanges = false;
        char16 c1;
        char16 c;
        // pass 1 calculate output length and error check
        uint32 outputLen = 0;
        for( uint32 k = 0; k < len; k++ )
        {
            c = input[k];

            if( c == '%')
            {
                needsChanges = true;

                // Remember where the escape sequence starts so that it can be kept verbatim
                // if it turns out to encode a reserved character.
                uint32 start = k;
                if( k + 2 >= len )
                {
                    JavascriptError::ThrowURIError(scriptContext, JSERR_URIDecodeError /* TODO-ERROR: _u("NEED MESSAGE") */);
                }

                // %-encoded components in a URI may only contain hexadecimal digits from the ASCII character set. 'swscanf_s'
                // only supports those characters when decoding hexadecimal integers. 'iswxdigit' on the other hand, uses the
                // current locale to see if the specified character maps to a hexadecimal digit, which causes it to consider some
                // characters outside the ASCII character set to be hexadecimal digits, so we can't use that. 'swscanf_s' seems
                // to be overkill for this, so using a simple function that parses two hex digits and produces their value.
                BYTE b;
                if(!DecodeByteFromHex(input[k + 1], input[k + 2], b))
                {
                    JavascriptError::ThrowURIError(scriptContext, JSERR_URIDecodeError);
                }

                k += 2;

                if( (b & 0x80) ==  0)
                {
                    // Single-byte (ASCII) character.
                    c1 = b;
                }
                else
                {
                    // Count the leading 1 bits of the lead byte: that is the sequence length.
                    int n;
                    for( n = 1; ((b << n) & 0x80) != 0; n++ )
                        ;

                    // n == 1 means the byte is a stray continuation byte (10xxxxxx).
                    if( n == 1 || n > UriHelper::MaxUTF8Len )
                    {
                        JavascriptError::ThrowURIError(scriptContext, JSERR_URIDecodeError /* TODO-ERROR: _u("NEED MESSAGE") */);
                    }

                    BYTE bOctets[UriHelper::MaxUTF8Len];
                    bOctets[0] = b;

                    // Each of the remaining n-1 bytes needs three more characters ("%XX").
                    if( k + 3 * (n-1) >= len )
                    {
                        JavascriptError::ThrowURIError(scriptContext, JSERR_URIDecodeError /* TODO-ERROR: _u("NEED MESSAGE") */);
                    }

                    for( int j = 1; j < n; j++ )
                    {
                        if( input[++k] != '%' )
                        {
                            JavascriptError::ThrowURIError(scriptContext, JSERR_URIDecodeError /* TODO-ERROR: _u("NEED MESSAGE") */);
                        }

                        if(!DecodeByteFromHex(input[k + 1], input[k + 2], b))
                        {
                            JavascriptError::ThrowURIError(scriptContext, JSERR_URIDecodeError /* TODO-ERROR: _u("NEED MESSAGE") */);
                        }

                        // The two leading bits should be 10 for a valid UTF-8 encoding
                        if( (b & 0xC0) != 0x80)
                        {
                            JavascriptError::ThrowURIError(scriptContext, JSERR_URIDecodeError /* TODO-ERROR: _u("NEED MESSAGE") */);
                        }
                        k += 2;

                        bOctets[j] = b;
                    }

                    uint32 uVal = UriHelper::FromUTF8( bOctets, n );

                    // Reject overlong forms: a sequence of n bytes is only valid if the code point could
                    // not have been encoded in fewer bytes (2 bytes: >= 0x80, 3 bytes: >= 0x800,
                    // 4 bytes: >= 0x10000). Without this check, input such as %C0%80 would decode to U+0000.
                    const uint32 minValueForLength = (n == 2) ? 0x80 : ((n == 3) ? 0x800 : 0x10000);
                    if( uVal < minValueForLength )
                    {
                        JavascriptError::ThrowURIError(scriptContext, JSERR_URIDecodeError /* TODO-ERROR: _u("NEED MESSAGE") */);
                    }

                    if( uVal >= 0xD800 && uVal <= 0xDFFF)
                    {
                        JavascriptError::ThrowURIError(scriptContext, JSERR_URIDecodeError /* TODO-ERROR: _u("NEED MESSAGE") */);
                    }
                    if( uVal < 0x10000 )
                    {
                        c1 = (char16)uVal;
                    }
                    else if( uVal > 0x10ffff )
                    {
                        JavascriptError::ThrowURIError(scriptContext, JSERR_URIDecodeError /* TODO-ERROR: _u("NEED MESSAGE") */);
                    }
                    else
                    {
                        // Supplementary code point: always decoded, written as a surrogate pair.
                        outputLen +=2;
                        continue;
                    }
                }

                if( ! UriHelper::InURISet( c1, reservedFlags ))
                {
                    outputLen++;
                }
                else
                {
                    // Reserved character: the escape sequence is copied through unchanged.
                    outputLen += k - start + 1;
                }
            }
            else // c is not '%'
            {
                outputLen++;
            }
        }

        // If nothing needs decoding, then avoid extra work
        if (!needsChanges)
        {
            AssertMsg(scriptContext == strURI->GetScriptContext(), "Should have already marshaled the string in cross site thunk");
            return strURI;
        }

        //pass 2 generate the decoded URI
        uint32 allocSize = UInt32Math::Add(outputLen, 1);
        char16* outURI = RecyclerNewArrayLeaf(scriptContext->GetRecycler(), char16, allocSize);
        char16* outCurrent = outURI;


        for( uint32 k = 0; k < len; k++ )
        {
            c = input[k];
            if( c == '%')
            {
                uint32 start = k;
#if DBG
                Assert(!(k + 2 >= len));
                if( k + 2 >= len )
                {
                    JavascriptError::ThrowURIError(scriptContext, VBSERR_InternalError /* TODO-ERROR: _u("NEED MESSAGE") */);
                }
#endif
                // Let OACR know some things about 'k' that we checked just above, to let it know that we are not going to
                // overflow later. The same checks are done in the first pass in non-debug builds, and the conditions
                // checked upon in the first and second pass are the same.
                __analysis_assume(!(k + 2 >= len));

                BYTE b;
                if(!DecodeByteFromHex(input[k + 1], input[k + 2], b))
                {
#if DBG
                    AssertMsg(false, "!DecodeByteFromHex(input[k + 1], input[k + 2], b)");
                    JavascriptError::ThrowURIError(scriptContext, VBSERR_InternalError /* TODO-ERROR: _u("NEED MESSAGE") */);
#endif
                }

                k += 2;

                if( (b & 0x80) ==  0)
                {
                    c1 = b;
                }
                else
                {
                    int n;
                    for( n = 1; ((b << n) & 0x80) != 0; n++ )
                        ;

                    if( n == 1 || n > UriHelper::MaxUTF8Len )
                    {
                        JavascriptError::ThrowURIError(scriptContext, VBSERR_InternalError /* TODO-ERROR: _u("NEED MESSAGE") */);
                    }

                    BYTE bOctets[UriHelper::MaxUTF8Len];
                    bOctets[0] = b;

#if DBG
                    Assert(!(k + 3 * (n-1) >= len));
                    if( k + 3 * (n-1) >= len )
                    {
                        JavascriptError::ThrowURIError(scriptContext, VBSERR_InternalError /* TODO-ERROR: _u("NEED MESSAGE") */);
                    }
#endif
                    // Let OACR know some things about 'k' that we checked just above, to let it know that we are not going to
                    // overflow later. The same checks are done in the first pass in non-debug builds, and the conditions
                    // checked upon in the first and second pass are the same.
                    __analysis_assume(!(k + 3 * (n-1) >= len));

                    for( int j = 1; j < n; j++ )
                    {
                        ++k;

#if DBG
                        Assert(!(input[k] != '%'));
                        if( input[k] != '%' )
                        {
                            JavascriptError::ThrowURIError(scriptContext, VBSERR_InternalError /* TODO-ERROR: _u("NEED MESSAGE") */);
                        }
#endif

                        if(!DecodeByteFromHex(input[k + 1], input[k + 2], b))
                        {
#if DBG
                            AssertMsg(false, "!DecodeByteFromHex(input[k + 1], input[k + 2], b)");
                            JavascriptError::ThrowURIError(scriptContext, VBSERR_InternalError /* TODO-ERROR: _u("NEED MESSAGE") */);
#endif
                        }

#if DBG
                        // The two leading bits should be 10 for a valid UTF-8 encoding
                        Assert(!((b & 0xC0) != 0x80));
                        if( (b & 0xC0) != 0x80)
                        {
                            JavascriptError::ThrowURIError(scriptContext, VBSERR_InternalError /* TODO-ERROR: _u("NEED MESSAGE") */);
                        }
#endif

                        k += 2;

                        bOctets[j] = b;
                    }

                    uint32 uVal = UriHelper::FromUTF8( bOctets, n );

#if DBG
                    // Overlong forms were rejected in the first pass.
                    Assert(!(uVal < (uint32)((n == 2) ? 0x80 : ((n == 3) ? 0x800 : 0x10000))));

                    Assert(!(uVal >= 0xD800 && uVal <= 0xDFFF));
                    if( uVal >= 0xD800 && uVal <= 0xDFFF)
                    {
                        JavascriptError::ThrowURIError(scriptContext, VBSERR_InternalError /* TODO-ERROR: _u("NEED MESSAGE") */);
                    }
#endif

                    if( uVal < 0x10000 )
                    {
                        c1 = (char16)uVal;
                    }

#if DBG
                    else if( uVal > 0x10ffff )
                    {
                        AssertMsg(false, "uVal > 0x10ffff");
                        JavascriptError::ThrowURIError(scriptContext, VBSERR_InternalError /* TODO-ERROR: _u("NEED MESSAGE") */);
                    }
#endif
                    else
                    {
                        // Split the supplementary code point into its low (trail) and high (lead) surrogates.
                        uint32 l = (( uVal - 0x10000) & 0x3ff) + 0xdc00;
                        uint32 h = ((( uVal - 0x10000) >> 10) & 0x3ff) + 0xd800;

                        __analysis_assume(outCurrent + 2 <= outURI + allocSize);
                        *outCurrent++ = (char16)h;
                        *outCurrent++ = (char16)l;
                        continue;
                    }
                }

                if( !UriHelper::InURISet( c1, reservedFlags ))
                {
                    __analysis_assume(outCurrent < outURI + allocSize);
                    *outCurrent++ = c1;
                }
                else
                {
                    // Reserved character: copy the original escape sequence [start, k] unchanged.
                    js_memcpy_s(outCurrent, (allocSize - (outCurrent - outURI)) * sizeof(char16), &input[start], (k - start + 1)*sizeof(char16));
                    outCurrent += k - start + 1;
                }
            }
            else // c is not '%'
            {
                __analysis_assume(outCurrent < outURI + allocSize);
                *outCurrent++ = c;
            }
        }

        AssertMsg(outURI + outputLen == outCurrent, " URI out buffer out of sync");
        __analysis_assume(outputLen + 1 == allocSize);
        outURI[outputLen] = _u('\0');

        return JavascriptString::NewWithBuffer(outURI, outputLen, scriptContext);
    }

    // Combines two hexadecimal digit characters into the byte they spell: 'digit1' supplies the
    // high nibble and 'digit2' the low nibble.
    // Returns true and stores the byte in 'value' on success; returns false as soon as either
    // character is rejected by NumberUtilities::FHexDigit.
    // Note: 'value' is already overwritten with the high nibble when only the second digit is
    // rejected, so callers must not rely on it after a false return.
    bool UriHelper::DecodeByteFromHex(const char16 digit1, const char16 digit2, unsigned char &value)
    {
        int highNibble;
        const bool firstIsHex = !!Js::NumberUtilities::FHexDigit(digit1, &highNibble);
        if(!firstIsHex)
        {
            return false;
        }
        Assert(static_cast<unsigned int>(highNibble) <= 0xfU);
        value = static_cast<unsigned char>(highNibble) << 4;

        int lowNibble;
        const bool secondIsHex = !!Js::NumberUtilities::FHexDigit(digit2, &lowNibble);
        if(!secondIsHex)
        {
            return false;
        }
        Assert(static_cast<unsigned int>(lowNibble) <= 0xfU);
        value += static_cast<unsigned char>(lowNibble);

        return true;
    }
}