Răsfoiți Sursa

[MERGE #5540 @sethbrenith] Avoid copying strings in (de|en)codeURI(Component)? and trim

Merge pull request #5540 from sethbrenith:user/sethb/encode

* The last step of Encode and Decode was copying the buffer for no reason; skip that copy
* Often the input to Encode or Decode doesn't require any transformation, so return the original string in that case
* Likewise, avoid creating new substrings for trimmed strings if nothing needs to be trimmed

Related to OS:17754314
Seth Brenith 7 ani în urmă
părinte
comite
22ef6955fb

+ 7 - 0
lib/Runtime/Library/JavascriptString.cpp

@@ -2472,6 +2472,13 @@ case_2:
                 Assert(idxEnd >= 0);
             }
         }
+
+        if (idxStart == 0 && idxEnd == len - 1)
+        {
+            AssertMsg(scriptContext == arg->GetScriptContext(), "Should have already marshaled the string in cross site thunk");
+            return arg;
+        }
+
         return SubstringCore(arg, idxStart, idxEnd - idxStart + 1, scriptContext);
     }
 

+ 32 - 8
lib/Runtime/Library/UriHelper.cpp

@@ -29,7 +29,7 @@ namespace Js
             }
         }
 
-        return Encode(strURI->GetString(), strURI->GetLength(), flags, scriptContext);
+        return Encode(strURI, flags, scriptContext);
     }
 
     unsigned char UriHelper::s_uriProps[128] =
@@ -126,10 +126,13 @@ namespace Js
     }
 
     // The Encode algorithm described in sec. 15.1.3 of the spec. The input string is
-    // 'input' and the Unescaped set is described by the flags 'unescapedFlags'. The
+    // 'strURI' and the Unescaped set is described by the flags 'unescapedFlags'. The
     // output is a string var.
-    Var UriHelper::Encode(__in_ecount(len) const  char16* input, uint32 len, unsigned char unescapedFlags, ScriptContext* scriptContext )
+    Var UriHelper::Encode(JavascriptString* strURI, unsigned char unescapedFlags, ScriptContext* scriptContext )
     {
+        charcount_t len = strURI->GetLength();
+        __in_ecount(len) const char16* input = strURI->GetString();
+        bool needsChanges = false;
         BYTE bUTF8[MaxUTF8Len];
 
         // pass 1 calculate output length and error check
@@ -144,6 +147,8 @@ namespace Js
             }
             else
             {
+                needsChanges = true;
+
                 if( c >= 0xDC00 && c <= 0xDFFF )
                 {
                     JavascriptError::ThrowURIError(scriptContext, JSERR_URIEncodeError /* TODO-ERROR: _u("NEED MESSAGE") */);
@@ -173,6 +178,13 @@ namespace Js
             }
         }
 
+        // If nothing needs encoding, then avoid extra work
+        if (!needsChanges)
+        {
+            AssertMsg(scriptContext == strURI->GetScriptContext(), "Should have already marshaled the string in cross site thunk");
+            return strURI;
+        }
+
         //pass 2 generate the encoded URI
 
         uint32 allocSize = UInt32Math::Add(outputLen, 1);
@@ -238,7 +250,7 @@ namespace Js
         __analysis_assume(outputLen + 1 == allocSize);
         outURI[outputLen] = _u('\0');
 
-        return JavascriptString::NewCopyBuffer(outURI, outputLen, scriptContext);
+        return JavascriptString::NewWithBuffer(outURI, outputLen, scriptContext);
     }
 
     Var UriHelper::DecodeCoreURI(ScriptContext* scriptContext, Arguments& args, unsigned char reservedFlags )
@@ -265,14 +277,17 @@ namespace Js
             }
         }
 
-        return Decode(strURI->GetString(), strURI->GetLength(), reservedFlags, scriptContext);
+        return Decode(strURI, reservedFlags, scriptContext);
     }
 
     // The Decode algorithm described in sec. 15.1.3 of the spec. The input string is
-    // 'input' and the Reserved set is described by the flags 'reservedFlags'. The
+    // 'strURI' and the Reserved set is described by the flags 'reservedFlags'. The
     // output is a string var.
-    Var UriHelper::Decode(__in_ecount(len) const char16* input, uint32 len, unsigned char reservedFlags, ScriptContext* scriptContext)
+    Var UriHelper::Decode(JavascriptString* strURI, unsigned char reservedFlags, ScriptContext* scriptContext)
     {
+        charcount_t len = strURI->GetLength();
+        __in_ecount(len) const char16* input = strURI->GetString();
+        bool needsChanges = false;
         char16 c1;
         char16 c;
         // pass 1 calculate output length and error check
@@ -283,6 +298,8 @@ namespace Js
 
             if( c == '%')
             {
+                needsChanges = true;
+
                 uint32 start = k;
                 if( k + 2 >= len )
                 {
@@ -383,6 +400,13 @@ namespace Js
             }
         }
 
+        // If nothing needs decoding, then avoid extra work
+        if (!needsChanges)
+        {
+            AssertMsg(scriptContext == strURI->GetScriptContext(), "Should have already marshaled the string in cross site thunk");
+            return strURI;
+        }
+
         //pass 2 generate the decoded URI
         uint32 allocSize = UInt32Math::Add(outputLen, 1);
         char16* outURI = RecyclerNewArrayLeaf(scriptContext->GetRecycler(), char16, allocSize);
@@ -538,7 +562,7 @@ namespace Js
         __analysis_assume(outputLen + 1 == allocSize);
         outURI[outputLen] = _u('\0');
 
-        return JavascriptString::NewCopyBuffer(outURI, outputLen, scriptContext);
+        return JavascriptString::NewWithBuffer(outURI, outputLen, scriptContext);
     }
 
     // Decodes a two-hexadecimal-digit wide character pair into the byte value it represents

+ 2 - 2
lib/Runtime/Library/UriHelper.h

@@ -18,7 +18,7 @@ namespace Js
         };
 
         static Var EncodeCoreURI(ScriptContext* scriptContext, Arguments& args, unsigned char flags);
-        static Var Decode(__in_ecount(len) const char16* psz, uint32 len, unsigned char reservedFlags, ScriptContext* scriptContext);
+        static Var Decode(JavascriptString* strURI, unsigned char reservedFlags, ScriptContext* scriptContext);
         static Var DecodeCoreURI(ScriptContext* scriptContext, Arguments& args, unsigned char reservedFlags);
 
         static unsigned char s_uriProps[128];
@@ -82,7 +82,7 @@ namespace Js
         static const uint32 MaxUTF8Len = 4;
         static uint32 ToUTF8( uint32 uVal, BYTE bUTF8[MaxUTF8Len]);
         static uint32 FromUTF8( BYTE bUTF8[MaxUTF8Len], uint32 uLen );
-        static Var Encode(__in_ecount(len) const char16* psz, uint32 len, unsigned char unescapedFlags, ScriptContext* scriptContext );
+        static Var Encode(JavascriptString* strURI, unsigned char unescapedFlags, ScriptContext* scriptContext );
 
     private:
         static bool DecodeByteFromHex(const char16 digit1, const char16 digit2, unsigned char &value);