LCOV - code coverage report
Current view: top level - src - uri.cc (source / functions) Hit Total Coverage
Test: app.info Lines: 180 187 96.3 %
Date: 2017-10-20 Functions: 20 23 87.0 %

          Line data    Source code
       1             : // Copyright 2016 the V8 project authors. All rights reserved.
       2             : // Use of this source code is governed by a BSD-style license that can be
       3             : // found in the LICENSE file.
       4             : 
       5             : #include "src/uri.h"
       6             : 
       7             : #include <vector>
       8             : 
       9             : #include "src/char-predicates-inl.h"
      10             : #include "src/handles.h"
      11             : #include "src/isolate-inl.h"
      12             : #include "src/string-search.h"
      13             : #include "src/unicode-inl.h"
      14             : 
      15             : namespace v8 {
      16             : namespace internal {
      17             : 
      18             : namespace {  // anonymous namespace for DecodeURI helper functions
      19             : bool IsReservedPredicate(uc16 c) {  // True iff |c| is one of: # $ & + , / : ; = ? @
      20             :   switch (c) {  // AddToBuffer (below) uses this to leave such characters percent-escaped when decoding a full URI.
      21             :     case '#':
      22             :     case '$':
      23             :     case '&':
      24             :     case '+':
      25             :     case ',':
      26             :     case '/':
      27             :     case ':':
      28             :     case ';':
      29             :     case '=':
      30             :     case '?':
      31             :     case '@':
      32             :       return true;
      33             :     default:
      34             :       return false;
      35             :   }
      36             : }
      37             : 
      38             : bool IsReplacementCharacter(const uint8_t* octets, int length) {  // True iff |octets| is exactly the three bytes 0xEF 0xBF 0xBD.
      39             :   // The replacement character is at codepoint U+FFFD in the Unicode Specials
      40             :   // table. Its UTF-8 encoding is 0xEF 0xBF 0xBD.
      41         286 :   if (length != 3 || octets[0] != 0xef || octets[1] != 0xbf ||  // |length| is tested first, so octets[0..2] are only read when length == 3.
      42          18 :       octets[2] != 0xbd) {
      43             :     return false;
      44             :   }
      45             :   return true;
      46             : }
      47             : 
      48        5596 : bool DecodeOctets(const uint8_t* octets, int length,  // Decodes |length| octets via unibrow::Utf8::ValueOf and appends the result to |buffer| as one or two uc16 units.
      49             :                   std::vector<uc16>* buffer) {  // Returns false, appending nothing, when the octets are rejected.
      50        5596 :   size_t cursor = 0;
      51        5596 :   uc32 value = unibrow::Utf8::ValueOf(octets, length, &cursor);
      52        5864 :   if (value == unibrow::Utf8::kBadChar &&  // A kBadChar result is accepted only when the input itself spells out U+FFFD.
      53             :       !IsReplacementCharacter(octets, length)) {  // NOTE(review): presumably kBadChar == U+FFFD, so the two cases are otherwise indistinguishable -- confirm in unicode.h.
      54             :     return false;
      55             :   }
      56             : 
      57        5346 :   if (value <= static_cast<uc32>(unibrow::Utf16::kMaxNonSurrogateCharCode)) {
      58       10008 :     buffer->push_back(value);  // Small enough to be stored as a single uc16.
      59             :   } else {
      60         684 :     buffer->push_back(unibrow::Utf16::LeadSurrogate(value));  // Otherwise stored as a lead/trail surrogate pair.
      61         684 :     buffer->push_back(unibrow::Utf16::TrailSurrogate(value));
      62             :   }
      63             :   return true;
      64             : }
      65             : 
      66     8142884 : int TwoDigitHex(uc16 character1, uc16 character2) {  // Returns (HexValue(character1) << 4) + HexValue(character2), or -1 if either character is rejected.
      67     8142884 :   if (character1 > 'f') return -1;  // Range guard ahead of HexValue. NOTE(review): presumably HexValue requires input <= 'f' -- confirm.
      68     8142464 :   int high = HexValue(character1);
      69     8142464 :   if (high == -1) return -1;  // HexValue reported "not a hex digit".
      70     8142264 :   if (character2 > 'f') return -1;
      71     8142204 :   int low = HexValue(character2);
      72     8142204 :   if (low == -1) return -1;
      73     8142024 :   return (high << 4) + low;
      74             : }
      75             : 
      76             : template <typename T>
      77     3276956 : void AddToBuffer(uc16 decoded, String::FlatContent* uri_content, int index,  // Appends the outcome of one %XX escape to |buffer|. Callers pass the position of the escape's '%' as |index|.
      78             :                  bool is_uri, std::vector<T>* buffer) {
      79     3276956 :   if (is_uri && IsReservedPredicate(decoded)) {  // With |is_uri| set, an escape that decodes to a reserved character is not decoded:
      80           0 :     buffer->push_back('%');  // a '%' is emitted, followed by the two input characters after |index|.
      81           0 :     uc16 first = uri_content->Get(index + 1);
      82           0 :     uc16 second = uri_content->Get(index + 2);
      83             :     DCHECK_GT(std::numeric_limits<T>::max(), first);  // Both characters must be below T's maximum value.
      84             :     DCHECK_GT(std::numeric_limits<T>::max(), second);
      85             : 
      86           0 :     buffer->push_back(first);
      87           0 :     buffer->push_back(second);
      88             :   } else {
      89     6553902 :     buffer->push_back(decoded);  // Everything else is appended in decoded form.
      90             :   }
      91     3276956 : }
      92             : 
      93        5546 : bool IntoTwoByte(int index, bool is_uri, int uri_length,  // Decodes |uri_content| from position |index| up to |uri_length|, appending uc16 units to |buffer|.
      94        5696 :                  String::FlatContent* uri_content, std::vector<uc16>* buffer) {  // Returns false on a malformed escape or when DecodeOctets rejects an octet sequence.
      95       21804 :   for (int k = index; k < uri_length; k++) {
      96        5696 :     uc16 code = uri_content->Get(k);
      97        5696 :     if (code == '%') {
      98             :       int two_digits;
      99       11392 :       if (k + 2 >= uri_length ||  // A '%' needs two more characters, and TwoDigitHex must accept them.
     100        5696 :           (two_digits = TwoDigitHex(uri_content->Get(k + 1),
     101       11392 :                                     uri_content->Get(k + 2))) < 0) {
     102         340 :         return false;
     103             :       }
     104             :       k += 2;  // k now points at the second hex digit of the escape.
     105        5696 :       uc16 decoded = static_cast<uc16>(two_digits);
     106        5696 :       if (decoded > unibrow::Utf8::kMaxOneByteChar) {  // Above kMaxOneByteChar: treated as the first octet of a multi-octet sequence.
     107             :         uint8_t octets[unibrow::Utf8::kMaxEncodedSize];
     108        5686 :         octets[0] = decoded;
     109             : 
     110             :         int number_of_continuation_bytes = 0;
     111       22582 :         while ((decoded << ++number_of_continuation_bytes) & 0x80) {  // Tests successive bits below the top bit of the first octet; each set bit demands one more %XX.
     112       11300 :           if (number_of_continuation_bytes > 3 || k + 3 >= uri_length) {  // More than three extra octets, or fewer than three characters left for "%XX".
     113         340 :             return false;
     114             :           }
     115       33650 :           if (uri_content->Get(++k) != '%' ||  // The next character must start another escape.
     116       11210 :               (two_digits = TwoDigitHex(uri_content->Get(k + 1),
     117       22420 :                                         uri_content->Get(k + 2))) < 0) {
     118             :             return false;
     119             :           }
     120             :           k += 2;
     121             :           uc16 continuation_byte = static_cast<uc16>(two_digits);
     122       11210 :           octets[number_of_continuation_bytes] = continuation_byte;
     123             :         }
     124             : 
     125        5596 :         if (!DecodeOctets(octets, number_of_continuation_bytes, buffer)) {  // On loop exit the counter equals the number of octets stored, first octet included.
     126             :           return false;
     127             :         }
     128             :       } else {
     129          10 :         AddToBuffer(decoded, uri_content, k - 2, is_uri, buffer);  // k - 2 is the position of this escape's '%'.
     130             :       }
     131             :     } else {
     132           0 :       buffer->push_back(code);  // Characters outside escapes are copied through unchanged.
     133             :     }
     134             :   }
     135             :   return true;
     136             : }
     137             : 
     138        5780 : bool IntoOneAndTwoByte(Handle<String> uri, bool is_uri,  // Decodes |uri| into |one_byte_buffer| until a character above kMaxOneByteChar appears;
     139             :                        std::vector<uint8_t>* one_byte_buffer,  // from that position on, the rest is handed to IntoTwoByte, which fills |two_byte_buffer|.
     140             :                        std::vector<uc16>* two_byte_buffer) {  // Returns false on a malformed escape (or when IntoTwoByte does).
     141             :   DisallowHeapAllocation no_gc;
     142        5780 :   String::FlatContent uri_content = uri->GetFlatContent();
     143             : 
     144             :   int uri_length = uri->length();
     145    22289010 :   for (int k = 0; k < uri_length; k++) {
     146    22288796 :     uc16 code = uri_content.Get(k);
     147    22288796 :     if (code == '%') {
     148             :       int two_digits;
     149     6565004 :       if (k + 2 >= uri_length ||  // A '%' needs two more characters, and TwoDigitHex must accept them.
     150     3282492 :           (two_digits = TwoDigitHex(uri_content.Get(k + 1),
     151     6564984 :                                     uri_content.Get(k + 2))) < 0) {
     152             :         return false;
     153             :       }
     154             : 
     155     3282492 :       uc16 decoded = static_cast<uc16>(two_digits);
     156     3282492 :       if (decoded > unibrow::Utf8::kMaxOneByteChar) {
     157             :         return IntoTwoByte(k, is_uri, uri_length, &uri_content,  // k still points at the '%', so IntoTwoByte re-reads this escape.
     158        5546 :                            two_byte_buffer);
     159             :       }
     160             : 
     161     3276946 :       AddToBuffer(decoded, &uri_content, k, is_uri, one_byte_buffer);
     162             :       k += 2;  // Skip the two hex digits; the loop increment then moves past the escape.
     163             :     } else {
     164    19006284 :       if (code > unibrow::Utf8::kMaxOneByteChar) {  // An unescaped character above kMaxOneByteChar also switches to IntoTwoByte.
     165             :         return IntoTwoByte(k, is_uri, uri_length, &uri_content,
     166           0 :                            two_byte_buffer);
     167             :       }
     168    38012568 :       one_byte_buffer->push_back(code);
     169             :     }
     170             :   }
     171             :   return true;
     172             : }
     173             : 
     174             : }  // anonymous namespace
     175             : 
     176        5780 : MaybeHandle<String> Uri::Decode(Isolate* isolate, Handle<String> uri,  // Percent-decodes |uri|; throws a URIError when IntoOneAndTwoByte reports failure.
     177             :                                 bool is_uri) {  // |is_uri| is forwarded to the helpers, where it keeps reserved characters escaped (see AddToBuffer).
     178        5780 :   uri = String::Flatten(uri);
     179             :   std::vector<uint8_t> one_byte_buffer;
     180             :   std::vector<uc16> two_byte_buffer;
     181             : 
     182        5780 :   if (!IntoOneAndTwoByte(uri, is_uri, &one_byte_buffer, &two_byte_buffer)) {
     183         360 :     THROW_NEW_ERROR(isolate, NewURIError(), String);
     184             :   }
     185             : 
     186        5420 :   if (two_byte_buffer.empty()) {  // Nothing was handed to IntoTwoByte (or it produced nothing): return a one-byte string.
     187             :     return isolate->factory()->NewStringFromOneByte(Vector<const uint8_t>(
     188         642 :         one_byte_buffer.data(), static_cast<int>(one_byte_buffer.size())));
     189             :   }
     190             : 
     191             :   Handle<SeqTwoByteString> result;
     192             :   int result_length =
     193       15618 :       static_cast<int>(one_byte_buffer.size() + two_byte_buffer.size());
     194       10412 :   ASSIGN_RETURN_ON_EXCEPTION(
     195             :       isolate, result, isolate->factory()->NewRawTwoByteString(result_length),
     196             :       String);
     197             : 
     198       10412 :   CopyChars(result->GetChars(), one_byte_buffer.data(), one_byte_buffer.size());  // The one-byte prefix goes at the start of the result,
     199        5206 :   CopyChars(result->GetChars() + one_byte_buffer.size(), two_byte_buffer.data(),  // and the two-byte remainder directly after it.
     200       10412 :             two_byte_buffer.size());
     201             : 
     202        5206 :   return result;
     203             : }
     204             : 
     205             : namespace {  // anonymous namespace for EncodeURI helper functions
     206    22869474 : bool IsUnescapePredicateInUriComponent(uc16 c) {  // True iff |c| passes IsAlphaNumeric or is one of: ! ' ( ) * - . _ ~
     207    45738948 :   if (IsAlphaNumeric(c)) {  // Uri::Encode copies characters accepted here to its output without escaping.
     208             :     return true;
     209             :   }
     210             : 
     211     5255549 :   switch (c) {
     212             :     case '!':
     213             :     case '\'':
     214             :     case '(':
     215             :     case ')':
     216             :     case '*':
     217             :     case '-':
     218             :     case '.':
     219             :     case '_':
     220             :     case '~':
     221             :       return true;
     222             :     default:
     223     3284037 :       return false;
     224             :   }
     225             : }
     226             : 
     227             : bool IsUriSeparator(uc16 c) {  // True iff |c| is one of: # : ; / ? $ & + , @ =
     228             :   switch (c) {  // This is the same set of eleven characters that IsReservedPredicate tests on the decoding side.
     229             :     case '#':
     230             :     case ':':
     231             :     case ';':
     232             :     case '/':
     233             :     case '?':
     234             :     case '$':
     235             :     case '&':
     236             :     case '+':
     237             :     case ',':
     238             :     case '@':
     239             :     case '=':
     240             :       return true;
     241             :     default:
     242             :       return false;
     243             :   }
     244             : }
     245             : 
     246     3297139 : void AddEncodedOctetToBuffer(uint8_t octet, std::vector<uint8_t>* buffer) {  // Appends three bytes to |buffer|: '%' and two characters from HexCharOfValue.
     247     6594278 :   buffer->push_back('%');
     248     9891417 :   buffer->push_back(HexCharOfValue(octet >> 4));  // High nibble first,
     249     9891417 :   buffer->push_back(HexCharOfValue(octet & 0x0F));  // then the low nibble.
     250     3297139 : }
     251             : 
     252     3283927 : void EncodeSingle(uc16 c, std::vector<uint8_t>* buffer) {  // Encodes |c| with unibrow::Utf8::Encode and appends every resulting byte as a %XX escape.
     253     3283927 :   char s[4] = {};  // Zero-initialised scratch space for the encoded bytes.
     254             :   int number_of_bytes;
     255             :   number_of_bytes =
     256     3283927 :       unibrow::Utf8::Encode(s, c, unibrow::Utf16::kNoPreviousCharacter, false);
     257     6579818 :   for (int k = 0; k < number_of_bytes; k++) {
     258     3295891 :     AddEncodedOctetToBuffer(s[k], buffer);  // The char is converted implicitly to the uint8_t parameter.
     259             :   }
     260     3283927 : }
     261             : 
     262         312 : void EncodePair(uc16 cc1, uc16 cc2, std::vector<uint8_t>* buffer) {  // Like EncodeSingle, but for the value CombineSurrogatePair(cc1, cc2) yields.
     263         312 :   char s[4] = {};  // Zero-initialised scratch space for the encoded bytes.
     264             :   int number_of_bytes =
     265             :       unibrow::Utf8::Encode(s, unibrow::Utf16::CombineSurrogatePair(cc1, cc2),  // The caller (Uri::Encode) passes a lead surrogate followed by a trail surrogate.
     266         312 :                             unibrow::Utf16::kNoPreviousCharacter, false);
     267        1560 :   for (int k = 0; k < number_of_bytes; k++) {
     268        1248 :     AddEncodedOctetToBuffer(s[k], buffer);
     269             :   }
     270         312 : }
     271             : 
     272             : }  // anonymous namespace
     273             : 
     274       78637 : MaybeHandle<String> Uri::Encode(Isolate* isolate, Handle<String> uri,  // Percent-encodes |uri| into a new one-byte string; throws a URIError on an unpaired surrogate.
     275             :                                 bool is_uri) {  // With |is_uri| set, characters accepted by IsUriSeparator are also left unescaped.
     276       78637 :   uri = String::Flatten(uri);
     277             :   int uri_length = uri->length();
     278             :   std::vector<uint8_t> buffer;
     279       78637 :   buffer.reserve(uri_length);  // The output holds at least one byte per input character that is copied through.
     280             : 
     281             :   {
     282             :     DisallowHeapAllocation no_gc;
     283       78637 :     String::FlatContent uri_content = uri->GetFlatContent();
     284             : 
     285    22948423 :     for (int k = 0; k < uri_length; k++) {
     286    22874718 :       uc16 cc1 = uri_content.Get(k);
     287    45749436 :       if (unibrow::Utf16::IsLeadSurrogate(cc1)) {
     288        5118 :         k++;  // A lead surrogate must be followed by a trail surrogate.
     289        5118 :         if (k < uri_length) {
     290             :           uc16 cc2 = uri->Get(k);  // NOTE(review): reads through uri->Get here, while every other read in this loop uses uri_content.Get -- confirm whether that is intended.
     291       10236 :           if (unibrow::Utf16::IsTrailSurrogate(cc2)) {
     292         312 :             EncodePair(cc1, cc2, &buffer);
     293         312 :             continue;
     294             :           }
     295             :         }
     296    22869600 :       } else if (!unibrow::Utf16::IsTrailSurrogate(cc1)) {  // Neither lead nor trail surrogate: an ordinary character.
     297    45738948 :         if (IsUnescapePredicateInUriComponent(cc1) ||
     298     3279600 :             (is_uri && IsUriSeparator(cc1))) {
     299    39171094 :           buffer.push_back(cc1);
     300             :         } else {
     301     3283927 :           EncodeSingle(cc1, &buffer);
     302             :         }
     303             :         continue;
     304             :       }
     305             : 
     306             :       AllowHeapAllocation allocate_error_and_return;  // Reached for a lead surrogate without a following trail surrogate, or for a trail surrogate on its own.
     307        4932 :       THROW_NEW_ERROR(isolate, NewURIError(), String);
     308             :     }
     309             :   }
     310             : 
     311             :   return isolate->factory()->NewStringFromOneByte(
     312      221115 :       Vector<const uint8_t>(buffer.data(), static_cast<int>(buffer.size())));
     313             : }
     314             : 
     315             : namespace {  // Anonymous namespace for Escape and Unescape
     316             : 
     317             : template <typename Char>
     318    14314322 : int UnescapeChar(Vector<const Char> vector, int i, int length, int* step) {  // Returns the character that starts at vector[i] after unescaping; *step receives the number of input characters consumed.
     319    28628644 :   uint16_t character = vector[i];
     320             :   int32_t hi = 0;
     321             :   int32_t lo = 0;
     322    19531370 :   if (character == '%' && i <= length - 6 && vector[i + 1] == 'u' &&  // "%uXXXX": six characters must be available, and both hex pairs must be accepted by TwoDigitHex.
     323     1485414 :       (hi = TwoDigitHex(vector[i + 2], vector[i + 3])) > -1 &&
     324     1484934 :       (lo = TwoDigitHex(vector[i + 4], vector[i + 5])) > -1) {
     325      494818 :     *step = 6;
     326      494818 :     return (hi << 8) + lo;
     327    17672874 :   } else if (character == '%' && i <= length - 3 &&  // "%XX": three characters must be available.
     328    11560110 :              (lo = TwoDigitHex(vector[i + 1], vector[i + 2])) > -1) {
     329     3852830 :     *step = 3;
     330             :     return lo;
     331             :   } else {
     332     9966674 :     *step = 1;  // Anything else, including a '%' that starts no accepted escape, is returned as is.
     333     9966674 :     return character;
     334             :   }
     335             : }
     336             : 
     337             : template <typename Char>
     338       61703 : MaybeHandle<String> UnescapeSlow(Isolate* isolate, Handle<String> string,  // Unescapes |string| from |start_index| onwards and returns it joined to the untouched prefix [0, start_index).
     339             :                                  int start_index) {
     340             :   bool one_byte = true;
     341             :   int length = string->length();
     342             : 
     343             :   int unescaped_length = 0;
     344             :   {
     345             :     DisallowHeapAllocation no_allocation;
     346             :     Vector<const Char> vector = string->GetCharVector<Char>();
     347     7218864 :     for (int i = start_index; i < length; unescaped_length++) {  // First pass: count the output characters and note whether any exceeds kMaxOneByteCharCode.
     348             :       int step;
     349     7157161 :       if (UnescapeChar(vector, i, length, &step) >
     350             :           String::kMaxOneByteCharCode) {
     351             :         one_byte = false;
     352             :       }
     353     7157161 :       i += step;
     354             :     }
     355             :   }
     356             : 
     357             :   DCHECK(start_index < length);
     358             :   Handle<String> first_part =
     359       61703 :       isolate->factory()->NewProperSubString(string, 0, start_index);
     360             : 
     361             :   int dest_position = 0;
     362             :   Handle<String> second_part;
     363             :   DCHECK_LE(unescaped_length, String::kMaxLength);
     364       61703 :   if (one_byte) {  // Second pass: write the unescaped characters into a one-byte or two-byte string, as decided above.
     365             :     Handle<SeqOneByteString> dest = isolate->factory()
     366             :                                         ->NewRawOneByteString(unescaped_length)
     367        1908 :                                         .ToHandleChecked();
     368             :     DisallowHeapAllocation no_allocation;
     369             :     Vector<const Char> vector = string->GetCharVector<Char>();  // Fetched again after the allocations above rather than reusing the first-pass vector.
     370       11787 :     for (int i = start_index; i < length; dest_position++) {
     371             :       int step;
     372       10833 :       dest->SeqOneByteStringSet(dest_position,
     373       10833 :                                 UnescapeChar(vector, i, length, &step));
     374       10833 :       i += step;
     375             :     }
     376         954 :     second_part = dest;
     377             :   } else {
     378             :     Handle<SeqTwoByteString> dest = isolate->factory()
     379             :                                         ->NewRawTwoByteString(unescaped_length)
     380      121498 :                                         .ToHandleChecked();
     381             :     DisallowHeapAllocation no_allocation;
     382             :     Vector<const Char> vector = string->GetCharVector<Char>();
     383     7207077 :     for (int i = start_index; i < length; dest_position++) {
     384             :       int step;
     385     7146328 :       dest->SeqTwoByteStringSet(dest_position,
     386     7146328 :                                 UnescapeChar(vector, i, length, &step));
     387     7146328 :       i += step;
     388             :     }
     389       60749 :     second_part = dest;
     390             :   }
     391       61703 :   return isolate->factory()->NewConsString(first_part, second_part);
     392             : }
     393             : 
     394     7095603 : bool IsNotEscaped(uint16_t c) {  // True iff |c| passes IsAlphaNumeric or is one of the characters listed below; EscapePrivate copies such characters unchanged.
     395    14191206 :   if (IsAlphaNumeric(c)) {
     396             :     return true;
     397             :   }
     398             :   //  @*_+-./
     399             :   switch (c) {
     400             :     case '@':
     401             :     case '*':
     402             :     case '_':
     403             :     case '+':
     404             :     case '-':
     405             :     case '.':
     406             :     case '/':
     407             :       return true;
     408             :     default:
     409     1930312 :       return false;
     410             :   }
     411             : }
     412             : 
     413             : template <typename Char>
     414      129682 : static MaybeHandle<String> UnescapePrivate(Isolate* isolate,  // Returns |source| itself when it contains no '%'; otherwise unescapes it via UnescapeSlow.
     415             :                                            Handle<String> source) {
     416             :   int index;
     417             :   {
     418             :     DisallowHeapAllocation no_allocation;
     419             :     StringSearch<uint8_t, Char> search(isolate, STATIC_CHAR_VECTOR("%"));
     420             :     index = search.Search(source->GetCharVector<Char>(), 0);  // Position of the first '%', searching from offset 0.
     421      129682 :     if (index < 0) return source;  // A negative result is treated as "no '%' found".
     422             :   }
     423       61703 :   return UnescapeSlow<Char>(isolate, source, index);  // Everything before |index| is kept untouched by UnescapeSlow.
     424             : }
     425             : 
     426             : template <typename Char>
     427       41452 : static MaybeHandle<String> EscapePrivate(Isolate* isolate,  // Escapes |string|: characters >= 256 become %uXXXX, IsNotEscaped characters are copied, all others become %XX.
     428             :                                          Handle<String> string) {  // Returns |string| itself when nothing needs escaping.
     429             :   DCHECK(string->IsFlat());
     430             :   int escaped_length = 0;
     431             :   int length = string->length();
     432             : 
     433             :   {
     434             :     DisallowHeapAllocation no_allocation;
     435             :     Vector<const Char> vector = string->GetCharVector<Char>();
     436     3725145 :     for (int i = 0; i < length; i++) {  // First pass: compute the length of the escaped output.
     437     7367386 :       uint16_t c = vector[i];
     438     3676304 :       if (c >= 256) {
     439      134968 :         escaped_length += 6;  // "%uXXXX"
     440     3548725 :       } else if (IsNotEscaped(c)) {
     441     2583569 :         escaped_length++;
     442             :       } else {
     443      965156 :         escaped_length += 3;  // "%XX"
     444             :       }
     445             : 
     446             :       // We don't allow strings that are longer than a maximal length.
     447             :       DCHECK_LT(String::kMaxLength, 0x7fffffff - 6);   // Cannot overflow.
     448     3683693 :       if (escaped_length > String::kMaxLength) break;  // Provoke exception.
     449             :     }
     450             :   }
     451             : 
     452             :   // No length change implies no change.  Return original string if no change.
     453       41452 :   if (escaped_length == length) return string;
     454             : 
     455             :   Handle<SeqOneByteString> dest;
     456       81348 :   ASSIGN_RETURN_ON_EXCEPTION(
     457             :       isolate, dest, isolate->factory()->NewRawOneByteString(escaped_length),
     458             :       String);
     459             :   int dest_position = 0;
     460             : 
     461             :   {
     462             :     DisallowHeapAllocation no_allocation;
     463             :     Vector<const Char> vector = string->GetCharVector<Char>();
     464     3722520 :     for (int i = 0; i < length; i++) {  // Second pass: write the output, using the same three-way classification as the first pass.
     465     7363692 :       uint16_t c = vector[i];
     466     3676304 :       if (c >= 256) {
     467             :         dest->SeqOneByteStringSet(dest_position, '%');
     468      134968 :         dest->SeqOneByteStringSet(dest_position + 1, 'u');
     469      269936 :         dest->SeqOneByteStringSet(dest_position + 2, HexCharOfValue(c >> 12));  // Four hex digits, most significant nibble first.
     470             :         dest->SeqOneByteStringSet(dest_position + 3,
     471      269936 :                                   HexCharOfValue((c >> 8) & 0xf));
     472             :         dest->SeqOneByteStringSet(dest_position + 4,
     473      269936 :                                   HexCharOfValue((c >> 4) & 0xf));
     474      269936 :         dest->SeqOneByteStringSet(dest_position + 5, HexCharOfValue(c & 0xf));
     475      134968 :         dest_position += 6;
     476     3546878 :       } else if (IsNotEscaped(c)) {
     477             :         dest->SeqOneByteStringSet(dest_position, c);
     478     2581722 :         dest_position++;
     479             :       } else {
     480             :         dest->SeqOneByteStringSet(dest_position, '%');
     481     1930312 :         dest->SeqOneByteStringSet(dest_position + 1, HexCharOfValue(c >> 4));  // c < 256 in this branch, so c >> 4 is a single nibble.
     482     1930312 :         dest->SeqOneByteStringSet(dest_position + 2, HexCharOfValue(c & 0xf));
     483      965156 :         dest_position += 3;
     484             :       }
     485             :     }
     486             :   }
     487             : 
     488       40674 :   return dest;
     489             : }
     490             : 
     491             : }  // Anonymous namespace
     492             : 
     493       41452 : MaybeHandle<String> Uri::Escape(Isolate* isolate, Handle<String> string) {  // Flattens |string| and escapes it with the EscapePrivate instantiation matching its representation.
     494             :   Handle<String> result;  // NOTE(review): never read or assigned in this function -- looks like a leftover.
     495       41452 :   string = String::Flatten(string);
     496       41452 :   return string->IsOneByteRepresentationUnderneath()
     497             :              ? EscapePrivate<uint8_t>(isolate, string)
     498       41452 :              : EscapePrivate<uc16>(isolate, string);
     499             : }
     500             : 
     501      129682 : MaybeHandle<String> Uri::Unescape(Isolate* isolate, Handle<String> string) {  // Flattens |string| and unescapes it with the UnescapePrivate instantiation matching its representation.
     502             :   Handle<String> result;  // NOTE(review): never read or assigned in this function -- looks like a leftover.
     503      129682 :   string = String::Flatten(string);
     504      129682 :   return string->IsOneByteRepresentationUnderneath()
     505             :              ? UnescapePrivate<uint8_t>(isolate, string)
     506      129682 :              : UnescapePrivate<uc16>(isolate, string);
     507             : }
     508             : 
     509             : }  // namespace internal
     510             : }  // namespace v8

Generated by: LCOV version 1.10