LCOV - code coverage report
Current view: top level - toolkit/components/protobuf/src/google/protobuf/stubs - structurally_valid.cc (source / functions) Hit Total Coverage
Test: output.info Lines: 34 94 36.2 %
Date: 2017-07-14 16:53:18 Functions: 4 5 80.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : // Copyright 2005-2008 Google Inc. All Rights Reserved.
       2             : // Author: jrm@google.com (Jim Meehan)
       3             : 
       4             : #include <google/protobuf/stubs/common.h>
       5             : 
       6             : namespace google {
       7             : namespace protobuf {
       8             : namespace internal {
       9             : 
      10             : // These four-byte entries compactly encode how many bytes 0..255 to delete
      11             : // in making a string replacement, how many bytes to add 0..255, and the offset
      12             : // 0..64k-1 of the replacement string in remap_string.
      13             : struct RemapEntry {
      14             :   uint8 delete_bytes;
      15             :   uint8 add_bytes;
      16             :   uint16 bytes_offset;
      17             : };
      18             : 
      19             : // Exit type codes for state tables. All but the first get stuffed into
      20             : // signed one-byte entries. The first is only generated by executable code.
      21             : // To distinguish from next-state entries, these must be contiguous and
      22             : // all <= kExitNone
      23             : typedef enum {
      24             :   kExitDstSpaceFull = 239,
      25             :   kExitIllegalStructure,  // 240
      26             :   kExitOK,                // 241
      27             :   kExitReject,            // ...
      28             :   kExitReplace1,
      29             :   kExitReplace2,
      30             :   kExitReplace3,
      31             :   kExitReplace21,
      32             :   kExitReplace31,
      33             :   kExitReplace32,
      34             :   kExitReplaceOffset1,
      35             :   kExitReplaceOffset2,
      36             :   kExitReplace1S0,
      37             :   kExitSpecial,
      38             :   kExitDoAgain,
      39             :   kExitRejectAlt,
      40             :   kExitNone               // 255
      41             : } ExitReason;
      42             : 
      43             : 
      44             : // This struct represents one entire state table. The three initialized byte
      45             : // areas are state_table, remap_base, and remap_string. state0 and state0_size
      46             : // give the byte offset and length within state_table of the initial state --
      47             : // table lookups are expected to start and end in this state, but for
      48             : // truncated UTF-8 strings, may end in a different state. These allow a quick
      49             : // test for that condition. entry_shift is 8 for tables subscripted by a full
      50             : // byte value and 6 for space-optimized tables subscripted by only six
      51             : // significant bits in UTF-8 continuation bytes.
      52             : typedef struct {
      53             :   const uint32 state0;
      54             :   const uint32 state0_size;
      55             :   const uint32 total_size;
      56             :   const int max_expand;
      57             :   const int entry_shift;
      58             :   const int bytes_per_entry;
      59             :   const uint32 losub;
      60             :   const uint32 hiadd;
      61             :   const uint8* state_table;
      62             :   const RemapEntry* remap_base;
      63             :   const uint8* remap_string;
      64             :   const uint8* fast_state;
      65             : } UTF8StateMachineObj;
      66             : 
      67             : typedef UTF8StateMachineObj UTF8ScanObj;
      68             : 
      69             : #define X__ (kExitIllegalStructure)
      70             : #define RJ_ (kExitReject)
      71             : #define S1_ (kExitReplace1)
      72             : #define S2_ (kExitReplace2)
      73             : #define S3_ (kExitReplace3)
      74             : #define S21 (kExitReplace21)
      75             : #define S31 (kExitReplace31)
      76             : #define S32 (kExitReplace32)
      77             : #define T1_ (kExitReplaceOffset1)
      78             : #define T2_ (kExitReplaceOffset2)
      79             : #define S11 (kExitReplace1S0)
      80             : #define SP_ (kExitSpecial)
      81             : #define D__ (kExitDoAgain)
      82             : #define RJA (kExitRejectAlt)
      83             : 
      84             : //  Entire table has 9 state blocks of 256 entries each
      85             : static const unsigned int utf8acceptnonsurrogates_STATE0 = 0;     // state[0]
      86             : static const unsigned int utf8acceptnonsurrogates_STATE0_SIZE = 256;  // =[1]
      87             : static const unsigned int utf8acceptnonsurrogates_TOTAL_SIZE = 2304;
      88             : static const unsigned int utf8acceptnonsurrogates_MAX_EXPAND_X4 = 0;
      89             : static const unsigned int utf8acceptnonsurrogates_SHIFT = 8;
      90             : static const unsigned int utf8acceptnonsurrogates_BYTES = 1;
      91             : static const unsigned int utf8acceptnonsurrogates_LOSUB = 0x20202020;
      92             : static const unsigned int utf8acceptnonsurrogates_HIADD = 0x00000000;
      93             : 
      94             : static const uint8 utf8acceptnonsurrogates[] = {
      95             : // state[0] 0x000000 Byte 1
      96             :   0,   0,   0,   0,   0,   0,   0,   0,    0,   0,   0,   0,   0,   0,   0,   0,
      97             :   0,   0,   0,   0,   0,   0,   0,   0,    0,   0,   0,   0,   0,   0,   0,   0,
      98             :   0,   0,   0,   0,   0,   0,   0,   0,    0,   0,   0,   0,   0,   0,   0,   0,
      99             :   0,   0,   0,   0,   0,   0,   0,   0,    0,   0,   0,   0,   0,   0,   0,   0,
     100             : 
     101             :   0,   0,   0,   0,   0,   0,   0,   0,    0,   0,   0,   0,   0,   0,   0,   0,
     102             :   0,   0,   0,   0,   0,   0,   0,   0,    0,   0,   0,   0,   0,   0,   0,   0,
     103             :   0,   0,   0,   0,   0,   0,   0,   0,    0,   0,   0,   0,   0,   0,   0,   0,
     104             :   0,   0,   0,   0,   0,   0,   0,   0,    0,   0,   0,   0,   0,   0,   0,   0,
     105             : 
     106             : X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
     107             : X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
     108             : X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
     109             : X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
     110             : 
     111             : X__, X__,   1,   1,   1,   1,   1,   1,    1,   1,   1,   1,   1,   1,   1,   1,
     112             :   1,   1,   1,   1,   1,   1,   1,   1,    1,   1,   1,   1,   1,   1,   1,   1,
     113             :   2,   3,   3,   3,   3,   3,   3,   3,    3,   3,   3,   3,   3,   7,   3,   3,
     114             :   4,   5,   5,   5,   6, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
     115             : 
     116             : // state[1] 0x000080 Byte 2 of 2
     117             : X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
     118             : X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
     119             : X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
     120             : X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
     121             : 
     122             : X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
     123             : X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
     124             : X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
     125             : X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
     126             : 
     127             :   0,   0,   0,   0,   0,   0,   0,   0,    0,   0,   0,   0,   0,   0,   0,   0,
     128             :   0,   0,   0,   0,   0,   0,   0,   0,    0,   0,   0,   0,   0,   0,   0,   0,
     129             :   0,   0,   0,   0,   0,   0,   0,   0,    0,   0,   0,   0,   0,   0,   0,   0,
     130             :   0,   0,   0,   0,   0,   0,   0,   0,    0,   0,   0,   0,   0,   0,   0,   0,
     131             : 
     132             : X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
     133             : X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
     134             : X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
     135             : X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
     136             : 
     137             : // state[2] 0x000000 Byte 2 of 3
     138             : X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
     139             : X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
     140             : X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
     141             : X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
     142             : 
     143             : X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
     144             : X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
     145             : X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
     146             : X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
     147             : 
     148             : X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
     149             : X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
     150             :   1,   1,   1,   1,   1,   1,   1,   1,    1,   1,   1,   1,   1,   1,   1,   1,
     151             :   1,   1,   1,   1,   1,   1,   1,   1,    1,   1,   1,   1,   1,   1,   1,   1,
     152             : 
     153             : X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
     154             : X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
     155             : X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
     156             : X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
     157             : 
     158             : // state[3] 0x001000 Byte 2 of 3
     159             : X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
     160             : X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
     161             : X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
     162             : X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
     163             : 
     164             : X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
     165             : X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
     166             : X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
     167             : X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
     168             : 
     169             :   1,   1,   1,   1,   1,   1,   1,   1,    1,   1,   1,   1,   1,   1,   1,   1,
     170             :   1,   1,   1,   1,   1,   1,   1,   1,    1,   1,   1,   1,   1,   1,   1,   1,
     171             :   1,   1,   1,   1,   1,   1,   1,   1,    1,   1,   1,   1,   1,   1,   1,   1,
     172             :   1,   1,   1,   1,   1,   1,   1,   1,    1,   1,   1,   1,   1,   1,   1,   1,
     173             : 
     174             : X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
     175             : X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
     176             : X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
     177             : X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
     178             : 
     179             : // state[4] 0x000000 Byte 2 of 4
     180             : X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
     181             : X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
     182             : X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
     183             : X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
     184             : 
     185             : X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
     186             : X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
     187             : X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
     188             : X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
     189             : 
     190             : X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
     191             :   3,   3,   3,   3,   3,   3,   3,   3,    3,   3,   3,   3,   3,   3,   3,   3,
     192             :   3,   3,   3,   3,   3,   3,   3,   3,    3,   3,   3,   3,   3,   3,   3,   3,
     193             :   3,   3,   3,   3,   3,   3,   3,   3,    3,   3,   3,   3,   3,   3,   3,   3,
     194             : 
     195             : X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
     196             : X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
     197             : X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
     198             : X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
     199             : 
     200             : // state[5] 0x040000 Byte 2 of 4
     201             : X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
     202             : X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
     203             : X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
     204             : X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
     205             : 
     206             : X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
     207             : X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
     208             : X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
     209             : X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
     210             : 
     211             :   3,   3,   3,   3,   3,   3,   3,   3,    3,   3,   3,   3,   3,   3,   3,   3,
     212             :   3,   3,   3,   3,   3,   3,   3,   3,    3,   3,   3,   3,   3,   3,   3,   3,
     213             :   3,   3,   3,   3,   3,   3,   3,   3,    3,   3,   3,   3,   3,   3,   3,   3,
     214             :   3,   3,   3,   3,   3,   3,   3,   3,    3,   3,   3,   3,   3,   3,   3,   3,
     215             : 
     216             : X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
     217             : X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
     218             : X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
     219             : X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
     220             : 
     221             : // state[6] 0x100000 Byte 2 of 4
     222             : X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
     223             : X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
     224             : X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
     225             : X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
     226             : 
     227             : X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
     228             : X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
     229             : X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
     230             : X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
     231             : 
     232             :   3,   3,   3,   3,   3,   3,   3,   3,    3,   3,   3,   3,   3,   3,   3,   3,
     233             : X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
     234             : X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
     235             : X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
     236             : 
     237             : X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
     238             : X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
     239             : X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
     240             : X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
     241             : 
     242             : // state[7] 0x00d000 Byte 2 of 3
     243             : X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
     244             : X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
     245             : X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
     246             : X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
     247             : 
     248             : X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
     249             : X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
     250             : X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
     251             : X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
     252             : 
     253             :   1,   1,   1,   1,   1,   1,   1,   1,    1,   1,   1,   1,   1,   1,   1,   1,
     254             :   1,   1,   1,   1,   1,   1,   1,   1,    1,   1,   1,   1,   1,   1,   1,   1,
     255             :   8,   8,   8,   8,   8,   8,   8,   8,    8,   8,   8,   8,   8,   8,   8,   8,
     256             :   8,   8,   8,   8,   8,   8,   8,   8,    8,   8,   8,   8,   8,   8,   8,   8,
     257             : 
     258             : X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
     259             : X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
     260             : X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
     261             : X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
     262             : 
     263             : // state[8] 0x00d800 Byte 3 of 3
     264             : X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
     265             : X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
     266             : X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
     267             : X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
     268             : 
     269             : X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
     270             : X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
     271             : X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
     272             : X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
     273             : 
     274             : RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_,  RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_,
     275             : RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_,  RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_,
     276             : RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_,  RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_,
     277             : RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_,  RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_,
     278             : 
     279             : X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
     280             : X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
     281             : X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
     282             : X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
     283             : };
     284             : 
     285             : // Remap base[0] = (del, add, string_offset)
     286             : static const RemapEntry utf8acceptnonsurrogates_remap_base[] = {
     287             : {0, 0, 0} };
     288             : 
     289             : // Remap string[0]
     290             : static const unsigned char utf8acceptnonsurrogates_remap_string[] = {
     291             : 0 };
     292             : 
     293             : static const unsigned char utf8acceptnonsurrogates_fast[256] = {
     294             : 0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,
     295             : 0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,
     296             : 0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,
     297             : 0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,
     298             : 
     299             : 0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,
     300             : 0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,
     301             : 0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,
     302             : 0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,
     303             : 
     304             : 1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
     305             : 1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
     306             : 1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
     307             : 1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
     308             : 
     309             : 1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
     310             : 1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
     311             : 1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
     312             : 1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
     313             : };
     314             : 
     315             : static const UTF8ScanObj utf8acceptnonsurrogates_obj = {
     316             :   utf8acceptnonsurrogates_STATE0,
     317             :   utf8acceptnonsurrogates_STATE0_SIZE,
     318             :   utf8acceptnonsurrogates_TOTAL_SIZE,
     319             :   utf8acceptnonsurrogates_MAX_EXPAND_X4,
     320             :   utf8acceptnonsurrogates_SHIFT,
     321             :   utf8acceptnonsurrogates_BYTES,
     322             :   utf8acceptnonsurrogates_LOSUB,
     323             :   utf8acceptnonsurrogates_HIADD,
     324             :   utf8acceptnonsurrogates,
     325             :   utf8acceptnonsurrogates_remap_base,
     326             :   utf8acceptnonsurrogates_remap_string,
     327             :   utf8acceptnonsurrogates_fast
     328             : };
     329             : 
     330             : 
     331             : #undef X__
     332             : #undef RJ_
     333             : #undef S1_
     334             : #undef S2_
     335             : #undef S3_
     336             : #undef S21
     337             : #undef S31
     338             : #undef S32
     339             : #undef T1_
     340             : #undef T2_
     341             : #undef S11
     342             : #undef SP_
     343             : #undef D__
     344             : #undef RJA
     345             : 
     346             : // Return true if current Tbl pointer is within state0 range
     347             : // Note that unsigned compare checks both ends of range simultaneously
     348           0 : static inline bool InStateZero(const UTF8ScanObj* st, const uint8* Tbl) {
     349           0 :   const uint8* Tbl0 = &st->state_table[st->state0];
     350           0 :   return (static_cast<uint32>(Tbl - Tbl0) < st->state0_size);
     351             : }
     352             : 
     353             : // Scan a UTF-8 string based on state table.
     354             : // Always scan complete UTF-8 characters
     355             : // Set number of bytes scanned. Return reason for exiting
     356         138 : int UTF8GenericScan(const UTF8ScanObj* st,
     357             :                     const char * str,
     358             :                     int str_length,
     359             :                     int* bytes_consumed) {
     360         138 :   *bytes_consumed = 0;
     361         138 :   if (str_length == 0) return kExitOK;
     362             : 
     363           0 :   int eshift = st->entry_shift;
     364           0 :   const uint8* isrc = reinterpret_cast<const uint8*>(str);
     365           0 :   const uint8* src = isrc;
     366           0 :   const uint8* srclimit = isrc + str_length;
     367           0 :   const uint8* srclimit8 = srclimit - 7;
     368           0 :   const uint8* Tbl_0 = &st->state_table[st->state0];
     369             : 
     370             :  DoAgain:
     371             :   // Do state-table scan
     372           0 :   int e = 0;
     373             :   uint8 c;
     374           0 :   const uint8* Tbl2 = &st->fast_state[0];
     375           0 :   const uint32 losub = st->losub;
     376           0 :   const uint32 hiadd = st->hiadd;
     377             :   // Check initial few bytes one at a time until 8-byte aligned
     378             :   //----------------------------
     379           0 :   while ((((uintptr_t)src & 0x07) != 0) &&
     380           0 :          (src < srclimit) &&
     381           0 :          Tbl2[src[0]] == 0) {
     382           0 :     src++;
     383             :   }
     384           0 :   if (((uintptr_t)src & 0x07) == 0) {
     385             :     // Do fast for groups of 8 identity bytes.
     386             :     // This covers a lot of 7-bit ASCII ~8x faster than the 1-byte loop,
     387             :     // including slowing slightly on cr/lf/ht
     388             :     //----------------------------
     389           0 :     while (src < srclimit8) {
     390           0 :       uint32 s0123 = (reinterpret_cast<const uint32 *>(src))[0];
     391           0 :       uint32 s4567 = (reinterpret_cast<const uint32 *>(src))[1];
     392           0 :       src += 8;
     393             :       // This is a fast range check for all bytes in [losub..0x80-hiadd)
     394           0 :       uint32 temp = (s0123 - losub) | (s0123 + hiadd) |
     395           0 :                     (s4567 - losub) | (s4567 + hiadd);
     396           0 :       if ((temp & 0x80808080) != 0) {
     397             :         // We typically end up here on cr/lf/ht; src was incremented
     398           0 :         int e0123 = (Tbl2[src[-8]] | Tbl2[src[-7]]) |
     399           0 :                     (Tbl2[src[-6]] | Tbl2[src[-5]]);
     400           0 :         if (e0123 != 0) {
     401           0 :           src -= 8;
     402           0 :           break;
     403             :         }    // Exit on Non-interchange
     404           0 :         e0123 = (Tbl2[src[-4]] | Tbl2[src[-3]]) |
     405           0 :                 (Tbl2[src[-2]] | Tbl2[src[-1]]);
     406           0 :         if (e0123 != 0) {
     407           0 :           src -= 4;
     408           0 :           break;
     409             :         }    // Exit on Non-interchange
     410             :         // Else OK, go around again
     411             :       }
     412             :     }
     413             :   }
     414             :   //----------------------------
     415             : 
     416             :   // Byte-at-a-time scan
     417             :   //----------------------------
     418           0 :   const uint8* Tbl = Tbl_0;
     419           0 :   while (src < srclimit) {
     420           0 :     c = *src;
     421           0 :     e = Tbl[c];
     422           0 :     src++;
     423           0 :     if (e >= kExitIllegalStructure) {break;}
     424           0 :     Tbl = &Tbl_0[e << eshift];
     425             :   }
     426             :   //----------------------------
     427             : 
     428             : 
     429             :   // Exit possibilities:
     430             :   //  Some exit code, !state0, back up over last char
     431             :   //  Some exit code, state0, back up one byte exactly
     432             :   //  source consumed, !state0, back up over partial char
     433             :   //  source consumed, state0, exit OK
     434             :   // For illegal byte in state0, avoid backing up over PREVIOUS char
     435             :   // For truncated last char, back up to beginning of it
     436             : 
     437           0 :   if (e >= kExitIllegalStructure) {
     438             :     // Back up over exactly one byte of rejected/illegal UTF-8 character
     439           0 :     src--;
     440             :     // Back up more if needed
     441           0 :     if (!InStateZero(st, Tbl)) {
     442           0 :       do {
     443           0 :         src--;
     444           0 :       } while ((src > isrc) && ((src[0] & 0xc0) == 0x80));
     445             :     }
     446           0 :   } else if (!InStateZero(st, Tbl)) {
     447             :     // Back up over truncated UTF-8 character
     448           0 :     e = kExitIllegalStructure;
     449           0 :     do {
     450           0 :       src--;
     451           0 :     } while ((src > isrc) && ((src[0] & 0xc0) == 0x80));
     452             :   } else {
     453             :     // Normal termination, source fully consumed
     454           0 :     e = kExitOK;
     455             :   }
     456             : 
     457           0 :   if (e == kExitDoAgain) {
     458             :     // Loop back up to the fast scan
     459           0 :     goto DoAgain;
     460             :   }
     461             : 
     462           0 :   *bytes_consumed = src - isrc;
     463           0 :   return e;
     464             : }
     465             : 
     466         138 : int UTF8GenericScanFastAscii(const UTF8ScanObj* st,
     467             :                     const char * str,
     468             :                     int str_length,
     469             :                     int* bytes_consumed) {
     470         138 :   *bytes_consumed = 0;
     471         138 :   if (str_length == 0) return kExitOK;
     472             : 
     473         138 :   const uint8* isrc =  reinterpret_cast<const uint8*>(str);
     474         138 :   const uint8* src = isrc;
     475         138 :   const uint8* srclimit = isrc + str_length;
     476         138 :   const uint8* srclimit8 = srclimit - 7;
     477             :   int n;
     478             :   int rest_consumed;
     479             :   int exit_reason;
     480           0 :   do {
     481             :     // Check initial few bytes one at a time until 8-byte aligned
     482         138 :     while ((((uintptr_t)src & 0x07) != 0) &&
     483         138 :            (src < srclimit) && (src[0] < 0x80)) {
     484           0 :       src++;
     485             :     }
     486         138 :     if (((uintptr_t)src & 0x07) == 0) {
     487         615 :       while ((src < srclimit8) &&
     488         318 :              (((reinterpret_cast<const uint32*>(src)[0] |
     489         318 :                 reinterpret_cast<const uint32*>(src)[1]) & 0x80808080) == 0)) {
     490         159 :         src += 8;
     491             :       }
     492             :     }
     493        1104 :     while ((src < srclimit) && (src[0] < 0x80)) {
     494         483 :       src++;
     495             :     }
     496             :     // Run state table on the rest
     497         138 :     n = src - isrc;
     498         138 :     exit_reason = UTF8GenericScan(st, str + n, str_length - n, &rest_consumed);
     499         138 :     src += rest_consumed;
     500         138 :   } while ( exit_reason == kExitDoAgain );
     501             : 
     502         138 :   *bytes_consumed = src - isrc;
     503         138 :   return exit_reason;
     504             : }
     505             : 
     506             : // Hack:  On some compilers the static tables are initialized at startup.
     507             : //   We can't use them until they are initialized.  However, some Protocol
     508             : //   Buffer parsing happens at static init time and may try to validate
     509             : //   UTF-8 strings.  Since UTF-8 validation is only used for debugging
     510             : //   anyway, we simply always return success if initialization hasn't
     511             : //   occurred yet.
     512             : namespace {
     513             : 
     514             : bool module_initialized_ = false;
     515             : 
     516             : struct InitDetector {
     517           3 :   InitDetector() {
     518           3 :     module_initialized_ = true;
     519           3 :   }
     520             : };
     521           3 : InitDetector init_detector;
     522             : 
     523             : }  // namespace
     524             : 
     525         738 : bool IsStructurallyValidUTF8(const char* buf, int len) {
     526         738 :   if (!module_initialized_) return true;
     527             :   
     528         138 :   int bytes_consumed = 0;
     529             :   UTF8GenericScanFastAscii(&utf8acceptnonsurrogates_obj,
     530         138 :                            buf, len, &bytes_consumed);
     531         138 :   return (bytes_consumed == len);
     532             : }
     533             : 
     534             : }  // namespace internal
     535             : }  // namespace protobuf
     536             : }  // namespace google

Generated by: LCOV version 1.13