Line data Source code
1 : // Protocol Buffers - Google's data interchange format
2 : // Copyright 2008 Google Inc. All rights reserved.
3 : // https://developers.google.com/protocol-buffers/
4 : //
5 : // Redistribution and use in source and binary forms, with or without
6 : // modification, are permitted provided that the following conditions are
7 : // met:
8 : //
9 : // * Redistributions of source code must retain the above copyright
10 : // notice, this list of conditions and the following disclaimer.
11 : // * Redistributions in binary form must reproduce the above
12 : // copyright notice, this list of conditions and the following disclaimer
13 : // in the documentation and/or other materials provided with the
14 : // distribution.
15 : // * Neither the name of Google Inc. nor the names of its
16 : // contributors may be used to endorse or promote products derived from
17 : // this software without specific prior written permission.
18 : //
19 : // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 : // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 : // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 : // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 : // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 : // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 : // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 : // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 : // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 : // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 : // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 :
31 : // from google3/strings/strutil.cc
32 :
33 : #include <google/protobuf/stubs/strutil.h>
34 : #include <errno.h>
35 : #include <float.h> // FLT_DIG and DBL_DIG
36 : #include <limits>
37 : #include <limits.h>
38 : #include <stdio.h>
39 : #include <iterator>
40 :
41 : #include "mozilla/FloatingPoint.h"
42 :
43 : #ifdef _WIN32
44 : // MSVC has only _snprintf, not snprintf.
45 : //
46 : // MinGW has both snprintf and _snprintf, but they appear to be different
47 : // functions. The former is buggy. When invoked like so:
48 : // char buffer[32];
49 : // snprintf(buffer, 32, "%.*g\n", FLT_DIG, 1.23e10f);
50 : // it prints "1.23000e+10". This is plainly wrong: %g should never print
51 : // trailing zeros after the decimal point. For some reason this bug only
52 : // occurs with some input values, not all. In any case, _snprintf does the
53 : // right thing, so we use it.
54 : #define snprintf _snprintf
55 : #endif
56 :
57 : namespace google {
58 : namespace protobuf {
59 :
60 0 : inline bool IsNaN(double value) {
61 0 : return ::mozilla::IsNaN(value);
62 : }
63 :
64 : // These are defined as macros on some platforms. #undef them so that we can
65 : // redefine them.
66 : #undef isxdigit
67 : #undef isprint
68 :
69 : // The definitions of these in ctype.h change based on locale. Since our
70 : // string manipulation is all in relation to the protocol buffer and C++
71 : // languages, we always want to use the C locale. So, we re-define these
72 : // exactly as we want them.
// Locale-independent check for an ASCII hexadecimal digit: 0-9, a-f or A-F.
inline bool isxdigit(char c) {
  if (c >= '0' && c <= '9') return true;
  if (c >= 'a' && c <= 'f') return true;
  return c >= 'A' && c <= 'F';
}
78 :
// Locale-independent check for a printable ASCII character, i.e. one in the
// inclusive range 0x20 (space) .. 0x7E ('~').
inline bool isprint(char c) {
  const bool below_space = c < 0x20;
  const bool above_tilde = c > 0x7E;
  return !(below_space || above_tilde);
}
82 :
83 : // ----------------------------------------------------------------------
84 : // StripString
85 : // Replaces any occurrence of the character 'remove' (or the characters
86 : // in 'remove') with the character 'replacewith'.
87 : // ----------------------------------------------------------------------
// Replaces, in place, every character of *s that occurs in the NUL-terminated
// character set `remove` with `replacewith`.
//
//   s            string to modify; must not be NULL.
//   remove       C string listing the characters to replace.
//   replacewith  character written over each match.
//
// The scan uses string::find_first_of over the full length of *s, so
// characters that follow an embedded '\0' are processed too, and no raw
// pointer into the string's buffer is kept across the writes.
void StripString(string* s, const char* remove, char replacewith) {
  for (string::size_type pos = s->find_first_of(remove);
       pos != string::npos;
       pos = s->find_first_of(remove, pos + 1)) {
    (*s)[pos] = replacewith;
  }
}
97 :
98 : // ----------------------------------------------------------------------
99 : // StringReplace()
100 : // Replace the "old" pattern with the "new" pattern in a string,
101 : // and append the result to "res". If replace_all is false,
102 : // it only replaces the first instance of "old."
103 : // ----------------------------------------------------------------------
104 :
// Appends to *res a copy of `s` in which `oldsub` has been replaced by
// `newsub`.  With replace_all == false only the first occurrence is replaced;
// otherwise every non-overlapping occurrence, scanning left to right, is.
// An empty `oldsub` matches nothing, so `s` is appended unchanged.
void StringReplace(const string& s, const string& oldsub,
                   const string& newsub, bool replace_all,
                   string* res) {
  if (oldsub.empty()) {
    res->append(s);
    return;
  }

  // Everything in `s` before this offset has already been emitted.
  string::size_type copied_upto = 0;
  for (;;) {
    const string::size_type match = s.find(oldsub, copied_upto);
    if (match == string::npos) {
      break;
    }
    res->append(s, copied_upto, match - copied_upto);
    res->append(newsub);
    copied_upto = match + oldsub.size();  // resume right after the match
    if (!replace_all) {
      break;
    }
  }
  // Emit whatever follows the last replacement (or all of `s` if none).
  res->append(s, copied_upto, s.length() - copied_upto);
}
126 :
127 : // ----------------------------------------------------------------------
128 : // StringReplace()
129 : // Give me a string and two patterns "old" and "new", and I replace
130 : // the first instance of "old" in the string with "new", if it
131 : // exists. If "global" is true; call this repeatedly until it
132 : // fails. RETURN a new string, regardless of whether the replacement
133 : // happened or not.
134 : // ----------------------------------------------------------------------
135 :
136 0 : string StringReplace(const string& s, const string& oldsub,
137 : const string& newsub, bool replace_all) {
138 0 : string ret;
139 0 : StringReplace(s, oldsub, newsub, replace_all, &ret);
140 0 : return ret;
141 : }
142 :
143 : // ----------------------------------------------------------------------
144 : // SplitStringUsing()
145 : // Split a string using a character delimiter. Append the components
146 : // to 'result'.
147 : //
148 : // Note: For multi-character delimiters, this routine will split on *ANY* of
149 : // the characters in the string, not the entire string as a single delimiter.
150 : // ----------------------------------------------------------------------
// Splits `full` on any of the characters in `delim` and writes each non-empty
// piece through the output iterator `result`.  Runs of delimiters, as well as
// leading and trailing delimiters, produce no empty pieces.
template <typename ITR>
static inline
void SplitStringToIteratorUsing(const string& full,
                                const char* delim,
                                ITR& result) {
  // Fast path: a delimiter set of exactly one character is scanned by hand.
  const bool single_char_delim = (delim[0] != '\0' && delim[1] == '\0');
  if (single_char_delim) {
    const char sep = delim[0];
    const char* cursor = full.data();
    const char* const stop = cursor + full.size();
    while (cursor != stop) {
      if (*cursor == sep) {
        ++cursor;
        continue;
      }
      // `cursor` is on the first character of a token; find where it ends.
      const char* const token = cursor;
      do {
        ++cursor;
      } while (cursor != stop && *cursor != sep);
      *result++ = string(token, cursor - token);
    }
    return;
  }

  // General path: let std::string locate token boundaries.
  string::size_type token_begin = full.find_first_not_of(delim);
  while (token_begin != string::npos) {
    const string::size_type token_end = full.find_first_of(delim, token_begin);
    if (token_end == string::npos) {
      *result++ = full.substr(token_begin);  // last token runs to the end
      break;
    }
    *result++ = full.substr(token_begin, token_end - token_begin);
    token_begin = full.find_first_not_of(delim, token_end);
  }
}
185 :
186 0 : void SplitStringUsing(const string& full,
187 : const char* delim,
188 : vector<string>* result) {
189 0 : back_insert_iterator< vector<string> > it(*result);
190 0 : SplitStringToIteratorUsing(full, delim, it);
191 0 : }
192 :
193 : // Split a string using a character delimiter. Append the components
194 : // to 'result'. If there are consecutive delimiters, this function
195 : // will return corresponding empty strings. The string is split into
196 : // at most the specified number of pieces greedily. This means that the
197 : // last piece may possibly be split further. To split into as many pieces
198 : // as possible, specify 0 as the number of pieces.
199 : //
200 : // If "full" is the empty string, yields an empty string as the only value.
201 : //
202 : // If "pieces" is negative for some reason, it returns the whole string
203 : // ----------------------------------------------------------------------
// Splits `full` on any character of `delim`, keeping empty pieces, and writes
// the pieces through `result`.  At most `pieces` pieces are produced (the last
// one holds the unsplit remainder); pieces == 0 means "no limit".  A negative
// `pieces` yields the whole string as a single piece.
template <typename StringType, typename ITR>
static inline
void SplitStringToIteratorAllowEmpty(const StringType& full,
                                     const char* delim,
                                     int pieces,
                                     ITR& result) {
  const bool unlimited = (pieces == 0);
  string::size_type piece_start = 0;
  int emitted = 0;

  while (unlimited || emitted < pieces - 1) {
    const string::size_type delim_at = full.find_first_of(delim, piece_start);
    if (delim_at == string::npos) {
      break;  // no more delimiters: the remainder is the final piece
    }
    *result++ = full.substr(piece_start, delim_at - piece_start);
    piece_start = delim_at + 1;
    ++emitted;
  }
  *result++ = full.substr(piece_start);
}
224 :
225 0 : void SplitStringAllowEmpty(const string& full, const char* delim,
226 : vector<string>* result) {
227 0 : back_insert_iterator<vector<string> > it(*result);
228 0 : SplitStringToIteratorAllowEmpty(full, delim, 0, it);
229 0 : }
230 :
231 : // ----------------------------------------------------------------------
232 : // JoinStrings()
233 : // This merges a vector of string components with delim inserted
234 : // as separators between components.
235 : //
236 : // ----------------------------------------------------------------------
237 : template <class ITERATOR>
238 0 : static void JoinStringsIterator(const ITERATOR& start,
239 : const ITERATOR& end,
240 : const char* delim,
241 : string* result) {
242 0 : GOOGLE_CHECK(result != NULL);
243 0 : result->clear();
244 0 : int delim_length = strlen(delim);
245 :
246 : // Precompute resulting length so we can reserve() memory in one shot.
247 0 : int length = 0;
248 0 : for (ITERATOR iter = start; iter != end; ++iter) {
249 0 : if (iter != start) {
250 0 : length += delim_length;
251 : }
252 0 : length += iter->size();
253 : }
254 0 : result->reserve(length);
255 :
256 : // Now combine everything.
257 0 : for (ITERATOR iter = start; iter != end; ++iter) {
258 0 : if (iter != start) {
259 0 : result->append(delim, delim_length);
260 : }
261 0 : result->append(iter->data(), iter->size());
262 : }
263 0 : }
264 :
265 0 : void JoinStrings(const vector<string>& components,
266 : const char* delim,
267 : string * result) {
268 0 : JoinStringsIterator(components.begin(), components.end(), delim, result);
269 0 : }
270 :
271 : // ----------------------------------------------------------------------
272 : // UnescapeCEscapeSequences()
273 : // This does all the unescaping that C does: \ooo, \r, \n, etc
274 : // Returns length of resulting string.
275 : // The implementation of \x parses any positive number of hex digits,
276 : // but it is an error if the value requires more than 8 bits, and the
277 : // result is truncated to 8 bits.
278 : //
279 : // The second call stores its errors in a supplied string vector.
280 : // If the string vector pointer is NULL, it reports the errors with LOG().
281 : // ----------------------------------------------------------------------
282 :
// True when (c) is one of the characters '0'..'7'.  The argument is evaluated
// more than once, so it must be free of side effects.
#define IS_OCTAL_DIGIT(c) ((c) >= '0' && (c) <= '7')
284 :
// Converts one ASCII hexadecimal digit to its numeric value (0..15).
// The caller must pass a valid hex digit; this is only verified by assert().
inline int hex_digit_to_int(char c) {
  /* Assume ASCII. */
  assert('0' == 0x30 && 'A' == 0x41 && 'a' == 0x61);
  assert(isxdigit(c));
  const int code = static_cast<unsigned char>(c);
  // In ASCII the low nibble of 'A'/'a' is 1, so adding 9 to a letter makes
  // its low nibble 10..15; decimal digits already carry their value there.
  const int shifted = (code > '9') ? code + 9 : code;
  return shifted & 0xf;
}
295 :
296 : // Protocol buffers doesn't ever care about errors, but I don't want to remove
297 : // the code.
// LOG_STRING(LEVEL, VECTOR) ignores its VECTOR argument and expands to
// GOOGLE_LOG_IF(LEVEL, false): the condition is the constant `false`, so the
// messages streamed into it are presumably never emitted (GOOGLE_LOG_IF is
// defined elsewhere -- confirm there).
298 : #define LOG_STRING(LEVEL, VECTOR) GOOGLE_LOG_IF(LEVEL, false)
299 :
300 0 : int UnescapeCEscapeSequences(const char* source, char* dest) {
301 0 : return UnescapeCEscapeSequences(source, dest, NULL);
302 : }
303 :
// Decodes the C-style escape sequences in the NUL-terminated string `source`
// and writes the result, NUL-terminated, to `dest`.
//
//   source  escaped input; scanning stops at the first '\0'.
//   dest    output buffer.  It may be the very same pointer as `source` (the
//           first loop below handles that case); every escape sequence
//           consumes at least two input bytes and emits at most one, so the
//           write position never overtakes the read position.
//   errors  must be NULL; this is only enforced through GOOGLE_DCHECK.
//
// Returns the number of bytes written to `dest`, not counting the terminator.
304 0 : int UnescapeCEscapeSequences(const char* source, char* dest,
305 : vector<string> *errors) {
306 0 : GOOGLE_DCHECK(errors == NULL) << "Error reporting not implemented.";
307 :
// d is the write cursor into dest, p the read cursor into source.
308 0 : char* d = dest;
309 0 : const char* p = source;
310 :
311 : // Small optimization for case where source = dest and there's no escaping
// (while p == d, the leading un-escaped bytes are already in place, so they
// are skipped rather than copied onto themselves).
312 0 : while ( p == d && *p != '\0' && *p != '\\' )
313 0 : p++, d++;
314 :
315 0 : while (*p != '\0') {
316 0 : if (*p != '\\') {
317 0 : *d++ = *p++;
318 : } else {
319 0 : switch ( *++p ) { // skip past the '\\'
// A lone backslash at the very end of the input is dropped: terminate the
// output and return immediately.
320 : case '\0':
321 0 : LOG_STRING(ERROR, errors) << "String cannot end with \\";
322 0 : *d = '\0';
323 0 : return d - dest; // we're done with p
// Single-character escapes: emit the character they stand for.
324 0 : case 'a': *d++ = '\a'; break;
325 0 : case 'b': *d++ = '\b'; break;
326 0 : case 'f': *d++ = '\f'; break;
327 0 : case 'n': *d++ = '\n'; break;
328 0 : case 'r': *d++ = '\r'; break;
329 0 : case 't': *d++ = '\t'; break;
330 0 : case 'v': *d++ = '\v'; break;
331 0 : case '\\': *d++ = '\\'; break;
332 0 : case '?': *d++ = '\?'; break; // \? Who knew?
333 0 : case '\'': *d++ = '\''; break;
334 0 : case '"': *d++ = '\"'; break;
335 : case '0': case '1': case '2': case '3': // octal digit: 1 to 3 digits
336 : case '4': case '5': case '6': case '7': {
// The value is accumulated in a plain char, so a three-digit sequence above
// \377 does not fit and is narrowed on assignment.  p is only advanced onto a
// digit that was actually consumed.
337 0 : char ch = *p - '0';
338 0 : if ( IS_OCTAL_DIGIT(p[1]) )
339 0 : ch = ch * 8 + *++p - '0';
340 0 : if ( IS_OCTAL_DIGIT(p[1]) ) // safe (and easy) to do this twice
341 0 : ch = ch * 8 + *++p - '0'; // now points at last digit
342 0 : *d++ = ch;
343 0 : break;
344 : }
345 : case 'x': case 'X': {
// "\x" with no hex digit after it: nothing is emitted.  The p++ after the
// switch steps over the 'x', and the character that follows is then handled
// by the main loop as ordinary input.
346 0 : if (!isxdigit(p[1])) {
347 0 : if (p[1] == '\0') {
348 0 : LOG_STRING(ERROR, errors) << "String cannot end with \\x";
349 : } else {
350 0 : LOG_STRING(ERROR, errors) <<
351 0 : "\\x cannot be followed by non-hex digit: \\" << *p << p[1];
352 : }
353 0 : break;
354 : }
// Consume every hex digit that follows; p ends on the last digit consumed.
355 0 : unsigned int ch = 0;
356 0 : const char *hex_start = p;
357 0 : while (isxdigit(p[1])) // arbitrarily many hex digits
358 0 : ch = (ch << 4) + hex_digit_to_int(*++p);
359 0 : if (ch > 0xFF)
360 0 : LOG_STRING(ERROR, errors) << "Value of " <<
361 0 : "\\" << string(hex_start, p+1-hex_start) << " exceeds 8 bits";
// The unsigned value is converted to char on store, so an oversized value is
// truncated rather than rejected.
362 0 : *d++ = ch;
363 0 : break;
364 : }
// The \u and \U cases below are compiled out by the #if 0.
365 : #if 0 // TODO(kenton): Support \u and \U? Requires runetochar().
366 : case 'u': {
367 : // \uhhhh => convert 4 hex digits to UTF-8
368 : char32 rune = 0;
369 : const char *hex_start = p;
370 : for (int i = 0; i < 4; ++i) {
371 : if (isxdigit(p[1])) { // Look one char ahead.
372 : rune = (rune << 4) + hex_digit_to_int(*++p); // Advance p.
373 : } else {
374 : LOG_STRING(ERROR, errors)
375 : << "\\u must be followed by 4 hex digits: \\"
376 : << string(hex_start, p+1-hex_start);
377 : break;
378 : }
379 : }
380 : d += runetochar(d, &rune);
381 : break;
382 : }
383 : case 'U': {
384 : // \Uhhhhhhhh => convert 8 hex digits to UTF-8
385 : char32 rune = 0;
386 : const char *hex_start = p;
387 : for (int i = 0; i < 8; ++i) {
388 : if (isxdigit(p[1])) { // Look one char ahead.
389 : // Don't change rune until we're sure this
390 : // is within the Unicode limit, but do advance p.
391 : char32 newrune = (rune << 4) + hex_digit_to_int(*++p);
392 : if (newrune > 0x10FFFF) {
393 : LOG_STRING(ERROR, errors)
394 : << "Value of \\"
395 : << string(hex_start, p + 1 - hex_start)
396 : << " exceeds Unicode limit (0x10FFFF)";
397 : break;
398 : } else {
399 : rune = newrune;
400 : }
401 : } else {
402 : LOG_STRING(ERROR, errors)
403 : << "\\U must be followed by 8 hex digits: \\"
404 : << string(hex_start, p+1-hex_start);
405 : break;
406 : }
407 : }
408 : d += runetochar(d, &rune);
409 : break;
410 : }
411 : #endif
// Unrecognised escape: nothing is written; the backslash and the character
// after it are both skipped.
412 : default:
413 0 : LOG_STRING(ERROR, errors) << "Unknown escape sequence: \\" << *p;
414 : }
415 0 : p++; // read past letter we escaped
416 : }
417 : }
418 0 : *d = '\0';
419 0 : return d - dest;
420 : }
421 :
422 : // ----------------------------------------------------------------------
423 : // UnescapeCEscapeString()
424 : // This does the same thing as UnescapeCEscapeSequences, but creates
425 : // a new string. The caller does not need to worry about allocating
426 : // a dest buffer. This should be used for non performance critical
427 : // tasks such as printing debug messages. It is safe for src and dest
428 : // to be the same.
429 : //
430 : // The second call stores its errors in a supplied string vector.
431 : // If the string vector pointer is NULL, it reports the errors with LOG().
432 : //
433 : // In the first and second calls, the length of dest is returned. In the
434 : // third call, the new string is returned.
435 : // ----------------------------------------------------------------------
436 0 : int UnescapeCEscapeString(const string& src, string* dest) {
437 0 : return UnescapeCEscapeString(src, dest, NULL);
438 : }
439 :
440 0 : int UnescapeCEscapeString(const string& src, string* dest,
441 : vector<string> *errors) {
442 0 : scoped_array<char> unescaped(new char[src.size() + 1]);
443 0 : int len = UnescapeCEscapeSequences(src.c_str(), unescaped.get(), errors);
444 0 : GOOGLE_CHECK(dest);
445 0 : dest->assign(unescaped.get(), len);
446 0 : return len;
447 : }
448 :
449 0 : string UnescapeCEscapeString(const string& src) {
450 0 : scoped_array<char> unescaped(new char[src.size() + 1]);
451 0 : int len = UnescapeCEscapeSequences(src.c_str(), unescaped.get(), NULL);
452 0 : return string(unescaped.get(), len);
453 : }
454 :
455 : // ----------------------------------------------------------------------
456 : // CEscapeString()
457 : // CHexEscapeString()
458 : // Copies 'src' to 'dest', escaping dangerous characters using
459 : // C-style escape sequences. This is very useful for preparing query
460 : // flags. 'src' and 'dest' should not overlap. The 'Hex' version uses
461 : // hexadecimal rather than octal sequences.
462 : // Returns the number of bytes written to 'dest' (not including the \0)
463 : // or -1 if there was insufficient space.
464 : //
465 : // Currently only \n, \r, \t, ", ', \ and !isprint() chars are escaped.
466 : // ----------------------------------------------------------------------
// Copies src[0, src_len) to `dest`, replacing characters that are unsafe in a
// C string literal with escape sequences, and NUL-terminates the output.
//
//   src, src_len    input bytes (may contain '\0').
//   dest, dest_len  output buffer and its capacity in bytes.
//   use_hex         emit "\xNN" (lowercase hex) instead of "\NNN" (octal) for
//                   bytes that have no short escape.
//   utf8_safe       when true, bytes >= 0x80 are copied through unescaped.
//
// Returns the number of bytes written, not counting the terminating '\0', or
// -1 if `dest` is too small.  On -1 the contents of `dest` are unspecified,
// but no byte at or beyond dest[dest_len] is ever touched.
//
// The numeric escapes are written digit by digit rather than with sprintf():
// sprintf() always appends a '\0' after the four characters, which wrote one
// byte past the buffer when exactly four bytes were left.
int CEscapeInternal(const char* src, int src_len, char* dest,
                    int dest_len, bool use_hex, bool utf8_safe) {
  const char* const hex_digits = "0123456789abcdef";
  const char* src_end = src + src_len;
  int used = 0;
  bool last_hex_escape = false;  // true if last output char was \xNN

  for (; src < src_end; src++) {
    if (dest_len - used < 2)   // Need space for two letter escape
      return -1;

    bool is_hex_escape = false;
    switch (*src) {
      case '\n': dest[used++] = '\\'; dest[used++] = 'n';  break;
      case '\r': dest[used++] = '\\'; dest[used++] = 'r';  break;
      case '\t': dest[used++] = '\\'; dest[used++] = 't';  break;
      case '\"': dest[used++] = '\\'; dest[used++] = '\"'; break;
      case '\'': dest[used++] = '\\'; dest[used++] = '\''; break;
      case '\\': dest[used++] = '\\'; dest[used++] = '\\'; break;
      default: {
        const unsigned char byte = static_cast<unsigned char>(*src);
        // Note that if we emit \xNN and the src character after that is a hex
        // digit then that digit must be escaped too to prevent it being
        // interpreted as part of the character code by C.
        if ((!utf8_safe || byte < 0x80) &&
            (!isprint(*src) ||
             (last_hex_escape && isxdigit(*src)))) {
          if (dest_len - used < 4)  // need space for 4 letter escape
            return -1;
          dest[used++] = '\\';
          if (use_hex) {
            // Same text as "\\x%02x".
            dest[used++] = 'x';
            dest[used++] = hex_digits[byte >> 4];
            dest[used++] = hex_digits[byte & 0xf];
          } else {
            // Same text as "\\%03o".
            dest[used++] = static_cast<char>('0' + (byte >> 6));
            dest[used++] = static_cast<char>('0' + ((byte >> 3) & 7));
            dest[used++] = static_cast<char>('0' + (byte & 7));
          }
          is_hex_escape = use_hex;
        } else {
          dest[used++] = *src;
        }
        break;
      }
    }
    last_hex_escape = is_hex_escape;
  }

  if (dest_len - used < 1)   // make sure that there is room for \0
    return -1;

  dest[used] = '\0';   // doesn't count towards return value though
  return used;
}
511 :
512 0 : int CEscapeString(const char* src, int src_len, char* dest, int dest_len) {
513 0 : return CEscapeInternal(src, src_len, dest, dest_len, false, false);
514 : }
515 :
516 : // ----------------------------------------------------------------------
517 : // CEscape()
518 : // CHexEscape()
519 : // Copies 'src' to result, escaping dangerous characters using
520 : // C-style escape sequences. This is very useful for preparing query
521 : // flags. 'src' and 'dest' should not overlap. The 'Hex' version
522 : // uses hexadecimal rather than octal sequences.
523 : //
524 : // Currently only \n, \r, \t, ", ', \ and !isprint() chars are escaped.
525 : // ----------------------------------------------------------------------
526 0 : string CEscape(const string& src) {
527 0 : const int dest_length = src.size() * 4 + 1; // Maximum possible expansion
528 0 : scoped_array<char> dest(new char[dest_length]);
529 0 : const int len = CEscapeInternal(src.data(), src.size(),
530 0 : dest.get(), dest_length, false, false);
531 0 : GOOGLE_DCHECK_GE(len, 0);
532 0 : return string(dest.get(), len);
533 : }
534 :
535 : namespace strings {
536 :
537 0 : string Utf8SafeCEscape(const string& src) {
538 0 : const int dest_length = src.size() * 4 + 1; // Maximum possible expansion
539 0 : scoped_array<char> dest(new char[dest_length]);
540 0 : const int len = CEscapeInternal(src.data(), src.size(),
541 0 : dest.get(), dest_length, false, true);
542 0 : GOOGLE_DCHECK_GE(len, 0);
543 0 : return string(dest.get(), len);
544 : }
545 :
546 0 : string CHexEscape(const string& src) {
547 0 : const int dest_length = src.size() * 4 + 1; // Maximum possible expansion
548 0 : scoped_array<char> dest(new char[dest_length]);
549 0 : const int len = CEscapeInternal(src.data(), src.size(),
550 0 : dest.get(), dest_length, true, false);
551 0 : GOOGLE_DCHECK_GE(len, 0);
552 0 : return string(dest.get(), len);
553 : }
554 :
555 : } // namespace strings
556 :
557 : // ----------------------------------------------------------------------
558 : // strto32_adaptor()
559 : // strtou32_adaptor()
560 : // Implementation of strto[u]l replacements that have identical
561 : // overflow and underflow characteristics for both ILP-32 and LP-64
562 : // platforms, including errno preservation in error-free calls.
563 : // ----------------------------------------------------------------------
564 :
565 0 : int32 strto32_adaptor(const char *nptr, char **endptr, int base) {
566 0 : const int saved_errno = errno;
567 0 : errno = 0;
568 0 : const long result = strtol(nptr, endptr, base);
569 0 : if (errno == ERANGE && result == LONG_MIN) {
570 0 : return kint32min;
571 0 : } else if (errno == ERANGE && result == LONG_MAX) {
572 0 : return kint32max;
573 0 : } else if (errno == 0 && result < kint32min) {
574 0 : errno = ERANGE;
575 0 : return kint32min;
576 0 : } else if (errno == 0 && result > kint32max) {
577 0 : errno = ERANGE;
578 0 : return kint32max;
579 : }
580 0 : if (errno == 0)
581 0 : errno = saved_errno;
582 0 : return static_cast<int32>(result);
583 : }
584 :
585 0 : uint32 strtou32_adaptor(const char *nptr, char **endptr, int base) {
586 0 : const int saved_errno = errno;
587 0 : errno = 0;
588 0 : const unsigned long result = strtoul(nptr, endptr, base);
589 0 : if (errno == ERANGE && result == ULONG_MAX) {
590 0 : return kuint32max;
591 0 : } else if (errno == 0 && result > kuint32max) {
592 0 : errno = ERANGE;
593 0 : return kuint32max;
594 : }
595 0 : if (errno == 0)
596 0 : errno = saved_errno;
597 0 : return static_cast<uint32>(result);
598 : }
599 :
// Strips leading/trailing spaces and one optional leading '+' or '-' from
// *text.  On success *text holds what is left (at least one character),
// *negative_ptr tells whether the sign was '-', and true is returned.
// Returns false, leaving *text untouched, when nothing but spaces and/or a
// bare sign is present.
inline bool safe_parse_sign(string* text  /*inout*/,
                            bool* negative_ptr  /*output*/) {
  const char* const chars = text->data();
  string::size_type first = 0;             // index of first kept character
  string::size_type last = text->size();   // one past the last kept character

  // Consume whitespace.
  while (first < last && chars[first] == ' ') {
    ++first;
  }
  while (first < last && chars[last - 1] == ' ') {
    --last;
  }
  if (first >= last) {
    return false;
  }

  // Consume sign.
  const char lead = chars[first];
  *negative_ptr = (lead == '-');
  if (lead == '-' || lead == '+') {
    ++first;
    if (first >= last) {
      return false;
    }
  }
  *text = text->substr(first, last - first);
  return true;
}
627 :
628 0 : inline bool safe_parse_positive_int(
629 : string text, int32* value_p) {
630 0 : int base = 10;
631 0 : int32 value = 0;
632 0 : const int32 vmax = std::numeric_limits<int32>::max();
633 : assert(vmax > 0);
634 : assert(vmax >= base);
635 0 : const int32 vmax_over_base = vmax / base;
636 0 : const char* start = text.data();
637 0 : const char* end = start + text.size();
638 : // loop over digits
639 0 : for (; start < end; ++start) {
640 0 : unsigned char c = static_cast<unsigned char>(start[0]);
641 0 : int digit = c - '0';
642 0 : if (digit >= base || digit < 0) {
643 0 : *value_p = value;
644 0 : return false;
645 : }
646 0 : if (value > vmax_over_base) {
647 0 : *value_p = vmax;
648 0 : return false;
649 : }
650 0 : value *= base;
651 0 : if (value > vmax - digit) {
652 0 : *value_p = vmax;
653 0 : return false;
654 : }
655 0 : value += digit;
656 : }
657 0 : *value_p = value;
658 0 : return true;
659 : }
660 :
661 0 : inline bool safe_parse_negative_int(
662 : string text, int32* value_p) {
663 0 : int base = 10;
664 0 : int32 value = 0;
665 0 : const int32 vmin = std::numeric_limits<int32>::min();
666 : assert(vmin < 0);
667 : assert(vmin <= 0 - base);
668 0 : int32 vmin_over_base = vmin / base;
669 : // 2003 c++ standard [expr.mul]
670 : // "... the sign of the remainder is implementation-defined."
671 : // Although (vmin/base)*base + vmin%base is always vmin.
672 : // 2011 c++ standard tightens the spec but we cannot rely on it.
673 0 : if (vmin % base > 0) {
674 0 : vmin_over_base += 1;
675 : }
676 0 : const char* start = text.data();
677 0 : const char* end = start + text.size();
678 : // loop over digits
679 0 : for (; start < end; ++start) {
680 0 : unsigned char c = static_cast<unsigned char>(start[0]);
681 0 : int digit = c - '0';
682 0 : if (digit >= base || digit < 0) {
683 0 : *value_p = value;
684 0 : return false;
685 : }
686 0 : if (value < vmin_over_base) {
687 0 : *value_p = vmin;
688 0 : return false;
689 : }
690 0 : value *= base;
691 0 : if (value < vmin + digit) {
692 0 : *value_p = vmin;
693 0 : return false;
694 : }
695 0 : value -= digit;
696 : }
697 0 : *value_p = value;
698 0 : return true;
699 : }
700 :
701 0 : bool safe_int(string text, int32* value_p) {
702 0 : *value_p = 0;
703 : bool negative;
704 0 : if (!safe_parse_sign(&text, &negative)) {
705 0 : return false;
706 : }
707 0 : if (!negative) {
708 0 : return safe_parse_positive_int(text, value_p);
709 : } else {
710 0 : return safe_parse_negative_int(text, value_p);
711 : }
712 : }
713 :
714 : // ----------------------------------------------------------------------
715 : // FastIntToBuffer()
716 : // FastInt64ToBuffer()
717 : // FastHexToBuffer()
718 : // FastHex64ToBuffer()
719 : // FastHex32ToBuffer()
720 : // ----------------------------------------------------------------------
721 :
722 : // Offset into buffer where FastInt64ToBuffer places the end of string
723 : // null character. Also used by FastInt64ToBufferLeft.
724 : static const int kFastInt64ToBufferOffset = 21;
725 :
726 0 : char *FastInt64ToBuffer(int64 i, char* buffer) {
727 : // We could collapse the positive and negative sections, but that
728 : // would be slightly slower for positive numbers...
729 : // 22 bytes is enough to store -2**64, -18446744073709551616.
730 0 : char* p = buffer + kFastInt64ToBufferOffset;
731 0 : *p-- = '\0';
732 0 : if (i >= 0) {
733 0 : do {
734 0 : *p-- = '0' + i % 10;
735 0 : i /= 10;
736 0 : } while (i > 0);
737 0 : return p + 1;
738 : } else {
739 : // On different platforms, % and / have different behaviors for
740 : // negative numbers, so we need to jump through hoops to make sure
741 : // we don't divide negative numbers.
742 0 : if (i > -10) {
743 0 : i = -i;
744 0 : *p-- = '0' + i;
745 0 : *p = '-';
746 0 : return p;
747 : } else {
748 : // Make sure we aren't at MIN_INT, in which case we can't say i = -i
749 0 : i = i + 10;
750 0 : i = -i;
751 0 : *p-- = '0' + i % 10;
752 : // Undo what we did a moment ago
753 0 : i = i / 10 + 1;
754 0 : do {
755 0 : *p-- = '0' + i % 10;
756 0 : i /= 10;
757 0 : } while (i > 0);
758 0 : *p = '-';
759 0 : return p;
760 : }
761 : }
762 : }
763 :
764 : // Offset into buffer where FastInt32ToBuffer places the end of string
765 : // null character. Also used by FastInt32ToBufferLeft
766 : static const int kFastInt32ToBufferOffset = 11;
767 :
768 : // Yes, this is a duplicate of FastInt64ToBuffer. But, we need this for the
769 : // compiler to generate 32 bit arithmetic instructions. It's much faster, at
770 : // least with 32 bit binaries.
771 0 : char *FastInt32ToBuffer(int32 i, char* buffer) {
772 : // We could collapse the positive and negative sections, but that
773 : // would be slightly slower for positive numbers...
774 : // 12 bytes is enough to store -2**32, -4294967296.
775 0 : char* p = buffer + kFastInt32ToBufferOffset;
776 0 : *p-- = '\0';
777 0 : if (i >= 0) {
778 0 : do {
779 0 : *p-- = '0' + i % 10;
780 0 : i /= 10;
781 0 : } while (i > 0);
782 0 : return p + 1;
783 : } else {
784 : // On different platforms, % and / have different behaviors for
785 : // negative numbers, so we need to jump through hoops to make sure
786 : // we don't divide negative numbers.
787 0 : if (i > -10) {
788 0 : i = -i;
789 0 : *p-- = '0' + i;
790 0 : *p = '-';
791 0 : return p;
792 : } else {
793 : // Make sure we aren't at MIN_INT, in which case we can't say i = -i
794 0 : i = i + 10;
795 0 : i = -i;
796 0 : *p-- = '0' + i % 10;
797 : // Undo what we did a moment ago
798 0 : i = i / 10 + 1;
799 0 : do {
800 0 : *p-- = '0' + i % 10;
801 0 : i /= 10;
802 0 : } while (i > 0);
803 0 : *p = '-';
804 0 : return p;
805 : }
806 : }
807 : }
808 :
809 0 : char *FastHexToBuffer(int i, char* buffer) {
810 0 : GOOGLE_CHECK(i >= 0) << "FastHexToBuffer() wants non-negative integers, not " << i;
811 :
812 : static const char *hexdigits = "0123456789abcdef";
813 0 : char *p = buffer + 21;
814 0 : *p-- = '\0';
815 0 : do {
816 0 : *p-- = hexdigits[i & 15]; // mod by 16
817 0 : i >>= 4; // divide by 16
818 0 : } while (i > 0);
819 0 : return p + 1;
820 : }
821 :
822 0 : char *InternalFastHexToBuffer(uint64 value, char* buffer, int num_byte) {
823 : static const char *hexdigits = "0123456789abcdef";
824 0 : buffer[num_byte] = '\0';
825 0 : for (int i = num_byte - 1; i >= 0; i--) {
826 : #ifdef _M_X64
827 : // MSVC x64 platform has a bug optimizing the uint32(value) in the #else
828 : // block. Given that the uint32 cast was to improve performance on 32-bit
829 : // platforms, we use 64-bit '&' directly.
830 : buffer[i] = hexdigits[value & 0xf];
831 : #else
832 0 : buffer[i] = hexdigits[uint32(value) & 0xf];
833 : #endif
834 0 : value >>= 4;
835 : }
836 0 : return buffer;
837 : }
838 :
839 0 : char *FastHex64ToBuffer(uint64 value, char* buffer) {
840 0 : return InternalFastHexToBuffer(value, buffer, 16);
841 : }
842 :
843 0 : char *FastHex32ToBuffer(uint32 value, char* buffer) {
844 0 : return InternalFastHexToBuffer(value, buffer, 8);
845 : }
846 :
// Writes, going backwards from `p`, the ones digit of `num` at p[0], the
// result of num / 10 as a digit at p[-1] and `prev_sep` at p[-2]; returns
// p - 3.
static inline char* PlaceNum(char* p, int num, char prev_sep) {
  const int ones = num % 10;
  const int tens = num / 10;
  p[0] = '0' + ones;
  p[-1] = '0' + tens;
  p[-2] = prev_sep;
  return p - 3;
}
853 :
854 : // ----------------------------------------------------------------------
855 : // FastInt32ToBufferLeft()
856 : // FastUInt32ToBufferLeft()
857 : // FastInt64ToBufferLeft()
858 : // FastUInt64ToBufferLeft()
859 : //
860 : // Like the Fast*ToBuffer() functions above, these are intended for speed.
861 : // Unlike the Fast*ToBuffer() functions, however, these functions write
862 : // their output to the beginning of the buffer (hence the name, as the
863 : // output is left-aligned). The caller is responsible for ensuring that
864 : // the buffer has enough space to hold the output.
865 : //
866 : // Returns a pointer to the end of the string (i.e. the null character
867 : // terminating the string).
868 : // ----------------------------------------------------------------------
869 :
// Lookup table: two_ASCII_digits[n] holds the two characters of n printed
// as a zero-padded two-digit decimal number (00..99).  The entries are not
// NUL-terminated.  One decade per row.
static const char two_ASCII_digits[100][2] = {
  {'0','0'}, {'0','1'}, {'0','2'}, {'0','3'}, {'0','4'}, {'0','5'}, {'0','6'}, {'0','7'}, {'0','8'}, {'0','9'},
  {'1','0'}, {'1','1'}, {'1','2'}, {'1','3'}, {'1','4'}, {'1','5'}, {'1','6'}, {'1','7'}, {'1','8'}, {'1','9'},
  {'2','0'}, {'2','1'}, {'2','2'}, {'2','3'}, {'2','4'}, {'2','5'}, {'2','6'}, {'2','7'}, {'2','8'}, {'2','9'},
  {'3','0'}, {'3','1'}, {'3','2'}, {'3','3'}, {'3','4'}, {'3','5'}, {'3','6'}, {'3','7'}, {'3','8'}, {'3','9'},
  {'4','0'}, {'4','1'}, {'4','2'}, {'4','3'}, {'4','4'}, {'4','5'}, {'4','6'}, {'4','7'}, {'4','8'}, {'4','9'},
  {'5','0'}, {'5','1'}, {'5','2'}, {'5','3'}, {'5','4'}, {'5','5'}, {'5','6'}, {'5','7'}, {'5','8'}, {'5','9'},
  {'6','0'}, {'6','1'}, {'6','2'}, {'6','3'}, {'6','4'}, {'6','5'}, {'6','6'}, {'6','7'}, {'6','8'}, {'6','9'},
  {'7','0'}, {'7','1'}, {'7','2'}, {'7','3'}, {'7','4'}, {'7','5'}, {'7','6'}, {'7','7'}, {'7','8'}, {'7','9'},
  {'8','0'}, {'8','1'}, {'8','2'}, {'8','3'}, {'8','4'}, {'8','5'}, {'8','6'}, {'8','7'}, {'8','8'}, {'8','9'},
  {'9','0'}, {'9','1'}, {'9','2'}, {'9','3'}, {'9','4'}, {'9','5'}, {'9','6'}, {'9','7'}, {'9','8'}, {'9','9'}
};
892 :
893 0 : char* FastUInt32ToBufferLeft(uint32 u, char* buffer) {
894 : int digits;
895 0 : const char *ASCII_digits = NULL;
896 : // The idea of this implementation is to trim the number of divides to as few
897 : // as possible by using multiplication and subtraction rather than mod (%),
898 : // and by outputting two digits at a time rather than one.
899 : // The huge-number case is first, in the hopes that the compiler will output
900 : // that case in one branch-free block of code, and only output conditional
901 : // branches into it from below.
902 0 : if (u >= 1000000000) { // >= 1,000,000,000
903 0 : digits = u / 100000000; // 100,000,000
904 0 : ASCII_digits = two_ASCII_digits[digits];
905 0 : buffer[0] = ASCII_digits[0];
906 0 : buffer[1] = ASCII_digits[1];
907 0 : buffer += 2;
908 : sublt100_000_000:
909 0 : u -= digits * 100000000; // 100,000,000
910 : lt100_000_000:
911 0 : digits = u / 1000000; // 1,000,000
912 0 : ASCII_digits = two_ASCII_digits[digits];
913 0 : buffer[0] = ASCII_digits[0];
914 0 : buffer[1] = ASCII_digits[1];
915 0 : buffer += 2;
916 : sublt1_000_000:
917 0 : u -= digits * 1000000; // 1,000,000
918 : lt1_000_000:
919 0 : digits = u / 10000; // 10,000
920 0 : ASCII_digits = two_ASCII_digits[digits];
921 0 : buffer[0] = ASCII_digits[0];
922 0 : buffer[1] = ASCII_digits[1];
923 0 : buffer += 2;
924 : sublt10_000:
925 0 : u -= digits * 10000; // 10,000
926 : lt10_000:
927 0 : digits = u / 100;
928 0 : ASCII_digits = two_ASCII_digits[digits];
929 0 : buffer[0] = ASCII_digits[0];
930 0 : buffer[1] = ASCII_digits[1];
931 0 : buffer += 2;
932 : sublt100:
933 0 : u -= digits * 100;
934 : lt100:
935 0 : digits = u;
936 0 : ASCII_digits = two_ASCII_digits[digits];
937 0 : buffer[0] = ASCII_digits[0];
938 0 : buffer[1] = ASCII_digits[1];
939 0 : buffer += 2;
940 : done:
941 0 : *buffer = 0;
942 0 : return buffer;
943 : }
944 :
945 0 : if (u < 100) {
946 0 : digits = u;
947 0 : if (u >= 10) goto lt100;
948 0 : *buffer++ = '0' + digits;
949 0 : goto done;
950 : }
951 0 : if (u < 10000) { // 10,000
952 0 : if (u >= 1000) goto lt10_000;
953 0 : digits = u / 100;
954 0 : *buffer++ = '0' + digits;
955 0 : goto sublt100;
956 : }
957 0 : if (u < 1000000) { // 1,000,000
958 0 : if (u >= 100000) goto lt1_000_000;
959 0 : digits = u / 10000; // 10,000
960 0 : *buffer++ = '0' + digits;
961 0 : goto sublt10_000;
962 : }
963 0 : if (u < 100000000) { // 100,000,000
964 0 : if (u >= 10000000) goto lt100_000_000;
965 0 : digits = u / 1000000; // 1,000,000
966 0 : *buffer++ = '0' + digits;
967 0 : goto sublt1_000_000;
968 : }
969 : // we already know that u < 1,000,000,000
970 0 : digits = u / 100000000; // 100,000,000
971 0 : *buffer++ = '0' + digits;
972 0 : goto sublt100_000_000;
973 : }
974 :
975 0 : char* FastInt32ToBufferLeft(int32 i, char* buffer) {
976 0 : uint32 u = i;
977 0 : if (i < 0) {
978 0 : *buffer++ = '-';
979 0 : u = -i;
980 : }
981 0 : return FastUInt32ToBufferLeft(u, buffer);
982 : }
983 :
984 0 : char* FastUInt64ToBufferLeft(uint64 u64, char* buffer) {
985 : int digits;
986 0 : const char *ASCII_digits = NULL;
987 :
988 0 : uint32 u = static_cast<uint32>(u64);
989 0 : if (u == u64) return FastUInt32ToBufferLeft(u, buffer);
990 :
991 0 : uint64 top_11_digits = u64 / 1000000000;
992 0 : buffer = FastUInt64ToBufferLeft(top_11_digits, buffer);
993 0 : u = u64 - (top_11_digits * 1000000000);
994 :
995 0 : digits = u / 10000000; // 10,000,000
996 0 : GOOGLE_DCHECK_LT(digits, 100);
997 0 : ASCII_digits = two_ASCII_digits[digits];
998 0 : buffer[0] = ASCII_digits[0];
999 0 : buffer[1] = ASCII_digits[1];
1000 0 : buffer += 2;
1001 0 : u -= digits * 10000000; // 10,000,000
1002 0 : digits = u / 100000; // 100,000
1003 0 : ASCII_digits = two_ASCII_digits[digits];
1004 0 : buffer[0] = ASCII_digits[0];
1005 0 : buffer[1] = ASCII_digits[1];
1006 0 : buffer += 2;
1007 0 : u -= digits * 100000; // 100,000
1008 0 : digits = u / 1000; // 1,000
1009 0 : ASCII_digits = two_ASCII_digits[digits];
1010 0 : buffer[0] = ASCII_digits[0];
1011 0 : buffer[1] = ASCII_digits[1];
1012 0 : buffer += 2;
1013 0 : u -= digits * 1000; // 1,000
1014 0 : digits = u / 10;
1015 0 : ASCII_digits = two_ASCII_digits[digits];
1016 0 : buffer[0] = ASCII_digits[0];
1017 0 : buffer[1] = ASCII_digits[1];
1018 0 : buffer += 2;
1019 0 : u -= digits * 10;
1020 0 : digits = u;
1021 0 : *buffer++ = '0' + digits;
1022 0 : *buffer = 0;
1023 0 : return buffer;
1024 : }
1025 :
1026 0 : char* FastInt64ToBufferLeft(int64 i, char* buffer) {
1027 0 : uint64 u = i;
1028 0 : if (i < 0) {
1029 0 : *buffer++ = '-';
1030 0 : u = -i;
1031 : }
1032 0 : return FastUInt64ToBufferLeft(u, buffer);
1033 : }
1034 :
1035 : // ----------------------------------------------------------------------
1036 : // SimpleItoa()
1037 : // Description: converts an integer to a string.
1038 : //
1039 : // Return value: string
1040 : // ----------------------------------------------------------------------
1041 :
1042 0 : string SimpleItoa(int i) {
1043 : char buffer[kFastToBufferSize];
1044 0 : return (sizeof(i) == 4) ?
1045 : FastInt32ToBuffer(i, buffer) :
1046 0 : FastInt64ToBuffer(i, buffer);
1047 : }
1048 :
1049 0 : string SimpleItoa(unsigned int i) {
1050 : char buffer[kFastToBufferSize];
1051 : return string(buffer, (sizeof(i) == 4) ?
1052 : FastUInt32ToBufferLeft(i, buffer) :
1053 0 : FastUInt64ToBufferLeft(i, buffer));
1054 : }
1055 :
1056 0 : string SimpleItoa(long i) {
1057 : char buffer[kFastToBufferSize];
1058 0 : return (sizeof(i) == 4) ?
1059 : FastInt32ToBuffer(i, buffer) :
1060 0 : FastInt64ToBuffer(i, buffer);
1061 : }
1062 :
1063 0 : string SimpleItoa(unsigned long i) {
1064 : char buffer[kFastToBufferSize];
1065 : return string(buffer, (sizeof(i) == 4) ?
1066 : FastUInt32ToBufferLeft(i, buffer) :
1067 0 : FastUInt64ToBufferLeft(i, buffer));
1068 : }
1069 :
1070 0 : string SimpleItoa(long long i) {
1071 : char buffer[kFastToBufferSize];
1072 0 : return (sizeof(i) == 4) ?
1073 : FastInt32ToBuffer(i, buffer) :
1074 0 : FastInt64ToBuffer(i, buffer);
1075 : }
1076 :
1077 0 : string SimpleItoa(unsigned long long i) {
1078 : char buffer[kFastToBufferSize];
1079 : return string(buffer, (sizeof(i) == 4) ?
1080 : FastUInt32ToBufferLeft(i, buffer) :
1081 0 : FastUInt64ToBufferLeft(i, buffer));
1082 : }
1083 :
1084 : // ----------------------------------------------------------------------
1085 : // SimpleDtoa()
1086 : // SimpleFtoa()
1087 : // DoubleToBuffer()
1088 : // FloatToBuffer()
1089 : // We want to print the value without losing precision, but we also do
1090 : // not want to print more digits than necessary. This turns out to be
1091 : // trickier than it sounds. Numbers like 0.2 cannot be represented
1092 : // exactly in binary. If we print 0.2 with a very large precision,
1093 : // e.g. "%.50g", we get "0.2000000000000000111022302462515654042363167".
1094 : // On the other hand, if we set the precision too low, we lose
1095 : // significant digits when printing numbers that actually need them.
1096 : // It turns out there is no precision value that does the right thing
1097 : // for all numbers.
1098 : //
1099 : // Our strategy is to first try printing with a precision that is never
1100 : // over-precise, then parse the result with strtod() to see if it
1101 : // matches. If not, we print again with a precision that will always
1102 : // give a precise result, but may use more digits than necessary.
1103 : //
1104 : // An arguably better strategy would be to use the algorithm described
1105 : // in "How to Print Floating-Point Numbers Accurately" by Steele &
1106 : // White, e.g. as implemented by David M. Gay's dtoa(). It turns out,
1107 : // however, that the following implementation is about as fast as
1108 : // DMG's code. Furthermore, DMG's code locks mutexes, which means it
1109 : // will not scale well on multi-core machines. DMG's code is slightly
1110 : // more accurate (in that it will never use more digits than
1111 : // necessary), but this is probably irrelevant for most users.
1112 : //
1113 : // Rob Pike and Ken Thompson also have an implementation of dtoa() in
1114 : // third_party/fmt/fltfmt.cc. Their implementation is similar to this
1115 : // one in that it makes guesses and then uses strtod() to check them.
1116 : // Their implementation is faster because they use their own code to
1117 : // generate the digits in the first place rather than use snprintf(),
1118 : // thus avoiding format string parsing overhead. However, this makes
1119 : // it considerably more complicated than the following implementation,
1120 : // and it is embedded in a larger library. If speed turns out to be
1121 : // an issue, we could re-implement this in terms of their
1122 : // implementation.
1123 : // ----------------------------------------------------------------------
1124 :
1125 0 : string SimpleDtoa(double value) {
1126 : char buffer[kDoubleToBufferSize];
1127 0 : return DoubleToBuffer(value, buffer);
1128 : }
1129 :
1130 0 : string SimpleFtoa(float value) {
1131 : char buffer[kFloatToBufferSize];
1132 0 : return FloatToBuffer(value, buffer);
1133 : }
1134 :
// Returns true if c is a decimal digit, an exponent marker ('e' or 'E') or
// a sign ('+' or '-').  The radix character is deliberately not accepted.
static inline bool IsValidFloatChar(char c) {
  if (c >= '0' && c <= '9') return true;
  switch (c) {
    case 'e':
    case 'E':
    case '+':
    case '-':
      return true;
    default:
      return false;
  }
}
1140 :
1141 0 : void DelocalizeRadix(char* buffer) {
1142 : // Fast check: if the buffer has a normal decimal point, assume no
1143 : // translation is needed.
1144 0 : if (strchr(buffer, '.') != NULL) return;
1145 :
1146 : // Find the first unknown character.
1147 0 : while (IsValidFloatChar(*buffer)) ++buffer;
1148 :
1149 0 : if (*buffer == '\0') {
1150 : // No radix character found.
1151 0 : return;
1152 : }
1153 :
1154 : // We are now pointing at the locale-specific radix character. Replace it
1155 : // with '.'.
1156 0 : *buffer = '.';
1157 0 : ++buffer;
1158 :
1159 0 : if (!IsValidFloatChar(*buffer) && *buffer != '\0') {
1160 : // It appears the radix was a multi-byte character. We need to remove the
1161 : // extra bytes.
1162 0 : char* target = buffer;
1163 0 : do { ++buffer; } while (!IsValidFloatChar(*buffer) && *buffer != '\0');
1164 0 : memmove(target, buffer, strlen(buffer) + 1);
1165 : }
1166 : }
1167 :
1168 0 : char* DoubleToBuffer(double value, char* buffer) {
1169 : // DBL_DIG is 15 for IEEE-754 doubles, which are used on almost all
1170 : // platforms these days. Just in case some system exists where DBL_DIG
1171 : // is significantly larger -- and risks overflowing our buffer -- we have
1172 : // this assert.
1173 : GOOGLE_COMPILE_ASSERT(DBL_DIG < 20, DBL_DIG_is_too_big);
1174 :
1175 0 : if (value == numeric_limits<double>::infinity()) {
1176 0 : strcpy(buffer, "inf");
1177 0 : return buffer;
1178 0 : } else if (value == -numeric_limits<double>::infinity()) {
1179 0 : strcpy(buffer, "-inf");
1180 0 : return buffer;
1181 0 : } else if (IsNaN(value)) {
1182 0 : strcpy(buffer, "nan");
1183 0 : return buffer;
1184 : }
1185 :
1186 : int snprintf_result =
1187 0 : snprintf(buffer, kDoubleToBufferSize, "%.*g", DBL_DIG, value);
1188 :
1189 : // The snprintf should never overflow because the buffer is significantly
1190 : // larger than the precision we asked for.
1191 0 : GOOGLE_DCHECK(snprintf_result > 0 && snprintf_result < kDoubleToBufferSize);
1192 :
1193 : // We need to make parsed_value volatile in order to force the compiler to
1194 : // write it out to the stack. Otherwise, it may keep the value in a
1195 : // register, and if it does that, it may keep it as a long double instead
1196 : // of a double. This long double may have extra bits that make it compare
1197 : // unequal to "value" even though it would be exactly equal if it were
1198 : // truncated to a double.
1199 0 : volatile double parsed_value = strtod(buffer, NULL);
1200 0 : if (parsed_value != value) {
1201 : int snprintf_result =
1202 0 : snprintf(buffer, kDoubleToBufferSize, "%.*g", DBL_DIG+2, value);
1203 :
1204 : // Should never overflow; see above.
1205 0 : GOOGLE_DCHECK(snprintf_result > 0 && snprintf_result < kDoubleToBufferSize);
1206 : }
1207 :
1208 0 : DelocalizeRadix(buffer);
1209 0 : return buffer;
1210 : }
1211 :
// Parses str as a floating-point number and stores the result in *value.
// Returns true only if str is non-empty, the whole string was consumed by
// the parser, and the parse did not set errno.  *value is written even
// when false is returned.
bool safe_strtof(const char* str, float* value) {
  char* parse_end;
  errno = 0;  // errno only gets set on errors
#if defined(_WIN32) || defined (__hpux)  // has no strtof()
  *value = strtod(str, &parse_end);
#else
  *value = strtof(str, &parse_end);
#endif
  if (*str == 0) return false;        // empty input
  if (*parse_end != 0) return false;  // trailing unparsed characters
  return errno == 0;
}
1222 :
1223 0 : char* FloatToBuffer(float value, char* buffer) {
1224 : // FLT_DIG is 6 for IEEE-754 floats, which are used on almost all
1225 : // platforms these days. Just in case some system exists where FLT_DIG
1226 : // is significantly larger -- and risks overflowing our buffer -- we have
1227 : // this assert.
1228 : GOOGLE_COMPILE_ASSERT(FLT_DIG < 10, FLT_DIG_is_too_big);
1229 :
1230 0 : if (value == numeric_limits<double>::infinity()) {
1231 0 : strcpy(buffer, "inf");
1232 0 : return buffer;
1233 0 : } else if (value == -numeric_limits<double>::infinity()) {
1234 0 : strcpy(buffer, "-inf");
1235 0 : return buffer;
1236 0 : } else if (IsNaN(value)) {
1237 0 : strcpy(buffer, "nan");
1238 0 : return buffer;
1239 : }
1240 :
1241 : int snprintf_result =
1242 0 : snprintf(buffer, kFloatToBufferSize, "%.*g", FLT_DIG, value);
1243 :
1244 : // The snprintf should never overflow because the buffer is significantly
1245 : // larger than the precision we asked for.
1246 0 : GOOGLE_DCHECK(snprintf_result > 0 && snprintf_result < kFloatToBufferSize);
1247 :
1248 : float parsed_value;
1249 0 : if (!safe_strtof(buffer, &parsed_value) || parsed_value != value) {
1250 : int snprintf_result =
1251 0 : snprintf(buffer, kFloatToBufferSize, "%.*g", FLT_DIG+2, value);
1252 :
1253 : // Should never overflow; see above.
1254 0 : GOOGLE_DCHECK(snprintf_result > 0 && snprintf_result < kFloatToBufferSize);
1255 : }
1256 :
1257 0 : DelocalizeRadix(buffer);
1258 0 : return buffer;
1259 : }
1260 :
1261 0 : string ToHex(uint64 num) {
1262 0 : if (num == 0) {
1263 0 : return string("0");
1264 : }
1265 :
1266 : // Compute hex bytes in reverse order, writing to the back of the
1267 : // buffer.
1268 : char buf[16]; // No more than 16 hex digits needed.
1269 0 : char* bufptr = buf + 16;
1270 : static const char kHexChars[] = "0123456789abcdef";
1271 0 : while (num != 0) {
1272 0 : *--bufptr = kHexChars[num & 0xf];
1273 0 : num >>= 4;
1274 : }
1275 :
1276 0 : return string(bufptr, buf + 16 - bufptr);
1277 : }
1278 :
1279 : } // namespace protobuf
1280 : } // namespace google
|