Line data Source code
1 : //========= Copyright Valve Corporation ============//
2 : #include "strtools_public.h"
3 : #include <string.h>
4 : #include <stdio.h>
5 : #include <stdlib.h>
6 :
7 : //-----------------------------------------------------------------------------
8 : // Purpose:
9 : //-----------------------------------------------------------------------------
10 0 : bool StringHasPrefix( const std::string & sString, const std::string & sPrefix )
11 : {
12 0 : return 0 == strnicmp( sString.c_str(), sPrefix.c_str(), sPrefix.length() );
13 : }
14 :
// Tests whether sString begins with sPrefix using an exact (case-sensitive)
// comparison of the first sPrefix.length() characters. An empty prefix
// always matches.
bool StringHasPrefixCaseSensitive( const std::string & sString, const std::string & sPrefix )
{
	const size_t cPrefixLen = sPrefix.length();
	const int nCompare = strncmp( sString.c_str(), sPrefix.c_str(), cPrefixLen );

	return nCompare == 0;
}
19 :
20 :
21 0 : bool StringHasSuffix( const std::string &sString, const std::string &sSuffix )
22 : {
23 0 : size_t cStrLen = sString.length();
24 0 : size_t cSuffixLen = sSuffix.length();
25 :
26 0 : if ( cSuffixLen > cStrLen )
27 0 : return false;
28 :
29 0 : std::string sStringSuffix = sString.substr( cStrLen - cSuffixLen, cSuffixLen );
30 :
31 0 : return 0 == stricmp( sStringSuffix.c_str(), sSuffix.c_str() );
32 : }
33 :
// Tests whether sString ends with sSuffix using an exact (case-sensitive)
// comparison. A suffix longer than the string never matches; an empty suffix
// always matches.
bool StringHasSuffixCaseSensitive( const std::string &sString, const std::string &sSuffix )
{
	const size_t cSuffixLen = sSuffix.length();
	const size_t cStrLen = sString.length();

	if ( cStrLen < cSuffixLen )
	{
		return false;
	}

	// Compare the tail of sString directly; it starts cSuffixLen characters
	// before the terminating NUL.
	const char *pchTail = sString.c_str() + ( cStrLen - cSuffixLen );

	return strncmp( pchTail, sSuffix.c_str(), cSuffixLen ) == 0;
}
46 :
//-----------------------------------------------------------------------------
// Purpose: Appends the UTF-8 encoding (1 to 4 bytes) of a single code point
//          to out. The caller passes values no larger than 0x10ffff.
//-----------------------------------------------------------------------------
static void AppendCodepointAsUTF8( std::string & out, unsigned int codepoint )
{
	if (codepoint <= 0x7f)
	{
		out.append(1, static_cast<char>(codepoint));
	}
	else if (codepoint <= 0x7ff)
	{
		out.append(1, static_cast<char>(0xc0 | ((codepoint >> 6) & 0x1f)));
		out.append(1, static_cast<char>(0x80 | (codepoint & 0x3f)));
	}
	else if (codepoint <= 0xffff)
	{
		out.append(1, static_cast<char>(0xe0 | ((codepoint >> 12) & 0x0f)));
		out.append(1, static_cast<char>(0x80 | ((codepoint >> 6) & 0x3f)));
		out.append(1, static_cast<char>(0x80 | (codepoint & 0x3f)));
	}
	else
	{
		out.append(1, static_cast<char>(0xf0 | ((codepoint >> 18) & 0x07)));
		out.append(1, static_cast<char>(0x80 | ((codepoint >> 12) & 0x3f)));
		out.append(1, static_cast<char>(0x80 | ((codepoint >> 6) & 0x3f)));
		out.append(1, static_cast<char>(0x80 | (codepoint & 0x3f)));
	}
}

//-----------------------------------------------------------------------------
// Purpose: Converts a NUL-terminated wide string holding UTF-16 code units to
//          a UTF-8 std::string. A null input yields an empty string.
//
//          A high surrogate (0xd800-0xdbff) followed by a low surrogate
//          (0xdc00-0xdfff) is combined into one supplementary code point.
//          Malformed input is never turned into unrelated characters: an
//          unpaired surrogate, or a value above 0x10ffff (possible where
//          wchar_t is wider than 16 bits), is emitted as U+FFFD.
//-----------------------------------------------------------------------------
std::string UTF16to8(const wchar_t * in)
{
	std::string out;
	if ( !in )
		return out;

	const unsigned int kReplacement = 0xfffd;

	// Holds a high surrogate that is still waiting for its low half; zero
	// when nothing is pending (a real high surrogate is never zero).
	unsigned int pendingHigh = 0;

	for ( ; *in != 0; ++in )
	{
		const unsigned int unit = static_cast<unsigned int>( *in );

		if (unit >= 0xd800 && unit <= 0xdbff)
		{
			// Two high surrogates in a row: the first one was unpaired.
			if ( pendingHigh != 0 )
				AppendCodepointAsUTF8( out, kReplacement );
			pendingHigh = unit;
			continue;
		}

		unsigned int codepoint;
		if (unit >= 0xdc00 && unit <= 0xdfff)
		{
			if ( pendingHigh != 0 )
				codepoint = 0x10000 + ((pendingHigh - 0xd800) << 10) + (unit - 0xdc00);
			else
				codepoint = kReplacement;	// low surrogate with no preceding high
			pendingHigh = 0;
		}
		else
		{
			// An ordinary unit ends any pending high surrogate without pairing it.
			if ( pendingHigh != 0 )
			{
				AppendCodepointAsUTF8( out, kReplacement );
				pendingHigh = 0;
			}
			codepoint = ( unit > 0x10ffff ) ? kReplacement : unit;
		}

		AppendCodepointAsUTF8( out, codepoint );
	}

	// Input ended in the middle of a surrogate pair.
	if ( pendingHigh != 0 )
		AppendCodepointAsUTF8( out, kReplacement );

	return out;
}
90 :
// Converts a NUL-terminated UTF-8 string to a wide string of UTF-16 code
// units. A null input yields an empty string.
//
// Code points above 0xffff are written as a surrogate pair. The pair is
// computed from (codepoint - 0x10000), as UTF-16 requires; omitting that
// offset yields the wrong high surrogate for every supplementary character.
//
// Handling of malformed input:
//  - a continuation byte (0x80-0xbf) that is not part of a sequence is
//    emitted as U+FFFD rather than as an embedded NUL;
//  - a decoded value above 0x10ffff is emitted as U+FFFD;
//  - a multi-byte sequence cut short by a new lead/ASCII byte or by the end
//    of the input is discarded.
std::wstring UTF8to16(const char * in)
{
	std::wstring out;
	unsigned int codepoint = 0;
	int following = 0;	// continuation bytes still expected for the current sequence
	for ( ; in && *in != 0; ++in )
	{
		unsigned char ch = *in;
		if (ch <= 0x7f)
		{
			codepoint = ch;
			following = 0;
		}
		else if (ch <= 0xbf)
		{
			if (following > 0)
			{
				codepoint = (codepoint << 6) | (ch & 0x3f);
				--following;
			}
			else
			{
				// Stray continuation byte outside of any sequence.
				codepoint = 0xfffd;
			}
		}
		else if (ch <= 0xdf)
		{
			codepoint = ch & 0x1f;
			following = 1;
		}
		else if (ch <= 0xef)
		{
			codepoint = ch & 0x0f;
			following = 2;
		}
		else
		{
			codepoint = ch & 0x07;
			following = 3;
		}
		if (following == 0)
		{
			// Values beyond the Unicode range cannot be expressed in UTF-16.
			if (codepoint > 0x10ffff)
				codepoint = 0xfffd;

			if (codepoint > 0xffff)
			{
				const unsigned int offset = codepoint - 0x10000;
				out.append(1, static_cast<wchar_t>(0xd800 + (offset >> 10)));
				out.append(1, static_cast<wchar_t>(0xdc00 + (offset & 0x03ff)));
			}
			else
				out.append(1, static_cast<wchar_t>(codepoint));
			codepoint = 0;
		}
	}
	return out;
}
141 :
142 :
// Copies pchSource into pchBuffer, truncating if necessary so that the result
// always fits in unBufferSizeBytes bytes including the terminating NUL.
//
// pchBuffer          - destination buffer; nothing is done if it is null.
// unBufferSizeBytes  - total size of pchBuffer in bytes; nothing is done if it
//                      is zero (otherwise "size - 1" would wrap around and
//                      write far outside the buffer).
// pchSource          - NUL-terminated source; a null source produces an empty
//                      string in pchBuffer.
void strcpy_safe( char *pchBuffer, size_t unBufferSizeBytes, const char *pchSource )
{
	if ( !pchBuffer || unBufferSizeBytes == 0 )
		return;

	if ( !pchSource )
	{
		pchBuffer[ 0 ] = '\0';
		return;
	}

	// strncpy does not terminate on truncation, so the last byte is reserved
	// for the NUL and only size - 1 bytes are handed to strncpy.
	pchBuffer[ unBufferSizeBytes - 1 ] = '\0';
	strncpy( pchBuffer, pchSource, unBufferSizeBytes - 1 );
}
148 :
149 :
// --------------------------------------------------------------------
// Purpose: converts a string to upper case
//          Each byte is converted independently with toupper(), so the
//          result depends on the current C locale and multi-byte
//          characters are not treated as a unit.
// --------------------------------------------------------------------
std::string StringToUpper( const std::string & sString )
{
	std::string sOut;
	sOut.reserve( sString.size() );
	for( std::string::const_iterator i = sString.begin(); i != sString.end(); ++i )
	{
		// toupper() is only defined for values representable as unsigned char
		// (or EOF); a plain char may be negative for bytes >= 0x80.
		const unsigned char uch = static_cast<unsigned char>( *i );
		sOut.push_back( static_cast<char>( toupper( uch ) ) );
	}

	return sOut;
}
164 :
165 :
// --------------------------------------------------------------------
// Purpose: converts a string to lower case
//          Each byte is converted independently with tolower(), so the
//          result depends on the current C locale and multi-byte
//          characters are not treated as a unit.
// --------------------------------------------------------------------
std::string StringToLower( const std::string & sString )
{
	std::string sOut;
	sOut.reserve( sString.size() );
	for( std::string::const_iterator i = sString.begin(); i != sString.end(); ++i )
	{
		// tolower() is only defined for values representable as unsigned char
		// (or EOF); a plain char may be negative for bytes >= 0x80.
		const unsigned char uch = static_cast<unsigned char>( *i );
		sOut.push_back( static_cast<char>( tolower( uch ) ) );
	}

	return sOut;
}
180 :
181 :
// Copies sValue, including its terminating NUL, into a caller-supplied buffer.
//
// Returns the number of bytes required to hold the string plus terminator,
// regardless of whether the copy happened. When pchBuffer is null or
// unBufferLen is zero nothing is written. When the buffer is too small it is
// set to the empty string instead of receiving a truncated copy.
uint32_t ReturnStdString( const std::string & sValue, char *pchBuffer, uint32_t unBufferLen )
{
	const uint32_t unRequired = (uint32_t)sValue.length() + 1;

	const bool bHaveBuffer = ( pchBuffer != NULL ) && ( unBufferLen != 0 );
	if ( bHaveBuffer )
	{
		if ( unBufferLen >= unRequired )
		{
			memcpy( pchBuffer, sValue.c_str(), unRequired );
		}
		else
		{
			pchBuffer[0] = '\0';
		}
	}

	return unRequired;
}
199 :
// Replaces the contents of sDest with the first unBufferLen bytes of pchBuffer
// followed by a NUL character.
//
// Note that the trailing NUL is stored as part of the string, so
// sDest.size() ends up as unBufferLen + 1; this matches the historical
// behaviour of this function and is kept for existing callers.
//
// The string is built through the std::string interface rather than by
// writing through the const pointer returned by c_str(), and the length is
// never computed as "unBufferLen + 1" in 32 bits, so a maximal unBufferLen
// cannot wrap to zero. A null pchBuffer is treated as an empty buffer.
void BufferToStdString( std::string & sDest, const char *pchBuffer, uint32_t unBufferLen )
{
	if ( pchBuffer && unBufferLen > 0 )
	{
		sDest.assign( pchBuffer, unBufferLen );
	}
	else
	{
		sDest.clear();
	}

	sDest.push_back( '\0' );
}
206 :
207 : // Commented out by Mozilla, please see README.mozilla
208 : /** Returns a std::string from a uint64_t */
209 : /*
210 : std::string Uint64ToString( uint64_t ulValue )
211 : {
212 : char buf[ 22 ];
213 : #if defined( _WIN32 )
214 : sprintf_s( buf, "%llu", ulValue );
215 : #else
216 : snprintf( buf, sizeof( buf ), "%llu", (long long unsigned int ) ulValue );
217 : #endif
218 : return buf;
219 : }
220 : */
221 :
/** Parses sValue as an unsigned 64-bit integer using strtoull with base 0,
 * so the base is chosen from the text's prefix. Parsing stops at the first
 * character that does not belong to the number; no error is reported. */
uint64_t StringToUint64( const std::string & sValue )
{
	const char *pchValue = sValue.c_str();
	char **ppchEndIgnored = NULL;
	const uint64_t ulParsed = strtoull( pchValue, ppchEndIgnored, 0 );

	return ulParsed;
}
227 :
//-----------------------------------------------------------------------------
// Purpose: Maps a value in the range 0-15 to its upper-case hexadecimal digit.
//          Only the low four bits of nValue are used, so out-of-range input
//          wraps instead of indexing outside the digit table.
//-----------------------------------------------------------------------------
char cIntToHexDigit( int nValue )
{
	//Assert( nValue >= 0 && nValue <= 15 );
	static const char s_rgchHexDigits[] = "0123456789ABCDEF";
	const int iDigit = nValue & 15;

	return s_rgchHexDigits[ iDigit ];
}
236 :
//-----------------------------------------------------------------------------
// Purpose: Converts one hexadecimal digit character ('0'-'9', 'a'-'f' or
//          'A'-'F') to its numeric value 0-15. Returns -1 for any other
//          character.
//-----------------------------------------------------------------------------
int iHexCharToInt( char cValue )
{
	if ( cValue >= '0' && cValue <= '9' )
	{
		return cValue - '0';
	}

	if ( cValue >= 'a' && cValue <= 'f' )
	{
		return cValue - 'a' + 10;
	}

	if ( cValue >= 'A' && cValue <= 'F' )
	{
		return cValue - 'A' + 10;
	}

	return -1;
}
253 :
254 : //-----------------------------------------------------------------------------
255 : // Purpose: Internal implementation of encode, works in the strict RFC manner, or
256 : // with spaces turned to + like HTML form encoding.
257 : //-----------------------------------------------------------------------------
258 0 : void V_URLEncodeInternal( char *pchDest, int nDestLen, const char *pchSource, int nSourceLen, bool bUsePlusForSpace )
259 : {
260 : //AssertMsg( nDestLen > 3*nSourceLen, "Target buffer for V_URLEncode should be 3x source length, plus one for terminating null\n" );
261 :
262 0 : int iDestPos = 0;
263 0 : for ( int i=0; i < nSourceLen; ++i )
264 : {
265 : // worst case we need 3 additional chars
266 0 : if( (iDestPos+3) > nDestLen )
267 : {
268 0 : pchDest[0] = '\0';
269 : // AssertMsg( false, "Target buffer too short\n" );
270 0 : return;
271 : }
272 :
273 : // We allow only a-z, A-Z, 0-9, period, underscore, and hyphen to pass through unescaped.
274 : // These are the characters allowed by both the original RFC 1738 and the latest RFC 3986.
275 : // Current specs also allow '~', but that is forbidden under original RFC 1738.
276 0 : if ( !( pchSource[i] >= 'a' && pchSource[i] <= 'z' ) && !( pchSource[i] >= 'A' && pchSource[i] <= 'Z' ) && !(pchSource[i] >= '0' && pchSource[i] <= '9' )
277 0 : && pchSource[i] != '-' && pchSource[i] != '_' && pchSource[i] != '.'
278 : )
279 : {
280 0 : if ( bUsePlusForSpace && pchSource[i] == ' ' )
281 : {
282 0 : pchDest[iDestPos++] = '+';
283 : }
284 : else
285 : {
286 0 : pchDest[iDestPos++] = '%';
287 0 : uint8_t iValue = pchSource[i];
288 0 : if ( iValue == 0 )
289 : {
290 0 : pchDest[iDestPos++] = '0';
291 0 : pchDest[iDestPos++] = '0';
292 : }
293 : else
294 : {
295 0 : char cHexDigit1 = cIntToHexDigit( iValue % 16 );
296 0 : iValue /= 16;
297 0 : char cHexDigit2 = cIntToHexDigit( iValue );
298 0 : pchDest[iDestPos++] = cHexDigit2;
299 0 : pchDest[iDestPos++] = cHexDigit1;
300 : }
301 0 : }
302 : }
303 : else
304 : {
305 0 : pchDest[iDestPos++] = pchSource[i];
306 : }
307 : }
308 :
309 0 : if( (iDestPos+1) > nDestLen )
310 : {
311 0 : pchDest[0] = '\0';
312 : //AssertMsg( false, "Target buffer too short to terminate\n" );
313 0 : return;
314 : }
315 :
316 : // Null terminate
317 0 : pchDest[iDestPos++] = 0;
318 : }
319 :
320 :
321 : //-----------------------------------------------------------------------------
322 : // Purpose: Internal implementation of decode, works in the strict RFC manner, or
323 : // with spaces turned to + like HTML form encoding.
324 : //
325 : // Returns the amount of space used in the output buffer.
326 : //-----------------------------------------------------------------------------
327 0 : size_t V_URLDecodeInternal( char *pchDecodeDest, int nDecodeDestLen, const char *pchEncodedSource, int nEncodedSourceLen, bool bUsePlusForSpace )
328 : {
329 0 : if ( nDecodeDestLen < nEncodedSourceLen )
330 : {
331 : //AssertMsg( false, "V_URLDecode needs a dest buffer at least as large as the source" );
332 0 : return 0;
333 : }
334 :
335 0 : int iDestPos = 0;
336 0 : for( int i=0; i < nEncodedSourceLen; ++i )
337 : {
338 0 : if ( bUsePlusForSpace && pchEncodedSource[i] == '+' )
339 : {
340 0 : pchDecodeDest[ iDestPos++ ] = ' ';
341 : }
342 0 : else if ( pchEncodedSource[i] == '%' )
343 : {
344 : // Percent signifies an encoded value, look ahead for the hex code, convert to numeric, and use that
345 :
346 : // First make sure we have 2 more chars
347 0 : if ( i < nEncodedSourceLen - 2 )
348 : {
349 0 : char cHexDigit1 = pchEncodedSource[i+1];
350 0 : char cHexDigit2 = pchEncodedSource[i+2];
351 :
352 : // Turn the chars into a hex value, if they are not valid, then we'll
353 : // just place the % and the following two chars direct into the string,
354 : // even though this really shouldn't happen, who knows what bad clients
355 : // may do with encoding.
356 0 : bool bValid = false;
357 0 : int iValue = iHexCharToInt( cHexDigit1 );
358 0 : if ( iValue != -1 )
359 : {
360 0 : iValue *= 16;
361 0 : int iValue2 = iHexCharToInt( cHexDigit2 );
362 0 : if ( iValue2 != -1 )
363 : {
364 0 : iValue += iValue2;
365 0 : pchDecodeDest[ iDestPos++ ] = (char)iValue;
366 0 : bValid = true;
367 : }
368 : }
369 :
370 0 : if ( !bValid )
371 : {
372 0 : pchDecodeDest[ iDestPos++ ] = '%';
373 0 : pchDecodeDest[ iDestPos++ ] = cHexDigit1;
374 0 : pchDecodeDest[ iDestPos++ ] = cHexDigit2;
375 : }
376 : }
377 :
378 : // Skip ahead
379 0 : i += 2;
380 : }
381 : else
382 : {
383 0 : pchDecodeDest[ iDestPos++ ] = pchEncodedSource[i];
384 : }
385 : }
386 :
387 : // We may not have extra room to NULL terminate, since this can be used on raw data, but if we do
388 : // go ahead and do it as this can avoid bugs.
389 0 : if ( iDestPos < nDecodeDestLen )
390 : {
391 0 : pchDecodeDest[iDestPos] = 0;
392 : }
393 :
394 0 : return (size_t)iDestPos;
395 : }
396 :
397 : //-----------------------------------------------------------------------------
398 : // Purpose: Encodes a string (or binary data) from URL encoding format, see rfc1738 section 2.2.
399 : // This version of the call isn't a strict RFC implementation, but uses + for space as is
400 : // the standard in HTML form encoding, despite it not being part of the RFC.
401 : //
402 : // Dest buffer should be at least as large as source buffer to guarantee room for decode.
403 : //-----------------------------------------------------------------------------
404 0 : void V_URLEncode( char *pchDest, int nDestLen, const char *pchSource, int nSourceLen )
405 : {
406 0 : return V_URLEncodeInternal( pchDest, nDestLen, pchSource, nSourceLen, true );
407 : }
408 :
409 :
410 : //-----------------------------------------------------------------------------
411 : // Purpose: Decodes a string (or binary data) from URL encoding format, see rfc1738 section 2.2.
412 : // This version of the call isn't a strict RFC implementation, but uses + for space as is
413 : // the standard in HTML form encoding, despite it not being part of the RFC.
414 : //
415 : // Dest buffer should be at least as large as source buffer to guarantee room for decode.
416 : // Dest buffer being the same as the source buffer (decode in-place) is explicitly allowed.
417 : //-----------------------------------------------------------------------------
418 0 : size_t V_URLDecode( char *pchDecodeDest, int nDecodeDestLen, const char *pchEncodedSource, int nEncodedSourceLen )
419 : {
420 0 : return V_URLDecodeInternal( pchDecodeDest, nDecodeDestLen, pchEncodedSource, nEncodedSourceLen, true );
421 : }
422 :
//-----------------------------------------------------------------------------
// Purpose: Removes the file extension (the last '.' and everything after it)
//          from the path in place.
//
//          The dot only counts as an extension separator when it comes after
//          the last path separator ('\\' or '/'). That leaves paths such as
//          ".\blah" or "c:\my@email.com\abc\def\geh" untouched, which would
//          otherwise wind up as "" and "c:\my@email" respectively.
//
//          A path with no separator at all (e.g. "foo.txt") is stripped too:
//          the "separator not found" result is npos, the largest size_type
//          value, so it is tested for explicitly instead of being compared
//          against the dot position.
//-----------------------------------------------------------------------------
void V_StripExtension( std::string &in )
{
	const std::string::size_type dot = in.rfind( '.' );
	if ( dot == std::string::npos )
		return;

	const std::string::size_type separator = in.find_last_of( "\\/" );
	if ( separator == std::string::npos || separator < dot )
	{
		in.resize( dot );
	}
}
439 :
|