Line data Source code
1 : /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 : /* vim: set ts=8 sts=2 et sw=2 tw=80: */
3 : /* This Source Code Form is subject to the terms of the Mozilla Public
4 : * License, v. 2.0. If a copy of the MPL was not distributed with this
5 : * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 :
7 : /* A JSON pretty-printer class. */
8 :
9 : // A typical JSON-writing library requires you to first build up a data
10 : // structure that represents a JSON object and then serialize it (to file, or
11 : // somewhere else). This approach makes for a clean API, but building the data
12 : // structure takes up memory. Sometimes that isn't desirable, such as when the
13 : // JSON data is produced for memory reporting.
14 : //
15 : // The JSONWriter class instead allows JSON data to be written out
16 : // incrementally without building up large data structures.
17 : //
18 : // The API is slightly uglier than you would see in a typical JSON-writing
19 : // library, but still fairly easy to use. It's possible to generate invalid
20 : // JSON with JSONWriter, but typically the most basic testing will identify any
21 : // such problems.
22 : //
23 : // Similarly, there are no RAII facilities for automatically closing objects
24 : // and arrays. These would be nice if you are generating all your code within
25 : // nested functions, but in other cases you'd have to maintain an explicit
26 : // stack of RAII objects and manually unwind it, which is no better than just
27 : // calling "end" functions. Furthermore, the consequences of forgetting to
28 : // close an object or array are obvious and, again, will be identified via
29 : // basic testing, unlike other cases where RAII is typically used (e.g. smart
30 : // pointers) and the consequences of defects are more subtle.
31 : //
32 : // Importantly, the class does solve the two hard problems of JSON
33 : // pretty-printing, which are (a) correctly escaping strings, and (b) adding
34 : // appropriate indentation and commas between items.
35 : //
36 : // By default, every property is placed on its own line. However, it is
37 : // possible to request that objects and arrays be placed entirely on a single
38 : // line, which can reduce output size significantly in some cases.
39 : //
40 : // Strings used (for property names and string property values) are |const
41 : // char*| throughout, and can be ASCII or UTF-8.
42 : //
43 : // EXAMPLE
44 : // -------
45 : // Assume that |MyWriteFunc| is a class that implements |JSONWriteFunc|. The
46 : // following code:
47 : //
48 : // JSONWriter w(MakeUnique<MyWriteFunc>());
49 : // w.Start();
50 : // {
51 : // w.NullProperty("null");
52 : // w.BoolProperty("bool", true);
53 : // w.IntProperty("int", 1);
54 : // w.StartArrayProperty("array");
55 : // {
56 : // w.StringElement("string");
57 : // w.StartObjectElement();
58 : // {
59 : // w.DoubleProperty("double", 3.4);
60 : // w.StartArrayProperty("single-line array", w.SingleLineStyle);
61 : // {
62 : // w.IntElement(1);
63 : // w.StartObjectElement(); // SingleLineStyle is inherited from
64 : // w.EndObject(); // above for this collection
65 : // }
66 : // w.EndArray();
67 : // }
68 : // w.EndObject();
69 : // }
70 : // w.EndArray();
71 : // }
72 : // w.End();
73 : //
74 : // will produce pretty-printed output for the following JSON object:
75 : //
76 : // {
77 : // "null": null,
78 : // "bool": true,
79 : // "int": 1,
80 : // "array": [
81 : // "string",
82 : // {
83 : // "double": 3.4,
84 : // "single-line array": [1, {}]
85 : // }
86 : // ]
87 : // }
88 : //
89 : // The nesting in the example code is obviously optional, but can aid
90 : // readability.
91 :
92 : #ifndef mozilla_JSONWriter_h
93 : #define mozilla_JSONWriter_h
94 :
95 : #include "mozilla/double-conversion.h"
96 : #include "mozilla/IntegerPrintfMacros.h"
97 : #include "mozilla/PodOperations.h"
98 : #include "mozilla/Sprintf.h"
99 : #include "mozilla/UniquePtr.h"
100 : #include "mozilla/Vector.h"
101 :
102 : #include <stdio.h>
103 :
104 : namespace mozilla {
105 :
// Abstract output sink for JSONWriter. JSONWriter hands every fragment of
// generated text to Write() as a NUL-terminated C string; subclasses decide
// where that text goes.
//
// This is deliberately an interface rather than a true functor: a functor
// would force JSONWriter to become a template, and that templatization would
// spread into many places where it is not wanted.
class JSONWriteFunc
{
public:
  // Receives the next fragment of output.
  virtual void Write(const char* aStr) = 0;

  // Virtual so that an implementation can be destroyed through a
  // JSONWriteFunc pointer.
  virtual ~JSONWriteFunc() = default;
};
115 :
116 : // Escape lookup table used by |JSONWriter::EscapedString|. Ideally this would be declared within |EscapedString|, but when compiling with GCC
117 : // on Linux that caused link errors, whereas this namespace-scope formulation didn't. Only the declaration lives here; the definition is elsewhere.
118 : namespace detail {
119 : extern MFBT_DATA const char gTwoCharEscapes[256]; // indexed by byte value; a non-zero entry is the char written after '\\' for that byte's two-char escape
120 : } // namespace detail
121 :
122 0 : class JSONWriter
123 : {
124 : // From http://www.ietf.org/rfc/rfc4627.txt:
125 : //
126 : // "All Unicode characters may be placed within the quotation marks except
127 : // for the characters that must be escaped: quotation mark, reverse
128 : // solidus, and the control characters (U+0000 through U+001F)."
129 : //
130 : // This implementation uses two-char escape sequences where possible, namely:
131 : //
132 : // \", \\, \b, \f, \n, \r, \t
133 : //
134 : // All control characters not in the above list are represented with a
135 : // six-char escape sequence, e.g. '\u000b' (a.k.a. '\v').
136 : //
137 : class EscapedString
138 : {
139 : // Only one of |mUnownedStr| and |mOwnedStr| are ever non-null. |mIsOwned|
140 : // indicates which one is in use. They're not within a union because that
141 : // wouldn't work with UniquePtr.
142 : bool mIsOwned;
143 : const char* mUnownedStr;
144 : UniquePtr<char[]> mOwnedStr;
145 :
146 0 : void SanityCheck() const
147 : {
148 0 : MOZ_ASSERT_IF( mIsOwned, mOwnedStr.get() && !mUnownedStr);
149 0 : MOZ_ASSERT_IF(!mIsOwned, !mOwnedStr.get() && mUnownedStr);
150 0 : }
151 :
152 0 : static char hexDigitToAsciiChar(uint8_t u)
153 : {
154 0 : u = u & 0xf;
155 0 : return u < 10 ? '0' + u : 'a' + (u - 10);
156 : }
157 :
158 : public:
159 0 : explicit EscapedString(const char* aStr)
160 0 : : mUnownedStr(nullptr)
161 0 : , mOwnedStr(nullptr)
162 : {
163 : const char* p;
164 :
165 : // First, see if we need to modify the string.
166 0 : size_t nExtra = 0;
167 0 : p = aStr;
168 : while (true) {
169 0 : uint8_t u = *p; // ensure it can't be interpreted as negative
170 0 : if (u == 0) {
171 0 : break;
172 : }
173 0 : if (detail::gTwoCharEscapes[u]) {
174 0 : nExtra += 1;
175 0 : } else if (u <= 0x1f) {
176 0 : nExtra += 5;
177 : }
178 0 : p++;
179 0 : }
180 :
181 0 : if (nExtra == 0) {
182 : // No escapes needed. Easy.
183 0 : mIsOwned = false;
184 0 : mUnownedStr = aStr;
185 0 : return;
186 : }
187 :
188 : // Escapes are needed. We'll create a new string.
189 0 : mIsOwned = true;
190 0 : size_t len = (p - aStr) + nExtra;
191 0 : mOwnedStr = MakeUnique<char[]>(len + 1);
192 :
193 0 : p = aStr;
194 0 : size_t i = 0;
195 :
196 : while (true) {
197 0 : uint8_t u = *p; // ensure it can't be interpreted as negative
198 0 : if (u == 0) {
199 0 : mOwnedStr[i] = 0;
200 0 : break;
201 : }
202 0 : if (detail::gTwoCharEscapes[u]) {
203 0 : mOwnedStr[i++] = '\\';
204 0 : mOwnedStr[i++] = detail::gTwoCharEscapes[u];
205 0 : } else if (u <= 0x1f) {
206 0 : mOwnedStr[i++] = '\\';
207 0 : mOwnedStr[i++] = 'u';
208 0 : mOwnedStr[i++] = '0';
209 0 : mOwnedStr[i++] = '0';
210 0 : mOwnedStr[i++] = hexDigitToAsciiChar((u & 0x00f0) >> 4);
211 0 : mOwnedStr[i++] = hexDigitToAsciiChar(u & 0x000f);
212 : } else {
213 0 : mOwnedStr[i++] = u;
214 : }
215 0 : p++;
216 0 : }
217 : }
218 :
219 0 : ~EscapedString()
220 0 : {
221 0 : SanityCheck();
222 0 : }
223 :
224 0 : const char* get() const
225 : {
226 0 : SanityCheck();
227 0 : return mIsOwned ? mOwnedStr.get() : mUnownedStr;
228 : }
229 : };
230 :
231 : public:
232 : // Collections (objects and arrays) are printed in a multi-line style by
233 : // default. This can be changed to a single-line style if SingleLineStyle is
234 : // specified. If a collection is printed in single-line style, every nested
235 : // collection within it is also printed in single-line style, even if
236 : // multi-line style is requested.
237 : enum CollectionStyle {
238 : MultiLineStyle, // the default
239 : SingleLineStyle
240 : };
241 :
242 : protected:
243 : const UniquePtr<JSONWriteFunc> mWriter;
244 : Vector<bool, 8> mNeedComma; // do we need a comma at depth N?
245 : Vector<bool, 8> mNeedNewlines; // do we need newlines at depth N?
246 : size_t mDepth; // the current nesting depth
247 :
248 0 : void Indent()
249 : {
250 0 : for (size_t i = 0; i < mDepth; i++) {
251 0 : mWriter->Write(" ");
252 : }
253 0 : }
254 :
255 : // Adds whatever is necessary (maybe a comma, and then a newline and
256 : // whitespace) to separate an item (property or element) from what's come
257 : // before.
258 0 : void Separator()
259 : {
260 0 : if (mNeedComma[mDepth]) {
261 0 : mWriter->Write(",");
262 : }
263 0 : if (mDepth > 0 && mNeedNewlines[mDepth]) {
264 0 : mWriter->Write("\n");
265 0 : Indent();
266 0 : } else if (mNeedComma[mDepth]) {
267 0 : mWriter->Write(" ");
268 : }
269 0 : }
270 :
271 0 : void PropertyNameAndColon(const char* aName)
272 : {
273 0 : EscapedString escapedName(aName);
274 0 : mWriter->Write("\"");
275 0 : mWriter->Write(escapedName.get());
276 0 : mWriter->Write("\": ");
277 0 : }
278 :
279 0 : void Scalar(const char* aMaybePropertyName, const char* aStringValue)
280 : {
281 0 : Separator();
282 0 : if (aMaybePropertyName) {
283 0 : PropertyNameAndColon(aMaybePropertyName);
284 : }
285 0 : mWriter->Write(aStringValue);
286 0 : mNeedComma[mDepth] = true;
287 0 : }
288 :
289 0 : void QuotedScalar(const char* aMaybePropertyName, const char* aStringValue)
290 : {
291 0 : Separator();
292 0 : if (aMaybePropertyName) {
293 0 : PropertyNameAndColon(aMaybePropertyName);
294 : }
295 0 : mWriter->Write("\"");
296 0 : mWriter->Write(aStringValue);
297 0 : mWriter->Write("\"");
298 0 : mNeedComma[mDepth] = true;
299 0 : }
300 :
301 0 : void NewVectorEntries()
302 : {
303 : // If these tiny allocations OOM we might as well just crash because we
304 : // must be in serious memory trouble.
305 0 : MOZ_RELEASE_ASSERT(mNeedComma.resizeUninitialized(mDepth + 1));
306 0 : MOZ_RELEASE_ASSERT(mNeedNewlines.resizeUninitialized(mDepth + 1));
307 0 : mNeedComma[mDepth] = false;
308 0 : mNeedNewlines[mDepth] = true;
309 0 : }
310 :
311 0 : void StartCollection(const char* aMaybePropertyName, const char* aStartChar,
312 : CollectionStyle aStyle = MultiLineStyle)
313 : {
314 0 : Separator();
315 0 : if (aMaybePropertyName) {
316 0 : mWriter->Write("\"");
317 0 : mWriter->Write(aMaybePropertyName);
318 0 : mWriter->Write("\": ");
319 : }
320 0 : mWriter->Write(aStartChar);
321 0 : mNeedComma[mDepth] = true;
322 0 : mDepth++;
323 0 : NewVectorEntries();
324 0 : mNeedNewlines[mDepth] =
325 0 : mNeedNewlines[mDepth - 1] && aStyle == MultiLineStyle;
326 0 : }
327 :
328 : // Adds the whitespace and closing char necessary to end a collection.
329 0 : void EndCollection(const char* aEndChar)
330 : {
331 0 : if (mNeedNewlines[mDepth]) {
332 0 : mWriter->Write("\n");
333 0 : mDepth--;
334 0 : Indent();
335 : } else {
336 0 : mDepth--;
337 : }
338 0 : mWriter->Write(aEndChar);
339 0 : }
340 :
341 : public:
342 0 : explicit JSONWriter(UniquePtr<JSONWriteFunc> aWriter)
343 0 : : mWriter(Move(aWriter))
344 : , mNeedComma()
345 : , mNeedNewlines()
346 0 : , mDepth(0)
347 : {
348 0 : NewVectorEntries();
349 0 : }
350 :
351 : // Returns the JSONWriteFunc passed in at creation, for temporary use. The
352 : // JSONWriter object still owns the JSONWriteFunc.
353 0 : JSONWriteFunc* WriteFunc() const { return mWriter.get(); }
354 :
355 : // For all the following functions, the "Prints:" comment indicates what the
356 : // basic output looks like. However, it doesn't indicate the whitespace and
357 : // trailing commas, which are automatically added as required.
358 : //
359 : // All property names and string properties are escaped as necessary.
360 :
361 : // Prints: {
362 0 : void Start(CollectionStyle aStyle = MultiLineStyle)
363 : {
364 0 : StartCollection(nullptr, "{", aStyle);
365 0 : }
366 :
367 : // Prints: }
368 0 : void End() { EndCollection("}\n"); }
369 :
370 : // Prints: "<aName>": null
371 0 : void NullProperty(const char* aName)
372 : {
373 0 : Scalar(aName, "null");
374 0 : }
375 :
376 : // Prints: null
377 0 : void NullElement() { NullProperty(nullptr); }
378 :
379 : // Prints: "<aName>": <aBool>
380 0 : void BoolProperty(const char* aName, bool aBool)
381 : {
382 0 : Scalar(aName, aBool ? "true" : "false");
383 0 : }
384 :
385 : // Prints: <aBool>
386 : void BoolElement(bool aBool) { BoolProperty(nullptr, aBool); }
387 :
388 : // Prints: "<aName>": <aInt>
389 0 : void IntProperty(const char* aName, int64_t aInt)
390 : {
391 : char buf[64];
392 0 : SprintfLiteral(buf, "%" PRId64, aInt);
393 0 : Scalar(aName, buf);
394 0 : }
395 :
396 : // Prints: <aInt>
397 0 : void IntElement(int64_t aInt) { IntProperty(nullptr, aInt); }
398 :
399 : // Prints: "<aName>": <aDouble>
400 0 : void DoubleProperty(const char* aName, double aDouble)
401 : {
402 : static const size_t buflen = 64;
403 : char buf[buflen];
404 : const double_conversion::DoubleToStringConverter &converter =
405 0 : double_conversion::DoubleToStringConverter::EcmaScriptConverter();
406 0 : double_conversion::StringBuilder builder(buf, buflen);
407 0 : converter.ToShortest(aDouble, &builder);
408 0 : Scalar(aName, builder.Finalize());
409 0 : }
410 :
411 : // Prints: <aDouble>
412 0 : void DoubleElement(double aDouble) { DoubleProperty(nullptr, aDouble); }
413 :
414 : // Prints: "<aName>": "<aStr>"
415 0 : void StringProperty(const char* aName, const char* aStr)
416 : {
417 0 : EscapedString escapedStr(aStr);
418 0 : QuotedScalar(aName, escapedStr.get());
419 0 : }
420 :
421 : // Prints: "<aStr>"
422 0 : void StringElement(const char* aStr) { StringProperty(nullptr, aStr); }
423 :
424 : // Prints: "<aName>": [
425 0 : void StartArrayProperty(const char* aName,
426 : CollectionStyle aStyle = MultiLineStyle)
427 : {
428 0 : StartCollection(aName, "[", aStyle);
429 0 : }
430 :
431 : // Prints: [
432 0 : void StartArrayElement(CollectionStyle aStyle = MultiLineStyle)
433 : {
434 0 : StartArrayProperty(nullptr, aStyle);
435 0 : }
436 :
437 : // Prints: ]
438 0 : void EndArray() { EndCollection("]"); }
439 :
440 : // Prints: "<aName>": {
441 0 : void StartObjectProperty(const char* aName,
442 : CollectionStyle aStyle = MultiLineStyle)
443 : {
444 0 : StartCollection(aName, "{", aStyle);
445 0 : }
446 :
447 : // Prints: {
448 0 : void StartObjectElement(CollectionStyle aStyle = MultiLineStyle)
449 : {
450 0 : StartObjectProperty(nullptr, aStyle);
451 0 : }
452 :
453 : // Prints: }
454 0 : void EndObject() { EndCollection("}"); }
455 : };
456 :
457 : } // namespace mozilla
458 :
459 : #endif /* mozilla_JSONWriter_h */
460 :
|