Coverage Report

Created: 2026-03-10 06:52

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/json5format/src/lib.rs
Line
Count
Source
1
// Copyright (c) 2020 Google LLC All rights reserved.
2
// Use of this source code is governed by a BSD-style
3
// license that can be found in the LICENSE file.
4
5
//! A stylized formatter for [JSON5](https://json5.org) ("JSON for Humans") documents.
6
//!
7
//! The intent of this formatter is to rewrite a given valid JSON5 document, restructuring the
8
//! output (if required) to conform to a consistent style.
9
//!
10
//! The resulting document should preserve all data precision, data format representations, and
11
//! semantic intent. Readability should be maintained, if not improved by the consistency within and
12
//! across documents.
13
//!
14
//! Most importantly, all JSON5 comments should be preserved, maintaining the
15
//! positional relationship with the JSON5 data elements they were intended to document.
16
//!
17
//! # Example
18
//!
19
//! ```rust
20
//!   use json5format::*;
21
//!   use maplit::hashmap;
22
//!   use maplit::hashset;
23
//!
24
//!   let json5=r##"{
25
//!       "name": {
26
//!           "last": "Smith",
27
//!           "first": "John",
28
//!           "middle": "Jacob"
29
//!       },
30
//!       "children": [
31
//!           "Buffy",
32
//!           "Biff",
33
//!           "Balto"
34
//!       ],
35
//!       // Consider adding a note field to the `other` contact option
36
//!       "contact_options": [
37
//!           {
38
//!               "home": {
39
//!                   "email": "jj@notreallygmail.com",   // This was the original user id.
40
//!                                                       // Now user id's are hash values.
41
//!                   "phone": "212-555-4321"
42
//!               },
43
//!               "other": {
44
//!                   "email": "volunteering@serviceprojectsrus.org"
45
//!               },
46
//!               "work": {
47
//!                   "phone": "212-555-1234",
48
//!                   "email": "john.j.smith@worksforme.gov"
49
//!               }
50
//!           }
51
//!       ],
52
//!       "address": {
53
//!           "city": "Anytown",
54
//!           "country": "USA",
55
//!           "state": "New York",
56
//!           "street": "101 Main Street"
57
//!           /* Update schema to support multiple addresses:
58
//!              "work": {
59
//!                  "city": "Anytown",
60
//!                  "country": "USA",
61
//!                  "state": "New York",
62
//!                  "street": "101 Main Street"
63
//!              }
64
//!           */
65
//!       }
66
//!   }
67
//!   "##;
68
//!
69
//!   let options = FormatOptions {
70
//!       indent_by: 2,
71
//!       collapse_containers_of_one: true,
72
//!       options_by_path: hashmap! {
73
//!           "/*" => hashset! {
74
//!               PathOption::PropertyNameOrder(vec![
75
//!                   "name",
76
//!                   "address",
77
//!                   "contact_options",
78
//!               ]),
79
//!           },
80
//!           "/*/name" => hashset! {
81
//!               PathOption::PropertyNameOrder(vec![
82
//!                   "first",
83
//!                   "middle",
84
//!                   "last",
85
//!                   "suffix",
86
//!               ]),
87
//!           },
88
//!           "/*/children" => hashset! {
89
//!               PathOption::SortArrayItems(true),
90
//!           },
91
//!           "/*/*/*" => hashset! {
92
//!               PathOption::PropertyNameOrder(vec![
93
//!                   "work",
94
//!                   "home",
95
//!                   "other",
96
//!               ]),
97
//!           },
98
//!           "/*/*/*/*" => hashset! {
99
//!               PathOption::PropertyNameOrder(vec![
100
//!                   "phone",
101
//!                   "email",
102
//!               ]),
103
//!           },
104
//!       },
105
//!       ..Default::default()
106
//!   };
107
//!
108
//!   let filename = "new_contact.json5".to_string();
109
//!
110
//!   let format = Json5Format::with_options(options)?;
111
//!   let parsed_document = ParsedDocument::from_str(&json5, Some(filename))?;
112
//!   let bytes: Vec<u8> = format.to_utf8(&parsed_document)?;
113
//!
114
//!   assert_eq!(std::str::from_utf8(&bytes)?, r##"{
115
//!   name: {
116
//!     first: "John",
117
//!     middle: "Jacob",
118
//!     last: "Smith",
119
//!   },
120
//!   address: {
121
//!     city: "Anytown",
122
//!     country: "USA",
123
//!     state: "New York",
124
//!     street: "101 Main Street",
125
//!
126
//!     /* Update schema to support multiple addresses:
127
//!        "work": {
128
//!            "city": "Anytown",
129
//!            "country": "USA",
130
//!            "state": "New York",
131
//!            "street": "101 Main Street"
132
//!        }
133
//!     */
134
//!   },
135
//!
136
//!   // Consider adding a note field to the `other` contact option
137
//!   contact_options: [
138
//!     {
139
//!       work: {
140
//!         phone: "212-555-1234",
141
//!         email: "john.j.smith@worksforme.gov",
142
//!       },
143
//!       home: {
144
//!         phone: "212-555-4321",
145
//!         email: "jj@notreallygmail.com", // This was the original user id.
146
//!                                         // Now user id's are hash values.
147
//!       },
148
//!       other: { email: "volunteering@serviceprojectsrus.org" },
149
//!     },
150
//!   ],
151
//!   children: [
152
//!     "Balto",
153
//!     "Biff",
154
//!     "Buffy",
155
//!   ],
156
//! }
157
//! "##);
158
//! # Ok::<(),anyhow::Error>(())
159
//! ```
160
//!
161
//! # Formatter Actions
162
//!
163
//! When the options above are applied to the input, the formatter will make the following changes:
164
//!
165
//!   * The formatted document will be indented by 2 spaces.
166
//!   * Quotes are removed from all property names (since they are all legal ECMAScript identifiers).
167
//!   * The top-level properties will be reordered to [`name`, `address`, `contact_options`]. Since
168
//!     property name `children` was not included in the sort order, it will be placed at the end.
169
//!   * The `name` properties will be reordered to [`first`, `middle`, `last`].
170
//!   * The properties of the unnamed object in array `contact_options` will be reordered to
171
//!     [`work`, `home`, `other`].
172
//!   * The properties of the `work`, `home`, and `other` objects will be reordered to
173
//!     [`phone`, `email`].
174
//!   * The `children` names array of string primitives will be sorted.
175
//!   * All elements (except the top-level object, represented by the outermost curly braces) will
176
//!     end with a comma.
177
//!   * Since the `contact_options` descendant element `other` has only one property, the `other`
178
//!     object structure will collapse to a single line, with internal trailing comma suppressed.
179
//!   * The line comment will retain its relative position, above `contact_options`.
180
//!   * The block comment will retain its relative position, inside and at the end of the `address`
181
//!     object.
182
//!   * The end-of-line comment after `home`/`email` will retain its relative location (appended at
183
//!     the end of the `email` value) and any subsequent line comments with the same vertical
184
//!     alignment are also retained, and vertically adjusted to be left-aligned with the new
185
//!     position of the first comment line.
186
//!
187
//! # Formatter Behavior Details
188
//!
189
//! For reference, the following sections detail how the JSON5 formatter verifies and processes
190
//! JSON5 content.
191
//!
192
//! ## Syntax Validation
193
//!
194
//! * Structural syntax is checked, such as validating matching braces, property name-colon-value
195
//!   syntax, enforced separation of values by commas, properly quoted strings, and both block and
196
//!   line comment extraction.
197
//! * Non-string literal value syntax is checked (null, true, false, and the various legal formats
198
//!   for JSON5 Numbers).
199
//! * Syntax errors produce error messages with the line and column where the problem
200
//!   was encountered.
201
//!
202
//! ## Property Names
203
//!
204
//! * Duplicate property names are retained, but may constitute errors in higher-level JSON5
205
//!   parsers or schema-specific deserializers.
206
//! * All JSON5 unquoted property name characters are supported, including '$' and '_'. Digits are
207
//!   the only valid property name character that cannot be the first character. Property names
208
//!   can also be represented as quoted strings. All valid JSON5 strings, if quoted, are valid
209
//!   property names (including multi-line strings and quoted numbers).
210
//!
211
//! Example:
212
//! ```json
213
//!     $_meta_prop: 'Has "double quotes" and \'single quotes\' and \
214
//! multiple lines with escaped \\ backslash',
215
//! ```
216
//!
217
//! ## Literal Values
218
//!
219
//! * JSON5 supports quoting strings (literal values or quoted property names) by either double (")
220
//!   or single (') quote. The formatter does not change the quotes. Double-quoting is
221
//!   conventional, but single quotes may be used when quoting strings containing double-quotes, and
222
//!   leaving the single quotes as-is is preferred.
223
//! * JSON5 literal values are retained as-is. Strings retain all spacing characters, including
224
//!   escaped newlines. All other literals (unquoted tokens without spaces, such as false, null,
225
//!   0.234, 1337, or l33t) are _not_ interpreted syntactically. Other schema-based tools and JSON5
226
//!   deserializers may flag these invalid values.
227
//!
228
//! ## Optional Sorting
229
//!
230
//! * By default, array items and object properties retain their original order. (Some JSON arrays
231
//!   are order-dependent, and sorting them indiscriminately might change the meaning of the data.)
232
//! * The formatter can automatically sort array items and object properties if enabled via
233
//!   `FormatOptions`:
234
//!   - To sort all arrays in the document, set
235
//!     [FormatOptions.sort_array_items](struct.FormatOptions.html#structfield.sort_array_items) to
236
//!     `true`
237
//!   - To sort only specific arrays in the target schema, specify the schema location under
238
//!     [FormatOptions.options_by_path](struct.FormatOptions.html#structfield.options_by_path), and
239
//!     set its [SortArrayItems](enum.PathOption.html#variant.SortArrayItems) option.
240
//!   - Properties are sorted based on an explicit user-supplied list of property names in the
241
//!     preferred order, for objects at a specified path. Specify the object's location in the
242
//!     target schema using
243
//!     [FormatOptions.options_by_path](struct.FormatOptions.html#structfield.options_by_path), and
244
//!     provide a vector of property name strings with the
245
//!     [PropertyNameOrder](enum.PathOption.html#variant.PropertyNameOrder) option. Properties not
246
//!     included in this option retain their original order, behind the explicitly ordered
247
//!     properties, if any.
248
//! * When sorting array items, the formatter only sorts array item literal values (strings,
249
//!   numbers, bools, and null). Child arrays or objects are left in their original order, after
250
//!   sorted literals, if any, within the same array.
251
//! * Array items are sorted in case-insensitive unicode lexicographic order. **(Note that, since
252
//!   the formatter does not parse unquoted literals, number types cannot be sorted numerically.)**
253
//!   Items that are case-insensitively equal are re-compared and ordered case-sensitively with
254
//!   respect to each other.
255
//!
256
//! ## Associated Comments
257
//!
258
//! * All comments immediately preceding an element (value or start of an array or object), and
259
//!   trailing line comments (starting on the same line as the element, optionally continued on
260
//!   successive lines if all line comments are left-aligned), are retained and move with the
261
//!   associated item if the item is repositioned during sorting.
262
//! * All line and block comments are retained. Typically, the comments are re-aligned vertically
263
//!   (indented) with the values with which they were associated.
264
//! * A single line comment appearing immediately after a JSON value (primitive or closing brace),
265
//!   on the same line, will remain appended to that value on its line after re-formatting.
266
//! * Spaces separate block comments from blocks of contiguous line comments associated with the
267
//!   same entry.
268
//! * Comments at the end of a list (after the last property or item) are retained at the end of
269
//!   the same list.
270
//! * Block comments with lines that extend to the left of the opening "/\*" are not re-aligned.
271
//!
272
//! ## Whitespace Handling
273
//!
274
//! * Unicode characters are allowed, and unicode space characters should retain their meaning
275
//!   according to unicode standards.
276
//! * All spaces inside single- or multi-line strings are retained. All spaces in comments are
277
//!   retained *except* trailing spaces at the end of a line.
278
//! * All other original spaces are removed.
279
280
#![deny(missing_docs)]
281
#![allow(clippy::len_zero)]
282
283
#[macro_use]
284
mod error;
285
286
mod content;
287
mod formatter;
288
mod options;
289
mod parser;
290
291
use {
292
    crate::formatter::*, std::cell::RefCell, std::collections::HashMap, std::collections::HashSet,
293
    std::rc::Rc,
294
};
295
296
pub use content::Array;
297
pub use content::Comment;
298
pub use content::Comments;
299
pub use content::Object;
300
pub use content::ParsedDocument;
301
pub use content::Primitive;
302
pub use content::Property;
303
pub use content::Value;
304
pub use error::Error;
305
pub use error::Location;
306
pub use options::FormatOptions;
307
pub use options::PathOption;
308
309
/// Format a JSON5 document, applying a consistent style, with given options.
310
///
311
/// See [FormatOptions](struct.FormatOptions.html) for style options, and confirm the defaults by
312
/// reviewing the source of truth via the `src` link for
313
/// [impl Default for FormatOptions](struct.FormatOptions.html#impl-Default).
314
///
315
/// # Format and Style (Default)
316
///
317
/// Unless FormatOptions are modified, the JSON5 formatter takes a JSON5 document (as a Unicode
318
/// String) and generates a new document with the following formatting:
319
///
320
/// * Indents 4 spaces.
321
/// * Quotes are removed from property names if they are legal ECMAScript 5.1 identifiers. Property
322
///   names that do not comply with ECMAScript identifier format requirements will retain their
323
///   existing (single or double) quotes.
324
/// * All property and item lists end with a trailing comma.
325
/// * All property and item lists are broken down; that is, the braces are on separate lines and
326
///   all values are indented.
327
///
328
/// ```json
329
/// {
330
///     key: "value",
331
///     array: [
332
///         3.145,
333
///     ],
334
/// }
335
/// ```
336
///
337
/// # Arguments
338
///   * `buffer` - A Unicode string containing the original JSON5 document.
339
///   * `filename` - An optional filename. Parsing errors typically include the filename (if given),
340
///     and the line number and character column where the error was detected.
341
///   * `options` - Format style options to override the default style, if provided.
342
/// # Returns
343
///   * The formatted result in UTF-8 encoded bytes, or the `Error` propagated from parsing the buffer, building the formatter from `options`, or formatting.
344
pub fn format(
345
    buffer: &str,
346
    filename: Option<String>,
347
    options: Option<FormatOptions>,
348
) -> Result<Vec<u8>, Error> {
349
    let parsed_document = ParsedDocument::from_str(buffer, filename)?; // parse first; a parse error returns before any formatter is built
350
    let options = match options {
351
        Some(options) => options,
352
        None => FormatOptions { ..Default::default() }, // no overrides given: use the default style
353
    };
354
    Json5Format::with_options(options)?.to_utf8(&parsed_document)
355
}
356
357
/// A JSON5 formatter that takes an already-parsed JSON5 document (a `ParsedDocument`) and produces a new, formatted document.
358
pub struct Json5Format {
359
    /// Options that alter how the formatter generates the formatted output. This instance of
360
    /// FormatOptions is what remains of the FormatOptions passed to the `with_options` constructor
361
    /// after its `options_by_path` entries are drained out; those entries are used to initialize the SubpathOptions
362
    /// hierarchy rooted at the `document_root_options_ref`.
363
    default_options: FormatOptions,
364
365
    /// Depth-specific options applied at the document root and below. A clone of this `Rc` is handed to the `Formatter` created by each `to_utf8` call.
366
    document_root_options_ref: Rc<RefCell<SubpathOptions>>,
367
}
368
369
impl Json5Format {
370
    /// Create and return a Json5Format, with the given options to be applied to the
371
    /// [Json5Format::to_utf8()](struct.Json5Format.html#method.to_utf8) operation. Returns a configuration `Error` if any `options_by_path` key does not start with '/'.
372
    pub fn with_options(mut options: FormatOptions) -> Result<Self, Error> {
373
        let mut document_root_options = SubpathOptions::new(&options);
374
375
        // Typical JSON5 documents start and end with curly braces for a top-level unnamed
376
        // object. This is by convention, and the Json5Format represents this
377
        // top-level object as a single child in a conceptual array. The array square braces
378
        // are not rendered, and by convention, the child object should not have a trailing
379
        // comma, even if trailing commas are the default everywhere else in the document.
380
        //
381
        // Set the SubpathOptions for the document array items to prevent trailing commas.
382
        document_root_options.options.trailing_commas = false;
383
384
        let mut options_by_path =
385
            options.options_by_path.drain().collect::<HashMap<&'static str, HashSet<PathOption>>>();
386
387
        // With `options_by_path` drained (now empty), what remains of `options` serves as the defaults.
388
        let default_options = options;
389
390
        // Transfer the options_by_path from the given options into the SubpathOptions tree
391
        // rooted at `document_root_options`.
392
        for (path, path_options) in options_by_path.drain() {
393
            let rc; // declared before the `if` so the Rc assigned in a branch outlives the borrow taken from it
394
            let mut borrowed; // likewise: the RefMut guard must outlive the `&mut` used after the `if`
395
            let subpath_options = if path == "/" { // "/" addresses the document root options directly
396
                &mut document_root_options
397
            } else if let Some(remaining) = path.strip_prefix('/') { // any other path: one segment per '/'-separated component
398
                rc = document_root_options.get_or_create_subpath_options(
399
                    &remaining.split('/').collect::<Vec<_>>(),
400
                    &default_options,
401
                );
402
                borrowed = rc.borrow_mut();
403
                &mut *borrowed
404
            } else { // the path does not start with '/'
405
                return Err(Error::configuration(format!(
406
                    "PathOption path '{}' is invalid.",
407
                    path
408
                )));
409
            };
410
            subpath_options.override_default_options(&path_options);
411
        }
412
413
        Ok(Json5Format {
414
            default_options,
415
            document_root_options_ref: Rc::new(RefCell::new(document_root_options)),
416
        })
417
    }
418
419
    /// Create and return a Json5Format, with the default `FormatOptions`.
420
    pub fn new() -> Result<Self, Error> {
421
        Self::with_options(FormatOptions { ..Default::default() })
422
    }
423
424
    /// Formats the parsed document into a new vector of UTF-8 bytes.
425
    ///
426
    /// # Arguments
427
    ///   * `parsed_document` - The parsed state of the incoming document.
428
    ///
429
    /// # Example
430
    ///
431
    /// ```
432
    /// # use json5format::*;
433
    /// # let buffer = String::from("{}");
434
    /// # let filename = String::from("example.json5");
435
    /// let format = Json5Format::new()?;
436
    /// let parsed_document = ParsedDocument::from_str(&buffer, Some(filename))?;
437
    /// let bytes = format.to_utf8(&parsed_document)?;
438
    /// # assert_eq!("{}\n", std::str::from_utf8(&bytes).unwrap());
439
    /// # Ok::<(),anyhow::Error>(())
440
    /// ```
441
    pub fn to_utf8(&self, parsed_document: &ParsedDocument) -> Result<Vec<u8>, Error> {
442
        let formatter = // a new Formatter per call, given a copy of the defaults and a clone of the Rc to the root options
443
            Formatter::new(self.default_options.clone(), self.document_root_options_ref.clone());
444
        formatter.format(parsed_document)
445
    }
446
447
    /// Formats the parsed document into a new String.
448
    ///
449
    /// # Arguments
450
    ///   * `parsed_document` - The parsed state of the incoming document.
451
    ///
452
    /// # Example
453
    ///
454
    /// ```
455
    /// # use json5format::*;
456
    /// # fn main() -> std::result::Result<(), Error> {
457
    /// # let buffer = String::from("{}");
458
    /// # let filename = String::from("example.json5");
459
    /// let format = Json5Format::new()?;
460
    /// let parsed_document = ParsedDocument::from_str(&buffer, Some(filename))?;
461
    /// let formatted = format.to_string(&parsed_document)?;
462
    /// # assert_eq!("{}\n", formatted);
463
    /// # Ok(())
464
    /// # }
465
    /// ```
466
    pub fn to_string(&self, parsed_document: &ParsedDocument) -> Result<String, Error> {
467
        String::from_utf8(self.to_utf8(parsed_document)?)
468
0
            .map_err(|e| Error::internal(None, e.to_string())) // formatted bytes that are not valid UTF-8 become an internal error
469
    }
470
}