/src/json5format/src/lib.rs
Line | Count | Source |
1 | | // Copyright (c) 2020 Google LLC All rights reserved. |
2 | | // Use of this source code is governed by a BSD-style |
3 | | // license that can be found in the LICENSE file. |
4 | | |
5 | | //! A stylized formatter for [JSON5](https://json5.org) ("JSON for Humans") documents. |
6 | | //! |
7 | | //! The intent of this formatter is to rewrite a given valid JSON5 document, restructuring the |
8 | | //! output (if required) to conform to a consistent style. |
9 | | //! |
10 | | //! The resulting document should preserve all data precision, data format representations, and |
11 | | //! semantic intent. Readability should be maintained, if not improved by the consistency within and |
12 | | //! across documents. |
13 | | //! |
14 | | //! Most importantly, all JSON5 comments should be preserved, maintaining the |
15 | | //! positional relationship with the JSON5 data elements they were intended to document. |
16 | | //! |
17 | | //! # Example |
18 | | //! |
19 | | //! ```rust |
20 | | //! use json5format::*; |
21 | | //! use maplit::hashmap; |
22 | | //! use maplit::hashset; |
23 | | //! |
24 | | //! let json5=r##"{ |
25 | | //! "name": { |
26 | | //! "last": "Smith", |
27 | | //! "first": "John", |
28 | | //! "middle": "Jacob" |
29 | | //! }, |
30 | | //! "children": [ |
31 | | //! "Buffy", |
32 | | //! "Biff", |
33 | | //! "Balto" |
34 | | //! ], |
35 | | //! // Consider adding a note field to the `other` contact option |
36 | | //! "contact_options": [ |
37 | | //! { |
38 | | //! "home": { |
39 | | //! "email": "jj@notreallygmail.com", // This was the original user id. |
40 | | //! // Now user id's are hash values. |
41 | | //! "phone": "212-555-4321" |
42 | | //! }, |
43 | | //! "other": { |
44 | | //! "email": "volunteering@serviceprojectsrus.org" |
45 | | //! }, |
46 | | //! "work": { |
47 | | //! "phone": "212-555-1234", |
48 | | //! "email": "john.j.smith@worksforme.gov" |
49 | | //! } |
50 | | //! } |
51 | | //! ], |
52 | | //! "address": { |
53 | | //! "city": "Anytown", |
54 | | //! "country": "USA", |
55 | | //! "state": "New York", |
56 | | //! "street": "101 Main Street" |
57 | | //! /* Update schema to support multiple addresses: |
58 | | //! "work": { |
59 | | //! "city": "Anytown", |
60 | | //! "country": "USA", |
61 | | //! "state": "New York", |
62 | | //! "street": "101 Main Street" |
63 | | //! } |
64 | | //! */ |
65 | | //! } |
66 | | //! } |
67 | | //! "##; |
68 | | //! |
69 | | //! let options = FormatOptions { |
70 | | //! indent_by: 2, |
71 | | //! collapse_containers_of_one: true, |
72 | | //! options_by_path: hashmap! { |
73 | | //! "/*" => hashset! { |
74 | | //! PathOption::PropertyNameOrder(vec![ |
75 | | //! "name", |
76 | | //! "address", |
77 | | //! "contact_options", |
78 | | //! ]), |
79 | | //! }, |
80 | | //! "/*/name" => hashset! { |
81 | | //! PathOption::PropertyNameOrder(vec![ |
82 | | //! "first", |
83 | | //! "middle", |
84 | | //! "last", |
85 | | //! "suffix", |
86 | | //! ]), |
87 | | //! }, |
88 | | //! "/*/children" => hashset! { |
89 | | //! PathOption::SortArrayItems(true), |
90 | | //! }, |
91 | | //! "/*/*/*" => hashset! { |
92 | | //! PathOption::PropertyNameOrder(vec![ |
93 | | //! "work", |
94 | | //! "home", |
95 | | //! "other", |
96 | | //! ]), |
97 | | //! }, |
98 | | //! "/*/*/*/*" => hashset! { |
99 | | //! PathOption::PropertyNameOrder(vec![ |
100 | | //! "phone", |
101 | | //! "email", |
102 | | //! ]), |
103 | | //! }, |
104 | | //! }, |
105 | | //! ..Default::default() |
106 | | //! }; |
107 | | //! |
108 | | //! let filename = "new_contact.json5".to_string(); |
109 | | //! |
110 | | //! let format = Json5Format::with_options(options)?; |
111 | | //! let parsed_document = ParsedDocument::from_str(&json5, Some(filename))?; |
112 | | //! let bytes: Vec<u8> = format.to_utf8(&parsed_document)?; |
113 | | //! |
114 | | //! assert_eq!(std::str::from_utf8(&bytes)?, r##"{ |
115 | | //! name: { |
116 | | //! first: "John", |
117 | | //! middle: "Jacob", |
118 | | //! last: "Smith", |
119 | | //! }, |
120 | | //! address: { |
121 | | //! city: "Anytown", |
122 | | //! country: "USA", |
123 | | //! state: "New York", |
124 | | //! street: "101 Main Street", |
125 | | //! |
126 | | //! /* Update schema to support multiple addresses: |
127 | | //! "work": { |
128 | | //! "city": "Anytown", |
129 | | //! "country": "USA", |
130 | | //! "state": "New York", |
131 | | //! "street": "101 Main Street" |
132 | | //! } |
133 | | //! */ |
134 | | //! }, |
135 | | //! |
136 | | //! // Consider adding a note field to the `other` contact option |
137 | | //! contact_options: [ |
138 | | //! { |
139 | | //! work: { |
140 | | //! phone: "212-555-1234", |
141 | | //! email: "john.j.smith@worksforme.gov", |
142 | | //! }, |
143 | | //! home: { |
144 | | //! phone: "212-555-4321", |
145 | | //! email: "jj@notreallygmail.com", // This was the original user id. |
146 | | //! // Now user id's are hash values. |
147 | | //! }, |
148 | | //! other: { email: "volunteering@serviceprojectsrus.org" }, |
149 | | //! }, |
150 | | //! ], |
151 | | //! children: [ |
152 | | //! "Balto", |
153 | | //! "Biff", |
154 | | //! "Buffy", |
155 | | //! ], |
156 | | //! } |
157 | | //! "##); |
158 | | //! # Ok::<(),anyhow::Error>(()) |
159 | | //! ``` |
160 | | //! |
161 | | //! # Formatter Actions |
162 | | //! |
163 | | //! When the options above are applied to the input, the formatter will make the following changes: |
164 | | //! |
165 | | //! * The formatted document will be indented by 2 spaces. |
166 | | //! * Quotes are removed from all property names (since they are all legal ECMAScript identifiers) |
167 | | //! * The top-level properties will be reordered to [`name`, `address`, `contact_options`]. Since |
168 | | //! property name `children` was not included in the sort order, it will be placed at the end. |
169 | | //! * The `name` properties will be reordered to [`first`, `middle`, `last`]. |
170 | | //! * The properties of the unnamed object in array `contact_options` will be reordered to |
171 | | //! [`work`, `home`, `other`]. |
172 | | //! * The properties of the `work`, `home`, and `other` objects will be reordered to |
173 | | //! [`phone`, `email`]. |
174 | | //! * The `children` names array of string primitives will be sorted. |
175 | | //! * All elements (except the top-level object, represented by the outermost curly braces) will |
176 | | //! end with a comma. |
177 | | //! * Since the `contact_options` descendant element `other` has only one property, the `other` |
178 | | //! object structure will collapse to a single line, with internal trailing comma suppressed. |
179 | | //! * The line comment will retain its relative position, above `contact_options`. |
180 | | //! * The block comment will retain its relative position, inside and at the end of the `address` |
181 | | //! object. |
182 | | //! * The end-of-line comment after `home`/`email` will retain its relative location (appended at |
183 | | //! the end of the `email` value) and any subsequent line comments with the same vertical |
184 | | //! alignment are also retained, and vertically adjusted to be left-aligned with the new |
185 | | //! position of the first comment line. |
186 | | //! |
187 | | //! # Formatter Behavior Details |
188 | | //! |
189 | | //! For reference, the following sections detail how the JSON5 formatter verifies and processes |
190 | | //! JSON5 content. |
191 | | //! |
192 | | //! ## Syntax Validation |
193 | | //! |
194 | | //! * Structural syntax is checked, such as validating matching braces, property name-colon-value |
195 | | //! syntax, enforced separation of values by commas, properly quoted strings, and both block and |
196 | | //! line comment extraction. |
197 | | //! * Non-string literal value syntax is checked (null, true, false, and the various legal formats |
198 | | //! for JSON5 Numbers). |
199 | | //! * Syntax errors produce error messages with the line and column where the problem |
200 | | //! was encountered. |
201 | | //! |
202 | | //! ## Property Names |
203 | | //! |
204 | | //! * Duplicate property names are retained, but may constitute errors in higher-level JSON5 |
205 | | //! parsers or schema-specific deserializers. |
206 | | //! * All JSON5 unquoted property name characters are supported, including '$' and '_'. Digits are |
207 | | //! the only valid property name character that cannot be the first character. Property names |
208 | | //! can also be represented as quoted strings. All valid JSON5 strings, if quoted, are valid |
209 | | //! property names (including multi-line strings and quoted numbers). |
210 | | //! |
211 | | //! Example: |
212 | | //! ```json |
213 | | //! $_meta_prop: 'Has "double quotes" and \'single quotes\' and \ |
214 | | //! multiple lines with escaped \\ backslash', |
215 | | //! ``` |
216 | | //! |
217 | | //! ## Literal Values |
218 | | //! |
219 | | //! * JSON5 supports quoting strings (literal values or quoted property names) by either double (") |
220 | | //! or single (') quote. The formatter does not change the quotes. Double-quoting is |
221 | | //! conventional, but single quotes may be used when quoting strings containing double-quotes, and |
222 | | //! leaving the single quotes as-is is preferred. |
223 | | //! * JSON5 literal values are retained as-is. Strings retain all spacing characters, including |
224 | | //! escaped newlines. All other literals (unquoted tokens without spaces, such as false, null, |
225 | | //! 0.234, 1337, or l33t) are _not_ interpreted syntactically. Other schema-based tools and JSON5 |
226 | | //! deserializers may flag these invalid values. |
227 | | //! |
228 | | //! ## Optional Sorting |
229 | | //! |
230 | | //! * By default, array items and object properties retain their original order. (Some JSON arrays |
231 | | //! are order-dependent, and sorting them indiscriminately might change the meaning of the data.) |
232 | | //! * The formatter can automatically sort array items and object properties if enabled via |
233 | | //! `FormatOptions`: |
234 | | //! - To sort all arrays in the document, set |
235 | | //! [FormatOptions.sort_array_items](struct.FormatOptions.html#structfield.sort_array_items) to |
236 | | //! `true` |
237 | | //! - To sort only specific arrays in the target schema, specify the schema location under |
238 | | //! [FormatOptions.options_by_path](struct.FormatOptions.html#structfield.options_by_path), and |
239 | | //! set its [SortArrayItems](enum.PathOption.html#variant.SortArrayItems) option. |
240 | | //! - Properties are sorted based on an explicit user-supplied list of property names in the |
241 | | //! preferred order, for objects at a specified path. Specify the object's location in the |
242 | | //! target schema using |
243 | | //! [FormatOptions.options_by_path](struct.FormatOptions.html#structfield.options_by_path), and |
244 | | //! provide a vector of property name strings with the |
245 | | //! [PropertyNameOrder](enum.PathOption.html#variant.PropertyNameOrder) option. Properties not |
246 | | //! included in this option retain their original order, behind the explicitly ordered |
247 | | //! properties, if any. |
248 | | //! * When sorting array items, the formatter only sorts array item literal values (strings, |
249 | | //! numbers, bools, and null). Child arrays or objects are left in their original order, after |
250 | | //! sorted literals, if any, within the same array. |
251 | | //! * Array items are sorted in case-insensitive unicode lexicographic order. **(Note that, since |
252 | | //! the formatter does not parse unquoted literals, number types cannot be sorted numerically.)** |
253 | | //! Items that are case-insensitively equal are re-compared and ordered case-sensitively with |
254 | | //! respect to each other. |
255 | | //! |
256 | | //! ## Associated Comments |
257 | | //! |
258 | | //! * All comments immediately preceding an element (value or start of an array or object), and |
259 | | //! trailing line comments (starting on the same line as the element, optionally continued on |
260 | | //! successive lines if all line comments are left-aligned), are retained and move with the |
261 | | //! associated item if the item is repositioned during sorting. |
262 | | //! * All line and block comments are retained. Typically, the comments are re-aligned vertically |
263 | | //! (indented) with the values with which they were associated. |
264 | | //! * A single line comment appearing immediately after a JSON value (primitive or closing brace), |
265 | | //! on the same line, will remain appended to that value on its line after re-formatting. |
266 | | //! * Spaces separate block comments from blocks of contiguous line comments associated with the |
267 | | //! same entry. |
268 | | //! * Comments at the end of a list (after the last property or item) are retained at the end of |
269 | | //! the same list. |
270 | | //! * Block comments with lines that extend to the left of the opening "/\*" are not re-aligned. |
271 | | //! |
272 | | //! ## Whitespace Handling |
273 | | //! |
274 | | //! * Unicode characters are allowed, and unicode space characters should retain their meaning |
275 | | //! according to unicode standards. |
276 | | //! * All spaces inside single- or multi-line strings are retained. All spaces in comments are |
277 | | //! retained *except* trailing spaces at the end of a line. |
278 | | //! * All other original spaces are removed. |
279 | | |
280 | | #![deny(missing_docs)] |
281 | | #![allow(clippy::len_zero)] |
282 | | |
283 | | #[macro_use] |
284 | | mod error; |
285 | | |
286 | | mod content; |
287 | | mod formatter; |
288 | | mod options; |
289 | | mod parser; |
290 | | |
291 | | use { |
292 | | crate::formatter::*, std::cell::RefCell, std::collections::HashMap, std::collections::HashSet, |
293 | | std::rc::Rc, |
294 | | }; |
295 | | |
296 | | pub use content::Array; |
297 | | pub use content::Comment; |
298 | | pub use content::Comments; |
299 | | pub use content::Object; |
300 | | pub use content::ParsedDocument; |
301 | | pub use content::Primitive; |
302 | | pub use content::Property; |
303 | | pub use content::Value; |
304 | | pub use error::Error; |
305 | | pub use error::Location; |
306 | | pub use options::FormatOptions; |
307 | | pub use options::PathOption; |
308 | | |
309 | | /// Format a JSON5 document, applying a consistent style, with given options. |
310 | | /// |
311 | | /// See [FormatOptions](struct.FormatOptions.html) for style options, and confirm the defaults by |
312 | | /// reviewing the source of truth via the `src` link for |
313 | | /// [impl Default for FormatOptions](struct.FormatOptions.html#impl-Default). |
314 | | /// |
315 | | /// # Format and Style (Default) |
316 | | /// |
317 | | /// Unless FormatOptions are modified, the JSON5 formatter takes a JSON5 document (as a unicode |
318 | | /// String) and generates a new document with the following formatting: |
319 | | /// |
320 | | /// * Indents 4 spaces. |
321 | | /// * Quotes are removed from property names if they are legal ECMAScript 5.1 identifiers. Property |
322 | | /// names that do not comply with ECMAScript identifier format requirements will retain their |
323 | | /// existing (single or double) quotes. |
324 | | /// * All property and item lists end with a trailing comma. |
325 | | /// * All property and item lists are broken down; that is, the braces are on separate lines and |
326 | | /// all values are indented. |
327 | | /// |
328 | | /// ```json |
329 | | /// { |
330 | | /// key: "value", |
331 | | /// array: [ |
332 | | /// 3.145, |
333 | | /// ] |
334 | | /// } |
335 | | /// ``` |
336 | | /// |
337 | | /// # Arguments |
338 | | /// * buffer - A unicode string containing the original JSON5 document. |
339 | | /// * filename - An optional filename. Parsing errors typically include the filename (if given), |
340 | | /// and the line number and character column where the error was detected. |
341 | | /// * options - Format style options to override the default style, if provided. |
342 | | /// # Returns |
343 | | /// * The formatted result in UTF-8 encoded bytes. |
344 | | pub fn format( |
345 | | buffer: &str, |
346 | | filename: Option<String>, |
347 | | options: Option<FormatOptions>, |
348 | | ) -> Result<Vec<u8>, Error> { |
349 | | let parsed_document = ParsedDocument::from_str(buffer, filename)?; |
350 | | let options = match options { |
351 | | Some(options) => options, |
352 | | None => FormatOptions { ..Default::default() }, |
353 | | }; |
354 | | Json5Format::with_options(options)?.to_utf8(&parsed_document) |
355 | | } |
356 | | |
/// A JSON5 formatter that takes a parsed JSON5 document (a
/// [ParsedDocument](struct.ParsedDocument.html)) and produces a new, formatted document, using
/// the options supplied when the formatter was constructed.
pub struct Json5Format {
    /// Options that alter how the formatter generates the formatted output. This instance of
    /// FormatOptions is a subset of the FormatOptions passed to the `with_options` constructor.
    /// The `options_by_path` are first removed, and then used to initialize the SubpathOptions
    /// hierarchy rooted at the `document_root_options_ref`.
    default_options: FormatOptions,

    /// Depth-specific options applied at the document root and below. Held behind a
    /// reference-counted pointer so that each call to `to_utf8` can hand the same options tree to
    /// a new `Formatter`.
    document_root_options_ref: Rc<RefCell<SubpathOptions>>,
}
368 | | |
369 | | impl Json5Format { |
370 | | /// Create and return a Json5Format, with the given options to be applied to the |
371 | | /// [Json5Format::to_utf8()](struct.Json5Format.html#method.to_utf8) operation. |
372 | | pub fn with_options(mut options: FormatOptions) -> Result<Self, Error> { |
373 | | let mut document_root_options = SubpathOptions::new(&options); |
374 | | |
375 | | // Typical JSON5 documents start and end with curly braces for a top-level unnamed |
376 | | // object. This is by convention, and the Json5Format represents this |
377 | | // top-level object as a single child in a conceptual array. The array square braces |
378 | | // are not rendered, and by convention, the child object should not have a trailing |
379 | | // comma, even if trailing commas are the default everywhere else in the document. |
380 | | // |
381 | | // Set the SubpathOptions for the document array items to prevent trailing commas. |
382 | | document_root_options.options.trailing_commas = false; |
383 | | |
384 | | let mut options_by_path = |
385 | | options.options_by_path.drain().collect::<HashMap<&'static str, HashSet<PathOption>>>(); |
386 | | |
387 | | // Default options remain after draining the `options_by_path` |
388 | | let default_options = options; |
389 | | |
390 | | // Transfer the options_by_path from the given options into the SubpathOptions tree |
391 | | // rooted at `document_options_root`. |
392 | | for (path, path_options) in options_by_path.drain() { |
393 | | let rc; // extend life of temporary |
394 | | let mut borrowed; // extend life of temporary |
395 | | let subpath_options = if path == "/" { |
396 | | &mut document_root_options |
397 | | } else if let Some(remaining) = path.strip_prefix('/') { |
398 | | rc = document_root_options.get_or_create_subpath_options( |
399 | | &remaining.split('/').collect::<Vec<_>>(), |
400 | | &default_options, |
401 | | ); |
402 | | borrowed = rc.borrow_mut(); |
403 | | &mut *borrowed |
404 | | } else { |
405 | | return Err(Error::configuration(format!( |
406 | | "PathOption path '{}' is invalid.", |
407 | | path |
408 | | ))); |
409 | | }; |
410 | | subpath_options.override_default_options(&path_options); |
411 | | } |
412 | | |
413 | | Ok(Json5Format { |
414 | | default_options, |
415 | | document_root_options_ref: Rc::new(RefCell::new(document_root_options)), |
416 | | }) |
417 | | } |
418 | | |
419 | | /// Create and return a Json5Format, with the default settings. |
420 | | pub fn new() -> Result<Self, Error> { |
421 | | Self::with_options(FormatOptions { ..Default::default() }) |
422 | | } |
423 | | |
424 | | /// Formats the parsed document into a new Vector of UTF8 bytes. |
425 | | /// |
426 | | /// # Arguments |
427 | | /// * `parsed_document` - The parsed state of the incoming document. |
428 | | /// |
429 | | /// # Example |
430 | | /// |
431 | | /// ``` |
432 | | /// # use json5format::*; |
433 | | /// # let buffer = String::from("{}"); |
434 | | /// # let filename = String::from("example.json5"); |
435 | | /// let format = Json5Format::new()?; |
436 | | /// let parsed_document = ParsedDocument::from_str(&buffer, Some(filename))?; |
437 | | /// let bytes = format.to_utf8(&parsed_document)?; |
438 | | /// # assert_eq!("{}\n", std::str::from_utf8(&bytes).unwrap()); |
439 | | /// # Ok::<(),anyhow::Error>(()) |
440 | | /// ``` |
441 | | pub fn to_utf8(&self, parsed_document: &ParsedDocument) -> Result<Vec<u8>, Error> { |
442 | | let formatter = |
443 | | Formatter::new(self.default_options.clone(), self.document_root_options_ref.clone()); |
444 | | formatter.format(parsed_document) |
445 | | } |
446 | | |
447 | | /// Formats the parsed document into a new String. |
448 | | /// |
449 | | /// # Arguments |
450 | | /// * `parsed_document` - The parsed state of the incoming document. |
451 | | /// |
452 | | /// # Example |
453 | | /// |
454 | | /// ``` |
455 | | /// # use json5format::*; |
456 | | /// # fn main() -> std::result::Result<(), Error> { |
457 | | /// # let buffer = String::from("{}"); |
458 | | /// # let filename = String::from("example.json5"); |
459 | | /// let format = Json5Format::new()?; |
460 | | /// let parsed_document = ParsedDocument::from_str(&buffer, Some(filename))?; |
461 | | /// let formatted = format.to_string(&parsed_document)?; |
462 | | /// # assert_eq!("{}\n", formatted); |
463 | | /// # Ok(()) |
464 | | /// # } |
465 | | /// ``` |
466 | | pub fn to_string(&self, parsed_document: &ParsedDocument) -> Result<String, Error> { |
467 | | String::from_utf8(self.to_utf8(parsed_document)?) |
468 | 0 | .map_err(|e| Error::internal(None, e.to_string())) |
469 | | } |
470 | | } |