1# ___
2# \./ DANGER: This project implements some code generation
3# .--.O.--. techniques involving string concatenation.
4# \/ \/ If you look at it, you might die.
5#
6
7r"""
8Installation
9************
10
11.. code-block:: bash
12
13 pip install fastjsonschema
14
Supported on Python 3.10 and higher only (the API annotations use ``X | Y`` unions).
16
17About
18*****
19
20``fastjsonschema`` implements validation of JSON documents by JSON schema.
21The library implements JSON schema drafts 04, 06, and 07. The main purpose is
22to have a really fast implementation. See some numbers:
23
24 * Probably the most popular, ``jsonschema``, can take up to 5 seconds for valid
25 inputs and 1.2 seconds for invalid inputs.
26 * Second most popular, ``json-spec``, is even worse with up to 7.2 and 1.7 seconds.
27 * Last ``validictory``, now deprecated, is much better with 370 or 23 milliseconds,
28 but it does not follow all standards, and it can be still slow for some purposes.
29
30With this library you can gain big improvements as ``fastjsonschema`` takes
31only about 25 milliseconds for valid inputs and 2 milliseconds for invalid ones.
32Pretty amazing, right? :-)
33
34Technically it works by generating the most stupid code on the fly, which is fast but
35is hard to write by hand. The best efficiency is achieved when a validator is compiled
once and used many times, of course. It works similarly to regular expressions. But
37you can also generate the code to a file, which is even slightly faster.
38
39You can run the performance benchmarks on your computer or server with the included
40script:
41
42.. code-block:: bash
43
44 $ make performance
45 fast_compiled valid ==> 0.0993900
46 fast_compiled invalid ==> 0.0041089
47 fast_compiled_without_exc valid ==> 0.0465258
48 fast_compiled_without_exc invalid ==> 0.0023688
49 fast_file valid ==> 0.0989483
50 fast_file invalid ==> 0.0041104
51 fast_not_compiled valid ==> 11.9572681
52 fast_not_compiled invalid ==> 2.9512092
53 jsonschema valid ==> 5.2233240
54 jsonschema invalid ==> 1.3227916
55 jsonschema_compiled valid ==> 0.4447982
56 jsonschema_compiled invalid ==> 0.0231333
57 jsonspec valid ==> 4.1450569
58 jsonspec invalid ==> 1.0485777
59 validictory valid ==> 0.2730411
60 validictory invalid ==> 0.0183669
61
62This library follows and implements `JSON schema draft-04, draft-06, and draft-07
<http://json-schema.org>`_. Sometimes it's not perfectly clear, so I also recommend
checking out this `understanding JSON schema <https://spacetelescope.github.io/understanding-json-schema>`_.
65
66Note that there are some differences compared to JSON schema standard:
67
68 * Regular expressions are full Python ones, not only what JSON schema allows. It's easier
69 to allow everything, and also it's faster to compile without limits. So keep in mind that when
70 you will use a more advanced regular expression, it may not work with other libraries or in
71 other languages.
  * Because Python's ``$`` also matches just before a trailing newline (``a$`` matches both ``a``
    and ``a\n``), every ``$`` in your regular expressions is replaced with ``\Z``, which matches
    only at the very end. When you want to use a dollar as a regular character, escape it (``\$``).
75 * JSON schema says you can use keyword ``default`` for providing default values. This implementation
76 uses that and always returns transformed input data.
77
78Usage
79*****
80
81.. code-block:: python
82
83 import fastjsonschema
84
85 point_schema = {
86 "type": "object",
87 "properties": {
88 "x": {
89 "type": "number",
90 },
91 "y": {
92 "type": "number",
93 },
94 },
95 "required": ["x", "y"],
96 "additionalProperties": False,
97 }
98
99 point_validator = fastjsonschema.compile(point_schema)
100 try:
101 point_validator({"x": 1.0, "y": 2.0})
102 except fastjsonschema.JsonSchemaException as e:
103 print(f"Data failed validation: {e}")
104
105API
106***
107"""
108from functools import partial, update_wrapper
109
110from .draft04 import CodeGeneratorDraft04
111from .draft06 import CodeGeneratorDraft06
112from .draft07 import CodeGeneratorDraft07
113from .draft2019 import CodeGeneratorDraft2019
114from .exceptions import (
115 JsonSchemaException,
116 JsonSchemaValueException,
117 JsonSchemaValuesException,
118 JsonSchemaDefinitionException,
119)
120from .ref_resolver import RefResolver
121from .version import VERSION
122
# Names exported by ``from <this module> import *``. The code generator
# classes and ``RefResolver`` imported above are deliberately not listed.
__all__ = (
    'VERSION',
    'JsonSchemaException',
    'JsonSchemaValueException',
    'JsonSchemaValuesException',
    'JsonSchemaDefinitionException',
    'validate',
    'compile',
    'compile_to_code',
)
133
134
def validate(
    definition: dict | bool,
    data,
    handlers: dict | None = None,
    formats: dict | None = None,
    use_default: bool = True,
    use_formats: bool = True,
    detailed_exceptions: bool = True,
    fast_fail: bool = True,
):
    """
    Validation function for lazy programmers or for use cases when you need
    to call validation only once, so you do not have to compile it first.
    Use it only when you do not care about performance (even though it will
    be still faster than alternative implementations).

    .. code-block:: python

        import fastjsonschema

        fastjsonschema.validate({'type': 'string'}, 'hello')
        # same as: compile({'type': 'string'})('hello')

    Preferred is to use :any:`compile` function.

    The ``handlers`` parameter controls resolution of remote ``$ref`` URIs; see
    :any:`compile` for details and security considerations when schemas are not
    fully trusted.

    All parameters except ``data`` are forwarded unchanged to :any:`compile`;
    ``handlers`` and ``formats`` left as ``None`` are treated as empty mappings.
    The return value is whatever the compiled validator returns for ``data``.
    """
    # Build fresh dicts per call instead of sharing one mutable default object
    # between every invocation of this function.
    return compile(
        definition,
        handlers={} if handlers is None else handlers,
        formats={} if formats is None else formats,
        use_default=use_default,
        use_formats=use_formats,
        detailed_exceptions=detailed_exceptions,
        fast_fail=fast_fail,
    )(data)
165
166
167#TODO: Change use_default to False when upgrading to version 3.
168# pylint: disable=redefined-builtin,dangerous-default-value,exec-used
def compile(
    definition: dict | bool,
    handlers: dict | None = None,
    formats: dict | None = None,
    use_default: bool = True,
    use_formats: bool = True,
    detailed_exceptions: bool = True,
    fast_fail: bool = True,
):
    """
    Generates validation function for validating JSON schema passed in ``definition``.
    Example:

    .. code-block:: python

        import fastjsonschema

        validate = fastjsonschema.compile({'type': 'string'})
        validate('hello')

    This implementation supports keyword ``default`` (can be turned off
    by passing `use_default=False`):

    .. code-block:: python

        validate = fastjsonschema.compile({
            'type': 'object',
            'properties': {
                'a': {'type': 'number', 'default': 42},
            },
        })

        data = validate({})
        assert data == {'a': 42}

    Code generators exist for draft-04, draft-06, draft-07 and draft 2019-09.
    Which one is used is determined by ``$schema`` in your ``definition``. When
    it is not specified (or not recognised), the latest implementation is used
    (draft 2019-09).

    .. code-block:: python

        validate = fastjsonschema.compile({
            '$schema': 'http://json-schema.org/draft-04/schema',
            'type': 'number',
        })

    You can pass mapping from URI scheme to function that should be used to
    retrieve remote references used in your ``definition`` in parameter
    ``handlers``. When no handler is registered for a scheme, the URI is
    fetched automatically via :mod:`urllib` (for example ``http``, ``https``,
    or ``file`` URLs).

    .. warning::

        Do not compile or validate untrusted schemas without custom
        ``handlers``. A schema containing ``$ref`` can trigger outbound HTTP
        requests to arbitrary URLs, including internal or loopback addresses
        (server-side request forgery). Provide ``handlers`` to restrict which
        URIs are resolved, or pre-resolve references before passing the schema
        to this library.

    .. code-block:: python

        import json
        import urllib.request

        def http_handler(uri):
            if not uri.startswith('https://schemas.example.com/'):
                raise ValueError('ref not allowed')
            with urllib.request.urlopen(uri) as response:
                return json.loads(response.read())

        validate = fastjsonschema.compile(definition, handlers={
            'http': http_handler,
            'https': http_handler,
        })

    Also, you can pass mapping for custom formats. Key is the name of your
    formatter and value can be regular expression, which will be compiled or
    callback returning `bool` (or you can raise your own exception).

    .. code-block:: python

        validate = fastjsonschema.compile(definition, formats={
            'foo': r'foo|bar',
            'bar': lambda value: value in ('foo', 'bar'),
        })

    Note that formats are automatically used as assertions. It can be turned
    off by passing `use_formats=False`. When disabled, custom formats are
    disabled as well. (Added in 2.19.0.)

    If you don't need detailed exceptions, you can turn the details off and gain
    additional performance by passing `detailed_exceptions=False`.

    By default, the execution stops with the first validation error. If you need
    to collect all the errors, turn this off by passing `fast_fail=False`.

    ``handlers`` and ``formats`` left as ``None`` are treated as empty mappings.

    Exception :any:`JsonSchemaDefinitionException` is raised when generating the
    code fails (bad definition).

    Exception :any:`JsonSchemaValueException` is raised from generated function when
    validation fails (data do not follow the definition).

    Exception :any:`JsonSchemaValuesException` is raised from generated function when
    validation fails (data do not follow the definition) containing all the errors
    (when fast_fail is set to `False`).
    """
    # Use a fresh dict per call rather than one mutable default object shared
    # by every invocation.
    handlers = {} if handlers is None else handlers
    formats = {} if formats is None else formats

    resolver, code_generator = _factory(
        definition,
        handlers,
        formats,
        use_default,
        use_formats,
        detailed_exceptions,
        fast_fail,
    )
    global_state = code_generator.global_state
    # NOTE(review): the executed source is generated from ``definition``, so it
    # is only as safe as the code generator's escaping of schema content.
    # Do not pass local state so it can recursively call itself.
    exec(code_generator.func_code, global_state)
    func = global_state[resolver.get_scope_name()]
    if formats:
        # Bind the custom formats while keeping the generated function's
        # metadata (name, docstring, ...) on the returned callable.
        return update_wrapper(partial(func, custom_formats=formats), func)
    return func
291
292
293# pylint: disable=dangerous-default-value
def compile_to_code(
    definition: dict | bool,
    handlers: dict | None = None,
    formats: dict | None = None,
    use_default: bool = True,
    use_formats: bool = True,
    detailed_exceptions: bool = True,
    fast_fail: bool = True,
):
    """
    Generates validation code for validating JSON schema passed in ``definition``.
    Example:

    .. code-block:: python

        import fastjsonschema

        code = fastjsonschema.compile_to_code({'type': 'string'})
        with open('your_file.py', 'w') as f:
            f.write(code)

    You can also use it as a script:

    .. code-block:: bash

        echo "{'type': 'string'}" | python3 -m fastjsonschema > your_file.py
        python3 -m fastjsonschema "{'type': 'string'}" > your_file.py

    The returned string consists of a ``VERSION = "..."`` line, the code
    generator's global state code and the generated function code, each
    separated by a newline.

    ``handlers`` and ``formats`` left as ``None`` are treated as empty mappings.

    Exception :any:`JsonSchemaDefinitionException` is raised when generating the
    code fails (bad definition).

    Remote ``$ref`` URIs are resolved the same way as in :any:`compile`; see its
    documentation for ``handlers`` and security considerations.
    """
    # Use a fresh dict per call rather than one mutable default object shared
    # by every invocation.
    handlers = {} if handlers is None else handlers
    formats = {} if formats is None else formats

    _, code_generator = _factory(
        definition,
        handlers,
        formats,
        use_default,
        use_formats,
        detailed_exceptions,
        fast_fail,
    )
    return '\n'.join((
        'VERSION = "' + VERSION + '"',
        code_generator.global_state_code,
        code_generator.func_code,
    ))
342
343
def _factory(
    definition: dict | bool,
    handlers: dict,
    formats: dict = {},
    use_default: bool = True,
    use_formats: bool = True,
    detailed_exceptions: bool = True,
    fast_fail: bool = True,
):
    """
    Build the reference resolver and the code generator for ``definition``.

    The resolver is created with the given ``handlers`` and a fresh, empty
    store. The generator class is picked by ``_get_code_generator_class`` and
    receives the resolver together with all remaining options.

    Returns a ``(resolver, code_generator)`` tuple.
    """
    ref_resolver = RefResolver.from_schema(definition, handlers=handlers, store={})
    generator_class = _get_code_generator_class(definition)
    generator = generator_class(
        definition,
        resolver=ref_resolver,
        formats=formats,
        use_default=use_default,
        use_formats=use_formats,
        detailed_exceptions=detailed_exceptions,
        fast_fail=fast_fail,
    )
    return ref_resolver, generator
364
365
366def _get_code_generator_class(schema: dict | bool):
367 # Schema in from draft-06 can be just the boolean value.
368 if isinstance(schema, dict):
369 schema_version = schema.get('$schema', '')
370 if 'draft-04' in schema_version:
371 return CodeGeneratorDraft04
372 if 'draft-06' in schema_version:
373 return CodeGeneratorDraft06
374 if 'draft-07' in schema_version:
375 return CodeGeneratorDraft07
376 if 'draft/2019' in schema_version or 'draft-2019' in schema_version:
377 return CodeGeneratorDraft2019
378 return CodeGeneratorDraft2019