Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/pikepdf/_xml.py: 35%
Shortcuts on this page:
r, m, x: toggle line displays
j, k: next/prev highlighted chunk
0 (zero): top of page
1 (one): first highlighted chunk
Shortcuts on this page:
r, m, x: toggle line displays
j, k: next/prev highlighted chunk
0 (zero): top of page
1 (one): first highlighted chunk
# SPDX-FileCopyrightText: 2022 James R. Barlow
# SPDX-License-Identifier: MPL-2.0

from __future__ import annotations

from typing import IO, TYPE_CHECKING, Any, AnyStr

if TYPE_CHECKING:
    # Needed only for annotations; skipped at runtime, so importing this
    # module does not import lxml.
    from lxml.etree import _Element, _ElementTree

# Cache slot for the hardened parser class. It stays None until parse_xml()
# builds the class on its first call.
_XMLParser = None
def parse_xml(source: AnyStr | IO[Any], recover: bool = False) -> _ElementTree:
    """Wrap lxml's parse to provide protection against XXE attacks.

    Args:
        source: The document to read (a str, bytes, or file object per the
            annotation); handed unchanged to ``lxml.etree.parse``.
        recover: Forwarded to the parser's ``recover`` option.

    Returns:
        Whatever ``lxml.etree.parse`` returns for ``source`` when run with
        the hardened parser.
    """
    # lxml is imported at call time rather than at module import time.
    from lxml.etree import XMLParser as _UnsafeXMLParser
    from lxml.etree import parse as _parse

    global _XMLParser

    if _XMLParser is None:
        # Build the hardened subclass once and cache it in the module global
        # so later calls reuse the same class.
        class _XMLParserImpl(_UnsafeXMLParser):
            def __init__(self, *args: Any, **kwargs: Any):
                # Prevent XXE attacks
                # https://rules.sonarsource.com/python/type/Vulnerability/RSPEC-2755
                # Set unconditionally, overriding any value the caller passed
                # for these two options.
                kwargs['resolve_entities'] = False
                kwargs['no_network'] = True
                super().__init__(*args, **kwargs)

        _XMLParser = _XMLParserImpl

    # remove_pis=False asks lxml not to discard processing instructions.
    parser = _XMLParser(recover=recover, remove_pis=False)
    return _parse(source, parser=parser)
def __getattr__(name: str):
    """Resolve ``_Element`` and ``_ElementTree`` lazily from ``lxml.etree``.

    This is the module-level attribute hook: Python calls it when ``name``
    is not found in the module's namespace.

    Args:
        name: The attribute being looked up on this module.

    Returns:
        The attribute of the same name from ``lxml.etree``.

    Raises:
        AttributeError: If ``name`` is not one of the two lazily provided
            names.
    """
    if name in {'_Element', '_ElementTree'}:
        from lxml import etree

        value = getattr(etree, name)
        # Store the resolved object in the module namespace so subsequent
        # lookups find it directly and no longer reach this hook.
        globals()[name] = value
        return value

    raise AttributeError(f"module {__name__} has no attribute {name}")
# Names exported by a star import. The two underscore-prefixed names are
# listed explicitly because star imports skip such names unless __all__
# includes them.
__all__ = ['parse_xml', '_ElementTree', '_Element']