Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/pikepdf/_xml.py: 35%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

26 statements  

1# SPDX-FileCopyrightText: 2022 James R. Barlow 

2# SPDX-License-Identifier: MPL-2.0 

3 

4from __future__ import annotations 

5 

6from typing import IO, TYPE_CHECKING, Any, AnyStr 

7 

8if TYPE_CHECKING: 

9 from lxml.etree import _Element, _ElementTree 

10 

# Cache for the hardened XMLParser subclass. It stays None until the first
# parse_xml() call, which builds the subclass and stores it here for reuse.
11_XMLParser = None 

12 

def parse_xml(source: AnyStr | IO[Any], recover: bool = False) -> _ElementTree:
    """Parse XML with lxml through a parser hardened against XXE attacks.

    Args:
        source: Passed unchanged to ``lxml.etree.parse`` as the thing to parse.
        recover: Forwarded to the parser's ``recover`` option.

    Returns:
        Whatever ``lxml.etree.parse`` returns for ``source``.
    """
    # lxml is imported here rather than at module level, so importing this
    # module does not by itself import lxml.
    from lxml import etree

    global _XMLParser

    if _XMLParser is None:

        class _XMLParserImpl(etree.XMLParser):
            def __init__(self, *args: Any, **kwargs: Any):
                # Force the XXE-relevant options regardless of what the
                # caller passed.
                # https://rules.sonarsource.com/python/type/Vulnerability/RSPEC-2755
                kwargs.update(resolve_entities=False, no_network=True)
                super().__init__(*args, **kwargs)

        # Build the subclass once and reuse it on every later call.
        _XMLParser = _XMLParserImpl

    return etree.parse(
        source,
        parser=_XMLParser(recover=recover, remove_pis=False),
    )

32 

def __getattr__(name: str):
    """Lazily provide the lxml types re-exported from this module.

    Args:
        name: The attribute being looked up on the module.

    Returns:
        ``lxml.etree._Element`` or ``lxml.etree._ElementTree`` when ``name``
        is one of those two names.

    Raises:
        AttributeError: For any other name.
    """
    if name not in {'_Element', '_ElementTree'}:
        raise AttributeError(f"module {__name__} has no attribute {name}")

    # Import lxml only when one of the re-exported names is actually requested.
    from lxml import etree

    resolved = getattr(etree, name)
    # Store the result in the module namespace so later lookups find it
    # directly and do not reach this hook again.
    globals()[name] = resolved
    return resolved

41 

# Names exported by this module. '_ElementTree' and '_Element' are not defined
# at module level at runtime; the module-level __getattr__ fetches them from
# lxml.etree on first access.
42__all__ = ['parse_xml', '_ElementTree', '_Element']