Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/tinycss2/bytes.py: 18%

22 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-07-01 06:54 +0000

1from webencodings import UTF8, decode, lookup 

2 

3from .parser import parse_stylesheet 

4 

5 

6def decode_stylesheet_bytes(css_bytes, protocol_encoding=None, 

7 environment_encoding=None): 

8 """Determine the character encoding of a CSS stylesheet and decode it. 

9 

10 This is based on the presence of a :abbr:`BOM (Byte Order Mark)`, 

11 a ``@charset`` rule, and encoding meta-information. 

12 

13 :type css_bytes: :obj:`bytes` 

14 :param css_bytes: A CSS byte string. 

15 :type protocol_encoding: :obj:`str` 

16 :param protocol_encoding: 

17 The encoding label, if any, defined by HTTP or equivalent protocol. 

18 (e.g. via the ``charset`` parameter of the ``Content-Type`` header.) 

19 :type environment_encoding: :class:`webencodings.Encoding` 

20 :param environment_encoding: 

21 The `environment encoding 

22 <https://www.w3.org/TR/css-syntax/#environment-encoding>`_, if any. 

23 :returns: 

24 A 2-tuple of a decoded Unicode string and the 

25 :class:`webencodings.Encoding` object that was used. 

26 

27 """ 

28 # https://drafts.csswg.org/css-syntax/#the-input-byte-stream 

29 if protocol_encoding: 

30 fallback = lookup(protocol_encoding) 

31 if fallback: 

32 return decode(css_bytes, fallback) 

33 if css_bytes.startswith(b'@charset "'): 

34 # 10 is len(b'@charset "') 

35 # 100 is arbitrary so that no encoding label is more than 100-10 bytes. 

36 end_quote = css_bytes.find(b'"', 10, 100) 

37 if end_quote != -1 and css_bytes.startswith(b'";', end_quote): 

38 fallback = lookup(css_bytes[10:end_quote].decode('latin1')) 

39 if fallback: 

40 if fallback.name in ('utf-16be', 'utf-16le'): 

41 return decode(css_bytes, UTF8) 

42 return decode(css_bytes, fallback) 

43 if environment_encoding: 

44 return decode(css_bytes, environment_encoding) 

45 return decode(css_bytes, UTF8) 

46 

47 

48def parse_stylesheet_bytes(css_bytes, protocol_encoding=None, 

49 environment_encoding=None, 

50 skip_comments=False, skip_whitespace=False): 

51 """Parse :diagram:`stylesheet` from bytes, 

52 determining the character encoding as web browsers do. 

53 

54 This is used when reading a file or fetching a URL. 

55 The character encoding is determined from the initial bytes 

56 (a :abbr:`BOM (Byte Order Mark)` or a ``@charset`` rule) 

57 as well as the parameters. The ultimate fallback is UTF-8. 

58 

59 :type css_bytes: :obj:`bytes` 

60 :param css_bytes: A CSS byte string. 

61 :type protocol_encoding: :obj:`str` 

62 :param protocol_encoding: 

63 The encoding label, if any, defined by HTTP or equivalent protocol. 

64 (e.g. via the ``charset`` parameter of the ``Content-Type`` header.) 

65 :type environment_encoding: :class:`webencodings.Encoding` 

66 :param environment_encoding: 

67 The `environment encoding`_, if any. 

68 :type skip_comments: :obj:`bool` 

69 :param skip_comments: 

70 Ignore CSS comments at the top-level of the stylesheet. 

71 If the input is a string, ignore all comments. 

72 :type skip_whitespace: :obj:`bool` 

73 :param skip_whitespace: 

74 Ignore whitespace at the top-level of the stylesheet. 

75 Whitespace is still preserved 

76 in the :attr:`~tinycss2.ast.QualifiedRule.prelude` 

77 and the :attr:`~tinycss2.ast.QualifiedRule.content` of rules. 

78 :returns: 

79 A ``(rules, encoding)`` tuple. 

80 

81 * ``rules`` is a list of 

82 :class:`~tinycss2.ast.QualifiedRule`, 

83 :class:`~tinycss2.ast.AtRule`, 

84 :class:`~tinycss2.ast.Comment` (if ``skip_comments`` is false), 

85 :class:`~tinycss2.ast.WhitespaceToken` 

86 (if ``skip_whitespace`` is false), 

87 and :class:`~tinycss2.ast.ParseError` objects. 

88 * ``encoding`` is the :class:`webencodings.Encoding` object 

89 that was used. 

90 If ``rules`` contains an ``@import`` rule, this is 

91 the `environment encoding`_ for the imported stylesheet. 

92 

93 .. _environment encoding: 

94 https://www.w3.org/TR/css-syntax/#environment-encoding 

95 

96 .. code-block:: python 

97 

98 response = urlopen('http://example.net/foo.css') 

99 rules, encoding = parse_stylesheet_bytes( 

100 css_bytes=response.read(), 

101 # Python 3.x 

102 protocol_encoding=response.info().get_content_type().get_param('charset'), 

103 # Python 2.x 

104 protocol_encoding=response.info().gettype().getparam('charset'), 

105 ) 

106 for rule in rules: 

107 ... 

108 

109 """ 

110 css_unicode, encoding = decode_stylesheet_bytes( 

111 css_bytes, protocol_encoding, environment_encoding) 

112 stylesheet = parse_stylesheet(css_unicode, skip_comments, skip_whitespace) 

113 return stylesheet, encoding