1"""
2babel.messages.jslexer
3~~~~~~~~~~~~~~~~~~~~~~
4
5A simple JavaScript 1.5 lexer which is used for the JavaScript
6extractor.
7
8:copyright: (c) 2013-2025 by the Babel Team.
9:license: BSD, see LICENSE for more details.
10"""

from __future__ import annotations

import re
from collections.abc import Generator
from typing import NamedTuple

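# All operators, sorted longest-first so that multi-character operators
# such as `>>>=` are matched before their shorter prefixes in the
# operator regex built below.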
operators: list[str] = sorted([
    '+', '-', '*', '%', '!=', '==', '<', '>', '<=', '>=', '=',
    '+=', '-=', '*=', '%=', '<<', '>>', '>>>', '<<=', '>>=',
    '>>>=', '&', '&=', '|', '|=', '&&', '||', '^', '^=', '(', ')',
    '[', ']', '{', '}', '!', '--', '++', '~', ',', ';', '.', ':',
], key=len, reverse=True)  # fmt: skip

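# Single-letter escape sequences and the characters they represent.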
escapes: dict[str, str] = {'b': '\b', 'f': '\f', 'n': '\n', 'r': '\r', 't': '\t'}

name_re = re.compile(r'[\w$_][\w\d$_]*', re.UNICODE)
dotted_name_re = re.compile(r'[\w$_][\w\d$_.]*[\w\d$_.]', re.UNICODE)
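# `division_re` and `regex_re` disambiguate a leading slash: which one is
# tried depends on the preceding token (see `indicates_division` below).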
division_re = re.compile(r'/=?')
regex_re = re.compile(r'/(?:[^/\\]*(?:\\.[^/\\]*)*)/[a-zA-Z]*', re.DOTALL)
line_re = re.compile(r'(\r\n|\n|\r)')
line_join_re = re.compile(r'\\' + line_re.pattern)
uni_escape_re = re.compile(r'[a-fA-F0-9]{1,4}')
hex_escape_re = re.compile(r'[a-fA-F0-9]{1,2}')


class Token(NamedTuple):
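    """A token produced by the lexer: its type, raw source text and line number."""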
    type: str
    value: str
    lineno: int


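# The master rule table. Rules are tried in source order and the first
# match wins; `get_rules` filters and renames entries based on the
# requested syntax options.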
_rules: list[tuple[str | None, re.Pattern[str]]] = [
    (None, re.compile(r'\s+', re.UNICODE)),
    (None, re.compile(r'<!--.*')),
    ('linecomment', re.compile(r'//.*')),
    ('multilinecomment', re.compile(r'/\*.*?\*/', re.UNICODE | re.DOTALL)),
    ('dotted_name', dotted_name_re),
    ('name', name_re),
    ('number', re.compile(r'''(
        (?:0|[1-9]\d*)
        (\.\d+)?
        ([eE][-+]?\d+)? |
        (0x[a-fA-F0-9]+)
    )''', re.VERBOSE)),
    ('jsx_tag', re.compile(r'(?:</?[^>\s]+|/>)', re.I)),  # May be mangled in `get_rules`
    ('operator', re.compile(r'(%s)' % '|'.join(re.escape(op) for op in operators))),
    ('template_string', re.compile(r'''`(?:[^`\\]*(?:\\.[^`\\]*)*)`''', re.UNICODE)),
    ('string', re.compile(r'''(
        '(?:[^'\\]*(?:\\.[^'\\]*)*)' |
        "(?:[^"\\]*(?:\\.[^"\\]*)*)"
    )''', re.VERBOSE | re.DOTALL)),
]  # fmt: skip


def get_rules(
    jsx: bool,
    dotted: bool,
    template_string: bool,
) -> list[tuple[str | None, re.Pattern[str]]]:
    """
    Get a tokenization rule list given the passed syntax options.

    Internal to this module.
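
    For instance, with every syntax option disabled, only the base rules
    remain (an illustrative doctest)::

        >>> rules = get_rules(jsx=False, dotted=False, template_string=False)
        >>> [name for name, _ in rules if name]
        ['linecomment', 'multilinecomment', 'name', 'number', 'operator', 'string']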
75 """
    rules = []
    for token_type, rule in _rules:
        if not jsx and token_type and 'jsx' in token_type:
            continue
        if not template_string and token_type == 'template_string':
            continue
        if token_type == 'dotted_name':
            if not dotted:
                continue
            token_type = 'name'
        rules.append((token_type, rule))
    return rules


def indicates_division(token: Token) -> bool:
    """A helper function that tells the tokenizer whether the current
    token may be followed by a division operator.
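
    For example (an illustrative doctest)::

        >>> indicates_division(Token('number', '42', 1))
        True
        >>> indicates_division(Token('operator', '(', 1))
        False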
93 """
    if token.type == 'operator':
        return token.value in (')', ']', '}', '++', '--')
    return token.type in ('name', 'number', 'string', 'regexp')


def unquote_string(string: str) -> str:
    """Unquote a string with JavaScript rules. The string has to be
    enclosed in matching delimiters (``'``, ``"``, or the backtick/grave
    accent for template strings).
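
    For example (an illustrative doctest; the raw string passes a literal
    ``\\n`` escape through to the function)::

        >>> unquote_string("'Hello World'")
        'Hello World'
        >>> unquote_string(r'"hello\\nworld"')
        'hello\\nworld'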
102 """
    assert string and string[0] == string[-1] and string[0] in '"\'`', (
        'string provided is not properly delimited'
    )
    string = line_join_re.sub('\\1', string[1:-1])
    result: list[str] = []
    add = result.append
    pos = 0

    while True:
        # scan for the next escape
        escape_pos = string.find('\\', pos)
        if escape_pos < 0:
            break
        add(string[pos:escape_pos])

        # check which character is escaped
        next_char = string[escape_pos + 1]
        if next_char in escapes:
            add(escapes[next_char])

        # unicode escapes. try to consume up to four hexadecimal
        # characters and interpret them as a Unicode code point. If
        # there is no such code point, put all the consumed characters
        # into the string.
        elif next_char in 'uU':
            escaped = uni_escape_re.match(string, escape_pos + 2)
            if escaped is not None:
                escaped_value = escaped.group()
                if len(escaped_value) == 4:
                    try:
                        add(chr(int(escaped_value, 16)))
                    except ValueError:
                        pass
                    else:
                        pos = escape_pos + 6
                        continue
                add(next_char + escaped_value)
                pos = escaped.end()
                continue
            else:
                add(next_char)

        # hex escapes. conversion from one or two hex digits to a
        # character is infallible
        elif next_char in 'xX':
            escaped = hex_escape_re.match(string, escape_pos + 2)
            if escaped is not None:
                escaped_value = escaped.group()
                add(chr(int(escaped_value, 16)))
                pos = escape_pos + 2 + len(escaped_value)
                continue
            else:
                add(next_char)

        # bogus escape. Just remove the backslash.
        else:
            add(next_char)
        pos = escape_pos + 2

    if pos < len(string):
        add(string[pos:])

    return ''.join(result)


def tokenize(
    source: str,
    jsx: bool = True,
    dotted: bool = True,
    template_string: bool = True,
    lineno: int = 1,
) -> Generator[Token, None, None]:
174 """
175 Tokenize JavaScript/JSX source. Returns a generator of tokens.
176
177 :param source: The JavaScript source to tokenize.
178 :param jsx: Enable (limited) JSX parsing.
179 :param dotted: Read dotted names as single name token.
180 :param template_string: Support ES6 template strings
181 :param lineno: starting line number (optional)
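
    A small usage sketch (illustrative)::

        >>> for token in tokenize("gettext('Hello')"):
        ...     print(token)
        Token(type='name', value='gettext', lineno=1)
        Token(type='operator', value='(', lineno=1)
        Token(type='string', value="'Hello'", lineno=1)
        Token(type='operator', value=')', lineno=1)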
182 """
    may_divide = False
    pos = 0
    end = len(source)
    rules = get_rules(jsx=jsx, dotted=dotted, template_string=template_string)

    while pos < end:
        # handle regular rules first
        for token_type, rule in rules:  # noqa: B007
            match = rule.match(source, pos)
            if match is not None:
                break
        # if we don't have a match we don't give up yet, but check for
        # division operators or regular expression literals, based on
        # the status of `may_divide`, which is determined by the last
        # processed non-whitespace token using `indicates_division`.
        else:
            if may_divide:
                match = division_re.match(source, pos)
                token_type = 'operator'
            else:
                match = regex_re.match(source, pos)
                token_type = 'regexp'
        if match is None:
            # whoops, invalid syntax: jump one char ahead and try again.
            pos += 1
            continue

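        # emit the token, update the division context and advance past the match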
        token_value = match.group()
        if token_type is not None:
            token = Token(token_type, token_value, lineno)
            may_divide = indicates_division(token)
            yield token
        lineno += len(line_re.findall(token_value))
        pos = match.end()