1"""
2 babel.messages.jslexer
3 ~~~~~~~~~~~~~~~~~~~~~~
4
5 A simple JavaScript 1.5 lexer which is used for the JavaScript
6 extractor.
7
8 :copyright: (c) 2013-2024 by the Babel Team.
9 :license: BSD, see LICENSE for more details.
10"""
11from __future__ import annotations
12
13import re
14from collections.abc import Generator
15from typing import NamedTuple
16
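# Operators are sorted longest-first so that the alternation compiled into
# the 'operator' rule below matches e.g. '>>>=' before '>>>' or '>>'.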
operators: list[str] = sorted([
    '+', '-', '*', '%', '!=', '==', '<', '>', '<=', '>=', '=',
    '+=', '-=', '*=', '%=', '<<', '>>', '>>>', '<<=', '>>=',
    '>>>=', '&', '&=', '|', '|=', '&&', '||', '^', '^=', '(', ')',
    '[', ']', '{', '}', '!', '--', '++', '~', ',', ';', '.', ':',
], key=len, reverse=True)

escapes: dict[str, str] = {'b': '\b', 'f': '\f', 'n': '\n', 'r': '\r', 't': '\t'}

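# ``division_re`` and ``regex_re`` are deliberately kept out of the rule
# table below: ``tokenize`` falls back to them only when no regular rule
# matches, picking between the two based on the ``may_divide`` flag that
# ``indicates_division`` maintains. ``line_join_re`` matches a backslash
# followed by a line break (a JavaScript line continuation inside a string
# literal), which ``unquote_string`` strips.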
name_re = re.compile(r'[\w$_][\w\d$_]*', re.UNICODE)
dotted_name_re = re.compile(r'[\w$_][\w\d$_.]*[\w\d$_]', re.UNICODE)  # may not end with a dot
division_re = re.compile(r'/=?')
regex_re = re.compile(r'/(?:[^/\\]*(?:\\.[^/\\]*)*)/[a-zA-Z]*', re.DOTALL)
line_re = re.compile(r'(\r\n|\n|\r)')
line_join_re = re.compile(r'\\' + line_re.pattern)
uni_escape_re = re.compile(r'[a-fA-F0-9]{1,4}')
hex_escape_re = re.compile(r'[a-fA-F0-9]{1,2}')


class Token(NamedTuple):
    type: str
    value: str
    lineno: int


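# The rules below are tried in order, so 'dotted_name' must stay ahead of
# 'name'; otherwise 'foo.bar' would lex as a name, an operator and a name.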
_rules: list[tuple[str | None, re.Pattern[str]]] = [
    (None, re.compile(r'\s+', re.UNICODE)),
    (None, re.compile(r'<!--.*')),
    ('linecomment', re.compile(r'//.*')),
    ('multilinecomment', re.compile(r'/\*.*?\*/', re.UNICODE | re.DOTALL)),
    ('dotted_name', dotted_name_re),
    ('name', name_re),
    ('number', re.compile(r'''(
        (?:0|[1-9]\d*)
        (\.\d+)?
        ([eE][-+]?\d+)? |
        (0x[a-fA-F0-9]+)
    )''', re.VERBOSE)),
    ('jsx_tag', re.compile(r'(?:</?[^>\s]+|/>)', re.I)),  # May be mangled in `get_rules`
    ('operator', re.compile(r'(%s)' % '|'.join(map(re.escape, operators)))),
    ('template_string', re.compile(r'''`(?:[^`\\]*(?:\\.[^`\\]*)*)`''', re.UNICODE)),
    ('string', re.compile(r'''(
        '(?:[^'\\]*(?:\\.[^'\\]*)*)' |
        "(?:[^"\\]*(?:\\.[^"\\]*)*)"
    )''', re.VERBOSE | re.DOTALL)),
]


def get_rules(jsx: bool, dotted: bool, template_string: bool) -> list[tuple[str | None, re.Pattern[str]]]:
    """
    Get a tokenization rule list given the passed syntax options.

    Internal to this module.
    """
    rules = []
    for token_type, rule in _rules:
        if not jsx and token_type and 'jsx' in token_type:
            continue
        if not template_string and token_type == 'template_string':
            continue
        if token_type == 'dotted_name':
            if not dotted:
                continue
            token_type = 'name'
        rules.append((token_type, rule))
    return rules


def indicates_division(token: Token) -> bool:
    """Decide whether the token that was just processed may be followed by
    a division operator rather than a regular expression literal: division
    is only possible after a closing bracket, a postfix ``++``/``--``, or a
    name, number, string or regexp token.
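
    Two doctest-style examples, using the ``Token`` tuple defined above:

        >>> indicates_division(Token('operator', ')', 1))
        True
        >>> indicates_division(Token('operator', '(', 1))
        False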
    """
    if token.type == 'operator':
        return token.value in (')', ']', '}', '++', '--')
    return token.type in ('name', 'number', 'string', 'regexp')


def unquote_string(string: str) -> str:
    """Unquote a string with JavaScript rules.  The string has to be wrapped
    in matching delimiters: ``'``, ``"``, or the backtick/grave accent for
    template strings.
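
    A couple of quick doctest-style examples:

        >>> unquote_string('"hello"')
        'hello'
        >>> unquote_string('`template`')
        'template'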
    """
    assert string and string[0] == string[-1] and string[0] in '"\'`', \
        'string provided is not properly delimited'
    string = line_join_re.sub('\\1', string[1:-1])
    result: list[str] = []
    add = result.append
    pos = 0

    while True:
        # scan for the next escape
        escape_pos = string.find('\\', pos)
        if escape_pos < 0:
            break
        add(string[pos:escape_pos])

        # check which character is escaped
        next_char = string[escape_pos + 1]
        if next_char in escapes:
            add(escapes[next_char])

        # unicode escapes. try to consume up to four hexadecimal
        # characters and interpret them as a unicode code point. If
        # that is not possible, put all the consumed characters back
        # into the string as-is.
        elif next_char in 'uU':
            escaped = uni_escape_re.match(string, escape_pos + 2)
            if escaped is not None:
                escaped_value = escaped.group()
                if len(escaped_value) == 4:
                    try:
                        add(chr(int(escaped_value, 16)))
                    except ValueError:
                        pass
                    else:
                        pos = escape_pos + 6
                        continue
                add(next_char + escaped_value)
                pos = escaped.end()
                continue
            else:
                add(next_char)

        # hex escapes. conversion from 1-2 hex digits to a char cannot fail
        elif next_char in 'xX':
            escaped = hex_escape_re.match(string, escape_pos + 2)
            if escaped is not None:
                escaped_value = escaped.group()
                add(chr(int(escaped_value, 16)))
                pos = escape_pos + 2 + len(escaped_value)
                continue
            else:
                add(next_char)

        # bogus escape. Just remove the backslash.
        else:
            add(next_char)
        pos = escape_pos + 2

    if pos < len(string):
        add(string[pos:])

    return ''.join(result)


def tokenize(source: str, jsx: bool = True, dotted: bool = True, template_string: bool = True, lineno: int = 1) -> Generator[Token, None, None]:
    """
    Tokenize JavaScript/JSX source.  Returns a generator of tokens.

    :param source: The JavaScript source to tokenize.
    :param jsx: Enable (limited) JSX parsing.
    :param dotted: Read dotted names as a single name token.
    :param template_string: Support ES6 template strings.
    :param lineno: Starting line number (optional).
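
    For example (whitespace produces no token at all):

        >>> [(tok.type, tok.value) for tok in tokenize("gettext('Hello')")]
        [('name', 'gettext'), ('operator', '('), ('string', "'Hello'"), ('operator', ')')]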
    """
    may_divide = False
    pos = 0
    end = len(source)
    rules = get_rules(jsx=jsx, dotted=dotted, template_string=template_string)

    while pos < end:
        # handle regular rules first
        for token_type, rule in rules:  # noqa: B007
            match = rule.match(source, pos)
            if match is not None:
                break
        # if we don't have a match we don't give up yet, but check for
        # division operators or regular expression literals, based on
        # the status of `may_divide` which is determined by the last
        # processed non-whitespace token using `indicates_division`.
        else:
            if may_divide:
                match = division_re.match(source, pos)
                token_type = 'operator'
            else:
                match = regex_re.match(source, pos)
                token_type = 'regexp'
            if match is None:
                # oops, invalid syntax: skip one character and try again.
                pos += 1
                continue

        token_value = match.group()
        if token_type is not None:
            token = Token(token_type, token_value, lineno)
            may_divide = indicates_division(token)
            yield token
        lineno += len(line_re.findall(token_value))
        pos = match.end()
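

if __name__ == '__main__':
    # A small usage sketch rather than part of the API: lex a call to a
    # hypothetical ``gettext`` function and unquote its string argument.
    demo_tokens = list(tokenize("gettext('Hello, World!') // greet"))
    for demo_token in demo_tokens:
        print(demo_token)
    demo_strings = [
        unquote_string(tok.value) for tok in demo_tokens if tok.type == 'string'
    ]
    assert demo_strings == ['Hello, World!']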