1""" 

2babel.messages.jslexer 

3~~~~~~~~~~~~~~~~~~~~~~ 

4 

5A simple JavaScript 1.5 lexer which is used for the JavaScript 

6extractor. 

7 

8:copyright: (c) 2013-2025 by the Babel Team. 

9:license: BSD, see LICENSE for more details. 

10""" 

11 

12from __future__ import annotations 

13 

14import re 

15from collections.abc import Generator 

16from typing import NamedTuple 

17 

18operators: list[str] = sorted([ 

19 '+', '-', '*', '%', '!=', '==', '<', '>', '<=', '>=', '=', 

20 '+=', '-=', '*=', '%=', '<<', '>>', '>>>', '<<=', '>>=', 

21 '>>>=', '&', '&=', '|', '|=', '&&', '||', '^', '^=', '(', ')', 

22 '[', ']', '{', '}', '!', '--', '++', '~', ',', ';', '.', ':', 

23], key=len, reverse=True) # fmt: skip 
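
# NOTE: the longest-first ordering matters: the operator regex built from
# `operators` in `_rules` below is an alternation tried left to right, so
# '>>>=' must be attempted before '>>' or '>'.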

escapes: dict[str, str] = {'b': '\b', 'f': '\f', 'n': '\n', 'r': '\r', 't': '\t'}

name_re = re.compile(r'[\w$_][\w\d$_]*', re.UNICODE)
dotted_name_re = re.compile(r'[\w$_][\w\d$_.]*[\w\d$_.]', re.UNICODE)
division_re = re.compile(r'/=?')
regex_re = re.compile(r'/(?:[^/\\]*(?:\\.[^/\\]*)*)/[a-zA-Z]*', re.DOTALL)
line_re = re.compile(r'(\r\n|\n|\r)')
line_join_re = re.compile(r'\\' + line_re.pattern)
uni_escape_re = re.compile(r'[a-fA-F0-9]{1,4}')
hex_escape_re = re.compile(r'[a-fA-F0-9]{1,2}')


class Token(NamedTuple):
    type: str
    value: str
    lineno: int


_rules: list[tuple[str | None, re.Pattern[str]]] = [
    (None, re.compile(r'\s+', re.UNICODE)),
    (None, re.compile(r'<!--.*')),
    ('linecomment', re.compile(r'//.*')),
    ('multilinecomment', re.compile(r'/\*.*?\*/', re.UNICODE | re.DOTALL)),
    ('dotted_name', dotted_name_re),
    ('name', name_re),
    ('number', re.compile(r'''(
        (?:0|[1-9]\d*)
        (\.\d+)?
        ([eE][-+]?\d+)? |
        (0x[a-fA-F0-9]+)
    )''', re.VERBOSE)),
    ('jsx_tag', re.compile(r'(?:</?[^>\s]+|/>)', re.I)),  # May be mangled in `get_rules`
    ('operator', re.compile(r'(%s)' % '|'.join(re.escape(op) for op in operators))),
    ('template_string', re.compile(r'''`(?:[^`\\]*(?:\\.[^`\\]*)*)`''', re.UNICODE)),
    ('string', re.compile(r'''(
        '(?:[^'\\]*(?:\\.[^'\\]*)*)' |
        "(?:[^"\\]*(?:\\.[^"\\]*)*)"
    )''', re.VERBOSE | re.DOTALL)),
]  # fmt: skip
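
# Rule order matters: the tokenizer tries these rules top to bottom and takes
# the first match, so whitespace and comments are consumed before anything
# else, and `dotted_name` precedes `name` so that `foo.bar` is matched as a
# single token.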


def get_rules(
    jsx: bool,
    dotted: bool,
    template_string: bool,
) -> list[tuple[str | None, re.Pattern[str]]]:
    """
    Get a tokenization rule list given the passed syntax options.

    Internal to this module.
    """
    rules = []
    for token_type, rule in _rules:
        if not jsx and token_type and 'jsx' in token_type:
            continue
        if not template_string and token_type == 'template_string':
            continue
        if token_type == 'dotted_name':
            if not dotted:
                continue
            token_type = 'name'
        rules.append((token_type, rule))
    return rules
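
# A usage sketch (doctest-style; the expected list is derived from `_rules`
# above -- note `dotted_name` is renamed to `name`, and the JSX and template
# string rules are dropped):
#   >>> [t for t, _ in get_rules(jsx=False, dotted=True, template_string=False)]
#   [None, None, 'linecomment', 'multilinecomment', 'name', 'name', 'number', 'operator', 'string']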


def indicates_division(token: Token) -> bool:
    """A helper that tells the tokenizer whether the current token may be
    followed by a division operator.
    """
    if token.type == 'operator':
        return token.value in (')', ']', '}', '++', '--')
    return token.type in ('name', 'number', 'string', 'regexp')
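
# For example, after `)` or a name/number token a following `/` is division,
# while after `(` or `=` it starts a regexp literal:
#   >>> indicates_division(Token('operator', ')', 1))
#   True
#   >>> indicates_division(Token('operator', '(', 1))
#   False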


def unquote_string(string: str) -> str:
    """Unquote a string with JavaScript rules.  The string has to start and
    end with string delimiters (``'``, ``"``, or the back-tick/grave accent
    for template strings).
    """
    assert string and string[0] == string[-1] and string[0] in '"\'`', (
        'string provided is not properly delimited'
    )
    string = line_join_re.sub('\\1', string[1:-1])
    result: list[str] = []
    add = result.append
    pos = 0

    while True:
        # scan for the next escape
        escape_pos = string.find('\\', pos)
        if escape_pos < 0:
            break
        add(string[pos:escape_pos])

        # check which character is escaped
        next_char = string[escape_pos + 1]
        if next_char in escapes:
            add(escapes[next_char])

        # unicode escapes: try to consume up to four hexadecimal characters
        # and interpret them as a Unicode code point.  If there is no such
        # code point, put all the consumed characters into the string.
        elif next_char in 'uU':
            escaped = uni_escape_re.match(string, escape_pos + 2)
            if escaped is not None:
                escaped_value = escaped.group()
                if len(escaped_value) == 4:
                    try:
                        add(chr(int(escaped_value, 16)))
                    except ValueError:
                        pass
                    else:
                        pos = escape_pos + 6
                        continue
                add(next_char + escaped_value)
                pos = escaped.end()
                continue
            else:
                add(next_char)

        # hex escapes: conversion from a 1- or 2-digit hex value to a char
        # is infallible
        elif next_char in 'xX':
            escaped = hex_escape_re.match(string, escape_pos + 2)
            if escaped is not None:
                escaped_value = escaped.group()
                add(chr(int(escaped_value, 16)))
                pos = escape_pos + 2 + len(escaped_value)
                continue
            else:
                add(next_char)

        # bogus escape.  Just remove the backslash.
        else:
            add(next_char)

        pos = escape_pos + 2

    if pos < len(string):
        add(string[pos:])

    return ''.join(result)
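
# A small doctest-style sketch of the escape handling above ('\u0041' and
# '\x41' both decode to 'A'; the bogus '\q' just loses its backslash):
#   >>> unquote_string(r"'a\tb\u0041\x41\q'")
#   'a\tbAAq'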


def tokenize(
    source: str,
    jsx: bool = True,
    dotted: bool = True,
    template_string: bool = True,
    lineno: int = 1,
) -> Generator[Token, None, None]:
    """
    Tokenize JavaScript/JSX source.  Returns a generator of tokens.

    :param source: The JavaScript source to tokenize.
    :param jsx: Enable (limited) JSX parsing.
    :param dotted: Read dotted names as a single name token.
    :param template_string: Support ES6 template strings.
    :param lineno: Starting line number (optional).
    """
    may_divide = False
    pos = 0
    end = len(source)
    rules = get_rules(jsx=jsx, dotted=dotted, template_string=template_string)

    while pos < end:
        # handle regular rules first
        for token_type, rule in rules:  # noqa: B007
            match = rule.match(source, pos)
            if match is not None:
                break
        # if we don't have a match we don't give up yet, but check for
        # division operators or regular expression literals, based on
        # the status of `may_divide`, which is determined by the last
        # processed non-whitespace token using `indicates_division`.
        else:
            if may_divide:
                match = division_re.match(source, pos)
                token_type = 'operator'
            else:
                match = regex_re.match(source, pos)
                token_type = 'regexp'
            if match is None:
                # whoops, invalid syntax: jump one char ahead and try again
                pos += 1
                continue

        token_value = match.group()
        if token_type is not None:
            token = Token(token_type, token_value, lineno)
            may_divide = indicates_division(token)
            yield token
        lineno += len(line_re.findall(token_value))
        pos = match.end()
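

# A minimal, illustrative usage sketch (an addition for this listing, not
# part of the module itself): tokenize a small snippet and show how the
# lexer distinguishes division from regexp literals via `may_divide`.
if __name__ == '__main__':
    sample = 'ratio = a / b;\npattern = /\\d+/g; // trailing comment'
    for tok in tokenize(sample):
        print(tok)
    # Expected (roughly): the first `/` lexes as an 'operator' token because
    # it follows the name `a`, while `/\d+/g` lexes as a single 'regexp'
    # token because it follows the operator `=`.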