Coverage for /pythoncovmergedfiles/medio/medio/usr/lib/python3.9/shlex.py: 10%

263 statements  

« prev     ^ index     » next       coverage.py v7.3.1, created at 2023-09-25 06:05 +0000

1"""A lexical analyzer class for simple shell-like syntaxes.""" 

2 

3# Module and documentation by Eric S. Raymond, 21 Dec 1998 

4# Input stacking and error message cleanup added by ESR, March 2000 

5# push_source() and pop_source() made explicit by ESR, January 2001. 

6# Posix compliance, split(), string arguments, and 

7# iterator interface by Gustavo Niemeyer, April 2003. 

8# changes to tokenize more like Posix shells by Vinay Sajip, July 2016. 

9 

10import os 

11import re 

12import sys 

13from collections import deque 

14 

15from io import StringIO 

16 

17__all__ = ["shlex", "split", "quote", "join"] 

18 

class shlex:
    "A lexical analyzer class for simple shell-like syntaxes."

    def __init__(self, instream=None, infile=None, posix=False,
                 punctuation_chars=False):
        """Set up the lexer state.

        instream -- a string (wrapped in a StringIO) or an object offering
            read() and readline(); when None, sys.stdin is read and
            *infile* is ignored.
        infile -- name associated with *instream*; used by error_leader()
            and by sourcehook() to resolve relative inclusions.
        posix -- when true, the end-of-input marker is None (instead of
            '') and the POSIX quoting/escaping branches of read_token()
            are enabled.
        punctuation_chars -- False/empty for none, True for '();<>|&', or
            an explicit string of characters; runs of these characters
            are returned as tokens of their own.
        """
        if isinstance(instream, str):
            instream = StringIO(instream)
        if instream is not None:
            self.instream = instream
            self.infile = infile
        else:
            self.instream = sys.stdin
            self.infile = None
        self.posix = posix
        # Value returned by get_token() once the input is exhausted.
        self.eof = None if posix else ''
        self.commenters = '#'
        self.wordchars = ('abcdfeghijklmnopqrstuvwxyz'
                          'ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_')
        if self.posix:
            self.wordchars += ('ßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ'
                               'ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞ')
        self.whitespace = ' \t\r\n'
        self.whitespace_split = False
        self.quotes = '\'"'
        self.escape = '\\'
        self.escapedquotes = '"'
        # Lexer state: ' ' between tokens, 'a' in a word, 'c' in a run of
        # punctuation, a quote/escape character while inside one, and
        # None once end of file has been seen.
        self.state = ' '
        self.pushback = deque()
        self.lineno = 1
        self.debug = 0
        self.token = ''
        self.filestack = deque()
        self.source = None
        if not punctuation_chars:
            punctuation_chars = ''
        elif punctuation_chars is True:
            punctuation_chars = '();<>|&'
        self._punctuation_chars = punctuation_chars
        if punctuation_chars:
            # _pushback_chars is a push back queue used by lookahead logic
            self._pushback_chars = deque()
            # these chars added because allowed in file names, args, wildcards
            self.wordchars += '~-./*?='
            # remove any punctuation chars from wordchars
            t = self.wordchars.maketrans(dict.fromkeys(punctuation_chars))
            self.wordchars = self.wordchars.translate(t)

    @property
    def punctuation_chars(self):
        "Read-only view of the punctuation characters chosen at construction."
        return self._punctuation_chars

    def push_token(self, tok):
        "Push a token onto the stack popped by the get_token method"
        if self.debug >= 1:
            print("shlex: pushing token " + repr(tok))
        self.pushback.appendleft(tok)

    def push_source(self, newstream, newfile=None):
        "Push an input source onto the lexer's input source stack."
        if isinstance(newstream, str):
            newstream = StringIO(newstream)
        # Remember where we were so pop_source() can resume there.
        self.filestack.appendleft((self.infile, self.instream, self.lineno))
        self.infile = newfile
        self.instream = newstream
        self.lineno = 1
        if self.debug:
            if newfile is not None:
                print('shlex: pushing to file %s' % (self.infile,))
            else:
                print('shlex: pushing to stream %s' % (self.instream,))

    def pop_source(self):
        "Pop the input source stack."
        self.instream.close()
        (self.infile, self.instream, self.lineno) = self.filestack.popleft()
        if self.debug:
            print('shlex: popping to %s, line %d'
                  % (self.instream, self.lineno))
        self.state = ' '

    def get_token(self):
        "Get a token from the input stream (or from stack if it's nonempty)"
        if self.pushback:
            tok = self.pushback.popleft()
            if self.debug >= 1:
                print("shlex: popping token " + repr(tok))
            return tok
        # No pushback.  Get a token.
        raw = self.read_token()
        # Handle inclusions
        if self.source is not None:
            while raw == self.source:
                spec = self.sourcehook(self.read_token())
                if spec:
                    (newfile, newstream) = spec
                    self.push_source(newstream, newfile)
                raw = self.get_token()
        # Maybe we got EOF instead?
        while raw == self.eof:
            if not self.filestack:
                return self.eof
            else:
                # An included source ran dry: resume the enclosing one.
                self.pop_source()
                raw = self.get_token()
        # Neither inclusion nor EOF
        if self.debug >= 1:
            if raw != self.eof:
                print("shlex: token=" + repr(raw))
            else:
                print("shlex: token=EOF")
        return raw

    def read_token(self):
        """Read one raw token from the current input stream.

        Ignores the token pushback stack and does not interpret source
        inclusions.  Returns the token text; at end of input returns ''
        (or None in POSIX mode when nothing quoted was read).

        Raises ValueError when the input ends inside a quotation or right
        after an escape character.
        """
        quoted = False
        escapedstate = ' '
        while True:
            if self.punctuation_chars and self._pushback_chars:
                nextchar = self._pushback_chars.pop()
            else:
                nextchar = self.instream.read(1)
            if nextchar == '\n':
                self.lineno += 1
            if self.debug >= 3:
                print("shlex: in state %r I see character: %r" % (self.state,
                                                                  nextchar))
            if self.state is None:
                self.token = ''        # past end of file
                break
            elif self.state == ' ':
                if not nextchar:
                    self.state = None  # end of file
                    break
                elif nextchar in self.whitespace:
                    if self.debug >= 2:
                        print("shlex: I see whitespace in whitespace state")
                    if self.token or (self.posix and quoted):
                        break   # emit current token
                    else:
                        continue
                elif nextchar in self.commenters:
                    # readline() swallows the newline that ends the comment,
                    # so it has to be counted here -- but only if it really
                    # exists (a comment may run to EOF without one).
                    if self.instream.readline().endswith('\n'):
                        self.lineno += 1
                elif self.posix and nextchar in self.escape:
                    escapedstate = 'a'
                    self.state = nextchar
                elif nextchar in self.wordchars:
                    self.token = nextchar
                    self.state = 'a'
                elif nextchar in self.punctuation_chars:
                    self.token = nextchar
                    self.state = 'c'
                elif nextchar in self.quotes:
                    # Non-POSIX mode keeps the quote characters in the token.
                    if not self.posix:
                        self.token = nextchar
                    self.state = nextchar
                elif self.whitespace_split:
                    self.token = nextchar
                    self.state = 'a'
                else:
                    self.token = nextchar
                    if self.token or (self.posix and quoted):
                        break   # emit current token
                    else:
                        continue
            elif self.state in self.quotes:
                quoted = True
                if not nextchar:      # end of file
                    if self.debug >= 2:
                        print("shlex: I see EOF in quotes state")
                    # XXX what error should be raised here?
                    raise ValueError("No closing quotation")
                if nextchar == self.state:
                    if not self.posix:
                        self.token += nextchar
                        self.state = ' '
                        break
                    else:
                        self.state = 'a'
                elif (self.posix and nextchar in self.escape and self.state
                      in self.escapedquotes):
                    escapedstate = self.state
                    self.state = nextchar
                else:
                    self.token += nextchar
            elif self.state in self.escape:
                if not nextchar:      # end of file
                    if self.debug >= 2:
                        print("shlex: I see EOF in escape state")
                    # XXX what error should be raised here?
                    raise ValueError("No escaped character")
                # In posix shells, only the quote itself or the escape
                # character may be escaped within quotes.
                if (escapedstate in self.quotes and
                        nextchar != self.state and nextchar != escapedstate):
                    self.token += self.state
                self.token += nextchar
                self.state = escapedstate
            elif self.state in ('a', 'c'):
                if not nextchar:
                    self.state = None   # end of file
                    break
                elif nextchar in self.whitespace:
                    if self.debug >= 2:
                        print("shlex: I see whitespace in word state")
                    self.state = ' '
                    if self.token or (self.posix and quoted):
                        break   # emit current token
                    else:
                        continue
                elif nextchar in self.commenters:
                    # Same newline accounting as in the ' ' state above.
                    if self.instream.readline().endswith('\n'):
                        self.lineno += 1
                    if self.posix:
                        self.state = ' '
                        if self.token or (self.posix and quoted):
                            break   # emit current token
                        else:
                            continue
                elif self.state == 'c':
                    if nextchar in self.punctuation_chars:
                        self.token += nextchar
                    else:
                        # End of the punctuation run: keep the lookahead
                        # character for the next token unless it is blank.
                        if nextchar not in self.whitespace:
                            self._pushback_chars.append(nextchar)
                        self.state = ' '
                        break
                elif self.posix and nextchar in self.quotes:
                    self.state = nextchar
                elif self.posix and nextchar in self.escape:
                    escapedstate = 'a'
                    self.state = nextchar
                elif (nextchar in self.wordchars or nextchar in self.quotes
                      or (self.whitespace_split and
                          nextchar not in self.punctuation_chars)):
                    self.token += nextchar
                else:
                    # The character ends the word; save it for the next call.
                    if self.punctuation_chars:
                        self._pushback_chars.append(nextchar)
                    else:
                        self.pushback.appendleft(nextchar)
                    if self.debug >= 2:
                        print("shlex: I see punctuation in word state")
                    self.state = ' '
                    if self.token or (self.posix and quoted):
                        break   # emit current token
                    else:
                        continue
        result = self.token
        self.token = ''
        # In POSIX mode an empty unquoted result means end of input, while
        # an empty quoted string ('' or "") is a real token.
        if self.posix and not quoted and result == '':
            result = None
        if self.debug > 1:
            if result:
                print("shlex: raw token=" + repr(result))
            else:
                print("shlex: raw token=EOF")
        return result

    def sourcehook(self, newfile):
        "Hook called on a filename to be sourced."
        # Strip one pair of surrounding double quotes, if present.
        if newfile[0] == '"':
            newfile = newfile[1:-1]
        # This implements cpp-like semantics for relative-path inclusion.
        if isinstance(self.infile, str) and not os.path.isabs(newfile):
            newfile = os.path.join(os.path.dirname(self.infile), newfile)
        # The opened file is handed to the caller; pop_source() closes it.
        return (newfile, open(newfile, "r"))

    def error_leader(self, infile=None, lineno=None):
        "Emit a C-compiler-like, Emacs-friendly error-message leader."
        if infile is None:
            infile = self.infile
        if lineno is None:
            lineno = self.lineno
        return "\"%s\", line %d: " % (infile, lineno)

    def __iter__(self):
        "Return the lexer itself; tokens are produced by __next__()."
        return self

    def __next__(self):
        "Return the next token, raising StopIteration at end of input."
        token = self.get_token()
        if token == self.eof:
            raise StopIteration
        return token

304 

def split(s, comments=False, posix=True):
    """Split the string *s* using shell-like syntax.

    A POSIX-mode lexer is used unless *posix* is false.  Comment
    recognition is switched off unless *comments* is true.  Passing
    None for *s* is deprecated and triggers a DeprecationWarning.
    """
    if s is None:
        import warnings
        warnings.warn("Passing None for 's' to shlex.split() is deprecated.",
                      DeprecationWarning, stacklevel=2)
    lexer = shlex(s, posix=posix)
    # Only whitespace separates tokens.
    lexer.whitespace_split = True
    if not comments:
        # With no comment characters, '#' is treated as ordinary text.
        lexer.commenters = ''
    return [token for token in lexer]

316 

317 

def join(split_command):
    """Return a shell-escaped string from *split_command*.

    Each element is passed through quote() and the results are joined
    with single spaces.
    """
    return ' '.join(map(quote, split_command))

321 

322 

# Finds the first character that is not safe to leave unquoted.
_find_unsafe = re.compile(r'[^\w@%+=:,./-]', re.ASCII).search


def quote(s):
    """Return a shell-escaped version of the string *s*.

    An empty value becomes ''.  A string made only of safe characters
    is returned unchanged.  Anything else is wrapped in single quotes.
    """
    if not s:
        return "''"

    needs_quoting = _find_unsafe(s) is not None
    if not needs_quoting:
        return s

    # use single quotes, and put single quotes into double quotes
    # the string $'b is then quoted as '$'"'"'b'
    escaped = s.replace("'", "'\"'\"'")
    return "'" + escaped + "'"

335 

336 

def _print_tokens(lexer):
    """Print each token from *lexer*, stopping at the first falsy token."""
    while True:
        token = lexer.get_token()
        if not token:
            return
        print("Token: " + repr(token))

343 

if __name__ == '__main__':
    if len(sys.argv) != 1:
        # The first argument names the file to tokenize.
        path = sys.argv[1]
        with open(path) as stream:
            _print_tokens(shlex(stream, path))
    else:
        # No argument given: tokenize standard input.
        _print_tokens(shlex())

350 _print_tokens(shlex(f, fn))