1# This copy of shlex.py from Python 3.6 is distributed with argcomplete.
2# It contains only the shlex class, with modifications as noted.
3
4"""A lexical analyzer class for simple shell-like syntaxes."""
5
6# Module and documentation by Eric S. Raymond, 21 Dec 1998
7# Input stacking and error message cleanup added by ESR, March 2000
8# push_source() and pop_source() made explicit by ESR, January 2001.
9# Posix compliance, split(), string arguments, and
10# iterator interface by Gustavo Niemeyer, April 2003.
11# changes to tokenize more like Posix shells by Vinay Sajip, July 2016.
12
13import os
14import sys
15from collections import deque
16from io import StringIO
17from typing import Optional
18
19
class shlex:
    """A lexical analyzer class for simple shell-like syntaxes.

    The lexer reads its input one character at a time and is driven by
    ``self.state``:

    * ``' '``   -- between tokens (skipping whitespace)
    * ``'a'``   -- inside a word
    * ``'c'``   -- inside a run of punctuation characters
    * a character from ``self.quotes`` -- inside that kind of quotes
    * a character from ``self.escape`` -- right after an escape character
    * ``None``  -- end of input reached

    argcomplete additions: ``wordbreaks`` is a string of characters whose
    most recent position inside the token currently being built is
    recorded in ``last_wordbreak_pos`` (``None`` when no such position is
    recorded).
    """

    def __init__(self, instream=None, infile=None, posix=False, punctuation_chars=False):
        """Create a lexer.

        instream -- a string, or an object providing ``read(1)``,
            ``readline()`` and ``close()``; ``sys.stdin`` when ``None``.
        infile -- name associated with *instream*; used by
            ``error_leader`` and ``sourcehook``.  Ignored (set to
            ``None``) when *instream* is ``None``.
        posix -- when true, quotes and escapes are interpreted and
            removed, and end of input is reported as ``None`` instead
            of ``''``.
        punctuation_chars -- falsy for none, ``True`` for ``'();<>|&'``,
            or a string of characters to be returned as separate
            punctuation tokens.
        """
        # Modified by argcomplete: 2/3 compatibility
        if isinstance(instream, str):
            instream = StringIO(instream)
        if instream is not None:
            self.instream = instream
            self.infile = infile
        else:
            self.instream = sys.stdin
            self.infile = None
        self.posix = posix
        if posix:
            self.eof = None
        else:
            self.eof = ''
        self.commenters = '#'
        self.wordchars = 'abcdfeghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_'
        # Modified by argcomplete: 2/3 compatibility
        # if self.posix:
        #     self.wordchars += ('ßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ'
        #                        'ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞ')
        self.whitespace = ' \t\r\n'
        self.whitespace_split = False
        self.quotes = '\'"'
        self.escape = '\\'
        self.escapedquotes = '"'
        self.state: Optional[str] = ' '
        self.pushback: deque = deque()
        self.lineno = 1
        self.debug = 0
        self.token = ''
        self.filestack: deque = deque()
        self.source = None
        if not punctuation_chars:
            punctuation_chars = ''
        elif punctuation_chars is True:
            punctuation_chars = '();<>|&'
        self.punctuation_chars = punctuation_chars
        # _pushback_chars is a push back queue used by lookahead logic.
        # It is created unconditionally so that assigning
        # ``punctuation_chars`` after construction cannot make
        # read_token() fail on a missing attribute.
        self._pushback_chars: deque = deque()
        if punctuation_chars:
            # these chars added because allowed in file names, args, wildcards
            self.wordchars += '~-./*?='
            # remove any punctuation chars from wordchars
            t = self.wordchars.maketrans(dict.fromkeys(punctuation_chars))
            self.wordchars = self.wordchars.translate(t)

        # Modified by argcomplete: Record last wordbreak position
        self.last_wordbreak_pos = None
        self.wordbreaks = ''

    def push_token(self, tok):
        """Push a token onto the stack popped by the get_token method."""
        if self.debug >= 1:
            print("shlex: pushing token " + repr(tok))
        self.pushback.appendleft(tok)

    def push_source(self, newstream, newfile=None):
        """Push an input source onto the lexer's input source stack.

        *newstream* may be a string or a stream object.  The current
        (infile, instream, lineno) triple is saved and restored by
        ``pop_source``; line counting restarts at 1 for the new source.
        """
        # Modified by argcomplete: 2/3 compatibility
        if isinstance(newstream, str):
            newstream = StringIO(newstream)
        self.filestack.appendleft((self.infile, self.instream, self.lineno))
        self.infile = newfile
        self.instream = newstream
        self.lineno = 1
        if self.debug:
            if newfile is not None:
                print('shlex: pushing to file %s' % (self.infile,))
            else:
                print('shlex: pushing to stream %s' % (self.instream,))

    def pop_source(self):
        """Pop the input source stack.

        Closes the current stream, restores the previously saved source
        and resets the lexer to the between-tokens state.
        """
        self.instream.close()
        (self.infile, self.instream, self.lineno) = self.filestack.popleft()
        if self.debug:
            print('shlex: popping to %s, line %d' % (self.instream, self.lineno))
        self.state = ' '

    def get_token(self):
        """Get a token from the input stream (or from stack if it's nonempty).

        Handles source inclusion (when ``self.source`` is set) and
        falls back to the enclosing source when a pushed source is
        exhausted.  Returns ``self.eof`` once all input is consumed.
        """
        if self.pushback:
            tok = self.pushback.popleft()
            if self.debug >= 1:
                print("shlex: popping token " + repr(tok))
            return tok
        # No pushback.  Get a token.
        raw = self.read_token()
        # Handle inclusions
        if self.source is not None:
            while raw == self.source:
                # The token following the inclusion keyword names the source.
                spec = self.sourcehook(self.read_token())
                if spec:
                    (newfile, newstream) = spec
                    self.push_source(newstream, newfile)
                raw = self.get_token()
        # Maybe we got EOF instead?
        while raw == self.eof:
            if not self.filestack:
                return self.eof
            else:
                self.pop_source()
                raw = self.get_token()
        # Neither inclusion nor EOF
        if self.debug >= 1:
            if raw != self.eof:
                print("shlex: token=" + repr(raw))
            else:
                print("shlex: token=EOF")
        return raw

    def read_token(self):
        """Read one raw token from the current input stream.

        Returns the token text.  At end of input returns ``''``, or
        ``None`` in posix mode (unless an empty quoted string was read,
        which is returned as ``''``).

        Raises ValueError when input ends inside quotes or directly
        after an escape character.
        """
        quoted = False
        escapedstate = ' '
        while True:
            # Characters pushed back by the look-ahead logic take
            # precedence over fresh input.
            if self.punctuation_chars and self._pushback_chars:
                nextchar = self._pushback_chars.pop()
            else:
                nextchar = self.instream.read(1)
            if nextchar == '\n':
                self.lineno += 1
            if self.debug >= 3:
                print("shlex: in state %r I see character: %r" % (self.state, nextchar))
            if self.state is None:
                self.token = ''  # past end of file
                break
            elif self.state == ' ':
                if not nextchar:
                    self.state = None  # end of file
                    break
                elif nextchar in self.whitespace:
                    if self.debug >= 2:
                        print("shlex: I see whitespace in whitespace state")
                    if self.token or (self.posix and quoted):
                        break  # emit current token
                    else:
                        continue
                elif nextchar in self.commenters:
                    # Discard the rest of the comment line.
                    self.instream.readline()
                    self.lineno += 1
                elif self.posix and nextchar in self.escape:
                    escapedstate = 'a'
                    self.state = nextchar
                elif nextchar in self.wordchars:
                    self.token = nextchar
                    self.state = 'a'
                elif nextchar in self.punctuation_chars:
                    self.token = nextchar
                    self.state = 'c'
                elif nextchar in self.quotes:
                    # Non-posix mode keeps the quote characters in the token.
                    if not self.posix:
                        self.token = nextchar
                    self.state = nextchar
                elif self.whitespace_split:
                    self.token = nextchar
                    self.state = 'a'
                    # Modified by argcomplete: Record last wordbreak position
                    if nextchar in self.wordbreaks:
                        self.last_wordbreak_pos = len(self.token) - 1
                else:
                    self.token = nextchar
                    if self.token or (self.posix and quoted):
                        break  # emit current token
                    else:
                        continue
            elif self.state in self.quotes:
                quoted = True
                if not nextchar:  # end of file
                    if self.debug >= 2:
                        print("shlex: I see EOF in quotes state")
                    # XXX what error should be raised here?
                    raise ValueError("No closing quotation")
                if nextchar == self.state:
                    if not self.posix:
                        self.token += nextchar
                        self.state = ' '
                        break
                    else:
                        # Posix: the word may continue after the closing quote.
                        self.state = 'a'
                elif self.posix and nextchar in self.escape and self.state in self.escapedquotes:
                    # Remember which quote state to return to after the escape.
                    escapedstate = self.state
                    self.state = nextchar
                else:
                    self.token += nextchar
            elif self.state in self.escape:
                if not nextchar:  # end of file
                    if self.debug >= 2:
                        print("shlex: I see EOF in escape state")
                    # XXX what error should be raised here?
                    raise ValueError("No escaped character")
                # In posix shells, only the quote itself or the escape
                # character may be escaped within quotes.
                if escapedstate in self.quotes and nextchar != self.state and nextchar != escapedstate:
                    self.token += self.state
                self.token += nextchar
                self.state = escapedstate
            elif self.state in ('a', 'c'):
                if not nextchar:
                    self.state = None  # end of file
                    break
                elif nextchar in self.whitespace:
                    if self.debug >= 2:
                        print("shlex: I see whitespace in word state")
                    self.state = ' '
                    if self.token or (self.posix and quoted):
                        break  # emit current token
                    else:
                        continue
                elif nextchar in self.commenters:
                    self.instream.readline()
                    self.lineno += 1
                    if self.posix:
                        self.state = ' '
                        if self.token or (self.posix and quoted):
                            break  # emit current token
                        else:
                            continue
                elif self.posix and nextchar in self.quotes:
                    self.state = nextchar
                elif self.posix and nextchar in self.escape:
                    escapedstate = 'a'
                    self.state = nextchar
                elif self.state == 'c':
                    if nextchar in self.punctuation_chars:
                        self.token += nextchar
                    else:
                        # End of the punctuation run: keep the character
                        # for the next token unless it is whitespace.
                        if nextchar not in self.whitespace:
                            self._pushback_chars.append(nextchar)
                        self.state = ' '
                        break
                elif (nextchar in self.wordchars or nextchar in self.quotes
                      or (self.whitespace_split
                          and nextchar not in self.punctuation_chars)):
                    # whitespace_split must not swallow punctuation
                    # characters, otherwise punctuation_chars would be
                    # ignored inside words (the Python 3.8+ shlex docs note
                    # the two options were made compatible).  With an empty
                    # punctuation_chars this is the same test as before.
                    self.token += nextchar
                    # Modified by argcomplete: Record last wordbreak position
                    if nextchar in self.wordbreaks:
                        self.last_wordbreak_pos = len(self.token) - 1
                else:
                    if self.punctuation_chars:
                        self._pushback_chars.append(nextchar)
                    else:
                        self.pushback.appendleft(nextchar)
                    if self.debug >= 2:
                        print("shlex: I see punctuation in word state")
                    self.state = ' '
                    if self.token or (self.posix and quoted):
                        break  # emit current token
                    else:
                        continue
        result: Optional[str] = self.token
        self.token = ''
        # Posix mode signals "no token" with None; an explicitly quoted
        # empty string is still a real (empty) token.
        if self.posix and not quoted and result == '':
            result = None
        if self.debug > 1:
            if result:
                print("shlex: raw token=" + repr(result))
            else:
                print("shlex: raw token=EOF")
        # Modified by argcomplete: Record last wordbreak position
        # The recorded position is cleared when the lexer is back in the
        # between-tokens state.
        if self.state == ' ':
            self.last_wordbreak_pos = None
        return result

    def sourcehook(self, newfile):
        """Hook called on a filename to be sourced.

        Strips one pair of surrounding double quotes, resolves a
        relative name against the directory of the current ``infile``
        and returns ``(filename, open file object)``.  The returned file
        is closed by ``pop_source`` once it has been pushed as a source.
        """
        if newfile[0] == '"':
            newfile = newfile[1:-1]
        # This implements cpp-like semantics for relative-path inclusion.
        # Modified by argcomplete: 2/3 compatibility
        if isinstance(self.infile, str) and not os.path.isabs(newfile):
            newfile = os.path.join(os.path.dirname(self.infile), newfile)
        return (newfile, open(newfile, "r"))

    def error_leader(self, infile=None, lineno=None):
        """Emit a C-compiler-like, Emacs-friendly error-message leader.

        *infile* and *lineno* default to the lexer's current values.
        """
        if infile is None:
            infile = self.infile
        if lineno is None:
            lineno = self.lineno
        return "\"%s\", line %d: " % (infile, lineno)

    def __iter__(self):
        return self

    def __next__(self):
        """Return the next token; raise StopIteration at end of input."""
        token = self.get_token()
        if token == self.eof:
            raise StopIteration
        return token

    # Modified by argcomplete: 2/3 compatibility
    next = __next__