1 # -*- coding: iso-8859-1 -*-
2 """A lexical analyzer class for simple shell-like syntaxes."""
4 # Module and documentation by Eric S. Raymond, 21 Dec 1998
5 # Input stacking and error message cleanup added by ESR, March 2000
6 # push_source() and pop_source() made explicit by ESR, January 2001.
7 # Posix compliance, split(), string arguments, and
8 # iterator interface by Gustavo Niemeyer, April 2003.
13 #from collections import deque
20 def appendleft(self, item):
21 self.data.insert(0, item)
23 return self.data.pop(0)
30 return isinstance(s, str)
33 return isinstance(s, basestring)
35 # Use the "imp" module to protect the imports below from fixers.
37 _cStringIO = imp.load_module('cStringIO', *imp.find_module('cStringIO'))
39 _StringIO = imp.load_module('StringIO', *imp.find_module('StringIO'))
40 StringIO = _StringIO.StringIO
43 StringIO = _cStringIO.StringIO
# Public API of this module: the lexer class and the convenience splitter.
__all__ = ["shlex", "split"]
# Docstring of the enclosing `class shlex:` (the class header line is
# elided from this excerpt).
"A lexical analyzer class for simple shell-like syntaxes."
def __init__(self, instream=None, infile=None, posix=False):
    # A raw string argument is wrapped in a StringIO so the lexer always
    # pulls characters from a file-like object.
    if is_basestring(instream):
        instream = StringIO(instream)
    if instream is not None:
        self.instream = instream
    # NOTE(review): source numbering jumps 54 -> 57 here; an elided
    # `else:` branch makes stdin the fallback input source.
    self.instream = sys.stdin
    # Characters that may start/continue an unquoted word.  The lowercase
    # run really is spelled "abcdfe..." in the original; only set
    # membership matters, so the odd ordering is harmless.
    self.wordchars = ('abcdfeghijklmnopqrstuvwxyz'
                      'ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_')
    # Latin-1 accented letters; in the original this extension is applied
    # only in posix mode (the guard line is elided here).
    self.wordchars = self.wordchars + ('ßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ'
                                       'ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞ')
    # Token-separator characters.
    self.whitespace = ' \t\r\n'
    # When true, read_token() treats any non-whitespace run as a word.
    self.whitespace_split = False
    # Quote styles inside which the escape character is honored.
    self.escapedquotes = '"'
    # LIFO queue of tokens pushed back by push_token()/read_token().
    self.pushback = deque()
    # Stack of saved (infile, instream, lineno) triples for push_source().
    self.filestack = deque()
    # Debug trace (the `if self.debug:` guard is elided in this excerpt).
    print 'shlex: reading from %s, line %d' \
          % (self.instream, self.lineno)
def push_token(self, tok):
    "Push a token onto the stack popped by the get_token method"
    # Debug trace (the `if self.debug >= 1:` guard is elided here).
    print "shlex: pushing token " + repr(tok)
    # Pushed-back tokens are served LIFO by get_token().
    self.pushback.appendleft(tok)
def push_source(self, newstream, newfile=None):
    "Push an input source onto the lexer's input source stack."
    # A plain string becomes a StringIO, mirroring __init__.
    if is_basestring(newstream):
        newstream = StringIO(newstream)
    # Remember the current source so pop_source() can restore it.
    self.filestack.appendleft((self.infile, self.instream, self.lineno))
    # NOTE(review): numbering jumps 96 -> 98; the original also records
    # `self.infile = newfile` and resets the line counter here.
    self.instream = newstream
    # Debug traces (the `if self.debug:` guard and the `else:` between
    # the two prints are elided in this excerpt).
    if newfile is not None:
        print 'shlex: pushing to file %s' % (self.infile,)
    print 'shlex: pushing to stream %s' % (self.instream,)
def pop_source(self):
    "Pop the input source stack."
    # Close the abandoned stream before restoring its parent.
    self.instream.close()
    (self.infile, self.instream, self.lineno) = self.filestack.popleft()
    # Debug trace (guard elided; the original also resets self.state
    # to ' ' after popping -- that line is missing from this excerpt).
    print 'shlex: popping to %s, line %d' \
          % (self.instream, self.lineno)
# (The `def get_token(self):` header line is elided from this excerpt.)
"Get a token from the input stream (or from stack if it's nonempty)"
# Serve a previously pushed-back token first; the `if self.pushback:`
# guard, the debug-level check and the early `return tok` are elided.
tok = self.pushback.popleft()
print "shlex: popping token " + repr(tok)
# No pushback. Get a token.
raw = self.read_token()
# Handle "source" inclusion requests: when a token equals the configured
# source keyword, the next token names a file to splice into the input.
if self.source is not None:
    while raw == self.source:
        spec = self.sourcehook(self.read_token())
        (newfile, newstream) = spec
        self.push_source(newstream, newfile)
        raw = self.get_token()
# Maybe we got EOF instead?
while raw == self.eof:
    if not self.filestack:
        # (Elided: the original returns EOF when the file stack is
        # empty, otherwise pops a source before retrying.)
        raw = self.get_token()
# Neither inclusion nor EOF
# Debug traces (the `if self.debug >= 1:` guard and the if/else choosing
# between the two messages are elided in this excerpt).
print "shlex: token=" + repr(raw)
print "shlex: token=EOF"
def read_token(self):
    # Core tokenizer: a character-at-a-time state machine.  self.state is
    #   None    -> past end of file
    #   ' '     -> between tokens, skipping whitespace
    #   'a'     -> accumulating a word
    #   a quote character   -> inside a quoted string
    #   an escape character -> just after an escape character
    # NOTE(review): the original numbering is gappy throughout; the
    # `while True:` loop header, the `quoted`/`escapedstate`
    # initializations, debug-level guards and several if/else/continue
    # lines are elided, so the indentation below is a best-effort
    # reconstruction of the surviving lines.
    nextchar = self.instream.read(1)
    # (Guarded by `if nextchar == '\n':` in the original.)
    self.lineno = self.lineno + 1
    # Debug trace of every state transition (guard elided).
    print "shlex: in state", repr(self.state), \
          "I see character:", repr(nextchar)
    if self.state is None:
        self.token = ''  # past end of file
    elif self.state == ' ':
        # (Reached when read(1) returned '' -- end of input.)
        self.state = None  # end of file
        elif nextchar in self.whitespace:
            print "shlex: I see whitespace in whitespace state"
            if self.token or (self.posix and quoted):
                break  # emit current token
        elif nextchar in self.commenters:
            # A comment character eats the rest of the line.
            self.instream.readline()
            self.lineno = self.lineno + 1
        elif self.posix and nextchar in self.escape:
            self.state = nextchar
        elif nextchar in self.wordchars:
            self.token = nextchar
        elif nextchar in self.quotes:
            self.token = nextchar
            self.state = nextchar
        elif self.whitespace_split:
            self.token = nextchar
            # (Else branch: punctuation becomes a one-char token.)
            self.token = nextchar
            if self.token or (self.posix and quoted):
                break  # emit current token
    elif self.state in self.quotes:
        if not nextchar:  # end of file
            print "shlex: I see EOF in quotes state"
            # XXX what error should be raised here?
            raise ValueError, "No closing quotation"
        if nextchar == self.state:
            # Closing quote: non-posix mode keeps the quote character.
            self.token = self.token + nextchar
        elif self.posix and nextchar in self.escape and \
             self.state in self.escapedquotes:
            # Escapes are honored only inside "escapable" quote styles.
            escapedstate = self.state
            self.state = nextchar
            # (Else branch: any other char is accumulated verbatim.)
            self.token = self.token + nextchar
    elif self.state in self.escape:
        if not nextchar:  # end of file
            print "shlex: I see EOF in escape state"
            # XXX what error should be raised here?
            raise ValueError, "No escaped character"
        # In posix shells, only the quote itself or the escape
        # character may be escaped within quotes.
        if escapedstate in self.quotes and \
           nextchar != self.state and nextchar != escapedstate:
            self.token = self.token + self.state
        self.token = self.token + nextchar
        self.state = escapedstate
    elif self.state == 'a':
        # (EOF reached while inside a word.)
        self.state = None  # end of file
        elif nextchar in self.whitespace:
            print "shlex: I see whitespace in word state"
            if self.token or (self.posix and quoted):
                break  # emit current token
        elif nextchar in self.commenters:
            # Comment inside a word: swallow the rest of the line.
            self.instream.readline()
            self.lineno = self.lineno + 1
            if self.token or (self.posix and quoted):
                break  # emit current token
        elif self.posix and nextchar in self.quotes:
            self.state = nextchar
        elif self.posix and nextchar in self.escape:
            self.state = nextchar
        elif nextchar in self.wordchars or nextchar in self.quotes \
            or self.whitespace_split:
            self.token = self.token + nextchar
            # (Else branch: punctuation ends the word; push the char
            # back so the next call sees it.)
            self.pushback.appendleft(nextchar)
            print "shlex: I see punctuation in word state"
            break  # emit current token
    # Posix mode distinguishes an empty-but-quoted token ('') from EOF;
    # (elided: `result = self.token` / `self.token = ''` just above,
    # and `result = None` under this guard).
    if self.posix and not quoted and result == '':
        # Debug traces (guards and else elided).
        print "shlex: raw token=" + repr(result)
        print "shlex: raw token=EOF"
def sourcehook(self, newfile):
    "Hook called on a filename to be sourced."
    # A double-quoted filename token: strip the surrounding quotes.
    if newfile[0] == '"':
        newfile = newfile[1:-1]
    # cpp-like inclusion semantics: a relative path is resolved against
    # the directory of the file currently being lexed, when known.
    if not os.path.isabs(newfile) and is_basestring(self.infile):
        basedir = os.path.dirname(self.infile)
        newfile = os.path.join(basedir, newfile)
    # The caller (get_token) pushes the returned stream as a new source.
    return (newfile, open(newfile, "r"))
def error_leader(self, infile=None, lineno=None):
    "Emit a C-compiler-like, Emacs-friendly error-message leader."
    # (Elided: the original defaults infile/lineno to self.infile and
    # self.lineno when the arguments are None.)
    return "\"%s\", line %d: " % (infile, lineno)
# Fragment of the iterator-protocol method (its `def` header is elided):
# fetch one token per step and stop iterating once EOF is reached.
token = self.get_token()
if token == self.eof:
def split(s, comments=False):
    "Split the string s using posix shell-like rules."
    lex = shlex(s, posix=True)
    # Any non-whitespace run counts as a word, like a real shell.
    lex.whitespace_split = True
    # (Elided: comment handling and the loop collecting tokens into the
    # result list; this line sits inside that loop in the original.)
    token = lex.get_token()
# Simple command-line test driver: lex a file and print its tokens.
if __name__ == '__main__':
    if len(sys.argv) == 1:
        # (Elided: with no argument the lexer reads stdin; the `else:`
        # branch binding `file = sys.argv[1]` is also missing here.)
        lexer = shlex(open(file), file)
        # (Elided: `while 1:` loop and EOF break around these lines.)
        tt = lexer.get_token()
        print "Token: " + repr(tt)
330 # indent-tabs-mode:nil
332 # vim: set expandtab tabstop=4 shiftwidth=4: