pycscope.py

Go to the documentation of this file.
00001 #!/usr/bin/env python
00002 
00003 # This file is Copyright 2003, 2006, 2007, 2009, 2010 Dean Hall.
00004 #
00005 # This file is part of the Python-on-a-Chip program.
00006 # Python-on-a-Chip is free software: you can redistribute it and/or modify
00007 # it under the terms of the GNU LESSER GENERAL PUBLIC LICENSE Version 2.1.
00008 # 
00009 # Python-on-a-Chip is distributed in the hope that it will be useful,
00010 # but WITHOUT ANY WARRANTY; without even the implied warranty of
00011 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
00012 # A copy of the GNU LESSER GENERAL PUBLIC LICENSE Version 2.1
00013 # is seen in the file COPYING up one directory from this.
00014 
00015 """
00016 PyCscope
00017 
00018 PyCscope creates a Cscope-like index file for a tree of Python source.
00019 """
00020 
00021 ## @file
00022 #  @copybrief pycscope
00023 
00024 ## @package pycscope
00025 #  @brief PyCscope creates a Cscope-like index file for a tree of Python source.
00026 #
00027 # 2007/12/25:
00028 #   Improvements contributed by K. Rader of Google:
00029 #   - Added the `-i` argument to specify a file-list file
00030 #   - Fixups to the header and footer to make a valid file that cscope can read
00031 #
00032 
00033 
# Module metadata.
__author__ = "Dean Hall"
__copyright__ = "Copyright 2003, 2006, 2007, 2009, 2010 Dean Hall.  See LICENSE for details."
__date__ = "2007/12/25"
__version__ = "0.3"
# Command-line help text; main() prints it when getopt rejects the arguments.
__usage__ = """Usage: pycscope.py [-R] [-f reffile] [-i srclistfile] [files ...]

-R              Recurse directories for files.
-f reffile      Use reffile as cross-ref file name instead of cscope.out.
-i srclistfile  Use a file that contains a list of source files to scan."""
00043 
00044 
00045 import getopt, sys, os, os.path, string, types
00046 import keyword, parser, symbol, token
00047 
# Marks as defined by Cscope
# Each mark is a tab followed by a type character; walkAst writes the current
# mark directly in front of a symbol name.
MARK_FILE = "\t@"       # precedes each file name; a bare one ends the index
MARK_FUNC_DEF = "\t$"   # set by the "def" keyword for the name that follows
MARK_FUNC_CALL = "\t`"  # inserted before a name that is followed by "("
MARK_FUNC_END = "\t}"   # written when a function body dedents back out
MARK_INCLUDE = "\t~<"   # TODO: assume all includes are global for now
MARK_ASGN = "\t="       # not used by the functions visible in this file
MARK_CLASS = "\tc"      # set by the "class" keyword for the name that follows
MARK_GLOBAL = "\tg"     # not used by the functions visible in this file
MARK_FUNC_PARM = "\tp"  # not used by the functions visible in this file
00058 
# Reverse the key,value pairs in the token dict
# token.tok_name maps token number -> token name; the inverse gives the
# number for a name.  items() is used instead of iteritems() so the same
# line runs on both Python 2 and Python 3.
tok_name_lookup = dict((name, num) for num, name in token.tok_name.items())
TOK_NEWLINE = tok_name_lookup["NEWLINE"]
TOK_NAME = tok_name_lookup["NAME"]
TOK_LPAR = tok_name_lookup["LPAR"]
TOK_ENDMARKER = tok_name_lookup["ENDMARKER"]
TOK_INDENT = tok_name_lookup["INDENT"]
TOK_DEDENT = tok_name_lookup["DEDENT"]
00067 
# Reverse the key,value pairs in the symbol dict
# symbol.sym_name maps symbol number -> grammar symbol name; the inverse
# gives the number for a name.  items() is used instead of iteritems() so the
# same line runs on both Python 2 and Python 3.
sym_name_lookup = dict((name, num) for num, name in symbol.sym_name.items())
SYM_TRAILER = sym_name_lookup["trailer"]
SYM_VARARGSLIST = sym_name_lookup["varargslist"]
00072 
# Get the list of Python keywords and add a few common builtins
# Work on a copy: keyword.kwlist is shared by every importer of the keyword
# module, so extending it in place would leak these extra names to them.
kwlist = list(keyword.kwlist)
kwlist.extend(("True", "False", "None", "object"))
00076 
# Globals for the recursive walkAst function
latestnewline = 1       # index in the output buffer where the current line's entries begin
latestsymbol = ""       # number of the most recently visited grammar symbol
latesttoken = ""        # number of the most recently visited token
prevtoken = ""          # number of the token visited before latesttoken
mark = ""               # Cscope mark to write before the next non-keyword name
infuncdef = False       # True between a "def" and the dedent that closes it
indentcount = 0         # indents seen since the latest "def"

# These two are reset by parseSource before each file is walked; they are
# also initialised here so that every name walkAst declares global exists
# as soon as the module is loaded.
sourcelinehassymbol = False     # True once the current line produced a symbol
currentlinenum = 0              # source line number of the latest token
00085 
00086 
def main():
    """Parse command line args and act accordingly.

    Options (see __usage__):
      -R              recurse into sub-directories
      -f reffile      write the index to reffile instead of cscope.out
      -i srclistfile  read more file/directory names, one per line

    Every named file or directory is indexed relative to the current
    working directory; with no names at all, "." is indexed.  Files that
    fail to parse are reported on stderr and skipped.  Exits with status 2
    when the options cannot be parsed.
    """
    # Parse the command line arguments
    try:
        opts, args = getopt.getopt(sys.argv[1:], "Rf:i:")
    except getopt.GetoptError:
        print(__usage__)
        sys.exit(2)
    recurse = False
    indexfn = "cscope.out"
    for o, a in opts:
        if o == "-R":
            recurse = True
        elif o == "-f":
            indexfn = a
        elif o == "-i":
            # Close the list file promptly and ignore blank lines: an empty
            # name joins to the base path itself, which would index the
            # whole current directory by accident.
            with open(a, 'r') as srclist:
                for line in srclist:
                    line = line.rstrip()
                    if line:
                        args.append(line)

    # Create the buffer to store the output (list of strings)
    indexbuff = []
    fnamesbuff = []

    # Search current dir by default
    if not args:
        args = ["."]

    # Parse the given list of files/dirs
    basepath = os.getcwd()
    for name in args:
        if os.path.isdir(os.path.join(basepath, name)):
            parseDir(basepath, name, indexbuff, recurse, fnamesbuff)
        else:
            try:
                parseFile(basepath, name, indexbuff, fnamesbuff)
            except SyntaxError as se:
                # Keep going, but say which file was left out of the index
                sys.stderr.write("pycscope: skipping file: %s\n" % se)

    # Symbol data for the last file ends with a file mark
    indexbuff.append("\n" + MARK_FILE)
    writeIndex(basepath, indexfn, indexbuff, fnamesbuff)
00128 
00129 
def parseDir(basepath, relpath, indexbuff, recurse, fnamesbuff):
    """Parses all files in the directory and
    recurses into subdirectories if requested.

    basepath   -- absolute directory that relpath is relative to
    relpath    -- directory to scan, relative to basepath
    indexbuff  -- list of index strings, appended to in place
    recurse    -- when true, descend into sub-directories
    fnamesbuff -- list of indexed file names, appended to in place

    Files that raise SyntaxError are reported on stderr and skipped.
    """
    dirpath = os.path.join(basepath, relpath)
    for name in os.listdir(dirpath):
        childrel = os.path.join(relpath, name)

        # Directories are either descended into or ignored; they are never
        # handed to parseFile, which would try to open one named "*.py".
        if os.path.isdir(os.path.join(dirpath, name)):
            if recurse:
                parseDir(basepath, childrel, indexbuff, recurse, fnamesbuff)
            continue

        try:
            parseFile(basepath, childrel, indexbuff, fnamesbuff)
        except SyntaxError as se:
            # Keep going, but say which file was left out of the index
            sys.stderr.write("pycscope: skipping file: %s\n" % se)
00146 
00147 
def parseFile(basepath, relpath, indexbuff, fnamesbuff):
    """Parses a source file and puts the resulting index into the buffer.

    basepath   -- directory that relpath is relative to
    relpath    -- file to index; anything not ending in ".py" is ignored
    indexbuff  -- list of index strings, appended to in place
    fnamesbuff -- list of indexed file names, appended to in place

    Raises SyntaxError, with its filename set to the full path, when the
    file cannot be parsed.  The file mark and file name have already been
    appended to the buffers at that point.
    """
    global latestnewline

    # Don't parse if it's not python source
    if not relpath.endswith(".py"):
        return

    # Open the file and get the contents
    fullpath = os.path.join(basepath, relpath)
    with open(fullpath, 'r') as f:
        filecontents = f.read()

    # Add the file mark to the index
    fnamesbuff.append(relpath)
    indexbuff.append("\n%s%s" % (MARK_FILE, relpath))
    latestnewline = len(indexbuff)

    # Add path info to any syntax errors in the source files
    try:
        parseSource(filecontents, indexbuff)
    except SyntaxError as se:
        se.filename = fullpath
        # A bare raise re-raises the same (now annotated) exception and
        # keeps the original traceback.
        raise
00173 
00174 
def parseSource(sourcecode, indexbuff):
    """Index one file's worth of python source text.

    The text is parsed, converted to nested lists and handed to walkAst,
    which appends the index entries to indexbuff.  A SyntaxError raised by
    the parser is passed on to the caller.
    """
    global sourcelinehassymbol, currentlinenum

    # Build the parse tree as nested lists; the True argument asks for line
    # information, which walkAst reads from each token node
    tree_as_list = parser.ast2list(parser.suite(sourcecode), True)

    # Start every file with fresh per-line state
    sourcelinehassymbol = False
    currentlinenum = 0

    # Everything else happens while walking the tree
    walkAst(tree_as_list, indexbuff)
00190 
00191 
def walkAst(astlist, indexbuff):
    """Scan the AST for tokens, write out index lines.

    astlist is one node of the nested-list parse tree.  Element 0 is the
    node's type number.  A value above 256 is treated as a grammar symbol
    whose remaining list elements are child nodes; any other value is
    treated as a token with its text in element 1 and its source line
    number in element 2.

    indexbuff is the list of output strings; it is appended to and edited
    in place.  State is carried between calls in module-level globals.
    Returns None.
    """
    global latestnewline
    global latestsymbol
    global latesttoken
    global prevtoken
    global mark
    global sourcelinehassymbol
    global infuncdef
    global indentcount
    global currentlinenum

    # Remember the latest symbol
    if astlist[0] > 256:
        latestsymbol = astlist[0]

    # Handle the tokens
    else:
        # Save the previous token and get the latest one
        prevtoken = latesttoken
        latesttoken = astlist[0]

        # If this code is on a new line number
        if astlist[2] != currentlinenum:
            currentlinenum = astlist[2]

            # If there was a symbol of interest,
            # remember this location in the index
            if sourcelinehassymbol:
                latestnewline = len(indexbuff)
                sourcelinehassymbol = False

            # If there was no symbol of interest between this and the previous
            # newline, remove all entries added since the previous newline
            else:
                del indexbuff[latestnewline:]

            # Write the new line number
            indexbuff.append("\n\n%d " % astlist[2])

            # Clear an include mark when a newline token is reached
            # This is what ends a comma-separated list of modules after import
            if mark == MARK_INCLUDE:
                mark = ""

        if latesttoken == TOK_NAME:
            # If a name is not a python keyword, it is a symbol of interest
            if astlist[1] not in kwlist:

                # Remember that there is a symbol of interest
                sourcelinehassymbol = True

                # Write the mark and the symbol
                indexbuff.append("\n%s%s\n" % (mark, astlist[1]))

                # Clear the mark unless it's an include mark
                # This is what allows a comma-separated list of modules after import
                if mark != MARK_INCLUDE:
                    mark = ""

            # If the name is a python keyword
            else:
                # Some keywords determine what mark should prefix the next name
                kw = astlist[1]
                if kw == "def":
                    mark = MARK_FUNC_DEF

                    # Remember that we're in a function definition
                    # NOTE(review): a def nested inside another def resets
                    # indentcount here, so the outer function appears to get
                    # no function-end mark -- confirm whether that matters.
                    infuncdef = True
                    indentcount = 0
                elif kw == "import":
                    mark = MARK_INCLUDE
                elif kw == "class":
                    mark = MARK_CLASS

                # Write out the keyword
                indexbuff.append("%s " % kw)

        # This set of tokens and symbols indicates a function call (not perfect)
        elif (latesttoken == TOK_LPAR) and (prevtoken == TOK_NAME) and (
            (latestsymbol == SYM_TRAILER) or (latestsymbol == SYM_VARARGSLIST)):

            # Insert a function-call mark before the previous name
            # The [1:] drops the leading newline of the "\n<mark><name>\n"
            # entry written for that name so the call mark can go first.
            # NOTE(review): if the previous NAME was a keyword the last entry
            # is "kw " and this would drop its first letter -- confirm that
            # combination cannot reach this branch.
            indexbuff[-1] = "\n%s%s( " % (MARK_FUNC_CALL, indexbuff[-1][1:])

        # Count the number of indents; to be used by dedent
        elif latesttoken == TOK_INDENT:
            if infuncdef:
                indentcount += 1

        # When dedent reaches the level of the function def,
        # write the function-end mark
        elif latesttoken == TOK_DEDENT:
            if infuncdef:
                indentcount -= 1
                if indentcount == 0:
                    # The end mark goes in front of the last buffer entry;
                    # the insert shifts that entry by one, and latestnewline
                    # is moved along with it.
                    indexbuff.insert(-1, "\n\n%d \n%s\n" % (astlist[2], MARK_FUNC_END))
                    latestnewline += 1
                    infuncdef = False

        # Replace the last line number placeholder with a newline
        # when at the end of a file
        elif latesttoken == TOK_ENDMARKER:
            if indexbuff:
                indexbuff[-1] = "\n"

        # For uninteresting tokens, just write the accompanying string
        else:
            if astlist[1]:
                # Escape embedded newlines so the text stays on one index line
                nonsymboltext = astlist[1].replace("\n","\\n") + ' '
            else:
                nonsymboltext = ''
            indexbuff.append(nonsymboltext)

    # Recurse into all nodes
    # Only list elements are child nodes; a token's text and line number
    # are skipped by the isinstance test.
    for child in astlist[1:]:
        if isinstance(child, list):
            walkAst(child, indexbuff)
00311 
00312 
def writeIndex(basepath, indexfn, indexbuff, fnamesbuff):
    """Write the index buffer to the output file.

    basepath   -- directory recorded in the header; the output file is
                  created inside it
    indexfn    -- name of the output file, joined onto basepath
    indexbuff  -- list of strings that are concatenated to form the index
    fnamesbuff -- list of indexed file names, written in the trailer

    The file is closed even if one of the writes fails.
    """
    index = ''.join(indexbuff)
    fnames = '\n'.join(fnamesbuff) + '\n'

    # The header is len(basepath) + 24 characters long and the trailer below
    # starts with a newline, so this sum is the file offset of the trailer's
    # first digit (presumably the trailer offset cscope looks for).
    hdr_len = len(basepath) + 25

    with open(os.path.join(basepath, indexfn), 'w') as fout:
        # Write the header and index
        fout.write("cscope 15 %s -c %010d" % (basepath, hdr_len + len(index)))
        fout.write(index)

        # Write trailer info
        fout.write("\n1\n.\n0\n")
        fout.write("%d\n" % len(fnamesbuff))
        fout.write("%d\n" % len(fnames))
        fout.write(fnames)
00332 
00333 
# Build the index only when run as a script, not when imported as a module.
if __name__ == "__main__":
    main()

Generated on Mon Oct 18 07:40:47 2010 for Python-on-a-chip by  doxygen 1.5.9