#!/usr/bin/python

# http://docs.python.org/library/compiler.html
# deprecated in 2.6, removed in 3.0, but we are on 2.5...
import compiler
import sys
# used to type-check the nodes of the AST
from compiler.ast import Import, From
import glob
import os
import dircache
import stat
import subprocess

class ImportNotFound(Exception):
    """Signals that a module could not be imported or located."""

class ImportParseError(Exception):
    """Signals that information about a module could not be read."""

def convertImportToDebianPkg(imp):
    """Return the name of the Debian package that ships module `imp`.

    The module is imported to discover the file it was loaded from, then
    ``dpkg -S`` is asked which package owns that file.

    imp -- module name as written in an import statement; may be dotted.

    Returns the package name printed by dpkg (the text before the first
    ':' of its output).  If dpkg prints several lines, the names found on
    each line are joined with ", ".

    Raises ImportNotFound if the module cannot be imported, if dpkg cannot
    be run, or if dpkg reports an error.  Raises ImportParseError if the
    module has no __file__ attribute, or if importing it raises
    AttributeError.

    NOTE: importing a module executes its top-level code, so only run this
    against modules you trust.
    """
    try:
        top_level = __import__(imp)
    except (ImportError, ValueError):
        # module not found (ValueError: empty module name)
        raise ImportNotFound()
    except AttributeError:
        # raised by the imported module's own top-level code
        raise ImportParseError()

    # __import__('a.b') returns the top-level package 'a'; the module that
    # was actually asked for is registered under its full dotted name
    mod = sys.modules.get(imp)
    if mod is None:
        mod = top_level

    try:
        path = mod.__file__
    except AttributeError:
        # __file__ attribute doesn't exist
        raise ImportParseError()

    # dpkg knows the source file, not the compiled one; only touch the
    # extension, never a '.pyc' appearing elsewhere in the path
    if path.endswith(('.pyc', '.pyo')):
        path = path[:-1]
    path = os.path.abspath(path)

    # if it's a symlink, follow it (one level); a relative target is
    # resolved against the directory holding the link
    if os.path.islink(path):
        target = os.readlink(path)
        path = os.path.normpath(os.path.join(os.path.dirname(path), target))

    # argument list instead of a shell string: the path is handed to dpkg
    # verbatim, so spaces or shell metacharacters in it are harmless
    try:
        proc = subprocess.Popen(["dpkg", "-S", path],
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE,
                                universal_newlines=True,
                                )
        stdout, stderr = proc.communicate()
    except OSError:
        # dpkg could not be executed on this machine
        raise ImportNotFound()

    # any error output, or a failing exit status, means no package found
    if stderr or proc.returncode != 0:
        raise ImportNotFound()

    # each output line looks like "package: /path"; keep the package part
    packages = [line.split(':', 1)[0] for line in stdout.splitlines() if line]
    return ', '.join(packages)

def find_py_files_in_dir(path):
    """Yield every glob match for "*.py" directly inside `path`.

    The "py" extension is matched in any letter case.
    Thanks to recipe 2.19 from the Python Cookbook.
    """
    pattern = os.path.join(path, "*.[Pp][Yy]")
    matches = glob.glob(pattern)
    for found in matches:
        yield found

def find_py_files_in_dir_recursive(path, _seen=None):
    """Yield the py files found in `path`, then those in its subdirectories.

    path  -- directory to start from.
    _seen -- internal: set of resolved directory paths already visited;
             callers should leave it unset.

    Subdirectories are visited in sorted name order.  Each real directory
    is walked only once, so symbolic links that point back into the tree
    (or two links to the same directory) neither loop nor report the same
    files twice.
    """
    if _seen is None:
        _seen = set()
    _seen.add(os.path.realpath(path))

    # check first in the dir passed as parameter
    for match in find_py_files_in_dir(path):
        yield match

    # listdir returns bare names, so join them back onto path;
    # sorted() keeps the traversal order deterministic
    for item in sorted(os.listdir(path)):
        subpath = os.path.join(path, item)
        if not os.path.isdir(subpath):
            continue
        # already walked through another route (e.g. a symlink cycle)
        if os.path.realpath(subpath) in _seen:
            continue
        # yield every item found in the recursive call
        for subfile in find_py_files_in_dir_recursive(subpath, _seen):
            yield subfile

def parseFileImport(file):
    """Parses file's syntax tree to extract import statements"""
    try:
        # creates the syntax tree
        mod = compiler.parseFile(file)

        nodes = mod.node.nodes

        for node in nodes:
            if isinstance(node,Import):
                for name, alias in node.names:
                    yield name
            if isinstance(node,From):
                yield node.modname
    except Exception, e:
        print "Error parsing " + file + "; exception: " + str(e)


def addValuesToDict(dict, key, value):
    """Accumulate `value` under `key` in the mapping `dict`, in place.

    A key that is not present yet is created holding `value`; otherwise
    `value` is combined with the stored entry using ``+=``.
    """
    if key not in dict:
        dict[key] = value
        return
    dict[key] += value

# main
#
# Usage: <script> DIRECTORY
# Scans DIRECTORY recursively for py files, counts the modules they import
# and prints which Debian package provides each module.

# fail with a readable message instead of an IndexError/OSError traceback
if len(sys.argv) < 2:
    sys.exit("usage: %s DIRECTORY" % sys.argv[0])
if not os.path.isdir(sys.argv[1]):
    sys.exit("%s: not a directory" % sys.argv[1])

# module name -> number of import statements naming it
import_dict = {}

# module name -> (Debian package, count)
mod_pkgs = {}
# module name -> (reason the package could not be determined, count)
mod_not_found = {}

# main file parse loop
for py_file in find_py_files_in_dir_recursive(sys.argv[1]):
    for imp in parseFileImport(py_file):
        addValuesToDict(import_dict, imp, 1)


for module, count in import_dict.items():
    try:
        pkg = convertImportToDebianPkg(module)
        mod_pkgs[module] = (pkg, count)
    except ImportNotFound:
        mod_not_found[module] = ('module not found on this machine', count)
    except ImportParseError:
        mod_not_found[module] = ('error parsing module', count)


import pprint
pprint.pprint(mod_pkgs)
pprint.pprint(mod_not_found)
