Python global variable scope confusion. Why are these variables not accessible?

I see there are a few of these questions on here, but I haven't found one that quite matches what I'm after.
I have a common file, let's call it tools.py. In this file I have a host of path definitions to use and an init_paths function to set some key paths based on command-line arguments:
def init_paths(args):
    global tools_dir, tools_src, tools_bin
    if args.tools_set:
        tools_dir = os.path.realpath(os.path.join(args.tools_set, "tools"))
    else:
        tools_dir = os.path.join(BAR_PATH, "tools")

FOO_PATH = os.path.realpath(os.path.join(os.path.dirname(os.path.realpath(__file__)), ".."))
BAR_PATH = os.path.join(FOO_PATH, "foobar")
tools_dir = None
tools_src = None
tools_bin = None
etc...
I have a main file, let's call it main.py, where I want to use these.
if __name__ == "__main__":
    args = parseArgs()
    from tools import init_paths
    init_paths(args)
    doStuffFunction(args.one, args.two, args.three)
I have left out the meat and potatoes to be sure, but I believe this should be enough to illustrate my global scope problem. When I run this: python main.py --tools-set=/path/to/tools, I am expecting the call to init_paths to set up some key paths I wish to use later in the doStuffFunction().
def doStuffFunction():
    searchPath = os.path.join(tools_dir, "folder")
This fails with AttributeError: 'NoneType' object has no attribute 'endswith'. I'm pretty sure this is because tools_dir is not getting set, but why?
EDIT
main.py
#!/usr/bin/env python
import sys
import os
import argparse
import glob
from tools import *

def parseArgs():
    parser = argparse.ArgumentParser(description="parse my args")
    parser.add_argument("--toolchain-root", type=str, default=None, help='specify toolchain directory')
    args = parser.parse_args()
    return args

def doStuffFunction():
    output = 'output'
    if not os.path.isdir(output):
        os.makedirs(output)
    gimmySugar(output)

def gimmySugar(output):
    fileList = []
    linkBook = {}
    searchPath = os.path.join(tools_BIN_ROOT, 'gcc-4.8.5')
    for root, dirs, files in os.walk(searchPath):
        for libFile in glob.glob(root + '/*.so*'):
            fileList.append(libFile)
            if os.path.islink(libFile):
                linksWith = os.readlink(libFile)
                linkBook[libFile] = linksWith

if __name__ == "__main__":
    # script was called directly from the command line
    args = parseArgs()
    from tools import init_settings
    init_settings(args)
    doStuffFunction()
tools.py
import os

def init_settings(args):
    global tools_DIR, tools_SRC_ROOT, tools_OBJ_ROOT, tools_BIN_ROOT
    if args.toolchain_root:
        tools_DIR = os.path.realpath(os.path.join(args.toolchain_root, "toolchain"))
    else:
        tools_DIR = os.path.join(USER_DIR, "")
    tools_SRC_ROOT = os.path.join(tools_DIR, "src")
    tools_OBJ_ROOT = os.path.join(tools_DIR, "obj")
    tools_BIN_ROOT = os.path.join(tools_DIR, "bin")

ROOT_PATH = os.path.realpath(os.path.join(os.path.dirname(os.path.realpath(__file__)), ".."))
OUTPUT_PATH = os.path.join(ROOT_PATH, "outputs")
BUILD_PATH = os.path.join(OUTPUT_PATH, "build")
USER_DIR = "/usr/lib64/"
tools_DIR = None
tools_SRC_ROOT = None
tools_OBJ_ROOT = None
tools_BIN_ROOT = None

# This line will fail
searchPath = os.path.join(tools_BIN_ROOT, 'gcc-4.8.5')
The scope of a global variable is the module level: tools_BIN_ROOT will not be shared across modules unless it is passed explicitly. The variable tools_BIN_ROOT is a global variable ONLY in tools.py; main.py does not see any of the globals defined in tools.py. In particular, from tools import * copies the current values at import time, so the name tools_BIN_ROOT in main.py still points at None even after init_settings rebinds the global inside tools.py.
To inspect this, you can use print(globals()) in both files.
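For instance, a quick hypothetical check in main.py, run after init_settings(args):
import tools
print(tools.tools_BIN_ROOT)  # the global inside the tools module -- now set
print(tools_BIN_ROOT)        # the copy made by 'from tools import *' -- still None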
Avoid global variables if possible. Here is a simple workaround.
(I strongly recommend refactoring your code around a config object or OOP instead.)
tools.py
def get_tools_BIN_ROOT():
    # reads the module-level global at call time, after init_settings has run
    return tools_BIN_ROOT
main.py
from tools import get_tools_BIN_ROOT
searchPath = os.path.join(get_tools_BIN_ROOT(), 'gcc-4.8.5')
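For illustration, a minimal sketch of the config-object refactor recommended above (the ToolPaths name and its fields are assumptions, not part of the original code):
# tools.py -- hypothetical refactor around a config object
import os

class ToolPaths(object):
    def __init__(self, toolchain_root=None):
        if toolchain_root:
            self.tools_dir = os.path.realpath(os.path.join(toolchain_root, "toolchain"))
        else:
            self.tools_dir = "/usr/lib64/"
        self.bin_root = os.path.join(self.tools_dir, "bin")

# main.py -- pass the object around instead of relying on globals
# paths = ToolPaths(args.toolchain_root)
# searchPath = os.path.join(paths.bin_root, 'gcc-4.8.5')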

Related

Best practice for keeping a config file in python

I have a Python script with a config file which looks like this:
PROD = 'production'
DEV = 'dev'
ENVIRONMENT = None
and I have a function which gets the desired environment from a command-line argument and sets it like:
if sys.argv[1] in [config.PROD, config.DEV]:
    config.ENVIRONMENT = sys.argv[1]
I understood it's not good practice when I started importing the config file in multiple files and ENVIRONMENT kept resetting back to None.
So, my question is: what is the best practice in this case?
I'm not sure exactly what the best practice is but I like using JSON files. I use the following class as a layer of abstraction for interfacing with the config (properties) file. You can create one JSONPropertiesFile and pass it around your application.
import json
from collections import OrderedDict
import os
from stat import *  # ST_SIZE etc
from datetime import datetime
from copy import deepcopy

class JSONPropertiesFileError(Exception):
    pass

class JSONPropertiesFile(object):
    def __init__(self, file_path, default={}):
        self.file_path = file_path
        self._default_properties = default
        self._validate_file_path(file_path)

    def _validate_file_path(self, file_path):
        if not file_path.endswith(".json"):
            raise JSONPropertiesFileError(f"Must be a JSON file: {file_path}")
        if not os.path.exists(file_path):
            self.set(self._default_properties)

    def set(self, properties):
        new_properties = deepcopy(self._default_properties)
        new_properties.update(properties)
        with open(self.file_path, 'w') as file:
            json.dump(new_properties, file, indent=4)

    def get(self):
        properties = deepcopy(self._default_properties)
        with open(self.file_path) as file:
            properties.update(json.load(file, object_pairs_hook=OrderedDict))
        return properties

    def get_file_info(self):
        st = os.stat(self.file_path)
        res = {
            'size': st[ST_SIZE],
            'size_str': str(round(st[ST_SIZE] / 1000, 2)) + ' KB',
            'last_mod': datetime.fromtimestamp(st[ST_MTIME]).strftime("%Y-%m-%d")
        }
        return res
In your case you might use it like this:
file_path = "path/to/your/config/file.json"
default_properties = {
    'PROD': 'production',
    'DEV': 'dev',
    'ENVIRONMENT': ""
}
config_file = JSONPropertiesFile(file_path, default_properties)
config = config_file.get()
print(config["PROD"])
config["PROD"] = "something else"
config_file.set(config)  # save new config
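After that final set call, the JSON file on disk would look something like this:
{
    "PROD": "something else",
    "DEV": "dev",
    "ENVIRONMENT": ""
}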

Can I configure python to have matlab like print?

Can I configure Python to have MATLAB-like print, so that when I just have a function
returnObject()
it simply prints that object without me having to type print around it? I assume this is not easy, but something like: if an object does not get bound to some other var, it should get printed, so that this would work.
a = 5 #prints nothing
b = getObject() #prints nothing
a #prints 5
b #prints getObject()
getObject() #prints the object
If you use an IPython notebook, individual cells work like this. But you can only view one object per cell by typing the object's name. To see multiple objects you'd need to call print, or use lots of cells.
You could write a script to modify the original script based on a set of rules that define what to print, then run the modified script.
A basic script to do this would be:
f = open('main.py', 'r')
p = open('modified.py', 'w')
p.write('def main():\n')
for line in f:
    temp = line
    # wrap lines consisting of a single bare token in print()
    if len(temp.split()) == 1:
        temp = 'print(' + temp.strip() + ')\n'
    p.write('\t' + temp)
p.close()
f.close()

from modified import main
main()
The script main.py would then look like this:
x = 236
x
output:
236
The idea is as follows: parse the AST of the Python code, replace every bare expression with a call to print taking the content of the expression as its argument, and then run the modified version. I'm not sure whether it works with every piece of code, but you might try. Save it as matlab.py and run your code as python3 -m matlab file.py.
#!/usr/bin/env python3
import ast

class PrintAdder(ast.NodeTransformer):
    def add_print(self, node):
        print_func = ast.Name("print", ast.Load())
        print_call = ast.Call(print_func, [node.value], [])
        print_statement = ast.Expr(print_call)
        return print_statement

    def visit_Expr(self, node):
        # leave existing print(...) calls alone
        if (isinstance(node.value, ast.Call)
                and isinstance(node.value.func, ast.Name)
                and node.value.func.id == 'print'):
            return node
        return self.add_print(node)

def main():
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('infile', type=argparse.FileType(), nargs='?', default='-')
    args = parser.parse_args()

    with args.infile as infile:
        code = infile.read()
    file_name = args.infile.name
    tree = ast.parse(code, file_name, 'exec')
    tree = PrintAdder().visit(tree)
    tree = ast.fix_missing_locations(tree)
    bytecode = compile(tree, file_name, 'exec')
    exec(bytecode)

if __name__ == '__main__':
    main()
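As a quick sanity check, assuming a file named file.py containing a bare expression:
# file.py
x = 236
x

$ python3 -m matlab file.py
236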

How to create a dynamic configuration file using Python

I have a Python script which is controlled by a config file called system.config. The structure of the config file is like below, with some default values:
[company]
companyname: XYZ
[profile]
name: ABC
joining: 1/1/2014
The code for the config file is config_parser_details.py:
import ConfigParser
import sys

Config = ConfigParser.ConfigParser()
Config.read("system.config")
filename = "system.config"

def ConfigSectionMap(section):
    dict1 = {}
    options = Config.options(section)
    for option in options:
        try:
            dict1[option] = Config.get(section, option)
            if dict1[option] == -1:
                DebugPrint("skip: %s" % option)
        except:
            print("exception on %s!" % option)
            dict1[option] = None
    return dict1

company = ConfigSectionMap("company")['companyname']
name = ConfigSectionMap("profile")['name']
joindate = ConfigSectionMap("profile")['joining']
Now the code for my script, test.py:
import config_parser_details as p
import sys
import warnings
import os
company = p.company
name = p.name
date = p.joindate
print("%s\n" %company)
print("%s\n" %name)
output is
XYZ
ABC
Now I want to give input to the config file through the command line, like:
python test.py --compname="testing"
If any argument is missing from the command line, then the default value will be the input.
You could use the argparse library to parse command-line arguments. Your test.py file would then look like this:
import config_parser_details as p
import sys
import warnings
import os
import argparse
commandLineArgumentParser = argparse.ArgumentParser()
commandLineArgumentParser.add_argument("-c", "--compname", help="Company name", default=p.company)
commandLineArguments = commandLineArgumentParser.parse_args()
company = commandLineArguments.compname
name = p.name
date = p.joindate
print("%s\n" %company)
print("%s\n" %name)
I'd advise looking into a tool like docopt.
For a quick fix though, you can try doing this
def ConfigSectionMap(section):
    # start from the (option, value) pairs in the config file for this section
    options = dict(Config.items(section))
    arg_dict = {}
    for command_line_argument in sys.argv[1:]:
        arg = command_line_argument.split("=")
        arg_dict[arg[0][2:]] = arg[1]
    for key in arg_dict:
        options[key] = arg_dict[key]
    return options
This will load all the default options. Any options passed on the command line will override or add to the options dict.
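For example, with a hypothetical invocation like:
python test.py --companyname=testing
ConfigSectionMap("company") would then return {'companyname': 'testing'}, the command-line value overriding the XYZ default from system.config.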
First of all, I'd move the code into a main section so that you can import config_parser_details without executing code:
def main():
    Config = ConfigParser.ConfigParser()
    Config.read("system.config")
    filename = "system.config"
    company = ConfigSectionMap("company")['companyname']
    name = ConfigSectionMap("profile")['name']
    joindate = ConfigSectionMap("profile")['joining']

if __name__ == '__main__':
    main()
Secondly, I'd use STB land's suggestion of parsing the command line with argparse, something like:
def main():
    # do the parsing thing first, then:
    filename = args.filename
    do_stuff(filename)
This way you can neatly use Python's own unit test framework or nosetests to write test files that don't require you to manually specify parameters:
def test_basic():
    # create a temporary file with tempfile.NamedTemporaryFile
    tmpfile = tempfile.NamedTemporaryFile()
    # add test data to tmpfile
    do_stuff(tmpfile)
    # check the output
    assert ....
This comes with the added benefit of not having global variables, which will complicate your life later.

How to print a variable from a function at python prompt

There are various Python files in a directory and all of them contain a function description() as follows:
def description():
    desc = 'something'
    return desc
Now I have main.py as follows:
def a():
    pth = os.listdir('homedir/workspace')
    for filename in pth:
        exec "import " + filename
        desc = eval(filename + '.desciption()')
    print desc
Right now when I run python main.py, nothing happens. How do I print this desc when I run python main.py?
Thanks in advance!
Assuming the import worked, and that you have imported a module called filename in each iteration, then you could get the module by name and call its description() method:
import sys
mod = sys.modules[filename]
print mod.description()
But note that it may make more sense to print the module's pydocs:
print mod.__doc__
You didn't close quotes properly in this line:
pth = os.listdir('homedir/workspace)
Also you should not use eval here:
desc = eval(filename + '.desciption()')
and I assume you wanted to import by variable here:
exec "import " + filename
This is how it should look:
def a():
    import importlib
    pth = os.listdir('homedir/workspace')
    for filename in pth:
        mdl = importlib.import_module(os.path.splitext(filename)[0])
        desc = mdl.description()
        print desc
see https://docs.python.org/2/library/importlib.html#importlib.import_module
https://docs.python.org/2/library/os.path.html#os.path.splitext
You missed the closing ' in the pth variable
You've referenced desc on the last line without declaring it first.
Start your function with desc = None
def a():
    pth = os.listdir('homedir/workspace')
    for filename in pth:
        module_name = os.path.splitext(filename)[0]
        exec "import " + module_name
        desc = eval(module_name + '.description()')
        print desc
Make sure main.py is located in the same path as your modules, or add the path to sys.path:
import sys
sys.path.append(r"homedir/workspace")

Return a list of imported Python modules used in a script?

I am writing a program that categorizes a list of Python files by which modules they import. As such I need to scan the collection of .py files and return a list of which modules they import. As an example, if one of the files I scan has the following lines:
import os
import sys, gtk
I would like it to return:
["os", "sys", "gtk"]
I played with modulefinder and wrote:
from modulefinder import ModuleFinder

finder = ModuleFinder()
finder.run_script('testscript.py')

print 'Loaded modules:'
for name, mod in finder.modules.iteritems():
    print '%s ' % name,
but this returns more than just the modules used in the script. As an example in a script which merely has:
import os
print os.getenv('USERNAME')
The modules returned from the ModuleFinder script return:
tokenize heapq __future__ copy_reg sre_compile _collections cStringIO _sre functools random cPickle __builtin__ subprocess cmd gc __main__ operator array select _heapq _threading_local abc _bisect posixpath _random os2emxpath tempfile errno pprint binascii token sre_constants re _abcoll collections ntpath threading opcode _struct _warnings math shlex fcntl genericpath stat string warnings UserDict inspect repr struct sys pwd imp getopt readline copy bdb types strop _functools keyword thread StringIO bisect pickle signal traceback difflib marshal linecache itertools dummy_thread posix doctest unittest time sre_parse os pdb dis
...whereas I just want it to return 'os', as that was the module used in the script.
Can anyone help me achieve this?
UPDATE: I just want to clarify that I would like to do this without running the Python file being analyzed, and just scanning the code.
IMO the best way to do this is to use the http://furius.ca/snakefood/ package. The author has done all of the required work to get not only directly imported modules; it uses the AST to parse the code for runtime dependencies that a more static analysis would miss.
Worked up a command example to demonstrate:
sfood ./example.py | sfood-cluster > example.deps
That will generate a basic dependency file of each unique module. For even more detail use:
sfood -r -i ./example.py | sfood-cluster > example.deps
To walk a tree and find all imports, you can also do this in code:
Please NOTE: the AST chunks of this routine were lifted from the snakefood source, which has this copyright: Copyright (C) 2001-2007 Martin Blais. All Rights Reserved. (It relies on the compiler module, which exists only in Python 2; it was removed in Python 3.)
import os
import compiler
from compiler.ast import Discard, Const
from compiler.visitor import ASTVisitor

def pyfiles(startPath):
    r = []
    d = os.path.abspath(startPath)
    if os.path.exists(d) and os.path.isdir(d):
        for root, dirs, files in os.walk(d):
            for f in files:
                n, ext = os.path.splitext(f)
                if ext == '.py':
                    r.append([d, f])
    return r

class ImportVisitor(object):
    def __init__(self):
        self.modules = []
        self.recent = []

    def visitImport(self, node):
        self.accept_imports()
        self.recent.extend((x[0], None, x[1] or x[0], node.lineno, 0)
                           for x in node.names)

    def visitFrom(self, node):
        self.accept_imports()
        modname = node.modname
        if modname == '__future__':
            return  # Ignore these.
        for name, as_ in node.names:
            if name == '*':
                # We really don't know...
                mod = (modname, None, None, node.lineno, node.level)
            else:
                mod = (modname, name, as_ or name, node.lineno, node.level)
            self.recent.append(mod)

    def default(self, node):
        pragma = None
        if self.recent:
            if isinstance(node, Discard):
                children = node.getChildren()
                if len(children) == 1 and isinstance(children[0], Const):
                    const_node = children[0]
                    pragma = const_node.value
            self.accept_imports(pragma)

    def accept_imports(self, pragma=None):
        self.modules.extend((m, r, l, n, lvl, pragma)
                            for (m, r, l, n, lvl) in self.recent)
        self.recent = []

    def finalize(self):
        self.accept_imports()
        return self.modules

class ImportWalker(ASTVisitor):
    def __init__(self, visitor):
        ASTVisitor.__init__(self)
        self._visitor = visitor

    def default(self, node, *args):
        self._visitor.default(node)
        ASTVisitor.default(self, node, *args)

def parse_python_source(fn):
    contents = open(fn, 'rU').read()
    ast = compiler.parse(contents)
    vis = ImportVisitor()
    compiler.walk(ast, vis, ImportWalker(vis))
    return vis.finalize()

for d, f in pyfiles('/Users/bear/temp/foobar'):
    print d, f
    print parse_python_source(os.path.join(d, f))
I recently needed all the dependencies for a given Python script and I took a different approach than the other answers. I only cared about top-level module names (e.g., I wanted foo from import foo.bar).
This is the code using the ast module:
import ast

modules = set()

def visit_Import(node):
    for name in node.names:
        modules.add(name.name.split(".")[0])

def visit_ImportFrom(node):
    # if node.module is missing it's a "from . import ..." statement
    # if level > 0 it's a "from .submodule import ..." statement
    if node.module is not None and node.level == 0:
        modules.add(node.module.split(".")[0])

node_iter = ast.NodeVisitor()
node_iter.visit_Import = visit_Import
node_iter.visit_ImportFrom = visit_ImportFrom
Testing with a python file foo.py that contains:
# foo.py
import sys, os
import foo1
from foo2 import bar
from foo3 import bar as che
import foo4 as boo
import foo5.zoo
from foo6 import *
from . import foo7, foo8
from .foo12 import foo13
from foo9 import foo10, foo11

def do():
    import bar1
    from bar2 import foo
    from bar3 import che as baz
I could get all the modules in foo.py by doing something like this:
with open("foo.py") as f:
    node_iter.visit(ast.parse(f.read()))
print(modules)
which would give me this output:
set(['bar1', 'bar3', 'bar2', 'sys', 'foo9', 'foo4', 'foo5', 'foo6', 'os', 'foo1', 'foo2', 'foo3'])
You might want to try dis (pun intended):
import dis
from collections import defaultdict
from pprint import pprint

statements = """
from __future__ import (absolute_import,
                        division)
import os
import collections, itertools
from math import *
from gzip import open as gzip_open
from subprocess import check_output, Popen
"""

instructions = dis.get_instructions(statements)
imports = [__ for __ in instructions if 'IMPORT' in __.opname]

grouped = defaultdict(list)
for instr in imports:
    grouped[instr.opname].append(instr.argval)

pprint(grouped)
outputs
defaultdict(<class 'list'>,
            {'IMPORT_FROM': ['absolute_import',
                             'division',
                             'open',
                             'check_output',
                             'Popen'],
             'IMPORT_NAME': ['__future__',
                             'os',
                             'collections',
                             'itertools',
                             'math',
                             'gzip',
                             'subprocess'],
             'IMPORT_STAR': [None]})
Your imported modules are grouped['IMPORT_NAME'].
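If you only want the top-level package names, a small follow-up sketch on top of the grouped dict above:
top_level = {name.split('.')[0] for name in grouped['IMPORT_NAME']}
print(sorted(top_level))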
It depends how thorough you want to be. Determining the set of used modules is a Turing-complete problem: some Python code uses lazy importing to only import things it actually uses on a particular run, and some generates things to import dynamically (e.g. plugin systems).
python -v will trace import statements - it's arguably the simplest thing to check.
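For example, a rough sketch of that check (the exact trace format differs between Python versions):
$ python -v myscript.py 2>&1 | grep '^import'
import os # precompiled from /usr/lib/python2.7/os.pyc
import sys # builtin
...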
This works, using importlib to actually import the module and inspect to get the members:
#! /usr/bin/env python
#
# test.py
#
# Find Modules
#
import inspect, importlib as implib

if __name__ == "__main__":
    mod = implib.import_module("example")
    for i in inspect.getmembers(mod, inspect.ismodule):
        print i[0]
#! /usr/bin/env python
#
# example.py
#
import sys
from os import path

if __name__ == "__main__":
    print "Hello World !!!!"
Output:
tony#laptop .../~:$ ./test.py
path
sys
I was looking for something similar and I found a gem in a package called PyScons. The Scanner does just what you want (in 7 lines), using an import_hook. Here is an abbreviated example:
import modulefinder, sys

class SingleFileModuleFinder(modulefinder.ModuleFinder):
    def import_hook(self, name, caller, *arg, **kwarg):
        if caller.__file__ == self.name:
            # Only call the parent at the top level.
            return modulefinder.ModuleFinder.import_hook(self, name, caller, *arg, **kwarg)

    def __call__(self, node):
        self.name = str(node)
        self.run_script(self.name)

if __name__ == '__main__':
    # Example entry, run with './script.py filename'
    print 'looking for includes in %s' % sys.argv[1]
    mf = SingleFileModuleFinder()
    mf(sys.argv[1])
    print '\n'.join(mf.modules.keys())
Well, you could always write a simple script that searches the file for import statements. This one finds all imported modules and files, including those imported in functions or classes:
def find_imports(toCheck):
    """
    Given a filename, returns a list of modules imported by the program.
    Only modules that can be imported from the current directory
    will be included. This program does not run the code, so import statements
    in if/else or try/except blocks will always be included.
    """
    import imp
    importedItems = []
    with open(toCheck, 'r') as pyFile:
        for line in pyFile:
            # ignore comments
            line = line.strip().partition("#")[0].partition("as")[0].split(' ')
            if line[0] == "import":
                for imported in line[1:]:
                    # remove commas (this doesn't check for commas if
                    # they're supposed to be there!)
                    imported = imported.strip(", ")
                    try:
                        # check to see if the module can be imported
                        # (doesn't actually import - just finds it if it exists)
                        imp.find_module(imported)
                        # add to the list of items we imported
                        importedItems.append(imported)
                    except ImportError:
                        # ignore items that can't be imported
                        # (unless that isn't what you want?)
                        pass
    return importedItems

toCheck = raw_input("Which file should be checked: ")
print find_imports(toCheck)
This doesn't do anything for from module import something style imports, though that could easily be added, depending on how you want to deal with those. It also doesn't do any syntax checking, so if you have some funny business like import sys gtk, os it will think you've imported all three modules even though the line is an error. It also doesn't deal with try/except type statements with regard to imports - if it could be imported, this function will list it. It also doesn't deal well with multiple imports per line if you use the as keyword. The real issue here is that I'd have to write a full parser to really do this correctly. The given code works in many cases, as long as you understand there are definite corner cases.
One issue is that relative imports will fail if this script isn't in the same directory as the given file. You may want to add the directory of the given script to sys.path.
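A minimal sketch of that fix, assuming toCheck holds the path of the file being scanned:
import os, sys
# let imp.find_module see modules that live next to the scanned file
sys.path.insert(0, os.path.dirname(os.path.abspath(toCheck)))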
I know this is old, but I was also looking for a solution like the OP was.
So I wrote this code to find the modules imported by the scripts in a folder.
It works with the import abc and from abc import cde formats. I hope it helps someone else.
import re
import os

def get_imported_modules(folder):
    files = [f for f in os.listdir(folder) if f.endswith(".py")]
    imports = []
    for file in files:
        with open(os.path.join(folder, file), mode="r") as f:
            lines = f.read()
            result = re.findall(r"(?<!from)import (\w+)[\n.]|from\s+(\w+)\s+import", lines)
            for imp in result:
                for i in imp:
                    if len(i):
                        if i not in imports:
                            imports.append(i)
    return imports
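Usage would then be as simple as this (assuming the scripts to scan sit in the current directory):
print(get_imported_modules("."))
# e.g. ['os', 're', 'sys']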
Thanks Tony Suffolk for inspect, importlib samples ... I built this wee module and you're all welcome to use it if it helps you. Giving back, yaaaay!
import timeit
import os
import inspect, importlib as implib
import textwrap as twrap

def src_modules(filename):
    assert (len(filename) > 1)
    mod = implib.import_module(filename.split(".")[0])
    ml_alias = []
    ml_actual = []
    ml_together = []
    ml_final = []
    for i in inspect.getmembers(mod, inspect.ismodule):
        ml_alias.append(i[0])
        ml_actual.append((str(i[1]).split(" ")[1]))
    ml_together = zip(ml_actual, ml_alias)
    for t in ml_together:
        (a, b) = t
        ml_final.append(a + ":=" + b)
    return ml_final

def l_to_str(itr):
    assert (len(itr) > 0)
    itr.sort()
    r_str = ""
    for i in itr:
        r_str += i + " "
    return r_str

def src_info(filename, start_time=timeit.default_timer()):
    assert (len(filename) > 1)
    filename_in = filename
    filename = filename_in.split(".")[0]
    if __name__ == filename:
        output_module = filename
    else:
        output_module = __name__
    print ("\n" + (80 * "#"))
    print (" runtime ~= {0} ms".format(round(((timeit.default_timer() - start_time) * 1000), 3)))
    print (" source file --> '{0}'".format(filename_in))
    print (" output via --> '{0}'".format(output_module))
    print (" modules used in '{0}':".format(filename))
    print ("  " + "\n  ".join(twrap.wrap(l_to_str(src_modules(filename)), 75)))
    print (80 * "#")
    return ""

if __name__ == "__main__":
    src_info(os.path.basename(__file__))

## how to use in X file:
#
# import print_src_info
# import os
#
# < ... your code ... >
#
# if __name__ == "__main__":
#     print_src_info.src_info(os.path.basename(__file__))

## example output:
#
# ################################################################################
#  runtime ~= 0.049 ms
#  source file --> 'print_src_info.py'
#  output via --> '__main__'
#  modules used in 'print_src_info':
#   'importlib':=implib 'inspect':=inspect 'os':=os 'textwrap':=twrap
#   'timeit':=timeit
# ################################################################################
For the majority of scripts which only import modules at the top level, it is quite sufficient to load the file as a module, and scan its members for modules:
import sys, io, imp, types

scriptname = 'myfile.py'
with io.open(scriptname) as scriptfile:
    code = compile(scriptfile.read(), scriptname, 'exec')
newmodule = imp.new_module('__main__')
exec(code, newmodule.__dict__)
scriptmodules = [name for name in dir(newmodule)
                 if isinstance(newmodule.__dict__[name], types.ModuleType)]
This simulates the module being run as a script, by setting the module's name to '__main__'. It should therefore also capture funky dynamic module loading. The only modules it won't capture are those which are imported only into local scopes.
It actually works quite well with:
print [key for key in locals().keys()
       if isinstance(locals()[key], type(sys)) and not key.startswith('__')]
I understand that this post is VERY old but I have found an ideal solution.
I came up with this idea:
def find_modules(code):
    modules = []
    code = code.splitlines()
    for item in code:
        if item[:7] == "import " and ", " not in item:
            if " as " in item:
                modules.append(item[7:item.find(" as ")])
            else:
                modules.append(item[7:])
        elif item[:5] == "from ":
            modules.append(item[5:item.find(" import ")])
        elif ", " in item:
            item = item[7:].split(", ")
            modules = modules + item
        else:
            print(item)
    return modules
code = """
import foo
import bar
from baz import eggs
import mymodule as test
import hello, there, stack
"""
print(find_modules(code))
It handles from, as, comma-separated, and plain import statements.
It requires no dependencies and works alongside other lines of code.
The above code prints:
['foo', 'bar', 'baz', 'mymodule', 'hello', 'there', 'stack']
Just pass your code to the find_modules function.
I'm editing my original answer to say this. This is doable with a code snippet like the one below, but parsing the AST may be the best way to go.
import sys

def iter_imports(fd):
    """ Yield only lines that appear to be imports from an iterable.
        fd can be an open file, a list of lines, etc.
    """
    for line in fd:
        trimmed = line.strip()
        if trimmed.startswith('import '):
            yield trimmed
        elif trimmed.startswith('from ') and ('import ' in trimmed):
            yield trimmed

def main():
    # File name to read.
    filename = '/my/path/myfile.py'
    # Safely open the file, exit on error
    try:
        with open(filename) as f:
            # Iterate over the lines in this file, and generate a list of
            # lines that appear to be imports.
            import_lines = list(iter_imports(f))
    except (IOError, OSError) as exIO:
        print('Error opening file: {}\n{}'.format(filename, exIO))
        return 1
    else:
        # From here, import_lines should be a list of lines like this:
        #     from module import thing
        #     import os, sys
        #     from module import *
        # Do whatever you need to do with the import lines.
        print('\n'.join(import_lines))
    return 0

if __name__ == '__main__':
    sys.exit(main())
Further string parsing will be needed to grab just the module names. This does not catch cases where multi-line strings or doc strings contain the words 'import ' or 'from X import '. This is why I suggested parsing the AST.
