Python not able to find the external dependent dll - python

I am using Python 3.6. I have created a C++ extension using [pybind11](https://github.com/pybind/pybind11). I copied the compiled *.pyd file along with the dependent DLL to site-packages. But when I try to load any function from the external DLL, Python complains that the function is not present. If I want to access the function, I need to write
sys.path.append(r'C:\Users\test\AppData\Local\Programs\Python\Python36\Lib\site-packages\CppProject')
or I need to add the same path to the PYTHONPATH environment variable.
Why is Python not able to load the function even though it is present in the same path as the pyd? I don't want to append to sys.path or set the environment variable every time I need to use the module. Is there any way to avoid this? Is there any way to add this path to sys.path automatically whenever the user imports the module?
Example:
CppExport.dll
#ifdef CPPEXPORT_EXPORTS
#define CPPEXPORT_API __declspec(dllexport)
#else
#define CPPEXPORT_API __declspec(dllimport)
#endif
extern "C" CPPEXPORT_API double sin_impl(double x);
// Approximation of Euler's number, used as the base of the exponentials below.
const double e = 2.7182818284590452353602874713527;
// Evaluates (1 - e^(-2x)) / (2 * e^(-x)). Multiplying numerator and
// denominator by e^x shows this equals (e^x - e^(-x)) / 2, i.e. the
// hyperbolic sine of x (despite the "sin" in the function name).
double sin_impl(double x){
return (1 - pow(e, (-2 * x))) / (2 * pow(e, -x));
}
CppProject.pyd
// Python module definition: exposes the C++ function sin_impl to Python
// under the same name in the CppProject extension module.
PYBIND11_MODULE(CppProject, m) {
    // sin_impl evaluates (e^x - e^-x) / 2, so the docstring describes it as
    // the hyperbolic sine rather than the hyperbolic tangent.
    m.def("sin_impl", &sin_impl, R"pbdoc(
        Compute the hyperbolic sine of a single argument.
    )pbdoc");

    // Use the version supplied through the VERSION_INFO macro when it is
    // defined; otherwise mark the module as a development build.
#ifdef VERSION_INFO
    m.attr("__version__") = VERSION_INFO;
#else
    m.attr("__version__") = "dev";
#endif
}
Setup.py
import distutils
import os  # required: os.sep and os.path.join are used below
import sys
from distutils.sysconfig import get_python_lib

from setuptools import setup
from setuptools.dist import Distribution

# Location of site-packages relative to the interpreter prefix: the part of
# get_python_lib() that follows "<sys.prefix><separator>".
relative_site_packages = get_python_lib().split(sys.prefix + os.sep)[1]
# Install target for the dependent DLL: the CppProject directory inside
# site-packages, i.e. the directory named after the package listed below.
date_files_relative_path = os.path.join(relative_site_packages, "CppProject")


class BinaryDistribution(Distribution):
    """Distribution which always forces a binary package with platform name"""

    def has_ext_modules(foo):
        # Always report extension modules so the package is built as a
        # platform-specific (binary) distribution.
        return True


setup(
    name='CppProject',
    version='1.0',
    description='CppProject Library',
    packages=['CppProject'],
    package_data={
        'CppProject': ['CppProject.pyd'],
    },
    data_files=[(date_files_relative_path, ["CppExport.dll"])],
    distclass=BinaryDistribution
)
In Python:
from CppProject import sin_impl
Error:
ImportError: cannot import name 'sin_impl'
Full Code is present in Github

Sorry for the previous reply; here is some better advice:
You want to distribute your library. To do so you need to create a setup.py and an __init__.py. Once this is done you will be able to install your package using python setup.py install.
For me the setup.py look like :
README_rst = ''
from distutils.core import setup
with open('README.rst', mode='r', encoding='utf-8') as fd:
README_rst = fd.read()
setup(
name='MyStack',
version='0.0.1',
description='Cool short description',
author='Author',
author_email='author#mail.com',
url='repo.com',
packages=['Pkg'],
long_description=README_rst,
include_package_data=True,
classifiers=[
# Trove classifiers
# The full list is here: https://pypi.python.org/pypi?%3Aaction=list_classifiers
'Development Status :: 3 - Alpha',
]
)
In the __init__.py you will have to find your library and import it. Here is an example of how Qt does it:
def find_qt():
    """Locate the directory holding Qt5Core.dll and register it for DLL loading.

    The ``Qt\\bin`` directory next to this file is tried first; when it
    contains Qt5Core.dll it is prepended to the PATH environment variable.
    Otherwise every ';'-separated PATH entry is probed in order.

    Raises:
        ImportError: if Qt5Core.dll is found in neither location.
    """
    import os

    core_dll = '\\Qt5Core.dll'
    current_path = os.environ['PATH']
    bundled_dir = os.path.dirname(__file__) + '\\Qt\\bin'

    if os.path.isfile(bundled_dir + core_dll):
        dll_dir = bundled_dir
        os.environ['PATH'] = dll_dir + ';' + current_path
    else:
        # First PATH entry that contains the DLL, or None when there is none.
        dll_dir = next(
            (entry for entry in current_path.split(';')
             if os.path.isfile(entry + core_dll)),
            None,
        )
        if dll_dir is None:
            raise ImportError("unable to find Qt5Core.dll on PATH")

    try:
        os.add_dll_directory(dll_dir)
    except AttributeError:
        # Presumably an interpreter without os.add_dll_directory; the PATH
        # handling above is then the only mechanism in effect.
        pass
find_qt()
del find_qt
Hope this help

The fact that your code works when you explicitly add the directory to sys.path is the key to understand what's happening.
Since site-packages is one of the locations searched by the interpreter when importing modules, this statement:
from CppProject import sin_impl
is actually searching for a module named sin_impl inside the CppProject folder.
Instead you should do:
from CppProject.CppProject import sin_impl
which points to the actual module of the same name.
This actually doesn't require the presence of __init__.py inside CppProject to qualify it as a Python package, since Python 3.3+ implements implicit namespace packages.
However, when you are building a complex program with many dependencies the package constructor enables you to add some kind of initialization to be performed before any regular module is executed.

Related

Including and distributing third party libraries with a Python C extension

I'm building a C Python extension which makes use of a "third party" library— in this case, one that I've built using a separate build process and toolchain. Call this library libplumbus.dylib.
Directory structure would be:
grumbo/
include/
plumbus.h
lib/
libplumbus.so
grumbo.c
setup.py
My setup.py looks approximately like:
from setuptools import Extension, setup
native_module = Extension(
'grumbo',
define_macros = [('MAJOR_VERSION', '1'),
('MINOR_VERSION', '0')],
sources = ['grumbo.c'],
include_dirs = ['include'],
libraries = ['plumbus'],
library_dirs = ['lib'])
setup(
name = 'grumbo',
version = '1.0',
ext_modules = [native_module] )
Since libplumbus is an external library, when I run import grumbo I get:
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
ImportError: dlopen(/path/to/grumbo/grumbo.cpython-37m-darwin.so, 2): Library not loaded: lib/libplumbus.dylib
Referenced from: /path/to/grumbo/grumbo.cpython-37m-darwin.so
Reason: image not found
What's the simplest way to set things up so that libplumbus is included with the distribution and properly loaded when grumbo is imported? (Note that this should work with a virtualenv).
I have tried adding lib/libplumbus.dylib to package_data, but this doesn't work, even if I add -Wl,-rpath,@loader_path/grumbo/lib to the Extension's extra_link_args.
The goal of this post is to have a setup.py which would create a source distribution. That means after running
python setup.py sdist
the resulting dist/grumbo-1.0.tar.gz could be used for installation via
pip install grumbo-1.0.tar.gz
We will start for a setup.py for Linux/MacOS, but then tweak to make it work for Windows as well.
The first step is to get the additional data (includes/library) into the distribution. I'm not sure it is really impossible to add data for a module, but setuptools offers functionality to add data for packages, so let's make a package from your module (which is probably a good idea anyway).
The new structure of package grumbo looks as follows:
src/
grumbo/
__init__.py # empty
grumbo.c
include/
plumbus.h
lib/
libplumbus.so
setup.py
and changed setup.py:
from setuptools import setup, Extension, find_packages
native_module = Extension(
name='grumbo.grumbo',
sources = ["src/grumbo/grumbo.c"],
)
kwargs = {
'name' : 'grumbo',
'version' : '1.0',
'ext_modules' : [native_module],
'packages':find_packages(where='src'),
'package_dir':{"": "src"},
}
setup(**kwargs)
It doesn't do much yet, but at least our package can be found by setuptools. The build fails, because the includes are missing.
Now let's add the needed includes from the include-folder to the distribution via package-data:
...
kwargs = {
...,
'package_data' : { 'grumbo': ['include/*.h']},
}
...
With that our include-files are copied to the source distribution. However because it will be build "somewhere" we don't know yet, adding include_dirs = ['include'] to the Extension definition just doesn't cut it.
There must be a better way (and less brittle) to find the right include path, but that is what I came up with:
...
import os
import sys
import sysconfig
def path_to_build_folder():
    """Return the relative path build/lib.<platform>-<major>.<minor>/grumbo.

    NOTE(review): the directory name is assembled by hand from the platform
    tag and interpreter version; confirm it matches what the installed
    setuptools/distutils actually creates.
    """
    major, minor = sys.version_info[0], sys.version_info[1]
    build_lib = "lib.{0}-{1}.{2}".format(sysconfig.get_platform(), major, minor)
    return os.path.join('build', build_lib, 'grumbo')
native_module = Extension(
...,
include_dirs = [os.path.join(path_to_build_folder(),'include')],
)
...
Now, the extension is built, but cannot be yet loaded because it is not linked against shared-object libplumbus.so and thus some symbols are unresolved.
Similar to the header files, we can add our library to the distribution:
kwargs = {
...,
'package_data' : { 'grumbo': ['include/*.h', 'lib/*.so']},
}
...
and add the right lib-path for the linker:
...
native_module = Extension(
...
libraries = ['plumbus'],
library_dirs = [os.path.join(path_to_build_folder(), 'lib')],
)
...
Now, we are almost there:
the extension is built and put into site-packages/grumbo/
the extension depends on libplumbus.so as can be seen with help of ldd
libplumbus.so is put into site-packages/grumbo/lib
However, we still cannot import the extension, as import grumbo.grumbo leads to
ImportError: libplumbus.so: cannot open shared object file: No such
file or directory
because the loader cannot find the needed shared object which resides in the folder .\lib relative to our extension. We could use rpath to "help" the loader:
...
native_module = Extension(
...
extra_link_args = ["-Wl,-rpath=$ORIGIN/lib/."],
)
...
And now we are done:
>>> import grumbo.grumbo
# works!
Also building and installing a wheel should work:
python setup.py bdist_wheel
and then:
pip install grumbo-1.0-xxxx.whl
The first milestone is achieved. Now we extend it so that it works on other platforms as well.
Same source distribution for Linux and Macos:
To be able to install the same source distribution on Linux and MacOS, both versions of the shared library (for Linux and MacOS) must be present. An option is to add a suffix to the names of the shared objects, e.g. having libplumbus.linux.so and libplumbus.macos.so. The right shared object can then be picked in the setup.py depending on the platform:
...
import platform
def pick_library():
    """Return the name of the plumbus library to link against on this platform.

    Raises:
        ValueError: if platform.system() is not Linux, Darwin or Windows.
    """
    library_by_system = {
        'Linux': "plumbus.linux",
        'Darwin': "plumbus.macos",
        'Windows': "plumbus",
    }
    current = platform.system()
    if current not in library_by_system:
        raise ValueError("Unknown platform: " + current)
    return library_by_system[current]
native_module = Extension(
...
libraries = [pick_library()],
...
)
Tweaking for Windows:
On Windows, dynamic libraries are dlls and not shared objects, so there are some differences that need to be taken into account:
when the C-extension is built, it needs plumbus.lib-file, which we need to put into the lib-subfolder.
when the C-extension is loaded during the run time, it needs plumbus.dll-file.
Windows has no notion of rpath, thus we need to put the dll right next to the extension, so it can be found (see also this SO-post for more details).
That means the folder structure should be as follows:
src/
grumbo/
__init__.py
grumbo.c
plumbus.dll # needed for Windows
include/
plumbus.h
lib/
libplumbus.linux.so # needed on Linux
libplumbus.macos.so # needed on Macos
plumbus.lib # needed on Windows
setup.py
There are also some changes in the setup.py. First, extending the package_data so dll and lib are picked up:
...
kwargs = {
...
'package_data' : { 'grumbo': ['include/*.h', 'lib/*.so',
'lib/*.lib', '*.dll', # for windows
]},
}
...
Second, rpath can only be used on Linux/MacOS, thus:
def get_extra_link_args(system=None):
    """Return the linker arguments that add the bundled lib folder to the rpath.

    Args:
        system: platform name as reported by platform.system(); defaults to
            the platform the script is running on.

    Returns:
        A list of extra linker arguments (empty on Windows).
    """
    if system is None:
        system = platform.system()
    if system == 'Windows':
        # Windows has no notion of rpath; the DLL sits next to the extension.
        return []
    if system == 'Darwin':
        # $ORIGIN and the "-rpath=" spelling are specific to GNU ld / ELF;
        # the macOS linker takes "-rpath <path>" and uses @loader_path.
        # NOTE(review): this only helps when the library's install name is
        # @rpath-relative -- verify with otool -L.
        return ["-Wl,-rpath,@loader_path/lib"]
    return ["-Wl,-rpath=$ORIGIN/lib/."]
native_module = Extension(
...
extra_link_args = get_extra_link_args(),
)
That's it!
The complete setup file (you might want to add macro-definition or similar, which I've skipped):
from setuptools import setup, Extension, find_packages
import os
import sys
import sysconfig
def path_to_build_folder():
    """Return the relative path build/lib.<platform>-<major>.<minor>/grumbo.

    NOTE(review): the directory name is put together manually from the
    platform tag and the interpreter version; verify that it agrees with the
    build directory the installed setuptools/distutils really uses.
    """
    py_major, py_minor = sys.version_info[0], sys.version_info[1]
    lib_dir = "lib.{0}-{1}.{2}".format(sysconfig.get_platform(), py_major, py_minor)
    return os.path.join('build', lib_dir, 'grumbo')
import platform
def pick_library():
    """Map the running platform to the plumbus library name used for linking.

    Raises:
        ValueError: if platform.system() is not Linux, Darwin or Windows.
    """
    my_system = platform.system()
    known = {
        'Linux': "plumbus.linux",
        'Darwin': "plumbus.macos",
        'Windows': "plumbus",
    }
    if my_system in known:
        return known[my_system]
    raise ValueError("Unknown platform: " + my_system)
def get_extra_link_args(system=None):
    """Return the linker arguments that add the bundled lib folder to the rpath.

    Args:
        system: platform name as reported by platform.system(); defaults to
            the platform the script is running on.

    Returns:
        A list of extra linker arguments (empty on Windows).
    """
    if system is None:
        system = platform.system()
    if system == 'Windows':
        # No rpath on Windows: the DLL is shipped next to the extension
        # (it is picked up through the '*.dll' package_data pattern).
        return []
    if system == 'Darwin':
        # $ORIGIN and the "-rpath=" spelling are specific to GNU ld / ELF;
        # the macOS linker takes "-rpath <path>" and uses @loader_path.
        # NOTE(review): this only helps when the library's install name is
        # @rpath-relative -- verify with otool -L.
        return ["-Wl,-rpath,@loader_path/lib"]
    return ["-Wl,-rpath=$ORIGIN/lib/."]
native_module = Extension(
name='grumbo.grumbo',
sources = ["src/grumbo/grumbo.c"],
include_dirs = [os.path.join(path_to_build_folder(),'include')],
libraries = [pick_library()],
library_dirs = [os.path.join(path_to_build_folder(), 'lib')],
extra_link_args = get_extra_link_args(),
)
kwargs = {
'name' : 'grumbo',
'version' : '1.0',
'ext_modules' : [native_module],
'packages':find_packages(where='src'),
'package_dir':{"": "src"},
'package_data' : { 'grumbo': ['include/*.h', 'lib/*.so',
'lib/*.lib', '*.dll', # for windows
]},
}
setup(**kwargs)

Compile a Python project Windows

I have the following directory structure to my python project:
eplusplus/
|
|
----__main__.py
----model/
----exception/
----controller/
----view/
The directories: model, exception, controller and view each one has its
__init__.py. When I run the program on my machine I always use the following command: py -m eplusplus. But when I tried to use py2exe or PyInstaller, the error points to: permission denied. From what I found, this is because I am trying to compile a directory; when I compiled the __main__.py alone it compiled normally, but when I try to execute the result it says: Error! No eplusplus module founded!
I have no setup.py file and I don't know how they worked.
After some very intensive research and trial and error, I succeeded by doing this:
I added an empty __init__.py at the eplusplus folder
Out of the eplusplus folder, I had to write a compilation.py file (the file doesn't necessary must have this) to include all libraries I was using (I will post the file at the end of this answer)
Finally, at the PowerShell, all I have to type was py compilation.py py2exe
Thanks for all that tried to help me!
compilation.py file:
#To compile we need to run: python compilation.py py2exe
from distutils.core import setup
from glob import glob
import os
import py2exe
import pyDOE
VERSION=1.0
includes = [
"sip",
"PyQt5",
"PyQt5.QtCore",
"PyQt5.QtGui",
"PyQt5.QtWidgets",
"scipy.linalg.cython_blas",
"scipy.linalg.cython_lapack",
"pyDOE"
]
platforms = ["C:\\Python34\\Lib\\site-packages\\PyQt5\\plugins" +
"\\platforms\\qwindows.dll"]
dll = ["C:\\windows\\syswow64\\MSVCP100.dll",
"C:\\windows\\syswow64\\MSVCR100.dll"]
media = ["C:\\Users\\GUSTAVO\\EPlusPlus\\media\\title.png",
"C:\\Users\\GUSTAVO\\EPlusPlus\\media\\icon.png"]
documents = ["C:\\Users\\GUSTAVO\\EPlusPlus\\docs\\"+
"documentacaoEPlusPlus.pdf"]
examples = ["C:\\Users\\GUSTAVO\\EPlusPlus\\files\\"+
"\\examples\\baseline2A.idf",
"C:\\Users\\GUSTAVO\\EPlusPlus\\files\\"+
"\\examples\\vectors.csv",
"C:\\Users\\GUSTAVO\\EPlusPlus\\files\\"+
"\\examples\\BRA_SC_Florianopolis.838970_INMET.epw"]
datafiles = [("platforms", platforms),
("", dll),
("media", media),
("docs", documents),
("Examples", examples)]
imageformats = glob("C:\\Python34\\Lib\\site-packages\\PyQt5\\"+
"plugins\\imageformats\\*")
datafiles.append(("imageformats", imageformats))
setup(
name="eplusplus",
version=VERSION,
packages=["eplusplus"],
url="",
license="",
windows=[{"script": "eplusplus/__main__.py"}],
scripts=[],
data_files = datafiles,
options={
"py2exe": {
"includes": includes,
}
}
)

Distutils: Compiling an Objective-C++ source file as part of a C++ extension

I am writing a Python extension in C++. I compile it by defining a list of the constituent source files in my setup.py file, like so:
extensions = {
'im': [
"im/src/buffer.cpp",
"im/src/detail.cpp",
"im/src/gil.cpp",
"im/src/halideimage.cpp",
"im/src/hybrid.cpp",
"im/src/hybridimage.cpp",
"im/src/options.cpp",
"im/src/pybuffer.cpp",
"im/src/pycapsule.cpp",
"im/src/structcode.cpp",
"im/src/typecode.cpp",
"im/src/module.cpp"
],
}
… these are used to define an instance of setuptools.Extension which is ultimately passed to the setup() function. This has all worked just fine throughout the project, until now, when I tried to add a platform-specific bit:
# Select the platform-specific preview implementation; any platform that is
# not matched below falls back to the generic source file.
if sys.platform == 'darwin':
    preview_source = 'im/src/plat/preview_mac.mm'
elif sys.platform.startswith('linux'):
    # startswith() also covers interpreters that report 'linux2'/'linux3'.
    preview_source = 'im/src/plat/preview_linux.cpp'
elif sys.platform == 'win32':
    preview_source = 'im/src/plat/preview_windows.cpp'
else:
    preview_source = 'im/src/plat/preview.cpp'
extensions = {
'im': [
"im/src/buffer.cpp",
"im/src/detail.cpp",
"im/src/gil.cpp",
"im/src/halideimage.cpp",
"im/src/hybrid.cpp",
"im/src/hybridimage.cpp",
"im/src/options.cpp",
preview_source,
"im/src/pybuffer.cpp",
"im/src/pycapsule.cpp",
"im/src/structcode.cpp",
"im/src/typecode.cpp",
"im/src/module.cpp"
],
}
… adding this new bit chooses the right file for compilation – but it fails to compile at all on Mac OS X. Apparently distutils/setuptools doesn’t recognize the “.mm” extension as a source file:
error: unknown file type '.mm'
I am no expert when it comes to distutils and setuptools platform-specific configuration – what’s a simple way to conditionally add this one source file to the source file list on the Mac?
I ran into the same issue, did you ever find some solution?
It looks like '.mm' is not supported in my version of distutils but '.m' is. So I separated the C++ parts of the .mm file into a .cpp file, and create a small C header to access the .m file from that .cpp.
I'm working on a project where I ran into this problem. Here is something that I put together. Its kind of hacky but it works.
from distutils.unixccompiler import UnixCCompiler
from setuptools import setup
from setuptools.command.build_ext import build_ext
class DarwinInteropBuildExt(build_ext):
    """build_ext command that makes UnixCCompiler accept ".mm" sources.

    The first call to initialize_options() registers the ".mm" extension on
    UnixCCompiler and replaces its _compile and link methods with versions
    that append language-specific compile flags and the framework link flags.
    """

    def initialize_options(self):
        # The patches are applied to the UnixCCompiler class itself, so they
        # must be installed only once: patching again would wrap the already
        # patched methods and append every extra flag a second time.
        if not getattr(UnixCCompiler, "_darwin_interop_patched", False):
            self._patch_unix_compiler()
            UnixCCompiler._darwin_interop_patched = True
        super().initialize_options()

    @staticmethod
    def _patch_unix_compiler():
        """Register ".mm" and install the flag-adding compile/link wrappers."""
        # add support for ".mm" files
        if ".mm" not in UnixCCompiler.src_extensions:
            UnixCCompiler.src_extensions.append(".mm")
        UnixCCompiler.language_map[".mm"] = "objc"

        # then intercept and patch the compile and link methods to add
        # needed flags
        unpatched_compile = UnixCCompiler._compile
        unpatched_link = UnixCCompiler.link

        def patched_compile(self, obj, src, ext, cc_args, extra_postargs, pp_opts):
            # define language specific compile flags here
            if ext == ".cpp":
                patched_postargs = extra_postargs + ["-std=c++17"]
            elif ext == ".mm":
                patched_postargs = extra_postargs + [
                    "-ObjC++",
                    "-fobjc-weak",
                    "-fobjc-arc",
                ]
            else:
                patched_postargs = extra_postargs
            unpatched_compile(self, obj, src, ext, cc_args, patched_postargs, pp_opts)

        def patched_link(
            self,
            target_desc,
            objects,
            output_filename,
            output_dir=None,
            libraries=None,
            library_dirs=None,
            runtime_library_dirs=None,
            export_symbols=None,
            debug=0,
            extra_preargs=None,
            extra_postargs=None,
            build_temp=None,
            target_lang=None,
        ):
            # define additional linking arguments here if needed
            existing_postargs = extra_postargs or []
            framework_postargs = [
                "-framework", "Cocoa",
                "-framework", "Metal",
                "-framework", "QuartzCore",
            ]
            unpatched_link(
                self,
                target_desc,
                objects,
                output_filename,
                output_dir,
                libraries,
                library_dirs,
                runtime_library_dirs,
                export_symbols,
                debug,
                extra_preargs,
                existing_postargs + framework_postargs,
                build_temp,
                target_lang,
            )

        UnixCCompiler._compile = patched_compile
        UnixCCompiler.link = patched_link
# ...
setup(
# use the custom cmd class here
cmdclass={"build_ext": DarwinInteropBuildExt},
)

setup.py check if non-python library dependency exists

I'm trying to make a setup.py for cgal-bindings. To install this, the user needs to have at least a certain version of CGAL. In addition, CGAL has a few optional targets that should be built if the user has some libraries (like Eigen3). Is there a cross-platform way in Python to check for this?
I can use find_library in ctypes.util to check if the library exists, but I don't see any easy way to get the version. <-- This doesn't actually work all the time, some libraries are header-only like eigen3, which is a C++ template library.
Using the install_requires argument of setup() only works for Python libraries and CGAL is a C/C++ library.
Whether a particular extension module should be compiled depending on the availability of some library version, can be accomplished by dynamically generating the ext_modules argument of setup() in setup.py.
For the _yaml.so module of ruamel.yaml, that only should be compiled when the libyaml development libraries have been installed on the system I do:
import os
from textwrap import dedent
def check_extensions():
    """Check if the C module can be built by trying to compile a small
    program against the libyaml development library.

    Returns:
        A one-element list holding the ``_yaml`` Extension when the probe
        program compiles and links; None when compiling or linking fails.
    """
    import tempfile
    import shutil
    import distutils.sysconfig
    import distutils.ccompiler
    # Extension is used for the return value below but is not imported at
    # the top of this snippet, so bring it into scope here.
    from distutils.core import Extension
    from distutils.errors import CompileError, LinkError

    libraries = ['yaml']

    # write a temporary .c file to compile
    c_code = dedent("""
    #include <yaml.h>

    int main(int argc, char* argv[])
    {
        yaml_parser_t parser;
        parser = parser; /* prevent warning */
        return 0;
    }
    """)
    tmp_dir = tempfile.mkdtemp(prefix='tmp_ruamel_yaml_')
    try:
        bin_file_name = os.path.join(tmp_dir, 'test_yaml')
        file_name = bin_file_name + '.c'
        with open(file_name, 'w') as fp:
            fp.write(c_code)

        # and try to compile it
        compiler = distutils.ccompiler.new_compiler()
        assert isinstance(compiler, distutils.ccompiler.CCompiler)
        distutils.sysconfig.customize_compiler(compiler)

        try:
            compiler.link_executable(
                # output_dir keeps the object file inside tmp_dir so that it
                # is removed together with the other probe files.
                compiler.compile([file_name], output_dir=tmp_dir),
                bin_file_name,
                libraries=libraries,
            )
        except CompileError:
            print('libyaml compile error')
            return None
        except LinkError:
            print('libyaml link error')
            return None
        return [
            Extension(
                '_yaml',
                sources=['ext/_yaml.c'],
                libraries=libraries,
            ),
        ]
    finally:
        # Clean up even when an unexpected exception escapes the probe.
        shutil.rmtree(tmp_dir)
This way you require no extra files in the distribution. Even if you cannot fail to compile based on the version number at compile time, you should be
able to run the resulting program from the temporary directory and check the exit value and/or output.

py2app picking up .git subdir of a package during build

We use py2app extensively at our facility to produce self contained .app packages for easy internal deployment without dependency issues. Something I noticed recently, and have no idea how it began, is that when building an .app, py2app started including the .git directory of our main library.
commonLib, for instance, is our root python library package, which is a git repo. Under this package are the various subpackages such as database, utility, etc.
commonLib/
|- .git/ # because commonLib is a git repo
|- __init__.py
|- database/
|- __init__.py
|- utility/
|- __init__.py
# ... etc
In a given project, say Foo, we will do imports like from commonLib import xyz to use our common packages. Building via py2app looks something like: python setup.py py2app
So the recent issue I am seeing is that when building an app for project Foo, I will see it include everything in commonLib/.git/ into the app, which is extra bloat. py2app has an excludes option but that only seems to be for python modules. I cant quite figure out what it would take to exclude the .git subdir, or in fact, what is causing it to be included in the first place.
Has anyone experienced this when using a python package import that is a git repo?
Nothing has changed in our setup.py files for each project, and commonLib has always been a git repo. So the only thing I can think of being a variable is the version of py2app and its deps which have obviously been upgraded over time.
Edit
I'm using the latest py2app 0.6.4 as of right now. Also, my setup.py was first generated from py2applet a while back, but has been hand configured since and copied over as a template for every new project. I am using PyQt4/sip for every single one of these projects, so it also makes me wonder if its an issue with one of the recipes?
Update
From the first answer, I tried to fix this using various combinations of exclude_package_data settings. Nothing seems to force the .git directory to become excluded. Here is a sample of what my setup.py files generally look like:
from setuptools import setup
from myApp import VERSION
appname = 'MyApp'
APP = ['myApp.py']
DATA_FILES = []
OPTIONS = {
'includes': 'atexit, sip, PyQt4.QtCore, PyQt4.QtGui',
'strip': True,
'iconfile':'ui/myApp.icns',
'resources':['src/myApp.png'],
'plist':{
'CFBundleIconFile':'ui/myApp.icns',
'CFBundleIdentifier':'com.company.myApp',
'CFBundleGetInfoString': appname,
'CFBundleVersion' : VERSION,
'CFBundleShortVersionString' : VERSION
}
}
setup(
app=APP,
data_files=DATA_FILES,
options={'py2app': OPTIONS},
setup_requires=['py2app'],
)
I have tried things like:
setup(
...
exclude_package_data = { 'commonLib': ['.git'] },
#exclude_package_data = { '': ['.git'] },
#exclude_package_data = { 'commonLib/.git/': ['*'] },
#exclude_package_data = { '.git': ['*'] },
...
)
Update #2
I have posted my own answer which does a monkeypatch on distutils. Its ugly and not preferred, but until someone can offer me a better solution, I guess this is what I have.
I am adding an answer to my own question, to document the only thing I have found to work thus far. My approach was to monkeypatch distutils to ignore certain patterns when creating a directory or copying a file. This is really not what I wanted to do, but like I said, its the only thing that works so far.
## setup.py ##
import re
# file_util has to come first because dir_util uses it
from distutils import file_util, dir_util
def wrapper(fn):
    """Wrap *fn* so that calls for paths inside a .git directory are skipped.

    Args:
        fn: callable whose first positional argument is a source path.

    Returns:
        A callable with the same arguments as *fn*. It returns None without
        calling *fn* when the path is a ``.git`` directory or lies inside
        one; otherwise it returns whatever *fn* returns.
    """
    def wrapped(src, *args, **kwargs):
        # Match ".git" only as a complete path component, so that names such
        # as ".gitignore" or ".github" are still processed.
        if re.search(r'/\.git(/|$)', src):
            return None
        # Hand the result back: callers may rely on the wrapped function's
        # return value.
        return fn(src, *args, **kwargs)
    return wrapped
file_util.copy_file = wrapper(file_util.copy_file)
dir_util.mkpath = wrapper(dir_util.mkpath)
# now import setuptools so it uses the monkeypatched methods
from setuptools import setup
Hopefully someone will comment on this and tell me a higher level approach to avoid doing this. But as of now, I will probably wrap this into a utility method like exclude_data_patterns(re_pattern) to be reused in my projects.
I can see two options for excluding the .git directory.
Build the application from a 'clean' checkout of the code. When deploying a new version, we always build from a fresh svn export based on a tag to ensure we don't pick up spurious changes/files. You could try the equivalent here - although the git equivalent seems somewhat more involved.
Modify the setup.py file to massage the files included in the application. This might be done using the exclude_package_data functionality as described in the docs, or build the list of data_files and pass it to setup.
As for why it has suddenly started happening, knowing the version of py2app you are using might help, as will knowing the contents of your setup.py and perhaps how this was made (by hand or using py2applet).
I have a similar experience with Pyinstaller, so I'm not sure it applies directly.
Pyinstaller creates a "manifest" of all files to be included in the distribution, before running the export process. You could "massage" this manifest, as per Mark's second suggestion, to exclude any files you want. Including anything within .git or .git itself.
In the end, I stuck with checking out my code before producing a binary as there was more than just .git being bloat (such as UML documents and raw resource files for Qt). A checkout guaranteed a clean result and I experienced no issues automating that process along with the process of creating the installer for the binary.
There is a good answer to this, but I have a more elaborate answer to solve the problem mentioned here with a white-list approach. To have the monkey patch also work for packages outside site-packages.zip I had to monkey patch also copy_tree (because it imports copy_file inside its function), this helps in making a standalone application.
In addition, I create a white-list recipe to mark certain packages zip-unsafe. The approach makes it easy to add filters other than white-list.
import pkgutil
from os.path import join, dirname, realpath
from distutils import log
# file_util has to come first because dir_util uses it
from distutils import file_util, dir_util
# noinspection PyUnresolvedReferences
from py2app import util
def keep_only_filter(base_mod, sub_mods):
    """Build a filter that keeps only the listed sub-packages of a package.

    Args:
        base_mod: object whose ``filename`` attribute is a path inside the
            package directory; the directory of that path is the root prefix.
        sub_mods: names of the sub-directories below the root to keep.

    Returns:
        A function taking a module-like object and returning True to keep it
        or False to drop it. Objects without a ``filename`` and objects
        located outside the root prefix are always kept.
    """
    prefix = join(realpath(dirname(base_mod.filename)), '')
    # Terminate every sub-package prefix with a path separator so that
    # 'sub1' does not also match a sibling directory such as 'sub10'.
    all_prefix = [join(prefix, sm, '') for sm in sub_mods]
    log.info("Set filter for prefix %s" % prefix)

    def wrapped(mod):
        name = getattr(mod, 'filename', None)
        if name is None:
            # ignore anything that does not have file name
            return True
        name = join(realpath(dirname(name)), '')
        if not name.startswith(prefix):
            # ignore those that are not in this prefix
            return True
        return any(name.startswith(p) for p in all_prefix)
    return wrapped
# define all the filters we need
all_filts = {
'mypackage': (keep_only_filter, [
'subpackage1', 'subpackage2',
]),
}
def keep_only_wrapper(fn, is_dir=False):
    """Wrap a copy/mkdir function so only white-listed sub-packages pass.

    For every entry of ``all_filts`` that uses ``keep_only_filter``, paths
    below the package directory are forwarded to *fn* only when they are the
    package root itself or lie inside one of the listed sub-packages. Paths
    outside those packages are always forwarded.

    Args:
        fn: callable whose first positional argument is a path.
        is_dir: True when that path is a directory; otherwise the directory
            containing the path is the one that gets checked.

    Returns:
        A callable that returns the result of *fn*, or an empty list when
        the path is filtered out.
    """
    # package directory prefix -> prefixes of the sub-packages to keep
    prefixes = {}
    # dict.items() exists on both Python 2 and 3 (iteritems() does not).
    for pkg_name, spec in all_filts.items():
        if spec[0] != keep_only_filter:
            continue
        pkg = pkgutil.get_loader(pkg_name)
        assert pkg, '{f} package not found'.format(f=pkg_name)
        pkg_dir = getattr(pkg, 'filename', None)
        if pkg_dir is None:
            # NOTE(review): loaders without a ``filename`` attribute are
            # assumed to provide get_filename() returning the path of the
            # package's __init__ file -- confirm for the loaders in use.
            pkg_dir = dirname(pkg.get_filename())
        root = join(pkg_dir, '')
        prefixes[root] = [join(root, sm, '') for sm in spec[1]]

    def wrapped(src, *args, **kwargs):
        name = src if is_dir else dirname(src)
        name = join(realpath(name), '')
        keep = True
        for prefix, sub_prefixes in prefixes.items():
            if name == prefix:
                # let the root pass
                continue
            # if it is a package we have a filter for
            if name.startswith(prefix):
                keep = any(name.startswith(sp) for sp in sub_prefixes)
        if keep:
            return fn(src, *args, **kwargs)
        return []
    return wrapped
file_util.copy_file = keep_only_wrapper(file_util.copy_file)
dir_util.mkpath = keep_only_wrapper(dir_util.mkpath, is_dir=True)
util.copy_tree = keep_only_wrapper(util.copy_tree, is_dir=True)
class ZipUnsafe(object):
    """Recipe object that reports a package as not safe to put in the zip.

    Attributes are set from the constructor arguments: ``module`` is the
    package name and ``filt`` is either a false value or a pair of
    (filter factory, list of sub-module names).
    """

    def __init__(self, _module, _filt):
        self.module = _module
        self.filt = _filt

    def check(self, dist, mf):
        """Return the recipe result for ``self.module``.

        Returns None when ``mf.findNode`` does not know the module.
        Otherwise returns a dict with a ``packages`` entry and, when a
        filter was configured, a ``filters`` entry built by calling the
        filter factory with the found node and the sub-module names.
        """
        node = mf.findNode(self.module)
        if node is None:
            return None

        # Do not put this package in site-packages.zip
        result = dict(packages=[self.module])
        if self.filt:
            make_filter = self.filt[0]
            sub_mods = self.filt[1]
            result['filters'] = [make_filter(node, sub_mods)]
        return result
# Any package that is zip-unsafe (uses __file__ ,... ) should be added here
# noinspection PyUnresolvedReferences
import py2app.recipes
for module in [
'sklearn', 'mypackage',
]:
filt = all_filts.get(module)
setattr(py2app.recipes, module, ZipUnsafe(module, filt))

Categories