How to compile *.po gettext translations in setup.py python script

Consider a python package that has multilanguage support (using gettext). How to compile *.po files to *.mo files on the fly when executing setup.py? I really don't want to distribute precompiled *.mo files.
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
from distutils.core import setup

setup(
    name='tractorbeam',
    version='0.1.0',
    url='http://starfleet.org/tractorbeam/',
    description='Pull beer out of the fridge while sitting on the couch.',
    author='James T. Kirk',
    author_email='jkirk@starfleet.org',
    packages=['tractorbeam'],
    package_data={
        'tractorbeam': [
            'locale/*.po',
            'locale/*.mo',  # How to compile on the fly?
        ]
    },
    install_requires=[
        'requests'
    ]
)
Thanks in advance!

I know this question is getting a bit old, but in case anyone's still looking for an answer: it's possible to add a function to setup.py that compiles the po files and returns the data_files list. I chose not to include them in package_data because the description of data_files looked more appropriate:
configuration files, message catalogs, data files, anything which doesn’t fit in the previous categories.
Of course you can simply append this list to the one you're already using, but assuming these mo files are the only thing you add to data_files, you can write:
setup(
    ...
    data_files=create_mo_files(),
    ...
)
For your information, here's the create_mo_files() function I use. I don't claim it's the best possible implementation; I put it here because it looks useful and is easy to adapt. Note that it's a bit more complicated than you need, because it doesn't assume there's only one po file to compile per directory; it deals with several. Note also that it assumes all po files are located in something like locale/language/LC_MESSAGES/*.po; you'll have to change it to fit your needs:
import os
import subprocess

def create_mo_files():
    data_files = []
    localedir = 'relative/path/to/locale'
    po_dirs = [localedir + '/' + l + '/LC_MESSAGES/'
               for l in next(os.walk(localedir))[1]]
    for d in po_dirs:
        mo_files = []
        po_files = [f
                    for f in next(os.walk(d))[2]
                    if os.path.splitext(f)[1] == '.po']
        for po_file in po_files:
            filename, extension = os.path.splitext(po_file)
            mo_file = filename + '.mo'
            msgfmt_cmd = 'msgfmt {} -o {}'.format(d + po_file, d + mo_file)
            subprocess.call(msgfmt_cmd, shell=True)
            mo_files.append(d + mo_file)
        data_files.append((d, mo_files))
    return data_files
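To make the shape concrete: with a hypothetical layout containing relative/path/to/locale/fr/LC_MESSAGES/tractorbeam.po (and the same for de), the function would return something like

[('relative/path/to/locale/fr/LC_MESSAGES/', ['relative/path/to/locale/fr/LC_MESSAGES/tractorbeam.mo']),
 ('relative/path/to/locale/de/LC_MESSAGES/', ['relative/path/to/locale/de/LC_MESSAGES/tractorbeam.mo'])]

which is exactly the list of (directory, files) pairs that data_files expects.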

Let me share my version of the *.mo file compilation process:
import glob
import pathlib
import subprocess

(...)

PO_FILES = 'translations/locale/*/LC_MESSAGES/app_name.po'

def create_mo_files():
    mo_files = []
    prefix = 'app_name'
    for po_path in glob.glob(str(pathlib.Path(prefix) / PO_FILES)):
        mo = pathlib.Path(po_path.replace('.po', '.mo'))
        subprocess.run(['msgfmt', '-o', str(mo), po_path], check=True)
        mo_files.append(str(mo.relative_to(prefix)))
    return mo_files

(...)

setup(
    (...)
    package_data = {
        'app_name': [
            (...)
        ] + create_mo_files(),
    },
)
Edit: for example, for the pl translation file
app_name/translations/locale/pl/LC_MESSAGES/app_name.po
the function create_mo_files() creates a compiled app_name.mo file
app_name/translations/locale/pl/LC_MESSAGES/app_name.mo
and then, on package build, this app_name.mo file is copied to
package/translations/locale/pl/LC_MESSAGES/app_name.mo
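As a quick sanity check, the compiled catalog can then be loaded with the standard gettext module (a minimal sketch, assuming the app_name domain and pl locale from above, with translations/locale sitting next to the loading module):

import gettext
import pathlib

localedir = pathlib.Path(__file__).parent / 'translations' / 'locale'
t = gettext.translation('app_name', localedir=str(localedir), languages=['pl'])
t.install()  # installs _() into builtins
print(_('some translated string'))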

Related

Including and distributing third party libraries with a Python C extension

I'm building a Python C extension which makes use of a "third party" library, in this case one that I've built using a separate build process and toolchain. Call this library libplumbus.dylib.
Directory structure would be:
grumbo/
    include/
        plumbus.h
    lib/
        libplumbus.so
    grumbo.c
    setup.py
My setup.py looks approximately like:
from setuptools import Extension, setup

native_module = Extension(
    'grumbo',
    define_macros = [('MAJOR_VERSION', '1'),
                     ('MINOR_VERSION', '0')],
    sources = ['grumbo.c'],
    include_dirs = ['include'],
    libraries = ['plumbus'],
    library_dirs = ['lib'])

setup(
    name = 'grumbo',
    version = '1.0',
    ext_modules = [native_module])
Since libplumbus is an external library, when I run import grumbo I get:
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
ImportError: dlopen(/path/to/grumbo/grumbo.cpython-37m-darwin.so, 2): Library not loaded: lib/libplumbus.dylib
Referenced from: /path/to/grumbo/grumbo.cpython-37m-darwin.so
Reason: image not found
What's the simplest way to set things up so that libplumbus is included with the distribution and properly loaded when grumbo is imported? (Note that this should work with a virtualenv).
I have tried adding lib/libplumbus.dylib to package_data, but this doesn't work, even if I add -Wl,-rpath,@loader_path/grumbo/lib to the Extension's extra_link_args.
The goal of this post is to have a setup.py which would create a source distribution. That means after running
python setup.py sdist
the resulting dist/grumbo-1.0.tar.gz could be used for installation via
pip install grumbo-1.0.tar.gz
We will start with a setup.py for Linux/MacOS, and then tweak it to make it work for Windows as well.
The first step is to get the additional data (includes/library) into the distribution. I'm not sure it is really impossible to add data for a module, but setuptools offers functionality to add data for packages, so let's make a package out of your module (which is probably a good idea anyway).
The new structure of package grumbo looks as follows:
src/
    grumbo/
        __init__.py         # empty
        grumbo.c
        include/
            plumbus.h
        lib/
            libplumbus.so
setup.py
and changed setup.py:
from setuptools import setup, Extension, find_packages

native_module = Extension(
    name='grumbo.grumbo',
    sources = ["src/grumbo/grumbo.c"],
)

kwargs = {
    'name' : 'grumbo',
    'version' : '1.0',
    'ext_modules' : [native_module],
    'packages': find_packages(where='src'),
    'package_dir': {"": "src"},
}

setup(**kwargs)
It doesn't do much yet, but at least our package can be found by setuptools. The build fails because the includes are missing.
Now let's add the needed includes from the include-folder to the distribution via package-data:
...
kwargs = {
    ...,
    'package_data' : { 'grumbo': ['include/*.h']},
}
...
With that, our include files are copied into the source distribution. However, because it will be built "somewhere" we don't know in advance, adding include_dirs = ['include'] to the Extension definition just doesn't cut it.
There must be a better (and less brittle) way to find the right include path, but this is what I came up with:
...
import os
import sys
import sysconfig

def path_to_build_folder():
    """Returns the name of a distutils build directory"""
    f = "{dirname}.{platform}-{version[0]}.{version[1]}"
    dir_name = f.format(dirname='lib',
                        platform=sysconfig.get_platform(),
                        version=sys.version_info)
    return os.path.join('build', dir_name, 'grumbo')

native_module = Extension(
    ...,
    include_dirs = [os.path.join(path_to_build_folder(), 'include')],
)
...
Now the extension is built, but it cannot be loaded yet, because it is not linked against the shared object libplumbus.so and thus has unresolved symbols.
Similar to the header files, we can add our library to the distribution:
kwargs = {
    ...,
    'package_data' : { 'grumbo': ['include/*.h', 'lib/*.so']},
}
...
and add the right lib-path for the linker:
...
native_module = Extension(
    ...
    libraries = ['plumbus'],
    library_dirs = [os.path.join(path_to_build_folder(), 'lib')],
)
...
Now, we are almost there:
the extension is built and put into site-packages/grumbo/
the extension depends on libplumbus.so, as can be seen with the help of ldd
libplumbus.so is put into site-packages/grumbo/lib
However, we still cannot import the extension, as import grumbo.grumbo leads to
ImportError: libplumbus.so: cannot open shared object file: No such
file or directory
because the loader cannot find the needed shared object, which resides in the folder ./lib relative to our extension. We could use rpath to "help" the loader:
...
native_module = Extension(
    ...
    extra_link_args = ["-Wl,-rpath=$ORIGIN/lib/."],
)
...
And now we are done:
>>> import grumbo.grumbo
# works!
Also building and installing a wheel should work:
python setup.py bdist_wheel
and then:
pip install grumbo-1.0-xxxx.whl
The first milestone is achieved. Now we extend it so it works on other platforms as well.
Same source distribution for Linux and Macos:
To be able to install the same source distribution on Linux and MacOS, both versions of the shared library (for Linux and MacOS) must be present. An option is to add a suffix to the names of the shared objects, e.g. libplumbus.linux.so and libplumbus.macos.so. The right shared object can be picked in setup.py depending on the platform:
...
import platform

def pick_library():
    my_system = platform.system()
    if my_system == 'Linux':
        return "plumbus.linux"
    if my_system == 'Darwin':
        return "plumbus.macos"
    if my_system == 'Windows':
        return "plumbus"
    raise ValueError("Unknown platform: " + my_system)

native_module = Extension(
    ...
    libraries = [pick_library()],
    ...
)
Tweaking for Windows:
On Windows, dynamic libraries are dlls and not shared objects, so there are some differences that need to be taken into account:
when the C extension is built, it needs the plumbus.lib file, which we put into the lib subfolder.
when the C extension is loaded at run time, it needs the plumbus.dll file.
Windows has no notion of rpath, so we need to put the dll right next to the extension so it can be found (see also this SO post for more details).
That means the folder structure should be as follows:
src/
    grumbo/
        __init__.py
        grumbo.c
        plumbus.dll               # needed for Windows
        include/
            plumbus.h
        lib/
            libplumbus.linux.so   # needed on Linux
            libplumbus.macos.so   # needed on MacOS
            plumbus.lib           # needed on Windows
setup.py
There are also some changes in the setup.py. First, extending the package_data so dll and lib are picked up:
...
kwargs = {
    ...
    'package_data' : { 'grumbo': ['include/*.h', 'lib/*.so',
                                  'lib/*.lib', '*.dll',  # for windows
                                 ]},
}
...
Second, rpath can only be used on Linux/MacOS, thus:
def get_extra_link_args():
    if platform.system() == 'Windows':
        return []
    else:
        return ["-Wl,-rpath=$ORIGIN/lib/."]

native_module = Extension(
    ...
    extra_link_args = get_extra_link_args(),
)
That's it!
The complete setup file (you might want to add macro definitions or similar, which I've skipped):
from setuptools import setup, Extension, find_packages
import os
import sys
import sysconfig
import platform

def path_to_build_folder():
    """Returns the name of a distutils build directory"""
    f = "{dirname}.{platform}-{version[0]}.{version[1]}"
    dir_name = f.format(dirname='lib',
                        platform=sysconfig.get_platform(),
                        version=sys.version_info)
    return os.path.join('build', dir_name, 'grumbo')

def pick_library():
    my_system = platform.system()
    if my_system == 'Linux':
        return "plumbus.linux"
    if my_system == 'Darwin':
        return "plumbus.macos"
    if my_system == 'Windows':
        return "plumbus"
    raise ValueError("Unknown platform: " + my_system)

def get_extra_link_args():
    if platform.system() == 'Windows':
        return []
    else:
        return ["-Wl,-rpath=$ORIGIN/lib/."]

native_module = Extension(
    name='grumbo.grumbo',
    sources = ["src/grumbo/grumbo.c"],
    include_dirs = [os.path.join(path_to_build_folder(), 'include')],
    libraries = [pick_library()],
    library_dirs = [os.path.join(path_to_build_folder(), 'lib')],
    extra_link_args = get_extra_link_args(),
)

kwargs = {
    'name' : 'grumbo',
    'version' : '1.0',
    'ext_modules' : [native_module],
    'packages': find_packages(where='src'),
    'package_dir': {"": "src"},
    'package_data' : { 'grumbo': ['include/*.h', 'lib/*.so',
                                  'lib/*.lib', '*.dll',  # for windows
                                 ]},
}

setup(**kwargs)

Compile a Python project on Windows

I have the following directory structure in my python project:
eplusplus/
    __main__.py
    model/
    exception/
    controller/
    view/
The directories model, exception, controller and view each have their own __init__.py. When I run the program on my machine I always use the command py -m eplusplus. But when I tried to use py2exe or PyInstaller, it failed with: permission denied. From what I found, this is because I was trying to compile a directory; when I compiled __main__.py itself it compiled normally, but when I try to execute it, it says: Error! No eplusplus module founded!
I have no setup.py file and I don't know how they work.
After some very intensive research and trial and error, I succeeded by doing this:
I added an empty __init__.py to the eplusplus folder
Outside of the eplusplus folder, I had to write a compilation.py file (the file doesn't necessarily need to have this name) to include all the libraries I was using (I will post the file at the end of this answer)
Finally, at the PowerShell, all I had to type was py compilation.py py2exe
Thanks to all who tried to help me!
compilation.py file:
# To compile we need to run: python compilation.py py2exe
from distutils.core import setup
from glob import glob
import os
import py2exe
import pyDOE

VERSION = 1.0
includes = [
    "sip",
    "PyQt5",
    "PyQt5.QtCore",
    "PyQt5.QtGui",
    "PyQt5.QtWidgets",
    "scipy.linalg.cython_blas",
    "scipy.linalg.cython_lapack",
    "pyDOE"
]
platforms = ["C:\\Python34\\Lib\\site-packages\\PyQt5\\plugins" +
             "\\platforms\\qwindows.dll"]
dll = ["C:\\windows\\syswow64\\MSVCP100.dll",
       "C:\\windows\\syswow64\\MSVCR100.dll"]
media = ["C:\\Users\\GUSTAVO\\EPlusPlus\\media\\title.png",
         "C:\\Users\\GUSTAVO\\EPlusPlus\\media\\icon.png"]
documents = ["C:\\Users\\GUSTAVO\\EPlusPlus\\docs\\" +
             "documentacaoEPlusPlus.pdf"]
examples = ["C:\\Users\\GUSTAVO\\EPlusPlus\\files\\" +
            "examples\\baseline2A.idf",
            "C:\\Users\\GUSTAVO\\EPlusPlus\\files\\" +
            "examples\\vectors.csv",
            "C:\\Users\\GUSTAVO\\EPlusPlus\\files\\" +
            "examples\\BRA_SC_Florianopolis.838970_INMET.epw"]
datafiles = [("platforms", platforms),
             ("", dll),
             ("media", media),
             ("docs", documents),
             ("Examples", examples)]
imageformats = glob("C:\\Python34\\Lib\\site-packages\\PyQt5\\" +
                    "plugins\\imageformats\\*")
datafiles.append(("imageformats", imageformats))

setup(
    name="eplusplus",
    version=VERSION,
    packages=["eplusplus"],
    url="",
    license="",
    windows=[{"script": "eplusplus/__main__.py"}],
    scripts=[],
    data_files=datafiles,
    options={
        "py2exe": {
            "includes": includes,
        }
    }
)

Distutils: Compiling an Objective-C++ source file as part of a C++ extension

I am writing a Python extension in C++. I compile it by defining a list of the constituent source files in my setup.py file, like so:
extensions = {
    'im': [
        "im/src/buffer.cpp",
        "im/src/detail.cpp",
        "im/src/gil.cpp",
        "im/src/halideimage.cpp",
        "im/src/hybrid.cpp",
        "im/src/hybridimage.cpp",
        "im/src/options.cpp",
        "im/src/pybuffer.cpp",
        "im/src/pycapsule.cpp",
        "im/src/structcode.cpp",
        "im/src/typecode.cpp",
        "im/src/module.cpp"
    ],
}
… these are used to define an instance of setuptools.Extension which is ultimately passed to the setup() function. This has all worked just fine throughout the project, until now, when I tried to add a platform-specific bit:
preview_source = (sys.platform == 'darwin') and 'im/src/plat/preview_mac.mm' or \
                 (sys.platform == 'linux') and 'im/src/plat/preview_linux.cpp' or \
                 (sys.platform == 'win32') and 'im/src/plat/preview_windows.cpp' or \
                 'im/src/plat/preview.cpp'

extensions = {
    'im': [
        "im/src/buffer.cpp",
        "im/src/detail.cpp",
        "im/src/gil.cpp",
        "im/src/halideimage.cpp",
        "im/src/hybrid.cpp",
        "im/src/hybridimage.cpp",
        "im/src/options.cpp",
        preview_source,
        "im/src/pybuffer.cpp",
        "im/src/pycapsule.cpp",
        "im/src/structcode.cpp",
        "im/src/typecode.cpp",
        "im/src/module.cpp"
    ],
}
… adding this new bit chooses the right file for compilation – but it fails to compile at all on Mac OS X. Apparently distutils/setuptools doesn’t recognize the “.mm” extension as a source file:
error: unknown file type '.mm'
I am no expert when it comes to distutils and setuptools platform-specific configuration – what’s a simple way to conditionally add this one source file to the source file list on the Mac?
I ran into the same issue, did you ever find some solution?
It looks like '.mm' is not supported in my version of distutils but '.m' is. So I separated the C++ parts of the .mm file into a .cpp file, and created a small C header to access the .m file from that .cpp.
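In setup.py terms the split can then be expressed with plain platform-conditional sources, since every file now has an extension distutils accepts (a sketch; preview_mac.m and preview_mac_bridge.cpp are hypothetical names for the two halves of the old .mm file):

import sys

sources = [
    "im/src/buffer.cpp",
    # ... the other common sources ...
]

if sys.platform == 'darwin':
    # the old preview_mac.mm, split into an Objective-C part (.m) and a
    # C++ part that calls into it through a small C header
    sources += ['im/src/plat/preview_mac.m',
                'im/src/plat/preview_mac_bridge.cpp']
else:
    sources += ['im/src/plat/preview.cpp']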
I'm working on a project where I ran into this problem. Here is something that I put together. It's kind of hacky, but it works.
from distutils.unixccompiler import UnixCCompiler
from setuptools import setup
from setuptools.command.build_ext import build_ext

class DarwinInteropBuildExt(build_ext):
    def initialize_options(self):
        # add support for ".mm" files
        UnixCCompiler.src_extensions.append(".mm")
        UnixCCompiler.language_map[".mm"] = "objc"

        # then intercept and patch the compile and link methods to add needed flags
        unpatched_compile = UnixCCompiler._compile
        unpatched_link = UnixCCompiler.link

        def patched_compile(self, obj, src, ext, cc_args, extra_postargs, pp_opts):
            # define language specific compile flags here
            if ext == ".cpp":
                patched_postargs = extra_postargs + ["-std=c++17"]
            elif ext == ".mm":
                patched_postargs = extra_postargs + [
                    "-ObjC++",
                    "-fobjc-weak",
                    "-fobjc-arc",
                ]
            else:
                patched_postargs = extra_postargs
            unpatched_compile(self, obj, src, ext, cc_args, patched_postargs, pp_opts)

        def patched_link(
            self,
            target_desc,
            objects,
            output_filename,
            output_dir=None,
            libraries=None,
            library_dirs=None,
            runtime_library_dirs=None,
            export_symbols=None,
            debug=0,
            extra_preargs=None,
            extra_postargs=None,
            build_temp=None,
            target_lang=None,
        ):
            # define additional linking arguments here if needed
            existing_postargs = extra_postargs or []
            framework_postargs = [
                "-framework", "Cocoa",
                "-framework", "Metal",
                "-framework", "QuartzCore",
            ]
            unpatched_link(
                self,
                target_desc,
                objects,
                output_filename,
                output_dir,
                libraries,
                library_dirs,
                runtime_library_dirs,
                export_symbols,
                debug,
                extra_preargs,
                existing_postargs + framework_postargs,
                build_temp,
                target_lang,
            )

        UnixCCompiler._compile = patched_compile
        UnixCCompiler.link = patched_link

        super().initialize_options()

# ...

setup(
    # use the custom cmd class here
    cmdclass={"build_ext": DarwinInteropBuildExt},
)

How can I share matplotlib style?

One can load a custom plotting style in matplotlib with something like:
>>> import matplotlib.pyplot as plt
>>> plt.style.use('ggplot')
And I know that I can create my own; http://matplotlib.org/users/style_sheets.html explains how.
Let's say that I create an amazing matplotlib style -- how can I share this with other people? Is there a way to do it with pip/conda or something else appropriate?
The docs include the suggestion to "create custom styles and use them by calling style.use with the path or URL to the style sheet" -- so I guess I could maintain such a file in some public git repository, and people would just get the most recent style by pointing style.use at that URL?
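For example (a sketch; the URL is a hypothetical placeholder, and note that recent matplotlib versions have deprecated loading styles from URLs, so a local path or an installed style is the safer long-term bet):

import matplotlib.pyplot as plt

# from a local file
plt.style.use('/path/to/amazing.mplstyle')

# or, on older matplotlib versions, straight from a URL (hypothetical)
plt.style.use('https://raw.githubusercontent.com/you/styles/master/amazing.mplstyle')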
You could organize your code in a structure like this:
.
├── setup.py
└── mplstyles/
    ├── style_01.mplstyle
    └── style_02.mplstyle
Then, in your file setup.py write something like the following:
# -*- coding: utf-8 -*-
import matplotlib as mpl
import glob
import os.path
import shutil
import argparse

parser = argparse.ArgumentParser()
parser.add_argument('-install', action='store_true', default=True)
parser.add_argument('-upgrade', action='store_true')
options = parser.parse_args()

# ref -> matplotlib/style/core
BASE_LIBRARY_PATH = os.path.join(mpl.get_data_path(), 'stylelib')
STYLE_PATH = os.path.join(os.getcwd(), 'mplstyles')
STYLE_EXTENSION = 'mplstyle'
style_files = glob.glob(os.path.join(STYLE_PATH, "*.%s" % (STYLE_EXTENSION)))

for _path_file in style_files:
    _, fname = os.path.split(_path_file)
    dest = os.path.join(BASE_LIBRARY_PATH, fname)
    if not os.path.isfile(dest) and options.install:
        shutil.copy(_path_file, dest)
        print("%s style installed" % (fname))
    elif options.upgrade:
        shutil.copy(_path_file, dest)
        print("%s style upgraded" % (fname))
    elif os.path.isfile(dest):
        print("%s style already exists (use -upgrade to upgrade)" % (fname))
    else:
        pass
The code above copies each .mplstyle (stylesheet) file from the mplstyles folder to the matplotlib installation directory.
"Install" styles
>> python setup.py -install
"Upgrade" styles
>> python setup.py -upgrade
I just had this exact same question. A pity it had not been solved yet. I found a solution to distribute the style(s) using PyPI (in my case goosempl, also on GitHub).
I have created a Python module of which the mplstyle-files are a part:
|-- setup.py
|-- package_name
|   |-- __init__.py
|   |-- styles
|   |   |-- example.mplstyle
The idea is now:
The .mplstyle file(s) gets packaged with the module.
The module gets installed.
At the end of the installation a small script runs that extracts the .mplstyle file(s) from the newly installed package and writes them to the matplotlib config directory.
Here are the essentials
setup.py
import atexit
from setuptools import setup
from setuptools.command.install import install

def _post_install():
    import package_name
    package_name.copy_style()

class new_install(install):
    def __init__(self, *args, **kwargs):
        super(new_install, self).__init__(*args, **kwargs)
        atexit.register(_post_install)

__version__ = '0.1.0'

setup(
    name = 'package_name',
    version = __version__,
    ...
    install_requires = ['matplotlib>=2.0.0'],
    packages = ['package_name'],
    cmdclass = {'install': new_install},
    # package_data keys are package names; paths are relative to the package
    package_data = {'package_name': ['styles/example.mplstyle']},
)
__init__.py
def copy_style():
    import os
    import matplotlib
    from pkg_resources import resource_string

    files = [
        'styles/example.mplstyle',
    ]

    # matplotlib looks for custom styles in <configdir>/stylelib
    destdir = os.path.join(matplotlib.get_configdir(), 'stylelib')
    if not os.path.isdir(destdir):
        os.makedirs(destdir)

    for fname in files:
        path = os.path.join(destdir, os.path.basename(fname))
        text = resource_string(__name__, fname).decode()
        with open(path, 'w') as f:
            f.write(text)
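After installation, the style should then be loadable by name, since matplotlib picks up everything in its config stylelib directory (assuming the example.mplstyle file from above):

import matplotlib.pyplot as plt

plt.style.use('example')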
For future reference see these related questions / documentation:
Why use package_data and not data_files.
Running a post-installation script/command that relies on the installed package
Uploading to PyPi

py2app picking up .git subdir of a package during build

We use py2app extensively at our facility to produce self contained .app packages for easy internal deployment without dependency issues. Something I noticed recently, and have no idea how it began, is that when building an .app, py2app started including the .git directory of our main library.
commonLib, for instance, is our root python library package, which is a git repo. Under this package are the various subpackages such as database, utility, etc.
commonLib/
    .git/               # because commonLib is a git repo
    __init__.py
    database/
        __init__.py
    utility/
        __init__.py
    # ... etc
In a given project, say Foo, we do imports like from commonLib import xyz to use our common packages. Building via py2app looks something like: python setup.py py2app
So the recent issue I am seeing is that when building an app for project Foo, py2app includes everything in commonLib/.git/ in the app, which is extra bloat. py2app has an excludes option, but that only seems to apply to python modules. I can't quite figure out what it would take to exclude the .git subdir or, in fact, what is causing it to be included in the first place.
Has anyone experienced this when using a python package import that is a git repo?
Nothing has changed in our setup.py files for each project, and commonLib has always been a git repo. So the only variable I can think of is the version of py2app and its dependencies, which have obviously been upgraded over time.
Edit
I'm using the latest py2app 0.6.4 as of right now. Also, my setup.py was first generated by py2applet a while back, but it has been hand-configured since and copied over as a template for every new project. I am using PyQt4/sip for every single one of these projects, so it also makes me wonder if it's an issue with one of the recipes?
Update
From the first answer, I tried to fix this using various combinations of exclude_package_data settings. Nothing seems to force the .git directory to become excluded. Here is a sample of what my setup.py files generally look like:
from setuptools import setup
from myApp import VERSION

appname = 'MyApp'

APP = ['myApp.py']
DATA_FILES = []
OPTIONS = {
    'includes': 'atexit, sip, PyQt4.QtCore, PyQt4.QtGui',
    'strip': True,
    'iconfile': 'ui/myApp.icns',
    'resources': ['src/myApp.png'],
    'plist': {
        'CFBundleIconFile': 'ui/myApp.icns',
        'CFBundleIdentifier': 'com.company.myApp',
        'CFBundleGetInfoString': appname,
        'CFBundleVersion': VERSION,
        'CFBundleShortVersionString': VERSION
    }
}

setup(
    app=APP,
    data_files=DATA_FILES,
    options={'py2app': OPTIONS},
    setup_requires=['py2app'],
)
I have tried things like:
setup(
    ...
    exclude_package_data = { 'commonLib': ['.git'] },
    #exclude_package_data = { '': ['.git'] },
    #exclude_package_data = { 'commonLib/.git/': ['*'] },
    #exclude_package_data = { '.git': ['*'] },
    ...
)
Update #2
I have posted my own answer, which monkeypatches distutils. It's ugly and not preferred, but until someone can offer me a better solution, I guess this is what I have.
I am adding an answer to my own question to document the only thing I have found to work thus far. My approach was to monkeypatch distutils to ignore certain patterns when creating a directory or copying a file. This is really not what I wanted to do, but, like I said, it's the only thing that works so far.
## setup.py ##

import re

# file_util has to come first because dir_util uses it
from distutils import file_util, dir_util

def wrapper(fn):
    def wrapped(src, *args, **kwargs):
        if not re.search(r'/\.git/?', src):
            fn(src, *args, **kwargs)
    return wrapped

file_util.copy_file = wrapper(file_util.copy_file)
dir_util.mkpath = wrapper(dir_util.mkpath)

# now import setuptools so it uses the monkeypatched methods
from setuptools import setup
Hopefully someone will comment on this and tell me a higher-level approach that avoids this. But as of now, I will probably wrap this into a utility method like exclude_data_patterns(re_pattern) to be reused in my projects.
I can see two options for excluding the .git directory.
Build the application from a 'clean' checkout of the code. When deploying a new version, we always build from a fresh svn export based on a tag to ensure we don't pick up spurious changes/files. You could try the equivalent here - although the git equivalent seems somewhat more involved.
Modify the setup.py file to massage the files included in the application. This might be done using the exclude_package_data functionality as described in the docs, or build the list of data_files and pass it to setup.
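For the second option, a sketch of what that massaging could look like (walk_data_files is a hypothetical helper, not a py2app API; it builds a data_files list while pruning .git):

import os

def walk_data_files(root):
    """Collect (directory, [files]) pairs under root, skipping .git."""
    data_files = []
    for dirpath, dirnames, filenames in os.walk(root):
        if '.git' in dirnames:
            dirnames.remove('.git')  # prune so os.walk never descends into it
        if filenames:
            data_files.append((dirpath,
                               [os.path.join(dirpath, f) for f in filenames]))
    return data_files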
As for why it has suddenly started happening, knowing the version of py2app you are using might help, as will knowing the contents of your setup.py and perhaps how this was made (by hand or using py2applet).
I have a similar experience with Pyinstaller, so I'm not sure it applies directly.
Pyinstaller creates a "manifest" of all files to be included in the distribution, before running the export process. You could "massage" this manifest, as per Mark's second suggestion, to exclude any files you want. Including anything within .git or .git itself.
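For PyInstaller specifically, that filtering can happen in the generated .spec file, which is itself Python (a sketch; the Analysis object and its datas list are standard in generated spec files, the filter line is the addition, and the exact tuple layout may vary between PyInstaller versions):

# in the generated .spec file
a = Analysis(['myApp.py'])

# drop anything that came from a .git directory before the app is assembled
a.datas = [entry for entry in a.datas if '.git' not in entry[0]]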
In the end, I stuck with checking out my code before producing a binary as there was more than just .git being bloat (such as UML documents and raw resource files for Qt). A checkout guaranteed a clean result and I experienced no issues automating that process along with the process of creating the installer for the binary.
There is a good answer to this, but I have a more elaborate answer that solves the problem mentioned here with a white-list approach. To have the monkey patch also work for packages outside site-packages.zip, I had to monkey patch copy_tree as well (because it imports copy_file inside its function); this helps in making a standalone application.
In addition, I created a white-list recipe to mark certain packages zip-unsafe. The approach makes it easy to add filters other than the white-list.
import pkgutil
from os.path import join, dirname, realpath
from distutils import log

# file_util has to come first because dir_util uses it
from distutils import file_util, dir_util
# noinspection PyUnresolvedReferences
from py2app import util

def keep_only_filter(base_mod, sub_mods):
    prefix = join(realpath(dirname(base_mod.filename)), '')
    all_prefix = [join(prefix, sm) for sm in sub_mods]
    log.info("Set filter for prefix %s" % prefix)

    def wrapped(mod):
        name = getattr(mod, 'filename', None)
        if name is None:
            # ignore anything that does not have a file name
            return True
        name = join(realpath(dirname(name)), '')
        if not name.startswith(prefix):
            # ignore those that are not in this prefix
            return True
        for p in all_prefix:
            if name.startswith(p):
                return True
        # log.info('ignoring %s' % name)
        return False

    return wrapped

# define all the filters we need
all_filts = {
    'mypackage': (keep_only_filter, [
        'subpackage1', 'subpackage2',
    ]),
}

def keep_only_wrapper(fn, is_dir=False):
    filts = [(f, k[1]) for (f, k) in all_filts.iteritems()
             if k[0] == keep_only_filter]
    prefixes = {}
    for f, sms in filts:
        pkg = pkgutil.get_loader(f)
        assert pkg, '{f} package not found'.format(f=f)
        p = join(pkg.filename, '')
        sp = [join(p, sm, '') for sm in sms]
        prefixes[p] = sp

    def wrapped(src, *args, **kwargs):
        name = src
        if not is_dir:
            name = dirname(src)
        name = join(realpath(name), '')
        keep = True
        for prefix, sub_prefixes in prefixes.iteritems():
            if name == prefix:
                # let the root pass
                continue
            # if it is a package we have a filter for
            if name.startswith(prefix):
                keep = False
                for sub_prefix in sub_prefixes:
                    if name.startswith(sub_prefix):
                        keep = True
                        break
        if keep:
            return fn(src, *args, **kwargs)
        return []

    return wrapped

file_util.copy_file = keep_only_wrapper(file_util.copy_file)
dir_util.mkpath = keep_only_wrapper(dir_util.mkpath, is_dir=True)
util.copy_tree = keep_only_wrapper(util.copy_tree, is_dir=True)

class ZipUnsafe(object):
    def __init__(self, _module, _filt):
        self.module = _module
        self.filt = _filt

    def check(self, dist, mf):
        m = mf.findNode(self.module)
        if m is None:
            return None
        # Do not put this package in site-packages.zip
        if self.filt:
            return dict(
                packages=[self.module],
                filters=[self.filt[0](m, self.filt[1])],
            )
        return dict(
            packages=[self.module]
        )

# Any package that is zip-unsafe (uses __file__, ...) should be added here
# noinspection PyUnresolvedReferences
import py2app.recipes

for module in [
    'sklearn', 'mypackage',
]:
    filt = all_filts.get(module)
    setattr(py2app.recipes, module, ZipUnsafe(module, filt))
