I'm using python's ftplib to write a small FTP client, but some of the functions in the package don't return string output, but print to stdout. I want to redirect stdout to an object which I'll be able to read the output from.
I know stdout can be redirected into any regular file with:
stdout = open("file", "a")
But I prefer a method that doesn't uses the local drive.
I'm looking for something like the BufferedReader in Java that can be used to wrap a buffer into a stream.
from cStringIO import StringIO # Python3 use: from io import StringIO
import sys
old_stdout = sys.stdout
sys.stdout = mystdout = StringIO()
# blah blah lots of code ...
sys.stdout = old_stdout
# examine mystdout.getvalue()
There is a contextlib.redirect_stdout() function in Python 3.4+:
import io
from contextlib import redirect_stdout
with io.StringIO() as buf, redirect_stdout(buf):
print('redirected')
output = buf.getvalue()
Here's a code example that shows how to implement it on older Python versions.
Just to add to Ned's answer above: you can use this to redirect output to any object that implements a write(str) method.
This can be used to good effect to "catch" stdout output in a GUI application.
Here's a silly example in PyQt:
import sys
from PyQt4 import QtGui
class OutputWindow(QtGui.QPlainTextEdit):
def write(self, txt):
self.appendPlainText(str(txt))
app = QtGui.QApplication(sys.argv)
out = OutputWindow()
sys.stdout=out
out.show()
print "hello world !"
A context manager for python3:
import sys
from io import StringIO
class RedirectedStdout:
def __init__(self):
self._stdout = None
self._string_io = None
def __enter__(self):
self._stdout = sys.stdout
sys.stdout = self._string_io = StringIO()
return self
def __exit__(self, type, value, traceback):
sys.stdout = self._stdout
def __str__(self):
return self._string_io.getvalue()
use like this:
>>> with RedirectedStdout() as out:
>>> print('asdf')
>>> s = str(out)
>>> print('bsdf')
>>> print(s, out)
'asdf\n' 'asdf\nbsdf\n'
Starting with Python 2.6 you can use anything implementing the TextIOBase API from the io module as a replacement.
This solution also enables you to use sys.stdout.buffer.write() in Python 3 to write (already) encoded byte strings to stdout (see stdout in Python 3).
Using StringIO wouldn't work then, because neither sys.stdout.encoding nor sys.stdout.buffer would be available.
A solution using TextIOWrapper:
import sys
from io import TextIOWrapper, BytesIO
# setup the environment
old_stdout = sys.stdout
sys.stdout = TextIOWrapper(BytesIO(), sys.stdout.encoding)
# do something that writes to stdout or stdout.buffer
# get output
sys.stdout.seek(0) # jump to the start
out = sys.stdout.read() # read output
# restore stdout
sys.stdout.close()
sys.stdout = old_stdout
This solution works for Python 2 >= 2.6 and Python 3.
Please note that our new sys.stdout.write() only accepts unicode strings and sys.stdout.buffer.write() only accepts byte strings.
This might not be the case for old code, but is often the case for code that is built to run on Python 2 and 3 without changes, which again often makes use of sys.stdout.buffer.
You can build a slight variation that accepts unicode and byte strings for write():
class StdoutBuffer(TextIOWrapper):
def write(self, string):
try:
return super(StdoutBuffer, self).write(string)
except TypeError:
# redirect encoded byte strings directly to buffer
return super(StdoutBuffer, self).buffer.write(string)
You don't have to set the encoding of the buffer the sys.stdout.encoding, but this helps when using this method for testing/comparing script output.
This method restores sys.stdout even if there's an exception. It also gets any output before the exception.
import io
import sys
real_stdout = sys.stdout
fake_stdout = io.BytesIO() # or perhaps io.StringIO()
try:
sys.stdout = fake_stdout
# do what you have to do to create some output
finally:
sys.stdout = real_stdout
output_string = fake_stdout.getvalue()
fake_stdout.close()
# do what you want with the output_string
Tested in Python 2.7.10 using io.BytesIO()
Tested in Python 3.6.4 using io.StringIO()
Bob, added for a case if you feel anything from the modified / extended code experimentation might get interesting in any sense, otherwise feel free to delete it
Ad informandum ... a few remarks from extended experimentation during finding some viable mechanics to "grab" outputs, directed by numexpr.print_versions() directly to the <stdout> ( upon a need to clean GUI and collecting details into debugging-report )
# THIS WORKS AS HELL: as Bob Stein proposed years ago:
# py2 SURPRISEDaBIT:
#
import io
import sys
#
real_stdout = sys.stdout # PUSH <stdout> ( store to REAL_ )
fake_stdout = io.BytesIO() # .DEF FAKE_
try: # FUSED .TRY:
sys.stdout.flush() # .flush() before
sys.stdout = fake_stdout # .SET <stdout> to use FAKE_
# ----------------------------------------- # + do what you gotta do to create some output
print 123456789 # +
import numexpr # +
QuantFX.numexpr.__version__ # + [3] via fake_stdout re-assignment, as was bufferred + "late" deferred .get_value()-read into print, to finally reach -> real_stdout
QuantFX.numexpr.print_versions() # + [4] via fake_stdout re-assignment, as was bufferred + "late" deferred .get_value()-read into print, to finally reach -> real_stdout
_ = os.system( 'echo os.system() redir-ed' )# + [1] via real_stdout + "late" deferred .get_value()-read into print, to finally reach -> real_stdout, if not ( _ = )-caught from RET-d "byteswritten" / avoided from being injected int fake_stdout
_ = os.write( sys.stderr.fileno(), # + [2] via stderr + "late" deferred .get_value()-read into print, to finally reach -> real_stdout, if not ( _ = )-caught from RET-d "byteswritten" / avoided from being injected int fake_stdout
b'os.write() redir-ed' )# *OTHERWISE, if via fake_stdout, EXC <_io.BytesIO object at 0x02C0BB10> Traceback (most recent call last):
# ----------------------------------------- # ? io.UnsupportedOperation: fileno
#''' ? YET: <_io.BytesIO object at 0x02C0BB10> has a .fileno() method listed
#>>> 'fileno' in dir( sys.stdout ) -> True ? HAS IT ADVERTISED,
#>>> pass; sys.stdout.fileno -> <built-in method fileno of _io.BytesIO object at 0x02C0BB10>
#>>> pass; sys.stdout.fileno()-> Traceback (most recent call last):
# File "<stdin>", line 1, in <module>
# io.UnsupportedOperation: fileno
# ? BUT REFUSES TO USE IT
#'''
finally: # == FINALLY:
sys.stdout.flush() # .flush() before ret'd back REAL_
sys.stdout = real_stdout # .SET <stdout> to use POP'd REAL_
sys.stdout.flush() # .flush() after ret'd back REAL_
out_string = fake_stdout.getvalue() # .GET string from FAKE_
fake_stdout.close() # <FD>.close()
# +++++++++++++++++++++++++++++++++++++ # do what you want with the out_string
#
print "\n{0:}\n{1:}{0:}".format( 60 * "/\\",# "LATE" deferred print the out_string at the very end reached -> real_stdout
out_string #
)
'''
PASS'd:::::
...
os.system() redir-ed
os.write() redir-ed
/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\
123456789
'2.5'
-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
Numexpr version: 2.5
NumPy version: 1.10.4
Python version: 2.7.13 |Anaconda 4.0.0 (32-bit)| (default, May 11 2017, 14:07:41) [MSC v.1500 32 bit (Intel)]
AMD/Intel CPU? True
VML available? True
VML/MKL version: Intel(R) Math Kernel Library Version 11.3.1 Product Build 20151021 for 32-bit applications
Number of threads used by default: 4 (out of 4 detected cores)
-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\
>>>
EXC'd :::::
...
os.system() redir-ed
/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\
123456789
'2.5'
-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
Numexpr version: 2.5
NumPy version: 1.10.4
Python version: 2.7.13 |Anaconda 4.0.0 (32-bit)| (default, May 11 2017, 14:07:41) [MSC v.1500 32 bit (Intel)]
AMD/Intel CPU? True
VML available? True
VML/MKL version: Intel(R) Math Kernel Library Version 11.3.1 Product Build 20151021 for 32-bit applications
Number of threads used by default: 4 (out of 4 detected cores)
-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\
Traceback (most recent call last):
File "<stdin>", line 9, in <module>
io.UnsupportedOperation: fileno
'''
In Python3.6, the StringIO and cStringIO modules are gone, you should use io.StringIO instead.So you should do this like the first answer:
import sys
from io import StringIO
old_stdout = sys.stdout
old_stderr = sys.stderr
my_stdout = sys.stdout = StringIO()
my_stderr = sys.stderr = StringIO()
# blah blah lots of code ...
sys.stdout = self.old_stdout
sys.stderr = self.old_stderr
// if you want to see the value of redirect output, be sure the std output is turn back
print(my_stdout.getvalue())
print(my_stderr.getvalue())
my_stdout.close()
my_stderr.close()
Use pipe() and write to the appropriate file descriptor.
https://docs.python.org/library/os.html#file-descriptor-operations
Here's another take on this. contextlib.redirect_stdout with io.StringIO() as documented is great, but it's still a bit verbose for every day use. Here's how to make it a one-liner by subclassing contextlib.redirect_stdout:
import sys
import io
from contextlib import redirect_stdout
class capture(redirect_stdout):
def __init__(self):
self.f = io.StringIO()
self._new_target = self.f
self._old_targets = [] # verbatim from parent class
def __enter__(self):
self._old_targets.append(getattr(sys, self._stream)) # verbatim from parent class
setattr(sys, self._stream, self._new_target) # verbatim from parent class
return self # instead of self._new_target in the parent class
def __repr__(self):
return self.f.getvalue()
Since __enter__ returns self, you have the context manager object available after the with block exits. Moreover, thanks to the __repr__ method, the string representation of the context manager object is, in fact, stdout. So now you have,
with capture() as message:
print('Hello World!')
print(str(message)=='Hello World!\n') # returns True
Related
I would like to capture error messages which are generated from a python module A. A is written in C++ and SWIG, and so I cannot capture Python's sys.stderr.
>>> import A
>>> A.func() # this prints an error message by C++ std::cerr.
this is an error message from module A
What I want to do are to suppress the error message and to change my script behavior according to the error type. But A.func() does not return error numbers.
Assuming that my use of contextlib below is correct, it did not help.
>>> import io
>>> f = io.StringIO()
>>> import contextlib
>>> with contextlib.redirect_stderr(f):
... no_return = A.func()
ERROR MESSAGE HERE
>>> f.getvalue()
>>>
Thank you #PM2Ring
https://eli.thegreenplace.net/2015/redirecting-all-kinds-of-stdout-in-python/#id1
With minor modifications: replacing all the stdout with stderr and supporting macOS as shown below, it worked perfectly as expected.
if sys.platform == 'darwin':
c_stderr = ctypes.c_void_p.in_dll(libc, '__stderrp')
else:
c_stderr = ctypes.c_void_p.in_dll(libc, 'stderr')
I have a program that outputs many calculations and results to the console through the print statement. I want to write some code to export (or save) all the contents of the console to a simple text file.
I searched StackOverflow and other sites but I found some methods to redirect the print statement to print to a file directly, but I want the program to work normally, to display outputs to the console, then to save its contents AFTER all operations of the program done.
I am using PyCharm with Python2.7 if it matters
Ok, so normally to get it done, you have to rewrite python print built-in function. But... There is ipython, which provides some hooks.
First you need to have ipython installed:
#bash
sudo pip install ipython
(I'm using sudo to simple locate then folder I need to reach, read further)
After ipython installation you'll have ipython extensions folder available, so get to it:
#bash
cd ~/.ipython/extensions/
and create there let's say a file called print_to_file.py, here is its content:
#python
class PrintWatcher(object):
def __init__(self, ip):
self.shell = ip
def post_execute(self):
with open('/home/turkus/shell.txt', 'a+') as f:
in_len = len(self.shell.user_ns['In'])
i = in_len - 1
in_ = self.shell.user_ns['In'][i]
out = self.shell.user_ns['Out'].get(i, '')
# you can edit this line if you want different input in shell.txt
f.write('{}\n{}\n'.format(in_, out))
def load_ipython_extension(ip):
pw = PrintWatcher(ip)
ip.events.register('post_run_cell', pw.post_execute)
After saving a file just run:
#bash
ipython profile create
# you will get something like that:
[ProfileCreate] Generating default config file: u'/home/turkus/.ipython/profile_default/ipython_config.py'
Now get back to setting up our hook. We must open ipython_config.py created under path above and put there some magic (there is a lot of stuff there, so go to the end of file):
# some commented lines here
c = get_config()
c.InteractiveShellApp.extensions = [
'print_to_file'
]
After saving it, you can run ipython and write your code. Every your input will be written in a file under path you provided above, in my case it was:
/home/turkus/shell.txt
Notes
You can avoid loading your extension every time ipython fires up, by just delete 'print_to_file' from c.InteractiveShellApp.extensions list in ipython_config.py. But remember that you can load it anytime you need, just by typing in ipython console:
➜ ~ ipython
Python 2.7.12 (default, Jul 1 2016, 15:12:24)
Type "copyright", "credits" or "license" for more information.
IPython 4.0.0 -- An enhanced Interactive Python.
? -> Introduction and overview of IPython's features.
%quickref -> Quick reference.
help -> Python's own help system.
object? -> Details about 'object', use 'object??' for extra details.
In [1]: %load_ext print_to_file
Any change in print_to_file.py is being reflected in open ipython shell after using %reload_ext print_to_file command, so you don't have to exit from and fire up it again.
I am unsure how you could receive the contents of a console for any editor however this can be achieved quite simply by replacing your print() statements with .write
class Writer(object):
def __init__(self, out_file, overwrite=False):
self.file_name = out_file
self.overwrite = overwrite
self.history = []
def write(self, statement):
self.history.append(statement)
print statement
def close(self):
if self.overwrite:
self.out_file = open(self.file_name, 'wb')
else:
self.out_file = open(self.file_name, 'ab')
for x in self.history:
self.out_file.write(x+'/n')
self.out_file.close()
self.history = []
p = Writer('my_output_file.txt')
p.write('my string to print and save!')
p.close() #close the writer to save the contents to a file before exiting
After I know understood your question I think you search the tee command
python your_program | tee output.txt
This will show you the output both, in the console and in output.txt
PS: Since you did not answer to my comment which OS you use I assumed that you use either Linux or MACOS. Should work on both. I don't know how to do this on windows...
You could override the print function which will still be accessible through the builtins module
import builtins
f = open("logs.txt", "w")
def print(*args, sep=' ', end='\n', **kwargs):
builtins.print(*args, sep=sep, end=end, **kwargs)
f.write(sep.join(*args) + end)
EDIT: A similar solution for Python 2
from __future__ import print_function
class Print:
def __init__(self, print_function, filename='test', mode='w'):
self.print_function = print_function
self.file = open(filename, 'w')
def __call__(self, *args, **kwargs):
self.print_function(*args, **kwargs)
kwargs['file'] = self.file
self.print_function(*args, **kwargs)
print = Print(print, 'logs.txt')
This creates a print function that you use exactly as the function you import from __future__.
To close the file when everything is done you have to run:
print.file.close()
Maybe you should create a variable that will log the outputs and then put it into a file.
For ex:
print statement
logger += statement+"\n" #a new line char so each statement is on a new line
with open('file.txt', 'a') as f:
f.write(statement)
With all thanks and respect to all who contributed to this question. I have finally found a solution to this problem with minimal modifications to my original code. The solution is provided by the member #Status and here is its link .
Although I searched a lot before posting my question, but the answers of the respected members enlightened my mind to a precise search especially the contributions of #turkus, who performs an exceptional work, and #Glostas who opened my eyes to the "tee" which guided me to find the solution I posted (although it does not contain "tee").
The solution, as of the mentioned post with slight modifications:
1- Put the following Class in the program:
class Logger(object):
"""
Lumberjack class - duplicates sys.stdout to a log file and it's okay
source: https://stackoverflow.com/a/24583265/5820024
"""
def __init__(self, filename="Red.Wood", mode="a", buff=0):
self.stdout = sys.stdout
self.file = open(filename, mode, buff)
sys.stdout = self
def __del__(self):
self.close()
def __enter__(self):
pass
def __exit__(self, *args):
pass
def write(self, message):
self.stdout.write(message)
self.file.write(message)
def flush(self):
self.stdout.flush()
self.file.flush()
os.fsync(self.file.fileno())
def close(self):
if self.stdout != None:
sys.stdout = self.stdout
self.stdout = None
if self.file != None:
self.file.close()
self.file = None
2- At the beginning of the program, before any print statements, put this line:
my_console = Logger('my_console_file.txt') # you can change the file's name
3- At the end of the program, after all of the print statements, put this line:
my_console.close()
I tested this, and It works perfectly, and finally I have a clone of the console's output after the program ends.
With best regards to everybody, and Many thanks to all contributors.
There is a very obvious but not very elegant solution.
instead of:
print statement 1
calculation
print statement 2
you can make something like
sexport =''
calculation
print statement 1
sexport += statement1 + "\n"
calculaztion
print statement 2
sexport += statement 2
finally just save sexport to a file
Eli Bendersky has explained thoroughly how to "Redirecting all kinds of stdout in Python", and specifically Redirecting C-level streams, e.g. stdout of a shared library (dll). However, the example is in Linux and does not work in windows, mainly due to the following lines:
libc = ctypes.CDLL(None)
c_stdout = ctypes.c_void_p.in_dll(libc = ctypes.CDLL(None), 'stdout')
How can we make it work in Windows?
I found the answer buried in Drekin's code. Based on that, I made a small change to Eli Bendersky's example:
Update: This code has been tested on Python 3.4 64-bit on Windows and Python 3.5 64-bit on Linux. For Python 3.5 on Windows, please see eryksun's comment.
from contextlib import contextmanager
import ctypes
import io
import os
import sys
import tempfile
import ctypes.util
from ctypes import *
import platform
if platform.system() == "Linux":
libc = ctypes.CDLL(None)
c_stdout = ctypes.c_void_p.in_dll(libc, 'stdout')
if platform.system() == "Windows":
class FILE(ctypes.Structure):
_fields_ = [
("_ptr", c_char_p),
("_cnt", c_int),
("_base", c_char_p),
("_flag", c_int),
("_file", c_int),
("_charbuf", c_int),
("_bufsize", c_int),
("_tmpfname", c_char_p),
]
# Gives you the name of the library that you should really use (and then load through ctypes.CDLL
msvcrt = CDLL(ctypes.util.find_msvcrt())
libc = msvcrt # libc was used in the original example in _redirect_stdout()
iob_func = msvcrt.__iob_func
iob_func.restype = POINTER(FILE)
iob_func.argtypes = []
array = iob_func()
s_stdin = addressof(array[0])
c_stdout = addressof(array[1])
#contextmanager
def stdout_redirector(stream):
# The original fd stdout points to. Usually 1 on POSIX systems.
original_stdout_fd = sys.stdout.fileno()
def _redirect_stdout(to_fd):
"""Redirect stdout to the given file descriptor."""
# Flush the C-level buffer stdout
libc.fflush(c_stdout)
# Flush and close sys.stdout - also closes the file descriptor (fd)
sys.stdout.close()
# Make original_stdout_fd point to the same file as to_fd
os.dup2(to_fd, original_stdout_fd)
# Create a new sys.stdout that points to the redirected fd
sys.stdout = io.TextIOWrapper(os.fdopen(original_stdout_fd, 'wb'))
# Save a copy of the original stdout fd in saved_stdout_fd
saved_stdout_fd = os.dup(original_stdout_fd)
try:
# Create a temporary file and redirect stdout to it
tfile = tempfile.TemporaryFile(mode='w+b')
_redirect_stdout(tfile.fileno())
# Yield to caller, then redirect stdout back to the saved fd
yield
_redirect_stdout(saved_stdout_fd)
# Copy contents of temporary file to the given stream
tfile.flush()
tfile.seek(0, io.SEEK_SET)
stream.write(tfile.read())
finally:
tfile.close()
os.close(saved_stdout_fd)
if __name__ == '__main__':
f = io.BytesIO()
print('...')
with stdout_redirector(f):
print('foobar')
print(12)
libc.puts(b'this comes from C')
os.system('echo and this is from echo')
print('Got stdout:"\n{0}\n"'.format(f.getvalue().decode('utf-8')))
print('Resuming normal operation...')
Roughly speaking, I want to port this to pure Python:
#!/bin/bash
{
python test.py
} &> /tmp/test.log
This didn't work for some unknown reasons:
import os.path, sys
import tempfile
with open(os.path.join(tempfile.gettempdir(), "test.log"), "a") as fp:
sys.stdout = sys.stderr = fp
raise Exception("I'm dying")
The resulting test.log was empty (and I didn't see anything on my console,) when I tested it with Python 2.6.6, Python 2.7.8 and Python 3.4.2 on CentOS x86_64.
But Ideally I'd like a solution for Python 2.6.
(For now, it's tolerable to clutter the log file with intermixed output from stdout and stderr or multithreading, as long as any data won't simply disappear into a blackhole.)
Show me a concise and portable solution which is confirmed to work with an exception stack trace on sys.stderr. (Preferably something other than os.dup2)
Remember that file objects are closed after with blocks :)
Use simply this:
sys.stdout = sys.stderr = open("test.log","w")
raise Exception("Dead")
Content of test.log after exit:
Traceback (most recent call last):
File "test.py", line 5, in <module>
raise Exception("Dead")
Exception: Dead
You can use a method like this one:
import traceback
import sys
from contextlib import contextmanager
#contextmanager
def output_to_file(filepath, write_mode='w'):
stdout_orig = None
stderr_orig = None
stdout_orig = sys.stdout
stderr_orig = sys.stderr
f = open(filepath, write_mode)
sys.stdout = f
sys.stderr = f
try:
yield
except:
info = sys.exc_info()
f.write('\n'.join(traceback.format_exception(*info)))
f.close()
sys.stdout = stdout_orig
sys.stderr = stderr_orig
And the the usage is:
with output_to_file('test.log'):
print('hello')
raise Exception('I am dying')
And the cat test.log produces:
hello
Traceback (most recent call last):
File "<ipython-input-3-a3b702c7b741>", line 20, in outputi_to_file
yield
File "<ipython-input-4-f879d82580b2>", line 3, in <module>
raise Exception('I am dying')
Exception: I am dying
This works for me:
#!/usr/bin/env python
from __future__ import print_function
import os, os.path, sys, tempfile
old_out = os.dup(sys.stdout.fileno())
old_err = os.dup(sys.stderr.fileno())
with open(os.path.join(tempfile.gettempdir(), "test.log"), "a") as fp:
fd = fp.fileno()
os.dup2(fd, sys.stdout.fileno())
os.dup2(fd, sys.stderr.fileno())
print("Testing")
print('testing errs', file=sys.stderr)
raise Exception("I'm dying")
The future is just for cleaner handling of Python2 or Python3 with the same example. (I've also changed the raise statement to instantiate an exception, strings as exceptions have been deprecated for a long time and they're not properly supported under Python3).
The old_* values are just if we wanted to restore our original stdout and/or stderr after using our redirected file.
I'm getting an error in a program that is supposed to run for a long time that too many files are open. Is there any way I can keep track of which files are open so I can print that list out occasionally and see where the problem is?
To list all open files in a cross-platform manner, I would recommend psutil.
#!/usr/bin/env python
import psutil
for proc in psutil.process_iter():
print(proc.open_files())
The original question implicitly restricts the operation to the currently running process, which can be accessed through psutil's Process class.
proc = psutil.Process()
print(proc.open_files())
Lastly, you'll want to run the code using an account with the appropriate permissions to access this information or you may see AccessDenied errors.
I ended up wrapping the built-in file object at the entry point of my program. I found out that I wasn't closing my loggers.
import io
import sys
import builtins
import traceback
from functools import wraps
def opener(old_open):
#wraps(old_open)
def tracking_open(*args, **kw):
file = old_open(*args, **kw)
old_close = file.close
#wraps(old_close)
def close():
old_close()
open_files.remove(file)
file.close = close
file.stack = traceback.extract_stack()
open_files.add(file)
return file
return tracking_open
def print_open_files():
print(f'### {len(open_files)} OPEN FILES: [{", ".join(f.name for f in open_files)}]', file=sys.stderr)
for file in open_files:
print(f'Open file {file.name}:\n{"".join(traceback.format_list(file.stack))}', file=sys.stderr)
open_files = set()
io.open = opener(io.open)
builtins.open = opener(builtins.open)
On Linux, you can look at the contents of /proc/self/fd:
$ ls -l /proc/self/fd/
total 0
lrwx------ 1 foo users 64 Jan 7 15:15 0 -> /dev/pts/3
lrwx------ 1 foo users 64 Jan 7 15:15 1 -> /dev/pts/3
lrwx------ 1 foo users 64 Jan 7 15:15 2 -> /dev/pts/3
lr-x------ 1 foo users 64 Jan 7 15:15 3 -> /proc/9527/fd
Although the solutions above that wrap opens are useful for one's own code, I was debugging my client to a third party library including some c extension code, so I needed a more direct way. The following routine works under darwin, and (I hope) other unix-like environments:
def get_open_fds():
'''
return the number of open file descriptors for current process
.. warning: will only work on UNIX-like os-es.
'''
import subprocess
import os
pid = os.getpid()
procs = subprocess.check_output(
[ "lsof", '-w', '-Ff', "-p", str( pid ) ] )
nprocs = len(
filter(
lambda s: s and s[ 0 ] == 'f' and s[1: ].isdigit(),
procs.split( '\n' ) )
)
return nprocs
If anyone can extend to be portable to windows, I'd be grateful.
On Linux, you can use lsof to show all files opened by a process.
As said earlier, you can list fds on Linux in /proc/self/fd, here is a simple method to list them programmatically:
import os
import sys
import errno
def list_fds():
"""List process currently open FDs and their target """
if not sys.platform.startswith('linux'):
raise NotImplementedError('Unsupported platform: %s' % sys.platform)
ret = {}
base = '/proc/self/fd'
for num in os.listdir(base):
path = None
try:
path = os.readlink(os.path.join(base, num))
except OSError as err:
# Last FD is always the "listdir" one (which may be closed)
if err.errno != errno.ENOENT:
raise
ret[int(num)] = path
return ret
On Windows, you can use Process Explorer to show all file handles owned by a process.
There are some limitations to the accepted response, in that it does not seem to count pipes. I had a python script that opened many sub-processes, and was failing to properly close standard input, output, and error pipes, which were used for communication. If I use the accepted response, it will fail to count these open pipes as open files, but (at least in Linux) they are open files and count toward the open file limit. The lsof -p solution suggested by sumid and shunc works in this situation, because it also shows you the open pipes.
Get a list of all open files. handle.exe is part of Microsoft's Sysinternals Suite. An alternative is the psutil Python module, but I find 'handle' will print out more files in use.
Here is what I made. Kludgy code warning.
#!/bin/python3
# coding: utf-8
"""Build set of files that are in-use by processes.
Requires 'handle.exe' from Microsoft SysInternals Suite.
This seems to give a more complete list than using the psutil module.
"""
from collections import OrderedDict
import os
import re
import subprocess
# Path to handle executable
handle = "E:/Installers and ZIPs/Utility/Sysinternalssuite/handle.exe"
# Get output string from 'handle'
handle_str = subprocess.check_output([handle]).decode(encoding='ASCII')
""" Build list of lists.
1. Split string output, using '-' * 78 as section breaks.
2. Ignore first section, because it is executable version info.
3. Turn list of strings into a list of lists, ignoring first item (it's empty).
"""
work_list = [x.splitlines()[1:] for x in handle_str.split(sep='-' * 78)[1:]]
""" Build OrderedDict of pid information.
pid_dict['pid_num'] = ['pid_name','open_file_1','open_file_2', ...]
"""
pid_dict = OrderedDict()
re1 = re.compile("(.*?\.exe) pid: ([0-9]+)") # pid name, pid number
re2 = re.compile(".*File.*\s\s\s(.*)") # File name
for x_list in work_list:
key = ''
file_values = []
m1 = re1.match(x_list[0])
if m1:
key = m1.group(2)
# file_values.append(m1.group(1)) # pid name first item in list
for y_strings in x_list:
m2 = re2.match(y_strings)
if m2:
file_values.append(m2.group(1))
pid_dict[key] = file_values
# Make a set of all the open files
values = []
for v in pid_dict.values():
values.extend(v)
files_open = sorted(set(values))
txt_file = os.path.join(os.getenv('TEMP'), 'lsof_handle_files')
with open(txt_file, 'w') as fd:
for a in sorted(files_open):
fd.write(a + '\n')
subprocess.call(['notepad', txt_file])
os.remove(txt_file)
I'd guess that you are leaking file descriptors. You probably want to look through your code to make sure that you are closing all of the files that you open.
You can use the following script. It builds on Claudiu's answer. It addresses some of the issues and adds additional features:
Prints a stack trace of where the file was opened
Prints on program exit
Keyword argument support
Here's the code and a link to the gist, which is possibly more up to date.
"""
Collect stacktraces of where files are opened, and prints them out before the
program exits.
Example
========
monitor.py
----------
from filemonitor import FileMonitor
FileMonitor().patch()
f = open('/bin/ls')
# end of monitor.py
$ python monitor.py
----------------------------------------------------------------------------
path = /bin/ls
> File "monitor.py", line 3, in <module>
> f = open('/bin/ls')
----------------------------------------------------------------------------
Solution modified from:
https://stackoverflow.com/questions/2023608/check-what-files-are-open-in-python
"""
from __future__ import print_function
import __builtin__
import traceback
import atexit
import textwrap
class FileMonitor(object):
def __init__(self, print_only_open=True):
self.openfiles = []
self.oldfile = __builtin__.file
self.oldopen = __builtin__.open
self.do_print_only_open = print_only_open
self.in_use = False
class File(self.oldfile):
def __init__(this, *args, **kwargs):
path = args[0]
self.oldfile.__init__(this, *args, **kwargs)
if self.in_use:
return
self.in_use = True
self.openfiles.append((this, path, this._stack_trace()))
self.in_use = False
def close(this):
self.oldfile.close(this)
def _stack_trace(this):
try:
raise RuntimeError()
except RuntimeError as e:
stack = traceback.extract_stack()[:-2]
return traceback.format_list(stack)
self.File = File
def patch(self):
__builtin__.file = self.File
__builtin__.open = self.File
atexit.register(self.exit_handler)
def unpatch(self):
__builtin__.file = self.oldfile
__builtin__.open = self.oldopen
def exit_handler(self):
indent = ' > '
terminal_width = 80
for file, path, trace in self.openfiles:
if file.closed and self.do_print_only_open:
continue
print("-" * terminal_width)
print(" {} = {}".format('path', path))
lines = ''.join(trace).splitlines()
_updated_lines = []
for l in lines:
ul = textwrap.fill(l,
initial_indent=indent,
subsequent_indent=indent,
width=terminal_width)
_updated_lines.append(ul)
lines = _updated_lines
print('\n'.join(lines))
print("-" * terminal_width)
print()