python matplotlib animation save error when using multiprocessing - python

I'm creating a matplotlib animation that runs through a sequence of images from file. The files that I'm visualizing are typically quite large and there is a significant load time (~5sec) for each stack of images. I've managed to get the animation to run smoothly by staggering the loading processes with multiprocessing, but I'm having trouble saving the animation as a video file.
Here's the code:
from matplotlib import animation
import pylab as plt
import numpy as np
import multiprocessing as mp
import logging
logger = mp.log_to_stderr(logging.INFO)
import time
def qloader(queue, threshold=100, nfiles=3):
    '''trigger a load process if number of items in queue drops below threshold'''
    while nfiles:
        if queue.qsize() < threshold:
            logger.info( 'qsize {}'.format(queue.qsize()) )
            time.sleep( 1 )  #pretend to load data
            data = np.random.rand(25,100,100)
            logger.info( 'Adding data to queue' )
            for d in data:
                queue.put(d)
            logger.info( 'Done adding data!' )
            nfiles -= 1
    else:
        queue.put( None )  #sentinel
def update(frame, im, queue):
    '''update the image'''
    logger.info( 'Updating frame %d'%frame )
    data = queue.get()
    if data is None:
        print( 'Queue is empty!' )
        return
    im.set_data( data )
    return im
#create data queue
mgr = mp.Manager()
queue = mgr.Queue()
threshold = 20 #
#start load process
p = mp.Process( name='loader', target=qloader, args=(queue, threshold) )
p.start()
#start animation
fig, ax = plt.subplots()
im = ax.imshow( np.random.rand(100,100) )
ani = animation.FuncAnimation( fig, update, frames=75, interval=100, repeat=0, fargs=(im, queue) )
ani.save('foo.mp4', 'ffmpeg')
The code runs without errors, but the file it produces is somehow corrupted. When I try to view it with vlc I get a long repeating error stream...
$ vlc foo.mp4
VLC media player 2.0.8 Twoflower (revision 2.0.8a-0-g68cf50b)
[0xf69108] main libvlc: Running vlc with the default interface. Use 'cvlc' to use vlc without interface.
[0x7f37fcc01ac8] mp4 demux error: cannot find any /moov/trak
[0x7f37fcc01ac8] es demux error: cannot peek
...
[0x7f37fcc01ac8] ps demux error: cannot peek
[0x7f37fcc01ac8] mpgv demux error: cannot peek
[0x7f37fcc01ac8] mjpeg demux error: cannot peek
[0x7f37fcc01ac8] ps demux error: cannot peek
[0x7f3824000b78] main input error: no suitable demux module for `file/://.../foo.mp4'
...
I've tried saving in various file formats, using various writers and encoders, with much the same result.
This problem only occurs when using multiprocessing to load the data. If I just create the data with data = np.random.rand(75,100,100), the animation saves without problems.
Question: How do I get matplotlib.animation to play along with multiprocessing?

By default animation.MovieWriter uses a subprocess.PIPE to feed the frames to the writer. This does not seem to work when using multiprocessing for some reason. Changing the last line to
ani.save('foo.mp4', 'ffmpeg_file')
tells the writer to temporarily save the frames to disc before composing the movie, which side-steps the problem.
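Alternatively, you can construct the file-based writer yourself and pass the instance to save. A minimal sketch, assuming ffmpeg is available on your PATH (the fps value is only an example):
from matplotlib import animation

# FFMpegFileWriter dumps each frame to a temporary file on disk and then
# runs ffmpeg over the saved frames, so nothing is streamed through a pipe.
writer = animation.FFMpegFileWriter(fps=10)
ani.save('foo.mp4', writer=writer)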

Related

Sensor data with Python does not get written to File

I'm currently working on a script for my sensor on my Raspberry Pi. The code below should get the values from my sensor and write them into the data.json file. My problem is that if I run the script from the Thonny editor everything works, but if I add the script to my crontab the data does not get written to the data.json file.
The Code:
import time
import board
import adafruit_dht
import psutil
import io
import json
import os
from gpiozero import LED
from datetime import date
from datetime import datetime

# We first check if a libgpiod process is running. If yes, we kill it!
for proc in psutil.process_iter():
    if proc.name() == "libgpiod_pulsein" or proc.name() == "libgpiod_pulsei":
        proc.kill()

sensor = adafruit_dht.DHT11(board.D23)

# init
temp_values = [10]
hum_values = [10]
counter = 0
dataLED = LED(13)
dataList = []

def errSignal():
    for i in range(0, 3):
        dataLED.on()
        time.sleep(0.1)
        dataLED.off()
        time.sleep(0.1)

# on startup
def runSignal():
    for i in range(0, 5):
        dataLED.on()
        time.sleep(0.2)
        dataLED.off()
        time.sleep(0.2)

def getExistingData():
    with open('data.json') as fp:
        dataList = json.load(fp)
        print(dataList)

def startupCheck():
    if os.path.isfile("data.json") and os.access("data.json", os.R_OK):
        # checks if file exists
        print("File exists and is readable.")
        # get json data and push into arr on startup
        getExistingData()
    else:
        print("Either file is missing or is not readable, creating file...")
        # create json file
        with open("data.json", "w") as f:
            print("The json file is created.")

def calc_avgValue(values):
    sum = 0
    for iterator in values:
        sum += iterator
    return sum / len(values)

def onOFF():
    dataLED.on()
    time.sleep(0.7)
    dataLED.off()

# data led blinking on startup
runSignal()
# checks if file exists
startupCheck()

while True:
    try:
        temp_values.insert(counter, sensor.temperature)
        hum_values.insert(counter, sensor.humidity)
        counter += 1
        time.sleep(6)
        if counter >= 10:
            print(
                "Temperature: {}*C Humidity: {}% ".format(
                    round(calc_avgValue(temp_values), 2),
                    round(calc_avgValue(hum_values), 2)
                )
            )
            # get time
            today = date.today()
            now = datetime.now()
            # create json obj
            data = {
                "temperature": round(calc_avgValue(temp_values), 2),
                "humidity": round(calc_avgValue(hum_values), 2),
                "fullDate": str(today),
                "fullDate2": str(today.strftime("%d/%m/%Y")),
                "fullDate3": str(today.strftime("%B %d, %Y")),
                "fullDate4": str(today.strftime("%b-%d-%Y")),
                "date_time": str(now.strftime("%d/%m/%Y %H:%M:%S"))
            }
            # push data into list
            dataList.append(data)
            # writing to data.json
            with open("data.json", "w") as f:
                json.dump(dataList, f, indent=4, separators=(',', ': '))
            # if data is written signal appears
            onOFF()
            print("Data has been written to data.json...")
            counter = 0
    except RuntimeError as error:
        continue
    except Exception as error:
        sensor.exit()
        while True:
            errSignal()
        raise error
    time.sleep(0.2)
Crontab Menu:
The line in the center is the script.
Investigation areas:
Do not put & in crontab, it serves no purpose.
You should capture the output of your scripts to see what is going on. You do this by adding >/tmp/stats.out 2>/tmp/stats.err (and similar for the other 2 lines). You will see what output and errors your scripts encounter.
cron does not run your scripts in the same environment, or from the same directory, as when you run them yourself. Load what you require in the script.
cron might not have permission to write data.json in the directory it is running from. Specify a full path, and ensure cron can write in that directory (see the sketch after this list).
Look at https://unix.stackexchange.com/questions/109804/crontabs-reboot-only-works-for-root for usage of @reboot. Things that should occur at startup should be configured through systemd or init.d (I do not know what Raspberry Pi uses vs the distro default). Cron is to schedule jobs, not run things at startup.
It could be as simple as not having python3 in the PATH configured in cron.
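For the path issue specifically, here is a minimal sketch of a fix on the Python side, assuming data.json is meant to live next to the script (DATA_FILE is just an illustrative name):
import json
import os

# Build an absolute path so the file is found regardless of which working
# directory cron starts the script from.
here = os.path.dirname(os.path.abspath(__file__))
DATA_FILE = os.path.join(here, "data.json")

dataList = []  # placeholder for the list built in the main loop
with open(DATA_FILE, "w") as f:
    json.dump(dataList, f, indent=4, separators=(',', ': '))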

python: monitor updates in /proc/mydev file

I wrote a kernel module that writes in /proc/mydev to notify the python program in userspace. I want to trigger a function in the python program whenever there is an update of data in /proc/mydev from the kernel module. What is the best way to listen for an update here? I am thinking about using "watchdog" (https://pythonhosted.org/watchdog/). Is there a better way for this?
This is an easy and efficient way:
import os
from time import sleep
from datetime import datetime

def my_function(_time):
    print("file modified, time: " + datetime.fromtimestamp(_time).strftime("%H:%M:%S"))

if __name__ == "__main__":
    _time = 0
    while True:
        last_modified_time = os.stat("/proc/mydev").st_mtime
        if last_modified_time > _time:
            my_function(last_modified_time)
            _time = last_modified_time
        sleep(1)  # prevent high CPU usage
result:
file modified, time: 11:44:09
file modified, time: 11:46:15
file modified, time: 11:46:24
The while loop keeps the program listening for changes indefinitely.
You can set the polling interval by changing the sleep time; a low sleep time causes high CPU usage.
import time
import os
import select

# get the file descriptor for the proc file
fd = os.open("/proc/mydev", os.O_RDONLY)

# create a polling object to monitor the file for updates
poller = select.poll()
poller.register(fd, select.POLLIN)

# loop to monitor the file for updates
while True:
    events = poller.poll(10000)
    if len(events) > 0:
        # read the contents of the file if updated
        print(os.read(fd, 1024))
Another option is the inotify package:
sudo pip install inotify
Example
Code for monitoring a simple, flat path (see “Recursive Watching” for watching a hierarchical structure):
import inotify.adapters

def _main():
    i = inotify.adapters.Inotify()
    i.add_watch('/tmp')
    with open('/tmp/test_file', 'w'):
        pass
    for event in i.event_gen(yield_nones=False):
        (_, type_names, path, filename) = event
        print("PATH=[{}] FILENAME=[{}] EVENT_TYPES={}".format(
            path, filename, type_names))

if __name__ == '__main__':
    _main()
Expected output:
PATH=[/tmp] FILENAME=[test_file] EVENT_TYPES=['IN_MODIFY']
PATH=[/tmp] FILENAME=[test_file] EVENT_TYPES=['IN_OPEN']
PATH=[/tmp] FILENAME=[test_file] EVENT_TYPES=['IN_CLOSE_WRITE']
I'm not sure if this would work for your situation, since it seems you want to watch a folder, but this program watches one file at a time until main() repeats:
import os
import time

def main():
    contents = os.listdir("/proc/mydev")
    for file in contents:
        f = open("/proc/mydev/" + file, "r")
        init = f.read()
        f.close()
        different = False
        while not different:
            f = open("/proc/mydev/" + file, "r")
            check = f.read()
            f.close()
            if init != check:
                different = True
            else:
                different = False
                time.sleep(1)

main()
# Write what you would want to happen if a change occurred here...
main()
main()
main()
You could then write what you would want to happen right before the last usage of main(), as it would then repeat.
Also, this may contain errors, since I rushed this.
Hope this at least helps!
You can't do this efficiently without modifying your kernel driver.
Instead of using procfs, have it register a new character device under /dev, and write that driver so new content becomes available to read from the device only when new content has in fact come in from the underlying hardware. That way the application layer can issue a blocking read and have it return only when new content exists.
A good example to work from (which also has plenty of native Python clients) is the evdev devices in the input core.
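To illustrate the userspace side, here is a minimal sketch of consuming such a device, assuming the driver exposes a hypothetical /dev/mydev whose reads block until new data arrives (handle_update is just a placeholder for your own callback):
# Hypothetical consumer of a custom character device: read() blocks in the
# driver until fresh data is available, so no polling loop is needed.
def handle_update(chunk):
    print("new data:", chunk)

with open("/dev/mydev", "rb", buffering=0) as dev:
    while True:
        chunk = dev.read(4096)  # returns only when the driver has new content
        if chunk:
            handle_update(chunk)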

Concurrent.futures Problems

So I have this code that needs to use the concurrent.futures module, and for some reason it is telling me it does not exist. I have looked it up and I cannot find what the problem is. I have tried installing the tools I need from it, thinking that was the case, but I can only get one of them to download.
Error message:
from concurrent.futures import ProcessPoolExecutor
ModuleNotFoundError: No module named 'concurrent.futures';
'concurrent' is not a package
my code:
import requests, time
from concurrent.futures import ProcessPoolExecutor

sites = ["http://www.youtube.com"]

def get_one(site):
    resp = requests.get(site)
    size = len(resp.content)
    print(f"download {site} bytes from {site}")
    return size

def main():
    total_size = 0
    start = time.perf_counter()
    with ProcessPoolExecutor as exec:
        total_size = sum(exec.map(get_one, sites))
    end = time.perf_counter()
    for site in sites:
        total_size += size
        #print(f"downlded {size} bytes from {site}")
    #end = time.perf_counter()
    print(f"elapsed time: {end - start} seconds")
    print(f"downloaded a total of {total_size} bytes")

if __name__ == "__main__":
    main()
I know that normally there should be a file when I say "from" but everything I look up says concurrent.futures is a part of python, but for some reason mine will not work properly. If it is out there do I have to install it?
I found that I had a file named concurrent.py in my folder that was messing everything up!
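If you suspect this kind of shadowing, a quick check is to print where Python actually resolved the package from (a sketch; the exact path will differ on your machine):
import concurrent

# If this prints a path inside your project folder (for example a stray
# concurrent.py next to your script) instead of the standard library,
# that file is shadowing the real package and needs to be renamed.
print(concurrent.__file__)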

Is it possible to detect corrupt Python dictionaries

I have a data file saved using the shelve module in python 2.7 which is somehow corrupt. I can load it with db = shelve.open('file.db') but when I call len(db) or even bool(db) it hangs, and I have to kill the process.
However, I am able to loop through the entire thing and create a new non-corrupt file:
db = shelve.open('orig.db')
db2 = shelve.open('copy.db')
for k, v in db.items():
    db2[k] = v
db2.close()  # copy.db will now be a fully working copy
The question is, how can I test the dict and avoid the hang?
BTW, I still have the original file, and it exhibits the same behaviour when copied to other machines, in case someone also wants to help me get to the bottom of what's actually wrong with the file in the first place!
I'm unaware of any inspection methods other than dbm.whichdb(). For debugging a possible pickle protocol mismatch in a way that lets you time out long-running tests, maybe try:
import shelve
import pickle
import dbm
import multiprocessing
import time
import psutil

def protocol_check():
    print('orig.db is', dbm.whichdb('orig.db'))
    print('copy.db is', dbm.whichdb('copy.db'))
    for p in range(pickle.HIGHEST_PROTOCOL + 1):
        print('trying protocol', p)
        db = shelve.open('orig.db', protocol=p)
        db2 = shelve.open('copy.db')
        try:
            for k, v in db.items():
                db2[k] = v
        finally:
            db2.close()
            db.close()
        print('great success on', p)

def terminate(grace_period=2):
    procs = psutil.Process().children()
    for p in procs:
        p.terminate()
    gone, still_alive = psutil.wait_procs(procs, timeout=grace_period)
    for p in still_alive:
        p.kill()

process = multiprocessing.Process(target=protocol_check)
process.start()
time.sleep(10)
terminate()
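If you only want to time-box a single call that might hang, such as len(db), here is a smaller sketch along the same lines (the 10-second timeout is an arbitrary choice):
import multiprocessing
import shelve

def check_len():
    db = shelve.open('orig.db')
    print('len(db) =', len(db))
    db.close()

if __name__ == '__main__':
    p = multiprocessing.Process(target=check_len)
    p.start()
    p.join(timeout=10)   # wait at most 10 seconds for the check
    if p.is_alive():
        p.terminate()    # the call hung, so treat the shelf as suspect
        print('len(db) hung -- the shelf is probably corrupt')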

Why doesn't pickle.dump write the file?

I currently have a project that runs for several days. Since errors might occur in some steps of execution, I pickle critical state so I can restart the script at the correct step (so I don't have to redo what can be over 24 hours of execution).
One thing I store with pickle is a list, steps. This list contains every step that finished successfully. It is used to skip steps when I start the script again.
The problem is that pickle does not seem to update this after I switch the module.
Code
mainscript.py
import subscript
def set_status(mysql_script_instance_id, status, state=None):
    # [...]
    # update status in database (works as expected)
    # [...]
    if state is not None:
        with open("state.pickle", "wb") as f:
            pickle.dump(state, f)
        logging.debug("Dumped pickle. steps_done: %s" % state['steps_done'])
    logging.info(status)
subscript.py
import mainscript
[...]
logging.info("%s finished." % (step.__name__))
self.state['steps_done'].append(step.__name__)
[...]
logging.debug("self.state['steps_done'] = %s" % self.state['steps_done'])
mainscript.set_status(self.mysql_script_instance_id, "step xy done", self.state)
pickleviewer
#!/usr/bin/env python
import pickle
import pprint
state = pickle.load(open("state.pickle", "rb"))
pp = pprint.PrettyPrinter(indent=4)
pp.pprint(state)
What I've tried
I get all log messages I've expected:
2014-04-03 08:23:07,727 INFO: step1 finished.
2014-04-03 08:23:07,728 DEBUG: self.state['steps_done'] = ['Fetch recordings', 'preparation', 'step1']
2014-04-03 08:23:07,927 DEBUG: Dumped pickle. steps_done: ['Fetch recordings', 'preparation', 'step1']
but when I look at the pickle file, I get:
{ [...]
'steps_done': ['Fetch recordings', 'preparation'],
[...]}
What could be the error? What can I do to find the error?
(If open would not work, I would get an exception, right?)
Use absolute file paths to open your pickle files; with a relative path you are writing the pickle file to whatever the current working directory happens to be.
You can write to the same location as the script by basing the path on the __file__ global:
import os
here = os.path.dirname(os.path.abspath(__file__))
then use
with open(os.path.join(here, "state.pickle"), "wb") as f:
to create an absolute path to a pickle file in the same directory.
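Put together, a minimal sketch of the write path under that assumption (save_state is only an illustrative name; in the question's code this logic sits inside set_status):
import os
import pickle

# Resolve the pickle file relative to this module rather than the current
# working directory, so every caller writes to the same file.
here = os.path.dirname(os.path.abspath(__file__))
STATE_FILE = os.path.join(here, "state.pickle")

def save_state(state):
    with open(STATE_FILE, "wb") as f:
        pickle.dump(state, f)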
