Memory management in Django - python

I'm doing some analysis on my Django database. I run many queries in a loop, and some of them may return big results.
After a while, the whole 8 GB of RAM on my EC2 instance is eaten up and I cannot even ssh into the machine any longer.
I have to reboot the instance and start over again.
I tried the solution mentioned here:
https://baxeico.wordpress.com/2014/09/30/optimize-django-memory-usage/
But the queryset_iterator method does not seem to work with aggregated queries.
I'm pretty sure that no single query can consume all 8 GB of RAM on its own, which means the old results are not being released from memory.
How do I force a query's results out of memory at the end of each loop iteration, before the next query executes?
Here is my code:
def get_users_event_distribution(monthYear, event_type=None):
    title = event_type if event_type else 'All'
    filename = 'charts/%s_%s_event_dist.png' % (monthYear, title)
    filename = filename.replace(' ', '')
    if os.path.isfile(filename):
        print 'Chart already in file %s' % (filename)
    else:
        users = None
        if event_type:
            users = EVENT.objects.filter(time__month=monthYear.month, time__year=monthYear.year, event_type=event_type).values_list('user').annotate(count=Count('id'))
        else:
            users = EVENT.objects.filter(time__month=monthYear.month, time__year=monthYear.year).values_list('user').annotate(count=Count('id'))
        uc = users.count()
        print 'We have %d users' % (uc)
        print 'Building Count Dictionary'
        count_dict = dict()
        for u in users:
            # u is a (user, count) tuple; build a histogram over per-user event counts
            try:
                count_dict[u[1]] += 1
            except KeyError:
                count_dict[u[1]] = 1
        print 'Built the count dictionary with %d keys' % (len(count_dict.keys()))
        fig, ax = plt.subplots(figsize=(20, 20))
        bars = plt.bar(range(len(count_dict)), count_dict.values(),
                       align='edge')
        locs, labels = plt.xticks(range(len(count_dict)), count_dict.keys())
        ax.set_ylabel('# Users')
        ax.set_xlabel('# %s Events' % (title))
        ax.set_title('%s Event Distribution' % (title))
        ax.relim()
        # update ax.viewLim using the new dataLim
        ax.autoscale_view()

        def autolabel(rects):
            """
            Attach a text label above each bar displaying its height
            """
            for rect in rects:
                height = rect.get_height()
                ax.text(rect.get_x() + rect.get_width() / 2., 1.05 * height,
                        '%d' % int(height),
                        ha='center', va='bottom')

        autolabel(bars)
        plt.savefig(filename, bbox_inches='tight', dpi=100)
        print 'Saved the distribution chart to %s' % (filename)

def get_users_all_event_distribution(monthYear):
    get_users_event_distribution(monthYear)
    for event_type in [event_type[0] for event_type in EVENT_TYPE]:
        get_users_event_distribution(monthYear, event_type)
I run get_users_all_event_distribution for different dates in a loop.
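As a side note on the queryset part (an assumption on my part, separate from what turned out to be the real culprit below): Django caches every row a queryset yields, and with DEBUG = True it also records every executed query in connection.queries. The built-in QuerySet.iterator() streams rows without filling the result cache and, unlike the pk-slicing queryset_iterator from the blog post, it works on annotated querysets. A minimal sketch:

from django.db import reset_queries

count_dict = {}
for u in users.iterator():                          # stream rows instead of caching them all
    count_dict[u[1]] = count_dict.get(u[1], 0) + 1
reset_queries()                                     # only matters if DEBUG = True, which logs every query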

With more analysis, I found out that the problem was with matplotlib figures, as stated in this warning:
/usr/local/lib64/python2.7/site-packages/matplotlib/pyplot.py:524:
RuntimeWarning: More than 20 figures have been opened. Figures created
through the pyplot interface (matplotlib.pyplot.figure) are retained
until explicitly closed and may consume too much memory. (To control
this warning, see the rcParam figure.max_open_warning).
max_open_warning, RuntimeWarning)
I added a plt.close('all') call.
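For reference, a minimal sketch of where the call ends up in the function above: the figure is released as soon as it has been written to disk, so pyplot does not keep it (and its data) alive across loop iterations.

        plt.savefig(filename, bbox_inches='tight', dpi=100)
        print 'Saved the distribution chart to %s' % (filename)
        plt.close(fig)   # or plt.close('all'); drops the figure from pyplot's registry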

Related

Multiple route mapping to different matplotlib graphs in flask app

I have this "flask app" with two links, each mapping to different matplotlib visualizations, for example: localhost:5000/line_chart and localhost:5000/bar_chart.
When I start the server, and click the a route (any of them), I see what I expect.
localhost:5000/bar_chart
When I go back and view the other link, both graphs break.
localhost:5000/line_chart
localhost:5000/bar_chart
I can reproduce this every time by closing the server then running the "run.py" script again. Seems to be an overwriting conflict with the in-memory buffer. Has anyone had this issue before?
app/views.py
import matplotlib
matplotlib.use('Agg')  # this allows PNG plotting
import matplotlib.pyplot as plt

import base64
from flask import render_template
from app import app
from io import BytesIO

@app.route('/')
@app.route('/index')
def index():
    res = ''
    navigation = [['Line Chart', 'line_chart'], ['Bar Chart', 'bar_chart']]
    res = res + '<h1>Matplotlib Chart Examples</h1>'
    res = res + '<ul>'
    for item in navigation:
        name = item[0]
        link = item[1]
        res = res + '<li>' + name + '</li>'
    res = res + '</ul>'
    return res

@app.route('/bar_chart')
def bar_chart():
    movies = ["Annie Hall", "Ben-Hur", "Casablanca", "Gandhi", "West Side Story"]
    num_oscars = [5, 11, 3, 8, 10]
    # bars are by default width 0.8, so we'll add 0.1 to the left coordinates
    # so that each bar is centered
    xs = [i + 0.1 for i, _ in enumerate(movies)]
    # plot bars with left x-coordinates [xs], heights [num_oscars]
    plt.bar(xs, num_oscars)
    plt.ylabel("# of Academy Awards")
    plt.title("My Favorite Movies")
    # label x-axis with movie names at bar centers
    plt.xticks([i + 0.5 for i, _ in enumerate(movies)], movies)
    return compute(plt)

@app.route('/line_chart')
def line_chart():
    years = [1950, 1960, 1970, 1980, 1990, 2000, 2010]
    gdp = [300.2, 543.3, 1075.9, 2862.5, 5979.6, 10289.7, 14958.3]
    # create a line chart, years on x-axis, gdp on y-axis
    plt.plot(years, gdp, color='green', marker='o', linestyle='solid')
    # add a title
    plt.title("Nominal GDP")
    # add a label to the y-axis
    plt.ylabel("Billions of $")
    return compute(plt)

def compute(plt):
    figfile = BytesIO()
    plt.savefig(figfile, format='png')
    figfile.seek(0)  # rewind to beginning of file
    # figfile.getvalue() extracts the PNG bytes
    figdata_png = base64.b64encode(figfile.getvalue())
    return render_template('index.html',
                           title='matplotlib chart',
                           results=figdata_png)
Thank you for your time.
I guess you need two figures; test this code and tell me what happens:
@app.route('/bar_chart')
def bar_chart():
    movies = ["Annie Hall", "Ben-Hur", "Casablanca", "Gandhi", "West Side Story"]
    num_oscars = [5, 11, 3, 8, 10]
    # bars are by default width 0.8, so we'll add 0.1 to the left coordinates
    # so that each bar is centered
    xs = [i + 0.1 for i, _ in enumerate(movies)]
    # plot bars with left x-coordinates [xs], heights [num_oscars]
    plt.figure(1)
    plt.bar(xs, num_oscars)
    plt.ylabel("# of Academy Awards")
    plt.title("My Favorite Movies")
    # label x-axis with movie names at bar centers
    plt.xticks([i + 0.5 for i, _ in enumerate(movies)], movies)
    return compute(plt, 1)

@app.route('/line_chart')
def line_chart():
    years = [1950, 1960, 1970, 1980, 1990, 2000, 2010]
    gdp = [300.2, 543.3, 1075.9, 2862.5, 5979.6, 10289.7, 14958.3]
    # create a line chart, years on x-axis, gdp on y-axis
    plt.figure(2)
    plt.plot(years, gdp, color='green', marker='o', linestyle='solid')
    # add a title
    plt.title("Nominal GDP")
    # add a label to the y-axis
    plt.ylabel("Billions of $")
    return compute(plt, 2)

def compute(plt, fignum):
    plt.figure(fignum)
    figfile = BytesIO()
    plt.savefig(figfile, format='png')
    figfile.seek(0)  # rewind to beginning of file
    # figfile.getvalue() extracts the PNG bytes
    figdata_png = base64.b64encode(figfile.getvalue())
    return render_template('index.html',
                           title='matplotlib chart',
                           results=figdata_png)
In my case, that solution didn't work. It seems that there is a race condition when trying to access the plot. I first tried to use a lock from a library, but that didn't work, so instead I sort of engineered my own lock. In my case, I wanted to create n images using the same function on the same view, so I started by creating a list in the following way:
queue = [False for i in range(n)]
Then my Flask app looks something like this:
@app.route('/vis/<j>')
def vis(j):
    global queue
    # We check that it's image #j's turn, as if it were single threaded
    j = int(j)
    if j == 0:
        for i in range(len(queue)):
            queue[i] = False
    else:
        while not queue[j-1]:
            # If it's not, we sleep for a short time (from time import sleep)
            sleep(0.5)
    # This is not important, it's how I was plotting some random figures
    # (from random import seed, randint) (from datetime import datetime)
    seed(datetime.now())
    n = 10
    p1 = [randint(0, 10) for _ in range(n)]
    p2 = [randint(0, 10) for _ in range(n)]
    t = [i for i in range(n)]
    fig = plt.figure(j)
    plt.clf()
    plt.plot(t, p1, color='blue')
    plt.plot(t, p2, color='orange')
    plt.xlabel('Time')
    plt.ylabel('Value')
    # Save the plot
    img = BytesIO()
    fig.savefig(img, dpi=128)
    img.seek(0)
    # We finished using everything related to the plot, so we free the "lock"
    queue[j] = True
    # Return the image as a file that can be accessed
    return send_file(img, mimetype='image/png')
Finally, to display this in my Flask app, all I had to do was use <img src="/vis/1"> in my HTML file.
Edit: I forgot one of the most important parts! For some reason, this would still create an unrelated threading issue. I looked it up and that's when I came up with the full solution. The threading issue was solved by adding the following at the beginning of the file:
import matplotlib
matplotlib.use('Agg')  # must be set before pyplot is imported
import matplotlib.pyplot as plt
For some reason, using the Agg backend solved the second threading issue I was having. I don't really have a good explanation for that, but it does work, so it's enough for me.
Alternatively, what also worked was running the app with threads disabled:
if __name__ == '__main__':
    app.run(threaded=False, debug=True)
However, I don't know at the moment whether this works in production, so I preferred the other solution. :)
I hope this helps if you had the same issue!
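A further note, separate from both answers above: the conflicts come from pyplot's global current-figure state, which every request shares. Building each chart on its own Figure object through matplotlib's object-oriented API avoids that shared state entirely. A rough sketch (the standalone Flask app and inline data-URI output are my own simplifications, not part of the original code):

import base64
from io import BytesIO

from flask import Flask
from matplotlib.figure import Figure
from matplotlib.backends.backend_agg import FigureCanvasAgg

app = Flask(__name__)

@app.route('/bar_chart')
def bar_chart():
    fig = Figure()                      # a brand-new Figure local to this request
    FigureCanvasAgg(fig)                # attach an Agg canvas so the figure can render PNGs
    ax = fig.add_subplot(111)
    ax.bar(range(5), [5, 11, 3, 8, 10])
    ax.set_ylabel("# of Academy Awards")
    buf = BytesIO()
    fig.savefig(buf, format='png')
    encoded = base64.b64encode(buf.getvalue())
    return '<img src="data:image/png;base64,' + encoded.decode('ascii') + '">'

Because no figure is ever registered with pyplot, there is nothing for concurrent requests to overwrite or close.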

How to add significance levels on bar graph using Python's Matplotlib?

I have written some code to graph some data with Python's Matplotlib.
The plot currently looks like this:
The code to produce this plot:
import numpy
import matplotlib.pyplot as plt
from textwrap import wrap

groups = ['Control', '30min', '24hour']
cell_lysate_avg = [11887.42595, 4862.429689, 3414.337554]
cell_lysate_sd = [1956.212855, 494.8437915, 525.8556207]
cell_lysate_avg = [i/1000 for i in cell_lysate_avg]
cell_lysate_sd = [i/1000 for i in cell_lysate_sd]

media_avg = [14763.71106, 8597.475539, 6374.732852]
media_sd = [240.8983759, 167.005365, 256.1374017]
media_avg = [i/1000 for i in media_avg]  # to get ng/ml
media_sd = [i/1000 for i in media_sd]

fig, ax = plt.subplots()
index = numpy.arange(len(groups))  # where to put the bars
bar_width = 0.45
opacity = 0.5
error_config = {'ecolor': '0.3'}

cell_lysate_plt = plt.bar(index, cell_lysate_avg, bar_width, alpha=opacity, color='black',
                          yerr=cell_lysate_sd, error_kw=error_config, label='Cell Lysates')
media_plt = plt.bar(index + bar_width, media_avg, bar_width, alpha=opacity, color='green',
                    yerr=media_sd, error_kw=error_config, label='Media')

plt.xlabel('Groups', fontsize=15)
plt.ylabel('ng/ml', fontsize=15)
plt.title('\n'.join(wrap('Average Over Biological Repeats for TIMP1 ELISA (n=3)', 45)), fontsize=15)
plt.xticks(index + bar_width, groups)
plt.legend()

ax.tick_params(axis='x', labelsize=14)
ax.tick_params(axis='y', labelsize=14)
I have calculated the various two-tailed t tests associated with this data and I want to display the results using the standard scientific-journal representation - i.e. a line connecting two bars with a star which represents a significance level of (say) p < 0.05. Can anybody tell me how to do this?
As far as I know there is no standard scientific-journal representation for showing significance. The exact way you draw it is a matter of taste. This is probably the reason why matplotlib has no specific function for significance bars (at least to my knowledge). You could just do it manually, e.g.:
from matplotlib.markers import TICKDOWN

def significance_bar(start, end, height, displaystring, linewidth=1.2, markersize=8, boxpad=0.3, fontsize=15, color='k'):
    # draw a line with downticks at the ends
    plt.plot([start, end], [height]*2, '-', color=color, lw=linewidth, marker=TICKDOWN, markeredgewidth=linewidth, markersize=markersize)
    # draw the text with a bounding box covering up the line
    plt.text(0.5*(start+end), height, displaystring, ha='center', va='center',
             bbox=dict(facecolor='1.', edgecolor='none', boxstyle='Square,pad='+str(boxpad)), size=fontsize)

pvals = [0.001, 0.1, 0.00001]
offset = 1
for i, p in enumerate(pvals):
    if p >= 0.05:
        displaystring = r'n.s.'
    elif p < 0.0001:
        displaystring = r'***'
    elif p < 0.001:
        displaystring = r'**'
    else:
        displaystring = r'*'
    height = offset + max(cell_lysate_avg[i], media_avg[i])
    bar_centers = index[i] + numpy.array([0.5, 1.5])*bar_width
    significance_bar(bar_centers[0], bar_centers[1], height, displaystring)
Instead of the stars you could of course also explicitly write p<0.05 or something similar. You can then spend hours fiddling with the parameters until it looks just right.

Can't save all matplotlib plots to pdf - missing some graphs (IPython)

I have a script that I run in IPython; it basically takes an input .csv file of gene_names and pushes them into this for loop:
with open('C:\Users\Work\Desktop\Book1.csv', 'rU') as f:
    reader = csv.reader(f)
    with PdfPages('poopyheadjoe04.pdf') as pdf:
        for row in reader:
            gene_name = row
            probe_exclusion_keyword = []
            print gene_name
The gene_name values from this list (in the .csv file) are then fed into the line if inference_method == "approximate_random": (in Scripts.py),
with open('C:\Users\Work\Desktop\Book1.csv', 'rU') as f:
    reader = csv.reader(f)
    with PdfPages('poopyheadjoe04.pdf') as pdf:
        for row in reader:
            gene_name = row
            probe_exclusion_keyword = []
            print gene_name
            print "Fetching probe ids for gene %s" % gene_name
            probes_dict = get_probes_from_genes(gene_name)
            print "Found %s probes: %s" % (len(probes_dict), ", ".join(probes_dict.values()))
            if probe_exclusion_keyword:
                probes_dict = {probe_id: probe_name for (probe_id, probe_name) in probes_dict.iteritems() if not args.probe_exclusion_keyword in probe_name}
                print "Probes after applying exclusion criterion: %s" % (", ".join(probes_dict.values()))
            print "Fetching expression values for probes %s" % (", ".join(probes_dict.values()))
            expression_values, well_ids, donor_names = get_expression_values_from_probe_ids(
                probes_dict.keys())
            print "Found data from %s wells sampled across %s donors" % (len(well_ids), len(set(donor_names)))
            print "Combining information from selected probes"
            combined_expression_values = combine_expression_values(
                expression_values, method=probes_reduction_method)
            print "Translating locations of the wells to MNI space"
            mni_coordinates = get_mni_coordinates_from_wells(well_ids)
            print "Checking values of the provided NIFTI file at well locations"
            nifti_values = get_values_at_locations(
                stat_map, mni_coordinates, mask_file=mask, radius=radius, verbose=True)
            # preparing the data frame
            names = ["NIFTI values", "%s expression" % gene_name, "donor ID"]
            data = pd.DataFrame(np.array(
                [nifti_values, combined_expression_values, donor_names]).T, columns=names)
            data = data.convert_objects(convert_numeric=True)
            len_before = len(data)
            data.dropna(axis=0, inplace=True)
            nans = len_before - len(data)
            if nans > 0:
                print "%s wells fall outside of the mask" % nans
            if inference_method == "fixed":
                print "Performing fixed effect analysis"
                fixed_effects(data, ["NIFTI values", "%s expression" % gene_name])
            if inference_method == "approximate_random":
                print "Performing approximate random effect analysis"
                approximate_random_effects(
                    data, ["NIFTI values", "%s expression" % gene_name], "donor ID")
                print "poopy"
                pdf.savefig()
                plt.ion()  # should i add ion() here?
            if inference_method == "bayesian_random":
                print "Fitting Bayesian hierarchical model"
                bayesian_random_effects(
                    data, ["NIFTI values", "%s expression" % gene_name], "donor ID", n_samples, n_burnin)
# if __name__ == '__main__':  # What exactly does this do? Start trigger for the script to run?
#     main()
which triggers approximate_random_effects (in Analysis.py) to plot two graphs, the violin plot and the lmplot:
def approximate_random_effects(data, labels, group):
    correlation_per_donor = {}
    for donor_id in set(data[group]):
        correlation_per_donor[donor_id], _, _, _, _ = linregress(list(data[labels[0]][data[group] == donor_id]),
                                                                 list(data[labels[1]][data[group] == donor_id]))
    average_slope = np.array(correlation_per_donor.values()).mean()
    t, p_val = ttest_1samp(correlation_per_donor.values(), 0)
    print "Averaged slope across donors = %g (t=%g, p=%g)" % (average_slope, t, p_val)
    sns.violinplot([correlation_per_donor.values()], inner="points", names=["donors"])
    plt.ylabel("Linear regression slopes between %s and %s" % (labels[0], labels[1]))
    plt.axhline(0, color="red")
    sns.lmplot(labels[0], labels[1], data, hue=group, col=group, col_wrap=3)
    plt.ion()
    return average_slope, t, p_val
I'm trying to save both graphs for all the gene_names into a pdf file, by roughly following "Saving multiple figures to one pdf file in matplotlib" and the matplotlib.PdfPages approach.
However, in the pdf file, I am only getting the lmplot for all my gene_names and NOT the violin plot. What do I do to fix this?
Thanks! Help will be much appreciated!
It looks like your code is creating two figures, one for each plot, but you only call pdf.savefig() once, after the second figure is created, and therefore only save the second figure.
If you want one figure per page in your pdf, you need to call pdf.savefig() twice: once after creating each plot.
I would recommend that you change the structure of your program a bit, so you can save the pdf after each plot:
def approximate_random_effects(data, labels, group):
    correlation_per_donor = {}
    for donor_id in set(data[group]):
        correlation_per_donor[donor_id], _, _, _, _ = linregress(list(data[labels[0]][data[group] == donor_id]),
                                                                 list(data[labels[1]][data[group] == donor_id]))
    average_slope = np.array(correlation_per_donor.values()).mean()
    t, p_val = ttest_1samp(correlation_per_donor.values(), 0)
    print "Averaged slope across donors = %g (t=%g, p=%g)" % (average_slope, t, p_val)
    with PdfPages('poopyheadjoe04.pdf') as pdf:
        fig = plt.figure()
        sns.violinplot([correlation_per_donor.values()], inner="points", names=["donors"])
        plt.ylabel("Linear regression slopes between %s and %s" % (labels[0], labels[1]))
        plt.axhline(0, color="red")
        pdf.savefig(fig)       ## Saving first figure
        grid = sns.lmplot(labels[0], labels[1], data, hue=group, col=group, col_wrap=3)
        pdf.savefig(grid.fig)  ## Saving second figure (lmplot draws on its own figure)
    return average_slope, t, p_val
You then need to delete from your main program the with PdfPages('poopyheadjoe04.pdf') as pdf: line, as well as pdf.savefig() and plt.ion().
If you need the two plots on the same pdf page, you need to change the violinplot and lmplot so that they use different axes on the same figure.
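For that last case, here is a rough sketch of what I mean. Note that it swaps lmplot for seaborn's axes-level regplot, because lmplot always creates its own figure and cannot be pointed at an existing axis; the hue/col faceting is dropped in this simplified version:

with PdfPages('poopyheadjoe04.pdf') as pdf:
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 5))
    # violinplot is axes-level, so it can be told which axis to draw on
    sns.violinplot([correlation_per_donor.values()], inner="points", names=["donors"], ax=ax1)
    ax1.axhline(0, color="red")
    ax1.set_ylabel("Linear regression slopes between %s and %s" % (labels[0], labels[1]))
    # regplot is the axes-level counterpart of lmplot
    sns.regplot(labels[0], labels[1], data=data, ax=ax2)
    pdf.savefig(fig)  # one page containing both plots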

Making a timelapse over geographical positions using pyplot

I am making a scatter plot of positions marked with latitude and longitude, which works all right as it shows all the positions in a time period in a static image. But I was wondering if there is an easy way of using the fact that I have the unixtime for every position, so that I can show the movements as a timelapse - looping through the positions and showing an animation of the movement.
EDIT:
I have set up a dynamically updating plot that plots all the positions one by one; now I just have to add the basemap in the background. The code comes from this answer.
import matplotlib.pyplot as plt
import sqlite3 as lite
from operator import itemgetter

def getData():
    con = lite.connect('database.db')
    with con:
        cur = con.cursor()
        cur.execute('SELECT latitude, longitude, unixtime FROM Message WHERE latitude > 50 AND longitude > -30 AND longitude < 40 AND latitude < 80')
        all_rows = [[int(x[0]), int(x[1]), int(x[2])] for x in cur]
    all_rows = sorted(all_rows, key=itemgetter(2))
    return all_rows

plt.ion()

class DynamicUpdate():
    # Suppose we know the x range
    min_x = 0
    max_x = 10000

    def on_launch(self):
        # Set up plot
        self.figure, self.ax = plt.subplots()
        self.lines, = self.ax.plot([], [], 'o')
        # Autoscale on unknown axis and known lims on the other
        self.ax.set_autoscaley_on(True)
        self.ax.set_xlim(-50, 50)
        self.ax.set_ylim(40, 80)
        # Other stuff
        self.ax.grid()

    def on_running(self, xdata, ydata):
        # Update data (with the new _and_ the old points)
        self.lines.set_xdata(xdata)
        self.lines.set_ydata(ydata)
        # Need both of these in order to rescale
        self.ax.relim()
        self.ax.autoscale_view()
        # We need to draw *and* flush
        self.figure.canvas.draw()
        self.figure.canvas.flush_events()

    # Example
    def __call__(self):
        import numpy as np
        import time
        self.on_launch()
        xdata = []
        ydata = []
        all_rows = getData()
        for x in all_rows:
            a, b, f = zip(x)
            xdata.append(b)
            ydata.append(a)
            self.on_running(xdata, ydata)
        return xdata, ydata

d = DynamicUpdate()
d()
Old Code:
This shows the static data
map = Basemap(projection='merc', lat_0=59.45, lon_0=10.5,
              resolution='c', area_thresh=1000,
              llcrnrlon=-30, llcrnrlat=50,
              urcrnrlon=40, urcrnrlat=80)
map.drawcoastlines()
map.fillcontinents(color='black')
con = lite.connect('database.db')
with con:
    cur = con.cursor()
    cur.execute('SELECT latitude, longitude FROM Message WHERE latitude > 50 AND longitude > -30 AND longitude < 40 AND latitude < 80')
    data = cur.fetchall()
    y, x = zip(*data)
    x, y = map(x, y)
    plt.scatter(x, y, s=0.07, alpha=0.6, color="#e74c3c", edgecolors='none')
    plt.show()
There are a few ways to do animations in matplotlib; matplotlib.animation provides a framework, but this can be a little involved. Probably the easiest way to do it is using plt.ion(). I don't know how you access your data with cur.execute, but does something like this work:
map = Basemap(projection='merc', lat_0=59.45, lon_0=10.5,
              resolution='c', area_thresh=1000,
              llcrnrlon=-30, llcrnrlat=50,
              urcrnrlon=40, urcrnrlat=80)
fig, ax = plt.subplots(1, 1)
plt.ion()
plt.show()
map.drawcoastlines(ax=ax)
map.fillcontinents(color='black', ax=ax)
con = lite.connect('database.db')
with con:
    cur = con.cursor()
    i = 0
    for unixtime in range(1406851200, 1409529600):
        cur.execute('SELECT latitude, longitude FROM Message WHERE latitude > 50 AND longitude > -30 AND longitude < 40 AND latitude < 80 AND unixtime ==' + str(unixtime))
        data = cur.fetchall()
        y, x = zip(*data)
        x, y = map(x, y)
        pts = ax.scatter(x, y, s=0.07, alpha=0.6, color="#e74c3c", edgecolors='none')
        plt.draw()
        plt.pause(0.0001)
        #i += 1
        #plt.savefig('./out.{0:07d}.png'.format(i))
        pts.remove()
Even though I've gotten an adequate answer above, I found that it was a hassle to get the plot exactly like I wanted and still animate it, so I did it using some Linux tools to make a movie out of snapshots instead. This is what I did, for future reference and for others having the same problem:
Timelapse animation, the lazy way
I simply made a plot of all geographical positions for every hour over the whole timespan. This can be done down to every minute, second, etc.:
con = lite.connect('database/SAISREAL.db')
with con:
    cur = con.cursor()
    i = 0
    for k in range(0, 137*24):  # This is the timespan - every hour for 137 days
        map = Basemap(projection='merc', lat_0=59.45, lon_0=10.5,
                      resolution='c', area_thresh=1000,
                      llcrnrlon=-30, llcrnrlat=50,
                      urcrnrlon=40, urcrnrlat=80)
        map.drawcoastlines()
        map.fillcontinents(color='#27ae60')
        start = 0 + k*60*60  # This is again the timespan
        end = 0 + (k+1)*60*60
OK - so now I've established the timespan I will query data from, and drawn the map overlay:
cur.execute('SELECT distinct userid, latitude, longitude FROM geodata WHERE unixtime > {start} AND unixtime < {end}'.format(start=start, end=end))
data = cur.fetchall()
if len(data) > 0:  # Simply check if there is data available
    i = i + 1
    filename = ''
    if i < 10:
        filename = '0000' + str(i)
    elif i < 100:
        filename = '000' + str(i)
    elif i < 1000:
        filename = '00' + str(i)
    else:
        filename = '0' + str(i)
    f, y, x = zip(*data)
    x, y = map(x, y)
The whole filename thing is used later, when I convert the images into a movie - it's important that they are named sequentially and that they all have the same number of digits.
plt.title( str(datetime.datetime.fromtimestamp(int(end)).strftime('%Y-%m-%d')) + ' kl '+str(datetime.datetime.fromtimestamp(int(end)).strftime('%H')))
plt.scatter(x,y, s=8, alpha=0.7, color="#2980b9", edgecolors='none')
Here I just plotted the info with a timestamp as title.
plt.savefig('Fisheriesplot/fishplot/'+str(filename)+'.png', format='png')
plt.clf()
And then, saving the picture. This gives some 3000 .png images - this can obviously be done with other file formats.
Before I make them into either a GIF or a movie, I want to remove the transparent background, to make them appear nicer (less colour shifting between frames):
mkdir batch
for file in *.png ; do convert "${file}" -background black -alpha remove -flatten -alpha off "batch/${file}" ; done
cd batch
If the goal is to make a gif, skip the rest and do this: convert -delay 10 -loop 0 *.png animation.gif
Option 1: Make a movie out of the .png files
ffmpeg -y -f image2 -framerate 20 -i %05d.png -vcodec png -b 8000k a20k.avi
Just do this in the folder. Set the bitrate and framerate as you want - notice that this movie can be quite big.
Option 2: Convert the images to another format, then make the movie
mogrify -format jpg *.png
This is done in a terminal in the same folder as the pictures. Then I want to move all the jpgs into their own folder:
mkdir jpgfolder
mv *.jpg jpgfolder
And now, lastly I can make the movie:
cd jpgfolder
ffmpeg -y -f image2 -framerate 4 -i %05d.jpg -vcodec mpeg4 -b 800k a88k.avi
The framerate, which is set to 4 here, should be set to whatever you want. Notice that %05d.jpg means every filename has leading zeros and a total of five digits. If it's four digits, write %04d, etc.
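As an aside, the zero-padded names that %05d expects can also be produced directly in the plotting loop, which avoids the if/elif chain above (equivalent for the frame counts used here):

filename = '%05d' % i     # e.g. 7 -> '00007'
# or, equivalently:
filename = str(i).zfill(5)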
Note that this isn't the most streamlined or smart way to do this, but it is a solution for anyone not wanting to change their code beyond putting it in a loop.

numpy plot create figure failed after several minutes idle

I wrote a small program using web.py, and in one of the classes I use numpy/matplotlib.
I found that the first time I visit the page it works fine, but after several minutes the plt.figure() call freezes: the function never returns. That's so weird.
Please have a look at my code:
def DrawMapMain(MapParameter, inputfile='out.txt', imgfile='out.png'):
    print "DrawMapMain..."
    plt.ioff()  # turn off interactive mode
    plt.close('all')
    xmin, xmax, ymin, ymax = MapParameter['xmin'], MapParameter['xmax'], MapParameter['ymin'], MapParameter['ymax']
    print('LevelFile:', MapParameter['LevelFile'])
    LonCenter = (xmin+xmax)/2.0
    LatCenter = (ymin+ymax)/2.0
    nx, ny = 200, 200
    if(not os.path.isfile(inputfile)):
        print(u'输入文件%s不存在,请检查!'%(inputfile))  # "Input file %s does not exist, please check!"
        sys.exit(0)
    Region = np.loadtxt(inputfile)
    #print(Region)
    x, y, z = Region[:,1], Region[:,2], Region[:,3]
    lon_array = np.linspace(xmin, xmax, nx)
    lat_array = np.linspace(ymin, ymax, ny)
    print('Data lon/lat box :', x.min(), x.max(), y.min(), y.max())
    print(u'离散点插值到网格')  # "Interpolating scattered points onto the grid"
    zi, xi, yi = Interpolater.griddata_all(x, y, z, lon_array, lat_array, func='line_rbf')  # scipy_idw / line_rbf
    print(u'扩展矩阵插值: ')  # "Extending and interpolating the grid:"
    zi, xi, yi, lon_array, lat_array, nx, ny = Interpolater.extened_grid(zi, lon_array, lat_array, zoom=int(2))
    print(u'mask非绘图区域')  # "Masking the non-plotting region"
    grid1 = Interpolater.build_inside_mask_array(MapParameter['ShapeFile'], lon_array, lat_array)
    zi[np.logical_not(grid1)] = np.NaN
    #-----------------------------------------------------------------------------------
    print(u'Create figure...')
    #fig = plt.figure(num=1,figsize=(12, 9), dpi=100)
    fig = plt.figure(figsize=(12, 9), dpi=100)
    #fig = plt.figure()
    print(u'Create figure...Done')
    .........skipped
The first time I visit the page, I get:
mask非绘图区域
Create figure...
Create figure...Done
(104, 35, 108, 39.5)
This is OK, but after a while, when I visit again, I get:
mask非绘图区域
Create figure...
and I can see the 'python' process taking 25% of my CPU (which has 4 cores), which means it has fallen into an infinite loop!
This is my web.py class, for reference:
class Month:
    def POST(self):
        form = ParameterForm()
        if not form.validates():
            return render.Month(form)
        else:
            StationInfoFile = './StationsId.txt'  # make sure this file exists
            if(not os.path.isfile(StationInfoFile)):
                print(u'StationInfoFile 文件%s不存在!'%(inputfile))  # "StationInfoFile %s does not exist!"
                sys.exit(0)
            StationsInfo = np.loadtxt(StationInfoFile)  # load all data as integer and float, not string
            StationsId, StationsLon, StationsLat = StationsInfo[:,0].astype(np.int64), StationsInfo[:,1], StationsInfo[:,2]
            basedir, DataCats, DataCatsDict = u'D:/测试数据', [u'逐日平均', u'逐日降水'], {u'逐日平均': 'td', u'逐日降水': 'rd'}
            iFrom, iEnd = \
                int(form['Start Year'].value)*10000 + int(form['Start Month'].value)*100 + int(form['Start Day'].value), \
                int(form['End Year'].value)*10000 + int(form['End Month'].value)*100 + int(form['End Day'].value)  # value from form is string!
            MapParameter = GetMapParameter()
            if (u'温度' == form['Data Source'].value):  # temperature
                d = u'逐日平均'
                tmpDataTxt = 'Test_temp.txt'
                tmpOutPNG = './static/' + 'Test_temp.png'
                MapParameter['LevelFile'] = '.\maplev_temp.LEV'
                MapParameter['Title'] = u'逐日平均'
            elif (u'降水(mm)' == form['Data Source'].value):  # precipitation (mm)
                d = u'逐日降水'
                tmpDataTxt = 'Test_pred.txt'
                tmpOutPNG = './static/' + 'Test_temp.png'
                MapParameter['LevelFile'] = '.\maplev_rain.LEV'
                MapParameter['Title'] = u'逐日降水'
            else:
                print "form['Data Source'].value=", form['Data Source'].value
            print "----------- PROCESSING FOR CATEGORY:", d
            tmpMeanVal = []
            for i in range(len(StationsId)):
                s, lo, la = StationsId[i], StationsLon[i], StationsLat[i]
                #print basedir, d, str(s), DataCatsDict[d]+'.txt'
                datafile = os.path.join(basedir, d, str(s)) + DataCatsDict[d] + '.txt'
                print datafile, iFrom, iEnd
                data = getdata.GetData(datafile, iFrom, iEnd)
                a = np.mean(np.array(data)[:,1])*0.1
                tmpMeanVal.append([s, lo, la, a])
            rec = np.array(tmpMeanVal, dtype=[('int','int'),('float','float')])
            print 'Writing data ...'
            np.savetxt(tmpDataTxt, tmpMeanVal, fmt="%6i %-7.2f %-7.2f %8.2f")
            print 'Writing data ... Done.'
            sssss = open(tmpDataTxt, 'r')
            print sssss.read()
            sssss.close()
            DrawMapMain(MapParameter, inputfile=tmpDataTxt, imgfile=tmpOutPNG)
            return render.Reports(tmpOutPNG)
At first I suspected that plt.figure might have a memory leak problem, so I call clf and plt.close('all') at both the beginning and the end of the function! I even wrote a short piece of test code:
if __name__ == "__main__":
    MapParameter = GetMapParameter()
    MapParameter['LevelFile'] = '.\maplev_rain.LEV'
    MapParameter['Title'] = u'逐日降水'
    for iloop in range(0, 10):
        DrawMapMain(MapParameter, inputfile='Test_pred.txt', imgfile='c:/Test_pred'+str(iloop)+'.png')
    MapParameter['LevelFile'] = '.\maplev_temp.LEV'
    MapParameter['Title'] = u'逐日temp'
    for iloop in range(0, 10):
        DrawMapMain(MapParameter, inputfile='Test_temp.txt', imgfile='c:/Test_temp'+str(iloop)+'.png')
This code works fine. It's so weird; does anybody have a clue? Many thanks!
This question has an answer in the comments:
What matplotlib backend are you using? You should be using one of the non-interactive ones if you're running things from a webserver. E.g. do import matplotlib; matplotlib.use('Agg') before import matplotlib.pyplot as plt. – Joe Kington Jan 16 at 17:56
For more information on matplotlib backends, see: http://matplotlib.org/faq/usage_faq.html#what-is-a-backend – Joe Kington Jan 17 at 15:21
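For completeness, a minimal sketch of what those comments suggest; the backend has to be selected before pyplot is imported anywhere in the web process:

import matplotlib
matplotlib.use('Agg')             # non-interactive backend, safe to use from a webserver
import matplotlib.pyplot as plt   # import pyplot only after the backend is chosen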
