Making a timelapse over geographical positions using pyplot - python

I am making a scatter plot of positions marked with latitude and longitude which works all right as it shows all the positions in a time period in a static image. But I was wondering if there is any easy way of utilizing that I have the unixtime to every position - so I can show the movements as a timelapse - kind of looping through the positions and showing an animation of the movement.
EDIT:
I have set up a dynamicly updating plot that plots all the positions one by one, now I just have to add the basemap in the background. The codes come from this answer.
import matplotlib.pyplot as plt
import sqlite3 as lite
from operator import itemgetter
def getData():
con = lite.connect('database.db')
with con:
cur = con.cursor()
cur.execute('SELECT latitude, longitude, unixtime FROM Message WHERE latitude > 50 AND longitude > -30 AND longitude < 40 AND latitude < 80')
all_rows = [[int(x[0]), int(x[1]), int(x[2])] for x in cur]
all_rows = sorted(all_rows, key=itemgetter(2))
return all_rows
plt.ion()
class DynamicUpdate():
#Suppose we know the x range
min_x = 0
max_x = 10000
def on_launch(self):
#Set up plot
self.figure, self.ax = plt.subplots()
self.lines, = self.ax.plot([],[], 'o')
#Autoscale on unknown axis and known lims on the other
self.ax.set_autoscaley_on(True)
self.ax.set_xlim(-50, 50)
self.ax.set_ylim(40, 80)
#Other stuff
self.ax.grid()
def on_running(self, xdata, ydata):
#Update data (with the new _and_ the old points)
self.lines.set_xdata(xdata)
self.lines.set_ydata(ydata)
#Need both of these in order to rescale
self.ax.relim()
self.ax.autoscale_view()
#We need to draw *and* flush
self.figure.canvas.draw()
self.figure.canvas.flush_events()
#Example
def __call__(self):
import numpy as np
import time
self.on_launch()
xdata = []
ydata = []
all_rows = getData()
for x in all_rows:
a,b,f = zip(x)
xdata.append(b)
ydata.append(a)
self.on_running(xdata, ydata)
return xdata, ydata
d = DynamicUpdate()
d()
Old Code:
This shows the static data
map = Basemap(projection='merc', lat_0=59.45, lon_0=10.5,
resolution = 'c', area_thresh = 1000,
llcrnrlon=-30, llcrnrlat=50,
urcrnrlon=40, urcrnrlat=80)
map.drawcoastlines()
map.fillcontinents(color='black')
con = lite.connect('database.db')
with con:
cur = con.cursor()
cur.execute('SELECT latitude, longitude FROM Message WHERE latitude > 50 AND longitude > -30 AND longitude < 40 AND latitude < 80')
data = cur.fetchall()
y,x = zip(*data)
x,y = map(x,y)
plt.scatter(x,y, s=0.07, alpha=0.6, color="#e74c3c", edgecolors='none')
plt.show()

There are are few ways to do animations in matplotlib, the matplotlib.animation provides a framework but this can be a little involved. Probabaly the easiest way to do it is using plt.ion(). I don't know how you access your date with cur.execute but does something like this work:
map = Basemap(projection='merc', lat_0=59.45, lon_0=10.5,
resolution = 'c', area_thresh = 1000,
llcrnrlon=-30, llcrnrlat=50,
urcrnrlon=40, urcrnrlat=80)
fig, ax = plt.subplots(1,1)
plt.ion()
plt.show()
map.drawcoastlines(ax=ax)
map.fillcontinents(color='black',ax=ax)
con = lite.connect('database.db')
with con:
cur = con.cursor()
i=0
for unixtime in range(1406851200,1409529600):
cur.execute('SELECT latitude, longitude FROM Message WHERE latitude > 50 AND longitude > -30 AND longitude < 40 AND latitude < 80 AND unixtime ==' + str(unixtime))
data = cur.fetchall()
y,x = zip(*data)
x,y = map(x,y)
pts = ax.scatter(x,y, s=0.07, alpha=0.6, color="#e74c3c", edgecolors='none')
plt.draw()
plt.pause(0.0001)
#i += 1
#plt.savefig('./out.{0:07d}.png'.format(i))
pts.remove()

Even though I've gotten an adequate answer over, I found that it was a hassle to get the plot exactly like I wanted, and still animate it - so I did it utilizing some linux tools to make a movie of snapshots instead. This is what I did, for future reference and for others having the same problem:
Timelapse animation, the lazy way
I simply made a plot over all geographical positions for every hour for the whole timespan. This can be done down to every minute, second etc:
con = lite.connect('database/SAISREAL.db')
with con:
cur = con.cursor()
i = 0
for k in range(0,137*24): #This is the timespan - every hour for 137 days
map = Basemap(projection='merc', lat_0=59.45, lon_0=10.5,
resolution = 'c', area_thresh = 1000,
llcrnrlon=-30, llcrnrlat=50,
urcrnrlon=40, urcrnrlat=80)
map.drawcoastlines()
map.fillcontinents(color='#27ae60')
start = 0+k*60*60 #This is again the timespan
end = 0+(k+1)*60*60
Ok - so now I've established the timespan which I will query data from, as well as drawn the map overlay
cur.execute('SELECT distinct userid, latitude, longitude FROM geodata WHERE unixtime > {start} AND unixtime < {end}'.format(start = start, end = end))
data = cur.fetchall()
if len(data)>0: #Simply check if there is data available
i = i+1
filename = ''
if i<10:
filename = '0000'+str(i)
elif i<100:
filename = '000'+str(i)
elif i<1000:
filename = '00'+str(i)
else:
filename = '0'+str(i)
f,y,x = zip(*data)
x,y = map(x,y)
The whole filename thing is used later, when I convert the images into a movie - its important that they are named sequentially where everyone has the same number of digits.
plt.title( str(datetime.datetime.fromtimestamp(int(end)).strftime('%Y-%m-%d')) + ' kl '+str(datetime.datetime.fromtimestamp(int(end)).strftime('%H')))
plt.scatter(x,y, s=8, alpha=0.7, color="#2980b9", edgecolors='none')
Here I just plotted the info with a timestamp as title.
plt.savefig('Fisheriesplot/fishplot/'+str(filename)+'.png', format='png')
plt.clf()
And then, saving the picture. This gives some 3000 .png images - this can obviously be done with other file formats.
Before I either make them to a GIF or a movie, I want to remove the transparent background - to make them appear nicer (less colour shifting between frames)
mkdir batch
for file in *.png ; do convert "${file}" -background black -alpha remove -flatten -alpha off "batch/${file}" ; done
cd batch
If the goal is to make a gif - skip the rest and do this: convert -delay 10 -loop 0 *.png animaion.gif
Option1: Make a movie out of .png
ffmpeg -y -f image2 -framerate 20 -i %05d.png -vcodec png -b 8000k a20k.avi
Just do this is in the folder. Set bitrakte and framerate as you want it- notice that this movie can be quite big.
Option2: Convert images to other format, then make movie
mogrify -format jpg *.png
This is done in terminal in the same folder as the pictures. Then I want to move all the jpg's in their own folder:
mkdir jpgfolder
mv *.jpg jpgfolder
And now, lastly I can make the movie:
cd jpgfolder
ffmpeg -y -f image2 -framerate 4 -i %05d.jpg -vcodec mpeg4 -b 800k a88k.avi
The framerate, which is set to 4 here, should be set to whatever you want. Notice that %05d.jpg says that every file has a leading 0, and has a total of five digits. If its four digits, write 4 etc.
Note that this isn't the most streamlined or smart way to do this, but it is a solution for everyone not wanting to change your code, other than putting it in a loop.

Related

python folium polygons map color categories

I have a dataframe in Python containing zipcodes (4 digits), pologyons, and would like to create maps with colored zipcodes based on a column value. I tried this with the geopandas and folium packages but it seems to be not working.
The shapefigures I downloaded from this website https://www.cbs.nl/nl-nl/dossier/nederland-regionaal/geografische-data/gegevens-per-postcode .
The shapefile seems to work when I use this code:
file_path = r'C:\xxxx\CBS_pc4_2020_v1.shp'
gdf_zipcodes = gpd.read_file(file_path)
fig, ax = plt.subplots(figsize=(8,8))
gdf_zipcodes.plot(ax=ax, facecolor='none', edgecolor='gray')
plt.show()
Currently, this is (part of) my dataframe
zipcodes =
Zipcode
Var
Location
Allocation
9941
o
Polygon
7023
7023
o
Polygon
7023
6321
o
Multipolygon
4020
I also have a location dataframe containing zipcodes and specific lon & latitudes like this:
loc_df =
Zipcode
lat
lon
9941
xxx
yyy
7231
xxx
yyy
What does work is creating a map with these specific coordinates, this is the code I used:
map = folium.Map(location =[loc_df['lat'].mean(),loc_df['lon'].mean()], zoom_start=12)
step = cm.StepColormap(colors, vmin = 0, vmax = 9999, index = [], caption = 'step')
color_pallete = sns.color_palette()
color_pallete = sns.color_palette("Set2", 99999)
color_pallete = color_pallete.as_hex()
for index, row in results_1.iterrows():
if row['Var'] == 'o':
folium.Marker([row['lat'], row['lon']], popup = row['Variable Name']).add_to(map)
if row['Var'] == 'x':
c = color_pallete[int(row['Allocation'])]
folium.CircleMarker([row['lat'], row['lon']],radius=5, color = c).add_to(map)
else:
continue
folium.CircleMarker(
location=[row['lon'], row['lat']],
radius=12,
weight=2,
fill=True,
fill_color=colors[int(row['cluster'])],
color=colors[int(row['cluster'])]
).add_to(map)
map.save('map1.html')
Then, I thought this code would work to use the pologyons instead of specific lat and lon values to create the map but unfortunately it does not:
for _, r in zipcodes.iterrows():
# Without simplifying the representation of each borough,
# the map might not be displayed
sim_geo = gpd.GeoSeries(r['geometry']).simplify(tolerance=0.001)
geo_j = sim_geo.to_json()
geo_j = folium.GeoJson(data=geo_j,
style_function=lambda x: {'fillColor': 'blue'})
folium.Popup(r['Location']).add_to(geo_j)
geo_j.add_to(map)
In the end I would like to color based on the allocation of the dataframe and create markers such as in the first folium map but for now I would really like to visualize the polygons (and hopefully I can find out how I create the colors after that).

Change the scale of the graph image

I try to generate a graph and save an image of the graph in python. Although the "plotting" of the values seems ok and I can get my picture, the scale of the graph is badly shifted.
If you compare the correct graph from tutorial example with my bad graph generated from different dataset, the curves are cut at the bottom to early: Y-axis should start just above the highest values and I should also see the curves for the highest X-values (in my case around 10^3).
But honestly, I think that problem is the scale of the y-axis, but actually do not know what parameteres should I change to fix it. I tried to play with some numbers (see below script), but without any good results.
This is the code for calculation and generation of the graph image:
import numpy as np
hic_data = load_hic_data_from_reads('/home/besy/Hi-C/MOREX/TCC35_parsedV2/TCC35_V2_interaction_filtered.tsv', resolution=100000)
min_diff = 1
max_diff = 500
import matplotlib.pyplot as plt
fig = plt.figure(figsize=(12, 12))
for cnum, c in enumerate(hic_data.chromosomes):
if c in ['ChrUn']:
continue
dist_intr = []
for diff in xrange(min_diff, min((max_diff, 1 + hic_data.chromosomes[c]))):
beg, end = hic_data.section_pos[c]
dist_intr.append([])
for i in xrange(beg, end - diff):
dist_intr[-1].append(hic_data[i, i + diff])
mean_intrp = []
for d in dist_intr:
if len(d):
mean_intrp.append(float(np.nansum(d)) / len(d))
else:
mean_intrp.append(0.0)
xp, yp = range(min_diff, max_diff), mean_intrp
x = []
y = []
for k in xrange(len(xp)):
if yp[k]:
x.append(xp[k])
y.append(yp[k])
l = plt.plot(x, y, '-', label=c, alpha=0.8)
plt.hlines(mean_intrp[2], 3, 5.25 + np.exp(cnum / 4.3), color=l[0].get_color(),
linestyle='--', alpha=0.5)
plt.text(5.25 + np.exp(cnum / 4.3), mean_intrp[2], c, color=l[0].get_color())
plt.plot(3, mean_intrp[2], '+', color=l[0].get_color())
plt.xscale('log')
plt.yscale('log')
plt.ylabel('number of interactions')
plt.xlabel('Distance between bins (in 100 kb bins)')
plt.grid()
plt.ylim(2, 250)
_ = plt.xlim(1, 110)
fig.savefig('/home/besy/Hi-C/MOREX/TCC35_V2_results/filtered/TCC35_V2_decay.png', dpi=fig.dpi)
I think that problem is in scale I need y-axis to start from 10^-1 (0.1), in order to change this I tried this:
min_diff = 0.1
.
.
.
dist_intr = []
for diff in xrange(min_diff, min((max_diff, 0.1 + hic_data.chromosomes[c]))):
.
.
.
plt.ylim((0.1, 20))
But this values return: "integer argument expected, got float"
I also tried to play with:
max_diff, plt.ylim and plt.xlim parameters little bit, but nothing changed to much.
I would like to ask you what parameter/s and how I need change to generate image of the correctly focused graph. Thank you in advance.

matplotlig speed on plot a gps point on a chart

with the following code I'm trying to add a gps point over a map using a raspberry pi 3:
from gps import *
import matplotlib.pyplot as plt
import time, inspect, os
logPath = "/home/solergy/IBUZ/LOGS/"
inputMapFilePath = logPath + "srcMap/" + "TBM-min.png"
actualOutMapFilePath = logPath + "Map_GPS"
TRX = -12.653 #top right longitude
TRY = 41.8675 #top right latitude
BLX = -12.6332 #bottom left longitude
BLY = 41.8619 #bottom left latitude
gpsd = gps(mode=WATCH_ENABLE|WATCH_NEWSTYLE)
im = plt.imread(inputMapFilePath)
try:
while True:
report = gpsd.next() #
if report['class'] == 'TPV':
GPStime = str(getattr(report,'time',''))
lat = str(getattr(report,'lat',0.0))
lon = str(getattr(report,'lon',0.0))
speed = str(getattr(report,'speed','nan'))
# Create Image File
pos_y = float(lat)
pos_x = -float(lon)
actualTime = time.strftime("%H-%M-%S", time.localtime())
plt.text(BLX, BLY, actualTime)
plt.imshow(im, extent=[BLX, TRX, BLY, TRY], zorder=1)
plt.scatter(x=[pos_x], y=[pos_y], s=3, c='r', zorder=2)
cur_axes = plt.gca()
cur_axes.axes.get_xaxis().set_visible(False)
cur_axes.axes.get_yaxis().set_visible(False)
plt.savefig(actualOutMapFilePath, dpi=150, type="png")
plt.close('all')
print (actualTime , GPStime)
except (KeyboardInterrupt, SystemExit): #when you press ctrl+c
f.close()
f_now.close()
my problem is that I need to update this map at least every second (10Hz is better). The problem is not the gps part but the matplotlib that produce this image. Any idea about how to achieve a higher speed for this code?
Antonio
An obvious optimization would be avoid clearing and redrawing everything at every iteraiton, but instead updating the XY values of the dot. See How to update a plot in matplotlib?
I would expect a considerable speedup (especially from avoiding the plt.imshow call), but it is hard to promise anything about performance without access to the actual data (or even hardware).

Adding a single label to the legend for a series of different data points plotted inside a designated bin in Python using matplotlib.pyplot.plot()

I have a script for plotting astronomical data of redmapping clusters using a csv file. I could get the data points in it and want to plot them using different colors depending on their redshift values: I am binning the dataset into 3 bins (0.1-0.2, 0.2-0.25, 0.25,0.31) based on the redshift.
The problem arises with my code after I distinguish to what bin the datapoint belongs: I want to have 3 labels in the legend corresponding to red, green and blue data points, but this is not happening and I don't know why. I am using plot() instead of scatter() as I also had to do the best fit from the data in the same figure. So everything needs to be in 1 figure.
import numpy as np
import matplotlib.pyplot as py
import csv
z = open("Sheet4CSV.csv","rU")
data = csv.reader(z)
x = []
y = []
ylow = []
yupp = []
xlow = []
xupp = []
redshift = []
for r in data:
x.append(float(r[2]))
y.append(float(r[5]))
xlow.append(float(r[3]))
xupp.append(float(r[4]))
ylow.append(float(r[6]))
yupp.append(float(r[7]))
redshift.append(float(r[1]))
from operator import sub
xerr_l = map(sub,x,xlow)
xerr_u = map(sub,xupp,x)
yerr_l = map(sub,y,ylow)
yerr_u = map(sub,yupp,y)
py.xlabel("$Original\ Tx\ XCS\ pipeline\ Tx\ keV$")
py.ylabel("$Iterative\ Tx\ pipeline\ keV$")
py.xlim(0,12)
py.ylim(0,12)
py.title("Redmapper Clusters comparison of Tx pipelines")
ax1 = py.subplot(111)
##Problem starts here after the previous line##
for p in redshift:
for i in xrange(84):
p=redshift[i]
if 0.1<=p<0.2:
ax1.plot(x[i],y[i],color="b", marker='.', linestyle = " ")#, label = "$z < 0.2$")
exit
if 0.2<=p<0.25:
ax1.plot(x[i],y[i],color="g", marker='.', linestyle = " ")#, label="$0.2 \leq z < 0.25$")
exit
if 0.25<=p<=0.3:
ax1.plot(x[i],y[i],color="r", marker='.', linestyle = " ")#, label="$z \geq 0.25$")
exit
##There seems nothing wrong after this point##
py.errorbar(x,y,yerr=[yerr_l,yerr_u],xerr=[xerr_l,xerr_u], fmt= " ",ecolor='magenta', label="Error bars")
cof = np.polyfit(x,y,1)
p = np.poly1d(cof)
l = np.linspace(0,12,100)
py.plot(l,p(l),"black",label="Best fit")
py.plot([0,15],[0,15],"black", linestyle="dotted", linewidth=2.0, label="line $y=x$")
py.grid()
box = ax1.get_position()
ax1.set_position([box.x1,box.y1,box.width, box.height])
py.legend(loc='center left',bbox_to_anchor=(1,0.5))
py.show()
In the 1st 'for' loop, I have indexed every value 'p' in the list 'redshift' so that bins can be created using 'if' statement. But if I add the labels that are hashed out against each py.plot() inside the 'if' statements, each data point 'i' that gets plotted in the figure as an intersection of (x[i],y[i]) takes the label and my entire legend attains in total 87 labels (including the 3 mentioned in the code at other places)!!!!!!
I essentially need 1 label for each bin...
Please tell me what needs to done after the bins are created and py.plot() commands used...Thanks in advance :-)
Sorry I cannot post my image here due to low reputation!
The data 'appended' for x, y and redshift lists from the csv file are as follows:
x=[5.031,10.599,10.589,8.548,9.089,8.675,3.588,1.244,3.023,8.632,8.953,7.603,7.513,2.917,7.344,7.106,3.889,7.287,3.367,6.839,2.801,2.316,1.328,6.31,6.19,6.329,6.025,5.629,6.123,5.892,5.438,4.398,4.542,4.624,4.501,4.504,5.033,5.068,4.197,2.854,4.784,2.158,4.054,3.124,3.961,4.42,3.853,3.658,1.858,4.537,2.072,3.573,3.041,5.837,3.652,3.209,2.742,2.732,1.312,3.635,2.69,3.32,2.488,2.996,2.269,1.701,3.935,2.015,0.798,2.212,1.672,1.925,3.21,1.979,1.794,2.624,2.027,3.66,1.073,1.007,1.57,0.854,0.619,0.547]
y=[5.255,10.897,11.045,9.125,9.387,17.719,4.025,1.389,4.152,8.703,9.051,8.02,7.774,3.139,7.543,7.224,4.155,7.416,3.905,6.868,2.909,2.658,1.651,6.454,6.252,6.541,6.152,5.647,6.285,6.079,5.489,4.541,4.634,8.851,4.554,4.555,5.559,5.144,5.311,5.839,5.364,3.18,4.352,3.379,4.059,4.575,3.914,5.736,2.304,4.68,3.187,3.756,3.419,9.118,4.595,3.346,3.603,6.313,1.816,4.34,2.732,4.978,2.719,3.761,2.623,2.1,4.956,2.316,4.231,2.831,1.954,2.248,6.573,2.276,2.627,3.85,3.545,25.405,3.996,1.347,1.679,1.435,0.759,0.677]
redshift = [0.12,0.25,0.23,0.23,0.27,0.26,0.12,0.27,0.17,0.18,0.17,0.3,0.23,0.1,0.23,0.29,0.29,0.12,0.13,0.26,0.11,0.24,0.13,0.21,0.17,0.2,0.3,0.29,0.23,0.27,0.25,0.21,0.11,0.15,0.1,0.26,0.23,0.12,0.23,0.26,0.2,0.17,0.22,0.26,0.25,0.12,0.19,0.24,0.18,0.15,0.27,0.14,0.14,0.29,0.29,0.26,0.15,0.29,0.24,0.24,0.23,0.26,0.29,0.22,0.13,0.18,0.24,0.14,0.24,0.24,0.17,0.26,0.29,0.11,0.14,0.26,0.28,0.26,0.28,0.27,0.23,0.26,0.23,0.19]
Working with numerical data like this, you should really consider using a numerical library, like numpy.
The problem in your code arises from processing each record (a coordinate (x,y) and the corresponding value redshift) one at a time. You are calling plot for each point, thereby creating legends for each of those 84 datapoints. You should consider your "bins" as groups of data that belong to the same dataset and process them as such. You could use "logical masks" to distinguish between your "bins", as shown below.
It's also not clear why you call exit after each plotting action.
import numpy as np
import matplotlib.pyplot as plt
x = np.array([5.031,10.599,10.589,8.548,9.089,8.675,3.588,1.244,3.023,8.632,8.953,7.603,7.513,2.917,7.344,7.106,3.889,7.287,3.367,6.839,2.801,2.316,1.328,6.31,6.19,6.329,6.025,5.629,6.123,5.892,5.438,4.398,4.542,4.624,4.501,4.504,5.033,5.068,4.197,2.854,4.784,2.158,4.054,3.124,3.961,4.42,3.853,3.658,1.858,4.537,2.072,3.573,3.041,5.837,3.652,3.209,2.742,2.732,1.312,3.635,2.69,3.32,2.488,2.996,2.269,1.701,3.935,2.015,0.798,2.212,1.672,1.925,3.21,1.979,1.794,2.624,2.027,3.66,1.073,1.007,1.57,0.854,0.619,0.547])
y = np.array([5.255,10.897,11.045,9.125,9.387,17.719,4.025,1.389,4.152,8.703,9.051,8.02,7.774,3.139,7.543,7.224,4.155,7.416,3.905,6.868,2.909,2.658,1.651,6.454,6.252,6.541,6.152,5.647,6.285,6.079,5.489,4.541,4.634,8.851,4.554,4.555,5.559,5.144,5.311,5.839,5.364,3.18,4.352,3.379,4.059,4.575,3.914,5.736,2.304,4.68,3.187,3.756,3.419,9.118,4.595,3.346,3.603,6.313,1.816,4.34,2.732,4.978,2.719,3.761,2.623,2.1,4.956,2.316,4.231,2.831,1.954,2.248,6.573,2.276,2.627,3.85,3.545,25.405,3.996,1.347,1.679,1.435,0.759,0.677])
redshift = np.array([0.12,0.25,0.23,0.23,0.27,0.26,0.12,0.27,0.17,0.18,0.17,0.3,0.23,0.1,0.23,0.29,0.29,0.12,0.13,0.26,0.11,0.24,0.13,0.21,0.17,0.2,0.3,0.29,0.23,0.27,0.25,0.21,0.11,0.15,0.1,0.26,0.23,0.12,0.23,0.26,0.2,0.17,0.22,0.26,0.25,0.12,0.19,0.24,0.18,0.15,0.27,0.14,0.14,0.29,0.29,0.26,0.15,0.29,0.24,0.24,0.23,0.26,0.29,0.22,0.13,0.18,0.24,0.14,0.24,0.24,0.17,0.26,0.29,0.11,0.14,0.26,0.28,0.26,0.28,0.27,0.23,0.26,0.23,0.19])
bin3 = 0.25 <= redshift
bin2 = np.logical_and(0.2 <= redshift, redshift < 0.25)
bin1 = np.logical_and(0.1 <= redshift, redshift < 0.2)
plt.ion()
labels = ("$z < 0.2$", "$0.2 \leq z < 0.25$", "$z \geq 0.25$")
colors = ('r', 'g', 'b')
for bin, label, co in zip( (bin1, bin2, bin3), labels, colors):
plt.plot(x[bin], y[bin], color=co, ls='none', marker='o', label=label)
plt.legend()
plt.show()

world map without rivers with matplotlib / Basemap?

Would there be a way to plot the borders of the continents with Basemap (or without Basemap, if there is some other way), without those annoying rivers coming along? Especially that piece of Kongo River, not even reaching the ocean, is disturbing.
EDIT: I intend to further plot data over the map, like in the Basemap gallery (and still have the borderlines of the continents drawn as black lines over the data, to give structure for the worldmap) so while the solution by Hooked below is nice, masterful even, it's not applicable for this purpose.
Image produced by:
from mpl_toolkits.basemap import Basemap
import matplotlib.pyplot as plt
fig = plt.figure(figsize=(8, 4.5))
plt.subplots_adjust(left=0.02, right=0.98, top=0.98, bottom=0.00)
m = Basemap(projection='robin',lon_0=0,resolution='c')
m.fillcontinents(color='gray',lake_color='white')
m.drawcoastlines()
plt.savefig('world.png',dpi=75)
For reasons like this i often avoid Basemap alltogether and read the shapefile in with OGR and convert them to a Matplotlib artist myself. Which is alot more work but also gives alot more flexibility.
Basemap has some very neat features like converting the coordinates of input data to your 'working projection'.
If you want to stick with Basemap, get a shapefile which doesnt contain the rivers. Natural Earth for example has a nice 'Land' shapefile in the physical section (download 'scale rank' data and uncompress). See http://www.naturalearthdata.com/downloads/10m-physical-vectors/
You can read the shapefile in with the m.readshapefile() method from Basemap. This allows you to get the Matplotlib Path vertices and codes in the projection coordinates which you can then convert into a new Path. Its a bit of a detour but it gives you all styling options from Matplotlib, most of which are not directly available via Basemap. Its a bit hackish, but i dont now another way while sticking to Basemap.
So:
from mpl_toolkits.basemap import Basemap
import matplotlib.pyplot as plt
from matplotlib.collections import PathCollection
from matplotlib.path import Path
fig = plt.figure(figsize=(8, 4.5))
plt.subplots_adjust(left=0.02, right=0.98, top=0.98, bottom=0.00)
# MPL searches for ne_10m_land.shp in the directory 'D:\\ne_10m_land'
m = Basemap(projection='robin',lon_0=0,resolution='c')
shp_info = m.readshapefile('D:\\ne_10m_land', 'scalerank', drawbounds=True)
ax = plt.gca()
ax.cla()
paths = []
for line in shp_info[4]._paths:
paths.append(Path(line.vertices, codes=line.codes))
coll = PathCollection(paths, linewidths=0, facecolors='grey', zorder=2)
m = Basemap(projection='robin',lon_0=0,resolution='c')
# drawing something seems necessary to 'initiate' the map properly
m.drawcoastlines(color='white', zorder=0)
ax = plt.gca()
ax.add_collection(coll)
plt.savefig('world.png',dpi=75)
Gives:
How to remove "annoying" rivers:
If you want to post-process the image (instead of working with Basemap directly) you can remove bodies of water that don't connect to the ocean:
import pylab as plt
A = plt.imread("world.png")
import numpy as np
import scipy.ndimage as nd
import collections
# Get a counter of the greyscale colors
a = A[:,:,0]
colors = collections.Counter(a.ravel())
outside_and_water_color, land_color = colors.most_common(2)
# Find the contigous landmass
land_idx = a == land_color[0]
# Index these land masses
L = np.zeros(a.shape,dtype=int)
L[land_idx] = 1
L,mass_count = nd.measurements.label(L)
# Loop over the land masses and fill the "holes"
# (rivers without outlays)
L2 = np.zeros(a.shape,dtype=int)
L2[land_idx] = 1
L2 = nd.morphology.binary_fill_holes(L2)
# Remap onto original image
new_land = L2==1
A2 = A.copy()
c = [land_color[0],]*3 + [1,]
A2[new_land] = land_color[0]
# Plot results
plt.subplot(221)
plt.imshow(A)
plt.axis('off')
plt.subplot(222)
plt.axis('off')
B = A.copy()
B[land_idx] = [1,0,0,1]
plt.imshow(B)
plt.subplot(223)
L = L.astype(float)
L[L==0] = None
plt.axis('off')
plt.imshow(L)
plt.subplot(224)
plt.axis('off')
plt.imshow(A2)
plt.tight_layout() # Only with newer matplotlib
plt.show()
The first image is the original, the second identifies the land mass. The third is not needed but fun as it ID's each separate contiguous landmass. The fourth picture is what you want, the image with the "rivers" removed.
Following user1868739's example, I am able to select only the paths (for some lakes) that I want:
from mpl_toolkits.basemap import Basemap
import matplotlib.pyplot as plt
fig = plt.figure(figsize=(8, 4.5))
plt.subplots_adjust(left=0.02, right=0.98, top=0.98, bottom=0.00)
m = Basemap(resolution='c',projection='robin',lon_0=0)
m.fillcontinents(color='white',lake_color='white',zorder=2)
coasts = m.drawcoastlines(zorder=1,color='white',linewidth=0)
coasts_paths = coasts.get_paths()
ipolygons = range(83) + [84] # want Baikal, but not Tanganyika
# 80 = Superior+Michigan+Huron, 81 = Victoria, 82 = Aral, 83 = Tanganyika,
# 84 = Baikal, 85 = Great Bear, 86 = Great Slave, 87 = Nyasa, 88 = Erie
# 89 = Winnipeg, 90 = Ontario
for ipoly in ipolygons:
r = coasts_paths[ipoly]
# Convert into lon/lat vertices
polygon_vertices = [(vertex[0],vertex[1]) for (vertex,code) in
r.iter_segments(simplify=False)]
px = [polygon_vertices[i][0] for i in xrange(len(polygon_vertices))]
py = [polygon_vertices[i][2] for i in xrange(len(polygon_vertices))]
m.plot(px,py,linewidth=0.5,zorder=3,color='black')
plt.savefig('world2.png',dpi=100)
But this only works when using white background for the continents. If I change color to 'gray' in the following line, we see that other rivers and lakes are not filled with the same color as the continents are. (Also playing with area_thresh will not remove those rivers that are connected to ocean.)
m.fillcontinents(color='gray',lake_color='white',zorder=2)
The version with white background is adequate for further color-plotting all kind of land information over the continents, but a more elaborate solution would be needed, if one wants to retain the gray background for continents.
I frequently modify Basemap's drawcoastlines() to avoid those 'broken' rivers. I also modify drawcountries() for the sake of data source consistency.
Here is what I use in order to support the different resolutions available in Natural Earth data:
from mpl_toolkits.basemap import Basemap
class Basemap(Basemap):
""" Modify Basemap to use Natural Earth data instead of GSHHG data """
def drawcoastlines(self):
shapefile = 'data/naturalearth/coastline/ne_%sm_coastline' % \
{'l':110, 'm':50, 'h':10}[self.resolution]
self.readshapefile(shapefile, 'coastline', linewidth=1.)
def drawcountries(self):
shapefile = 'data/naturalearth/countries/ne_%sm_admin_0_countries' % \
{'l':110, 'm':50, 'h':10}[self.resolution]
self.readshapefile(shapefile, 'countries', linewidth=0.5)
m = Basemap(llcrnrlon=-90, llcrnrlat=-40, urcrnrlon=-30, urcrnrlat=+20,
resolution='l') # resolution = (l)ow | (m)edium | (h)igh
m.drawcoastlines()
m.drawcountries()
Here is the output:
Please note that by default Basemap uses resolution='c' (crude), which is not supported in the code shown.
If you're OK with plotting outlines rather than shapefiles, it's pretty easy to plot coastlines that you can get from wherever. I got my coastlines from the NOAA Coastline Extractor in MATLAB format:
http://www.ngdc.noaa.gov/mgg/shorelines/shorelines.html
To edit the coastlines, I converted to SVG, then edited them with Inkscape, then converted back to the lat/lon text file ("MATLAB" format).
All Python code is included below.
# ---------------------------------------------------------------
def plot_lines(mymap, lons, lats, **kwargs) :
"""Plots a custom coastline. This plots simple lines, not
ArcInfo-style SHAPE files.
Args:
lons: Longitude coordinates for line segments (degrees E)
lats: Latitude coordinates for line segments (degrees N)
Type Info:
len(lons) == len(lats)
A NaN in lons and lats signifies a new line segment.
See:
giss.noaa.drawcoastline_file()
"""
# Project onto the map
x, y = mymap(lons, lats)
# BUG workaround: Basemap projects our NaN's to 1e30.
x[x==1e30] = np.nan
y[y==1e30] = np.nan
# Plot projected line segments.
mymap.plot(x, y, **kwargs)
# Read "Matlab" format files from NOAA Coastline Extractor.
# See: http://www.ngdc.noaa.gov/mgg/coast/
lineRE=re.compile('(.*?)\s+(.*)')
def read_coastline(fname, take_every=1) :
nlines = 0
xdata = array.array('d')
ydata = array.array('d')
for line in file(fname) :
# if (nlines % 10000 == 0) :
# print 'nlines = %d' % (nlines,)
if (nlines % take_every == 0 or line[0:3] == 'nan') :
match = lineRE.match(line)
lon = float(match.group(1))
lat = float(match.group(2))
xdata.append(lon)
ydata.append(lat)
nlines = nlines + 1
return (np.array(xdata),np.array(ydata))
def drawcoastline_file(mymap, fname, **kwargs) :
"""Reads and plots a coastline file.
See:
giss.basemap.drawcoastline()
giss.basemap.read_coastline()
"""
lons, lats = read_coastline(fname, take_every=1)
return drawcoastline(mymap, lons, lats, **kwargs)
# =========================================================
# coastline2svg.py
#
import giss.io.noaa
import os
import numpy as np
import sys
svg_header = """<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!-- Created with Inkscape (http://www.inkscape.org/) -->
<svg
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:cc="http://creativecommons.org/ns#"
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns:svg="http://www.w3.org/2000/svg"
xmlns="http://www.w3.org/2000/svg"
version="1.1"
width="360"
height="180"
id="svg2">
<defs
id="defs4" />
<metadata
id="metadata7">
<rdf:RDF>
<cc:Work
rdf:about="">
<dc:format>image/svg+xml</dc:format>
<dc:type
rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
<dc:title></dc:title>
</cc:Work>
</rdf:RDF>
</metadata>
<g
id="layer1">
"""
path_tpl = """
<path
d="%PATH%"
id="%PATH_ID%"
style="fill:none;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
"""
svg_footer = "</g></svg>"
# Set up paths
data_root = os.path.join(os.environ['HOME'], 'data')
#modelerc = giss.modele.read_modelerc()
#cmrun = modelerc['CMRUNDIR']
#savedisk = modelerc['SAVEDISK']
ifname = sys.argv[1]
ofname = ifname.replace('.dat', '.svg')
lons, lats = giss.io.noaa.read_coastline(ifname, 1)
out = open(ofname, 'w')
out.write(svg_header)
path_id = 1
points = []
for lon, lat in zip(lons, lats) :
if np.isnan(lon) or np.isnan(lat) :
# Process what we have
if len(points) > 2 :
out.write('\n<path d="')
out.write('m %f,%f L' % (points[0][0], points[0][1]))
for pt in points[1:] :
out.write(' %f,%f' % pt)
out.write('"\n id="path%d"\n' % (path_id))
# out.write('style="fill:none;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"')
out.write(' />\n')
path_id += 1
points = []
else :
lon += 180
lat = 180 - (lat + 90)
points.append((lon, lat))
out.write(svg_footer)
out.close()
# =============================================================
# svg2coastline.py
import os
import sys
import re
# Reads the output of Inkscape's "Plain SVG" format, outputs in NOAA MATLAB coastline format
mainRE = re.compile(r'\s*d=".*"')
lineRE = re.compile(r'\s*d="(m|M)\s*(.*?)"')
fname = sys.argv[1]
lons = []
lats = []
for line in open(fname, 'r') :
# Weed out extraneous lines in the SVG file
match = mainRE.match(line)
if match is None :
continue
match = lineRE.match(line)
# Stop if something is wrong
if match is None :
sys.stderr.write(line)
sys.exit(-1)
type = match.group(1)[0]
spairs = match.group(2).split(' ')
x = 0
y = 0
for spair in spairs :
if spair == 'L' :
type = 'M'
continue
(sdelx, sdely) = spair.split(',')
delx = float(sdelx)
dely = float(sdely)
if type == 'm' :
x += delx
y += dely
else :
x = delx
y = dely
lon = x - 180
lat = 90 - y
print '%f\t%f' % (lon, lat)
print 'nan\tnan'
Okay I think I have a partial solution.
The basic idea is that the paths used by drawcoastlines() are ordered by the size/area. Which means the first N paths are (for most applications) the main land masses and lakes and the later paths the smaller islands and rivers.
The issue is that the first N paths that you want will depend on the projection (e.g., global, polar, regional), if area_thresh has been applied and whether you want lakes or small islands etc. In other words, you will have to tweak this per application.
from mpl_toolkits.basemap import Basemap
import matplotlib.pyplot as plt
mp = 'cyl'
m = Basemap(resolution='c',projection=mp,lon_0=0,area_thresh=200000)
fill_color = '0.9'
# If you don't want lakes set lake_color to fill_color
m.fillcontinents(color=fill_color,lake_color='white')
# Draw the coastlines, with a thin line and same color as the continent fill.
coasts = m.drawcoastlines(zorder=100,color=fill_color,linewidth=0.5)
# Exact the paths from coasts
coasts_paths = coasts.get_paths()
# In order to see which paths you want to retain or discard you'll need to plot them one
# at a time noting those that you want etc.
for ipoly in xrange(len(coasts_paths)):
print ipoly
r = coasts_paths[ipoly]
# Convert into lon/lat vertices
polygon_vertices = [(vertex[0],vertex[1]) for (vertex,code) in
r.iter_segments(simplify=False)]
px = [polygon_vertices[i][0] for i in xrange(len(polygon_vertices))]
py = [polygon_vertices[i][1] for i in xrange(len(polygon_vertices))]
m.plot(px,py,'k-',linewidth=1)
plt.show()
Once you know the relevant ipoly to stop drawing (poly_stop) then you can do something like this...
from mpl_toolkits.basemap import Basemap
import matplotlib.pyplot as plt
mproj = ['nplaea','cyl']
mp = mproj[0]
if mp == 'nplaea':
m = Basemap(resolution='c',projection=mp,lon_0=0,boundinglat=30,area_thresh=200000,round=1)
poly_stop = 10
else:
m = Basemap(resolution='c',projection=mp,lon_0=0,area_thresh=200000)
poly_stop = 18
fill_color = '0.9'
# If you don't want lakes set lake_color to fill_color
m.fillcontinents(color=fill_color,lake_color='white')
# Draw the coastlines, with a thin line and same color as the continent fill.
coasts = m.drawcoastlines(zorder=100,color=fill_color,linewidth=0.5)
# Exact the paths from coasts
coasts_paths = coasts.get_paths()
# In order to see which paths you want to retain or discard you'll need to plot them one
# at a time noting those that you want etc.
for ipoly in xrange(len(coasts_paths)):
if ipoly > poly_stop: continue
r = coasts_paths[ipoly]
# Convert into lon/lat vertices
polygon_vertices = [(vertex[0],vertex[1]) for (vertex,code) in
r.iter_segments(simplify=False)]
px = [polygon_vertices[i][0] for i in xrange(len(polygon_vertices))]
py = [polygon_vertices[i][1] for i in xrange(len(polygon_vertices))]
m.plot(px,py,'k-',linewidth=1)
plt.show()
As per my comment to #sampo-smolander
from mpl_toolkits.basemap import Basemap
import matplotlib.pyplot as plt
fig = plt.figure(figsize=(8, 4.5))
plt.subplots_adjust(left=0.02, right=0.98, top=0.98, bottom=0.00)
m = Basemap(resolution='c',projection='robin',lon_0=0)
m.fillcontinents(color='gray',lake_color='white',zorder=2)
coasts = m.drawcoastlines(zorder=1,color='white',linewidth=0)
coasts_paths = coasts.get_paths()
ipolygons = range(83) + [84]
for ipoly in xrange(len(coasts_paths)):
r = coasts_paths[ipoly]
# Convert into lon/lat vertices
polygon_vertices = [(vertex[0],vertex[1]) for (vertex,code) in
r.iter_segments(simplify=False)]
px = [polygon_vertices[i][0] for i in xrange(len(polygon_vertices))]
py = [polygon_vertices[i][1] for i in xrange(len(polygon_vertices))]
if ipoly in ipolygons:
m.plot(px,py,linewidth=0.5,zorder=3,color='black')
else:
m.plot(px,py,linewidth=0.5,zorder=4,color='grey')
plt.savefig('world2.png',dpi=100)

Categories