Country labels on Basemap - python

I would like to plot a trajectory on a Basemap, and have country labels (names) shown as an overlay.
Here is the current code and the map it produces:
import pandas as pd
import matplotlib.pyplot as plt
from mpl_toolkits.basemap import Basemap
# Load the GPS track. The file has no header row, so the columns are named by
# hand. `DataFrame.from_csv` was removed in pandas 1.0; this `read_csv` call
# spells out the defaults it used (first column as index, dates parsed).
path = "path\\to\\data"
animal_data = pd.read_csv(path, header=None, index_col=0, parse_dates=True)
animal_data.columns = ["date", "time", "gps_lat", "gps_long"]
# data cleaning omitted for clarity

# Mercator map whose corners are the track's bounding box padded by
# 10 degrees on every side; lat_0/lon_0 are the mean track position.
params = {
    'projection': 'merc',
    'lat_0': animal_data.gps_lat.mean(),
    'lon_0': animal_data.gps_long.mean(),
    'resolution': 'h',
    'area_thresh': 0.1,
    'llcrnrlon': animal_data.gps_long.min() - 10,
    'llcrnrlat': animal_data.gps_lat.min() - 10,
    'urcrnrlon': animal_data.gps_long.max() + 10,
    'urcrnrlat': animal_data.gps_lat.max() + 10,
}

# Named `bmap` rather than `map` so the builtin `map` is not shadowed.
bmap = Basemap(**params)
bmap.drawcoastlines()
bmap.drawcountries()
bmap.fillcontinents(color='coral')
bmap.drawmapboundary()

# Calling the Basemap instance converts lon/lat into map coordinates.
x, y = bmap(animal_data.gps_long.values, animal_data.gps_lat.values)
bmap.plot(x, y, 'b-', linewidth=1)
plt.show()
This results in the map:
This is a map of the trajectory of a migrating bird. While this is a very nice map (!), I need country-name labels so it is easy to determine the countries the bird is flying through.
Is there a straight-forward way of adding the country names?

My solution relies on an external data file that may or may not be available in the future. However, similar data can be found elsewhere, so that should not be too much of a problem.
First, the code for printing the country-name labels:
import pandas as pd
import matplotlib.pyplot as plt
from mpl_toolkits.basemap import Basemap
class MyBasemap(Basemap):
    """Basemap that can overlay country-name labels on the visible map area."""

    # Default country table: semicolon-separated, with 28 leading lines that
    # are skipped before the header row.
    COUNTRY_DATA_URL = (
        "http://opengeocode.org/cude/download.php"
        "?file=/home/fashions/public_html/opengeocode.org/download/cow.txt"
    )

    def printcountries(self, d=3, max_len=12, source=None):
        """Write the name of every country located inside the map.

        Parameters
        ----------
        d : number
            Margin in degrees. A country is labelled only if its coordinates
            lie more than ``d`` degrees inside each map edge.
        max_len : int
            Names are truncated to this many characters.
        source : str or file-like, optional
            Anything ``pandas.read_csv`` accepts, laid out like the default
            file (``latitude``, ``longitude`` and ``BGN_name`` columns).
            Defaults to ``COUNTRY_DATA_URL``, so a local copy can be used
            when the remote file is unavailable.
        """
        if source is None:
            source = self.COUNTRY_DATA_URL
        data = pd.read_csv(source, sep=";", skiprows=28)

        inside = (
            (data.latitude > self.llcrnrlat + d)
            & (data.latitude < self.urcrnrlat - d)
            & (data.longitude > self.llcrnrlon + d)
            & (data.longitude < self.urcrnrlon - d)
        )
        # A missing name is read as NaN (a float), which cannot be sliced.
        visible = data[inside].dropna(subset=["BGN_name"])
        if visible.empty:
            return

        # Project all label positions in one call instead of once per row.
        xs, ys = self(visible.longitude.values, visible.latitude.values)
        for x, y, name in zip(xs, ys, visible.BGN_name):
            plt.text(x, y, s=str(name)[:max_len])
All this does is to download a country-location database from here, then select countries that are currently on the map, and label them.
The complete code:
import pandas as pd
import matplotlib.pyplot as plt
from mpl_toolkits.basemap import Basemap
class MyBasemap(Basemap):
    """Basemap that can overlay country-name labels on the visible map area."""

    # Default country table: semicolon-separated, with 28 leading lines that
    # are skipped before the header row.
    COUNTRY_DATA_URL = (
        "http://opengeocode.org/cude/download.php"
        "?file=/home/fashions/public_html/opengeocode.org/download/cow.txt"
    )

    def printcountries(self, d=3, max_len=12, source=None):
        """Write the name of every country located inside the map.

        Parameters
        ----------
        d : number
            Margin in degrees. A country is labelled only if its coordinates
            lie more than ``d`` degrees inside each map edge.
        max_len : int
            Names are truncated to this many characters.
        source : str or file-like, optional
            Anything ``pandas.read_csv`` accepts, laid out like the default
            file (``latitude``, ``longitude`` and ``BGN_name`` columns).
            Defaults to ``COUNTRY_DATA_URL``, so a local copy can be used
            when the remote file is unavailable.
        """
        if source is None:
            source = self.COUNTRY_DATA_URL
        data = pd.read_csv(source, sep=";", skiprows=28)

        inside = (
            (data.latitude > self.llcrnrlat + d)
            & (data.latitude < self.urcrnrlat - d)
            & (data.longitude > self.llcrnrlon + d)
            & (data.longitude < self.urcrnrlon - d)
        )
        # A missing name is read as NaN (a float), which cannot be sliced.
        visible = data[inside].dropna(subset=["BGN_name"])
        if visible.empty:
            return

        # Project all label positions in one call instead of once per row.
        xs, ys = self(visible.longitude.values, visible.latitude.values)
        for x, y, name in zip(xs, ys, visible.BGN_name):
            plt.text(x, y, s=str(name)[:max_len])
# Load the GPS track. The file has no header row, so the columns are named by
# hand. `DataFrame.from_csv` was removed in pandas 1.0; this `read_csv` call
# spells out the defaults it used (first column as index, dates parsed).
path = "path\\to\\data"
animal_data = pd.read_csv(path, header=None, index_col=0, parse_dates=True)
animal_data.columns = ["date", "time", "gps_lat", "gps_long"]

# Mercator map whose corners are the track's bounding box padded by
# 10 degrees on every side; lat_0/lon_0 are the mean track position.
params = {
    'projection': 'merc',
    'lat_0': animal_data.gps_lat.mean(),
    'lon_0': animal_data.gps_long.mean(),
    'resolution': 'h',
    'area_thresh': 0.1,
    'llcrnrlon': animal_data.gps_long.min() - 10,
    'llcrnrlat': animal_data.gps_lat.min() - 10,
    'urcrnrlon': animal_data.gps_long.max() + 10,
    'urcrnrlat': animal_data.gps_lat.max() + 10,
}

plt.figure()
# Named `bmap` rather than `map` so the builtin `map` is not shadowed.
bmap = MyBasemap(**params)
bmap.drawcoastlines()
bmap.fillcontinents(color='coral')
bmap.drawmapboundary()
bmap.drawcountries()
# Add the country-name labels.
bmap.printcountries()

# Calling the Basemap instance converts lon/lat into map coordinates.
x, y = bmap(animal_data.gps_long.values, animal_data.gps_lat.values)
bmap.plot(x, y, 'b-', linewidth=1)
plt.show()
and finally, the result:
Clearly this isn't as carefully labeled as one might hope, and some heuristics regarding country size, name length and map size should be implemented to make this perfect, but this is a good starting point.

Related

How to name each city in a matplotlib/geopandas map from csv file?

I have a csv file containing the cities in Senegal. There's a city column, and I have already used the long and lat columns to plot the points, but now I'd like each point to have its city name next to it, and maybe later more information. How can I do this with matplotlib? Thank you.
import geopandas as gpd
import matplotlib.pyplot as plt
import os
import pandas as pd
# Inputs: the administrative-boundary shapefile and the city table (CSV),
# both stored in the same directory.
data_dir = "senegal_administrative"
cities = pd.read_csv(os.path.join(data_dir, "sn.csv"))
senegal = gpd.read_file(os.path.join(data_dir, "senegal_administrative.shp"))

# Draw the country; the returned axes is reused for the city layer below.
axis = senegal.plot(color="lightblue", edgecolor="black", figsize=(20, 20))

# Build point geometries from the lng/lat columns and print the result.
city_points = gpd.points_from_xy(cities.lng, cities.lat)
def_geo = gpd.GeoDataFrame(cities, geometry=city_points)
print(def_geo)

# Overlay the cities in black on the same axes.
def_geo.plot(ax=axis, color="black")
plt.show()
Here's a Dropbox link if you need the files; I really appreciate it: https://www.dropbox.com/sh/dr54pbc9a5zc5ke/AADMxnYHe4maAnwKArCsh1m8a?dl=0
To annotate the map, you can use .annotate() function.
Here is the relevant parts of the code:
# Base map of the country; keep the axes so the cities can be drawn on it.
axis = senegal.plot(color="lightblue", edgecolor="black", figsize=(12, 12))

# Draw all city markers with a single scatter call rather than one per row.
axis.scatter(cities.lng, cities.lat, s=10, color='red')

# Label each marker with its city name, anchored at the marker position.
for city, lng, lat in zip(cities.city, cities.lng, cities.lat):
    axis.annotate(city, (lng, lat))
Plot will be similar to this:

LAS files - Python

I'm pretty sure this is a very menial question about LAS files, but I wasn't entirely sure how to google this. For context, I'm trying to create a plot given the information in a LAS file.
import lasio as ls
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Read the LAS file and convert its curves to a DataFrame.
las = ls.read(r'1051325649.las')
df = las.df()

fig = plt.subplots(figsize=(10, 10))

# Set up three side-by-side tracks on a 1x3 grid.
grid_shape = (1, 3)
ax1, ax2, ax3 = [
    plt.subplot2grid(grid_shape, (0, col), rowspan=1, colspan=1)
    for col in range(3)
]

# First track: look up "GR" and "DEPT" by name in the well DataFrame.
ax1.plot("GR", "DEPT", color="green", data=df)
ax1.set_title("Gamma")   # track title
ax1.set_xlim(0, 200)     # limits for the plotted curve
ax1.set_ylim(400, 1000)  # depth range
ax1.grid()               # show the grid
The LAS file pretty much looks like this where I want to create a plot where the far left column "DEPT" should be the X-axis. However, the "DEPT" or depth column isn't able to be made into a format to allow for me to plot it. **Note: there is a GR column on the right not in this picture, so don't worry. Any tips would help greatly.
Short answer:
plt.plot expects that both "GR" and "DEPT" are columns in df, however the latter (DEPT) is not a column, it is the index. You can solve it by converting the index in df to a column:
# Turn the index into an ordinary column so "DEPT" can be looked up by name.
depth_as_column = df.reset_index()
ax1.plot("GR", "DEPT", color="green", data=depth_as_column)
When reading .las files using lasio library and converting them to pandas dataframe, it automatically sets DEPT as the index for the dataframe.
There are two solutions for this problem:
Use the data as-is:
import matplotlib.pyplot as plt
import lasio
# Plot the GR curve against the DataFrame's index.
las = lasio.read('filename.las')
well_df = las.df()
plt.plot(well_df["GR"], well_df.index)
And well_df.index will be the DEPT values.
Reset the index and use DEPT as a column
import matplotlib.pyplot as plt
import lasio
# Read the file, move the index into a regular column, then plot GR
# against the DEPT column.
las = lasio.read('filename.las')
well_df = las.df().reset_index()
plt.plot(well_df["GR"], well_df["DEPT"])

Annotating scatterplot points with DF column text Matplotlib

I'm fairly new to Python and I'm struggling annotating plots at the minute.
I've come from R so I'm used to the ease of being able to annotate scatterplot points with minimum code.
Code:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
# Fetch the shared table; the first table found on the page is used.
url = 'https://fbref.com/en/share/nXtrf'
df = pd.read_html(url)[0]

# Keep four top-level column groups, then drop the top header level.
top_level_groups = ['Unnamed: 1_level_0', 'Unnamed: 2_level_0', 'Play', 'Perf']
df = df[top_level_groups].copy()
df.columns = df.columns.droplevel()
df = df[['Player', 'Squad', 'Min', 'SoTA', 'Saves']]
df = df.drop([25])

# Convert the three stat columns to numbers.
for stat in ('Min', 'SoTA', 'Saves'):
    df[stat] = pd.to_numeric(df[stat])

# Blank out Min wherever it is not above 1600; dropna then removes those
# rows along with any other row that has a missing value.
df['Min'] = df['Min'].where(df['Min'] > 1600)
df = df.dropna()

df.plot(x='Saves', y='SoTA', kind="scatter")
I've tried numerous ways to annotate this plot. I'd like the points to be annotated with corresponding data from 'Player' column.
I've tried using a label_point function that I found while looking for a workaround, but I keep getting KeyError: 0 with most of the approaches I try.
Any assistance would be great. Thanks.
You could loop through both columns and add a text for each entry. Note that you need to save the ax returned by df.plot(...).
# Keep the axes returned by the plot call so text can be added to it.
ax = df.plot(x='Saves', y='SoTA', kind="scatter")

# Write each player's name at that player's (Saves, SoTA) position.
labelled_points = zip(df['Saves'], df['SoTA'], df['Player'])
for saves, shots, player in labelled_points:
    ax.text(saves, shots, f'{player}', ha='left', va='bottom')

# Extend the right x-limit by 15% of the current span to fit the texts.
left, right = ax.get_xlim()
span = right - left
ax.set_xlim(left, right + 0.15 * span)
An alternative is to use the mplcursors library to show an annotation while hovering (or after a click):
import mplcursors
# Create an mplcursors cursor with hover mode enabled.
# NOTE(review): hover=True presumably shows the annotation on mouse-over
# rather than on click -- confirm against the mplcursors documentation.
mplcursors.cursor(hover=True)

how to rotate a seaborn lineplot

How can I rotate a seaborn.lineplot so that the result will be as a function of y and not a function of x.
For example, this code:
import pandas as pd
import seaborn as sns
# Five observations as (group, val) rows: three in group 0, two in group 1.
rows = [[0, 1], [0, 2], [0, 1.5], [1, 1], [1, 5]]
df = pd.DataFrame(rows, columns=['group', 'val'])

# Line plot of val against group.
sns.lineplot(data=df, x='group', y='val')
Create this figure:
But is there a way to rotate the figure in 90° ? so that in the X we will have "val" and in Y we will have "group" and the std will go from left to right and not from bottom to up.
Thanks
EDIT: I've opened a ticket in seaborn to ask for this feature: https://github.com/mwaskom/seaborn/issues/1661
Per the seaborn docs on lineplot, the dataframe passed to data must be
Tidy (“long-form”) dataframe where each column is a variable and each row is an observation.
Which seems to imply there is no way to force the axes to switch, even by manipulating the data. If there is a way to do that I haven't found it - I'm sure there is a more elegant way to do this, but one way you could go about it is to do it by hand so to speak. Something like this would do the trick
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
df = pd.DataFrame([[0,1],[0,2],[0,1.5],[1,1],[1,5]], columns=['group','val'])

# Per-group mean, minimum and maximum of the value column. groupby places
# every group at its own key, so groups need not be evenly spaced or
# pre-sorted in the data.
# This takes the min and max for the band, but it could just as easily use
# the standard deviation (e.g. mean - std and mean + std).
group_col, val_col = df.columns[0], df.columns[1]
stats = df.groupby(group_col)[val_col].agg(['mean', 'min', 'max'])
positions = stats.index.values

# Rotated layout: the value runs along x and the group along y.
plt.ylabel(group_col)
plt.xlabel(val_col)
plt.plot(stats['mean'].values, positions,
         color="#5a9edd", linestyle="-", linewidth=1.5)
# Horizontal band from each group's minimum to its maximum.
plt.fill_betweenx(positions, stats['min'].values, stats['max'].values,
                  alpha=0.3)
This will give you the following plot:
For brevity this uses the minimum and maximum from each group to give the error band, but that can be easily changed to standard error or standard deviation as needed.
Consider what you'd do if not using seaborn. You would calculate the mean and standard deviation and plot those as a function of the group. Now it is quite straight forward to exchange x and y for a plot(x,y): plot(y,x). For the filled region, you can use fill_betweenx instead of fill_between.
Below are the two cases for comparison.
import pandas as pd
import matplotlib.pyplot as plt
df = pd.DataFrame([[0,1],[0,2],[0,1.5],[1,1],[1,5]], columns=['group','val'])

# Mean and standard deviation of "val" within each group.
val_by_group = df.groupby("group")["val"]
val_mean = val_by_group.mean()
val_std = val_by_group.std()

groups = val_mean.index
center = val_mean.values
lower = (val_mean - val_std).values
upper = (val_mean + val_std).values

fig, (ax, ax2) = plt.subplots(ncols=2)

# Left panel: group on x, value on y, with a mean +/- std band.
ax.plot(groups, center)
ax.fill_between(groups, lower, upper, alpha=.5)
ax.set(xlabel="group", ylabel="val")

# Right panel: the same data with x and y exchanged.
ax2.plot(center, groups)
ax2.fill_betweenx(groups, lower, upper, alpha=.5)
ax2.set(ylabel="group", xlabel="val")

fig.tight_layout()
plt.show()

Plotting pandas dataframe with two groups

I'm using Pandas and matplotlib to try to replicate this graph from tableau:
So far, I have this code:
# Sum the data for every (Region, Rep) pair.
rep_totals = df.groupby(["Region", "Rep"]).sum()

# Regroup the price totals by Region (index level 0), take the five largest
# entries in each region, and draw them as bars.
price_by_region = rep_totals["Total Price"].groupby(level=0, group_keys=False)
top_five = price_by_region.nlargest(5)
top_five.plot(kind="bar")
Which produces this graph:
It correctly groups the data, but is it possible to get it grouped similar to how Tableau shows it?
You can create some lines and labels using the respective matplotlib methods (ax.text and ax.axvline).
import pandas as pd
import numpy as np; np.random.seed(5)
import matplotlib.pyplot as plt
# Example data: 45 rows of (Region, Rep, Total Price) with random prices.
a = ["West"]*25 + ["Central"]*10 + ["East"]*10
b = ["Mattz","McDon","Jeffs","Warf","Utter"]*5 + ["Susanne","Lokomop"]*5 + ["Richie","Florence"]*5
c = np.random.randint(5, 55, size=len(a))
df = pd.DataFrame({"Region": a, "Rep": b, "Total Price": c})

# Total per (Region, Rep), then the five largest totals within each region.
group = df.groupby(["Region", "Rep"]).sum()
total_price = group["Total Price"].groupby(level=0, group_keys=False)
gtp = total_price.nlargest(5)
ax = gtp.plot(kind="bar")

# draw lines and titles
# `count` holds the number of bars per region and `ends` the running total,
# i.e. the bar position just past each region's last bar. Plain arrays are
# iterated so nothing depends on positional lookup in a string-labelled
# Series (deprecated in pandas 2.1).
count = gtp.groupby("Region").count()
ends = count.cumsum()
for title, n_bars, end in zip(count.index.values, count.values, ends.values):
    # Separator halfway between this region's last bar and the next bar.
    ax.axvline(end - .5, lw=0.8, color="k")
    # Region title centred over its bars: x in data units, y in axes units.
    ax.text(end - (n_bars + 1) / 2., 1.02, title, ha="center",
            transform=ax.get_xaxis_transform())

# shorten xticklabels: take the Rep names from the index instead of parsing
# the text of the generated tick labels.
ax.set_xticklabels(gtp.index.get_level_values("Rep").tolist())
plt.show()

Categories