How to turn individual points into a kernel density map? - python

I am trying to recreate in Python a map I made in Tableau. I'm pretty sure it's called a kernel density map (just "density" map in Tableau). Each point is just a single point and doesn't correspond with any kind of value.
I have plotted my points on a map, but am unable to figure out how to give them the contour appearance in the Tableau map. I see some examples that include np.meshgrid or matplotlib's contourf function, but I'm unable to apply it to my data because I don't have a Z coordinate (from what I can tell). Below is what I have currently:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from datetime import timedelta, date
import matplotlib
from pandas import Series, DataFrame
import geopandas as gpd
from geopandas import GeoDataFrame
from shapely.geometry import Point, mapping
# Plot the 2012 sightings as points on top of a map of US states
# (Alaska and Hawaii are filtered out of the state layer below).
# import data
df = pd.read_csv('./Data/complete.csv', on_bad_lines='skip') # csv had some bad data, had to skip
# drop unnecessary columns
df = df[['datetime', 'latitude', 'longitude']]
# parse 'datetime' into pandas datetimes, reading the day first
df['datetime'] = pd.to_datetime(df['datetime'], dayfirst=True)
# clean the data
# drop rows whose latitude string contains '/' or 'q'
df = df.drop(df[df.latitude.str.contains(r'[/q]')].index)
# convert lat/long to float
df['latitude'] = df.latitude.astype('float')
df['longitude'] = df.longitude.astype('float')
# create GDF from lat/long (x = longitude, y = latitude), tagged as EPSG:4326
gdf = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df.longitude, df.latitude)).set_crs('EPSG:4326')
# import shapefile
us_map = gpd.read_file(r'./Data/USA_States_(Generalized)/USA_States_Generalized.shp')
# remove AK and HI
us_map = us_map[~us_map['STATE_NAME'].isin(['Alaska', 'Hawaii'])]
# plotting only 2012 sightings: keep rows strictly between the two date strings
# (the filter is applied to `df`; a separate GeoDataFrame is built from it below)
sightings2012 = df[(df['datetime'] > '12/31/2011') & (df['datetime'] < '01/01/2013')]
# create GDF for 2012 sightings
gdf2012 = gpd.GeoDataFrame(sightings2012,
geometry=gpd.points_from_xy(sightings2012.longitude,
sightings2012.latitude)).set_crs('EPSG:4326')
# clip the sightings data to the state polygons
us_sightings_2012 = gpd.clip(gdf2012, us_map)
# NOTE(review): matplotlib.cm.get_cmap is reportedly deprecated in recent
# Matplotlib releases -- confirm against the installed version
cmap = matplotlib.cm.get_cmap('plasma')
# plot: states first, then the clipped points on the same axes
fig, ax = plt.subplots(1, 1)
us_map.plot(ax=ax)
us_sightings_2012.plot(ax=ax, cmap=cmap)
plt.show()
And here is my output:

Related

Folium HeatMapWithTime html file generated is blank

I created a self-contained code to create a HeatMapWithTime map but it shows up as a blank file. This code is run on Jupyter and the output is a 14KB file and I've tried to open it in Chrome, Safari, Firefox but it is still blank.
import folium
import pandas as pd
import numpy as np
from folium.plugins import HeatMapWithTime
# Build 50 random timestamped points and hand them to HeatMapWithTime.
# Generate dummy data
# NOTE(review): in the longitude call `low` is numerically greater than
# `high` -- confirm that this ordering is intended
latitudes = np.random.uniform(low=45.523, high=45.524, size=50)
longitudes = np.random.uniform(low=-122.675, high=-122.676, size=50)
# 50 sorted epoch-second values
times = np.sort(np.random.uniform(low=1580000000, high=1600000000, size=50))
data = {'latitude': latitudes, 'longitude': longitudes, 'time': times}
# Create a pandas dataframe from the dummy data
df = pd.DataFrame(data)
# interpret the numeric 'time' column as seconds and convert to datetimes
df['time'] = pd.to_datetime(df['time'], unit='s')
# Create a base map
# NOTE: the name `map` rebinds Python's builtin of the same name
map = folium.Map(location=[45.523, -122.675], zoom_start=13)
# Create a heat map with timeline
# NOTE(review): `data` here is one flat list of [latitude, longitude, time]
# rows and `index` is a pandas Series of strings; HeatMapWithTime presumably
# expects one list of points per time step -- verify against the folium docs
HeatMapWithTime(data=df[['latitude', 'longitude', 'time']].values.tolist(),
index=df['time'].dt.strftime("%Y-%m-%d %H:%M:%S"),
auto_play=True,
max_opacity=0.8).add_to(map)
# Save the map to an html file
map.save("heatmap_with_timeline.html")
Folium version: 0.14.0
Python version: 3.9.12
To begin with, the target data for the heatmap is time-series data in date format. Your sample data was raw (epoch) data, but it has been converted to date format. I also think the index used for the time animation of the heatmap needs to be a plain list. Finally, your sample data contains only a single latitude/longitude/value point per time step. Since this folium heatmap is a density heatmap, each time step needs its own group of several points. To build the data for the heatmap from your sample, I loop over the 50 time-series indexes and, for each one, add an array of 50 latitude/longitude/heatmap-value points.
For data structures and examples, please refer to the following references. HeatMapWithTime Plugin
import folium
import pandas as pd
import numpy as np
from folium.plugins import HeatMapWithTime
# Generate dummy data: 50 sorted epoch-second timestamps, one heat-map frame each
times = np.sort(np.random.uniform(low=1580000000, high=1600000000, size=50))

# `data` holds one list of [lat, lon, value] points per timestamp, so its
# length matches the length of the time index built below.
data = []
for _ in times:
    latitudes = np.random.uniform(low=45.423, high=45.524, size=50)
    # low must not exceed high: -122.676 is the smaller (more western) bound
    longitudes = np.random.uniform(low=-122.676, high=-122.575, size=50)
    value = np.random.uniform(0.0, 20.0, 50)
    data.append([[lat, lon, v] for lat, lon, v in zip(latitudes, longitudes, value)])

# The animation index is a plain list of formatted timestamp strings
index_time = pd.to_datetime(times, unit='s')
index_time = index_time.strftime("%Y-%m-%d %H:%M:%S").tolist()

# Create a base map
m = folium.Map(location=[45.523, -122.675], tiles="stamentoner", zoom_start=11)

# Create a heat map with timeline
hm = HeatMapWithTime(data,
                     index=index_time,
                     auto_play=True,
                     max_opacity=0.8)
hm.add_to(m)

# Save the map to an html file (disabled; the map is displayed inline instead)
# m.save("heatmap_with_timeline.html")
m
This should help.
import pandas as pd
import folium
from folium.plugins import HeatMap
#for_map = pd.read_csv('campaign_contributions_for_map.tsv', sep='\t')
# Weighted heat map: one (latitude, longitude, stars) point per row of the CSV.
df = pd.read_csv('C:\\your_path\\business.csv')
df.head(3)

# the largest value in the 'stars' column is handed to HeatMap as max_val
max_amount = float(df['stars'].max())

# (latitude, longitude, stars) triples, one per row
weighted_points = list(
    zip(df.latitude.values, df.longitude.values, df.stars.values)
)

hmap = folium.Map(location=[42.5, -75.5], zoom_start=7)
hm_wide = HeatMap(
    weighted_points,
    min_opacity=0.2,
    max_val=max_amount,
    radius=17,
    blur=15,
    max_zoom=1,
)
hmap.add_child(hm_wide)
import pandas as pd
import gmplot
import matplotlib.pyplot as plt
import folium
from folium import plugins
import seaborn as sns
# Draw every row of the CSV as a circle marker, then overlay a heat map
# built from the same latitude/longitude columns.
df = pd.read_csv('C:\\your_path\\lat_lon.csv')
m = folium.Map([40.7379601, -73.9666422], zoom_start=11)
m
X = df[['longitude', 'latitude', 'LOT']].copy()
# mark each station as a point; the 'LOT' value becomes the marker popup
for _, row in X.iterrows():
    folium.CircleMarker(
        [row['latitude'], row['longitude']],
        radius=15,
        popup=row['LOT'],
        fill_color="#3db7e4",  # divvy color
    ).add_to(m)
# convert to (n, 2) nd-array format for heatmap
stationArr = df[['latitude', 'longitude']].to_numpy()
# plot heatmap
m.add_child(plugins.HeatMap(stationArr, radius=15))
m
https://github.com/ASH-WICUS/Notebooks/blob/master/Plotting%20Longitude%20and%20Latitude%20Coordinates%20using%20Folium%20CircleMarker.ipynb
https://github.com/ASH-WICUS/Notebooks/blob/master/Plotting%20Longitude%20and%20Latitude%20to%20Visualize%20Spatial%20Data%20for%20NYC%20Taxis.ipynb

How to add an additional point location while plotting geopandas dataframe using matplotlib

I am using the following code to plot an additional coordinate/point on top of a matplotlib plot of a geopandas dataframe. However, as the image indicates the point is not overlapping the choropleth - it should, since the latitude & longitude lies within the geographic location for which the data has been obtained from census. Please advise.
from cenpy import products
import pandas as pd
import matplotlib.pyplot as plt
import geopandas
%matplotlib inline
# Plot a tract-level choropleth of 'pct_unemployed' and overlay one extra point.
# fetch the two ACS 2018 variables for Orange County, CA at tract level
tustin = products.ACS(2018).from_county('Orange County, CA', level='tract',
variables=['B23025_005E', 'B23025_003E'])
# percentage computed as B23025_005E / B23025_003E * 100
tustin['pct_unemployed'] = tustin.B23025_005E / tustin.B23025_003E * 100
# show which CRS the downloaded frame uses
print(tustin.crs)
# additional data co-ordinate
# NOTE(review): these magnitudes look like projected coordinates rather than
# degrees -- confirm they are expressed in the CRS printed above
lat = 3374569.5
lon = -11782886.0
df = pd.DataFrame(
{'Place': ['X'],
'Latitude': [lat],
'Longitude': [lon]})
# one point geometry per row, x = Longitude, y = Latitude
gdf = geopandas.GeoDataFrame(
df, geometry=geopandas.points_from_xy(df.Longitude, df.Latitude))
# setting the same crs as the main geopandas dataframe
# NOTE(review): the CRS is hard-coded to epsg:3857 here instead of being
# copied from tustin.crs -- verify that the two actually match
gdf.crs = {'init': 'epsg:3857'}
# plot the first dataframe 'tustin' as a choropleth
f, ax = plt.subplots(1,1,figsize=(20,20))
# rows without a 'pct_unemployed' value are dropped before plotting
tustin.dropna(subset=['pct_unemployed'], axis=0).plot('pct_unemployed', ax=ax, cmap='plasma')
ax.set_facecolor('k')
# draw the extra point in red on the same axes
gdf.plot(ax=ax, color='red')
#ax.plot(-13144890.450, 3992372.350, "ro")
plt.show()
enter image description here

How to create a Boxplot with Timestamp using Matplotlib and Seaborn?

I have been trying to get a boxplot with each box representing an emotion over a period of time.
The data frame used to plot this contains timestamp and emotion name. I have tried converting the timestamp into a string first and then to datetime and finally to int64. This resulted in the gaps between x labels as seen in the plot. I have tried the same without converting to int64, but the matplotlib doesn't seem to allow the dates in the plot.
I'm attaching the code I have used here:
import matplotlib as mpl
import matplotlib.pyplot as plt
plt.style.use('classic')
%matplotlib qt
import pandas as pd
import numpy as np
from datetime import datetime
import seaborn as sns
# Box plot of emotion labels against a numeric encoding of the timestamp.
data = pd.read_csv("TX-governor-sentiment.csv")
## check data types
data.dtypes
# drop rows with all missing values
data = data.dropna(how='all')
## transforming the timestamp column
# convert from obj type to string then to date type
data['timestamp2'] = data['timestamp']
data['timestamp2'] = pd.to_datetime(data['timestamp2'].astype(str), format='%m/%d/%Y %H:%M')
# convert to number format with the following logic:
# yyyymmddhourmin --> this allows us to treat dates as a continuous variable
data['timestamp2'] = data['timestamp2'].dt.strftime('%Y%m%d%H%M')
data['timestamp2'] = data['timestamp2'].astype('int64')
print(data[['timestamp', 'timestamp2']])
# data transformation for data from Orange:
# rows whose sentiment is 0 are labelled 'Neutral', all others keep 'Emotion'
df = pd.DataFrame(columns=('timestamp', 'emotion'))
for index, row in data.iterrows():
    if row['sentiment'] == 0:
        df.loc[index] = [row['timestamp2'], 'Neutral']
    else:
        df.loc[index] = [row['timestamp2'], row['Emotion']]
# Plot using Seaborn & Matplotlib
# convert timestamp in case it's not in number format
df['timestamp'] = df['timestamp'].astype('int64')
fig = plt.figure(figsize=(10, 10))
#colors = {"Neutral": "grey", "Joy": "pink", "Surprise":"blue"}
# visualize as boxplot
plot_ = sns.boxplot(x="timestamp", y="emotion", data=df, width=0.5, whis=np.inf)
# add data point on top
plot_ = sns.stripplot(x="timestamp", y="emotion", data=df, alpha=0.8, color="black")
fig.canvas.draw()
# modify ticks and labels
plt.xlim([202003010000, 202004120000])
plt.xticks([202003010000, 202003150000, 202003290000, 202004120000], ['2020/03/01', '2020/03/15', '2020/03/29', '2020/04/12'])
# add colors: keep each box's RGB and set its alpha to 0.3
for patch in plot_.artists:
    r, g, b, a = patch.get_facecolor()
    patch.set_facecolor((r, g, b, .3))
Please let me know how I can overcome this problem of gaps in the boxplot. Thank you!

Geopandas data not plotting correctly

I don't have very much experience with GeoPandas at all, so I am a little lost. I am trying to plot this data
jupyterNotebook dataframe image
I have followed many references on the GeoPandas website, read through blog posts, and this stack overflow post. All of them tell me to do the same thing, but it still does not seem to be working.
Ploting data in geopandas
When I try to plot this data, it comes out this like:
enter image description here
All I am trying to do is plot points from this csv file that has latitude and longitude data onto a map (eventually a map that I have loaded from an .shp file).
Anyways, here is the code I have written so far:
import csv
import geopandas as gpd
import pandas as pd
import matplotlib.pyplot as plt
import descartes
from shapely.geometry import Point, Polygon
# Turn the station CSV into a GeoDataFrame of points and plot it.
# Load in the CSV Bike Station Location Data
df = pd.read_csv('HRSQ12020.csv')
# combine the longitude and latitude into [Longitude, Latitude] pairs
df['coordinates'] = df[['Longitude', 'Latitude']].values.tolist()
# Change the coordinates to a geoPoint
df['coordinates'] = df['coordinates'].apply(Point)
df
# convert df to a geodf, using the 'coordinates' column as the geometry
# (no crs argument is passed here)
df = gpd.GeoDataFrame(df, geometry='coordinates')
df
# plot the geodf
df.plot(figsize=(20,10));
Any ideas what is wrong? I check all 100 coordinates and they all seem to be fine. Any suggestions would be great! Thanks!
It's likely to be a problem of projection system. A good thing to do is defining immediately the crs when creating a Geopandas object. If you try,
df = gpd.GeoDataFrame(df, geometry='coordinates', crs = 4326)
maybe you will be able to see your points. I put "4326" because your x-y coordinates look like GPS coordinates, which follow the WGS84 standard (crs code: 4326). Change to the relevant crs code if that is not the right one.
Those responses above are helpful. This also turned out to be another solution as lingo suggested to set the crs. I was getting an error, but this worked out when I ignored the error. Here is my code that ended up working.
import csv
import geopandas as gpd
import pandas as pd
import matplotlib.pyplot as plt
import descartes
from shapely.geometry import Point, Polygon
# Load the station CSV, correct one bad latitude, and plot the stations
# as a GeoDataFrame tagged with EPSG:4326.
# Load in the CSV Bike Station Location Data
df = pd.read_csv('HRSQ12020.csv')
# combine the longitude and latitude to make coordinates
df['coordinates'] = df[['Longitude', 'Latitude']].values.tolist()
# Change the coordinates to a geoPoint
df['coordinates'] = df['coordinates'].apply(Point)
df.head()
# fixing wrong negative value for Latitude: inspect the row holding the
# minimum latitude, then overwrite the value at index 80.
# The 'coordinates' column above was built before this correction; the
# geometry used for the GeoDataFrame is rebuilt from the corrected columns below.
df.loc[df["Latitude"] == df["Latitude"].min()]
df.at[80, 'Latitude'] = 40.467715
# total number of racks across all stations
rackTot = df['NumRacks'].sum()
# authority string instead of the deprecated {'init': 'epsg:4326'} dict
crs = 'EPSG:4326'
geometry = [Point(xy) for xy in zip(df.Longitude, df.Latitude)]
geobikes = gpd.GeoDataFrame(df, crs=crs, geometry=geometry)
geobikes.head()
# plot the geodf
# not working for some reason, fix later
geobikes.plot()
When I run your code with the first four rows of coords, I get what you'd expect. From the extent of your plot, it looks like you might have some negative latitude values. Can you do df['Latitude'].min() to check?
import csv
import geopandas as gpd
import pandas as pd
import matplotlib.pyplot as plt
from shapely.geometry import Point, Polygon
# Minimal reproduction: four stations given as literal coordinates.
df = pd.DataFrame(
    {
        'Latitude': [40.441326, 40.440877, 40.439030, 40.437200],
        'Longitude': [-80.004679, -80.003080, -80.001860, -80.000375],
    }
)
# [Longitude, Latitude] pairs, one per row
lon_lat_pairs = df[['Longitude', 'Latitude']].to_numpy().tolist()
df['coordinates'] = lon_lat_pairs
# turn each pair into a shapely Point
df['coordinates'] = df['coordinates'].apply(Point)
df
# wrap the frame as a GeoDataFrame with 'coordinates' as its geometry column
df = gpd.GeoDataFrame(df, geometry='coordinates')
df
# draw the points
df.plot(figsize=(20, 10));
You can also use plt.subplots() and then set xlim and ylim for your data.
# Plot four points on explicit axes whose limits are derived from the data.
df = pd.DataFrame({'Latitude': [40.441326, 41.440877, 42.439030, 43.437200],
                   'Longitude': [-78.004679, -79.003080, -80.001860, -81.000375]})
df['coordinates'] = df[['Longitude', 'Latitude']].values.tolist()
# Change the coordinates to a geoPoint
df['coordinates'] = df['coordinates'].apply(Point)
df
# convert df to a geodf
df = gpd.GeoDataFrame(df, geometry='coordinates')
print(type(df))
# plot the geodf
fig, ax = plt.subplots(figsize=(14, 6))
df.plot(ax=ax)
# total_bounds is (minx, miny, maxx, maxy); pad the box by 1 on every side
min_x, min_y, max_x, max_y = df.total_bounds
xlim = [min_x - 1, max_x + 1]
ylim = [min_y - 1, max_y + 1]
# Use the computed limits: a fixed y-range of [40, 42] would cut off the
# points at latitude 42.439030 and 43.437200.
ax.set_xlim(xlim)
ax.set_ylim(ylim)
plt.show()

Uncertain why trendline is not appearing on matplotlib scatterplot

I am trying to plot a trendline for a matplotlib scatterplot and am uncertain why the trendline is not appearing. What should I change in my code to make the trendline appear? Event is a categorical data type.
I've followed what most other stackoverflow questions suggest about plotting a trendline, but am uncertain why my trendline is not appearing.
#import libraries
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from pandas.plotting import register_matplotlib_converters
# Scatter plot of one pivoted column against time, plus a degree-1 fit line.
# register datetime converters
register_matplotlib_converters()
# read dataset using pandas
dataset = pd.read_csv("UsrNonCallCDCEvents_CDCEventType.csv")
# convert date to datetime type
dataset['Interval'] = pd.to_datetime(dataset['Interval'])
# convert other columns to numeric type
for cols in list(dataset):
    if cols not in ('Interval', 'CDCEventType'):
        dataset[cols] = pd.to_numeric(dataset[cols])
# create pivot of dataset: one row per Interval, one column per CDCEventType
pivot_dataset = dataset.pivot(index='Interval', columns='CDCEventType', values='AvgWeight(B)')
# create scatterplot with trendline
# the datetime index is cast to float64 so it can be used as numeric x values
x = pivot_dataset.index.values.astype('float64')
y = pivot_dataset['J-STD-025']
plt.scatter(x, y)
# fit a first-degree polynomial and draw it as a red dashed line
z = np.polyfit(x, y, 1)
p = np.poly1d(z)
plt.plot(x, p(x), "r--")
plt.show()
This is the graph currently being output. I am trying to get this same graph, but with a trendline: https://imgur.com/a/o18a5Y3
It's also fine that x axis is not showing dates
A snippet of my dataframe looks like this: https://imgur.com/a/xJAcgEI
I've painted out the irrelevant column names

Categories