Related
I have a dataset from an aircraft flight and I am trying to plot the position of the plane (longitude x latitude) then color that line by the altitude of the plane at those coordinates. My code looks like this:
lat_data = np.array( [ 39.916294, 39.87139 , 39.8005 , 39.70801 , 39.64645 , 39.58172 ,
39.537853, 39.55141 , 39.6787 , 39.796528, 39.91702 , 40.008347,
40.09513 , 40.144157, 40.090584, 39.96447 , 39.838924, 39.712112,
39.597103, 39.488377, 39.499096, 39.99354 , 40.112175, 39.77281 ,
39.641186, 39.51512 , 39.538853, 39.882736, 39.90413 , 39.811333,
39.73279 , 39.65676 , 39.584026, 39.5484 , 39.54484 , 39.629486,
39.96 , 40.07143 , 40.187405, 40.304718, 40.423153, 40.549305,
40.673313, 40.794548, 40.74402 , 40.755558, 40.770306, 40.73574 ,
40.795086, 40.774628] )
long_data = np.array( [ -105.13034 , -105.144104, -105.01132 , -104.92708 , -104.78505 ,
-104.6449 , -104.49255 , -104.36578 , -104.32623 , -104.31285 ,
-104.32199 , -104.41774 , -104.527435, -104.673935, -104.81152 ,
-104.82184 , -104.81882 , -104.81314 , -104.74657 , -104.78108 ,
-104.93442 , -104.98039 , -105.0168 , -105.04967 , -105.056564,
-105.03639 , -105.13429 , -105.05214 , -105.17435 , -105.070526,
-104.93587 , -104.80029 , -104.65973 , -104.50339 , -104.33972 ,
-104.21634 , -103.96216 , -103.84808 , -103.72534 , -103.60455 ,
-103.48926 , -103.376495, -103.25937 , -103.10858 , -103.08469 ,
-103.24878 , -103.4169 , -103.53073 , -103.23694 , -103.41254 ] )
altitude_data = np.array( [1.6957603e+00, 1.9788861e+00, 1.8547169e+00, 1.8768315e+00,
1.9633590e+00, 2.0504241e+00, 2.1115899e+00, 2.1085002e+00,
1.8621666e+00, 1.8893014e+00, 1.8268168e+00, 1.7574688e+00,
1.7666028e+00, 1.7682364e+00, 1.8120643e+00, 1.7637002e+00,
1.8054264e+00, 1.9149075e+00, 2.0173934e+00, 2.0875392e+00,
2.1486480e+00, 1.8622510e+00, 1.7937366e+00, 1.8748144e+00,
1.9063262e+00, 1.9397615e+00, 2.1261981e+00, 2.0180094e+00,
1.9827688e+00, -9.9999990e+06, 1.8933343e+00, 1.9615903e+00,
2.1000245e+00, 2.1989927e+00, 2.3200927e+00, -9.9999990e+06,
4.0542388e+00, 4.0591464e+00, 4.0597038e+00, 4.3395977e+00,
4.6702847e+00, 5.0433373e+00, 5.2824092e+00, 5.2813010e+00,
5.2735353e+00, 5.2784677e+00, 5.2784038e+00, 5.2795196e+00,
4.9482727e+00, 4.2531524e+00] )
# pyplot (not the top-level matplotlib package) provides subplots() and show()
import matplotlib.pyplot as plt

# Draw the ground track twice: a faint connecting line for continuity, and a
# scatter on top whose marker colour encodes the altitude at each position.
fig, ax1 = plt.subplots(figsize=(10, 10))
ax1.plot(long_data, lat_data, alpha=.4)
ax1.scatter(long_data, lat_data, c=altitude_data)
plt.show()
Which gives us this track:
.
Is there a way to consolidate the data into one line that plots the location of the aircraft and adjusts the color for the elevation?
While plotting a line and a scatter together works, it does not look very good when I put in all the data (n = 2400 ). Thanks!
It looks like if you want to use a Line2D object, you're stuck with a single color per object. As a workaround, you could plot each line segment as a set of (first order linearly) interpolated segments and color each of those by its corresponding infinitesimal value.
It looks like this functionality is contained in a LineCollection instance, however I just went for a more quick and dirty approach below.
For extra credit, since we're talking about geospatial data here, why not use cartopy to plot your data? That way you can have a "basemap" which gives you some reference. After all, if it's worth plotting, it's worth plotting beautifully.
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import cartopy
import cartopy.crs as ccrs
import numpy as np
import scipy
from scipy import interpolate
import matplotlib
#matplotlib.use('Agg')
import matplotlib.pyplot as plt
### clean data
### keep only the samples with |altitude| < 100 (this drops fill values such
### as -9.999999e+06); the same indices are applied to all three arrays so
### that they stay aligned
filter_inds = np.nonzero(np.abs(altitude_data) < 100)
lat_data, long_data, altitude_data = (
    lat_data[filter_inds],
    long_data[filter_inds],
    altitude_data[filter_inds],
)
# =============== plot
plt.close('all')
plt.style.use('dark_background') ## 'default'
fig = plt.figure(figsize=(1500/100, 1000/100))
#ax1 = plt.gca()
lon_center = np.mean(long_data); lat_center = np.mean(lat_data)
ax1 = plt.axes(projection=ccrs.Orthographic(central_longitude=lon_center, central_latitude=lat_center))
ax1.set_aspect('equal')
scale = 3 ### 'zoom' with smaller numbers
ax1.set_extent((lon_center-((0.9*scale)), lon_center+((0.7*scale)), lat_center-(0.5*scale), lat_center+(0.5*scale)), crs=ccrs.PlateCarree())
### states
ax1.add_feature(cartopy.feature.NaturalEarthFeature(category='cultural', scale='10m', facecolor='none', name='admin_1_states_provinces_shp'), zorder=2, linewidth=1.0, edgecolor='w')
ax1.add_feature(cartopy.feature.RIVERS.with_scale('10m'), zorder=2, linewidth=1.0, edgecolor='lightblue')
ax1.add_feature(cartopy.feature.LAKES.with_scale('10m'), zorder=2, linewidth=1.0, edgecolor='gray')
### download counties from https://prd-tnm.s3.amazonaws.com/StagedProducts/Small-scale/data/Boundaries/countyl010g_shp_nt00964.tar.gz
### untar with : tar -xzf countyl010g_shp_nt00964.tar.gz
### county outlines are optional: draw them if the shapefile can be read,
### otherwise report why and carry on with the rest of the map
try:
    reader = cartopy.io.shapereader.Reader('countyl010g.shp')
    counties = list(reader.geometries())
    COUNTIES = cartopy.feature.ShapelyFeature(counties, ccrs.PlateCarree())
    ax1.add_feature(COUNTIES, facecolor='none', alpha=0.5, zorder=2, edgecolor='gray')
except Exception as err:
    ### `Exception` (not a bare except) so Ctrl-C / SystemExit still propagate
    print('Skipping county outlines: %s' % err)
#norm = matplotlib.colors.Normalize(vmin=altitude_data.min(), vmax=altitude_data.max())
norm = matplotlib.colors.Normalize(vmin=1.0, vmax=6.0)
cmap = matplotlib.cm.viridis
mappableCmap = matplotlib.cm.ScalarMappable(norm=norm, cmap=cmap)
# ===== plot line segments individually for gradient effect
### number of points each segment between two consecutive fixes is resampled to
nrsmpl = 100
for i in range(long_data.size-1):
    ### linearly resample position and altitude between fix i and fix i+1.
    ### np.linspace interpolates each quantity directly, which yields the same
    ### points as a first-order spline over altitude but also works when both
    ### fixes share the same altitude or the altitude is descending
    long_data_this_segment_rsmpl = np.linspace(long_data[i], long_data[i+1], nrsmpl)
    lat_data_this_segment_rsmpl = np.linspace(lat_data[i], lat_data[i+1], nrsmpl)
    altitude_data_this_segment_rsmpl = np.linspace(altitude_data[i], altitude_data[i+1], nrsmpl)
    for j in range(nrsmpl-1):
        long_data_this_segment_2 = long_data_this_segment_rsmpl[j:j+2]
        lat_data_this_segment_2 = lat_data_this_segment_rsmpl[j:j+2]
        altitude_data_this_segment_2 = altitude_data_this_segment_rsmpl[j:j+2]
        ### each short piece is coloured by the mean altitude of its two ends
        ax1.plot(long_data_this_segment_2, lat_data_this_segment_2, transform=ccrs.PlateCarree(), c=mappableCmap.to_rgba(np.mean(altitude_data_this_segment_2)), zorder=3, linestyle='solid', alpha=0.8, lw=5.0)
# =====
### plot the actual data points as a scatter plot
pts = ax1.scatter(long_data, lat_data, transform=ccrs.PlateCarree(), alpha=1.0, marker='o', c=mappableCmap.to_rgba(altitude_data), edgecolor='w', zorder=4)
### colorbar driven by the same ScalarMappable used for the line and markers
cbar = fig.colorbar(mappable=mappableCmap, ax=ax1, orientation='vertical', fraction=0.046, pad=0.04)
cbar.set_label(r'$Altitude$ [units]', fontsize=20)
cbar.ax.tick_params(labelsize=16)
### take the tick range from `norm` so the ticks always match the colour scale;
### the `update_ticks` keyword is not passed because current Matplotlib
### releases no longer accept it
cbar_ticks = np.linspace(norm.vmin, norm.vmax, 5+1)
cbar.set_ticks(cbar_ticks)
cbar.set_ticklabels([ ('%0.1f' % x) for x in cbar_ticks ])
fig.tight_layout()
fig.savefig('flightPath.png',dpi=100)
plt.show()
So, I have something that is pretty close. There will be some missing/averaged altitude data, though.
from matplotlib import pyplot as plt
import matplotlib
import matplotlib.cm as cm
#... define arrays ...
fig, ax1 = plt.subplots( figsize = ( 10, 10 ) )
minima = min(altitude_data)  # not used for the scale: fill values make it useless as vmin
maxima = max(altitude_data)
# map altitudes in [0, maxima] onto the 'summer' colormap; anything outside is clipped
norm = matplotlib.colors.Normalize(vmin=0, vmax=maxima, clip=True)
mapper = cm.ScalarMappable(norm=norm, cmap=cm.summer)
pointsPerColor = 2
# step through the track in chunks; stopping at len-1 guarantees that every
# chunk holds at least two points and that the tail of the track is drawn
for startIndex in range(0, len(lat_data) - 1, pointsPerColor):
    # one extra point so that consecutive chunks share an end point and join up
    stopIndex = startIndex + pointsPerColor + 1
    #get color for this section (average over the points actually in the slice)
    sectionAltitudes = altitude_data[startIndex:stopIndex]
    avgAltitude = sum(sectionAltitudes)/len(sectionAltitudes)
    rbga = mapper.to_rgba(avgAltitude)
    #plot section
    ax1.plot( long_data[startIndex:stopIndex],
              lat_data[startIndex:stopIndex],
              alpha=.7,color=rbga )
plt.show()
So what's happening, in order, is:
get the min & max of your altitude & use that to make a color mapper
there are several color options
determine the interval; you need at least 2 points to make a line, obviously
loop for (number of points)/pointsPerColor (need to do integer division)
a. get average color
b. plot segment with color
That's it! I probably could've done this a little prettier, but it works.
Also, those super low values messed up the mapping, so I just set the min to 0.
line plot with color scale of altitude data
Update
As discussed, here now the code without a for loop and including a fourth category, e.g., acceleration. Now the code uses Line3DCollection to generate the trajectory and a custom-made color map with LinearSegmentedColormap to indicate the fourth category (acceleration):
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d.art3d import Line3DCollection
from matplotlib.colors import LinearSegmentedColormap
fig = plt.figure(figsize=(12,12))
#create the 3D axes explicitly; fig.gca() no longer accepts a projection keyword
ax = fig.add_subplot(111, projection='3d')
#rolling average between two acceleration data points (one value per segment)
aver_accel = np.convolve(acceleration_data, np.ones((2,))/2, mode='valid')
#custom colour map to visualize acceleration and deceleration
cmap_bgr = LinearSegmentedColormap.from_list("bluegreyred", ["red", "lightgrey", "lightgrey", "blue"])
#creating the trajectory as line segments:
#points has shape (N, 3); pairing every point with its successor gives an
#array of shape (N-1, 2, 3), i.e. one (start, end) pair per segment
points = np.transpose([lat_data, long_data, altitude_data])
segments = np.stack([points[:-1], points[1:]], axis=1)
trajectory = Line3DCollection(segments, cmap=cmap_bgr, linewidth=3)
#set the colour according to the acceleration data
trajectory.set_array(aver_accel)
#add line collection and plot color bar for acceleration
cb = ax.add_collection(trajectory)
cbar = fig.colorbar(cb, ax=ax, shrink=0.5)
cbar.set_label("acceleration", rotation=270)
#let's call it "autoscale": a collection does not update the axis limits itself
ax.set_xlim(min(lat_data), max(lat_data))
ax.set_ylim(min(long_data), max(long_data))
ax.set_zlim(min(altitude_data), max(altitude_data))
ax.set_xlabel("latitude")
ax.set_ylabel("longitude")
ax.set_zlabel("altitude")
plt.show()
Sample output (with arbitrary acceleration data):
Thanks to the tailored colormap, one can clearly see acceleration and deceleration phases. Since we directly use the array, a colorbar for calibration can be easily added. Mind you, you still have the variable linewidth that also takes an array (for instance for velocity), although this will probably then be difficult to read. There is also substantial time gain in the generation of large-scale 3D line collections thanks to this marvellous answer.
For comparison, here the 2D view as produced by the other answers:
Original answer
Since you have 3D data, why not create a 3D projection? You can always move the view into a 2D projection if you feel like it. To avoid the problem that the color is defined by the first point of each line (i.e., a steep ascent would look different from a steep descent), this program determines the middle point of each line for the color-coded altitude calculation. Disadvantages: Uses a slow for loop, and the altitude colors are normalized between 0 and 1 (which doesn't matter here because altitude is overdetermined in this 3D projection but will become a problem if you want to color-code another parameter).
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import cm
fig = plt.figure(figsize=(10,10))
#create the 3D axes explicitly; fig.gca() no longer accepts a projection keyword
ax = fig.add_subplot(111, projection='3d')
min_alt = np.min(altitude_data)
max_alt = np.max(altitude_data)
#avoid dividing by zero when every altitude is identical
alt_range = (max_alt - min_alt) or 1.0
#generate normalized altitude array for colour code
#the factor 0.95 filters out the end of this colormap
cols_raw = 0.95 * (altitude_data-min_alt) / alt_range
#rolling average between two data point colors (one value per segment)
cols = np.convolve(cols_raw, np.ones((2,))/2, mode='valid')
#draw every segment separately so that each one can get its own colour
for i, col in enumerate(cols):
    ax.plot(lat_data[i:i+2], long_data[i:i+2], altitude_data[i:i+2], c=cm.gnuplot(col))
ax.set_xlabel("latitude")
ax.set_ylabel("longitude")
ax.set_zlabel("altitude")
plt.show()
The sample data for the above outputs:
lat_data = np.array( [ 39.916294, 39.87139 , 39.8005 , 39.70801 , 39.64645 , 39.58172 ,
39.537853, 39.55141 , 39.6787 , 39.796528, 39.91702 , 40.008347,
40.09513 , 40.144157, 40.090584, 39.96447 , 39.838924, 39.712112,
39.597103, 39.488377, 39.499096, 39.99354 , 40.112175, 39.77281 ,
39.641186, 39.51512 , 39.538853, 39.882736, 39.90413 , 39.811333,
39.73279 , 39.65676 , 39.584026, 39.5484 , 39.54484 , 39.629486,
39.96 , 40.07143 , 40.187405, 40.304718, 40.423153, 40.549305,
40.673313, 40.794548, 40.74402 , 40.755558, 40.770306, 40.73574 ,
40.795086, 40.774628] )
long_data = np.array( [ -105.13034 , -105.144104, -105.01132 , -104.92708 , -104.78505 ,
-104.6449 , -104.49255 , -104.36578 , -104.32623 , -104.31285 ,
-104.32199 , -104.41774 , -104.527435, -104.673935, -104.81152 ,
-104.82184 , -104.81882 , -104.81314 , -104.74657 , -104.78108 ,
-104.93442 , -104.98039 , -105.0168 , -105.04967 , -105.056564,
-105.03639 , -105.13429 , -105.05214 , -105.17435 , -105.070526,
-104.93587 , -104.80029 , -104.65973 , -104.50339 , -104.33972 ,
-104.21634 , -103.96216 , -103.84808 , -103.72534 , -103.60455 ,
-103.48926 , -103.376495, -103.25937 , -103.10858 , -103.08469 ,
-103.24878 , -103.4169 , -103.53073 , -103.23694 , -103.41254 ] )
altitude_data = np.array( [1.6957603e+00, 1.9788861e+00, 1.8547169e+00, 1.8768315e+00,
1.9633590e+00, 2.0504241e+00, 2.1115899e+00, 2.1085002e+00,
1.8621666e+00, 1.8893014e+00, 1.8268168e+00, 1.7574688e+00,
1.7666028e+00, 1.7682364e+00, 1.8120643e+00, 1.7637002e+00,
1.8054264e+00, 1.9149075e+00, 2.0173934e+00, 2.0875392e+00,
2.1486480e+00, 1.8622510e+00, 1.7937366e+00, 1.8748144e+00,
1.9063262e+00, 1.9397615e+00, 2.1261981e+00, 2.0180094e+00,
1.9827688e+00, 1.9999990e+00, 1.8933343e+00, 1.9615903e+00,
2.1000245e+00, 2.1989927e+00, 2.3200927e+00, 2.9999990e+00,
4.0542388e+00, 4.0591464e+00, 4.0597038e+00, 4.3395977e+00,
4.6702847e+00, 5.0433373e+00, 5.2824092e+00, 5.2813010e+00,
5.2735353e+00, 5.2784677e+00, 5.2784038e+00, 5.2795196e+00,
4.9482727e+00, 4.2531524e+00] )
acceleration_data = np.array(
[1, 2, 2, 3,
3, 3, 2, 2,
2, 2, 4, 5,
4, 3, 4, 3,
3, 3, 3, 4,
3, 3, 4, 5,
4, 4, 4, 5,
4, 15, 26, 49,
67, 83, 89, 72,
77, 63, 75, 82,
69, 37, 5, -29,
-37, -27, -29, -14,
9, 4] )
Here is my solution using Plotly's ScatterGeo object as well as Pandas and NumPy to load in the data. I chose this package since you could then have an interactive plot (with zoom and hover data) and also see which states the plane flew over :).
# Import packages
import pandas as pd
import numpy as np
import plotly.graph_objects as go
# Load your data into a Pandas DataFrame object
d = {'Lat': lat_data, 'Long': long_data, 'Altitude': altitude_data}
df = pd.DataFrame(data=d)
# Create scatterGeo object with the proper data
scatterMapData = go.Scattergeo(lon = df['Long'], lat = df['Lat'], text=df['Altitude'],
mode = 'markers+lines', marker_color = df['Altitude'],
marker = dict(colorscale = 'Viridis', cmin = 0,
cmax = df['Altitude'].max(),
colorbar_title = "Altitude",
#line = dict(width=1, color='black')
)
)
# Load scatterMapData object into Plotly Figure
# and configure basic options for title and scoping
fig = go.Figure(data=scatterMapData)
fig.update_layout(title = 'Plane Flight Data', geo_scope = 'usa',
geo = dict(scope = 'usa',
#projection_scale = 5,
center={'lat': np.median(df['Lat']), 'lon': np.median(df['Long'])})
)
# Finally show the plot
fig.show()
Here is a zoomed in version of the plot:
I just want to point out that you can change to mode='markers' in the Scattergeo object for just a scatter plot and mode='lines' for just a line plot connecting each of the locations.
This question is related to a previous question I posted here. My code for my seaborn scatterplot looks as follows:
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
df = pd.DataFrame()
df['First PCA dimension'] = [1,2,3,4]
df['Second PCA dimension'] = [0,5,5,7]
df['Third PCA dimension'] = [1,2,6,4]
df['Data points'] = [1,2,3,4]
plt.figure(figsize=(42,30))
plt.title('2-D PCA of my data points',fontsize=32)
colors = ["#FF9926", "#2ACD37","#FF9926", "#FF0800"]
b = sns.scatterplot(x="First PCA dimension", y="Second PCA dimension", hue="Data points", palette=sns.color_palette(colors), data=df, legend="full", alpha=0.3)
sns.set_context("paper", rc={"font.size":48,"axes.titlesize":48,"axes.labelsize":48})
b.set_ylabel('mylabely', size=54)
b.set_xlabel('mylabelx', size=54)
b.set_xticklabels([1,2,3,4,5,6,7,8], fontsize = 36)
lgnd = plt.legend(fontsize='22')
# `legend_handles` is the current attribute name; older Matplotlib releases
# expose the same list only as `legendHandles`, so fall back to that.
handles = getattr(lgnd, 'legend_handles', None)
if handles is None:
    handles = lgnd.legendHandles
# enlarge the legend markers so they stay readable next to the large fonts
for handle in handles:
    handle.set_sizes([26.0])
plt.show()
The alpha value of 0.3 sets a transparency value for each point in my scatterplot. However, I would like to have a different transparency value for each data point (based on the category it belongs to) instead. Is this possible by providing a list of alpha values, similar to the way I provide a list of colours in the example above?
As noted in comments, this is something you can't currently do with seaborn.
However, you can hack it by using key colours for the markers, and find-replacing those colours using PathCollection.get_facecolor() and PathCollection.set_facecolor() with RGBA colours.
So for example, I needed a swarmplot on top of a violinplot, with certain classes of points at different opacities. To change greys into transparent blacks (what I needed to do), we can do:
import numpy
import seaborn
from matplotlib.collections import PathCollection

seaborn.violinplot(...)
points = seaborn.swarmplot(...)
for c in points.collections:
    # only the marker collections are recoloured
    if not isinstance(c, PathCollection):
        continue
    fc = c.get_facecolor()
    if fc.shape[1] != 4:
        continue
    # find the rows that exactly match each key colour before changing any
    grey = numpy.all(fc == [0.5, 0.5, 0.5, 1], axis=1)
    white = numpy.all(fc == [1, 1, 1, 1], axis=1)
    # change mid-grey to 50% black
    fc[grey] = [0, 0, 0, 0.5]
    # change white to transparent
    fc[white] = [0, 0, 0, 0]
    c.set_facecolor(fc)
Very awful, but it got me what I needed for a one-shot graphic.
Context
I have some points
# ten random 2-D points with both coordinates drawn uniformly from [0, 10)
points = np.random.uniform(0,10, size = (10,2))
# example output:
# array([[ 7.35906037, 6.50049804],
#        [ 3.21883403, 3.81452312],
#        [ 3.52107154, 1.68233797],
#        [ 1.47699577, 6.01692348],
#        [ 3.76051589, 0.25213394],
#        [ 8.93701081, 5.20377479],
#        [ 6.5347188 , 2.12940006],
#        [ 3.62550069, 5.80619507],
#        [ 1.33393325, 5.0088937 ],
#        [ 6.99034593, 7.40277623]])
and they are "classified" or labelled. This means that I have a list
labels = np.random.randint(0,3, size = 10)
# array([2, 0, 1, 2, 2, 1, 1, 0, 1, 2])
which represents the labels (in order) of each point in points.
I also have some extra points
extraPoints = np.random.uniform(0,10, size = (3,2))
# array([[ 1.91211141, 3.71208978],
# [ 8.10463536, 1.88948511],
# [ 9.79796593, 3.39432552]])
basically each of these points determines the class label. It doesn't matter HOW it determined the label. But all you have to know is that each of these extra points is associated to one and only one label. So there is an equal number of extraPoints and label possibilities.
problem
I want to do a scatter plot. I want to assign a different color to each point in extraPoints, and hence this color will be the one corresponding to each class. This basically means that extraPoints[0] is associated with class 0, extraPoints[1] is associated with class 1 and extraPoints[2] is associated with class 2.
Also, I want to scatter plot the points in points. Remember that each point in points is associated with the corresponding label in labels.
For instance [ 7.35906037, 6.50049804] is in class 2 and thus has the same color of extraPoints[2] = [ 9.79796593, 3.39432552]. Similarly the point [ 3.21883403, 3.81452312] in points is associated with class 0 in labels and thus has the same color of extraPoints[0] = [ 1.91211141, 3.71208978]
My try
I tried using the c argument in plt.scatter() however I don't really understand how it works, and sometimes it sort of works, sometimes it says "Invalid RGBA argument 0.0" but seems to be arbitrary..
Notice that to distinguish points from extraPoints, I will make extraPoints larger and with more transparency.
import matplotlib.pyplot as plt
# I scatter the points, and assign c to labels. So hopefully each
# point that ends up in the same label will have the same
# color? I think this part is fine, although I am not sure
plt.scatter(points[:,0], points[:,1], c = labels)
plt.scatter(extraPoints[:,0], extraPoints[:,1], s = 100, alpha = 0.3, c = np.arange(len(extraPoints)))
As you can try out for yourself, for different executions (since every time we have random arrays) we might either get it right (or almost) or get the error in the title. Why does this happen?
Extra - for the brave
Given this context, imagine I had also some values
values = np.random.uniform(0,50, size = 3)
# array([ 14.63459424, 37.41573654, 34.45202082])
I have the same number of values as I have types of labels and extraPoints (i.e. 3 in this case). Now each of these is associated with the corresponding extraPoints. Thus the first value to the first extraPoint and so on..
I would like to do the above plot, but the colors will have a "gradient" that becomes, for instance, lighter for smaller values and darker for larger values (or the opposite). How can I do that? I read about colormaps, but I can't quite integrate it with my problem.
Example
For instance for the values above, we obtain:
As you can see, I have no control over the colors. Not only that, but I have no idea of which point is in which class (unless I go back and manually look at each point, but obviously I don't want this). This is why (and other reasons that I wont cover here) I want to color them based on a value in values. Specifically, I would like, say to have a range of values [10, 20 30] that can guide the color of my points, so that I know which class is "strongest"
First problem: the code does not run, since np.random.uniform(0,10, size = 3) gives a 1D array, while you later expect it to be 2D (extraPoints[:,0]).
Second problem: labels may have between 1 and 3 unique entries, hence np.unique(labels) may be of length 1 to 3 (e.g. labels may be all zeros, such that np.unique(labels) == [0]), so that you have more points than colors. However, c expects either a single color argument or a list of values of the same length as the input coordinates.
Third problem: If supplying a list or array of length 3 or 4, it is not clear whether this should be a single RGB or RGBA color or a list of values to colormap. If you actually run into this problem or not, cannot be said for sure until you have solved the first and second problem.
Update: after the first two problems are solved, you are probably just looking for a colorbar and a useful colormap.
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.colors
points = np.random.uniform(0,10, size = (10,2))
labels = np.random.randint(0,3, size = 10)
extraPoints = np.random.uniform(0,10, size = (3,2))
sc = plt.scatter(points[:,0], points[:,1], c = labels)
sc2 = plt.scatter(extraPoints[:,0], extraPoints[:,1], s = 144, alpha = 0.7,
c = np.arange(len(extraPoints)))
plt.colorbar(sc)
plt.show()
Or, if you want to have individual colors:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.colors
points = np.random.uniform(0,10, size = (10,2))
labels = np.random.randint(0,3, size = 10)
extraPoints = np.random.uniform(0,10, size = (3,2))
colors=["red", "gold", "limegreen"]
cmap = matplotlib.colors.ListedColormap(colors)
sc = plt.scatter(points[:,0], points[:,1], c = labels, cmap=cmap, vmin=-0.5,vmax=2.5 )
sc2 = plt.scatter(extraPoints[:,0], extraPoints[:,1], s = 144, alpha = 0.7,
c = np.arange(len(extraPoints)), cmap=cmap, vmin=-0.5,vmax=2.5)
plt.colorbar(sc, ticks=np.arange(len(extraPoints)))
plt.show()
Thanks to ImportanceOfBeingErnest I managed to solve the problem. I know my explanation was really bad, but here I post it for someone who might find the same problem in the future:
ImportanceOfBeingErnest solution
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.colors
points = np.random.uniform(0,10, size = (10,2))
labels = np.random.randint(0,3, size = 10)
extraPoints = np.random.uniform(0,10, size = (3,2))
colors=["red", "gold", "limegreen"]
cmap = matplotlib.colors.ListedColormap(colors)
sc = plt.scatter(points[:,0], points[:,1], c = labels, cmap=cmap, vmin=-0.5,vmax=2.5 )
sc2 = plt.scatter(extraPoints[:,0], extraPoints[:,1], s = 144, alpha = 0.7,
c = np.arange(len(extraPoints)), cmap=cmap, vmin=-0.5,vmax=2.5)
plt.colorbar(sc, ticks=np.arange(len(extraPoints)))
plt.show()
My add-in that does what I want
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.colors
points = np.random.uniform(0,10, size = (10,2))
labels = np.random.randint(0,3, size = 10)
extraPoints = np.random.uniform(0,10, size = (3,2))
# CREATE VALUES (one per class / extra point)
values = np.random.uniform(0,50, size=3)
# Scale the colours to the actual range of `values`. A fixed vmin=-0.5,
# vmax=2.5 would clip almost every value in [0, 50) to the last colour.
norm = matplotlib.colors.Normalize(vmin=values.min(), vmax=values.max())
# reversed viridis: light for small values, dark for large values
cmap = "viridis_r"
# every point takes the value of its class; the extra points take their own
sc = plt.scatter(points[:,0], points[:,1], c = values[labels], cmap=cmap, norm=norm)
sc2 = plt.scatter(extraPoints[:,0], extraPoints[:,1], s = 144, alpha = 0.7,
                  c = values, cmap=cmap, norm=norm)
plt.colorbar(sc, label="value")
plt.show()
The difference is that the colors in sc are now determined by the values in values in the same order of the labels, and at the same time, the points in extraPoints are being colored with the strength and order of values in values.
I'm trying to visualize a data set and I'm stuck pretty early in the process because I don't understand the logic of matplotlib.
The data is kept in two 2d tables of the same format:
>>> data1
array([[ 1. , 1.384 ],
[ 2. , 3.65218531],
[ 3. , 3.17718531],
[ 1. , 2.34513543],
[ 2. , 3.43223443],
[ 3. , 1.23333255],
...
>>> data2
...
I want to visualize these tables in the same plot. The data1 table has many more rows than data2, so my idea is to display data1 with column-wise histograms and data2 with points like this:
I know how to make 1D histograms of data, and I know how to make scatterplots, but how do I get colored bins and overlay them with the scatterplots in the right positions?
Edit:
Example code:
import pylab
import numpy
import matplotlib.pyplot as plt
fig = plt.figure()
data1 = numpy.array([[1,1.5],[2,1.7],[3,1.8],[1,1.2],[2,1.8],[3,2.2]])
data2 = numpy.array([[1,2.1],[2,1.3],[3,1.4],[1,1.1],[2,1.9],[3,2.25]])
plt.scatter(data1[:,0], data1[:,1])
plt.scatter(data2[:,0], data2[:,1], color="red")
plt.show()
In the real code the data1 array contains so many entries that it becomes a mess. Hence the wish for a 'histogram'.
I think you want to do something like this:
import numpy as np
import matplotlib.pyplot as plt

w = .2  # half-width of each histogram column
bins = np.linspace(-4, 4, 11, endpoint=True)
fig, ax = plt.subplots()
for j in range(1, 15):
    # stand-in data for column j
    tt = np.random.randn(50)
    nn, _bins = np.histogram(tt, bins)
    # don't use scatter unless you need to change the size or color of the markers
    ax.plot(j*np.ones_like(tt), tt, 'r.')
    # draw the bin counts as a one-pixel-wide image centred on x = j;
    # [::-1] flips the counts so the lowest bin ends up at the bottom
    ax.imshow(nn.reshape(-1, 1)[::-1], extent=[j-w, j+w, -4, 4], cmap='Blues', aspect='auto', interpolation='none')
# set the limits last, after all the imshow calls
ax.set_xlim([0, 15])
ax.set_ylim([-4, 4])
It might be better to draw each bin as a Rectangle.
You probably also want to use the vmax and vmin kwargs for imshow so that the colors have the same meaning between columns.
Suppose I have three data sets:
X = [1,2,3,4]
Y1 = [4,8,12,16]
Y2 = [1,4,9,16]
I can scatter plot this:
from matplotlib import pyplot as plt
plt.scatter(X,Y1,color='red')
plt.scatter(X,Y2,color='blue')
plt.show()
How can I do this with 10 sets?
I searched for this and could not find any reference to what I'm asking.
Edit: clarifying (hopefully) my question
If I call scatter multiple times, I can only set the same color on each scatter. Also, I know I can set a color array manually but I'm sure there is a better way to do this.
My question is then, "How can I automatically scatter-plot my several data sets, each with a different color?"
If that helps, I can easily assign a unique number to each data set.
I don't know what you mean by 'manually'. You can choose a colourmap and make a colour array easily enough:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.cm as cm
x = np.arange(10)
ys = [i+x+(i*x)**2 for i in range(10)]
colors = cm.rainbow(np.linspace(0, 1, len(ys)))
for y, c in zip(ys, colors):
plt.scatter(x, y, color=c)
Or you can make your own colour cycler using itertools.cycle and specifying the colours you want to loop over, using next to get the one you want. For example, with 3 colours:
import itertools
colors = itertools.cycle(["r", "b", "g"])
for y in ys:
plt.scatter(x, y, color=next(colors))
Come to think of it, maybe it's cleaner not to use zip with the first one either:
colors = iter(cm.rainbow(np.linspace(0, 1, len(ys))))
for y in ys:
plt.scatter(x, y, color=next(colors))
The normal way to plot plots with points in different colors in matplotlib is to pass a list of colors as a parameter.
E.g.:
import matplotlib.pyplot
matplotlib.pyplot.scatter([1,2,3],[4,5,6],color=['red','green','blue'])
When you have a list of lists and you want them colored per list.
I think the most elegant way is that suggested by @DSM,
just do a loop making multiple calls to scatter.
But if for some reason you wanted to do it with just one call, you can make a big list of colors, with a list comprehension and a bit of flooring division:
import matplotlib
import matplotlib.cm
import matplotlib.pyplot  # `import matplotlib` alone does not load the pyplot submodule
import numpy as np
X = [1,2,3,4]
Ys = np.array([[4,8,12,16],
      [1,4,9,16],
      [17, 10, 13, 18],
      [9, 10, 18, 11],
      [4, 15, 17, 6],
      [7, 10, 8, 7],
      [9, 0, 10, 11],
      [14, 1, 15, 5],
      [8, 15, 9, 14],
      [20, 7, 1, 5]])
nCols = len(X)
nRows = Ys.shape[0]
# one colour per data set (row of Ys)
colors = matplotlib.cm.rainbow(np.linspace(0, 1, nRows))
# repeat each row's colour once per point, in the same order as Ys.flatten()
cs = [colors[i//nCols] for i in range(nRows*nCols)] #could be done with numpy's repmat
Xs=X*nRows #use list multiplication for repetition
matplotlib.pyplot.scatter(Xs,Ys.flatten(),color=cs)
cs = [array([ 0.5, 0. , 1. , 1. ]),
array([ 0.5, 0. , 1. , 1. ]),
array([ 0.5, 0. , 1. , 1. ]),
array([ 0.5, 0. , 1. , 1. ]),
array([ 0.28039216, 0.33815827, 0.98516223, 1. ]),
array([ 0.28039216, 0.33815827, 0.98516223, 1. ]),
array([ 0.28039216, 0.33815827, 0.98516223, 1. ]),
array([ 0.28039216, 0.33815827, 0.98516223, 1. ]),
...
array([ 1.00000000e+00, 1.22464680e-16, 6.12323400e-17,
1.00000000e+00]),
array([ 1.00000000e+00, 1.22464680e-16, 6.12323400e-17,
1.00000000e+00]),
array([ 1.00000000e+00, 1.22464680e-16, 6.12323400e-17,
1.00000000e+00]),
array([ 1.00000000e+00, 1.22464680e-16, 6.12323400e-17,
1.00000000e+00])]
An easy fix
If you have only one type of collection (e.g. scatter with no error bars), you can also change the colours after you have plotted them; this is sometimes easier to do.
import matplotlib.pyplot as plt
from random import randint
import numpy as np
#Let's generate some random X, Y data X = [ [frst group],[second group] ...]
X = [ [randint(0,50) for i in range(0,5)] for i in range(0,24)]
Y = [ [randint(0,50) for i in range(0,5)] for i in range(0,24)]
labels = range(1,len(X)+1)
fig = plt.figure()
ax = fig.add_subplot(111)
for x,y,lab in zip(X,Y,labels):
ax.scatter(x,y,label=lab)
The only piece of code that you need:
#This is the only part you actually need: it recolours every collection that
#has already been drawn on `ax` (so you need the Axes object `ax`).
colormap = plt.cm.gist_ncar #nipy_spectral, Set1,Paired
#one evenly spaced colour per collection, stopping at 0.9 of the colormap
palette = [colormap(frac) for frac in np.linspace(0, 0.9, len(ax.collections))]
for coll, shade in zip(ax.collections, palette):
    coll.set_color(shade)
ax.legend(fontsize='small')
The output gives you different colors even when you have many different scatter plots in the same subplot.
You can always use the plot() function like so:
import matplotlib.pyplot as plt
import numpy as np
x = np.arange(10)
# One data series per i: y_i = i + x + (i*x)**2
ys = [i + x + np.square(i * x) for i in range(10)]
plt.figure()
# Each plot() call draws one series with 'o' markers.
for series in ys:
    plt.plot(x, series, 'o')
plt.show()
This question was a bit tricky before Jan 2013 and matplotlib 1.3.1 (Aug 2013), which is the oldest stable version you can find on the matplotlib website. After that, it is quite trivial.
This is because the present version of matplotlib.pylab.scatter supports assigning an array of colour name strings, an array of float numbers with a colour map, or an array of RGB or RGBA values.
This answer is dedicated to @Oxinabox's endless passion for correcting the 2013 version of myself in 2015.
You have two options for using the scatter command with multiple colours in a single call.
As the pylab.scatter command supports RGBA arrays, you can use one to get whatever colours you want;
Back in early 2013, there was no way to do so, since the command only supported a single colour for the whole scatter point collection. When I was doing my 10000-line project, I figured out a general solution to bypass it. It is very tacky, but I can do it in whatever shape, colour, size and transparency. This trick can also be applied to drawing path collections, line collections, etc.
The code is also inspired by the source code of pyplot.scatter; I just duplicated what scatter does without triggering it to draw.
The pyplot.scatter command returns a PathCollection object; in the file "matplotlib/collections.py", the Collection class has a private variable _facecolors and a method set_facecolors.
So whenever you have scatter points to draw, you can do this:
# rgbaArr is a N*4 array of float numbers you know what I mean
# X is a N*2 array of coordinates
# axx is the axes object that current draw, you get it from
# axx = fig.gca()
# also import these, to recreate the within env of scatter command
import matplotlib.markers as mmarkers
import matplotlib.transforms as mtransforms
from matplotlib.collections import PatchCollection
import matplotlib.markers as mmarkers
import matplotlib.patches as mpatches
# define this function
# m is a string of scatter marker, it could be 'o', 's' etc..
# s is the size of the point, use 1.0
# dpi, get it from axx.figure.dpi
def addPatch_point(m, s, dpi):
    """Build a filled PathPatch shaped like a single scatter marker.

    Parameters
    ----------
    m : marker spec accepted by ``MarkerStyle`` (e.g. 'o', 's').
    s : size of the point; the path is scaled by ``sqrt(s * 5) * dpi / 72``.
    dpi : figure resolution, e.g. ``axx.figure.dpi``.

    Returns
    -------
    A ``PathPatch`` whose transform is the scaling only.
    """
    marker_path = mmarkers.MarkerStyle(m).get_path()
    scale_factor = np.sqrt(s * 5) * dpi / 72.0
    scaling = mtransforms.Affine2D().scale(scale_factor)
    return mpatches.PathPatch(marker_path, fill=True, transform=scaling)
# Build one marker patch per point, then draw them all as one collection.
# markerArr is an array of marker strings, e.g. ['o', 's', 'o', ...]
# sizeArr is an array of size floats, e.g. [1.0, 1.0, 0.5, ...]
patches = [
    addPatch_point(m, s, axx.figure.dpi)
    for m, s in zip(markerArr, sizeArr)
]
pclt = PatchCollection(
    patches,
    # Materialize the (x, y) pairs: on Python 3 a bare zip() is a lazy
    # iterator and cannot be converted into an array of offsets.
    offsets=list(zip(X[:, 0], X[:, 1])),
    # NOTE(review): newer matplotlib releases appear to name this keyword
    # `offset_transform` -- confirm against the installed version.
    transOffset=axx.transData)
# The patches already carry their own scaling, so the collection itself
# gets an identity transform.
pclt.set_transform(mtransforms.IdentityTransform())
pclt.set_edgecolors('none')  # it's up to you
# rgbaArr is the N*4 float array giving each point's face colour.
pclt._facecolors = rgbaArr
# In the end, when you decide to draw:
axx.add_collection(pclt)
# and call axx's parent to draw_idle()
A MUCH faster solution for large datasets and a limited number of colors is to use Pandas and the groupby function:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import time
# A generic set of data with associated colors.
nsamples = 1000
x = np.random.uniform(0, 10, nsamples)
y = np.random.uniform(0, 10, nsamples)
colors = {0: 'r', 1: 'g', 2: 'b', 3: 'k'}
# Round to the nearest integer and cast to int so the int-keyed dict is
# indexed with integers rather than floats.
color_ids = np.round(np.random.uniform(0, 3, nsamples), 0).astype(int)
c = [colors[i] for i in color_ids]
plt.close('all')

# "Fast" scatter plotting: one scatter() call per distinct color.
# perf_counter() is the clock intended for measuring short intervals.
starttime = time.perf_counter()
# 1) make a dataframe
df = pd.DataFrame({'x': x, 'y': y, 'c': c})
plt.figure()
# 2) group the dataframe by color and plot each group in a single call
for g, b in df.groupby(by='c'):
    plt.scatter(b['x'], b['y'], color=g)
print('Fast execution time:', time.perf_counter() - starttime)

# "Slow" scatter plotting: one scatter() call per point, for comparison.
starttime = time.perf_counter()
plt.figure()
for xi, yi, ci in zip(x, y, c):
    plt.scatter(xi, yi, color=ci)
print('Slow execution time:', time.perf_counter() - starttime)
plt.show()
This works for me:
for each series, use a random rgb colour generator
# Random RGB colour: three independent floats drawn uniformly from [0, 1).
c = np.random.random_sample(3).tolist()
You can also create a list containing all the colors you need in your scatter plot and pass it as the color parameter, like this:
# One named colour per entry, handed to scatter() through `color`.
# NOTE(review): X and Y are not defined in this fragment; presumably they must
# hold as many points as there are entries in `colors` (three here) -- confirm.
colors = ["red", "blue", "green"]
plt.scatter(X, Y, color = colors)