Issues in displaying year by year line plot using bokeh (pyspark dataframe) - python

I have an issue when I try to displaying line plot from pyspark dataframe using bokeh, it was displayed successfully but it shows what I'm not expected. The problem is some line plot dots are not connected year by year sequentially.
Previously I tried to sort the source data frame using orderBy :
# Join df_max, and df_avg to df_quake_freq
df_quake_freq = df_quake_freq.join(df_avg, ['Year']).join(df_max, ['Year'])
df_quake_freq = df_quake_freq.orderBy(asc('Year'))
df_quake_freq.show(5)
And the output is :
dataframe source for line plot
This is code for plotting:
# Create a magnitude plot
def plotMagnitude():
# Load the datasource
cds = ColumnDataSource(data=dict(
yrs = df_quake_freq['Year'].values.tolist(),
avg_mag = df_quake_freq['Avg_Magnitude'].round(1).values.tolist(),
max_mag = df_quake_freq['Max_Magnitude'].values.tolist()
))
# Tooltip
TOOLTIPS = [
('Year', ' #yrs'),
('Average Magnitude', ' #avg_mag'),
('Maximum Magnitude', ' #max_mag')
]
# Create the figure
mp = figure(title='Maximum and Average Magnitude by Year',
plot_width=1150, plot_height=400,
x_axis_label='Years',
y_axis_label='Magnitude',
x_minor_ticks=2,
y_range=(5, df_quake_freq['Max_Magnitude'].max() + 1),
toolbar_location=None,
tooltips=TOOLTIPS)
# Max Magnitude
mp.line(x='yrs', y='max_mag', color='#cc0000', line_width=2, legend='Max Magnitude', source=cds)
mp.circle(x='yrs', y='max_mag', color='#cc0000', size=8, fill_color='#cc0000', source=cds)
# Average Magnitude
mp.line(x='yrs', y='avg_mag', color='yellow', line_width=2, legend='Avg Magnitude', source=cds)
mp.circle(x='yrs', y='avg_mag', color='yellow', size=8, fill_color='yellow', source=cds)
mp = style(mp)
show(mp)
return mp
plotMagnitude()
The output is :
line plot
We can see from the picture that some dots are not connected sequentially, for example like 1965-1967-1966

My problem is solved, the reason why the dots are not sequenced is that plotMagnitude function is relying on global variables, so to handle this I put
sort.values() inside the function. This is the syntax:
def plotMagnitude():
# Load the datasource
cds = ColumnDataSource(data=dict(
yrs = df_quake_freq['Year'].sort_values().values.tolist(),
avg_mag = df_quake_freq['Avg_Magnitude'].round(1).values.tolist(),
max_mag = df_quake_freq['Max_Magnitude'].values.tolist()

Related

Dynamic Regression Line on Bokeh Plot - Python

I have a bokeh scatter plot that updates the data displayed based on the checkbox selection for time of day. The legend allows you to hide or show the different datasets.
I'd like to add a regression line that updates with the selections. For example, if hour 0 and both data sets are showing it would calculate and show the corresponding regression line. Then if hour 9 was added the regression line would update accordingly.
Example of Current Bokeh Plot
Below is a sample of the code corresponding to the example plot.
# create filtered plot
filt_time_fig = figure(title = 'AC Power vs Expected Power',
x_axis_label = 'AC Power',
y_axis_label = 'Expected Power')
# define source
source = ColumnDataSource(master_interval_df)
# create checkbox
active_hour = '0'
f = BooleanFilter(booleans=[l == active_hour for l in master_interval_df['hour']])
uniq_hours = list(map(str,list(range(0,24))))
cg = CheckboxGroup(labels=uniq_hours, active=[uniq_hours.index(active_hour)])
cg.js_on_change('active',
CustomJS(args=dict(source=source, f=f),
code="""\
const hours = cb_obj.active.map(idx => cb_obj.labels[idx]);
f.booleans = source.data.hour.map(l => hours.includes(l));
source.change.emit();
"""))
#create glyph for each dataset
for turb in turbine_list:
filt_time_fig.circle(x = 'AC_p_'+turb,
y = 'Exp_p_'+turb,
source = source,
legend_label = turb,
line_color = None,
fill_color={"field":"hour","transform":colormap},
view = CDSView(source=source, filters=[f]))
cg.max_width=50
filt_time_fig.legend.click_policy = 'hide'
show(filt_time_fig)

Bokeh callback not updating chart [duplicate]

Struggling to understand why this bokeh visual will not allow me to change plots and see the predicted data. The plot and select (dropdown-looking) menu appears, but I'm not able to change the plot for items in the menu.
Running Bokeh 1.2.0 via Anaconda. The code has been run both inside & outside of Jupyter. No errors display when the code is run. I've looked through the handful of SO posts relating to this same issue, but I've not been able to apply the same solutions successfully.
I wasn't sure how to create a toy problem out of this, so in addition to the code sample below, the full code (including the regression code and corresponding data) can be found at my github here (code: Regression&Plotting.ipynb, data: pred_data.csv, historical_data.csv, features_created.pkd.)
import pandas as pd
import datetime
from bokeh.io import curdoc, output_notebook, output_file
from bokeh.layouts import row, column
from bokeh.models import Select, DataRange1d, ColumnDataSource
from bokeh.plotting import figure
#Must be run from the command line
def get_historical_data(src_hist, drug_id):
historical_data = src_hist.loc[src_hist['ndc'] == drug_id]
historical_data.drop(['Unnamed: 0', 'date'], inplace = True, axis = 1)#.dropna()
historical_data['date'] = pd.to_datetime(historical_data[['year', 'month', 'day']], infer_datetime_format=True)
historical_data = historical_data.set_index(['date'])
historical_data.sort_index(inplace = True)
# csd_historical = ColumnDataSource(historical_data)
return historical_data
def get_prediction_data(src_test, drug_id):
#Assign the new date
#Write a new dataframe with values for the new dates
df_pred = src_test.loc[src_test['ndc'] == drug_id].copy()
df_pred.loc[:, 'year'] = input_date.year
df_pred.loc[:, 'month'] = input_date.month
df_pred.loc[:, 'day'] = input_date.day
df_pred.drop(['Unnamed: 0', 'date'], inplace = True, axis = 1)
prediction = lin_model.predict(df_pred)
prediction_data = pd.DataFrame({'drug_id': prediction[0][0], 'predictions': prediction[0][1], 'date': pd.to_datetime(df_pred[['year', 'month', 'day']], infer_datetime_format=True, errors = 'coerce')})
prediction_data = prediction_data.set_index(['date'])
prediction_data.sort_index(inplace = True)
# csd_prediction = ColumnDataSource(prediction_data)
return prediction_data
def make_plot(historical_data, prediction_data, title):
#Historical Data
plot = figure(plot_width=800, plot_height = 800, x_axis_type = 'datetime',
toolbar_location = 'below')
plot.xaxis.axis_label = 'Time'
plot.yaxis.axis_label = 'Price ($)'
plot.axis.axis_label_text_font_style = 'bold'
plot.x_range = DataRange1d(range_padding = 0.0)
plot.grid.grid_line_alpha = 0.3
plot.title.text = title
plot.line(x = 'date', y='nadac_per_unit', source = historical_data, line_color = 'blue', ) #plot historical data
plot.line(x = 'date', y='predictions', source = prediction_data, line_color = 'red') #plot prediction data (line from last date/price point to date, price point for input_date above)
return plot
def update_plot(attrname, old, new):
ver = vselect.value
new_hist_source = get_historical_data(src_hist, ver) #calls the function above to get the data instead of handling it here on its own
historical_data.data = ColumnDataSource.from_df(new_hist_source)
# new_pred_source = get_prediction_data(src_pred, ver)
# prediction_data.data = new_pred_source.data
#Import data source
src_hist = pd.read_csv('data/historical_data.csv')
src_pred = pd.read_csv('data/pred_data.csv')
#Prep for default view
#Initialize plot with ID number
ver = 781593600
#Set the prediction date
input_date = datetime.datetime(2020, 3, 31) #Make this selectable in future
#Select-menu options
menu_options = src_pred['ndc'].astype(str) #already contains unique values
#Create select (dropdown) menu
vselect = Select(value=str(ver), title='Drug ID', options=sorted((menu_options)))
#Prep datasets for plotting
historical_data = get_historical_data(src_hist, ver)
prediction_data = get_prediction_data(src_pred, ver)
#Create a new plot with the source data
plot = make_plot(historical_data, prediction_data, "Drug Prices")
#Update the plot every time 'vselect' is changed'
vselect.on_change('value', update_plot)
controls = row(vselect)
curdoc().add_root(row(plot, controls))
UPDATED: ERRORS:
1) No errors show up in Jupyter Notebook.
2) CLI shows a UserWarning: Pandas doesn't allow columns to be careated via a new attribute name, referencing `historical_data.data = ColumnDatasource.from_df(new_hist_source).
Ultimately, the plot should have a line for historical data, and another line or dot for predicted data derived from sklearn. It also has a dropdown menu to select each item to plot (one at a time).
Your update_plot is a no-op that does not actually make any changes to Bokeh model state, which is what is necessary to change a Bokeh plot. Changing Bokeh model state means assigning a new value to a property on a Bokeh object. Typically, to update a plot, you would compute a new data dict and then set an existing CDS from it:
source.data = new_data # plain python dict
Or, if you want to update from a DataFame:
source.data = ColumnDataSource.from_df(new_df)
As an aside, don't assign the .data from one CDS to another:
source.data = other_source.data # BAD
By contrast, your update_plot computes some new data and then throws it away. Note there is never any purpose to returning anything at all from any Bokeh callback. The callbacks are called by Bokeh library code, which does not expect or use any return values.
Lastly, I don't think any of those last JS console errors were generated by BokehJS.

Plotly for python, only first data point is being graphed

I am new to plotly and working on a script to generate a graph based on some results pulled from a database. However when I send the data over to plotly, only the first data point for each of the three traces is being graphed. I've verified that the lists contain the right data, I've even simply pasted the lists in instead of dynamically creating the variables. Unfortunately each time only the first data point is being graphed. Does anyone know what I am missing here? I am also open to another library if needed.
Is it also possible to have the x axis show as a string?
import plotly.plotly as py
import plotly.graph_objs as go
# Custom database class, works fine.
from classes.database import DatabaseConnection
# Database Connections and instances
db_instance = DatabaseConnection()
db_conn = db_instance.conn
db_cur = db_instance.cur
def main():
# Get a list of versions and their stats.
db_cur.execute(
"""
select row_to_json(x) from
(SELECT
versions.version_number,
cast(AVG(results.average) as double precision) as average,
cast(AVG(results.minimum) as double precision) as minimum,
cast(AVG(results.maximum) as double precision) as maximum
FROM versions,results
WHERE
versions.version_number = results.version_number
GROUP BY
versions.version_number) x;
"""
)
versions = []
average = []
minimum = []
maximum = []
unclean = db_cur.fetchall()
# Create lists for x and y coordinates.
for row in unclean:
versions.append(row[0]['version_number'])
average.append(int(row[0]['average']))
minimum.append(int(row[0]['minimum']))
maximum.append(int(row[0]['maximum']))
grph_average = go.Scatter(
x=versions,
y=average,
name = 'Average',
mode='lines',
)
grph_minimum = go.Scatter(
x=versions,
y=minimum,
name = 'Minimum',
mode='lines',
)
grph_maximum = go.Scatter(
x=versions,
y=maximum,
name = 'Maximum',
mode='lines',
)
data = go.Data([grph_average, grph_minimum, grph_maximum])
# Edit the layout
layout = dict(title = 'Responses',
xaxis = dict(title = 'Versions'),
yaxis = dict(title = 'Ms'),
)
fig = dict(data=data, layout=layout)
py.plot(fig, filename='response-times', auto_open=False)
if __name__ == '__main__':
main()
The data that query returns is as follows, if you want to plug in the values :
versions = ['6.1', '5.0', '5.2']
average = [11232, 29391, 10429]
minimum = [3641, 7729, 3483]
maximum = [57440, 62535, 45201]
Here is some matplotlib that might get you started on this:
import matplotlib.pyplot as plt
versions = ['6.1', '5.0', '5.2']
average = [11232, 29391, 10429]
minimum = [3641, 7729, 3483]
maximum = [57440, 62535, 45201]
plt.plot(minimum)
plt.plot(average)
plt.plot(maximum)
plt.xticks(range(len(versions)), versions)
It looks like it was an issue with my x axis. By adding some text before the version number and specifically type casting to a string I was able to get the graphs to generate properly.
# Create lists for x and y coordinates.
for row in unclean:
versions.append("Version: " + str(row[0]['version_number']))
average.append(int(row[0]['average']))
minimum.append(int(row[0]['minimum']))
maximum.append(int(row[0]['maximum']))

How to add significance levels on bar graph using Python's Matplotlib?

I have written some code to graph some data in Python's Matplotlib.
The plot currently:
The code to produce this plot:
groups=['Control','30min','24hour']
cell_lysate_avg=[11887.42595, 4862.429689, 3414.337554]
cell_lysate_sd=[1956.212855, 494.8437915, 525.8556207]
cell_lysate_avg=[i/1000 for i in cell_lysate_avg]
cell_lysate_sd=[i/1000 for i in cell_lysate_sd]
media_avg=[14763.71106,8597.475539,6374.732852]
media_sd=[240.8983759, 167.005365, 256.1374017]
media_avg=[i/1000 for i in media_avg] #to get ng/ml
media_sd=[i/1000 for i in media_sd]
fig, ax = plt.subplots()
index = numpy.arange(len(groups)) #where to put the bars
bar_width=0.45
opacity = 0.5
error_config = {'ecolor': '0.3'}
cell_lysate_plt=plt.bar(index,cell_lysate_avg,bar_width,alpha=opacity,color='black',yerr=cell_lysate_sd,error_kw=error_config,label='Cell Lysates')
media_plt=plt.bar(index+bar_width,media_avg,bar_width,alpha=opacity,color='green',yerr=media_sd,error_kw=error_config,label='Media')
plt.xlabel('Groups',fontsize=15)
plt.ylabel('ng/ml',fontsize=15)
plt.title('\n'.join(wrap('Average Over Biological Repeats for TIMP1 ELISA (n=3)',45)),fontsize=15)
plt.xticks(index + bar_width, groups)
plt.legend()
ax.tick_params(axis='x', labelsize=14)
ax.tick_params(axis='y', labelsize=14)
I have calculated the various two tailed t tests associated with this data and I want to display using standard scientific journal representation - i.e. a line connecting two bars with a star which represents a significance level of (say) >0.05. Can anybody tell me how to do this?
As far as I know there is no standard scientific journal representation for showing significance. The exact way you draw it is a matter of taste. This is probably the reason why matplotlib has no specific function for significance bars (at least to my knowledge). You could just do it manually. E.g:
from matplotlib.markers import TICKDOWN
def significance_bar(start,end,height,displaystring,linewidth = 1.2,markersize = 8,boxpad =0.3,fontsize = 15,color = 'k'):
# draw a line with downticks at the ends
plt.plot([start,end],[height]*2,'-',color = color,lw=linewidth,marker = TICKDOWN,markeredgewidth=linewidth,markersize = markersize)
# draw the text with a bounding box covering up the line
plt.text(0.5*(start+end),height,displaystring,ha = 'center',va='center',bbox=dict(facecolor='1.', edgecolor='none',boxstyle='Square,pad='+str(boxpad)),size = fontsize)
pvals = [0.001,0.1,0.00001]
offset =1
for i,p in enumerate(pvals):
if p>=0.05:
displaystring = r'n.s.'
elif p<0.0001:
displaystring = r'***'
elif p<0.001:
displaystring = r'**'
else:
displaystring = r'*'
height = offset + max(cell_lysate_avg[i],media_avg[i])
bar_centers = index[i] + numpy.array([0.5,1.5])*bar_width
significance_bar(bar_centers[0],bar_centers[1],height,displaystring)
Instead of the stars you could of course also explicitly write p<0.05 or something similar. You can then spend hours fiddling with the parameters until it looks just right.

Adding a single label to the legend for a series of different data points plotted inside a designated bin in Python using matplotlib.pyplot.plot()

I have a script for plotting astronomical data of redmapping clusters using a csv file. I could get the data points in it and want to plot them using different colors depending on their redshift values: I am binning the dataset into 3 bins (0.1-0.2, 0.2-0.25, 0.25,0.31) based on the redshift.
The problem arises with my code after I distinguish to what bin the datapoint belongs: I want to have 3 labels in the legend corresponding to red, green and blue data points, but this is not happening and I don't know why. I am using plot() instead of scatter() as I also had to do the best fit from the data in the same figure. So everything needs to be in 1 figure.
import numpy as np
import matplotlib.pyplot as py
import csv
z = open("Sheet4CSV.csv","rU")
data = csv.reader(z)
x = []
y = []
ylow = []
yupp = []
xlow = []
xupp = []
redshift = []
for r in data:
x.append(float(r[2]))
y.append(float(r[5]))
xlow.append(float(r[3]))
xupp.append(float(r[4]))
ylow.append(float(r[6]))
yupp.append(float(r[7]))
redshift.append(float(r[1]))
from operator import sub
xerr_l = map(sub,x,xlow)
xerr_u = map(sub,xupp,x)
yerr_l = map(sub,y,ylow)
yerr_u = map(sub,yupp,y)
py.xlabel("$Original\ Tx\ XCS\ pipeline\ Tx\ keV$")
py.ylabel("$Iterative\ Tx\ pipeline\ keV$")
py.xlim(0,12)
py.ylim(0,12)
py.title("Redmapper Clusters comparison of Tx pipelines")
ax1 = py.subplot(111)
##Problem starts here after the previous line##
for p in redshift:
for i in xrange(84):
p=redshift[i]
if 0.1<=p<0.2:
ax1.plot(x[i],y[i],color="b", marker='.', linestyle = " ")#, label = "$z < 0.2$")
exit
if 0.2<=p<0.25:
ax1.plot(x[i],y[i],color="g", marker='.', linestyle = " ")#, label="$0.2 \leq z < 0.25$")
exit
if 0.25<=p<=0.3:
ax1.plot(x[i],y[i],color="r", marker='.', linestyle = " ")#, label="$z \geq 0.25$")
exit
##There seems nothing wrong after this point##
py.errorbar(x,y,yerr=[yerr_l,yerr_u],xerr=[xerr_l,xerr_u], fmt= " ",ecolor='magenta', label="Error bars")
cof = np.polyfit(x,y,1)
p = np.poly1d(cof)
l = np.linspace(0,12,100)
py.plot(l,p(l),"black",label="Best fit")
py.plot([0,15],[0,15],"black", linestyle="dotted", linewidth=2.0, label="line $y=x$")
py.grid()
box = ax1.get_position()
ax1.set_position([box.x1,box.y1,box.width, box.height])
py.legend(loc='center left',bbox_to_anchor=(1,0.5))
py.show()
In the 1st 'for' loop, I have indexed every value 'p' in the list 'redshift' so that bins can be created using 'if' statement. But if I add the labels that are hashed out against each py.plot() inside the 'if' statements, each data point 'i' that gets plotted in the figure as an intersection of (x[i],y[i]) takes the label and my entire legend attains in total 87 labels (including the 3 mentioned in the code at other places)!!!!!!
I essentially need 1 label for each bin...
Please tell me what needs to done after the bins are created and py.plot() commands used...Thanks in advance :-)
Sorry I cannot post my image here due to low reputation!
The data 'appended' for x, y and redshift lists from the csv file are as follows:
x=[5.031,10.599,10.589,8.548,9.089,8.675,3.588,1.244,3.023,8.632,8.953,7.603,7.513,2.917,7.344,7.106,3.889,7.287,3.367,6.839,2.801,2.316,1.328,6.31,6.19,6.329,6.025,5.629,6.123,5.892,5.438,4.398,4.542,4.624,4.501,4.504,5.033,5.068,4.197,2.854,4.784,2.158,4.054,3.124,3.961,4.42,3.853,3.658,1.858,4.537,2.072,3.573,3.041,5.837,3.652,3.209,2.742,2.732,1.312,3.635,2.69,3.32,2.488,2.996,2.269,1.701,3.935,2.015,0.798,2.212,1.672,1.925,3.21,1.979,1.794,2.624,2.027,3.66,1.073,1.007,1.57,0.854,0.619,0.547]
y=[5.255,10.897,11.045,9.125,9.387,17.719,4.025,1.389,4.152,8.703,9.051,8.02,7.774,3.139,7.543,7.224,4.155,7.416,3.905,6.868,2.909,2.658,1.651,6.454,6.252,6.541,6.152,5.647,6.285,6.079,5.489,4.541,4.634,8.851,4.554,4.555,5.559,5.144,5.311,5.839,5.364,3.18,4.352,3.379,4.059,4.575,3.914,5.736,2.304,4.68,3.187,3.756,3.419,9.118,4.595,3.346,3.603,6.313,1.816,4.34,2.732,4.978,2.719,3.761,2.623,2.1,4.956,2.316,4.231,2.831,1.954,2.248,6.573,2.276,2.627,3.85,3.545,25.405,3.996,1.347,1.679,1.435,0.759,0.677]
redshift = [0.12,0.25,0.23,0.23,0.27,0.26,0.12,0.27,0.17,0.18,0.17,0.3,0.23,0.1,0.23,0.29,0.29,0.12,0.13,0.26,0.11,0.24,0.13,0.21,0.17,0.2,0.3,0.29,0.23,0.27,0.25,0.21,0.11,0.15,0.1,0.26,0.23,0.12,0.23,0.26,0.2,0.17,0.22,0.26,0.25,0.12,0.19,0.24,0.18,0.15,0.27,0.14,0.14,0.29,0.29,0.26,0.15,0.29,0.24,0.24,0.23,0.26,0.29,0.22,0.13,0.18,0.24,0.14,0.24,0.24,0.17,0.26,0.29,0.11,0.14,0.26,0.28,0.26,0.28,0.27,0.23,0.26,0.23,0.19]
Working with numerical data like this, you should really consider using a numerical library, like numpy.
The problem in your code arises from processing each record (a coordinate (x,y) and the corresponding value redshift) one at a time. You are calling plot for each point, thereby creating legends for each of those 84 datapoints. You should consider your "bins" as groups of data that belong to the same dataset and process them as such. You could use "logical masks" to distinguish between your "bins", as shown below.
It's also not clear why you call exit after each plotting action.
import numpy as np
import matplotlib.pyplot as plt
x = np.array([5.031,10.599,10.589,8.548,9.089,8.675,3.588,1.244,3.023,8.632,8.953,7.603,7.513,2.917,7.344,7.106,3.889,7.287,3.367,6.839,2.801,2.316,1.328,6.31,6.19,6.329,6.025,5.629,6.123,5.892,5.438,4.398,4.542,4.624,4.501,4.504,5.033,5.068,4.197,2.854,4.784,2.158,4.054,3.124,3.961,4.42,3.853,3.658,1.858,4.537,2.072,3.573,3.041,5.837,3.652,3.209,2.742,2.732,1.312,3.635,2.69,3.32,2.488,2.996,2.269,1.701,3.935,2.015,0.798,2.212,1.672,1.925,3.21,1.979,1.794,2.624,2.027,3.66,1.073,1.007,1.57,0.854,0.619,0.547])
y = np.array([5.255,10.897,11.045,9.125,9.387,17.719,4.025,1.389,4.152,8.703,9.051,8.02,7.774,3.139,7.543,7.224,4.155,7.416,3.905,6.868,2.909,2.658,1.651,6.454,6.252,6.541,6.152,5.647,6.285,6.079,5.489,4.541,4.634,8.851,4.554,4.555,5.559,5.144,5.311,5.839,5.364,3.18,4.352,3.379,4.059,4.575,3.914,5.736,2.304,4.68,3.187,3.756,3.419,9.118,4.595,3.346,3.603,6.313,1.816,4.34,2.732,4.978,2.719,3.761,2.623,2.1,4.956,2.316,4.231,2.831,1.954,2.248,6.573,2.276,2.627,3.85,3.545,25.405,3.996,1.347,1.679,1.435,0.759,0.677])
redshift = np.array([0.12,0.25,0.23,0.23,0.27,0.26,0.12,0.27,0.17,0.18,0.17,0.3,0.23,0.1,0.23,0.29,0.29,0.12,0.13,0.26,0.11,0.24,0.13,0.21,0.17,0.2,0.3,0.29,0.23,0.27,0.25,0.21,0.11,0.15,0.1,0.26,0.23,0.12,0.23,0.26,0.2,0.17,0.22,0.26,0.25,0.12,0.19,0.24,0.18,0.15,0.27,0.14,0.14,0.29,0.29,0.26,0.15,0.29,0.24,0.24,0.23,0.26,0.29,0.22,0.13,0.18,0.24,0.14,0.24,0.24,0.17,0.26,0.29,0.11,0.14,0.26,0.28,0.26,0.28,0.27,0.23,0.26,0.23,0.19])
bin3 = 0.25 <= redshift
bin2 = np.logical_and(0.2 <= redshift, redshift < 0.25)
bin1 = np.logical_and(0.1 <= redshift, redshift < 0.2)
plt.ion()
labels = ("$z < 0.2$", "$0.2 \leq z < 0.25$", "$z \geq 0.25$")
colors = ('r', 'g', 'b')
for bin, label, co in zip( (bin1, bin2, bin3), labels, colors):
plt.plot(x[bin], y[bin], color=co, ls='none', marker='o', label=label)
plt.legend()
plt.show()

Categories