x = np.log(df_pitcher['adj_salary_filled'][df_pitcher['year'] == 2010])
y = np.log(df_pitcher['adj_salary_filled'][df_pitcher['year'] == 2015])
z = np.log(df_pitcher['adj_salary_filled'][df_pitcher['year'] == 2019])
_ = plt.hist(x, bins=20, alpha=0.5, label='2010 Season')
_ = plt.hist(y, bins=20, alpha=0.5, label='2015 Season')
_ = plt.hist(z, bins=20, alpha=0.5, label='2019 Season')
_ = plt.xlabel('Salaries')
_ = plt.ylabel('Frequency')
_ = plt.legend()
_ = plt.title('Distribution of Salaries for Pitchers')
_ = plt.show()
It's basically returning the log of my salaries, anyway to make it return the actual salary amount? I am taking the log because it'll help show the actual distribution.
You can manually get/set the tick positions and the corresponding labels for the X axis using plt.xticks.
A easy way to fix your specific issue is to get the tick values and set their labels to be the square of each value:
tick_values, tick_labels = plt.xticks()
plt.xticks(tick_values, [round(value**2, 3) for value in tick_values])
Related
fig, ax = plt.subplots(figsize=(1, 1), constrained_layout=True)
_ = ax.set_ylim(-2, 1.75)
_ = ax.set_xlim("1989", "2021")
_ = ax.axhline(0, xmin=0.05, xmax=0.50, c='deeppink', zorder=1)
_ = ax.scatter(dates, np.zeros(len(dates)), s=120, c='palevioletred', zorder=2)
_ = ax.scatter(dates, np.zeros(len(dates)), s=30, c='darkmagenta', zorder=3)
label_offsets = np.zeros(len(dates))
label_offsets[::2] = 0.25
label_offsets[1::2] = -0.4
for i, (l, d) in enumerate(zip(names, dates)):
_ = ax.text(d, label_offsets[i], l, ha='center', fontfamily='serif', fontweight='bold', color='royalblue',fontsize=8)
I want labels to show up but it shows value error.
UserWarning: constrained_layout not applied because axes sizes collapsed to zero. Try making figure larger or axes decorations smaller.
fig.canvas.print_figure(bytes_io, **kw)
It says value error i have tried changing dimensions but result is same
The methods I normally use for prevent overlap of data labels don't seem to apply to this matplotlib-based vertical timeline. Normally I would use something like autofmt_xdate.
I don't actually care how "tall" (long) the chart needs to be or if the vertical distance between the items perfectly preserves the ratio of the date gap between items. I just don't want the text labels to overlap.
I am not very experience with matplotlib but thusfar I've determined that the code that controls the text labels is
_ = ax.text(label_offsets[i], d, l, ha=align, fontfamily='serif', fontweight='bold', color='royalblue',fontsize=12)
where d is a timestamp being used as a y-axis coordinate. I tried creating a loop counter and a list to contain the value of d from each loop iteration, so that I could make a comparison between the current and prior date, then add a fixed value (e.g. 15 days) to the current value of d if the prior value of d was too close. That didn't seem to do the trick.
The plot code:
chartdata = pd.read_csv(
"data/Events.csv"
)
chartdata = chartdata.query('Year > 1939')
dates = pd.to_datetime(chartdata['Date_Clean_Approx'])
min_date = date(np.min(dates).year - 2, np.min(dates).month, np.min(dates).day)
max_date = date(np.max(dates).year + 2, np.max(dates).month, np.max(dates).day)
labels = chartdata['Name']
# labels with associated dates
labels = ['{0:%d %b %Y}:\n{1}'.format(d, l) for l, d in zip (labels, dates)]
fig, ax = plt.subplots(figsize=(6, 32), constrained_layout=True)
_ = ax.set_xlim(-20, 20)
_ = ax.set_ylim(min_date, max_date)
_ = ax.axvline(0, ymin=0.05, ymax=0.95, c='deeppink', zorder=1)
_ = ax.scatter(np.zeros(len(dates)), dates, s=120, c='palevioletred', zorder=2)
_ = ax.scatter(np.zeros(len(dates)), dates, s=30, c='darkmagenta', zorder=3)
label_offsets = np.repeat(2.0, len(dates))
label_offsets[1::2] = -2.0
for i, (l, d) in enumerate(zip(labels, dates)):
d = d - timedelta(days=90)
align = 'right'
if i % 2 == 0:
align = 'left'
_ = ax.text(label_offsets[i], d, l, ha=align, fontfamily='serif', fontweight='bold', color='royalblue',fontsize=12)
stems = np.repeat(2.0, len(dates))
stems[1::2] *= -1.0
x = ax.hlines(dates, 0, stems, color='darkmagenta')
# hide lines around chart
for spine in ["left", "top", "right", "bottom"]:
_ = ax.spines[spine].set_visible(False)
# hide tick labels
_ = ax.set_xticks([])
_ = ax.set_yticks([])
_ = ax.set_title('UAP (UFO) Milestones, 1940 - Present',
fontweight="bold",
fontfamily='serif',
fontsize=16,
color='royalblue')
pyplot(fig)
As you can see in the image, I increased the height of the plot to reduce overlap in the labels. The plot got very long but still has overlap in text. Once a solution is determined I think that embedding the logic to "adjust relative gap size and chart height as needed to avoid text label overlap" into a convenient plot function would be a large contribution to the matplotlib library.
I'm definitely open to better, more programmatic, solutions but for now I replaced the dates with a same-length vector of consecutive integers and used it in place of the actual dates everywhere except within the text of the labels.
###### Timeline#
chartdata = pd.read_csv(
"/home/kodachi/Documents/ET/aliendb/www/app/data/Events.csv"
)
chartdata=chartdata.query('Year > 1939')
dates = pd.to_datetime(chartdata['Date_Clean_Approx'])
min_date = date(np.min(dates).year - 2, np.min(dates).month, np.min(dates).day)
max_date = date(np.max(dates).year + 2, np.max(dates).month, np.max(dates).day)
###
# fake date
fake_d=np.c_[1:len(dates)]
###
labels = chartdata['Name']
# labels with associated dates
labels = ['{0:%d %b %Y}:\n{1}'.format(d, l) for l, d in zip (labels, dates)]
fig, ax = plt.subplots(figsize=(8, 28))#, constrained_layout=True)
_ = ax.set_xlim(-20, 20)
#_ = ax.set_ylim(min_date, max_date)
_ = ax.set_ylim(1, 96)
_ = ax.axvline(0, ymin=0.05, ymax=.985, c='deeppink', zorder=1)#ymax=0.95
#_ = ax.scatter(np.zeros(len(dates)), dates, s=120, c='palevioletred', zorder=2)
#_ = ax.scatter(np.zeros(len(dates)), dates, s=30, c='darkmagenta', zorder=3)
_ = ax.scatter(np.zeros(len(fake_d)), fake_d, s=120, c='palevioletred', zorder=2)
_ = ax.scatter(np.zeros(len(fake_d)), fake_d, s=30, c='darkmagenta', zorder=3)
#label_offsets = np.repeat(2.0, len(dates))
label_offsets = np.repeat(2.0, len(fake_d))
label_offsets[1::2] = -2.0
for i, (l, d) in enumerate(zip(labels, fake_d)): #dates
#d = d - timedelta(days=90)
align = 'right'
if i % 2 == 0:
align = 'left'
_ = ax.text(label_offsets[i], d, l, ha=align, fontfamily='serif',
fontweight='bold', color='royalblue',fontsize=12)
#stems = np.repeat(2.0, len(dates))
stems = np.repeat(2.0, len(fake_d))
stems[1::2] *= -1.0
#x = ax.hlines(dates, 0, stems, color='darkmagenta')
x = ax.hlines(fake_d, 0, stems, color='darkmagenta')
# hide lines around chart
for spine in ["left", "top", "right", "bottom"]:
_ = ax.spines[spine].set_visible(False)
# hide tick labels
_ = ax.set_xticks([])
_ = ax.set_yticks([])
_ = ax.set_title('UAP (UFO) Milestones, 1940 - Present',
fontweight="bold",
fontfamily='serif',
fontsize=16,
color='darkgreen')
x = range(11, 31)
y_1 = [1,0,1,1,2,4,3,2,3,4,4,5,6,5,4,3,3,1,1,1]
y_2 = [1,0,3,1,2,2,3,3,3,2,1,2,1,1,1,1,1,1,1,1]
# 图形大小
plt.figure(figsize=(20, 8), dpi = 80)
# 设置x轴刻度
x_label = ["{}岁".format(i) for i in x]
plt.xticks(list(x), x_label, fontproperties = my_font) # 显示中文刻度
# 绘制网格
plt.grid(alpha = 0.4)
# 添加图例
plt.legend(prop = my_font, loc = "upper left")
# Execution
plt.plot(x, y_1, label = "Mine")
plt.plot(x, y_2, label = "Him")
# Display
plt.show()
This is the code for legends showing in Python matplotlib, and I wanna show the legend, but the result is that there is no show at all.
You need to call plt.legend after plotting the curves:
# first
plt.plot(x, y_1, label = "Mine")
plt.plot(x, y_2, label = "Him")
# next
plt.legend(loc = "upper left")
output (smaller size):
I'm working on using a for loop to produce graphs for each set of data I have. Each plot prints correctly however the savefig() portion of my code only saves the last plot under each file name.
Here is a section of my code
total = 3
idx_list = []
dct = {}
for i, df in enumerate(graph_list):
data = pd.DataFrame(df)
for idx, v in enumerate(data['content'].unique()):
dct[f'x{idx}'] = data.loc[data['content'] == v]
idx_list.append(idx)
xs = dct[f'x{idx}'].Time
yB = dct[f'x{idx}'].Weight
yA = dct[f'x{idx}'].Height
fig, ax = plt.subplots(figsize =(10,8))
legends = ['Weight', 'Height']
ax.plot(xs, yB, linestyle = ':', color ='#4c4c4c', linewidth = 4.0)
ax.plot(xs, yA, color = '#fac346', linewidth = 3.0)
ax.legend(legends, loc = 'lower center', ncol = len(legends), bbox_to_anchor = (0.5, -0.15), frameon = False)
ax.yaxis.set_major_formatter(mtick.PercentFormatter(xmax=1, decimals = None, symbol ='%', is_latex = False))
ax.set_xticks(xs[::4])
ax.tick_params(axis = 'x', labelrotation = 45, labelsize = 10)
ax.yaxis.grid()
new_idx = [x+1 for x in idx_list]
for graph in range(total+1):
if graph != 0:
for ids in set(new_idx):
print('Graph {0} ID {1}'.format(graph, ids))
fig.savefig('Graph {0} ID {1}.jpg'.format(graph, ids))
I want each graph to save under the file names:
Graph 1 ID 1
Graph 1 ID 2
Graph 2 ID 1
Graph 2 ID 2
Graph 3 ID 1
Graph 3 ID 2
Thanks for any help you can provide!
You do not keep a reference to each figure, so when you call fig.savefig in the final loop you are actually saving the figure referenced by fig (which is the last figure) each time. There are many ways to manage this: you can save the figure in the same loop that created it, you can assign a unique name to each figure, or you can keep a reference to each figure in a list. The first option is simpler:
dct = {} # I assume this dict is used for something after saving the figures. Otherwise it is not necessary
for i, df in enumerate(graph_list):
data = pd.DataFrame(df)
for idx, v in enumerate(data['content'].unique()):
dct[f'x{idx}'] = data.loc[data['content'] == v]
xs = dct[f'x{idx}'].Time
yB = dct[f'x{idx}'].Weight
yA = dct[f'x{idx}'].Height
fig, ax = plt.subplots(figsize=(10, 8))
legends = ['Weight', 'Height']
ax.plot(xs, yB, linestyle=':', color='#4c4c4c', linewidth=4.0)
ax.plot(xs, yA, color='#fac346', linewidth=3.0)
ax.legend(legends, loc='lower center', ncol=len(legends),
bbox_to_anchor=(0.5, -0.15), frameon=False)
ax.yaxis.set_major_formatter(mtick.PercentFormatter(xmax=1,
decimals=None, symbol='%', is_latex=False))
ax.set_xticks(xs[::4])
ax.tick_params(axis='x', labelrotation=45, labelsize=10)
ax.yaxis.grid()
print('Graph {0} ID {1}'.format(i+1, idx+1))
fig.savefig('Graph {0} ID {1}.jpg'.format(i+1, idx+1))
plt.close(fig) # if you do not need to leave the figures open
Currently I have a plot with too many points, I want to avoid overlapping. Want to know how to reduce the amount of points in order to have a smoother line.
Plot Code
fig = plt.figure(1, figsize = (18,10)) # Figure size in inches (size_x, size_y)
ax = plt.axes()
min_val = prediction_intervals2[:, 0]
max_val = prediction_intervals2[:, 1]
true_values = y_test
predicted_values = PLS_Model1.predict(X_test)
plt.plot(min_val, label = "Min", color='blue')
plt.plot(max_val, label = "Max", color='red')
plt.plot(true_values, label = "y", color = "black")
plt.plot(predicted_values, label = "y\u0302", marker='o')
plt.title('Conformal Predictor Final Predictions')
plt.legend()
plt.show()
Current Plot
Desired Plot
Plot that I want
I was able to revise my code properly and came to the desired output by just selecting less data points, quite simple. Posted the answer just in case.
min_val_normal = plot_normalized_table[['Min']]
max_val_normal = plot_normalized_table[['Max']]
original_normal = plot_normalized_table[['Original Label']]
interval_normal = plot_normalized_table[['Interval Size']]
normal_predicted = predicted_values[0:50]
fig = plt.figure(1, figsize = (18,10)) # Figure size in inches (size_x, size_y)
ax = plt.axes()
#predicted_values = PLS_Model1.predict(X_test) #Predictions from test data (run at least once for the plot to work)
plt.plot(min_val_normal, label = "Min", color='blue')
plt.plot(max_val_normal, label = "Max", color='red')
plt.plot(original_normal, label = "y", color = "black")
plt.plot(normal_predicted, label = "y\u0302", marker='o', )
plt.title('Normalized Final Conformal Predictions')
plt.xlim([-1, 51])
plt.ylim([-1, 2])
plt.legend()
plt.show()