I am doing a project on an ICU dataset and I wanted to make double bar charts for survival and infection. I did it and it works fine, as shown in the image here, except that now, when I try to give it a legend or labels, it does not accept them. Is there any way that I can add a legend and rename its entries?
I want the red bars to show survival and the blue bars to show infection.
import matplotlib.pyplot as plt
import numpy as np
import sys
import csv
import pandas as pd
import itertools
from collections import Counter
import pylab as pl
from matplotlib.dates import date2num
import datetime

# Read the ICU records; the file handle is only needed while pandas parses it.
with open('ICU.csv') as ICU:
    df = pd.read_csv(ICU)

# Convert both outcome columns to plain Python ints before counting.
Survive = [int(n) for n in df.Survive]
Infection = [int(n) for n in df.Infection]

# One-column frames of occurrence counts, indexed by the distinct values.
S = pd.DataFrame.from_dict(Counter(Survive), orient='index')
I = pd.DataFrame.from_dict(Counter(Infection), orient='index')

width = 0.4
fig = plt.figure()  # Create matplotlib figure
ax = fig.add_subplot(111)  # Create matplotlib axes

# position=1 and position=0 align the two bar sets on opposite sides of each
# tick so they sit next to each other; the second set is drawn against a
# secondary y axis.
S.plot(kind='bar', color='red', ax=ax, width=width, position=1)
I.plot(kind='bar', color='blue', ax=ax, width=width, position=0,
       secondary_y=True)
plt.show()
seeds
You can use
# Sets the legend entry text in order: "survive" first, "infection" second.
# NOTE(review): the question's second plot uses secondary_y=True — confirm
# that both bar series end up in this legend.
plt.legend(labels=("survive", "infection"))
Related
I'm trying to plot two histograms using the result of a group by, but the axis labels appear on only one of the charts.
How can I put the label in both charts?
And how can I put different title for the charts (e.g. first as Men's grade and Second as Woman's grade)
import pandas as pd
import matplotlib.pyplot as plt

# Load only the first 10,000 rows of the semicolon-separated microdata file.
microdataEnem = pd.read_csv(
    'C:\\Users\\Lucas\\AppData\\Local\\Programs\\Python\\Python39\\Scripts\\Data Science\\Data Analysis\\Projects\\ENEM\\DADOS\\MICRODADOS_ENEM_2019.csv',
    sep=';',
    encoding='ISO-8859-1',
    nrows=10000,
)

# Keep just the two columns of interest and drop rows without an essay grade.
sex_essaygrade = ['TP_SEXO', 'NU_NOTA_REDACAO']
filter_sex_essaygrade = microdataEnem.filter(items=sex_essaygrade)
filter_sex_essaygrade.dropna(subset=['NU_NOTA_REDACAO'], inplace=True)

# Histogram the remaining column once per TP_SEXO group.
grouped_by_sex = filter_sex_essaygrade.groupby('TP_SEXO')
grouped_by_sex.hist()

# These pyplot calls act on the current axes only.
plt.xlabel('Grade')
plt.ylabel('Number of students')
plt.show()
Instead of using filter_sex_essaygrade.groupby('TP_SEXO').hist() you can try the following format: axs = filter_sex_essaygrade['NU_NOTA_REDACAO'].hist(by=filter_sex_essaygrade['TP_SEXO']). This will automatically title each histogram with the group name.
You'll want to assign the result of this call to a variable, axs, so that you can modify the x and y labels for both plots.
I created some data similar to yours, and I get the following result:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

np.random.seed(42)
sex_essaygrade = ['TP_SEXO', 'NU_NOTA_REDACAO']

## create two distinct sets of grades (drawn in this order so the seeded
## sequence is reproducible): 100 values in [70, 100) and 100 in [80, 100)
grades_first_group = np.random.randint(low=70, high=100, size=100)
grades_second_group = np.random.randint(low=80, high=100, size=100)
sample_grades = np.concatenate((grades_first_group, grades_second_group))

filter_sex_essaygrade = pd.DataFrame({
    'NU_NOTA_REDACAO': sample_grades,
    'TP_SEXO': ['Men'] * 100 + ['Women'] * 100,
})

# Histogram the grade column split by TP_SEXO; keep the returned axes so
# every subplot can be labelled.
grade_column = filter_sex_essaygrade['NU_NOTA_REDACAO']
axs = grade_column.hist(by=filter_sex_essaygrade['TP_SEXO'])

for ax in axs.flatten():
    ax.set_xlabel("Grade")
    ax.set_ylabel("Number of students")

plt.show()
I'm going insane here ... this should be a simple exercise but I'm stuck:
I have a Jupyter notebook and am using the ruptures Python package. All I want to do is, take the figure or AxesSubplot(s) that the display() function returns and add it to a figure of my own, so I can share the x-axis, have a single image, etc.:
import pandas as pd
import matplotlib.pyplot as plt
import ruptures as rpt  # provides KernelCPD and display used below

myfigure = plt.figure()
l = len(df.columns)
# Iterating a DataFrame directly yields column labels, so items() is used to
# get each column as a Series (needed for .to_numpy() and .name below).
for index, (_, series) in enumerate(df.items()):
    data = series.to_numpy().astype(int)
    algo = rpt.KernelCPD(kernel='rbf', min_size=4).fit(data)
    result = algo.predict(pen=3)
    # One stacked subplot slot per column in the shared figure.
    myfigure.add_subplot(l, 1, index+1)
    rpt.display(data, result)
    plt.title(series.name)
plt.show()
What I get is a figure with the desired number of subplots (all empty) and n separate figures from ruptures:
When instead I want the subplots to be filled with the figures ...
I basically had to recreate the plot that ruptures.display(data,result) produces, to get my desired figure:
import pandas as pd
import numpy as np
import ruptures as rpt
import matplotlib.pyplot as plt
from matplotlib.ticker import EngFormatter

# squeeze=False keeps `axs` a 2-D array even when df has a single column, so
# the indexing below works for any number of columns.
fig, axs = plt.subplots(len(df.columns), figsize=(22, 20), dpi=300, squeeze=False)

for index, series in enumerate(df):
    # `series` is the column label here; resample to 6-hour means and
    # forward-fill the gaps left by empty bins.
    resampled = df[series].dropna().resample('6H').mean().ffill()
    data = resampled.to_numpy().astype(int)
    algo = rpt.KernelCPD(kernel='rbf', min_size=4).fit(data)
    result = algo.predict(pen=3)

    # Prepend 0 so that consecutive entries delimit each segment.
    result = np.insert(result, 0, 0)
    ax = axs[index, 0]

    # Fill area between results, alternating default colour / red.
    for i, (start, end) in enumerate(zip(result[:-1], result[1:])):
        span_style = {'color': 'red'} if i % 2 else {}
        ax.axvspan(start, end, lw=0, alpha=.25, **span_style)

    ax.plot(data)
    ax.set_title(series)
    ax.yaxis.set_major_formatter(EngFormatter())

plt.subplots_adjust(hspace=.3)
plt.show()
I've wasted more time on this than I can justify, but it's pretty now and I can sleep well tonight :D
I have been trying to get a boxplot with each box representing an emotion over a period of time.
The data frame used to plot this contains a timestamp and an emotion name. I have tried converting the timestamp into a string first, then to datetime, and finally to int64. This resulted in the gaps between x labels seen in the plot. I have tried the same without converting to int64, but matplotlib doesn't seem to allow the dates in the plot.
I'm attaching the code I have used here:
import matplotlib as mpl
import matplotlib.pyplot as plt
plt.style.use('classic')
%matplotlib qt
import pandas as pd
import numpy as np
from datetime import datetime
import seaborn as sns
data = pd.read_csv("TX-governor-sentiment.csv")
## check data types
data.dtypes
# drop rows with all missing values
data = data.dropna(how='all')
## transforming the timestamp column
#convert from obj type to string then to date type
data['timestamp2'] = data['timestamp']
data['timestamp2'] = pd.to_datetime(data['timestamp2'].astype(str), format='%m/%d/%Y %H:%M')
# convert to number format with the following logic:
# yyyymmddhourmin --> this allows us to treat dates as a continuous variable
data['timestamp2'] = data['timestamp2'].dt.strftime('%Y%m%d%H%M')
data['timestamp2'] = data['timestamp2'].astype('int64')
print (data[['timestamp','timestamp2']])
#data transformation for data from Orange
df = pd.DataFrame(columns=('timestamp', 'emotion'))
for index, row in data.iterrows():
if row['sentiment'] == 0:
df.loc[index] = [row['timestamp2'], 'Neutral']
else:
df.loc[index] = [row['timestamp2'], row['Emotion']]
# Plot using Seaborn & Matplotlib
#convert timestamp in case it's not in number format
df['timestamp'] = df['timestamp'].astype('int64')
fig = plt.figure(figsize=(10,10))
#colors = {"Neutral": "grey", "Joy": "pink", "Surprise":"blue"}
#visualize as boxplot
plot_ = sns.boxplot(x="timestamp", y="emotion", data=df, width=0.5,whis=np.inf);
#add data point on top
plot_ = sns.stripplot(x="timestamp", y="emotion", data=df, alpha=0.8, color="black");
fig.canvas.draw()
#modify ticks and labels
plt.xlim([202003010000,202004120000])
plt.xticks([202003010000, 202003150000, 202003290000, 202004120000], ['2020/03/01', '2020/03/15', '2020/03/29', '2020/04/12'])
#add colors
for patch in plot_.artists:
r, g, b, a = patch.get_facecolor()
patch.set_facecolor((r, g, b, .3))
Please let me know how I can overcome this problem of gaps in the boxplot. Thank you!
I have this bar graph but the X labels that are long keep overflowing into the other label. Is there a way I can create more space or cause a line break when it is doing this?
Below is the part of the code that accounts for the graph
import pandas as pd
import matplotlib.pyplot as plt

# Bar chart of tweet_volume per name, drawn with a small tick font.
ax = tweets_df.plot(
    kind='bar',
    x='name',
    y='tweet_volume',
    fontsize=7,
    width=.5,
)
ax.set(xlabel='Hastag', ylabel='Tweets w/ Hashtag')

# Keep the x tick labels horizontal.
plt.xticks(rotation='horizontal')
plt.show()
IMHO you can use rotation=90 instead of rotation='horizontal'. Or, if you want to keep the labels horizontal, you can truncate them to their first N characters:
import pandas as pd
import matplotlib.pyplot as plt

N = 5  # number of leading characters kept from each tick label

ax = tweets_df.plot(kind='bar', x='name', y='tweet_volume', fontsize=7, width=.5)
ax.set_xlabel('Hastag')
ax.set_ylabel('Tweets w/ Hashtag')
plt.xticks(rotation='horizontal')

# Read back the current tick label texts and replace each with its first N
# characters.
labels = [tick.get_text() for tick in ax.get_xticklabels()]
shortened = [text[:N] for text in labels]
ax.set_xticklabels(shortened)
plt.show()
I'm working on a script that plots a pps count versus time from a csv file. Everything works up to this point however I can't seem to figure out how to change the interval at which the ticks/tick-labels occur at on the X-axis, I want there to be 60 timestamps/tick instead of the default. Here's where I'm at:
import matplotlib
matplotlib.use('Agg')
from matplotlib.mlab import csv2rec
import matplotlib.pyplot as plt
from matplotlib.ticker import MaxNLocator
from pylab import *

# Read the log as two named columns: column 1 = packets, column 2 = time.
data = csv2rec('tpm_counter.log', names=['packets', 'time'])

# Figure dimensions and base font size for the plot.
rcParams.update({'figure.figsize': (12, 4), 'font.size': 8})

fig, ax = plt.subplots()
ax.plot(data['time'], data['packets'])  # x = time, y = packets
ax.set_xlabel("Time(minutes)")
ax.set_ylabel("Packets")
ax.set_title("Packets Capture Log: Packets Per Minute")
#plt.xticks(range(60)) --- nothing shows on the graph if I use this

# Rotate the x tick labels and leave room for them at the bottom.
fig.autofmt_xdate(bottom=0.2, rotation=90, ha='left')
fig.savefig('tpm.png')  # output file name
I've tried plt.xticks(range(60)) but when the plot generates, it has nothing on it.
bmu's answer above works. But it might be helpful to others to see a more general way of rescaling the xticks and xlabels in a plot. I have generated some example data instead of using a csv file.
import matplotlib
import matplotlib.pyplot as plt
from pylab import *

time = range(5000)  # just as an example
data = range(5000)  # just as an example

fig = plt.figure()
plt.plot(time, data)  # this sets the fields to be graphed
plt.xlabel("Every 60th point")  # this sets the x label
plt.ylabel("Data")  # this sets the y label
plt.title("Rescaling axes")  # this sets the title

# Slice the data into every 60th point. We want ticks at these points.
tickpos = data[::60]

# Label each tick with its index among the ticks. Floor division keeps the
# labels as whole numbers ("0", "1", ...) under both Python 2 and Python 3;
# true division would give "0.0", "1.0", ... on Python 3.
ticklabels = [str(point // 60) for point in tickpos]

plt.xticks(tickpos, ticklabels)  # set the xtick positions and labels
plt.savefig('tpm.png')
Have a look at the date demo.
You can use the HourLocator or the MinuteLocator together with an adapted DateFormatter.
import matplotlib.dates as mdates
import matplotlib.pyplot as plt

fig, ax = plt.subplots()
ax.plot_date(data['time'], data['packets'])

# Major ticks come from an HourLocator and are formatted as HH:MM.
x_axis = ax.xaxis
x_axis.set_major_locator(mdates.HourLocator())
x_axis.set_major_formatter(mdates.DateFormatter('%H:%M'))