pie chart label overlap - python

I am having trouble generating a pie chart whose percentage and country-code labels do not overlap. I am using Python 3 with matplotlib; see the image below.
Any way of fixing this issue, even if it changes the layout, is appreciated.
from collections import Counter
import json
import os

import matplotlib.pyplot as plt
import numpy as np

# Collect the country of origin of every personal name found in the JSON
# files stored under `path`.
countries = []
path = 'data_used_for_graph'
entries = os.listdir(path)
for file in entries:
    with open(path + "/" + file) as f:
        content = json.load(f)
    # Each file is expected to hold a 'personalNames' list whose items carry
    # a 'countryOrigin' value.
    countries.extend(x['countryOrigin'] for x in content['personalNames'])

# Count how often each country occurs and split the result into two
# parallel sequences (country codes and their counts).
counts = Counter(countries)
labels, values = zip(*counts.items())
# sort your values in descending order
indSort = np.argsort(values)[::-1]
# rearrange your data
labels = np.array(labels)[indSort]
values = np.array(values)[indSort]
# Data to plot
sizes = values
# Plot: one wedge per country, annotated with its share as a percentage
plt.pie(sizes, labels=labels, autopct='%1.1f%%', shadow=True, startangle=140)
plt.show()

Related

How can I put labels in two charts using matplotlib

I'm trying to plot two histograms using the result of a group by, but the axis labels appear on only one of the charts.
How can I put the labels on both charts?
And how can I give each chart a different title (e.g. "Men's grades" for the first and "Women's grades" for the second)?
import pandas as pd
import matplotlib.pyplot as plt

# Load the first 10,000 rows of the ENEM 2019 microdata file.
microdataEnem = pd.read_csv(
    'C:\\Users\\Lucas\\AppData\\Local\\Programs\\Python\\Python39\\Scripts\\Data Science\\Data Analysis\\Projects\\ENEM\\DADOS\\MICRODADOS_ENEM_2019.csv',
    sep=';',
    encoding='ISO-8859-1',
    nrows=10000,
)

# Keep only the sex and essay-grade columns, then discard rows that have no
# essay grade.
sex_essaygrade = ['TP_SEXO', 'NU_NOTA_REDACAO']
filter_sex_essaygrade = microdataEnem.filter(items=sex_essaygrade)
filter_sex_essaygrade = filter_sex_essaygrade.dropna(subset=['NU_NOTA_REDACAO'])

# One histogram figure per value of TP_SEXO.
filter_sex_essaygrade.groupby('TP_SEXO').hist()

# pyplot-level labels are applied once, after both histograms were drawn.
plt.xlabel('Grade')
plt.ylabel('Number of students')
plt.show()
Instead of using filter_sex_essaygrade.groupby('TP_SEXO').hist() you can try the following format: axs = filter_sex_essaygrade['NU_NOTA_REDACAO'].hist(by=filter_sex_essaygrade['TP_SEXO']). This will automatically title each histogram with the group name.
You'll want to assign the result of this call to a variable (axs) so that you can modify the x and y labels of both plots.
I created some data similar to yours, and I get the following result:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Fixed seed so the random sample data is the same on every run.
np.random.seed(42)
sex_essaygrade = ['TP_SEXO', 'NU_NOTA_REDACAO']

## create two distinct sets of grades
sample_grades = np.concatenate((
    np.random.randint(low=70, high=100, size=100),
    np.random.randint(low=80, high=100, size=100),
))
# The first 100 grades are labelled 'Men', the remaining 100 'Women'.
filter_sex_essaygrade = pd.DataFrame({
    'NU_NOTA_REDACAO': sample_grades,
    'TP_SEXO': ['Men'] * 100 + ['Women'] * 100,
})

# Draw one histogram of the grades per TP_SEXO group and keep the returned
# axes so that each subplot can be labelled individually.
axs = filter_sex_essaygrade['NU_NOTA_REDACAO'].hist(by=filter_sex_essaygrade['TP_SEXO'])
for ax in axs.flatten():
    ax.set_xlabel("Grade")
    ax.set_ylabel("Number of students")
plt.show()

My Choropleth map only shows the map, and not the colours

I am new to python and wanted to try using a choropleth map. I have the following code for the graph.
import numpy as np
import pandas as pd
import plotly.express as px

# Confirmed-case time series: one column per date.
df = pd.read_csv(r'C:\Users\lukee\Desktop\COVID Visualisation\time_series_covid_19_confirmed.csv')

# The country-name column is what gets handed to the `geojson` argument below.
geojson = df['Country/Region']

# Colour scale used to encode the number of cases.
colourscale = px.colors.sequential.Plasma

# Columns used for colouring and for the hover title.
case_counts = df['5/16/21']
country_names = df["Country/Region"]

# World map meant to show the intensity of cases in each country.
fig = px.choropleth(
    df,
    geojson=geojson,
    locationmode='country names',
    color=case_counts,
    color_continuous_scale=colourscale,
    scope='world',
    hover_name=country_names,
    # NOTE(review): this is a one-element set literal, not a
    # {column: label} mapping.
    labels={'COVID Cases'},
)
# Hide the colour bar.
fig.update(layout_coloraxis_showscale=False)
fig.show()
The solution sources its data from Our World in Data (OWID) rather than Kaggle.
In the plotting code there were some inconsistencies in how you referenced the columns of the DataFrame. I also added the featureidkey parameter so that the DataFrame and the GeoJSON join correctly.
data sourcing
import io
import json
from pathlib import Path
from zipfile import ZipFile

import pandas as pd
import requests

# source geojson for country boundaries
geosrc = pd.json_normalize(
    requests.get("https://pkgstore.datahub.io/core/geo-countries/7/datapackage.json").json()["resources"]
)
# URL of the zipped GeoJSON resource; the archive is stored locally under the
# URL's file name.
zip_url = geosrc.loc[geosrc["name"].eq("geo-countries_zip"), "path"].values[0]
fn = Path(zip_url).name
# Download the archive only when it is not already in the working directory.
if not Path.cwd().joinpath(fn).exists():
    r = requests.get(zip_url, stream=True)
    with open(fn, "wb") as fd:
        for chunk in r.iter_content(chunk_size=128):
            fd.write(chunk)

# Parse the first member of the archive as the GeoJSON document; the context
# managers close both the archive and the member once it has been read.
with ZipFile(fn) as zfile:
    with zfile.open(zfile.infolist()[0]) as f:
        geojson = json.load(f)

# source COVID data
dfall = pd.read_csv(
    io.StringIO(
        requests.get("https://raw.githubusercontent.com/owid/covid-19-data/master/public/data/owid-covid-data.csv").text
    )
)
dfall["date"] = pd.to_datetime(dfall["date"])
# Latest available row per iso_code.
dflatest = dfall.sort_values(["iso_code", "date"]).groupby("iso_code", as_index=False).last()
colorcol = "new_cases_smoothed_per_million"
# Keep only rows that have a positive value in the colour column, whose
# iso_code is three characters long (meant to drop non-country rows), and
# that are not big outliers (below the 97th percentile).
dflatest = dflatest.loc[
    dflatest[colorcol].gt(0).fillna(False)
    & dflatest["iso_code"].str.len().eq(3)
    & dflatest[colorcol].lt(dflatest[colorcol].quantile(0.97))
]
plotting
import plotly.express as px

# define the colour codes for the number of cases across the different dates
colourscale = px.colors.sequential.Plasma

# world map to show the intensity of cases in each country;
# rows of dflatest are matched to GeoJSON features by comparing the
# 'iso_code' column with each feature's properties.ISO_A3 value
fig = px.choropleth(
    dflatest,
    geojson=geojson,
    locations='iso_code',
    featureidkey="properties.ISO_A3",
    color=colorcol,
    color_continuous_scale=colourscale,
    scope='world',
    hover_name="location",
    labels={colorcol: 'COVID Cases'},
)
# Hide the colour bar and zero all four margins (left, right, top, bottom).
fig.update_layout(coloraxis_showscale=False, margin={"l": 0, "r": 0, "t": 0, "b": 0})
# Bare expression: presumably meant as the last line of a notebook cell so
# that the figure is displayed.
fig
output

How to name each city in a matplotlib/geopandas map from csv file?

I have a CSV file containing the cities in Senegal. There is a city column, and I have already used the longitude and latitude columns to plot the points, but now I'd like each point to have its city name next to it, and maybe more information later. How can I do this with matplotlib? Thank you.
import os

import geopandas as gpd
import matplotlib.pyplot as plt
import pandas as pd

# Both input files live in the same directory.
data_dir = "senegal_administrative"
file = os.path.join(data_dir, "senegal_administrative.shp")
cities_file = os.path.join(data_dir, "sn.csv")

cities = pd.read_csv(cities_file)
senegal = gpd.read_file(file)

## THE MAP IS PLOTTED HERE
axis = senegal.plot(
    color="lightblue",
    edgecolor="black",
    figsize=(20, 20),
)

# Turn the lng/lat columns into point geometries attached to the city table.
city_points = gpd.points_from_xy(cities.lng, cities.lat)
def_geo = gpd.GeoDataFrame(cities, geometry=city_points)
print(def_geo)

## THE POINTS ARE PLOTTED HERE, on the same axes as the map
def_geo.plot(ax=axis, color="black")
plt.show()
heres a dropbox link if you need the files, i really appreciate it : https://www.dropbox.com/sh/dr54pbc9a5zc5ke/AADMxnYHe4maAnwKArCsh1m8a?dl=0
To annotate the map, you can use .annotate() function.
Here is the relevant parts of the code:
axis = senegal.plot(color="lightblue", edgecolor="black", figsize=(12, 12))
# For every row of the city table, draw a small red marker at its
# (lng, lat) position and write the city name at the same coordinates.
for _, dat in cities.iterrows():
    axis.scatter(dat.lng, dat.lat, s=10, color='red')
    axis.annotate(dat.city, (dat.lng, dat.lat))
Plot will be similar to this:

Multiple Labels for Counter() Data Frame Frame Objects in Matplotlib Python?

I am doing a project on an ICU dataset and I wanted to make a double bar chart for survival and infection. I did it and it works fine, as shown in the image here, except that when I try to give it a legend or labels they are not accepted. Is there any way to add a legend and rename its entries?
I want the red bars to show survival and the blue bars to show infection.
import matplotlib.pyplot as plt
import numpy as np
import sys
import csv
import pandas as pd
import itertools
from collections import Counter
import pylab as pl
from matplotlib.dates import date2num
import datetime

with open('ICU.csv') as ICU:
    # reads csv
    df = pd.read_csv(ICU)

# lists holding the Survive and Infection columns with every value
# converted to int
Survive = [int(n) for n in df.Survive]
Infection = [int(n) for n in df.Infection]

# one-column frames indexed by each distinct value, holding how often it occurs
S = pd.DataFrame.from_dict(Counter(Survive), orient='index')
I = pd.DataFrame.from_dict(Counter(Infection), orient='index')

width = 0.4
fig = plt.figure()  # Create matplotlib figure
ax = fig.add_subplot(111)  # Create matplotlib axes
# position=1 / position=0 place the two bar series on either side of each tick
S.plot(kind='bar', color='red', ax=ax, width=width, position=1)
I.plot(kind='bar', color='blue', ax=ax, width=width, position=0,
       secondary_y=True)
plt.show()
seeds
You can use
plt.legend(labels=("survive", "infection"))

Only the graph axis shows up when I try to plot CSV data using pandas

I have a CSV file with zip codes and a corresponding number for each zip code. I want to plot it as a histogram, but right now only the axes are showing up, with none of the actual data.
import pandas as pd
import matplotlib.pyplot as plt

# Load the zip-code table.
installedbase = 'zipcode.csv'
df = pd.read_csv(installedbase)

# Histogram request: 1000 bins, bars drawn at half the bin width.
hist_options = {
    'x': 'zip_code_installed',
    'y': 'installed_NP',
    'kind': 'hist',
    'rwidth': .5,
    'bins': 1000,
}
df.plot(**hist_options)

plt.xlabel('zip code')
plt.ylabel('NP sum')

# Fix the visible window of the plot: [xmin, xmax, ymin, ymax].
x_min, x_max = 9000, 9650
y_min, y_max = 0, 6400
plt.axis([x_min, x_max, y_min, y_max])
plt.show()
I am using pandas and matplotlib to plot. "x" and "y" are both set to different columns in my CSV file.

Categories