iterate several polygons - 'Polygon object error' - python

I am constructing a graph to represent sales by country.
# to download the file
[file](https://drive.google.com/file/d/16Uw_rJgzJdqhhxYjK3tJGkTeXyOLENMR/view?usp=share_link)
store_sales = pd.read_pickle('store_sales.venta')
store_sales
p1_sales_by_country = store_sales.groupby(['country']).p1_sales.sum()
p1_sales_by_country
use a graph to represent sales values by country.
plt.figure(figsize=(16,6))
ax = plt.axes(projection=crs.PlateCarree())
shpfile = shapereader.natural_earth(resolution='110m',
category='cultural',
name='admin_0_countries')
reader = shapereader.Reader(shpfile)
countries = reader.records()
max_sales = p1_sales_by_country.max()
for country in countries:
country_name = country.attributes['ADM0_A3']
if country_name in p1_sales_by_country:
ax.add_geometries(country.geometry, crs.PlateCarree(),
facecolor=plt.cm.Greens(p1_sales_by_country[country_name]
/max_sales),
edgecolor='k')
else:
ax.add_geometries(country.geometry, crs.PlateCarree(),
facecolor='w',
edgecolor='k')
I have problems with iterating a Polygon
TypeError: 'Polygon' object is not iterable

Related

Add new sheet with graph

newby question.
I would like to add a new sheet to an existing wb that I've created with xlwings.
It seems that when I try to add e write the 2nd sheet the 1st one going to be overwritten.
Here the code :
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns # library for visualization
sns.set() # this command sets the seaborn chart style as the default
import xlwings as xw
from datetime import datetime
df=pd.read_excel('aspire_1909.xls')
df2=df.drop([0,2])
new_header = df2.iloc[1]
df2 = df2[2:]
df2.columns = new_header
df2=df2.set_index('User')
wb = xw.Book()
sht = wb.sheets[0]
sht.name = "Aspire Manager Graph"
sht.range('R1').value = df3
started=len(df3.loc[df3['Manager Review'] == 'Started'])
complete = len(df3.loc[df3['Manager Review'] == 'Complete'])
complete_reopened = len(df3.loc[df3['Manager Review'] == 'Complete (Reopened)'])
not_started = len(df3.loc[df3['Manager Review'] == 'Not Started'])
past_due = len(df3.loc[df3['Manager Review'] == 'Past Due'])
def insert_heading(rng,text):
rng.value = text
rng.font.bold = True
rng.font.size = 24
rng.font.color = (0,0,139)
insert_heading(sht.range("A2"),f"ASPIRE YEAR END REVIEW - MANAGER STATUS del {datetime.today().strftime('%d-%m-%Y')}")
data = {'Not Started':not_started, 'Started':started, 'Completed':complete,'Reopened' : complete_reopened,'Past Due ' : past_due }
status = list(data.keys())
values = list(data.values())
x_labels = list(a + ' ' + str(b) for (a, b) in zip(status, values))
fig = plt.figure(figsize = (10, 5))
# creating the bar plot
fig, ax = plt.subplots(figsize=(15, 15))
bars = ax.bar(status, values, color =['red','blue','green','yellow','violet'],
width = 0.4)
ax.bar_label(bars, fmt="%d", fontsize=26, rotation=0, padding=3)
plt.bar(status, values, color =['red','blue','green','yellow','violet'],
width = 0.4)
plt.xticks(status, x_labels)
plt.xticks(rotation = 45, fontsize = 13)
plt.xlabel("Year End Review Completion Status")
plt.ylabel("No Users",rotation=45,fontsize = 13)
plt.title("Aspire Mgr Year End Review")
plt.show()
sht.pictures.add(fig,
name = "Aspire Mgr Status Graph",
update = True,
left =sht.range("A4").left,
top = sht.range("A4").top,
height= 500,
width= 700)
sht1 = wb.sheets[0]
wb.sheets.add('Aspire Employees Graph')
sht1.range('R1').value = df2
started=len(df2.loc[df2['Aspire year-end reflection (FY22)'] == 'Started'])
complete = len(df2.loc[df2['Aspire year-end reflection (FY22)'] == 'Complete'])
complete_reopened = len(df2.loc[df2['Aspire year-end reflection (FY22)'] == 'Complete (Reopened)'])
not_started = len(df2.loc[df2['Aspire year-end reflection (FY22)'] == 'Not Started'])
past_due = len(df2.loc[df2['Aspire year-end reflection (FY22)'] == 'Past Due'])
def insert_heading(rng,text):
rng.value = text
rng.font.bold = True
rng.font.size = 24
rng.font.color = (0,0,139)
insert_heading(sht1.range("A2"),f"ASPIRE YEAR END REVIEW EMPLOYEE STATUS del {datetime.today().strftime('%d-%m-%Y')}")
data = {'Not Started':not_started, 'Started':started, 'Completed':complete,'Reopened' : complete_reopened,'Past Due ' : past_due }
status = list(data.keys())
values = list(data.values())
x_labels = list(a + ' ' + str(b) for (a, b) in zip(status, values))
fig = plt.figure(figsize = (10, 5))
# creating the bar plot
fig, ax = plt.subplots(figsize=(15, 15))
bars = ax.bar(status, values, color =['red','blue','green','yellow','violet'],
width = 0.4)
ax.bar_label(bars, fmt="%d", fontsize=26, rotation=0, padding=3)
plt.bar(status, values, color =['red','blue','green','yellow','violet'],
width = 0.4)
plt.xticks(status, x_labels)
plt.xticks(rotation = 45, fontsize = 13)
plt.xlabel("Year End Review Completion Status")
plt.ylabel("Nb. Users",rotation=45,fontsize = 13)
plt.title("Aspire Employee Year End Review")
plt.show()
sht1.pictures.add(fig,
name = "Aspire Employee Status Graph",
update = True,
left =sht.range("A4").left,
top = sht.range("A4").top,
height= 500,
width= 700)
Could someone would be able to help me get what Is wrong ? ( I know ,almost everything :-) )
Thanks a lot in advance
In the second half of the code you have:
sht1 = wb.sheets[0]
wb.sheets.add('Aspire Employees Graph')
sht1.range('R1').value = df2
What wb.sheets[0] is returning is the very first sheet of the workbook. Towards the beginning you have the first section, which is:
sht = wb.sheets[0]
sht.name = "Aspire Manager Graph"
sht.range('R1').value = df3
As you use wb.sheets[0] both times, but haven't inserted a sheet at the beginning, you are just referring to the same sheet. The addition of the new sheet is correct, but you haven't set that as variable sht1.
Instead, for the second section, you could re-write to the following, combining the two lines into one so that the variable is the correct sheet:
sht1 = wb.sheets.add('Aspire Employees Graph')
sht1.range('R1').value = df2
Edit
To change the colour of the sheet tab:
sht1.api.Tab.ColorIndex = 3
The full list of colours can be found in the VBA ColorIndex documentation.
For more specific colours, see the answers to this question.

Barplot subplot legend python

Hello everyone how can i make legend for 3 different bar color that code with subplot?
Data frame:
This is my code:
fig,axs = plt.subplots(2,3, figsize=(30,20))
axs[0,1].bar(x = df_7_churn_tenure['Kategori'],height = df_7_churn_tenure['Jumlah Churn'],color = ['lightsalmon','maroon','darkorange'])
axs[0,1].legend(['Low Tenure = x<24','Medium Tenure = 24<= x <=48','High Tenure = x >=48'],loc='best',fontsize=12)
plt.show()
And the result for barplot legend only shows 1 label like this:
Is there any solution to shows all of my legend?
Try this:
fig,axs = plt.subplots(2,3, figsize=(30,20))
axs[0,1].bar(x = df_7_churn_tenure['Kategori'],height = df_7_churn_tenure['Jumlah Churn'],color ['lightsalmon','maroon','darkorange'])
axs = axs[0,1]
lns1 = axs.plot('-',label = 'Low Tenure = x<24')
lns2 = axs.plot('-',label = 'Medium Tenure = 24<= x <=48')
lns3 = axs.plot('-',label = 'High Tenure = x >=48')
# added these three lines
lns = lns1+lns2+lns3
labs = [l.get_label() for l in lns]
axs.legend(lns, labs,loc=0)
plt.show()

How can combine geometry with countries?

I need a file in which I will have the names of European and Asian countries and their "geometry" data.
Part of Russia was jumping to the other side of the chart and I had to correct the data to keep the Russia map in one piece.
I found a code for it, but unfortunately when I execute it, it only keeps geometry data, which cannot be easily linked with the names of countries that I need
Initial map:
enter image description here
Code:
world = geopandas.read_file(geopandas.datasets.get_path('naturalearth_lowres'))
asia = world[world["continent"] == "Asia"]
europe = world[world["continent"] == "Europe"]
euroasia = pd.concat([asia, europe])
name = euroasia["name"]
def shift_geom(shift, gdataframe, plotQ=False):
shift -= 180
moved_geom = []
splitted_geom = []
border = LineString([(shift,90),(shift,-90)])
for row in gdataframe["geometry"]:
splitted_geom.append(split(row, border))
for element in splitted_geom:
items = list(element)
for item in items:
minx, miny, maxx, maxy = item.bounds
if minx >= shift:
moved_geom.append(translate(item, xoff=-180-shift))
else:
moved_geom.append(translate(item, xoff=180-shift))
moved_geom_gdf = gpd.GeoDataFrame({"geometry": moved_geom})
if plotQ:
fig1, ax1 = plt.subplots(figsize=[20,30])
moved_geom_gdf.plot(ax=ax1)
plt.show()
return moved_geom_gdf
new = shift_geom(90, euroasia, False)
n_ = shift_geom(-90, new, True)
new
Results (good map, but only geometry data):
enter image description here

Slight challenge. Need to optimize code. Original one takes too long :(

So i'm trying to read 3 different csv files and plotting the information in an img with four graphs.
One with the average delay by airline, expressed in minutes
Another with the ratio of delayed flights, by airline
Another with the average delay by destination airport, expressed in minutes
And finally another with the ratio of flights delayed to arrival, by destination airport
All the information is correct and i got it from the files. The problem is that the program below takes too long to produce the graphs and that they're all being separately plotted and not all together in one image. Is there a way to optimize my code to run faster? And how do i use subplots without changing everything?
import pandas as pd
import matplotlib.pyplot as plt
path_main = '850566403_T_ONTIME.csv'
path_airline = 'L_AIRLINE_ID.csv'
path_airport = 'L_AIRPORT_ID.csv'
df1 = pd.read_csv(path_main)
al = pd.read_csv(path_airline)
ap = pd.read_csv(path_airport)
#remove columns and rows with nan
df1.dropna(axis=1, how='all', inplace=True)
df = df1.dropna(subset=['ARR_DELAY_NEW'])
#------------------------------------------------------------------------------
#Airlines:
#dict with {ID: Name}
d_al = {}
for i in range(len(al)):
d_al[al['Code'][i]] = al['Description'][i]
# array with ID's of airlines and delays
arr_al = df.loc[:, ('AIRLINE_ID', 'ARR_DELAY_NEW')].to_numpy()
# list with ID's of airlines
list_al = []
for i in arr_al:
if i[0] not in list_al:
list_al.append(int(i[0]))
def airline_avg_ratio(ID):
'''
function that requires an airline ID
and returns a tuple with name (first 10
characters), average delay and
delayed flight ratios
'''
nr_voos = 0
soma = 0
nr_atrasos = 0
for i in arr_al:
if i[0] == ID:
soma += i[1]
nr_voos += 1
if i[1] != 0:
nr_atrasos += 1
for k,v in d_al.items():
if k == ID:
nome = v[0:10]
media = round((soma / nr_atrasos), 3)
racio = round((nr_atrasos / nr_voos), 3)
return (nome, media, racio)
dados_al = []
for i in list_al:
dados_al.append(airline_avg_ratio(i))
df_al = pd.DataFrame(dados_al, columns=['Airlines', 'Average', 'Ratio'])
graph1 = df_al.drop(columns='Ratio').sort_values(by='Average').iloc[-10:]
graph1.plot(x='Airlines', y='Average', kind='bar')
plt.title("Atraso Médio por Companhia (top 10)")
plt.xlabel('Companhia Aérea', fontsize=12)
plt.ylabel('Minutos', fontsize=12)
plt.show()
graph2 = df_al.drop(columns='Average').sort_values(by='Ratio').iloc[-10:]
graph2.plot(x='Airlines', y='Ratio', kind='bar')
plt.title("Vôos Atrasados por Companhia (top 10)")
plt.xlabel('Companhia Aérea', fontsize=12)
plt.ylabel('Rácio', fontsize=12)
plt.show()
#------------------------------------------------------------------------------
# Airports:
#dict with {ID: Name}
d_ap = {}
for i in range(len(ap)):
d_ap[ap['Code'][i]] = ap['Description'][i]
#array with ID's of Airports and delays
arr_ap = df.loc[:, ('DEST_AIRPORT_ID', 'ARR_DELAY_NEW')].to_numpy()
#list with ID's of Airports
list_ap = []
for i in arr_ap:
if i[0] not in list_ap:
list_ap.append(int(i[0]))
def airport_avg_ratio(ID):
'''
function that requires an airport
and returns a tuple with name (first 10
characters), average delay and
delayed flight ratios
'''
nr_chegadas = 0
soma = 0
nr_atrasos = 0
for i in arr_ap:
if i[0] == ID:
soma += i[1]
nr_chegadas += 1
if i[1] != 0:
nr_atrasos += 1
for k,v in d_ap.items():
if k == ID:
nome = v[0:10]
media = round((soma / nr_atrasos), 3)
racio = round((nr_atrasos / nr_chegadas), 3)
return (nome, media, racio)
dados_ap = []
for i in list_ap:
dados_ap.append(airport_avg_ratio(i))
df_ap = pd.DataFrame(dados_ap, columns=['Airports', 'Average', 'Ratio'])
graph3 = df_ap.drop(columns='Ratio').sort_values(by='Average').iloc[-10:]
graph3.plot(x='Airports', y='Average', kind='bar')
plt.title("Atraso Médio por Aeroporto (top 10)")
plt.xlabel('Aeroporto', fontsize=12)
plt.ylabel('Minutos', fontsize=12)
plt.show()
graph4 = df_ap.drop(columns='Average').sort_values(by='Ratio').iloc[-10:]
graph4.plot(x='Airports', y='Ratio', kind='bar')
plt.title("Vôos Atrasados por Aeroporto (top 10)")
plt.xlabel('Aeroporto', fontsize=12)
plt.ylabel('Rácio', fontsize=12)
plt.show()

Customize Bokeh Unemployment Example: Replacing Percentage Value

starter code: https://docs.bokeh.org/en/latest/docs/gallery/texas.html
I am trying to replace the unemployment percentage with a different percentage that I have in a csv file. The csv columns are county name and concentration.
I am using the same call method for the county data as in the example. Just pulling in different data for the percentage value.
I have tried turning the csv into a dictionary to then look up the county name value and return the corresponding concentration using the same format as the starter code. I've tried inner joining, outer joining, appending. What am I missing here?
from bokeh.io import show
from bokeh.models import LogColorMapper
from bokeh.palettes import Viridis6 as palette
from bokeh.plotting import figure
from bokeh.sampledata.us_counties import data as counties
import pandas as pd
import csv
#with open('resources/concentration.csv', mode='r') as infile:
#reader = csv.reader(infile)
#with open('concentration_new.csv', mode='w') as outfile:
#writer = csv.writer(outfile)
#mydict = {rows[0]:rows[1] for rows in reader}
#d_1_2= dict(list(counties.items()) + list(mydict.items()))
pharmacy_concentration = []
with open('resources/unemployment.csv', mode = 'r') as infile:
reader = csv.reader(infile, delimiter = ',', quotechar = ' ') # remove
last attribute if you dont have '"' in your csv file
for row in reader:
name, concentration = row
pharmacy_concentration[name] = concentration
counties = {
code: county for code, county in counties.items() if county["state"] ==
"tx"
}
palette.reverse()
county_xs = [county["lons"] for county in counties.values()]
county_ys = [county["lats"] for county in counties.values()]
county_names = [county['name'] for county in counties.values()]
#this is the line I am trying to have pull the corresponding value for the correct county
#county_rates = [d_1_2['concentration'] for county in counties.values()]
color_mapper = LogColorMapper(palette=palette)
data=dict(
x=county_xs,
y=county_ys,
name=county_names,
#rate=county_rates,
)
TOOLS = "pan,wheel_zoom,reset,hover,save"
p = figure(
title="Texas Pharmacy Concentration", tools=TOOLS,
x_axis_location=None, y_axis_location=None,
tooltips=[
("Name", "#name"), ("Pharmacy Concentration", "#rate%"),
(" (Long, Lat)", "($x, $y)")])
p.grid.grid_line_color = None
p.hover.point_policy = "follow_mouse"
p.patches('x', 'y', source=data,
fill_color={'field': 'rate', 'transform': color_mapper},
fill_alpha=0.7, line_color="white", line_width=0.5)
show(p)
Is is hard to speculate without knowing the exact structure of you csv file. Assuming there are just 2 columns in your csv file: county_name + concentration (no first empty column or there between) the following code may work for you:
from bokeh.models import LogColorMapper
from bokeh.palettes import Viridis256 as palette
from bokeh.plotting import figure, show
from bokeh.sampledata.us_counties import data as counties
import csv
pharmacy_concentration = {}
with open('resources/concentration.csv', mode = 'r') as infile:
reader = [row for row in csv.reader(infile.read().splitlines())]
for row in reader:
try:
county_name, concentration = row # add "dummy" before "county_name" if there is an empty column in the csv file
pharmacy_concentration[county_name] = float(concentration)
except Exception, error:
print error, row
counties = { code: county for code, county in counties.items() if county["state"] == "tx" }
county_xs = [county["lons"] for county in counties.values()]
county_ys = [county["lats"] for county in counties.values()]
county_names = [county['name'] for county in counties.values()]
county_pharmacy_concentration_rates = [pharmacy_concentration[counties[county]['name']] for county in counties if counties[county]['name'] in pharmacy_concentration]
palette.reverse()
color_mapper = LogColorMapper(palette = palette)
data = dict(x = county_xs, y = county_ys, name = county_names, rate = county_pharmacy_concentration_rates)
p = figure(title = "Texas Pharmacy Concentration, 2009", tools = "pan,wheel_zoom,reset,hover,save", tooltips = [("Name", "#name"), ("Pharmacy Concentration)", "#rate%"), ("(Long, Lat)", "($x, $y)")], x_axis_location = None, y_axis_location = None,)
p.grid.grid_line_color = None
p.hover.point_policy = "follow_mouse"
p.patches('x', 'y', source = data, fill_color = {'field': 'rate', 'transform': color_mapper}, fill_alpha = 0.7, line_color = "white", line_width = 0.5)
show(p)
The result:

Categories