As can be seen in the following plot, the first bar is not displayed.
Why isn't it displayed, and how can the code be fixed so the bar does display?
import json
import requests
import matplotlib.pyplot as plt

i = 0
while i <= 2:
    key = '4ee91801f78b4271a5d90623210211'
    main_url = 'http://api.weatherapi.com/v1/current.json'
    city = input('Hangi Şehri Almak istiyorsunuz : ')  # Turkish: "Which city do you want?"
    response = requests.get(main_url, params={
        'key': key,
        'q': city,
        'lang': 'tr'
    })
    data = response.json()
    c_values = data['current']['temp_c']
    if i == 0:
        city1 = city
        c_values1 = c_values
    elif i == 1:
        city2 = city
        c_values2 = c_values
    else:
        city3 = city
        c_values3 = c_values
    i = i + 1

names = [f'{city1}', f'{city2}', f'{city3}']
values = [f'{c_values1}', f'{c_values2}', f'{c_values3}']
plt.bar(names, values)
plt.show()
The issue is that [f'{c_values1}', f'{c_values2}', f'{c_values3}'] is a list of str values, not numbers.
It should be [c_values1, c_values2, c_values3] so the y-axis can be plotted correctly with numbers, not text.
Because the values are strings, matplotlib treats the y-axis as categorical: each string becomes a category, and the first one sits at the axis origin, so the first bar is drawn with zero height. Notice the bottom value of the bar plot is '12', which is the top of the 'Paris' bar.
['12.0', '11.0', '17.0'] - note the values are strings.
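If you want to keep the rest of the code unchanged, converting the strings back to numbers right before plotting also fixes it:

values = [float(c_values1), float(c_values2), float(c_values3)]
plt.bar(names, values)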
I also recommend using while i < 3: instead of while i <= 2:.
Alternatively, simplify the code:

i = 0
cd = dict()  # create an empty dict
while i < 3:
    key = '4ee91801f78b4271a5d90623210211'
    main_url = 'http://api.weatherapi.com/v1/current.json'
    city = input('Hangi Şehri Almak istiyorsunuz : ')
    response = requests.get(main_url, params={
        'key': key,
        'q': city,
        'lang': 'tr'
    })
    data = response.json()
    cd[city] = data['current']['temp_c']  # add the city and temp to the dict
    i += 1

plt.bar(cd.keys(), cd.values())
plt.show()
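Since Python 3.7 dicts preserve insertion order, so the bars appear in the order the cities were entered, and the temperatures stay floats, which keeps the y-axis numeric.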
This is a typical example of a JSON response from a US Census Geocoder API request for addresses.
When I geocode the addresses using my API call, I collect the payload into a JSON file. When parsing that file with the Python code below, the geocodes sometimes get associated with the wrong input address: whenever a response hits a timeout, raises an exception, or contains random HTML text, the addresses and their geocodes start to mismatch as I convert the address geographies to a dataframe.
How can I modify my Python script to map the corresponding geocodes to the input addresses? Any help would be appreciated!
import json
import numpy as np
import pandas as pd

street = []
city = []
ipstate = []
zipcode = []
status = []
geoid = []
centlat = []
centlon = []
block = []
state = []
basename = []
oid = []
intptlat = []
objectid = []
tract = []
blkgrp = []
arealand = []
intptlon = []
county = []
for i in range(len(payload)):
    if '<!DOCTYPE html>' in payload[i]:
        print(i, 'HTML Response')
        status.append('HTML response')
        geoid.append(np.nan)
        centlat.append(np.nan)
        block.append(np.nan)
        state.append(np.nan)
        basename.append(np.nan)
        oid.append(np.nan)
        intptlat.append(np.nan)
        objectid.append(np.nan)
        tract.append(np.nan)
        centlon.append(np.nan)
        blkgrp.append(np.nan)
        arealand.append(np.nan)
        intptlon.append(np.nan)
        county.append(np.nan)
        street.append(np.nan)
        city.append(np.nan)
        ipstate.append(np.nan)
        zipcode.append(np.nan)
    else:
        data = json.loads(payload[i])
        inputAddress = data['result']['input']['address']
        street.append(inputAddress['street'])
        city.append(inputAddress['city'])
        ipstate.append(inputAddress['state'])
        zipcode.append(inputAddress['zip'])
        censusParams = data['result']['addressMatches']
        if len(censusParams) == 0:
            # print('No Match', i)
            status.append('No Match')
            geoid.append(np.nan)
            centlat.append(np.nan)
            block.append(np.nan)
            state.append(np.nan)
            basename.append(np.nan)
            oid.append(np.nan)
            intptlat.append(np.nan)
            objectid.append(np.nan)
            tract.append(np.nan)
            centlon.append(np.nan)
            blkgrp.append(np.nan)
            arealand.append(np.nan)
            intptlon.append(np.nan)
            county.append(np.nan)
            # print(inputAddress['street'], inputAddress['city'], inputAddress['state'], inputAddress['zip'])
        else:
            # print('Match', i)
            status.append('Match')
            # print(inputAddress['street'], inputAddress['city'], inputAddress['state'], inputAddress['zip'])
            for c in censusParams:
                for key, value in c.items():
                    if key == 'geographies':
                        censusBlocks = dict_get(value, 'Census Blocks')
                        params = censusBlocks[0][0]
                        geoid.append(params['GEOID'])
                        centlat.append(params['CENTLAT'])
                        centlon.append(params['CENTLON'])
                        block.append(params['BLOCK'])
                        state.append(params['STATE'])
                        basename.append(params['BASENAME'])
                        oid.append(params['OID'])
                        intptlat.append(params['INTPTLAT'])
                        intptlon.append(params['INTPTLON'])
                        objectid.append(params['OBJECTID'])
                        tract.append(params['TRACT'])
                        blkgrp.append(params['BLKGRP'])
                        arealand.append(params['AREALAND'])
                        county.append(params['COUNTY'])
df_columns = ['Match', 'STREET', 'CITY', 'IP_STATE', 'ZIP', 'GEOID',
              'CENTLAT', 'CENTLON', 'BLOCK', 'STATE', 'BASENAME', 'OID',
              'INTPTLAT', 'INTPTLON', 'OBJECTID', 'TRACT', 'BLKGRP',
              'AREALAND', 'COUNTY']
json_df = pd.DataFrame(list(zip(status, street, city, ipstate, zipcode,
                                geoid, centlat, centlon, block, state,
                                basename, oid, intptlat, intptlon,
                                objectid, tract, blkgrp, arealand,
                                county)), columns=df_columns)
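The root cause of the mismatches is that parallel lists only stay aligned if every branch appends to every list exactly once per payload entry; the Match branch can append more than once (one per address match), and any skipped append shifts all later rows. One way to avoid this entirely is to build one dict per payload entry and let pandas fill the gaps. This is a sketch under the assumption that payload and the dict_get helper behave as in the code above, and it keeps only the first address match so each input yields exactly one row:

rows = []
for raw in payload:
    row = {}  # one dict per record keeps all fields for an address together
    if '<!DOCTYPE html>' in raw:
        row['Match'] = 'HTML response'
    else:
        data = json.loads(raw)
        addr = data['result']['input']['address']
        row.update(STREET=addr['street'], CITY=addr['city'],
                   IP_STATE=addr['state'], ZIP=addr['zip'])
        matches = data['result']['addressMatches']
        if matches:
            row['Match'] = 'Match'
            params = dict_get(matches[0]['geographies'], 'Census Blocks')[0][0]
            for col in ('GEOID', 'CENTLAT', 'CENTLON', 'BLOCK', 'STATE',
                        'BASENAME', 'OID', 'INTPTLAT', 'INTPTLON', 'OBJECTID',
                        'TRACT', 'BLKGRP', 'AREALAND', 'COUNTY'):
                row[col] = params[col]
        else:
            row['Match'] = 'No Match'
    rows.append(row)

json_df = pd.DataFrame(rows, columns=df_columns)  # missing fields become NaN automatically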
I've written a python program that takes some inputs and turns them into a matplotlib graph. Specifically, it displays wealth distributions by percentile for a country of the user's choosing. However, these inputs are currently given by changing variables in the program.
I want to put this code on a website, allowing users to choose any country and see the wealth distribution for that country, as well as how they compare. Essentially, I am trying to recreate this: https://wid.world/income-comparator/
The Python code is all done, but I am struggling to incorporate it into an HTML file. I was trying to use PyScript, but it currently loads forever and displays nothing. I'd rather not rewrite it in JavaScript (mainly because I don't know JS). My guess is that it has something to do with the code importing CSV files from my device.
import csv
from typing import List
import matplotlib.pyplot as plt
import collections
import math
from forex_python.converter import CurrencyRates
# ---------------- #
# whether or not the graph includes the top 1 percent in the graph (makes the rest of the graph visible!)
one_percent = False # True or False
# pick which country(ies) you want to view
country = 'China' # String
# what currency should the graph use
currency_used = 'Canada' # String
# if you want to compare an income
compare_income = True # True or False
# what income do you want to compare
income = 100000 # Int
# ---------------- #
codes = {}
# get dictionary of monetary country codes
monetary_codes = {}
with open('codes-all.csv') as csv_file:
    reader = csv.reader(csv_file, delimiter=',')  # renamed to avoid shadowing the built-in list
    for row in reader:
        if row[5] == "":
            monetary_codes[row[0]] = (row[2], row[1])

# get dictionary of country names and codes for WID
with open('WID_countries.csv') as csv_file:
    WID_codes = csv.reader(csv_file, delimiter=',')
    next(WID_codes)
    for row in WID_codes:
        if len(row[0]) == 2:
            if row[2] != "":
                monetary_code = monetary_codes[row[1].upper()][0]
                currency_name = monetary_codes[row[1].upper()][1]
                codes[row[1].upper()] = (row[0], monetary_code, currency_name)
            elif row[2] == "":
                codes[row[1].upper()] = (row[0], 'USD', 'United States Dollar')
        elif row[0][0] == 'U' and row[0][1] == 'S':
            codes[row[1].upper()] = (row[0], 'USD', 'United States Dollar')
# converts user input to upper case
country = country.upper()
currency_used = currency_used.upper()

# gets conversion rate
c = CurrencyRates()
conversion_rate = c.get_rate(codes[country][1], codes[currency_used][1])

# convert money into correct currency
def convert_money(conversion_rate, value):
    return float(value) * conversion_rate
# get and clean data
def get_data(country):
    aptinc = {}
    # cleaning the data
    with open(f'country_data/WID_data_{codes[country][0]}.csv') as csv_file:
        data = csv.reader(csv_file, delimiter=';')
        for row in data:
            # I only care about the year 2021 and the variable 'aptinc'
            if 'aptinc992' in row[1] and row[3] == '2021':
                # translates percentile string into a numerical value
                index = 0
                for i in row[2]:
                    # index 0 is always 'p', so we get rid of that
                    if index == 0:
                        row[2] = row[2][1:]
                    # each string has a p in the middle of the numbers we care about. I also only
                    # care about the rows which measure a single percentile
                    # (upper bound - lower bound <= 1)
                    elif i == 'p':
                        lb = float(row[2][:index - 1])
                        ub = float(row[2][index:])
                        # if the top one percent is being filtered out, add another requirement
                        if not one_percent:
                            if ub - lb <= 1 and ub <= 99:
                                row[2] = ub
                            else:
                                row[2] = 0
                        else:
                            if ub - lb <= 1:
                                row[2] = ub
                            else:
                                row[2] = 0
                    index += 1
                # adds wanted, cleaned data to a dictionary. Also converts all values to one currency
                if row[2] != 0:
                    aptinc[row[2]] = convert_money(conversion_rate, row[4])
    return aptinc
# find the closest percentile to an income
def closest_percentile(income, data):
    closest = math.inf
    percentile = float()
    for i in data:
        difference = income - data[i]
        if abs(difference) < closest:
            closest = abs(difference)  # store the absolute gap so later comparisons stay correct
            percentile = i
    return percentile
# ---------------- #
unsorted_data = {}
percentiles = []
average_income = []

# gets data for the country
data = get_data(country)
for i in data:
    unsorted_data[i] = data[i]

# sorts the data (renamed so the built-in sorted() is not shadowed)
sorted_data = collections.OrderedDict(sorted(unsorted_data.items()))
for i in sorted_data:
    percentiles.append(i)
    average_income.append(data[i])

# makes countries pretty for printing
country = country.lower()
country = country.capitalize()

# calculates where the income places against incomes from country(ies)
blurb = ""
if compare_income:
    percentile = closest_percentile(income, sorted_data)
    blurb = f"You are richer than {round(percentile)} percent of {country}'s population"

# plot this data!
plt.plot(percentiles, average_income)
plt.title(f'{country} Average Annual Income by Percentile')
plt.xlabel(f'Percentile\n{blurb}')
plt.ylabel(f'Average Annual Income of {country} ({codes[currency_used][1]})')
plt.axvline(x=99, color='r', label='99th percentile', linestyle=':')
if compare_income:
    plt.axvline(x=percentile, color='g', label=f'{income} {codes[currency_used][2]}')
plt.legend(bbox_to_anchor=(0, 1), loc='upper left')
plt.show()
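On the PyScript hypothesis: code running in the browser cannot open() files from your device; the CSVs have to be served next to the HTML page and fetched over HTTP, otherwise the script fails and the page can appear to load forever. A minimal sketch of the data-loading part, assuming the files are hosted alongside the page and that Pyodide's open_url helper is available (as it is in PyScript environments):

import csv
from pyodide.http import open_url

# fetch the CSV over HTTP instead of reading the local filesystem;
# open_url returns an in-memory text buffer that csv.reader accepts
csv_file = open_url('codes-all.csv')
monetary_codes = {}
for row in csv.reader(csv_file, delimiter=','):
    if row[5] == "":
        monetary_codes[row[0]] = (row[2], row[1])

Note also that forex_python fetches exchange rates over the network with the requests library, which does not work in Pyodide's sandbox, so the currency conversion would need a browser-friendly replacement as well.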
I am trying to scrape all the possible data from this webpage: Gstaad 2017.
Here is my code:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import re
from selenium import webdriver  # needed for webdriver.Chrome below
from selenium.webdriver.support.ui import Select

# Starts the driver and goes to our starting webpage
driver = webdriver.Chrome("C:/Users/aldi/Downloads/chromedriver.exe")
driver.get('http://www.bvbinfo.com/Tournament.asp?ID=3294&Process=Matches')

# Imports HTML into python
page = requests.get('http://www.bvbinfo.com/Tournament.asp?ID=3294&Process=Matches')  # not used; the parsed HTML comes from Selenium below
soup = BeautifulSoup(driver.page_source, 'lxml')
stages = soup.find_all('div')
stages = driver.find_elements_by_class_name('clsTournBracketHeader')[-1].text
# TODO: the first row (country quota matches) has no p tag and therefore it is not included in the data
rows = []
paragraphs = []
empty_paragraphs = []
for x in soup.find_all('p'):
    if len(x.get_text(strip=True)) != 0:
        paragraph = x.extract()
        paragraphs.append(paragraph)
    if len(x.get_text(strip=True)) == 0:
        empty_paragraph = x.extract()
        empty_paragraphs.append(empty_paragraph)

# players
home_team_player_1 = ''
home_team_player_2 = ''
away_team_player_1 = ''
away_team_player_2 = ''
for i in range(0, len(paragraphs)):
    # round and stage of the competition
    round_n = paragraphs[i].find('u').text
    paragraph_rows = paragraphs[i].text.split('\n')[1:-1]
    counter = 0
    for j in range(0, len(paragraph_rows)):
        # TODO: tournament info, these can vary from tournament to tournament
        tournament_info = soup.find('td', class_='clsTournHeader').text.strip().split()
        tournament_category = [' '.join(tournament_info[0:2])][0]
        tournament_prize_money = tournament_info[2]
        # TODO: tournament city can also have two elements, not just one
        tournament_city = tournament_info[3]
        tournament_year = tournament_info[-1]
        tournament_days = tournament_info[-2][:-1].split("-")
        tournament_starting_day = tournament_days[0]
        tournament_ending_day = tournament_days[-1]
        tournament_month = tournament_info[-3]
        tournament_stars = [' '.join(tournament_info[5:7])][0]
        players = paragraphs[i].find_all('a', {'href': re.compile('.*player.*')})
        home_team_player_1 = players[counter + 0].text
        home_team_player_2 = players[counter + 1].text
        away_team_player_1 = players[counter + 2].text
        away_team_player_2 = players[counter + 3].text
        # matches
        match = paragraph_rows[j].split(":")[0].split()[-1].strip()
        # nationalities
        nationalities = ["United", "States"]
        if paragraph_rows[j].split("def.")[0].split("/")[1].split("(")[0].split(" ")[3] in nationalities:
            home_team_country = "United States"
        else:
            home_team_country = paragraph_rows[j].split("def.")[0].split("/")[1].split("(")[0].split(" ")[-2]
        if paragraph_rows[j].split("def.")[1].split("/")[1].split(" ")[3] in nationalities:
            away_team_country = "United States"
        else:
            away_team_country = paragraph_rows[j].split("def.")[1].split("/")[1].split("(")[0].split(" ")[-2]
        parentheses = re.findall(r'\(.*?\)', paragraph_rows[j])
        if "," in parentheses[0]:
            home_team_ranking = parentheses[0].split(",")[0]
            home_team_ranking = home_team_ranking[1:-1]
            home_team_qualification_round = parentheses[0].split(",")[1]
            home_team_qualification_round = home_team_qualification_round[1:-1]
        else:
            home_team_ranking = parentheses[0].split(",")[0]
            home_team_ranking = home_team_ranking[1:-1]
            home_team_qualification_round = None
        if "," in parentheses[1]:
            away_team_ranking = parentheses[1].split(",")[0]
            away_team_ranking = away_team_ranking[1:-1]
            away_team_qualification_round = parentheses[1].split(",")[1]
            away_team_qualification_round = away_team_qualification_round[1:-1]
        else:
            away_team_ranking = parentheses[1].split(",")[0]
            away_team_ranking = away_team_ranking[1:-1]
            away_team_qualification_round = None
        # match duration applies to every row, so it is set outside the if/else above
        match_duration = parentheses[2]
        match_duration = match_duration[1:-1]
        # sets
        sets = re.findall(r'\).*?\(', paragraph_rows[j])
        sets = sets[1][1:-1]
        if len(sets.split(",")) == 2:
            score_set1 = sets.split(",")[0]
            score_set2 = sets.split(",")[1]
            score_set3 = None
        if len(sets.split(",")) == 3:
            score_set1 = sets.split(",")[0]
            score_set2 = sets.split(",")[1]
            score_set3 = sets.split(",")[2]
        row = {"home_team_player_1": home_team_player_1,
               "home_team_player_2": home_team_player_2,
               "away_team_player_1": away_team_player_1,
               "away_team_player_2": away_team_player_2,
               "match": match,
               "home_team_country": home_team_country,
               "away_team_country": away_team_country,
               "home_team_ranking": home_team_ranking,
               "away_team_ranking": away_team_ranking,
               "match_duration": match_duration,
               "home_team_qualification_round": home_team_qualification_round,
               "away_team_qualification_round": away_team_qualification_round,
               "score_set1": score_set1,
               "score_set2": score_set2,
               "score_set3": score_set3,
               "tournament_category": tournament_category,
               "tournament_prize_money": tournament_prize_money,
               "tournament_city": tournament_city,
               "tournament_year": tournament_year,
               "tournament_starting_day": tournament_starting_day,
               "tournament_ending_day": tournament_ending_day,
               "tournament_month": tournament_month,
               "tournament_stars": tournament_stars,
               "round_n": round_n
               }
        counter += 4
        rows.append(row)

data = pd.DataFrame(rows)
data.to_csv("beachvb.csv", index=False)
I am not really experienced in web scraping. I have just started teaching myself, and I find the HTML source code quite messy and poorly structured.
I want to improve my code in two ways:
1. Include all the missing matches (country quota matches, semifinals, bronze medal, and gold medal) and the respective category for each match (country quota, pool, winner's bracket, semifinals, bronze medal, and gold medal).
2. Iterate the code over more years and tournaments from the dropdown menu at the top of the webpage.
I have tried to iterate through different years, but my code does not work:
tournament_years = {"FIVB 2015", "FIVB 2016"}
dfs = []
for year in tournament_years:
    # select desired tournament
    box_year = Select(driver.find_element_by_xpath("/html/body/table[3]/tbody/tr/td/table[1]/tbody/tr[1]/td[2]/select"))
    box_year.select_by_visible_text(year)
    box_matches = Select(driver.find_element_by_xpath("/html/body/table[3]/tbody/tr/td/table[1]/tbody/tr[2]/td[2]/select"))
    box_matches.select_by_visible_text("Matches")
The main idea was to create a list of dataframes, one per year and tournament, by adding a new loop at the beginning of the code.
If someone has a better idea or technique, it would be really appreciated!
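One way to structure that outer loop, sketched under the assumption that the parsing code above is wrapped in a hypothetical scrape_matches(soup) function, is to re-read driver.page_source after each dropdown selection and collect one dataframe per year:

import time

dfs = []
for year in ["FIVB 2015", "FIVB 2016"]:
    box_year = Select(driver.find_element_by_xpath(
        "/html/body/table[3]/tbody/tr/td/table[1]/tbody/tr[1]/td[2]/select"))
    box_year.select_by_visible_text(year)
    time.sleep(2)  # crude wait for the page to reload; an explicit WebDriverWait is more robust
    box_matches = Select(driver.find_element_by_xpath(
        "/html/body/table[3]/tbody/tr/td/table[1]/tbody/tr[2]/td[2]/select"))
    box_matches.select_by_visible_text("Matches")
    time.sleep(2)
    soup = BeautifulSoup(driver.page_source, 'lxml')  # re-parse the freshly loaded page
    df = scrape_matches(soup)  # hypothetical wrapper around the parsing code above
    df['year'] = year
    dfs.append(df)

data = pd.concat(dfs, ignore_index=True)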
I'm working on a personal project and I'm trying to retrieve air quality data from the https://aqicn.org website using their API.
I've used this code, which I've copied and adapted for the city of Bucharest as follows:
import pandas as pd
import folium
import requests

# GET data from the AQI website through the API
base_url = "https://api.waqi.info"
path_to_file = "~/path"

# Got token from: https://aqicn.org/data-platform/token/#/
with open(path_to_file) as f:
    contents = f.readlines()
    key = contents[0].strip()  # strip the trailing newline so the URL stays valid

# (lat, lon) -> bottom left, (lat, lon) -> top right
latlngbox = "44.300264,25.920181,44.566991,26.297836"  # for Bucharest
trail_url = f"/map/bounds/?token={key}&latlng={latlngbox}"
my_data = pd.read_json(base_url + trail_url)  # joined parts of the URL
print('columns->', my_data.columns)  # 2 cols, 'status' and 'data' (JSON)

# Build a dataframe from the JSON response
all_rows = []
for each_row in my_data['data']:
    all_rows.append([each_row['station']['name'],
                     each_row['lat'],
                     each_row['lon'],
                     each_row['aqi']])
df = pd.DataFrame(all_rows, columns=['station_name', 'lat', 'lon', 'aqi'])

# Clean the DataFrame
df['aqi'] = pd.to_numeric(df.aqi, errors='coerce')  # invalid parsing becomes NaN
# Remove NaN entries in the column
df1 = df.dropna(subset=['aqi'])
Unfortunately it only retrieves 4 stations, whereas there are many more available on the actual site. The only limitation I saw in the API documentation was "1,000 (one thousand) requests per second", so why can't I get more of them?
Also, I've tried to modify the lat-long values and managed to get more stations, but they were outside the city I was interested in.
Here is a view of the actual perimeter I've used in the embedded code.
If you have any suggestions as to how I can solve this issue, I'd be very happy to read your thoughts. Thank you!
Try using waqi through aqicn... not exactly a clean API, but I found it to work quite well:
import pandas as pd
import folium
from folium.plugins import HeatMap  # needed for the heatmap below

url1 = 'https://api.waqi.info'
# Get token from: https://aqicn.org/data-platform/token/#/
token = 'XXX'
box = '113.805332,22.148942,114.434299,22.561716'  # polygon around Hong Kong via bboxfinder.com
url2 = f'/map/bounds/?latlng={box}&token={token}'
my_data = pd.read_json(url1 + url2)

all_rows = []
for each_row in my_data['data']:
    all_rows.append([each_row['station']['name'], each_row['lat'], each_row['lon'], each_row['aqi']])
df = pd.DataFrame(all_rows, columns=['station_name', 'lat', 'lon', 'aqi'])
From there it's easy to plot:
df['aqi'] = pd.to_numeric(df.aqi, errors='coerce')
print('with NaN->', df.shape)
df1 = df.dropna(subset=['aqi'])
df2 = df1[['lat', 'lon', 'aqi']]
init_loc = [22.396428, 114.109497]
max_aqi = int(df1['aqi'].max())
print('max_aqi->', max_aqi)
m = folium.Map(location=init_loc, zoom_start=5)
heat_aqi = HeatMap(df2, min_opacity=0.1, max_val=max_aqi,
                   radius=60, blur=20, max_zoom=2)
m.add_child(heat_aqi)
m
Or as such:

centre_point = [22.396428, 114.109497]
m2 = folium.Map(location=centre_point, tiles='Stamen Terrain', zoom_start=6)
for idx, row in df1.iterrows():
    lat = row['lat']
    lon = row['lon']
    station = row['station_name'] + ' AQI=' + str(row['aqi'])
    station_aqi = row['aqi']
    if station_aqi > 300:
        pop_color = 'red'
    elif station_aqi > 200:
        pop_color = 'orange'
    else:
        pop_color = 'green'
    folium.Marker(location=[lat, lon],
                  popup=station,
                  icon=folium.Icon(color=pop_color)).add_to(m2)
m2
Checking for stations within Hong Kong returns 19:
df[df['station_name'].str.contains('HongKong')]
I am trying to insert records into a table, but only the last record (result data) from the loop is inserted into the table.
Here is the code I tried:
CDates = ['2020-05-10', '2020-05-12', '2020-05-13', '2020-05-16', '2020-05-20']
ResultData = {}
for date in CDates:
    filterDate = Key('Date').eq(id)
    appResponse = appTable.scan(FilterExpression=filterDate)
    accResp = table.query(KeyConditionExpression=Key('PrimaryId').eq('Key'),
                          FilterExpression=Key('Date').eq(date))
    if len(accResp['Items']) == 0:
        ResultData['PrimaryId'] = 'Key'
        ResultData['CreatedDate'] = date
        ResultData['Type'] = 'Appt'
        ResultData['Id'] = str(uuid.uuid4())
print(ResultData)
table.put_item(Item=ResultData)
I can't figure out where I went wrong.
You built ResultData outside of the loop and overwrote the same keys on every iteration, and put_item only ran once, after the loop finished, so only the last record was written. Try this:
CDates = ['2020-05-10', '2020-05-12', '2020-05-13', '2020-05-16', '2020-05-20']
for date in CDates:
    filterDate = Key('Date').eq(id)
    appResponse = appTable.scan(FilterExpression=filterDate)
    accResp = table.query(
        KeyConditionExpression=Key('PrimaryId').eq('Key'),
        FilterExpression=Key('Date').eq(date))
    if len(accResp['Items']) == 0:
        ResultData = {
            'PrimaryId': 'Key',
            'CreatedDate': date,
            'Type': 'Appt',
            'Id': str(uuid.uuid4())
        }
        print(ResultData)
        table.put_item(Item=ResultData)  # now runs once per date, inside the loop
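If you end up writing many items per run, boto3's batch_writer can also cut down on round trips. A small sketch under the same table and key assumptions as above (the existence check is omitted for brevity):

# batch_writer buffers the puts and flushes them as BatchWriteItem calls
with table.batch_writer() as batch:
    for date in CDates:
        batch.put_item(Item={
            'PrimaryId': 'Key',
            'CreatedDate': date,
            'Type': 'Appt',
            'Id': str(uuid.uuid4())
        })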