I've written a python program that takes some inputs and turns them into a matplotlib graph. Specifically, it displays wealth distributions by percentile for a country of the user's choosing. However, these inputs are currently given by changing variables in the program.
I want to put this code on a website, allowing users to choose any country and see the wealth distribution for that country, as well as how they compare. Essentially, I am trying to recreate this: https://wid.world/income-comparator/
The code in python is all done but I am struggling to incorporate it into an HTML file. I was trying to use pyscript but it currently loads forever and displays nothing. Would rather not rewrite it in javascript (mainly because I don't know js). My thoughts are that it has something to do with the code importing csv files from my device?
import csv
from typing import List
import matplotlib.pyplot as plt
import collections
import math
from forex_python.converter import CurrencyRates
# ---------------- #
# whether or not the graph includes the top 1 percent in the graph (makes the rest of the graph visible!)
one_percent = False # True or False
# pick which country(ies) you want to view
country = 'China' # String
# what currency should the graph use
currency_used = 'Canada' # String
# if you want to compare an income
compare_income = True # True or False
# what income do you want to compare
income = 100000 # Int
# ---------------- #
codes = {}
# get dictionary of monetary country codes
monetary_codes = {}
with open('codes-all.csv') as csv_file:
list = csv.reader(csv_file, delimiter=',')
for row in list:
if row[5] == "":
monetary_codes[row[0]] = (row[2], row[1])
# get dictionary of country names and codes for WID
with open('WID_countries.csv') as csv_file:
WID_codes = csv.reader(csv_file, delimiter=',')
next(WID_codes)
for row in WID_codes:
if len(row[0]) == 2:
if row[2] != "":
monetary_code = monetary_codes[row[1].upper()][0]
currency_name = monetary_codes[row[1].upper()][1]
codes[row[1].upper()] = (row[0], monetary_code, currency_name)
elif row[2] == "":
codes[row[1].upper()] = (row[0], 'USD', 'United States Dollar')
elif row[0][0] == 'U' and row[0][1] == 'S':
codes[row[1].upper()] = (row[0], 'USD', 'United States Dollar')
# converts user input to upper case
country = country.upper()
currency_used = currency_used.upper()
# gets conversion rate
c = CurrencyRates()
conversion_rate = c.get_rate(codes[country][1], codes[currency_used][1])
# convert money into correct currency
def convert_money(conversion_rate, value):
return float(value) * conversion_rate
# get and clean data
def get_data(country):
aptinc = {}
# cleaning the data
with open(f'country_data/WID_data_{codes[country][0]}.csv') as csv_file:
data = csv.reader(csv_file, delimiter=';')
for row in data:
# I only care about the year 2021 and the variable 'aptinc'
if 'aptinc992' in row[1] and row[3] == '2021':
# translates percentile string into a numerical value
index = 0
for i in row[2]:
# index 0 is always 'p', so we get rid of that
if index == 0:
row[2] = row[2][1:]
# each string has a p in the middle of the numbers we care about. I also only
# care about the rows which measure a single percentile
# (upper bound - lower bound <= 1)
elif i == 'p':
lb = float(row[2][:index - 1])
ub = float(row[2][index:])
# if the top one percent is being filtered out adds another requirement
if not one_percent:
if ub - lb <= 1 and ub <= 99:
row[2] = ub
else:
row[2] = 0
else:
if ub - lb <= 1:
row[2] = ub
else: row[2] = 0
index += 1
# adds wanted, cleaned data to a dictionary. Also converts all values to one currency
if row[2] != 0:
aptinc[row[2]] = convert_money(conversion_rate, row[4])
return aptinc
# find the closest percentile to an income
def closest_percentile(income, data):
closest = math.inf
percentile = float()
for i in data:
difference = income - data[i]
if abs(difference) < closest:
closest = difference
percentile = i
return percentile
# ---------------- #
unsorted_data = {}
percentiles = []
average_income = []
# gets data for the country
data = get_data(country)
for i in data:
unsorted_data[i] = data[i]
# sorts the data
sorted = collections.OrderedDict(sorted(unsorted_data.items()))
for i in sorted:
percentiles.append(i)
average_income.append(data[i])
# makes countries pretty for printing
country = country.lower()
country = country.capitalize()
# calculates where the income places against incomes from country(ies)
blurb = ""
if compare_income:
percentile = closest_percentile(income, sorted)
blurb = f"You are richer than {round(percentile)} percent of {country}'s population"
# plot this data!
plt.plot(percentiles,average_income)
plt.title(f'{country} Average Annual Income by Percentile')
plt.xlabel(f'Percentile\n{blurb}')
plt.ylabel(f'Average Annual Income of {country}({codes[currency_used][1]})')
plt.axvline(x = 99, color = 'r', label = '99th percentile', linestyle=':')
if compare_income:
plt.axvline(x = percentile, color = 'g', label = f'{income} {codes[currency_used][2]}')
plt.legend(bbox_to_anchor = (0, 1), loc = 'upper left')
plt.show()
Related
I'm working on a personal project and I'm trying to retrieve air quality data from the https://aqicn.org website using their API.
I've used this code, which I've copied and adapted for the city of Bucharest as follows:
import pandas as pd
import folium
import requests
# GET data from AQI website through the API
base_url = "https://api.waqi.info"
path_to_file = "~/path"
# Got token from:- https://aqicn.org/data-platform/token/#/
with open(path_to_file) as f:
contents = f.readlines()
key = contents[0]
# (lat, long)-> bottom left, (lat, lon)-> top right
latlngbox = "44.300264,25.920181,44.566991,26.297836" # For Bucharest
trail_url=f"/map/bounds/?token={key}&latlng={latlngbox}" #
my_data = pd.read_json(base_url + trail_url) # Joined parts of URL
print('columns->', my_data.columns) #2 cols ‘status’ and ‘data’ JSON
### Built a dataframe from the json file
all_rows = []
for each_row in my_data['data']:
all_rows.append([each_row['station']['name'],
each_row['lat'],
each_row['lon'],
each_row['aqi']])
df = pd.DataFrame(all_rows, columns=['station_name', 'lat', 'lon', 'aqi'])
# Cleaned the DataFrame
df['aqi'] = pd.to_numeric(df.aqi, errors='coerce') # Invalid parsing to NaN
# Remove NaN entries in col
df1 = df.dropna(subset = ['aqi'])
Unfortunately it only retrieves 4 stations whereas there are many more available on the actual site. In the API documentation the only limitation I saw was for "1,000 (one thousand) requests per second" so why can't I get more of them?
Also, I've tried to modify the lat-long values and managed to get more stations, but they were outside the city I was interested in.
Here is a view of the actual perimeter I've used in the embedded code.
If you have any suggestions as of how I can solve this issue, I'd be very happy to read your thoughts. Thank you!
Try using waqi through aqicn... not exactly a clean API but I found it to work quite well
import pandas as pd
url1 = 'https://api.waqi.info'
# Get token from:- https://aqicn.org/data-platform/token/#/
token = 'XXX'
box = '113.805332,22.148942,114.434299,22.561716' # polygon around HongKong via bboxfinder.com
url2=f'/map/bounds/?latlng={box}&token={token}'
my_data = pd.read_json(url1 + url2)
all_rows = []
for each_row in my_data['data']:
all_rows.append([each_row['station']['name'],each_row['lat'],each_row['lon'],each_row['aqi']])
df = pd.DataFrame(all_rows,columns=['station_name', 'lat', 'lon', 'aqi'])
From there its easy to plot
df['aqi'] = pd.to_numeric(df.aqi,errors='coerce')
print('with NaN->', df.shape)
df1 = df.dropna(subset = ['aqi'])
df2 = df1[['lat', 'lon', 'aqi']]
init_loc = [22.396428, 114.109497]
max_aqi = int(df1['aqi'].max())
print('max_aqi->', max_aqi)
m = folium.Map(location = init_loc, zoom_start = 5)
heat_aqi = HeatMap(df2, min_opacity = 0.1, max_val = max_aqi,
radius = 60, blur = 20, max_zoom = 2)
m.add_child(heat_aqi)
m
Or as such
centre_point = [22.396428, 114.109497]
m2 = folium.Map(location = centre_point,tiles = 'Stamen Terrain', zoom_start= 6)
for idx, row in df1.iterrows():
lat = row['lat']
lon = row['lon']
station = row['station_name'] + ' AQI=' + str(row['aqi'])
station_aqi = row['aqi']
if station_aqi > 300:
pop_color = 'red'
elif station_aqi > 200:
pop_color = 'orange'
else:
pop_color = 'green'
folium.Marker(location= [lat, lon],
popup = station,
icon = folium.Icon(color = pop_color)).add_to(m2)
m2
checking for stations within HK, returns 19
df[df['station_name'].str.contains('HongKong')]
I wrote a script to scrape Yahoo Finance stock data using the Yahoo_Fin package
The aim of the script is to grab company financials to be able to perform some calculations. The input to the script is a txt file with a list of company ticker symbols. The output is also supposed to be a txt with only the companies that match a certain number of established criteria.
The script does occasionally work with a small txt file (20 tickers or less) however it does sometimes give me the following error (without me changing any code)
"None of ['Breakdown'] are in the columns" with Breakdown being the index column I set for the df.
I have run the script dozens of times and sometimes it works, sometimes it doesn't. Ran it in Atom and Jupyter Notebook and still have no clue what is causing the problem. I have also updated pandas and all necessary packages.
This is the code:
import pandas as pd
import statistics as stat
from yahoo_fin.stock_info import *
stock_list = [line.rstrip('\n') for line in open("test.txt", "r")]
#print(stock_list)
## The balance sheet df ##
balance_sheet = {ticker: get_balance_sheet(ticker)
for ticker in stock_list}
## The income statement df ##
income_statement = {ticker: get_income_statement(ticker)
for ticker in stock_list}
bs_data=[]
for i in range(0,len(stock_list)):
one_ticker = pd.DataFrame(balance_sheet[stock_list[i]])
one_ticker = one_ticker.set_index('Breakdown')
bs_data.append(one_ticker)
#print(bs_data)
income_data=[]
#one_ticker =[]
for i in range(0,len(stock_list)):
one_ticker = pd.DataFrame(income_statement[stock_list[i]])
one_ticker = one_ticker.set_index('Breakdown')
income_data.append(one_ticker)
#print(income_data)
## These are the balance sheet variables ##
for loop_counter in range(0,len(stock_list)):
# Total Assets
total_assets = (bs_data[loop_counter].loc['Total Assets'].astype(int))
avg_total_assets = stat.mean(total_assets)
#print(avg_total_assets)
# Total Current Liabilities
total_current_liabilities = (bs_data[loop_counter].loc['Total Current Liabilities'].astype(int))
avg_total_current_liabilities = stat.mean(total_current_liabilities)
#print(avg_total_current_liabilities)
#Total Liabilities
total_liabilities = (bs_data[loop_counter].loc['Total Liabilities'].astype(int))
avg_total_liabilities = stat.mean(total_liabilities)
#print(avg_total_liabilities)
## These are the income statement variables ##
# Total Revenue
total_revenue = (income_data[loop_counter].loc['Total Revenue']).astype(int)
avg_total_revenue = stat.mean(total_revenue)
#print(avg_total_revenue)
# Operating Income
operating_income = (income_data[loop_counter].loc['Operating Income or Loss']).astype(int)
avg_operating_income = stat.mean(operating_income)
#print(avg_operating_income)
# Total Operating Expenses
total_operating_expenses = (income_data[loop_counter].loc['Total Operating Expenses'].astype(int))
avg_total_operating_expenses = stat.mean(total_operating_expenses)
#print(avg_total_operating_expenses)
# EBIT
ebit = (avg_total_revenue-avg_total_operating_expenses)
#print(ebit)
## Calculations ##
opm = (avg_operating_income) / (avg_total_revenue)
#print(opm)
roce = (ebit) / ((avg_total_assets) - (avg_total_current_liabilities))
#print(roce)
leverage = (avg_total_liabilities) / (avg_total_assets)
#print(leverage)
#print("Leverage: " + str(round(leverage,2)))
#print("OPM: " + str(round(opm*100,2)) + "%")
#print("ROCE: " + str(round(roce*100,2)) + "%")
## Save to file ##
#print(leverage)
#print(opm)
#print(roce)
if leverage < 1.00 and roce >= 0.2 and opm >= 0.2:
#print("We have a match!")
outfile = open("results.txt", "a")
outfile.write(stock_list[loop_counter])
outfile.write("\n")
outfile.close()
Any clues to what might be the problem??
Update #2 Code:
import pandas as pd
import statistics as stat
from yahooquery import *
# Ticker input here
stock_list = [line.rstrip('\n') for line in open("test.txt", "r")]
#for stock in stock_list:
tickers = Ticker(stock_list)
# Get balance sheet
for stock in stock_list:
#print(stock)
bs = tickers.balance_sheet()
bs = pd.DataFrame(bs)
bs = bs.set_index('endDate')
#print(bs)
## Balance sheet variables to extract ##
# Total Assets
total_assets = bs['totalAssets']
avg_total_assets = stat.mean(total_assets)
# Total Current Liabilities
total_current_liabilities = bs['totalCurrentLiabilities']
avg_total_current_liabilities = stat.mean(total_current_liabilities)
# Total Liabilities
total_liabilities = bs['totalLiab']
avg_total_liabilities = stat.mean(total_liabilities)
## Get income statement ##
inst = tickers.income_statement()
inst = pd.DataFrame(inst)
inst = inst.set_index('endDate')
## Income statement variables to extract ##
# Total Revenue#
total_revenue = inst['totalRevenue']
avg_total_revenue = stat.mean(total_revenue)
# Operating Income
operating_income = inst['operatingIncome']
avg_operating_income = stat.mean(operating_income)
# Total Operating Expenses
total_operating_expenses = inst['totalOperatingExpenses']
avg_total_operating_expenses = stat.mean(total_operating_expenses)
# EBIT
ebit = (avg_total_revenue-avg_total_operating_expenses)
## Parameters ##
opm = (avg_operating_income) / (avg_total_revenue)
roce = (ebit) / ((avg_total_assets) - (avg_total_current_liabilities))
leverage = (avg_total_liabilities) / (avg_total_assets)
## Save to file ##
#print("Hello!")
if leverage < 1.00 and roce >= 0.2 and opm >= 0.2:
#print("Hello")
outfile = open("yahoo_query_results.txt", "w+")
outfile.write(stock)
outfile.write("\n")
outfile.close()
I am trying to update a rating row by row. I have one dataframe of players, that all start with the same rating. For each match, I want the rating to change. Another dataframe contains results of each match.
import pandas as pd
gamesdata = [['paul','tom'],['paul','lisa'],['tom','paul'],['lisa','tom'],['paul','lisa'],['lisa','tom'],['paul','tom']]
games = pd.DataFrame(gamesdata, columns = ['Winner', 'Looser'])
playersdata= ['lisa','paul','tom']
players = pd.DataFrame(playersdata, columns = ['Name'])
mean_elo = 1000
elo_width = 400
k_factor = 64
players['elo'] = mean_elo
def update_elo(winner_elo, loser_elo):
expected_win = expected_result(winner_elo, loser_elo)
change_in_elo = k_factor * (1-expected_win)
winner_elo += change_in_elo
loser_elo -= change_in_elo
return winner_elo, loser_elo
def expected_result(elo_a, elo_b):
expect_a = 1.0/(1+10**((elo_b - elo_a)/elo_width))
return expect_a
for index, row in games.iterrows():
winnername = row['Winner']
losername = row['Looser']
web = players['elo'].loc[players['Name'] == winnername].values[0]
wIndex = players.loc[players['Name'] == winnername]
#I want to return just the index, so I can update the value
print(wIndex)
leb = players['elo'].loc[players['Name'] == losername].values[0]
print('Winner Elo before: ' + str(web))
winner_elo, looser_elo = update_elo(web, leb)
print('Winner Elo after: ' + str(winner_elo))
#here I want to update value
#players.at[wIndex,'elo']=winner_elo
I am trying to update the value in the players table using
players.at[wIndex,'elo']=winner_elo
but i struggle to get the index with this code:
wIndex = players.loc[players['Name'] == winnername]
Found a sollution:
wIndex = players.loc[players['Name'] == winnername].index.values
Can't believe i missed that
There are various posts related to infinite loops on here but none reflect my particular predicament (they deal with Java or they do not match my code format etc). The code I have used is actually source code or 'answer code' to an exercise aimed at new students such as myself and it only supplies the 'correct code without correct format' which for an independent student can complicate things but also provide a more productive challenge.
The code makes solid use of 'functions' and 'calling functions from within other functions' which leaves very little 'global code' as a result, this may make things slightly more complicated but hopefully experienced programmers won't be phased by this.
I think the loop is either an issue with my 'while loop code indentation' or the 'while loop condition/counter code itself'. The loop code takes and uses data from other parts of the program code and shouldn't be completely ruled out but realistically I suspect the problem is one of the two former possible issues of either indentation or internal loop code itself, I have already tried multiple variations of 'indentation layout' as well as making quick fixes (misstyped syntax etc).
The code in question can be found towards the end of the program code (there is only one 'while loop' in the program code) it is in the 'menu options' section of code under '# Loop through quotes selecting those referencing the appropriate month and store the data in the summary dictionary'.
I have included two separate code windows, one highlighting the suspected 'problem code' and the the other with 'full program code'. Any help in any aspect will be appreciated.
Code segment most likely to hold error
def monthlyReport():
file = open(QUOTES_TO_DATE_FILE, 'r')
text = file.read()
file.close()
quotes = text.split()
month = input('Enter month: ')
summary = {'Lawn':{'Quantity' : 0.0, 'Value' : 0.0}, 'Patio' :{'Quantity' : 0.0, 'Value' : 0.0}, 'Water Feature' :{'Quantity' : 0.0, 'Value' : 0.0}}
# Loop through quotes selecting those referencing the appropriate month and
#store the data in summary dictionary
index = 0
while True:
if quotes[index] == month:
inputQuotesFromFile2(quotes[index+1])
summary['Lawn']['Quantity'] = summary['Lawn']['Quantity'] + quote['Lawn']['Width'] * quote['Lawn']['Length']
summary['Lawn']['Value'] = summary['Lawn']['Value'] + quote ['Lawn']['Cost']
summary['Patio']['Quantity'] = summary['Patio']['Quantity'] + quote['Patio']['Width'] * quote['Patio']['Length']
summary['Patio']['Value'] = summary['Patio']['Value'] + quote['Patio']['Cost']
summary['Water Feature']['Quantity'] = summary['Water Feature']['Quantity'] + quote['Water Feature']['Quantity']
summary['Water Feature']['Value'] = summary['Water Feature']['Value'] + quote['Water Feature']['Cost']
index = index + 2
if (index >= len(quotes)):
break
totalValue = summary['Lawn']['Value'] + summary['Patio']['Value'] + summary['Water Feature']['Value']
outputSummaryDictionary(summary, month, totalValue)
Full program code
# `Dictionary containing time values (mins) per square metre/ per feature
##lawn:20 patio:20 water feature:60
TIME = {'Lawn': 20, 'Patio': 20, 'Water Feature': 60}
# Constant for labour cost
##16.49
LABOUR_COST = 16.49
# Variable for filename of list of quotes made to date
##quotesToDateFile
QUOTES_TO_DATE_FILE = 'quotesToDateFile.txt'
# 'Global variables'
# A dictionary that stores quote data temporarily, contains sub dicts for each
#material type including keys for length, width, cost, time/quantity,cost, time
quote = {'Lawn':{'Length': 0 , 'Width': 0 , 'Cost': 0.0 , 'Time': 0.0},
'Patio':{'Length': 0 , 'Width': 0, 'Cost': 0.0 , 'Time': 0.0 },
'Water Feature':{'Quantity': 0 , 'Cost': 0.0 , 'Time': 0.0}}
# A dictionary storing material costs of individual items (can be updated)
materialCost = {'Lawn': 15.5, 'Patio': 20.99, 'Water Feature': 150}
# 'Input'
# Function to input material info defined by a length
##create function with named parameter for 'item'
def inputItemDimensions(item):
s = 'Enter length of ' + item + ':'
length = int(input('Enter length of material: '))
s = 'Enter width of ' + item + ':'
width = int(input('Enter width of material: '))
return length, width
# Function to input material info defined by quantity
##create function with named parameter 'item
def inputItemQuantity(item):
s = 'Enter quantity of ' + item + ':'
quantity = int(input('Enter quantity of items: '))
return quantity
# Function for input of area and quantity
def itemInput():
global quote
quote['Lawn']['Length'], quote['Lawn']['Width'] = inputItemDimensions('lawn')
quote['Patio']['Length'], quote['Patio']['Width'] = inputItemDimensions('concrete patio')
quote['Water Feature']['Quantity'] = inputItemQuantity('water feature')
# 'Cost calculation'
# Function to calculate, output to screen, return the material cost and time
#to install a landscape item installed by length and width
def costCalculation1(num, item, length, width, cost, time):
print('[{0}]'.format(num))
print('Length and width of the {0} = {1} x {2}m'.format(item, length, width))
area = length * width
print('Total area of {0} = {1:.2f}m^2'.format(item, area))
print('Cost of {0} per m^2 = £{1:.2f}'.format(item, cost))
totalCost = area * cost
print('Total cost of {0} = £{1}\n'.format(item, totalCost))
totalTime = area * time
return totalCost, totalTime
# Function to calculate, output to screen and return the material cost and time
#to install a landscape item installed by quantity
def costCalculation2(num, item, quantity, cost, time):
print('[{0}]'.format(num))
print('Quantity of {0} = {1} items'.format(item, quantity))
print('Cost of one {0} = £{1:.2f}'.format(item, cost))
totalCost = quantity * cost
print("Total cost of {0} {1} = £{2}\n".format(quantity, item, totalCost))
totalTime = quantity * time
return totalCost, totalTime
# Function to calculate individual costs of items
def calculateItemCosts():
global quote
quote['Lawn']['Cost'], quote['Lawn']['Time'] = costCalculation1('1', 'lawn', quote['Lawn']['Length'], quote['Lawn']['Width'], materialCost['Lawn'], TIME['Lawn'])
quote['Patio']['Cost'], quote['Patio']['Time'] = costCalculation1('2', 'patio', quote['Patio']['Length'], quote['Patio']['Width'], materialCost['Patio'], TIME['Patio'])
quote['Water Feature']['Cost'], quote['Water Feature']['Time'] = costCalculation2('3', 'water features', quote['Water Feature']['Quantity'], materialCost['Water Feature'], TIME['Water Feature'])
# Function to calculate workimg costs and output them
def workingCost():
print('Working costs:')
totalTime = (quote['Lawn']['Time'] + quote['Patio']['Time'] + quote['Water Feature']['Time']) / 60
labourCost = totalTime * LABOUR_COST
print('Total time to complete work = {0} mins'.format(totalTime))
print('Cost of work per hour = £{0}'.format(LABOUR_COST))
print('Total cost of work = £{0}\n'.format(labourCost))
# Calculate total fee payable by customer, output to screen and file
materialCost = quote['Lawn']['Cost'] + quote['Patio']['Cost'] + quote['Water Feature']['Cost']
totalCost = materialCost + labourCost
print('Total cost to pay = £{0}\n'.format(totalCost))
# 'Output functions'
# Output details concerning item
def outputItems():
outputItems1('1', 'Lawn', quote['Lawn'])
outputItems1('2', 'Patio', quote['Patio'])
outputItems2('3', 'Water Feature', quote['Water Feature'])
# Output dimensions and cost for certain item
def outputItems1(num, item, itemDict):
print('[{0}]'.format(num))
print('Length of width of {0} = {1}m x {2}m'.format(item, itemDict['Length'], itemDict['Width']))
print('Total cost of {0} = £{1}'.format(item, itemDict['Cost']))
print('Time to install {0} = {1}mins\n'.format(item, itemDict['Time'] / 60))
# Output quantity and cost for item
def outputItems2(num, item, itemDict):
print('[{0}]'.format(num))
print('Quantity of {0} = {1} items'.format(item, itemDict['Quantity']))
print('Cost of one {0} = £{1:.2f}'.format(item, itemDict['Cost']))
print('Time to install {0} = {1:.2f} hours\n'.format(item, itemDict['Time'] / 60))
# Output material cost dictionary
def outputMaterialCostDictionary():
for key, value in materialCost.items():
print('{0} = {1}'.format(key, value))
print('\n')
# Output summary dictionary
def outputSummaryDictionary(summaryD, month, totalV):
outputSummaryItem1(['Month', month, '', '', ''])
outputSummaryItem1(['Total', '', 'Total', 'Total', 'Total'])
outputSummaryItem1(['Working', 'Item', 'Square metre', 'Number', 'Monthly'])
outputSummaryItem1(['Costs', '', 'Purchased', 'Purchased', 'Value'])
outputSummaryItem2('Lawn', summaryD['Lawn'])
outputSummaryItem2('Patio', summaryD['Patio'])
outputSummaryItem3('Water Feature', summaryD['Water Feature'])
outputSummaryItem4(totalV)
# Output summary dictionary item ver 1
def outputSummaryItem1(sList):
print('|{0:^13}|{1:^13}|{2:^13}|{3:^13}|{4:^13}|'.format(sList[0], sList[1], sList[2], sList[3], sList[4]))
# Output summary dictionary item ver 2
def outputSummaryItem2(name, item):
print('|{0:^13}|{1:^13}|{2:13.2f}|{3:^13}|{4:13.2f}|'.format('', name, item['Quantity'], '', item['Value']))
# Output summary dictionary item ver 3
def outputSummaryItem3(name, item):
print('|{0:^13}|{1:^13}|{2:^13}|{3:13.0f}|{4:13.2f}|'.format('', name, '', item['Quantity'], item['Value']))
# Output summary dictionary item ver 4
def outputSummaryItem4(totalValue):
print('|{0:^13}|{1:^13}|{2:^13}|{3:^13}|{4:13.2f}|'.format('Total', '', '', '', totalValue))
# 'File handling'
# Function to output file
def outputToFile():
filename = input('Enter file name: ')
file = open(filename, 'w')
month = input('Enter month:' )
print('Filename = {0}....Month = {1}\n'.format(filename, month))
file.write('{0}\n'.format(month))
s = '{0} {1} {2} {3}\n'.format(quote['Lawn']['Length'], quote['Lawn']['Width'], quote['Lawn']['Cost'], quote['Lawn']['Time'])
file.write(s)
s = '{0} {1} {2} {3}\n'.format(quote['Patio']['Length'], quote['Patio']['Width'], quote['Patio']['Cost'], quote['Patio']['Time'])
file.write(s)
s = '{0} {1} {2}\n'.format(quote['Water Feature']['Quantity'], quote['Water Feature']['Cost'], quote['Water Feature']['Time'])
file.write(s)
file.close()
# Update quotes to date file
file = open(QUOTES_TO_DATE_FILE, 'a')
s = '{0} {1}\n'.format(month, filename)
file.write(s)
file.close()
# Function to input quote from file where file name is not known
def inputQuoteFromFile1():
filename = input('Enter name for input file: ')
inputQuoteFromFile2(filename)
# Function to input quote from file when file IS known
def inputQuoteFromFile2(filename):
file = open(filename, 'r')
text = file.read()
list1 = text.split()
file.close()
# Process the data (ignore first item which is the month)
##declare 'quote' dict as global (this might mean this code is within function)
global quote
subDictionary = {'Length' : float(list1[1]), 'Width' : float(list1[2]), 'Cost' : float(list1[3]), 'Time' : float(list1[4])}
quote['Lawn'] = subDictionary
subDictionary = {'Length' : float(list1[5]), 'Width' : float(list1[6]), 'Cost' : float(list1[7]), 'Time' : float(list1[8])}
quote['Patio'] = subDictionary
subDictionary = {'Quantity' : float(list1[9]), 'Cost' : float(list1[10]), 'Time' : float(list1[11])}
quote['Water Feature'] = subDictionary
file.close()
# 'Menu options'
# Function to allow preperation of a new quote
def prepareANewQuote():
itemInput()
calculateItemCosts()
workingCost()
outputToFile()
# Function to load new material costs
def loadNewMaterialCosts():
filename = input('Enter filename: ')
file = open(filename, 'r')
text = file.read()
file.close()
newMaterialCosts = text.split()
# Assign costs to material cost dictionary
index = 0
for key in materialCost.keys():
materialCost['Key'] = float(newMaterialCosts['index'])
index = index + 1
# Output new material costs # NOTE MAY NEED TO BE INDENTED FURTHER
outputMaterialCostDictionary()
# Function to view and load existing quote
def viewExistingQuote():
inputQuoteFromFile1()
outputItems()
workingCost()
# Function to generate monthly report summary
def monthlyReport():
file = open(QUOTES_TO_DATE_FILE, 'r')
text = file.read()
file.close()
quotes = text.split()
month = input('Enter month: ')
summary = {'Lawn':{'Quantity' : 0.0, 'Value' : 0.0}, 'Patio' :{'Quantity' : 0.0, 'Value' : 0.0}, 'Water Feature' :{'Quantity' : 0.0, 'Value' : 0.0}}
# Loop through quotes selecting those referencing the appropriate month and
#store the data in summary dictionary
index = 0
while True:
if quotes[index] == month:
inputQuotesFromFile2(quotes[index+1])
summary['Lawn']['Quantity'] = summary['Lawn']['Quantity'] + quote['Lawn']['Width'] * quote['Lawn']['Length']
summary['Lawn']['Value'] = summary['Lawn']['Value'] + quote ['Lawn']['Cost']
summary['Patio']['Quantity'] = summary['Patio']['Quantity'] + quote['Patio']['Width'] * quote['Patio']['Length']
summary['Patio']['Value'] = summary['Patio']['Value'] + quote['Patio']['Cost']
summary['Water Feature']['Quantity'] = summary['Water Feature']['Quantity'] + quote['Water Feature']['Quantity']
summary['Water Feature']['Value'] = summary['Water Feature']['Value'] + quote['Water Feature']['Cost']
index = index + 2
if (index >= len(quotes)):
break
totalValue = summary['Lawn']['Value'] + summary['Patio']['Value'] + summary['Water Feature']['Value']
outputSummaryDictionary(summary, month, totalValue)
# 'Main' (initialisation)
# Top level function
def start():
while True :
print('Select one of following options')
print('(1) Prepare new quote')
print('(2) Load new cost data')
print('(3) Load and view existing quote')
print('(4) Generate monthly report summary')
print('(5) Exit')
selection = int(input())
if selection == 1:
prepareANewQuote()
elif selection == 2:
loadNewMaterialCosts()
elif selection == 3:
viewExistingQuote()
elif selection == 4:
monthlyReport()
elif selection == 5:
quit()
else:
print('Error unrecognised command')
# Start
start()
index never gets modified if quotes[index] does not equal month, so the code will keep checking the same value over and over again and never proceed.
You should unindent that assignment of index by one level. But really this is not an appropriate use of a while loop; you should use for to iterate over quotes:
for quote in quotes:
(Also note there are two while loops in this code; and actually far too much use of global.)
I am trying to write a script to generate data. I am using random package for this. I execute the script and everything works fine. But when I check through the results, I found out that the script fails to generate the last 100+ rows for some reason.
Can someone suggest me why this could be happening?
from __future__ import print_function
from faker import Faker;
import random;
## Vaue declaration
population = 3;
product = 3;
years = 3;
months = 13;
days = 30;
tax= 3.5;
## Define Column Header
Column_Names = "Population_ID",";","Product_Name",";","Product_ID",";","Year",";",
"Month",";","Day","Quantity_sold",";","Sales_Price",";","Discount",
";","Actual_Sales_Price",tax;
## Function to generate sales related information
def sales_data():
for x in range(0,1):
quantity_sold = random.randint(5,20);
discount = random.choice(range(5,11));
sales_price = random.uniform(20,30);
return quantity_sold,round(sales_price,2),discount,round((sales_price)-(sales_price*discount)+(sales_price*tax));
## Format the month to quarter and return the value
def quarter(month):
if month >= 1 and month <= 3:
return "Q1";
elif month > 3 and month <= 6:
return "Q2";
elif month > 6 and month <= 9:
return "Q3";
else:
return "Q4";
## Generate product_id
def product_name():
str2 = "PROD";
sample2 = random.sample([1,2,3,4,5,6,7,8,9],5);
string_list = [];
for x in sample2:
string_list.append(str(x));
return (str2+''.join(string_list));
### Main starts here ###
result_log = open("C:/Users/Sangamesh.sangamad/Dropbox/Thesis/Data Preparation/GenData.csv",'w')
print (Column_Names, result_log);
### Loop and Generate Data ###
for pop in range(0,population):
pop = random.randint(55000,85000);
for prod_id in range(0,product):
product_name2 = product_name();
for year in range(1,years):
for month in range(1,months):
for day in range(1,31):
a = sales_data();
rows = str(pop)+";"+product_name2+";"+str(prod_id)+";"+str(year)+";"+str(month)+";"+quarter(month)+";"+str(day)+";"+str(a[0])+";"+str(a[1])+";"+str(a[2])+";"+str(tax)+";"+str(a[3]);
print(rows,file=result_log);
#print (rows);
tax = tax+1;
You need to close a file to have the buffers flushed:
result_log.close()
Better still, use the file object as a context manager and have the with statement close it for you when the block exits:
filename = "C:/Users/Sangamesh.sangamad/Dropbox/Thesis/Data Preparation/GenData.csv"
with result_log = open(filename, 'w'):
# code writing to result_log
Rather than manually writing strings with delimiters in between, you should really use the csv module:
import csv
# ..
column_names = (
"Population_ID", "Product_Name", "Product_ID", "Year",
"Month", "Day", "Quantity_sold", "Sales_Price", "Discount",
"Actual_Sales_Price", tax)
# ..
with result_log = open(filename, 'wb'):
writer = csv.writer(result_log, delimiter=';')
writer.writerow(column_names)
# looping
row = [pop, product_name2, prod_id, year, month, quarter(month), day,
a[0], a[1], a[2], tax, a[3]]
writer.writerow(row)