Consider this code in Python
# For each year: keep only that year's rows, collect the distinct towns,
# then print the towns followed by how many there are.
# `hdb_million` is a DataFrame defined earlier; it must have "year" and
# "town" columns.

# --- 2020 ---
filter_2020 = hdb_million[hdb_million["year"] == 2020]
town_2020 = filter_2020["town"].unique()
results_2020 = len(town_2020)
print(town_2020)
print("in 2020 the number of towns are:", results_2020)
print("\n")

# --- 2021 ---
filter_2021 = hdb_million[hdb_million["year"] == 2021]
town_2021 = filter_2021["town"].unique()
results_2021 = len(town_2021)
print(town_2021)
print("in 2021 the number of towns are:", results_2021)
print("\n")

# --- 2022 ---
filter_2022 = hdb_million[hdb_million["year"] == 2022]
town_2022 = filter_2022["town"].unique()
results_2022 = len(town_2022)
print(town_2022)
# This count line was missing from the snippet although the quoted output
# shows it for 2022.
print("in 2022 the number of towns are:", results_2022)
Output
['BUKIT MERAH' 'CLEMENTI' 'TOA PAYOH' 'KALLANG/WHAMPOA' 'QUEENSTOWN'
'BISHAN' 'CENTRAL AREA' 'ANG MO KIO' 'GEYLANG']
in 2020 the number of towns are: 9
['BISHAN' 'BUKIT MERAH' 'CENTRAL AREA' 'CLEMENTI' 'QUEENSTOWN' 'SERANGOON'
'TOA PAYOH' 'BUKIT TIMAH' 'KALLANG/WHAMPOA' 'ANG MO KIO']
in 2021 the number of towns are: 10
['ANG MO KIO' 'BISHAN' 'BUKIT TIMAH' 'CENTRAL AREA' 'CLEMENTI' 'GEYLANG'
'KALLANG/WHAMPOA' 'QUEENSTOWN' 'SERANGOON' 'TOA PAYOH' 'BUKIT MERAH'
'YISHUN' 'PASIR RIS' 'WOODLANDS' 'BUKIT BATOK' 'HOUGANG' 'MARINE PARADE'
'PUNGGOL' 'TAMPINES' 'BEDOK']
in 2022 the number of towns are: 20
Instead of repeating the code for each year, can I define a function that produces the same output? I tried writing several functions with def but was not successful. I would be most grateful for any insights. Thank you.
You can iterate through a loop,
# Run the same filter / count / print steps once per year instead of
# copy-pasting them. `hdb_million` is the DataFrame from the question.
for year in [2020, 2021, 2022]:
    filter_year = hdb_million[hdb_million["year"] == year]
    town = filter_year["town"].unique()
    results = len(town)
    print(town)
    print(f"in {year} the number of towns are:", results)
    print("\n")
If you just want the printing steps wrapped in a reusable function:
def print_town_info(year, town_input):
    """Print the distinct towns recorded for ``year`` and their count.

    Args:
        year: Value compared against the "year" column.
        town_input: DataFrame with at least "year" and "town" columns.

    Returns:
        None. The towns, a summary line and a blank separator are printed.
    """
    filter_year = town_input[town_input["year"] == year]
    town = filter_year["town"].unique()
    print(town)
    print(f"in {year} the number of towns are:", len(town))
    print("\n")
Then if you want it in a loop:
# Call the helper once for each year of interest.
for y in [2020, 2021, 2022]:
    print_town_info(y, hdb_million)
I'm using the txt file: https://drive.google.com/file/d/1-VrWf7aqiqvnshVQ964zYsqaqRkcUoL1/view?usp=sharing
I'm running the script:
# Computes the average transaction amount for New York and San Francisco
# from tab-separated rows and writes a short report to result_file.txt.
# NOTE(review): `f` is read here, but the open(...) call that creates it is
# not part of this snippet.
data = f.read()
# Running totals and transaction counts per city.
ny_sum=0
ny_count=0
sf_sum=0
sf_count=0
for line in data.split('\n'):
print(line)
parts = line.split('\t')
# Tab-separated field 2 is used as the city, field 4 as the amount (0-based).
city = parts[2]
# float('') raises "could not convert string to float: ''" -- presumably at
# least one row has an empty amount field; verify against the data file.
amount = float(parts[4])
if city == 'San Francisco':
# NOTE(review): sf_count is never incremented anywhere in this snippet, so
# the sf_avg division below divides by zero.
sf_sum = sf_sum + amount
elif city == 'New York':
ny_sum = ny_sum + amount
ny_count = ny_count + 1
# Averages; each raises ZeroDivisionError when its count is still 0.
ny_avg = ny_sum / ny_count
sf_avg = sf_sum / sf_count
#print(ny_avg, sf_avg)
# Write the two averages and the comparison to the report file.
f = open('result_file.txt', 'w')
f.write('The average transaction amount based on {} transactions in New York is {}\n'.format(ny_count, ny_avg))
f.write('The average transaction amount based on {} transactions in San Francisco is {}\n'.format(sf_count, sf_avg))
# A tie falls into the else branch and is reported as San Francisco being higher.
if ny_avg>sf_avg:
f.write('New York has higher average transaction amount than San Francisco\n')
else:
f.write('San Francisco has higher average transaction amount than New York\n')
f.close()
And I ALWAYS get the error:
ValueError: could not convert string to float: ''
I'm pretty new-ish to Python and I'm really not sure what I'm doing wrong here. I'm trying to get averages for New York and San Francisco, then export the results AND the comparison to a txt results file
This should give you what you're looking for:
from collections import defaultdict as DD
with open('New Purchases.txt') as pfile:
    # Maps city -> [running total, number of transactions].
    sums = DD(lambda: [0.0, 0])
    for fields in (row.split('\t') for row in pfile):
        try:
            city = fields[2]
            amount = float(fields[4])
        except (IndexError, ValueError):
            # Blank or malformed row (missing column or empty amount): skip it.
            # Parsing happens before `sums` is touched, so a bad row never
            # creates a zero-count entry that would later divide by zero.
            continue
        sums[city][0] += amount
        sums[city][1] += 1

for k in ['San Francisco', 'New York']:
    # A city with no valid rows falls back to [0.0, 1], i.e. an average of 0.
    v = sums.get(k, [0.0, 1])
    print(f'Average for {k} = ${v[0]/v[1]:.2f}')
I have re-arranged the code. I agree with BrutusFocus that the splits are making it difficult to read exactly the location on each row. I have set it so if it sees the location at any point in the row, it counts it.
# Read the tab-separated transactions, average the amounts for New York and
# San Francisco, and write a short report to result_file.txt.
with open("data.txt", "r") as f:
    data = f.read()

ny_sum = 0
ny_count = 0
sf_sum = 0
sf_count = 0

for line in data.split('\n'):
    parts = line.split('\t')
    try:
        city = parts[2]
        amount = float(parts[4])
    except (IndexError, ValueError):
        # Blank line (e.g. the one after the final newline) or a row with an
        # empty amount field -- the source of
        # "could not convert string to float: ''". Skip it.
        continue
    print(city, amount)
    # The city is matched anywhere in the row, not only in column 2.
    if "New York" in line:
        ny_sum = ny_sum + amount
        ny_count = ny_count + 1
    elif "San Francisco" in line:
        sf_sum = sf_sum + amount
        sf_count = sf_count + 1

# Report 0.0 instead of raising ZeroDivisionError when a city has no rows.
ny_avg = ny_sum / ny_count if ny_count else 0.0
sf_avg = sf_sum / sf_count if sf_count else 0.0
#print(ny_avg, sf_avg)

# `with` closes the report even if one of the writes fails.
with open('result_file.txt', 'w') as out_file:
    out_file.write('The average transaction amount based on {} transactions in New York is {}\n'.format(ny_count, ny_avg))
    out_file.write('The average transaction amount based on {} transactions in San Francisco is {}\n'.format(sf_count, sf_avg))
    if ny_avg > sf_avg:
        out_file.write('New York has higher average transaction amount than San Francisco\n')
    else:
        out_file.write('San Francisco has higher average transaction amount than New York\n')
Is there any way to print the numbers in real time, updating them in place instead of printing them one after another? I have 6 different countries:
# One figure per country; digit separators make the magnitudes easier to read.
china = 1_399_746_872
india = 1_368_138_206
USA = 327_826_334
Japan = 12_649_000
Russia = 146_804_372
Sweden = 10_379_295
I change these numbers in the script, but how do I print them so that I can see them change?
!EDITED!
I want to overwrite this list every time it prints, so that I can see the numbers go up:
# One figure per country.
china = 1_399_746_872
india = 1_368_138_206
USA = 327_826_334
Japan = 12_649_000
Russia = 146_804_372
Sweden = 10_379_295

# Collect the figures in a single list, in the order they were defined.
Countries = [china, india, USA, Japan, Russia, Sweden]
print(Countries)
You could use os.system("cls") to clear the console (on Windows; use os.system("clear") on Linux/macOS).
I made a little demo:
import time, sys, json, os
from random import randint
vals = {
    "china": 1399746872,
    "india": 1368138206,
    "USA": 327826334,
    "Japan": 12649000,
    "Russia": 146804372,
    "Sweden": 10379295,
}

# "cls" only exists on Windows; POSIX terminals use "clear".
clear_command = "cls" if os.name == "nt" else "clear"

for _ in range(100):
    # clear console
    os.system(clear_command)
    # print values
    for k, v in vals.items():
        print(f"{k}: {v}")
    # renew values with random generated integers
    vals = {k: randint(0, 1000000) for k in vals}
    # sleep 5s
    time.sleep(5)
How to get numbers 1 to 10 next to the SQL table contents from the Chinook database in a good format? I can't get the loop from 1 to 10 next to the other three elements of the database file. The output I want :
1 Chico Buarque Minha Historia 27
2 Lenny Kravitz Greatest Hits 26
3 Eric Clapton Unplugged 25
4 Titãs Acústico 22
5 Kiss Greatest Kiss 20
6 Caetano Veloso Prenda Minha 19
7 Creedence Clearwater Revival Chronicle, Vol. 2 19
8 The Who My Generation - The Very Best Of The Who 19
9 Green Day International Superhits 18
10 Creedence Clearwater Revival Chronicle, Vol. 1 18
My code :
import sqlite3
# Open the Chinook SQLite database.
# NOTE(review): if connect() fails, the error is only printed and `conn` is
# never bound, so the conn.cursor() call below would raise NameError.
try:
conn = sqlite3.connect(r'C:\Users\Just\Downloads\chinook.db')
except Exception as e:
print(e)
cur = conn.cursor()
# Each result row is (artists.Name, albums.Title, AlbumAmountListened),
# limited to the 10 albums with the highest count.
cur.execute('''SELECT artists.Name, albums.Title, count (albums.AlbumId) AS AlbumAmountListened
FROM albums
INNER JOIN tracks ON albums.AlbumId = tracks.AlbumId
INNER JOIN invoice_items ON tracks.TrackId = invoice_items.TrackId
INNER JOIN artists ON albums.ArtistId = artists.ArtistId
GROUP BY albums.AlbumId
ORDER BY AlbumAmountListened DESC
LIMIT 10''')
top_10_albums = cur.fetchall()
# Prints one line per row of top_10_albums.
def rank():
for item in top_10_albums:
# Per the SELECT order, item[0] is the artist name and item[1] the album
# title, so `name` holds the artist and `artist` holds the title.
name = item[0]
artist = item[1]
album_played = item[2]
# num() is called once per row: judging by the output shown in the question,
# it prints 1..10 each time and then returns 10, which is why every row is
# labelled 10.
def num():
for i in range(1,11):
print (i)
return i
print (num(),'\t', name, '\t', artist, '\t', album_played, '\t')
# NOTE(review): no return statement is visible in rank(), so this outer
# print presumably also emits "None" after the rows.
print (rank())
My 1-10 number loops like this:
1
2
3
4
5
6
7
8
9
10
10 Chico Buarque Minha Historia 27
1
2
3
4
5
6
7
8
9
10
10 Lenny Kravitz Greatest Hits 26
And so on. How do I correctly combine my range object?
You can use enumerate() to provide the numbers for you as you iterate over the rows:
top_10_albums = cur.fetchall()
# enumerate(..., start=1) supplies the 1-based rank alongside each row, so no
# separate counting loop is needed.
for i, item in enumerate(top_10_albums, start=1):
    name = item[0]
    artist = item[1]
    album_played = item[2]
    print(f'{i}\t{name}\t{artist}\t{album_played}')
You don't even have to unpack the item into variables, just reference them directly in the fstring:
# Index the row tuple directly inside the f-string; every replacement field
# needs its own closing brace.
for i, item in enumerate(top_10_albums, start=1):
    print(f'{i}\t{item[0]}\t{item[1]}\t{item[2]}')
But this is perhaps nicer:
# Unpack each 3-column row straight into named variables in the loop header.
for i, (name, artist, album_played) in enumerate(top_10_albums, start=1):
    print(f'{i}\t{name}\t{artist}\t{album_played}')
This uses tuple unpacking to bind the fields from the row to descriptively named variables, which makes it self documenting.
You just need to maintain an index (i) within the for loop, such as:
top_10_albums = cur.fetchall()
# Keep a running rank that advances once per row, instead of looping over
# 1..10 inside every row.
i = 0
for item in top_10_albums:
    name = item[0]
    artist = item[1]
    album_played = item[2]
    i += 1
    print(i, '\t', name, '\t', artist, '\t', album_played, '\t')
In your case, the inner loop produces all 10 numbers for each step of the outer loop.
Numbered Version
def rowView(strnum, row, flen_align=((30, "l"), (30, "r"), (5, "r"))):
    """Render one record as a fixed-width, underscore-padded line.

    Args:
        strnum: Text placed at the start of the line (the padded row number).
        row: Mapping whose values are rendered in iteration order.
        flen_align: One ``(width, align)`` pair per value; ``align`` is
            ``"l"`` to pad on the right or ``"r"`` to pad on the left.
            A value with any other align code is omitted.

    Returns:
        ``strnum`` followed by the padded values, concatenated.

    Raises:
        IndexError: If ``row`` has more values than ``flen_align`` has pairs.
    """
    cells = []
    for i, value in enumerate(row.values()):
        flen, align = flen_align[i]
        text = str(value)
        # ljust/rjust leave text longer than the field untouched, whereas the
        # earlier abs()-based padding appended extra underscores to it.
        if align == "l":
            cells.append(text.ljust(flen, "_"))
        elif align == "r":
            cells.append(text.rjust(flen, "_"))
    return strnum + "".join(cells)
dlist = [
    {"name": "Chico Buarque", "title": "Minha Historia", "AAL": 27},
    {"name": "Lenny Kravit", "title": "Greatest Hits", "AAL": 26},
    {"name": "Eric Clapton", "title": "Unplugged", "AAL": 25},
    {"name": "Titã", "title": "Acústico", "AAL": 22},
    {"name": "Kis", "title": "Greatest Kiss", "AAL": 20},
    {"name": "Caetano Velos", "title": "Prenda Minha", "AAL": 19},
    {"name": "Creedence Clearwater Reviva", "title": "Chronicle,Vol.2", "AAL": 19},
    {"name": "TheWho My Generation", "title": "The Very Best Of The Who", "AAL": 19},
    {"name": "Green Da", "title": "International Superhits", "AAL": 18},
    {"name": "Creedence Clearwater Reviva", "title": "Chronicle,Vol.1", "AAL": 18},
]

# Number the rows from 1 and pad the number to a 5-character column before
# handing it to rowView.
for num, row in enumerate(dlist, start=1):
    strnum = str(num).ljust(5, "_")
    print(rowView(strnum, row))
Or using record id directly
def rowView(row, flen_align=((5, "l"), (30, "l"), (30, "r"), (5, "r"))):
    """Render one record as a fixed-width, underscore-padded line.

    Args:
        row: Mapping whose values are rendered in iteration order.
        flen_align: One ``(width, align)`` pair per value; ``align`` is
            ``"l"`` to pad on the right or ``"r"`` to pad on the left.
            A value with any other align code is omitted.

    Returns:
        The padded values, concatenated into one string.

    Raises:
        IndexError: If ``row`` has more values than ``flen_align`` has pairs.
    """
    cells = []
    for i, value in enumerate(row.values()):
        flen, align = flen_align[i]
        text = str(value)
        # ljust/rjust leave text longer than the field untouched, whereas the
        # earlier abs()-based padding appended extra underscores to it.
        if align == "l":
            cells.append(text.ljust(flen, "_"))
        elif align == "r":
            cells.append(text.rjust(flen, "_"))
    return "".join(cells)
dlist = [
    {"id": 1, "name": "Chico Buarque", "title": "Minha Historia", "AAL": 27},
    {"id": 2, "name": "Lenny Kravit", "title": "Greatest Hits", "AAL": 26},
    {"id": 3, "name": "Eric Clapton", "title": "Unplugged", "AAL": 25},
    {"id": 4, "name": "Titã", "title": "Acústico", "AAL": 22},
    {"id": 5, "name": "Kis", "title": "Greatest Kiss", "AAL": 20},
    {"id": 6, "name": "Caetano Velos", "title": "Prenda Minha", "AAL": 19},
    {"id": 7, "name": "Creedence Clearwater Reviva", "title": "Chronicle,Vol.2", "AAL": 19},
    {"id": 8, "name": "TheWho My Generation", "title": "The Very Best Of The Who", "AAL": 19},
    {"id": 9, "name": "Green Da", "title": "International Superhits", "AAL": 18},
    {"id": 10, "name": "Creedence Clearwater Reviva", "title": "Chronicle,Vol.1", "AAL": 18},
]

# Each record already carries its own "id", so it is rendered as the first
# column by rowView.
for row in dlist:
    print(rowView(row))
same output for both versions:
1____Chico Buarque_________________________________Minha Historia___27
2____Lenny Kravit___________________________________Greatest Hits___26
3____Eric Clapton_______________________________________Unplugged___25
4____Titã________________________________________________Acústico___22
5____Kis____________________________________________Greatest Kiss___20
6____Caetano Velos___________________________________Prenda Minha___19
7____Creedence Clearwater Reviva__________________Chronicle,Vol.2___19
8____TheWho My Generation________________The Very Best Of The Who___19
9____Green Da_____________________________International Superhits___18
10___Creedence Clearwater Reviva__________________Chronicle,Vol.1___18
I am not allowed to use any imports or libraries such as pandas (so no groupby).
I have to group the data by category and sum up the corresponding values. The data is in a CSV file.
For example,
**S** C **T**
A T 100
A. B 102
A. T. 200
A B. 100
C T 203
C. T. 200
C B 200
C T 200
C. B 200
my expected result should be
S C T
A T 300
A B. 202
C T 403
C B. 200
C T. 200
C B. 200
Considering that you have a csv file (i.e., columns split by comma):
# Sum the third column per (state, candidate) pair without any imports.
with open('myfile.csv', 'r') as file:
    header = file.readline().rstrip()
    # Maps (state, candidate) -> running total; dicts keep insertion order,
    # so pairs are reported in order of first appearance.
    data = {}
    for row in file:
        if not row.strip():
            # Tolerate blank lines (e.g. a trailing one) instead of failing
            # on the three-way unpacking below.
            continue
        state, candidate, value = row.split(',')
        # int() ignores the trailing newline still attached to `value`.
        k, value = (state, candidate), int(value)
        data[k] = data.get(k, 0) + value

result_csv = '\n'.join([header] + [f"{','.join(k)},{v}" for k, v in data.items()])
print(result_csv)
Output:
state,candidate,total votes
Alaska,Trump,300
Alaska,Biden,202
colorado,Trump,403
colorado,Biden,200
California,Trump,200
California,Biden,200
Original content of myfile.csv is (use str.replace if necessary):
state,candidate,total votes
Alaska,Trump,100
Alaska,Biden,102
Alaska,Trump,200
Alaska,Biden,100
colorado,Trump,203
colorado,Trump,200
colorado,Biden,200
California,Trump,200
California,Biden,200
# Read the whitespace-separated table, dropping the stray "." characters and
# skipping blank lines. The `with` block closes the file by itself.
mylist = []
with open("data", "r") as msg:
    for line in msg:
        cleaned = line.strip().replace(".", "")
        if cleaned:
            mylist.append(cleaned)

# First row is the header; strip the "*" emphasis markers from it.
headers = mylist[0].replace("*", "").split()
del mylist[0]
# Everything after the second header word belongs to the third column title
# (e.g. "total votes"); this also works when that title is a single word.
headers = headers[:2] + [" ".join(headers[2:])]

# Maps state -> {candidate -> summed votes}, built in one pass. Insertion
# order follows first appearance.
mydict = {}
for line in mylist:
    state, candidate, votes = line.split()[:3]
    by_candidate = mydict.setdefault(state, {})
    by_candidate[candidate] = by_candidate.get(candidate, 0) + int(votes)

print ("%-15s %-15s %-15s \n\n" % (headers[0],headers[1],headers[2]))
for state, by_candidate in mydict.items():
    for candidate, total in by_candidate.items():
        print ("%-15s %-15s %-15s" % (state,candidate,str(total)))
Output:
state candidate total votes
Alaska Trump 300
Alaska Biden 202
colorado Trump 403
colorado Biden 200
California Trump 200
California Biden 200