Sort Average In A file - python

I have a file with 3 scores for each person. Each person has their own row. I want to use these scores, and get the average of all 3 of them. Their scores are separated by tabs and in descending order. For example:
tam 10 6 11
tom 3 7 3
tim 5 4 6
these people would come out with an average of:
tam 9
tom 5
tim 4
I want these to be able to print to the python shell, however not be saved to the file.
with open("file.txt") as file1:
d = {}
count = 0
for line in file1:
column = line.split()
names = column[0]
average = (int(column[1].strip()) + int(column[2].strip()) + int(column[3].strip()))/3
count = 0
while count < 3:
d.setdefault(names, []).append(average)
count = count + 1
for names, v in sorted(d.items()):
averages = (sum(v)/3)
print(names,average)
averageslist=[]
averageslist.append(averages)
My code only finds the first person's average and outputs it for all of them. I also want the output to be in descending order of averages.

You can use the following code, which parses your file into a list of (name, average) tuples and prints every entry of that list sorted by average:
import operator

# Parse every usable line into a (name, average) tuple.
data = []
with open("file.txt") as f:
    for line in f:
        parts = line.split()
        if len(parts) < 2:
            # Blank line, or a name with no scores: nothing to average.
            continue
        name, vals = parts[0], parts[1:]
        avg = sum(int(x) for x in vals) / len(vals)
        data.append((name, avg))

# Print the entries sorted by average, highest first.
for person in sorted(data, key=operator.itemgetter(1), reverse=True):
    print("{} {}".format(*person))

You are almost correct. You are already calculating the average in the first step, so there is no need to compute sum(v)/3 again. Try this:
# Map each name to the average of its three scores.
d = {}
with open("file.txt") as file1:
    for line in file1:
        column = line.split()
        if not column:
            continue  # tolerate blank lines (e.g. a trailing newline)
        names = column[0]
        # split() already discards surrounding whitespace, so no strip() is needed.
        average = (int(column[1]) + int(column[2]) + int(column[3])) / 3
        d[names] = average

# Highest average first; drop reverse=True for increasing order.
for names, v in sorted(d.items(), key=lambda x: x[1], reverse=True):
    print(names, v)
#output
('tam', 9)
('tim', 5)
('tom', 4)
To sort by name
for names, v in sorted(d.items()):
print(names,v)
#output
('tam', 9)
('tim', 5)
('tom', 4)

The issue is this:
averages = (sum(v)/3)
print(names,average)
Notice that on the first line you are computing averages (with an s at the end) and on the next line you are printing average (without an s).

Try This:
from operator import itemgetter

# Map each name to the average of its three scores.
d = {}
with open("file.txt") as file1:
    for line in file1:
        column = line.split()
        if not column:
            continue  # skip blank lines
        names = column[0]
        average = (int(column[1]) + int(column[2]) + int(column[3])) / 3
        # Store the number itself: wrapping it in a list would make the
        # output print as e.g. "tam [9.0]" and sort by list comparison.
        d[names] = average

# Print the names ordered by average, highest first.
for names, v in sorted(d.items(), key=itemgetter(1), reverse=True):
    print(names, v)

Related

Python CSV sum value if they have same ID/name

I want to sum all values that have the same name / ID in a csv file
Right now I am only looking for ID with the name 'company'
csv file format:
company A, 100
company B, 200
company A, 300
The end result I am looking for is:
company A, 400
company B, 200
total: 600
My code so far:
import csv
name = ''
num = ''
total = 0
with open('xx.csv', 'r', newline='') as csvfile:
reader = csv.reader(csvfile)
next(csvfile)
for a in reader:
if a[0].__contain__('company'):
name = (a[0])
num = (a[1])
total += float(a[1])
print(str(name) + ', ' + str(num))
print('total: ' + str(total))
First, CSV files typically use commas, and the delimiter for csv.reader must be a single character, so I suggest updating your file to properly use commas.
Secondly, to aggregate the companies, you need to store them as you iterate the file. Easiest way is to use a dictionary type.
Then only after you've aggregated everything, should you create a second loop to go over the aggregated values, then print the final total.
import csv
from collections import defaultdict

# Sum the values per company name.
totals = defaultdict(float)
# newline='' is what the csv module documentation recommends for files
# handed to csv.reader.
with open('companies.csv', newline='') as csvfile:
    reader = csv.reader(csvfile, delimiter=',')
    # next(reader)  # shown file has no header
    for row in reader:
        # Ignore blank rows and rows that are not company entries.
        if not row or not row[0].startswith('company'):
            continue
        name, value = row
        totals[name] += float(value)

# Print the per-company sums, then the grand total.
total = 0
for name, value in totals.items():
    print(f'{name},{value}')
    total += value
print(f'total: {total}')
You don't necessarily need to use the csv module here. Just read every line, split it from the right (rsplit), and fill a dictionary like below:
# Sum the values per name, splitting each line at its last comma.
d = {}
with open('your_file.csv') as f:
    # next(f) - If header needs to be skipped
    for line in f:
        if not line.strip():
            continue  # a blank (e.g. trailing) line has no comma to split on
        name, value = line.rsplit(',', maxsplit=1)
        d[name] = d.get(name, 0) + int(value)

for k, v in d.items():
    print(f"{k}, {v}")
print(f"total: {sum(d.values())}")
output:
company A, 400
company B, 200
total: 600
In order not to iterate again through the dictionary's values to calculate the total(I mean in sum(d.values()) expression), you can do add to total while you are printing the items like:
# Sum the values per name, splitting each line at its last comma.
d = {}
with open('new.csv') as f:
    for line in f:
        if not line.strip():
            continue  # a blank (e.g. trailing) line has no comma to split on
        name, value = line.rsplit(',', maxsplit=1)
        d[name] = d.get(name, 0) + int(value)

# Accumulate the grand total while printing, so the values are walked once.
total = 0
for k, v in d.items():
    total += v
    print(f"{k}, {v}")
print(f"total: {total}")

Python: How to read space delimited data with different length in text file and parse it

I have space delimited data in a text file look like the following:
0 1 2 3
1 2 3
3 4 5 6
1 3 5
1
2 3 5
3 5
each line has different length.
I need to read it starting from line 2 ('1 2 3')
and parse it and get the following information:
Number of unique data = (1,2,3,4,5,6)=6
Count of each data:
count data (1)=3
count data (2)=2
count data (3)=5
count data (4)=1
count data (5)=4
count data (6)=1
Number of lines=6
Sort the data in descending order:
data (3)
data (5)
data (1)
data (2)
data (4)
data (6)
I did this:
file=open('data.txt')
csvreader=csv.reader(file)
header=[]
header=next(csvreader)
print(header)
rows=[]
for row in csvreader:
rows.append(row)
print(rows)
After this step, what should I do to get the expected results?
I would do something like this:
from collections import Counter

with open('data.txt', 'r') as file:
    lines = file.readlines()
lines = lines[1:]  # skip first line

# split() with no argument collapses runs of whitespace and yields nothing
# for a blank line, so no empty-string "values" end up being counted.
counter = Counter()
for line in lines:
    counter.update(line.split())

print(f'unique data: {list(counter.keys())}')
print(f'count data: {sorted(counter.items())}')
print(f'number of lines: {len(lines)}')
print(f'sort data: {[value for value, _ in counter.most_common()]}')
A simple brute force approach:
nums = []
counts = {}

# Use a context manager so the file is closed, and skip the first line by
# position rather than by testing whether it happens to start with '0'
# (which would also drop any data row beginning with 0).
with open('data.txt') as data_file:
    next(data_file, None)
    for row in data_file:
        nums.extend(int(k) for k in row.split())
print(nums)

# Tally how often each number occurs.
for n in nums:
    counts[n] = counts.get(n, 0) + 1
print(counts)

# Order the (number, count) pairs by count, most frequent first.
ordering = sorted(counts.items(), key=lambda k: -k[1])
print(ordering)
Here is another approach
def getData(infile):
    """Return every line of ``infile`` except the first.

    Lines keep their trailing newline characters.  An empty file yields an
    empty list.
    """
    with open(infile, 'r') as data:
        next(data, None)  # discard the first line, if there is one
        return list(data)
def parseData(ld):
    """Print summary statistics for the whitespace-separated symbols in ``ld``.

    ``ld`` is a sequence of text lines (it is iterated once and measured
    with ``len()``).  Prints the number of distinct symbols, the number of
    lines, the count of each symbol in first-seen order, and the symbols
    sorted in descending string order.
    """
    from collections import Counter

    all_symbols = Counter()
    for l in ld:
        all_symbols.update(l.split())

    print(f'Number of Unique Symbols = {len(all_symbols)}')
    print(f'Number of Lines Processed = {len(ld)}')
    # A Counter keeps insertion order, so the per-symbol lines come out in a
    # reproducible order instead of the arbitrary order of a set.
    for symb, cnt in all_symbols.items():
        print(f'Number of {symb} = {cnt}')
    print(f"Descending Sort of Symbols = {', '.join(sorted(all_symbols, reverse=True))}")
On executing:
infile = r'spaced_text.txt'
parseData(getData(infile))
Produces:
Number of Unique Symbols = 6
Number of Lines Processed = 6
Number of 2 = 2
Number of 5 = 4
Number of 3 = 5
Number of 1 = 3
Number of 6 = 1
Number of 4 = 1
Descending Sort of Symbols = 6, 5, 4, 3, 2, 1

get value of one column by another column in csv file python

I have my csv file like this:
ID Value Amount
---- ------- -------
A 3 2
A 4 4
B 3 6
C 5 5
A 3 2
B 10 1
I want sum of column "Value" or "Amount" by the column "ID". I want the output that for 'A' it should give me sum of all values which is related to A means [3+4+3].
My Code:
import csv
file = open(datafile.csv)
rows=csv.DictReader(file)
summ=0.0
count=0
for r in rows:
summ=summ+int(r['Value'])
count=count+1
print "Mean for column Value is: ",(summ/count)
file.close()
You can use a defaultdict of list to group the data by the ID column. Then use sum() to produce the totals.
from collections import defaultdict

# Group the (value, amount) pairs of every row by its ID.
d = defaultdict(list)
with open('datafile.csv') as f:
    next(f, None)  # skip first header line
    next(f, None)  # skip second header line
    for line in f:
        fields = line.split()
        if not fields:
            continue  # ignore blank lines
        id_, value, amount = fields
        d[id_].append((int(value), int(amount)))

# sum and average of column Value by ID
for id_, pairs in d.items():
    total = sum(value for value, _ in pairs)
    average = total / float(len(pairs))
    print('{}: sum = {}, avg = {:.2f}'.format(id_, total, average))
Output for your input data:
A: sum = 10, avg = 3.33
C: sum = 5, avg = 5.00
B: sum = 13, avg = 6.50
It can also be done with a standard Python dictionary. The solution is very similar:
# Group the (value, amount) pairs of every row by its ID.
d = {}
with open('datafile.csv') as f:
    next(f, None)  # skip first header line
    next(f, None)  # skip second header line
    for line in f:
        fields = line.split()
        if not fields:
            continue  # ignore blank lines
        id_, value, amount = fields
        # Append in place; rebuilding the list with "+" copies it on every row.
        d.setdefault(id_, []).append((int(value), int(amount)))

# sum and average of column Value by ID
for id_, pairs in d.items():
    total = sum(value for value, _ in pairs)
    average = total / float(len(pairs))
    print('{}: sum = {}, avg = {:.2f}'.format(id_, total, average))

Finding the average number

For the task I need the average score of each person, so if Dan scored 5 in one line and 7 in another he would then be displayed as having an average of 6. The average is what I need ordered and displayed.
So I have to sort from the highest average score that people have gained to the lowest average, and display the sorted version of it in Python. One of the files I have to sort looks like this.
Bob:0
Bob:1
Jane:9
Drake:8
Dan:4
Josh:1
Dan:5
How can i do this on python?
# Accumulate the total score and the number of entries for every name.
d = {}
with open('in.txt') as f:
    for x in f:
        x = x.strip()
        if not x:
            continue
        fields = x.split(':')
        name = fields[0].strip()
        # The score is the last ':'-separated field; anything after a '/' is dropped.
        score = int(fields[-1].split('/')[0].strip())
        entry = d.setdefault(name, {'score': 0, 'count': 0})
        entry['count'] += 1
        # Keep a running *total* here.  Dividing inside the loop would
        # re-average the previous average and give wrong results as soon as
        # a name has three or more scores.
        entry['score'] += score

# Turn the totals into averages once every line has been read.
for entry in d.values():
    entry['score'] = entry['score'] / float(entry['count'])

# Print the names from highest to lowest average.
ds = sorted(d.keys(), key=lambda k: d[k]['score'], reverse=True)
for x in ds:
    print('{0}: {1}'.format(x, d[x]['score']))

How to Print 2 dictionaries in 3 columns

Hey guys so I have set up 2 dictionaries which have the same keys but different values for both. I am trying to get the code to print out like this
Digit Count %
1
2
3
4
5
6
7
8
9
The count is the countList and the % is the numFreq Values with their numbers also going down in the Count and % respectively.
Okay so the Data File looks like this (only doing some numbers because the file is pretty big
Census Data
Alabama Winfield 4534
Alabama Woodland 208
Alabama Woodstock 1081
Alabama Woodville 743
Alabama Yellow Bluff 175
Alabama York 2477
Alaska Adak 361
The count is the number of occurrences of the first digit of the number. I basically turned each line into a list and appended the last value of the list (the number) to a new list. So then I did a count of how many times 1, 2, 3, 4, 5, 6, 7, 8, 9 appear. That's what countList represents. So I stored that in a dictionary with the keys being the digits and the counts being the values. The % is the relative frequency of the count. So I set up a new list and calculated the relative frequency, which is basically the count divided by the sum of all the counts (times 100), rounded off to one decimal digit. The % column has the relative count of each digit. I put that into a dictionary also, where the keys are the digits 1, 2, 3, 4, 5, 6, 7, 8, 9. So now I just need to print these numbers into the 3 columns.
Here is my code so far
def main():
num_freq = {}
pop_num = []
inFile = open ("Census__2008.txt", "r")
count = 0
for line in inFile:
if (count == 0):
count += 1
continue
else:
count += 1
line = line.strip()
word_list = line.split()
pop_num.append (word_list[-1])
counts = {}
for x in pop_num:
k = str(x)[0]
counts.setdefault(k, 0)
counts[k] += 1
countList = [counts[str(i)] for i in range(1,10)]
sumList = sum(countList)
dictCount = {}
dictCount[1] = countList[0]
dictCount[2] = countList[1]
dictCount[3] = countList[2]
dictCount[4] = countList[3]
dictCount[5] = countList[4]
dictCount[6] = countList[5]
dictCount[7] = countList[6]
dictCount[8] = countList[7]
dictCount[9] = countList[8]
num_Freq = []
for elm in countList:
rel_Freq = 0
rel_Freq = rel_Freq + ((elm / sumList) * 100.0)
rel_Freq = round(rel_Freq, 1)
num_Freq.append(rel_Freq)
freqCount = {}
freqCount[1] = num_Freq[0]
freqCount[2] = num_Freq[1]
freqCount[3] = num_Freq[2]
freqCount[4] = num_Freq[3]
freqCount[5] = num_Freq[4]
freqCount[6] = num_Freq[5]
freqCount[7] = num_Freq[6]
freqCount[8] = num_Freq[7]
freqCount[9] = num_Freq[8]
print ("Digit" " ", "Count", " ", "%")
print (
main()
Using your code, you just need to do:
for i in range(1, 10):
print (i, dictCount[i], freqCount[i])
But you can simplify it a lot:
import collections

# Collect the last whitespace-separated field (the population) of every data line.
data = []
with open("Census__2008.txt") as fh:
    fh.readline()  # skip first line
    for line in fh:
        fields = line.split()
        if fields:  # ignore blank lines
            data.append(fields[-1])

# Count how often each leading digit occurs.
c = collections.Counter(x[0] for x in data)
total = sum(c.values())

print("Digit", "Count", "%")
# items() rather than the Python 2-only iteritems(): the print() calls and
# the true division below already require Python 3.
for k, v in sorted(c.items()):
    freq = v / total * 100
    round_freq = round(freq, 1)
    print(k, v, round_freq)

Categories