I got two files open and read like this
file1
Name, day1
Omi Aiz,90
Carin Jack,92
Swit Han,88
file2
Name, Day2
Omi Aiz, 20
Carin Jack,30
Swit Han,40
How to combine these into a single dictionary:
d={'OA':[90,20],'CJ':[92,30],'SH':[88,40]}
And find the average of day1 and day2
d={'OA':55,'CJ':61,'SH':64}
this should do the first part
for the second, you just loop through and do average
def get_letters(name):
    """Return the initials of the first two words of *name*.

    ``'Omi Aiz'`` becomes ``'OA'``. Words are separated by any run of
    whitespace. A name with fewer than two words yields as many initials
    as it has (a blank name yields ``''``) instead of raising IndexError.
    """
    # Slicing to two words keeps the result identical to the two-word case
    # even when the name has middle names.
    return ''.join(word[0] for word in name.split()[:2])
# Build {initials: [day1_score, day2_score]} by reading both files in
# lockstep; rows are paired by position, so both files must list the
# names in the same order.
with open('file1', 'r') as f1, open('file2', 'r') as f2:
    d = {}
    # File objects cannot be sliced, so skip each header row with next().
    next(f1, None)
    next(f2, None)
    for l1, l2 in zip(f1, f2):
        if not l1.strip() or not l2.strip():
            continue  # ignore blank rows such as a trailing empty line
        l1 = l1.split(',')
        l2 = l2.split(',')
        letters = get_letters(l1[0])
        # A dict has no append(); assign by key instead. int() tolerates
        # the spaces and newline left around the number by the split.
        d[letters] = [int(l1[1]), int(l2[1])]
d={}
def read(file):
    """Return the lines of *file* without any ``Name,`` header rows.

    The content is split on the newline character, so a file that ends
    with a newline produces a final empty string in the result.
    """
    with open(file) as handle:
        lines = handle.read().split('\n')
    # Build a new list instead of removing from the list being iterated:
    # removing in place makes the loop skip the element after each removal.
    return [line for line in lines if not line.startswith('Name,')]
def sep(name):
    """Return the initials of every word in *name*, e.g. ``'Omi Aiz'`` -> ``'OA'``.

    Words are split on any run of whitespace, so leading, trailing or
    repeated spaces no longer produce empty words (which used to raise
    IndexError). A blank name yields ``''``.
    """
    return ''.join(word[0] for word in name.split())
# Collect the day-1 score of every person, keyed by initials.
d = {}
for row in filter(None, read('file1.txt')):
    full_name, score = row.split(',')
    d[sep(full_name)] = [int(score)]

# Add the day-2 score for people already seen in the first file; anyone
# who appears only in the second file is ignored.
for row in filter(None, read('file2.txt')):
    full_name, score = row.split(',')
    initials = sep(full_name)
    if initials in d:
        d[initials].append(int(score))

print(d)
Related
I want to define a function that reads a table from a text file into a dictionary and then uses it to return specific values. The keys are chemical symbols (like "He" for helium, ...). The values are their specific atomic masses.
I don't understand, what I have to do...
The first five lines of the textfile read:
H,1.008
He,4.0026
Li,6.94
Be,9.0122
B,10.81
Here are my attempts: (I don't know where to place the parameter key so that I can define it)
def read_masses():
atom_masses = {}
with open["average_mass.csv") as f:
for line in f:
(key, value) = line.split(",")
atom_masses[key] = value
return(value)
m = read_masses("average_mass.csv)
print(m["N"]) #for the mass of nitrogen ```
Once return has been called, the code below it doesn't execute. What you need to return is atom_masses, not value, and you have to place the return statement outside the for loop.
def read_masses(file):
    """Read a ``symbol,mass`` table and return it as ``{symbol: mass}``.

    Each non-blank line of *file* must hold a chemical symbol and its
    mass separated by one comma. Masses are returned as floats, without
    the trailing newline that the raw line carries.

    Raises ValueError if a line does not have exactly two fields or the
    mass is not a number.
    """
    atom_masses = {}
    with open(file) as f:
        for line in f:
            line = line.strip()
            if not line:
                continue  # tolerate blank lines, e.g. at the end of the file
            key, value = line.split(",")
            atom_masses[key.strip()] = float(value)
    # Return the whole table only after every line has been processed.
    return atom_masses
# Load the table from the CSV file, then look up one element by its symbol.
m = read_masses("average_mass.csv")
print(m["H"])
>>> 1.008
Try:
def read_masses(name):
    """Parse the ``symbol,mass`` file *name* into a ``{symbol: float}`` dict.

    Surrounding whitespace is removed from each line and from both fields;
    empty lines are skipped. Only the first comma on a line separates the
    symbol from the mass.
    """
    with open(name, "r") as f_in:
        stripped = (raw.strip() for raw in f_in)
        fields = (row.split(",", maxsplit=1) for row in stripped if row != "")
        return {symbol.strip(): float(mass.strip()) for symbol, mass in fields}
# Build the symbol -> mass table once and query it.
m = read_masses("your_file.txt")
# dict.get() returns None rather than raising KeyError for an unknown symbol.
print(m.get("He"))
Prints:
4.0026
I have this assignment in a basic programming course where I need to rewrite this code using a while loop instead of a for loop, but I don't know how to do it.
this is my code so far
def read_txt(file_txt):
    """Return every line of the file *file_txt* as a list of strings.

    Each string keeps its trailing newline, exactly as ``readlines()``
    delivers it.
    """
    # The with-statement closes the file even if reading raises.
    with open(file_txt, "r") as file:
        return file.readlines()
# Ask for the input path and load every line of that file.
file_txt = input("file: ")
lines = read_txt(file_txt)
# For each line, print its first field followed by the mean of the remaining fields.
for l in lines:
asd = l.split(",")
length = len(asd)
score = 0
# Index 0 is printed as the label below, so summing starts at index 1.
for i in range(1, length):
score += int(asd[i])
# length - 1 is the number of fields that were summed.
average = score / (length-1)
print(asd[0], average)
file text is like this
edward,4,3,1,2
sara,5,4,1,0
def read_txt(file_txt):
    """Read the file *file_txt* and return its lines as a list.

    The lines are returned unmodified, so each one still ends with its
    newline character when the file has one there.
    """
    # Using a context manager guarantees the handle is released, including
    # when readlines() fails part-way through.
    with open(file_txt, "r") as handle:
        lines = handle.readlines()
    return lines
file_txt = input("file: ")
lines = read_txt(file_txt)
# pop() takes items from the end of the list, so reverse first in order to
# process the lines in their original order.
lines.reverse()
# Runs until every line has been popped and the list is empty.
while lines:
l = lines.pop()
asd = l.split(",")
length = len(asd)
score = 0
# Manual index counter; it starts at 1 so asd[0] is not added to the score.
i = 1
while i < length:
score += int(asd[i])
i += 1
# length - 1 is the number of fields that were summed.
average = score / (length-1)
print(asd[0], average)
Now, in this while loop, it will iterate through lines until lines is empty, popping the items out one by one.
For loops are more suitable than while loops for iterating over the lines of a file. A few improvements here are: (1) use the builtin sum instead of manually adding up the scores, and (2) don't read all the lines of the file at once if the file is too big.
file_txt = input("file: ")
with open(file_txt) as f:
# Read one line per iteration instead of loading the whole file at once.
while True:
line = f.readline()
# readline() returns an empty string only at end of file.
if not line:
break
# Split off the first field; the remainder holds the comma-separated scores.
name, scores = line.split(',', maxsplit=1)
scores = scores.split(',')
avg = sum(int(s) for s in scores) / len(scores)
print(f'{name} {avg}')
As you can see above, the while loop needs the "if not line" check to determine whether we have reached the end of the file; this is not needed in a for loop, because the file object implements the __iter__ protocol.
The Python 3.8 walrus operator makes that slightly easier:
file_txt = input("file: ")
with open(file_txt) as f:
# The walrus operator assigns the line and tests it in one expression; the
# loop ends when readline() returns the empty string at end of file.
while line := f.readline():
# Split off the first field; the remainder holds the comma-separated scores.
name, scores = line.split(',', maxsplit=1)
scores = scores.split(',')
avg = sum(int(s) for s in scores) / len(scores)
print(f'{name} {avg}')
The following gives the exact same output without using any for loop.
filename = input("file: ")
with open(filename) as f:
# Rebinds f from the file object to the list of its lines.
f = f.readlines()
# Pass 1: pop the lines from the end and drop a trailing newline; n ends up
# holding the lines in reverse file order.
n = []
while f:
v = f.pop()
if v[-1] == '\n':
n.append(v.strip('\n'))
else:
n.append(v)
# Pass 2: popping n from the end restores file order; map the first field
# of each line to the list of its remaining fields.
d = {}
while n:
v = n.pop()
v = v.split(',')
d[v[0]] = v[1:]
# Sort the keys in descending order so that pop(), which takes from the
# end, yields them in ascending order.
d_k = list(d.keys())
d_k.sort(reverse=True)
while d_k:
v = d_k.pop()
p = d[v]
# n is reused here to collect the fields of this key converted to int.
n = []
while p:
a = p.pop()
a = int(a)
n.append(a)
print(str(v), str(sum(n)/len(n)))
Output:
edward 2.5
sara 2.5
I have this code
with open(newconfig, 'r') as file1: # New File
with open(goldconfig, 'r') as file2: # Standard File
# Lines that occur in file1 but nowhere in file2; whole lines are compared,
# regardless of the position at which they appear.
difference = set(file1).difference(file2)
# Drop the entry for a line consisting only of a newline, if present.
difference.discard('\n')
diff_file = input("INFO: Select what to name the difference(s) : ")
with open(diff_file, 'w') as file_out:
# Sets are unordered, so the differences are not reported in file order.
for line in difference:
file_out.write("** WARNING: Difference found in New Config:\n " + line + "\n")
print("WARNING: Difference in file: " + line)
print("\n\n")
print("INFO: Difference File Created: " + diff_file)
but I want to ignore if the file has the same word, but on different lines
so for example
List one:
TOM123
TOM1234
TOM12345
List Two:
TOMA
TOMB
TOM123
TOM1234
TOM12345
Difference:
TOMA
TOMB
If you want to get a line of text into a set, you can do something like this:
text = 'TOM1234 TOM1234 TOM12345 TOM123'
# split() already returns the list of words, so it can be handed to set()
# directly; the set keeps one copy of each distinct word.
a = set(text.split())
print(a)
Output
{'TOM123', 'TOM1234', 'TOM12345'}
If you want to find the items that are only in one of the sets, use symmetric_difference.
a = {'TOM123', 'TOM1234', 'TOM12345', 'TOM5'}
b = {'TOMA', 'TOMB', 'TOM123', 'TOM1234', 'TOM12345'}
# Items that are in exactly one of the two sets (equivalent to a ^ b).
difference = a.symmetric_difference(b)
print(difference)
Output
{'TOM5', 'TOMA', 'TOMB'}
you can try this:
def open_file_and_return_list(file_path):
    """Return the lines of the file at *file_path* as a list of strings.

    Every line keeps its trailing newline; an empty file gives ``[]``.
    """
    with open(file_path, 'r') as f:
        # readlines() does what the manual readline() loop did, without a
        # local variable that shadows the builtin ``list``.
        return f.readlines()
def clean_new_line(list):
    """Strip every newline character from the strings in *list*, in place.

    The list passed in is modified and also returned. The parameter keeps
    its original name for the sake of existing callers, although it hides
    the builtin ``list`` inside this function.
    """
    for position, entry in enumerate(list):
        if "\n" in entry:
            list[position] = entry.replace("\n", "")
    return list
if __name__ == "__main__":
    # Load both files and strip the newline characters from every line.
    list1 = clean_new_line(open_file_and_return_list(r"path\File1.txt"))
    list2 = clean_new_line(open_file_and_return_list(r"path\File2.txt"))

    # Sets make each membership test O(1); the lists are still the ones
    # iterated, so the output order and any duplicates are unchanged.
    in_list1 = set(list1)
    in_list2 = set(list2)

    # Lines only in the first file, followed by lines only in the second.
    diff = [obj for obj in list1 if obj not in in_list2]
    diff += [obj for obj in list2 if obj not in in_list1]
    print(diff)
I want to return a dictionary that a file contains. What I have is this code:
def read_report(filename):
# NOTE: new_report is a dict, and dicts have no append() method, so the
# append() call inside the loop raises AttributeError.
new_report = {}
input_filename = open(filename)
for line in input_filename:
# Drop the last character of the line (the newline, when the line has one).
lines = line[:-1]
new_report.append(lines)
input_filename.close()
return new_report
It says I can't append to a dictionary. So how would I go with adding lines from the file into the dictionary? Let's say my filename is this:
shorts: a, b, c, d
longs: a, b, c, d
mosts: a
count: 11
avglen: 1.0
a 5
b 3
c 2
d 1
I'm assuming the last lines of your files (the ones that don't contain :) are to be ignored.
from collections import defaultdict

# Maps each label before a colon to the list of comma-separated values
# after it; lines without a colon are ignored.
d = defaultdict(list)
with open('somefile.txt') as f:
    for line in f:
        if ':' in line:
            # Split on the first colon only, so a value that itself
            # contains ':' does not break the two-name unpacking.
            key, val = line.split(':', 1)
            # Strip each item so 'a, b' yields ['a', 'b'], not ['a', ' b'].
            d[key.strip()] += [item.strip() for item in val.split(',')]
def read_line(filename):
    """Return ``{line_index: text}`` for the file *filename*, counting from 0.

    Only the trailing newline is removed from each line. A final line
    without a newline is kept whole instead of losing its last character.
    """
    # The with-statement closes the file even if reading raises.
    with open(filename) as file_name:
        return {
            index: line.rstrip('\n')
            for index, line in enumerate(file_name)
        }
if you rewrite your input file to have uniform lines like the first and the second, you could try this:
EDIT: modified code to support also lines with space separator instead of colon (:)
def read_report(filename):
    """Parse the report file *filename* into ``{key: [values]}``.

    A line with exactly one colon is read as ``key: v1, v2, ...``; any
    other non-blank line is read as ``key value`` separated by whitespace.
    Keys and values are stripped of surrounding whitespace (including the
    line's newline), and blank lines are skipped.

    Raises ValueError if a non-blank line has no value part.
    """
    new_report = {}
    # The with-statement closes the file even if a line fails to parse.
    with open(filename) as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            if line.count(':') == 1:
                key, value = line.split(':')
            else:
                # Split on the first run of whitespace only, so repeated
                # spaces between key and value are tolerated.
                key, value = line.split(None, 1)
            new_report[key.strip()] = [item.strip() for item in value.split(',')]
    return new_report
I have a file looking like this:
732772 scaffold-3 G G A
732772 scaffold-2 G G A
742825 scaffold-3 A A G
776546 scaffold-3 G A G
776546 scaffold-6 G A G
I'm interested in using column 2 as my key and producing output in which each key appears only once, together with the values associated with it.
In other words, if a name in column 2 occurs more than once, output it only once, so the output should be:
scaffold-3
732772 G G A
742825 A A G
776546 G A G
scaffold-2
732772 G G A
scaffold-6
776546 G A G
I wrote something like this:
res = open('00test','r')
# Opened for writing, but nothing is written to it in this snippet.
out = open('00testresult','w')
# Maps each name (second column) to a list of dicts, one per input row.
d = {}
for line in res:
# Lines starting with '#' are skipped.
if not line.startswith('#'):
# Split the row into its whitespace-separated columns.
line = line.strip().split()
pos = line[0]
name = line[1]
call = line[2]
father = line[3]
mother = line[4]
# Create the list the first time a name is seen.
if not (name in d):
d[name] = []
d[name].append({'pos':pos,'call':call,'father':father,'mother':mother})
but I have no idea, how to output it in a way I described above.
Any help will be nice
EDIT:
This is fully working code, that solved the problem:
# Group the rows of '00test' by the name in the second column, then write
# each name once, followed by its rows, to '00testresult'.
d = {}
with open('00test', 'r') as res:
    for line in res:
        if line.startswith('#'):
            continue
        line = line.strip().split()
        if not line:
            continue  # skip blank lines instead of failing on line[0]
        pos, name, call, father, mother = line[:5]
        # setdefault creates the list the first time a name is seen.
        d.setdefault(name, []).append(
            {'pos': pos, 'call': call, 'father': father, 'mother': mother}
        )

# Both files are opened with with-statements so they are closed (and the
# output flushed) even if an error occurs part-way through.
with open('00testresult', 'w') as out:
    for k, v in d.items():
        out.write(str(k) + '\n')
        for i in v:
            out.write('\t'.join((i['pos'], i['call'], i['father'], i['mother'])) + '\n')
Now that you have your dictionary, loop over the items and write to a file:
# Column order used when writing each entry.
keys = ('pos', 'call', 'father', 'mother')
with open(outputfilename, 'w') as output:
    for name in d:
        # Write the group heading once, then every entry stored under it.
        output.write(name + '\n')
        # Index with the loop variable: the string literal 'name' is not a
        # key of d and would raise KeyError.
        for entry in d[name]:
            output.write(' '.join(entry[k] for k in keys) + '\n')
You may want to use a collections.defaultdict() object instead of a regular dictionary for d:
from collections import defaultdict
d = defaultdict(list)
and remove the if not (name in d): d[name] = [] lines altogether.