I have a file looking like this:
732772 scaffold-3 G G A
732772 scaffold-2 G G A
742825 scaffold-3 A A G
776546 scaffold-3 G A G
776546 scaffold-6 G A G
I'm interested in using column 2 as my key and producing output in which each key appears only once, followed by the values associated with it.
in other words, if name in column 2 occurs more than once, output it only once, therefore the output should be:
scaffold-3
732772 G G A
742825 A A G
776546 G A G
scaffold-2
732772 G G A
scaffold-6
776546 G A G
I wrote sth like this:
res = open('00test','r')
out = open('00testresult','w')
d = {}
for line in res:
if not line.startswith('#'):
line = line.strip().split()
pos = line[0]
name = line[1]
call = line[2]
father = line[3]
mother = line[4]
if not (name in d):
d[name] = []
d[name].append({'pos':pos,'call':call,'father':father,'mother':mother})
but I have no idea, how to output it in a way I described above.
Any help will be nice
EDIT:
This is fully working code, that solved the problem:
# Group the rows of '00test' by the name in column 2, then write each name
# once to '00testresult', followed by the rows that belong to it.
d = {}
with open('00test', 'r') as res:
    for line in res:
        if line.startswith('#'):
            continue  # comment line
        fields = line.split()
        if not fields:
            continue  # blank line: nothing to record
        pos, name, call, father, mother = fields[:5]
        # setdefault creates the list the first time a name is seen.
        d.setdefault(name, []).append(
            {'pos': pos, 'call': call, 'father': father, 'mother': mother}
        )

# Both files are handled by `with`, so they are closed even if an error
# occurs part-way through.
with open('00testresult', 'w') as out:
    for k, v in d.items():
        out.write(str(k) + '\n')
        for i in v:
            out.write('\t'.join((i['pos'], i['call'], i['father'], i['mother'])) + '\n')
Now that you have your dictionary, loop over the items and write to a file:
# Write every name once, followed by one space-separated line per entry
# recorded for it. `d` maps name -> list of dicts keyed by `keys`;
# `outputfilename` is supplied by the surrounding script.
keys = ('pos', 'call', 'father', 'mother')
with open(outputfilename, 'w') as output:
    for name, entries in d.items():
        output.write(name + '\n')
        # Use the loop variable, not the literal string 'name', as the key.
        for entry in entries:
            output.write(' '.join([entry[k] for k in keys]) + '\n')
You may want to use a collections.defaultdict() object instead of a regular dictionary for d:
from collections import defaultdict
d = defaultdict(list)
and remove the if not (name in d): d[name] = [] lines altogether.
Related
I got two files open and read like this
file1
Name, day1
Omi Aiz,90
Carin Jack,92
Swit Han,88
file2
Name, Day2
Omi Aiz, 20
Carin Jack,30
Swit Han,40
How to combine these into a single dictionary:
d={'OA':[90,20],'CJ':[92,30],'SH':[88,40]}
And find the average of day1 and day2
d={'OA':55,'CJ':61,'SH':64}
this should do the first part
for the second, you just loop through and do average
def get_letters(name):
    """Return the initials of the first two words of *name*.

    'Omi Aiz' -> 'OA'. A single-word name yields one letter and an empty
    name yields '' instead of raising IndexError.
    """
    return ''.join(part[0] for part in name.split()[:2])
# Build {initials: [day1, day2]} from two CSV files.
# NOTE: rows are paired by position, so this assumes both files list the
# same people in the same order.
with open('file1', 'r') as f1, open('file2', 'r') as f2:
    # File objects cannot be sliced; consume the header line of each instead.
    next(f1, None)
    next(f2, None)
    d = {}
    for l1, l2 in zip(f1, f2):
        l1 = l1.strip().split(',')
        l2 = l2.strip().split(',')
        if len(l1) < 2 or len(l2) < 2:
            continue  # blank or malformed row
        letters = get_letters(l1[0])
        # Dicts have no append(); assign by key. int() tolerates the stray
        # spaces around the numbers in the sample data.
        d[letters] = [int(l1[1]), int(l2[1])]
d={}
def read(file):
    """Return the lines of *file* with every 'Name,' header line dropped.

    The content is split on '\n', so a file ending in a newline yields a
    trailing empty string; callers are expected to skip empty lines.
    """
    with open(file) as handle:
        lines = handle.read().split('\n')
    # Build a new list rather than removing from `lines` while iterating it,
    # which skips the element following each removal.
    return [line for line in lines if not line.startswith('Name,')]
def sep(name):
    """Return the first letter of every whitespace-separated word in *name*."""
    # split() with no argument collapses runs of whitespace, so leading,
    # trailing or doubled spaces never produce an empty word to index into.
    return ''.join(word[0] for word in name.split())
# Seed the mapping with the day-1 score of every person in file1.txt,
# keyed by their initials.
d = {}
for row in read('file1.txt'):
    if not row:
        continue
    full_name, score = row.split(',')
    initials = sep(full_name)
    d[initials] = [int(score)]

# Append the day-2 score for people already seen in file1.txt; anyone who
# appears only in file2.txt is ignored.
for row in read('file2.txt'):
    if not row:
        continue
    full_name, score = row.split(',')
    initials = sep(full_name)
    if initials in d:
        d[initials].append(int(score))

print(d)
I want to define a function that reads a table from a text file into a dictionary and then use it to return specific values. The keys are chemical symbols (like "He" for helium, ...). The values are their atomic masses.
I don't understand, what I have to do...
The first five lines of the textfile read:
H,1.008
He,4.0026
Li,6.94
Be,9.0122
B,10.81
Here are my attempts: (I don't know where to place the parameter key so that I can define it)
def read_masses():
atom_masses = {}
with open["average_mass.csv") as f:
for line in f:
(key, value) = line.split(",")
atom_masses[key] = value
return(value)
m = read_masses("average_mass.csv)
print(m["N"]) #for the mass of nitrogen ```
Once return has been executed, the code below it doesn't run. What you need to return is atom_masses, not value, and the return statement has to be placed outside the for loop.
def read_masses(file):
    """Read 'symbol,mass' lines from *file* into a dict.

    Returns a dict mapping each symbol to its mass as a string, with
    surrounding whitespace (including the line's newline) removed.
    Blank lines are skipped.
    """
    atom_masses = {}
    with open(file) as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            (key, value) = line.split(",")
            # Strip both parts so lookups and printed values are clean.
            atom_masses[key.strip()] = value.strip()
    return (atom_masses)
m = read_masses("average_mass.csv")
print(m["H"])
>>> 1.008
Try:
def read_masses(name):
    """Parse 'symbol,mass' lines from the file *name* into {symbol: float}.

    Blank lines are ignored; whitespace around both fields is discarded.
    """
    masses = {}
    with open(name, "r") as source:
        for raw in source:
            record = raw.strip()
            if not record:
                continue
            symbol, mass = record.split(",", maxsplit=1)
            symbol = symbol.strip()
            masses[symbol] = float(mass.strip())
    return masses
m = read_masses("your_file.txt")
print(m.get("He"))
Prints:
4.0026
How can I read a specific line that starts with "#" from a file in Python and
set that line as a key in a dictionary (without the "#"), and set all the lines after that line until the next "#" as the value in the dictionary?
please help me
here is the file :
from collections import defaultdict
# Collect the lines of 'thefile.txt' under the most recent '#' header line.
# Lines that appear before the first header are stored under 'NOKEY'.
key = 'NOKEY'
d = defaultdict(list)
with open('thefile.txt', 'r') as f:
    for line in f:
        if line.startswith('#'):
            # Drop only the leading marker and the surrounding whitespace
            # (including the newline); any later '#' in the title is kept.
            key = line[1:].strip()
            continue
        d[key].append(line)
Your dictionary will have a list of lines under each key. All lines that come before the first line starting with '#' would be stored under the key 'NOKEY'.
You could make use of Python's groupby function as follows:
from itertools import groupby
# Map each '#' header line of 'input.txt' to the text that follows it.
# groupby yields alternating runs of header lines and body lines.
d = {}
key = ''
with open('input.txt', 'r') as f_input:
    for is_header, group in groupby(f_input, key=lambda x: x.startswith('#')):
        if is_header:
            # A run may hold several consecutive headers: register each one
            # so the following body is filed under the last header, not the
            # first, and headers with no body still appear (as '').
            for header in group:
                key = header.strip(' #\n')
                d.setdefault(key, '')
        else:
            d[key] = ''.join(group)

# Parenthesised form works on both Python 2 and Python 3.
print(d)
This would give you the following kind of output:
{'The Piper at the gates of dawn': '*Lucifer sam....\nsksdlkdfslkj\ndkdkfjoiupoeri\nlkdsjforinewonre\n', 'A Saucerful of Secrets': '*Let there be\nPeople heard him say'}
Tested using Python 2.7.9
A pretty simple version
# Accumulate the text that follows each '#' line of the file, using the
# raw header line itself (marker and newline included) as the dictionary key.
filename = 'test'
results = {}
with open(filename, 'r') as f:
    for text in f:
        if text[0] != "#":
            # Body line: extend the section opened by the latest header.
            results[key] += text
        else:
            key = text
            results[key] = ''
From (ignoring additional blank lines, a by-product of the answer formatting):
#The Piper at the gates of dawn
*Lucifer sam....
sksdlkdfslkj
dkdkfjoiupoeri
lkdsjforinewonre
# A Saucerful of Secrets
*Let there be
People heard him say
Produces:
{'#The Piper at the gates of dawn\n': '*Lucifer sam....\nsksdlkdfslkj\ndkdkfjoiupoeri\nlkdsjforinewonre\n', '# A Saucerful of Secrets \n': '*Let there be\nPeople heard him say\n'}
I want to return a dictionary that a file contains. What I have is this code:
def read_report(filename):
new_report = {}
input_filename = open(filename)
for line in input_filename:
lines = line[:-1]
new_report.append(lines)
input_filename.close()
return new_report
It says I can't append to a dictionary. So how would I go with adding lines from the file into the dictionary? Let's say my filename is this:
shorts: a, b, c, d
longs: a, b, c, d
mosts: a
count: 11
avglen: 1.0
a 5
b 3
c 2
d 1
I'm assuming the last lines of your files (the ones that don't contain :) are to be ignored.
from collections import defaultdict
# Parse 'key: a, b, c' lines of 'somefile.txt' into {key: [items]}.
# Lines without a colon are ignored.
d = defaultdict(list)
with open('somefile.txt') as f:
    for line in f:
        if ':' in line:
            # Split on the first colon only, so a value containing ':' does
            # not break the two-way unpacking.
            key, val = line.split(':', 1)
            # Strip each item so the stored values carry no leading spaces.
            d[key.strip()] += [item.strip() for item in val.split(',')]
def read_line(filename):
    """Return {line_index: line_text} for every line of *filename*.

    Indices start at 0. Only the trailing newline is removed, so a final
    line without a newline keeps its last character.
    """
    with open(filename) as handle:
        return {index: line.rstrip('\n') for index, line in enumerate(handle)}
if you rewrite your input file to have uniform lines like the first and the second, you could try this:
EDIT: modified code to support also lines with space separator instead of colon (:)
def read_report(filename):
    """Parse a report file into {key: [values]}.

    A line with exactly one ':' is split into key and value at the colon;
    any other line is split at its first run of whitespace. The value is
    then split on ',' and each item is stripped. Blank lines are skipped.

    Raises ValueError for a non-blank line that cannot be split in two.
    """
    new_report = {}
    with open(filename) as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            if line.count(':') == 1:
                key, value = line.split(':')
            else:
                key, value = line.split(None, 1)
            # Strip items so neither spaces nor the newline leak into values.
            new_report[key.strip()] = [item.strip() for item in value.split(',')]
    return new_report
I have the below code:
datedict = defaultdict(set)
with open('d:/info.csv', 'r') as csvfile:
filereader = csv.reader(csvfile, 'excel')
#passing the header
read_header = False
start_date=date(year=2009,month=1,day=1)
#print((seen_date - start_date).days)
tdic = {}
for row in filereader:
if not read_header:
read_header = True
continue
# reading the rest rows
name,id,firstseen = row[0],row[1],row[3]
try:
seen_date = datetime.datetime.strptime(firstseen, '%d/%m/%Y').date()
deltadays = (seen_date-start_date).days
deltaweeks = deltadays/7 + 1
key = name +'-'+id
currentvalue = tdic.get(key, [])
currentvalue.append(deltaweeks)
tdic[key] = currentvalue
except ValueError:
print('Date value error')
pass
tdic = dict((name, max(weeks) - min(weeks) + 1) for name, weeks in tdic.iteritems())
pprint.pprint(tdic)
in which I get the below result:
{'Mali-2': 20,
'Gooki-3': 6,
'Piata-4': 6,
'Goerge-5': 4,
'Samoo-6': 1,
'Marria-7': 2}
Now I would like to write and print the three items, name,id and weeks as separate columns in an excel file. Anyone knows how it is possible?
>>> with open('out.csv', 'w') as f:
w = csv.writer(f)
for k, v in tdic.iteritems():
name, id_ = k.split('-')
weeks = v
w.writerow([name, id_, weeks])
>>> with open('out.csv') as f:
print f.read()
Piata,4,6
Mali,2,20
Goerge,5,4
Gooki,3,6
Samoo,6,1
Marria,7,2
I however don't like the way you have done this, here are some suggestions for your code:
key = name +'-'+id
Instead of using string manipulation to create a key, use a tuple:
key = (name, id)
Change this line:
tdic = dict((name, max(weeks) - min(weeks) + 1) for name, weeks in tdic.iteritems())
to just say
tdic = dict((key, max(weeks) - min(weeks) + 1) for key, weeks in tdic.iteritems())
since now it is a key of (name, id_) we should reflect that (it's a minor thing but important)
Then the above code would simply be
>>> with open('out.csv', 'w') as f:
w = csv.writer(f)
for (name, id_), weeks in tdic.iteritems():
w.writerow([name, id_, weeks])
You can define a dict of dict like this:
a_dict = {key: { anotherKey: value}}
Or if the dict already exists:
a_dict[key] = {anotherkey: value}
print a_dict[key][anotherkey]