This code works fine and prints its results year by year, but I need the results in a nested dict format like this:
data = {
'year': {
'male': {'Q1': 1, 'Q2': 1, 'Q3': 1, 'Q4': 1, },
'female': { 'Q1': 1, 'Q2': 1, 'Q3': 1, 'Q4': 1, }
}
}
The code:
import csv

# Per-gender tallies: results[gender][year] -> {'Q1': n, ..., 'Q4': n}.
results = {'males': {}, 'females': {}}

with open('1000 Records.csv') as csv_file:
    for record in csv.reader(csv_file):
        joined_year = int(record[17])
        # Presumably already a key such as 'Q1'; any other value raises KeyError below.
        joined_quarter = record[15]
        # Anything other than 'M' in the gender column is counted as female.
        per_year = results['males' if record[5] == 'M' else 'females']
        if joined_year not in per_year:
            per_year[joined_year] = {f'Q{n}': 0 for n in range(1, 5)}
        per_year[joined_year][joined_quarter] += 1

# Every year seen for either gender, in ascending order.
years = sorted(set(results['males']) | set(results['females']))

for year in years:
    # A year with no rows for one gender is reported as a bare 0 for that gender.
    count = [results[gender].get(year, 0) for gender in ('males', 'females')]
    print("Male's and Female's: %s: %s" % (year, count))
This is a working solution:
import csv
import collections

# data[year][gender] -> {'Q1': n, ..., 'Q4': n}
data = {}

with open('1000 Records.csv') as csv_file:
    for record in csv.reader(csv_file):
        joined_year = int(record[17])
        joined_quarter = record[15]
        # Anything other than 'M' in the gender column is counted as female.
        gender = 'male' if record[5] == 'M' else 'female'
        if joined_year not in data:
            # First row for this year: start both genders at zero for every quarter.
            data[joined_year] = {
                label: {f'Q{n}': 0 for n in range(1, 5)}
                for label in ('male', 'female')
            }
        data[joined_year][gender][joined_quarter] += 1

# Rebuild the mapping with the years in ascending order.
data = collections.OrderedDict((year, data[year]) for year in sorted(data))

for year, counts in data.items():
    print("Male's and Female's: %s: %s" % (year, counts))
The only difference in the code above is that it gives its output in a slightly different format, but I suspect it may be what you wanted in the first place:
Male's and Female's: 1993: {'male': {'Q1': 0, 'Q2': 0, 'Q3': 0, 'Q4': 1}, 'female': {'Q1': 0, 'Q2': 0, 'Q3': 0,
'Q4': 0}}
Male's and Female's: 1998: {'male': {'Q1': 0, 'Q2': 0, 'Q3': 0, 'Q4': 1}, 'female': {'Q1': 0, 'Q2': 0, 'Q3': 0,
'Q4': 0}}
Male's and Female's: 1999: {'male': {'Q1': 0, 'Q2': 1, 'Q3': 1, 'Q4': 0}, 'female': {'Q1': 0, 'Q2': 0, 'Q3': 0,
'Q4': 1}}
Male's and Female's: 2001: {'male': {'Q1': 0, 'Q2': 0, 'Q3': 0, 'Q4': 0}, 'female': {'Q1': 1, 'Q2': 0, 'Q3': 0,
'Q4': 0}}
Male's and Female's: 2003: {'male': {'Q1': 0, 'Q2': 0, 'Q3': 0, 'Q4': 0}, 'female': {'Q1': 0, 'Q2': 0, 'Q3': 0,
'Q4': 1}}
If not, let me know, I will modify it.
You are close. Outside of your `for year in years` loop, keep a dictionary that stores the running results of the yearly counts:
# Regroup the per-gender tallies by year: data[year] -> {'male': ..., 'female': ...}.
data = {}
for year in years:
    # results is keyed by the plural labels; a year missing for a gender becomes 0.
    data[year] = {
        label: results[label + 's'].get(year, 0)
        for label in ('male', 'female')
    }
I encountered a few errors in the code that supposedly "worked fine", so I fixed them too and optimized things a bit in the process. Below is the result using a simple sample CSV file I created for testing purposes:
import csv
from pprint import pprint

# Column indices of year-of-joining, quarter-of-joining and gender.
#YOJ, QOJ, GEN = 17, 15, 3
YOJ, QOJ, GEN = 0, 1, 2  # For testing since no sample CSV provided.


def _empty_quarters():
    """Return a fresh counter dict with 'Q1'..'Q4' all set to zero."""
    return {f'Q{i + 1}': 0 for i in range(4)}


# Per-gender tallies: results[gender][year] -> {'Q1': n, ..., 'Q4': n}.
results = {'males': {}, 'females': {}}

# newline='' lets the csv module do its own newline handling, as its
# documentation recommends for files passed to csv.reader.
with open('1000 Records.csv', newline='') as csv_file:
    for row in csv.reader(csv_file):
        year_of_joining = int(row[YOJ])
        # The quarter column is parsed as an integer and shifted by one below,
        # so the test file presumably stores quarters as 0..3.
        quarter_of_joining = int(row[QOJ])
        # Anything other than 'M' in the gender column is counted as female.
        gender = 'males' if row[GEN] == 'M' else 'females'
        if year_of_joining not in results[gender]:
            results[gender][year_of_joining] = _empty_quarters()
        QOJ_key = f'Q{quarter_of_joining+1}'  # Convert to dict key format.
        results[gender][year_of_joining][QOJ_key] += 1

# Union of the years seen for either gender, ascending.
years = sorted(results['males'].keys() | results['females'].keys())

# A year may have rows for only one gender; fall back to an all-zero counter
# instead of raising KeyError on the gender that has no entry for that year.
data = {year: {'males': results['males'].get(year, _empty_quarters()),
               'females': results['females'].get(year, _empty_quarters())}
        for year in years}

pprint(data, sort_dicts=False)
Sample output:
{1980: {'males': {'Q1': 0, 'Q2': 1, 'Q3': 1, 'Q4': 0},
'females': {'Q1': 0, 'Q2': 0, 'Q3': 1, 'Q4': 0}},
1981: {'males': {'Q1': 0, 'Q2': 0, 'Q3': 1, 'Q4': 0},
'females': {'Q1': 0, 'Q2': 0, 'Q3': 0, 'Q4': 2}}}
I have this list of names: [Frank, Sam, Kevin, Jack]
Is it possible to create a dictionary using the names in the list to create something like this?
'Frank' : {'Sam': 0, 'Kevin': 0, 'Jack': 0},
'Sam' : {'Frank': 0, 'Kevin': 0, 'Jack': 0},
'Kevin' : {'Frank': 0, 'Sam': 0, 'Jack': 0}
'Jack' : {'Frank': 0, 'Sam': 0, 'Kevin': 0}
I want to know if it's possible to iterate through the list, pick the first name and then create a dictionary with it, with the other members in the list as keys and 0 as the default value. And then repeat it for the other elements in the list as well.
I was thinking of using something like this.
# Outer mapping; `string` is the name that becomes a key holding its own empty dict.
my_dynamic_vars = {}
my_dynamic_vars[string] = {}
Any help would be much appreciated.
You can use nested dictionary comprehensions:
>>> lst = ['Frank', 'Sam', 'Kevin', 'Jack']
>>> dct = {x:{y:0 for y in lst if y != x} for x in lst}
>>> dct
{'Frank': {'Kevin': 0, 'Sam': 0, 'Jack': 0}, 'Kevin': {'Frank': 0, 'Jack': 0, 'Sam': 0}, 'Sam': {'Frank': 0, 'Jack': 0, 'Kevin': 0}, 'Jack': {'Frank': 0, 'Kevin': 0, 'Sam': 0}}
>>>
>>> # Just to demonstrate
>>> from pprint import pprint
>>> pprint(dct)
{'Frank': {'Jack': 0, 'Kevin': 0, 'Sam': 0},
'Jack': {'Frank': 0, 'Kevin': 0, 'Sam': 0},
'Kevin': {'Frank': 0, 'Jack': 0, 'Sam': 0},
'Sam': {'Frank': 0, 'Jack': 0, 'Kevin': 0}}
>>>
ummm
# Map every name to a dict of all the *other* names, each starting at 0.
d = {}
names = ["Frank", "Sam", "Kevin", "Jack"]
for name in names:
    # set difference drops the current name; fromkeys gives every remaining key 0.
    d[name] = dict.fromkeys(set(names).difference([name]), 0)
# Parenthesised so the line is valid on both Python 2 and Python 3.
print(d)
is probably how I would do it ..
names = ['Frank', 'Sam', 'Kevin', 'Jack']
# Template holding every name at 0; each entry below starts as a copy of it.
d = dict.fromkeys(names, 0)
names_dict = {}
for name in names:
    temp = d.copy()
    # A name must not appear inside its own entry.
    del temp[name]
    names_dict[name] = temp
output:
>>> for name in names:
...     print("%r: %r" % (name, names_dict[name]))
'Frank': {'Jack': 0, 'Kevin': 0, 'Sam': 0}
'Sam': {'Frank': 0, 'Jack': 0, 'Kevin': 0}
'Kevin': {'Frank': 0, 'Jack': 0, 'Sam': 0}
'Jack': {'Frank': 0, 'Kevin': 0, 'Sam': 0}
from pprint import pprint

names = ['Frank', 'Sam', 'Kevin', 'Jack']

# Build one entry per name, keyed by every other name with a starting value of 0.
d = {}
for name in names:
    others = (other for other in names if other != name)
    d[name] = dict.fromkeys(others, 0)

pprint(d)
Output
{'Frank': {'Jack': 0, 'Kevin': 0, 'Sam': 0},
'Jack': {'Frank': 0, 'Kevin': 0, 'Sam': 0},
'Kevin': {'Frank': 0, 'Jack': 0, 'Sam': 0},
'Sam': {'Frank': 0, 'Jack': 0, 'Kevin': 0}}
This question already has answers here:
How to transpose a dataset in a csv file?
(7 answers)
Closed 2 years ago.
What I have is a long script that reads several different files and, at the end, writes everything out to separate .csv files.
This is all of my code:
import csv
import itertools
import os.path

# NOTE: this script targets Python 2 (itertools.izip_longest, 'wb' output mode).


def grouper(n, iterable, fillvalue=None):
    """Yield successive n-item tuples from iterable, padding the last one.

    grouper(3, 'ABCDEFG', 'x') --> ABC DEF Gxx
    """
    # The same iterator repeated n times: each output tuple pulls n
    # consecutive items from it.
    args = [iter(iterable)] * n
    return itertools.izip_longest(*args, fillvalue=fillvalue)


#open files + readlines
with open("C:/Users/Ivan Wong/Desktop/Placement/Lists of targets/Mouse/UCSC to Ensembl.csv", "r") as f:
    reader = csv.reader(f, delimiter = ',')
    #find files with the name in 1st row
    for row in reader:
        graph_filename = os.path.join("C:/Python27/Scripts/My scripts/Selenoprotein/NMD targets",row[0]+"_nt_counts.txt.png")
        if os.path.exists(graph_filename):
            # Read the count file for this ID; `with` guarantees the file is
            # closed (the bare `r.close` without parentheses never did that).
            y = row[0]+'_nt_counts.txt'
            with open('C:/Users/Ivan Wong/Desktop/Placement/fp_mesc_nochx/'+y, 'r') as r:
                k = r.readlines()
            del k[:1]  # drop the first line of the file
            interger = [int(s.strip()) for s in k]

            #adding the numbers for every 3 rows (last group padded with 0)
            result = [sum(group) for group in grouper(3, interger, 0)]

            e = row[1]
            with open('C:/Users/Ivan Wong/Desktop/Placement/Downloaded seq/Mouse/cDNA.txt', 'r') as cDNA:
                seq = cDNA.readlines()

            # Collect the 1-based line numbers of every '>' header line and
            # remember the header that belongs to e. lineBegin is reset for
            # each row so a missing gene cannot silently reuse the position
            # found for a previous row.
            lineGenes = []
            lineBegin = None
            for lineNum, line in enumerate(seq, 1):
                if '>' in line:
                    lineGenes.append(lineNum)
                if '>'+e in line:
                    lineBegin = lineNum
            if lineBegin is None:
                raise ValueError("no '>%s' header found in cDNA.txt" % e)

            # The sequence runs from the line after its header up to the line
            # before the next header, or to the end of the file when this is
            # the last record (previously an IndexError).
            index1 = lineGenes.index(lineBegin)
            if index1 + 1 < len(lineGenes):
                Lastline = lineGenes[index1+1] - 1
            else:
                Lastline = len(seq)

            # lineBegin is 1-based, so as a 0-based index it already points at
            # the first sequence line after the header.
            string = ''.join(s.strip() for s in seq[lineBegin:Lastline])

            # Split into consecutive triplets; an incomplete trailing group is
            # dropped.
            lenString = len(string)
            listCodon = [string[i*3:i*3+3] for i in range(lenString // 3)]

            with open(e+'.csv','wb') as outfile:
                outfile.writelines(str(result)+'\n'+str(listCodon))
My problem here is the file produced looks like this:
0 0 0
'GCA' 'CTT' 'GGT'
I want to make it like this:
0 GCA
0 CTT
0 GGT
What can I do in my code to achieve this?
print result:
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 1, 2, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 3, 3, 0, 3, 1, 2, 1, 2, 1, 0, 1, 0, 1, 2, 1, 0, 5, 0, 0, 0, 0, 6, 0, 1, 0, 0, 2, 0, 1, 0, 0, 1, 1, 0, 1, 6, 34, 35, 32, 1, 1, 0, 4, 1, 0, 1, 0, 0, 0, 0, 1, 6, 0, 0, 0, 0, 1, 3, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
print listCodon:
['gtt', 'gaa', 'aca', 'gag', 'aca', 'tgt', 'tct', 'gga', 'gat', 'gag', 'ctg', 'tgg', 'gca', 'gaa', 'gga', 'cag', 'gcc', 'taa', 'gca', 'cag', 'gca', 'gca', 'gag', 'ctt', 'tga', 'tct', 'ctt', 'ggt', 'gat', 'cgg', 'tgg', 'ggg', 'atc', 'cgg', 'tgg', 'cct', 'agc', 'ttg', 'tgc', 'caa', 'gga', 'agc', 'tgc', 'tca', 'gct', 'ggg', 'aaa', 'gaa', 'ggt', 'ggc', 'tgt', 'ggc', 'tga', 'cta', 'tgt', 'gga', 'acc', 'ttc', 'tcc', 'ccg', 'agg', 'cac', 'caa', 'gtg', 'ggg', 'cct', 'tgg', 'tgg', 'cac', 'ctg', 'tgt', 'caa', 'cgt', 'ggg', 'ttg', 'cat', 'acc', 'caa', 'gaa', 'gct', 'gat', 'gca', 'tca', 'ggc', 'tgc', 'act', 'gct', 'ggg', 'ggg', 'cat', 'gat', 'cag', 'aga', 'tgc', 'tca', 'cca', 'cta', 'tgg', 'ctg', 'gga', 'ggt', 'ggc', 'cca', 'gcc', 'tgt', 'cca', 'aca', 'caa', 'ctg', 'gtg', 'aga', 'gag', 'aag', 'ccc', 'ttg', 'ccc', 'tct', 'gca', 'ggt', 'ccc', 'att', 'gaa', 'agg', 'aga', 'ggt', 'ttg', 'ctc', 'tct', 'gcc', 'act', 'cat', 'ctg', 'taa', 'ccg', 'tga', 'gct', 'ttt', 'cca', 'ccc', 'ggc', 'ctc', 'ctc', 'ttt', 'gat', 'ccc', 'aga', 'ata', 'atg', 'act', 'ctg', 'aga', 'ctt', 'ctt', 'atg', 'tat', 'gaa', 'taa', 'atg', 'cct', 'ggg', 'cca', 'aaa', 'acc']
The picture on the left shows what Marek's code helped me achieve. I would like to improve it so that the output is arranged like the picture on the right.
You can use zip() to zip together two iterators. So if you have
result = [0, 0, 0, 0, 0]
listCodons = ['gtt', 'gaa', 'aca', 'gag', 'aca']
then you can do
>>> list(zip(result, listCodons))
[(0, 'gtt'), (0, 'gaa'), (0, 'aca'), (0, 'gag'), (0, 'aca')]
or, for your example:
with open(e+'.csv','w') as outfile:
    out = csv.writer(outfile)
    # One row per (count, codon) pair; listCodon is the name used in the
    # question's script, which also supplies e and result.
    out.writerows(zip(result, listCodon))
try this:
# One "count codon" line per pair, separated by a single space.
proper_result = '\n'.join('{} {}'.format(nr, codon) for nr, codon in zip(result, listCodon))
Edit (codons split into separate columns):
# One line per pair: the count followed by each letter of the codon, space-separated.
proper_result = '\n'.join(' '.join([str(nr)] + list(codon)) for nr, codon in zip(result, listCodon))
Edit (comma separated values):
# One "count, codon" line per pair, comma-separated.
proper_result = '\n'.join('{}, {}'.format(nr, codon) for nr, codon in zip(result, listCodon))