I have a dictionary as below which has repeated item name, the difference is the value of each part name. i want to write those info to csv with expected result is :
import csv
dict={
'test':['part_name','test1','test2','test3','part_name','test1','test2','test3'],
'value':['partA','12','55','109','partB','14','54','106'],
'lcl':['lcl','10','50','100','lcl','10','50','100'],
'ucl':['ucl','18','60','115','ucl','18','60','115'],
}
tmp={}
for k,v1,v2,v3 in zip(dict["test"],dict["value"],dict["lcl"],dict["ucl"]):
tmp.setdefault(k, []).append([v1,v2,v3])
print(tmp)
with open('table.csv','w') as f:
writer_inline = csv.writer(f, delimiter=',', lineterminator=',')
writer = csv.writer(f, delimiter=',', lineterminator='\n')
writer.writerow(tmp.keys())
writer.writerows(zip(*tmp.values()))
Try the below code to get your desired csv. I would recommend not to use dict as name for your dictionary. I have changed it to d:
import csv
d = {
'test':['part_name','test1','test2','test3','part_name','test1','test2','test3'],
'value':['partA','12','55','109','partB','14','54','106'],
'lcl':['lcl','10','50','100','lcl','10','50','100'],
'ucl':['ucl','18','60','115','ucl','18','60','115'],
}
headers = d['test'][:len(set(d['test']))]
size = len(headers)
d.pop('test', None)
parts = []
for i in d:
parts += [[d[i][j:(j+size)] for j in range(0, len(d['value']), size)]]
rows = []
for part in list(zip(*parts)):
rows += part
with open('table.csv','w') as f:
writer = csv.writer(f, delimiter=',', lineterminator='\n')
writer.writerow(headers)
writer.writerows(rows)
Related
I have 2 CSVs which are New.csv and Old.csv shown below:
Old.csv
longName,shortName,eventType,number,severity
ACTAGENT201,ACAT201,RES,1,INFO
ACTAGENT202,ACAT202,RES,2,ALERT
ACODE801,AC801,ADMIN,1,MINOR
ACODE802,AC802,ADMIN,2,MINOR
ACODE102,AC102,COMM,2,CRITICAL
ACODE103,AC103,COMM,3,CRITICAL
ACODE104,AC104,COMM,4,CRITICAL
ACODE105,AC105,COMM,5,CRITICAL
ACODE106,AC106,COMM,6,CRITICAL
New.csv
longName,shortName,eventType,number,severity
ACTAGENT201,ACAT201,RES,1,INFO
ACTAGENT202,ACAT202,RES,2,ALERT
ACODE801,AC801,ADMIN,1,MINOR
ACODE802,AC802,ThisHasBeenChanged,2,MINOR
ACODE102,AC102,COMM,2,CRITICAL
ACODE103,AC103,COMM,3,CRITICAL
ACODE104,AC104,COMM,4,THISHASBEENCHANGED
ACODE105,AC105,COMM,5,CRITICAL
ACODE106,AC106,COMM,6,CRITICAL
If there is data in one of the columns in the row that has been modified/changed between the old.csv and the new.csv then that whole row should be appended to the changes.csv like this with each column from old.csv and new.csv beside each other:
I know how to find new and deleted items in the csv, but could not figure out how to get the modified items. Code below:
import csv
def DeletedItems(old_csv, new_csv, changes_csv):
with open(new_csv, newline="", encoding="utf8") as new_fp:
csv_reader = csv.reader(new_fp)
csv_headings = next(csv_reader)
new_long_names = {row[0] for row in csv.reader(new_fp)}
with open(old_csv, newline="", encoding="utf8") as old_fp:
with open(changes_csv, "a", newline="", encoding="utf8") as changes_fp:
writer = csv.writer(changes_fp)
writer.writerow("")
for row in csv.reader(old_fp):
if row[0] not in new_long_names:
writer.writerow(row)
def NewItems(old_csv, new_csv, changes_csv):
with open(old_csv, newline="", encoding="utf8") as old_fp:
csv_reader = csv.reader(old_fp)
csv_headings = next(csv_reader)
old_long_names = {row[0] for row in csv.reader(old_fp)}
with open(new_csv, newline="", encoding="utf8") as new_fp:
with open(changes_csv, "w", newline="", encoding="utf8") as changes_fp:
writer = csv.writer(changes_fp)
for row in csv.reader(new_fp):
if row[0] not in old_long_names:
writer.writerow(row)
NewItems("old.csv", "new.csv", "changes.csv")
DeletedItems("old.csv", "new.csv", "changes.csv")
First, read both CSV files into a dictionary, using the longName values as keys.
import csv
with open(old_csv_file, "r") as fh:
reader = csv.reader(fh)
old_csv = {row[0]: row for row in reader}
with open(new_csv_file, "r") as fh:
reader = csv.reader(fh)
new_csv = {row[0]: row for row in reader}
Then, it's easy to find newly added and deleted keys using set operations.
old_longNames = set(old_csv.keys())
new_longNames = set(new_csv.keys())
# common: set intersection
common_longNames = old_longNames.intersection(new_longNames)
# removed: whatever's in old but not in new
removed_longNames = old_longNames - new_longNames
# added: whatever's in new but not in old
added_longNames = new_longNames - old_longNames
Finally, iterate over the common set to find where there are changes:
changed_longNames = []
for key in common_longNames:
old_row = old_csv[key]
new_row = new_csv[key]
# if any(o != n for o, n in zip(old_row, new_row)):
if old_row != new_row:
# this row has at least one column changed. Do whatever
print(f"LongName {key} has changes")
changed_longNames.append(key)
Or, as a list comprehension:
changed_longNames = [key for key in common_longNames if old_csv[key] != new_csv[key]]
Writing everything to a new csv file is also fairly trivial. Note that the sets don't preserve the order, so you might not get the result in the same order.
with open("deleted.csv", "w") as fh:
writer = csv.writer(fh)
for key in removed_longNames:
writer.writerow(old_csv[key])
with open("inserted.csv", "w") as fh:
writer = csv.writer(fh)
for key in added_longNames:
writer.writerow(new_csv[key])
with open("changed.csv", "w") as fh:
writer = csv.writer(fh)
for key in changed_longNames:
old_row = old_csv[key]
new_row = new_csv[key]
merged_row = []
for oi, ni in zip(old_row, new_row):
merged_row.append(oi)
merged_row.append(ni)
writer.writerow(merged_row)
This is one file result.csv:
M11251TH1230
M11543TH4292
M11435TDS144
This is another file sample.csv:
M11435TDS144,STB#1,Router#1
M11543TH4292,STB#2,Router#1
M11509TD9937,STB#3,Router#1
M11543TH4258,STB#4,Router#1
Can I write a Python program to compare both the files and if line in result.csv matches with the first word in the line in sample.csv, then append 1 else append 0 at every line in sample.csv?
import pandas as pd
d1 = pd.read_csv("1.csv",names=["Type"])
d2 = pd.read_csv("2.csv",names=["Type","Col2","Col3"])
d2["Index"] = 0
for x in d1["Type"] :
d2["Index"][d2["Type"] == x] = 1
d2.to_csv("3.csv",header=False)
Considering "1.csv" and "2.csv" are your csv input files and "3.csv" is the result you needed
The solution using csv.reader and csv.writer (csv module):
import csv
newLines = []
# change the file path to the actual one
with open('./data/result.csv', newline='\n') as csvfile:
data = csv.reader(csvfile)
items = [''.join(line) for line in data]
with open('./data/sample.csv', newline='\n') as csvfile:
data = list(csv.reader(csvfile))
for line in data:
line.append(1 if line[0] in items else 0)
newLines.append(line)
with open('./data/sample.csv', 'w', newline='\n') as csvfile:
writer = csv.writer(csvfile)
writer.writerows(newLines)
The sample.csv contents:
M11435TDS144,STB#1,Router#1,1
M11543TH4292,STB#2,Router#1,1
M11509TD9937,STB#3,Router#1,0
M11543TH4258,STB#4,Router#1,0
With only one column, I wonder why you made it as a result.csv. If it is not going to have any more columns, a simple file read operation would suffice. Along with converting the data from result.csv to dictionary will help in quick run as well.
result_file = "result.csv"
sample_file = "sample.csv"
with open(result_file) as fp:
result_data = fp.read()
result_dict = dict.fromkeys(result_data.split("\n"))
"""
You can change the above logic, in case you have very few fields on csv like this:
result_data = fp.readlines()
result_dict = {}
for result in result_data:
key, other_field = result.split(",", 1)
result_dict[key] = other_field.strip()
"""
#Since sample.csv is a real csv, using csv reader and writer
with open(sample_file, "rb") as fp:
sample_data = csv.reader(fp)
output_data = []
for data in sample_data:
output_data.append("%s,%d" % (data, data[0] in result_dict))
with open(sample_file, "wb") as fp:
data_writer = csv.writer(fp)
data_writer.writerows(output_data)
The following snippet of code will work for you
import csv
with open('result.csv', 'rb') as f:
reader = csv.reader(f)
result_list = []
for row in reader:
result_list.extend(row)
with open('sample.csv', 'rb') as f:
reader = csv.reader(f)
sample_list = []
for row in reader:
if row[0] in result_list:
sample_list.append(row + [1])
else:
sample_list.append(row + [0]
with open('sample.csv', 'wb') as f:
writer = csv.writer(f)
writer.writerows(sample_list)
I have multiple text files with 4 columns (tab separated). Each file will have around 2000 rows. Using Python, how can i create a new file that looks like this?
file1column1 file1column4 file2column4 file3column4 ...fileNcolumn4
Thanks.
Here is the code i tried:
file_lists = ['file1.data', 'file2.data']
temp_data = []
for a_file in file_lists:
file_h = open(a_file)
a_list = []
csv_reader = csv.reader(file_h, delimiter='\t')
for row in csv_reader:
if afile == "file1.data":
a_list.extend([row[0], row[3]])
else:
a_list.append(row[3])
temp_data.append((n for n in a_list))
file_h.close()
with open('output.data', 'w') as output_file:
csv_writer = csv.writer(output_file, delimiter='\t')
for row in list(zip(*temp_data)):
csv_writer.writerow(row)
output_file.close()
Courtesy: Combining columns of multiple files in one file - Python
I am getting results in following format though:
file1column1 file2column4
file1column4 file2column4
file1column1 file2column4
file1column4 file2column4
What about:
file_lists = [('file1.data', 1), ('file1.data', 3), ('file2.data',3)]
temp_data = []
for a_file in file_lists:
file_h = open(a_file[0])
a_list = []
csv_reader = csv.reader(file_h, delimiter='\t')
for row in csv_reader:
a_list.append(row[a_file[1]])
temp_data.append((n for n in a_list))
file_h.close()
with open('output.data', 'w') as output_file:
csv_writer = csv.writer(output_file, delimiter='\t')
for row in list(zip(*temp_data)):
csv_writer.writerow(row)
You don't need to close file when you use with ... as
I have a bit of python code that produces a .csv file, however I don't know how to add column names, or a header row. Here is my code:
handle = open(sys.argv[1])
with open('protparams.csv', 'w') as fp:
writer = csv.writer(fp, delimiter=',')
for record in SeqIO.parse(handle, "fasta"):
seq = str(record.seq)
X = ProtParam.ProteinAnalysis(seq)
data = [seq,X.get_amino_acids_percent(),X.aromaticity(),X.gravy(),X.isoelectric_point(),X.secondary_structure_fraction(),X.molecular_weight(),X.instability_index()]
writer.writerow(data)
I have tried adding in something like:
writer = csv.writer(fp, delimiter=',',[seq,aa_percentage,aromaticity,gravy,isoelectric_point,secondary_structure_fraction,molecular_weight,instability_index])
but this obviously doesn't work
anyone have any ideas?
Write the headers before the loop:
handle = open(sys.argv[1])
with open('protparams.csv', 'w') as fp:
writer = csv.writer(fp, delimiter=',')
writer.writerow(['heading1','heading2','heading3'])
for record in SeqIO.parse(handle, "fasta"):
seq = str(record.seq)
X = ProtParam.ProteinAnalysis(seq)
data = [seq,X.get_amino_acids_percent(),X.aromaticity(),X.gravy(),X.isoelectric_point(),X.secondary_structure_fraction(),X.molecular_weight(),X.instability_index()]
writer.writerow(data)
Some example data:
title1|title2|title3|title4|merge
test|data|here|and
test|data|343|AND
",3|data|343|and
My attempt at coding this:
import csv
import StringIO
storedoutput = StringIO.StringIO()
fields = ('title1', 'title2', 'title3', 'title4', 'merge')
with open('file.csv', 'rb') as input_csv:
reader = csv.DictReader(input_csv, fields, delimiter='|')
for counter, row in enumerate(reader):
counter += 1
#print row
if counter != 1:
for field in fields:
if field == "merge":
row['merge'] = ("%s%s%s" % (row["title1"], row["title3"], row["title4"]))
print row
storedoutput.writelines(','.join(map(str, row)) + '\n')
contents = storedoutput.getvalue()
storedoutput.close()
print "".join(contents)
with open('file.csv', 'rb') as input_csv:
input_csv = input_csv.read().strip()
output_csv = []
output_csv.append(contents.strip())
if "".join(output_csv) != input_csv:
with open('file.csv', 'wb') as new_csv:
new_csv.write("".join(output_csv))
Output should be
title1|title2|title3|title4|merge
test|data|here|and|testhereand
test|data|343|AND|test343AND
",3|data|343|and|",3343and
For your reference upon running this code the first print it prints the rows as I would hope then to appear in the output csv. However the second print prints the title row x times where x is the number of rows.
Any input or corrections or working code would be appreciated.
I think we can make this a lot simpler. Dealing with the rogue " was a bit of a nuisance, I admit, because you have to work hard to tell Python you don't want to worry about it.
import csv
with open('file.csv', 'rb') as input_csv, open("new_file.csv", "wb") as output_csv:
reader = csv.DictReader(input_csv, delimiter='|', quoting=csv.QUOTE_NONE)
writer = csv.DictWriter(output_csv, reader.fieldnames, delimiter="|",quoting=csv.QUOTE_NONE, quotechar=None)
merge_cols = "title1", "title3", "title4"
writer.writeheader()
for row in reader:
row["merge"] = ''.join(row[col] for col in merge_cols)
writer.writerow(row)
produces
$ cat new_file.csv
title1|title2|title3|title4|merge
test|data|here|and|testhereand
test|data|343|AND|test343AND
",3|data|343|and|",3343and
Note that even though you wanted the original file updated, I refused. Why? It's a bad idea, because then you can destroy your data while working on it.
How can I be so sure? Because that's exactly what I did when I first ran your code, and I know better. ;^)
That double quote in the last line is definitely messing up the csv.DictReader().
This works:
new_lines = []
with open('file.csv', 'rb') as f:
# skip the first line
new_lines.append(f.next().strip())
for line in f:
# the newline and split the fields
line = line.strip().split('|')
# exctract the field data you want
title1, title3, title4 = line[0], line[2], line[3]
# turn the field data into a string and append in to the rest
line.append(''.join([title1, title3, title4]))
# save the new line for later
new_lines.append('|'.join(line))
with open('file.csv', 'w') as f:
# make one long string and write it to the new file
f.write('\n'.join(new_lines))
import csv
import StringIO
stored_output = StringIO.StringIO()
with open('file.csv', 'rb') as input_csv:
reader = csv.DictReader(input_csv, delimiter='|', quoting=csv.QUOTE_NONE)
writer = csv.DictWriter(stored_output, reader.fieldnames, delimiter="|",quoting=csv.QUOTE_NONE, quotechar=None)
merge_cols = "title1", "title3", "title4"
writer.writeheader()
for row in reader:
row["merge"] = ''.join(row[col] for col in merge_cols)
writer.writerow(row)
contents = stored_output.getvalue()
stored_output.close()
print contents
with open('file.csv', 'rb') as input_csv:
input_csv = input_csv.read().strip()
if input_csv != contents.strip():
with open('file.csv', 'wb') as new_csv:
new_csv.write("".join(contents))