Hi I am using this method to write a csv file from a csv file which is a hashed code but i only receive the last row in output, how can i add each row to the previous one?
import hashlib
import csv
d = dict()
result = ()
for i in range(0 , 9999) :
n = hashlib.sha256(str(i).encode())
d[n.hexdigest()] = str(i)
with open('/Users/MJ-Mac/Desktop/karname.txt') as f:
file = csv.reader(f)
for row in file :
a = row[0]
b = d[row[1]]
result = (a , b)
with open('/Users/MJ-Mac/Desktop/result3.txt', 'w') as f2:
file2 = csv.writer(f2)
file2.writerow(result)
Other answers have suggested replacing 'w' with 'a', this is not necessary when working with csv.writer. It also could make your file grow everytime you run the program.
Instead of reopening and closing relut3.txt, keep it open and use just one writer
import hashlib
import csv
d = dict()
result = ()
for i in range(0 , 9999) :
n = hashlib.sha256(str(i).encode())
d[n.hexdigest()] = str(i)
with open('/Users/MJ-Mac/Desktop/result3.txt', 'w') as result_file:
result_writer = csv.writer(result_file) # only create this once
with open('/Users/MJ-Mac/Desktop/karname.txt') as f:
file = csv.reader(f)
for row in file :
a = row[0]
b = d[row[1]]
result = (a , b)
result_writer.writerow(result) # use the already created writer
Your code is writing one line to the file, then it opens the file again and writes the next line as the full content of the program.
So on the second run through the loop only the second line will be in the file.
import hashlib
import csv
d = dict()
result = ()
for i in range(0 , 9999) :
n = hashlib.sha256(str(i).encode())
d[n.hexdigest()] = str(i)
with open('/Users/MJ-Mac/Desktop/karname.txt') as f:
file = csv.reader(f)
for row in file :
a = row[0]
b = d[row[1]]
result = (a , b)
with open('/Users/MJ-Mac/Desktop/result3.txt', 'a') as f2:
file2 = csv.writer(f2)
file2.writerow(result)
It might be better to open the file and then write everything to it:
import hashlib
import csv
d = dict()
result = ()
for i in range(0 , 9999) :
n = hashlib.sha256(str(i).encode())
d[n.hexdigest()] = str(i)
with open('/Users/MJ-Mac/Desktop/karname.txt') as f:
file = csv.reader(f)
with open('/Users/MJ-Mac/Desktop/result3.txt', 'w') as f2:
file2 = csv.writer(f2)
for row in file :
a = row[0]
b = d[row[1]]
result = (a , b)
file2.writerow(result)
I can't run this myself, since your data is not included.
However, I think your problem is that with open('/Users/MJ-Mac/Desktop/result3.txt', 'w') has the "w" flag -- which stands for "write" -- so your data is being overwritten, You might instead need the "a" flag for "append," so that each line will be appended to the data you are exporting.
import hashlib
import csv
d = dict()
result = ()
for i in range(0 , 9999) :
n = hashlib.sha256(str(i).encode())
d[n.hexdigest()] = str(i)
with open('/Users/MJ-Mac/Desktop/karname.txt') as f:
file = csv.reader(f)
for row in file :
a = row[0]
b = d[row[1]]
result = (a , b)
with open('/Users/MJ-Mac/Desktop/result3.txt', 'a') as f2:
file2 = csv.writer(f2)
file2.writerow(result)
It is easier and more readable to open both the input and output files at once, and initialise the CSV reader and writer at the start:
with open('/Users/MJ-Mac/Desktop/karname.txt') as in_file, open('/Users/MJ-Mac/Desktop/result3.txt', 'w') as out_file:
output = csv.writer(out_file)
for row in csv.reader(in_file):
output.writerow([row[0], d[row[1]])
Related
I'm trying to use Python to copy lines from one csv file to another and add data to a new column in the process. The data is being copied correctly to the new file, but it's all being copied to the same line in the new file.
file = "C:/original_file.csv"
nf = "C:/file_updated.csv"
i = 0
with open(file, 'r') as origFile:
with open(nf, 'w') as newFile:
lineList = []
for line in origFile:
strippedLine = line.strip()
lineList = strippedLine.split(',')
lineList.append("C:/ATT" + str(i) + "_PHOTO 1.jpg")
lineStr = str(lineList)
lineStr = lineStr.replace("'", "")
newFile.write(lineStr)
print lineList
i += 1
origFile.close()
newFile.close()
How can I make it so that each line from the first file copies to a separate line of the new file?
file = "C:/original_file.csv"
nf = "C:/file_updated.csv"
i = 0
with open(file, 'r') as origFile:
with open(nf, 'w') as newFile:
lineList = []
for line in origFile:
strippedLine = line.strip()
lineList = strippedLine.split(',')
lineList.append("C:/ATT" + str(i) + "_PHOTO 1.jpg")
lineStr = str(lineList)
lineStr = lineStr.replace("'", "")
newFile.write(lineStr)
newFile.write('\n') #Insert a new line
print lineList
i += 1
origFile.close()
newFile.close()
No need to install pandas, the built-in csv library is great for this!!
$ cat tmp.csv
first,second
third,fourth
import csv
to_read = "./tmp.csv"
to_write = "./tmp2.csv"
with open(to_read, newline="") as to_read_fp, open(to_write, "w", newline="") as to_write_fp:
reader = csv.reader(to_read_fp)
writer = csv.writer(to_write_fp)
for count, row in enumerate(reader):
row.append(f"C:/ATT{count}_PHOTO 1.jpg")
writer.writerow(row)
$ cat tmp2.csv
first,second,C:/ATT0_PHOTO 1.jpg
third,fourth,C:/ATT1_PHOTO 1.jpg
If you want to do it without any imports you could try something like this which adds a new column with the header New Field.
Of course it assumes the original CSV has a header row.
file = "original_file.csv"
nf = "file_updated.csv"
with open(file, 'r') as origFile:
data = [line.strip().split(',') for line in origFile.readlines()]
header = data[0]
data = data[1:]
header.append('New Field')
data = [line + [f'C:/ATT{idx}_PHOTO 1.jpg'] for idx, line in enumerate(data)]
data = [','.join(line) for line in [header]+data]
with open(nf, 'w') as newFile:
newFile.writelines('\n'.join(data))
"""
SAMPLE INPUT
Field1,Field2
Data1,Data2
Data3,Data4
SAMPLE OUTPUT
Field1,Field2,New Field
Data1,Data2,C:/ATT0_PHOTO 1.jpg
Data3,Data4,C:/ATT1_PHOTO 1.jpg
"""
heres my little program. at the end i want to write the names and passwords
into csv file like this:
Jack,9978
Sara,1647
but i cant!? my program output is correct but when i write it into csv it goes like:
Jack9978,Sara1674
how will you fix it?
import hashlib
import csv
answer = []
usr_pas = []
with open('...', 'r') as f:
reader = csv.reader(f)
for word in reader:
usr_pas.append(word)
for i in range(999, 10000):
num = str(i)
m = hashlib.sha256()
m.update(num.encode('utf-8'))
hsh = m.hexdigest()
hash_dict = {hsh: num}
for key in list(hash_dict.items()):
for value in usr_pas:
if key[0] == value[1]:
answer.append(value[0] +','+ key[1])
file = open("...", 'w', newline='')
with file:
writer = csv.writer(file)
writer.writerow(i.strip().replace(',', '') for i in answer)
file.close()
what did i wrong!?
Try this (lines with comments are changed):
import hashlib
import csv
answer = []
usr_pas = []
with open('...', 'r') as f:
reader = csv.reader(f)
for word in reader:
usr_pas.append(word)
for i in range(999, 10000):
num = str(i)
m = hashlib.sha256()
m.update(num.encode('utf-8'))
hsh = m.hexdigest()
hash_dict = {hsh: num}
for key in list(hash_dict.items()):
for value in usr_pas:
if key[0] == value[1]:
answer.append(value[0] +','+ key[1] + '\n') #added '\n' at the end
file = open("...", 'w', newline='')
with file:
writer = csv.writer(file)
writer.writerow(i for i in answer) #removed i.replace
file.close()
I guess you want a csv file with multiple lines instead of one. If so, my suggestion is to use csv.csvwriter.writerows instead of csv.csvwriter.writerow. The latter is designed to write a single row. See the official document here. Indeed multiple lines might be created with \n manipulator, it means a single line with multiple elements that contains "new line", which seems awkward.
Since we can use the default delimiter (comma), we just need to manage each element in the line as a tuple (or a list). Answers should be added into list answer like this:
answer.append((value[0], key[1]))
while we write rows in this way:
writer.writerows(answer)
Let's put them together:
import hashlib
import csv
answer = []
usr_pas = []
with open('...', 'r') as f:
reader = csv.reader(f)
for word in reader:
usr_pas.append(word)
for i in range(999, 10000):
num = str(i)
m = hashlib.sha256()
m.update(num.encode('utf-8'))
hsh = m.hexdigest()
hash_dict = {hsh: num}
for key in list(hash_dict.items()):
for value in usr_pas:
if key[0] == value[1]:
# answer.append(value[0] +','+ key[1])
answer.append((value[0], key[1]))
file = open("...", 'w', newline='')
with file:
writer = csv.writer(file)
# writer.writerow(i.strip().replace(',', '') for i in answer)
writer.writerows(answer)
file.close()
I have the following input file with a header row:
test_in.csv
LON,LAT,NUMBER,STREET,UNIT,CITY,DISTRICT,REGION,POSTCODE,ID,HASH
-72.5708234,41.4155142,39,HICKORY LA,,,,,,,8a0df668e0d49b02
-72.5647745,41.4160301,1213,KILLINGWORTH RD,,,,,,,b3ecaab86e476f46
I need to replace any of the columns with a specified string
for example CITY column's data should be replaced from "" to "MyCity"
My code only outputs the header and first row
python test_forcefld.py test_in.csv MyCity CITY out_test.csv
import csv
import sys
in_file_name = sys.argv[1]
force_data = sys.argv[2]
force_fld = sys.argv[3]
out_file_name = sys.argv[4]
# First read top row/header from input file
fieldnames = []
for filename in [in_file_name]:
with open(filename, "rb") as f_in:
reader = csv.reader(f_in)
headers = next(reader)
for h in headers:
fieldnames.append(h)
#print headers to output file
with open(out_file_name, 'w') as fou:
dw = csv.DictWriter(fou, delimiter=',', fieldnames=fieldnames)
dw.writeheader()
f_in2 = open(in_file_name, "rb")
reader2 = csv.DictReader(f_in2) # Uses the field names in this file
datarow = next(reader2)
datarow[force_fld] = force_data
with open(out_file_name, 'wa') as fou:
dw2 = csv.DictWriter(fou, delimiter=',', fieldnames=fieldnames)
dw2.writeheader()
dw2.writerow(data row)
Output shows
LON,LAT,NUMBER,STREET,UNIT,CITY,DISTRICT,REGION,POSTCODE,ID,HASH
-72.5708234,41.4155142,39,HICKORY LA,,MyCity,,,,,8a0df668e0d49b02
Your code is a little difficult to read, but assuming datarow is a dictionary containing your records:
In your last row, change
dw2.writerow(datarow)
Into
dw2.writerows(datarow)
While you're at it, you should also consider using datarow.keys() for your fieldnames, for conciseness.
This should do it, you just need pandas:
import pandas as pd
df = pd.read_csv(in_file_name, sep=',')
df['CITY'].fillna('MyCity', inplace=True)
And to save it:
df.to_csv(out_file_name)
You can try somthing like this in order to have your desired file:
I'm assuming your input file is called f_input.txt and your output file is called f_output.txt:
data = list(k.rstrip().split(',') for k in open("f_input.txt", 'r'))
with open("f_output.txt", 'a+') as f:
f.write(",".join(data[0]) + '\n')
for k in data[1:]:
# Modify the positions of k[:n] + your data + k[n+1]
# if you need to handle another position
f.write(",".join(k[:6]) + "MyCity" + ",".join(k[7:]) + "\n")
This worked in the end:
import csv
import sys
in_file_name = sys.argv[1]
force_data = sys.argv[2]
force_fld = sys.argv[3]
out_file_name = sys.argv[4]
# First read top row/header from input file
fieldnames = []
for filename in [in_file_name]:
with open(filename, "rb") as f_in:
reader = csv.reader(f_in)
headers = next(reader)
for h in headers:
fieldnames.append(h)
f_in2 = open(in_file_name, "r")
#print headers to output file
fou = open(out_file_name, 'wa')
dw = csv.DictWriter(fou, delimiter=',', fieldnames=fieldnames)
dw.writeheader()
reader2 = csv.DictReader(f_in2) # Uses the field names in this file
for row in reader2:
row[force_fld] = force_data
dw2 = csv.DictWriter(fou, delimiter=',', fieldnames=fieldnames)
dw2.writerow(row)
This is one file result.csv:
M11251TH1230
M11543TH4292
M11435TDS144
This is another file sample.csv:
M11435TDS144,STB#1,Router#1
M11543TH4292,STB#2,Router#1
M11509TD9937,STB#3,Router#1
M11543TH4258,STB#4,Router#1
Can I write a Python program to compare both the files and if line in result.csv matches with the first word in the line in sample.csv, then append 1 else append 0 at every line in sample.csv?
import pandas as pd
d1 = pd.read_csv("1.csv",names=["Type"])
d2 = pd.read_csv("2.csv",names=["Type","Col2","Col3"])
d2["Index"] = 0
for x in d1["Type"] :
d2["Index"][d2["Type"] == x] = 1
d2.to_csv("3.csv",header=False)
Considering "1.csv" and "2.csv" are your csv input files and "3.csv" is the result you needed
The solution using csv.reader and csv.writer (csv module):
import csv
newLines = []
# change the file path to the actual one
with open('./data/result.csv', newline='\n') as csvfile:
data = csv.reader(csvfile)
items = [''.join(line) for line in data]
with open('./data/sample.csv', newline='\n') as csvfile:
data = list(csv.reader(csvfile))
for line in data:
line.append(1 if line[0] in items else 0)
newLines.append(line)
with open('./data/sample.csv', 'w', newline='\n') as csvfile:
writer = csv.writer(csvfile)
writer.writerows(newLines)
The sample.csv contents:
M11435TDS144,STB#1,Router#1,1
M11543TH4292,STB#2,Router#1,1
M11509TD9937,STB#3,Router#1,0
M11543TH4258,STB#4,Router#1,0
With only one column, I wonder why you made it as a result.csv. If it is not going to have any more columns, a simple file read operation would suffice. Along with converting the data from result.csv to dictionary will help in quick run as well.
result_file = "result.csv"
sample_file = "sample.csv"
with open(result_file) as fp:
result_data = fp.read()
result_dict = dict.fromkeys(result_data.split("\n"))
"""
You can change the above logic, in case you have very few fields on csv like this:
result_data = fp.readlines()
result_dict = {}
for result in result_data:
key, other_field = result.split(",", 1)
result_dict[key] = other_field.strip()
"""
#Since sample.csv is a real csv, using csv reader and writer
with open(sample_file, "rb") as fp:
sample_data = csv.reader(fp)
output_data = []
for data in sample_data:
output_data.append("%s,%d" % (data, data[0] in result_dict))
with open(sample_file, "wb") as fp:
data_writer = csv.writer(fp)
data_writer.writerows(output_data)
The following snippet of code will work for you
import csv
with open('result.csv', 'rb') as f:
reader = csv.reader(f)
result_list = []
for row in reader:
result_list.extend(row)
with open('sample.csv', 'rb') as f:
reader = csv.reader(f)
sample_list = []
for row in reader:
if row[0] in result_list:
sample_list.append(row + [1])
else:
sample_list.append(row + [0]
with open('sample.csv', 'wb') as f:
writer = csv.writer(f)
writer.writerows(sample_list)
I have a little program that just needs to read one (and only one) row from a csv file and write the column values to a series of files. The program has three system arguments: the path to the data file, the job id (uuid), and the target row number, i.e. the row in the csv that I want to parse. It's not working, how can I fix it?
import csv
import sys
import itertools
f = sys.argv[1]
uuid = sys.argv[2]
target_row = sys.argv[3]
tmpdir="/tmp/pagekicker/"
folder = tmpdir+uuid
destination1 = folder + '/csv/row.editedby'
destination3 = folder + '/csv/row.booktitle'
destination4 = folder + '/csv/row.seeds'
destination5 = folder + '/csv/row.imprint'
f = open(f, 'rb')
f1 = open(destination1, 'w')
f3 = open(destination3, 'w')
f4 = open(destination4, 'w')
f5 = open(destination5, 'w')
target_row = int(target_row)
try:
reader = csv.reader(f) # creates the reader object
for row in itertools.islice(reader,1,1): # iterates the rows of the file in orders
editedby = row[0] # we throw away column 2
booktitle = row[2]
print row[2]
seeds = row[3]
imprint = row[4]
f1.write(editedby)
f3.write(booktitle)
f4.write(seeds)
f5.write(imprint)
f.close()
f1.close()
f3.close()
f4.close()
f5.close()
finally:
print 'done'
UPDATE: thanks Graham Bell for his suggested code. There are two "f5s" in the first line of his 'with' statement My code now looks like this:
i
mport csv
import sys
import itertools
f = sys.argv[1]
uuid = sys.argv[2]
target_row = sys.argv[3]
tmpdir="/tmp/pagekicker/"
folder = tmpdir+uuid
# os.mkdir(folder)
destination3 = folder + '/csv/row.booktitle'
destination1 = folder + '/csv/row.editedby'
destination4 = folder + '/csv/row.seeds'
destination5 = folder + '/csv/row.imprint'
with open(f, 'rb') as f, open(destination1, 'w') as f1, open(destination3, 'w') as f3, open(destination4, 'w') as f4, open(destination5, 'w') as f5:
target_row = int(target_row)
try:
reader = csv.reader(f) # creates the reader object
for row in itertools.islice(reader,1,1): # iterates the rows of the file in orders
editedby = row[0] # we throw away column 2
booktitle = row[2]
print row[2]
seeds = row[3]
imprint = row[4]
f1.write(editedby)
f3.write(booktitle)
f4.write(seeds)
f5.write(imprint)
except
print 'done'
Without the except, it generates "unexpected unindent" when I run it. With the except, it says that the except line is invalid syntax.
the csv library DictReader() object has the ability to display the current line number with:
reader = csv.DictReader(csv_file)
reader.line_num
you could iterate through and do nothing until you reach the correct line number that you need, something like this:
for row in reader:
if reader.line_num == row_you_want
do something
the DictReader class also allows you to have the first row in your CSV file to be title columns, and then you can access them like so:
reader["title_of_column1"]
which might save you some work as well, also you should use the python with block when working with files like so:
with open(f, 'rb') as f, open(destination1, 'w') as f1, open(destination3, 'w') as f3, open(destination4, 'w') as f5, open(destination5, 'w') as f5:
target_row = int(target_row)
try:
reader = csv.reader(f) # creates the reader object
for row in itertools.islice(reader,1,1): # iterates the rows of the file in orders
editedby = row[0] # we throw away column 2
booktitle = row[2]
print row[2]
seeds = row[3]
imprint = row[4]
f1.write(editedby)
f3.write(booktitle)
f4.write(seeds)
f5.write(imprint)
This way you don't have to worry about closing them all
Assuming you count rows from 1 (rather than 0), here's a standalone function that will do it:
import csv
from contextlib import contextmanager
import sys
import itertools
#contextmanager
def multi_file_manager(files, mode='r'):
""" Context manager for multiple files. """
files = [open(file, mode) for file in files]
yield files
for file in files:
file.close()
# This is the standalone function
def csv_read_row(filename, n):
""" Read and return nth row of a csv file, counting from 1. """
with open(filename, 'rb') as f:
reader = csv.reader(f)
return next(itertools.islice(reader, n-1, n))
if len(sys.argv) != 4:
print('usage: utility <csv filename> <uuid> <target row>')
sys.exit(1)
tmpdir = "/tmp/pagekicker"
f = sys.argv[1]
uuid = sys.argv[2]
target_row = int(sys.argv[3])
folder = os.path.join(tmpdir, uuid)
destinations = [folder+dest for dest in ('/csv/row.editedby',
'/csv/row.booktitle',
'/csv/row.seeds',
'/csv/row.imprint')]
with multi_file_manager(destinations, mode='w') as files:
row = csv_read_row(f, target_row)
#editedby, booktitle, seeds, imprint = row[0], row[2], row[3], row[4]
for i,j in zip(range(4), (0, 2, 3, 4)):
files[i].write(row[j]+'\n')