Writing to a CSV without pandas - python
I am writing a list of numbers to a CSV.
However it is putting each number into a different cell.
I can not figure out why.
What I tried
I was using csv.writerow() which puts them all into the same row.
But I need them in columns.
Attempting to fix that, I switched to csv.writerows(), which puts them in columns, but every single number is in a new row, separated from the next.
Does anyone know why this is?
Code
class readingJ1Average:
    """Collects per-channel 'average' values from instrument text files.

    Each readingJN(filepath) method reads one specific line of the file,
    slices out the average field (characters 5:16 of that line), and
    appends it to the module-level jN_list.  The methods are invoked
    unbound (readingJ1Average.readingJ1(path)), so none takes ``self``.
    """

    def _read_average(filepath, line_index, target_list, echo=False):
        # Shared helper replacing seven copy-pasted bodies: pull the single
        # line `line_index`, slice out the average field, append it.
        with open(filepath, 'r') as f:
            # A one-element slice keeps the original out-of-range behavior:
            # a too-short file yields [] and therefore an empty string.
            selected = f.readlines()[line_index:line_index + 1]
        if echo:
            # readingJ2 originally printed the raw slice for debugging.
            print(selected)
        # join() collapses the one-element list into a plain string.
        value = ('\n'.join(selected))[5:16]
        target_list.append(value)

    def readingJ1(filepath):
        readingJ1Average._read_average(filepath, 46, j1_list)

    def readingJ2(filepath):
        readingJ1Average._read_average(filepath, 47, j2_list, echo=True)

    def readingJ3(filepath):
        readingJ1Average._read_average(filepath, 48, j3_list)

    def readingJ4(filepath):
        # NOTE(review): the original also read line 48 here, duplicating
        # readingJ3 and breaking the otherwise consecutive 46..51 pattern.
        # Preserved as-is, but this looks like an off-by-one — confirm.
        readingJ1Average._read_average(filepath, 48, j4_list)

    def readingJ5(filepath):
        readingJ1Average._read_average(filepath, 49, j5_list)

    def readingJ6(filepath):
        readingJ1Average._read_average(filepath, 50, j6_list)

    def readingJ7(filepath):
        readingJ1Average._read_average(filepath, 51, j7_list)
#Beginning main code
# One accumulator list per channel; each becomes a CSV column at the end.
j1_list = []
j2_list = []
j3_list = []
j4_list = []
j5_list = []
j6_list = []
j7_list = []

# A single pass over the directory is enough: every reader receives the
# same set of .ls files, so call all seven per file instead of re-listing
# the directory seven times.
# NOTE(review): `path` must be defined earlier in the file; os.listdir()
# with no argument scans the *current* directory, so os.listdir(path)
# would be more consistent — confirm against the full script.
for file in os.listdir():
    # Only process the instrument output files.
    if file.endswith(".ls"):
        # os.path.join avoids the fragile "\" escape in f"{path}\{file}".
        filepath = os.path.join(path, file)
        readingJ1Average.readingJ1(filepath)
        readingJ1Average.readingJ2(filepath)
        readingJ1Average.readingJ3(filepath)
        readingJ1Average.readingJ4(filepath)
        readingJ1Average.readingJ5(filepath)
        readingJ1Average.readingJ6(filepath)
        readingJ1Average.readingJ7(filepath)

# newline='' stops the csv module from emitting blank rows on Windows.
with open('C:/Users/DunningJ3/Desktop/sample.csv', 'w', newline='') as wf:
    write = csv.writer(wf)
    # zip() transposes the seven lists: row i is (j1[i], j2[i], ..., j7[i]),
    # so each jN_list ends up as its own column.  The original bug was
    # passing a bare list of strings to writerows(), which iterates each
    # string and splits every value into one cell per character.
    write.writerows(zip(j1_list, j2_list, j3_list, j4_list,
                        j5_list, j6_list, j7_list))
#TXT file to Excel
The fast answer is that you need to convert each row to a string instead of keeping it as a list, or to use a matrix. But first you need to keep things simple, cleaning up all code smells and following best practices, otherwise it will be hard to figure out a solution.
csv.writerows() expects a list of rows, but you want to transpose them, so we can solve this by using a matrix or an array of strings. In both cases, a new item (a list of numbers or a string) is generated per column of the original csv, preserving each value's position.
Let's say the original csv is "A" and contains items in the form of "a(ij)". You will build a new "A' " where it's items are "a'(ji)" and csv.writerows() is expecting:
[
[a'(00), a'(01), ..., a'(0i)]
[a'(10), a'(11), ..., a'(1i)]
...
[a'(j0), a'(j1), ..., a'(ji)]
]
This is kind of like transposing a matrix, by the way.
import csv
# Transposed contents of the input CSV: matrix[i] collects column i.
matrix = []

def init_matrix(total_lines):
    """Seed the global matrix with `total_lines` empty column buckets."""
    matrix.extend([] for _ in range(total_lines))

def readAll(filepath, csv_separator):
    """Read `filepath` and accumulate its transpose into the global matrix.

    The column count is taken from the first line; every cell is kept as
    a plain string.  Reports the detected dimensions on stdout.
    """
    with open(filepath, 'r') as f:
        lines = f.readlines()
    total_rows = len(lines)
    total_cols = len(lines[0].split(csv_separator))
    print('Total Rows ', total_rows)
    print('Total Cols ', total_cols)
    # One bucket per source column.
    init_matrix(total_cols)
    for raw in lines:
        fields = raw.rstrip().split(csv_separator)
        # Append each field to its column bucket — this is the transpose.
        for col in range(total_cols):
            matrix[col].append(fields[col])
def main():
    """Transpose test.csv and write the result to result.csv."""
    readAll('test.csv', ',')
    # newline='' is required by the csv module; without it every row is
    # followed by a blank line on Windows.
    with open('result.csv', 'w', newline='') as wf:
        write = csv.writer(wf)
        write.writerows(matrix)

if __name__ == '__main__':
    main()
And here the sample test.csv file
a,1,2,3,4,5,6,7,8,9,0
b,1,2,3,4,5,6,7,8,9,0
c,1,2,3,4,5,6,7,8,9,0
d,1,2,3,4,5,6,7,8,9,0
e,1,2,3,4,5,6,7,8,9,0
f,1,2,3,4,5,6,7,8,9,0
g,1,2,3,4,5,6,7,8,9,0
The output will be
a,b,c,d,e,f,g
1,1,1,1,1,1,1
2,2,2,2,2,2,2
3,3,3,3,3,3,3
4,4,4,4,4,4,4
5,5,5,5,5,5,5
6,6,6,6,6,6,6
7,7,7,7,7,7,7
8,8,8,8,8,8,8
9,9,9,9,9,9,9
0,0,0,0,0,0,0
Related
Getting unique values from csv file, output to new file
I am trying to get the unique values from a csv file. Here's an example of the file: 12,life,car,good,exellent 10,gift,truck,great,great 11,time,car,great,perfect The desired output in the new file is this: 12,10,11 life,gift,time car,truck good.great excellent,great,perfect Here is my code: def attribute_values(in_file, out_file): fname = open(in_file) fout = open(out_file, 'w') # get the header line header = fname.readline() # get the attribute names attrs = header.strip().split(',') # get the distinct values for each attribute values = [] for i in range(len(attrs)): values.append(set()) # read the data for line in fname: cols = line.strip().split(',') for i in range(len(attrs)): values[i].add(cols[i]) # write the distinct values to the file for i in range(len(attrs)): fout.write(attrs[i] + ',' + ','.join(list(values[i])) + '\n') fout.close() fname.close() The code currently outputs this: 12,10 life,gift car,truck good,great exellent,great 12,10,11 life,gift,time car,car,truck good,great exellent,great,perfect How can I fix this?
You could try to use zip to iterate over the columns of the input file, and then eliminate the duplicates: import csv def attribute_values(in_file, out_file): with open(in_file, "r") as fin, open(out_file, "w") as fout: for column in zip(*csv.reader(fin)): items, row = set(), [] for item in column: if item not in items: items.add(item) row.append(item) fout.write(",".join(row) + "\n") Result for the example file: 12,10,11 life,gift,time car,truck good,great exellent,great,perfect
Python - changing content of .txt files from folder and saving in new folder
I need to change some key words in multiple .txt files, using dictionary strucure for this. Then, save changed files in new localization. I write code attached below, but when I run it is warking all the time, and when I break it there is only one empty file cretead. import os import os.path from pathlib import Path dir_path = Path("C:\\Users\\myuser\\Documents\\scripts_new") #loading pair of words from txt file into dictionary myfile = open("C:\\Users\\myuser\\Desktop\\Python\\dictionary.txt") data_dict = {} for line in myfile: k, v = line.strip().split(':') data_dict[k.strip()] = v.strip() myfile.close() # Get the list of all files and directories path_dir = "C:\\Users\\myuser\\Documents\\scripts" # iterate over files in # that directory for filename in os.listdir(path_dir): f = os.path.join(path_dir, filename) name = os.path.join(filename) text_file = open(f) #read whole file to a string sample_string = text_file.read() # Iterate over all key-value pairs in dictionary for key, value in data_dict.items(): # Replace key character with value character in string sample_string = sample_string.replace(key, value) with open(os.path.join(dir_path,name), "w") as file1: toFile = input(sample_string) file1.write(toFile)
I have found a solution, with a little different approach. Maybe this code might be usefull for someone: import os #loading pair of words from txt file into dictionary myfile = open("C:\\Users\\user\\Desktop\\Python\\dictionary.txt") data_dict = {} for line in myfile: k, v = line.strip().split(':') data_dict[k.strip()] = v.strip() myfile.close() sourcepath = os.listdir("C:\\Users\\user\\Documents\\scripts\\") for file in sourcepath: input_file = "C:\\Users\\user\\Documents\\scripts\\" + file print('Conversion is ongoing for: ' + input_file) with open(input_file, 'r') as input_file: filedata = input_file.read() destination_path = "C:\\Users\\user\\Documents\\scripts_new\\"+ file # Iterate over all key-value pairs in dictionary for key, value in data_dict.items(): filedata = filedata.replace(key,value) with open(destination_path,'w') as file: file.write(filedata)
Hmmm... I think your problem might actually be use of the line toFile = input(sample_string) As that'll halt the program awaiting a user input Anyway, it could probably do with a little organisation into functions. Even this below is a bit... meh. import os import os.path from pathlib import Path dir_path = Path("C:\\Users\\myuser\\Documents\\scripts_new") # ----------------------------------------------------------- def load_file(fileIn): #loading pair of words from txt file into dictionary with open(fileIn) as myfile: data_dict = {} for line in myfile: k, v = line.strip().split(':') data_dict[k.strip()] = v.strip() return data_dict # ----------------------------------------------------------- def work_all_files(starting_dir, moved_dir, data_dict): # Iterate over files within the dir - note non recursive for filename in os.listdir(starting_dir): f = os.path.join(starting_dir, filename) with open(f, 'r') as f1: #read whole file to a string sample_string = f1.read() new_string = replace_strings(sample_string, data_dict) with open(os.path.join(moved_dir, filename), "w") as file1: file1.write(new_string) # ----------------------------------------------------------- def replace_strings(sample_string, data_dict): # Iterate over all key-value pairs in dictionary # and if they exist in sample_string, replace them for key, value in data_dict.items(): # Replace key character with value character in string sample_string = sample_string.replace(key, value) return sample_string # ----------------------------------------------------------- if __name__ == "__main__": # Get the dict-val pairings first data_dict = load_file("C:\\Users\\myuser\\Desktop\\Python\\dictionary.txt") #Then run over all the files within dir work_all_files("C:\\Users\\myuser\\Documents\\scripts", "C:\\Users\\myuser\\Documents\\new_scripts", data_dict) We could have housed all this in a class and then transported a few variables around using the instance (i.e. "self") - would have been cleaner. 
But first step is learning to break things into functions.
How to split text file by id in python
I have a bunch of text files containing tab separated tables. The second column contains an id number, and each file is already sorted by that id number. I want to separate each file into multiple files by the id number in column 2. Here's what I have. readpath = 'path-to-read-file' writepath = 'path-to-write-file' for filename in os.listdir(readpath): with open(readpath+filename, 'r') as fh: lines = fh.readlines() lastid = 0 f = open(writepath+'checkme.txt', 'w') f.write(filename) for line in lines: thisid = line.split("\t")[1] if int(thisid) <> lastid: f.close() f = open(writepath+thisid+'-'+filename,'w') lastid = int(thisid) f.write(line) f.close() What I get is simply a copy of all the read files with the first id number from each file in front of the new filenames. It is as if thisid = line.split("\t")[1] is only done once in the loop. Any clue to what is going on? EDIT The problem was my files used \r rather than \r\n to terminate lines. Corrected code (simply adding 'rU' when opening the read file and swapping != for <>): readpath = 'path-to-read-file' writepath = 'path-to-write-file' for filename in os.listdir(readpath): with open(readpath+filename, 'rU') as fh: lines = fh.readlines() lastid = 0 f = open(writepath+'checkme.txt', 'w') f.write(filename) for line in lines: thisid = line.split("\t")[1] if int(thisid) != lastid: f.close() f = open(writepath+thisid+'-'+filename,'w') lastid = int(thisid) f.write(line) f.close()
If you're dealing with tab delimited files, then you can use the csv module, and take advantage of the fact that itertools.groupby will do the previous/current tracking of the id for you. Also utilise os.path.join to make sure your filenames end up joining correctly. Untested: import os import csv from itertools import groupby readpath = 'path-to-read-file' writepath = 'path-to-write-file' for filename in os.listdir(readpath): with open(os.path.join(readpath, filename)) as fin: tabin = csv.reader(fin, delimiter='\t') for file_id, rows in groupby(tabin, lambda L: L[1]): with open(os.path.join(writepath, file_id + '-' + filename), 'w') as fout: tabout = csv.writer(fout, delimiter='\t') tabout.writerows(rows)
Python: add value and write output
I need to get information from a list and add a column year from name. I still not sure how to add one field 'year' in record. Can I use append? And about output file, I just need use outputcsv.writerow(records) isn't it? This is a part of code that I stuck: filenames = ('babyQld2010.csv', 'babyQld2011.csv', 'babyQld2012.csv', 'babyQld2012.csv', 'babyQld2014.csv') outFile = open('babyQldAll.csv','w') csvFile_out = csv.writer(outFile, delimiter=',') for filename in filenames: name, ext = filename.split('.') year = name[-4:] #extract year from file names records = extract_names(filename) # Get (name, count, gender) from list "records", # and add value of "year" and write into output file (using "for" loop ) Output file look like: 2010,Lola,69,Girl And input, I have 5 file babyQld2010.csv, babyQld2011.csv, babyQld2012.csv, babyQld2012.csv, babyQld2014.csv which contains: Mia,425,William,493 and I have to sort it in format and I already done it and save in list 'records' Lola,69,Girl now I need to add one field 'year' on 'record' list and export csv file. This is my full code: import csv def extract_names(filename): ''' Extract babyname, count, gender from a csv file, and return the data in a list. 
''' inFile = open(filename, 'rU') csvFile = csv.reader(inFile, delimiter=',') # Initialization records = [] rowNum = 0 for row in csvFile: if rowNum != 0: # +++++ You code here ++++ # Read each row of csv file and save information in list 'records' # as (name, count, gender) records.append([row[0], row[1], "Female"]) records.append([row[2], row[3], "Male"]) print('Process each row...') rowNum += 1 inFile.close() return(records) #### Start main program ##### filenames = ('babyQld2010.csv', 'babyQld2011.csv', 'babyQld2012.csv', 'babyQld2012.csv', 'babyQld2014.csv') with open('babyQldAll.csv','w') as outFile: csvFile_out = csv.writer(outFile, delimiter=',') for filename in filenames: name, ext = filename.split('.') year = name.split('.')[0][-4:] #extract year from file names records = extract_names(filename) for record in records: csvFile_out.write([year] + record) print("Write in csv file...") outFile.close()
To get the year from the csv file you can simply split the string at '.' and then take the last four characters from the first part of the split. Example - >>> s = 'babyQld2010.csv' >>> s.split('.')[0][-4:] '2010' Then just simply iterate over your list of records, which you say is correct, for each list within in, use list contatenation to create a new list with year at the start and write that to csv file. I would also suggest that you use with statement for opening the file to write to (and even in the function where you are reading from the other csv files). Example - filenames = ('babyQld2010.csv', 'babyQld2011.csv', 'babyQld2012.csv', 'babyQld2012.csv', 'babyQld2014.csv') with open('babyQldAll.csv','w') as outFile: csvFile_out = csv.writer(outFile, delimiter=',') for filename in filenames: name, ext = filename.split('.') year = name.split('.')[0][-4:] #extract year from file names records = extract_names(filename) for record in records: csvFile_out.writerow([year] + record)
Yes, you can just append the year column to each row as you read it in from your source files. You can read in & write out each row as a dictionary so that you can use your existing column headers to address the data if you need to massage it on the way through. Using the csv.DictWriter() method you specify your headers (fieldnames) when you set it up. You can then write them out with the writeheader() method. import csv file_list = ['babyQld2010.csv', 'babyQld2011.csv', 'babyQld2012.csv', 'babyQld2012.csv', 'babyQld2014.csv'] outFile = open('babyQldAll.csv', 'wb') csv_writer = csv.DictWriter(outFile, fieldnames=['name','count','gender','year']) csv_write_out.writeheader() for a_file in file_list: name,ext = a_file.split('.') year = name[-4:] with open(a_file, 'rb') as inFile: csv_read_in = csv.DictReader(inFile) for row in csv_read_in: row['year'] = year csv_writer.writerow(row) outfile.close() Hope this helps.
Having problems searching with strings from csv module in Python
I have a csv file partList.csv with strings that I want to use to search through a larger group of txt files. For some reason when I use the direct string 'L 99' I get a result. When I load the string L 99 from the csv I get no result. partList.csv only contains cells in the first column with part numbers, one of which is L-99. txt_files_sample\5.txt is a text document that at some point contains the string L 99 My code: def GetPartList(): partList = [] f = open('partList.csv', 'rb') try: reader = csv.reader(f) for row in reader: part = row[0].replace('-',' ').strip() partList.append(part) finally: f.close() return partList def FindFileNames(partList): i = 0 files = [] for root, dirs, filenames in os.walk('txt_files_sample'): for f in filenames: document = open(os.path.join(root, f), 'rb') for line in document: if partList[i] in line: #if 'L 99' in line: files.append(f) break i = i + 1 return files print FindFileNames(GetPartList()) The code, as it stands above produces: >>> [] If I uncomment if 'L 99' in line: and comment out if partList[i] in line: I get the result: >>> ['5.txt']
So using Martijn's input, I discovered the issue was how I looped over partList. Rewritting FindFileNames() worked: def FindFileList(partList): i = 0 files = [] for root, dirs, filenames in os.walk('txt_files'): for f in filenames: a = 0 document = open(os.path.join(root, f), 'rb') for line in document: if a is 1: break for partNo in partList: if partNo in line: files.append(f) a = 1 document.close() return files With the updated code I got a result that was an accurate list of filenames.