The code is to read the xls file from a directory, convert it to csv file and copy it to another directory.
filePath = os.path.join('.', 'attachments')
filePaths = [f for f in os.listdir(filePath) if os.path.isfile(os.path.join(filePath, f)) and f.endswith('.xls')]
for f in filePaths:
wb = xlrd.open_workbook(os.path.join(filePath, f))
sheet = wb.sheet_by_index(0)
filename = f + '.csv'
fp = open(os.path.join(filePath, filename), 'wb')
wr = csv.writer(fp, quoting=csv.QUOTE_ALL)
for rownum in xrange(sheet.nrows):
wr.writerow(sheet.row_values(rownum))
fp.close
shutil.copy(os.path.join('.', 'attachments', filename), new_directory)
The result is:
The xls file is successfully converted to a csv file, but in the new_directory, the copied file only contains part of the csv file.
For example, the original csv file has 30 rows, but in the copied file, there is only 17 rows. Any idea of why this would happen?
Here's your problem:
fp.close
You need to call the close method, not just reference it as a method. So:
fp.close()
However, it will make your life easier if you use with statements instead of trying to figure out where to explicitly close everything:
with open(os.path.join(filePath, filename), 'wb') as fp:
wr = csv.writer(fp, quoting=csv.QUOTE_ALL)
for rownum in xrange(sheet.nrows):
wr.writerow(sheet.row_values(rownum))
Related
I have text files in multiple folders(folder names are the names of categories/labels). I want to generate a csv file(dataset) that also has a column as the label(folder name) of that category of text.
import csv
import os
folder = os.path.dirname("/home/jaideep/Desktop/folder/ML DS/Csv/Datasets/")
folder_list = os.listdir(folder)
with open("/home/jaideep/Desktop/folder/ML DS/Csv/data.csv", "w") as outfile:
writer = csv.writer(outfile)
writer.writerow(['Label', 'Email','Message'])
for f in folder_list:
file_list = os.listdir(folder+"/"+f+"/")
print(file_list)
for file in file_list:
with open(file, "r") as infile:
contents = infile.read()
outfile.write(f+',')
outfile.write(contents)
But I'm getting
File "/home/jaideep/Desktop/folder/ML DS/Csv/Main.py", line 15, in <module>
with open(file, "r") as infile:
FileNotFoundError: [Errno 2] No such file or directory: 'file2.txt'
I know there are similar questions previously asked, but I couldn't file solution to my issue. Any help would be appreciated, thanks.
os.listdir only lists the filenames of a directory, so you need to reconstruct the path.
You may want to check out glob for that matter.
This version should solve your issue.
import csv
import os
folder = os.path.dirname("/home/jaideep/Desktop/folder/ML DS/Csv/Datasets/")
folder_list = os.listdir(folder)
with open("/home/jaideep/Desktop/folder/ML DS/Csv/data.csv", "w") as outfile:
writer = csv.writer(outfile)
writer.writerow(['Label', 'Email','Message'])
for f in folder_list:
file_list = os.listdir(os.path.join(folder, f))
print(file_list)
for file in file_list:
with open(os.path.join(folder, f, file), "r") as infile:
contents = infile.read()
outfile.write(f+',')
outfile.write(contents)
Can you help me please. how to add column name to csv file with python.
dirname = os.path.dirname(os.path.abspath(__file__))
csvfilename = os.path.join(dirname, 'csvfile.csv')
file_exists = os.path.isfile(csvfilename)
f = open(csvfilename,'a')
f.write(list[0] + ';' + '\r\n')
f.close()
may be you can add a header like this?
with open(csvfilename, 'wt', newline ='') as file:
write_header = csv.writer(file, delimiter=',')
write_header.writerow(i for i in list[0])
Since you just want to modify a single line of the file there isn't a need to run this all through a CSV processor. Its generally best not to read and write to the same file. You can create a temporary file and make the changes to the top of the file before copying the bulk.
import shutil
dirname = os.path.dirname(os.path.abspath(__file__))
csvfilename = os.path.join(dirname, 'csvfile.csv')
tmpfilename = os.path.join(dirname, 'csvfile.csv.tmp')
file_exists = os.path.isfile(csvfilename)
with open(csvfilename, 'rb') as f_in, open(tmpfilename, 'wb') as f_out:
# discard current header
next(f_in)
# write new header
f_out.write("colname\r\n".encode())
# copy the rest of the file
shutil.copyfileobj(f_in, f_out)
# replace original csv with fixed
shutil.move(tmpfilename, csvfilename)
I have a list of tsv files where I am looking to grab column headers for all the files.
with open(os.path.abspath('reference/file.tsv'), 'rU') as file:
reader = csv.reader(file)
row1 = next(reader)
Currently, this snippet only reads 1 file where I have a list of files that needs to be parsed.
dir_path = os.path.abspath('reference/')
files = os.listdir(dir_path)
The name of the files are listed in files. How do I loop through the list of files and grab only the column headers for each file?
I try this and it works.
import os
import csv
dir_path = os.path.abspath('reference/')
files = os.listdir(dir_path)
for f in files:
with open(dir_path +'/'+f, 'rU') as file:
reader = csv.reader(file)
row1 = next(reader)
print row1
The files variable in your code is the content of the reference folder, meaning all files and subfolders of the folder. They are returned in a list of strings, containing only the file or subfolder name. This means that you'll have to prefix the path yourself.
Example:
dir_path = os.path.abspath('reference/')
files = os.listdir(dir_path)
for file in files:
# Skip non-files
if not os.path.isfile(file):
continue
with open(os.path.join(dir_path, file), 'rU') as f:
reader = csv.reader(f)
row1 = next(reader)
An alternative using the pathlib module:
for file in Path('reference/').glob('*'):
if not file.is_file():
continue
with open(str(file.resolve()), 'rU') as f:
reader = csv.reader(f)
row1 = next(reader)
Wouldn't you be better off in reading the first line of each of those files, appending them to a list and then passing them to csvreader?
Example:
lines = []
with open(str(file.resolve()), 'rU') as f:
lines.append(f.readline())
reader = csv.reader(lines)
for row in reader:
# whatever you want to do with the parsed lines
I am trying to merge all the csv files in a folder into one large csv file. I also need to add a new column to this merged csv that shows the original file that each row came from. This is the code I have so far:
import csv
import glob
read_files = glob.glob("*.csv")
source = []
with open("combined.files.csv", "wb") as outfile:
for f in read_files:
source.append(f)
with open(f, "rb") as infile:
outfile.write(infile.read())
I know I have to somehow repeat each f for as many rows as are in each csv and then append that as a new column to the .write command, but I am not sure how to do this. Thank you everyone!
If you add the filename as the final column, you don't need to parse the csv at all. Just read them line by line, add filename and write. And don't open in binary mode!
import glob
import os
out_filename = "combined.files.csv"
if os.path.exists(out_filename):
os.remove(out_filename)
read_files = glob.glob("*.csv")
with open(out_filename, "w") as outfile:
for filename in read_files:
with open(filename) as infile:
for line in infile:
outfile.write('{},{}\n'.format(line.strip(), filename))
If your csv's have a common header line, pick one to write to the outfile and supress the rest
import os
import glob
want_header = True
out_filename = "combined.files.csv"
if os.path.exists(out_filename):
os.remove(out_filename)
read_files = glob.glob("*.csv")
with open(out_filename, "w") as outfile:
for filename in read_files:
with open(filename) as infile:
if want_header:
outfile.write('{},Filename\n'.format(next(infile).strip()))
want_header = False
else:
next(infile)
for line in infile:
outfile.write('{},{}\n'.format(line.strip(), filename))
I need some help with converting a number of text files to csv files. All my text files are in one folder and I want to convert them to csv files into another folder. The names of individual files should remain the same. Below is the script I got so far...converting an individual file works fine but to work on all the files within a folder is where I am stuck. Any help will be appreciated.
import csv
import os
directory = raw_input("INPUT Folder:")
output = raw_input("OUTPUT Folder")
txt_files = directory
csv_files = output
try:
for txt_file in txt_files:
in_txt = csv.reader(open(txt_file, "rb"), delimiter = '=')
for csv_file in csv_files:
out_csv = csv.writer(open(csv_file, 'wb'))
out_csv.writerows(in_txt)
except:
print ()
glob.glob() is perfectly suited for the task. Also, use with context manager when working with files:
import csv
import glob
import os
directory = raw_input("INPUT Folder:")
output = raw_input("OUTPUT Folder:")
txt_files = os.path.join(directory, '*.txt')
for txt_file in glob.glob(txt_files):
with open(txt_file, "rb") as input_file:
in_txt = csv.reader(input_file, delimiter='=')
filename = os.path.splitext(os.path.basename(txt_file))[0] + '.csv'
with open(os.path.join(output, filename), 'wb') as output_file:
out_csv = csv.writer(output_file)
out_csv.writerows(in_txt)