python2 can't file a file in current folder - python

For the next file info:
[jzun#hscd8a25e93f9vm dates]$ pwd
/home/jzun/vivo_mod_samples/dates
[jzun#hscd8a25e93f9vm dates]$ ls
date_def.json dates_add.rdf dates.bak dates.rdf dates_sub.rdf dates.txt datetime_precision_enum.txt gen_date_rdf.py gen_date_rdf.py.bak gen_dates.py gen_dates.py.bak get.txt README.md run_pump_2_to_create_date_defs.sh sv.cfg
I have the next python2 function:
def read_csv(filename, skip=True, delimiter='|'):
"""
Read a CSV file, return dictionary object
:param filename: name of file to read
:param skip: should lines with invalid number of columns be skipped? False=Throw Exception
:param delimiter: The delimiter for CSV files
:return: Dictionary object
"""
cwd = os.getcwd()
print("read_csv>current dir = " + cwd)
# fp = open(filename, 'rU')
# print(fp)
# data = read_csv_fp(fp, skip, delimiter)
# fp.close()
with open(filename, 'rU') as fp:
data = read_csv_fp(fp, skip, delimiter)
fp.close()
return data
After running it with filename = dates.txt I get the next result:
read_csv>current dir = /home/jzun/vivo_mod_samples/dates
dates.txt file not found
I know similar questions have been posted but interestingly I can not find anything that could help me to solve this problem. Any ideas?

Try passing the full path of the file dates.txt:
cwd = os.getcwd()
file_name = "dates.txt"
file_path = os.path.join(cwd, file_name)
# Double check that file exist
assert os.path.isfile(file_path) is True
with open(file_path, 'rU') as fp:
data = read_csv_fp(fp, skip, delimiter)
fp.close()

Related

python csv add column name

Can you help me please. how to add column name to csv file with python.
dirname = os.path.dirname(os.path.abspath(__file__))
csvfilename = os.path.join(dirname, 'csvfile.csv')
file_exists = os.path.isfile(csvfilename)
f = open(csvfilename,'a')
f.write(list[0] + ';' + '\r\n')
f.close()
may be you can add a header like this?
with open(csvfilename, 'wt', newline ='') as file:
write_header = csv.writer(file, delimiter=',')
write_header.writerow(i for i in list[0])
Since you just want to modify a single line of the file there isn't a need to run this all through a CSV processor. Its generally best not to read and write to the same file. You can create a temporary file and make the changes to the top of the file before copying the bulk.
import shutil
dirname = os.path.dirname(os.path.abspath(__file__))
csvfilename = os.path.join(dirname, 'csvfile.csv')
tmpfilename = os.path.join(dirname, 'csvfile.csv.tmp')
file_exists = os.path.isfile(csvfilename)
with open(csvfilename, 'rb') as f_in, open(tmpfilename, 'wb') as f_out:
# discard current header
next(f_in)
# write new header
f_out.write("colname\r\n".encode())
# copy the rest of the file
shutil.copyfileobj(f_in, f_out)
# replace original csv with fixed
shutil.move(tmpfilename, csvfilename)

Read File headers and delimiters using Python

I am reading all the files from a given folder (contains Dir, Sub dir and files of type .csv, .txt ..)
I need to get the following information into an output file in the following format:
FileLocation, FileName, Delimiter, Columns
(All columns needed in a cell separated by delimiter)
I am using the following script which works fine except delimiter. I have tried using csv.sniffer but it does not work.
import sys,os,csv
ofilew = open('D:\OutputFile/Columns_Info.csv', 'w')
ofile = open('D:\OutputFile/Columns_Info.csv', 'a')
root = 'D:\UnZipFiles'
path = os.path.join(root)
columninfo = 'FolderLocation, FileName, Delimiter, Columns' + '\n'
ofilew.write(columninfo)
for r,d,f in os.walk(path):
for file in f:
fullfilepath = os.path.join(r,file)
with open(fullfilepath,'r') as f:
columninfo = f.readline()
columninfo = columninfo.replace(",", ";")
output = file +','+ columninfo
outputfinal = r + ',' + output
ofile.write(outputfinal)
The following approach should work for you, it uses Python's csv.sniffer feature to attempt to determine the correct dialect to use for reading the file. This also contains the delimiter that is used.
import os, csv
header_output = ['FolderLocation', 'FileName', 'Delimiter', 'Columns']
path = r'D:\UnZipFiles'
with open(r'D:\OutputFile\Columns_Info.csv', 'wb') as f_output:
csv_output = csv.writer(f_output)
csv_output.writerow(header_output)
for root, folders, files in os.walk(path):
for file in files:
full_file_path = os.path.join(root, file)
with open(full_file_path, 'rb') as f_input:
try:
dialect = csv.Sniffer().sniff(f_input.read(1024))
f_input.seek(0)
csv_input = csv.reader(f_input, dialect)
header_input = next(csv_input)
csv_output.writerow([root, file, dialect.delimiter] + header_input)
except csv.Error as e:
print "{} - could not determine the delimiter".format(file)
As an alternative to csv.sniffer, you could devise your own, but the Python one is much more powerful than this:
def get_delimiter(file_name):
cols_found = []
for delim in [',', ';', '|', '\t']:
with open(file_name, 'rb') as f_in:
cols_found.append([len(next(csv.reader(f_in, delimiter=delim))), delim])
if cols_found[-1][0] > 1:
return sorted(cols_found)[-1][1]
else:
return None
print get_delimiter('my.csv')
This returns a possible delimiter by counting which delimiter results in the most columns in the first row. If only one column is found, it returns None to indicate no matching delimiter was found. It could instead raise an exception.

Move files listed in csv file?

I have been trying to use the following code to move files that are listed in a csv list. But at most it will copy the last file in the list but not the rest.
I keep hitting this wall with every example I have seen listed what am I doing wrong?
My CVS list will have a list like:
12355,12355.jpg
Here's my code
import os
import shutil
import csv
keys={}
with open('shuttle_image.csv', 'r') as f:
reader = csv.reader(f, delimiter = ',')
for rowDict in reader:
keys[rowDict[0]] = rowDict[1]
print (rowDict)
dir_src = 'C:\\Users\\Willie\\Desktop\\Suppliers Dropship\\hunting\\'
dir_dst = 'C:\\image\\'
for file in os.listdir(dir_src):
src_file = os.path.join(dir_src, file)
dst_file = os.path.join(dir_dst, file)
if file in rowDict[1]:
shutil.move(src_file, dst_file)
I think doing something like this will work (untested):
import os
import shutil
import csv
keys={}
with open('shuttle_image.csv', 'r') as f:
reader = csv.reader(f, delimiter=',')
for rowDict in reader:
keys[rowDict[0]] = rowDict[1]
print(rowDict) # if desired
valid_files = set(keys.values()) # file names found in csv
dir_src = 'C:\\Users\\Willie\\Desktop\\Suppliers Dropship\\hunting\\'
dir_dst = 'C:\\image\\'
for file in os.listdir(dir_src):
if file in valid_files:
src_file = os.path.join(dir_src, file)
dst_file = os.path.join(dir_dst, file)
shutil.move(src_file, dst_file)
As an optimization, unless you need the keys dictionary for other processing, you could change the first part so it just creates the valid_files set variable used in the second for loop:
valid_files = set() # empty set
with open('shuttle_image.csv', 'r') as f:
for rowDict in csv.reader(f, delimiter=','):
valid_files |= {rowDict[1]} # add file name to set
print(rowDict) # if desired
The reason why it's only the last file that could be copied (if it was) is because in this line:
if file in rowDict[1]:
you are referencing rowDict outside of the first for-loop. So at that execution moment, it contains the last value of this loop.
If I understand correctly what you are trying to do you could try something like this (untested code):
import os
import shutil
import csv
dir_src = 'C:\\Users\\Willie\\Desktop\\Suppliers Dropship\\hunting\\'
dir_dst = 'C:\\image\\'
with open('shuttle_image.csv', 'r') as f:
reader = csv.reader(f, delimiter = ',')
for rowDict in reader:
id, filename = rowDict
src_file = os.path.join(dir_src, filename)
if os.path.exists(src_file):
shutil.move(src_file, dir_dst)
So instead of:
Constructing a dictionary with all the values in your CSV file
Somehow check for every file in your source directory that it is included in your dictionary (which is what I interpreted you were trying to do)
And move it if it does.
You could:
For every file extracted from your CSV, check that it exists in your source directory.
If it does, you move it to the destination directory.
Is that what you were trying to do ?
[And if the filename stays the same, you only need to specify the destination directory for the second argument of shutil.move()]

Read many csv file and write it to encoding to utf8 using python

I'm using python code to read from many csv files and set encoding to utf8.I meet the problem when I read the file I can read all lines but when I write it, it can write only 1 line. Please help me to check my code as below:
def convert_files(files, ascii, to="utf-8"):
for name in files:
#print ("Convert {0} from {1} to {2}").format(name, ascii, to)
with open(name) as f:
print(name)
count = 0
lineno = 0
#this point I want to write the below text into my each new file at the first line
#file_source.write('id;nom;prenom;nom_pere;nom_mere;prenom_pere;prenom_mere;civilite (1=homme 2=f);date_naissance;arrondissement;adresse;ville;code_postal;pays;telephone;email;civilite_demandeur (1=homme 2=f);nom_demandeur;prenom_demandeur;qualite_demandeur;type_acte;nombre_actes\n')
for line in f.readlines():
lineno +=1
if lineno == 1 :
continue
file_source = open(name, mode='w', encoding='utf-8', errors='ignore')
#pass
#print (line)
# start write data to to new file with encode
file_source.write(line)
#file_source.close
#print unicode(line, "cp866").encode("utf-8")
csv_files = find_csv_filenames('./csv', ".csv")
convert_files(csv_files, "cp866")
You're reopening the file during every iteration.
for line in f.readlines():
lineno +=1
if lineno == 1 :
continue
#move the following line outside of the for block
file_source = open(name, mode='w', encoding='utf-8', errors='ignore')
If all you need is to change the character encoding of the files then it doesn't matter that they are csv files unless the conversion may change what characters are interpreted as delimiter, quotechar, etc:
def convert(filename, from_encoding, to_encoding):
with open(filename, newline='', encoding=from_encoding) as file:
data = file.read().encode(to_encoding)
with open(filename, 'wb') as outfile:
outfile.write(data)
for path in csv_files:
convert(path, "cp866", "utf-8")
Add errors parameter to change how encoding/decoding errors are handled.
If files may be large then you could convert data incrementally:
import os
from shutil import copyfileobj
from tempfile import NamedTemporaryFile
def convert(filename, from_encoding, to_encoding):
with open(filename, newline='', encoding=from_encoding) as file:
with NamedTemporaryFile('w', encoding=to_encoding, newline='',
dir=os.path.dirname(filename)) as tmpfile:
copyfileobj(file, tmpfile)
tmpfile.delete = False
os.replace(tmpfile.name, filename) # rename tmpfile -> filename
for path in csv_files:
convert(path, "cp866", "utf-8")
You can do this
def convert_files(files, ascii, to="utf-8"):
for name in files:
with open(name, 'r+') as f:
data = ''.join(f.readlines())
data.decode(ascii).encode(to)
f.seek(0)
f.write(data)
f.truncate()

How to output in different directory?

I have this:
from os import path
base_path = "C:\\texts\\*.txt"
for file in files:
with open (file) as in_file, open(path.join(base_path,"%s_tokenized.txt" % file), "w") as out_file:
data = in_file.readlines()
for line in data:
words = line.split()
str1 = ','.join(words)
out_file.write(str1)
out_file.write("\n")
It produced tokenized files in the same directory it reads from. How can I output those out_files in different directory such as "C:\\texts\\Tokenized" ?
I know there are some ways to move those new files to other directory after producing them, but what I wanna know is that if there is anyway to output new files to other directory at the same time they are produced in above code?
Is this what you're looking for:
import os
import glob
source_pattern = 'c:/texts/*.txt'
output_directory = 'c:/texts/tokenized'
# Iterate over files matching source_pattern
for input_file in glob.glob(source_pattern):
# build the output filename
base,ext = os.path.splitext(os.path.basename(input_file))
output_file = os.path.join(output_directory,base + '_tokenized' + ext)
with open(input_file) as in_file, open(output_file,'w') as out_file:
for line in in_file:
out_file.write(','.join(line.split()) + '\n')
This is how I output to files in arbitrary directories :
dir_name = "../some_dir"
if not os.path.exists(dir_name) : os.makedirs(dir_name)
out_file_name = dir_name + '/out.txt'
out_file = open( out_file_name, 'w')
EDIT :
file_name = "{0}_tokenized.txt".format(something_from_tokenizing)
if not os.path.exists(dir_name) : os.makedirs(dir_name)
out_file_name = dir_name + file_name
EDIT :
I just tried it, worked for me. You simply need two paths, one for the source directory and one for the destination. Hope this helps.
import os
from os import path
f1 = open("in.txt")
f2 = open("out.txt")
files = ["in.txt", "out.txt"]
base_path = "."
dir_name = "./some_dir"
if not os.path.exists(dir_name) : os.makedirs(dir_name)
for file in files:
with open (file) as in_file, open(path.join(dir_name,"%s_tokenized.txt" % file), "w") as out_file:
data = in_file.readlines()
for line in data:
words = line.split()
str1 = ','.join(words)
out_file.write(str1)
out_file.write("\n")

Categories