Looping until a file gets created in Python

I am writing a CSV file named OUT_FILE. I can see that the file is not immediately created on disk, so I want to wait until it has been created.
Below is the code that writes the CSV file:
import csv

with open(OUT_FILE, 'a') as outputfile:
    with open(INTER_FILE, 'rb') as feed:
        writer = csv.writer(outputfile, delimiter=',', quotechar='"')
        reader = csv.reader(feed, delimiter=',', quotechar='"')
        for row in reader:
            reportable_jurisdiction = row[7]
            if '|' in reportable_jurisdiction:
                row[7] = "|".join(sorted(row[7].split('|')))
                print " reportable Jurisdiction with comma " + reportable_jurisdiction
            else:
                print "reportable Jurisdiction if single " + reportable_jurisdiction
            writer.writerow(row)
# the with statements close both files, so explicit close() calls are not needed
Now I have one file called FEED_FILE, which is actually the input for OUT_FILE, i.e. after writing the data to OUT_FILE, the sizes of OUT_FILE and FEED_FILE should be the same.
So I have written the code below:
import os
import time

# size the output should reach: the size of FEED_FILE
FeedFileSize = os.path.getsize(FEED_FILE)

while True:
    try:
        print 'sleeping for 5 seconds'
        time.sleep(5)
        outputfileSize = os.path.getsize(OUT_FILE)
        if outputfileSize == FeedFileSize:
            break
    except OSError:
        # os.path.getsize() raises OSError while OUT_FILE does not exist yet
        print " file not created "
        continue
print " file created !!"
Now I don't know whether this is even executing, since there are no errors and the print output never appears.
Any help?

You can check whether the file exists using Python's os module:
import os
from time import sleep

def wait_for_file(path):
    timeout = 300
    while timeout > 0:
        if os.path.exists(path):
            # check if your condition of the file size being
            # the same as the feed file size is met
            return
        timeout = timeout - 5
        sleep(5)
    raise Exception("File was not created")
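To fold in the size condition from the question, here is a minimal sketch, assuming FEED_FILE is the file whose size OUT_FILE should eventually match (wait_for_matching_size is a name made up for illustration, not an existing function):
import os
import time

def wait_for_matching_size(out_path, feed_path, timeout=300, interval=5):
    # poll until out_path exists and has the same size as feed_path, or give up
    expected = os.path.getsize(feed_path)
    waited = 0
    while waited < timeout:
        if os.path.exists(out_path) and os.path.getsize(out_path) == expected:
            return
        time.sleep(interval)
        waited += interval
    raise RuntimeError("%s did not reach %d bytes within %d seconds"
                       % (out_path, expected, timeout))

# usage:
# wait_for_matching_size(OUT_FILE, FEED_FILE)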

Related

How to export text to a new file, userinput?

So I wrote a little program in Python which takes a .csv file, filters out the lines I need, and then exports them into a new .txt file.
This worked quite well, so I decided to make it more user friendly by letting the user select the file to be converted through the console (command line).
My problem: the file is imported as a .csv file but not exported as a .txt file, which leads to my program overwriting the original file. The original ends up empty because of a step in my program that deletes the first two lines of the output text.
Does anyone know a solution for this?
Thanks :)
import csv
import sys

userinput = raw_input('List:')
# note: this opens the input file itself for writing, truncating it
saveFile = open(userinput, 'w')
with open(userinput, 'r') as file:
    reader = csv.reader(file)
    count = 0
    for row in reader:
        print(row[2])
        saveFile.write(row[2] + ' ""\n')
saveFile.close()

saveFile = open(userinput, 'r')
data_list = saveFile.readlines()
saveFile.close()
del data_list[1:2]

saveFile = open(userinput, 'w')
saveFile.writelines(data_list)
saveFile.close()
Try this:
userinput = raw_input('List:')
f_extns = userinput.split(".")
saveFile = open(f_extns[0] + '.txt', 'w')
I think you probably just want to save the file under a new name. This question, Extracting extension from filename in Python, talks about splitting out the extension, so you can then just add your own.
You would end up with something like:
import os
name, ext = os.path.splitext(userinput)
saveFile = open(name + '.txt', 'w')
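For reference, os.path.splitext splits on the last dot, so names containing extra dots are handled correctly:
>>> import os
>>> os.path.splitext('my.list.csv')
('my.list', '.csv')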
You probably just need to change the extension of the output file. Here is a solution that sets the output file extension to .txt; if the input file is also .txt then there will be a problem, but for all other extensions of the input file this should work.
import csv
import os

file_name = input('Name of file:')
# https://docs.python.org/3/library/os.path.html#os.path.splitext
# https://stackoverflow.com/questions/541390/extracting-extension-from-filename-in-python
file_name, file_ext_r = os.path.splitext(file_name)
file_ext_w = '.txt'
file_name_r = '{}{}'.format(file_name, file_ext_r)
file_name_w = '{}{}'.format(file_name, file_ext_w)
print('File to read:', file_name_r)
print('File to write:', file_name_w)

with open(file_name_r, 'r') as fr, open(file_name_w, 'w') as fw:
    reader = csv.reader(fr)
    for i, row in enumerate(reader):
        print(row[2])
        if i >= 2:
            fw.write(row[2] + ' ""\n')
I also simplified your logic to avoid writing the first two lines to the output file; there is no need to read and rewrite the output file afterwards.
Does this work for you?

Trying to read files named file1, file2, file3 using a for loop in Python

I am pretty new to Python and trying to run a script to edit CSV files. I need to split the CSV files into smaller pieces (they are large files and I am getting memory errors) and then run another script to edit them, but when I combine these two scripts and run the test, only the first small file is read; the rest are not.
For example: when I split the main CSV file, the pieces are named big-1.csv, big-2.csv, and so on. Then when the script picks up the files to edit, only big-1.csv gets edited and the rest do not.
The script is:
import csv
from csv import DictWriter

divisor = 990
outfileno = 1
outfile = None

with open('MOCK_DATA.csv', 'r', newline='') as infile:
    infile_iter = csv.reader(infile, delimiter='\t')
    header = next(infile_iter)
    for index, row in enumerate(infile_iter):
        if index % divisor == 0:
            if outfile:
                outfile.close()
            outfilename = 'big-{}.csv'.format(outfileno)
            outfile = open(outfilename, 'w', newline='')
            outfileno += 1
            writer = csv.writer(outfile, delimiter='\t', quoting=csv.QUOTE_NONE)
            writer.writerow(header)
        writer.writerow(row)

# Don't forget to close the last file
if outfile:
    outfile.close()

# edit the split files and export the corrected data
for i in range(1, 2):
    with open("big-" + str(i) + ".csv") as people_file:
        next(people_file)
        corrected_people = []
        for person_line in people_file:
            chomped_person_line = person_line.rstrip()
            person_tokens = chomped_person_line.split(",")
            # check that each field has the expected type
            try:
                corrected_person = {
                    "id": person_tokens[0],
                    "first_name": person_tokens[1],
                    "last_name": "".join(person_tokens[2:-3]),
                    "email": person_tokens[-3],
                    "gender": person_tokens[-2],
                    "ip_address": person_tokens[-1]
                }
                if not corrected_person["ip_address"].startswith(
                        "") and corrected_person["ip_address"] != "n/a":
                    raise ValueError
                corrected_people.append(corrected_person)
            except (IndexError, ValueError):
                # print the ignored lines, so manual correction can be performed later.
                print("Could not parse line: " + chomped_person_line)
    with open("fix-" + str(i) + ".csv", "w") as corrected_people_file:
        writer = DictWriter(
            corrected_people_file,
            fieldnames=[
                "id", "first_name", "last_name", "email", "gender", "ip_address"
            ], delimiter=',')
        writer.writeheader()
        writer.writerows(corrected_people)
I think this may be an issue with how the smaller files are read in the for loop. The script runs without any error. Please help.
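One likely culprit is the loop header itself: range(1, 2) yields only 1, so only big-1.csv is ever opened. As a minimal sketch of one way to visit every split file regardless of how many were produced, glob can collect them (glob and the numeric sort key are additions for illustration, not part of the original script):
import glob
import re

# collect every split file and sort numerically so big-2 comes before big-10
split_files = sorted(glob.glob('big-*.csv'),
                     key=lambda p: int(re.search(r'(\d+)', p).group(1)))
for path in split_files:
    with open(path) as people_file:
        next(people_file)  # skip the header line
        # ... same per-file editing logic as above ...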

A csv file appears empty while it is not

I'm creating a program that writes information from a vote into a new file for results, and I need to check whether the file is empty before writing in all the available voting options. (Otherwise the data would overwrite the already entered results each time a person voted.) My problem is that nothing is printed when I run this piece of code. Here it is:
def VOTE1():
    Data = ("VOTE")
    voteChoice = OptionAmount.get()
    with open('EXISTINGVOTE.CSV') as infile:
        reader = csv.DictReader(infile)
        for row in reader:
            if voteChoice == row['TITLE NAME']:
                f = open('{}.csv'.format(row['TITLE NAME']), "w")
                f.close()
                with open('{}.csv'.format(row['TITLE NAME']), "r") as f:
                    VoteRead = csv.reader(f, delimiter=",")
                    for row in VoteRead:
                        for field in row:
                            if field == Data:
                                print("Empty file")
                            else:
                                print("There is data in here")
            else:
                print("Not this one")
I should be getting a statement saying "Empty file" or "There is data in here".
Currently you destroy the file contents by opening the file for writing …
f = open('{}.csv'.format(row['TITLE NAME']), "w")
… and then you read the now empty new file.
open('{}.csv'.format(row['TITLE NAME']), "r") as f
What you probably want to do is open the file in append mode:
if voteChoice == row['TITLE NAME']:
    # two lines deleted
    with open('{}.csv'.format(row['TITLE NAME']), "a") as f:
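To know whether the voting options still need to be written, one option is to check the file size before appending. A minimal sketch, assuming the goal is to write the options only into an empty or missing file; write_options is a hypothetical placeholder for whatever writes the initial voting options:
import os

path = '{}.csv'.format(row['TITLE NAME'])
# os.path.getsize returns 0 for an empty file; treat a missing file as empty too
is_empty = not os.path.exists(path) or os.path.getsize(path) == 0
with open(path, 'a') as f:
    if is_empty:
        write_options(f)  # hypothetical: write the available voting options once
    # ... then append the new vote ...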

Spawn multiple processes to write different files in Python

The idea is to write N files using N processes.
The data for each output file come from multiple input files, which are stored in a dictionary that maps each output name to a list of its inputs, like this:
dic = {'file1': ['data11.txt', 'data12.txt', ..., 'data1M.txt'],
       'file2': ['data21.txt', 'data22.txt', ..., 'data2M.txt'],
       ...
       'fileN': ['dataN1.txt', 'dataN2.txt', ..., 'dataNM.txt']}
So file1 is data11 + data12 + ... + data1M, and so on.
So my code looks like this:
jobs = []
for d in dic:
    outfile = str(d) + "_merged.txt"
    with open(outfile, 'w') as out:
        p = multiprocessing.Process(target=merger.merger, args=(dic[d], name, out))
        jobs.append(p)
        p.start()
    out.close()
and the merger.py looks like this:
def merger(files, name, outfile):
    time.sleep(2)
    sys.stdout.write("Merging %n...\n" % name)
    # the reason for this step is that all the different files have a header
    # but I only need the header from the first file.
    with open(files[0], 'r') as infile:
        for line in infile:
            print "writing to outfile: ", name, line
            outfile.write(line)
    for f in files[1:]:
        with open(f, 'r') as infile:
            next(infile)  # skip first line
            for line in infile:
                outfile.write(line)
    sys.stdout.write("Done with: %s\n" % name)
I do see the file appear in the folder it should go to, but it's empty: no header, nothing. I put prints in there to check that everything is correct, but nothing shows up.
Help!
Since the worker processes run in parallel with the main process that creates them, the files named out get closed before the workers can write to them. This happens even if you remove out.close(), because of the with statement. Instead, pass each process the filename and let the process open and close the file.
The problem is that you don't close the file in the child, so internally buffered data is lost. You could move the file open to the child, or wrap the whole thing in a try/finally block to make sure the file closes. A potential advantage of opening in the parent is that you can handle file errors there. I'm not saying it's compelling, just an option.
def merger(files, name, outfile):
    try:
        time.sleep(2)
        sys.stdout.write("Merging %s...\n" % name)
        # the reason for this step is that all the different files have a header
        # but I only need the header from the first file.
        with open(files[0], 'r') as infile:
            for line in infile:
                print "writing to outfile: ", name, line
                outfile.write(line)
        for f in files[1:]:
            with open(f, 'r') as infile:
                next(infile)  # skip first line
                for line in infile:
                    outfile.write(line)
        sys.stdout.write("Done with: %s\n" % name)
    finally:
        outfile.close()
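For the filename-passing approach from the first answer, a minimal sketch (adapted from the code above; the parent never opens the output file, so there is no shared handle to lose buffered data through):
def merger(files, name, outfilename):
    with open(outfilename, 'w') as outfile:
        # first file: keep its header
        with open(files[0], 'r') as infile:
            for line in infile:
                outfile.write(line)
        # remaining files: skip the header line
        for f in files[1:]:
            with open(f, 'r') as infile:
                next(infile)
                for line in infile:
                    outfile.write(line)

# in the parent, pass the name instead of an open file:
# p = multiprocessing.Process(target=merger.merger,
#                             args=(dic[d], d, str(d) + "_merged.txt"))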
UPDATE
There has been some confusion about parent/child file descriptors and what happens to files in the child. The underlying C library does not flush data to disk if a file is still open when the program exits; the theory is that a properly written program closes things before exiting. Here is an example where the child loses data because it does not close the file.
import multiprocessing as mp
import os
import time

if os.path.exists('mytestfile.txt'):
    os.remove('mytestfile.txt')

def worker(f, do_close=False):
    time.sleep(2)
    print('writing')
    f.write("this is data")
    if do_close:
        print("closing")
        f.close()

print('without close')
f = open('mytestfile.txt', 'w')
p = mp.Process(target=worker, args=(f, False))
p.start()
f.close()
p.join()
print('file data:', open('mytestfile.txt').read())

print('with close')
os.remove('mytestfile.txt')
f = open('mytestfile.txt', 'w')
p = mp.Process(target=worker, args=(f, True))
p.start()
f.close()
p.join()
print('file data:', open('mytestfile.txt').read())
I run it on Linux and I get:
without close
writing
file data:
with close
writing
closing
file data: this is data

Search, count and add - Python

properties = ["color", "font-size", "font-family", "width", "height"]
inPath = "style.css"
outPath = "output.txt"

# Open a file for reading
file = open(inPath, 'rU')
if file:
    # read from the file
    filecontents = file.read()
    file.close()
else:
    print "Error Opening File."

# Open a file for writing
file = open(outPath, 'wb')
if file:
    for i in properties:
        search = i
        index = filecontents.find(search)
        file.write(str(index), "\n")
    file.close()
else:
    print "Error Opening File."
It seems to work, but:
It only finds each keyword once?
It's not writing to the output file: "function takes exactly 1 argument".
I don't actually want to write the index, but the number of times the keyword appears.
Many thanks
First, you want .count(search), not .find(search), if what you're looking for is the number of occurrences.
Second, .write() only takes a single parameter; if you want to write a newline, you need to concatenate it first or call .write() twice.
Third, doing for i in properties: search = i is redundant; just use the name you want directly in your for loop.
for search in properties:
    cnt = filecontents.count(search)
    file.write(str(cnt) + "\n")
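Note that str.count counts non-overlapping occurrences of the exact substring, so a property name that appears inside another one is counted too:
>>> "color: red; background-color: blue;".count("color")
2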
from itertools import imap

properties = ("color", "font-size", "font-family", "width", "height")
inPath = "style.css"
outPath = "output.txt"

try:
    # Open a file for reading
    filecontents = file(inPath).read()
except Exception as exc:
    print exc
else:
    # Open a file for writing
    with open(outPath, 'wb') as out_file:
        #for property in properties:
        #    out_string = "%s %s\n"
        #    out_file.write(out_string % (
        #        property, filecontents.count(property)))
        out_file.write('\n'.join(
            imap(str, imap(filecontents.count, properties))))
