Overwrite File or Append - python

Need a single function that will write a file if one does not exist.
Overwrite the file if it does exist, but saves the original or incriments the new file by 1.
Naming format is yyyymmdd, so if existing it would create a new file called yyymmdd-v2 or something like that.
This is what I have currently.
def write_diff_file(x):
from datetime import datetime
datestring = datetime.strftime(datetime.now(), '%Y_%m_%d')
try:
with open("./%s" % 'filediff_' + datestring + '.txt', 'a') as f:
line = str(x).replace("archive\\", "")
f.write(line)
f.write("\n")
f.name
#print "Comparison File Written"
except IOError as (errno, strerror):
print "I/O error({0}): {1}".format(errno, strerror)
print "Error in write_diff_file function"

You want to check whether the file exists and adapt the filename if it already does. Something like this should work:
import os
from datetime import datetime
datestring = datetime.strftime(datetime.now(), '%Y_%m_%d')
filename = 'filediff_' + datestring + '.txt'
filenb = 1
while os.path.exists(filename):
filenb += 1
filename = 'filediff_{0}_v{1}.txt'.format(datestring, filenb)
with open(filename, 'w') as f:
....

Related

Downloading the most recent file from FTP with Python

I have been trying to write a function with Python that would allow to download the most recently added file (by timestamp within filename).
You can see that the format has a big timestamp.
What I have so far with the help of forums is the following code.
In the following code, I tried to sort using the date field (real added date to FTP server). However,
I want to adjust this code so that I can sort the files by the timestamp within filename.
EDIT (Tried to clean the code a bit):
def DownloadFileFromFTPServer2 (server, username, password, directory_to_file, file_to_write):
try:
f = ftplib.FTP(server)
except ((socket.error, socket.gaierror), e):
print ('cannot reach to %s' % server)
return
print ("Connected to FTP server")
try:
f.login(username, password)
except ftplib.error_perm:
print ("cannot login anonymously")
f.quit()
return
print ("Logged on to the FTP server")
try:
f.cwd(directory_to_file)
print ("Directory has been set")
except Exception as inst:
print (inst)
data = []
f.dir(data.append)
datelist = []
filelist =[]
for line in data:
print (line)
col = line.split()
datestr = ' '.join(line.split()[5:8])
date = time.strptime (datestr, '%b %d %H:%M')
datelist.append(date)
filelist.append(col[8])
combo = zip (datelist, filelist)
who = dict ( combo )
# Sort by dates and get the latest file by date....
for key in sorted(iter(who.keys()), reverse = True):
filename = who[key]
print ("File to download is %s" % filename)
try:
f.retrbinary('RETR %s' % filename, open(filename, 'wb').write)
except (ftplib.err_perm):
print ("Error: cannot read file %s" % filename)
os.unlink(filename)
else:
print ("***Downloaded*** %s " % filename)
print ("Retrieving FTP server data ......... DONE")
#VERY IMPORTANT RETURN
return
f.quit()
return 1
Any help is greately appreciated. Thanks.
EDIT [SOLVED]:
The line
date = time.strptime (datestr, '%b %d %H:%M')
should be replaced with:
try:
date = datetime.datetime.strptime (str(col[8]), 'S01375T-%Y-%m-%d-%H-%M-%S.csv')
except Exception as inst:
continue
try-continue is important since the first two path lines such as '.' and '..' will result a ValuError.
Once you have the list of filenames you can simply sort on filename, since the naming convention is S01375T-YYYY-MM-DD-hh-mm.csv this will naturally sort into date/time order. Note that if the S01375T- part varies you could sort on the name split at a fixed position or at the first -.
If this was not the case you could use the datetime.datetime.strptime method to parse the filenames into datetime instances.
Of course if you wished to really simplify things you could use the PyFileSystem FTPFS and it's various methods to allow you to treat the FTP system as if is was a slow local file system.
Try with the -t option in ftp.dir, this orders by date in reverse, then take the first in the list:
data = []
ftp.dir('-t',data.append)
filename = data[0]
You need to extract the timestamp from the filename properly.
You could split the filename at the first '-' and remove the file extensition '.csv' (f.split('-', 1)[1][:-4]).
Then you just need to construct the datetime obj for sorting.
from datetime import datetime
def sortByTimeStampInFile(fList):
fileDict = {datetime.strptime(f.split('-', 1)[1][:-4], '%Y-%m-%d-%H-%M-%S'): f for f in fList if f.endswith('.csv')}
return [fileDict[k] for k in sorted(fileDict.keys())]
files = ['S01375T-2016-03-01-12-00-00.csv', 'S01375T-2016-01-01-13-00-00.csv', 'S01375T-2016-04-01-13-01-00.csv']
print(sortByTimeStampInFile(files))
Returns:
['S01375T-2016-01-01-13-00-00.csv', 'S01375T-2016-03-01-12-00-00.csv', 'S01375T-2016-04-01-13-01-00.csv']
Btw. as long as your time format is 'year-month-day-hour-min-sec', a simple string sort would do it:
sorted([f.split('-', 1)[1][:-4] for f in fList if f.endswith('.csv')])
>>> ['2016-01-01-13-00-00', '2016-03-01-12-00-00', '2016-04-01-13-01-00']

Python Writing to txt error

Im trying to write different things onto a text file in a while loop but it only writes it once. I want to write something to unmigrated.txt
import urllib.request
import json
Txtfile = input("Name of the TXT file: ")
fw = open(Txtfile + ".txt", "r")
red = fw.read()
blue = red.split("\n")
i=0
while i<len(blue):
try:
url = "https://api.mojang.com/users/profiles/minecraft/" + blue[i]
rawdata = urllib.request.urlopen(url)
newrawdata = rawdata.read()
jsondata = json.loads(newrawdata.decode('utf-8'))
results = jsondata['id']
url_uuid = "https://sessionserver.mojang.com/session/minecraft/profile/" + results
rawdata_uuid = urllib.request.urlopen(url_uuid)
newrawdata_uuid = rawdata_uuid.read()
jsondata_uuid = json.loads(newrawdata_uuid.decode('utf-8'))
try:
results = jsondata_uuid['legacy']
print (blue[i] + " is " + "Unmigrated")
wf = open("unmigrated.txt", "w")
wring = wf.write(blue[i] + " is " + "Unmigrated\n")
except:
print(blue[i] + " is " + "Migrated")
except:
print(blue[i] + " is " + "Not-Premium")
i+=1
You keep overwriting opening the file with w inside the loop so you only see the last data that was written to the file, either open the file once outside the loop or open with a to append. Opening once would be the simplest approach, you can also use range instead of your while or better again just iterate over the list:
with open("unmigrated.txt", "w") as f: # with close your file automatically
for ele in blue:
.....
Also wring = wf.write(blue[i] + " is " + "Unmigrated\n") sets wring to None which is what write returns so probably not of any real use.
Lastly using a blank expect is usually never a good idea, catch the specific exceptions you expect and log or at least print when you get an error.
Using the requests library, I would break up your code doing something like:
import requests
def get_json(url):
try:
rawdata = requests.get(url)
return rawdata.json()
except requests.exceptions.RequestException as e:
print(e)
except ValueError as e:
print(e)
return {}
txt_file = input("Name of the TXT file: ")
with open(txt_file + ".txt") as fw, open("unmigrated.txt", "w") as f: # with close your file automatically
for line in map(str.rstrip, fw): # remove newlines
url = "https://api.mojang.com/users/profiles/minecraft/{}".format(line)
results = get_json(url).get("id")
if not results:
continue
url_uuid = "https://sessionserver.mojang.com/session/minecraft/profile/{}".format(results)
results = get_json(url_uuid).get('legacy')
print("{} is Unmigrated".format(line))
f.write("{} is Unmigrated\n".format(line))
I am not sure where 'legacy' fits into the code, that logic I will leave to you. You can also iterate directly over the file object so you can forget about splitting the lines into blue.
try:
with open("filename", "w") as f:
f.write("your content")
But that will overwrite all contents of the file.
Instead, if you want to append to the file use:
with open("filename", "a") as f:
If you choose to not use the with syntax, remember to close the file.
Read more here:
https://docs.python.org/2/library/functions.html#open

cannot open file named directory path + filename + current date and time

I created a txt file named as directory path+current date and time. The following error occurs:
File cannot be opened. coercing to Unicode: need string or buffer,
NoneType found
def create_file(count):
filename = "countMetrics"
dir = os.getcwd()
#print 'Current directory path is-'
#print dirPath
date = datetime.datetime.now()
now = date.strftime("%Y-%m-%d %H:%M")
#print 'current date and time is-'
#print now
## date and time representation
#print "Current date & time " + time.strftime("%c")
dirPath = os.path.join(dir, filename)
filenameCreated = dirPath+now+".txt"
#filenameCreated = dirPath+filename+now+".txt"
print filenameCreated
f = openfile(filenameCreated,'a')
return f
#writeFile(f,count)
#defining openfunction
def openfile(filename,mode):
try:
open(filename,mode)
except Exception, err:
print("File cannot be opened.")
print(str(err))
return
def readFile(filename):
try:
target = open(filename,'r')
content=filename.read() # reading contents of file
for line in target:
print content
target.close()
except:
print "File is empty.."
return
#defining write function
def writeFile(filename,count):
try:
target = openfile(filename,'a')
target.write(count)
target.close()
except Exception, err:
print("File have no data to be written.")
print(str(err))
return
Your openfile function is not returning anything. Change it to return the open file descriptor and your code might work :-)
def openfile(filename, mode):
try:
return open(filename, mode)
except Exception as err:
print "File cannot be created", err
return
And add an if in the main code to check whether you receive file descriptor.
f = openfile(filenameCreated,'a')
if not f:
print "No file created"
return
return f
And your writeFile function will be like this:
def writeFile(target, count):
try:
target.write(count)
target.close()
return 1
except Exception as err:
print "Cannot write into the file"
return 0
Because your openfile itself returns a descriptor. You don't need to create another one.

Search, count and add - Python

properties = ["color", "font-size", "font-family", "width", "height"]
inPath = "style.css"
outPath = "output.txt"
#Open a file for reading
file = open(inPath, 'rU')
if file:
# read from the file
filecontents = file.read()
file.close()
else:
print "Error Opening File."
#Open a file for writing
file = open(outPath, 'wb')
if file:
for i in properties:
search = i
index = filecontents.find(search)
file.write(str(index), "\n")
file.close()
else:
print "Error Opening File."
seems to work, but:
It only searches a keyword once?
Its not writing to the output file. function takes exactly 1 argument
I don't want it to print the index actually, but the number of time the keyword appears.
Many thanks
First, you want .count(search), not .find(search), if what you're looking for is # of occurrences.
Second, .write() only takes a single parameter - if you want to write a newline, you need to concatenate it first, or call .write() twice.
Third, doing for i in properties: search = i is redundant; just use the name you want in your for loop.
for search in properties:
cnt = filecontents.count(search)
file.write(str(cnt) + "\n")
from itertools import imap
properties = ("color", "font-size", "font-family", "width", "height")
inPath = "style.css"
outPath = "output.txt"
try:
#Open a file for reading
filecontents = file(inPath).read()
except Exception as exc:
print exc
else:
#Open a file for writing
with open(outPath, 'wb') as out_file:
#for property in properties:
# out_string = "%s %s\n"
# out_file.write( out_string % (
# property, filecontents.count(property)))
outfile.write('\n'.join(
imap(str, imap(filecontents.count, properties))))

How to convert a file to utf-8 in Python?

I need to convert a bunch of files to utf-8 in Python, and I have trouble with the "converting the file" part.
I'd like to do the equivalent of:
iconv -t utf-8 $file > converted/$file # this is shell code
Thanks!
You can use the codecs module, like this:
import codecs
BLOCKSIZE = 1048576 # or some other, desired size in bytes
with codecs.open(sourceFileName, "r", "your-source-encoding") as sourceFile:
with codecs.open(targetFileName, "w", "utf-8") as targetFile:
while True:
contents = sourceFile.read(BLOCKSIZE)
if not contents:
break
targetFile.write(contents)
EDIT: added BLOCKSIZE parameter to control file chunk size.
This worked for me in a small test:
sourceEncoding = "iso-8859-1"
targetEncoding = "utf-8"
source = open("source")
target = open("target", "w")
target.write(unicode(source.read(), sourceEncoding).encode(targetEncoding))
Thanks for the replies, it works!
And since the source files are in mixed formats, I added a list of source formats to be tried in sequence (sourceFormats), and on UnicodeDecodeError I try the next format:
from __future__ import with_statement
import os
import sys
import codecs
from chardet.universaldetector import UniversalDetector
targetFormat = 'utf-8'
outputDir = 'converted'
detector = UniversalDetector()
def get_encoding_type(current_file):
detector.reset()
for line in file(current_file):
detector.feed(line)
if detector.done: break
detector.close()
return detector.result['encoding']
def convertFileBestGuess(filename):
sourceFormats = ['ascii', 'iso-8859-1']
for format in sourceFormats:
try:
with codecs.open(fileName, 'rU', format) as sourceFile:
writeConversion(sourceFile)
print('Done.')
return
except UnicodeDecodeError:
pass
def convertFileWithDetection(fileName):
print("Converting '" + fileName + "'...")
format=get_encoding_type(fileName)
try:
with codecs.open(fileName, 'rU', format) as sourceFile:
writeConversion(sourceFile)
print('Done.')
return
except UnicodeDecodeError:
pass
print("Error: failed to convert '" + fileName + "'.")
def writeConversion(file):
with codecs.open(outputDir + '/' + fileName, 'w', targetFormat) as targetFile:
for line in file:
targetFile.write(line)
# Off topic: get the file list and call convertFile on each file
# ...
(EDIT by Rudro Badhon: this incorporates the original try multiple formats until you don't get an exception as well as an alternate approach that uses chardet.universaldetector)
Answer for unknown source encoding type
based on #Sébastien RoccaSerra
python3.6
import os
from chardet import detect
# get file encoding type
def get_encoding_type(file):
with open(file, 'rb') as f:
rawdata = f.read()
return detect(rawdata)['encoding']
from_codec = get_encoding_type(srcfile)
# add try: except block for reliability
try:
with open(srcfile, 'r', encoding=from_codec) as f, open(trgfile, 'w', encoding='utf-8') as e:
text = f.read() # for small files, for big use chunks
e.write(text)
os.remove(srcfile) # remove old encoding file
os.rename(trgfile, srcfile) # rename new encoding
except UnicodeDecodeError:
print('Decode Error')
except UnicodeEncodeError:
print('Encode Error')
You can use this one liner (assuming you want to convert from utf16 to utf8)
python -c "from pathlib import Path; path = Path('yourfile.txt') ; path.write_text(path.read_text(encoding='utf16'), encoding='utf8')"
Where yourfile.txt is a path to your $file.
For this to work you need python 3.4 or newer (probably nowadays you do).
Below a more readable version of the code above
from pathlib import Path
path = Path("yourfile.txt")
path.write_text(path.read_text(encoding="utf16"), encoding="utf8")
This is a Python3 function for converting any text file into the one with UTF-8 encoding. (without using unnecessary packages)
def correctSubtitleEncoding(filename, newFilename, encoding_from, encoding_to='UTF-8'):
with open(filename, 'r', encoding=encoding_from) as fr:
with open(newFilename, 'w', encoding=encoding_to) as fw:
for line in fr:
fw.write(line[:-1]+'\r\n')
You can use it easily in a loop to convert a list of files.
To guess what's the source encoding you can use the file *nix command.
Example:
$ file --mime jumper.xml
jumper.xml: application/xml; charset=utf-8
This is my brute force method. It also takes care of mingled \n and \r\n in the input.
# open the CSV file
inputfile = open(filelocation, 'rb')
outputfile = open(outputfilelocation, 'w', encoding='utf-8')
for line in inputfile:
if line[-2:] == b'\r\n' or line[-2:] == b'\n\r':
output = line[:-2].decode('utf-8', 'replace') + '\n'
elif line[-1:] == b'\r' or line[-1:] == b'\n':
output = line[:-1].decode('utf-8', 'replace') + '\n'
else:
output = line.decode('utf-8', 'replace') + '\n'
outputfile.write(output)
outputfile.close()
except BaseException as error:
cfg.log(self.outf, "Error(18): opening CSV-file " + filelocation + " failed: " + str(error))
self.loadedwitherrors = 1
return ([])
try:
# open the CSV-file of this source table
csvreader = csv.reader(open(outputfilelocation, "rU"), delimiter=delimitervalue, quoting=quotevalue, dialect=csv.excel_tab)
except BaseException as error:
cfg.log(self.outf, "Error(19): reading CSV-file " + filelocation + " failed: " + str(error))
convert all file in a dir to utf-8 encode. it is recursive and can filter file by suffix. thanks #Sole Sensei
# pip install -i https://pypi.tuna.tsinghua.edu.cn/simple chardet
import os
import re
from chardet import detect
def get_file_list(d):
result = []
for root, dirs, files in os.walk(d):
dirs[:] = [d for d in dirs if d not in ['venv', 'cmake-build-debug']]
for filename in files:
# your filter
if re.search(r'(\.c|\.cpp|\.h|\.txt)$', filename):
result.append(os.path.join(root, filename))
return result
# get file encoding type
def get_encoding_type(file):
with open(file, 'rb') as f:
raw_data = f.read()
return detect(raw_data)['encoding']
if __name__ == "__main__":
file_list = get_file_list('.')
for src_file in file_list:
print(src_file)
trg_file = src_file + '.swp'
from_codec = get_encoding_type(src_file)
try:
with open(src_file, 'r', encoding=from_codec) as f, open(trg_file, 'w', encoding='utf-8') as e:
text = f.read()
e.write(text)
os.remove(src_file)
os.rename(trg_file, src_file)
except UnicodeDecodeError:
print('Decode Error')
except UnicodeEncodeError:
print('Encode Error')

Categories