I'm new to Python and I'm trying to automate some of my work.
I need to create a .wst file (dictation data file) with the same name as its corresponding .DS2 (dictation file) and then populate the .wst file with the data entered (author code, job type).
Perhaps I need to generate a txt then change the extension to .wst? I'm not sure...
Nothing is being created when I run the following, can anyone offer any advice?
import os
# Banner and warning shown before any input is requested.
print('Dictation Zipper 1.0\n')
print('**Warning** What you set in the following fields will apply to ALL dictations in the current folder, please make any manual adjustments after running the tool.\n')
# The three answers are kept exactly as typed (input() returns strings).
get_directory = input('Enter the file path where the dictations are stored, please use a NEW folder outwith the Share...\n')
author_id = input('Enter the four digit author id...\n')
jobtype_id = input('Enter the job type...\n')
# NOTE: os.listdir() with no argument lists the current working directory;
# get_directory is not used anywhere below, and the .wst files are opened by
# bare name, so they are also created relative to the working directory.
for f in os.listdir():
    file_name, file_ext = os.path.splitext(f) #splitting file name and extension
    wst_file = open(file_name + ".wst", "w+") #creating a wst file
    # Same [JobParameters] body for every file; only AuthorId and JobtypeId vary with the input.
    wst_file.write("[JobParameters]\nAuthorId=" + author_id + "\nJobtypeId=" + jobtype_id +"\nPriority=NORMAL\nKeyfield=\nUserfield1=\nUserfield2=\nUserfield3=\nUserfield4=\nNotes=\n")
    wst_file.close() #closing wst file
You need to give the directory name as an argument to os.listdir(). And you need to prepend the directory name to the filenames when opening the file.
# Write one .wst job-parameter file next to every entry found in get_directory.
for f in os.listdir(get_directory):
    file_name, file_ext = os.path.splitext(f)  # splitting file name and extension
    # Build the path inside get_directory so the file is not created in the
    # current working directory.
    path = os.path.join(get_directory, file_name + ".wst")
    # The with-block closes the file even if the write raises.
    with open(path, "w+") as wst_file:  # creating a wst file
        wst_file.write("[JobParameters]\nAuthorId=" + author_id + "\nJobtypeId=" + jobtype_id +"\nPriority=NORMAL\nKeyfield=\nUserfield1=\nUserfield2=\nUserfield3=\nUserfield4=\nNotes=\n")
Related
I have lots of zipped files on a Linux server and each file includes multiple text files.
What I want is to extract some of those text files, which have the same names across the zipped files, and save them to a folder. At the moment I create one folder for each zipped file and extract the text files into it, but I need to add the parent zip file's name to the end of each file name and save all the text files in one directory. For example, if the zipped file was March132017.zip and I extracted holding.txt, my filename would be holding_March132017.txt.
My problem is that I am not able to change the extracted file's name.
I would appreciate if you could advise.
import os
import sys
import zipfile
# Work from the folder that holds the archives so the bare names returned by
# os.listdir resolve correctly in os.path.abspath below.
os.chdir("/feeds/lipper/emaxx")
pwkwd = "/feeds/lipper/emaxx"
for item in os.listdir(pwkwd): # loop through items in dir
    if item.endswith(".zip"): # check for ".zip" extension
        file_name = os.path.abspath(item) # get full path of files
        fh = open(file_name, "rb")
        zip_ref = zipfile.ZipFile(fh)
        # Member names to pull out of every archive (a tuple of four names).
        filelist = 'ISSUERS.TXT' , 'SECMAST.TXT' , 'FUND.TXT' , 'HOLDING.TXT'
        for name in filelist :
            try:
                # One output folder per archive, named after the zip without its extension.
                outpath = "/SCRATCH/emaxx" + "/" + os.path.splitext(item)[0]
                # extract() keeps the member's own name; nothing here renames it.
                zip_ref.extract(name, outpath)
            except KeyError:
                # Raised by extract() when the archive has no member called `name`;
                # the empty dict expression is a no-op, so the member is skipped silently.
                {}
        fh.close()
import zipfile
archive = zipfile.ZipFile('somefile.zip')
# walk every member recorded in the archive
for member in archive.infolist():
    # Changing the ZipInfo's filename before extracting is what renames the
    # file that lands on disk.
    member.filename = do_something_to(member.filename)
    archive.extract(member)
Reference:
https://bitdrop.st0w.com/2010/07/23/python-extracting-a-file-from-a-zip-file-with-a-different-name/
Why not just read the file in question and save it yourself instead of extracting? Something like:
import os
import zipfile
source_dir = "/feeds/lipper/emaxx"  # folder with zip files
target_dir = "/SCRATCH/emaxx"  # folder to save the extracted files

# Are you sure your file names are capitalized in your zip files?
filelist = ['ISSUERS.TXT', 'SECMAST.TXT', 'FUND.TXT', 'HOLDING.TXT']

for item in os.listdir(source_dir):  # loop through items in dir
    if not item.endswith(".zip"):  # check for ".zip" extension
        continue
    file_path = os.path.join(source_dir, item)  # get zip file path
    # The suffix must come from the bare archive name: splitting file_path
    # instead would put the whole source directory into the output file name.
    zip_stem = os.path.splitext(item)[0]
    with zipfile.ZipFile(file_path) as zf:  # open the zip file
        members = set(zf.namelist())  # built once per archive for the membership checks
        for target_file in filelist:  # loop through the list of files to extract
            if target_file not in members:  # check if the file exists in the archive
                continue
            # generate the desired output name, e.g. HOLDING_March132017.txt
            target_name = os.path.splitext(target_file)[0] + "_" + zip_stem + ".txt"
            target_path = os.path.join(target_dir, target_name)  # output path
            # ZipFile.read() returns bytes, so the output is opened in binary mode
            with open(target_path, "wb") as f:
                f.write(zf.read(target_file))  # save the contents of the file in it
            # next file from the list...
    # next zip file...
You could simply run a rename after each file is extracted right? os.rename should do the trick.
# extract() returns the path of the file it just wrote beneath outpath.
extracted_path = zip_ref.extract(name, outpath)
# outpath's last component is the archive name without ".zip"; that is the
# suffix to add. normpath drops a trailing separator, if any, first.
parent_zip = os.path.basename(os.path.normpath(outpath))
new_file_name, ext = os.path.splitext(os.path.basename(name))  # just the filename
new_name_path = os.path.join(
    os.path.dirname(extracted_path),
    new_file_name + "_" + parent_zip + ext,
)
# Rename the extracted file itself, not the directory it was extracted into.
os.rename(extracted_path, new_name_path)
For the filename, if you want it to be incremental, simply start a count and, for each file, increase it by one.
count = 0
for file in files:
count += 1
# ... Do our file actions
new_file_name = original_file_name + "_" + str(count)
# ...
Or if you don't care about the end name you could always use something like a uuid.
import uuid
random_name = uuid.uuid4()
outpath = '/SCRATCH/emaxx'
suffix = os.path.splitext(item)[0]
for name in filelist:
    try:
        # getinfo() raises KeyError when the archive has no such member
        member = zip_ref.getinfo(name)
    except KeyError:
        continue  # this archive does not contain the file; try the next name
    filename, ext = os.path.splitext(name)
    # ext already starts with ".", so no extra dot goes between suffix and ext
    member.filename = f'{filename}_{suffix}{ext}'  # rename the extracting file to the suffix file name
    zip_ref.extract(member, outpath)  # use the renamed file descriptor to extract the file
I doubt it is possible to rename files during their extraction.
What about renaming files once they are extracted ?
Relying on linux bash, you can achieve it in a one line :
# NOTE(review): untested by its author. As written, `-exec echo mv ...` only
# prints the mv commands instead of running them, and outpath and zipName are
# spliced into the shell command unquoted.
os.system("find "+outpath+" -name '*.txt' -exec echo mv {} `echo {} | sed s/.txt/"+zipName+".txt/` \;")
So, first we search all txt files in the specified folder, then exec the renaming command, with the new name computed by sed.
Code not tested, i'm on windows now ^^'
I wrote a Python program to create a .html file in a directory. The directory is created correctly, and I use the open function to create the .html file and try to write some content to it, but the .html file cannot be created:
def save_public_figure_page(self,type,p_f_name):
    """Write the browser's current page source to ``<type>_<p_f_name>.html``.

    The target folder is ``dataset/html/<type>/<YYYYMMDD>/<p_f_name>``; it is
    created through ``data_storage.directory_create`` before the file is
    opened. Also stores today's date on ``glovar.date``.
    """
    glovar.date = time.strftime("%Y%m%d", time.localtime())
    # Relative path; p_f_name is used exactly as passed in, including any
    # leading or trailing whitespace.
    p_f_page_file_directory = os.path.join("dataset", "html",type,glovar.date,p_f_name)
    data_storage.directory_create(p_f_page_file_directory)
    html_user_page = glovar.webdriver_browser.page_source
    p_f_page_file = os.path.join(p_f_page_file_directory,type + "_" + p_f_name + ".html")
    # NOTE(review): this relative path is resolved against the current working
    # directory when opened - confirm that matches where directory_create
    # made the folder.
    html_file = open(p_f_page_file, "w", encoding='utf-8')
    html_file.write(html_user_page)
    html_file.close()
the directory_create function in data_storage is:
#create the file storage directory
def directory_create(path):
    """Create ``path``, with any missing parents, beneath this module's folder.

    ``path`` is joined onto the directory that contains this source file, so a
    relative ``path`` is resolved against the module's location rather than
    the current working directory. Calling it again for an existing directory
    is a no-op.
    """
    directory = os.path.join(os.path.dirname(__file__), path)
    # exist_ok=True replaces the separate exists() check, which could race
    # with something else creating the directory in between.
    os.makedirs(directory, exist_ok=True)
it errors:
<class 'FileNotFoundError'> at /public_figure_name_sub
[Errno 2] No such file or directory: 'dataset\\html\\public_figure\\20170404\\Donald Trump \\public_figure_Donald Trump .html'
the current directory is under /dataset/, I found the directory:
F:\MyDocument\F\My Document\Training\Python\PyCharmProject\FaceBookCrawl\dataset\html\public_figure\20170404\Donald Trump
has been created correctly,but the file——public_figure_Donald Trump .html can not be created correctly,could you please tell me the reason and how to correct
As suggested by Jean-François Fabre, your file has a space just before the ".html".
To solve this, call strip() (Python strings have no trim() method) on the variable p_f_name in your 7th line:
# Added strip() to p_f_name (Python's str has strip(), not trim()) so the
# file name no longer ends with a space before ".html"
p_f_page_file = os.path.join(p_f_page_file_directory,type +
                             "_" + p_f_name.strip() + ".html")
This will create the file:
public_figure_Donald Trump.html
instead of
public_figure_Donald Trump .html
PS: Your filename also has a lot of spaces between Donald and Trump. I don't know where the file name comes from, but you might want to fix that too.
Function save_public_figure_page
class public_figure:
    def save_public_figure_page(self, type, p_f_name):
        """Save the browser's current page source as ``<type>_<name>.html``.

        The file is written to ``dataset/html/<type>/<YYYYMMDD>/<name>/``
        beneath the base directory that ``data_storage.directory_create``
        resolves the path against. ``p_f_name`` is stripped of surrounding
        whitespace first, so neither the folder nor the file name ends in a
        stray space. Also stores today's date on ``glovar.date``.
        """
        glovar.date = time.strftime("%Y%m%d", time.localtime())
        # str has no trim(); strip() removes the extra spaces
        p_f_name = p_f_name.strip()
        p_f_page_name = '{t}_{pfn}.html'.format(t=type, pfn=p_f_name)
        p_f_page_file_directory = os.path.join(
            "dataset", "html",
            type, glovar.date, p_f_name,
        )
        # directory_create is an instance method, so it needs an instance.
        storage = data_storage()
        if storage.directory_create(p_f_page_file_directory):
            # storage.directory is the full path directory_create just made;
            # writing there keeps the file and its folder together whatever
            # the current working directory is.
            html_user_page = glovar.webdriver_browser.page_source
            p_f_page_file = os.path.join(storage.directory, p_f_page_name)
            # The with-block closes the file even if the write raises.
            with open(p_f_page_file, "w", encoding='utf-8') as html_file:
                html_file.write(html_user_page)
directory_create method of data_storage
#create the file storage directory
class data_storage:
    def directory_create(self, path):
        """Create ``path`` beneath this module's folder and remember it.

        ``path`` is joined onto the directory containing this source file and
        the result is stored on ``self.directory``. Missing parent directories
        are created as needed.

        Returns:
            True once the directory exists (newly created or already there).

        Raises:
            OSError: propagated from ``os.makedirs`` when the directory cannot
                be created.
        """
        self.directory = os.path.join(os.path.dirname(__file__), path)
        # exist_ok=True covers the "already there" case without a separate
        # exists() check that could race with another creator; any other
        # failure propagates exactly as the old bare re-raise did.
        os.makedirs(self.directory, exist_ok=True)
        return True
Let's say I have n files in a directory with filenames: file_1.txt, file_2.txt, file_3.txt .....file_n.txt. I would like to import them into Python individually and then do some computation on them, and then store the results into n corresponding output files: file_1_o.txt, file_2_o.txt, ....file_n_o.txt.
I've figured out how to import multiple files:
import glob
import numpy as np
path = r'home\...\CurrentDirectory'
allFiles = glob.glob(path + '/*.txt')
for file in allFiles:
# do something to file
...
...
np.savetxt(file, ) ???
Not quite sure how to append the _o.txt (or any string for that matter) after the filename so that the output file is file_1_o.txt
Can you use the following snippet to build the output filename?
# rsplit with maxsplit=1 cuts only at the last dot, so names or paths that
# contain other dots keep everything before the extension intact
parts = in_filename.rsplit(".", 1)
out_filename = parts[0] + "_o." + parts[1]
where I assumed in_filename is of the form "file_1.txt".
Of course, it would probably be better to put "_o." (the suffix before the extension) in a variable, so that you can change that suffix in just one place.
In your case it means
import glob
import numpy as np
path = r'home\...\CurrentDirectory'
allFiles = glob.glob(path + '/*.txt')
for file in allFiles:
# do something to file
...
parts = file.split(".")
out_filename = parts[0] + "_o." + parts[1]
np.savetxt(out_filename, ) ???
but you need to be careful, since maybe before you pass out_filename to np.savetxt you need to build the full path so you might need to have something like
np.savetxt(os.path.join(path, out_filename), )
or something along those lines.
If you would like to combine the change in basically one line and define your "suffix in a variable" as I mentioned before you could have something like
hh = "_o." # variable suffix
..........
# inside your loop now
for file in allFiles:
out_filename = hh.join(file.split("."))
which is another way of doing the same thing, using join on the split list, as mentioned by @NathanAck in his answer.
import os
#put the path to the files here
filePath = "C:/stack/codes/"
theFiles = os.listdir(filePath)
for name in theFiles:
    #add path name before the file
    file = os.path.join(filePath, name)
    #os.listdir also returns sub-directories, which cannot be opened as files
    if not os.path.isfile(file):
        continue
    #the with-block closes the input as soon as it has been read
    with open(file, 'r') as fileToRead:
        fileData = fileToRead.read()
    #DO WORK ON SPECIFIC FILE HERE
    #access the file through the fileData variable
    fileData = fileData + "\nAdd text or do some other operations"
    #change the file name to add _o
    #rsplit with maxsplit=1 cuts only at the last dot of the bare name, so
    #"a.b.txt" becomes "a.b_o.txt" rather than "a_o.b_o.txt"
    #(a name with no dot is left unchanged, so that file is rewritten in place)
    fileVar = name.rsplit(".", 1)
    newFileName = os.path.join(filePath, "_o.".join(fileVar))
    #write the modified data held in fileData to the _o file
    with open(newFileName, 'w') as fileToWrite:
        fileToWrite.write(fileData)
I want to extract the files from a .zip archive one by one. Before extracting, I need to rename each file.
# Open the archive at `source` for reading.
myzip = zipfile.ZipFile(source,'r')
for zib_e in myzip.namelist():
    # basename is empty when the member name ends with "/", so those
    # entries are skipped.
    filename = os.path.basename(zib_e)
    if not filename:
        continue
    print zib_e
    # extract() is given the member's full archive name, so any folder part
    # of that name is kept beneath /tmp/.
    myzip.extract(zib_e,"/tmp/")
myzip.close()
The above code extracts all file in /tmp/. But I wanted to rename each file and save in destination directory ie., /tmp/ without zipped structure
After including read function, I can manipulate the file name
def guid1():
    """Return a newly generated random (version 4) UUID as a string."""
    return str(uuid.uuid4())
def zipextract(source, destination):
    """Copy every file in the zip at ``source`` to ``destination`` under a new name.

    Each member is read into memory and written to
    ``destination + <uuid> + ".txt"``; the folder structure inside the archive
    is not reproduced. ``destination`` is used as a plain string prefix, so it
    should end with a path separator when it names a directory.
    """
    # Both with-blocks make sure the archive and each output file are closed
    # even when a read or write raises.
    with zipfile.ZipFile(source, 'r') as myzip:
        for zib_e in myzip.namelist():
            filename = os.path.basename(zib_e)
            if not filename:
                continue  # name ends with "/": nothing to write
            print(destination)  # call form works on Python 2 and 3
            data = myzip.read(zib_e)
            # exporting to given location one by one
            with open(destination + guid1() + ".txt", 'wb') as output:
                output.write(data)
My goal is to get to a txt file that is within the second layer of zip files. The issue is that the txt file has the same name in all the .zip files, so each extraction overwrites the previous .txt and I end up with only one .txt.
from ftplib import *
import os, shutil, glob, zipfile, xlsxwriter
# Connect and log in over FTP-TLS (host and credentials are placeholders).
ftps = FTP_TLS()
ftps.connect(host='8.8.8.8', port=23)
ftps.login(user='xxxxxxx', passwd='xxxxxxx')
print ftps.getwelcome()
print 'Access was granted'
# Switch the data connection to protected (encrypted) mode before transfers.
ftps.prot_p()
ftps.cwd('DirectoryINeed')
data = ftps.nlst() #Names in the remote directory (.zip files, per the author)
data.sort() #Sort the names so the position picked below is well defined
theFile = data[-2] #Second-to-last name after sorting: the .zip to retrieve
fileSize = ftps.size(theFile) #gets the size of the file
print fileSize, 'bytes' #prints the size
def grabFile():
    """Download ``theFile`` from the FTP session into ``the.zip`` and log out.

    Uses the module-level ``ftps`` connection and ``theFile`` name. The
    session is closed with ``quit()`` afterwards, so ``ftps`` cannot be used
    for further transfers.
    """
    filename = 'the.zip'
    # The with-block closes the local file even if the transfer raises.
    with open(filename, 'wb') as localfile:
        ftps.retrbinary('RETR ' + theFile, localfile.write)
    ftps.quit()
def unzipping():
    """Extract every ``*.zip`` in the current directory into ``anotherdirectory``."""
    # All archives share the one output folder.
    for zip_file in glob.glob('*.zip'):
        with zipfile.ZipFile(zip_file, 'r') as Z:
            Z.extractall('anotherdirectory')
grabFile()
unzipping()
lastUnzip()
After this runs it grabs the .zip that I need and extracts the contents to a folder named anotherdirectory. Where it holds the second tier of .zips. This is where I get into trouble. When I try to extract the files from each zip. They all share the same name. I end up with a single .txt when I need one for each zip.
I think you're specifying the same output directory and filename each time. In the unzipping function,
change
Z.extractall('anotherdirectory')
to
Z.extractall(zip_file)
or
Z.extractall('anotherdirectory' + zip_file)
if the zip_file's are all the same, give each output folder a unique numbered name:
before unzipping function:
count = 1
then replace the other code with this:
Z.extractall('anotherdirectory/' + str(count))
count += 1
Thanks to jeremydeanlakey's response, I was able to get this part of my script. Here is how I did it:
folderUnzip = 'DirectoryYouNeed'
# glob.glob returns matches in arbitrary order; sorting makes the folder
# number each archive receives reproducible between runs.
zip_files = sorted(glob.glob('*.zip'))
# enumerate(..., 1) hands out 1, 2, 3, ... so every archive gets its own
# sub-folder and same-named members no longer overwrite each other.
for count, zip_file in enumerate(zip_files, 1):
    with zipfile.ZipFile(zip_file, 'r') as Z:
        Z.extractall(folderUnzip + '/' + str(count))