Empty files created when deleting the first element in string - python

My code is -
import os
currentdir = "........."
resultdir="............."
for root, dirs, files in os.walk(currentdir):
for name in files:
outfile1 = open(resultdir + "/" + name, "w+")
#outfile1 = open(resultdir + "/" + name, "w+")
print(name)
outfile2 = open(root+"/"+name,'r')
line = outfile2.readline()
while line:
#print(line)
if line[0]!="\"":
print(line)
outfile1.write(line)
outfile1.write("\n")
line = outfile2.readline()
outfile2.close()
outfile1.close()
I am trying to read every file from the directory and in each file I am trying to omit the lines which have a " in the starting and saving these corrected files in a new directory.But I am getting empty files in the new directory.

Related

Searching excel files for string in file through multiple folder directories not working

I have this code where I am trying to search a Directory and Sub Directories for a specified string within .xls and .xlsx files and return the file name for now. When I run this - I get a return of each file directory path as text for the files ending in .xls and .xlsx and the search string parameter I use under those same returned results. The code is not isolating the files with the string - rather, just returning the file path as text for all results and adding my string parameter to search for under that. What could be happening here? and is it possible to pass a list here and copy the discovered files to a folder? That is where I am trying to get with this in the end. Thank you.
import os
import openpyxl
def findFiles(strings, dir, subDirs, fileContent, fileExtensions):
filesInDir = []
foundFiles = []
filesFound = 0
if not subDirs:
for filename in os.listdir(dir):
if os.path.isfile(os.path.join(dir, filename).replace("\\", "/")):
filesInDir.append(os.path.join(dir, filename).replace("\\", "/"))
else:
for root, subdirs, files in os.walk(dir):
for f in files:
if not os.path.isdir(os.path.join(root, f).replace("\\", "/")):
filesInDir.append(os.path.join(root, f).replace("\\", "/"))
print(filesInDir)
if filesInDir:
for file in filesInDir:
print("Current file: "+file)
filename, extension = os.path.splitext(file)
if fileExtensions:
fileText = extension
else:
fileText = os.path.basename(filename).lower()
if fileContent:
fileText += getFileContent(file).lower()
for string in strings:
print(string)
if string in fileText:
foundFiles.append(file)
filesFound += 1
break
return foundFiles
def getFileContent(filename):
if filename.partition(".")[2] in supportedTypes:
if filename.endswith(".xls"):
content = ""
with openpyxl.load_workbook(filename) as pdf:
for x in range(0, len(pdf.pages)):
page = pdf.pages[x]
content = content + page.extract_text()
return content
elif filename.endswith(".xlsx"):
with openpyxl.load_workbook(filename, 'r') as f:
content = ""
lines = f.readlines()
for x in lines:
content = content + x
f.close()
return content
else:
return ""
supportedTypes = [".xls", ".xlsx"]
print(findFiles(strings=["55413354"], dir="C:/Users/User/", subDirs=True, fileContent=True, fileExtensions=False))
Expected output sample - reflects a find for string '55413354` - as in, that string was located in below file name only out of 3 files.
Excel File Name 123
Actual output - Returns everything - no filter is happening, and includes my search string under the file name.
path/Excel File Name 123
55413354
path/Excel File Name 321
55413354
path/Excel File Name 111
55413354

Modify .txt files, save originals and move modified

I modify many .txt files by adding string at the end of them.
How to save the original files and then move and rename modified ones?
I use os library.
i - iterated .txt files
new_name is variable consiting of random numbers
path=os.getcwd()
dirName=('new_directory')
This code only move files:
old_file = os.path.join(path, str(i))
new_path = os.path.join(path, dirName)
new_file = os.path.join(new_path, new_name)
os.rename(old_file, new_file)
I would like to use also os library to move and rename modified files.
Here is my whole code:
import os
import random
pressure=('Added pressure:')
dirName=('new_directory')
path=os.getcwd()
try:
os.mkdir(dirName)
print("Done: "+dirName)
except FileExistsError:
print("Directory" + dirName +" exist")
list=[]
for file in os.listdir("./"):
if file.endswith(".txt"):
lista.append(file)
for i in list:
f = open(i,"r")
file_contents = f.read()
print(f.read())
f.close()
if(pressure in file_contents):
print('It was added!')
else:
file=open(i, 'a')
rand_press=(str(random.randrange(980, 1040, 5))) #
rand_temp=(str(round(random.uniform(18, 26),2)))
press_and_temp=('Added pressure: \t'+rand_press+' mbar\n'+'Added temperature: \t'+rand_temp+' degC\n')
file.write(press_and_temp)
file.close()
new_name=str(rand_temp+'_'+rand_press+'.txt')
old_file = os.path.join(path, str(i))
new_path = os.path.join(path, dirName)
new_file = os.path.join(new_path, new_name)
os.rename(old_file, new_file)
Here are two suggestions.
1) Copy the file to a new location. Modify the second file. https://docs.python.org/3/library/shutil.html
2) Read the file, store it in some variable. Close the file. Create a new file in a new location, write to the file with your modifications, save it. https://www.guru99.com/reading-and-writing-files-in-python.html

Python function within a loop iterating over text files only works on the first file

I'm writing a simple script which loops over some text file and uses a function which should replace some string looking in a .csv file (every row has the word to replace and the word which I want there)
Here is my simple code:
import os
import re
import csv
def substitute_tips(table, tree_content):
count = 0
for l in table:
print("element of the table", l[1])
reg_tree = re.search(l[1],tree_content)
if reg_tree is not None:
#print("match in the tree: ",reg_tree.group())
tree_content = tree_content.replace(reg_tree.group(), l[0])
count = count + 1
else:
print("Not found: ",l[1])
tree_content = tree_content
print("Substitutions done: ",count)
return(tree_content)
path=os.getcwd()
table_name = "162_table.csv"
table = open(table_name)
csv_table = csv.reader(table, delimiter='\t')
for root, dirs, files in os.walk(path, topdown=True):
for name in files:
if name.endswith(".tree"):
print(Fore.GREEN + "Working on treefile", name)
my_tree = open(name, "r")
my_tree_content = my_tree.read()
output_tree = substitute_tips(csv_table, my_tree_content)
output_file = open(name.rstrip("tree") + "SPECIES_NAME.tre", "w")
output_file.write(output_tree)
output_file.close()
else:
print(Fore.YELLOW + name ,Fore.RED + "doesn't end in .tree")
It's probably very easy, but I'm a newbie.
Thanks!
The files list returned by os.walk contains only the file names rather than the full path names. You should join root with the file names instead to be able to open them:
Change:
my_tree = open(name, "r")
...
output_file = open(name.rstrip("tree") + "SPECIES_NAME.tre", "w")
to:
my_tree = open(os.path.join(root, name), "r")
...
output_file = open(os.path.join(root, name.rstrip("tree") + "SPECIES_NAME.tre"), "w")

renaming the extracted file from zipfile

I have lots of zipped files on a Linux server and each file includes multiple text files.
what I want is to extract some of those text files, which have the same name across zipped files and save it a folder; I am creating one folder for each zipped file and extract the text file to it. I need to add the parent zipped folder name to the end of file names and save all text files in one directory. For example, if the zipped folder was March132017.zip and I extracted holding.txt, my filename would be holding_march13207.txt.
My problem is that I am not able to change the extracted file's name.
I would appreciate if you could advise.
import os
import sys
import zipfile
os.chdir("/feeds/lipper/emaxx")
pwkwd = "/feeds/lipper/emaxx"
for item in os.listdir(pwkwd): # loop through items in dir
if item.endswith(".zip"): # check for ".zip" extension
file_name = os.path.abspath(item) # get full path of files
fh = open(file_name, "rb")
zip_ref = zipfile.ZipFile(fh)
filelist = 'ISSUERS.TXT' , 'SECMAST.TXT' , 'FUND.TXT' , 'HOLDING.TXT'
for name in filelist :
try:
outpath = "/SCRATCH/emaxx" + "/" + os.path.splitext(item)[0]
zip_ref.extract(name, outpath)
except KeyError:
{}
fh.close()
import zipfile
zipdata = zipfile.ZipFile('somefile.zip')
zipinfos = zipdata.infolist()
# iterate through each file
for zipinfo in zipinfos:
# This will do the renaming
zipinfo.filename = do_something_to(zipinfo.filename)
zipdata.extract(zipinfo)
Reference:
https://bitdrop.st0w.com/2010/07/23/python-extracting-a-file-from-a-zip-file-with-a-different-name/
Why not just read the file in question and save it yourself instead of extracting? Something like:
import os
import zipfile
source_dir = "/feeds/lipper/emaxx" # folder with zip files
target_dir = "/SCRATCH/emaxx" # folder to save the extracted files
# Are you sure your files names are capitalized in your zip files?
filelist = ['ISSUERS.TXT', 'SECMAST.TXT', 'FUND.TXT', 'HOLDING.TXT']
for item in os.listdir(source_dir): # loop through items in dir
if item.endswith(".zip"): # check for ".zip" extension
file_path = os.path.join(source_dir, item) # get zip file path
with zipfile.ZipFile(file_path) as zf: # open the zip file
for target_file in filelist: # loop through the list of files to extract
if target_file in zf.namelist(): # check if the file exists in the archive
# generate the desired output name:
target_name = os.path.splitext(target_file)[0] + "_" + os.path.splitext(file_path)[0] + ".txt"
target_path = os.path.join(target_dir, target_name) # output path
with open(target_path, "w") as f: # open the output path for writing
f.write(zf.read(target_file)) # save the contents of the file in it
# next file from the list...
# next zip file...
You could simply run a rename after each file is extracted right? os.rename should do the trick.
zip_ref.extract(name, outpath)
parent_zip = os.path.basename(os.path.dirname(outpath)) + ".zip"
new_file_name = os.path.splitext(os.path.basename(name))[0] # just the filename
new_name_path = os.path.dirname(outpath) + os.sep + new_file_name + "_" + parent_zip
os.rename(outpath, new_namepath)
For the filename, if you want it to be incremental, simply start a count and for each file, go up by on.
count = 0
for file in files:
count += 1
# ... Do our file actions
new_file_name = original_file_name + "_" + str(count)
# ...
Or if you don't care about the end name you could always use something like a uuid.
import uuid
random_name = uuid.uuid4()
outpath = '/SCRATCH/emaxx'
suffix = os.path.splitext(item)[0]
for name in filelist :
index = zip_ref.namelist().find(name)
if index != -1: # check the file exists in the zipfile
filename, ext = os.path.splitext(name)
zip_ref.filelist[index].filename = f'{filename}_{suffix}.{ext}' # rename the extracting file to the suffix file name
zip_ref.extract(zip_ref.filelist[index], outpath) # use the renamed file descriptor to extract the file
I doubt this is possible to rename file during their extraction.
What about renaming files once they are extracted ?
Relying on linux bash, you can achieve it in a one line :
os.system("find "+outpath+" -name '*.txt' -exec echo mv {} `echo {} | sed s/.txt/"+zipName+".txt/` \;")
So, first we search all txt files in the specified folder, then exec the renaming command, with the new name computed by sed.
Code not tested, i'm on windows now ^^'

How to save all files, not replacing the file?

I have 100 text files and I want to save it into 100 text files too. Right now, my coding can read all the files but it save only one file, which is the latest result. Here I attached the code.
def nama():
path = "C:/Amar/code/"
infilename = os.listdir(path)
print len(infilename)
for filename in infilename:
print("jumpa dah" + path + "\\"+ filename)
f = open(path + "\\" + filename, "r")
data = f.read()
f.close()
lines = data.split('\n')
outfilename = path + "result.txt"
print outfilename
f = open(outfilename , "a")
Append a string that will act as a unique identifier for each output file. You can use the input filename for this:
outfilename = path + filename + "_result.txt"
# e.g reports_result.txt

Categories