Cannot merge various text files with similar names into one file - python

I have an issue: I iterate through a folder and would like to merge the files whose names end with a particular suffix. I have files like 1a_USR02.txt, 1b_USR02.txt and 1a_USR06.txt, 1b_USR06.txt. However, when I use the following code, the final file FINAL_USR02.txt or FINAL_USR06.txt contains only the contents of the second file (1b_USR02.txt or 1b_USR06.txt). Any ideas?
# Question's attempt: for each suffix in `delheadfiles`, copy every file in
# the FINAL sub-folder whose name ends with "{suffix}.txt" into
# "FINAL_{suffix}.txt".
# NOTE(review): this listing has lost its indentation; the comments below
# assume each statement nests under the one before it - confirm against the
# original post.
import os
import shutil
cwd = os.getcwd()
# The folder path is assembled by hand with Windows separators.
directory = (cwd + '\\FINAL' + '\\')
# Name suffixes to look for; each one also names its output file.
delheadfiles = ['UST04', 'USR02', 'USR06','1251', 'AGRS', 'TEXTS',\
'USERS', 'FLAGS', 'DEVACCESS', 'USERNAME', 'TSTC', 'TSTCT']
for delheadfile in delheadfiles:
for file in os.listdir(directory):
if file.endswith(delheadfile + ".txt"):
# Output path for this suffix, located in the same folder that is scanned.
table04 = (directory + 'FINAL_' + delheadfile + '.txt')
with open(directory + file, 'rb') as readfile:
# Same test as the one two statements above, repeated.
if file.endswith(delheadfile + ".txt"):
# NOTE(review): mode 'wb' truncates the target every time it is opened, and
# it is opened once per matching input file, so each copy replaces the
# previous one and only the last matching file remains in the output.
with open(table04, 'wb') as outfile:
shutil.copyfileobj(readfile, outfile)

Try this:
import os

# Merge every "*{suffix}.txt" file of the current directory into
# "FINAL_{suffix}.txt", one output file per suffix.
files_extensions = ['UST04', 'USR02', 'USR06']

# List the directory once. os.listdir() returns names in arbitrary order, so
# sort them to make the order of the merged content predictable.
folder_files = sorted(os.listdir())

for extension in files_extensions:
    suffix = extension + '.txt'
    out_name = 'FINAL_' + suffix
    # Mode 'w' rebuilds the merged file from scratch, so running the script
    # twice does not append a second copy of every input.
    with open(out_name, 'w') as out_file:
        for item in folder_files:
            # The merged file itself ends with the same suffix; skip it so a
            # previous result is never fed back into the new one.
            if item == out_name or not item.endswith(suffix):
                continue
            # Use a context manager so each input handle is closed promptly.
            with open(item, 'r') as in_file:
                out_file.write(in_file.read())

Related

How to get file creation dates for every files of a repertory ? (python)

I am writing basic information about files to a CSV file.
So far I have the file name and the path of each file in my directory.
I want the creation date of every file in the directory, but I can only get it for one of them (I don't need it in timestamp format):
# Question's attempt: write one CSV row per file found under `repertory`,
# holding the file name, its folder and a ctime value.
# NOTE(review): indentation was lost in this listing.
import os
import csv
# Evaluated a single time, before the directory walk starts; `filename` is
# not defined anywhere in this excerpt at this point.
creadate = os.path.getctime(filename)
with open('test.csv', 'wb') as f:
writer = csv.writer(f)
# `repertory` (the root folder to walk) is not defined in this excerpt.
for path, dirs, files in os.walk(repertory):
for filename in files:
# `creadate` is never recomputed inside the loop, so every row carries the
# same value. The three values are joined by hand into one field.
writer.writerow([filename + ';' + path + ';' + str(creadate)])
So, obviously, I get the same date in the third column of every row, and I can't figure out how to get it for each file.
Thanks !
Put the date-getting logic in the loop, and use datetime to make it human readable.
import os
import csv
import datetime

# Root folder to scan; every file below it gets one row in test.csv.
repertory = '.'

# Text mode with newline='' is what the csv module expects on Python 3
# (binary mode 'wb' makes csv.writer fail there because it writes str).
with open('test.csv', 'w', newline='') as f:
    # Let the writer produce real ';'-separated columns instead of gluing the
    # values into a single field by hand; names containing ';' or quotes are
    # then escaped correctly.
    writer = csv.writer(f, delimiter=';')
    for path, dirs, files in os.walk(repertory):
        for filename in files:
            # The timestamp must be looked up per file, using the full path:
            # `filename` alone is relative to `path`, not to the working
            # directory.
            full_path = os.path.join(path, filename)
            # NOTE: getctime() is the creation time on Windows but the time of
            # the last metadata change on Unix.
            create_date = datetime.datetime.fromtimestamp(
                os.path.getctime(full_path))
            writer.writerow([filename, path, str(create_date)])

How to save all files, not replacing the file?

I have 100 text files and I want to save the results into 100 separate text files too. Right now, my code can read all the files, but it saves only one file, which holds the latest result. I have attached the code here.
# Question's function: reads every file found in `path` and opens an output
# file for the result.
# NOTE(review): Python 2 syntax (print statements); indentation was lost in
# this listing and the function body appears to be cut off after the last
# line shown.
def nama():
path = "C:/Amar/code/"
infilename = os.listdir(path)
print len(infilename)
for filename in infilename:
print("jumpa dah" + path + "\\"+ filename)
f = open(path + "\\" + filename, "r")
data = f.read()
f.close()
# `lines` is not used again within the excerpt shown.
lines = data.split('\n')
# The output name does not depend on `filename`, so every input file maps to
# the same result.txt.
outfilename = path + "result.txt"
print outfilename
# Opened in append mode; the excerpt ends here without writing to or closing
# this handle.
f = open(outfilename , "a")
Append a string that will act as a unique identifier for each output file. You can use the input filename for this:
# Derive the output name from the input name so that every input file gets
# its own result file. The input's extension is dropped first; otherwise
# "reports.txt" would become "reports.txt_result.txt".
outfilename = os.path.join(path, os.path.splitext(filename)[0] + "_result.txt")
# e.g. reports.txt -> reports_result.txt

How to rename file names using text file in python

I need to rename a bunch of files in a folder with new name reference from a text file. Can you please give an example for this.
My New Names In a Text file:
1BA
1BB
1BC
1BD
1BE
1BF
1C0
1C1
1C2
1C3
Like this.
Updated Code:
# Question's attempt at renaming a file from names stored in names.txt.
# NOTE(review): indentation was lost in this listing.
import csv
import os
with open('names.txt') as f2:
# Splits the whole file content on commas; the result is a list of strings.
filedata = f2.read().split(",")
# Only the first two items are used, so a single rename is performed.
os.rename(filedata[0].strip(), filedata[1].strip())
f2.close()
f2 = open ('Lines.txt','w')
# NOTE(review): `filedata` is a list and file.write() accepts only a string,
# so this call raises TypeError.
f2.write(filedata)
f2.close()
What about using a CSV (comma separated) file for input in the format oldPath, newPath and do the following:
import csv
import os

# Rename files according to names.csv, where each row is "oldPath, newPath".
# newline='' is how the csv module expects its input files to be opened.
with open('names.csv', newline='') as csvfile:
    reader = csv.reader(csvfile)
    for row in reader:
        # Skip blank lines and rows without a second column instead of
        # failing with IndexError half-way through the batch.
        if len(row) < 2:
            continue
        # The documented format has a space after the comma; strip it so the
        # new name does not start with a blank.
        oldPath = row[0].strip()
        newPath = row[1].strip()
        os.rename(oldPath, newPath)
Alternatively, if you want to move the file to another directory/filesystem you can have a look at shutil.move
# Create old.txt and rename.txt in the target folder.
# Do not include the folder path inside the txt files.
# Put one filename per line, including its extension; line N of rename.txt is
# the new name for line N of old.txt.
from pathlib import Path
import os
import sys

print("enter path of folder")
path = input()


def _read_names(list_file):
    """Return the non-blank lines of *list_file*, trailing whitespace removed."""
    with open(list_file, "r", encoding="utf8", errors='ignore') as readtxt:
        stripped = (line.rstrip() for line in readtxt)
        return [name for name in stripped if name]


# os.path.join picks the right separator, so the script is not tied to "\\".
oldList = _read_names(os.path.join(path, "old.txt"))
newList = _read_names(os.path.join(path, "rename.txt"))

# Check the pairing before touching any file: a mismatch would otherwise
# surface as an error after some of the files had already been renamed.
if len(oldList) != len(newList):
    sys.exit("old.txt lists %d names but rename.txt lists %d; nothing was renamed"
             % (len(oldList), len(newList)))

for old_name, new_name in zip(oldList, newList):
    os.rename(os.path.join(path, old_name), os.path.join(path, new_name))

How to output in different directory?

I have this:
# Question's attempt: write a comma-joined ("tokenized") copy of each input
# file.
# NOTE(review): indentation was lost in this listing.
from os import path
# NOTE(review): this value looks like a glob pattern rather than a folder,
# yet it is passed to path.join() below as the directory part - confirm.
base_path = "C:\\texts\\*.txt"
# `files` is not defined in this excerpt.
for file in files:
with open (file) as in_file, open(path.join(base_path,"%s_tokenized.txt" % file), "w") as out_file:
data = in_file.readlines()
for line in data:
# Split the line on whitespace and re-join the words with commas.
words = line.split()
str1 = ','.join(words)
out_file.write(str1)
out_file.write("\n")
It produces the tokenized files in the same directory it reads from. How can I write those output files to a different directory, such as "C:\\texts\\Tokenized"?
I know there are ways to move the new files to another directory after producing them, but what I want to know is whether there is any way to write the new files to the other directory at the moment they are produced by the code above.
Is this what you're looking for:
import os
import glob

source_pattern = 'c:/texts/*.txt'
output_directory = 'c:/texts/tokenized'

# open(..., 'w') creates files but not folders, so make sure the destination
# exists before writing into it.
os.makedirs(output_directory, exist_ok=True)

# Iterate over files matching source_pattern
for input_file in glob.glob(source_pattern):
    # Build the output filename: same base name plus "_tokenized", same
    # extension, placed in output_directory.
    base, ext = os.path.splitext(os.path.basename(input_file))
    output_file = os.path.join(output_directory, base + '_tokenized' + ext)
    with open(input_file) as in_file, open(output_file, 'w') as out_file:
        # One output line per input line: its whitespace-separated words
        # joined by commas.
        out_file.writelines(','.join(line.split()) + '\n' for line in in_file)
This is how I output to files in arbitrary directories :
dir_name = "../some_dir"
# exist_ok=True creates the folder only when it is missing and avoids the
# check-then-create race of testing os.path.exists() first.
os.makedirs(dir_name, exist_ok=True)
# os.path.join inserts the separator between folder and file name.
out_file_name = os.path.join(dir_name, 'out.txt')
# Remember to close out_file (or open it in a `with` block) when done.
out_file = open(out_file_name, 'w')
EDIT :
file_name = "{0}_tokenized.txt".format(something_from_tokenizing)
os.makedirs(dir_name, exist_ok=True)
# Join with os.path.join: plain concatenation omits the separator, so a
# dir_name without a trailing slash would produce a sibling file such as
# "../some_dirX_tokenized.txt" instead of a file inside the folder.
out_file_name = os.path.join(dir_name, file_name)
EDIT :
I just tried it, worked for me. You simply need two paths, one for the source directory and one for the destination. Hope this helps.
import os
from os import path

# Input files to tokenize; each one must exist in the current directory.
files = ["in.txt", "out.txt"]
base_path = "."      # source directory
dir_name = "./some_dir"  # destination directory

# Create the destination if needed (no error when it already exists).
os.makedirs(dir_name, exist_ok=True)

for file in files:
    out_name = path.join(dir_name, "%s_tokenized.txt" % file)
    # Both handles are closed by the `with` block; no other handles on the
    # input files are kept open.
    with open(file) as in_file, open(out_name, "w") as out_file:
        for line in in_file:
            # Whitespace-separated words of the line, joined by commas.
            out_file.write(','.join(line.split()))
            out_file.write("\n")

Regex Search Replace for Multiple Files in a Directory using Python

Where am I going wrong ??
# Question's attempt: apply every replacement in REGEXES to each line of the
# matching files, writing the result back into the same file.
# NOTE(review): indentation was lost in this listing.
import os, os.path, re
path = "D:\python-test"
myfiles = os.listdir(path)
# (compiled pattern, replacement text) pairs.
REGEXES = [(re.compile(r'dog'), 'cat'),
(re.compile(r'123'), '789')]
for f in myfiles:
file_name, file_extension = os.path.splitext(f)
# Only files with one of these extensions are processed.
if file_extension in ('.txt', '.doc', '.odt', '.htm', '.html', '.java'):
input_file = os.path.join(path, f)
# NOTE(review): mode "w" truncates the file as soon as it is opened and gives
# a write-only handle, so the original text is gone before the loop below can
# read it.
with open(input_file, "w") as fi:
for line in fi:
for search, replace in REGEXES:
line = search.sub(replace, line)
fi.write(line)
Somehow it's not working. I want to make the replacements in the current file, not in a new file.
Update: How about creating A_reg.java from A.java, moving A.java to a separate local folder, and then renaming A_reg.java back to A.java? Is that possible? If yes, please help me out with the code.
This is perfectly normal: you overwrite the files themselves. Write in new files, then rename.
Also, opening the way you do truncates files:
$ cat t.txt
foo
$ python
>>> f = open("t.txt", "w")
>>> f.close()
>>> exit()
$ cat t.txt
# file is empty!!
$
Based on the input from Fge, I could make it work by using
from shutil import move
move(output_file, input_file)
Thus the working code will be
import os
import os.path
import re
from shutil import move

# Raw string: "\p" is not a valid escape sequence, and newer Python versions
# warn about it in ordinary string literals. The value is unchanged.
path = r"D:\python-test"
myfiles = os.listdir(path)

# (compiled pattern, replacement text) pairs, applied in this order.
REGEXES = [(re.compile(r'dog'), 'cat'),
           (re.compile(r'123'), '789')]

for f in myfiles:
    file_name, file_extension = os.path.splitext(f)
    # Leave every other kind of file untouched.
    if file_extension not in ('.txt', '.doc', '.odt', '.htm', '.html', '.java'):
        continue

    # Write the result to a sibling "{name}_regex{ext}" file first; opening
    # the input itself for writing would truncate it before it is read.
    generated_output_file = file_name + "_regex" + file_extension
    input_file = os.path.join(path, f)
    output_file = os.path.join(path, generated_output_file)

    with open(input_file, "r") as fi, open(output_file, "w") as fo:
        for line in fi:
            for search, replace in REGEXES:
                line = search.sub(replace, line)
            fo.write(line)

    # Both handles are closed here, so the rewritten copy can now take the
    # place of the original file.
    move(output_file, input_file)

Categories