Adding entries from multiple files in python - python

I have a question on how to add entries from 100 files (each file contains two columns) and then writing them to a new file(which will also contain two columns)?

This is very underspecified. It's not clear what your problem is.
Probabably you'd do something like:
entries = []
for f in ["file1.txt", "file2.txt", ..., "file100.txt"]:
entries.append(open(f).readlines())
o = open("output.txt", "w")
o.writelines(entries)
o.close()

Wasn't sure if you needed a solution to find all those 100 files as well?
If so, here is one approach including reading them all and writing them to a joined file:
from os import walk
from os.path import abspath
lines = []
for root, folders, files in walk('./path/'):
for file in files:
fh = open(abspath(root + '/' + file), 'rb')
lines.append(fh.read())
fh.close()
# break if you only want the first level of your directory tree
o = open('output.txt', 'wb')
o.write('\n'.join(lines))
o.close()
You could also do a "memory efficient" solution:
from os import walk
from os.path import abspath
o = open('output.txt', 'wb')
for root, folders, files in walk('./path/'):
for file in files:
fh = open(abspath(root + '/' + file), 'rb')
for line in fh.readline():
o.write(line)
del line
fh.close()
del fh
# break if you only want the first level of your directory tree
o.close()
Much of this is automated (I think) within Python, but lazy or not, if you can then remove objects from the memory after closing the files and before and before reusing variable names.. just in case?

a more scalable way, inspired by Torxed approach
from os import walk
from os.path import abspath
with open('output.txt', 'wb') as o:
for root, folders, files in walk('./path/'):
for filename in files:
with open(abspath(root + '/' + filename), 'rb') as i:
for line in i:
o.write(line)

Do you want to chain them? I.e., do you want all lines of file 1, then all lines of file 2, ...
Or do you want to merge them? Line 1 of file 1, line 1 of file 2, ...
For the first case:
from itertools import chain
filenames = ...
file_handles = [open(fn) for fn in filenames]
with open("output.txt", "w") as out_fh:
for line in chain(file_handles):
out_fh.write(line)
for fh in file_handles:
fh.close()
For the second case:
from itertools import izip_longest
filenames = ...
file_handles = [open(fn) for fn in filenames]
with open("output.txt", "w") as out_fh:
for lines in izip_longest(*file_handles, fillvalue=None):
for line in lines:
if line is not None:
out_fh.write(line)
for fh in file_handles:
fh.close()
Important: Never forget to close your files!
As #isedev pointed out, this approach is o.k. for 100 files, but as I open all handles immediately, for thousands this won't work.
If you want to overcome this problem, only option 1 (chaining) is reasonable...
filenames = ...
with open("output.txt", "w") as out_fh:
for fn in filenames:
with open(fn) as fh:
for line in fh:
out_fh.write(line)

Related

How to get number only values from a specific row from different text file

I am trying to get only numbers from a particular row from 10 different text files. As an output, I want those numbers appended as a list. I'm a new learner. I would appreciate your help.
tried this one but not working
import os
import sys,re
line_number=69
path = r'C:\Users\arpan\OneDrive\Desktop\New folder'
for filename in os.listdir(path):
with open(os.path.join(path, filename), 'r') as f:
#print (filename)
file = open(filename)
all_lines_variable = file.readlines()
sys.stdout = open("output", "a") #print output file
print(filename, all_lines_variable[line_number])
sys.stdout.close()
You can try this script, it will extract from all files line number 69 and then appends it to output.txt file:
import os
import re
line_number=69
path = r'C:\Users\arpan\OneDrive\Desktop\New folder'
with open('output.txt', 'w') as f_out:
for file in os.listdir(path):
with open(os.path.join(path, file), 'r') as f_in:
lines = f_in.readlines()
print(' '.join(re.findall(r'\d+', lines[line_number])), file=f_out)

How to add for loop in python?

I'm creating new files from originally existing ones in the mdp folder by changing a couple of lines in those files using python. I need to do this for 1000 files. Can anyone suggest a for loop which reads all files and changes them and creates new in one go?
This way I have to change the the number followed by 'md_' in the path and it's tedious because there are a 1000 files here.
I tried using str() but there was a 'could not read file error'
fin = open("/home/abc/xyz/mdp/md_1.mdp", "rt")
fout = open("/home/abc/xyz/middle/md_1.mdp", "wt")
for line in fin:
fout.write(line.replace('integrator = md', 'integrator
= md-vv'))
fin = open("/home/abc/xyz/middle/md_1.mdp", "rt")
fout = open("/home/abc/xyz/mdb/md_1.mdp", "wt")
for line in fin:
fout.write(line.replace('dt = 0.001', 'dt
= -0.001'))
fin.close()
fout.close()
os.listdir(path) is your friend:
import os
sourcedir = "/home/abc/xyz/mdp"
destdir = "/home/abc/xyz/middle"
for filename in os.listdir(sourcedir):
if not filename.endswith(".mdp"):
continue
source = os.path.join(sourcedir, filename)
dest = os.path.join(destdir, filename)
# with open(xxx) as varname makes sure the file(s)
# will be closed whatever happens in the 'with' block
# NB text mode is the default, and so is read mode
with open(source) as fin, open(dest, "w") as fout:
# python files are iterable... avoids reading
# the whole file in memory at once
for line in fin:
# will only work for those exact strings,
# you may want to use regexps if number of
# whitespaces vary etc
line = line.replace("dt = 0.001", "dt = -0.001")
line = line.replace(
'integrator = md',
'integrator = md-vv'
)
fout.write(line)
Assuming you want to edit all files that are located in the mdp folder you could do something like this.
import os
dir = "/home/abc/xyz/mdp/"
for filename in os.listdir(dir):
with open(dir + filename, "r+") as file:
text = file.read()
text = text.replace("dt = 0.001", "dt = -0.001")
file.seek(0)
file.write(text)
file.truncate()
This will go through every file and change it using str.replace().
If there are other files in the mdp folder that you do not want to edit, you could use and if-statement to check for the correct file name. Add something like this to encase the with open statement.
if filename.startswith("md_")

How to loop through each file in a folder, do some action to the file and save output to a file in another folder Python

I have a folder with multiple files like so:
1980
1981
1982
In each of these files is some text. I want to loop through each of these files and do some operation to each file then save the edited file to another folder and move onto the next file and so on. The result would be that I have the original folder and then another folder with the edited version of each file in it like so:
1980_filtered
1981_filtered
1982_filtered
Is it possible to do this?
Currently I have some code that loops through the files in a folder, does some filtering to each file and then saves all the edits of each file into one massive file. Here is my code:
import os
input_location = 'C:/Users/User/Desktop/mini_mouse'
output_location = 'C:/Users/User/Desktop/filter_mini_mouse/mouse'
for root, dir, files in os.walk(input_location):
for file in files:
os.chdir(input_location)
with open(file, 'r') as f, open('NLTK-stop-word-list', 'r') as f2:
mouse_file = f.read().split() # reads file and splits it into a list
stopwords = f2.read().split()
x = (' '.join(i for i in mouse_file if i.lower() not in (x.lower() for x in stopwords)))
with open(output_location, 'a') as output_file:
output_file.write(x)
Any help would be greatly appreciated!
You need to specify what each new file is called. To do so, Python has some good string formatting methods. Fortunately, your new desired file names are easy to do in a loop
import os
input_location = 'C:/Users/User/Desktop/mini_mouse'
output_location = 'C:/Users/User/Desktop/filter_mini_mouse/mouse'
for root, dir, files in os.walk(input_location):
for file in files:
new_file = "{}_filtered.txt".format(file)
os.chdir(input_location)
with open(file, 'r') as f, open('NLTK-stop-word-list', 'r') as f2:
mouse_file = f.read().split()
stopwords = f2.read().split()
x = (' '.join(i for i in mouse_file if i.lower() not in (x.lower() for x in stopwords)))
with open(output_location+'/'+new_file, 'w') as output_file: # Changed 'append' to 'write'
output_file.write(x)
If you're in Python 3.7, you can do
new_file = f"{file}_filtered.txt"
and
with open(f"{output_location}/{new_file}", 'w') as output_file:
output_file.write(x)
First of all you should start by opening the NLTK-stop-word-list only once, so I moved it outside of your loops. Second, os.chdir() is redundant, you can use os.path.join() to get your current file path (and to construct your new file path):
import os
input_location = 'C:/Users/User/Desktop/mini_mouse'
output_location = 'C:/Users/User/Desktop/filter_mini_mouse/'
stop_words_path = 'C:/Users/User/Desktop/NLTK-stop-word-list.txt'
with open(stop_words_path, 'r') as stop_words:
for root, dirs, files in os.walk(input_location):
for name in files:
file_path = os.path.join(root, name)
with open(file_path, 'r') as f:
mouse_file = f.read().split() # reads file and splits it into a list
stopwords = stop_words.read().split()
x = (' '.join(i for i in mouse_file if i.lower() not in (x.lower() for x in stopwords)))
new_file_path = os.path.join(output_location, name) + '_filtered'
with open(new_file_path, 'a') as output_file:
output_file.write(x)
P.S: I took the liberty to change some of your variable names as they were part of python's built in words ('file' and 'dir'). If you'll run __builtins__.__dict__.keys() you'll see them there.

Compare multiple text files, and save commons values

My actual code :
import os, os.path
DIR_DAT = "dat"
DIR_OUTPUT = "output"
filenames = []
#in case if output folder doesn't exist
if not os.path.exists(DIR_OUTPUT):
os.makedirs(DIR_OUTPUT)
#isolating empty values from differents contracts
for roots, dir, files in os.walk(DIR_DAT):
for filename in files:
filenames.append("output/" + os.path.splitext(filename)[0] + ".txt")
filename_input = DIR_DAT + "/" + filename
filename_output = DIR_OUTPUT + "/" + os.path.splitext(filename)[0] + ".txt"
with open(filename_input) as infile, open(filename_output, "w") as outfile:
for line in infile:
if not line.strip().split("=")[-1]:
outfile.write(line)
#creating a single file from all contracts, nb the values are those that are actually empty
with open(DIR_OUTPUT + "/all_agreements.txt", "w") as outfile:
for fname in filenames:
with open(fname) as infile:
for line in infile:
outfile.write(line)
#finale file with commons empty data
#creating a single file
with open(DIR_OUTPUT + "/all_agreements.txt") as infile, open(DIR_OUTPUT + "/results.txt", "w") as outfile:
seen = set()
for line in infile:
line_lower = line.lower()
if line_lower in seen:
outfile.write(line)
else:
seen.add(line_lower)
print("Psst go check in the ouptut folder ;)")
The last lines of my code are checking wether or not, element exists mutliple times. So, may the element exists, once, twice, three, four times. It will add it to results.txt.
But the thing is that I want to save it into results.txt only if it exists 4 times in results.txt.
Or best scenario, compare the 4 .txt files and save elements in commons into results.txt.
But I can't solve it..
Thanks for the help :)
To make it easier,
with open(DIR_OUTPUT + "/all_agreements.txt") as infile, open(DIR_OUTPUT + "/results.txt", "w") as outfile:
seen = set()
for line in infile:
if line in seen:
outfile.write(line)
else:
seen.add(line)
Where can I use the .count() function ?
Because I want to do something like xxx.count(line) == 4 then save it into resulsts.txt
If your files are not super big you can use set.intersection(a,b,c,d).
data = []
for fname in filenames:
current = set()
with open(fname) as infile:
for line in infile:
current.add(line)
data.append(current)
results = set.intersection(*data)
You also don't need to create one single big file for this issue.
Not sure how your input looks like or what output is expected...
But maybe this can spark some ideas:
from io import StringIO
from collections import Counter
lines = ["""\
a=This
b=is
c=a Test
""", """\
a=This
b=is
c=a Demonstration
""", """\
a=This
b=is
c=another
d=example
""", """\
a=This
b=is
c=so much
d=fun
"""]
files = (StringIO(l) for l in lines)
C = Counter(line for f in files for line in f)
print([k for k,v in C.items() if v >= 4])
# Output: ['a=This\n', 'b=is\n']

File append in python

I have n files in the location /root as follows
result1.txt
abc
def
result2.txt
abc
def
result3.txt
abc
def
and so on.
I must create a consolidated file called result.txt with all the values concatenated from all result files looping through the n files in a location /root/samplepath.
It may be easier to use cat, as others have suggested. If you must do it with Python, this should work. It finds all of the text files in the directory and appends their contents to the result file.
import glob, os
os.chdir('/root')
with open('result.txt', 'w+') as result_file:
for filename in glob.glob('result*.txt'):
with open(filename) as file:
result_file.write(file.read())
# append a line break if you want to separate them
result_file.write("\n")
That could be an easy way of doing so
Lets says for example that my file script.py is in a folder and along with that script there is a folder called testing, with inside all the text files named like file_0, file_1....
import os
#reads all the files and put everything in data
number_of_files = 0
data =[]
for i in range (number_of_files):
fn = os.path.join(os.path.dirname(__file__), 'testing/file_%d.txt' % i)
f = open(fn, 'r')
for line in f:
data.append(line)
f.close()
#write everything to result.txt
fn = os.path.join(os.path.dirname(__file__), 'result.txt')
f = open(fn, 'w')
for element in data:
f.write(element)
f.close()

Categories