I need to extract some values out of a file, i wrote the following code.
import os
import sys
rootdir='/home/nsingh/ansible-environments/aws'
for root, subdirs, files in os.walk(rootdir):
for j in subdirs:
print j
mypath=rootdir+'/'+j+'/inventory/group_vars/all'
#print mypath
fo=open(mypath,'r')
f=fo.readlines()
for line in f:
if ('isv_alias' in line or 'LMID' in line or 'products' in line):
path='/home/nsingh/krikCSV_fun.csv'
if('isv_alias' in line):
line=line.strip('isv_alias=')
line= line.strip('"')
elif('LMID' in line):
line=line.strip('LMID=')
else:
line=line.strip('products=')
fi= open(path,'a+')
fi.write(line)
fi.close()
fo.close()
the os.walk method somehow finds a hidden directory as well which is not actually present
loadgen
crapcity
rmstest2
suricatatest
.git
Traceback (most recent call last):
File "testme.py", line 9, in <module>
fo=open(mypath,'r')
IOError: [Errno 2] No such file or directory: '/home/nsingh/ansible-environments/aws/.git/inventory/group_vars/all'
OUTPUT:
: "suricatatest"^M
: suricatatest
: rms_ems_hosted
: 26
: rmstest2
: rms_scl
: 80
: suricatatest
: rms_ems_hosted
: 26
: "suricatatest"^M
: suricatatest
: rms_ems_hosted
: 26
I need the output as & also remove the semicolon:
suricatatest rms_ems_hosted 26
What makes you think that /.git doesn't exist?
Try this:
import os
rootdir = '/home/nsingh/ansible-environments/aws'
for root, subdirs, files in os.walk(rootdir):
for j in subdirs:
print(j)
my_path = rootdir + '/' + j + '/inventory/group_vars/all'
if os.path.isfile(my_path):
with open(my_path, 'r') as fo:
for line in fo.readlines():
if 'isv_alias' in line or 'LMID' in line or 'products' in line:
path = '/home/nsingh/krikCSV_fun.csv'
if 'isv_alias' in line:
line = line.strip('isv_alias=')
line = line.strip('"')
elif 'LMID' in line:
line = line.strip('LMID=')
else:
line = line.strip('products=')
with open(path, 'a+') as fi:
fi.write(line.lstrip(": "))
You should use os.path to make the file paths. os.walk will visit all the directories in the tree under the top directory - you are only interested in directories that end with 'inventory/group_vars', so check for that and take action. If you want to write the values as a group, you need to collect them in something.
import os, os.path, collections
rootdir = '/home/nsingh/ansible-environments/aws'
sub_folder = 'inventory/group_vars'
out_path = '/home/nsingh/krikCSV_fun.csv'
for dirpath, dirnames, filenames in os.walk(rootdir):
if dirpath.endswith(sub_folder):
data = collections.defaultdict(list)
with open(os.join(dirpath, 'all')) as f, open(out_path, 'a+') as out:
for line in f:
if 'isv_alias' in line:
line = line.strip('isv_alias=')
line = line.strip('"')
data['isv_alias'].append(line)
elif 'LMID' in line:
line = line.strip('LMID=')
data['LMID'].append(line)
elif 'products' in line:
line = line.strip('products=')
data['products'].append(line)
for a, b, c in zip(*data.values()):
out.write('{},{},{}\n'format(a, b, c))
I used a defaultdict to store multiple items of interest from a single file. If there is only one 'isv_alias', 'LMID', 'products' group in each file then you could just as easily store the information in a list or namedtuple.
You didn't provide an example of the file(s) so it's not clear what the line structure is. If it looks like this:
isv_alias="foo"
LMID=bar
products=26
It can be simplified to
keys = {'isv_alias', 'LMID', 'products'}
for dirpath, dirnames, filenames in os.walk(rootdir):
if dirpath.endswith(sub_folder):
data = collections.defaultdict(list)
with open(os.join(dirpath, 'all')) as f, open(out_path, 'a+') as out:
for line in f:
line = line.strip()
key, value = line.split('=')
if key in keys:
value = value.strip('"')
data[key].append(value)
for a, b, c in zip(*data.values()):
out.write('{},{},{}\n'format(a, b, c))
As long as you are accumulating the information in data, you can just open the output file once
data = collections.defaultdict(list)
keys = {'isv_alias', 'LMID', 'products'}
for dirpath, dirnames, filenames in os.walk(rootdir):
if dirpath.endswith(sub_folder):
with open(os.join(dirpath, 'all')) as f:
for line in f:
line = line.strip()
key, value = line.split('=')
if key in keys:
value = value.strip('"')
data[key].append(value)
with open(out_path, 'a+') as out:
for a, b, c in zip(*data.values()):
out.write('{},{},{}\n'format(a, b, c))
If using Python 3.6 or an ordered defaultdict then the solution above assumes the order of appearance of each key in the file is the order you want them written out.
If the file structure isn't ordered or the dictionary used isn't ordered, write to the file like this:
for a, b, c in zip(data['isv_alias'], data['LMID'], data['products']):
out.write('{},{},{}\n'format(a, b, c))
Related
I have some csv files that i have filtered with this code and it works:
with open('path' , 'r')as f:
for lines in f:
if '2020-12-31' in lines:
line_data = lines.split(';')
filtered_list.append(line_data)
newfile.write(lines)
Firstly i would like do this but for ALL csv file in my folder.
Secondly i would like to do this in prompt command line if possible( with sys?).
i tried:
import os
from os import walk
from pathlib import Path
dir = r'myPathFolder1'
target = r'myPathFolder2'
filtered_list=[]
for filenames in os.listdir(dir):
for f in filenames:
if f.endswith(".csv"):
newfile = open(dir + f, 'w')
with open(f , 'r') as t:
for lines in t:
if '2020-12-31' in lines:
line_data = lines.split(';')
filtered_list.append(line_data)
newfile.write(lines)
But it doesnt work.
The full code would be, I tried my code, it will copy to another folder.
import os,fnmatch
dir = "C:\\Users\\Frederic\\Desktop\\"
def find(pattern, path):
result = []
for root, dirs, files in os.walk(path):
for name in files:
if fnmatch.fnmatch(name, pattern):
result.append(os.path.join(root, name))
return result
filtered_list = find('*.csv', dir)
print(filtered_list)
for filenames in filtered_list:
print(filenames)
for f in filtered_list:
if f.endswith(".csv"):
print(f.endswith(".csv"))
base_dir_pair = os.path.split(f)
address = "C:\\Users\\Frederic\\Desktop\\aa\\"
address = address + base_dir_pair[1]
print(address)
newfile = open(address, 'w')
with open(f, 'r') as t:
print("in1")
for lines in t:
print("in2")
if '2020-12-31' in lines:
print("in3")
line_data = lines.split(';')
filtered_list.append(line_data)
newfile.write(lines)
I have a folder with multiple files like so:
1980
1981
1982
In each of these files is some text. I want to loop through each of these files and do some operation to each file then save the edited file to another folder and move onto the next file and so on. The result would be that I have the original folder and then another folder with the edited version of each file in it like so:
1980_filtered
1981_filtered
1982_filtered
Is it possible to do this?
Currently I have some code that loops through the files in a folder, does some filtering to each file and then saves all the edits of each file into one massive file. Here is my code:
import os
input_location = 'C:/Users/User/Desktop/mini_mouse'
output_location = 'C:/Users/User/Desktop/filter_mini_mouse/mouse'
for root, dir, files in os.walk(input_location):
for file in files:
os.chdir(input_location)
with open(file, 'r') as f, open('NLTK-stop-word-list', 'r') as f2:
mouse_file = f.read().split() # reads file and splits it into a list
stopwords = f2.read().split()
x = (' '.join(i for i in mouse_file if i.lower() not in (x.lower() for x in stopwords)))
with open(output_location, 'a') as output_file:
output_file.write(x)
Any help would be greatly appreciated!
You need to specify what each new file is called. To do so, Python has some good string formatting methods. Fortunately, your new desired file names are easy to do in a loop
import os
input_location = 'C:/Users/User/Desktop/mini_mouse'
output_location = 'C:/Users/User/Desktop/filter_mini_mouse/mouse'
for root, dir, files in os.walk(input_location):
for file in files:
new_file = "{}_filtered.txt".format(file)
os.chdir(input_location)
with open(file, 'r') as f, open('NLTK-stop-word-list', 'r') as f2:
mouse_file = f.read().split()
stopwords = f2.read().split()
x = (' '.join(i for i in mouse_file if i.lower() not in (x.lower() for x in stopwords)))
with open(output_location+'/'+new_file, 'w') as output_file: # Changed 'append' to 'write'
output_file.write(x)
If you're in Python 3.7, you can do
new_file = f"{file}_filtered.txt"
and
with open(f"{output_location}/{new_file}", 'w') as output_file:
output_file.write(x)
First of all you should start by opening the NLTK-stop-word-list only once, so I moved it outside of your loops. Second, os.chdir() is redundant, you can use os.path.join() to get your current file path (and to construct your new file path):
import os
input_location = 'C:/Users/User/Desktop/mini_mouse'
output_location = 'C:/Users/User/Desktop/filter_mini_mouse/'
stop_words_path = 'C:/Users/User/Desktop/NLTK-stop-word-list.txt'
with open(stop_words_path, 'r') as stop_words:
for root, dirs, files in os.walk(input_location):
for name in files:
file_path = os.path.join(root, name)
with open(file_path, 'r') as f:
mouse_file = f.read().split() # reads file and splits it into a list
stopwords = stop_words.read().split()
x = (' '.join(i for i in mouse_file if i.lower() not in (x.lower() for x in stopwords)))
new_file_path = os.path.join(output_location, name) + '_filtered'
with open(new_file_path, 'a') as output_file:
output_file.write(x)
P.S: I took the liberty to change some of your variable names as they were part of python's built in words ('file' and 'dir'). If you'll run __builtins__.__dict__.keys() you'll see them there.
My actual code :
import os, os.path
DIR_DAT = "dat"
DIR_OUTPUT = "output"
filenames = []
#in case if output folder doesn't exist
if not os.path.exists(DIR_OUTPUT):
os.makedirs(DIR_OUTPUT)
#isolating empty values from differents contracts
for roots, dir, files in os.walk(DIR_DAT):
for filename in files:
filenames.append("output/" + os.path.splitext(filename)[0] + ".txt")
filename_input = DIR_DAT + "/" + filename
filename_output = DIR_OUTPUT + "/" + os.path.splitext(filename)[0] + ".txt"
with open(filename_input) as infile, open(filename_output, "w") as outfile:
for line in infile:
if not line.strip().split("=")[-1]:
outfile.write(line)
#creating a single file from all contracts, nb the values are those that are actually empty
with open(DIR_OUTPUT + "/all_agreements.txt", "w") as outfile:
for fname in filenames:
with open(fname) as infile:
for line in infile:
outfile.write(line)
#finale file with commons empty data
#creating a single file
with open(DIR_OUTPUT + "/all_agreements.txt") as infile, open(DIR_OUTPUT + "/results.txt", "w") as outfile:
seen = set()
for line in infile:
line_lower = line.lower()
if line_lower in seen:
outfile.write(line)
else:
seen.add(line_lower)
print("Psst go check in the ouptut folder ;)")
The last lines of my code are checking wether or not, element exists mutliple times. So, may the element exists, once, twice, three, four times. It will add it to results.txt.
But the thing is that I want to save it into results.txt only if it exists 4 times in results.txt.
Or best scenario, compare the 4 .txt files and save elements in commons into results.txt.
But I can't solve it..
Thanks for the help :)
To make it easier,
with open(DIR_OUTPUT + "/all_agreements.txt") as infile, open(DIR_OUTPUT + "/results.txt", "w") as outfile:
seen = set()
for line in infile:
if line in seen:
outfile.write(line)
else:
seen.add(line)
Where can I use the .count() function ?
Because I want to do something like xxx.count(line) == 4 then save it into resulsts.txt
If your files are not super big you can use set.intersection(a,b,c,d).
data = []
for fname in filenames:
current = set()
with open(fname) as infile:
for line in infile:
current.add(line)
data.append(current)
results = set.intersection(*data)
You also don't need to create one single big file for this issue.
Not sure how your input looks like or what output is expected...
But maybe this can spark some ideas:
from io import StringIO
from collections import Counter
lines = ["""\
a=This
b=is
c=a Test
""", """\
a=This
b=is
c=a Demonstration
""", """\
a=This
b=is
c=another
d=example
""", """\
a=This
b=is
c=so much
d=fun
"""]
files = (StringIO(l) for l in lines)
C = Counter(line for f in files for line in f)
print([k for k,v in C.items() if v >= 4])
# Output: ['a=This\n', 'b=is\n']
I have the below code to run through a directory and select all of the files and compare them to an inserted wordlist file. However I get the following error TypeError: invalid file: ['C:/Users/Nathan/Desktop/chats\\(1,).out'] I cannot figure out how to change the os.path.join to correctly show the file location.
self.wordopp = askdirectory(title="Select chat log directory")
path = self.wordopp
files = os.listdir(path)
paths = []
wordlist = self.wordop
for file in files:
paths.append(os.path.join(path, file))
f = open(wordlist)
l = set(w.strip().lower() for w in f)
with open(paths) as f:
found = False
file = open("out.txt", "w")
for line in paths:
line = line.lower()
if any(w in line for w in l):
found = True
file.write(line)
print(line)
if not found:
print(line)
Consider this line of code:
with open(paths) as f:
Ask yourself, "what is paths"? It is a list of filenames, not a single file. That's pretty much what the error is telling you: that a list is an invalid file.
Considering that you are looping over a list of filenames, my guess is that your intention is to do:
with open(file) as f:
or maybe
with open(paths[-1]) as f:
I'm not a programmer and I've been doing my best to create some small scripts with Python 3.4 that help me with different tasks at work.
I have several .txt files and I to every line in the file I would need to append:
the file name
the file name+ line number
save it as a UTF-8 csv with all fields separated by commas.
I managed to do this for one particular file, but I'm struggling to do it for all the files in the folder. I've tried import glob but with no success.
This is the code right now (a mess... that partially works):
with open('Ruth.txt', 'r') as program:
data = program.readlines()
with open('Ruth.txt', 'w') as program:
for (number, line) in enumerate(data):
program.write('%d","%s' % (number + 1, line))
files = 'Ruth.txt'
all_lines = []
for f in files.split():
lines = open(f, 'r').readlines()
for line in lines:
all_lines.append('"' + f + '"' + ',' + '"' + f + line.strip() + '"')
fout = open(f + 'out.csv', 'w')
fout.write('\n'.join(all_lines))
fout.close()
Try this:
import os
def add_numbers(filename):
with open(filename, 'r') as readfile:
data = readfile.readlines()
with open(filename, 'w') as writefile:
for i, line in enumerate(data):
writefile.write('%d. %s' % (i + 1, line))
for path, _, filenames in os.walk(folder):
for filename in filenames:
add_numbers(os.path.join(path, filename))
This will add numbers to each file in the directory and each file in all sub-directories. If you don't want it to check all sub-directories, change the for loop to this:
path, _, filenames = next(os.walk(folder))
for filename in filenames:
add_numbers(os.path.join(path, filename))
here the complete script that take one positional argument (folder) and create a new .csv file at the same level than the file.
#! /usr/bin/env python
# -*- coding: utf-8 -*-
import sys
from pathlib import Path
def get_files(folder_path, suffix=".txt"):
return Path(folder_path).glob("**/*%s" % suffix)
def write_lines(file_):
with file_.with_suffix(".csv").open("w") as fout, file_.open(encoding="utf-8") as fin:
for i, line in enumerate(fin, 1):
# line number, file name, line
new_line = ",".join(["%d." % i, file_.name, line])
fout.write(new_line)
def main(folder):
for file_ in get_files(folder):
print(file_)
write_lines(file_)
if __name__ == '__main__':
try:
main(sys.argv[1])
except IndexError:
print("usage: %s foldername" % sys.argv[0])
This will take all text files in current folder and turn them into utf-8 encoded 'csv-style' files so that space in the text is turned into a comma with filename and line number also comma-separated.
from glob import glob
filenames = glob("*.txt")
text = ''
for fn in filenames:
with open(fn,'r') as f:
for i,line in enumerate(f):
line=','.join(line.split())
text += ','.join((line,fn,i+1)) + '\n'
fnew = fn.rsplit('.',1)[0]+'.csv'
with open(fnew,'w', encoding='utf-8') as f:
f.write(text)