i have to find recursively all lines( which start with string "excel") in all files (in directory and subdirectory) .i need for each filename the line found (for example :
filename1:
line1 founded...
filename2:
line2 founded...
Output result in file called "logfile"
if no line founded , filename not saved in logfile.
import os
word="excel"
from os.path import join
for (dirname, dirs, files) in os.walk('/batch/'):
for filename in files:
thefile = os.path.join(dirname,filename)
for line in files:
if line.startswith(word):
print (line)
print (thefile)
Thanks
Your code just has minor problems: The biggest one is that you loop on filename instead of file content.
import os
word="excel"
from os.path import join
for (dirname, dirs, files) in os.walk('/batch/'):
for filename in files:
thefile = os.path.join(dirname, filename)
with open(thefile) as f:
for line in f:
if line.startswith(word):
print (line)
print (thefile)
EDIT:
import os
word="excel"
from os.path import join
with open('log_result.txt', 'w') as log_file:
for (dirname, dirs, files) in os.walk('/tmp/toto'):
for filename in files:
thefile = os.path.join(dirname, filename)
with open(thefile) as f:
lines = [line for line in f if line.startswith(word)]
if lines:
log_file.write("File {}:\n".format(thefile))
log_file.writelines(lines)
Here is the fixed code.
you don't need to re traverse the same list of files.
os.walk() will return all sub directories in a directory, All you need you do is loop all directories.
Sample Code
import glob
import os
word="excel"
for (dirname, dirs, files) in os.walk("/batch/"):
for file_ in files :
if file_.startswith(word):
print(file_)
print(os.path.join(dirname, file_))
for dir_ in dirs :
myfiles = glob.glob(os.path.join(dirname,dir_))
for myfile in myfiles:
if myfile.startswith(word):
print(myfile)
print(os.path.join(dirname,myfiles))
hope this helps
Related
I want to open and read several text files. The plan is to find a string in the text files and print the whole line from the string. The thing is, I canĀ“t open the paths from the array.
I hope it is unterstandable what I want to try.
import os
from os import listdir
from os.path import join
from config import cred
path = (r"E:\Utorrent\Leaked_txt")
for filename in os.listdir(path):
list = [os.path.join(path, filename)]
print(list)
for i in range(len(list)-1):
with open(str(list[i], "r")) as f:
for line in f:
if cred in line:
print(line)
Thanks :D
I prefer to use glob when reading several files in a directory
import glob
files = glob.glob(r"E:\Utorrent\Leaked_txt\*.txt") # read all txt files in folder
for file in files: # iterate over files
with open(file, 'r') as f: # read file
for line in f.read(): # iterate over lines in each file
if cred in line: # if some string is in line
print(line) # print the line
With os, you can do something like this:
import os
from config import cred
path = "E:/Utorrent/Leaked_txt"
files = [os.path.join(path, file) for file in os.listdir(path) if file.endswith(".txt")]
for file in files:
with open(file, "r") as f:
for line in f.readlines():
if cred in line:
print(line)
Edit
os.listdir only includes files from the parent directory (specified by path). To get the .txt files from all sub-directories, use the following:
files = list()
for root, _, f in os.walk(path):
files += [os.path.join(root, file) for file in f if file.endswith(".txt")]
drilling down into sub-dirs looking for specific files to modify.
Using glob.glob() to search all sub-dirs for files.
path = 'C:\Test\*\*[1234,349,4568]*.*'
#print(path)
files = glob.glob(path)
print(files)
for name in files:
with open(name,'r') as inputfile:
newText = inputfile.read().replace('5484522-102','P/N 545616-102')
print(newText)
with open(name, "w") as outputfile:
outputfile.write(newText)
print('Done !')
modifies the files called out in the path and many more I don't want to be modified. How do i modify only the files called out in the path?
#!/usr/bin/env python3
#This code finds the specific files listed in a dir and copies them to another dir.
There the files are read and the p/n replace with another p/n
import os
import shutil
import glob
import pandas as pd
#fill in the file name into this set
df = pd.read_csv('c:\Test\ReadFiles.csv')
path = 'C:\Test\Test\*.*'
dest_dir = 'C:\Test\Test' # New dir for the files found
src_dir = 'C:\Test' # Search dir
walking through the dir and files looking for the files in the set().
for (dirpath, dirnames, filenames) in os.walk(src_dir):
for fname in filenames:
if fname[:6] in df:
print(fname)
shutil.copy(os.path.join(dirpath, fname), dest_dir)
iterate through files found and change the part number
files = glob.glob(path)
print(files)
for name in files:
with open(name,'r') as inputfile:
newText = inputfile.read().replace('222222-101','111111-101')
with open(name, "w") as outputfile:
outputfile.write(newText)
print(outputfile)
print('Done !')
I need to read the contents of a file from the list of files from a directory with os.listdir. My working scriptlet is as follows:
import os
path = "/Users/Desktop/test/"
for filename in os.listdir(path):
with open(filename, 'rU') as f:
t = f.read()
t = t.split()
print(t)
print(t) gives me all the contents from all the files at once present in the directory (path).
But I like to print the contents on first file, then contents of the second and so on, until all the files are read from in dir.
Please guide ! Thanks.
You can print the file name.
Print the content after the file name.
import os
path = "/home/vpraveen/uni_tmp/temp"
for filename in os.listdir(path):
with open(filename, 'rU') as f:
t = f.read()
print filename + " Content : "
print(t)
First, you should find the path of each file using os.path.join(path, filename). Otherwise you'll loop wrong files if you change the variable path. Second, your script already provides the contents of all files starting with the first one. I added a few lines to the script to print the file path and an empty line to see where the contents end and begin:
import os
path = "/Users/Desktop/test/"
for filename in os.listdir(path):
filepath = os.path.join(path, filename)
with open(filepath, 'rU') as f:
content = f.read()
print(filepath)
print(content)
print()
os.listdir returns the name of the files only. you need to os.path.join that name with the path the files live in - otherwise python will look for them in your current working directory (os.getcwd()) and if that happens not to be the same as path python will not find the files:
import os
path = "/Users/Desktop/test/"
for filename in os.listdir(path):
print(filename)
file_path = os.path.join(path, filename)
print(file_path)
..
if you have pathlib at your disposal you can also:
from pathlib import Path
path = "/Users/Desktop/test/"
p = Path(path)
for file in p.iterdir():
if not file.is_file():
continue
print(file)
print(file.read_text())
I am trying to find a string that is contained in files under a directory. Then make it to store it's file names and directories under a new text file or something.
I got upto where it is going through a directory and finding a string, then printing a result. But not sure of the next step.
Please help, I'm completely new to coding and python.
import glob, os
#Open a source as a file and assign it as source
source = open('target.txt').read()
filedirectories = []
#locating the source file and printing the directories.
os.chdir("/Users/a1003584/desktop")
for root, dirs, files in os.walk(".", topdown=True):
for name in files:
print(os.path.join(root, name))
if source in open(os.path.join(root, name)).read():
print 'treasure found.'
Don't do a string comparison if your looking for a dictionary. Instead use the json module. Like this.
import json
import os
filesFound = []
def searchDir(dirName):
for name in os.listdir(dirName):
# If it is a file.
if os.isfile(dirName+name):
try:
fileCon = json.load(dirName+name)
except:
print("None json file.")
if "KeySearchedFor" in fileCon:
filesFound.append(dirName+name)
# If it is a directory.
else:
searchDir(dirName+name+'/')
# Change this to the directory your looking in.
searchDir("~/Desktop")
open("~/Desktop/OutFile.txt",'w').write(filesFound)
This should write the output to a csv file
import csv
import os
with open('target.txt') as infile: source = infile.read()
with open("output.csv", 'w') as fout:
outfile = csv.writer(fout)
outfile.writerow("Directory FileName FilePath".split())
for root, dirnames, fnames in os.walk("/Users/a1003584/desktop", topdown=True):
for fname in fnames:
with open(os.path.join(root, fname)) as infile:
if source not in infile.read(): continue
outfile.writerow(root, fname, os.path.join(root, fname))
I have a python script parse.py, which in the script open a file, say file1, and then do something maybe print out the total number of characters.
filename = 'file1'
f = open(filename, 'r')
content = f.read()
print filename, len(content)
Right now, I am using stdout to direct the result to my output file - output
python parse.py >> output
However, I don't want to do this file by file manually, is there a way to take care of every single file automatically? Like
ls | awk '{print}' | python parse.py >> output
Then the problem is how could I read the file name from standardin?
or there are already some built-in functions to do the ls and those kind of work easily?
Thanks!
Os
You can list all files in the current directory using os.listdir:
import os
for filename in os.listdir(os.getcwd()):
with open(os.path.join(os.getcwd(), filename), 'r') as f: # open in readonly mode
# do your stuff
Glob
Or you can list only some files, depending on the file pattern using the glob module:
import os, glob
for filename in glob.glob('*.txt'):
with open(os.path.join(os.getcwd(), filename), 'r') as f: # open in readonly mode
# do your stuff
It doesn't have to be the current directory you can list them in any path you want:
import os, glob
path = '/some/path/to/file'
for filename in glob.glob(os.path.join(path, '*.txt')):
with open(os.path.join(os.getcwd(), filename), 'r') as f: # open in readonly mode
# do your stuff
Pipe
Or you can even use the pipe as you specified using fileinput
import fileinput
for line in fileinput.input():
# do your stuff
And you can then use it with piping:
ls -1 | python parse.py
You should try using os.walk.
import os
yourpath = 'path'
for root, dirs, files in os.walk(yourpath, topdown=False):
for name in files:
print(os.path.join(root, name))
stuff
for name in dirs:
print(os.path.join(root, name))
stuff
I was looking for this answer:
import os,glob
folder_path = '/some/path/to/file'
for filename in glob.glob(os.path.join(folder_path, '*.htm')):
with open(filename, 'r') as f:
text = f.read()
print (filename)
print (len(text))
you can choose as well '*.txt' or other ends of your filename
You can actually just use os module to do both:
list all files in a folder
sort files by file type, file name etc.
Here's a simple example:
import os #os module imported here
location = os.getcwd() # get present working directory location here
counter = 0 #keep a count of all files found
csvfiles = [] #list to store all csv files found at location
filebeginwithhello = [] # list to keep all files that begin with 'hello'
otherfiles = [] #list to keep any other file that do not match the criteria
for file in os.listdir(location):
try:
if file.endswith(".csv"):
print "csv file found:\t", file
csvfiles.append(str(file))
counter = counter+1
elif file.startswith("hello") and file.endswith(".csv"): #because some files may start with hello and also be a csv file
print "csv file found:\t", file
csvfiles.append(str(file))
counter = counter+1
elif file.startswith("hello"):
print "hello files found: \t", file
filebeginwithhello.append(file)
counter = counter+1
else:
otherfiles.append(file)
counter = counter+1
except Exception as e:
raise e
print "No files found here!"
print "Total files found:\t", counter
Now you have not only listed all the files in a folder but also have them (optionally) sorted by starting name, file type and others. Just now iterate over each list and do your stuff.
import pyautogui
import keyboard
import time
import os
import pyperclip
os.chdir("target directory")
# get the current directory
cwd=os.getcwd()
files=[]
for i in os.walk(cwd):
for j in i[2]:
files.append(os.path.abspath(j))
os.startfile("C:\Program Files (x86)\Adobe\Acrobat 11.0\Acrobat\Acrobat.exe")
time.sleep(1)
for i in files:
print(i)
pyperclip.copy(i)
keyboard.press('ctrl')
keyboard.press_and_release('o')
keyboard.release('ctrl')
time.sleep(1)
keyboard.press('ctrl')
keyboard.press_and_release('v')
keyboard.release('ctrl')
time.sleep(1)
keyboard.press_and_release('enter')
keyboard.press('ctrl')
keyboard.press_and_release('p')
keyboard.release('ctrl')
keyboard.press_and_release('enter')
time.sleep(3)
keyboard.press('ctrl')
keyboard.press_and_release('w')
keyboard.release('ctrl')
pyperclip.copy('')
The code below reads for any text files available in the directory which contains the script we are running. Then it opens every text file and stores the words of the text line into a list. After store the words we print each word line by line
import os, fnmatch
listOfFiles = os.listdir('.')
pattern = "*.txt"
store = []
for entry in listOfFiles:
if fnmatch.fnmatch(entry, pattern):
_fileName = open(entry,"r")
if _fileName.mode == "r":
content = _fileName.read()
contentList = content.split(" ")
for i in contentList:
if i != '\n' and i != "\r\n":
store.append(i)
for i in store:
print(i)
If you would like to open files in a directory and append them into a list, do this:
mylist=[]
for filename in os.listdir('path/here/'):
with open(os.path.join('path/here/', filename), 'r') as f:
mylist.append(f.read())
you may try another approach of using os.walk and os.path.join which is a little different from the above options:
for root, dirs, files in os.walk(EnterYourPath):
for name in files:
with open(os.path.join(root,name))as f:
text = f.read()
text variable includes all the files in the folder in the directory.