I'm trying to write data from a fast5 file to a txt file. I'm able to do so by going into the directory where the files are and using this code:
# Append the name of every .fast5 file in the current working directory
# to the text file `new_txt`.
for filename in os.listdir(os.getcwd()):
    if filename.endswith('.fast5'):
        # The bare file name resolves because the listing is of the
        # current working directory itself.
        with h5py.File(filename, 'r') as hdf:
            with open(new_txt, 'a') as myfile:
                myfile.write('%s \t' % (filename))
However, I am now trying to access the files through the main directory, by looping through specific subfolders where the files are located and accessing the files that way, by using this code:
# Walk the tree under `path`; for every sub-directory whose name starts with
# 'pass' or 'fail', walk it again and append each .fast5 file name to `new_txt`.
for root, dirs, files in os.walk(path):
    for d in dirs:
        if d.startswith('pass') or d.startswith('fail'):
            # NOTE: `d` is a bare directory name as yielded by os.walk; it is
            # not joined with `root` before being walked.
            for rootfolder, blankdirs, fast5files in os.walk(d):
                for filename in fast5files:
                    if filename.endswith('.fast5'):
                        # NOTE: `filename` carries no directory component
                        # (os.walk yields bare names). This is the line that
                        # raises the reported IOError.
                        with h5py.File(filename, 'r') as hdf:
                            with open(new_txt, 'a') as myfile:
                                myfile.write('%s \t' % (filename))
This code gives the error:
IOError: Unable to open file (Unable to open file: name = 'minion2_chip61_re_n90_yt2_2644_1_ch108_file0_strand.fast5', errno = 2, error message = 'no such file or directory', flags = 0, o_flags = 0)
which confuses me since it is able to get the filename, but somehow not able to read from it, which it could under the original code. The error occurs at this line:
with h5py.File(filename, 'r') as hdf:
Why is h5py not able to open/read the file in this way?
You need to prepend the directory that os.walk is currently traversing to the filename:
....
if filename.endswith('.fast5'):
hdf5_path = os.path.join(root, filename)
with h5py.File(hdf5_path, 'r') as hdf:
...
Related
I'm trying to loop through files in multiple subdirectories in order to :
1- Add some text inside the files (ending with .ext)
2- Change the extension of each file from .ext to .ext2
The script works fine when I have only one subdir in the main directory, but when I try to run the script on multiple subdirs it says:
line 8, in
with open(name, "r") as f:
FileNotFoundError: [Errno 2] No such file or directory: "here the name of the subdir"
import os
# Insert XML wrapper lines into every ".ext" file found under `directory`,
# then rename the file.
directory = 'C:\\Users\\folder\\subfolders'
for dir, subdirs, files in os.walk(directory):
    for name in files:
        if name.endswith((".ext")):
            # `name` is the bare file name yielded by os.walk; it is opened
            # here without the containing directory `dir`.
            with open(name, "r") as f:
                XMLContent = f.readlines()
                XMLContent.insert(6, '<XMLFormat>\n')
                XMLContent.insert(40, '\n</XMLFormat>')
            with open(name, "w") as f:
                XMLContent = "".join(XMLContent)
                f.write(XMLContent)
            # The rename, unlike the opens above, uses the joined path; the
            # new name replaces everything from '.ext' onward with ".ext1".
            os.rename(os.path.join(dir, name), os.path.join(dir, name[:name.index('.ext')] +".ext1"))
Above is a screenshot of the sub dirs I have in the folder (1.Modified).
I've also created a new folder called all and put in it three folders and for each folder, I've created 2 files of .ext type.
So, I was able to write inside each file of them and change its name as well.
import os
# For every ".ext" file under the test tree: insert the XML wrapper lines,
# write the file back, and rename it from ".ext" to ".ext2".
for root, dirs, files in os.walk("/Users/ghaith/Desktop/test/all"):
    for file in files:
        if not file.endswith('.ext'):
            continue
        # Build the full path from the directory currently being walked;
        # os.path.join picks the right separator for the platform.
        path = os.path.join(root, file)
        with open(path, "r") as f:
            content = f.readlines()
        content.insert(1, '<XMLFormat>\n')
        content.insert(3, '\n</XMLFormat>')
        with open(path, "w") as f:
            f.write("".join(content))
        # Appending '2' turns the ".ext" suffix into ".ext2".
        os.rename(path, path + '2')
Output:
< XMLFormat >
< /XMLFormat >
You need to include the directory in the path when you open the file:
with open(os.path.join(directory, name), "r") as f:
However, I think the best way is to use os.listdir() to loop over the directory:
for item in os.listdir(directory):
if item.endswith(".ext"):
with open(os.path.join(directory, item), "r") as r:
I have read all the Stack Exchange help files on looping through subfolders, as well as the os documentation, but I am still stuck. I am trying to loop over files in subfolders, open each file, extract the first number in the first line, copy the file to a different subfolder (with the same name, but in the output directory), and rename the file copy with the number as a suffix.
import os
import re
# Copy every file under `inputpath` into a same-named sub-folder of
# `outputpath`, appending "_<first number found in the file>" to its name.
# NOTE(review): the nesting below is reconstructed; in particular, whether the
# copy step sits inside the `else` branch should be confirmed.
outputpath = "C:/Users/Heather/Dropbox/T_Files/Raw_FRUS_Data/Wisconsin_Copies_With_PageNumbers"
inputpath = "C:/Users/Heather/Dropbox/T_Files/Raw_FRUS_Data/FRUS_Wisconsin"
suffix=".txt"
for root, dirs, files in os.walk(inputpath):
    for file in files:
        file_path = os.path.join(root, file)
        # Name of the directory that directly contains the file.
        foldername=os.path.split(os.path.dirname(file_path))[1]
        filebname=os.path.splitext(file)[0]
        filename=filebname + "_"
        f=open(os.path.join(root,file),'r')
        data=f.readlines()
        # readlines() returns a list, never None, so the `else` branch is the
        # one that runs; `f` is only closed explicitly in the `if` branch.
        if data is None:
            f.close()
        else:
            # Find the first run of digits anywhere in the file.
            with open(os.path.join(root,file),'r') as f:
                for line in f:
                    s=re.search(r'\d+',line)
                    if s:
                        pagenum=(s.group())
                        break
            # `pagenum` is only assigned when a digit was found above.
            # The output sub-folder is not created here before opening.
            with open(os.path.join(outputpath, foldername,filename+pagenum+suffix), 'w') as f1:
                with open(os.path.join(root,file),'r') as f:
                    for line in f:
                        f1.write(line)
I expect the result to be copies of the files in the input directory placed in the corresponding subfolder in the output directory, renamed with a suffix, such as "005_2", where 005 is the original file name, and 2 is the number the python code extracted from it.
The error I get seems to indicate that I am not looping through files correctly. I know the code for extracting the first number and renaming the file works because I tested it on a single file. But using os.walk to loop through multiple subfolders is not working, and I can't figure out what I am doing wrong. Here is the error:
File "<ipython-input-1-614e2851f16a>", line 23, in <module>
with open(os.path.join(outputpath, foldername,filename+pagenum+suffix), 'w') as f1:
IOError: [Errno 2] No such file or directory: 'C:/Users/Heather/Dropbox/T_Files/Raw_FRUS_Data/Wisconsin_Copies_With_PageNumbers\\FRUS_Wisconsin\\.dropbox_1473986809.txt'
Well, this isn't elegant, but it worked:
import os
import re
from glob import glob

# Extension given to every copied file (the script in the question used
# ".txt"); defined here so the snippet runs on its own.
suffix = ".txt"

# The "..." parts of both paths are elided placeholders from the original post.
folderlist=glob("C:\\...FRUS_Wisconsin*\\")
outputpath = "C:\\..\\Wisconsin_Copies_With_PageNumbers"

for folder in folderlist:
    # NOTE(review): index 7 presumably selects the sub-folder name for the
    # author's real (elided) path depth -- confirm before reusing.
    foldername = str(folder.split('\\')[7])
    for root, dirs, files in os.walk(folder):
        for file in files:
            filename = os.path.splitext(file)[0] + "_"
            # Skip names beginning with "._".
            if filename.startswith('._'):
                continue
            source = os.path.join(root, file)

            # Find the first run of digits in the file. Reset per file so a
            # file without digits cannot reuse the previous file's number.
            pagenum = None
            with open(source, 'r') as f:
                for line in f:
                    s = re.search(r'\d+', line)
                    if s:
                        pagenum = s.group()
                        break
            if pagenum is None:
                # No number to use as a suffix: skip rather than fail with a
                # NameError or mislabel the copy.
                continue

            target = os.path.join(outputpath, foldername, filename + pagenum + suffix)
            with open(target, 'w') as f1:
                with open(source, 'r') as f:
                    for line in f:
                        f1.write(line)
I have multiple files in 7 different folder directories. All of these files have the same name, and I want to combine those files with the same name as one file, in another directory
import os
from itertools import chain
# Attempt to append the contents of the input folders' files to same-named
# files in Test_output.
paths = (r'C:/Users/Test_folder/Input/', r'C:/Users/Test_folder/Input_2/')
for path, dirs, files in chain.from_iterable(os.walk(path) for path in paths):
    # Iterates the `paths` tuple (directory strings), not `files`.
    for fname in paths:
        # `fname` is a str, so this loop yields its characters one by one.
        for line in fname:
            # `fname` ends with '/', so os.path.basename(fname) is '' and the
            # join evaluates to the output directory itself.
            f = open(os.path.join(r'C:/Users/Test_folder/Test_output/', os.path.basename(fname)), 'a')
            f.write ('{:}\n'.format(line))
            f.close()
Error:
f = open(os.path.join(r'C:/Users/Test_folder/Test_output/', os.path.basename(fname)), 'a')
IOError: [Errno 13] Permission denied: 'C:/Users/Test_folder/Test_output/'
>>>
For the permission-denied issue:
with open(os.path.join('type filename here' , os.path.basename(line)), 'w')
Or
# Read the full contents of each entry listed in the directory `src`.
for filename in os.listdir(src):
    # os.listdir returns bare names, so join with `src` before opening.
    path = os.path.join(src, filename)
    with open(path, "r") as inputFile:
        content = inputFile.read()
The logic of your code is wrong:
for fname in paths should be for fname in files
for line in fname will not read the file fname line by line as fname is a string, not a file object
The permission error occurs because your code tries to open a directory for appending.
Try:
import os
from itertools import chain
# Append every file found under the input trees to a file of the same name
# in Test_output, so same-named files end up concatenated.
paths = (r'C:/Users/Test_folder/Input/', r'C:/Users/Test_folder/Input_2/')
walks = chain.from_iterable(os.walk(top) for top in paths)
for folder, _subdirs, names in walks:
    for fname in names:
        source = os.path.join(folder, fname)
        target = os.path.join('C:/Users/Test_folder/Test_output/', fname)
        with open(source) as fin:
            # Mode 'a' so later same-named files are added after earlier ones.
            with open(target, 'a') as fout:
                fout.write(fin.read())
If you're using windows, re-run your ide as administrator.
I am new to python. I am trying to read multiple files one by one from subfolders, do some processing and output. Below is my code:
import os
# Walk `rootdir` and try to read, then rewrite, each file found.
# Uses the Python 2 `print` statement.
rootdir = 'dir'
for subdir, dirs, files in os.walk(rootdir):
    for fname in files:
        # The joined path is printed but not stored anywhere.
        print os.path.join(subdir, fname)
        # Opens the bare file name, without `subdir`.
        f = open(fname, 'r')
        lines = f.readlines()
        f.close()
        f = open(fname, 'w')
        for line in lines:
            # NOTE(review): `f` is a file object here; the intent of this
            # line cannot be determined from the snippet.
            f['X1'] = f['X1'].astype(str)
But I am having the following error:
IOError: [Errno2] No such file or directory : 'test.txt'
Your problem is that you are doing this:
os.path.join(subdir, fname)
But you are not assigning it to a variable, so when you do this:
f=open(fname,'r')
You are still only using the filename.
You should do this:
file_path = os.path.join(subdir, fname)
So now you actually have the file path. Then this:
f=open(file_path,'r')
The issue I'm having is that instead of all of the lines from the read file being written to the output, only the last line ends up in the output file. I believe it is getting overwritten again and again, but I can't tell how to fix the loop. If anyone could help me out, it'd be greatly appreciated. The issue may be that I am opening my file repeatedly in a for loop.
import os
import re
# 1.walk around directory and find lastjob.txt file in one of folders
# (the code below actually matches files named "text.txt")
rootDir = "C:\\Users\Bob\Desktop\Path Parsing Project"
for path, dirs, files in os.walk(rootDir):
    for filename in files:
        fullpath = os.path.join(path, filename)
        if filename=="text.txt":
            # 2.open file. read from file
            fi = open(fullpath, 'r')
            # 3.parse text in incoming file and use regex to find PATH
            for line in fi:
                m = re.search("(Adding file.*)",line)
                if m:
                    #4.write path and info to outgoing file
                    #print(line)
                    # Mode 'w' truncates the file each time it is opened, and
                    # this open runs once per matching line.
                    fo = open('outputFile', 'w')
                    fo.write(line + '\n')
By placing fo = open('outputFile', 'w') at the beginning, I got the desired result and the script processed much faster.
import os
import re
# 1. Walk the directory tree and collect "Adding file..." lines from every
#    text.txt found in any folder.
rootDir = r"C:\Users\Bob\Desktop\Path Parsing Project"
adding_file = re.compile(r'(Adding file.*)')

# Open the output once, before the walk: reopening it in 'w' mode for each
# match would truncate it every time and keep only the last line.
with open('outputFile', 'w') as fo:
    for path, dirs, files in os.walk(rootDir):
        for filename in files:
            if filename != "text.txt":
                continue
            fullpath = os.path.join(path, filename)
            # 2. Open each input in a `with` block so every file is closed,
            #    not just the last one, and nothing fails if none was found.
            with open(fullpath, 'r') as fi:
                # 3. Keep only the lines matching the pattern.
                for line in fi:
                    if adding_file.search(line):
                        fo.write(line)