I was wondering if there was a way to automatically get the date created and rename the file to it. I was coding in it a bit, but then I got an error. The error said FileNotFoundError: [WinError 2] The system cannot find the file specified: 'AutoDateRename'. Where AutoDateRename is the name of the folder I want the renamed files to go into. Is there a way to do this?
The Full Error is:
Exception in Tkinter callback
Traceback (most recent call last):
File "C:\Users\lucio\AppData\Local\Programs\Python\Python38-32\lib\tkinter\__init__.py", line 1883, in __call__
return self.func(*args)
File "c:/Users/lucio/Documents/PythonWorkspace/FileAutoDateRename/script.py", line 21, in RenameDateCreated
os.rename(rf'{E1_Val.get()}/{filename}',rf'{E1_Val.get()}/AutoDateRename/{time.ctime(os.path.getctime(filename))}')
File "C:\Users\lucio\AppData\Local\Programs\Python\Python38-32\lib\genericpath.py", line 65, in getctime
return os.stat(filename).st_ctime
FileNotFoundError: [WinError 2] The system cannot find the file specified: 'AutoDateRename'
My Code:
from tkinter import *
from tkinter import filedialog
import os.path, time
import os
window = Tk()
window.title("File Auto Date Rename")
window.geometry("500x300")
window.resizable(width=False,height=False)
E1_Val = StringVar()
def SelectFolder():
window.filename = filedialog.askdirectory()
E1.delete(0,"end")
E1.insert(END,window.filename)
def RenameDateCreated():
os.mkdir(f'{E1_Val.get()}/AutoDateRename')
for filename in os.listdir(E1_Val.get()):
os.rename(rf'{E1_Val.get()}/{filename}',rf'{E1_Val.get()}/AutoDateRename/{time.ctime(os.path.getctime(filename))}')
L1 = Label(text="Auto Date Rename",font='Helvetica 16 bold underline')
L1.pack()
L2 = Label(text="Folder Path",font='Helvetica 12 bold italic')
L2.pack(pady=5)
E1 = Entry(window,textvariable=E1_Val)
E1.pack(pady=5)
B1 = Button(window,text="Locate",height=1,width=5,command=SelectFolder)
B1.pack(pady=5)
L4 = Label(text="Rename Options",font='Helvetica 12 bold italic')
L4.pack(pady=5)
B2 = Button(window,text="Rename Using Date Created",height=1,width=25,command=RenameDateCreated)
B2.pack(pady=2)
B3 = Button(window,text="Rename Using Date Modified",height=1,width=25)
B3.pack(pady=2)
L5 = Label(text="Make sure that the folder you\nare converting is backed up in a safe location!",font='Helvetica 14 bold')
L5.pack(pady=15)
window.mainloop()
Since you created a directory AutoDateRename inside the source directory (returned by E1_Val.get()), it will be included in the list returned by os.listdir(), and you presumably do not want to rename it. So you should check that each entry is a file and not a directory before renaming it.
And os.path.getctime(filename) should be os.path.getctime(os.path.join(E1_Val.get(), filename)) instead.
Also, time.ctime() may return a string containing characters (such as :) that are not allowed in file names. Use time.strftime() to format the time so that it is a valid file name.
Below is an updated RenameDateCreated():
def RenameDateCreated():
    """Move every regular file of the chosen folder into ``AutoDateRename``.

    The folder is read from ``E1_Val``.  Each file is renamed to
    ``<YYYYmmdd_HHMMSS>_<original name>``, where the timestamp is the value
    reported by ``os.path.getctime`` for that file.  Nothing happens when
    the entry is blank.
    """
    source_dir = E1_Val.get().strip()
    if not source_dir:
        return

    target_dir = os.path.join(source_dir, 'AutoDateRename')
    os.makedirs(target_dir, exist_ok=True)  # create the directory if it does not exist

    for entry in os.listdir(source_dir):
        source_path = os.path.join(source_dir, entry)
        if not os.path.isfile(source_path):
            # Only files are processed; directories (AutoDateRename itself
            # included, since it lives inside source_dir) are left alone.
            continue
        created = time.localtime(os.path.getctime(source_path))
        stamp = time.strftime('%Y%m%d_%H%M%S', created)
        os.rename(source_path, os.path.join(target_dir, f'{stamp}_{entry}'))
Related
I am creating a code editor, but it can only run a Python file that is in the same folder as the code editor file itself.
When I open another folder in the sidebar, select a file from it and run it, my terminal shows an error.
I have tried many times but I am unable to fix it.
Please tell me how to fix it.
error:-
python: can't open file 'D:\\coding notes\\pytho project\\Anmol.py': [Errno 2] No such file or directory
This is my code :-
import os
import subprocess
from tkinter import*
from tkinter import ttk
from tkinter.filedialog import askdirectory, asksaveasfilename
def process_directory(parent,path):
for i in os.listdir(path):
abspath = os.path.join(path,i)
dirv = os.path.isdir(abspath)
oid = tree.insert(parent,END,text=i,open=False)
if dirv:
process_directory(oid,abspath)
def Open(event=None):
global path
for i in tree.get_children():
tree.delete(i)
path = askdirectory()
abspath = os.path.abspath(path)
root_node = tree.insert("",END,text=abspath,open=True)
process_directory(root_node,abspath)
def select_file(event=None):
global file
item = tree.selection()
file = tree.item(item,"text")
abspath = os.path.join(path,file)
editor.delete(1.0,END)
with open(abspath,"r") as f:
editor.insert(1.0,f.read())
def save(event=None):
global file
if file == "":
saveas()
else:
item = tree.selection()
file = tree.item(item,"text")
filepath = os.path.join(path,file)
with open(file,"w") as f:
f.write(editor.get(1.0,END))
root.title(os.path.basename(file) + "-Python")
def saveas(event=None):
global file
file = asksaveasfilename(defaultextension=".py",filetypes=[("Python Files","*.py")])
if file == "":
file = None
else:
with open(file,"w") as f:
f.write(editor.get(1.0,END))
root.title(os.path.basename(file) + "-Python")
def run(event=None):
global file
if file == "":
pass
else:
command = f"python {file}"
run_file = subprocess.Popen(command,stdout=subprocess.PIPE,stderr=subprocess.PIPE,shell=True)
Output, error = run_file.communicate()
output.insert(END,f"{file}>>\n")
output.insert(END,Output)
output.insert(END,error)
root = Tk()
tree = ttk.Treeview()
tree.pack(side=LEFT,fill=BOTH)
file = ""
path = ""
editor = Text()
editor.pack(expand=True,fill=BOTH)
output = Text(height=15)
output.pack(expand=True,fill=BOTH)
root.bind("<Control-Alt-o>",Open)
root.bind("<Control-s>",save)
root.bind("<Control-Alt-s>",saveas)
root.bind("<Shift-Return>",run)
tree.bind("<<TreeviewSelect>>",select_file)
root.mainloop()
If the selected file is inside a sub-folder of the selected folder, then the path built by abspath = os.path.join(path,file) inside select_file() is not the correct absolute path (it is missing the sub-folder information).
One way to fix this is to save the absolute path in the values option when inserting into the treeview:
def process_directory(parent, path):
    """Recursively add the contents of *path* beneath the tree item *parent*.

    Every inserted item stores its absolute path in the ``values`` option so
    the path can be read back from the item later instead of being rebuilt.
    """
    for entry in os.listdir(path):
        full_path = os.path.join(path, entry)
        # save the absolute path in "values" option
        node = tree.insert(parent, END, text=entry, open=False, values=(full_path,))
        if os.path.isdir(full_path):
            # Descend so that nested folders appear under their own node.
            process_directory(node, full_path)
Then inside select_file() you can get the absolute path by getting the values option instead of joining the selected folder and the selected file:
def select_file(event=None):
    """Load the file behind the selected tree item into the editor.

    The absolute path is read from the item's ``values`` option.  The
    global ``file`` is updated only when the selection refers to a regular
    file, so selecting a folder keeps the previously opened file current.

    Args:
        event: The Tk event when used as a binding callback; unused.
    """
    global file
    selection = tree.selection()
    if not selection:
        # Nothing is selected (e.g. the selection was just cleared).
        return

    # tree.selection() returns a tuple of item ids; use the first one.
    values = tree.item(selection[0], "values")
    if not values:
        # Items inserted without "values" (such as the root node created in
        # Open()) carry no path, so indexing [0] would raise IndexError.
        return

    # get the absolute path
    selected_path = values[0]
    if os.path.isfile(selected_path):
        file = selected_path
        editor.delete(1.0, END)
        with open(selected_path, "r") as f:
            editor.insert(1.0, f.read())
The same applies to save() as well.
from glob import glob
from os import rename
import time
arrr = []
def getnames():
with open("name.txt", "r+") as nameFile:
for name in nameFile:
nameFile.readline()
newlinestrip = name.strip("\n")
arrr.append(newlinestrip)
renames()
def renames():
for fname in glob('*.png'):
print(fname)
for name in arrr:
time.sleep(1)
rename(fname, name)
print("bruh")
getnames()
It renames the first file and then it crashes with the error
Traceback (most recent call last):
File ".\rename.py", line 24, in <module>
getnames()
File ".\rename.py", line 13, in getnames
renames()
File ".\rename.py", line 20, in renames
rename(fname, name)
FileNotFoundError: [WinError 2] Den angivne fil blev ikke fundet: 'sans (100).png' -> 'acacia_door_top.png'
And I don't know how to fix this. I have a txt file with the new names that looks something like this
name.png
name1.png
and so on.
One way is to use the zip() function, provided the number of files equals the length of the list of new names. However, you could just use the following if the intention is to rename all the files with names like name1.png, name2.png .. name608.png:
import os


def rename_sequentially(directory, prefix="name", extension=".png"):
    """Rename every regular file in *directory* to ``<prefix><n><extension>``.

    Files are numbered from 0 in sorted-name order.  Sub-directories are
    ignored and do not consume a number.

    Args:
        directory: Folder whose files are renamed.
        prefix: Text placed before the running number.
        extension: Text placed after the running number.

    Returns:
        A list of ``(old_name, new_name)`` pairs for the files that were
        actually renamed.

    Raises:
        FileExistsError: If a target name is already taken by another entry;
            raised before that entry could be overwritten.
    """
    # os.listdir() returns names in arbitrary order; sorting makes the
    # numbering reproducible.
    filenames = sorted(
        name for name in os.listdir(directory)
        if os.path.isfile(os.path.join(directory, name))
    )

    renamed = []
    for count, filename in enumerate(filenames):
        new_name = prefix + str(count) + extension
        if new_name == filename:
            continue  # already has its final name
        # os.path.join() inserts the separator that plain string
        # concatenation ('someDir' + filename) leaves out.
        src = os.path.join(directory, filename)
        dst = os.path.join(directory, new_name)
        if os.path.exists(dst):
            raise FileExistsError(dst)
        os.rename(src, dst)
        renamed.append((filename, new_name))
    return renamed


if __name__ == "__main__":
    rename_sequentially("someDir")
When I run this code I get an error saying the file does not exist. I have created the file and linked back to it by copying the directory from the save part. I can also see the file and have triple-checked the name etc., but it still won't work. Can someone help?
from tkinter import *
import os.path
master= Tk()
master.geometry('500x500+0+0')
def print_value(val):
print ("c1="+str (c1v.get()))
print ("c2="+str(c2v.get()))
c1v=DoubleVar()
c2v=DoubleVar()
c1 = Scale(master, from_=255, to=0, length =400,width =100, troughcolor = 'blue',command=print_value, variable =c1v)
c1.grid(row=1,column=1)
c2 = Scale(master, from_=255, to=0, length =400,width =100, troughcolor = 'blue',command=print_value, variable =c2v)
c2.grid(row=1,column=2)
def func():
pass
file1 = open("C:/Users/Josh Bailey/Desktop/pi_dmx/preset_test.txt")
val1, val2 = (x.split("=")[1] for x in file1)
c1.set(val1)
c2.set(val2)
file1.close()
def record():
save_path = 'C:/Users/Josh Bailey/Desktop/pi_dmx'
name_of_file = ("preset_test ")
completeName = os.path.join(save_path, name_of_file+".txt")
file1 = open(completeName , "w")
toFile = ("c1="+str (c1.get())+ "\n""c2="+str(c2.get()))
file1.write(toFile)
file1.close()
master.mainloop()
rec=Button(master, text="Record",width=20, height=10, bg='Red', command=record)
rec.grid(row=2, column=1)
load=Button(master, text="Load",width=20, height=10, bg='gold',command=func)
load.grid(row=2, column=2)
the error is-
Exception in Tkinter callback Traceback (most recent call last):
File "C:\Python33\lib\idlelib\run.py", line 121, in main
seq, request = rpc.request_queue.get(block=True, timeout=0.05) File "C:\Python33\lib\queue.py", line 175, in get
raise Empty queue.Empty
During handling of the above exception, another exception occurred:
Traceback (most recent call last): File
"C:\Python33\lib\tkinter\__init__.py", line 1475, in __call__
return self.func(*args) File "C:\Users\Josh Bailey\Desktop\save test.py", line 24, in func
file1 = open("C:/Users/Josh Bailey/Desktop/pi_dmx/preset_test.txt") FileNotFoundError: [Errno 2]
No such file or directory: 'C:/Users/Josh Bailey/Desktop/pi_dmx/preset_test.txt'
Inside func, you specify the filepath as being:
C:/Users/Josh Bailey/Desktop/pi_dmx/preset_test.txt
However, your record function makes it to be:
C:/Users/Josh Bailey/Desktop/pi_dmx/preset_test .txt
# Note the extra space here--^
Because of this, Python will not be able to find the file.
To fix the problem, remove the space on this line in record:
name_of_file = ("preset_test ")
# here--^
Now record will create the filepath to be what it should.
Also, that pass inside of func should not be there. It does nothing.
You're on Windows, right? Replace the slashes with backslashes, \, and add an "r" in front of the string, like this:
file1 = open(r"C:\Users\Josh Bailey\Desktop\pi_dmx\preset_test.txt")
Hope this works
I need to get the current directory in an os.walk process. It works when there is just one subdirectory level but fails when there's more. Please advise...
[CODE]
# AFFECTS everything reachable from the directory named in "top",
# assuming there are no symbolic links.
# CAUTION: This is dangerous! For example, if top == '/', it
# could affect all your disk files.
import os, glob, arcpy, csv, sys, shutil, datetime
top = r'L:\Raster_Data\Topographic_Maps'
RootOutput = r'L:\Raster_Data\Topographic_Maps'
#FileList = csv.reader(open('FileList.csv'))
SearchString=['Temp_Pol', 'Spatial_Ex']
filecount=0
successcount=0
errorcount=0
print "Working in: "+os.getcwd()
list =[]
f = open(RootOutput+'\\Success_LOG.txt', 'a')
f.write("Log of files Succesfully processed. RESULT of process run #:"+str(datetime.datetime.now())+"\n")
f.close()
#for File in FileList:
for root, dirs, files in os.walk(top, topdown=False):
#for directory in dirs:
for file in files:
#currentPath=os.path.join(root,directory)
currentPath=os.path.abspath(file)
os.chdir(currentPath)
#arcpy.env.workspace = currentPath
#print os.getcwd()
lstFCs = glob.glob('*'+SearchString[0]+'*.shp')
#print lstFCs
OutPutDir=os.path.abspath(currentPath)
for fc in lstFCs:
filecount=filecount+1
list.append(OutPutDir+"\\"+fc)
lstFCs = glob.glob('*'+SearchString[1]+'*.shp')
#print lstFCs
for fc in lstFCs:
OutPutDir=RootOutput+"\\"+directory
filecount=filecount+1
list.append(OutPutDir+"\\"+fc)
print 'Merging: ' + str(list)
#arcpy.Merge_management(list, RootOutput+"\\Full_Extent.shp")
print 'Created: '+RootOutput+"\\Full_Extent.shp"
f = open(RootOutput+'\\Success_LOG.txt', 'a')
f.write(str(list)+"\n\n Merged to: "+RootOutput+"\\Full_Extent.shp")
f.close()
So the list should be appended with each fc and the full path to it, but it just gets the root path and the final part of the path - not the directories in between.
Thanks for your advice,
[Error Messages]
Working in: L:\Raster_Data\Topographic_Maps Merging:
['L:\Raster_Data\Topographic_Maps\100K\2010_100K\Map_Sheets_BestResolution\qld_north\SE55\ecw\prj_Temp_Polygon_Extent_0.shp',
'L:\Raster_Data\Topographic_Maps\100K\2010_100K\Map_Sheets_BestResolution\qld_north\SE55\ecw\Temp_Polygon_Extent_0.shp',
'L:\Raster_Data\Topographic_Maps\ecw\Spatial_Extent.shp',
'L:\Raster_Data\Topographic_Maps\SC54\Spatial_Extent.shp',
'L:\Raster_Data\Topographic_Maps\SC55\Spatial_Extent.shp',
'L:\Raster_Data\Topographic_Maps\SD54\Spatial_Extent.shp',
'L:\Raster_Data\Topographic_Maps\SD55\Spatial_Extent.shp',
'L:\Raster_Data\Topographic_Maps\SE54\Spatial_Extent.shp',
'L:\Raster_Data\Topographic_Maps\100K\2010_100K\Map_Sheets_BestResolution\qld_north\SE55\prj_Temp_Polygon_Extent_0.shp',
'L:\Raster_Data\Topographic_Maps\100K\2010_100K\Map_Sheets_BestResolution\qld_north\SE55\Temp_Polygon_Extent_0.shp',
'L:\Raster_Data\Topographic_Maps\SE55\Spatial_Extent.shp',
'L:\Raster_Data\Topographic_Maps\SF54\Spatial_Extent.shp',
'L:\Raster_Data\Topographic_Maps\SF55\Spatial_Extent.shp',
'L:\Raster_Data\Topographic_Maps\SF56\Spatial_Extent.shp',
'L:\Raster_Data\Topographic_Maps\SG55\Spatial_Extent.shp',
'L:\Raster_Data\Topographic_Maps\SG56\Spatial_Extent.shp',
'L:\Raster_Data\Topographic_Maps\SH56\Spatial_Extent.shp',
'L:\Raster_Data\Topographic_Maps\Tablelands_100K\Spatial_Extent.shp',
'L:\Raster_Data\Topographic_Maps\200DPI\Spatial_Extent.shp',
'L:\Raster_Data\Topographic_Maps\1M\prj_Temp_Polygon_Extent_0.shp',
'L:\Raster_Data\Topographic_Maps\1M\Temp_Polygon_Extent_0.shp',
'L:\Raster_Data\Topographic_Maps\250K\prj_Temp_Polygon_Extent_1.shp',
'L:\Raster_Data\Topographic_Maps\250K\Temp_Polygon_Extent_1.shp',
'L:\Raster_Data\Topographic_Maps\250K\Spatial_Extent.shp',
'L:\Raster_Data\Topographic_Maps\5M\prj_Temp_Polygon_Extent_2.shp',
'L:\Raster_Data\Topographic_Maps\5M\Temp_Polygon_Extent_2.shp',
'L:\Raster_Data\Topographic_Maps\5M\Spatial_Extent.shp'] Traceback
(most recent call last): File
"L:\Raster_Data\Topographic_Maps\CreateFileList.py", line 64, in
arcpy.Merge_management(list, RootOutput+"\Full_Extent.shp")
File "C:\Program Files\ArcGIS\Desktop10.0\arcpy\arcpy\management.py",
line 3124, in Merge
raise e ExecuteError: Failed to execute. Parameters are not valid.
ERROR 000732: Input Datasets: Dataset
L:\Raster_Data\Topographic_Maps\100K\2010_100K\Map_Sheets_BestResolution\qld_north\SE55\ecw\prj_Temp_Polygon_Extent_0.shp;L:\Raster_Data\Topographic_Maps\100K\2010_100K\Map_Sheets_BestResolution\qld_north\SE55\ecw\Temp_Polygon_Extent_0.shp;L:\Raster_Data\Topographic_Maps\ecw\Spatial_Extent.shp;L:\Raster_Data\Topographic_Maps\SC54\Spatial_Extent.shp;L:\Raster_Data\Topographic_Maps\SC55\Spatial_Extent.shp;L:\Raster_Data\Topographic_Maps\SD54\Spatial_Extent.shp;L:\Raster_Data\Topographic_Maps\SD55\Spatial_Extent.shp;L:\Raster_Data\Topographic_Maps\SE54\Spatial_Extent.shp;L:\Raster_Data\Topographic_Maps\100K\2010_100K\Map_Sheets_BestResolution\qld_north\SE55\prj_Temp_Polygon_Extent_0.shp;L:\Raster_Data\Topographic_Maps\100K\2010_100K\Map_Sheets_BestResolution\qld_north\SE55\Temp_Polygon_Extent_0.shp;L:\Raster_Data\Topographic_Maps\SE55\Spatial_Extent.shp;L:\Raster_Data\Topographic_Maps\SF54\Spatial_Extent.shp;L:\Raster_Data\Topographic_Maps\SF55\Spatial_Extent.shp;L:\Raster_Data\Topographic_Maps\SF56\Spatial_Extent.shp;L:\Raster_Data\Topographic_Maps\SG55\Spatial_Extent.shp;L:\Raster_Data\Topographic_Maps\SG56\Spatial_Extent.shp;L:\Raster_Data\Topographic_Maps\SH56\Spatial_Extent.shp;L:\Raster_Data\Topographic_Maps\Tablelands_100K\Spatial_Extent.shp;L:\Raster_Data\Topographic_Maps\200DPI\Spatial_Extent.shp;L:\Raster_Data\Topographic_Maps\1M\prj_Temp_Polygon_Extent_0.shp;L:\Raster_Data\Topographic_Maps\1M\Temp_Polygon_Extent_0.shp;L:\Raster_Data\Topographic_Maps\250K\prj_Temp_Polygon_Extent_1.shp;L:\Raster_Data\Topographic_Maps\250K\Temp_Polygon_Extent_1.shp;L:\Raster_Data\Topographic_Maps\250K\Spatial_Extent.shp;L:\Raster_Data\Topographic_Maps\5M\prj_Temp_Polygon_Extent_2.shp;L:\Raster_Data\Topographic_Maps\5M\Temp_Polygon_Extent_2.shp;L:\Raster_Data\Topographic_Maps\5M\Spatial_Extent.shp
does not exist or is not supported Failed to execute (Merge).
Working in: L:\Raster_Data\Topographic_Maps Traceback (most
recent call last): File
"L:\Raster_Data\Topographic_Maps\CreateFileList.py", line 28, in
os.chdir(currentPath) WindowsError: [Error 2] The system cannot
find the file specified:
'L:\Raster_Data\Topographic_Maps\ecw\SC54'
directory 'SC54'
dirs ['SC54', 'SC55', 'SD54', 'SD55', 'SE54', 'SE55']
os.path.abspath(dirs[0])
'L:\Raster_Data\Topographic_Maps\ecw\SC54'
os.getcwd() 'L:\Raster_Data\Topographic_Maps\ecw'
Working in: L:\Raster_Data\Topographic_Maps Traceback (most
recent call last): File
"L:\Raster_Data\Topographic_Maps\CreateFileList.py", line 28, in
os.chdir(currentPath) WindowsError: [Error 2] The system cannot
find the file specified:
'L:\Raster_Data\Topographic_Maps\7178cp_dd.ers'
file '7178cp_dd.ers'
os.path.abspath
os.path.abspath(file)
'L:\Raster_Data\Topographic_Maps\7178cp_dd.ers'
Thanks all, I used the input from the forum to complete the script. It's below for anyone who wants it. best,
# AFFECTS everything reachable from the directory named in "top",
# assuming there are no symbolic links.
# CAUTION: This is dangerous! For example, if top == '/', it
# could affect all your disk files.
#
# Walks "top", collects every file whose name ends with one of FileTypes and
# contains one of SearchStrings, then logs the collected list.
import os, arcpy, sys, datetime

top = os.getcwd()
RootOutput = top
FileTypes = ['shp']
SearchStrings = ['Temp_Pol', 'Spatial_Ex']

print("Working in: " + os.getcwd())

# "with" closes the log even if the write fails.
with open(RootOutput + '\\Success_LOG.txt', 'a') as f:
    f.write("Log of files Succesfully processed. RESULT of process run #:" + str(datetime.datetime.now()) + "\n")

# Named matchedFiles rather than "list" so the built-in list() stays usable.
matchedFiles = []
for root, dirs, files in os.walk(top, topdown=False):
    for fl in files:
        if not fl.endswith(tuple(FileTypes)):
            continue
        # Test the file name only: testing the joined path would also match
        # on directory names.  any() adds each file once even when several
        # search strings match it.
        if any(SearchString in fl for SearchString in SearchStrings):
            matchedFiles.append(os.path.join(root, fl))
filecount = len(matchedFiles)

print('Merging: ' + str(matchedFiles))
#Replace with any function you want to carry out on the generated list of files.
#arcpy.Merge_management(matchedFiles, RootOutput+"\\Full_Extent.shp")
print('Created: ' + RootOutput + "\\Full_Extent.shp")

with open(RootOutput + '\\Success_LOG.txt', 'a') as f:
    f.write(str(matchedFiles) + "\n\n Merged to: " + RootOutput + "\\Full_Extent.shp")
You should use
os.path.join(root, file)
instead of simply using file, as suggested in the examples in the os.walk documentation.
Btw, be careful with built-in names. file and list are built-ins (not reserved keywords), so assigning to them shadows the originals:
>>> a = list()
>>> a
[]
>>> list = []
>>> b = list()
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
TypeError: 'list' object is not callable
For some applications we always need to change the current working directory while we are in os.walk recursive call, in that case I would suggest changing the current working directory twice as shown below. I am writing about situations where having absolute file paths will not help .
from os import listdir
from os.path import isfile, join
import os
import re

# store the location of the top most directory
top = os.getcwd()

try:
    for (dirname, dirs, files) in os.walk(top):
        # One chdir per directory is enough; every file listed in "files"
        # lives in "dirname".  "top" is absolute, so changing the working
        # directory does not disturb the walk.
        os.chdir(dirname)
        for filename in files:
            # add all your operations for the current job in the directory
            pass
finally:
    # Now go back to the top of the chain, even if an operation raised
    os.chdir(top)
It looks like you're after a recursive glob. Something like the code below might be of use:
class rglob:
    '''A recursive/regex enhanced glob.

    Adapted from os-path-walk-example-3.py - http://effbot.org/librarybook/os-path.htm

    Instances are iterated through ``__getitem__`` (``for path in rglob(...)``
    or ``list(rglob(...))``) and yield the full path of every directory entry
    whose name matches the pattern.  An instance can be iterated only once.
    '''

    def __init__(self, directory, pattern="*", regex=False, regex_flags=0, recurse=True):
        '''Set up the search.

        directory   -- str, path to search
        pattern     -- str, regular expression or wildcard pattern that entry
                       names are matched against
        regex       -- bool, use regular expression matching (if False, use
                       fnmatch); see http://docs.python.org/library/re.html
        regex_flags -- int, flags passed to the regular expression functions;
                       see http://docs.python.org/library/re.html
        recurse     -- bool, descend into sub-directories?
        '''
        self.stack = [directory]
        self.pattern = pattern
        self.regex = regex
        self.recurse = recurse
        self.regex_flags = regex_flags
        self.files = []
        self.index = 0

    def _matches(self, name):
        '''Return True when the entry name satisfies the configured pattern.'''
        if self.regex:
            import re
            return re.search(self.pattern, name, self.regex_flags) is not None
        import fnmatch
        return fnmatch.fnmatch(name, self.pattern)

    def __getitem__(self, index):
        '''Return the next matching path; *index* itself is ignored.

        Raises IndexError once every directory has been visited, which is
        what ends a ``for`` loop over the instance.
        '''
        while True:
            try:
                name = self.files[self.index]
            except IndexError:
                # Current listing exhausted: pop next directory from stack.
                # pop() on an empty stack raises IndexError, which is left to
                # propagate to signal the end of the iteration.
                self.directory = self.stack.pop()
                try:
                    self.files = os.listdir(self.directory)
                except OSError:
                    # Unreadable or vanished directory: skip it.  Only
                    # OSError is caught so KeyboardInterrupt still works.
                    self.files = []
                self.index = 0
                continue

            # got a filename
            self.index = self.index + 1
            fullname = os.path.join(self.directory, name)
            if self.recurse and os.path.isdir(fullname) and not os.path.islink(fullname):
                self.stack.append(fullname)
            if self._matches(name):
                return fullname
# Collect every *.shp path below "top" and record the result in the log.
shplist = list(rglob(top, '*.shp'))
print('Merging: ' + str(shplist))
#arcpy.Merge_management(shplist, RootOutput+"\\Full_Extent.shp")
print('Created: ' + RootOutput + "\\Full_Extent.shp")
# "with" closes the log even if the write fails.
with open(RootOutput + '\\Success_LOG.txt', 'a') as f:
    f.write(str(shplist) + "\n\n Merged to: " + RootOutput + "\\Full_Extent.shp")
I'm new to python and bioinformatics field. I'm using python-2.6. Now I'm trying to select all fastq.gz files, then gzip.open(just a few lines because it's too huge and time-wasting), then count 'J' , then pick out those files with 'J' count NOT equal to 0.
The following is my code:
#!/usr/bin/python
import os,sys,re,gzip
path = "/home/XXX/nearline"
for file in os.listdir(path):
if re.match('.*\.recal.fastq.gz', file):
text = gzip.open(file,'r').readlines()[:10]
word_list = text.split()
number = word_list.count('J') + 1
if number !== 0:
print file
But I got some errors:
Traceback (most recent call last):
File "fastqfilter.py", line 9, in <module>
text = gzip.open(file,'r').readlines()[:10]
File "/share/lib/python2.6/gzip.py", line 33, in open
return GzipFile(filename, mode, compresslevel)
File "/share/lib/python2.6/gzip.py", line 79, in __init__
fileobj = self.myfileobj = __builtin__.open(filename, mode or 'rb')
IOError: [Errno 2] No such file or directory: 'ERR001268_1.recal.fastq.gz'
What's this traceback: File......
Is there anything wrong with gzip here?
And why can't it find ERR001268_1.recal.fastq.gz? It's the first fastq file in the list, and DOES exist there.
Hope give me some clues, and any point out any other errors in the script.
THanks a lot.
Edit: thx everyone. I followed Dan's suggestion. And I tried on ONE fastq file first. My script goes like:
#!/usr/bin/python
import os,sys
import gzip
import itertools
file = gzip.open('/home/xug/nearline/ERR001274_1.recal.fastq.gz','r')
list(itertools.islice(file.xreadlines(),10))
word_list = list.split()
number = word_list.count('J') + 1
if number != 0:
print 'ERR001274_1.recal.fastq.gz'
Then errors are:
Traceback (most recent call last):
File "try2.py", line 8, in <module>
list(itertools.islice(text.xreadlines(),10))
AttributeError: GzipFiles instance has no attribute 'xreadlines'
Edit again: Thx Dan, I've solved the problem yesterday. Seems GzipFiles don't support xreadlines. So I tried the similar way as you suggested later. And it works. See below:
#!/usr/bin/python
# Print the name of every *.recal.fastq.gz file in "path" whose first 1000
# lines contain at least one 'J'.
import os,sys,re
import gzip
from itertools import islice

path = "/home/XXXX/nearline"
# Every dot is escaped and the end is anchored, so only names that really
# end in ".recal.fastq.gz" are accepted.
fastq_name = re.compile(r'.*\.recal\.fastq\.gz$')

for name in os.listdir(path):
    if not fastq_name.match(name):
        continue
    # os.listdir() returns bare names; join them with the directory so the
    # file is found regardless of the current working directory.
    fullpath = os.path.join(path, name)
    myfile = gzip.open(fullpath, 'rb')
    try:
        # islice() stops after 1000 lines instead of decompressing the file.
        head = list(islice(myfile, 1000))
    finally:
        myfile.close()
    # Lines are bytes in binary mode, hence the b'J' literal.
    number = sum(line.count(b'J') for line in head)
    if number != 0:
        print(name)
on this line:
text = gzip.open(file,'r').read()
file is a filename not a full path so
fullpath = os.path.join(path, file)
text = gzip.open(fullpath,'r').read()
About F.readlines()[:10]: this reads the whole file into a list of lines and only then takes the first 10.
import itertools
list(itertools.islice(F.xreadlines(),10))
This will not read the whole file into memory; it only reads the first 10 lines into a list.
However, gzip.open returns an object that doesn't have .xreadlines(). Since files are iterable over their lines, just:
list(itertools.islice(F,10))
would work as this test shows:
>>> import gzip,itertools
>>> list(itertools.islice(gzip.open("/home/dan/Desktop/rp718.ps.gz"),10))
['%!PS-Adobe-2.0\n', '%%Creator: dvips 5.528 Copyright 1986, 1994 Radical Eye Software\n', '%%Title: WLP-94-int.dvi\n', '%%CreationDate: Mon Jan 16 16:24:41 1995\n', '%%Pages: 6\n', '%%PageOrder: Ascend\n', '%%BoundingBox: 0 0 596 842\n', '%%EndComments\n', '%DVIPSCommandLine: dvips -f WLP-94-int.dvi\n', '%DVIPSParameters: dpi=300, comments removed\n']
Change your code to:
#!/usr/bin/python
# Print the name of every *.recal.fastq.gz file in "path" whose first 10
# lines contain at least one 'J'.
import os,sys,re,gzip
from itertools import islice

path = "/home/XXX/nearline"
# Escape the dots and anchor the end so only real ".recal.fastq.gz" names match.
fastq_name = re.compile(r'.*\.recal\.fastq\.gz$')

for name in os.listdir(path):
    if not fastq_name.match(name):
        continue
    # Open relative to "path", not to the current working directory.
    handle = gzip.open(os.path.join(path, name), 'rb')
    try:
        # Read just the first 10 lines; readlines() would decompress it all.
        head = list(islice(handle, 10))
    finally:
        handle.close()
    # Count per line: "head" is a list, which has no split() method.  No
    # "+ 1" either, otherwise the count could never be zero.
    number = sum(line.count(b'J') for line in head)
    if number != 0:
        print(name)
It's trying to open ERR001268_1.recal.fastq.gz from the working directory, not from /home/XXX/nearline.