Selecting files based on creation date - python

I have a folder named myclientcard that contains 69 subfolders. Each of those subfolders contains further subfolders, one of which is an error folder holding a number of txt files. I want to collect the contents of the text files inside the error folders of all 69 subfolders, restricted to files created between 17/01/2019 and 24/01/2019, and convert the result into an Excel file.
import os
import numpy as np
from os import listdir
from os.path import join
import pandas as pd
from pandas import ExcelWriter
from pandas import ExcelFile
# Raw string so the backslash in the Windows path is taken literally
# ("\m" is not a valid escape sequence in a normal string literal).
mypath = r"D:\myclientcard"

# Every entry of the root folder whose name does not contain '.txt' is
# treated as a client sub-folder.
client_dirs = [join(mypath, f) for f in listdir(mypath) if '.txt' not in f]

# Concatenate the contents of every text file found in the ERROR folders into
# one intermediate file.  The output file is opened once (in append mode, as
# before) instead of once per input file, and every handle is closed.
with open('result.txt', 'a') as combined:
    for client_dir in client_dirs:
        error_dirs = [join(client_dir, f) for f in listdir(client_dir) if 'ERROR' in f]
        for error_dir in error_dirs:
            textfiles = [join(error_dir, f) for f in listdir(error_dir) if '.txt' in f]
            for text_file in textfiles:
                with open(text_file, 'r') as source:
                    combined.write(source.read())

# The original passed delim_whitespace='' here; an empty string is falsy, so
# it presumably had no effect and the default comma separator was used.
read_files = pd.read_csv('result.txt')

# Actually write the combined data to the Excel file; the context manager
# saves and closes the workbook.
with ExcelWriter('output.xlsx') as writer:
    read_files.to_excel(writer, index=False)

Using the answers from here and here. Assuming you are on a windows platform.
import os
import numpy as np
from os import listdir
from os.path import join
# Importing datetime module
from datetime import datetime as dt
import pandas as pd
from pandas import ExcelWriter
from pandas import ExcelFile
# Raw string so the backslash in the Windows path is taken literally
# ("\m" is not a valid escape sequence in a normal string literal).
mypath = r"D:\myclientcard"
# Add start date here
start_date = dt.strptime('17/01/2019', '%d/%m/%Y')
# Add end date here
end_date = dt.strptime('24/01/2019', '%d/%m/%Y')


def created_in_range(file_path):
    """Return True if file_path was created between start_date and end_date.

    os.path.getctime() returns seconds since the epoch as a float, so it has
    to be converted to a datetime before it can be compared with the
    datetime bounds (comparing the raw float raises TypeError).  Only the
    calendar dates are compared, which makes both the start day and the end
    day inclusive.  On Windows getctime() is the creation time; on Unix it is
    the time of the last metadata change.
    """
    created = dt.fromtimestamp(os.path.getctime(file_path))
    return start_date.date() <= created.date() <= end_date.date()


# Every entry of the root folder whose name does not contain '.txt' is
# treated as a client sub-folder.
client_dirs = [join(mypath, f) for f in listdir(mypath) if '.txt' not in f]

# Concatenate the matching text files into one intermediate file.  It is
# opened once in write mode so results from an earlier run (possibly with a
# different date range) do not leak into this one.
with open('result.txt', 'w') as combined:
    for client_dir in client_dirs:
        error_dirs = [join(client_dir, f) for f in listdir(client_dir) if 'ERROR' in f]
        for error_dir in error_dirs:
            textfiles = [join(error_dir, f) for f in listdir(error_dir) if '.txt' in f]
            # Filtering on the basis of date
            textfiles = [f for f in textfiles if created_in_range(f)]
            for text_file in textfiles:
                with open(text_file, 'r') as source:
                    combined.write(source.read())

# The original passed delim_whitespace='' here; an empty string is falsy, so
# it presumably had no effect and the default comma separator was used.
read_files = pd.read_csv('result.txt')

# Actually write the combined data to the Excel file; the context manager
# saves and closes the workbook.
with ExcelWriter('output.xlsx') as writer:
    read_files.to_excel(writer, index=False)
On a side note, consider optimizing your code. Also try os.walk, it can be useful at times!


Outputting or reading columns in a different order using pandas/python

I have a Python script that reads a CSV file and outputs it to HTML.
I would like to change the order of the columns once written to the HTML file, but I don't know if I should do this on read or write or what function to use, here is my code so far.
import pandas as pd
import os
import shutil
import glob
#create the directory for holding the HTML files if it doesn't exist
# NOTE: HTMLPATH is not defined in this snippet; it must be set to the
# destination folder before this code runs.
os.makedirs(HTMLPATH, 0o777, exist_ok=True)

#convert .txt file to a html file in the HTML folder
for quefile in glob.iglob('C:/NMS4/QUE/*.txt'):
    #can't convert an empty file so only convert if file size in not 0
    if os.path.getsize(quefile) == 0:
        continue
    csv = pd.read_csv(quefile, header=None, usecols=[0,3,4,15,34,43,44,129], names=['OrderNo','Req Qty','Planned Start','Resource','Op','Part','Desc','Qty Recd'])
    html_table = csv.to_html()
    # Use one name for both writing and moving; the original wrote
    # '<name>.html' but moved '<name>.HTML', which only matches on a
    # case-insensitive file system.
    html_file = quefile + '.html'
    # Write the table and close the file before moving it.
    with open(html_file, 'w') as f:
        f.write(html_table)
    shutil.move(html_file, HTMLPATH)
Any help, greatly appreciated.
I've been looking at, but can't quite find out how to re-order the columns.
Changes to get the solution are,
import pandas as pd
import os
import shutil
import glob
#create the directory for holding the HTML files if it doesn't exist
# NOTE: HTMLPATH is not defined in this snippet; it must be set to the
# destination folder before this code runs.
os.makedirs(HTMLPATH, 0o777, exist_ok=True)

# Order in which the columns should appear in the HTML table.  Selecting the
# DataFrame with a list of column names returns the columns in that order.
column_order = ['Order No', 'Op', 'Part', 'Desc', 'Planned Start', 'Req Qty', 'Resource', 'Qty Recd']

#convert .txt file to a html file in the HTML folder
for quefile in glob.iglob('C:/NMS4/QUE/*.txt'):
    #python can't convert an empty file so only convert if file size in not 0
    if os.path.getsize(quefile) == 0:
        continue
    csv = pd.read_csv(quefile, header=None, usecols=[0,3,4,15,34,43,44,129], names=['Order No','Req Qty','Planned Start','Resource','Op','Part','Desc','Qty Recd'])
    df = csv[column_order]
    html_table = df.to_html()
    # Use one name for both writing and moving; the original wrote
    # '<name>.html' but moved '<name>.HTML', which only matches on a
    # case-insensitive file system.
    html_file = quefile + '.html'
    # Write the table and close the file before moving it.
    with open(html_file, 'w') as html_out:
        html_out.write(html_table)
    shutil.move(html_file, HTMLPATH)

How can I retrieve images with specific names?

I have a folder containing 82,000 images and a file containing the names of the 50,000 images I need. How can I use the names in the file to retrieve the images themselves?
I tried this
import os, os.path
import fnmatch
from os import listdir
from os.path import isfile, join
import shutil
from shutil import copyfile
from pathlib import Path
# Attempt to move the images listed in name_image.txt from scr to dst.
# NOTE: scr and dst are not defined in this snippet.
id_ = "name_image.txt"
files = os.listdir(scr)
for img_filename in os.listdir(scr):
    # NOTE: x is reassigned on every pass, so after the loop it only holds
    # the pieces of the last file name returned by os.listdir().
    x = os.path.splitext(img_filename)[0].split('jpeg')
with open(id_) as f:
    lines = f.readlines()
# NOTE: the loop variable below reuses the name "lines", so the names just
# read from the file are discarded and never compared against anything.
for lines in os.listdir(scr):
    if lines in x:
        shutil.move(os.path.join(scr,lines), dst)
but it does not work.
the file contains names in this format
try this
import glob
import os
import shutil

id_ = "name_image.txt"

# NOTE: scr (source folder) and dst (destination folder) are not defined in
# this snippet; they must be set before this code runs.

# Read the wanted file names into a set: membership tests on a set are O(1),
# which matters when tens of thousands of names are checked against tens of
# thousands of files.  Blank lines are ignored.
with open(id_) as names_file:
    wanted = {line.strip() for line in names_file if line.strip()}

# '*.jpeg' only matches files directly inside scr, so recursive=True (which
# only affects '**' patterns) is not needed.
for image_path in glob.glob(os.path.join(scr, '*.jpeg')):
    image_name = os.path.basename(image_path)
    if image_name in wanted:
        # shutil.move also works when scr and dst are on different drives,
        # where os.rename fails.
        shutil.move(image_path, os.path.join(dst, image_name))

reading multiple csv file from a different directory in python

import csv
import os

import pandas

df_list = []
path = "C:/Users/bubai/Desktop/try/scrapy/output"
#all csv file
for file in os.listdir(path):
    # NOTE: os.listdir() returns bare file names without the directory, so
    # only names such as 'poem1.csv' are stored here.
    df_list.append(file) # all csv file in this
for i in df_list:
    # NOTE: the bare name is resolved relative to the current working
    # directory, not relative to `path`; this is what raises FileNotFoundError.
    df = pandas.read_csv(i) # open one by one
I get this error: FileNotFoundError: [Errno 2] File b'poem1.csv' does not exist: b'poem1.csv'
The file names are saved like poem1.csv.
You need to append the filename to the path.
import csv
import pandas
import os
df_list = []
path = "C:/Users/bubai/Desktop/try/scrapy/output"
#all csv file
for file in os.listdir(path):
    # os.listdir() returns every entry of the folder as a bare name: keep
    # only the CSV files and prepend the directory so they can be opened
    # from any working directory.
    if file.lower().endswith('.csv'):
        df_list.append(os.path.join(path, file)) # all csv file in this
for i in df_list:
    df = pandas.read_csv(i) # open one by one
You need to concatenate the directory name with the filename in order to refer to the file.
import os
# Join the directory and the bare file name; the closing parenthesis of
# read_csv() was missing.
df = pandas.read_csv(os.path.join(path, i))

Speed up parsing of gzipped jsonlines files

I have about 5,000 .gzip files (~1MB each). Each of these files contains data in a jsonlines format. Here's what it looks like:
I want to parse these files and convert them to a pandas dataframe. Is there a way to speed up this process? Here's my code but it's kinda slow (0.5s per file)
import pandas as pd
import jsonlines
import gzip
import os
import io
path = 'data/apps/'
files = os.listdir(path)
result = []
for n, file in enumerate(files):
    print(n, file)
    # Read the whole compressed file into memory ...
    with open(f'{path}/{file}', 'rb') as f:
        data = f.read()
    # ... decompress it in one go and wrap the bytes in a file-like object
    # for the jsonlines reader.
    unzipped_data = gzip.decompress(data)
    decoded_data = io.BytesIO(unzipped_data)
    reader = jsonlines.Reader(decoded_data)
    for line in reader:
        # Keep only the records of category 6014.
        if line['category_id'] == 6014:
            result.append(line)
df = pd.DataFrame(result)
This should allow you to read each line without loading the whole file.
import pandas as pd
import json
import gzip
import os
path = 'data/apps/'
files = os.listdir(path)
result = []
for n, file in enumerate(files):
    print(n, file)
    # gzip.open() decompresses while reading, so the file is processed line
    # by line without loading it completely into memory.
    with gzip.open(os.path.join(path, file), 'rt', encoding='utf-8') as f:
        for line in f:
            # Skip blank lines (e.g. a trailing newline); json.loads() would
            # raise on them.
            if not line.strip():
                continue
            data = json.loads(line)
            # Keep only the records of category 6014.
            if data['category_id'] == 6014:
                result.append(data)
df = pd.DataFrame(result)

How to Import Multiple excel file in PandasDataframe

I cannot load multiple excel files from a directory in only one Dataframe.
I have tried two different ways and neither works.
Gives me this error.
How can I solve the problem? It does find the files when it creates the list, but then it cannot open them to load into the DataFrame.
Any hints ?
import pandas as pd
import os
import glob
import xlrd
# Attempt to load every Excel file of a folder into one DataFrame.
cwd = os.getcwd()
path = '/Users/giovanni/Desktop/news media'
files = os.listdir(path)
# Keep the names whose last three characters are 'lsx' (e.g. 'NOV.xlsx').
files_xls = [f for f in files if f[-3:] == 'lsx']
df = pd.DataFrame()
for f in files_xls:
    # NOTE: f is only the bare file name returned by os.listdir(); it does
    # not include `path`, so it is looked up relative to the current working
    # directory.
    data = pd.read_excel(f)
    df = df.append(data)
FileNotFoundError: [Errno 2] No such file or directory: 'NOV.xlsx'
Try this:
import glob
import os

import pandas as pd

path = '/Users/giovanni/Desktop/news media'

# glob returns paths that already include the directory, so every file can be
# opened regardless of the current working directory.
frames = [pd.read_excel(file) for file in glob.glob(os.path.join(path, '*.xlsx'))]

# DataFrame.append was removed in pandas 2.0; concatenate all frames in one
# call instead.  pd.concat() raises on an empty list, so fall back to an
# empty DataFrame when no file matched.
df = pd.concat(frames) if frames else pd.DataFrame()
Replace your final loop with:
frames = []
for f in files_xls:
    # Prepend the directory: files_xls only holds bare file names.
    full_path = os.path.join(path, f)
    frames.append(pd.read_excel(full_path))
# DataFrame.append was removed in pandas 2.0; concatenate once at the end.
# With no matching files, df keeps its previous (empty) value.
if frames:
    df = pd.concat(frames)
