Copy Files by creation or modification date - python

Please excuse my ignorance; I am new to programming and Python. The code below lets me copy a file if and only if it was modified in the last 24 hours.
Is there a better way to change my program so that it also considers the last 8 characters of the file name, which are the date the file was created (e.g. _20191108)? The files are usually named as shown below:
7***_13_01_2172_20191106.txt
7***_13_01_2174_20191107.txt
7***_12_01_2175_20191108.txt
7***_13_01_2176_20191108.txt
import time
import os
import shutil
# Folder that is scanned, and folder the recent files are copied into.
src = 'C:/Users/Daniels/Desktop/FileMover/SourceA'
dst = 'C:/Users/Daniels/Desktop/FileMover/SourceB'

# Length of the look-back window (one day), expressed in seconds.
giorno = 60 * 60 * 24

# `primo` is the cutoff: files modified after this instant are copied.
now = time.time()
primo = now - giorno
def last_mod_time(file_name):
    """Return the last-modification time of *file_name* in seconds since the epoch."""
    return os.stat(file_name).st_mtime
# Copy every regular file in `src` that was modified after the `primo` cutoff.
for file_name in os.listdir(src):
    src_filename = os.path.join(src, file_name)
    # os.listdir() also returns sub-directories, which shutil.copy() cannot
    # copy; skip anything that is not a regular file.
    if not os.path.isfile(src_filename):
        continue
    if last_mod_time(src_filename) > primo:
        dst_filename = os.path.join(dst, file_name)
        shutil.copy(src_filename, dst_filename)
        # Report each file that was actually copied.
        print(file_name)
Thank you!

I am not sure I got your question correctly.
If you want to use the filename to generate a date, you can simply parse it and pass it to datetime.datetime(). Remember to pass the appropriate tzinfo information.
import datetime
fname = '20191112.txt'
# The first eight characters of the name are the date, laid out as YYYYMMDD.
year = int(fname[:4])
month = int(fname[4:6])
day = int(fname[6:8])
# Attach UTC explicitly so the result is timezone-aware and .timestamp()
# does not depend on the local timezone of the machine running the script.
date = datetime.datetime(year, month, day, tzinfo=datetime.timezone.utc)
Now date is a datetime object.
>>> date
datetime.datetime(2019, 11, 12, 0, 0, tzinfo=datetime.timezone.utc)
You can convert it easily to a UNIX timestamp if that is what you need in your script.
>>> date.timestamp()
1573516800.0

Related

How to move files with their creation time in a specific range?

I'd like to move files (not copy them) based on a range of file modification times.
I tried to find a solution and found the code below.
import os
import shutil
import time
from datetime import datetime
# Folder that is scanned; the trailing slash matters because the loop below
# builds paths with plain string concatenation (src + filename).
src = "C:/Users/eldri/OneDrive/Desktop/"
# Folder that matching files are copied into.
dst = "C:/Users/eldri/OneDrive/Desktop/output"
# The three values below are read interactively, in this order.
ext = input("[+] File format: ") # extension without the leading dot, e.g. "txt"
start = input("[+] Date start: ") # day/month/year, e.g. "01/07/2020"
end = input("[+] Date end: ") # day/month/year, e.g. "30/07/2020"
def dateRange(createdDate, startDate, endDate):
    """Return True if a file timestamp falls on a day within [startDate, endDate].

    Args:
        createdDate: timestamp text in ``time.ctime()`` layout,
            e.g. ``'Thu Jul 30 10:00:00 2020'``.
        startDate: first day of the range, as ``'dd/mm/YYYY'``.
        endDate: last day of the range, as ``'dd/mm/YYYY'``.

    Returns:
        bool: True when the timestamp's calendar day is between the two
        boundary days, both included.

    Raises:
        ValueError: if any argument does not match its expected layout.
    """
    created_day = datetime.strptime(createdDate, '%a %b %d %H:%M:%S %Y').date()
    first_day = datetime.strptime(startDate, '%d/%m/%Y').date()
    last_day = datetime.strptime(endDate, '%d/%m/%Y').date()
    # Compare calendar days rather than instants: the boundaries parse to
    # midnight, so a strict datetime comparison would silently drop every
    # file modified during the end day (and one stamped exactly at the start).
    return first_day <= created_day <= last_day
# Examine every entry in `src`; copy the ones with the requested extension
# whose modification time is accepted by dateRange(), and report each outcome.
for filename in os.listdir(src):
    source_path = src + filename
    created = time.ctime(os.path.getmtime(source_path))
    wanted = filename.endswith('.' + ext) and dateRange(created, start, end)
    if not wanted:
        print("[+] File not transferred " + filename + created)
        continue
    shutil.copy(source_path, dst)
    print("[+] File transferred " + filename + created)
print("[+] Transfer complete")
This code works when I enter a specific range of modification dates, but I want to move files based on the time of day at which they were modified.
For example: if the modification time is between 00:00 and 12:00, move the file to folder1,
and if it is between 13:00 and 24:00, move it to folder2, without having to input the time range.
Please help if you have any ideas.
Use datetime.time instead if you want to use the time of day. To convert the "Unix" time from os.path.getmtime() to a time object, I think it's easier to first make a datetime. I didn't include any input reading since I wasn't sure how you want to deal with it:
import datetime
import os
def created_in_range(created, start_hours, end_hours):
    """Return True if a timestamp's local time of day is in [start_hours, end_hours).

    Args:
        created: seconds since the epoch, e.g. from ``os.path.getmtime()``.
        start_hours: first hour of the window (0-23), inclusive.
        end_hours: hour at which the window ends (0-23), exclusive.

    Raises:
        ValueError: if an hour is outside 0-23 (raised by ``datetime.time``).
    """
    created_time = datetime.datetime.fromtimestamp(created).time()
    window_start = datetime.time(hour=start_hours)
    window_end = datetime.time(hour=end_hours)
    # The start is inclusive so that a file stamped exactly on the hour
    # (e.g. 00:00:00 for a 0-12 window) is not left out of its own window.
    return window_start <= created_time < window_end
src = "."
# Decide a destination bucket for every regular file in `src`, based on the
# time of day of its last modification.
for filename in os.listdir(src):
    file_path = os.path.join(src, filename)
    # Test the joined path: a bare name is resolved against the current
    # working directory, which is only the same folder while src == ".".
    if not os.path.isfile(file_path):
        continue
    created = os.path.getmtime(file_path)
    if created_in_range(created, 0, 12):
        print("Move to folder_1")
    # datetime.time() cannot express hour 24, so "13:00 until midnight" is
    # written as "not within 00:00-13:00".
    elif not created_in_range(created, 0, 13):
        print("Move to folder_2")
I had to use a not keyword for 1300 - 2400 range since the "hour" argument for time needs to be in between 0 and 23.

Python - Importing all files within a date range from a folder

I have a folder called C:\Flows in which I have multiple files in the format of 20190101_Flows.csv for all the days of this year.
I want to import a file for each day perform operations on it and then save the file. However, I'm stuck at it.
So far, I have written the code below to get the files. But I am stuck after that.
from datetime import date
from datetime import datetime, timedelta
import glob
from os import path

import pandas as pd

DATE_FORMAT = "%Y%m%d"
pathDir = r'C:/Flows/'
dateStart = "20190301"
dateEnd = "20190305"

start_date = datetime.strptime(dateStart, DATE_FORMAT).date()
end_date = datetime.strptime(dateEnd, DATE_FORMAT).date()
delta_one_day = timedelta(days=1)

# Walk the range one day at a time and process that day's file only.
# (The loop variable is not called `date`, so the imported class stays usable.)
current_date = start_date
while current_date <= end_date:
    # Files are named <YYYYMMDD>_Flows.csv, so build the exact name for this
    # day instead of globbing every *_Flows.csv on each iteration.
    filename = path.join(pathDir, current_date.strftime(DATE_FORMAT) + "_Flows.csv")
    # Days without a file are skipped silently.
    if path.isfile(filename):
        # NOTE(review): assumes the CSV has a header row - confirm, or pass
        # header=None to read_csv.
        df = pd.read_csv(filename)
        # Split the second column's text: first three characters, and
        # everything from the fifth character on.
        df['code'] = df[df.columns[1]].astype(str).str[0:3]
        df['bmunit'] = df[df.columns[1]].astype(str).str[4:]
        # Row-wise total of the columns at positions 2..49.
        # NOTE(review): if the file has fewer than 50 columns, the two text
        # columns added above fall inside this slice - confirm the layout.
        df['checko'] = df.iloc[:, 2:50].sum(axis=1)
        # The result only lives in `df`; write it out here if it must be kept.
        print(filename)
    current_date += delta_one_day

python zip extract with timestamp under Windows [duplicate]

I'm trying to extract files from a zip file using Python 2.7.1 (on Windows, fyi) and each of my attempts shows extracted files with Modified Date = time of extraction (which is incorrect).
import os,zipfile
outDirectory = 'C:\\_TEMP\\'
inFile = 'test.zip'
# The `with` blocks close the archive and the underlying file handle even if
# an extraction raises part-way through.
with open(os.path.join(outDirectory, inFile), 'rb') as fh:
    with zipfile.ZipFile(fh) as z:
        for name in z.namelist():
            z.extract(name, outDirectory)
I also tried using the .extractall method, with the same results.
import os,zipfile
outDirectory = 'C:\\_TEMP\\'
inFile = 'test.zip'
# Open the archive in a `with` block so it is closed once extraction ends,
# whether it succeeds or raises.
with zipfile.ZipFile(os.path.join(outDirectory, inFile)) as zFile:
    zFile.extractall(outDirectory)
Can anyone tell me what I'm doing wrong?
I'd like to think this is possible without having to post-correct the modified time per How do I change the file creation date of a Windows file?.
Well, it does take a little post-processing, but it's not that bad:
import os
import zipfile
import time
outDirectory = 'C:\\TEMP\\'
inFile = 'test.zip'
# The `with` blocks close the archive and its file handle even on error.
with open(os.path.join(outDirectory, inFile), 'rb') as fh:
    with zipfile.ZipFile(fh) as z:
        for f in z.infolist():
            # extract() creates missing parent folders, copes with directory
            # entries and returns the path it wrote; opening the target by
            # hand fails for both of those cases.
            name = z.extract(f, outDirectory)
            # f.date_time is (year, month, day, hour, minute, second); pad it
            # to the 9 fields mktime() expects. The trailing -1 lets mktime()
            # decide whether daylight-saving time applies.
            date_time = time.mktime(f.date_time + (0, 0, -1))
            os.utime(name, (date_time, date_time))
Okay, maybe it is that bad.
Based on Jia103's answer, I have developed a function (using Python 2.7.14) which preserves directory and file dates AFTER everything has been extracted. This isolates any ugliness in the function, and you can also use zipfile.ZipFile.extractall() or whatever zip extract method you want:
import time
import zipfile
import os
# Restores the timestamps of zipfile contents.
def RestoreTimestampsOfZipContents(zipname, extract_dir):
    """Set each extracted item's access/modification time to the one stored in the zip.

    Call this after the archive has been extracted into *extract_dir*.

    Args:
        zipname: path of the zip archive that was extracted.
        extract_dir: directory the archive was extracted into.

    Raises:
        OSError: if an archive member is missing from *extract_dir*.
    """
    # Read the member list inside a `with` block so the archive is closed
    # instead of being left open until garbage collection.
    with zipfile.ZipFile(zipname, 'r') as archive:
        members = archive.infolist()
    for f in members:
        # path to this extracted f-item
        fullpath = os.path.join(extract_dir, f.filename)
        # f.date_time has 6 fields; pad to the 9 mktime() expects, with -1
        # so that mktime() works out daylight-saving time itself
        date_time = time.mktime(f.date_time + (0, 0, -1))
        # without this the item keeps the time at which it was extracted
        os.utime(fullpath, (date_time, date_time))
To preserve dates, just call this function after your extract is done.
Here's an example, from a script I wrote to zip/unzip game save directories:
z = zipfile.ZipFile(zipname, 'r')
print('I have opened zipfile %s, ready to extract into %s'
      % (zipname, gamedir))
try:
    os.makedirs(gamedir)
except OSError:
    # Most of the time the dir already exists, which is fine; any other
    # failure (permissions, bad path) must not be hidden.
    if not os.path.isdir(gamedir):
        raise
try:
    z.extractall(gamedir)
finally:
    # Close the archive even if extraction fails.
    z.close()
RestoreTimestampsOfZipContents(zipname, gamedir)  #<-- USED
print('%s zip extract done' % GameName[game])
Thanks everyone for your previous answers!
Based on Ethan Fuman's answer, I have developed this version (using Python 2.6.6) which is a little more concise:
zf = ZipFile('archive.zip', 'r')
# try/finally (rather than `with`) keeps this usable on the Python 2.6 the
# answer targets while still closing the archive when extraction fails.
try:
    for zi in zf.infolist():
        # extract() returns the path it actually wrote, which can differ
        # from zi.filename because member names are sanitised on extraction.
        extracted_path = zf.extract(zi)
        # pad the 6-field zip date to mktime()'s 9 fields; -1 = let mktime()
        # decide about daylight-saving time
        date_time = time.mktime(zi.date_time + (0, 0, -1))
        os.utime(extracted_path, (date_time, date_time))
finally:
    zf.close()
This extracts to the current working directory and uses the ZipFile.extract() method to write the data instead of creating the file itself.
Based on Ber's answer, I have developed this version (using Python 2.7.11), which also accounts for directory mod dates.
from os import path, utime
from sys import exit
from time import mktime
from zipfile import ZipFile
def unzip(zipfile, outDirectory):
    """Extract *zipfile* into *outDirectory*, restoring the stored modification times.

    File times are applied as each file is extracted. Directory times are
    collected and applied only after everything has been extracted.
    """
    pending_dir_times = {}
    with ZipFile(zipfile, 'r') as archive:
        for entry in archive.infolist():
            target = path.join(outDirectory, entry.filename)
            archive.extract(entry, outDirectory)
            # pad the 6-field zip date to the 9 fields mktime() expects
            stamp = mktime(entry.date_time + (0, 0, -1))
            if not path.isdir(target):
                utime(target, (stamp, stamp))
                continue
            # creating files inside a directory touches it again, so its
            # time is set once extraction is complete
            pending_dir_times[target] = stamp
    # everything is on disk now; directory times will stick
    for target, stamp in pending_dir_times.items():
        utime(target, (stamp, stamp))


if __name__ == "__main__":
    unzip('archive.zip', 'out')
    exit(0)
Since directories are being modified as the extracted files are being created inside them, there appears to be no point in setting their dates with os.utime until after the extraction has completed, so this version caches the directory names and their timestamps till the very end.

getting file from date range of the current directory

This is my example path: 'c:\Data\2015-08-01'
Currently I'm getting all the files for one (1) specific date, but my goal is to get the files for a range of date folders. For example, get 2015-08-01 to 2015-08-05, just like a BETWEEN query in MySQL.
import os
import os.path
import tempfile
dateStart = '2015-08-01'
dateEnd = '2015-08-05'
# `year`, `yearMonth` and `count` are set here but not used by the loop below.
year = dateStart[0:4]
yearMonth = year + '_' + dateStart[5:7]
pathDir = 'c:\\Data'
date_folder = os.path.join(pathDir, dateStart)
count = 0
# Layout is <pathDir>/<date>/<sub-folder>/<text files>; only `dateStart` is
# visited, `dateEnd` is not used yet.
for entry in os.listdir(date_folder):
    filefolder = os.path.join(date_folder, entry)
    # A stray file next to the sub-folders would make os.listdir() raise.
    if not os.path.isdir(filefolder):
        continue
    for name in os.listdir(filefolder):
        # Match on the extension; a plain substring test would also accept
        # names such as "txt_backup.zip".
        if name.endswith('.txt'):
            filename = os.path.join(filefolder, name)
            print(filename)
#Output of this, is all text files for this date only '2015-08-01'
#Output of this, is all text files for this date only '2015-08-01'
Its hard for me to loop to pull files for date range e.g. '2015-08-01' to '2015-08-05'. How to do this?
Note that there is one more folder level below each date folder, and the text files I want are inside that last folder. That is why my old code uses filefolder = date_folder + "\" + filefolder, which gets the text files for one date only.
Here is my sample real path data:
\\10.81.67.162\DLCx Logs\DLCx02\2015\2015_08\2015-08-01\Folder\data.text
and if I get the range from 2015-08-01 to 2015-08-05, this will be the output:
\\10.81.67.162\DLCx Logs\DLCx02\2015\2015_08\2015-08-01\Folder\data.text
\\10.81.67.162\DLCx Logs\DLCx02\2015\2015_08\2015-08-02\Folder\data.text
\\10.81.67.162\DLCx Logs\DLCx02\2015\2015_08\2015-08-03\Folder\data.text
\\10.81.67.162\DLCx Logs\DLCx02\2015\2015_08\2015-08-04\Folder\data.text
\\10.81.67.162\DLCx Logs\DLCx02\2015\2015_08\2015-08-05\Folder\data.text
Here is my approach: start with separate year, month, day and build the date:
import glob
import os
# Template for <root>/<YYYY-MM-DD>/<any sub-folder>/<any .txt file>; the three
# {} fields are filled with year, month and day (month/day zero-padded).
pattern = os.path.join(r'C:\Data', '{}-{:02}-{:02}', '*', '*.txt')
year, month = 2015, 8
start_day, end_day = 1, 5
# end_day is inclusive, hence the + 1.
for day in range(start_day, end_day + 1):
    wildcard = pattern.format(year, month, day)
    for filename in glob.glob(wildcard):
        # print() with one argument behaves the same on Python 2 and 3.
        print(filename)
The datetime module makes doing date arithmetic, comparisons, as well as converting them to or from strings relatively easy.
Here's how it could be used to do what you're trying to accomplish (at least according to your most recent comments):
from datetime import datetime, timedelta
from glob import glob
from os import path
DATE_FORMAT = '%Y-%m-%d'
# strftime() template that turns a date into <YYYY>\<YYYY_MM>\<YYYY-MM-DD>\Folder
SUBFOLDER_PATH_FORMAT = r'%Y\%Y_%m\%Y-%m-%d\Folder'
pathDir = r'\\10.81.67.162\DLCx Logs\DLCx02'
dateStart = '2015-08-01'
dateEnd = '2015-09-01'

start_date = datetime.strptime(dateStart, DATE_FORMAT).date()
end_date = datetime.strptime(dateEnd, DATE_FORMAT).date()
delta_one_day = timedelta(days=1)

# Visit every day from start_date to end_date, both included.
date = start_date
while date <= end_date:
    subfolder_path = date.strftime(SUBFOLDER_PATH_FORMAT)
    data_folder = path.join(pathDir, subfolder_path)
    # Days that have no folder are skipped.
    if path.isdir(data_folder):
        # Only `path` is imported from os, so use path.join here;
        # os.path.join would raise NameError.
        for filename in glob(path.join(data_folder, '*.txt')):
            print(filename)
    date += delta_one_day
It is easiest to convert your dates to date objects. You can then just compare them. See example below:
#!/usr/bin/python
import os
import os.path
import tempfile
import datetime
import re
dateStart = '2015-08-03'
dateEnd = '2015-08-05'


# helper function to convert date strings to date objects
def make_dt(ds):
    """Return a ``datetime.date`` built from the first ten characters of *ds*.

    *ds* is expected to start with ``YYYY-MM-DD``.

    Raises:
        ValueError: if the fields are not numeric or do not form a real date.
    """
    return datetime.date(int(ds[0:4]), int(ds[5:7]), int(ds[8:10]))


# convert date string to date object
dt_start = make_dt(dateStart)
dt_end = make_dt(dateEnd)
pathDir = '.'

if __name__ == "__main__":
    for folder in os.listdir(pathDir):
        # only names that are exactly yyyy-mm-dd; the trailing $ rejects
        # names such as "2015-08-01-backup"
        if not re.match(r"[0-9]{4}-[0-9]{2}-[0-9]{2}$", folder):
            continue
        # convert folder name to date object; digits that do not form a
        # real date (e.g. 2015-13-45) are skipped instead of crashing
        try:
            dt_folder = make_dt(folder)
        except ValueError:
            continue
        if dt_start <= dt_folder <= dt_end:
            print("folder %s is between start [%s] and end [%s]" % (folder, dateStart, dateEnd))

How to use python to turn a .dbf into a shapefile

I have been scouring the internet trying to find a pythonic (sp?!) way to process this data..
Every day we will receive a load of data in .dbf format (hopefully) - we then need to save this data as a shapefile.
Does anyone have any links or any suggestions as to my process?
To append the file's creation_date to its name, you need to obtain the creation date with os.stat() and then rename the file with os.rename(). You can format the date string with date.strftime().
import datetime, os
filename = 'original.ext'
fileinfo = os.stat(filename)
# NOTE(review): per the Python docs st_ctime is the creation time on Windows
# but the last metadata change on Unix - confirm the target platform.
creation_date = datetime.date.fromtimestamp(fileinfo.st_ctime)
# The date goes after the whole name, extension included:
# original.ext -> original.ext-YYYY-MM-DD
date_suffix = creation_date.strftime('%Y-%m-%d')
new_name = filename + '-' + date_suffix
os.rename(filename, new_name)
Off the top of my head:
import os
import datetime
myfile = "test.txt"
creationdate = os.stat(myfile).st_ctime
timestamp = datetime.datetime.fromtimestamp(creationdate)
datestr = timestamp.strftime("%Y%m%d")
# Insert the date between the base name and the extension:
# test.txt -> test<YYYYMMDD>.txt
stem, extension = os.path.splitext(myfile)
os.rename(myfile, stem + datestr + extension)
renames test.txt to test20110221.txt.
It was in model builder all along!
# (generated by ArcGIS/ModelBuilder)
# Usage: DBF2SHAPEFILE <XY_Table> <Y_Field> <X_Field> <Output_Feature_Class>
# ---------------------------------------------------------------------------
# Renames one hard-coded file by appending its st_ctime date, then builds an
# XY event layer from the table given on the command line and copies it to
# the requested output feature class.
# Import system modules
import sys, string, os, arcgisscripting, datetime
# Intended to add the creation date to the previous shapefiles in the folder.
# As written it renames only the single file named in `filename`, appending
# '-YYYY-MM-DD' after the full name (extension included).
filename = 'D:/test.txt'
fileinfo = os.stat(filename)
creation_date = datetime.date.fromtimestamp(fileinfo.st_ctime)
os.rename(filename, filename + '-' + creation_date.strftime('%Y-%m-%d'))
# Create the Geoprocessor object
gp = arcgisscripting.create()
# Load required toolboxes...
gp.AddToolbox("C:/Program Files/ArcGIS/ArcToolbox/Toolboxes/Data Management Tools.tbx")
# Script arguments...
# Note the order: the Y field comes before the X field on the command line.
XY_Table = sys.argv[1]
Y_Field = sys.argv[2]
X_Field = sys.argv[3]
Output_Feature_Class = sys.argv[4]
# Local variables...
# NOTE(review): the layer name is an empty string - confirm the geoprocessor
# accepts that as a temporary layer name.
Layer_Name_or_Table_View = ""
# Process: Make XY Event Layer...
# The fields are passed as X then Y, the reverse of the argv order above.
# NOTE(review): the trailing "" is presumably an optional setting left at its
# default - confirm against the tool reference.
gp.MakeXYEventLayer_management(XY_Table, X_Field, Y_Field, Layer_Name_or_Table_View, "")
# Process: Copy Features...
# NOTE(review): the meaning of the trailing "", "0", "0", "0" arguments is
# not visible here - confirm against the tool reference.
gp.CopyFeatures_management(Layer_Name_or_Table_View, Output_Feature_Class, "", "0", "0", "0")
If you wanted to do it without using ArcGIS, you could use OGR's python bindings or the ogr2ogr utility through a subprocess. You could use the utility through a windows batch file, which would be a lot faster than calling the arc process for every file if you have many to do...
As you know it's not a question of changing the extension, there is a specific format required.

Categories