Python tar a directory and symmetric encrypt with gpg - python

At this point the script works great for a single file. When a directory is given, it uses tar to create a single file, which works well; the tar file is then gpg-encrypted with the provided password. The gpg step also works. The problem is that when you decrypt the gpg file, the tar is corrupted every time. I'm trying to find what I'm doing wrong here. Please help.
#!/usr/bin/env python3
# Takes file in does symmetric encryption with the password you provide
# then adds it to a running IPFS(ipfs.io) instance.
#
import os
import argparse
import gnupg
import ipfsapi
import tarfile
# Parse command arguments: both the input path and the password are mandatory
parser = argparse.ArgumentParser(description='Encrypt file/directory and add it to IPFS')
parser.add_argument('-i','--input', help='File.txt or Directory', required=True)
parser.add_argument('-p','--password', help='Password to encrypt with', required=True)
args = parser.parse_args()
# Set GPG Home directory (an empty string is passed here)
gpg = gnupg.GPG(homedir='')
# Set GPG Encoding
gpg.encoding = 'utf-8'
# Get dataToEncrypt full path
dataToEncrypt = (os.path.abspath(args.input))
# Setup tar filename to end with .tar
tarFile = ("{}.tar".format(dataToEncrypt))
# Setup encrypted filename to end with .tar.gpg
# (encryptFile() writes a single input file to "<input>.gpg" instead)
encryptedFile = ("{}.tar.gpg".format(dataToEncrypt))
# Tell module where IPFS instance is located
api = ipfsapi.connect('127.0.0.1', 5001)
def dataTar():
    """Archive ``dataToEncrypt`` into ``tarFile`` unless it is a regular file.

    Regular files are encrypted directly, so no archive is created for them.
    """
    if os.path.isfile(dataToEncrypt):
        return
    # Leaving the ``with`` block closes the archive; no explicit close needed.
    with tarfile.open(tarFile, 'w|') as archive:
        archive.add(dataToEncrypt)
def encryptFile():
    """Symmetrically encrypt the input (or its tar archive) with AES256.

    A regular file is read directly and written to ``<input>.gpg``; anything
    else is read from ``tarFile`` and written to ``<input>.tar.gpg``.  The
    ``ok``, ``status`` and ``stderr`` fields of the result are printed.
    """
    if os.path.isfile(dataToEncrypt):
        source, target = dataToEncrypt, dataToEncrypt + ".gpg"
    else:
        source, target = tarFile, dataToEncrypt + ".tar.gpg"
    with open(source, 'rb') as handle:
        status = gpg.encrypt(handle,
                             encrypt=False,
                             symmetric='AES256',
                             passphrase=args.password,
                             armor=False,
                             output=target)
    print ('ok: ', status.ok)
    print ('status: ', status.status)
    print ('stderr: ', status.stderr)
def ipfsFile(encryptedFile):
    """Add ``encryptedFile`` to IPFS (wrapped in a directory) and return a hash.

    The hash is taken from the second entry of the response returned by
    ``api.add``.
    """
    added = api.add(encryptedFile, wrap_with_directory=True)
    return added[1]['Hash']
def delEncryptedFile(encryptedFile):
    """Delete ``encryptedFile``, printing a message instead of raising on failure.

    Only ``OSError`` (missing file, permission problem, ...) is treated as a
    best-effort failure; a bare ``except`` would also swallow
    ``KeyboardInterrupt`` and programming errors.
    """
    try:
        os.remove(encryptedFile)
    except OSError:
        print("Error: %s unable to find or delete file." % encryptedFile)
def main():
    """Archive the input when needed, then encrypt it.

    The IPFS upload and the clean-up of the encrypted file are currently
    disabled.
    """
    dataTar()
    encryptFile()
    #ipfsFile(encryptedFile)
    #print ("File encrypted and added to IPFS with this hash " + ipfsFile(encryptedFile))
    #delEncryptedFile(encryptedFile)


if __name__ == "__main__":
    main()

Code looks fine. I just tried it with https://pypi.org/project/python-gnupg/ and it works fine. I had to adjust the API calls to match this package, but I don't think that matters. Just diff it to see the changes. I don't see any problem, except that you should decrypt with gpg -d file.tar.gpg | tar xvf -.
#!/usr/bin/env python3
# Takes file in does symmetric encryption with the password you provide then
# adds it to a running IPFS (ipfs.io) instance.
import os
import argparse
import gnupg
import tarfile
parser = argparse.ArgumentParser(
description='Encrypt file/directory and add it to IPFS')
parser.add_argument('-i','--input',
help='File.txt or Directory',
required=True)
parser.add_argument('-p','--password',
help='Password to encrypt with',
required=True)
args = parser.parse_args()
gpg = gnupg.GPG()
gpg.encoding = 'utf-8'
dataToEncrypt = (os.path.abspath(args.input))
tarFile = ("{}.tar".format(dataToEncrypt))
encryptedFile = ("{}.tar.gpg".format(dataToEncrypt))
def dataTar():
    """Archive ``dataToEncrypt`` into ``tarFile`` unless it is a regular file."""
    if os.path.isfile(dataToEncrypt):
        return
    # Leaving the ``with`` block closes the archive; no explicit close needed.
    with tarfile.open(tarFile, 'w|') as archive:
        archive.add(dataToEncrypt)
def encryptFile():
    """Symmetrically encrypt the input (or its tar archive) with AES256.

    A regular file is read directly and written to ``<input>.gpg``; anything
    else is read from ``tarFile`` and written to ``<input>.tar.gpg``.  The
    whole source is read into memory before it is handed to ``gpg.encrypt``.
    """
    if os.path.isfile(dataToEncrypt):
        source, target = dataToEncrypt, dataToEncrypt + ".gpg"
    else:
        source, target = tarFile, dataToEncrypt + ".tar.gpg"
    with open(source, 'rb') as handle:
        status = gpg.encrypt(handle.read(),
                             recipients=None,
                             symmetric='AES256',
                             passphrase=args.password,
                             armor=False,
                             output=target)
    print ('ok: ', status.ok)
    print ('status: ', status.status)
    print ('stderr: ', status.stderr)
def ipfsFile(encryptedFile):
    """Add ``encryptedFile`` to IPFS (wrapped in a directory) and return a hash.

    Relies on a module-level ``api`` client object.
    """
    added = api.add(encryptedFile, wrap_with_directory=True)
    return added[1]['Hash']
def delEncryptedFile(encryptedFile):
    """Delete ``encryptedFile``, printing a message instead of raising on failure.

    Only ``OSError`` is caught so that interrupts and programming errors are
    not silently swallowed.
    """
    try:
        os.remove(encryptedFile)
    except OSError:
        print("Error: %s unable to find or delete file." % encryptedFile)
def main():
    """Archive the input when needed, then encrypt it."""
    dataTar()
    encryptFile()


if __name__ == "__main__":
    main()

Related

How do I create a python script such that it sends an email when csv files in a directory has not updated in the last 24 hours?

I am new to Python and trying to understand how to automate things. I have a folder in which 5 CSV files get updated daily; however, on some days one or two of them are not updated. At the moment I have to check this folder manually. Instead, I want to automate this so that if a CSV file has not been updated in the last 24 hours, an email is sent to me alerting me of this.
My code:
import datetime
import glob
import os
import smtplib
import string
now = datetime.datetime.today() #Get current date (naive local time)
list_of_files = glob.glob('c:/Python/*.csv') # * means all if need specific format then *.csv
latest_file = max(list_of_files, key=os.path.getctime) #get latest file created in folder
# fromtimestamp() returns naive *local* time, the same clock as `now`.
# utcfromtimestamp() would skew the computed age by the local UTC offset.
newestFileCreationDate = datetime.datetime.fromtimestamp(os.path.getctime(latest_file)) # get creation datetime of last file
dif = (now - newestFileCreationDate) #calculating days between actual date and last creation date
logFile = "c:/Python/log.log" #defining a log file
def checkFolder(dif, now, logFile):
    """Log the newest file's age and e-mail an alert when it exceeds one day.

    Args:
        dif: ``datetime.timedelta`` between ``now`` and the newest file.
        now: timestamp written at the start of the log line.
        logFile: path of the log file, opened in append mode.
    """
    if dif > datetime.timedelta(days = 1): #Check if difference between today and last created file is greater than 1 days
        HOST = "12.55.13.12" #This must be your smtp server ip
        SUBJECT = "Alert! At least 1 day wthout a new file in folder xxxxxxx"
        TO = "xx.t#gmail.com"
        FROM = "xx.t#gmail.com"
        text = "%s - The oldest file in folder it's %s old " %(now, dif)
        # str.join works on Python 2 and 3; the string.join() function was
        # removed in Python 3.
        BODY = "\r\n".join((
            "From: %s" % FROM,
            "To: %s" % TO,
            "Subject: %s" % SUBJECT ,
            "",
            text
        ))
        server = smtplib.SMTP(HOST)
        try:
            server.sendmail(FROM, [TO], BODY)
        finally:
            server.quit()
        level = "WARNING"
    else : # If difference between today and last creation file is less than 1 days
        level = "OK"
    # ``with`` closes the log even if the write fails.
    with open(logFile, "a") as log: #Open log file in append mode
        log.write("%s - [%s] The oldest file in folder it's %s old \n" %(now, level, dif)) #Write a log
checkFolder(dif,now,logFile) #Call function and pass 3 arguments defined before
However, this does not run without errors. I just want to be notified by mail about the files in the folder that haven't been updated, whether that is one of the 5 files or all 5 of them.
Use pure python and concise way
import hashlib
import glob
import json
import smtplib
from email.message import EmailMessage
import time
import schedule #pip install schedule
hasher = hashlib.md5()
size = 65536 #to read large files in chunks
list_of_files = glob.glob('./*.csv') #absolute path for crontab
Part 1) Run this script first then comment it out. It will create a json file with hashes of your files.
first_hashes = {}
for x in list_of_files:
    # Use a fresh hasher per file: reusing one md5 object would fold the
    # content of every earlier file into each later digest.
    file_hasher = hashlib.md5()
    with open(x, 'rb') as f:
        buf = f.read(size)
        while len(buf) > 0:
            file_hasher.update(buf)
            buf = f.read(size)
    first_hashes[x] = file_hasher.hexdigest()
with open('hash.json', 'w') as file:
    file.write(json.dumps(first_hashes, indent=2))
Now comment it out or even delete it.
Part 2) Automation script:
def _file_md5(path, chunk_size):
    """Return the hex MD5 digest of the file at *path*, read in chunks."""
    digest = hashlib.md5()  # one hasher per file so digests stay independent
    with open(path, 'rb') as f:
        for chunk in iter(lambda: f.read(chunk_size), b''):
            digest.update(chunk)
    return digest.hexdigest()


def send_email():
    """E-mail the list of CSV files whose content hash did not change.

    Compares each file in ``list_of_files`` with the hash stored for that
    same file in ``hash.json``, rewrites ``hash.json`` with the current
    hashes, and, if any file is unchanged, writes ``check_hash.txt`` and
    mails it as an attachment.
    """
    with open('hash.json') as f: #absolute path for crontab
        data = json.load(f)
    check_hash = {} #Contain hashes that have not changed
    for x in list_of_files:
        new_hash = _file_md5(x, size)
        # Compare with the hash recorded for *this* file; testing membership
        # in data.values() would also match a different file's hash.
        if data.get(x) == new_hash:
            check_hash[x] = new_hash
        data[x] = new_hash
    #update our hashes
    with open('hash.json', 'w') as file: #absolute path for crontab
        file.write(json.dumps(data, indent=2))
    if not check_hash: #nothing unchanged, nothing to report
        return
    filename = "check_hash.txt" #absolute path for crontab
    report = json.dumps(check_hash, indent=2)
    with open(filename, 'w') as f: #absolute path for crontab
        f.write(report)
    # for gmail smtp setup watch youtu.be/JRCJ6RtE3xU
    EMAIL_ADDRESS = 'SMTPAddress#gmail.com'
    EMAIL_PASSWORD = 'SMTPPassWord'
    msg = EmailMessage()
    msg['Subject'] = 'Unupdated files'
    msg['From'] = EMAIL_ADDRESS
    msg['To'] = 'receive#gmail.com'
    msg.set_content('These file(s) did not update:')
    # Attach the text just written instead of re-opening the file unclosed.
    msg.add_attachment(report, filename=filename)
    with smtplib.SMTP_SSL('smtp.gmail.com', 465) as smtp:
        smtp.login(EMAIL_ADDRESS, EMAIL_PASSWORD)
        smtp.send_message(msg)
#for faster testing check other options here github.com/dbader/schedule
schedule.every().day.at("10:30").do(send_email)
while 1:
schedule.run_pending()
time.sleep(1)
EDIT: If you restart your pc, you will need to run this file again to restart schedule, to avoid that, you can use crontab as follows (learn how from youtu.be/j-KgGVbyU08):
# mm hh DOM MON DOW command
30 10 * * * python3 path-to-file/email-script.py #Linux
30 10 * * * python path-to-file/email-script.py #Windows
This will run the script everyday at 10:30 AM IF the pc is ON at that time. For faster testing (run every 1 minute) use:
* * * * * python3 path-to-file/email-script.py
NOTE: If you are going to use crontab, you MUST use absolute paths for all file references and replace
schedule.every().day.at("10:30").do(send_email)
while 1:
schedule.run_pending()
time.sleep(1)
with
if __name__ == "__main__":
send_email()
Tested and it's working great!
Granted, I'm not familiar with CSV handling, but I would import the time module and use time.sleep() to create a timer. Once the timer is up you can set a variable to some value; put a check on that variable in an if statement, and when it reaches the value, send the email.
Are you thinking of something like this?
import os
from datetime import datetime
import smtplib
import textwrap
def send_email_failure():
    """Send the "no new file" alert through the configured SMTP server.

    The reported age is read from the module-level ``oldest_time_hour``.
    The message is also printed before it is sent.
    """
    SERVER = "12.55.13.12" #This must be your smtp server ip
    SUBJECT = "Alert! At least 1 day without a new file in folder xxxxxxx"
    TO = "xx.t#gmail.com"
    FROM = "xx.t#gmail.com"
    TEXT = "%s - The oldest file in folder it's %sh old " %(datetime.now(), oldest_time_hour)
    # TO is a single address string, so it is used as-is: ", ".join(TO)
    # would split it into comma-separated characters.  The empty line
    # separates the headers from the body.
    message = textwrap.dedent("""\
        From: %s
        To: %s
        Subject: %s

        %s
        """ % (FROM, TO, SUBJECT, TEXT))
    print(message)
    # Send the mail
    server = smtplib.SMTP(SERVER)
    try:
        server.sendmail(FROM, [TO], message)
    finally:
        server.quit()
def save_log(logFile, ok_or_failure, time_now, delta):
    """Append one status line to ``logFile``.

    Args:
        logFile: path of the log file, opened in append mode.
        ok_or_failure: ``'ok'`` logs an ``[OK]`` line; any other value logs
            a ``[WARNING]`` line.
        time_now: timestamp written at the start of the line.
        delta: age value written into the message.
    """
    level = "OK" if ok_or_failure == 'ok' else "WARNING"
    # ``with`` closes the file even if the write raises.
    with open(logFile, "a") as log: #Open log file in append mode
        log.write("%s - [%s] The oldest file in folder it's %s old \n" %(time_now, level, delta))
def check_file(filename):
    """Return how many whole hours ago ``filename`` was last modified.

    Files whose name does not end in ``.csv`` yield 0.  When the
    modification time cannot be read, it is treated as timestamp 0.
    """
    print(filename)
    if not filename.endswith('.csv'):
        return 0
    print('csv')
    try:
        mtime = os.path.getmtime(filename) # get modified time
    except OSError:
        mtime = 0
    # Work on the full elapsed time: timedelta.seconds holds only the part
    # below one day, so ages of 24 hours or more were previously truncated.
    elapsed_seconds = datetime.now().timestamp() - mtime
    return int(elapsed_seconds // 3600) # convert to hours
# Walk the 'files' directory and keep the largest age, in hours, that
# check_file() reports for any file below it.
oldest_time_hour = 0
for path, dirs, files in os.walk('./files'): # this need to be modified by case
    for file in files:
        # get each file time of modification
        time = check_file(path+'/'+file)
        # save the oldest time
        oldest_time_hour = max(oldest_time_hour, time)
# anything within 24h is fine; otherwise log the failure and send the alert
if oldest_time_hour <= 24:
    save_log('log.log', 'ok', datetime.now(), oldest_time_hour)
else:
    save_log('log.log', 'failure', datetime.now(), oldest_time_hour)
    send_email_failure()
You will also need an endless loop around this logic, or a cron job that runs this Python script every hour or so.
Why are you checking the last_modified_date? I suggest you to check the modification of the file with md5 checksum.
My Idea is, if you have following files :
file1.csv
file2.csv
file3.csv
file4.csv
file5.csv
You can check their md5 checksum and write the result + DateTime into a file next to the original file. like following :
file1.csv
file1.csv_checksum
Content of file1.csv_checksum
timestamp,checksum
1612820511,d41d8cd98f00b204e9800998ecf8427e
you can check md5 of a file with following code:
>>> import hashlib
>>> hashlib.md5(open('filename.exe','rb').read()).hexdigest()
then you can check the result with the provided one in the checksum file ( and if the checksum file does not exist, just create it for the first time )
I think you can easily handle it with this approach.
At first i started with a task scheduler decorator which will enable you to poll a directory for a fixed delay:
import time
import functools
def scheduled(fixed_delay):
    """Build a decorator that sleeps after every call of the wrapped method.

    Args:
        fixed_delay: name of the attribute, looked up on the wrapped call's
            first positional argument, that holds the delay passed to
            ``time.sleep``.

    Returns:
        A decorator; the decorated function returns the original result
        after sleeping.
    """
    def decorator_scheduled(func):
        # ``wraps`` has to be applied as a decorator; a bare call builds the
        # decorator and discards it, losing the wrapped name and docstring.
        @functools.wraps(func)
        def wrapper_schedule(*args, **kwargs):
            result = func(*args, **kwargs)
            self = args[0]
            delay = getattr(self, fixed_delay)
            time.sleep(delay)
            return result
        return wrapper_schedule
    return decorator_scheduled
Saved it as a separate module named task_scheduler.py.
I will use it in my file watcher:
import os
from task_scheduler import scheduled
import smtplib, ssl
class FileWatcher:
    """Poll a directory and e-mail a notice for stale files.

    A file is reported when it is a regular file with the configured
    extension and was last modified more than 24 hours ago.
    """

    def __init__(self,
                 files_path='./myFiles',
                 extension='.csv',
                 poll_delay=2):
        """Store the directory to watch, the extension filter and the delay.

        ``poll_delay`` is the attribute the ``scheduled`` decorator on
        ``poll_`` reads after every poll.
        """
        self.files_path = files_path
        self.extension = extension
        self.poll_delay = poll_delay

    def notify_host_on_nonchange(self, file_path):
        """Send a "no change" e-mail for ``file_path`` over SMTP-over-SSL."""
        port = 465
        smtp_server = "smtp.gmail.com"
        sender_email = "sender#gmail.com"
        receiver_email = "receiver#gmail.com"
        password = "Your password here" #You may want to read it from file
        message = f"No change in file: {file_path} for 24 hurs!"
        context = ssl.create_default_context()
        with smtplib.SMTP_SSL(smtp_server, port, context=context) as server:
            server.login(sender_email, password)
            server.sendmail(sender_email, receiver_email, message)

    def watch(self):
        """Call ``poll_`` in a loop until interrupted with Ctrl-C."""
        import logging  # local import: this snippet defines no module logger
        try:
            while True:
                self.poll_()
        except KeyboardInterrupt:
            logging.getLogger(__name__).debug('Polling interrupted by user.')

    @scheduled("poll_delay")
    def poll_(self,):
        """Check every entry of ``files_path`` once and notify for stale ones."""
        import time
        from stat import S_ISREG
        for f in os.listdir(self.files_path):
            full_path = os.path.join(self.files_path, f)
            path_stat = os.stat(full_path)
            _, file_ext = os.path.splitext(f)
            # Subtract first, then convert seconds to hours; dividing only
            # the timestamp by 3600 made every file look stale.
            age_hours = (time.time() - path_stat.st_mtime) / 3600
            if age_hours <= 24 or not S_ISREG(path_stat.st_mode) or str(file_ext) != self.extension:
                continue
            self.notify_host_on_nonchange(full_path)
if __name__ == "__main__":
file_listener = FileWatcher()
file_listener.watch()
The class above defines a poll_ function which uses os.stat to check each file's timestamp. If the file's age is 24 hours or less, or it is not a regular file (for example, it is a directory), or it does not have the extension you are looking for, polling skips it; otherwise it calls the notify function to send an e-mail. It uses the Gmail SMTP server as an example, but you can change it as appropriate for your environment. The watch function is a wrapper for continuous polling.
This class is adapted from my machine learning model watcher and loader, you can access that version and project from my github. For further explanation about decorator and script you can check out my medium post.

I'm trying to create a directory with os.mkdir

import pathlib
import subprocess
import argparse
import os
from _datetime import datetime
def get_unique_run_id():
    """Return an identifier for this run and export it as ``UNIQUE_RUN_ID``.

    The value is taken from the ``BUILD_NUMBER`` environment variable, then
    from ``CUSTOM_BUILD_NUMBER``; when neither is set, a
    ``YYYYmmddHHMMSS`` timestamp of the current time is used.
    """
    unique_run_id = (os.environ.get("BUILD_NUMBER")
                     or os.environ.get("CUSTOM_BUILD_NUMBER"))
    if not unique_run_id:
        # %m/%d are month/day.  The previous '%M' (minute) and '%D'
        # (mm/dd/yy, which contains '/') produced an id that is not usable
        # as a single directory name.
        unique_run_id = datetime.now().strftime('%Y%m%d%H%M%S')
    os.environ['UNIQUE_RUN_ID'] = unique_run_id
    return unique_run_id
def create_output_directory(prefix='results_'):
    """Create ``<prefix><run_id>`` next to this file and return its path.

    Raises:
        Exception: when the module-level ``run_id`` is empty.
    """
    global run_id
    if not run_id:
        raise Exception("Variable 'run_id' is not set. Unable to create output directory")
    script_dir = pathlib.Path(__file__).parent.absolute()
    folder_name = prefix + str(run_id)
    dir_to_create = os.path.join(script_dir, folder_name)
    os.mkdir(dir_to_create)
    print(f"Created output directory: {dir_to_create}")
    return dir_to_create
if __name__ == "__main__":
    # Create the per-run output directory, then run behave with JSON and
    # JUnit reports written into it.
    run_id = get_unique_run_id()
    output_dir = create_output_directory()
    json_out_dir = os.path.join(output_dir, 'json_report_out.json')
    junit_out_dir = os.path.join(output_dir, 'junit_report_out')
    parser = argparse.ArgumentParser()
    parser.add_argument('--test_directory', required=False, help='Specify the location of the test file')
    parser.add_argument('--behave_options', type=str, required=False, help='String of behave options')
    args = parser.parse_args()
    test_directory = args.test_directory or ''
    behave_options = args.behave_options or ''
    # Every fragment ends with a space so neighbouring arguments cannot fuse
    # (previously the JUnit directory ran straight into the behave options,
    # and '-k' into '--no-capture').
    command = f'behave -k --no-capture -f json.pretty -o {json_out_dir} ' \
              f'--junit --junit-directory {junit_out_dir} ' \
              f'{behave_options} ' \
              f'{test_directory}'
    print(f"Running command : {command}")
    rs = subprocess.run(command, shell=True)
When I try to run this I'm getting an error as follows:
FileNotFoundError: [WinError 3] The system cannot find the path specified: 'E:\Projects\results_20204710/11/20194751'. Please help me to find a solution for this.
Thought it could be installer error. So tried both 32bit and 64bit python installers. I'm totally lost here.
For a single directory:
os.mkdir(...)
For nested directories:
os.makedirs(...)
You can also check if a directory exists:
os.path.exists(...)

Saving IMAP messages with Python mailbox module

I'm downloading messages from IMAP with imaplib into a mbox (with mailbox module):
import imaplib, mailbox
svr = imaplib.IMAP4_SSL('imap.gmail.com')
svr.login('myname#gmail.com', 'mypaswword')
resp, [countstr] = svr.select("[Gmail]/All Mail", True)
mbox = mailbox.mbox('mails.mbox')
for n in range(...):
resp, lst1 = svr.fetch(n, 'UID') # the UID of the message
resp, lst2 = svr.fetch(n, '(RFC822)') # the message itself
mbox.add(lst2[0][1]) # add the downloaded message to the mbox
#
# how to store the UID of this current mail inside mbox?
#
Say we download the mails with UID = 1 .. 1000. Next time, I would like to begin at the 1001st message and not from the 1st. However, mailbox.mbox does not store the UID anywhere. So the next time I open the mbox file, it will be impossible to know where we stopped.
Is there a natural way with the module mailbox to store the UID of the emails?
Or maybe I don't use mailbox + imaplib the way it should ?
I hope it will be useful:
1) Libraries and environment: Win7, Anaconda3-4.3.1-Windows-x86_64.exe (a newer version is available, but this is what I used).
2) To list all your mailboxes:
import getpass, imaplib, sys
def main():
    """Log in to the IMAP server and print its capabilities and mailboxes.

    The connection is logged out even when listing fails.
    """
    m = imaplib.IMAP4_SSL('my.mail.server')
    m.login('my_user_name', 'passowrd')
    try:
        print('Capabilities:', m.capabilities)
        print('Listing mailboxes ')
        status, mailboxes = m.list()
        print('Status:', repr(status))
        print('Data:')
        for entry in mailboxes:
            print(repr(entry))
    finally:
        m.logout()


if __name__ == '__main__':
    main()
3) Using generated above information we can dump all email messages from mail server to the directories:
import getpass, imaplib, sys, email, os , io
import codecs
BASE_NAME = 'msg_no_'
BASE_DIR = 'D:/my_email/'
def writeTofile(mailDir, partOfName, msg ):
    """Write ``msg`` as UTF-8 text to ``BASE_DIR + mailDir``.

    The file is named ``BASE_NAME + partOfName + '.eml'``; the target
    directory is created when missing.

    Args:
        mailDir: mailbox name, appended to ``BASE_DIR`` to form the directory.
        partOfName: text inserted into the file name.
        msg: message text to write.
    """
    ## no need of dos backslash -- newDir = BASE_DIR + mailDir.replace('/', '\\')
    newDir = BASE_DIR + mailDir
    os.makedirs(newDir, exist_ok=True)
    file_name = BASE_NAME + partOfName + '.eml'
    # The file is opened by its full path, so the process-wide working
    # directory is left untouched (no os.chdir).
    with open(newDir + '/' + file_name, 'w', encoding="utf-8") as fw:
        fw.write( msg )
def processMailDir(m, mailDir):
    """Fetch every message of mailbox ``mailDir`` and save each one to disk.

    The mailbox is selected read-only on connection ``m`` and closed after
    all messages returned by the ``ALL`` search have been written with
    ``writeTofile``.
    """
    print('MailDIR:' + mailDir)
    m.select(mailbox=mailDir, readonly=True)
    _, found = m.search(None, 'ALL')
    for num in found[0].split():
        _, fetched = m.fetch(num, '(RFC822)')
        parsed = email.message_from_bytes(fetched[0][1])
        text = parsed.as_bytes().decode(encoding='ISO-8859-1')
        writeTofile(mailDir, num.decode(), text)
    m.close()
def main():
    """Log in to the IMAP server and dump a fixed list of mailboxes to disk.

    With exactly two command-line arguments they are used as host name and
    user name and the password is prompted for; otherwise built-in
    placeholder credentials are used.
    """
    from_cli = len(sys.argv) == 3
    if from_cli:
        hostname, username = sys.argv[1:]
    else:
        hostname = 'my.mail.server'
        username = 'my_username'
    m = imaplib.IMAP4_SSL(hostname)
    # The prompt happens after connecting, as before.
    m.login(username, getpass.getpass() if from_cli else 'password')
    try:
        print('Start...')
        for folder in ('INBOX', 'Sent',
                       'archive/2013/201301', 'archive/2013/201302'):
            processMailDir(m, folder)
        # etc.. etc.. simple as it can be but not simpler
        print('Done...')
    finally:
        m.logout()


if __name__ == '__main__':
    main()
Above will dump your emails to:
D:\my_email\INBOX\msg_no_1.eml ... msg_no203.eml
then you need this secret to open eml's on windows:
Administrator: cmd.com:
assoc .eml=Outlook.File.eml
ftype Outlook.File.eml="C:\Program Files (x86)\Microsoft Office\Office12\OUTLOOK.EXE" /eml "%1"
Dear Stack Overflow moderator, please be merciful: I would have found the above useful. For example, this line took a long time to figure out: smsg = msg.as_bytes().decode(encoding='ISO-8859-1')
To answer your question: after staring at the docs for a long time, I didn't see any clean way to do what you are looking for. If it is an absolute requirement that the UIDs be stored in the mbox file, then I'd suggest adding a custom UID header to the emails that you are storing:
message = email.message_from_string(lst2[0][1])
message.add_header("my_internal_uid_header", lst1[0][1])
mbox.add(message)
Now of course it is a HUGE pain to get the largest saved UID because you have to iterate through all the messages. I imagine that this would be really bad. If at all possible it would be better to store such information elsewhere.
Best of luck!

Python tarfile fails

I am trying to write a script that tar a directory and scp's to a server which have lots of tar files. I am having trouble in creating tar of the directories, here is the complete script. Why is that happening?
Code:
#!/usr/bin/python
import json
from pprint import pprint
import subprocess
import os
from os.path import expanduser
import time
import os.path
import shutil
import tarfile
import smtplib
import zipfile
import glob
def checkFileDownload():
    """Copy the remote Apache access log to ~/pingMeServeraccess.log via scp."""
    scpCommand = "scp ***#***.***.***.***:/var/log/apache2/access.log ~/pingMeServeraccess.log"
    os.system(scpCommand)
def sendNotificationText(server="smtp.gmail.com",userName="***#***.com",password="********",cellNumber="***********",testLink="Test"):
    """Send ``testLink`` as a message from ``userName`` to ``cellNumber``.

    Connects to ``server`` with SMTP over SSL and logs in with
    ``userName``/``password`` before sending.
    """
    # NOTE(review): the port argument is redacted (``***``) in this listing,
    # so the line does not parse as-is; restore the real port before use.
    # The name ``server`` is rebound from the host name to the connection.
    server = smtplib.SMTP_SSL(server, ***)
    server.login(userName,password)
    # NOTE(review): the connection is never closed here - consider quit().
    server.sendmail(userName,cellNumber,testLink)
def sendTarFileToPingMeServer(locationOfTarFile="/home/autotest/tarPackage",nameOfTarFile=""):
    """Copy a tar file to the remote server with scp and text its link.

    Args:
        locationOfTarFile: directory used to resolve ``nameOfTarFile`` when
            that is not an absolute path.
        nameOfTarFile: file name or absolute path of the archive to send.

    Any failure is reported by printing a message; nothing is raised.
    """
    fullPathOfFile = nameOfTarFile
    if not os.path.isabs(fullPathOfFile):
        # A bare file name used to be resolved against the current working
        # directory, where the archive does not live.
        fullPathOfFile = os.path.join(locationOfTarFile, fullPathOfFile)
    try:
        # check_call waits for scp and raises on a non-zero exit status, so
        # the notification is only sent after a successful copy.
        subprocess.check_call(["scp", "-r", fullPathOfFile,
                               "***#***.***.***.***:/home/autotest/untethered/"])
        testLink= "\nhttp://***.***.***.***/" + os.path.basename(fullPathOfFile)
        sendNotificationText(testLink = testLink)
    except Exception:
        print("something went wrong")
def makeTarFile(sourceDir):
    """Archive ``sourceDir`` into a fresh ~/tarPackage and upload the result.

    Any existing ~/tarPackage directory is removed first.  The archive is
    named after the current timestamp.  After the upload,
    ``checkFileDownload`` is called.
    """
    dstFolder = expanduser('~/tarPackage')
    if os.path.exists(dstFolder):
        shutil.rmtree(dstFolder)
    os.makedirs(dstFolder)
    archiveName = str(time.time())+'.tar'
    archivePath = os.path.join(dstFolder, archiveName)
    print('creating archive, '+archiveName)
    out = tarfile.open(archivePath, mode='w')
    try:
        out.add(sourceDir)
    finally:
        out.close()
    # Upload only after the archive has been closed, and pass its full path:
    # sending the bare name of a still-open file made scp fail with
    # "No such file or directory".
    sendTarFileToPingMeServer(locationOfTarFile=dstFolder, nameOfTarFile=archivePath)
    checkFileDownload()
def getTest(userName):
    """Clone the git repos of a user's tests, collect the test files, tar them.

    ``check(userName)`` supplies a mapping of test script path to git link.
    Each distinct link is cloned once into ~/tempGit_<timestamp>; test files
    found in the clone are copied under ~/testPackageDir, which is then
    handed to ``makeTarFile`` and removed together with the clones.
    """
    testLoc = check(userName)
    gitList= [];TestList = []; packageDir = "mkdir ~/testPackageDir"
    # Start from an empty package directory.
    if os.path.exists(expanduser("~/testPackageDir")):
        shutil.rmtree(expanduser("~/testPackageDir"))
    else:
        pass
    originalDirectory = os.getcwd()
    # NOTE(review): ``gitrepo`` (lower-case r) is never read; the loop below
    # uses ``gitRepo``.
    gitrepo = ""
    for test,gitLink in testLoc.items():
        if gitLink not in gitList:
            # First time this link is seen: clone it into a fresh directory.
            gitRepo = expanduser("~/tempGit_"+str(time.time()))
            p = subprocess.Popen(["git", "clone", gitLink,gitRepo], stdout=subprocess.PIPE)
            out,err = p.communicate()
            gitList.append(gitLink)
            testLink = gitRepo + test
            if os.path.isfile(testLink):
                os.system(packageDir)
                # NOTE(review): str.rstrip removes a *set of characters*, not
                # a suffix, so this can strip more than the file name;
                # os.path.dirname is presumably what is intended - confirm.
                relPath = test.rstrip(test.split('/')[-1])
                x = "mkdir -p ~/testPackageDir"+relPath
                os.system(x)
                y = "~/testPackageDir" + relPath
                cpTest = "cp "+testLink+" "+ expanduser(y)
                os.system(cpTest)
        else:
            # NOTE(review): ``gitRepo`` here is the most recently cloned
            # directory, which is not necessarily the clone of ``gitLink``.
            print "git link already cloned, skipping, checking for test cases."
            testLink = gitRepo + test
            if os.path.isfile(testLink):
                relPath = test.rstrip(test.split('/')[-1])
                x = "mkdir -p ~/testPackageDir"+relPath
                os.system(x)
                y = "~/testPackageDir" + relPath
                cpTest = "cp "+testLink+" "+ expanduser(y)
                os.system(cpTest)
    makeTarFile(expanduser("~/testPackageDir"))
    # Remove the temporary clones and the package directory.
    os.system("cd ~; rm -rf tempGit_*;cd -; rm -rf ~/testPackageDir")
def check(userName):
    """Return the test-to-git mapping from the user's JSON file.

    Lists /var/www/tempdata/testexec and loads every file whose name
    contains ``userName``; when several match, the last one listed wins.

    Returns:
        The dict built by ``searchForGitTest``, or an empty dict when no
        file name matches (previously an unbound-variable error).
    """
    p = subprocess.Popen(["ls", "/var/www/tempdata/testexec"], stdout=subprocess.PIPE)
    out,err = p.communicate()
    testLoc = {}
    for fileName in out.split('\n')[:-1]:
        if userName in fileName:
            filePath = "/var/www/tempdata/testexec/"+fileName
            # ``with`` closes the JSON file once it has been parsed.
            with open(filePath) as json_data:
                data = json.load(json_data)
            testLoc = searchForGitTest(data)
    # The former os.popen("pwd") / os.system("cd ~") calls ran in their own
    # child shells and had no effect on this process, so they are gone.
    return testLoc
def searchForGitTest(data):
    """Map each row's ``scriptPath`` to its ``gitPath``.

    Args:
        data: dict whose ``"rows"`` entry is a sequence of dicts with the
            keys ``"scriptPath"`` and ``"gitPath"``.

    Returns:
        A dict keyed by script path; a later row with the same script path
        overrides an earlier one.
    """
    aux = {}
    for row in data["rows"]:
        aux[row["scriptPath"]] = row["gitPath"]
    return aux
if __name__ == "__main__":
getTest("user")
Attaching the run:
autotest#batman007:/var/www$ python testPackageUploader.py
remote: Counting objects: 38357, done
remote: Finding sources: 100% (38357/38357)
remote: Total 38357 (delta 15889), reused 36060 (delta 15889)
Receiving objects: 100% (38357/38357), 652.78 MiB | 17.08 MiB/s, done.
Resolving deltas: 100% (15889/15889), done.
git link already cloned, skipping, checking for test cases.
creating archive
1407871278.15.tar: No such file or directory
access.log 100% 21KB 21.3KB/s 00:00
/var/www
The problem in this script was that I was sending the tar file to the server without closing it first. One of my colleagues helped me figure this out.

Python fast static file serving

What's the fastest way to serve static files in Python? I'm looking for something equal or close enough to Nginx's static file serving.
I know of SimpleHTTPServer but not sure if it can handle serving multiple files efficiently and reliably.
Also, I don't mind it being a part of a lib/framework of some sort as long as its lib/framework is lightweight.
EDIT: This project appears to be dead.
What about FAPWS3? One of the selling points:
Static file server
FAPWS can be used to serve a huge amount of static file requests. With the help of a async database in the backend, you can use FAPWS as your own Amazon S3.
If you look for a oneliner you can do the following:
$> python -m SimpleHTTPServer
This will not fullfil all the task required but worth mentioning that this is the simplest way :-)
I would highly recommend using a 3rd party HTTP server to serve static files.
Servers like nginx are heavily optimized for the task at hand, parallelized and written in fast languages.
Python is tied to one processor and interpreted.
The original SimpleHTTPServer from the Python standard library does NOT "handle serving multiple files efficiently and reliably". For instance, while you are downloading one file from it, any other HTTP request has to wait, because SimpleHTTPServer.py is a simple single-threaded HTTP server that can only handle one connection at a time.
Fortunately, SimpleHTTPServer.py uses BaseHTTPServer.HTTPServer as its server class, which can be combined with SocketServer.ForkingMixIn or SocketServer.ThreadingMixIn (also from the Python standard library) to support multi-process and multi-thread modes. This can greatly improve the simple HTTP server's "efficiency and reliability".
According to this idea, a SimpleHTTPServer with multi-thread/multi-process support modified from original one is given as follows:
$ python2.7 ModifiedSimpleHTTPServer.py
usage: ModifiedSimpleHTTPServer.py [-h] [--pydoc] [--port PORT]
[--type {process,thread}] [--root ROOT]
[--run]
Modified SimpleHTTPServer with MultiThread/MultiProcess and IP bind support.
Original: https://docs.python.org/2.7/library/simplehttpserver.html
Modified by: vbem#163.com
optional arguments:
-h, --help show this help message and exit
--pydoc show this module's pydoc
run arguments:
--port PORT specify server port (default: 8000)
--type {process,thread}
specify server type (default: 'thread')
--root ROOT specify root directory (default: cwd '/home/vbem')
--run run http server foreground
NOTE: stdin for input, stdout for result, stderr for logging
For example, ModifiedSimpleHTTPServer.py --run --root /var/log --type process will run a multi-process HTTP static files server with '/var/log' as its root directory.
Modified codes are:
#! /usr/bin/env python2.7
# -*- coding: utf-8 -*-
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
r"""Modified SimpleHTTPServer with MultiThread/MultiProcess and IP bind support.
Original: https://docs.python.org/2.7/library/simplehttpserver.html
Modified by: vbem#163.com
"""
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
import os, sys, pwd, posixpath, BaseHTTPServer, urllib, cgi, shutil, mimetypes, socket, SocketServer, BaseHTTPServer
from cStringIO import StringIO
USERNAME = pwd.getpwuid(os.getuid()).pw_name
HOSTNAME = socket.gethostname()
PORT_DFT = 8000
class SimpleHTTPRequestHandler(BaseHTTPServer.BaseHTTPRequestHandler):
    """Serve files and directory listings below the current working directory.

    GET and HEAD requests are supported.  A directory is answered with its
    ``index.html``/``index.htm`` when present, otherwise with a generated
    HTML listing.
    """
    server_version = "SimpleHTTP/0.6"

    def do_GET(self):
        """Serve a GET request: send the headers, then copy the body."""
        f = self.send_head()
        if f:
            try:
                self.copyfile(f, self.wfile)
            finally:
                # Close the source even when the client disconnects mid-copy.
                f.close()

    def do_HEAD(self):
        """Serve a HEAD request: send the headers only."""
        f = self.send_head()
        if f:
            f.close()

    def send_head(self):
        """Send the response headers and return the body as a file object.

        Returns None when the response is already complete (redirect or
        error); otherwise the caller must close the returned object.
        """
        path = self.translate_path(self.path)
        f = None
        if os.path.isdir(path):
            if not self.path.endswith('/'):
                # Redirect so that relative links in the page resolve.
                self.send_response(301)
                self.send_header("Location", self.path + "/")
                self.end_headers()
                return None
            for index in "index.html", "index.htm":
                index = os.path.join(path, index)
                if os.path.exists(index):
                    path = index
                    break
            else:
                # No index file found: fall back to a generated listing.
                return self.list_directory(path)
        ctype = self.guess_type(path)
        try:
            f = open(path, 'rb')
        except IOError:
            self.send_error(404, "File not found")
            return None
        try:
            self.send_response(200)
            self.send_header("Content-type", ctype)
            fs = os.fstat(f.fileno())
            self.send_header("Content-Length", str(fs.st_size))
            self.send_header("Last-Modified", self.date_time_string(fs.st_mtime))
            self.end_headers()
            return f
        except Exception:
            # Do not leak the open file when sending the headers fails.
            f.close()
            raise

    def list_directory(self, path):
        """Build an HTML listing of ``path`` and send its headers.

        Returns a file-like object holding the page, or None after sending
        a 404 when the directory cannot be listed.
        """
        try:
            entries = ['..'] + os.listdir(path)
        except os.error:
            self.send_error(404, "No permission to list directory")
            return None
        entries.sort(key=lambda a: a.lower())
        f = StringIO()
        displaypath = cgi.escape(urllib.unquote(self.path))
        f.write('<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">')
        f.write("<html>\n<title>%s %s</title>\n<body>" % (HOSTNAME, displaypath))
        f.write("%s#%s:<strong>%s</strong>\n" % (USERNAME, HOSTNAME, path.rstrip('/')+'/'))
        f.write("<hr>\n<ul>\n")
        for name in entries:
            fullname = os.path.join(path, name)
            displayname = linkname = name
            if os.path.isdir(fullname):
                displayname = name + "/"
                linkname = name + "/"
            if os.path.islink(fullname):
                displayname = name + "#"
            # Two placeholders for the two values: the link target and the
            # visible text.  With a single %s this line raised TypeError.
            f.write('<li><a href="%s">%s</a>\n'
                    % (urllib.quote(linkname), cgi.escape(displayname)))
        f.write("</ul>\n<hr>\n<pre>%s</pre>\n</body>\n</html>\n" % __doc__)
        length = f.tell()
        f.seek(0)
        self.send_response(200)
        encoding = sys.getfilesystemencoding()
        self.send_header("Content-type", "text/html; charset=%s" % encoding)
        self.send_header("Content-Length", str(length))
        self.end_headers()
        return f

    def translate_path(self, path):
        """Map a request path to a local path below the working directory.

        Query string and fragment are dropped, and ``.``/``..`` components
        are skipped.
        """
        path = path.split('?',1)[0]
        path = path.split('#',1)[0]
        path = posixpath.normpath(urllib.unquote(path))
        words = path.split('/')
        words = filter(None, words)
        path = os.getcwd()
        for word in words:
            drive, word = os.path.splitdrive(word)
            head, word = os.path.split(word)
            if word in (os.curdir, os.pardir): continue
            path = os.path.join(path, word)
        return path

    def copyfile(self, source, outputfile):
        """Copy all data from ``source`` to ``outputfile``."""
        shutil.copyfileobj(source, outputfile)

    def guess_type(self, path):
        """Return the MIME type for ``path`` based on its extension.

        The extension is tried as written, then lower-cased; unknown
        extensions get the ``''`` entry of ``extensions_map``.
        """
        base, ext = posixpath.splitext(path)
        if ext in self.extensions_map:
            return self.extensions_map[ext]
        ext = ext.lower()
        if ext in self.extensions_map:
            return self.extensions_map[ext]
        else:
            return self.extensions_map['']

    if not mimetypes.inited:
        mimetypes.init()
    # Extension -> MIME type table; '' is the fallback for unknown types.
    extensions_map = mimetypes.types_map.copy()
    extensions_map.update({'': 'text/plain'})
class ProcessedHTTPServer(SocketServer.ForkingMixIn, BaseHTTPServer.HTTPServer):
r"""Handle requests in multi process."""
class ThreadedHTTPServer(SocketServer.ThreadingMixIn, BaseHTTPServer.HTTPServer):
r"""Handle requests in a separate thread."""
SERVER_DICT = {
'thread' : ThreadedHTTPServer,
'process' : ProcessedHTTPServer,
}
SERVER_DFT = 'thread'
def run(sCwd=None, sServer=SERVER_DFT, nPort=PORT_DFT, *lArgs, **dArgs):
    r"""Start the HTTP server and serve requests until interrupted.

    sCwd    -- directory to change into before serving (None: stay put)
    sServer -- key of SERVER_DICT selecting the server class
    nPort   -- TCP port to listen on, on all interfaces

    Extra positional and keyword arguments are accepted and ignored.
    """
    # Written first, before any other local is bound, so the dump contains
    # only the call arguments.
    sys.stderr.write('start with %r\n' % sys._getframe().f_locals)
    if sCwd is not None:
        os.chdir(sCwd)
    oHttpd = SERVER_DICT[sServer](("", nPort), SimpleHTTPRequestHandler)
    sys.stderr.write('http://%s:%s/\n' % (HOSTNAME, nPort))
    oHttpd.serve_forever()
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# main
def _main():
    r"""Parse the command line and dispatch to pydoc, the server, or help.

    Returns 0 after showing the pydoc, the result of run() when --run is
    given, and 1 after printing the usage when neither option is present.
    """
    import argparse
    oParser = argparse.ArgumentParser(
        description = __doc__,
        formatter_class = argparse.RawTextHelpFormatter,
        epilog = 'NOTE: stdin for input, stdout for result, stderr for logging',
    )
    oParser.add_argument('--pydoc', action='store_true',
        help = "show this module's pydoc",
    )
    oGroupR = oParser.add_argument_group(title='run arguments', description='')
    oGroupR.add_argument('--port', action='store', type=int, default=PORT_DFT,
        help = 'specify server port (default: %(default)r)',
    )
    oGroupR.add_argument('--type', action='store', default=SERVER_DFT, choices=SERVER_DICT.keys(),
        help = 'specify server type (default: %(default)r)',
    )
    oGroupR.add_argument('--root', action='store', default=os.getcwd(),
        help = 'specify root directory (default: cwd %(default)r)',
    )
    oGroupR.add_argument('--run', action='store_true',
        help = 'run http server foreground',
    )
    oArgs = oParser.parse_args()
    # --pydoc takes precedence over --run.
    if oArgs.pydoc:
        help(os.path.splitext(os.path.basename(__file__))[0])
        return 0
    if oArgs.run:
        return run(sCwd=oArgs.root, sServer=oArgs.type, nPort=oArgs.port)
    oParser.print_help()
    return 1


if __name__ == "__main__":
    exit(_main())
Meanwhile, the single python file with only 200 lines may satisfy your "in Python" and "lightweight" demands.
Last but not least, this ModifiedSimpleHTTPServer.py may be a "killer app" by hand for temporary use, however, Nginx is advised for long term use.

Categories