Django using FTP for large files - python

I'm using the Django admin to upload large files to another server (a download host).
The files are usually around 100 MB.
I'm currently using FTP, based on this.
It works fine for files under 1 MB, but as the documentation itself says, it doesn't work with larger files, and I get a 503 when the upload finishes in the Django admin.
I searched a lot for another way to do this, but there seems to be no other way in Django.
Can you help me?
This is my settings.py:
FTP_STORAGE_LOCATION = 'ftp://<myuser>:<mypass>@<host>:<port>/[path]'
My models.py:
import os

from django.db import models

from . import ftp

fs = ftp.FTPStorage()

def my_awesome_upload_function(instance, filename):
    return os.path.join('public_ftp/public/{}/'.format(instance.get_directory()), filename)

class Video(models.Model):
    video_file_ftp = models.FileField(upload_to=my_awesome_upload_function, storage=fs)
And this is ftp.py:
# FTP storage class for Django pluggable storage system.
# Author: Rafal Jonca <jonca.rafal@gmail.com>
# License: MIT
# Comes from http://www.djangosnippets.org/snippets/1269/
#
# Usage:
#
# Add below to settings.py:
# FTP_STORAGE_LOCATION = '[a]ftp://<user>:<pass>@<host>:<port>/[path]'
#
# In models.py you can write:
# from FTPStorage import FTPStorage
# fs = FTPStorage()
# class FTPTest(models.Model):
#     file = models.FileField(upload_to='a/b/c/', storage=fs)
import ftplib
import io
import os
from datetime import datetime
from urllib.parse import urljoin, urlparse

from django.conf import settings
from django.core.exceptions import ImproperlyConfigured
from django.core.files.base import File
from django.core.files.storage import Storage
from django.utils.deconstruct import deconstructible

from storages.utils import setting


class FTPStorageException(Exception):
    pass
@deconstructible
class FTPStorage(Storage):
    """FTP Storage class for Django pluggable storage system."""

    def __init__(self, location=None, base_url=None, encoding=None):
        location = location or setting('FTP_STORAGE_LOCATION')
        if location is None:
            raise ImproperlyConfigured("You must set a location at "
                                       "instantiation or at "
                                       "settings.FTP_STORAGE_LOCATION.")
        self.location = location
        self.encoding = encoding or setting('FTP_STORAGE_ENCODING') or 'latin-1'
        base_url = base_url or settings.MEDIA_URL
        self._config = self._decode_location(location)
        self._base_url = base_url
        self._connection = None
    def _decode_location(self, location):
        """Return the configuration data parsed out of the location URL."""
        splitted_url = urlparse(location)
        config = {}
        if splitted_url.scheme not in ('ftp', 'aftp'):
            raise ImproperlyConfigured(
                'FTPStorage works only with FTP protocol!'
            )
        if splitted_url.hostname == '':
            raise ImproperlyConfigured('You must at least provide hostname!')
        if splitted_url.scheme == 'aftp':
            config['active'] = True
        else:
            config['active'] = False
        config['path'] = splitted_url.path
        config['host'] = splitted_url.hostname
        config['user'] = splitted_url.username
        config['passwd'] = splitted_url.password
        config['port'] = int(splitted_url.port)
        return config
    def _start_connection(self):
        # Check if connection is still alive and if not, drop it.
        if self._connection is not None:
            try:
                self._connection.pwd()
            except ftplib.all_errors:
                self._connection = None
        # Real reconnect
        if self._connection is None:
            ftp = ftplib.FTP()
            ftp.encoding = self.encoding
            try:
                ftp.connect(self._config['host'], self._config['port'])
                ftp.login(self._config['user'], self._config['passwd'])
                if self._config['active']:
                    ftp.set_pasv(False)
                if self._config['path'] != '':
                    ftp.cwd(self._config['path'])
                self._connection = ftp
                return
            except ftplib.all_errors:
                raise FTPStorageException(
                    'Connection or login error using data %s'
                    % repr(self._config)
                )
    def disconnect(self):
        self._connection.quit()
        self._connection = None

    def _mkremdirs(self, path):
        pwd = self._connection.pwd()
        path_splitted = path.split(os.path.sep)
        for path_part in path_splitted:
            try:
                self._connection.cwd(path_part)
            except ftplib.all_errors:
                try:
                    self._connection.mkd(path_part)
                    self._connection.cwd(path_part)
                except ftplib.all_errors:
                    raise FTPStorageException(
                        'Cannot create directory chain %s' % path
                    )
        self._connection.cwd(pwd)
        return
    def _put_file(self, name, content):
        # Connection must be open!
        try:
            self._mkremdirs(os.path.dirname(name))
            pwd = self._connection.pwd()
            self._connection.cwd(os.path.dirname(name))
            self._connection.storbinary('STOR ' + os.path.basename(name),
                                        content.file,
                                        content.DEFAULT_CHUNK_SIZE)
            self._connection.cwd(pwd)
        except ftplib.all_errors:
            raise FTPStorageException('Error writing file %s' % name)

    def _open(self, name, mode='rb'):
        remote_file = FTPStorageFile(name, self, mode=mode)
        return remote_file

    def _read(self, name):
        memory_file = io.BytesIO()
        try:
            pwd = self._connection.pwd()
            self._connection.cwd(os.path.dirname(name))
            self._connection.retrbinary('RETR ' + os.path.basename(name),
                                        memory_file.write)
            self._connection.cwd(pwd)
            memory_file.seek(0)
            return memory_file
        except ftplib.all_errors:
            raise FTPStorageException('Error reading file %s' % name)

    def _save(self, name, content):
        content.open()
        self._start_connection()
        self._put_file(name, content)
        content.close()
        return name
    def _get_dir_details(self, path):
        # Connection must be open!
        try:
            lines = []
            self._connection.retrlines('LIST ' + path, lines.append)
            dirs = {}
            files = {}
            for line in lines:
                words = line.split()
                if len(words) < 6:
                    continue
                if words[-2] == '->':
                    continue
                if words[0][0] == 'd':
                    dirs[words[-1]] = 0
                elif words[0][0] == '-':
                    files[words[-1]] = int(words[-5])
            return dirs, files
        except ftplib.all_errors:
            raise FTPStorageException('Error getting listing for %s' % path)

    def modified_time(self, name):
        self._start_connection()
        resp = self._connection.sendcmd('MDTM ' + name)
        if resp[:3] == '213':
            s = resp[3:].strip()
            # workaround for broken FTP servers returning responses
            # starting with e.g. 1904... instead of 2004...
            if len(s) == 15 and s[:2] == '19':
                s = str(1900 + int(s[2:5])) + s[5:]
            return datetime.strptime(s, '%Y%m%d%H%M%S')
        raise FTPStorageException(
            'Error getting modification time of file %s' % name
        )
    def listdir(self, path):
        self._start_connection()
        try:
            dirs, files = self._get_dir_details(path)
            return list(dirs.keys()), list(files.keys())
        except FTPStorageException:
            raise

    def delete(self, name):
        if not self.exists(name):
            return
        self._start_connection()
        try:
            self._connection.delete(name)
        except ftplib.all_errors:
            raise FTPStorageException('Error when removing %s' % name)

    def exists(self, name):
        self._start_connection()
        try:
            nlst = self._connection.nlst(
                os.path.dirname(name) + '/'
            )
            if name in nlst or os.path.basename(name) in nlst:
                return True
            else:
                return False
        except ftplib.error_temp:
            return False
        except ftplib.error_perm:
            # error_perm: 550 Can't find file
            return False
        except ftplib.all_errors:
            raise FTPStorageException('Error when testing existence of %s'
                                      % name)

    def size(self, name):
        self._start_connection()
        try:
            dirs, files = self._get_dir_details(os.path.dirname(name))
            if os.path.basename(name) in files:
                return files[os.path.basename(name)]
            else:
                return 0
        except FTPStorageException:
            return 0

    def url(self, name):
        if self._base_url is None:
            raise ValueError("This file is not accessible via a URL.")
        return urljoin(self._base_url, name).replace('\\', '/')
class FTPStorageFile(File):
    def __init__(self, name, storage, mode):
        self.name = name
        self._storage = storage
        self._mode = mode
        self._is_dirty = False
        self.file = io.BytesIO()
        self._is_read = False

    @property
    def size(self):
        if not hasattr(self, '_size'):
            self._size = self._storage.size(self.name)
        return self._size

    def readlines(self):
        if not self._is_read:
            self._storage._start_connection()
            self.file = self._storage._read(self.name)
            self._is_read = True
        return self.file.readlines()

    def read(self, num_bytes=None):
        if not self._is_read:
            self._storage._start_connection()
            self.file = self._storage._read(self.name)
            self._is_read = True
        return self.file.read(num_bytes)

    def write(self, content):
        if 'w' not in self._mode:
            raise AttributeError("File was opened for read-only access.")
        self.file = io.BytesIO(content)
        self._is_dirty = True
        self._is_read = True

    def close(self):
        if self._is_dirty:
            self._storage._start_connection()
            self._storage._put_file(self.name, self)
            self._storage.disconnect()
        self.file.close()
I just get a 503 Service Unavailable in my browser when I try to upload large files, but with files under 1 MB everything works.
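Since the upload itself succeeds for small files, the 503 for large ones usually points at whatever sits in front of Django (a reverse-proxy timeout or body-size cap) rather than at the storage class itself. A sketch of the knobs worth checking, with illustrative values only and assuming nginx is the front end, not a confirmed fix:
# settings.py -- illustrative values, not a confirmed fix
FILE_UPLOAD_MAX_MEMORY_SIZE = 10 * 1024 * 1024    # above this Django spools the upload to a temp file
DATA_UPLOAD_MAX_MEMORY_SIZE = 200 * 1024 * 1024   # cap on non-file request data

# And on the nginx side (if nginx is in front), ~100 MB bodies and slow
# FTP relaying both need headroom:
#   client_max_body_size 200m;
#   proxy_read_timeout   300s;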

Related

Python IMAPlib TimeoutError: [WinError 10060]

I am trying to save the attachments of my Outlook account emails to a local directory. The code was working perfectly until now, but it has started showing this error and I am not able to get past it. Can anybody please help with this?
TimeoutError: [WinError 10060] A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond
Here is the code.
from classes.logfile import logger
from classes.config_ini import Config
import email
import imaplib
import email.mime.multipart
import os
import pandas as pd
import datetime


# This class is useful for fetching data from Outlook emails, given a subject line, with files
class Outlook(Config):
    def __init__(self):
        super(Outlook, self).__init__()
        self.username = None
        self.password = None
        self.imap = None
        self.subject = None
        self.file_name = None
        self.s = None
        self.att_path = "No attachment found"
    def subject_line(self):
        subject_read = pd.read_csv(self.section_value[0] + 'outlookEmails.csv')
        subject = subject_read.iloc[:, :]
        self.s = subject
        self.subject = subject.iloc[:, 1]
        self.file_name = subject.iloc[:, 0]

    def close_connection(self):
        return self.imap.close()

    def login(self, username, password):
        # IMAP Settings
        self.username = username
        self.password = password
        print("signing in")
        while True:
            # Connect to the server
            try:
                self.imap = imaplib.IMAP4_SSL("outlook.office365.com", port=993)
                r, d = self.imap.login(username, password)
                assert r == 'OK', 'login failed'
                print(" > Sign as ", d)
            except imaplib.IMAP4.error:
                print(" > Sign In ...")
                continue
            break

    def inbox(self):
        # selecting the inbox
        typ, data = self.imap.select("Inbox")
        print(typ, data)
        num_msgs = int(data[0])
        print('There are {} messages in INBOX'.format(num_msgs))
        return self.imap.select("Inbox")
    def email_check(self, download_folder):
        # fetch the email body (RFC822) for the given ID
        try:
            for i, j in zip(self.subject, self.file_name):
                print('Subject {}'.format(i))
                # typ, msg_ids = self.imap.uid('search', None, 'SUBJECT {}'.format(i))
                typ, msg_ids = self.imap.uid('search', None, '(SUBJECT "{}")'.format(i))
                inbox_item_list = msg_ids[0].split()
                most_recent = inbox_item_list[-1]
                print(most_recent)
                if typ == "OK":
                    ret, data = self.imap.uid('fetch', most_recent, '(RFC822)')
                    raw_data = data[0][1]
                    # converts byte literal to string removing b''
                    raw_data_string = raw_data.decode('utf-8')
                    msg = email.message_from_string(raw_data_string)
                    # downloading attachments
                    # print(msg)
                    print('Subject:' + msg['Subject'])
                    for part in msg.walk():
                        if part.get_content_maintype() == 'multipart':
                            continue
                        if part.get('Content-Disposition') is None:
                            continue
                        filename = part.get_filename()
                        print("filename:" + filename)
                        filename = j
                        # if there is no filename, we create one with a counter to avoid duplicates
                        self.att_path = os.path.join(download_folder, filename)
                        # Check if its already there
                        # if not os.path.isfile(self.att_path):
                        fp = open(self.att_path, 'wb')
                        fp.write(part.get_payload(decode=True))
                        fp.close()
        except (imaplib.IMAP4.error, TypeError) as e:
            logger.error(str(e))
            pass
        # moving files to particular folder
        # folders = [d for d in os.listdir(download_folder) if os.path.isdir(d)]
        # files = [f for f in os.listdir(download_folder) if os.path.isfile(f)]
        # for d in folders:
        #     for f in files:
        #         if d in f:
        #             new_loc =
    def main(self):
        self.subject_line()
        self.login('XXX', 'XXX')
        self.inbox()
        logger.info('start downloading emails at ' + str(datetime.datetime.now().strftime("%Y-%m-%d %H:%M")))
        self.email_check(self.section_value[1])
        self.close_connection()
        logger.info('Emails Downloaded ' + str(datetime.datetime.now().strftime("%Y-%m-%d %H:%M")))


if __name__ == "__main__":
    obj = Outlook()
    obj.main()
Can somebody help me with this?
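WinError 10060 is a plain TCP connect/read timeout, so one defensive option is a socket-level timeout plus a bounded retry loop around the login, instead of the bare while True above. A minimal sketch; the attempt count, delay, and timeout values are illustrative:
import imaplib
import socket
import time

def imap_connect(username, password, host="outlook.office365.com", port=993,
                 attempts=3, timeout=30):
    # A default socket timeout makes a dead connection fail fast with a
    # clean exception instead of hanging until WinError 10060.
    socket.setdefaulttimeout(timeout)
    last_error = None
    for _ in range(attempts):
        try:
            imap = imaplib.IMAP4_SSL(host, port)
            imap.login(username, password)
            return imap
        except (OSError, imaplib.IMAP4.error) as e:  # socket.timeout is an OSError
            last_error = e
            time.sleep(5)  # brief backoff before the next attempt
    raise last_error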

python http server threading via cli

Python 3.7 added ThreadingHTTPServer, as mentioned in the doc.
To run a server from the command line we use
python -m http.server
but it still runs the plain HTTPServer. Is there any way to enable the threaded version via the command line?
EDITED:
Python 3.7 runs ThreadingHTTPServer by default; no argument necessary.
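If you want the threaded server explicitly, or need it from code rather than the CLI, ThreadingHTTPServer can also be started programmatically on 3.7+ (this is the documented http.server API):
from http.server import ThreadingHTTPServer, SimpleHTTPRequestHandler

# Serves the current directory on port 8000, handling each request in its own thread.
httpd = ThreadingHTTPServer(("", 8000), SimpleHTTPRequestHandler)
httpd.serve_forever()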
Simple Python 2 HTTP Server with multi-threading and partial-content support
#!/usr/bin/env python2

# Standard library imports.
from SocketServer import ThreadingMixIn
import BaseHTTPServer
import SimpleHTTPServer
import sys
import json
import os
from os.path import (join, exists, dirname, abspath, isabs, sep, walk, splitext,
                     isdir, basename, expanduser, split, splitdrive)
from os import makedirs, unlink, getcwd, chdir, curdir, pardir, rename, fstat
from shutil import copyfileobj, copytree
import glob
from zipfile import ZipFile
from urlparse import urlparse, parse_qs
from urllib import urlopen, quote, unquote
from posixpath import normpath
from cStringIO import StringIO
import re
import ConfigParser
import cgi
import threading
import socket
import errno

DATA_DIR = getcwd()  # join(expanduser('~'), APP_NAME)


class ThreadingHTTPServer(ThreadingMixIn, BaseHTTPServer.HTTPServer):
    pass
class RequestHandler(SimpleHTTPServer.SimpleHTTPRequestHandler):
    """ Handler to handle POST requests for actions.
    """

    serve_path = DATA_DIR

    def do_GET(self):
        """ Overridden to handle HTTP Range requests. """
        self.range_from, self.range_to = self._get_range_header()
        if self.range_from is None:
            # nothing to do here
            return SimpleHTTPServer.SimpleHTTPRequestHandler.do_GET(self)
        print 'range request', self.range_from, self.range_to
        f = self.send_range_head()
        if f:
            self.copy_file_range(f, self.wfile)
            f.close()

    def copy_file_range(self, in_file, out_file):
        """ Copy only the range in self.range_from/to. """
        in_file.seek(self.range_from)
        # Add 1 because the range is inclusive
        bytes_to_copy = 1 + self.range_to - self.range_from
        buf_length = 64*1024
        bytes_copied = 0
        while bytes_copied < bytes_to_copy:
            read_buf = in_file.read(min(buf_length, bytes_to_copy-bytes_copied))
            if len(read_buf) == 0:
                break
            out_file.write(read_buf)
            bytes_copied += len(read_buf)
        return bytes_copied
    def send_range_head(self):
        """Common code for GET and HEAD commands.
        This sends the response code and MIME headers.
        Return value is either a file object (which has to be copied
        to the outputfile by the caller unless the command was HEAD,
        and must be closed by the caller under all circumstances), or
        None, in which case the caller has nothing further to do.
        """
        path = self.translate_path(self.path)
        f = None
        if isdir(path):
            if not self.path.endswith('/'):
                # redirect browser - doing basically what apache does
                self.send_response(301)
                self.send_header("Location", self.path + "/")
                self.end_headers()
                return None
            for index in "index.html", "index.htm":
                index = join(path, index)
                if exists(index):
                    path = index
                    break
            else:
                return self.list_directory(path)
        if not exists(path) and path.endswith('/data'):
            # FIXME: Handle grits-like query with /data appended to path
            # stupid grits
            if exists(path[:-5]):
                path = path[:-5]
        ctype = self.guess_type(path)
        try:
            # Always read in binary mode. Opening files in text mode may cause
            # newline translations, making the actual size of the content
            # transmitted *less* than the content-length!
            f = open(path, 'rb')
        except IOError:
            self.send_error(404, "File not found")
            return None
        if self.range_from is None:
            self.send_response(200)
        else:
            self.send_response(206)
        self.send_header("Content-type", ctype)
        fs = fstat(f.fileno())
        file_size = fs.st_size
        if self.range_from is not None:
            if self.range_to is None or self.range_to >= file_size:
                self.range_to = file_size-1
            self.send_header("Content-Range",
                             "bytes %d-%d/%d" % (self.range_from,
                                                 self.range_to,
                                                 file_size))
            # Add 1 because ranges are inclusive
            self.send_header("Content-Length",
                             (1 + self.range_to - self.range_from))
        else:
            self.send_header("Content-Length", str(file_size))
        self.send_header("Last-Modified", self.date_time_string(fs.st_mtime))
        self.end_headers()
        return f
    def list_directory(self, path):
        """Helper to produce a directory listing (absent index.html).
        Return value is either a file object, or None (indicating an
        error). In either case, the headers are sent, making the
        interface the same as for send_head().
        """
        try:
            list = os.listdir(path)
        except os.error:
            self.send_error(404, "No permission to list directory")
            return None
        list.sort(key=lambda a: a.lower())
        f = StringIO()
        displaypath = cgi.escape(unquote(self.path))
        f.write('<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">')
        f.write("<html>\n<title>Directory listing for %s</title>\n" % displaypath)
        f.write("<body>\n<h2>Directory listing for %s</h2>\n" % displaypath)
        f.write("<hr>\n<ul>\n")
        for name in list:
            fullname = os.path.join(path, name)
            displayname = linkname = name
            # Append / for directories or @ for symbolic links
            if os.path.isdir(fullname):
                displayname = name + "/"
                linkname = name + "/"
            if os.path.islink(fullname):
                displayname = name + "@"
                # Note: a link to a directory displays with @ and links with /
            f.write('<li><a href="%s">%s</a>\n'
                    % (quote(linkname), cgi.escape(displayname)))
        f.write("</ul>\n<hr>\n</body>\n</html>\n")
        length = f.tell()
        f.seek(0)
        self.send_response(200)
        encoding = sys.getfilesystemencoding()
        self.send_header("Content-type", "text/html; charset=%s" % encoding)
        self.send_header("Content-Length", str(length))
        self.end_headers()
        return f
    def translate_path(self, path):
        """ Override to handle redirects.
        """
        path = path.split('?', 1)[0]
        path = path.split('#', 1)[0]
        path = normpath(unquote(path))
        words = path.split('/')
        words = filter(None, words)
        path = self.serve_path
        for word in words:
            drive, word = splitdrive(word)
            head, word = split(word)
            if word in (curdir, pardir):
                continue
            path = join(path, word)
        return path

    # Private interface ######################################################

    def _get_range_header(self):
        """ Returns request Range start and end if specified.
        If Range header is not specified returns (None, None)
        """
        range_header = self.headers.getheader("Range")
        if range_header is None:
            return (None, None)
        if not range_header.startswith("bytes="):
            print "Not implemented: parsing header Range: %s" % range_header
            return (None, None)
        regex = re.compile(r"^bytes=(\d+)\-(\d+)?")
        rangething = regex.search(range_header)
        if rangething:
            from_val = int(rangething.group(1))
            if rangething.group(2) is not None:
                return (from_val, int(rangething.group(2)))
            else:
                return (from_val, None)
        else:
            print 'CANNOT PARSE RANGE HEADER:', range_header
            return (None, None)
def get_server(port=8000, next_attempts=0, serve_path=None):
    Handler = RequestHandler
    if serve_path:
        Handler.serve_path = serve_path
    while next_attempts >= 0:
        try:
            httpd = ThreadingHTTPServer(("", port), Handler)
            return httpd
        except socket.error as e:
            if e.errno == errno.EADDRINUSE:
                next_attempts -= 1
                port += 1
            else:
                raise


def main(args=None):
    if args is None:
        args = sys.argv[1:]
    PORT = 8000
    if len(args) > 0:
        PORT = int(args[-1])
    serve_path = DATA_DIR
    if len(args) > 1:
        serve_path = abspath(args[-2])
    httpd = get_server(port=PORT, serve_path=serve_path)
    print "serving at port", PORT
    httpd.serve_forever()


if __name__ == "__main__":
    main()
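Per the argument handling in main() above, the port is the last command-line argument and an optional serve path comes just before it; for example (the script name is a placeholder):
python2 threaded_server.py 8000
python2 threaded_server.py /path/to/files 8000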

Track download progress of S3 file using boto3 and callbacks

I am trying to download a text file from S3 using boto3.
Here is what I have written.
import os
import sys
import threading

class ProgressPercentage(object):
    def __init__(self, filename):
        self._filename = filename
        self._size = float(os.path.getsize(filename))
        self._seen_so_far = 0
        self._lock = threading.Lock()

    def __call__(self, bytes_amount):
        # To simplify we'll assume this is hooked up
        # to a single filename.
        with self._lock:
            self._seen_so_far += bytes_amount
            percentage = round((self._seen_so_far / self._size) * 100, 2)
            LoggingFile('{} is the file name. {} out of {} done. The percentage completed is {} %'.format(str(self._filename), str(self._seen_so_far), str(self._size), str(percentage)))
            sys.stdout.flush()
and I am calling it using
transfer.download_file(BUCKET_NAME, FILE_NAME, '{}{}'.format(LOCAL_PATH_TEMP, FILE_NAME), callback=ProgressPercentage(LOCAL_PATH_TEMP + FILE_NAME))
This gives me an error that the file is not present in the folder. Apparently, when I already have a file with this name in the same folder it works, but when I am downloading a fresh file, it errors out.
What correction do I need to make?
This is my implementation. No other dependencies, hack up the progress callback function to display whatever you want.
import sys
import boto3

s3_client = boto3.client('s3')

def download(local_file_name, s3_bucket, s3_object_key):
    meta_data = s3_client.head_object(Bucket=s3_bucket, Key=s3_object_key)
    total_length = int(meta_data.get('ContentLength', 0))
    downloaded = 0

    def progress(chunk):
        nonlocal downloaded
        downloaded += chunk
        done = int(50 * downloaded / total_length)
        sys.stdout.write("\r[%s%s]" % ('=' * done, ' ' * (50-done)))
        sys.stdout.flush()

    print(f'Downloading {s3_object_key}')
    with open(local_file_name, 'wb') as f:
        s3_client.download_fileobj(s3_bucket, s3_object_key, f, Callback=progress)
e.g.
local_file_name = 'test.csv'
s3_bucket = 'my-bucket'
s3_object_key = 'industry/test.csv'
download(local_file_name, s3_bucket, s3_object_key)
Demo: tested with boto3>=1.14.19, python>=3.7.
callback = ProgressPercentage(LOCAL_PATH_TEMP + FILE_NAME)) creates a ProgressPercentage object, runs its __init__ method, and passes the object as callback to the download_file method. This means the __init__ method is run before download_file begins.
In the __init__ method you are attempting to read the size of the local file being downloaded to, which throws an exception as the file does not exist since the download has yet to start. If you've already downloaded the file, then there's no problem since a local copy exists and its size can be read.
Of course, this is merely the cause of the exception you're seeing. You're using the _size property as the maximum value of download progress. However, you're attempting to use the size of the local file. Until the file is completely downloaded, the local file system does not know how large the file is; it only knows how much space it takes up right now. This means that as you download, the file will gradually get bigger until it reaches its full size. As such, it doesn't really make sense to consider the size of the local file as the maximum size of the download. It may work in the case where you've already downloaded the file, but that isn't very useful.
The solution to your problem would be to check the size of the file you're going to download, instead of the size of the local copy. This ensures you're getting the actual size of whatever it is you're downloading, and that the file exists (as you couldn't be downloading it if it didn't). You can do this by getting the size of the remote file with head_object as follows
class ProgressPercentage(object):
    def __init__(self, client, bucket, filename):
        # ... everything else the same
        self._size = client.head_object(Bucket=bucket, Key=filename).ContentLength

# ...

# If you still have the client object you could pass that directly
# instead of transfer._manager._client
progress = ProgressPercentage(transfer._manager._client, BUCKET_NAME, FILE_NAME)
transfer.download_file(..., callback=progress)
As a final note, although you got the code from the Boto3 documentation, it didn't work because it was intended for file uploads. In that case the local file is the source and its existence guaranteed.
Install progressbar with pip3 install progressbar
import boto3, os
import progressbar
bucket_name = "<your-s3-bucket-name>"
folder_name = "<your-directory-name-locally>"
file_name = "<your-filename-locally>"
path = folder_name + "/" + file_name
s3 = boto3.client('s3', aws_access_key_id="<your_aws_access_key_id>", aws_secret_access_key="<your_aws_secret_access_key>")
statinfo = os.stat(file_name)
up_progress = progressbar.progressbar.ProgressBar(maxval=statinfo.st_size)
up_progress.start()
def upload_progress(chunk):
    up_progress.update(up_progress.currval + chunk)
s3.upload_file(file_name, bucket_name, path, Callback=upload_progress)
up_progress.finish()
Here's another simple custom implementation using tqdm:
from tqdm import tqdm
import boto3

def s3_download(s3_bucket, s3_object_key, local_file_name, s3_client=boto3.client('s3')):
    meta_data = s3_client.head_object(Bucket=s3_bucket, Key=s3_object_key)
    total_length = int(meta_data.get('ContentLength', 0))
    with tqdm(total=total_length, desc=f'source: s3://{s3_bucket}/{s3_object_key}', bar_format="{percentage:.1f}%|{bar:25} | {rate_fmt} | {desc}", unit='B', unit_scale=True, unit_divisor=1024) as pbar:
        with open(local_file_name, 'wb') as f:
            s3_client.download_fileobj(s3_bucket, s3_object_key, f, Callback=pbar.update)
usage:
s3_download(bucket, key, local_file_name)
output:
100.0%|█████████████████████████ | 12.9MB/s | source: s3://my-bucket/my-key
Following the official documentation, it is not difficult to add progress tracking (the download_file and upload_file functions are similar).
Here is the full code, with some modifications to display the data size in a friendlier format.
import logging
import boto3
from botocore.exceptions import ClientError
import os
import sys
import threading
import math

ACCESS_KEY = 'xxx'
SECRET_KEY = 'xxx'
REGION_NAME = 'ap-southeast-1'


class ProgressPercentage(object):
    def __init__(self, filename, filesize):
        self._filename = filename
        self._size = filesize
        self._seen_so_far = 0
        self._lock = threading.Lock()

    def __call__(self, bytes_amount):
        def convertSize(size):
            if (size == 0):
                return '0B'
            size_name = ("B", "KB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB")
            i = int(math.floor(math.log(size, 1024)))
            p = math.pow(1024, i)
            s = round(size/p, 2)
            return '%.2f %s' % (s, size_name[i])

        # To simplify, assume this is hooked up to a single filename
        with self._lock:
            self._seen_so_far += bytes_amount
            percentage = (self._seen_so_far / self._size) * 100
            sys.stdout.write(
                "\r%s %s / %s (%.2f%%) " % (
                    self._filename, convertSize(self._seen_so_far), convertSize(self._size),
                    percentage))
            sys.stdout.flush()


def download_file(file_name, object_name, bucket_name):
    # If S3 object_name was not specified, use file_name
    if object_name is None:
        object_name = file_name
    # Initialize s3 client
    s3_client = boto3.client(service_name="s3",
                             aws_access_key_id=ACCESS_KEY,
                             aws_secret_access_key=SECRET_KEY,
                             region_name=REGION_NAME)
    try:
        response = s3_client.download_file(
            Bucket=bucket_name,
            Key=object_name,
            Filename=file_name,
            Callback=ProgressPercentage(file_name, (s3_client.head_object(Bucket=bucket_name, Key=object_name))["ContentLength"])
        )
    except ClientError as e:
        logging.error(e)
        return False
    return True


file_name = "./output.csv.gz"
bucket_name = "mybucket"
object_name = "result/output.csv.gz"
download_file(file_name, object_name, bucket_name)
The object client.head_object(Bucket=bucket, Key=filename) is a dict. The file size can be accessed using ['ContentLength'].
Hence the code:
self._size = client.head_object(Bucket=bucket, Key=filename).ContentLength
should become:
self._size = float(client.head_object(Bucket=bucket, Key=filename)['ContentLength'])
Then it works. Thanks!
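Putting those two pieces together — the size taken from head_object and read with dict indexing — a complete version of the download callback might look like this (a sketch; the class name and the s3/bucket/key variables are illustrative):
import sys
import threading

import boto3

class DownloadProgressPercentage(object):
    """Progress callback sized from the remote object, not the local file."""
    def __init__(self, client, bucket, key):
        self._key = key
        # head_object returns a dict; 'ContentLength' is the remote size in bytes
        self._size = float(client.head_object(Bucket=bucket, Key=key)['ContentLength'])
        self._seen_so_far = 0
        self._lock = threading.Lock()

    def __call__(self, bytes_amount):
        with self._lock:
            self._seen_so_far += bytes_amount
            percentage = (self._seen_so_far / self._size) * 100
            sys.stdout.write('\r%s  %d / %d bytes (%.2f%%)' % (
                self._key, self._seen_so_far, self._size, percentage))
            sys.stdout.flush()

# usage (names are placeholders):
# s3 = boto3.client('s3')
# s3.download_file(bucket, key, local_path,
#                  Callback=DownloadProgressPercentage(s3, bucket, key))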
Someone may stumble upon this answer when trying to do this (as per the question title). The easiest way I know to show S3 upload progress:
Import a progress bar library into your project. This is what I used: https://github.com/anler/progressbar
Then:
import progressbar
from hurry.filesize import size
import boto3

bucket = "my-bucket-name"
s3_client = boto3.resource('s3')
...
...
# you get the filesize from wherever you have the file on. your system maybe?
filesize = size(file)
up_progress = progressbar.AnimatedProgressBar(end=filesize, width=50)

def upload_progress(chunk):
    up_progress + chunk  # Notice! No len()
    up_progress.show_progress()

s3_client.meta.client.upload_file(file, bucket, s3_file_name, Callback=upload_progress)
The important thing to notice here is the use of the Callback parameter (capital C). It is called with the number of bytes transferred to S3 in each chunk. So if you know the original file size, some simple math gets you a progress bar. You can then use any progress bar library.
Info
Credits to @Kshitij Marwah, @yummies and nicolas.f.g's posts
Using boto3 1.9.96 (downloaded via pip)
Removed threading
Changed the display format (the line above is rewritten until the download completes)
Posting because of the difference between the online docs and the downloaded package
code
import sys

class ProgressPercentage(object):
    def __init__(self, o_s3bucket, key_name):
        self._key_name = key_name
        boto_client = o_s3bucket.meta.client
        # ContentLength is an int
        self._size = boto_client.head_object(Bucket=o_s3bucket.name, Key=key_name)['ContentLength']
        self._seen_so_far = 0
        sys.stdout.write('\n')

    def __call__(self, bytes_amount):
        self._seen_so_far += bytes_amount
        percentage = (float(self._seen_so_far) / float(self._size)) * 100
        TERM_UP_ONE_LINE = '\033[A'
        TERM_CLEAR_LINE = '\033[2K'
        sys.stdout.write('\r' + TERM_UP_ONE_LINE + TERM_CLEAR_LINE)
        sys.stdout.write('{} {}/{} ({}%)\n'.format(self._key_name, str(self._seen_so_far), str(self._size), str(percentage)))
        sys.stdout.flush()
Then call it like this.
Note the capital C on Callback (this differs from the online docs):
progress = ProgressPercentage(o_s3bucket, key_name)
o_s3bucket.download_file(key_name, full_local_path, Callback=progress)
where o_s3bucket is :
bucket_name = 'my_bucket_name'
aws_profile = 'default' # this is used to catch creds from .aws/credentials ini file
boto_session = boto3.session.Session(profile_name=aws_profile)
o_s3bucket = boto_session.resource('s3').Bucket(bucket_name)
hth
Here is an option I've found useful, using the click library (run pip install click before applying the code below):
import click
import boto3
import os

file_path = os.path.join('tmp', 'file_path')
s3_client = boto3.client('s3')  # the client exposes upload_fileobj
with click.progressbar(length=os.path.getsize(file_path)) as progress_bar:
    with open(file_path, mode='rb') as upload_file:
        s3_client.upload_fileobj(
            upload_file,
            'bucket_name',
            'foo_bar',
            Callback=progress_bar.update
        )
Here is the code:
try:
    import logging
    import boto3
    from botocore.exceptions import ClientError
    import os
    import sys
    import threading
    import math
    import re
    from boto3.s3.transfer import TransferConfig
except Exception as e:
    pass

ACCESS_KEY = 'XXXXXXXXXXXXXXXXX'
SECRET_KEY = 'XXXXXXXXXXXXXXXX'
REGION_NAME = 'us-east-1'
BucketName = "XXXXXXXXXXXXXXXX"
KEY = "XXXXXXXXXXXXXXXX"
class Size:
    @staticmethod
    def convert_size(size_bytes):
        if size_bytes == 0:
            return "0B"
        size_name = ("B", "KB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB")
        i = int(math.floor(math.log(size_bytes, 1024)))
        p = math.pow(1024, i)
        s = round(size_bytes / p, 2)
        return "%s %s" % (s, size_name[i])
class ProgressPercentage(object):
    def __init__(self, filename, filesize):
        self._filename = filename
        self._size = filesize
        self._seen_so_far = 0
        self._lock = threading.Lock()

    def __call__(self, bytes_amount):
        def convertSize(size):
            if (size == 0):
                return '0B'
            size_name = ("B", "KB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB")
            i = int(math.floor(math.log(size, 1024)))
            p = math.pow(1024, i)
            s = round(size/p, 2)
            return '%.2f %s' % (s, size_name[i])

        # To simplify, assume this is hooked up to a single filename
        with self._lock:
            self._seen_so_far += bytes_amount
            percentage = (self._seen_so_far / self._size) * 100
            sys.stdout.write(
                "\r%s %s / %s (%.2f%%) " % (
                    self._filename, convertSize(self._seen_so_far), convertSize(self._size),
                    percentage))
            sys.stdout.flush()
class AWSS3(object):
    """Helper class to which add functionality on top of boto3 """

    def __init__(self, bucket, aws_access_key_id, aws_secret_access_key, region_name):
        self.BucketName = bucket
        self.client = boto3.client(
            "s3",
            aws_access_key_id=aws_access_key_id,
            aws_secret_access_key=aws_secret_access_key,
            region_name=region_name,
        )

    def get_size_of_files(self, Key):
        response = self.client.head_object(Bucket=self.BucketName, Key=Key)
        size = response["ContentLength"]
        return {"bytes": size, "size": Size.convert_size(size)}

    def put_files(self, Response=None, Key=None):
        """
        Put the File on S3
        :return: Bool
        """
        try:
            response = self.client.put_object(
                ACL="private", Body=Response, Bucket=self.BucketName, Key=Key
            )
            return "ok"
        except Exception as e:
            print("Error : {} ".format(e))
            return "error"

    def item_exists(self, Key):
        """Given key check if the items exists on AWS S3 """
        try:
            response_new = self.client.get_object(Bucket=self.BucketName, Key=str(Key))
            return True
        except Exception as e:
            return False

    def get_item(self, Key):
        """Gets the Bytes Data from AWS S3 """
        try:
            response_new = self.client.get_object(Bucket=self.BucketName, Key=str(Key))
            return response_new["Body"].read()
        except Exception as e:
            print("Error :{}".format(e))
            return False
    def find_one_update(self, data=None, key=None):
        """
        This checks if Key is on S3 if it is return the data from s3
        else store on s3 and return it
        """
        flag = self.item_exists(Key=key)
        if flag:
            data = self.get_item(Key=key)
            return data
        else:
            self.put_files(Key=key, Response=data)
            return data

    def delete_object(self, Key):
        response = self.client.delete_object(Bucket=self.BucketName, Key=Key,)
        return response

    def get_all_keys(self, Prefix=""):
        """
        :param Prefix: Prefix string
        :return: Keys List
        """
        try:
            paginator = self.client.get_paginator("list_objects_v2")
            pages = paginator.paginate(Bucket=self.BucketName, Prefix=Prefix)
            tmp = []
            for page in pages:
                for obj in page["Contents"]:
                    tmp.append(obj["Key"])
            return tmp
        except Exception as e:
            return []

    def print_tree(self):
        keys = self.get_all_keys()
        for key in keys:
            print(key)
        return None

    def find_one_similar_key(self, searchTerm=""):
        keys = self.get_all_keys()
        return [key for key in keys if re.search(searchTerm, key)]

    def __repr__(self):
        return "AWS S3 Helper class "
    def download_file(self, file_name, object_name):
        try:
            response = self.client.download_file(
                Bucket=self.BucketName,
                Key=object_name,
                Filename=file_name,
                Config=TransferConfig(
                    max_concurrency=10,
                    use_threads=True
                ),
                Callback=ProgressPercentage(file_name,
                                            (self.client.head_object(Bucket=self.BucketName,
                                                                     Key=object_name))["ContentLength"])
            )
        except ClientError as e:
            logging.error(e)
            return False
        return True


helper = AWSS3(aws_access_key_id=ACCESS_KEY, aws_secret_access_key=SECRET_KEY, bucket=BucketName, region_name='us-east-1')
helper.download_file(file_name='test.zip', object_name=KEY)

cherrypy didn't work correctly with daemonize

I'm new to CherryPy, and I'm trying to develop a small application with this framework. My problem: the application serves well, and the browser can access it, when CherryPy runs in non-daemonized mode. But when I use cherrypy.process.plugins.Daemonizer(), CherryPy starts in the background and listens on the specified port, yet the browser gets connection refused (iptables and ufw are already shut down, but it is still inaccessible). The strange thing is that while the daemonized instance is running, I can still start a non-daemonized process, and both listen on the same port. I wonder why this happens, and how to solve it?
Simply said: with Daemonizer, CherryPy listens on the specified port but the browser connection is refused; without Daemonizer, CherryPy works very well.
Thanks a lot.
Here is my code:
from optparse import OptionParser
from cherrypy.process.plugins import Daemonizer
from cherrypy.process.plugins import PIDFile
import cherrypy
import json
import urllib
import datetime
try:
    import cPickle as pickle
except:
    import pickle
import time
import base64
import os
import sys

'''
cherrypy class
'''
class Index(object):
    @cherrypy.expose
    def index(self):
        return "Say hello to the yellow elephant"
class System(object):
    @cherrypy.expose
    def env(self, token):
        local_token = Token()
        if local_token.AuthToken(token) is True:
            env = get_env()
            return json.dumps(env)
        return '{"errcode", "Invalid token"}'


class Jmx(object):
    @cherrypy.expose
    def get(self, token, host, port, qry):
        local_token = Token()
        if local_token.AuthToken(token) is True:
            url = 'http://' + host + ':' + port + '/jmx?qry=' + qry
            jmx = urllib.urlopen(url)
            jmx_data = jmx.read().replace('\n', '')
            jmx.close()
            return jmx_data
        return '{"errcode", "Invalid token"}'


"""
command uses base64 encode by using http post method
"""
class Command(object):
    def __init__(self):
        self.fname = datetime.datetime.now().strftime('%Y-%m-%d_%M-%M-%S') + '.log'

    @cherrypy.expose
    def run(self, token, command):
        local_token = Token()
        command = base64.b64decode(command)
        if local_token.AuthToken(token) is True:
            os.popen(command + ' 2>&1 > /usr/lib/agent/output/' + self.fname)
        return '{"errcode", "Invalid token"}'

    @cherrypy.expose
    def readlog(self, token):
        local_token = Token()
        if local_token.AuthToken(token) is True:
            log = open('/usr/lib/agent/output/' + self.fname)
            lines = log.readlines()
            log.close()
            return json.dumps(lines, ensure_ascii=False)
        return '{"errcode", "Invalid token"}'
"""
First time access from central, it will create a new token on slave node, the token is pickle.dump(cacl_mysql_passwd(conf['agent']['secret']))
By token created , if central makes change to secret, the slave node will be inaccessible!!!
"""
class Token(object):
def AuthToken(self, token):
if(os.path.isfile('/usr/lib/agent/key/authenticate.key')) is False:
return self.CreateToken(token)
else:
try:
k = open('/usr/lib/agent/key/authenticate.key', 'rb')
tokenizer = pickle.load(k)
k.close()
if token == tokenizer:
return True
else:
return False
except IOError, e:
return '{"errcode":"' + str(e).replace('\n', '<br/>') + '"}'
#cherrypy.expose
def CreateToken(self, token):
if(os.path.isfile('/usr/lib/agent/key/authenticate.key')) is False:
try:
k = open('/usr/lib/agent/key/authenticate.key', 'wb')
pickle.dump(token, k)
k.close()
return True
except IOError, e:
return '{"Exception":"' + str(e).replace('\n', '<br/>') + '"}'
else:
return '{"errcode":"token exists"}'
class Controller:
    def __init__(self, pidfile='/var/run/agent/agent.pid', host='0.0.0.0', port=30050):
        self.port = port
        self.host = host
        self.pidfile = pidfile
        self.settings = {
            'global': {
                'server.socket_port': port,
                'server.socket_host': host,
                'server.socket_file': '',
                'server.socket_queue_size': 5,
                'server.protocol_version': 'HTTP/1.1',
                'server.log_to_screen': True,
                'server.log_file': '',
                'server.reverse_dns': False,
                'server.thread_pool': 10,
                'server.environment': 'production',
                'engine.timeout_monitor.on': False
            }
        }

    def start(self):
        if os.path.exists(self.pidfile):
            sys.stderr.write('PID file exists, server running?\n')
            sys.exit(1)
        else:
            Daemonizer(cherrypy.engine, stdin='/dev/stdin', stdout='/dev/stdout', stderr='/dev/stderr').subscribe()
            PIDFile(cherrypy.engine, self.pidfile).subscribe()
            cherrypy.tree.mount(Index(), '/')
            cherrypy.tree.mount(System(), '/system')
            cherrypy.tree.mount(Command(), '/command')
            cherrypy.tree.mount(Jmx(), '/jmx')
            cherrypy.config.update(self.settings)
            cherrypy.engine.start()
            cherrypy.engine.block()
    def stop(self):
        cherrypy.config.update(self.settings)
        if os.path.exists(self.pidfile):
            cherrypy.engine.stop()
            cherrypy.engine.exit()
            try:
                process = open(self.pidfile).read().strip()
                if process != 0:
                    os.popen('kill -9 %s' % process)
                os.remove(self.pidfile)
            except IOError, e:
                sys.stderr.write(str(e))
        else:
            sys.stderr.write('PID file does not exist, server gone?\n')
            sys.exit(1)
if '__main__' == __name__:
    cherrypy.engine.autoreload.stop()
    cherrypy.engine.autoreload.unsubscribe()
    syntax = 'Syntax: %prog -b 192.168.1.1 -s start'
    parser = OptionParser(usage=syntax)
    ip = os.popen('hostname -i').read().strip()
    hostname = os.popen('hostname --fqdn').read().strip()
    parser.add_option('-b', '--bind', action='store', type='string', dest='bind', default=ip, help='Inner network IP address, default value is hostname -i')
    parser.add_option('-s', '--signal', action='store', type='string', dest='signal', help='Valid signal is {start|stop|restart}')
    options, args = parser.parse_args()
    if len(sys.argv) == 1:
        print 'Use %s -h or --help for help.' % sys.argv[0]
    else:
        if options.signal == '':
            print 'Must give -s option\'s value'
        else:
            daemon = Controller(pidfile='/var/run/agent/agent.pid', host=options.bind)
            if 'start' == options.signal:
                daemon.start()
            elif 'stop' == options.signal:
                daemon.stop()
            else:
                print 'Invalid signal'
                sys.exit(1)
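For comparison, a stripped-down daemonized CherryPy app is a useful baseline: if this accepts connections but the full script above does not, the culprit is in the extra configuration (for instance the redirected stdin/stdout handed to Daemonizer, or the bind address taken from hostname -i) rather than in Daemonizer itself. A minimal sketch; the host and port are placeholders:
import cherrypy
from cherrypy.process.plugins import Daemonizer

class Root(object):
    @cherrypy.expose
    def index(self):
        return "hello"

# Subscribe the daemonizer before starting the engine.
Daemonizer(cherrypy.engine).subscribe()
cherrypy.config.update({
    'server.socket_host': '0.0.0.0',  # placeholder bind address
    'server.socket_port': 30050,      # placeholder port
})
cherrypy.tree.mount(Root(), '/')
cherrypy.engine.start()
cherrypy.engine.block()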

Edit ini file option values with ConfigParser (Python)

Anyone know how I'd go about editing ini file values, preferably using ConfigParser? (Even a place to start from would be great!) I've got lots of comments throughout my config file, so I'd like to keep them by editing just the values, not copying the values around between multiple files.
Structure of my config file:
[name1]
URL = http://example.com
username = dog
password = password
[name2]
URL = http://catlover.com
username = cat
password = adffa
As you can see, I've got the same options for different section names, so editing just the values for one section is a bit trickier if ConfigParser can't do it.
Thanks in advance.
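For the read/modify/write cycle itself, a minimal sketch (Python 3 spelling; the section and option names match the example file above). One caveat: stock ConfigParser throws away comments when it writes the file back, so if keeping the comments is a hard requirement, a comment-preserving parser such as configobj is worth a look.
import configparser

parser = configparser.RawConfigParser()
parser.read('config.ini')

# Change a single value in a single section; other sections are untouched.
parser.set('name2', 'password', 's3cret')

with open('config.ini', 'w') as f:
    parser.write(f)  # note: comments from the original file are not preserved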
Here is an example:
import sys
import os.path
from ConfigParser import RawConfigParser as ConfParser
from ConfigParser import Error

p = ConfParser()
# this happened to me; save the file as ASCII
o = open("config.ini")
if o.read().startswith("\xef\xbb\xbf"):
    print "Fatal Error; Please save the file as ASCII not unicode."
    sys.exit()
try:
    results = p.read("config.ini")
except Error, msg:
    print "Error Parsing File"
    print msg
else:
    if results == []:
        print "Could not load config.ini."
        if not os.path.exists("config.ini"):
            print "config.ini does not exist."
        else:
            print "An unknown error occurred."
    else:
        print "Config Details"
        sections = p.sections()
        sections.sort()
        for s in sections:
            print "------------------------"
            print s
            if p.has_option(s, "URL"):
                print "URL: ",
                print p.get(s, "URL")
            else:
                print "URL: No Entry"
            if p.has_option(s, "username"):
                print "User: ",
                print p.get(s, "username")
            else:
                print "User: N/A"
            if p.has_option(s, "password"):
                print "Password: ",
                print p.get(s, "password")
            else:
                print "Password: N/A"
Also, I created this class to store my app's variables and make config writing easier. It was originally used with Twisted, but I created a simple replacement logger:
import os.path
import sys
# from twisted.python import log
import ConfigParser
from traceback import print_last


class Log(object):
    # sloppy replacement for twisted's logging functions
    def msg(self, t):
        print "Logger: %s " % t

    def err(self, t=None):
        print "-------------Error-----------"
        print "\n\n"
        if t is None:
            print_last()


log = Log()
class Settings(object):
    '''Stores settings'''

    config_variables = ['variables_that_should_be_stored_in_config']

    def __init__(self, main_folder=None, log_file=None, music_folder=None):
        # load the defaults then see if there are updated ones in the config
        self.load_defaults()
        self.config = ConfigParser.RawConfigParser()
        if len(self.config.read(self.settings_file)) == 1:
            if 'Settings' in self.config.sections():
                try:
                    self.music_folder = self.config.get('Settings', 'music_folder')
                except ConfigParser.NoOptionError:
                    pass
                log.msg('Music Folder: %s' % self.music_folder)
                try:
                    self.mplayer = self.config.get('Settings', 'mplayer')
                except ConfigParser.NoOptionError:
                    pass
                try:
                    self.eula = self.config.getboolean('Settings', 'eula')
                except ConfigParser.NoOptionError:
                    pass
            else:
                log.msg('No Settings Section; Defaults Loaded')
        else:
            log.msg('Settings at default')
    def load_defaults(self):
        log.msg('Loading Defaults')
        self.main_folder = os.path.dirname(os.path.abspath(sys.argv[0]))
        self.settings_file = os.path.join(self.main_folder, 'settings.cfg')
        self.log_file = os.path.join(self.main_folder, 'grooveshark.log')
        self.music_folder = os.path.join(self.main_folder, 'Music')
        self.grooveshark_started = False
        self.eula = False
        self.download_percent = 0.5  # default buffer percent is 50 %
        if sys.platform == 'win32' or sys.platform == 'cygwin':  # Windows
            if os.path.exists(os.path.join(self.main_folder, 'mplayer', 'mplayer.exe')):
                self.mplayer = os.path.join(self.main_folder, 'mplayer', 'mplayer.exe')
            elif os.path.exists(os.path.join(self.main_folder, '/mplayer.exe')):
                self.mplayer = os.path.join(self.main_folder, '/mplayer.exe')
            else:
                self.mplayer = 'download'
        elif sys.platform == 'darwin':  # Mac
            if os.path.exists(os.path.join(self.main_folder, 'mplayer/mplayer.app')):
                self.mplayer = os.path.join(self.main_folder, 'mplayer/mplayer.app')
            elif os.path.exists(os.path.join(self.main_folder, '/mplayer.app')):
                self.mplayer = os.path.join(self.main_folder, '/mplayer.app')
            else:
                self.mplayer = 'download'
        else:  # linux
            # download or navigate to it
            self.mplayer = 'download'
        # Create Music Folder if it does not exist
        if not os.path.exists(self.music_folder):
            os.makedirs(self.music_folder)
        # Create log file if it does not exist
        if not os.path.exists(self.log_file):
            l = open(self.log_file, 'wb')
            l.close()
        log.msg('Application Folder: %s' % self.main_folder)
        log.msg('Log File: %s' % self.log_file)
        log.msg('Music Folder: %s' % self.music_folder)
    def __setattr__(self, variable, value):
        log.msg('Setting %s to %s' % (variable, value))
        object.__setattr__(self, variable, value)
        if variable in self.config_variables:
            try:
                self.config.set('Settings', variable, value)
            except:
                # Means config wasn't created yet; could be we were trying to set
                # self.config (in which case self.config wasn't set yet because we
                # were trying to set it)
                log.err()
            else:
                # UPDATE settings file
                log.msg('Saving Settings to %s' % (self.settings_file))
                try:
                    self.config.write(open(self.settings_file, 'wb'))
                except:
                    log.err()