I'm using this with Angular and flow.js to upload images, but I'm running into a problem: when two uploads have the same file name, the upload never completes, and I don't understand why. Shouldn't it just overwrite the previous file?
class ImageUploadHandler(webapp2.RequestHandler):
    """Receives flow.js chunk POSTs and assembles them into a GCS image."""

    def post(self):
        # flow.js sends its metadata as form fields alongside each chunk.
        chunk_number = int(self.request.params.get('flowChunkNumber'))
        chunk_size = int(self.request.params.get('flowChunkSize'))
        current_chunk_size = int(self.request.params.get('flowCurrentChunkSize'))
        total_size = int(self.request.params.get('flowTotalSize'))
        total_chunks = int(self.request.params.get('flowTotalChunks'))
        identifier = str(self.request.params.get('flowIdentifier'))
        filename = str(self.request.params.get('flowFilename'))
        data = self.request.params.get('file')  # the chunk payload itself
        # State is rebuilt from GCS on every request -- the handler itself
        # is stateless across chunks.
        f = ImageFile(filename, identifier, total_chunks, chunk_size, total_size)
        f.write_chunk(chunk_number, current_chunk_size, data)
        if f.ready_to_build():
            # The last outstanding chunk just arrived: assemble the file.
            info = f.build()
            if info:
                self.response.headers['Content-Type'] = 'application/json'
                self.response.out.write(json.dumps(info.as_dict()))
            else:
                # build() failed and logged the reason; report server error.
                self.error(500)
        else:
            # More chunks are still outstanding -- just acknowledge this one.
            self.response.headers['Content-Type'] = 'application/json'
            self.response.out.write(json.dumps({
                'chunkNumber': chunk_number,
                'chunkSize': chunk_size,
                'message': 'Chunk ' + str(chunk_number) + ' written'
            }))
Where ImageFile is a class that writes to the google cloud storage.
Edit:
Below the ImageFile class. Only thing missing is the FileInfo class which is a simple model to store the generated url with the filename.
class ImageFile:
    """A chunked image upload being assembled in Google Cloud Storage.

    Each flow.js chunk lives as its own temporary GCS object (see Chunk);
    once every chunk has arrived, build() concatenates them into the final
    object, deletes the chunks, and stores an ImageInfo with a serving URL.
    """

    def __init__(self, filename, identifier, total_chunks, chunk_size, total_size):
        # Bucket defaults to the app's default GCS bucket unless overridden.
        self.bucket_name = os.environ.get('BUCKET_NAME', app_identity.get_default_gcs_bucket_name())
        self.original_filename = filename
        # Full GCS object path: /<bucket>/<filename>.
        self.filename = '/' + self.bucket_name + '/' + self.original_filename
        self.identifier = identifier
        self.total_chunks = total_chunks
        self.chunk_size = chunk_size
        self.total_size = total_size
        self.stat = None
        self.chunks = []
        self.load_stat()
        self.load_chunks(identifier, total_chunks)

    def load_stat(self):
        """Cache the GCS stat of the final file, or None if it is absent."""
        try:
            self.stat = gcs.stat(self.filename)
        except gcs.NotFoundError:
            self.stat = None

    def load_chunks(self, identifier, number_of_chunks):
        """Create Chunk wrappers for chunk numbers 1..number_of_chunks."""
        for n in range(1, number_of_chunks + 1):
            self.chunks.append(Chunk(self.bucket_name, identifier, n))

    def exists(self):
        """True if the assembled file already exists in GCS."""
        return self.stat is not None

    def content_type(self):
        """Guess the MIME type from the filename extension."""
        name = self.filename.lower()
        if name.endswith(('.jpg', '.jpeg')):
            return 'image/jpeg'
        elif name.endswith('.png'):
            return 'image/png'
        # BUG FIX: this previously tested '.git', so GIF uploads always
        # fell through to binary/octet-stream.
        elif name.endswith('.gif'):
            return 'image/gif'
        else:
            return 'binary/octet-stream'

    def ready(self):
        """True if the assembled file exists with the expected total size."""
        return self.exists() and self.stat.st_size == self.total_size

    def ready_chunks(self):
        """True once every chunk object exists in GCS."""
        for c in self.chunks:
            if not c.exists():
                return False
        return True

    def delete_chunks(self):
        """Remove all temporary chunk objects."""
        for c in self.chunks:
            c.delete()

    def ready_to_build(self):
        # BUG FIX: this used to be 'not self.ready() and self.ready_chunks()'.
        # When a file with the same name (and size) already existed, ready()
        # was True, so build() was never called and the client never received
        # its completion response -- the upload appeared to hang forever.
        # Having all chunks present is the real readiness condition; building
        # again simply overwrites the previous object, which is the desired
        # behaviour for a re-upload.
        return self.ready_chunks()

    def write_chunk(self, chunk_number, current_chunk_size, data):
        """Store one uploaded chunk (chunk_number is 1-based)."""
        chunk = self.chunks[int(chunk_number) - 1]
        chunk.write(current_chunk_size, data)

    def build(self):
        """Concatenate all chunks into the final GCS object.

        Returns the stored ImageInfo on success, or None when assembly
        fails (the request handler turns None into an HTTP 500).
        """
        try:
            log.info('File \'' + self.filename + '\': assembling chunks.')
            write_retry_params = gcs.RetryParams(backoff_factor=1.1)
            gcs_file = gcs.open(self.filename,
                                'w',
                                content_type=self.content_type(),
                                options={'x-goog-meta-identifier': self.identifier},
                                retry_params=write_retry_params)
            for c in self.chunks:
                log.info('Writing chunk ' + str(c.chunk_number) + ' of ' + str(self.total_chunks))
                c.write_on(gcs_file)
            gcs_file.close()
        except Exception as e:
            log.error('File \'' + self.filename + '\': Error during assembly - ' + str(e))
        else:
            self.delete_chunks()
            key = blobstore.create_gs_key('/gs' + self.filename)
            url = images.get_serving_url(key)
            info = ImageInfo(name=self.original_filename, url=url)
            info.put()
            return info
The Chunk class:
class Chunk:
    """One uploaded flow.js chunk, stored as its own temporary GCS object."""

    def __init__(self, bucket_name, identifier, chunk_number):
        self.chunk_number = chunk_number
        self.filename = '/' + bucket_name + '/' + identifier + '-chunk-' + str(chunk_number)
        self.stat = None
        self.load_stat()

    def load_stat(self):
        """Refresh the cached GCS stat; None when the object is missing."""
        try:
            self.stat = gcs.stat(self.filename)
        except gcs.NotFoundError:
            self.stat = None

    def exists(self):
        """Whether this chunk has already been written to GCS."""
        return bool(self.stat)

    def write(self, size, data):
        """Stream the uploaded field-storage 'data' into this chunk's object."""
        retry = gcs.RetryParams(backoff_factor=1.1)
        out = gcs.open(self.filename, 'w', retry_params=retry)
        for piece in data.file:
            out.write(piece)
        out.close()
        self.load_stat()

    def write_on(self, stream):
        """Copy this chunk's bytes onto an already-open destination stream."""
        src = gcs.open(self.filename)
        try:
            buf = src.read()
            while buf:
                stream.write(buf)
                buf = src.read()
        except gcs.Error as e:
            log.error('Error writing data to chunk: ' + e.message)
        finally:
            src.close()

    def delete(self):
        """Best-effort removal of the chunk object."""
        try:
            gcs.delete(self.filename)
            self.stat = None
        except gcs.NotFoundError:
            pass
Related
I want to write this in views.py, but I have no idea how to implement it so that the browser downloads the SCC file.
i have coded
def export_transcript(request):
    """Export a task's transcript as a downloadable SCC caption file.

    Query params:
        task_id     -- primary key of the Task whose transcript to export
        export_type -- output format; only "scc" is supported

    Returns an HttpResponse serving the generated file as an attachment,
    or a DRF Response with an error status on bad input / missing data.
    """
    task_id = request.query_params.get("task_id")
    export_type = request.query_params.get("export_type")
    if task_id is None or export_type is None:
        return Response(
            {"message": "missing param : task_id or export_type"},
            status=status.HTTP_400_BAD_REQUEST,
        )
    supported_types = ["scc"]
    if export_type not in supported_types:
        return Response(
            {
                "message": "exported type only supported formats are : {scc} "
            },
            status=status.HTTP_400_BAD_REQUEST,
        )
    try:
        task = Task.objects.get(pk=task_id)
    except Task.DoesNotExist:
        return Response(
            {"message": "Task not found."},
            status=status.HTTP_404_NOT_FOUND,
        )
    transcript = get_transcript_id(task)
    if transcript is None:
        return Response(
            {"message": "Transcript not found."},
            status=status.HTTP_404_NOT_FOUND,
        )
    payload = transcript.payload["payload"]
    lines = []
    if export_type == "scc":
        for segment in payload:
            hex_string = segment["text"].encode("utf-8").hex()
            # BUG FIX: the old slicing used range(0, len(hex_string)) with
            # no step, producing one overlapping 4-char window per hex digit.
            # SCC expects the hex stream split into non-overlapping 4-digit
            # (2-byte) caption words.
            words = [hex_string[i:i + 4] for i in range(0, len(hex_string), 4)]
            hex_words = " ".join(words)
            # Normalise the timecode to HH:MM:SS:FF, padding missing fields
            # so the list index can never be out of range.
            start_time = segment["start_time"].split(":")
            if len(start_time) < 4:
                start_time = start_time + ["00"] * (4 - len(start_time))
            start_time = ":".join(start_time)
            # SCC separates the timecode from the caption words with a tab.
            lines.append(start_time + "\t" + hex_words + "\n")
    filename = "transcript.scc"
    # A valid SCC file must start with this header line followed by a blank
    # line; players reject files without it.  Each cue already ends with a
    # newline, so joining with "\n" leaves a blank line between cues.
    content = "Scenarist_SCC V1.0\n\n" + "\n".join(lines)
    # Serve as plain text rather than "application/json" so the browser
    # saves a usable .scc file instead of treating the body as JSON.
    response = HttpResponse(content, content_type="text/plain")
    response["Content-Disposition"] = 'attachment; filename="%s"' % filename
    response["filename"] = filename
    return response
i want to know how to code so as to get scc file format
I'm using Django admin to upload large files into another server ( A download host ).
The files are usually 100mb.
I'm using FTP currently based on this.
It works fine with files less than 1mb but as it itself says in the documentation, it doesn't work with larger files and I get a 503 when the upload finishes in the Django admin.
I really searched a lot about another way to to this but it seems there is no other way in Django.
can you help me?
This is my settings.py
FTP_STORAGE_LOCATION = 'ftp://<myuser>:<mypass>@<host>:<port>/[path]'
my models.py
from . import ftp
fs = ftp.FTPStorage()
def my_awesome_upload_function(instance, filename):
    """Build the FTP upload path: public_ftp/public/<instance dir>/<filename>."""
    directory = instance.get_directory()
    base = 'public_ftp/public/{}/'.format(directory)
    return os.path.join(base, filename)
class Video(models.Model):
    """A video whose file is stored on the remote FTP host via FTPStorage."""
    # Uploaded under public_ftp/public/<instance directory>/ by the
    # upload_to callable; bytes go to the server configured in
    # settings.FTP_STORAGE_LOCATION.
    video_file_ftp = models.FileField(upload_to = my_awesome_upload_function, storage=fs)
this is ftp.py
# FTP storage class for Django pluggable storage system.
# Author: Rafal Jonca <jonca.rafal@gmail.com>
# License: MIT
# Comes from http://www.djangosnippets.org/snippets/1269/
#
# Usage:
#
# Add below to settings.py:
# FTP_STORAGE_LOCATION = '[a]ftp://<user>:<pass>@<host>:<port>/[path]'
#
# In models.py you can write:
# from FTPStorage import FTPStorage
# fs = FTPStorage()
# class FTPTest(models.Model):
# file = models.FileField(upload_to='a/b/c/', storage=fs)
import ftplib
import io
import os
from datetime import datetime
from urllib.parse import urljoin, urlparse
from django.conf import settings
from django.core.exceptions import ImproperlyConfigured
from django.core.files.base import File
from django.core.files.storage import Storage
from django.utils.deconstruct import deconstructible
from storages.utils import setting
class FTPStorageException(Exception):
    """Raised for any FTP connection, transfer, or listing failure."""
    pass
# BUG FIX: the decorator had been commented out ('#deconstructible' is the
# usual Stack Overflow mangling of '@deconstructible'); without it Django
# migrations cannot serialize a field that uses this storage.
@deconstructible
class FTPStorage(Storage):
    """FTP Storage class for Django pluggable storage system.

    Configuration comes from an FTP URL of the form
    [a]ftp://user:pass@host:port/path -- the 'aftp' scheme selects active
    mode.  The connection is created lazily by _start_connection() and
    cached on the instance; it is re-established automatically when the
    server has dropped it.
    """

    def __init__(self, location=None, base_url=None, encoding=None):
        location = location or setting('FTP_STORAGE_LOCATION')
        if location is None:
            raise ImproperlyConfigured("You must set a location at "
                                       "instanciation or at "
                                       " settings.FTP_STORAGE_LOCATION'.")
        self.location = location
        self.encoding = encoding or setting('FTP_STORAGE_ENCODING') or 'latin-1'
        base_url = base_url or settings.MEDIA_URL
        self._config = self._decode_location(location)
        self._base_url = base_url
        self._connection = None

    def _decode_location(self, location):
        """Return splitted configuration data from location."""
        splitted_url = urlparse(location)
        config = {}
        if splitted_url.scheme not in ('ftp', 'aftp'):
            raise ImproperlyConfigured(
                'FTPStorage works only with FTP protocol!'
            )
        # BUG FIX: urlparse() yields None, not '', for a missing hostname,
        # so the previous "hostname == ''" comparison could never fire.
        if not splitted_url.hostname:
            raise ImproperlyConfigured('You must at least provide hostname!')
        # 'aftp' selects active FTP; plain 'ftp' uses passive mode.
        config['active'] = splitted_url.scheme == 'aftp'
        config['path'] = splitted_url.path
        config['host'] = splitted_url.hostname
        config['user'] = splitted_url.username
        config['passwd'] = splitted_url.password
        # BUG FIX: int(splitted_url.port) raised TypeError when the URL had
        # no explicit port; fall back to the standard FTP port (21).
        config['port'] = int(splitted_url.port or ftplib.FTP_PORT)
        return config

    def _start_connection(self):
        """Ensure self._connection holds a live, logged-in FTP session."""
        # Check if connection is still alive and if not, drop it.
        if self._connection is not None:
            try:
                self._connection.pwd()
            except ftplib.all_errors:
                self._connection = None
        # Real reconnect
        if self._connection is None:
            ftp = ftplib.FTP()
            ftp.encoding = self.encoding
            try:
                ftp.connect(self._config['host'], self._config['port'])
                ftp.login(self._config['user'], self._config['passwd'])
                if self._config['active']:
                    ftp.set_pasv(False)
                if self._config['path'] != '':
                    ftp.cwd(self._config['path'])
                self._connection = ftp
                return
            except ftplib.all_errors:
                raise FTPStorageException(
                    'Connection or login error using data %s'
                    % repr(self._config)
                )

    def disconnect(self):
        """Politely close the cached connection."""
        self._connection.quit()
        self._connection = None

    def _mkremdirs(self, path):
        """Create (cd'ing through) each missing directory component of path."""
        pwd = self._connection.pwd()
        path_splitted = path.split(os.path.sep)
        for path_part in path_splitted:
            try:
                self._connection.cwd(path_part)
            except ftplib.all_errors:
                try:
                    self._connection.mkd(path_part)
                    self._connection.cwd(path_part)
                except ftplib.all_errors:
                    raise FTPStorageException(
                        'Cannot create directory chain %s' % path
                    )
        self._connection.cwd(pwd)
        return

    def _put_file(self, name, content):
        """Upload a Django File object with STOR. Connection must be open!"""
        try:
            self._mkremdirs(os.path.dirname(name))
            pwd = self._connection.pwd()
            self._connection.cwd(os.path.dirname(name))
            self._connection.storbinary('STOR ' + os.path.basename(name),
                                        content.file,
                                        content.DEFAULT_CHUNK_SIZE)
            self._connection.cwd(pwd)
        except ftplib.all_errors:
            raise FTPStorageException('Error writing file %s' % name)

    def _open(self, name, mode='rb'):
        """Return a lazy FTPStorageFile; bytes are fetched on first read."""
        remote_file = FTPStorageFile(name, self, mode=mode)
        return remote_file

    def _read(self, name):
        """Download 'name' into an in-memory buffer and return it rewound."""
        memory_file = io.BytesIO()
        try:
            pwd = self._connection.pwd()
            self._connection.cwd(os.path.dirname(name))
            self._connection.retrbinary('RETR ' + os.path.basename(name),
                                        memory_file.write)
            self._connection.cwd(pwd)
            memory_file.seek(0)
            return memory_file
        except ftplib.all_errors:
            raise FTPStorageException('Error reading file %s' % name)

    def _save(self, name, content):
        content.open()
        self._start_connection()
        self._put_file(name, content)
        content.close()
        return name

    def _get_dir_details(self, path):
        """Parse a LIST reply into ({dirname: 0}, {filename: size}).

        Connection must be open!  Assumes a Unix-style LIST format;
        symlink entries ('->') are skipped.
        """
        try:
            lines = []
            self._connection.retrlines('LIST ' + path, lines.append)
            dirs = {}
            files = {}
            for line in lines:
                words = line.split()
                if len(words) < 6:
                    continue
                if words[-2] == '->':
                    continue
                if words[0][0] == 'd':
                    dirs[words[-1]] = 0
                elif words[0][0] == '-':
                    files[words[-1]] = int(words[-5])
            return dirs, files
        except ftplib.all_errors:
            raise FTPStorageException('Error getting listing for %s' % path)

    def modified_time(self, name):
        """Return the remote modification time via the MDTM command."""
        self._start_connection()
        resp = self._connection.sendcmd('MDTM ' + name)
        if resp[:3] == '213':
            s = resp[3:].strip()
            # workaround for broken FTP servers returning responses
            # starting with e.g. 1904... instead of 2004...
            if len(s) == 15 and s[:2] == '19':
                s = str(1900 + int(s[2:5])) + s[5:]
            return datetime.strptime(s, '%Y%m%d%H%M%S')
        raise FTPStorageException(
            'Error getting modification time of file %s' % name
        )

    def listdir(self, path):
        # The previous try/except simply re-raised FTPStorageException,
        # which _get_dir_details already raises -- dropped as a no-op.
        self._start_connection()
        dirs, files = self._get_dir_details(path)
        return list(dirs.keys()), list(files.keys())

    def delete(self, name):
        if not self.exists(name):
            return
        self._start_connection()
        try:
            self._connection.delete(name)
        except ftplib.all_errors:
            raise FTPStorageException('Error when removing %s' % name)

    def exists(self, name):
        self._start_connection()
        try:
            nlst = self._connection.nlst(
                os.path.dirname(name) + '/'
            )
            # Servers differ on whether NLST returns full paths or bare
            # names, so accept either.
            return name in nlst or os.path.basename(name) in nlst
        except ftplib.error_temp:
            return False
        except ftplib.error_perm:
            # error_perm: 550 Can't find file
            return False
        except ftplib.all_errors:
            raise FTPStorageException('Error when testing existence of %s'
                                      % name)

    def size(self, name):
        """Return the remote file size in bytes, or 0 when unavailable."""
        self._start_connection()
        try:
            dirs, files = self._get_dir_details(os.path.dirname(name))
            return files.get(os.path.basename(name), 0)
        except FTPStorageException:
            return 0

    def url(self, name):
        if self._base_url is None:
            raise ValueError("This file is not accessible via a URL.")
        return urljoin(self._base_url, name).replace('\\', '/')
class FTPStorageFile(File):
    """Lazy django File wrapper around a remote FTP object.

    Content is fetched from the server on first read and buffered in
    memory; writes are buffered in memory and pushed to the server on
    close().
    """

    def __init__(self, name, storage, mode):
        self.name = name
        self._storage = storage
        self._mode = mode
        self._is_dirty = False   # True once write() has buffered new content
        self.file = io.BytesIO()
        self._is_read = False    # True once remote content has been fetched

    # BUG FIX: the decorator had been commented out ('#property' is the
    # usual Stack Overflow mangling of '@property'); without it, accessing
    # .size returned the bound method instead of the byte count.
    @property
    def size(self):
        if not hasattr(self, '_size'):
            self._size = self._storage.size(self.name)
        return self._size

    def readlines(self):
        # Fetch the whole remote file once, then serve from the buffer.
        if not self._is_read:
            self._storage._start_connection()
            self.file = self._storage._read(self.name)
            self._is_read = True
        return self.file.readlines()

    def read(self, num_bytes=None):
        # Fetch the whole remote file once, then serve from the buffer.
        if not self._is_read:
            self._storage._start_connection()
            self.file = self._storage._read(self.name)
            self._is_read = True
        return self.file.read(num_bytes)

    def write(self, content):
        if 'w' not in self._mode:
            raise AttributeError("File was opened for read-only access.")
        self.file = io.BytesIO(content)
        self._is_dirty = True
        self._is_read = True

    def close(self):
        # Flush buffered writes back to the server before closing.
        if self._is_dirty:
            self._storage._start_connection()
            self._storage._put_file(self.name, self)
            self._storage.disconnect()
        self.file.close()
I just simply get the 503 service unavailable on my browser when I try to upload large files. but with less than 1mb files everything works.
I have two methods; here is the test I have written for feedback_selected_material_to_unsplash. I need to convert this test to use mocks, and I'm also not sure how to write a proper test for download_image_material. I'd appreciate any help.
Here is the testcase
def test_feedback_selected_material_to_unsplash_download_location_not_null(self, mock_post):
    """feedback_selected_material_to_unsplash posts the tracking payload.

    BUG FIX: the old test issued a *real* requests.post() to an unrelated
    URL and then asserted the mock was called with a json.dumps payload
    the production code never sends.  Instead, call the function under
    test and assert the exact call it makes: requests.post(URL, data)
    with a plain dict (form-encoded), no headers argument.
    Requires the test class to be decorated with
    @mock.patch('<module under test>.requests.post').
    """
    feedback_selected_material_to_unsplash(self.download_location)
    URL = 'https://test/track/unsplash'
    data = {'download_location': self.download_location}
    mock_post.assert_called_with(URL, data)
def feedback_selected_material_to_unsplash(download_location):
    """Report a selected Unsplash photo download back to the tracking API.

    download_location may be the literal string 'null' (sent by the
    frontend for non-Unsplash material) or empty/None, in which case
    nothing is reported.  The feedback is best-effort: failures are
    logged but never propagated to the caller.
    """
    if download_location and download_location != 'null':
        URL = 'https://test/track/unsplash'
        data = {
            'download_location': download_location
        }
        try:
            response = requests.post(URL, data)
            logger.info(response.json())
            logger.info('Done the feedback to unsplash.com')
        except Exception:
            # BUG FIX: was a bare 'except: pass' that silently swallowed
            # every error; still best-effort, but now the failure is logged
            # with its traceback.
            logger.exception('Failed to send feedback to unsplash.com')
this method i dont know how to write testcase for this
def download_image_material(request, data, video, keyword, keyword_gui, material_type):
    """Download up to 50 search-result media files and attach them to a Material.

    Streaming generator: yields one JSON line per successfully saved item,
    then a final {'end': True} line.

    request       -- the Django request (unused in the body)
    data          -- iterable of result dicts; assumed to carry 'word',
                     'link', 'user', 'small' and (for Unsplash results)
                     'download_location' keys -- TODO confirm schema
    video         -- generated-video row the new Material points at
    keyword / keyword_gui -- search keyword (internal / display form)
    material_type -- stored in Material.is_video; truthy for video material
    """
    # Files are saved beneath web/static/<hashed dir>/<random folder>/.
    dname_tmp = settings.BASE_DIR + '/web/static'
    hashedDname = get_path_stock_box()
    saveFolder = gen_hash(gen_fname())
    path = '%s/%s/%s' % (dname_tmp, hashedDname, saveFolder)
    if not os.path.exists(path):
        os.makedirs(path)
    objs = []
    material = Material(generated_video_data=video, keyword = keyword)
    material.is_video = material_type
    material.save()
    for index,datum in enumerate(data):
        # Hard cap of 50 downloads per request.
        if index == 50:
            break
        obj = {}
        obj['word'] = datum['word']
        obj['link'] = datum['link']
        obj['user'] = datum['user']
        # Unsplash requires reporting download_location back later.
        obj['download_location'] = datum['download_location'] if 'unsplash.com' in datum['link'] else None
        imgUrl = datum['small']
        try:
            headers = {}
            response, imgRaw = http_get(imgUrl, headers=headers)
        except urllib.error.HTTPError as ex:
            continue
        except:
            # NOTE(review): bare except silently skips any failure here,
            # including programming errors -- consider narrowing.
            continue
        # Derive the extension from the URL with its query string stripped...
        imgUrlWithoutQuery = re.sub(r'\?.*','',imgUrl)
        suffix = imgUrlWithoutQuery[imgUrlWithoutQuery.rfind('.') + 1 :]
        suffix = suffix.lower()
        if suffix in settings.IMG_EXTENSIONS or suffix in settings.VIDEO_EXTENSIONS or suffix.lower() == 'mp4':
            pass
        else:
            # ...falling back to the response Content-Type header; skip the
            # item when even that yields an unsupported type.
            mime = response.info()['Content-Type']
            suffix = _mime2suffix(mime)
            if suffix not in settings.IMG_EXTENSIONS and suffix not in settings.VIDEO_EXTENSIONS or suffix.lower() == 'mp4':
                continue
        # Persist the raw bytes under a fresh hashed file name.
        imgFname = '%s.%s' % (gen_hash(gen_fname()), suffix)
        imgPathFname = '%s/%s/%s/%s' % (dname_tmp, hashedDname, saveFolder, imgFname)
        imgPathFnameSaved = '%s/%s/%s' % (hashedDname, saveFolder, imgFname)
        fout = open(imgPathFname, 'wb')
        fout.write(imgRaw)
        fout.close()
        #process file
        obj['media'] = imgPathFnameSaved
        if suffix in settings.IMG_EXTENSIONS:
            save_image_with_resize_and_rotate(imgPathFname)
            obj['media'] = imgPathFnameSaved
        elif suffix.lower() in settings.VIDEO_EXTENSIONS:
            # convert videos to mp4 and delete original files
            upload.conv2mp4(path)
            # NOTE(review): 'path' is the whole save folder, not the
            # just-converted source file; os.remove() raises on a
            # directory -- confirm whether imgPathFname was intended.
            os.remove(path)
            hashed_name = imgPathFnameSaved[ : imgPathFnameSaved.rfind('.')] + '.mp4'
            obj['media'] = hashed_name
        if suffix == 'mp4':
            obj['video'] = {}
            obj['video']['duration'] = gen.get_video_duration(settings.BASE_DIR + '/web/static/' + obj['media'])
            # Generate a thumbnail and point 'media' at the resulting .png.
            gen_thumbnail(settings.BASE_DIR + '/web/static/' + obj['media'])
            fname_mp4 = obj['media']
            obj['media'] = fname_mp4[ : fname_mp4.rfind('.')] + '.png'
        #process service name
        url = urlparse(datum['link'])
        obj['service'] = {}
        obj['service']['url'] = f'{url.scheme}://{url.netloc}'
        obj['service']['hostname'] = url.netloc
        # get json from database
        # Re-read the Material and append the new item to its stored JSON
        # each iteration, so partial progress survives a later failure.
        material = Material.objects.get(id=material.pk)
        objDb = material.get_json_data()
        objs = []
        if objDb:
            for objOld in objDb:
                objs.append(objOld)
        objs.append(obj)
        material.set_json_data(objs)
        material.save()
        res_json = {'result': True, 'data':obj, 'keyword': keyword_gui, 'pk': material.pk}
        yield json.dumps(res_json) + '\n'
    # Final save is a no-op repeat of the last loop iteration's state.
    material.set_json_data(objs)
    material.save()
    yield json.dumps({'result': True, 'end': True, 'pk': material.pk}) + '\n'
I developed a file-downloader GUI app using the Requests module with Python 2.7 in the Spyder IDE. When I run it and download a file that is gigabytes in size, the app becomes unresponsive.
import requests
import os
from Tkinter import Label, Frame, Tk, BOTH, TOP, LEFT, X, Button, Entry, FLAT, StringVar, DISABLED
from ttk import Progressbar
import tkFont
import tkMessageBox
class Downloader:
    """Single-file download GUI (Python 2 / Tkinter) with pause and resume.

    NOTE(review): all downloading happens on the Tk main thread, kept
    alive only by manual parent.update() calls between chunks; for
    multi-GB files this is why the window appears "not responding" --
    moving DownloadFile() onto a worker thread is the usual fix.
    """

    def __init__(self,parent):
        self.parent = parent
        self.parent.title("downloader")
        self.parent_frame = Frame(self.parent)
        self.parent_frame.pack(fill=BOTH,expand=True) # expands fill all remining spaces
        self.control_button_frame=Frame(self.parent_frame)
        self.control_button_frame.pack(side=TOP,fill=X)
        self.downloading_url=StringVar()
        self.downloading_url.set("paste downloadable url here (use ctrl + v)") # make a Tkinter StringVar between root = Tk() window
        self.req_obj = None              # active streamed requests response
        self.source_file_size = None     # Content-Length reported by server
        self.error_msg = None            # last error text for the dialog
        self.downloaded_file_size = 0    # bytes written so far
        self.pause_flag = 0              # 1 while the download is paused
        self.cancel_flag = 0             # 1 once cancel has been pressed
        self.AddContainers()
        self.AddWidgets()

    def AddContainers(self):
        """Build the frame hierarchy that the widgets are packed into."""
        self.top_frame = Frame(self.parent_frame)
        self.top_frame.pack(side=TOP,pady=10,padx=10,fill=X)
        self.downloads_details_frame=Frame(self.parent_frame)
        self.downloads_details_frame.pack(side=TOP,padx=10,fill=X)
        self.status_history_buttons_frame=Frame(self.downloads_details_frame)
        self.status_history_buttons_frame.pack(side=TOP,fill=X)
        self.downloads_status_show_frame=Frame(self.downloads_details_frame,bg="white")
        self.downloads_status_show_frame.pack(side=TOP,fill=X)
        self.downloads_status_show_left_frame = Frame(self.downloads_status_show_frame)
        self.downloads_status_show_left_frame.pack(side=LEFT, fill=X)
        self.downloads_status_show_right_frame = Frame(self.downloads_status_show_frame)
        self.downloads_status_show_right_frame.pack(side=LEFT, fill=X)
        self.progress_bar_frame = Frame(self.parent_frame)
        self.progress_bar_frame.pack(side=TOP,fill=X)
        self.control_button_frame=Frame(self.parent_frame)
        self.control_button_frame.pack(side=TOP,fill=X)

    def AddWidgets(self):
        """Create the URL entry, status labels, progress bar, and buttons."""
        label_names = ["Status","File size","Downloaded","Transfer rate","Time left","Resume Capability"]
        self.downloading_status_data_label = {}
        address_label = Label(self.top_frame, text ="Address")
        address_label.pack(side="left")
        self.url_font_style = tkFont.Font(family = 'ms sans Serif', size = 9, weight = "normal" )
        self.url_entry_field = Entry(self.top_frame,width=60,font = self.url_font_style,textvariable=self.downloading_url)
        self.url_entry_field.pack(side="left",padx=5,fill=X,expand=True)
        self.url_ok_button = Button(self.top_frame, text ="Download", width = 8, command = self.UrlProcessing)
        self.url_ok_button.pack(side="left")
        self.status_button = Button(self.status_history_buttons_frame,text="Download Status",relief=FLAT,bg="white",anchor="w", width=13)
        self.status_button.pack(side="left")
        self.show_history_button=Button(self.status_history_buttons_frame,text="History")
        self.show_history_button.pack(side="left")
        # Left column: static captions; right column: live data labels.
        for names in label_names:
            lab = Label(self.downloads_status_show_left_frame, text=names, anchor="w",bg="white", width=16)
            lab.pack(side=TOP,fill=X)
        self.status_data_label = Label(self.downloads_status_show_right_frame, text = "N/A", anchor="w",bg="white")
        self.status_data_label.pack(side=TOP,fill=X)
        self.file_size_data_label = Label(self.downloads_status_show_right_frame, text = "N/A", anchor="w",bg="white")
        self.file_size_data_label.pack(side=TOP,fill=X)
        self.downloaded_data_label = Label(self.downloads_status_show_right_frame, text = "N/A", anchor="w",bg="white")
        self.downloaded_data_label.pack(side=TOP,fill=X)
        self.transfer_rate_data_label = Label(self.downloads_status_show_right_frame, text = "N/A", anchor="w",bg="white")
        self.transfer_rate_data_label.pack(side=TOP,fill=X)
        self.time_left_data_label = Label(self.downloads_status_show_right_frame, text = "N/A", anchor="w",bg="white")
        self.time_left_data_label.pack(side=TOP,fill=X)
        self.resume_status_data_label = Label(self.downloads_status_show_right_frame, text = "N/A", anchor="w",bg="white")
        self.resume_status_data_label.pack(side=TOP,fill=X)
        self.progress=Progressbar(self.progress_bar_frame,orient="horizontal",length=200,mode="determinate")
        self.progress.pack(fill=X,side="left",expand=True)
        self.progress["value"] = 0
        self.cancel_button=Button(self.control_button_frame,text="cancel",width=8, command = self.CancelDownload)
        self.cancel_button.pack(side="right",padx=10,pady=5)
        self.start_pause_button=Button(self.control_button_frame,text="pause",width=8, command = self.StartPause)
        self.start_pause_button.pack(side="right",padx=10,pady=5)

    def ErrorMessageHandler(self,err_message):
        """Show a modal error dialog."""
        tkMessageBox.showerror("Error",err_message)

    def ShowMessage(self,title,message):
        """Show a modal info dialog."""
        tkMessageBox.showinfo(title,message)

    def UpdateDownloadingDetails(self,meta_data):
        """Push a dict of status strings into the right-hand data labels."""
        self.status_data_label.config(text=meta_data["Status"])
        self.file_size_data_label.config(text=meta_data["File size"])
        self.downloaded_data_label.config(text=meta_data["Downloaded"])
        self.transfer_rate_data_label.config(text=meta_data["Transfer rate"])
        self.time_left_data_label.config(text=meta_data["Time left"])
        self.parent.update()

    def CancelDownload(self):
        # Checked inside DownloadFile's chunk loop.
        self.cancel_flag = 1

    def StartPause(self):
        """Toggle pause; resuming re-enters UrlProcessing to continue."""
        if self.pause_flag == 0:
            self.start_pause_button.config(text ="start")
            self.parent.update()
            self.pause_flag = 1
        else:
            self.start_pause_button.config(text ="pause")
            self.parent.update()
            self.pause_flag = 0
            self.UrlProcessing()

    def MakeRequest(self,source_url):
        """Open a streamed ranged GET; cache the response and its size.

        Returns True on a 206 Partial Content response, False on error
        (with self.error_msg set for the dialog).
        NOTE(review): HTTPError, URLError and httplib are never imported
        in this file, so those handlers would themselves raise NameError
        if reached; requests raises requests.exceptions.* instead --
        confirm and narrow these clauses.
        """
        start_byte_pos = 0
        start_header = {'Range': 'bytes=%d-' % start_byte_pos}
        try:
            self.req_obj = requests.get(source_url, headers=start_header, stream = True)
            if self.req_obj.status_code == 206:
                self.source_file_size = int(self.req_obj.headers['Content-Length'])
                self.progress["maximum"] = self.source_file_size
                return True
        except HTTPError, e:
            self.error_msg = 'HTTPError = ' + str(e.code)
            return False
        except URLError, e:
            self.error_msg = 'URLError = ' + str(e.reason)
            return False
        except httplib.HTTPException, e:
            self.error_msg = 'HTTPException'
            return False
        except Exception:
            self.error_msg = 'generic exception: ' + "Unknown Url Type" #traceback.format_exc()
            return False

    def GetSizeInFormat(self, total_bytes):
        """Render a byte count as a human-readable Bytes/KB/MB/GB string.

        NOTE(review): under Python 2, total_bytes/1024 is integer division,
        so the float() wrappers are no-ops and the hand-rolled fractional
        part ("% 1024 ... / 8") is only an approximation -- confirm whether
        '%.2f' formatting was intended instead.
        """
        formatted_size = None
        if total_bytes < 1024:
            formatted_size= str(total_bytes)+" Bytes"
        elif total_bytes < 1048576:
            formatted_size = str(float(total_bytes/1024))+" KB"
            if (total_bytes % 1024) >= 8:
                formatted_size = str(total_bytes/1024) + "."+str(int ( (total_bytes % 1024 ) / 8)) + " KB"
        elif total_bytes < 1073741824:
            formatted_size = str(float(total_bytes/1048576))+" MB"
            if (total_bytes % 1048576) >= 1024:
                formatted_size = str(total_bytes/1048576) + "."+str(int ( (total_bytes % 1048576 ) / 1024)) + " MB"
        elif total_bytes >= 1073741824:
            formatted_size = str(float(total_bytes/1073741824))+" GB"
            if (total_bytes % 1073741824) >= 1048576:
                formatted_size = str(total_bytes/1073741824) + "."+str(int ( (total_bytes % 1073741824 ) / 1048576)) + " GB"
        return formatted_size

    def GetTimeInFormat(self, total_seconds):
        """Render a second count as an 'H hr M min S sec' style string."""
        formatted_time = None
        if total_seconds >= 3600:
            formatted_time = str(total_seconds/3600)+" hr"
            if (total_seconds % 3600) >= 60:
                formatted_time = formatted_time+" "+str( (total_seconds % 3600) / 60)+" min"
            elif (total_seconds % 3600) < 60 and (total_seconds % 3600) > 0:
                formatted_time = formatted_time +" "+str(total_seconds%3600)+" sec"
        elif total_seconds >= 60:
            formatted_time = str(total_seconds / 60)+" min"
            if (total_seconds % 60) >0:
                formatted_time = formatted_time+" "+str(total_seconds % 60)+" sec"
        else:
            formatted_time = str(total_seconds)+" sec"
        return formatted_time

    def GetFileName(self,source_url):
        """Derive a local file name from the URL's last path segment,
        replacing characters Windows forbids in file names with '_'."""
        file_name = None
        temp = source_url.split('/')[-1]
        for raw_data in ['<','>',':','"','/','|','?','*']:
            if raw_data in temp:
                temp = temp.replace(raw_data,'_')
        file_name = temp
        return file_name

    def CheckFileExistence(self,file_name):
        """True when a partial/previous download already exists locally."""
        if os.path.exists(file_name):
            return True
        else:
            return False

    def GetLocalFileSize(self,file_name):
        """Size in bytes of the existing local file (resume offset)."""
        return os.path.getsize(file_name)

    def SetResumeDownload(self,fileurl, resume_byte_pos):
        """Open a new streamed GET starting at resume_byte_pos."""
        resume_header = {'Range': 'bytes=%d-' % resume_byte_pos}
        return requests.get(fileurl, headers=resume_header, stream=True)

    def DownloadFile(self,file_ptr):
        """Stream self.req_obj into file_ptr in 1 MB chunks, updating the
        labels and progress bar after every chunk.

        Runs entirely on the Tk main thread; pause breaks out of the loop
        (StartPause resumes via UrlProcessing), cancel closes everything
        and destroys the window.
        """
        chunk_size = (1024*1024)
        self.status_data_label.config(text="downloading")
        self.file_size_data_label.config(text=self.GetSizeInFormat(self.source_file_size))
        self.downloaded_data_label.config(text=self.GetSizeInFormat(self.downloaded_file_size))
        # Transfer rate is displayed as a fixed 1 MB/s placeholder, and the
        # time-left estimate assumes that same rate.
        self.transfer_rate_data_label.config(text = self.GetSizeInFormat(1024*1024))
        temp = (self.source_file_size-self.downloaded_file_size)/chunk_size
        self.time_left_data_label.config(text= self.GetTimeInFormat(temp))
        self.resume_status_data_label.config(text="yes", fg="blue")
        self.parent.update()
        for chunk in self.req_obj.iter_content(chunk_size):
            self.parent.update()
            if chunk:
                self.downloaded_file_size = self.downloaded_file_size + len(chunk)
                file_ptr.write(chunk)
                self.downloaded_data_label.config(text=self.GetSizeInFormat(self.downloaded_file_size))
                temp = (self.source_file_size-self.downloaded_file_size)/chunk_size
                self.time_left_data_label.config(text= self.GetTimeInFormat(temp))
                self.progress["value"] = self.downloaded_file_size
                self.parent.update()
            if self.pause_flag == 1:
                self.req_obj.close()
                break
            if self.cancel_flag ==1:
                file_ptr.flush()
                file_ptr.close()
                self.req_obj.close()
                self.parent.destroy()

    def UrlProcessing(self):
        """Entry point for the Download button (and for resume).

        Existing local file: either report "already downloaded" or resume
        from its current size with a Range request; otherwise start a
        fresh download.
        """
        self.url_ok_button.config(state=DISABLED)
        self.parent.update()
        self.url = self.downloading_url.get()
        file_name = self.GetFileName(self.url)
        if self.CheckFileExistence(file_name):
            if self.MakeRequest(self.url):
                temp_file_size = self.GetLocalFileSize(file_name)
                if temp_file_size == self.source_file_size:
                    self.ShowMessage(file_name,"File is already downloaded")
                else:
                    self.status_data_label.config(text = "connecting",fg="blue")
                    self.parent.update()
                    # Replace the from-zero request with a resume request
                    # starting at the local file's current size.
                    self.req_obj.close()
                    self.req_obj = self.SetResumeDownload(self.url,temp_file_size)
                    file_ptr = open(file_name, 'ab')
                    self.DownloadFile(file_ptr)
                    file_ptr.flush()
                    file_ptr.close()
                    self.req_obj.close()
                    if self.source_file_size == self.downloaded_file_size:
                        self.ShowMessage(file_name,"Downloading Completed")
                        self.parent.destroy()
            else:
                self.ErrorMessageHandler(self.error_msg)
        else:
            self.status_data_label.config(text = "connecting",fg="blue")
            self.parent.update()
            if self.MakeRequest(self.url):
                file_ptr = open(file_name, 'wb')
                self.DownloadFile(file_ptr)
                file_ptr.flush()
                file_ptr.close()
                self.req_obj.close()
                if self.source_file_size == self.downloaded_file_size:
                    self.ShowMessage(file_name,"Downloading Completed")
                    self.parent.destroy()
            else:
                self.ErrorMessageHandler(self.error_msg)
if __name__ == '__main__':
    # Build the Tk root window, attach the downloader UI, and enter the
    # Tkinter event loop.
    root_window = Tk()
    downloader = Downloader(root_window)
    root_window.mainloop()
So the curl command I'm using is as follows:
cmd = "curl --write-out %{http_code} -X PUT -T " + self.basedir + putfile + " -# -o /dev/null " + self.uri + "/" + self.dist + "/" + putfile
I'd like to change this from invoking a system command to using pycurl. This way I can have more granular control over it and ultimately implement a progress bar for it. However, when I try and convert to python, my resulting script fails. Here is my efforts towards a python script:
# Upload 'filepath' via HTTP PUT using pycurl (replacement for shelling
# out to the curl command shown above).
f = open(filepath, "rb")
fs = os.path.getsize(filepath)
c = pycurl.Curl()
# NOTE(review): the original curl command appended the file name to the
# URL (self.uri + "/" + self.dist + "/" + putfile), but the verbose trace
# below shows "PUT /incoming/" with no file name -- if target_url ends at
# the directory, the server has nothing to store the body as, which would
# explain the 500.  Confirm target_url includes the destination file name.
c.setopt(c.URL, target_url)
c.setopt(c.HTTPHEADER, ["User-Agent: Load Tool (PyCURL Load Tool)"])
c.setopt(c.PUT, 1)                    # upload mode, like curl's -T
c.setopt(c.READDATA, f)               # body is streamed from the open file
c.setopt(c.INFILESIZE, int(fs))       # sets the Content-Length header
c.setopt(c.NOSIGNAL, 1)
c.setopt(c.VERBOSE, 1)
# Capture the response body in memory instead of echoing it to stdout.
c.body = StringIO()
c.setopt(c.WRITEFUNCTION, c.body.write)
try:
    c.perform()
except:
    # NOTE(review): bare except -- pycurl.error would be the specific one.
    import traceback
    traceback.print_exc(file=sys.stderr)
    sys.stderr.flush()
f.close()
c.close()
sys.stdout.write(".")
sys.stdout.flush()
Here's what that outputs:
* About to connect() to ************ port 8090 (#0)
* Trying 16.94.124.53... * connected
> PUT /incoming/ HTTP/1.1
Host: ***********
Accept: */*
User-Agent: Load Tool (PyCURL Load Tool)
Content-Length: 21
Expect: 100-continue
< HTTP/1.1 100 Continue
* We are completely uploaded and fine
< HTTP/1.1 500 Internal Server Error
< Content-type: text/html
* no chunk, no close, no size. Assume close to signal end
<
Thanks in advance for you help!
I've did uploading working module, you can find your answers looking in code.
And you can find almost all answers regarding pycurl by digging libcurl examples and Docs.
'''
Created on Oct 22, 2013
#author: me
'''
import pycurl
import os
import wx
import sys
import hashlib
from cStringIO import StringIO
def get_file_hash(full_filename):
    """Return the hex MD5 digest of the file at full_filename,
    read in 64 KiB blocks so large files never load fully into memory."""
    BLOCKSIZE = 65536
    hasher = hashlib.md5()
    with open(full_filename, 'rb') as afile:
        for block in iter(lambda: afile.read(BLOCKSIZE), b''):
            hasher.update(block)
    return hasher.hexdigest()
class FtpUpload(object):
    """Resumable FTP uploader driven by pycurl, reporting into wx widgets.

    Alongside every uploaded file a companion "hash file" named
    '<filename>_<md5>' is stored on the server.  On a later run its name
    tells us the MD5 of the server-side copy, which decides whether to
    skip the file (same size and hash), resume it (FTP APPEND from the
    server-side size), or delete the stale hash file and re-upload.
    """

    def __init__(self, server, username, password, **items):
        # FTP connection settings.
        self.server = server
        self.username = username
        self.password = password
        # wx widgets used for progress reporting; fetched with .get() so a
        # missing key yields None — NOTE(review): the progress callbacks call
        # methods on these unconditionally, so callers must supply all four.
        self.gauge = items.get("gauge")
        self.sb_speed = items.get("sb_speed")
        self.upload_file_size = items.get("upload_file_size")
        self.upload_file_speed = items.get("upload_file_speed")
        # Size of the file currently being uploaded (set by initRange).
        self.filesize = 0
        # MD5 of the server-side copy; the string '0' means "unknown".
        self.ftp_filehash = '0'

    def sizeToNiceString(self, byteCount):
        """Format *byteCount* as a human-readable size ("1.23 MB", "512 bytes")."""
        for (cutoff, label) in [(1024*1024*1024, "GB"), (1024*1024, "MB"), (1024, "KB")]:
            if byteCount >= cutoff:
                return "%.2f %s" % (byteCount * 1.0 / cutoff, label)
        if byteCount == 1:
            return "1 byte"
        else:
            return "%d bytes" % byteCount

    def initRange(self, filesize):
        """Remember *filesize* and scale the wx gauge to it."""
        self.filesize = filesize
        self.gauge.SetRange(filesize)

    def updateValue(self, upload_d):
        """Push the uploaded-bytes count *upload_d* into the wx widgets."""
        upload_d_int = int(upload_d)
        self.gauge.SetValue(upload_d_int)
        upload_d_str = self.sizeToNiceString(upload_d)
        # NOTE(review): raises ZeroDivisionError if initRange was called with 0.
        upload_percent = int((upload_d*100)/self.filesize)
        upload_d_status = "{0}/{1} ({2}%)".format(upload_d_str, self.sizeToNiceString(self.filesize), upload_percent)
        self.sb_speed.SetStatusText(upload_d_status, 1)
        self.upload_file_size.SetLabel(upload_d_status)
        self.upload_file_speed.SetLabel(upload_d_str)

    def progress(self, download_t, download_d, upload_t, upload_d):
        """pycurl PROGRESSFUNCTION callback; only the upload counter is used."""
        self.updateValue(upload_d)

    def test(self, debug_type, debug_msg):
        """pycurl DEBUGFUNCTION callback: print short debug lines only."""
        if len(debug_msg) < 300:
            print "debug(%d): %s" % (debug_type, debug_msg.strip())

    def ftp_file_hash(self, buf):
        """pycurl WRITEFUNCTION for the LIST reply: parse '<name>_<md5>'.

        Stores the 32-char MD5 suffix of the first listed entry in
        self.ftp_filehash.
        """
        sys.stderr.write("{0:.<20} : {1}\n".format('FTP RAW ', buf.strip()))
        ftp_filehash = dict()
        # Only the first line of the listing is considered.
        item = buf.strip().split('\n')[0]
        ext = item.split('.')[1]
        # An "extension" longer than 3 chars means the 32-char hash suffix
        # is present (e.g. "name.ext_<md5>").
        # NOTE(review): nesting reconstructed from a whitespace-mangled
        # paste — confirm all three assignments belong inside this branch.
        if len(ext) > 3:
            ftp_filename = item[:-33]
            ftp_filehash = item[-32:]
            self.ftp_filehash = ftp_filehash

    def get_ftp_file_hash(self, filename):
        """Ask the server for hash files matching '<filename>_*'.

        The reply is parsed by ftp_file_hash(), which updates
        self.ftp_filehash as a side effect.
        """
        c = pycurl.Curl()
        # 'LIST -1' yields bare names, one per line.
        list_file_hash = 'LIST -1 ' + filename + "_*"
        sys.stderr.write("{0:.<20} : {1} \n".format('FTP command ', list_file_hash))
        c.setopt(pycurl.URL, self.server)
        c.setopt(pycurl.USERNAME, self.username)
        c.setopt(pycurl.PASSWORD, self.password)
        c.setopt(pycurl.VERBOSE, False)
        c.setopt(pycurl.DEBUGFUNCTION, self.test)
        c.setopt(pycurl.CUSTOMREQUEST, list_file_hash)
        c.setopt(pycurl.WRITEFUNCTION, self.ftp_file_hash)
        c.perform()
        c.close()

    def delete_ftp_hash_file(self, ftp_hash_file_old):
        """Delete the stale hash file *ftp_hash_file_old* on the server.

        Errors are printed and otherwise ignored (best effort).
        """
        c = pycurl.Curl()
        delete_hash_file = 'DELE ' + ftp_hash_file_old
        sys.stderr.write("{0:.<20} : {1} \n".format('FTP command ', delete_hash_file))
        c.setopt(pycurl.URL, self.server)
        c.setopt(pycurl.USERNAME, self.username)
        c.setopt(pycurl.PASSWORD, self.password)
        c.setopt(pycurl.VERBOSE, False)
        c.setopt(pycurl.DEBUGFUNCTION, self.test)
        c.setopt(pycurl.CUSTOMREQUEST, delete_hash_file)
        try:
            c.perform()
        except Exception as e:
            print e
        c.close()

    def upload(self, full_filename, filesize):
        """Upload *full_filename* (*filesize* bytes), resuming if possible.

        Returns True on a connection error (caller should stop), False when
        an identical file already exists on the server, and None after a
        successful upload.
        """
        self.initRange(filesize)
        filename = os.path.basename(full_filename)
        sys.stderr.write("filename: %s\n" % full_filename)
        c = pycurl.Curl()
        c.setopt(pycurl.USERNAME, self.username)
        c.setopt(pycurl.PASSWORD, self.password)
        c.setopt(pycurl.VERBOSE, False)
        c.setopt(pycurl.DEBUGFUNCTION, self.test)
        # NOBODY + HEADER off: the first perform() below only probes the
        # remote file's size, it transfers no data.
        c.setopt(pycurl.NOBODY, True)
        c.setopt(pycurl.HEADER, False)
        # NOTE(review): os.path.join on a URL assumes a '/' separator —
        # this would produce a backslash on Windows; verify.
        ftp_file_path = os.path.join(self.server, os.path.basename(full_filename))
        file_hash = get_file_hash(full_filename)
        ftp_hash_file = ftp_file_path + "_%s" % file_hash
        # Getting filesize if exist on server.
        try:
            c.setopt(pycurl.URL, ftp_file_path)
            c.perform()
            # -1 when the file does not exist on the server.
            filesize_offset = int(c.getinfo(pycurl.CONTENT_LENGTH_DOWNLOAD))
        except Exception as error_msg:
            print error_msg
            wx.MessageBox(str(error_msg), 'Connection error!',
                          wx.OK | wx.ICON_ERROR)
            # Exit upload function.
            return True
        ftp_file_append = True
        # Get ftp file hash (side effect: sets self.ftp_filehash).
        self.get_ftp_file_hash(filename)
        # Old-style conditional expression: show '0' instead of -1 in the log.
        offset = filesize_offset == -1 and '0' or filesize_offset
        sys.stderr.write("L_file hash : {0:.<60}: {1:<40}\n".format(filename, file_hash))
        sys.stderr.write("F_file hash : {0:.<60}: {1:<40}\n".format(filename, self.ftp_filehash))
        sys.stderr.write("{0:15} : {1:.>15}\n".format('filesize:', filesize))
        sys.stderr.write("{0:15} : {1:.>15}\n".format('ftp_filesize', offset))
        sys.stderr.write("{0:15} : {1:.>15}\n".format('to upload:', filesize - int(offset)))
        # File not exist on FTP server.
        if filesize_offset == -1:
            # file not exist: uploading from offset zero.
            ftp_file_append = False
            filesize_offset = 0
        # Local and FTP file size and files MD5 are the same.
        elif filesize_offset == self.filesize and file_hash == self.ftp_filehash:
            sys.stderr.write("--- File exist on server! ---\n\n")
            self.upload_file_speed.SetLabel("File exist on server!")
            self.sb_speed.SetStatusText("File exist on server!", 1)
            # Check next filename.
            return False
        # Ftp file and local file different data.
        elif file_hash != self.ftp_filehash:
            ftp_file_append = False
            filesize_offset = 0
            ftp_hash_file_old = filename + "_" + self.ftp_filehash
            # delete old hash file.
            self.delete_ftp_hash_file(ftp_hash_file_old)
        # Remaining case (same hash, smaller remote size): resume by
        # appending from filesize_offset.
        c.setopt(pycurl.FTPAPPEND, ftp_file_append)
        c.setopt(pycurl.UPLOAD, True)
        c.setopt(pycurl.PROGRESSFUNCTION, self.progress)
        # Local scratch copy of the hash, uploaded as the marker file.
        with open('filehash.txt', 'w') as f:
            f.write(file_hash)
        for item in ("filehash.txt", full_filename):
            # dont show progress by default.
            noprogress = True
            # upload ftp_hash_file first.
            ftp_url = ftp_hash_file
            with open(item, "rb") as f:
                # chages ftp_url and show progress values, add filesize_offset.
                if item != "filehash.txt":
                    # Skip the part already present on the server.
                    f.seek(filesize_offset)
                    noprogress = False
                    ftp_url = ftp_file_path
                c.setopt(pycurl.URL, ftp_url)
                c.setopt(pycurl.NOPROGRESS, noprogress)
                c.setopt(pycurl.READFUNCTION, f.read)
                try:
                    c.perform()
                    if item != "filehash.txt":
                        sys.stderr.write("{0:15} : {1:.>15}\n\n".format("size uploaded", int(c.getinfo(pycurl.SIZE_UPLOAD))))
                except Exception as error_msg:
                    print error_msg
                    wx.MessageBox(str(error_msg), 'Connection error!',
                                  wx.OK | wx.ICON_ERROR)
                    # Exit upload function.
                    return True
        # Reset the cached server hash for the next call.
        self.ftp_filehash = '0'
        c.close()
# Import-only module: running it directly does nothing.
if __name__ == '__main__':
    pass