Which program is responsible for displaying the ETA while loading InceptionV3 from Keras? - python

I was loading the InceptionV3 model from Keras for the first time. It took a long time because of my low processing power, which got me wondering: which program is responsible for calculating the ETA and displaying the progress bar?
InceptionV3_base_model = InceptionV3(weights='imagenet', include_top=False)
>>
Downloading data from https://github.com/fchollet/deep-learning-models/releases/download/v0.5/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5
37036032/87910968 [===========>..................] - ETA: 37s
Which program is calculating and displaying these values? Is it Keras, Jupyter, or Linux itself?

Take keras.datasets.mnist as an example. (Because it's also showing a progress bar.)
Source code:
"""MNIST handwritten digits dataset.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from ..utils.data_utils import get_file
import numpy as np
def load_data(path='mnist.npz'):
    """Fetches the MNIST archive through `get_file` and unpacks its arrays.

    # Arguments
        path: path where to cache the dataset locally
            (relative to ~/.keras/datasets).

    # Returns
        Tuple of Numpy arrays: `(x_train, y_train), (x_test, y_test)`.
    """
    local_path = get_file(
        path,
        origin='https://s3.amazonaws.com/img-datasets/mnist.npz',
        file_hash='8a61469f7ea1b51cbae51d4f78837e45')
    # Read all four arrays while the archive is open; they are returned
    # after the `with` block has closed it.
    with np.load(local_path, allow_pickle=True) as archive:
        train = (archive['x_train'], archive['y_train'])
        test = (archive['x_test'], archive['y_test'])
    return train, test
And we know the bar comes from ..utils.data_utils.get_file
keras.utils.__init__.py looks like this:
from __future__ import absolute_import
from . import np_utils
from . import generic_utils
from . import data_utils
from . import io_utils
from . import conv_utils
from . import losses_utils
from . import metrics_utils
# Globally-importable utils.
from .io_utils import HDF5Matrix
from .io_utils import H5Dict
from .data_utils import get_file
from .data_utils import Sequence
from .data_utils import GeneratorEnqueuer
from .data_utils import OrderedEnqueuer
from .generic_utils import CustomObjectScope
from .generic_utils import custom_object_scope
from .generic_utils import get_custom_objects
from .generic_utils import serialize_keras_object
from .generic_utils import deserialize_keras_object
from .generic_utils import Progbar
from .layer_utils import convert_all_kernels_in_model
from .layer_utils import get_source_inputs
from .layer_utils import print_summary
from .vis_utils import model_to_dot
from .vis_utils import plot_model
from .np_utils import to_categorical
from .np_utils import normalize
from .multi_gpu_utils import multi_gpu_model
get_file comes from keras.utils.data_utils
keras/utils/data_utils.py:
"""Utilities for file download and caching."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import hashlib
import multiprocessing as mp
import os
import random
import shutil
import sys
import tarfile
import threading
import time
import warnings
import zipfile
from abc import abstractmethod
from contextlib import closing
from multiprocessing.pool import ThreadPool
import numpy as np
import six
from six.moves.urllib.error import HTTPError
from six.moves.urllib.error import URLError
from six.moves.urllib.request import urlopen
try:
import queue
except ImportError:
import Queue as queue
from ..utils.generic_utils import Progbar
if sys.version_info[0] == 2:
    def urlretrieve(url, filename, reporthook=None, data=None):
        """Replacement for `urlretrieve` for Python 2.

        Under Python 2, `urlretrieve` relies on `FancyURLopener` from legacy
        `urllib` module, known to have issues with proxy management.

        # Arguments
            url: url to retrieve.
            filename: where to store the retrieved data locally.
            reporthook: optional callable invoked after every block read
                (including the final, empty one) with three arguments:
                the number of reads so far, the block size in bytes,
                and the total size of the file (-1 when the response
                carries no `Content-Length` header).
            data: `data` argument passed to `urlopen`.
        """

        def chunk_read(response, chunk_size=8192, reporthook=None):
            # Total size comes from the Content-Length header when present.
            length_header = response.info().get('Content-Length')
            if length_header is None:
                total_size = -1
            else:
                total_size = int(length_header.strip())
            count = 0
            while True:
                chunk = response.read(chunk_size)
                count += 1
                if reporthook is not None:
                    reporthook(count, chunk_size, total_size)
                if not chunk:
                    # An empty read marks the end of the response body.
                    break
                yield chunk

        with closing(urlopen(url, data)) as response, \
                open(filename, 'wb') as fd:
            for chunk in chunk_read(response, reporthook=reporthook):
                fd.write(chunk)
else:
    from six.moves.urllib.request import urlretrieve
def _extract_archive(file_path, path='.', archive_format='auto'):
"""Extracts an archive if it matches tar, tar.gz, tar.bz, or zip formats.
# Arguments
file_path: path to the archive file
path: path to extract the archive file
archive_format: Archive format to try for extracting the file.
Options are 'auto', 'tar', 'zip', and None.
'tar' includes tar, tar.gz, and tar.bz files.
The default 'auto' is ['tar', 'zip'].
None or an empty list will return no matches found.
# Returns
True if a match was found and an archive extraction was completed,
False otherwise.
"""
if archive_format is None:
return False
if archive_format == 'auto':
archive_format = ['tar', 'zip']
if isinstance(archive_format, six.string_types):
archive_format = [archive_format]
for archive_type in archive_format:
if archive_type == 'tar':
open_fn = tarfile.open
is_match_fn = tarfile.is_tarfile
if archive_type == 'zip':
open_fn = zipfile.ZipFile
is_match_fn = zipfile.is_zipfile
if is_match_fn(file_path):
with open_fn(file_path) as archive:
try:
archive.extractall(path)
except (tarfile.TarError, RuntimeError,
KeyboardInterrupt):
if os.path.exists(path):
if os.path.isfile(path):
os.remove(path)
else:
shutil.rmtree(path)
raise
return True
return False
def get_file(fname,
             origin,
             untar=False,
             md5_hash=None,
             file_hash=None,
             cache_subdir='datasets',
             hash_algorithm='auto',
             extract=False,
             archive_format='auto',
             cache_dir=None):
    """Downloads a file from a URL if it is not already in the cache.

    With the defaults, the file at `origin` is stored under the cache_dir
    `~/.keras`, inside the cache_subdir `datasets`, under the name `fname`;
    `example.txt` would therefore end up at `~/.keras/datasets/example.txt`.

    Files in tar, tar.gz, tar.bz, and zip formats can also be extracted.
    Passing a hash will verify the file after download. The command line
    programs `shasum` and `sha256sum` can compute the hash.

    # Arguments
        fname: Name of the file. If an absolute path `/path/to/file.txt` is
            specified the file will be saved at that location.
        origin: Original URL of the file.
        untar: Deprecated in favor of 'extract'.
            boolean, whether the file should be decompressed
        md5_hash: Deprecated in favor of 'file_hash'.
            md5 hash of the file for verification
        file_hash: The expected hash string of the file after download.
            The sha256 and md5 hash algorithms are both supported.
        cache_subdir: Subdirectory under the Keras cache dir where the file is
            saved. If an absolute path `/path/to/folder` is
            specified the file will be saved at that location.
        hash_algorithm: Select the hash algorithm to verify the file.
            options are 'md5', 'sha256', and 'auto'.
            The default 'auto' detects the hash algorithm in use.
        extract: True tries extracting the file as an Archive, like tar or zip.
        archive_format: Archive format to try for extracting the file.
            Options are 'auto', 'tar', 'zip', and None.
            'tar' includes tar, tar.gz, and tar.bz files.
            The default 'auto' is ['tar', 'zip'].
            None or an empty list will return no matches found.
        cache_dir: Location to store cached files, when None it
            defaults to the [Keras Directory](/faq/#where-is-the-keras-configuration-filed-stored).

    # Returns
        Path to the downloaded file
    """  # noqa
    # Resolve the cache root: explicit argument, then $KERAS_HOME, then ~/.keras.
    if cache_dir is None:
        cache_dir = os.environ.get('KERAS_HOME')
        if cache_dir is None:
            cache_dir = os.path.join(os.path.expanduser('~'), '.keras')

    # The deprecated md5_hash only applies when no file_hash was given.
    if file_hash is None and md5_hash is not None:
        file_hash = md5_hash
        hash_algorithm = 'md5'

    datadir_base = os.path.expanduser(cache_dir)
    if not os.access(datadir_base, os.W_OK):
        # Cache root is not writable: fall back to /tmp/.keras.
        datadir_base = os.path.join('/tmp', '.keras')
    datadir = os.path.join(datadir_base, cache_subdir)
    if not os.path.exists(datadir):
        os.makedirs(datadir)

    if untar:
        untar_fpath = os.path.join(datadir, fname)
        fpath = untar_fpath + '.tar.gz'
    else:
        fpath = os.path.join(datadir, fname)

    # Download when the file is missing, or present but failing verification.
    if not os.path.exists(fpath):
        download = True
    elif (file_hash is not None and
          not validate_file(fpath, file_hash, algorithm=hash_algorithm)):
        print('A local file was found, but it seems to be '
              'incomplete or outdated because the ' + hash_algorithm +
              ' file hash does not match the original value of ' +
              file_hash + ' so we will re-download the data.')
        download = True
    else:
        download = False

    if download:
        print('Downloading data from', origin)

        class ProgressTracker(object):
            # Holds the progress bar for the duration of the download.
            # This design was chosen for Python 2.7 compatibility.
            progbar = None

        def dl_progress(count, block_size, total_size):
            bar = ProgressTracker.progbar
            if bar is not None:
                bar.update(count * block_size)
                return
            # First callback: create the bar; -1 means the size is unknown.
            ProgressTracker.progbar = Progbar(
                None if total_size == -1 else total_size)

        error_msg = 'URL fetch failure on {} : {} -- {}'
        try:
            try:
                urlretrieve(origin, fpath, dl_progress)
            except HTTPError as e:
                raise Exception(error_msg.format(origin, e.code, e.msg))
            except URLError as e:
                raise Exception(error_msg.format(origin, e.errno, e.reason))
        except (Exception, KeyboardInterrupt):
            # Do not leave a partial download behind.
            if os.path.exists(fpath):
                os.remove(fpath)
            raise
        ProgressTracker.progbar = None

    if untar:
        if not os.path.exists(untar_fpath):
            _extract_archive(fpath, datadir, archive_format='tar')
        return untar_fpath

    if extract:
        _extract_archive(fpath, datadir, archive_format)

    return fpath
def _hash_file(fpath, algorithm='sha256', chunk_size=65535):
"""Calculates a file sha256 or md5 hash.
# Example
```python
>>> from keras.utils.data_utils import _hash_file
>>> _hash_file('/path/to/file.zip')
'e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855'
```
# Arguments
fpath: path to the file being validated
algorithm: hash algorithm, one of 'auto', 'sha256', or 'md5'.
The default 'auto' detects the hash algorithm in use.
chunk_size: Bytes to read at a time, important for large files.
# Returns
The file hash
"""
if (algorithm == 'sha256') or (algorithm == 'auto' and len(hash) == 64):
hasher = hashlib.sha256()
else:
hasher = hashlib.md5()
with open(fpath, 'rb') as fpath_file:
for chunk in iter(lambda: fpath_file.read(chunk_size), b''):
hasher.update(chunk)
return hasher.hexdigest()
def validate_file(fpath, file_hash, algorithm='auto', chunk_size=65535):
    """Checks a file against an expected sha256 or md5 hash.

    # Arguments
        fpath: path to the file being validated
        file_hash: The expected hash string of the file.
            The sha256 and md5 hash algorithms are both supported.
        algorithm: Hash algorithm, one of 'auto', 'sha256', or 'md5'.
            With 'auto', a 64-character `file_hash` selects sha256 and
            anything else selects md5.
        chunk_size: Bytes to read at a time, important for large files.

    # Returns
        Whether the file is valid
    """
    auto_sha256 = algorithm == 'auto' and len(file_hash) == 64
    hasher = 'sha256' if (algorithm == 'sha256' or auto_sha256) else 'md5'
    actual = _hash_file(fpath, hasher, chunk_size)
    return str(actual) == str(file_hash)
class Sequence(object):
    """Base object for fitting to a sequence of data, such as a dataset.

    Every `Sequence` must implement the `__getitem__` and the `__len__` methods.
    If you want to modify your dataset between epochs you may implement
    `on_epoch_end`. The method `__getitem__` should return a complete batch.

    # Notes

    `Sequence` are a safer way to do multiprocessing. This structure guarantees
    that the network will only train once on each sample per epoch which is not
    the case with generators.

    # Examples

    ```python
        from skimage.io import imread
        from skimage.transform import resize
        import numpy as np

        # Here, `x_set` is list of path to the images
        # and `y_set` are the associated classes.

        class CIFAR10Sequence(Sequence):

            def __init__(self, x_set, y_set, batch_size):
                self.x, self.y = x_set, y_set
                self.batch_size = batch_size

            def __len__(self):
                return int(np.ceil(len(self.x) / float(self.batch_size)))

            def __getitem__(self, idx):
                batch_x = self.x[idx * self.batch_size:(idx + 1) * self.batch_size]
                batch_y = self.y[idx * self.batch_size:(idx + 1) * self.batch_size]

                return np.array([
                    resize(imread(file_name), (200, 200))
                       for file_name in batch_x]), np.array(batch_y)
    ```
    """

    use_sequence_api = True

    # The class does not use ABCMeta, so `@abstractmethod` only marks these
    # methods as abstract; instantiation is not blocked and the base
    # implementations still raise NotImplementedError.
    @abstractmethod
    def __getitem__(self, index):
        """Gets batch at position `index`.

        # Arguments
            index: position of the batch in the Sequence.

        # Returns
            A batch
        """
        raise NotImplementedError

    @abstractmethod
    def __len__(self):
        """Number of batch in the Sequence.

        # Returns
            The number of batches in the Sequence.
        """
        raise NotImplementedError

    def on_epoch_end(self):
        """Method called at the end of every epoch.
        """
        pass

    def __iter__(self):
        """Create a generator that iterate over the Sequence."""
        for i in range(len(self)):
            yield self[i]
# Module-level state meant to be shared across processes.
_SHARED_SEQUENCES = {}
# Starts unset; a Value is used to hand out unique ids to different processes.
_SEQUENCE_COUNTER = None


def init_pool(seqs):
    """Replaces the module-level shared-sequence mapping with `seqs`."""
    global _SHARED_SEQUENCES
    _SHARED_SEQUENCES = seqs


def get_index(uid, i):
    """Get the value from the Sequence `uid` at index `i`.

    To allow multiple Sequences to be used at the same time, we use `uid` to
    get a specific one. A single Sequence would cause the validation to
    overwrite the training Sequence.

    # Arguments
        uid: int, Sequence identifier
        i: index

    # Returns
        The value at index `i`.
    """
    selected = _SHARED_SEQUENCES[uid]
    return selected[i]
class SequenceEnqueuer(object):
    """Base class to enqueue inputs.

    The task of an Enqueuer is to use parallelism to speed up preprocessing.
    This is done with processes or threads.

    # Examples

    ```python
        enqueuer = SequenceEnqueuer(...)
        enqueuer.start()
        datas = enqueuer.get()
        for data in datas:
            # Use the inputs; training, evaluating, predicting.
            # ... stop sometime.
        enqueuer.stop()
    ```

    The `enqueuer.get()` should be an infinite stream of datas.
    """

    def __init__(self, sequence,
                 use_multiprocessing=False):
        self.sequence = sequence
        self.use_multiprocessing = use_multiprocessing

        global _SEQUENCE_COUNTER
        if _SEQUENCE_COUNTER is None:
            try:
                _SEQUENCE_COUNTER = mp.Value('i', 0)
            except OSError:
                # In this case the OS does not allow us to use
                # multiprocessing. We resort to an int
                # for enqueuer indexing.
                _SEQUENCE_COUNTER = 0

        if isinstance(_SEQUENCE_COUNTER, int):
            self.uid = _SEQUENCE_COUNTER
            _SEQUENCE_COUNTER += 1
        else:
            # Doing Multiprocessing.Value += x is not process-safe.
            with _SEQUENCE_COUNTER.get_lock():
                self.uid = _SEQUENCE_COUNTER.value
                _SEQUENCE_COUNTER.value += 1

        # Populated by `start()`.
        self.workers = 0
        self.executor_fn = None
        self.queue = None
        self.run_thread = None
        self.stop_signal = None

    def is_running(self):
        """Returns True once started and until the stop signal is set."""
        return self.stop_signal is not None and not self.stop_signal.is_set()

    def start(self, workers=1, max_queue_size=10):
        """Start the handler's workers.

        # Arguments
            workers: number of worker threads
            max_queue_size: queue size
                (when full, workers could block on `put()`)
        """
        if self.use_multiprocessing:
            self.executor_fn = self._get_executor_init(workers)
        else:
            # We do not need the init since it's threads.
            self.executor_fn = lambda _: ThreadPool(workers)
        self.workers = workers
        self.queue = queue.Queue(max_queue_size)
        self.stop_signal = threading.Event()
        self.run_thread = threading.Thread(target=self._run)
        self.run_thread.daemon = True
        self.run_thread.start()

    def _send_sequence(self):
        """Send current Iterable to all workers."""
        # For new processes that may spawn
        _SHARED_SEQUENCES[self.uid] = self.sequence

    def stop(self, timeout=None):
        """Stops running threads and wait for them to exit, if necessary.

        Should be called by the same thread which called `start()`.

        # Arguments
            timeout: maximum time to wait on `thread.join()`
        """
        self.stop_signal.set()
        with self.queue.mutex:
            # Drop everything still queued and wake a producer that may be
            # blocked on a full queue.
            self.queue.queue.clear()
            self.queue.unfinished_tasks = 0
            self.queue.not_full.notify()
        self.run_thread.join(timeout)
        _SHARED_SEQUENCES[self.uid] = None

    # The class does not use ABCMeta, so the decorators below only mark the
    # methods as abstract; the base implementations raise NotImplementedError.
    @abstractmethod
    def _run(self):
        """Submits request to the executor and queue the `Future` objects."""
        raise NotImplementedError

    @abstractmethod
    def _get_executor_init(self, workers):
        """Get the Pool initializer for multiprocessing.

        # Returns
            Function, a Function to initialize the pool
        """
        raise NotImplementedError

    @abstractmethod
    def get(self):
        """Creates a generator to extract data from the queue.

        Skip the data if it is `None`.

        # Returns
            Generator yielding tuples `(inputs, targets)`
                or `(inputs, targets, sample_weights)`.
        """
        raise NotImplementedError
class OrderedEnqueuer(SequenceEnqueuer):
    """Builds a Enqueuer from a Sequence.

    Used in `fit_generator`, `evaluate_generator`, `predict_generator`.

    # Arguments
        sequence: A `keras.utils.data_utils.Sequence` object.
        use_multiprocessing: use multiprocessing if True, otherwise threading
        shuffle: whether to shuffle the data at the beginning of each epoch
    """

    def __init__(self, sequence, use_multiprocessing=False, shuffle=False):
        super(OrderedEnqueuer, self).__init__(sequence, use_multiprocessing)
        self.shuffle = shuffle
        self.end_of_epoch_signal = threading.Event()

    def _get_executor_init(self, workers):
        """Get the Pool initializer for multiprocessing.

        # Returns
            Function, a Function to initialize the pool
        """
        def make_pool(seqs):
            return mp.Pool(workers,
                           initializer=init_pool,
                           initargs=(seqs,))
        return make_pool

    def _wait_queue(self):
        """Wait for the queue to be empty."""
        drained_or_stopped = False
        while not drained_or_stopped:
            time.sleep(0.1)
            drained_or_stopped = (self.queue.unfinished_tasks == 0 or
                                  self.stop_signal.is_set())

    def _run(self):
        """Submits request to the executor and queue the `Future` objects."""
        while True:
            batch_order = list(range(len(self.sequence)))
            self._send_sequence()  # Share the initial sequence

            if self.shuffle:
                random.shuffle(batch_order)

            with closing(self.executor_fn(_SHARED_SEQUENCES)) as executor:
                for i in batch_order:
                    if self.stop_signal.is_set():
                        return
                    pending = executor.apply_async(get_index, (self.uid, i))
                    # Remember the index so `get()` can recompute the batch
                    # if the worker never delivers it.
                    pending.idx = i
                    self.queue.put(pending, block=True)

                # Done with the current epoch, waiting for the final batches
                self._wait_queue()

                if self.stop_signal.is_set():
                    # We're done
                    return

            # Call the internal on epoch end.
            self.sequence.on_epoch_end()
            # communicate on_epoch_end to the main thread
            self.end_of_epoch_signal.set()

    def join_end_of_epoch(self):
        """Waits up to 30 seconds for the end-of-epoch signal, then clears it."""
        self.end_of_epoch_signal.wait(timeout=30)
        self.end_of_epoch_signal.clear()

    def get(self):
        """Creates a generator to extract data from the queue.

        Skip the data if it is `None`.

        # Yields
            The next element in the queue, i.e. a tuple
            `(inputs, targets)` or
            `(inputs, targets, sample_weights)`.
        """
        try:
            while self.is_running():
                try:
                    future = self.queue.get(block=True)
                    inputs = future.get(timeout=30)
                except mp.TimeoutError:
                    # Fall back to reading the batch directly from the
                    # sequence in this thread.
                    idx = future.idx
                    warnings.warn(
                        'The input {} could not be retrieved.'
                        ' It could be because a worker has died.'.format(idx),
                        UserWarning)
                    inputs = self.sequence[idx]
                finally:
                    self.queue.task_done()

                if inputs is not None:
                    yield inputs
        except Exception:
            self.stop()
            six.reraise(*sys.exc_info())
def init_pool_generator(gens, random_seed=None):
    """Installs `gens` as the shared mapping and optionally seeds NumPy.

    # Arguments
        gens: mapping stored as the module-level `_SHARED_SEQUENCES`.
        random_seed: when not None, NumPy's global RNG is seeded with
            `random_seed` plus the current process identifier.
    """
    global _SHARED_SEQUENCES
    _SHARED_SEQUENCES = gens

    if random_seed is None:
        return
    # Offsetting by the process ident gives each process its own seed.
    np.random.seed(random_seed + mp.current_process().ident)


def next_sample(uid):
    """Get the next value from the generator `uid`.

    To allow multiple generators to be used at the same time, we use `uid` to
    get a specific one. A single generator would cause the validation to
    overwrite the training generator.

    # Arguments
        uid: int, generator identifier

    # Returns
        The next value of generator `uid`.
    """
    generator = _SHARED_SEQUENCES[uid]
    return next(generator)
class GeneratorEnqueuer(SequenceEnqueuer):
    """Builds a queue out of a data generator.

    The provided generator can be finite in which case the class will throw
    a `StopIteration` exception.

    Used in `fit_generator`, `evaluate_generator`, `predict_generator`.

    # Arguments
        sequence: a sequence function which yields data
        use_multiprocessing: use multiprocessing if True, otherwise threading
        wait_time: no longer used; passing a value only triggers a
            `DeprecationWarning`.
        random_seed: Initial seed for workers,
            will be incremented by one for each worker.
    """

    def __init__(self, sequence, use_multiprocessing=False, wait_time=None,
                 random_seed=None):
        super(GeneratorEnqueuer, self).__init__(sequence, use_multiprocessing)
        self.random_seed = random_seed
        if wait_time is not None:
            warnings.warn('`wait_time` is not used anymore.',
                          DeprecationWarning)

    def _get_executor_init(self, workers):
        """Get the Pool initializer for multiprocessing.

        # Returns
            Function, a Function to initialize the pool
        """
        return lambda seqs: mp.Pool(workers,
                                    initializer=init_pool_generator,
                                    initargs=(seqs, self.random_seed))

    def _run(self):
        """Submits request to the executor and queue the `Future` objects."""
        self._send_sequence()  # Share the initial generator
        with closing(self.executor_fn(_SHARED_SEQUENCES)) as executor:
            while True:
                if self.stop_signal.is_set():
                    return
                self.queue.put(
                    executor.apply_async(next_sample, (self.uid,)), block=True)

    def get(self):
        """Creates a generator to extract data from the queue.

        Skip the data if it is `None`.

        # Yields
            The next element in the queue, i.e. a tuple
            `(inputs, targets)` or
            `(inputs, targets, sample_weights)`.
        """
        try:
            while self.is_running():
                try:
                    future = self.queue.get(block=True)
                    inputs = future.get(timeout=30)
                    self.queue.task_done()
                except mp.TimeoutError:
                    # The literals are joined with explicit spaces so the
                    # sentences no longer run together.
                    warnings.warn(
                        'An input could not be retrieved.'
                        ' It could be because a worker has died.'
                        ' We do not have any information on the lost sample.',
                        UserWarning)
                    continue
                if inputs is not None:
                    yield inputs
        except StopIteration:
            # Special case for finite generators
            last_ones = []
            while self.queue.qsize() > 0:
                last_ones.append(self.queue.get(block=True))
            # Wait for them to complete
            for pending in last_ones:
                pending.wait()
            # Keep the good ones
            last_ones = [future.get() for future in last_ones
                         if future.successful()]
            for inputs in last_ones:
                if inputs is not None:
                    yield inputs
        except Exception as e:
            self.stop()
            if 'generator already executing' in str(e):
                raise RuntimeError(
                    "Your generator is NOT thread-safe. "
                    "Keras requires a thread-safe generator when "
                    "`use_multiprocessing=False, workers > 1`. "
                    "For more information see issue #1638.")
            six.reraise(*sys.exc_info())
That's where it comes from.
So the progress bar shown by get_file is rendered by ..utils.generic_utils.Progbar, i.e. by Keras itself, not by Jupyter or the operating system.

Related

How to properly call a multithreaded method inside a class in Python?

I've got a method to download a bunch of files, and then do things with them.
The multithreaded download methods worked when not in a class, but when I put them inside the class, they cease processing immediately after initiating the first file in the list. There are no errors thrown; the URL call is good, etc. So I am probably missing something related to OOP in python.
from multiprocessing import cpu_count
from multiprocessing.pool import ThreadPool
import os
from requests import Session
import time
class OSM:
    """Downloads a list of files over one shared session using a thread pool."""

    def __init__(self):
        self.url_root = "https://my.site/index.html"
        self.s = self._mount_session()
        self.data = None  # list of (download link, local path) pairs
        self.download_path = "C:/Temp"

    def _download_parallel(self, args):
        """Download every `(url, local_path)` pair in `args` concurrently.

        Prints one timing line per successful download. Failed downloads
        were already reported by `_download_url` and are skipped here.
        """
        jobs = list(args or ())
        if not jobs:
            return
        # Keep at least one worker: cpu_count() - 1 is 0 on a single-core
        # machine, which ThreadPool rejects.
        with ThreadPool(max(1, cpu_count() - 1)) as pool:
            for result in pool.imap_unordered(self._download_url, jobs):
                if result is None:
                    continue
                print(f"URL: {result[0]} | Time (s): {result[1]}")

    def _download_url(self, args):
        """Fetch one `(url, local_path)` pair.

        Returns `(url, elapsed_seconds)` on success and None on failure.
        """
        t0 = time.time()
        url, fn = args[0], args[1]
        try:
            r = self.s.get(url)
            with open(fn, 'wb') as f:
                f.write(r.content)
        except Exception as e:
            # Best effort: report and carry on with the remaining files.
            print(f"Exception in _download_url(): {e}")
            return None
        return (url, time.time() - t0)

    def _mount_session(self):
        return Session()  # placefiller, the session is negotiated in here

    def download(self):
        """Create the download directory if needed and fetch `self.data`."""
        # expose this to the user
        os.makedirs(self.download_path, exist_ok=True)
        return self._download_parallel(self.data)

    def do_stuff_with_files(self):
        # process files, etc
        return

    def get_file_list(self):
        """Build the list of (download link, local path) pairs."""
        dl_links = []
        local_files = []
        # check the website, get list of links, create list of local files, zip together
        # Stored as a list: a bare zip object would be exhausted after
        # the first download() call.
        self.data = list(zip(dl_links, local_files))
if __name__ == "__main__":
o = OSM()
o.get_file_list()
o.download()

How to retrieve data from a file in a different folder

I have a Python script that scans the internet, with the following structure. I'm trying to adapt it to read a list of IPs instead of a range of IPs.
/ip-ranges
range.txt
/script
loader.py
scanner.py
the scanner.py is the following
# Import modules
from .inspection import Request, InspectPaths, InspectContent, PortIsOpen, GetTitle
from ipaddress import ip_address
from threading import Thread
# Scan result
class __Result:
def __init__(self, name, atype, path, title):
self.name = name
self.type = atype
self.path = path
self.title = title
# Return IPs in IPv4 range, inclusive.
import ipaddress
def process(iptext):
try:
print(ipaddress.ip_interface(iptext).network)
return
except Exception:
print("INVALID")
return
with open('ipaddresses.txt', 'r') as f:
for line in f:
line = "".join(line.split())
process(line)
# Scan IP address range
def ScanRange(ranges):
threads = []
# *-- Scan IP range --*
for address in IPsRange(ranges):
t = Thread(
target=__СheckAddrThreaded,
args=(address,)
)
threads.append(t)
t.start()
for thread in threads:
thread.join()
The loader.py is the following
# Import modules
from os import listdir
from sys import exit
# Select IP ranges from directory
def SelectIPRanges():
    """Let the user pick one file from ``ip-ranges/`` and return its lines.

    Lists the directory entries numbered from 1, reads the user's choice
    from standard input, and exits with an error message when the input is
    not a number or is outside ``1..len(files)``.

    Returns:
        A ``(ranges, filename)`` tuple: the lines of the chosen file and
        the file's name without the directory part.
    """
    path = "ip-ranges/"  # Directory path.
    files = listdir(path)  # Get directory files.
    for i, f in enumerate(files, start=1):
        print(f" [{i}] - {f}")
    # *-- Get user input --*
    try:
        choice = int(input("\n [?] Please select country to scan --> "))
    except ValueError:
        exit(f" [!] ERROR: Please enter a numerical value!")
    # Check the range explicitly: 0 and negative numbers would otherwise
    # index from the end of the list instead of being rejected.
    if not 1 <= choice <= len(files):
        exit(f" [!] ERROR: Please enter value from 1 to {len(files)}!")
    file = path + files[choice - 1]
    # *-- Read file --*
    with open(file, "r") as ranges_file:  # Open file in reading mode.
        ranges = ranges_file.readlines()  # Read all lines.
    return ranges, file.split("/")[-1]
The changes that I have made on the scanner.py was on this part
# Return IPs in IPv4 range, inclusive.
import ipaddress
def process(iptext):
try:
print(ipaddress.ip_interface(iptext).network)
return
except Exception:
print("INVALID")
return
with open('ipaddresses.txt', 'r') as f:
for line in f:
line = "".join(line.split())
process(line)
the original scanner.py is
# Import modules
from .inspection import Request, InspectPaths, InspectContent, PortIsOpen, GetTitle
from ipaddress import ip_address
from threading import Thread
# Scan result
class __Result:
def __init__(self, name, atype, path, title):
self.name = name
self.type = atype
self.path = path
self.title = title
# Return IPs in IPv4 range, inclusive.
def IPsRange(start='', end=''):
    """Return every IP address from `start` to `end`, both inclusive.

    Args:
        start: first address, or a whole ``"first-last"`` range string
            (surrounding whitespace and newlines are ignored) when `end`
            is empty.
        end: last address. When it is empty and `start` holds no ``-``,
            the result is the single address `start`.

    Returns:
        A list of exploded address strings; empty when both arguments
        are empty.

    Raises:
        ValueError: if either bound is not a valid IP address.
    """
    if not start and not end:
        return []
    if not end and "-" in start:
        start, end = start.split("-", 1)
    start, end = start.strip(), end.strip()
    if not end:
        # A lone address is a range of one.
        end = start
    first = int(ip_address(start))
    last = int(ip_address(end))
    # +1 because range() excludes its upper bound but the IP range is
    # documented as inclusive.
    return [ip_address(ip).exploded for ip in range(first, last + 1)]
# Scan IP address range
def ScanRange(ranges):
threads = []
# *-- Scan IP range --*
for address in IPsRange(ranges):
t = Thread(
target=__СheckAddrThreaded,
args=(address,)
)
threads.append(t)
t.start()
for thread in threads:
thread.join()
At the moment I'm having a hard time linking loader.py and scanner.py. I know there are some changes I need to make, but I can't figure out exactly what they are. I would appreciate some guidance here.
Thank you all.
So you load all possible ips with loader, and process them with scanner correct?
Right. I see. You suggest to import loader.py in scanner.py, but in this case I'd rather make a class that holds related functionality:
# Import modules
from .inspection import Request, InspectPaths, InspectContent, PortIsOpen, GetTitle
from ipaddress import ip_address
from threading import Thread
import ipaddress
from os import listdir
from sys import exit
# Scan result
class IP_Scanner():
    """Loads a list of addresses from a file and validates them in threads."""

    def __init__(self, name='', atype='', path='', title=''):
        self.name = name
        self.type = atype
        self.path = path
        self.title = title
        # Raw lines loaded by set_range(); empty until it is called.
        self.range = []
        # Entries that process() accepted as valid.
        self.processed_range = []

    def process(self, ip):
        """Validate one entry and record it when it parses.

        Whitespace is removed from `ip` before parsing. Returns None when
        the entry is accepted and 0 (after printing a message) when it is
        not a valid address or interface.
        """
        line = "".join(ip.split())
        try:
            ipaddress.ip_interface(line)
        except ValueError:
            print("INVALID: " + line)
            return 0
        self.processed_range.append(line)

    def set_range(self):
        """Prompt for a file in ``ip-ranges/`` and load its lines.

        Returns a ``(lines, filename)`` tuple and stores the lines in
        ``self.range``. Exits with an error message on invalid input.
        """
        # Select IP ranges from directory
        path = "ip-ranges/"  # Directory path.
        files = listdir(path)  # Get directory files.
        for i, f in enumerate(files, start=1):
            print(f" [{i}] - {f}")
        # *-- Get user input --*
        try:
            choice = int(input("\n [?] Please select country to scan --> "))
        except ValueError:
            exit(f" [!] ERROR: Please enter a numerical value!")
        # 0 and negative numbers would index from the end of the list.
        if not 1 <= choice <= len(files):
            exit(f" [!] ERROR: Please enter value from 1 to {len(files)}!")
        file = path + files[choice - 1]
        # *-- Read file --*
        with open(file, "r") as ranges_file:  # Open file in reading mode.
            self.range = ranges_file.readlines()  # Read all lines.
        return self.range, file.split("/")[-1]

    def scan_range(self):
        """Run process() on every loaded entry, one thread per entry."""
        if self.range:
            threads = []
            # *-- Scan IP range --*
            for address in self.range:
                t = Thread(
                    # Bound method: a bare `process` is not defined at
                    # module level.
                    target=self.process,
                    args=(address,)
                )
                threads.append(t)
                t.start()
            for thread in threads:
                thread.join()
if __name__ == '__main__':
    x = IP_Scanner()
    x.set_range()
    x.scan_range()
    # processed_range is a list, so convert it before joining it to the
    # label; str + list raises TypeError.
    print('Success on: ' + str(x.processed_range))
Many other ways to do this, but this encapsulates the related info into an object so you can run many instances of it in parallel.
For imports from a different folder with init.py (or package), see: https://realpython.com/absolute-vs-relative-python-imports/

How to pass live audio url to Google Speech to Text API

I have a url to live audio recording that I'm trying to transcribe using Google Speech to Text API. I am using an example code from the Cloud Speech to Text API. However, the problem is that when I pass the live url I do not receive any output. Below is the relevant portion of my code. Any help would be greatly appreciated!
from google.cloud import speech
from google.cloud.speech import enums
from google.cloud.speech import types
import io
import os
import time
import requests
import numpy as np
from google.cloud import speech
from google.cloud.speech import enums
from google.cloud.speech import types
from urllib.request import urlopen
from datetime import datetime
from datetime import timedelta
os.environ["GOOGLE_APPLICATION_CREDENTIALS"]= "app_creds.json"
def get_stream(url='streamurl', duration=60, chunk_size=8000):
    """Read audio bytes from `url` for up to `duration` seconds.

    Args:
        url: address of the audio stream to open.
        duration: maximum number of seconds to keep reading.
        chunk_size: maximum number of bytes requested per read.

    Returns:
        All bytes received, concatenated in arrival order. Empty when
        `duration` is not positive or the stream yields nothing.
    """
    deadline = datetime.now() + timedelta(seconds=duration)
    chunks = []
    with urlopen(url) as stream:
        while datetime.now() < deadline:
            data = stream.read(chunk_size)
            if not data:
                # End of stream: stop instead of spinning until the deadline.
                break
            # Keep every chunk; reassigning one variable would leave only
            # the final read.
            chunks.append(data)
    return b"".join(chunks)
def transcribe_streaming():
    """Sends the audio from `get_stream()` for streaming recognition and prints the results."""
    client = speech.SpeechClient()

    # In practice, stream should be a generator yielding chunks of audio data.
    audio_chunks = [get_stream()]
    request_iter = (
        types.StreamingRecognizeRequest(audio_content=chunk)
        for chunk in audio_chunks
    )

    config = types.RecognitionConfig(
        encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=16000,
        language_code='en-US')
    streaming_config = types.StreamingRecognitionConfig(config=config)

    # streaming_recognize returns a generator.
    for response in client.streaming_recognize(streaming_config, request_iter):
        # Once the transcription has settled, the first result will contain the
        # is_final result. The other results will be for subsequent portions of
        # the audio.
        for result in response.results:
            print('Finished: {}'.format(result.is_final))
            print('Stability: {}'.format(result.stability))
            # The alternatives are ordered from most likely to least.
            for alternative in result.alternatives:
                print('Confidence: {}'.format(alternative.confidence))
                print(u'Transcript: {}'.format(alternative.transcript))
When sending audio to the Google Speech service, make sure that the service object setup matches the audio encoding. In your particular case
config = types.RecognitionConfig(
encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
sample_rate_hertz=16000,
language_code='en-US')
corresponds to single channel, 16KHz, linear 16 bit PCM encoding. See the list of other supported encodings if you need to transcribe audio in different formats.
A part of my code I used a while back, I don't know if that may help:
def live_recognize_loop(self):
# Streams microphone audio to client.streaming_recognize for as long as
# self.recording is truthy. Each transcript is written to
# self.intermediateFrame; a final transcript is also passed to
# self.user_display.
# NOTE(review): the indentation of this snippet has been lost, so the exact
# nesting of the trailing statements (in particular the last `break`) cannot
# be confirmed from the text alone.
client = self.client
# Passed to stream.generator() below so the generator can poll the flag.
def is_running():
return self.recording
# Each pass of this loop opens a new MicrophoneStream and issues a new
# streaming_recognize call.
while self.recording:
with MicrophoneStream(RATE, CHUNK) as stream:
audio_generator = stream.generator(is_running)
requests = (types.StreamingRecognizeRequest(audio_content=content) for content in audio_generator)
responses = client.streaming_recognize(client.custom_streaming_config, requests)
responses_iterator = iter(responses)
# Responses are pulled one at a time with next() so that each kind of
# failure can be handled by its own except clause.
while self.recording:
try:
response = next(responses_iterator)
except StopIteration:
break
except OutOfRange:
# Exception 400 - Exceeded maximum allowed stream duration of 65 seconds.
self.user_display(self.intermediateFrame.GetMessageText())
break # Start over
except ServiceUnavailable as e:
# Exception 503 - Getting metadata from plugin failed
self.log("{0} - NOT RECOGNIZED - {1}\n".format(self.getDate(), e))
break
# The two handlers below leave the response loop without reporting the error.
except ResourceExhausted as e:
break
except GoogleAPICallError as e:
break
# Only the first result, and its first alternative, are used.
if response.results:
result = response.results[0]
if result.alternatives:
transcript = result.alternatives[0].transcript
self.intermediateFrame.SetMessageText(transcript)
if not result.is_final:
self.intermediateFrame.Display()
# print(transcript)
else:
self.user_display(transcript)
self.intermediateFrame.Display(False)
self.intermediateFrame.SetMessageText("")
#print("\t\t FINAL: %s" % transcript)
break # Start over
MicrophoneStream class
from __future__ import division
import pyaudio
from six.moves import queue
class MicrophoneStream(object):
    """Context manager wrapping a PyAudio input stream.

    Captured audio is pushed onto an internal queue by a PyAudio callback and
    handed out in batches by :meth:`generator`.
    """

    def __init__(self, rate, chunk):
        self._rate = rate
        self._chunk = chunk
        # Thread-safe hand-off between the audio callback and the consumer.
        self._buff = queue.Queue()
        self.closed = True

    def __enter__(self):
        self._audio_interface = pyaudio.PyAudio()
        self._audio_stream = self._audio_interface.open(
            format=pyaudio.paInt16,
            # The API currently only supports 1-channel (mono) audio
            channels=1,
            rate=self._rate,
            input=True,
            frames_per_buffer=self._chunk,
            # Filling the queue from a callback keeps the input device's
            # buffer from overflowing while the calling thread is busy with
            # network requests, etc.
            stream_callback=self._fill_buffer,
        )
        self.closed = False
        return self

    def __exit__(self, type, value, traceback):
        self._audio_stream.stop_stream()
        self._audio_stream.close()
        self.closed = True
        # A None sentinel wakes up the generator so that the client's
        # streaming_recognize method will not block process termination.
        self._buff.put(None)
        self._audio_interface.terminate()

    def _fill_buffer(self, in_data, frame_count, time_info, status_flags):
        """PyAudio callback: queue the captured data and keep the stream running."""
        self._buff.put(in_data)
        return None, pyaudio.paContinue

    def generator(self, is_running=None):
        """Yield queued audio, joining everything available into one bytes object.

        Stops when the stream is closed, when a ``None`` sentinel is read from
        the queue, or when ``is_running`` is callable and returns a falsy value.
        """
        while not self.closed:
            # Block until at least one item is available.
            first = self._buff.get()
            if callable(is_running) and not is_running():
                return
            if first is None:
                return
            pending = [first]
            # Drain whatever else is already queued, without blocking.
            while True:
                try:
                    extra = self._buff.get(block=False)
                except queue.Empty:
                    break
                if extra is None:
                    return
                pending.append(extra)
            yield b''.join(pending)
Try using:
import urllib
urllib.urlretrieve ("http://www.example.com/songs/mp3.mp3", "mp3.mp3")
(for Python 3+ use import urllib.request and urllib.request.urlretrieve)

Track download progress of S3 file using boto3 and callbacks

I am trying to download a text file from S3 using boto3.
Here is what I have written.
class ProgressPercentage(object):
    """Callable that reports cumulative transfer progress for one file.

    The total is taken from the size of ``filename`` on the local disk when
    the instance is created, so that path must already exist at that moment;
    otherwise ``os.path.getsize`` raises.
    """

    def __init__(self, filename):
        self._lock = threading.Lock()
        self._seen_so_far = 0
        self._filename = filename
        self._size = float(os.path.getsize(filename))

    def __call__(self, bytes_amount):
        # To simplify we'll assume this is hooked up
        # to a single filename.
        with self._lock:
            self._seen_so_far += bytes_amount
            fraction = self._seen_so_far / self._size
            percentage = round(fraction * 100, 2)
            message = '{} is the file name. {} out of {} done. The percentage completed is {} %'.format(
                str(self._filename), str(self._seen_so_far),
                str(self._size), str(percentage))
            LoggingFile(message)
            sys.stdout.flush()
and I am calling it using
transfer.download_file(BUCKET_NAME,FILE_NAME,'{}{}'.format(LOCAL_PATH_TEMP , FILE_NAME),callback = ProgressPercentage(LOCAL_PATH_TEMP + FILE_NAME))
This gives me an error saying that the file is not present in the folder. When a file with this name already exists in that folder it works, but when I download a fresh file it fails.
What correction do I need to make?
This is my implementation. No other dependencies, hack up the progress callback function to display whatever you want.
import sys
import boto3
s3_client = boto3.client('s3')
def download(local_file_name, s3_bucket, s3_object_key, client=None):
    """Download an S3 object to a local file while drawing a text progress bar.

    Args:
        local_file_name: Path of the file to create (opened in ``'wb'`` mode).
        s3_bucket: Name of the bucket holding the object.
        s3_object_key: Key of the object to fetch.
        client: Object providing ``head_object`` and ``download_fileobj``.
            Defaults to the module-level ``s3_client``.
    """
    client = s3_client if client is None else client
    meta_data = client.head_object(Bucket=s3_bucket, Key=s3_object_key)
    total_length = int(meta_data.get('ContentLength', 0))
    bar_width = 50
    downloaded = 0

    def progress(chunk):
        # Invoked by download_fileobj with the number of bytes just transferred.
        nonlocal downloaded
        downloaded += chunk
        if total_length > 0:
            # Clamp so the padding width below can never become negative.
            done = min(bar_width, bar_width * downloaded // total_length)
        else:
            # Empty object or missing length: show a full bar instead of
            # dividing by zero.
            done = bar_width
        sys.stdout.write("\r[%s%s]" % ('=' * done, ' ' * (bar_width - done)))
        sys.stdout.flush()

    print(f'Downloading {s3_object_key}')
    with open(local_file_name, 'wb') as f:
        client.download_fileobj(s3_bucket, s3_object_key, f, Callback=progress)
e.g.
local_file_name = 'test.csv'
s3_bucket = 'my-bucket'
s3_object_key = 'industry/test.csv'
download(local_file_name, s3_bucket, s3_object_key)
Demo:
Tested with boto3>=1.14.19, python>=3.7
callback = ProgressPercentage(LOCAL_PATH_TEMP + FILE_NAME)) creates a ProgressPercentage object, runs its __init__ method, and passes the object as callback to the download_file method. This means the __init__ method is run before download_file begins.
In the __init__ method you are attempting to read the size of the local file being downloaded to, which throws an exception as the file does not exist since the download has yet to start. If you've already downloaded the file, then there's no problem since a local copy exists and its size can be read.
Of course, this is merely the cause of the exception you're seeing. You're using the _size property as the maximum value of download progress. However you're attempting to use the size of the local file. Until the file is completely downloaded, the local file system does not know how large the file is, it only knows how much space it takes up right now. This means as you download the file will gradually get bigger until it reaches its full size. As such, it doesn't really make sense to consider the size of the local file as the maximum size of the download. It may work in the case where you've already downloaded the file, but that isn't very useful.
The solution to your problem would be to check the size of the file you're going to download, instead of the size of the local copy. This ensures you're getting the actual size of whatever it is you're downloading, and that the file exists (as you couldn't be downloading it if it didn't). You can do this by getting the size of the remote file with head_object as follows
class ProgressPercentage(object):
def __init__(self, client, bucket, filename):
# ... everything else the same
self._size = client.head_object(Bucket=bucket, Key=filename).ContentLength
# ...
# If you still have the client object you could pass that directly
# instead of transfer._manager._client
progress = ProgressPercentage(transfer._manager._client, BUCKET_NAME, FILE_NAME)
transfer.download_file(..., callback=progress)
As a final note, although you got the code from the Boto3 documentation, it didn't work because it was intended for file uploads. In that case the local file is the source and its existence guaranteed.
Install progressbar with pip3 install progressbar
import boto3, os
import progressbar
bucket_name = "<your-s3-bucket-name>"
folder_name = "<your-directory-name-locally>"
file_name = "<your-filename-locally>"
path = folder_name + "/" + file_name
s3 = boto3.client('s3', aws_access_key_id="<your_aws_access_key_id>", aws_secret_access_key="<your_aws_secret_access_key>")
statinfo = os.stat(file_name)
up_progress = progressbar.progressbar.ProgressBar(maxval=statinfo.st_size)
up_progress.start()
def upload_progress(chunk):
up_progress.update(up_progress.currval + chunk)
s3.upload_file(file_name, bucket_name, path, Callback=upload_progress)
up_progress.finish()
Here's another simple custom implementation using tqdm:
from tqdm import tqdm
import boto3
def s3_download(s3_bucket, s3_object_key, local_file_name, s3_client=None):
    """Download an S3 object to a local file with a tqdm progress bar.

    Args:
        s3_bucket: Name of the bucket holding the object.
        s3_object_key: Key of the object to fetch.
        local_file_name: Path of the file to create (opened in ``'wb'`` mode).
        s3_client: S3 client to use. When omitted, a new one is created with
            ``boto3.client('s3')`` for this call.
    """
    if s3_client is None:
        # Built lazily: a client used as a default argument value would be
        # created once, at the moment the function is defined.
        s3_client = boto3.client('s3')
    meta_data = s3_client.head_object(Bucket=s3_bucket, Key=s3_object_key)
    total_length = int(meta_data.get('ContentLength', 0))
    with tqdm(total=total_length,
              desc=f'source: s3://{s3_bucket}/{s3_object_key}',
              bar_format="{percentage:.1f}%|{bar:25} | {rate_fmt} | {desc}",
              unit='B', unit_scale=True, unit_divisor=1024) as pbar, \
            open(local_file_name, 'wb') as f:
        # pbar.update receives the byte count reported by each callback.
        s3_client.download_fileobj(s3_bucket, s3_object_key, f, Callback=pbar.update)
usage:
s3_download(bucket, key, local_file_name)
output:
100.0%|█████████████████████████ | 12.9MB/s | source: s3://my-bucket/my-key
Following the official documentation, it is not difficult to add progress tracking (the download_file and upload_file functions work similarly).
Here is the full code, with some modifications to display the data size in a human-readable form.
import logging
import boto3
from botocore.exceptions import ClientError
import os
import sys
import threading
import math
ACCESS_KEY = 'xxx'
SECRET_KEY = 'xxx'
REGION_NAME= 'ap-southeast-1'
class ProgressPercentage(object):
    """Callable progress reporter for a transfer whose total size is known.

    Each call adds the reported byte count to a running total and rewrites a
    single status line on stdout.
    """

    _UNITS = ("B", "KB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB")

    def __init__(self, filename, filesize):
        """Store the label and the expected total.

        Args:
            filename: Label printed at the start of the status line.
            filesize: Total number of bytes expected.
        """
        self._filename = filename
        self._size = filesize
        self._seen_so_far = 0
        self._lock = threading.Lock()

    @classmethod
    def _convert_size(cls, size):
        """Format a byte count with 1024-based units, e.g. 1536 -> '1.50 KB'."""
        if size == 0:
            return '0B'
        value = float(size)
        unit = 0
        # Stepping through the units by division does not depend on how a
        # floating-point logarithm rounds, and cannot run past the last unit.
        while abs(value) >= 1024 and unit < len(cls._UNITS) - 1:
            value /= 1024
            unit += 1
        return '%.2f %s' % (round(value, 2), cls._UNITS[unit])

    def __call__(self, bytes_amount):
        """Record ``bytes_amount`` more bytes and redraw the status line."""
        # To simplify, assume this is hooked up to a single filename
        with self._lock:
            self._seen_so_far += bytes_amount
            if self._size:
                percentage = (self._seen_so_far / self._size) * 100
            else:
                # A zero-byte transfer has nothing left to do; avoid dividing by zero.
                percentage = 100.0
            sys.stdout.write(
                "\r%s %s / %s (%.2f%%) " % (
                    self._filename,
                    self._convert_size(self._seen_so_far),
                    self._convert_size(self._size),
                    percentage))
            sys.stdout.flush()
def download_file(file_name, object_name, bucket_name):
    """Download one S3 object to ``file_name`` with progress output.

    Args:
        file_name: Local destination path; also used as the key when
            ``object_name`` is ``None``.
        object_name: Key of the object to download.
        bucket_name: Name of the bucket holding the object.

    Returns:
        ``True`` on success, ``False`` when a ``ClientError`` was raised
        (the error is logged).
    """
    # If S3 object_name was not specified, use file_name
    key = file_name if object_name is None else object_name

    # Initialize s3 client
    s3_client = boto3.client(
        service_name="s3",
        aws_access_key_id=ACCESS_KEY,
        aws_secret_access_key=SECRET_KEY,
        region_name=REGION_NAME,
    )

    try:
        s3_client.download_file(
            Bucket=bucket_name,
            Key=key,
            Filename=file_name,
            # The object's size is looked up so the callback can show a percentage.
            Callback=ProgressPercentage(
                file_name,
                s3_client.head_object(Bucket=bucket_name, Key=key)["ContentLength"],
            ),
        )
    except ClientError as e:
        logging.error(e)
        return False
    return True
file_name = "./output.csv.gz"
bucket_name = "mybucket"
object_name = "result/output.csv.gz"
download_file(file_name, object_name, bucket_name )
The object client.head_object(Bucket=bucket, Key=filename) is a dict. The file size can be accessed using ['ContentLength'].
Hence the code:
self._size = client.head_object(Bucket=bucket, Key=filename).ContentLength
should become:
self._size = float(client.head_object(Bucket=bucket, Key=filename)['ContentLength'])
Then it works. Thanks!
Someone may stumble upon this answer when trying to do this (As per the question title). The easiest way I know to show s3 upload progress:
import a progress bar library into your project. This is what I used: https://github.com/anler/progressbar
Then:
import os
import progressbar
from hurry.filesize import size
import boto3
bucket = "my-bucket-name"
s3_client = boto3.resource('s3')
...
...
# The progress bar needs the total as a number of bytes. hurry.filesize's
# size() only formats a number for display, so the size of the local file is
# read from the file system instead.
filesize = os.path.getsize(file)
up_progress = progressbar.AnimatedProgressBar(end=filesize, width=50)
def upload_progress(chunk):
    # `chunk` is already a byte count, so it is added directly.
    up_progress + chunk # Notice! No len()
    up_progress.show_progress()
s3_client.meta.client.upload_file(file, bucket, s3_file_name, Callback=upload_progress)
The important thing to notice here is the use of the Callback parameter (capital C). The callback is called repeatedly during the transfer, each time with the number of bytes uploaded to S3 since the previous call. So if you know the original file size, some simple math gets you a progress bar. You can then use any progress bar library.
Info
Credits to the posts by @Kshitij Marwah, @yummies and nicolas.f.g
Using boto3 1.9.96 (dl via pip)
Removed threading
Changed display format (rewrite line above until dl completed)
Posting because of a difference between the online documentation and the downloaded package
code
class ProgressPercentage(object):
    """Progress callback that keeps rewriting one terminal line.

    The total size is fetched with ``head_object`` through the bucket's
    client when the instance is created.
    """

    def __init__(self, o_s3bucket, key_name):
        self._key_name = key_name
        # ContentLength is an int
        head = o_s3bucket.meta.client.head_object(Bucket=o_s3bucket.name, Key=key_name)
        self._size = head['ContentLength']
        self._seen_so_far = 0
        # Reserve the line that every later call moves back up to and overwrites.
        sys.stdout.write('\n')

    def __call__(self, bytes_amount):
        self._seen_so_far += bytes_amount
        percentage = (float(self._seen_so_far) / float(self._size)) * 100
        # Cursor to column 0, up one line, then erase that line.
        rewind = '\r' + '\033[A' + '\033[2K'
        status = '{} {}/{} ({}%)\n'.format(
            self._key_name, str(self._seen_so_far), str(self._size), str(percentage))
        sys.stdout.write(rewind)
        sys.stdout.write(status)
        sys.stdout.flush()
Then called it like that
Note the capital C on Callback (that differs from online doc)
progress = ProgressPercentage(o_s3bucket, key_name)
o_s3bucket.download_file(key_name, full_local_path, Callback=progress)
where o_s3bucket is :
bucket_name = 'my_bucket_name'
aws_profile = 'default' # this is used to catch creds from .aws/credentials ini file
boto_session = boto3.session.Session(profile_name=aws_profile)
o_s3bucket = boto_session.resource('s3').Bucket(bucket_name)
hth
Here is an option I've found useful, using the click library (just run pip install click before using the code below):
import click
import boto3
import os
file_path = os.path.join('tmp', 'file_path')
# upload_fileobj is called on the low-level S3 client; the object returned by
# boto3.resource('s3') does not provide that method.
s3_client = boto3.client('s3')
# The bar's length is the file size in bytes; each callback advances it by the
# number of bytes just transferred.
with click.progressbar(length=os.path.getsize(file_path)) as progress_bar:
    with open(file_path, mode='rb') as upload_file:
        s3_client.upload_fileobj(
            upload_file,
            'bucket_name',
            'foo_bar',
            Callback=progress_bar.update
        )
Here is code
try:
import logging
import boto3
from botocore.exceptions import ClientError
import os
import sys
import threading
import math
import re
from boto3.s3.transfer import TransferConfig
except Exception as e:
pass
ACCESS_KEY = 'XXXXXXXXXXXXXXXXX'
SECRET_KEY = 'XXXXXXXXXXXXXXXX'
REGION_NAME= 'us-east-1'
BucketName = "XXXXXXXXXXXXXXXX"
KEY = "XXXXXXXXXXXXXXXX"
class Size:
    """Namespace for byte-count formatting helpers."""

    @staticmethod
    def convert_size(size_bytes):
        """Format a byte count using 1024-based units.

        Args:
            size_bytes: Number of bytes.

        Returns:
            ``"0B"`` for zero, otherwise ``"<value> <unit>"`` with the value
            rounded to two decimals, e.g. ``1536`` -> ``"1.5 KB"``.
        """
        if size_bytes == 0:
            return "0B"
        size_name = ("B", "KB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB")
        value = float(size_bytes)
        unit = 0
        # Stepping through the units by division cannot index past the last
        # unit, and does not depend on how a floating-point logarithm rounds.
        while abs(value) >= 1024 and unit < len(size_name) - 1:
            value /= 1024
            unit += 1
        return "%s %s" % (round(value, 2), size_name[unit])
class ProgressPercentage(object):
    """Callable progress reporter for a transfer whose total size is known.

    Each call adds the reported byte count to a running total and rewrites a
    single status line on stdout.
    """

    _UNITS = ("B", "KB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB")

    def __init__(self, filename, filesize):
        """Store the label and the expected total.

        Args:
            filename: Label printed at the start of the status line.
            filesize: Total number of bytes expected.
        """
        self._filename = filename
        self._size = filesize
        self._seen_so_far = 0
        self._lock = threading.Lock()

    @classmethod
    def _convert_size(cls, size):
        """Format a byte count with 1024-based units, e.g. 1536 -> '1.50 KB'."""
        if size == 0:
            return '0B'
        value = float(size)
        unit = 0
        # Stepping through the units by division does not depend on how a
        # floating-point logarithm rounds, and cannot run past the last unit.
        while abs(value) >= 1024 and unit < len(cls._UNITS) - 1:
            value /= 1024
            unit += 1
        return '%.2f %s' % (round(value, 2), cls._UNITS[unit])

    def __call__(self, bytes_amount):
        """Record ``bytes_amount`` more bytes and redraw the status line."""
        # To simplify, assume this is hooked up to a single filename
        with self._lock:
            self._seen_so_far += bytes_amount
            if self._size:
                percentage = (self._seen_so_far / self._size) * 100
            else:
                # A zero-byte transfer has nothing left to do; avoid dividing by zero.
                percentage = 100.0
            sys.stdout.write(
                "\r%s %s / %s (%.2f%%) " % (
                    self._filename,
                    self._convert_size(self._seen_so_far),
                    self._convert_size(self._size),
                    percentage))
            sys.stdout.flush()
class AWSS3(object):
    """Convenience wrapper around a boto3 S3 client bound to a single bucket."""

    def __init__(self, bucket, aws_access_key_id, aws_secret_access_key, region_name):
        """Create the underlying client.

        Args:
            bucket: Name of the bucket every method operates on.
            aws_access_key_id: Access key passed to ``boto3.client``.
            aws_secret_access_key: Secret key passed to ``boto3.client``.
            region_name: Region passed to ``boto3.client``.
        """
        self.BucketName = bucket
        self.client = boto3.client(
            "s3",
            aws_access_key_id=aws_access_key_id,
            aws_secret_access_key=aws_secret_access_key,
            region_name=region_name,
        )

    def get_size_of_files(self, Key):
        """Return ``{"bytes": <int>, "size": <formatted str>}`` for the object."""
        response = self.client.head_object(Bucket=self.BucketName, Key=Key)
        size = response["ContentLength"]
        return {"bytes": size, "size": Size.convert_size(size)}

    def put_files(self, Response=None, Key=None):
        """Store ``Response`` under ``Key`` with a private ACL.

        :return: ``"ok"`` on success, ``"error"`` if the upload raised
            (the error is printed).
        """
        try:
            self.client.put_object(
                ACL="private", Body=Response, Bucket=self.BucketName, Key=Key
            )
            return "ok"
        except Exception as e:
            print("Error : {} ".format(e))
            return "error"

    def item_exists(self, Key):
        """Return True if the key can be looked up in the bucket, else False."""
        try:
            # head_object answers the existence question from metadata alone,
            # so no object body is requested.
            self.client.head_object(Bucket=self.BucketName, Key=str(Key))
            return True
        except Exception:
            # Deliberately broad: any failure is reported as "does not exist".
            return False

    def get_item(self, Key):
        """Return the object's bytes, or False if it could not be read."""
        try:
            response_new = self.client.get_object(Bucket=self.BucketName, Key=str(Key))
            return response_new["Body"].read()
        except Exception as e:
            print("Error :{}".format(e))
            return False

    def find_one_update(self, data=None, key=None):
        """Return the stored object for ``key``; if absent, store ``data`` and return it."""
        if self.item_exists(Key=key):
            return self.get_item(Key=key)
        self.put_files(Key=key, Response=data)
        return data

    def delete_object(self, Key):
        """Delete ``Key`` from the bucket and return the client's response."""
        return self.client.delete_object(Bucket=self.BucketName, Key=Key)

    def get_all_keys(self, Prefix=""):
        """List every key under ``Prefix``.

        :param Prefix: Prefix string
        :return: Keys List (empty if listing failed)
        """
        try:
            paginator = self.client.get_paginator("list_objects_v2")
            pages = paginator.paginate(Bucket=self.BucketName, Prefix=Prefix)
            # A page without matching objects carries no "Contents" entry;
            # treat it as empty rather than discarding all pages.
            return [
                obj["Key"]
                for page in pages
                for obj in page.get("Contents", [])
            ]
        except Exception:
            return []

    def print_tree(self):
        """Print every key in the bucket, one per line."""
        for key in self.get_all_keys():
            print(key)
        return None

    def find_one_similar_key(self, searchTerm=""):
        """Return the keys for which ``re.search(searchTerm, key)`` matches."""
        keys = self.get_all_keys()
        return [key for key in keys if re.search(searchTerm, key)]

    def __repr__(self):
        return "AWS S3 Helper class "

    def download_file(self, file_name, object_name):
        """Download ``object_name`` to ``file_name`` with progress output.

        Returns:
            ``True`` on success, ``False`` when a ``ClientError`` was raised
            (the error is logged).
        """
        try:
            self.client.download_file(
                Bucket=self.BucketName,
                Key=object_name,
                Filename=file_name,
                Config=TransferConfig(
                    max_concurrency=10,
                    use_threads=True
                ),
                # The object's size is looked up so the callback can show a percentage.
                Callback=ProgressPercentage(
                    file_name,
                    self.client.head_object(
                        Bucket=self.BucketName, Key=object_name
                    )["ContentLength"],
                ),
            )
        except ClientError as e:
            logging.error(e)
            return False
        return True
helper = AWSS3(aws_access_key_id=ACCESS_KEY, aws_secret_access_key=SECRET_KEY, bucket=BucketName, region_name='us-east-1')
helper.download_file(file_name='test.zip', object_name=KEY)

Python hash function in a Twisted xmlrpc class issuing the same hash for every file?

I'm new to most of this so forgive me if I'm doing something really dumb. The following is a simple Twisted xmlrpc server which is supposed to return file info. It works fine except that the xmlrpc_hash function gives the same result for every file. Example below code. Any help would be great!
from twisted.web import xmlrpc, server
import os
class rfi(xmlrpc.XMLRPC):
    """
    rfi - Remote File Info server
    """

    def xmlrpc_echo(self, x):
        """
        Test helper: hand the argument straight back
        """
        return x

    def xmlrpc_location(self):
        """
        Name of the server's current working directory
        """
        return os.getcwd()

    def xmlrpc_ls(self, path):
        """
        Directory listing of the path
        """
        return list(os.listdir(path))

    def xmlrpc_stat(self, path):
        """
        String form of os.stat() for the path
        """
        return str(os.stat(path))

    def xmlrpc_hash(self, path):
        """
        SHA-1 hex digest for the path, or 'Not a file'
        """
        from hashlib import sha1
        if not os.path.isfile(path):
            return 'Not a file'
        digest = sha1()
        # NOTE: the file is opened and closed without anything being read
        # from it, so no data ever reaches the hash object and the digest is
        # that of empty input.
        with open(path, 'rb'):
            pass
        return digest.hexdigest()
if __name__ == '__main__':
from twisted.internet import reactor
r = rfi()
reactor.listenTCP(7081, server.Site(r))
reactor.run()
Example output:
import xmlrpclib
s = xmlrpclib.Server('http://localhost:7081/')
s.hash('file_1.txt')
'da39a3ee5e6b4b0d3255bfef95601890afd80709'
s.hash('file_2.txt')
'da39a3ee5e6b4b0d3255bfef95601890afd80709'
This is because you're never actually updating the hash object:
from hashlib import sha1
if os.path.isfile(path):
f = open(path,'rb')
h = sha1()
h.update(f.read()) # You're missing this line
f.close()
return h.hexdigest()
else:
return 'Not a file'

Categories