I have created a custom encoder/decoder like so:
import tensorflow as tf
from tensorflow_model_optimization.python.core.internal import tensor_encoding as te
# noinspection PyUnresolvedReferences
class SparseTernaryCompressionEncodingStage(te.core.EncodingStageInterface):
AVERAGE = 'average'
NEGATIVES = 'negatives'
POSITIVES = 'positives'
TESTING = 'testing'
NEW_SHAPE = 'new_shape'
ORIGINAL_SHAPE = 'original_shape'
def name(self):
def compressible_tensors_keys(self):
def commutes_with_sum(self):
def decode_needs_input_shape(self):
def get_params(self):
def encode(self, original_tensor, encode_params):
    """Encode a tensor with sparse ternary compression.

    The tensor is flattened and only the largest ~1% of entries by
    magnitude (at least one) are kept. Each kept entry is replaced by
    the mean magnitude of the kept entries, carrying its original sign.
    Only that mean and the index lists of the negative and positive
    entries are emitted, together with the shapes needed to rebuild.

    Args:
        original_tensor: Tensor to compress.
        encode_params: Unused by this stage.

    Returns:
        A dict keyed by AVERAGE, NEGATIVES, POSITIVES, NEW_SHAPE and
        ORIGINAL_SHAPE.
    """
    original_shape = tf.shape(original_tensor)
    tensor = tf.reshape(original_tensor, [-1])
    new_shape = tensor.get_shape().as_list()

    # Use the static length rather than len(tensor): len() is only
    # expected to work on eager tensors, whereas the static shape is also
    # available while tracing a graph (requires a fully defined shape).
    sparsification_rate = max(1, new_shape[0] // 100)

    # Compute the magnitude threshold once; it is the smallest of the
    # top-k magnitudes.
    magnitudes = tf.abs(tensor)
    threshold = tf.math.top_k(magnitudes, sparsification_rate)[0][-1]
    mask = tf.cast(magnitudes >= threshold, tf.float32)

    tensor_masked = tf.multiply(tensor, mask)
    average = tf.reduce_sum(tf.abs(tensor_masked)) / sparsification_rate
    compressed_tensor = tf.multiply(average, mask) * tf.sign(tensor)

    negatives = tf.where(compressed_tensor < 0)
    positives = tf.where(compressed_tensor > 0)
    return {
        self.AVERAGE: average,
        self.NEGATIVES: negatives,
        self.POSITIVES: positives,
        self.NEW_SHAPE: new_shape,
        self.ORIGINAL_SHAPE: original_shape,
    }
def decode(self, encoded_tensors, decode_params, num_summands=None, shape=None):
    """Rebuild the ternary tensor from the dict produced by `encode`.

    Args:
        encoded_tensors: Dict keyed by AVERAGE, NEGATIVES, POSITIVES,
            NEW_SHAPE and ORIGINAL_SHAPE.
        decode_params: Unused by this stage.
        num_summands: Unused by this stage.
        shape: Unused by this stage; the shapes travel in
            `encoded_tensors`.

    Returns:
        A float32 tensor of the original shape holding `-average` at the
        negative indices, `+average` at the positive indices and zero
        elsewhere.
    """
    # The class attributes are only the dictionary KEYS (plain strings);
    # the actual values have to be looked up in `encoded_tensors`.
    average = encoded_tensors[self.AVERAGE]
    negatives = encoded_tensors[self.NEGATIVES]
    positives = encoded_tensors[self.POSITIVES]
    flat_shape = encoded_tensors[self.NEW_SHAPE]
    original_shape = encoded_tensors[self.ORIGINAL_SHAPE]

    decompressed_tensor = tf.zeros(flat_shape, tf.float32)
    # One update value per index row.
    average_values_negative = tf.fill([tf.shape(negatives)[0]], -average)
    average_values_positive = tf.fill([tf.shape(positives)[0]], average)
    decompressed_tensor = tf.tensor_scatter_nd_update(
        decompressed_tensor, negatives, average_values_negative)
    decompressed_tensor = tf.tensor_scatter_nd_update(
        decompressed_tensor, positives, average_values_positive)
    return tf.reshape(decompressed_tensor, original_shape)
Now, I would like to use the encode function to encode all the weights that the client sends to the server and, on the server, use the decode function to obtain all the weights back. Basically, instead of sending all the weights from the client to the server, I want to send only the necessary information, so that the weights can be rebuilt from just five pieces of information.
The problem is that I don't understand how to tell the client to use this encoder to send the information, and how to tell the server to use the decoder before trying to do:
round_model_delta = tff.federated_mean(client_outputs.weights_delta, weight=weight_denom)
I'm using the TensorFlow Federated simple_fedavg example as the base project.

If you only want to modify the aggregation, you may have an easier time using the tff.learning APIs with what you have, parameterizing the aggregation with a tff.aggregators object. For instance:
def encoder_fn(value_spec):
return te.encoders.as_gather_encoder(
..., # Other args.
You may also find these tutorials helpful:


Why does tf.timestamp() require TensorFlow to run non-deterministically?

I am trying to measure the time required for the model's forward pass. I've come across a post describing the disadvantages of using Python's time module for this.
Although the post relies on torch and uses torch.cuda.Event(enable_timing=True) to determine the current time, I've found what may be a similar function in TensorFlow: tf.timestamp().
However, using this function with os.environ['TF_DETERMINISTIC_OPS'] = '1', leads to the following error:
tensorflow.python.framework.errors_impl.FailedPreconditionError: Timestamp cannot be called when determinism is enabled [Op:Timestamp]
I would like to know why tf.timestamp() cannot be called when determinism is enabled. Any ideas?
Code Idea:
os.environ['TF_DETERMINISTIC_OPS'] = '1'
def forward_pass(model, x):
    """Call `model` on `x` with `training=False` and return its output."""
    return model(x, training=False)
def inspect_time(model, model_in, runs):
    """Time `runs` consecutive forward passes and return per-run averages.

    Two clocks are sampled around the same loop: Python's `time.time()`
    and `tf.timestamp()`.

    Args:
        model: Callable forwarded to `forward_pass`.
        model_in: Input forwarded to `forward_pass`.
        runs: Number of forward passes to execute.

    Returns:
        A pair `(time.time()-based average, tf.timestamp()-based average)`.
    """
    wall_begin = time.time()
    tf_begin = tf.timestamp()
    for _ in range(runs):
        forward_pass(model, model_in)
    # Sample the clocks in the reverse order of the start samples.
    tf_average = (tf.timestamp() - tf_begin) / runs
    wall_average = (time.time() - wall_begin) / runs
    return wall_average, tf_average
# Script entry point: build the model, fetch one batch and time 100
# forward passes with inspect_time.
if __name__ == '__main__':
model = make_model()
# Place the following work on the first logical GPU.
with tf.device(logical_gpus[0]):
# Take a single element from the dataset and keep only its first component.
x_batch, _ = train_dataset.take(1).get_single_element()
# presumably `tnp` is tensorflow.experimental.numpy and the copy is meant
# to materialise the batch on the GPU — TODO confirm
x_batch = tnp.copy(x_batch)
# Fail early if the batch did not end up on GPU:0.
assert x_batch.device.endswith("GPU:0")
# Per-run averages: first from time.time(), second from tf.timestamp().
time_cpu, time_gpu = inspect_time(model, x_batch, 100)

tf_agents and reverb produce incompatible tensor

I'm trying to implement a DDPG agent using tf_agents and reverb, but I can't figure out how to get the two libraries to work together. For this, I'm trying to use the code from the tf_agents DQN tutorial with my own agent and gym environment. The error occurs when I try to retrieve data from reverb: the tensor shape doesn't match. I've created the smallest example I could think of to show the problem:
import gym
from gym import spaces
from gym.utils.env_checker import check_env
from gym.envs.registration import register
import tensorflow as tf
import numpy as np
import reverb
from tf_agents.agents import DdpgAgent
from tf_agents.drivers.py_driver import PyDriver
from tf_agents.environments import TFPyEnvironment, suite_gym, validate_py_environment
from tf_agents.networks import Sequential
from tf_agents.policies import PyTFEagerPolicy
from tf_agents.replay_buffers import ReverbReplayBuffer, ReverbAddTrajectoryObserver
from tf_agents.specs import tensor_spec, BoundedArraySpec
Example Gym environment
class TestGym(gym.Env):
    """Minimal gym environment used to reproduce the problem.

    Observations are always a zero vector of 30 float32 values, actions
    are 2-element float32 vectors, the reward is always 0 and an episode
    is reported as done once 100 steps have been taken.
    """

    metadata = {"render_modes": ["human"]}

    def __init__(self):
        self.__count = 0
        self.action_space = spaces.Box(low=-1, high=1, shape=(2,), dtype=np.float32)
        self.observation_space = spaces.Box(low=-1, high=1, shape=(30,), dtype=np.float32)

    def step(self, action):
        """Advance one step; `action` is ignored."""
        self.__count += 1
        observation = np.zeros(30, dtype=np.float32)
        done = self.__count >= 100
        return observation, 0, done, {}

    def render(self, mode="human"):
        """Rendering is a no-op."""
        return None

    def reset(self, seed=None, return_info=False, options=None):
        """Reset the step counter and return the initial observation.

        When `return_info` is true an empty info dict is returned as well.
        """
        super().reset(seed=seed, options=options)
        self.__count = 0
        observation = np.zeros(30, dtype=np.float32)
        if not return_info:
            return observation
        return observation, {}
Creating a TFAgent and use reverb to store and retrieve
def main():
# make sure the gym environment is ok
# create tf-py-environment
env = TFPyEnvironment(suite_gym.load("TestGym-v0"))
# make sure the py environment is ok
validate_py_environment(env.pyenv, episodes=5)
# example actor network
actor_network = Sequential([
tf.keras.layers.Dense(2, activation=None)
], input_spec=env.observation_spec())
# example critic network
n_actions = env.action_spec().shape[0]
n_observ = env.observation_spec().shape[0]
critic_input_spec: BoundedArraySpec = BoundedArraySpec((n_actions + n_observ,), "float32", minimum=-1, maximum=1)
critic_network = Sequential([
tf.keras.layers.Dense(1, activation=None)
], input_spec=critic_input_spec)
# example rl agent
agent = DdpgAgent(
# create reverb table
table_name = "uniform_table"
replay_buffer_signature = tensor_spec.from_spec(agent.collect_data_spec)
replay_buffer_signature = tensor_spec.add_outer_dim(replay_buffer_signature)
table = reverb.Table(
# create reverb server
reverb_server = reverb.Server([table])
# create replay buffer for this table and server
replay_buffer = ReverbReplayBuffer(
# create observer to store experiences
observer = ReverbAddTrajectoryObserver(
# run a few steps to fill the replay buffer
driver = PyDriver(env.pyenv, PyTFEagerPolicy(agent.collect_policy, use_tf_function=True), [observer], max_steps=100)
# create a dataset to access the replay buffer
dataset = replay_buffer.as_dataset(num_parallel_calls=3, sample_batch_size=20, num_steps=2).prefetch(3)
iterator = iter(dataset)
# retrieve a sample
print(next(iterator)) # <===== ERROR
if __name__ == '__main__':
When I run this code, I get the following error Message:
{{function_node __wrapped__IteratorGetNext_output_types_11_device_/job:localhost/replica:0/task:0/device:CPU:0}}
Received incompatible tensor at flattened index 0 from table 'uniform_table'.
Specification has (dtype, shape): (int32, [?]).
Tensor has (dtype, shape): (int32, [2,1]).
Table signature:
0: Tensor<name: 'step_type/step_type', dtype: int32, shape: [?]>,
1: Tensor<name: 'observation/observation', dtype: float, shape: [?,30]>,
2: Tensor<name: 'action/action', dtype: float, shape: [?,2]>,
3: Tensor<name: 'next_step_type/step_type', dtype: int32, shape: [?]>,
4: Tensor<name: 'reward/reward', dtype: float, shape: [?]>,
5: Tensor<name: 'discount/discount', dtype: float, shape: [?]>
In my gym environment, I defined the action space as a 2-element vector, and I'm guessing that this action vector is somehow the problem. I've tried to use tensor specs for every input and output, but I guess I made a mistake somewhere. Does anyone have an idea what I'm doing wrong here?
I finally figured it out:
PyDriver needs a PyEnvironment to work properly. In my code I used the pyenv attribute of my TFPyEnvironment which, despite its name, doesn't return a regular PyEnvironment but a batched one instead.
Changing the code in the following way fixes this issue:
def main():
    """Excerpt of the corrected main(): only the changed lines are shown.

    `agent` and `observer` are created as in the full listing of the
    question; the point of the change is that PyDriver receives the plain
    py-environment instead of `TFPyEnvironment.pyenv`.
    """
    # make sure the gym environment is ok
    # create py-environment
    pyenv = suite_gym.load("TestGym-v0")  # <=============
    # create tf-py-environment
    env = TFPyEnvironment(pyenv)
    # Pass the same py-environment bound above (the name must match `pyenv`).
    driver = PyDriver(pyenv, PyTFEagerPolicy(agent.collect_policy, use_tf_function=True), [observer], max_steps=100)

Cannot register text_classifier as Model; name already in use for TextClassifier

Trying to use text classifier model shared by
Everything used to work and suddenly I keep getting this error: Cannot register text_classifier as Model; name already in use for TextClassifier
What might be the reason? Any suggestions?
from typing import Dict, Optional, List, Any
import torch
import torch.nn.functional as F
from import Vocabulary
from allennlp.models.model import Model
from allennlp.modules import FeedForward, TextFieldEmbedder, Seq2SeqEncoder
from allennlp.nn import InitializerApplicator, RegularizerApplicator
from allennlp.nn import util
from import CategoricalAccuracy, F1Measure
from overrides import overrides
class TextClassifier(Model):
    """
    Implements a basic text classifier:
    1) Embed tokens using `text_field_embedder`
    2) Seq2SeqEncoder, e.g. BiLSTM
    3) Append the first and last encoder states
    4) Final feedforward layer

    Optimized with CrossEntropyLoss.  Evaluated with CategoricalAccuracy & F1.
    """

    def __init__(self, vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 text_encoder: Seq2SeqEncoder,
                 classifier_feedforward: FeedForward,
                 verbose_metrics: bool = False,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None,
                 ) -> None:
        """
        Parameters
        ----------
        vocab : Vocabulary
            Vocabulary; its "labels" namespace determines the number of classes.
        text_field_embedder : TextFieldEmbedder
            Embeds the input text field.
        text_encoder : Seq2SeqEncoder
            Encodes the embedded sequence.
        classifier_feedforward : FeedForward
            Applied to the pooled encoder states before the prediction layer.
        verbose_metrics : bool, optional (default = False)
            When true, ``get_metrics`` also reports per-label P/R/F1.
        initializer : InitializerApplicator, optional
            NOTE(review): accepted but never applied in this listing; confirm
            whether an ``initializer(self)`` call was lost.
        regularizer : RegularizerApplicator, optional (default = None)
            Passed through to ``Model``.
        """
        super().__init__(vocab, regularizer)

        self.text_field_embedder = text_field_embedder
        self.num_classes = self.vocab.get_vocab_size("labels")
        self.text_encoder = text_encoder
        self.classifier_feedforward = classifier_feedforward
        self.prediction_layer = torch.nn.Linear(self.classifier_feedforward.get_output_dim(),
                                                self.num_classes)

        self.label_accuracy = CategoricalAccuracy()
        self.verbose_metrics = verbose_metrics
        # One F1 metric per label, keyed by the label's token.
        self.label_f1_metrics = {
            vocab.get_token_from_index(index=i, namespace="labels"): F1Measure(positive_label=i)
            for i in range(self.num_classes)
        }
        self.loss = torch.nn.CrossEntropyLoss()

        self.pool = lambda text, mask: util.get_final_encoder_states(text, mask, bidirectional=True)

    def forward(self,
                text: Dict[str, torch.LongTensor],
                label: Optional[torch.IntTensor] = None,
                metadata: Optional[List[Dict[str, Any]]] = None) -> Dict[str, torch.Tensor]:
        """
        Parameters
        ----------
        text : Dict[str, torch.LongTensor]
            From a ``TextField``
        label : torch.IntTensor, optional (default = None)
            From a ``LabelField``
        metadata : ``List[Dict[str, Any]]``, optional, (default = None)
            Metadata containing the original tokenization of the premise and
            hypothesis with 'premise_tokens' and 'hypothesis_tokens' keys respectively.

        Returns
        -------
        An output dictionary consisting of:

        label_logits : torch.FloatTensor
            A tensor of shape ``(batch_size, num_labels)`` representing unnormalised log probabilities of the label.
        label_probs : torch.FloatTensor
            A tensor of shape ``(batch_size, num_labels)`` representing probabilities of the label.
        loss : torch.FloatTensor, optional
            A scalar loss to be optimised.
        """
        embedded_text = self.text_field_embedder(text)

        mask = util.get_text_field_mask(text)
        encoded_text = self.text_encoder(embedded_text, mask)
        pooled = self.pool(encoded_text, mask)
        ff_hidden = self.classifier_feedforward(pooled)
        logits = self.prediction_layer(ff_hidden)
        class_probs = F.softmax(logits, dim=1)

        output_dict = {"logits": logits}
        if label is not None:
            loss = self.loss(logits, label)
            output_dict["loss"] = loss

            # compute F1 per label
            for i in range(self.num_classes):
                metric = self.label_f1_metrics[self.vocab.get_token_from_index(index=i, namespace="labels")]
                metric(class_probs, label)
            self.label_accuracy(logits, label)
        return output_dict

    def decode(self, output_dict: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]:
        """Add softmax class probabilities under 'class_probs' and return the dict."""
        class_probabilities = F.softmax(output_dict['logits'], dim=-1)
        output_dict['class_probs'] = class_probabilities
        return output_dict

    def get_metrics(self, reset: bool = False) -> Dict[str, float]:
        """Return accuracy and the unweighted mean F1 over all labels.

        Per-label precision/recall/F1 are included when ``verbose_metrics``
        is set. ``reset`` is forwarded to every underlying metric.
        """
        metric_dict = {}

        sum_f1 = 0.0
        for name, metric in self.label_f1_metrics.items():
            metric_val = metric.get_metric(reset)
            if self.verbose_metrics:
                metric_dict[name + '_P'] = metric_val[0]
                metric_dict[name + '_R'] = metric_val[1]
                metric_dict[name + '_F1'] = metric_val[2]
            sum_f1 += metric_val[2]

        total_len = len(self.label_f1_metrics)
        # Guard against an empty label namespace instead of dividing by zero.
        average_f1 = sum_f1 / total_len if total_len else 0.0
        metric_dict['average_F1'] = average_f1
        metric_dict['accuracy'] = self.label_accuracy.get_metric(reset)
        return metric_dict
The name is already taken. Something that’s already a part of AllenNLP uses that name already, so you need to pick a different one.
For the curious, AllenNLP creates a registry of models, so that you can select a model at the command line. (That’s what the decorator is doing.) This requires the names to be unique.
The name text_classifier was used by AllenNLP only after the external package you’re using used it. It worked in May 2019, when that file was last updated. But 17 months ago, AllenNLP started using it. So it’s not your fault; it’s a mismatch between those two packages (at least, in their current versions).

TensorRT for upscaling images does not give the expected results

I have been fighting with TensorRT (TensorRT 4 for Python right now) for several weeks. I got past a lot of problems to get TensorRT running. The example code from NVIDIA works well for me:
TensorRT MNIST example
Now, I have created my own (very simple) network in TensorFlow for upscaling images, let's say from 320x240x3 to 640x480x3 (in HWC). The usual way of creating a frozen graph and running inference purely with TensorFlow gives me the expected results, but using TensorRT does not.
I have a strange feeling that I did something wrong when feeding the images into GPU memory (this would probably be an issue with pycuda and/or TensorRT).
The worst-case scenario would be that TensorRT breaks my network during its optimization process.
I hope someone has at least a small idea that could save my life.
This is my Tensorflow-model (i just wrapped the functions):
net = conv2d(input,
net = deconv2d(net,
This is the important snippet of my inferencer:
import tensorrt as trt
import uff
from tensorrt.parsers import uffparser
import pycuda.driver as cuda
import numpy as np
def _init_infer(self, uff_model):
g_logger = trt.infer.ConsoleLogger(trt.infer.LogSeverity.ERROR)
parser = uffparser.create_uff_parser()
parser.register_input(self.input_node, (self.channels, self.height, self.width), 0)
self.engine = trt.utils.uff_to_trt_engine(g_logger, uff_model, parser, self.max_batch_size,
self.runtime = trt.infer.create_infer_runtime(g_logger)
self.context = self.engine.create_execution_context()
self.output = np.empty(self.output_size, dtype=self.dtype)
# create CUDA stream = cuda.Stream()
# allocate device memory
self.d_input = cuda.mem_alloc(self.channels * self.max_batch_size * self.width *
self.height * self.output.dtype.itemsize)
self.d_output = cuda.mem_alloc(self.output_size * self.output.dtype.itemsize)
self.bindings = [int(self.d_input), int(self.d_output)]
def infer(self, input_batch, batch_size=1):
# transfer input data to device
cuda.memcpy_htod_async(self.d_input, input_batch,
# execute model
self.context.enqueue(batch_size, self.bindings,, None)
# transfer predictions back
cuda.memcpy_dtoh_async(self.output, self.d_output,
# synchronize threads
return self.output
And the executable snippet:
# create trt inferencer
trt_inferencer = TensorRTInferencer(params=params)
# Load the test image and normalize it.
img = [misc.imread('./test_images/lion.png')]
img[0] = normalize(img[0])
img = img[0]
# NOTE(review): the image is passed on in whatever layout misc.imread
# returned (presumably HWC, as the question states), while the parser input
# is registered as (channels, height, width) — this looks like the mismatch
# the question is about; confirm against the accepted fix.
# inferencing method
result = trt_inferencer.infer(img)
# Map the network output back to uint8.
result = inormalize(result, dtype=np.uint8)
# Interpret the flat output as one channel-last image of twice the input size.
result = result.reshape(1, params.height * 2, params.width * 2, 3)
And the weird result by comparison :(
upscaled lion TensorRT, Tensorflow, Original
I finally got it. The problem was the wrong dimensions and axis order of the input images and the output. For everyone who runs into the same problem, this is the adapted executable snippet, which depends on my initialization:
# build the TensorRT inferencer
trt_inferencer = TensorRTInferencer(params=params)
# load and normalize the image
img = normalize(misc.imread('./test_images/lion.png'))
# move the channel axis to the front, then flatten for the device copy
img = np.transpose(img, (2, 0, 1)).ravel()
# run inference and map the output back to uint8
result = inormalize(trt_inferencer.infer(img), dtype=np.uint8)
# the flat output is channel-first at twice the input size; restore
# channel-last order for display
result = np.reshape(result, newshape=[3, params.height * 2, params.width * 2])
result = np.transpose(result, (1, 2, 0))

How to classify images using Spark and Caffe

I am using Caffe to do image classification. I am on Mac OS X, using Python.
Right now I know how to classify a list of images using Caffe with Python, but to make it faster I want to use Spark.
Therefore, I tried to apply the image classification on each element of an RDD, the RDD created from a list of image_path. However, Spark does not allow me to do so.
Here is my code:
This is the code for image classification:
# display image name, class number, predicted label
def classify_image(image_path, transformer, net):
image =
transformed_image = transformer.preprocess('data', image)
net.blobs['data'].data[...] = transformed_image
output = net.forward()
output_prob = output['prob'][0]
pred = output_prob.argmax()
labels_file = caffe_root + 'data/ilsvrc12/synset_words.txt'
labels = np.loadtxt(labels_file, str, delimiter='\t')
lb = labels[pred]
image_name = image_path.split(images_folder_path)[1]
result_str = 'image: '+image_name+' prediction: '+str(pred)+' label: '+lb
return result_str
This is the code that generates the Caffe parameters and applies the classify_image method to each element of the RDD:
def main():
sys.path.insert(0, caffe_root + 'python')
model_def = caffe_root + 'models/bvlc_reference_caffenet/deploy.prototxt'
model_weights = caffe_root + 'models/bvlc_reference_caffenet/bvlc_reference_caffenet.caffemodel'
net = caffe.Net(model_def,
mu = np.load(caffe_root + 'python/caffe/imagenet/ilsvrc_2012_mean.npy')
mu = mu.mean(1).mean(1)
transformer ={'data': net.blobs['data'].data.shape})
transformer.set_transpose('data', (2,0,1))
transformer.set_mean('data', mu)
transformer.set_raw_scale('data', 255)
transformer.set_channel_swap('data', (2,1,0))
227, 227)
image_list= []
for image_path in glob.glob(images_folder_path+'*.jpg'):
images_rdd = sc.parallelize(image_list)
transformer_bc = sc.broadcast(transformer)
net_bc = sc.broadcast(net)
image_predictions = image_path: classify_image(image_path, transformer_bc, net_bc))
print image_predictions
if __name__ == '__main__':
As you can see, here I tried to broadcast the caffe parameters, transformer_bc = sc.broadcast(transformer), net_bc = sc.broadcast(net)
The error is:
RuntimeError: Pickling of "caffe._caffe.Net" instances is not enabled
Before I added the broadcast, the error was:
Driver stacktrace.... Caused by: org.apache.spark.api.python.PythonException: Traceback (most recent call last):....
So, do you know, is there any way I can classify images using Caffe and Spark but also take advantage of Spark?
When you work with complex, non-native objects, initialization has to be moved directly to the workers, for example with a singleton module:
import caffe

# Module-level singleton: one network per Python worker process.
net = None


def build_net(*args, **kwargs):
    """Create the Caffe network (placeholder: fill in the initialization)."""
    ...  # Initialize net here
    return net


def get_net(*args, **kwargs):
    """Return the cached network, building it on first use.

    Arguments are forwarded to `build_net` only when the network has not
    been created yet in this process.
    """
    global net
    if net is None:
        net = build_net(*args, **kwargs)
    return net
import net_builder
def classify_image(image_path, transformer, *args, **kwargs):
net = net_builder.get_net(*args, **kwargs)
It means you'll have to distribute all required files as well. This can be done either manually or using the SparkFiles mechanism.
On a side note you should take a look at the SparkNet package.
