I'm trying to implement a DDPG using tf_agents and reverb but I can't figure out how both libraries to work together. For this, I'm trying to use the code from the DQL-Tutorial from tf_agents with my own agent and gym environment. The error occurs when I try to retrieve data from reverb and the tensor shape doesn't match. I've created the smallest possible example I could think of, to show the problem:
Imports
import gym
from gym import spaces
from gym.utils.env_checker import check_env
from gym.envs.registration import register
import tensorflow as tf
import numpy as np
import reverb
from tf_agents.agents import DdpgAgent
from tf_agents.drivers.py_driver import PyDriver
from tf_agents.environments import TFPyEnvironment, suite_gym, validate_py_environment
from tf_agents.networks import Sequential
from tf_agents.policies import PyTFEagerPolicy
from tf_agents.replay_buffers import ReverbReplayBuffer, ReverbAddTrajectoryObserver
from tf_agents.specs import tensor_spec, BoundedArraySpec
Example Gym environment
class TestGym(gym.Env):
metadata = {"render_modes": ["human"]}
def __init__(self):
self.observation_space = spaces.Box(low=-1, high=1, shape=(30,), dtype=np.float32)
self.action_space = spaces.Box(low=-1, high=1, shape=(2,), dtype=np.float32)
self.__count = 0
def step(self, action):
self.__count += 1
return np.zeros(30, dtype=np.float32), 0, self.__count >= 100, {}
def render(self, mode="human"):
return None
def reset(self, seed=None, return_info=False, options=None):
super().reset(seed=seed, options=options)
self.__count = 0
if return_info:
return np.zeros(30, dtype=np.float32), {}
else:
return np.zeros(30, dtype=np.float32)
register(
id="TestGym-v0",
entry_point="reverb_test:TestGym",
nondeterministic=False
)
Creating a TFAgent and use reverb to store and retrieve
def main():
# make sure the gym environment is ok
check_env(gym.make("TestGym-v0"))
# create tf-py-environment
env = TFPyEnvironment(suite_gym.load("TestGym-v0"))
# make sure the py environment is ok
validate_py_environment(env.pyenv, episodes=5)
# example actor network
actor_network = Sequential([
tf.keras.layers.Dense(40),
tf.keras.layers.Dense(2, activation=None)
], input_spec=env.observation_spec())
# example critic network
n_actions = env.action_spec().shape[0]
n_observ = env.observation_spec().shape[0]
critic_input_spec: BoundedArraySpec = BoundedArraySpec((n_actions + n_observ,), "float32", minimum=-1, maximum=1)
critic_network = Sequential([
tf.keras.layers.Dense(40),
tf.keras.layers.Dense(1, activation=None)
], input_spec=critic_input_spec)
# example rl agent
agent = DdpgAgent(
time_step_spec=env.time_step_spec(),
action_spec=env.action_spec(),
actor_network=actor_network,
critic_network=critic_network,
)
# create reverb table
table_name = "uniform_table"
replay_buffer_signature = tensor_spec.from_spec(agent.collect_data_spec)
replay_buffer_signature = tensor_spec.add_outer_dim(replay_buffer_signature)
table = reverb.Table(
table_name,
max_size=100_000,
sampler=reverb.selectors.Uniform(),
remover=reverb.selectors.Fifo(),
rate_limiter=reverb.rate_limiters.MinSize(1),
signature=replay_buffer_signature
)
# create reverb server
reverb_server = reverb.Server([table])
# create replay buffer for this table and server
replay_buffer = ReverbReplayBuffer(
agent.collect_data_spec,
table_name=table_name,
sequence_length=2,
local_server=reverb_server
)
# create observer to store experiences
observer = ReverbAddTrajectoryObserver(
replay_buffer.py_client,
table_name,
sequence_length=2
)
# run a view steps to ill the replay buffer
driver = PyDriver(env.pyenv, PyTFEagerPolicy(agent.collect_policy, use_tf_function=True), [observer], max_steps=100)
driver.run(env.reset())
# create a dataset to access the replay buffer
dataset = replay_buffer.as_dataset(num_parallel_calls=3, sample_batch_size=20, num_steps=2).prefetch(3)
iterator = iter(dataset)
# retrieve a sample
print(next(iterator)) # <===== ERROR
if __name__ == '__main__':
main()
When I run this code, I get the following error Message:
tensorflow.python.framework.errors_impl.InvalidArgumentError:
{{function_node __wrapped__IteratorGetNext_output_types_11_device_/job:localhost/replica:0/task:0/device:CPU:0}}
Received incompatible tensor at flattened index 0 from table 'uniform_table'.
Specification has (dtype, shape): (int32, [?]).
Tensor has (dtype, shape): (int32, [2,1]).
Table signature:
0: Tensor<name: 'step_type/step_type', dtype: int32, shape: [?]>,
1: Tensor<name: 'observation/observation', dtype: float, shape: [?,30]>,
2: Tensor<name: 'action/action', dtype: float, shape: [?,2]>,
3: Tensor<name: 'next_step_type/step_type', dtype: int32, shape: [?]>,
4: Tensor<name: 'reward/reward', dtype: float, shape: [?]>,
5: Tensor<name: 'discount/discount', dtype: float, shape: [?]>
[Op:IteratorGetNext]
In my gym environment, I defined the action space as a 2-element vector and I'm guessing that this action vector is somehow the problem. I've tried to use tensor specs for every input and output but I guess I made a mistake somewhere. Does anyone have an Idea what I'm doing wrong here?
I finally figured it out:
PyDriver needs a PyEnvironment to work properly. In my code I used the pyenv attribute of my TFPyEnvironment which, despite of its name, doesn't return a regular PyEnvironment but a batched one insted.
Changing the code in the following way fixes this issue:
...
def main():
# make sure the gym environment is ok
check_env(gym.make("TestGym-v0"))
# create py-environment
pyenv = suite_gym.load("TestGym-v0") # <=============
# create tf-py-environment
env = TFPyEnvironment(pyenv)
...
driver = PyDriver(py_env, PyTFEagerPolicy(agent.collect_policy, use_tf_function=True), [observer], max_steps=100)
driver.run(py_env.reset())
...
Related
I am trying to set a Deep-Q-Learning agent with a custom environment in OpenAI Gym. I have 4 continuous state variables with individual limits and 3 integer action variables with individual limits.
Here is the code:
#%% import
from gym import Env
from gym.spaces import Discrete, Box, Tuple
import numpy as np
#%%
class Custom_Env(Env):
def __init__(self):
# Define the state space
#State variables
self.state_1 = 0
self.state_2 = 0
self.state_3 = 0
self.state_4_currentTimeSlots = 0
#Define the gym components
self.action_space = Box(low=np.array([0, 0, 0]), high=np.array([10, 20, 27]), dtype=np.int)
self.observation_space = Box(low=np.array([20, -20, 0, 0]), high=np.array([22, 250, 100, 287]),dtype=np.float16)
def step(self, action ):
# Update state variables
self.state_1 = self.state_1 + action [0]
self.state_2 = self.state_2 + action [1]
self.state_3 = self.state_3 + action [2]
#Calculate reward
reward = self.state_1 + self.state_2 + self.state_3
#Set placeholder for info
info = {}
#Check if it's the end of the day
if self.state_4_currentTimeSlots >= 287:
done = True
if self.state_4_currentTimeSlots < 287:
done = False
#Move to the next timeslot
self.state_4_currentTimeSlots +=1
state = np.array([self.state_1,self.state_2, self.state_3, self.state_4_currentTimeSlots ])
#Return step information
return state, reward, done, info
def render (self):
pass
def reset (self):
self.state_1 = 0
self.state_2 = 0
self.state_3 = 0
self.state_4_currentTimeSlots = 0
state = np.array([self.state_1,self.state_2, self.state_3, self.state_4_currentTimeSlots ])
return state
#%% Set up the environment
env = Custom_Env()
#%% Create a deep learning model with keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.optimizers import Adam
def build_model(states, actions):
model = Sequential()
model.add(Dense(24, activation='relu', input_shape=states))
model.add(Dense(24, activation='relu'))
model.add(Dense(actions[0] , activation='linear'))
return model
states = env.observation_space.shape
actions = env.action_space.shape
print("env.observation_space: ", env.observation_space)
print("env.observation_space.shape : ", env.observation_space.shape )
print("action_space: ", env.action_space)
print("action_space.shape : ", env.action_space.shape )
model = build_model(states, actions)
print(model.summary())
#%% Build Agent wit Keras-RL
from rl.agents import DQNAgent
from rl.policy import BoltzmannQPolicy
from rl.memory import SequentialMemory
def build_agent (model, actions):
policy = BoltzmannQPolicy()
memory = SequentialMemory(limit = 50000, window_length=1)
dqn = DQNAgent (model = model, memory = memory, policy=policy,
nb_actions=actions, nb_steps_warmup=10, target_model_update= 1e-2)
return dqn
dqn = build_agent(model, actions)
dqn.compile(Adam(lr=1e-3), metrics = ['mae'])
dqn.fit (env, nb_steps = 4000, visualize=False, verbose = 1)
When I run this code I get the following error message
ValueError: Model output "Tensor("dense_23/BiasAdd:0", shape=(None, 3), dtype=float32)" has invalid shape. DQN expects a model that has one dimension for each action, in this case (3,).
thrown by the line dqn = DQNAgent (model = model, memory = memory, policy=policy, nb_actions=actions, nb_steps_warmup=10, target_model_update= 1e-2)
Can anyone tell me, why this problem is occuring and how to solve this issue? I assume it has something to do with the built model and thus with the action and state spaces. But I could not figure out what exactly the problem is.
Reminder on the bounty: My bounty is expiring quite soon and unfortunately, I still have not received any answer. If you at least have a guess how to tackle that problem, I'll highly appreciate if you share your thoughts with me and I would be quite thankful for it.
As we talked about in the comments, it seems that the Keras-rl library is no longer supported (the last update in the repository was in 2019), so it's possible that everything is inside Keras now. I take a look at Keras documentation and there are no high-level functions to build a reinforcement learning model, but is possible to use lower-level functions to this.
Here is an example of how to use Deep Q-Learning with Keras: link
Another solution may be to downgrade to Tensorflow 1.0 as it seems the compatibility problem occurs due to some changes in version 2.0. I didn't test, but maybe the Keras-rl + Tensorflow 1.0 may work.
There is also a branch of Keras-rl to support Tensorflow 2.0, the repository is archived, but there is a chance that it will work for you
Adding a flatten layer before the final output can solve this error. Example:
def build_model(states, actions):
model = Sequential()
model.add(Dense(24, activation='relu', input_shape=states))
model.add(Dense(24, activation='relu'))
model.add(Flatten())
model.add(Dense(actions[0] , activation='linear'))
return model
I have created a custom encoder/decoder like so:
import tensorflow as tf
from tensorflow_model_optimization.python.core.internal import tensor_encoding as te
# noinspection PyUnresolvedReferences
class SparseTernaryCompressionEncodingStage(te.core.EncodingStageInterface):
AVERAGE = 'average'
NEGATIVES = 'negatives'
POSITIVES = 'positives'
TESTING = 'testing'
NEW_SHAPE = 'new_shape'
ORIGINAL_SHAPE = 'original_shape'
def name(self):
pass
def compressible_tensors_keys(self):
pass
def commutes_with_sum(self):
pass
def decode_needs_input_shape(self):
pass
def get_params(self):
pass
def encode(self, original_tensor, encode_params):
original_shape = tf.shape(original_tensor)
tensor = tf.reshape(original_tensor, [-1])
sparsification_rate = int(len(tensor) / 100 * 1)
new_shape = tensor.get_shape().as_list()
if sparsification_rate == 0:
sparsification_rate = 1
mask = tf.cast(tf.abs(tensor) >= tf.math.top_k(tf.abs(tensor), sparsification_rate)[0][-1], tf.float32)
inv_mask = tf.cast(tf.abs(tensor) < tf.math.top_k(tf.abs(tensor), sparsification_rate)[0][-1], tf.float32)
tensor_masked = tf.multiply(tensor, mask)
average = tf.reduce_sum(tf.abs(tensor_masked)) / sparsification_rate
compressed_tensor = tf.add(tf.multiply(average, mask) * tf.sign(tensor), tf.multiply(tensor_masked, inv_mask))
negatives = tf.where(compressed_tensor < 0)
positives = tf.where(compressed_tensor > 0)
encoded_x = {self.AVERAGE: average, self.NEGATIVES: negatives, self.POSITIVES: positives,
self.NEW_SHAPE: new_shape, self.ORIGINAL_SHAPE: original_shape}
return encoded_x
def decode(self, encoded_tensors, decode_params, num_summands=None, shape=None):
decompressed_tensor = tf.zeros(self.NEW_SHAPE, tf.float32)
average_values_negative = tf.fill([len(self.NEGATIVES), ], -self.AVERAGE)
average_values_positive = tf.fill([len(self.POSITIVES), ], self.AVERAGE)
decompressed_tensor = tf.tensor_scatter_nd_update(decompressed_tensor, self.NEGATIVES, average_values_negative)
decompressed_tensor = tf.tensor_scatter_nd_update(decompressed_tensor, self.POSITIVES, average_values_positive)
decompressed_tensor = tf.reshape(decompressed_tensor, self.ORIGINAL_SHAPE)
return decompressed_tensor
Now, i would like to use the encode function to encode all the weights that the client send to the server and, on the server, use the decode function to be able to obtain all the weights back. Basically, instead of sending all the weights from the client to the server, i want to send only some necessaries information that will let me able to create the weights back from only 5 informations.
The problem is that i don't understand how to tell the client to use this encoder to send the information and to the server to use the decoder before trying to do:
round_model_delta = tff.federated_mean(client_outputs.weights_delta, weight=weight_denom)
I'm using Tensorflow Federated simple_fedavg as basic project.
If you only want to modify the aggregation, you may have easier time using the tff.learning APIs with what you have, parameterizing the aggregation with a tff.aggregators object. For instance:
te.core.EncoderComposer(te.testing.PlusOneOverNEncodingStage()).make()
def encoder_fn(value_spec):
return te.encoders.as_gather_encoder(
te.core.EncoderComposer(SparseTernaryCompressionEncodingStage()).make(),
value_spec)
tff.learning.build_federated_averaging_process(
..., # Other args.
model_update_aggregation_factory=tff.aggregators.EncodedSumFactory(
encoder_fn))
You may also find these tutorials helpful:
https://www.tensorflow.org/federated/tutorials/tuning_recommended_aggregators
https://www.tensorflow.org/federated/tutorials/custom_aggregators
Trying to use text classifier model shared by https://github.com/allenai/scibert/blob/master/scibert/models/text_classifier.py
Everything used to work and suddenly I keep getting this error: Cannot register text_classifier as Model; name already in use for TextClassifier
What might be the reason? any suggestion?
from typing import Dict, Optional, List, Any
import torch
import torch.nn.functional as F
from allennlp.data import Vocabulary
from allennlp.models.model import Model
from allennlp.modules import FeedForward, TextFieldEmbedder, Seq2SeqEncoder
from allennlp.nn import InitializerApplicator, RegularizerApplicator
from allennlp.nn import util
from allennlp.training.metrics import CategoricalAccuracy, F1Measure
from overrides import overrides
#Model.register("text_classifier")
class TextClassifier(Model):
"""
Implements a basic text classifier:
1) Embed tokens using `text_field_embedder`
2) Seq2SeqEncoder, e.g. BiLSTM
3) Append the first and last encoder states
4) Final feedforward layer
Optimized with CrossEntropyLoss. Evaluated with CategoricalAccuracy & F1.
"""
def __init__(self, vocab: Vocabulary,
text_field_embedder: TextFieldEmbedder,
text_encoder: Seq2SeqEncoder,
classifier_feedforward: FeedForward,
verbose_metrics: False,
initializer: InitializerApplicator = InitializerApplicator(),
regularizer: Optional[RegularizerApplicator] = None,
) -> None:
super(TextClassifier, self).__init__(vocab, regularizer)
self.text_field_embedder = text_field_embedder
self.num_classes = self.vocab.get_vocab_size("labels")
self.text_encoder = text_encoder
self.classifier_feedforward = classifier_feedforward
self.prediction_layer = torch.nn.Linear(self.classifier_feedforward.get_output_dim() , self.num_classes)
self.label_accuracy = CategoricalAccuracy()
self.label_f1_metrics = {}
self.verbose_metrics = verbose_metrics
for i in range(self.num_classes):
self.label_f1_metrics[vocab.get_token_from_index(index=i, namespace="labels")] = F1Measure(positive_label=i)
self.loss = torch.nn.CrossEntropyLoss()
self.pool = lambda text, mask: util.get_final_encoder_states(text, mask, bidirectional=True)
initializer(self)
#overrides
def forward(self,
text: Dict[str, torch.LongTensor],
label: torch.IntTensor = None,
metadata: List[Dict[str, Any]] = None) -> Dict[str, torch.Tensor]:
"""
Parameters
----------
text : Dict[str, torch.LongTensor]
From a ``TextField``
label : torch.IntTensor, optional (default = None)
From a ``LabelField``
metadata : ``List[Dict[str, Any]]``, optional, (default = None)
Metadata containing the original tokenization of the premise and
hypothesis with 'premise_tokens' and 'hypothesis_tokens' keys respectively.
Returns
-------
An output dictionary consisting of:
label_logits : torch.FloatTensor
A tensor of shape ``(batch_size, num_labels)`` representing unnormalised log probabilities of the label.
label_probs : torch.FloatTensor
A tensor of shape ``(batch_size, num_labels)`` representing probabilities of the label.
loss : torch.FloatTensor, optional
A scalar loss to be optimised.
"""
embedded_text = self.text_field_embedder(text)
mask = util.get_text_field_mask(text)
encoded_text = self.text_encoder(embedded_text, mask)
pooled = self.pool(encoded_text, mask)
ff_hidden = self.classifier_feedforward(pooled)
logits = self.prediction_layer(ff_hidden)
class_probs = F.softmax(logits, dim=1)
output_dict = {"logits": logits}
if label is not None:
loss = self.loss(logits, label)
output_dict["loss"] = loss
# compute F1 per label
for i in range(self.num_classes):
metric = self.label_f1_metrics[self.vocab.get_token_from_index(index=i, namespace="labels")]
metric(class_probs, label)
self.label_accuracy(logits, label)
return output_dict
##overrides
def decode(self, output_dict: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]:
class_probabilities = F.softmax(output_dict['logits'], dim=-1)
output_dict['class_probs'] = class_probabilities
return output_dict
def get_metrics(self, reset: bool = False) -> Dict[str, float]:
metric_dict = {}
sum_f1 = 0.0
for name, metric in self.label_f1_metrics.items():
metric_val = metric.get_metric(reset)
if self.verbose_metrics:
metric_dict[name + '_P'] = metric_val[0]
metric_dict[name + '_R'] = metric_val[1]
metric_dict[name + '_F1'] = metric_val[2]
sum_f1 += metric_val[2]
names = list(self.label_f1_metrics.keys())
total_len = len(names)
average_f1 = sum_f1 / total_len
metric_dict['average_F1'] = average_f1
metric_dict['accuracy'] = self.label_accuracy.get_metric(reset)
return metric_dict
The name is already taken. Something that’s already a part of AllenNLP uses that name already, so you need to pick a different one.
For the curious, AllenNLP creates a registry of models, so that you can select a model at the command line. (That’s what the decorator is doing.) This requires the names to be unique.
The name text_classifier was used by AllenNLP only after the external package you’re using used it. It worked in May 2019, when that file was last updated. But 17 months ago, AllenNLP started using it. So it’s not your fault; it’s a mismatch between those two packages (at least, in their current versions).
import numpy as np
import gym
from gym.spaces import Box
import ray
from ray import tune
from ray.rllib.utils import try_import_tf
import ray.rllib.agents.ddpg as ddpg
from ray.tune.logger import pretty_print
tf = try_import_tf()
# gym environment adapter
class SimpleSupplyChain(gym.Env):
def __init__(self, config):
self.reset()
self.action_space = Box(low=0.0, high=1000.0, shape=(self.supply_chain.retail_store_num + 1, ), dtype=np.float32)
self.observation_space = Box(-1000000.0, 10000000, shape=(len(self.supply_chain.initial_state().to_array()), ), dtype=np.float32)
def reset(self):
self.supply_chain = SupplyChainEnvironment()
self.state = self.supply_chain.initial_state()
return self.state.to_array()
def step(self, action):
action_obj = Action(self.supply_chain.retail_store_num)
action_obj.production_level = action[0]
action_obj.shippings_to_retail_stores = action[1:]
self.state, reward, done = self.supply_chain.step(self.state, action_obj)
return self.state.to_array(), reward, done, {}
ray.shutdown()
ray.init()
def train_ddpg():
config = ddpg.DEFAULT_CONFIG.copy()
config["log_level"] = "WARN"
config["actor_hiddens"] = [512, 512]
config["critic_hiddens"] = [512, 512]
config["gamma"] = 0.95
config["timesteps_per_iteration"] = 1000
config["target_network_update_freq"] = 5
config["buffer_size"] = 10000
# config['actor_lr']=1e-6
# config['critic_lr']=1e-6
print(config)
import json
try:
import cPickle as pickle
except ImportError: # Python 3.x
import pickle
with open('config.p', 'wb') as fp:
pickle.dump(config, fp, protocol=pickle.HIGHEST_PROTOCOL)
trainer = ddpg.DDPGTrainer(config=config, env=SimpleSupplyChain)
for i in range(5):
result = trainer.train()
print(pretty_print(result))
checkpoint = trainer.save('./')
print("Checkpoint saved at", checkpoint)
train_ddpg()
When I run the above code I am getting this error.
Cannot cast array data from dtype('O') to dtype('float32') according to the rule 'same_kind'
I am using DDPG to find the solution but it seems that there is something wrong with the data type. The data comes from pandas and I changed the datatype to float64. I checked and tried everything i could but the error still appeared. Could someone help me please?
While running the following python code in C++ using pybind11, pytorch 1.6.0, I get "Invalid Pointer" error. In python, the code runs successfully without any error. Whats the reason? How can I solve this problem?
import torch
import torch.nn.functional as F
import numpy as np
import cv2
import torchvision
import eval_widerface
import torchvision_model
def resize(image, size):
image = F.interpolate(image.unsqueeze(0), size=size, mode="nearest").squeeze(0)
return image
# define constants
model_path = '/path/to/model.pt'
image_path = '/path/to/image_pad.jpg'
scale = 1.0 #Image resize scale (2 for half size)
font = cv2.FONT_HERSHEY_SIMPLEX
MIN_SCORE = 0.9
image_bgr = cv2.imread(image_path)
image_rgb = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB)#skimage.io.imread(args.image_path)
cv2.imshow("input image",image_bgr)
cv2.waitKey()
cv2.destroyAllWindows()
# load pre-trained model
return_layers = {'layer2':1,'layer3':2,'layer4':3}
RetinaFace = torchvision_model.create_retinaface(return_layers)
print('RetinaFace.state_dict().')
retina_dict = RetinaFace.state_dict()
the following function generates error.
def create_retinaface(return_layers,backbone_name='resnet50',anchors_num=3,pretrained=True):
print('In create_retinaface.')
print(resnet.__dict__)
backbone = resnet.__dict__[backbone_name](pretrained=pretrained)
print('backbone.')
# freeze layer1
for name, parameter in backbone.named_parameters():
print('freeze layer 1.');
# if 'layer2' not in name and 'layer3' not in name and 'layer4' not in name:
# parameter.requires_grad_(False)
if name == 'conv1.weight':
# print('freeze first conv layer...')
parameter.requires_grad_(False)
model = RetinaFace(backbone,return_layers,anchor_nums=3)
return model
The statement backbone = resnet.__dict__ [backbone_name](pretrained=pretrained) generated error that looks like
*** Error in `./p': munmap_chunk(): invalid pointer: 0x00007f4461866db0 ***
======= Backtrace: =========
/usr/lib64/libc.so.6(+0x7f3e4)[0x7f44736b43e4]
/usr/local/lib64/libopencv_gapi.so.4.1(_ZNSt10_HashtableISsSsSaISsENSt8__detail9_IdentityESt8equal_toISsESt4hashISsENS1_18_Mod_range_hashingENS1_20_Default_ranged_hashENS1_20_Prime_rehash_policyENS1_17_Hashtable_traitsILb1ELb1ELb1EEEE21_M_insert_unique_nodeEmmPNS1_10_Hash_nodeISsLb1EEE+0xc9)[0x7f4483dee1a9]
/home/20face/.virtualenvs/torch/lib64/python3.6/site-packages/torch/lib/libtorch_python.so(+0x4403b5)[0x7f4460bb73b5]
/home/20face/.virtualenvs/torch/lib64/python3.6/site-packages/torch/lib/libtorch_python.so(+0x44570a)[0x7f4460bbc70a]
/home/20face/.virtualenvs/torch/lib64/python3.6/site-packages/torch/lib/libtorch_python.so(+0x275b20)[0x7f44609ecb20]
/usr/lib64/libpython3.6m.so.1.0(_PyCFunction_FastCallDict+0x147)[0x7f4474307167]
/usr/lib64/libpython3.6m.so.1.0(+0x1507df)[0x7f44743727df]
/usr/lib64/libpython3.6m.so.1.0(_PyEval_EvalFrameDefault+0x3a7)[0x7f44743670f7]
/usr/lib64/libpython3.6m.so.1.0(+0x1505ca)[0x7f44743725ca]
/usr/lib64/libpython3.6m.so.1.0(+0x150903)[0x7f4474372903]
/usr/lib64/libpython3.6m.so.1.0(_PyEval_EvalFrameDefault+0x3a7)[0x7f44743670f7]
/usr/lib64/libpython3.6m.so.1.0(+0x14fb69)[0x7f4474371b69]
/usr/lib64/libpython3.6m.so.1.0(_PyFunction_FastCallDict+0x24f)[0x7f44743739ff]
/usr/lib64/libpython3.6m.so.1.0(_PyObject_FastCallDict+0x10e)[0x7f44742ca1de]
/usr/lib64/libpython3.6m.so.1.0(_PyObject_Call_Prepend+0x61)[0x7f44742ca2f1]
/usr/lib64/libpython3.6m.so.1.0(PyObject_Call+0x43)[0x7f44742c9f63]
/usr/lib64/libpython3.6m.so.1.0(+0xfa7e5)[0x7f447431c7e5]
/usr/lib64/libpython3.6m.so.1.0(+0xf71e2)[0x7f44743191e2]
/usr/lib64/libpython3.6m.so.1.0(PyObject_Call+0x43)[0x7f44742c9f63]
/usr/lib64/libpython3.6m.so.1.0(_PyEval_EvalFrameDefault+0x2067)[0x7f4474368db7]
/usr/lib64/libpython3.6m.so.1.0(PyEval_EvalCodeEx+0x24f)[0x7f4474372c9f]
This line is causing the error because it assumes __dict__ has a backbone_name element:
backbone = resnet.__dict__[backbone_name](pretrained=pretrained)
When that isn't the case, it basically tries to access invalid memory. Check __dict__ first with an if statement or make sure that it has the backbone_name element before trying to use it.