How to get the local_rank attribute - Python

The Engine code is as follows:
import argparse
import os

import torch
import torch.distributed as dist

# State, get_logger and parse_torch_devices are project-local helpers (not shown).

class Engine(object):
    def __init__(self, cfg, custom_parser=None):
        self.version = 0.01
        self.state = State()
        self.devices = None
        self.distributed = False
        self.logger = None
        self.cfg = cfg

        if custom_parser is None:
            self.parser = argparse.ArgumentParser()
        else:
            assert isinstance(custom_parser, argparse.ArgumentParser)
            self.parser = custom_parser

        self.inject_default_parser()
        self.args = self.parser.parse_args()
        self.continue_state_object = self.args.continue_fpath

        if 'WORLD_SIZE' in os.environ:
            self.distributed = int(os.environ['WORLD_SIZE']) > 1

        if self.distributed:
            self.local_rank = self.args.local_rank
            self.world_size = int(os.environ['WORLD_SIZE'])
            self.world_rank = int(os.environ['RANK'])
            torch.cuda.set_device(self.local_rank)
            dist.init_process_group(backend="nccl", init_method='env://')
            dist.barrier()
            self.devices = [i for i in range(self.world_size)]
        else:
            # TODO: check non-distributed training
            self.world_rank = 1
            self.devices = parse_torch_devices(self.args.devices)

    def setup_log(self, name='train', log_dir=None, file_name=None):
        if not self.logger:
            self.logger = get_logger(
                name, log_dir, self.args.local_rank, filename=file_name)
        else:
            self.logger.warning('already exists logger')
        return self.logger
I want to get local_rank, but when I just use Engine.local_rank, it returns: AttributeError: type object 'Engine' has no attribute 'local_rank'

Your local_rank attribute is only set when self.distributed is True, which requires 'WORLD_SIZE' to be present in os.environ with an integer value greater than 1. So set the WORLD_SIZE environment variable before constructing the Engine. Also note that local_rank is an instance attribute, so access it on an Engine instance, not on the Engine class itself (that is exactly what the "type object 'Engine'" in the error is telling you).
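For example, here is a minimal sketch (the values are illustrative assumptions; in a real run a launcher such as torch.distributed.launch or torchrun exports these variables with one process per rank, and a single process with WORLD_SIZE=2 would block in init_process_group waiting for its peer):

import os

# Hypothetical values, normally exported by the launcher rather than set by hand.
os.environ['WORLD_SIZE'] = '2'           # must parse as an int > 1 to enable self.distributed
os.environ['RANK'] = '0'                 # global rank of this process
os.environ['MASTER_ADDR'] = 'localhost'  # required by init_method='env://'
os.environ['MASTER_PORT'] = '29500'

engine = Engine(cfg)        # an instance; Engine.local_rank on the class never exists
print(engine.local_rank)    # set in __init__ because self.distributed is True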

Related

How to retrieve the attributes of a saved model class to initialize it

I have the following class. I'd like to save an instance of this class in one script and load it in another one. However, I don't know how to correctly initialize the attributes of the class when I load it somewhere else.
import collections

import torch
import torch.nn as nn

# Transition is assumed to be a namedtuple-like container defined elsewhere in the project.

class Dataset(nn.Module):
    def __init__(
        self,
        observation_spec,
        action_spec,
        size,
    ):
        super(Dataset, self).__init__()
        self._size = size
        obs_shape = list(observation_spec.shape)
        obs_type = observation_spec.dtype
        action_shape = list(action_spec.shape)
        action_type = action_spec.dtype
        self._s1 = self._zeros([size] + obs_shape, obs_type)
        self._s2 = self._zeros([size] + obs_shape, obs_type)
        self._a1 = self._zeros([size] + action_shape, action_type)
        self._a2 = self._zeros([size] + action_shape, action_type)
        self._discount = self._zeros([size], torch.float32)
        self._reward = self._zeros([size], torch.float32)
        self._data = Transition(
            s1=self._s1, s2=self._s2, a1=self._a1, a2=self._a2,
            discount=self._discount, reward=self._reward)
        self._current_size = torch.autograd.Variable(torch.tensor(0), requires_grad=False)
        self._current_idx = torch.autograd.Variable(torch.tensor(0), requires_grad=False)
        self._capacity = torch.autograd.Variable(torch.tensor(self._size))
        self._config = collections.OrderedDict(
            observation_spec=observation_spec,
            action_spec=action_spec,
            size=size)

    @property
    def config(self):
        return self._config

    @property
    def data(self):
        return self._data

    @property
    def capacity(self):
        return self._size

    @property
    def size(self):
        return self._current_size.numpy()

    def _zeros(self, shape, dtype):
        """Create a variable initialized with zeros."""
        return torch.autograd.Variable(torch.zeros(shape, dtype=dtype))
# Save the model/class.
assert data.size == data.capacity
data_ckpt_name = os.path.join(log_dir, 'data_{}.pt'.format(env_name))
torch.save([data.capacity, data.state_dict()], data_ckpt_name)
whole_data_ckpt_name = os.path.join(log_dir, 'data_{}.pth'.format(env_name))
with open(whole_data_ckpt_name, 'wb') as filehandler:
    pickle.dump(data, filehandler)
When I tried to load this class (based on this answer) with its attributes inside another script:
dm_env = gym.spec(env_name).make()
env = alf_gym_wrapper.AlfGymWrapper(dm_env)
observation_spec = env.observation_spec()
action_spec = env.action_spec()

# Prepare data.
logging.info('Loading data from %s ...', data_file)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
data_ckpt_name = os.path.join(data_file, 'data_{}.pt'.format(env_name))
whole_data_ckpt_name = os.path.join(data_file, 'data_{}.pth'.format(env_name))

data_size, state = torch.load(data_ckpt_name, map_location=device)
full_data = dc.Dataset(observation_spec, action_spec, data_size)
open(whole_data_ckpt_name, 'a').close()
scores = {}
try:
    with open(whole_data_ckpt_name, "rb") as file:
        unpickler = pickle.Unpickler(file)
        scores = unpickler.load()
        if not isinstance(scores, dict):
            scores = {}
except EOFError:
    return {}
print(f"loaded data {scores}")
full_data.load_state_dict(state)
print(f"loaded data : {full_data.size}")
It seems the code breaks down here. How can I extract the size attribute of the class from the saved model when I load it, so that I can initialize this attribute properly?
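Since the class already exposes its constructor arguments through the config property, one option is to save that config next to the state_dict and rebuild the object from it. A minimal sketch, assuming the Dataset class is importable in the loading script and the spec objects are picklable:

# At save time: store the constructor arguments alongside the weights.
torch.save({'config': dict(data.config), 'state': data.state_dict()},
           data_ckpt_name)

# At load time: rebuild the instance from the saved config, then restore state.
ckpt = torch.load(data_ckpt_name, map_location=device)
full_data = dc.Dataset(**ckpt['config'])  # observation_spec, action_spec, size
full_data.load_state_dict(ckpt['state'])
print(full_data.size)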

Python: is_set/partial update pattern - how to recognize if a member was explicitly set in constructor?

In my class BaseFoo, I want to determine which arguments of its subclass Foo were passed explicitly in the constructor:
class BaseFoo:
    def __init__(self, created_at, updated_at, deleted_at):
        self.created_at = created_at
        self.updated_at = updated_at
        self.deleted_at = deleted_at
        self.is_set = {}

class Foo(BaseFoo):
    def __init__(self, friend_id: int = None, start_time: datetime = None,
                 id: int = None, end_time: datetime = None, created_at=None,
                 updated_at=None, deleted_at=None):
        super().__init__(created_at, updated_at, deleted_at)
        self.id = id
        self.friend_id = friend_id
        self.start_time = start_time
        self.end_time = end_time
My goal is to properly populate the is_set field of a BaseFoo/Foo instance according to what has been set explicitly by the constructor/setters. Here are a few examples of my end goal:
# input
foo = Foo(friend_id=1)
print(foo.is_set)
# output
# {"friend_id": True}

foo = Foo()
print(foo.is_set)
# output
# {}

foo = Foo(start_time=None)
foo.friend_id = 3
print(foo.is_set)
# output
# {"start_time": True, "friend_id": True}

foo = Foo(start_time=datetime.datetime.now())
foo.friend_id = 3
print(foo.is_set)
# output
# {"start_time": True, "friend_id": True}
I'm almost where I want to be with this method in BaseFoo:
def __setattr__(self, attrname, val):
    import inspect
    f = inspect.currentframe()
    if 'self' in f.f_back.f_locals and issubclass(type(f.f_back.f_locals['self']), BaseFoo):
        # call came from __init__
        if attrname != "is_set":
            self.is_set = self.is_set if hasattr(self, "is_set") else {}
            self.is_set[attrname] = True
        super().__setattr__(attrname, val)
    else:
        if attrname != "is_set":
            self.is_set = self.is_set if hasattr(self, "is_set") else {}
            self.is_set[attrname] = True
        super().__setattr__(attrname, val)
The only missing piece here is that, in the case where __setattr__ has been triggered by the __init__ constructor, I need to know whether the val of attrname was explicitly passed by the user, even when it was explicitly set to its default value.
Is this possible?
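Within __init__ itself, Foo() and Foo(start_time=None) produce identical locals, so frame inspection alone cannot tell them apart. A common workaround is to accept **kwargs, since only names the caller actually passed appear there. A sketch under that design (it trades away the explicit signature, so argument names are validated by hand here):

import datetime

class BaseFoo:
    def __init__(self, **kwargs):
        self.is_set = {}                 # must be the first attribute assigned
        for name, value in kwargs.items():
            setattr(self, name, value)   # goes through __setattr__ below

    def __setattr__(self, attrname, val):
        if attrname != "is_set":
            self.is_set[attrname] = True
        super().__setattr__(attrname, val)

class Foo(BaseFoo):
    _allowed = {"friend_id", "start_time", "id", "end_time",
                "created_at", "updated_at", "deleted_at"}

    def __init__(self, **kwargs):
        unknown = set(kwargs) - self._allowed
        if unknown:
            raise TypeError("unexpected arguments: {}".format(unknown))
        super().__init__(**kwargs)

foo = Foo(start_time=None)
foo.friend_id = 3
print(foo.is_set)    # {'start_time': True, 'friend_id': True}
print(Foo().is_set)  # {}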

Python - Parent method doesn't access the value of a child class variable

Hi, I'm having a problem with these classes. I created the parent class extracao_nia with the method aplica_extracao to hold the part of the execution that is shared with other classes; the differing part is the transforma method defined in the child class.
The issue is that the variables I defined as list() are None when I execute the code:
AttributeError: 'NoneType' object has no attribute 'append'
import datetime
import json

class extracao_nia:
    def __init__(self, d=1, h=1, m=15):
        self._data_base = "database"
        self.UM_DIA = datetime.timedelta(days=d)
        self.UMA_HORA = datetime.timedelta(hours=h)
        self.INTERVALO = datetime.timedelta(minutes=m)

    @property
    def data_base(self):
        return self._data_base

    def aplica_extracao(self, SQL):
        fim_intervalo = self.inicio + self.INTERVALO
        pbar = self.cria_prog_bar(SQL)
        while (fim_intervalo <= self.FIM):
            self.connector.execute(SQL, (self.inicio.strftime('%Y-%m-%d %H:%M'),
                                         fim_intervalo.strftime('%Y-%m-%d %H:%M')))
            for log in self.connector:
                self.transforma(log)
            self.inicio = fim_intervalo
            fim_intervalo = self.inicio + self.INTERVALO

class usuarios_unicos(extracao_nia):
    def __init__(self, d=1, h=1, m=15, file='nodes.json'):
        self._data_base = "database"
        self.UM_DIA = datetime.timedelta(days=d)
        self.UMA_HORA = datetime.timedelta(hours=h)
        self.INTERVALO = datetime.timedelta(minutes=m)
        self.file = file
        self.ids = list()
        self.nodes = list()
        self.list_cpf = list()

    def transforma(self, log):
        context = json.loads(log[0])['context']
        output = json.loads(log[0])['output']
        try:
            nr_cpf = context['dadosDinamicos']['nrCpf']
            conversation_id = context['conversation_id']
            nodes_visited = output['output_watson']['nodes_visited']
            i = self.ids.index(conversation_id)
            atual = len(self.nodes[i])
            novo = len(nodes_visited)
            if novo > atual:
                self.nodes[i] = nodes_visited
        except KeyError:
            pass
        except ValueError:
            self.ids.append(conversation_id)
            self.nodes = self.nodes.append(nodes_visited)
            self.list_cpf = self.list_cpf.append(nr_cpf)
list.append returns None, since it mutates the list in place, so
self.nodes = self.nodes.append(nodes_visited)
assigns None to self.nodes (and likewise for self.list_cpf). Instead, just call the method without reassigning:
self.nodes.append(nodes_visited)
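A quick demonstration of the failure mode:

nodes = []
result = nodes.append([1, 2, 3])  # append mutates nodes and returns None
print(result)                     # None
print(nodes)                      # [[1, 2, 3]]

nodes = nodes.append([4, 5])      # rebinds nodes to None
nodes.append([6])                 # AttributeError: 'NoneType' object has no attribute 'append'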

python instantiate a nested class

I read several posts about nested classes, but the mechanism is still not clear to me.
Basically, what I want to do is define a class to solve ordinary differential equations, in which I define one inner class for each order of accuracy.
Here's the class:
# Rhs, AdamsMethods and rungekutta are project modules (not shown).

class AdamsBashforth(AdamsMethods):
    class _2nd(AdamsMethods):
        startup = False
        solved = False

        def __init__(self, dydt: Rhs, filename: str = None, save: bool = True):
            self.file = filename
            self.save = save
            super().__init__(dydt)

        def solve(self):
            self.time, self.u = self.dydt.createArray()
            self.u[1] = rungekutta.RK2.step(self.dydt.f, self.time[0], self.u[0], self.dt)
            for i in range(1, len(self.time)-1):
                self.u[i+1] = self.u[i] + self.dt/2*(3*self.dydt.f(self.time[i], self.u[i])
                                                     - self.dydt.f(self.time[i-1], self.u[i-1]))
            AdamsBashforth._2nd.solved = True
            if self.file != None:
                super().write2file()
            if self.save:
                return self.time, self.u

        def plot(self):
            if AdamsBashforth._2nd.solved:
                super().plot('Sys ODE solution using Adams-Bashforth 2nd order', 'time [s]', 'y(t)')
            else:
                print("Unsolved problem, call `solve` method before")

        @classmethod
        def step(cls, func, t: np.float, u: np.float, dt):
            def f(ti, ui):
                return np.array([function(ti, ui) for function in func])
            if AdamsBashforth._2nd.startup == False:
                # print("AB start-up")
                AdamsBashforth.u1 = rungekutta.RK2.step(func, t, u, dt)
                AdamsBashforth.startup = True
            unext = AdamsBashforth.u1 + dt/2.*(3.*f(t+dt, AdamsBashforth.u1) - f(t, u))
            AdamsBashforth.u1 = u
            return unext
Here's the way that I would really like to call it:
problem = AdamsBashforth._2nd(dydt)
but I receive an error saying that AdamsBashforth has no member _2nd.
So I tried the following, but I still get the same message:
class AdamsBashforth(AdamsMethods):
    def __init__(self, dydt, filename: str = None, save: bool = True):
        self._2nd = self._2nd(dydt, filename, save)

    def solve(self):
        pass

    class _2nd(AdamsMethods):
What is the right way to obtain a call like the first one (problem = AdamsBashforth._2nd(dydt)) while using the class _2nd?
EDIT
class AdamsBashforth(AdamsMethods):
    def __init__(self, dydt, filename: str = None, save: bool = True):
        self._2nd = AdamsBashforth._2nd(dydt, filename, save)

    def solve(self):
        pass

    class _2nd(AdamsMethods):
        startup = False
        solved = False

        def __init__(self, dydt: Rhs, filename: str = None, save: bool = True):
            self.file = filename
            self.save = save
            super().__init__(dydt)

        def solve(self):
            self.time, self.u = self.dydt.createArray()
            self.u[1] = rungekutta.RK2.step(self.dydt.f, self.time[0], self.u[0], self.dt)
            for i in range(1, len(self.time)-1):
                self.u[i+1] = self.u[i] + self.dt/2*(3*self.dydt.f(self.time[i], self.u[i])
                                                     - self.dydt.f(self.time[i-1], self.u[i-1]))
            _2nd.solved = True
            if self.file != None:
                super().write2file()
            if self.save:
                return self.time, self.u

        def plot(self):
            if AdamsBashforth._2nd.solved:
                super().plot('Sys ODE solution using Adams-Bashforth 2nd order', 'time [s]', 'y(t)')
            else:
                print("Unsolved problem, call `solve` method before")

        @classmethod
        def step(cls, func, t: np.float, u: np.float, dt):
            def f(ti, ui):
                return np.array([function(ti, ui) for function in func])
            if AdamsBashforth._2nd.startup == False:
                # print("AB start-up")
                AdamsBashforth.u1 = rungekutta.RK2.step(func, t, u, dt)
                AdamsBashforth.startup = True
            unext = AdamsBashforth.u1 + dt/2.*(3.*f(t+dt, AdamsBashforth.u1) - f(t, u))
            AdamsBashforth.u1 = u
            return unext
Calling it in this way:
ab2_p1 = adamsmethods.AdamsBashforth(problem1, 'ab2_1.dat')
ab2t, ab2u = ab2_p1._2nd.solve()
ab2_p1.plot()
looks like it works, but I don't know why, and I don't know whether it is the correct way!
I still have problems calling the classmethod, though. For example:
u = adamsmethods.AdamsBashforth._2nd.step(func0, t, u, dt)
doesn't work:
Traceback (most recent call last):
  File "drive.py", line 287, in <module>
    main()
  File "drive.py", line 149, in main
    u = adamsmethods.AdamsBashforth._2nd.step(func0, t, u, dt)
AttributeError: type object '_2nd' has no attribute 'step'
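A nested class is just an attribute of the outer class object, so AdamsBashforth._2nd(dydt) is the natural way to instantiate it, and there is no need to create a _2nd instance inside AdamsBashforth.__init__. A minimal sketch of the pattern (illustrative names, not the original solver):

class Outer:
    class Inner:
        @classmethod
        def step(cls, x):
            return x + 1

problem = Outer.Inner()        # instantiate the nested class directly
print(Outer.Inner.step(41))    # 42, the classmethod resolves fine

If _2nd.step raises the AttributeError above, the likely cause is that step ended up defined at the wrong indentation level (outside the inner class) or lost its @classmethod decorator, so the name was never bound on _2nd.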

Class inheritance type checking after pickling in Python

Is there a sure-fire way to check that the class of an object is a subclass of the desired super?
For example, in a migration script that I'm writing, I have to convert objects of a given type to dictionaries in a given manner to ensure two-way compatibility of the data.
This is best summed up like so:
Serializable
    User
    Status
    Issue
        Test
        Set
    Step
    Cycle
However, when I'm recursively checking objects after depickling, I receive a Test object that yields the following results:
Testing data object type:
type(data)
{type} <class '__main__.Test'>

Testing Class type:
type(Test())
{type} <class '__main__.Test'>

Testing object type against class type:
type(Test()) == type(data)
{bool} False

Testing if object isinstance() of Class:
isinstance(data, Test)
{bool} False

Testing if Class isinstance() of Super Class:
isinstance(Test(), Serializable)
{bool} True

Testing isinstance() of Super Class:
isinstance(data, Serializable)
{bool} False
Interestingly, it doesn't appear to have any such problem prior to pickling, as it handles the creation of the dictionary and integrity hash just fine.
This only crops up with depickled objects, in both Pickle and Dill.
For context, here's the code in its native environment, the DataCache object that is pickled:
class DataCache(object):
    _hash = ""
    _data = None

    @staticmethod
    def genHash(data):
        dataDict = DataCache.dictify(data)
        datahash = json.dumps(dataDict, sort_keys=True)
        return hashlib.sha256(datahash).digest()

    @staticmethod
    def dictify(data):
        if isinstance(data, list):
            datahash = []
            for item in data:
                datahash.append(DataCache.dictify(item))
        elif isinstance(data, (dict, collections.OrderedDict)):
            datahash = collections.OrderedDict()
            for key, value in data.iteritems():
                datahash[key] = DataCache.dictify(value)
        elif isinstance(data, Serializable):
            datahash = data.toDict()
        else:
            datahash = data
        return datahash

    def __init__(self, restoreDict={}):
        if restoreDict:
            self.__dict__.update(restoreDict)

    def __getinitargs__(self):
        return (self.__dict__)

    def set(self, data):
        self._hash = DataCache.genHash(data)
        self._data = data

    def verify(self):
        dataHash = DataCache.genHash(self._data)
        return (self._hash == dataHash)

    def get(self):
        return self._data
Finally, I know there are arguments for using JSON for readability in storage, but I needed Pickle's ability to convert straight to and from objects without specifying the object type myself (thanks to the nesting, that's not really feasible).
Am I going mad here, or does pickling do something to the class definitions?
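One cause worth ruling out (an assumption about what is happening, not a confirmed diagnosis): pickle compares classes by identity, so if the class object is recreated between dumping and loading, for example because the module is re-run, reloaded, or the class body is executed a second time, isinstance fails even though the name and definition look identical:

class Test(object):
    pass

obj = Test()

class Test(object):  # re-executing the class body rebinds the name
    pass             # to a brand-new class object

print(isinstance(obj, Test))                 # False: obj holds the old class
print(type(obj).__name__ == Test.__name__)   # True: same name, different object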
EDIT:
Minimal Implementation:
#!/usr/bin/python
# -*- coding: UTF-8 -*-
import requests
from aenum import Enum
import json  # _tricks
import base64
import argparse
import os
import sys
import datetime
import dill
import hashlib
import collections


class Serializable(object):
    def __init__(self, initDict={}):
        if initDict:
            self.__dict__.update(initDict)

    def __str__(self):
        return str(self.sortSelf())

    def sortSelf(self):
        return collections.OrderedDict(sorted(self.__dict__.items()))

    def toDict(self):
        return self.__dict__

    def fromDict(self, dict):
        # Not using __dict__.update(...) to avoid polluting objects with the excess data
        varMap = self.__dict__
        if dict and varMap:
            for key in varMap:
                if (key in dict):
                    varMap[key] = dict[key]
            self.__dict__.update(varMap)
            return self
        return None


class Issue(Serializable):
    def __init__(self, initDict={}):
        self.id = 0
        self.key = ""
        self.fields = {}
        if initDict:
            self.__dict__.update(initDict)
        Serializable.__init__(self)

    def fieldToDict(self, obj, key, type):
        if key in obj:
            result = obj[key]
        else:
            return None
        if result is None:
            return None
        if isinstance(result, type):
            return result.toDict()
        return result

    def fromDict(self, jsonDict):
        super(Issue, self).fromDict(jsonDict)
        self.fields["issuetype"] = IssueType().fromDict(self.fields["issuetype"])
        self.fields["assignee"] = User().fromDict(self.fields["assignee"])
        self.fields["creator"] = User().fromDict(self.fields["creator"])
        self.fields["reporter"] = User().fromDict(self.fields["reporter"])
        return self

    def toDict(self):
        result = super(Issue, self).toDict()
        blankKeys = []
        for fieldName, fieldValue in self.fields.iteritems():
            if fieldValue is None:
                blankKeys.append(fieldName)
        if blankKeys:
            for key in blankKeys:
                self.fields.pop(key, None)
        result["fields"]["issuetype"] = self.fieldToDict(result["fields"], "issuetype", IssueType)
        result["fields"]["creator"] = self.fieldToDict(result["fields"], "creator", User)
        result["fields"]["reporter"] = self.fieldToDict(result["fields"], "reporter", User)
        result["fields"]["assignee"] = self.fieldToDict(result["fields"], "assignee", User)
        return result


class IssueType(Serializable):
    def __init__(self):
        self.id = 0
        self.name = ""

    def toDict(self):
        return {"id": str(self.id)}


class Project(Serializable):
    def __init__(self):
        Serializable.__init__(self)
        self.id = 0
        self.name = ""
        self.key = ""


class Cycle(Serializable):
    def __init__(self):
        self.id = 0
        self.name = ""
        self.totalExecutions = 0
        self.endDate = ""
        self.description = ""
        self.totalExecuted = 0
        self.started = ""
        self.versionName = ""
        self.projectKey = ""
        self.versionId = 0
        self.environment = ""
        self.totalCycleExecutions = 0
        self.build = ""
        self.ended = ""
        self.name = ""
        self.modifiedBy = ""
        self.projectId = 0
        self.startDate = ""
        self.executionSummaries = {'executionSummary': []}


class Step(Serializable):
    def __init__(self):
        self.id = ""
        self.orderId = 0
        self.step = ""
        self.data = ""
        self.result = ""
        self.attachmentsMap = {}

    def toDict(self):
        dict = {}
        dict["step"] = self.step
        dict["data"] = self.data
        dict["result"] = self.result
        dict["attachments"] = []
        return dict


class Status(Serializable):
    def __init__(self):
        self.id = 0
        self.name = ""
        self.description = ""
        self.isFinal = True
        self.color = ""
        self.isNative = True
        self.statusCount = 0
        self.statusPercent = 0.0


class User(Serializable):
    def __init__(self):
        self.displayName = ""
        self.name = ""
        self.emailAddress = ""
        self.key = ""
        self.active = False
        self.timeZone = ""


class Execution(Serializable):
    def __init__(self):
        self.id = 0
        self.orderId = 0
        self.cycleId = -1
        self.cycleName = ""
        self.issueId = 0
        self.issueKey = 0
        self.projectKey = ""
        self.comment = ""
        self.versionId = 0
        self.versionName = ""
        self.executedOn = ""
        self.creationDate = ""
        self.executedByUserName = ""
        self.assigneeUserName = ""
        self.status = {}
        self.executionStatus = ""

    def fromDict(self, jsonDict):
        super(Execution, self).fromDict(jsonDict)
        self.status = Status().fromDict(self.status)
        # This is already listed as Execution Status, need to associate and convert!
        return self

    def toDict(self):
        result = super(Execution, self).toDict()
        result['status'] = result['status'].toDict()
        return result


class ExecutionContainer(Serializable):
    def __init__(self):
        self.executions = []

    def fromDict(self, jsonDict):
        super(ExecutionContainer, self).fromDict(jsonDict)
        self.executions = []
        for executionDict in jsonDict["executions"]:
            self.executions.append(Execution().fromDict(executionDict))
        return self


class Test(Issue):
    def __init__(self, initDict={}):
        if initDict:
            self.__dict__.update(initDict)
        Issue.__init__(self)

    def toDict(self):
        result = super(Test, self).toDict()
        stepField = "CustomField_0001"
        if result["fields"][stepField]:
            steps = []
            for step in result["fields"][stepField]["steps"]:
                steps.append(step.toDict())
            result["fields"][stepField] = steps
        return result

    def fromDict(self, jsonDict):
        super(Test, self).fromDict(jsonDict)
        stepField = "CustomField_0001"
        steps = []
        if stepField in self.fields:
            for step in self.fields[stepField]["steps"]:
                steps.append(Step().fromDict(step))
            self.fields[stepField] = {"steps": steps}
        return self


class Set(Issue):
    def __init__(self, initDict={}):
        self.__dict__.update(initDict)
        Issue.__init__(self)


class DataCache(object):
    _hash = ""
    _data = None

    @staticmethod
    def genHash(data):
        dataDict = DataCache.dictify(data)
        datahash = json.dumps(dataDict, sort_keys=True)
        return hashlib.sha256(datahash).digest()

    @staticmethod
    def dictify(data):
        if isinstance(data, list):
            datahash = []
            for item in data:
                datahash.append(DataCache.dictify(item))
        elif isinstance(data, (dict, collections.OrderedDict)):
            datahash = collections.OrderedDict()
            for key, value in data.iteritems():
                datahash[key] = DataCache.dictify(value)
        elif isinstance(data, Serializable):
            datahash = data.toDict()
        else:
            datahash = data
        return datahash

    def __init__(self, restoreDict={}):
        if restoreDict:
            self.__dict__.update(restoreDict)

    def __getinitargs__(self):
        return (self.__dict__)

    def set(self, data):
        self._hash = DataCache.genHash(data)
        self._data = data

    def verify(self):
        dataHash = DataCache.genHash(self._data)
        return (self._hash == dataHash)

    def get(self):
        return self._data


def saveCache(name, projectKey, object):
    filePath = "migration_caches/{projectKey}".format(projectKey=projectKey)
    if not os.path.exists(path=filePath):
        os.makedirs(filePath)
    cache = DataCache()
    cache.set(object)
    targetFile = open("{path}/{name}".format(name=name, path=filePath), 'wb')
    dill.dump(obj=cache, file=targetFile)
    targetFile.close()


def loadCache(name, projectKey):
    filePath = "migration_caches/{projectKey}/{name}".format(name=name, projectKey=projectKey)
    result = False
    try:
        targetFile = open(filePath, 'rb')
        try:
            cache = dill.load(targetFile)
            if isinstance(cache, DataCache):
                if cache.verify():
                    result = cache.get()
        except EOFError:
            # except BaseException:
            print("Failed to load cache from file: {filePath}\n".format(filePath=filePath))
    except IOError:
        print("Failed to load cache file at: {filePath}\n".format(filePath=filePath))
    targetFile.close()
    return result


testIssue = Test().fromDict({"id": 1000,
                             "key": "TEST",
                             "fields": {
                                 "issuetype": {
                                     "id": 1,
                                     "name": "TestIssue"
                                 },
                                 "assignee": "Minothor",
                                 "reporter": "Minothor",
                                 "creator": "Minothor",
                             }
                             })

saveCache("Test", "TestProj", testIssue)
result = loadCache("Test", "TestProj")
EDIT 2
The script in its current form now seems to work correctly with vanilla Pickle (I initially switched to Dill due to a similar issue, which was solved by the switch).
However, if you are here with this issue and require Dill's features, then, as Mike noted in the comments, it's possible to change the settings in dill.settings so that Dill pickles classes by reference, effectively mirroring pickle's standard pickling behaviour.
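Concretely, the switch looks like this (a sketch; dill.settings['byref'] is the setting that, to my knowledge, makes Dill serialize classes by reference the way pickle does):

import dill

dill.settings['byref'] = True  # pickle classes by reference, not by value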
