I have an object of a custom class that I am trying to serialize and permanently store.
When I serialize it, store it, load it and use it in the same run, it works fine. It only messes up when I've ended the process and then try to load it again from the pickle file. This is the code that works fine:
first_model = NgramModel(3, name="debug")
for paragraph in text:
first_model.train(paragraph_to_sentences(text))
# paragraph to sentences just uses regex to do the equivalent of splitting by punctuation
print(first_model.context_options)
# context_options is a dict (counter)
first_model = NgramModel.load_existing_model("debug")
#load_existing_model loads the pickle file. Look in the class code
print(first_model.context_options)
However, when I run this alone, it prints an empty counter:
first_model = NgramModel.load_existing_model("debug")
print(first_model.context_options)
This is a shortened version of the class file (the only two methods that touch the pickle/dill are update_pickle_state and load_existing_model):
import os
import dill
from itertools import count
from collections import Counter
from os import path
class NgramModel:
context_options: dict[tuple, set[str]] = {}
ngram_count: Counter[tuple] = Counter()
n = 0
pickle_path: str = None
num_paragraphs = 0
num_sentences = 0
def __init__(self, n: int, **kwargs):
self.n = n
self.pickle_path = NgramModel.pathify(kwargs.get('name', NgramModel.gen_pickle_name())) #use name if exists else generate random name
def train(self, paragraph_as_list: list[str]):
'''really the central method that coordinates everything else. Takes a list of sentences, generates data(n-grams) from each, updates the fields, and saves the instance (self) to a pickle file'''
self.num_paragraphs += 1
for sentence in paragraph_as_list:
self.num_sentences += 1
generated = self.generate_Ngrams(sentence)
self.ngram_count.update(generated)
for ngram in generated:
self.add_to_set(ngram)
self.update_pickle_state()
def update_pickle_state(self):
'''saves instance to pickle file'''
file = open(self.pickle_path, "wb")
dill.dump(self, file)
file.close()
#staticmethod
def load_existing_model(name: str):
'''returns object from pickle file'''
path = NgramModel.pathify(name)
file = open(path, "rb")
obj: NgramModel = dill.load(file)
return obj
def generate_Ngrams(self, string: str):
'''ref: https://www.analyticsvidhya.com/blog/2021/09/what-are-n-grams-and-how-to-implement-them-in-python/'''
words = string.split(" ")
words = ["<start>"] * (self.n - 1) + words + ["<end>"] * (self.n - 1)
list_of_tup = []
for i in range(len(words) + 1 - self.n):
list_of_tup.append((tuple(words[i + j] for j in range(self.n - 1)), words[i + self.n - 1]))
return list_of_tup
def add_to_set(self, ngram: tuple[tuple[str, ...], str]):
if ngram[0] not in self.context_options:
self.context_options[ngram[0]] = set()
self.context_options[ngram[0]].add(ngram[1])
#staticmethod
def pathify(name):
'''converts name to path'''
return f"models/{name}.pickle"
#staticmethod
def gen_pickle_name():
for i in count():
new_name = f"unnamed-pickle-{i}"
if not path.exists(NgramModel.pathify(new_name)):
return new_name
All the other fields print properly and are complete and correct except the two dicts
The problem is that is that context_options is a mutable class-member, not an instance member. If I had to guess, dill is only pickling instance members, since the class definition holds class members. That would account for why you see a "filled-out" context_options when you're working in the same shell but not when you load fresh — you're using the dirtied class member in the former case.
It's for stuff like this that you generally don't want to use mutable class members (or similarly, mutable default values in function signatures). More typical is to use something like context_options: dict[tuple, set[str]] = None and then check if it's None in the __init__ to set it to a default value, e.g., an empty dict. Alternatively, you could use a #dataclass and provide a field initializer, i.e.
#dataclasses.dataclass
class NgramModel:
context_options: dict[tuple, set[str]] = dataclasses.field(default_factory=dict)
...
You can observe what I mean about it being a mutable class member with, for instance...
if __name__ == '__main__':
ng = NgramModel(3, name="debug")
print(ng.context_options) # {}
ng.context_options[("foo", "bar")] = {"baz", "qux"}
print(ng.context_options) # {('foo', 'bar'): {'baz', 'qux'}}
ng2 = NgramModel(3, name="debug")
print(ng2.context_options) # {('foo', 'bar'): {'baz', 'qux'}}
I would expect a brand new ng2 to have the same context that the brand new ng had - empty (or whatever an appropriate default is).
This question already has answers here:
Getting the name of a variable as a string
(32 answers)
Closed 4 months ago.
Is it possible to get the original variable name of a variable passed to a function? E.g.
foobar = "foo"
def func(var):
print var.origname
So that:
func(foobar)
Returns:
>>foobar
EDIT:
All I was trying to do was make a function like:
def log(soup):
f = open(varname+'.html', 'w')
print >>f, soup.prettify()
f.close()
.. and have the function generate the filename from the name of the variable passed to it.
I suppose if it's not possible I'll just have to pass the variable and the variable's name as a string each time.
EDIT: To make it clear, I don't recommend using this AT ALL, it will break, it's a mess, it won't help you in any way, but it's doable for entertainment/education purposes.
You can hack around with the inspect module, I don't recommend that, but you can do it...
import inspect
def foo(a, f, b):
frame = inspect.currentframe()
frame = inspect.getouterframes(frame)[1]
string = inspect.getframeinfo(frame[0]).code_context[0].strip()
args = string[string.find('(') + 1:-1].split(',')
names = []
for i in args:
if i.find('=') != -1:
names.append(i.split('=')[1].strip())
else:
names.append(i)
print names
def main():
e = 1
c = 2
foo(e, 1000, b = c)
main()
Output:
['e', '1000', 'c']
To add to Michael Mrozek's answer, you can extract the exact parameters versus the full code by:
import re
import traceback
def func(var):
stack = traceback.extract_stack()
filename, lineno, function_name, code = stack[-2]
vars_name = re.compile(r'\((.*?)\).*$').search(code).groups()[0]
print vars_name
return
foobar = "foo"
func(foobar)
# PRINTS: foobar
Looks like Ivo beat me to inspect, but here's another implementation:
import inspect
def varName(var):
lcls = inspect.stack()[2][0].f_locals
for name in lcls:
if id(var) == id(lcls[name]):
return name
return None
def foo(x=None):
lcl='not me'
return varName(x)
def bar():
lcl = 'hi'
return foo(lcl)
bar()
# 'lcl'
Of course, it can be fooled:
def baz():
lcl = 'hi'
x='hi'
return foo(lcl)
baz()
# 'x'
Moral: don't do it.
Another way you can try if you know what the calling code will look like is to use traceback:
def func(var):
stack = traceback.extract_stack()
filename, lineno, function_name, code = stack[-2]
code will contain the line of code that was used to call func (in your example, it would be the string func(foobar)). You can parse that to pull out the argument
You can't. It's evaluated before being passed to the function. All you can do is pass it as a string.
#Ivo Wetzel's answer works in the case of function call are made in one line, like
e = 1 + 7
c = 3
foo(e, 100, b=c)
In case that function call is not in one line, like:
e = 1 + 7
c = 3
foo(e,
1000,
b = c)
below code works:
import inspect, ast
def foo(a, f, b):
frame = inspect.currentframe()
frame = inspect.getouterframes(frame)[1]
string = inspect.findsource(frame[0])[0]
nodes = ast.parse(''.join(string))
i_expr = -1
for (i, node) in enumerate(nodes.body):
if hasattr(node, 'value') and isinstance(node.value, ast.Call)
and hasattr(node.value.func, 'id') and node.value.func.id == 'foo' # Here goes name of the function:
i_expr = i
break
i_expr_next = min(i_expr + 1, len(nodes.body)-1)
lineno_start = nodes.body[i_expr].lineno
lineno_end = nodes.body[i_expr_next].lineno if i_expr_next != i_expr else len(string)
str_func_call = ''.join([i.strip() for i in string[lineno_start - 1: lineno_end]])
params = str_func_call[str_func_call.find('(') + 1:-1].split(',')
print(params)
You will get:
[u'e', u'1000', u'b = c']
But still, this might break.
You can use python-varname package
from varname import nameof
s = 'Hey!'
print (nameof(s))
Output:
s
Package below:
https://github.com/pwwang/python-varname
For posterity, here's some code I wrote for this task, in general I think there is a missing module in Python to give everyone nice and robust inspection of the caller environment. Similar to what rlang eval framework provides for R.
import re, inspect, ast
#Convoluted frame stack walk and source scrape to get what the calling statement to a function looked like.
#Specifically return the name of the variable passed as parameter found at position pos in the parameter list.
def _caller_param_name(pos):
#The parameter name to return
param = None
#Get the frame object for this function call
thisframe = inspect.currentframe()
try:
#Get the parent calling frames details
frames = inspect.getouterframes(thisframe)
#Function this function was just called from that we wish to find the calling parameter name for
function = frames[1][3]
#Get all the details of where the calling statement was
frame,filename,line_number,function_name,source,source_index = frames[2]
#Read in the source file in the parent calling frame upto where the call was made
with open(filename) as source_file:
head=[source_file.next() for x in xrange(line_number)]
source_file.close()
#Build all lines of the calling statement, this deals with when a function is called with parameters listed on each line
lines = []
#Compile a regex for matching the start of the function being called
regex = re.compile(r'\.?\s*%s\s*\(' % (function))
#Work backwards from the parent calling frame line number until we see the start of the calling statement (usually the same line!!!)
for line in reversed(head):
lines.append(line.strip())
if re.search(regex, line):
break
#Put the lines we have groked back into sourcefile order rather than reverse order
lines.reverse()
#Join all the lines that were part of the calling statement
call = "".join(lines)
#Grab the parameter list from the calling statement for the function we were called from
match = re.search('\.?\s*%s\s*\((.*)\)' % (function), call)
paramlist = match.group(1)
#If the function was called with no parameters raise an exception
if paramlist == "":
raise LookupError("Function called with no parameters.")
#Use the Python abstract syntax tree parser to create a parsed form of the function parameter list 'Name' nodes are variable names
parameter = ast.parse(paramlist).body[0].value
#If there were multiple parameters get the positional requested
if type(parameter).__name__ == 'Tuple':
#If we asked for a parameter outside of what was passed complain
if pos >= len(parameter.elts):
raise LookupError("The function call did not have a parameter at postion %s" % pos)
parameter = parameter.elts[pos]
#If there was only a single parameter and another was requested raise an exception
elif pos != 0:
raise LookupError("There was only a single calling parameter found. Parameter indices start at 0.")
#If the parameter was the name of a variable we can use it otherwise pass back None
if type(parameter).__name__ == 'Name':
param = parameter.id
finally:
#Remove the frame reference to prevent cyclic references screwing the garbage collector
del thisframe
#Return the parameter name we found
return param
If you want a Key Value Pair relationship, maybe using a Dictionary would be better?
...or if you're trying to create some auto-documentation from your code, perhaps something like Doxygen (http://www.doxygen.nl/) could do the job for you?
I wondered how IceCream solves this problem. So I looked into the source code and came up with the following (slightly simplified) solution. It might not be 100% bullet-proof (e.g. I dropped get_text_with_indentation and I assume exactly one function argument), but it works well for different test cases. It does not need to parse source code itself, so it should be more robust and simpler than previous solutions.
#!/usr/bin/env python3
import inspect
from executing import Source
def func(var):
callFrame = inspect.currentframe().f_back
callNode = Source.executing(callFrame).node
source = Source.for_frame(callFrame)
expression = source.asttokens().get_text(callNode.args[0])
print(expression, '=', var)
i = 1
f = 2.0
dct = {'key': 'value'}
obj = type('', (), {'value': 42})
func(i)
func(f)
func(s)
func(dct['key'])
func(obj.value)
Output:
i = 1
f = 2.0
s = string
dct['key'] = value
obj.value = 42
Update: If you want to move the "magic" into a separate function, you simply have to go one frame further back with an additional f_back.
def get_name_of_argument():
callFrame = inspect.currentframe().f_back.f_back
callNode = Source.executing(callFrame).node
source = Source.for_frame(callFrame)
return source.asttokens().get_text(callNode.args[0])
def func(var):
print(get_name_of_argument(), '=', var)
If you want to get the caller params as in #Matt Oates answer answer without using the source file (ie from Jupyter Notebook), this code (combined from #Aeon answer) will do the trick (at least in some simple cases):
def get_caller_params():
# get the frame object for this function call
thisframe = inspect.currentframe()
# get the parent calling frames details
frames = inspect.getouterframes(thisframe)
# frame 0 is the frame of this function
# frame 1 is the frame of the caller function (the one we want to inspect)
# frame 2 is the frame of the code that calls the caller
caller_function_name = frames[1][3]
code_that_calls_caller = inspect.findsource(frames[2][0])[0]
# parse code to get nodes of abstract syntact tree of the call
nodes = ast.parse(''.join(code_that_calls_caller))
# find the node that calls the function
i_expr = -1
for (i, node) in enumerate(nodes.body):
if _node_is_our_function_call(node, caller_function_name):
i_expr = i
break
# line with the call start
idx_start = nodes.body[i_expr].lineno - 1
# line with the end of the call
if i_expr < len(nodes.body) - 1:
# next expression marks the end of the call
idx_end = nodes.body[i_expr + 1].lineno - 1
else:
# end of the source marks the end of the call
idx_end = len(code_that_calls_caller)
call_lines = code_that_calls_caller[idx_start:idx_end]
str_func_call = ''.join([line.strip() for line in call_lines])
str_call_params = str_func_call[str_func_call.find('(') + 1:-1]
params = [p.strip() for p in str_call_params.split(',')]
return params
def _node_is_our_function_call(node, our_function_name):
node_is_call = hasattr(node, 'value') and isinstance(node.value, ast.Call)
if not node_is_call:
return False
function_name_correct = hasattr(node.value.func, 'id') and node.value.func.id == our_function_name
return function_name_correct
You can then run it as this:
def test(*par_values):
par_names = get_caller_params()
for name, val in zip(par_names, par_values):
print(name, val)
a = 1
b = 2
string = 'text'
test(a, b,
string
)
to get the desired output:
a 1
b 2
string text
Since you can have multiple variables with the same content, instead of passing the variable (content), it might be safer (and will be simpler) to pass it's name in a string and get the variable content from the locals dictionary in the callers stack frame. :
def displayvar(name):
import sys
return name+" = "+repr(sys._getframe(1).f_locals[name])
If it just so happens that the variable is a callable (function), it will have a __name__ property.
E.g. a wrapper to log the execution time of a function:
def time_it(func, *args, **kwargs):
start = perf_counter()
result = func(*args, **kwargs)
duration = perf_counter() - start
print(f'{func.__name__} ran in {duration * 1000}ms')
return result
So I am working on a script to generate serialnumbers for a product. I want to make a txt file where I the script prints the generated key. somehow it cant print in there but I don't know what I need to changes about it.
key = Key('aaaa-bbbb-cccc-dddd-1111')
fh = open('key.txt')
fh.write(Key)
Ok, based on your response, I've mocked up the Key class as follows. Without more information, it's not possible to give you a definitive answer, but hopefully this helps!
class Key:
def __init__(self, serial):
self.serial = serial
def process_serial(self):
# Your processing here
...
return processed_serial # This should be a string
Then to write to file, you can do:
key = Key('aaaa-bbbb-cccc-dddd-1111')
with open('key.txt', 'w') as f:
f.write(key.process_serial())
Alternatively, you can add a __str__ method to your class, which will specify what happens when you call the Python builtin str on your object.
class Key:
def __init__(self, serial):
self.serial = serial
def __str__(self):
out = ... # construct what you want to write to file
return out
Giving:
key = Key('aaaa-bbbb-cccc-dddd-1111')
with open('key.txt', 'w') as f:
f.write(str(key))
You might also consider adding this as a method to your Key class
class Key:
__init__(self, serial):
self.serial = serial
def process_serial(self):
# Your processing here
...
return processed_serial # This should be a string
def write(self, file_name):
with open(file_name, 'w') as f:
f.write(self.process_serial)
Try:
key = "Key('aaaa-bbbb-cccc-dddd-1111')"
fh = open('key.txt', "w")
fh.write(key)
To generate a text file that doesn't already exist you need to use "w" .
Try doing:
key = Key('aaaa-bbbb-cccc-dddd-1111')
with open('key.txt', 'w') as fh:
fh.write(key)
Hope that Helps!
Note: it must be in the with ... so it writes, if its not there the file is considered as closed.
Let's say I have a class like so:
class Shell:
def cat(self, file):
try:
with open(file, 'r') as f:
print f.read()
except IOError:
raise IOError('invalid file location: {}'.format(f))
def echo(self, message):
print message
def ls(self, path):
print os.listdir(path)
In a javascript context, you might be able to do something like "Class"[method_name](), depending on how things were structured. I am looking for something similar in python to make this a "simulated operating system". EG:
import os
def runShell(user_name):
user_input = None
shell = Shell()
while(user_input != 'exit' or user_input != 'quit'):
user_input = raw_input('$'+ user_name + ': ')
...
now, the idea is they can type in something like this...
$crow: cat ../my_text
... and behind the scenes, we get this:
shell.cat('../my_text')
Similarly, I would like to be able to print all method definitions that exist within that class when they type help. EG:
$crow: help\n
> cat (file)
> echo (message)
> ls (path)
is such a thing achievable in python?
You can use the built-in function vars to expose all the members of an object. That's maybe the simplest way to list those for your users. If you're only planning to print to stdout, you could also just call help(shell), which will print your class members along with docstrings and so on. help is really only intended for the interactive interpreter, though, so you'd likely be better off writing your own help-outputter using vars and the __doc__ attribute that's magically added to objects with docstrings. For example:
class Shell(object):
def m(self):
'''Docstring of C#m.'''
return 1
def t(self, a):
'''Docstring of C#t'''
return 2
for name, obj in dict(vars(Shell)).items():
if not name.startswith('__'): #filter builtins
print(name, '::', obj.__doc__)
To pick out and execute a particular method of your object, you can use getattr, which grabs an attribute (if it exists) from an object, by name. For example, to select and run a simple function with no arguments:
fname = raw_input()
if hasattr(shell, fname):
func = getattr(shell, fname)
result = func()
else:
print('That function is not defined.')
Of course you could first tokenize the user input to pass arguments to your function as needed, like for your cat example:
user_input = raw_input().split() # tokenize
fname, *args = user_input #This use of *args syntax is not available prior to Py3
if hasattr(shell, fname):
func = getattr(shell, fname)
result = func(*args) #The *args syntax here is available back to at least 2.6
else:
print('That function is not defined.')
def Stats(self):
Stats = {}
for i in self.ExpTable:
Stats[i] = self.GetLvl(i)
for i in Stats:
print i + ":" + str(Stats[i])
I need to be able to use ExpTable as a variable when defining as i need to use other dict's later on all in format of {"String":Integer}
def Stats(self, {ExpTable}):
pass
something like this which works using it as a dictionary but being able to change dictonaries
If I understand correctly, you want the Stats method to take a dictionary as an argument, but to use the object's ExpTable attribute if none is provided.
def Stats(self, d=None):
if d is None:
d = self.ExpTable
new_stats = {}
for i in d:
new_stats[i] = self.GetLvl(i)
for i in new_stats:
print i + ":" + str(new_stats[i])