Calling a python script within another python script with args - python

Current Implementation which needs optimization
import subprocess

# Spawn the helper script with two arguments and capture both of its streams.
childprocess = subprocess.Popen(
    ['python',
     '/full_path_to_directory/called_script.py',
     'arg1',
     'arg2'],
    stdout=subprocess.PIPE,
    stderr=subprocess.PIPE)
# communicate() waits for the child to exit and returns (stdout, stderr);
# element 0 is everything the child wrote to stdout, as bytes.
returnVal = childprocess.communicate()[0]
# BUG FIX: the original printed `retVal`, an undefined name (NameError);
# the captured output lives in `returnVal`.
print(returnVal)
Is this a correct way to call another script(called_script.py) within the current working directory?
Is there a better way to call the other script? I used import script but it gives me below error
called_script.py
import sys


def func(arg1, arg2, arg3):
    """Placeholder worker that will receive three command-line arguments.

    The original body contained only comments, which is a syntax error;
    `pass` keeps the stub valid until real logic is filled in.
    """
    # doSomething
    # sys.stdout.write(returnVal)  # NOTE: the stream is sys.stdout, not sys.out
    pass


if __name__ == "__main__":
    # BUG FIX: the original called func(arg1, arg2, arg3) with names that
    # were never defined at module level; take the values from the command
    # line instead, which is how the caller passes them.
    func(*sys.argv[1:4])
Implementation 2 (throws exception and errored out)
caller_script.py
Both of them are under the same path (i.e. /home/bin)
# NOTE(review): arg1/arg2/arg3 are never defined in this scope, so this
# snippet raises NameError as written; concrete values (e.g. taken from
# sys.argv) must be bound before calling func().
import called_script
returnVal = called_script.func(arg1,arg2,arg3)
print(returnVal)
Output:
nullNone
Traceback (most recent call last):
File "/path_to_caller/caller_script.py", line 89, in <module>
l.simple_bind_s(binddn, pw)
File "/usr/lib64/python2.6/site-packages/ldap/ldapobject.py", line 206, in simple_bind_s
msgid = self.simple_bind(who,cred,serverctrls,clientctrls)
File "/usr/lib64/python2.6/site-packages/ldap/ldapobject.py", line 200, in simple_bind
return self._ldap_call(self._l.simple_bind,who,cred,EncodeControlTuples(serverctrls),EncodeControlTuples(clientctrls))
File "/usr/lib64/python2.6/site-packages/ldap/ldapobject.py", line 96, in _ldap_call
result = func(*args,**kwargs)
TypeError: argument 2 must be string or read-only buffer, not None
Another alternative I used and gave me an error is
Implementation 3(throws exception and errors out)
caller_script.py
import ldap
# NOTE(review): `subprocess`, `cp`, `binddn` and `sys` are used below but are
# never imported/defined in this snippet.
# BUG: subprocess.call() returns the child's *exit status* (an int), not its
# output -- feeding it to simple_bind_s() as the credential is exactly what
# raises "TypeError: argument 2 must be string or read-only buffer, not int"
# in the pasted traceback.  subprocess.check_output() (or
# Popen.communicate()) is what yields the child's stdout.
returnVal = subprocess.call(['python','called_script.py','arg1','arg2'])
print(returnVal)
l = ldap.initialize(cp.get('some_config_ref','some_url'))
try:
    l.protocol_version = ldap.VERSION3
    l.simple_bind_s(binddn, returnVal)
except ldap.INVALID_CREDENTIALS:
    sys.stderr.write("Your username or password is incorrect.")
    sys.exit(1)
# Python 2 `except X, e` syntax; e.message and dict.has_key() below are
# likewise Python 2 only.
except ldap.LDAPError, e:
    if type(e.message) == dict and e.message.has_key('xyz'):
        sys.stderr.write(e.message['xyz'])
    else:
        sys.stderr.write(e)
    sys.exit(1)
Output:
returnVal0Traceback (most recent call last):
File "./path_to_script/caller_script.py", line 88, in <module>
l.simple_bind_s(binddn, pw)
File "/usr/lib64/python2.6/site-packages/ldap/ldapobject.py", line 206, in simple_bind_s
msgid = self.simple_bind(who,cred,serverctrls,clientctrls)
File "/usr/lib64/python2.6/site-packages/ldap/ldapobject.py", line 200, in simple_bind
return self._ldap_call(self._l.simple_bind,who,cred,EncodeControlTuples(serverctrls),EncodeControlTuples(clientctrls))
File "/usr/lib64/python2.6/site-packages/ldap/ldapobject.py", line 96, in _ldap_call
result = func(*args,**kwargs)
TypeError: argument 2 must be string or read-only buffer, not int

Here is an example where you are calling a function from another file, you pass one value, a list, which can have an arbitrary amount of numbers, and you get the sum. Make sure they are in the same directory or you will need the path. The function in your example "script.py" does not allow you to pass a value.
called_script.py
def add_many(list_add):
    """Return the sum of all the numbers in *list_add*."""
    return sum(list_add)
caller_script.py
import called_script

# Any number of values may go in this list; add_many sums them all.
numbers = [1, 2, 3, 4]
total = called_script.add_many(numbers)
print(total)

Related

How to get the input from the subprocess?

I have a Python program that will be called by another Python program via a subprocess:
The python program is like this:
import argparse
import sys
from base64 import b64decode
import json
def set_key(args, **kwargs):
    """Write a base64-decoded key to /tmp/key."""
    # NOTE(review): `kwargs` is a dict, but b64decode() requires str/bytes --
    # this is exactly the TypeError in the pasted traceback.  The base64
    # payload presumably has to arrive via stdin or an argument instead;
    # TODO confirm against the caller.
    path = "/tmp/key"
    print(kwargs)
    try:
        with open(path, 'wb') as open_file:
            b = b64decode(kwargs)
            open_file.write(b)
        # NOTE(review): `Result` is not defined anywhere in this snippet.
        return Result(True)
    except OSError as e:
        return Result(False)
    # Unreachable: both the try and except paths return before this line.
    return Result(True)

# Maps the CLI sub-command name to its handler.
commands_map = {
    "set-key": set_key,
}
def main(sys_argv=None):
    """Parse the command line and dispatch to the chosen sub-command."""
    parser = argparse.ArgumentParser(
        description="a util command"
    )
    parser.add_argument("-N", dest="check_testing", action="store_true",
                        help="check testing")
    parser.add_argument("cmd", choices=sorted(list(commands_map)))
    # parse_known_args leaves unrecognized tokens in `remaining`.
    args, remaining = parser.parse_known_args(sys_argv)
    # Not entirely sure there's much benefit to this check.
    if args.check_testing:
        # NOTE(review): the parser never defines a `testing` attribute, so
        # this line raises AttributeError whenever -N is passed.
        return 1 if args.testing else 0
    # NOTE(review): set_key() returns a single Result object, not a
    # (result, vals) pair, and `args.testing` again does not exist -- this
    # call/unpacking cannot succeed as written.
    result, vals = commands_map[args.cmd](
        remaining, testing=args.testing,
        check_testing=args.check_testing)
    print(json.dumps(vals, indent=2, sort_keys=True))
    return 0 if result else 1

if __name__ == "__main__":
    sys.exit(main())
And the caller program is:
import subprocess

# Raw JSON document fed to the child process on its stdin.
# BUG FIX: renamed from `input` -- that name shadows the builtin input()
# and is easy to confuse with subprocess.run's own `input=` keyword.
payload = b'{\n "keys": "l0Pu3TlknxWqTZwDG1yJcjUDGcBH7c8F19fkxeNmBl/2wXQoochlbxLTKhgkzeQNRDvFkQfMBcdlsbcxrrEQX+IydyiLkU5o8Gmhe2JGP56CNPLIefl9WPvLlPQBdvjEWO2UBaBjo2VW3Xsd1Ng+xFSUbP/ls7dso+h5/Ty37Rw="\n}'
cmda = ['python3', 'test.py', 'set-key']
try:
    # input= delivers the payload on the child's stdin; the child must
    # actually read sys.stdin to receive it (test.py currently does not --
    # that is the bug reported in this question).
    s = subprocess.run(cmda, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, input=payload)
except subprocess.CalledProcessError as e:
    print(e.stderr)
When I try to execute the call program, it complains the subprocess program kwargs is a dict.
b'Traceback (most recent call last):\n File "test.py", line 46, in <module>\n sys.exit(main())\n File "test.py", line 40, in main\n check_testing=args.check_testing)\n File "test.py", line 12, in set_key\n b = b64decode(kwargs)\n File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/base64.py", line 80, in b64decode\n s = _bytes_from_decode_data(s)\n File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/base64.py", line 46, in _bytes_from_decode_data\n "string, not %r" % s.__class__.__name__) from None\nTypeError: argument should be a bytes-like object or ASCII string, not \'dict\'\n'
How can I properly pass the input to the subprocess program so that I can write to a file?
Also, args and kwargs seem to be empty (I expected them to be filled from the input passed in the subprocess call). Does anyone know why they are empty?

Multiprocessing map unorderable?

I have an operation using a dictionary that I want to parallelize, but multiprocessing.map is causing me headaches
def dict_segmentor(dictionnary, n_processes):
    """Split *dictionnary* into a list of sub-dicts of roughly equal size.

    Each segment holds ``seg_size`` consecutive items (insertion order), so
    the segments can be handed to a multiprocessing pool one per worker.
    Returns a list of dicts whose union is the original dict.
    """
    print("len dictionnary")
    print(len(dictionnary))
    if len(dictionnary) < n_processes:
        # max(1, ...) guards the empty-dict case: a step of 0 would make
        # range() raise ValueError.
        seg_size = max(1, len(dictionnary))
    else:
        seg_size = len(dictionnary) // n_processes
    print("segmenting dictionnary")
    print("seg_size " + str(seg_size))
    print("len(dictionnary) " + str(len(dictionnary)))
    itemlist = list(dictionnary.items())
    # BUG FIX: the original range started at 1, which silently dropped the
    # first item and misaligned every segment; slicing must start at 0.
    seg_ranges = [dict(itemlist[s:s + seg_size])
                  for s in range(0, len(dictionnary), seg_size)]
    print("finished")
    return seg_ranges
def multiprocess_calc(n_processes, dictionnary,struc):
    """Segment *dictionnary* and fan the segments out to a process pool."""
    # NOTE(review): these two self-assignments are no-ops.
    dictionnary=dictionnary
    struc=struc
    seg_ranges1 = dict_segmentor(dictionnary,n_processes)
    #this is invoked to break the dict to be passed into dicts into a list. Works as expected
    print("seg_range_check")
    print("seg_ranges1 {}".format(type(seg_ranges1)))  # NOTE(review): dict_segmentor returns a *list*
    print("seg_ranges1 {}".format(type(seg_ranges1[0])))  # each element is a dict
    print("seg_ranges1 {}".format(len(seg_ranges1)))
    print("seg_ranges1 {}".format(len(seg_ranges1[0])))
    processes = multiprocessing.Pool(n_processes)
    print("Mapping Building")
    # Builder is defined elsewhere in this script; the trailing 1 is the
    # chunksize argument to Pool.map.  NOTE(review): the pool is never
    # closed/joined -- presumably intentional for a one-shot script; confirm.
    processes.map(Builder, seg_ranges1,1)
def main():
    """Detect CPUs and launch the parallel computation."""
    file_multiprocess = 'pref_multiprocess.csv'
    n_CPUs = multiprocessing.cpu_count()
    # Leave one core free for the parent process.
    n_processes = n_CPUs-1
    print("\nNumber of CPUs detected:", n_CPUs)
    # NOTE(review): file_multiprocess is a CSV *filename* (a string), but
    # multiprocess_calc expects a dict; `struc` is also undefined in this
    # snippet.  Presumably the file must be loaded into a dict first --
    # TODO confirm against the full script.
    multiprocess_calc(n_processes, file_multiprocess,struc)
if __name__ == '__main__':
    main()
Here is complete Traceback:
Traceback (most recent call last):
File "<ipython-input-37-d0279721826c>", line 1, in <module>
runfile('Pyscript.py', wdir='C:/Python Scripts')
File "C:\Anaconda3\lib\site-packages\spyderlib\widgets\externalshell\sitecustomize.py", line 714, in runfile
execfile(filename, namespace)
File "C:\Anaconda3\lib\site-packages\spyderlib\widgets\externalshell\sitecustomize.py", line 89, in execfile
exec(compile(f.read(), filename, 'exec'), namespace)
File "Pyscript.py", line 1033, in <module>
main()
File "Pyscript.py", line 1025, in main
multiprocess_calc(n_processes, dictionnary,struc)
File "Pyscript.py", line 911, in multiprocess_calc
processes.map(Builder, seg_ranges1,1)
File "C:\Anaconda3\lib\multiprocessing\pool.py", line 260, in map
return self._map_async(func, iterable, mapstar, chunksize).get()
File "C:\Anaconda3\lib\multiprocessing\pool.py", line 608, in get
raise self._value
TypeError: unorderable types: list() > int()
I don't understand, even by reading this carefully (https://docs.python.org/3.5/library/multiprocessing.html#module-multiprocessing).
Each chunk is a dict, so it should be sent through map to the builder.
But instead I get that stupid error and the traceback doesn't help. I looked into the code of pool.py but no luck,
and my builder is not involved since its first operation (a control printing) is not even displayed. The builder function seems to be totally ignored (and there is not even a syntax error)
So I conclude this is a problem with map.
In case I would have misunderstood the multiprocessing.map function and that it would first make a chunk, then iterate over it to apply map on each sub sub element, what function of multiprocessing could I use? Apply use only one thread. That would mean I should do this manually?
Please feel free to correct my code and give me some insights. Thanks in advance
Edit: here is the builder function:
def Builder(dictionary,struc=struc):
    # NOTE(review): `struc=struc` captures a module-level `struc` at def time,
    # so it must already exist when this def executes.  Appends made to it in
    # worker processes will not be visible in the parent process.
    #def Builder(keys, dictionary=dictionary,struc=struc): #Alternative
    #Note, I even tried to use only the keys, passing the dictionary from a global variable but it didn't work
    print("Building Tree") #Not even displayed
    print("type dictionary"+str(type(dictionary)))
    frags=0
    try:
        if True:
            print("Building")
            #for id in keys: #Alternative
            for id in dictionary:
                seq=dictionary[id]
                for i in range(3):
                    frags+=1
                    if len(seq)<=3:
                        break
                    # NOTE(review): when i == 0 this is seq[0:-0], i.e.
                    # seq[0:0] -- the empty sequence.  Almost certainly not
                    # intended; verify the trimming logic.
                    seq=seq[i:-i]
                    struc.append(seq)
            print("Number of frags found {}".format(frags))
    except TypeError as e:
        print (e)
        print ("error in Builder")

Class variable in multiprocessing - python

Here is my code:
import multiprocessing
import dill
class Some_class():
    """Small demo class whose method is shipped to pool workers via dill."""

    # Shared by every instance.  NOTE(review): resolving this through the
    # class *name* inside print_name is what fails with NameError when the
    # method is unpickled in a worker that cannot import this module under
    # the same name (e.g. __main__) -- confirm how workers import it.
    class_var = 'Foo'

    def __init__(self, param):
        # Per-instance label used by print_name.
        self.name = param

    def print_name(self):
        print("we are in object "+self.name)
        print(Some_class.class_var)
def run_dill_encoded(what):
    """Decode a dill-pickled ``(callable, args)`` pair and invoke it."""
    payload = dill.loads(what)
    return payload[0](*payload[1])
def apply_async(pool, fun, args):
    """Submit ``fun(*args)`` to *pool*, dill-pickling the call so the worker
    can reconstruct it via run_dill_encoded."""
    encoded = dill.dumps((fun, args))
    return pool.apply_async(run_dill_encoded, (encoded,))
if __name__ == '__main__':
    # Two demo instances whose print_name method is run in pool workers.
    list_names = [Some_class('object_1'), Some_class('object_2')]
    pool = multiprocessing.Pool(processes=4)
    # Ship each bound-method call through the dill-based apply_async helper.
    results = [apply_async(pool, Some_class.print_name, args=(x,)) for x in list_names]
    # .get() re-raises any exception that occurred in the worker.
    output = [p.get() for p in results]
    print(output)
It returns error:
multiprocessing.pool.RemoteTraceback:
"""
Traceback (most recent call last):
File "C:\Python34\lib\multiprocessing\pool.py", line 119, in worker
result = (True, func(*args, **kwds))
File "C:\...\temp_obj_output_standard.py", line 18, in run_dill_encoded
return fun(*args)
File "C:/...temp_obj_output_standard.py", line 14, in print_name
print(Some_class.class_var)
NameError: name 'Some_class' is not defined
"""
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "C:/...temp_obj_output_standard.py", line 31, in <module>
output = [p.get() for p in results]
File "C:/...temp_obj_output_standard.py", line 31, in <listcomp>
output = [p.get() for p in results]
File "C:\Python34\lib\multiprocessing\pool.py", line 599, in get
raise self._value
NameError: name 'Some_class' is not defined
Process finished with exit code 1
The code works fine without line print(Some_class.class_var). What is wrong with accessing class variables, both objects should have it and I don't think processes should conflict about it. Am I missing something?
Any suggestions on how to troubleshoot it? Do not worry about run_dill_encoded and
apply_async, I am using this solution until I compile multiprocess on Python 3.x.
P.S. This is already enough, but stackoverflow wants me to put more details, not really sure what to put.

mapper_pre_filter in MRJob

I have been trying to tweek the mapper_pre_filter example given here. Now, if instead of specifying the command directly in steps, if I'm writing a method to return that command, like this:
from mrjob.job import MRJob
from mrjob.protocol import JSONValueProtocol
class KittiesJob(MRJob):
    """MRJob that counts input lines containing the word 'kitty'."""

    OUTPUT_PROTOCOL = JSONValueProtocol

    def filter_input(self):
        # The shell command used to pre-filter the mapper's input.
        return ''' grep 'kitty' '''

    def test_for_kitty(self, _, value):
        yield None, 0  # make sure we have some output
        if 'kitty' in value:
            yield None, 1

    def sum_missing_kitties(self, _, values):
        yield None, sum(values)

    def steps(self):
        # BUG FIX: mapper_pre_filter must be the filter *string*, not the
        # bound method -- a method is not JSON serializable, which is exactly
        # the TypeError in the pasted traceback.  Call filter_input() here.
        return [
            self.mr(mapper_pre_filter=self.filter_input(),
                    mapper=self.test_for_kitty,
                    reducer=self.sum_missing_kitties)]

if __name__ == '__main__':
    KittiesJob().run()
I'm getting the following exception:
Exception: error getting step information:
Traceback (most recent call last):
File "/Users/sverma/work/mrjob/filter_input.py", line 30, in <module>
KittiesJob().run()
File "/Library/Python/2.7/site-packages/mrjob/job.py", line 494, in run
mr_job.execute()
File "/Library/Python/2.7/site-packages/mrjob/job.py", line 500, in execute
self.show_steps()
File "/Library/Python/2.7/site-packages/mrjob/job.py", line 677, in show_steps
print >> self.stdout, json.dumps(self._steps_desc())
File "/Library/Python/2.7/site-packages/simplejson/__init__.py", line 370, in dumps
return _default_encoder.encode(obj)
File "/Library/Python/2.7/site-packages/simplejson/encoder.py", line 269, in encode
chunks = self.iterencode(o, _one_shot=True)
File "/Library/Python/2.7/site-packages/simplejson/encoder.py", line 348, in iterencode
return _iterencode(o, 0)
File "/Library/Python/2.7/site-packages/simplejson/encoder.py", line 246, in default
raise TypeError(repr(o) + " is not JSON serializable")
TypeError: <bound method KittiesJob.filter_input of <__main__.KittiesJob object at 0x10449ac90>> is not JSON serializable
Can someone please explain what I'm doing wrong ?
Wow, that's a late answer. I think you want to change this:
mapper_pre_filter=self.filter_input, to
mapper_pre_filter=self.filter_input(),.
From the example mapper_pre_filter is expected to be a string, not a function. Maybe it'll help somebody in the future.
The stack trace says that the value given for the filter is not JSON serializable because it is a bound method — mapper_pre_filter must be the filter command string itself, not the function object.

Serializing twisted.protocols.amp.AmpList for testing

I have a command as follows:
class AddChatMessages(Command):
    # One AMP argument named 'messages': a list of boxes, each carrying a
    # Unicode 'message' and an Integer 'type'.
    arguments = [
        ('messages', AmpList([('message', Unicode()), ('type', Integer())]))]
And I have a responder for it in a controller:
def add_chat_messages(self, messages):
    """Responder for AddChatMessages: convert each message box into a
    ``(message, type)`` tuple in place, forward the list to the main
    controller, and return an empty response box."""
    messages[:] = [(entry['message'], entry['type']) for entry in messages]
    self.main.add_chat_messages(messages)
    return {}

commands.AddChatMessages.responder(add_chat_messages)
I am writing a unit test for it. This is my code:
class AddChatMessagesTest(ProtocolTestMixin, unittest.TestCase):
    # Command under test, resolved by the mixin's test_responder().
    command = commands.AddChatMessages
    # Raw payload handed straight to the responder.
    # NOTE(review): a responder expects a *serialized* box -- this dict
    # should first go through commands.AddChatMessages.makeArguments(...).
    # Also, 'type' is declared Integer() in the command but is the string
    # 'None' here -- TODO confirm the intended value.
    data = {'messages': [{'message': 'hi', 'type': 'None'}]}

    def assert_callback(self, unused):
        # Nothing to verify beyond "the deferred fired without error".
        pass
Where ProtocolMixin is as follows:
class ProtocolTestMixin(object):
    """Drives one AMP responder end-to-end.

    Subclasses supply ``command`` and ``data`` attributes and override
    assert_callback to check the responder's result.
    """

    def setUp(self):
        self.protocol = client.CommandProtocol()

    def assert_callback(self, unused):
        raise NotImplementedError("Has to be implemented!")

    def test_responder(self):
        # Look up the responder registered for the command, invoke it with
        # the test payload, and chain the subclass's assertion onto it.
        handler = self.protocol.lookupFunction(self.command.commandName)
        deferred = handler(self.data)
        deferred.addCallback(self.assert_callback)
        return deferred
It works if AmpList is not involved, but when it is - I get following error:
======================================================================
ERROR: test_responder
----------------------------------------------------------------------
Traceback (most recent call last):
File "/Users/<username>/Projects/space/env/lib/python2.7/site-packages/twisted/internet/defer.py", line 139, in maybeDeferred
result = f(*args, **kw)
File "/Users/<username>/Projects/space/env/lib/python2.7/site-packages/twisted/internet/utils.py", line 203, in runWithWarningsSuppressed
reraise(exc_info[1], exc_info[2])
File "/Users/<username>/Projects/space/env/lib/python2.7/site-packages/twisted/internet/utils.py", line 199, in runWithWarningsSuppressed
result = f(*a, **kw)
File "/Users/<username>/Projects/space/tests/client_test.py", line 32, in test_responder
d = responder(self.data)
File "/Users/<username>/Projects/space/env/lib/python2.7/site-packages/twisted/protocols/amp.py", line 1016, in doit
kw = command.parseArguments(box, self)
File "/Users/<username>/Projects/space/env/lib/python2.7/site-packages/twisted/protocols/amp.py", line 1717, in parseArguments
return _stringsToObjects(box, cls.arguments, protocol)
File "/Users/<username>/Projects/space/env/lib/python2.7/site-packages/twisted/protocols/amp.py", line 2510, in _stringsToObjects
argparser.fromBox(argname, myStrings, objects, proto)
File "/Users/<username>/Projects/space/env/lib/python2.7/site-packages/twisted/protocols/amp.py", line 1209, in fromBox
objects[nk] = self.fromStringProto(st, proto)
File "/Users/<username>/Projects/space/env/lib/python2.7/site-packages/twisted/protocols/amp.py", line 1465, in fromStringProto
boxes = parseString(inString)
File "/Users/<username>/Projects/space/env/lib/python2.7/site-packages/twisted/protocols/amp.py", line 2485, in parseString
return cls.parse(StringIO(data))
TypeError: must be string or buffer, not list
Which makes sense, but how do I serialize a list in AddChatMessagesTest.data?
The responder expects to be called with a serialized box. It will then deserialize it, dispatch the objects to application code, take the object the application code returns, serialize it, and then return that serialized form.
For a few AMP types. most notably String, the serialized form is the same as the deserialized form, so it's easy to overlook this.
I think that you'll want to pass your data through Command.makeArguments in order to produce an object suitable to pass to a responder.
For example:
>>> from twisted.protocols.amp import Command, Integer
>>> class Foo(Command):
... arguments = [("bar", Integer())]
...
>>> Foo.makeArguments({"bar": 17}, None)
AmpBox({'bar': '17'})
>>>
If you do this with a Command that uses AmpList I think you'll find makeArguments returns an encoded string for the value of that argument and that the responder is happy to accept and parse that kind of string.

Categories