I'm using pprint to nicely print a dict and it's working fine. Now I switch to using an OrderedDict from module collections. Unfortunately, the pprint routing does not seem to recognize that such objects are more or less dicts as well and falls back to printing that as a long line.
>>> d = { i:'*'*i for i in range(8) }
>>> pprint.pprint(d)
{0: '',
1: '*',
2: '**',
3: '***',
4: '****',
5: '*****',
6: '******',
7: '*******'}
>>> pprint.pprint(collections.OrderedDict(d))
OrderedDict([(0, ''), (1, '*'), (2, '**'), (3, '***'), (4, '****'), (5, '*****'), (6, '******'), (7, '*******')])
Any way to get a nicer representation of OrderedDicts as well? Maybe even if they are nested inside a normal dict or list?
I found a relatively simple solution for this, but it includes the risk of making the output for your ordered dictionary appear exactly as if it were a regular dict object.
The original solution for using a context manager to prevent pprint from sorting dictionary keys comes from this answer.
#contextlib.contextmanager
def pprint_OrderedDict():
pp_orig = pprint._sorted
od_orig = OrderedDict.__repr__
try:
pprint._sorted = lambda x:x
OrderedDict.__repr__ = dict.__repr__
yield
finally:
pprint._sorted = pp_orig
OrderedDict.__repr__ = od_orig
(You could also just patch the OrderedDict.__repr__ method with dict.__repr__, but please don't.)
Example:
>>> foo = [('Roger', 'Owner'), ('Diane', 'Manager'), ('Bob', 'Manager'),
... ('Ian', 'Associate'), ('Bill', 'Associate'), ('Melinda', 'Associate')]
>>> d = OrderedDict(foo)
>>> pprint.pprint(d)
OrderedDict([('Roger', 'Owner'), ('Diane', 'Manager'), ('Bob', 'Manager'), ('Ian', 'Associate'), ('Bill', 'Associate'), ('Melinda', 'Associate')])
>>> pprint.pprint(dict(d))
{'Bill': 'Associate',
'Bob': 'Manager',
'Diane': 'Manager',
'Ian': 'Associate',
'Melinda': 'Associate',
'Roger': 'Owner'}
>>> with pprint_OrderedDict():
... pprint.pprint(d)
...
{'Roger': 'Owner',
'Diane': 'Manager',
'Bob': 'Manager',
'Ian': 'Associate',
'Bill': 'Associate',
'Melinda': 'Associate'}
Try this on:
d = collections.OrderedDict({ i:'*'*i for i in range(8) })
EDIT
pprint.pprint(list(d.items()))
If you are specifically targeting CPython* 3.6 or later, then you can just use regular dictionaries instead of OrderedDict. You'll miss out on a few methods exclusive to OrderedDict, and this is not (yet) guaranteed to be portable to other Python implementations,** but it is probably the simplest way to accomplish what you are trying to do.
* CPython is the reference implementation of Python which may be downloaded from python.org.
** CPython stole this idea from PyPy, so you can probably depend on it working there too.
I realize this is sort of necroposting, but I thought I'd post what I use. Its main virtue is that its aoutput can be read back into python, thus allowing, for instance, to shutlle between representations (which I use, for instance, on JSON files). Of course it breaks pprint encapsulation, by ripping some code off its inner _format function.
#!/bin/env python
from __future__ import print_function
import pprint;
from collections import OrderedDict
import json
import sys
class MyPP (pprint.PrettyPrinter):
def _format(self, object, stream, indent, allowance, context, level):
if not isinstance(object, OrderedDict) :
return pprint.PrettyPrinter._format(self, object, stream, indent, allowance, context, level)
level = level + 1
objid = id(object)
if objid in context:
stream.write(_recursion(object))
self._recursive = True
self._readable = False
return
write = stream.write
_len=len
rep = self._repr(object, context, level - 1)
typ = type(object)
sepLines = _len(rep) > (self._width - 1 - indent - allowance)
if self._depth and level > self._depth:
write(rep)
return
write('OrderedDict([\n%s'%(' '*(indent+1),))
if self._indent_per_level > 1:
write((self._indent_per_level - 1) * ' ')
length = _len(object)
#import pdb; pdb.set_trace()
if length:
context[objid] = 1
indent = indent + self._indent_per_level
items = object.items()
key, ent = items[0]
rep = self._repr(key, context, level)
write('( ')
write(rep)
write(', ')
self._format(ent, stream, indent + _len(rep) + 2,
allowance + 1, context, level)
write(' )')
if length > 1:
for key, ent in items[1:]:
rep = self._repr(key, context, level)
if sepLines:
write(',\n%s( %s , ' % (' '*indent, rep))
else:
write(', ( %s , ' % rep)
self._format(ent, stream, indent + _len(rep) + 2,
allowance + 1, context, level)
write(' )')
indent = indent - self._indent_per_level
del context[objid]
write('])')
return
pp = MyPP(indent=1)
handle=open(sys.argv[1],"r")
values=json.loads(handle.read(),object_pairs_hook=OrderedDict)
pp.pprint(values)
Related
We use the https://ltb-project.org/documentation/openldap-noopsrch.html overlay on openldap.
It gives you the number of entries in each catalog without having to browse all.
example show -e '!1.3.6.1.4.1.4203.666.5.18' controltype to ldapsearch:
ldapsearch -x -H 'ldap://localhost:389' -D 'cn=Manager,dc=my-domain,dc=com'
-w secret -b 'dc=my-domain,dc=com' \
'(objectClass=*)' -e '!1.3.6.1.4.1.4203.666.5.18'
I use the python3 ldap3: https://ldap3.readthedocs.io/en/latest/searches.html
Any tips/examples on how to implement this?
Thanks to #EricLavault answer I managed to fix this:
c.search(base, filter, scope, controls=[
build_control(oid='1.3.6.1.4.1.4203.666.5.18',
criticality=True,
value=None)
])
c.result then holds a controls dict:
{
'result': 0, 'description': 'success', 'dn': '',
'message': '', 'referrals': None, 'type': 'searchResDone',
'controls': {
'1.3.6.1.4.1.4203.666.5.18': {'description': '',
'criticality': False,
'value': b'0\x0b\x02\x01\x00\x02\x03\x01\xf0\xac\x02\x01\x00'
}
}
The format of value is described here:
https://ltb-project.org/documentation/openldap-noopsrch.html#usage
>>> v = b'0\x0b\x02\x01\x00\x02\x03\x01\xf0\xac\x02\x01\x00'
>>> vh = hex(int.from_bytes(v,'big'))
>>> vhl = [f"0x{vh[i:i+2]}" for i in range(2, len(vh), 2)]
>>> vhl
['0x30', '0x0b', '0x02', '0x01', '0x00', '0x02', '0x03', '0x01', '0xf0', '0xac', '0x02', '0x01', '0x00']
# org count length is the 7th hex from msb in vhl (it can have another position if the response have any kind of error)
>>> orglen = int(vhl[6], 16)
>>> orgcount = vhl[7:7+orglen]
>>> orgcount
['0x01', '0xf0', '0xac']
>>> c = '0x'
# merge orgcount hex
>>> for o in orgcount:
... c += f"{int(o, 16):02x}"
...
>>> c = int(c, 16) # convert back to dec
>>> c
127148
Checked by counting the objects returned given the same base,scope and filter, only it took 27sec to parse while this took 0.24sec
This is my code:
I'm trying to use the following code to insert data into an array of dictionaries but unable to insert properly.
Code:
test_list = {'module_serial-1': 'PSUXA12345680', 'module_name-1': 'CH1.FM5', 'module_name-2': 'CH1.FM6', 'module_serial-2': 'PSUXA12345681'}
def parse_subdevice_modules(row):
modules = []
module = {}
for k, v in row.items():
if v:
if re.match("module_name", k):
module['name'] = v
if re.match("module_serial", k):
module['serial'] = v
modules.append(module)
module = {}
return modules
print(parse_subdevice_modules(test_list))
Expected output:
[{'name':'CH1.FM5', serial': 'PSUXA12345680'}, {'name': 'CH1.FM6', 'serial': 'PSUXA12345681'}]
Actual output:
['serial': 'PSUXA12345680'}, {'name': 'CH1.FM6', 'serial': 'PSUXA12345681'}]
Run it here: https://repl.it/repls/WetSteelblueRange
Please note that the order of the data test_list cannot be altered as it comes via an external API so I used regex. Any ideas would be appreciated.
Your code relies on the wrong assumption that keys are ordered and that the serial will always follow the name. The proper solution here is to use a dict (actually a collections.defaultdict to make things easier) to collect and regroup the values you're interested in based on the module number (the final '-N' in the key). Note that you don't need regexps here - Python string already provide the necessary operations for this task:
from collections import defaultdict
def parse_subdevice_modules(row):
modules = defaultdict(dict)
for k, v in row.items():
# first get rid of what we're not interested in
if not v:
continue
if not k.startswith("module_"):
continue
# retrieve the key number (last char) with
# negative string indexing:
key_num = k[-1]
# retrieve the useful part of the key ("name" or "serial")
# by splitting the string:
key_name = k.split("_")[1].split("-")[0]
# and now we just have to store this in our defaultdict
modules[key_num][key_name] = v
# and return only the values.
# NB: in py2.x you don't need the call to `list`,
# you can just return `modules.values()` directly
modules = list(modules.values())
return modules
test_list = {
'profile': '', 'chassis_name': '123', 'supplier_order_num': '',
'device_type': 'mass_storage', 'device_subtype': 'flashblade',
'module_serial-1': 'PSUXA12345680', 'module_name-1': 'CH1.FM5',
'module_name-2': 'CH1.FM6', 'rack_total_pos': '',
'asset_tag': '002000027493', 'module_serial-2': 'PSUXA12345681',
'purchase_order': '0004530869', 'build': 'Test_Build_for_SNOW',
'po_line_num': '00190', 'mac_address': '', 'position': '7',
'model': 'FB-528TB-10X52.8TB', 'manufacturer': 'PureStorage',
'rack': 'Test_Rack_2', 'serial': 'PMPAM1842147D', 'name': 'FB02'
}
print(parse_subdevice_modules(test_list))
You can do somthing like this also.
test_list = {'module_serial-1': 'PSUXA12345680', 'module_name-1': 'CH1.FM5', 'module_name-2': 'CH1.FM6',
'module_serial-2': 'PSUXA12345681'}
def parse_subdevice_modules(row):
modules_list = []
for key, value in row.items():
if not value or key.startswith('module_name'):
continue
if key.startswith('module_serial'):
module_name_key = f'module_name-{key.split("-")[-1]}'
modules_list.append({'serial': value, 'name': row[module_name_key]})
return modules_list
print(parse_subdevice_modules(test_list))
Output:
[{'serial': 'PSUXA12345680', 'name': 'CH1.FM5'}, {'serial': 'PSUXA12345681', 'name': 'CH1.FM6'}]
You would need to check if module contains 2 elements and append it to modules:
test_list = {'module_serial-1': 'PSUXA12345680', 'module_name-1': 'CH1.FM5', 'module_name-2': 'CH1.FM6', 'module_serial-2': 'PSUXA12345681'}
def parse_subdevice_modules(row):
modules = []
module = {}
for k, v in row.items():
if v:
if k.startswith('module_name'):
module['name'] = v
elif k.startswith("module_serial"):
module['serial'] = v
if len(module) == 2:
modules.append(module)
module = {}
return modules
print(parse_subdevice_modules(test_list))
Returns:
[{'serial': 'PSUXA12345680'}, {'name': 'CH1.FM5'}, {'name': 'CH1.FM6'}, {'serial': 'PSUXA12345681'}]
I have the following code:
import os
import pprint
file_path = input("Please, enter the path to the file: ")
if os.path.exists(file_path):
worker_dict = {}
k = 1
for line in open(file_path,'r'):
split_line = line.split()
worker = 'worker{}'.format(k)
worker_name = '{}_{}'.format(worker, 'name')
worker_yob = '{}_{}'.format(worker, 'yob')
worker_job = '{}_{}'.format(worker, 'job')
worker_salary = '{}_{}'.format(worker, 'salary')
worker_dict[worker_name] = ' '.join(split_line[0:2])
worker_dict[worker_yob] = ' '.join(split_line[2:3])
worker_dict[worker_job] = ' '.join(split_line[3:4])
worker_dict[worker_salary] = ' '.join(split_line[4:5])
k += 1
else:
print('Error: Invalid file path')
File:
John Snow 1967 CEO 3400$
Adam Brown 1954 engineer 1200$
Output from worker_dict:
{
'worker1_job': 'CEO',
'worker1_name': 'John Snow',
'worker1_salary': '3400$',
'worker1_yob': '1967',
'worker2_job': 'engineer',
'worker2_name': 'Adam Brown',
'worker2_salary': '1200$',
'worker2_yob': '1954',
}
And I want to sort data by worker name and after that by salary. So my idea was to create a separate list with salaries and worker names to sort. But I have problems with filling it, maybe there is a more elegant way to solve my problem?
import os
import pprint
file_path = input("Please, enter the path to the file: ")
if os.path.exists(file_path):
worker_dict = {}
k = 1
with open(file_path,'r') as file:
content=file.read().splitlines()
res=[]
for i in content:
val = i.split()
name = [" ".join([val[0],val[1]]),]#concatenate first name and last name
i=name+val[2:] #prepend name
res.append(i) #append modified value to new list
res.sort(key=lambda x: x[3])#sort by salary
print res
res.sort(key=lambda x: x[0])#sort by name
print res
Output:
[['Adam Brown', '1954', 'engineer', '1200$'], ['John Snow', '1967', 'CEO', '3400$']]
[['Adam Brown', '1954', 'engineer', '1200$'], ['John Snow', '1967', 'CEO', '3400$']]
d = {
'worker1_job': 'CEO',
'worker1_name': 'John Snow',
'worker1_salary': '3400$',
'worker1_yob': '1967',
'worker2_job': 'engineer',
'worker2_name': 'Adam Brown',
'worker2_salary': '1200$',
'worker2_yob': '1954',
}
from itertools import zip_longest
#re-group:
def grouper(iterable, n, fillvalue=None):
"Collect data into fixed-length chunks or blocks"
# grouper('ABCDEFG', 3, 'x') --> ABC DEF Gxx"
args = [iter(iterable)] * n
return zip_longest(*args, fillvalue=fillvalue)
#re-order:
res = []
for group in list(grouper(d.values(), 4)):
reorder = [1,2,0,3]
res.append([ group[i] for i in reorder])
#sort:
res.sort(key=lambda x: (x[1], x[2]))
output:
[['Adam Brown', '1200$', 'engineer', '1954'],
['John Snow', '3400$', 'CEO', '1967']]
Grouper is defined and explained in itertools. I've grouped your dictionary by records pertaining to each worker, returned it as a reordered list of lists. As lists, I sort them by the name and salary. This is solution is modular: it distinctly groups, re-orders and sorts.
I recommend to store the workers in a different format, for example .csv, then you could use csv.DictReader and put it into a list of dictionaries (this would also allow you to use jobs, names, etc. with more words like "tomb raider").
Note that you have to convert the year of birth and salary to ints or floats to sort them correctly, otherwise they would get sorted lexicographically as in a real world dictionary (book) because they are strings, e.g.:
>>> sorted(['100', '11', '1001'])
['100', '1001', '11']
To sort the list of dicts you can use operator.itemgetter as the key argument of sorted, instead of a lambda function, and just pass the desired key to itemgetter.
The k variable is useless, because it's just the len of the list.
The .csv file:
"name","year of birth","job","salary"
John Snow,1967,CEO,3400$
Adam Brown,1954,engineer,1200$
Lara Croft,1984,tomb raider,5600$
The .py file:
import os
import csv
from operator import itemgetter
from pprint import pprint
file_path = input('Please, enter the path to the file: ')
if os.path.exists(file_path):
with open(file_path, 'r', newline='') as f:
worker_list = list(csv.DictReader(f))
for worker in worker_list:
worker['salary'] = int(worker['salary'].strip('$'))
worker['year of birth'] = int(worker['year of birth'])
pprint(worker_list)
pprint(sorted(worker_list, key=itemgetter('name')))
pprint(sorted(worker_list, key=itemgetter('salary')))
pprint(sorted(worker_list, key=itemgetter('year of birth')))
You still need some error handling, if a int conversion fails, or just let the program crash.
I know that you can use split() to split a user input into two, but how would you split input that consists of multiple variables ? For example:
User input:
Shawn=14:soccer#2991842
What I would like to do:
name = Shawn
age = 14
course = soccer
idnumber = 2991842
What's the best way to do such thing ?
str = 'Shawn=14:soccer#2991842'
keys = ['name', 'age', 'course', 'idnumber']
values = re.split('[=:#]', str)
print dict(zip(keys, values))
Out[114]: {'age': '14', 'course': 'soccer', 'idnumber': '2991842', 'name': 'Shawn'}
I think Regex will work best here:
>>> from re import split
>>> mystr = "Shawn=14:soccer#2991842"
>>> split("\W", mystr)
['Shawn', '14', 'soccer', '2991842']
>>> lst = split("\W", mystr)
>>> name = lst[0]
>>> name
'Shawn'
>>> age = lst[1]
>>> age
'14'
>>> course = lst[2]
>>> course
'soccer'
>>> idnumber = lst[3]
>>> idnumber
'2991842'
>>>
Also, the above is a step-by-step demonstration. You can actually just do:
name, age, course, idnumber = split("\W", mystr)
Here's how I would do it.
def splitStr(str):
temp = str.split(':')
temp_nameAge = temp[0].split('=')
temp_courseId = temp[1].split('#')
name = temp_nameAge[0]
age = int(temp_nameAge[1])
course = temp_courseId[0]
idnumber = int(temp_courseId[1])
print 'Name = %s, age = %i, course = %s, id_number = %i' % (name, age, course, idnumber)
Another thing you can do is use split like: string.split(":").
Then you can change the format to "name:age:course:number"
You could just keep splitting the splits...
text2split = "Shawn=14:soccer#2991842"
name = text2split.split('=')[0]
age = text2split.split('=')[1].split(':')[0]
course = text2split.split('=')[1].split(':')[1].split('#')[0]
idnumber = text2split.split('=')[1].split(':')[1].split('#')[1]
This isn't the most elegant way to do it, but it'll work so long as text2split always has the same delimeters.
If you are ok with storing them under dictionary keys, you could use named group references
import re
x='shawn=14:soccer#2991842'
re.match(r'(?P<name>.*?)=(?P<age>.*):(?P<course>.*?)#(?P<idnumber>.*)', x).groupdict()
{'idnumber': '2991842', 'course': 'soccer', 'age': '14', 'name': 'shawn
I'd like to develop a small debugging tool for Python programs. For the "Dynamic Slicing" feature, I need to find the variables that are accessed in a statement, and find the type of access (read or write) for those variables.
But the only disassembly feature that's built into Python is dis.disassemble, and that just prints the disassembly to standard output:
>>> dis.disassemble(compile('x = a + b', '', 'single'))
1 0 LOAD_NAME 0 (a)
3 LOAD_NAME 1 (b)
6 BINARY_ADD
7 STORE_NAME 2 (x)
10 LOAD_CONST 0 (None)
13 RETURN_VALUE
I'd like to be able to transform the disassembly into a dictionary of sets describing which variables are used by each instruction, like this:
>>> my_disassemble('x = a + b')
{'LOAD_NAME': set(['a', 'b']), 'STORE_NAME': set(['x'])}
How can I do this?
Read the source code for the dis module and you'll see that it's easy to do your own disassembly and generate whatever output format you like. Here's some code that generates the sequence of instructions in a code object, together with their arguments:
from opcode import *
def disassemble(co):
"""
Disassemble a code object and generate its instructions.
"""
code = co.co_code
n = len(code)
extended_arg = 0
i = 0
free = None
while i < n:
c = code[i]
op = ord(c)
i = i+1
if op < HAVE_ARGUMENT:
yield opname[op],
else:
oparg = ord(code[i]) + ord(code[i+1])*256 + extended_arg
extended_arg = 0
i = i+2
if op == EXTENDED_ARG:
extended_arg = oparg*65536L
if op in hasconst:
arg = co.co_consts[oparg]
elif op in hasname:
arg = co.co_names[oparg]
elif op in hasjrel:
arg = repr(i + oparg)
elif op in haslocal:
arg = co.co_varnames[oparg]
elif op in hascompare:
arg = cmp_op[oparg]
elif op in hasfree:
if free is None:
free = co.co_cellvars + co.co_freevars
arg = free[oparg]
else:
arg = oparg
yield opname[op], arg
And here's an example disassembly.
>>> def f(x):
... return x + 1
...
>>> list(disassemble(f.func_code))
[('LOAD_FAST', 'x'), ('LOAD_CONST', 1), ('BINARY_ADD',), ('RETURN_VALUE',)]
You can easily transform this into the dictionary-of-sets data structure you want:
>>> from collections import defaultdict
>>> d = defaultdict(set)
>>> for op in disassemble(f.func_code):
... if len(op) == 2:
... d[op[0]].add(op[1])
...
>>> d
defaultdict(<type 'set'>, {'LOAD_FAST': set(['x']), 'LOAD_CONST': set([1])})
(Or you could generate the dictionary-of-sets data structure directly.)
Note that in your application you probably don't actually need look up the name for each opcode. Instead, you could look up the opcodes you need in the opcode.opmap dictionary and create named constants, perhaps like this:
LOAD_FAST = opmap['LOAD_FAST'] # actual value is 124
...
for var in disassembly[LOAD_FAST]:
...
Update: in Python 3.4 you can use the new dis.get_instructions:
>>> def f(x):
... return x + 1
>>> import dis
>>> list(dis.get_instructions(f))
[Instruction(opname='LOAD_FAST', opcode=124, arg=0, argval='x',
argrepr='x', offset=0, starts_line=1, is_jump_target=False),
Instruction(opname='LOAD_CONST', opcode=100, arg=1, argval=1,
argrepr='1', offset=3, starts_line=None, is_jump_target=False),
Instruction(opname='BINARY_ADD', opcode=23, arg=None, argval=None,
argrepr='', offset=6, starts_line=None, is_jump_target=False),
Instruction(opname='RETURN_VALUE', opcode=83, arg=None, argval=None,
argrepr='', offset=7, starts_line=None, is_jump_target=False)]
I think the challenge here is to capture the output of a dis rather than parsing the output and create a dictionary. The reason I will not cover the second part is, the format and the fields (key, value) of the dictionary is not mentioned and its trivial.
As I mentioned, the reason its a challenge to capture the OP of dis is, its a print rather than a return, but this can be captured through context manager
def foo(co):
import sys
from contextlib import contextmanager
from cStringIO import StringIO
#contextmanager
def captureStdOut(output):
stdout = sys.stdout
sys.stdout = output
yield
sys.stdout = stdout
out = StringIO()
with captureStdOut(out):
dis.disassemble(co.func_code)
return out.getvalue()
import dis
import re
dict(re.findall("^.*?([A-Z_]+)\s+(.*)$", line)[0] for line in foo(foo).splitlines()
if line.strip())
{'LOAD_CONST': '0 (None)', 'WITH_CLEANUP': '', 'SETUP_WITH': '21 (to 107)', 'STORE_DEREF': '0 (sys)', 'POP_TOP': '', 'LOAD_FAST': '4 (out)', 'MAKE_CLOSURE': '0', 'STORE_FAST': '4 (out)', 'IMPORT_FROM': '4 (StringIO)', 'LOAD_GLOBAL': '5 (dis)', 'END_FINALLY': '', 'RETURN_VALUE': '', 'LOAD_CLOSURE': '0 (sys)', 'BUILD_TUPLE': '1', 'CALL_FUNCTION': '0', 'LOAD_ATTR': '8 (getvalue)', 'IMPORT_NAME': '3 (cStringIO)', 'POP_BLOCK': ''}
>>>