In the following code, I parse a PDB file and compute its secondary structure using the DSSP library. After parsing and storing the values, I renumber the residues; I did not intentionally modify the _pdb and _dssp instances. However, after renumbering, the values previously stored in _dssp are renumbered as well, as you can see in the output that I annotated below. I think this is an effect of the objects being shared by reference. I want the values in _dssp to stay unchanged after renumbering, since I did not explicitly modify _dssp. How can this kind of side effect be avoided?
class PDBModify:
    """Parse a PDB file, run DSSP on it, and write a renumbered subset.

    ``_pdb`` holds the first model of the parsed structure and ``_dssp`` the
    DSSP result computed from that same model object.
    """

    # Class-level defaults so both attributes exist before ``parse`` is called.
    _pdb = None
    _dssp = None

    def parse(self, pdbid, pdbfile):
        """Parse ``pdbfile`` and compute DSSP for its first model.

        Args:
            pdbid: Identifier passed to ``PDBParser.get_structure``.
            pdbfile: Path of the PDB file; also passed to ``DSSP``.
        """
        pdbparser = PDBParser(PERMISSIVE=1)
        structure = pdbparser.get_structure(pdbid, pdbfile)
        model = structure[0]
        # NOTE: both attributes are built from the very same model object, so
        # in-place edits made through ``_pdb`` (see ``delete_n_dump``) also
        # show up when iterating ``_dssp`` -- this is the behaviour the
        # sample output below demonstrates.
        self._pdb = model
        self._dssp = DSSP(model, pdbfile)

    def delete_n_dump(self, rsds, map_rsds, outfile="./3chy_md.pdb"):
        """Drop residues not listed in ``rsds``, renumber the rest, and save.

        Args:
            rsds: Collection of residue sequence numbers to keep.
            map_rsds: Mapping from an old residue number to its new number;
                must contain every number in ``rsds`` that occurs in the
                model (a missing key raises ``KeyError``).
            outfile: Path of the PDB file to write. Defaults to the path the
                original snippet hard-coded.
        """
        current = self._pdb
        for chain in current:
            # Iterate over a snapshot: detaching children mutates the chain.
            for residue in list(chain):
                resseq = residue.id[1]
                if resseq not in rsds:
                    chain.detach_child(residue.id)
                else:
                    residue.id = (' ', map_rsds[resseq], ' ')
        write_pdb = PDBIO()
        write_pdb.set_structure(current)
        write_pdb.save(outfile)
if __name__ == "__main__":
    # Old residue number -> new residue number for the residues that are kept.
    map_rsds = {15: 200, 20: 201, 25: 202, 26: 203, 30: 204, 34: 205, 35: 206, 36: 207}
    rsds = [15, 20, 25, 26, 30, 34, 35, 36]
    pdbmodify = PDBModify()
    pdbmodify.parse('3chy', './3chy.pdb')  # parsing pdb and computing ss
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print("before ")
    for item in pdbmodify._dssp:  # initial call
        print(item)
    pdbmodify.delete_n_dump(rsds, map_rsds)  # renumber the residue id
    print("after ")
    for item in pdbmodify._dssp:  # second call
        print(item)
Output:
before
(<Residue ALA het= resseq=2 icode= >, '-', 65, 0.6132075471698113, 360.0, 127.6)
...
(<Residue SER het= resseq=15 icode= >, 'H', 73, 0.5615384615384615, -60.0, -33.4)
...
after
(<Residue ALA het= resseq=2 icode= >, '-', 65, 0.6132075471698113, 360.0, 127.6)
...
(<Residue SER het= resseq=200 icode= >, 'H', 73, 0.5615384615384615, -60.0, -33.4) # residue number is changed here why?
...
You are pointing to the same structure[0] object with self._pdb and self._dssp
self._pdb = structure[0]
self._dssp = DSSP(structure[0], pdbfile)
If you want to unlink them, you could use copy.copy or copy.deepcopy (depending on whether structure[0] is a simple object, like a flat list, for which a shallow copy is enough, or a nested one, like a list of lists, which needs a deep copy).
For example:
import copy
self._pdb = copy.deepcopy(structure[0])
self._dssp = DSSP(structure[0], pdbfile)
Related
I am attempting to make a plan, which is a list of classes that can only be added when the required classes have been completed or the co-requisite classes are being taken in the same semester.
Below I have my code that almost works but it always reuses the classes even though they have already been completed/used. I tried to prevent this with and (class_list[i][0] not in classes_done), I was hoping that it wouldn't go into the if statement but it seems like it's being ignored.
The rest of this if statement seems to work fine. (class_list[i][3] == '' or class_list[i][3] in classes_done) does this class have a required completed class if yes has it been completed?
(class_list[i][2] in classes_for_semester or class_list[i][2] == '')does this class have a co-requisite class if yes is it in the class_for_semester or already completed?
The class_list variable is organized like this['name', 'credit', 'co-requisite', 'required completed classes', 'empty']. I added the other variables as comments to show what they look like.
# NOTE(review): the indentation of this snippet was lost, so the nesting of the
# statements below (in particular which loop the bookkeeping after the `for`
# belongs to) cannot be determined from the text alone.
class PlanGenerator:
# Builds a semester-by-semester plan from class_list and prints progress.
# No return statement appears in this snippet.
# NOTE(review): the signature has no `self` parameter and no @staticmethod
# decorator is visible.
def generator(max_credit_allowed, min_credit_allowed, classes_done, class_list):
classes_for_semester = []
credits_for_semester = 0
semester = 0
full_plan = []
# class_list = [['MA 241 ', '4', '', '', ''], ['PS 150 ', '3', 'MA 241 ', '', ''], ['UNIV 101', '1', '', '', ''], ['COM 122', '3', '', '', ''], ...]
# max_credit_allowed = 16
# min_credit_allowed = 12
# classes_done=['UNIV 101']
# Termination is decided by comparing the two list lengths only.
while len(classes_done) != len(class_list): # keep going until all classes are used
while int(min_credit_allowed) > credits_for_semester: # keep going until at least the minimum credits are in the semester
semester += 1
for i in range(len(class_list)): # looping over the class list
if int(class_list[i][1]) + credits_for_semester < max_credit_allowed: #if this class was to be added would it go over the max credit for semester if yes go to next class
# Row layout used here: [0] name, [1] credits, [2] co-requisite, [3] prerequisite.
if (class_list[i][3] == '' or class_list[i][3] in classes_done) and (class_list[i][2] in classes_for_semester or class_list[i][2] in classes_done or class_list[i][2] == '') and (class_list[i][0] not in classes_done):
classes_for_semester.append(class_list[i][0])
credits_for_semester += int(class_list[i][1])
print('classes for semester', classes_for_semester)
print('semester credits', credits_for_semester)
# NOTE: append() adds the semester list itself as ONE element, so the
# `... in classes_done` name checks in the condition above are comparing class
# names against a nested list.
classes_done.append(classes_for_semester)
full_plan.append(semester)
full_plan.append(classes_for_semester)
print('full plan', full_plan)
# Rebinds to a fresh list; the list appended to full_plan above is left untouched.
classes_for_semester = []
credits_for_semester = 0
print('done')
print(full_plan)
I hope my explanation makes sense.
Maybe somebody can understand my mistake and help me find a good solution.
Also if you have anything that you see would make this code more simple please let me know.
Much appreciated
First, your while int(min_credit_allowed) > credits_for_semester line is leading to an infinite loop. It needs to be changed to
while len(classes_done) != len(class_list) and int(min_credit_allowed) > credits_for_semester: # Remove the second while loop
Secondly, you're appending a list to a list, so you get a 2-D list for classes_done with
classes_done.append(classes_for_semester)
This should be
classes_done += classes_for_semester
so that you add the items from classes_for_semester into classes_done, rather than adding a list.
Your new code should look like this:
def generator(max_credit_allowed, min_credit_allowed, classes_done, class_list):
    """Build a greedy semester-by-semester plan, printing progress as it goes.

    Each semester is filled with a single pass over ``class_list``: a class is
    taken when it is not done yet, fits under the credit limit, its
    prerequisite is already completed, and its co-requisite is completed or
    scheduled in the same semester.

    Args:
        max_credit_allowed: Maximum credits per semester (int or numeric
            string). A class that brings the total exactly to this limit is
            accepted.
        min_credit_allowed: Kept for interface compatibility. The greedy pass
            already takes every eligible class that fits, so a minimum load
            cannot be enforced and this value is not used.
        classes_done: List of completed class names. It is extended in place
            with every class that gets scheduled.
        class_list: Rows of ``[name, credits, co-requisite, prerequisite, ...]``
            where an empty string (or ``None``) means "no requirement".

    Returns:
        The flat plan list ``[1, [names...], 2, [names...], ...]`` that is
        also printed.
    """
    max_credits = int(max_credit_allowed)
    full_plan = []
    semester = 0
    while True:
        classes_for_semester = []
        credits_for_semester = 0
        for row in class_list:
            name, credits, coreq, prereq = row[0], int(row[1]), row[2], row[3]
            if name in classes_done or name in classes_for_semester:
                continue
            # Only skip the class when adding it would go OVER the limit.
            if credits_for_semester + credits > max_credits:
                continue
            prereq_met = not prereq or prereq in classes_done
            coreq_met = (not coreq or coreq in classes_done
                         or coreq in classes_for_semester)
            if prereq_met and coreq_met:
                classes_for_semester.append(name)
                credits_for_semester += credits
        if not classes_for_semester:
            # Either everything is done or the remaining classes can never be
            # scheduled; stopping here avoids looping forever.
            break
        semester += 1
        print('classes for semester', classes_for_semester)
        print('semester credits', credits_for_semester)
        # Extend with the names (not the list object) so later membership
        # tests against class names work.
        classes_done += classes_for_semester
        full_plan.append(semester)
        full_plan.append(classes_for_semester)
        print('full plan', full_plan)
    print('done')
    print(full_plan)
    return full_plan
I would highly recommend using None instead of '' for the non-existent values, that way you can do a simple value is None check instead of an equality check to an empty string.
For the lists of class information you're passing in, I would change them to classes, dictionaries, or namedtuples (find out more about them here) so that you can easily refer to the values by name rather than numbers.
class_list[i].class_name or class_list[i]['class_name'] are a lot easier to debug in the future than magic indices. You can even change your for loop to use the actual class details as a variable instead of i in range(len(class_list)) like so:
for c in class_list:
if int(c.credits) .... # Using a class or namedtuple approach as suggested above
And one minor thing that probably isn't a huge issue but could become a concern if these lists were to grow long: consider using sets instead of lists for storing things like classes_done and classes_for_semester. It also prevents duplicates from being stored (assuming you don't want to store the same class more than once).
To provide a concrete example of the namedtuple suggestion, you can do the following:
from collections import namedtuple
# Record for one course: its name, credit count, and the names of its
# co-requisite and prerequisite courses (None where there is none).
ClassList = namedtuple('ClassList', 'class_name credits coreq prereq')
class_list = [
    ClassList('MA 241', 4, None, None),
    ClassList('PS 150', 3, 'MA 241', None),
    # ...
]
So your for loop becomes
# Try to place every class of class_list into the current semester.
for c in class_list:
    # "<=": a class that brings the total exactly to the limit does not
    # exceed it. (The name matches the function parameter max_credit_allowed.)
    if c.credits + credits_for_semester <= max_credit_allowed:
        prereq_met = c.prereq is None or c.prereq in classes_done
        coreq_met = (c.coreq is None or c.coreq in classes_done
                     or c.coreq in classes_for_semester)
        if prereq_met and coreq_met and c.class_name not in classes_done:
            classes_for_semester.append(c.class_name)
            credits_for_semester += c.credits
# Semester complete: record it, then start the next one from scratch.
classes_done += classes_for_semester
full_plan.append(semester)
full_plan.append(classes_for_semester)
classes_for_semester = []
credits_for_semester = 0
I'm using pprint to nicely print a dict and it's working fine. Now I have switched to using an OrderedDict from the collections module. Unfortunately, the pprint routine does not seem to recognize that such objects are more or less dicts as well, and falls back to printing them as one long line.
>>> d = { i:'*'*i for i in range(8) }
>>> pprint.pprint(d)
{0: '',
1: '*',
2: '**',
3: '***',
4: '****',
5: '*****',
6: '******',
7: '*******'}
>>> pprint.pprint(collections.OrderedDict(d))
OrderedDict([(0, ''), (1, '*'), (2, '**'), (3, '***'), (4, '****'), (5, '*****'), (6, '******'), (7, '*******')])
Any way to get a nicer representation of OrderedDicts as well? Maybe even if they are nested inside a normal dict or list?
I found a relatively simple solution for this, but it includes the risk of making the output for your ordered dictionary appear exactly as if it were a regular dict object.
The original solution for using a context manager to prevent pprint from sorting dictionary keys comes from this answer.
@contextlib.contextmanager
def pprint_OrderedDict():
    """Temporarily make ``pprint`` show an ``OrderedDict`` like a plain dict.

    While the context is active, ``pprint._sorted`` is replaced by an identity
    function (so keys keep their order) and ``OrderedDict.__repr__`` by
    ``dict.__repr__``. Both originals are restored on exit, even when the
    body raises.

    NOTE(review): this relies on the private ``pprint._sorted`` hook and on
    ``OrderedDict.__repr__`` being assignable -- presumably Python 2.7 only;
    confirm before using it on another interpreter.
    """
    pp_orig = pprint._sorted
    od_orig = OrderedDict.__repr__
    try:
        pprint._sorted = lambda x: x
        OrderedDict.__repr__ = dict.__repr__
        yield
    finally:
        pprint._sorted = pp_orig
        OrderedDict.__repr__ = od_orig
(You could also just patch the OrderedDict.__repr__ method with dict.__repr__, but please don't.)
Example:
>>> foo = [('Roger', 'Owner'), ('Diane', 'Manager'), ('Bob', 'Manager'),
... ('Ian', 'Associate'), ('Bill', 'Associate'), ('Melinda', 'Associate')]
>>> d = OrderedDict(foo)
>>> pprint.pprint(d)
OrderedDict([('Roger', 'Owner'), ('Diane', 'Manager'), ('Bob', 'Manager'), ('Ian', 'Associate'), ('Bill', 'Associate'), ('Melinda', 'Associate')])
>>> pprint.pprint(dict(d))
{'Bill': 'Associate',
'Bob': 'Manager',
'Diane': 'Manager',
'Ian': 'Associate',
'Melinda': 'Associate',
'Roger': 'Owner'}
>>> with pprint_OrderedDict():
... pprint.pprint(d)
...
{'Roger': 'Owner',
'Diane': 'Manager',
'Bob': 'Manager',
'Ian': 'Associate',
'Bill': 'Associate',
'Melinda': 'Associate'}
Try this on:
d = collections.OrderedDict({ i:'*'*i for i in range(8) })
EDIT
pprint.pprint(list(d.items()))
If you are specifically targeting CPython* 3.6 or later, then you can just use regular dictionaries instead of OrderedDict. You'll miss out on a few methods exclusive to OrderedDict, and this is not (yet) guaranteed to be portable to other Python implementations,** but it is probably the simplest way to accomplish what you are trying to do.
* CPython is the reference implementation of Python which may be downloaded from python.org.
** CPython stole this idea from PyPy, so you can probably depend on it working there too.
I realize this is sort of necroposting, but I thought I'd post what I use. Its main virtue is that its output can be read back into Python, which allows, for instance, shuttling between representations (I use it on JSON files). Of course, it breaks pprint's encapsulation by ripping some code out of its inner _format function.
#!/bin/env python
from __future__ import print_function
import pprint;
from collections import OrderedDict
import json
import sys
class MyPP(pprint.PrettyPrinter):
    """PrettyPrinter that writes ``OrderedDict`` values as ``OrderedDict([...])``.

    The emitted text is a constructor call over a list of ``( key, value )``
    pairs, so it can be read back into Python. Every other object is handled
    by the stock ``PrettyPrinter`` formatter.
    """

    def _format(self, object, stream, indent, allowance, context, level):
        """Write ``object`` to ``stream``, special-casing ``OrderedDict``.

        Overrides the private ``PrettyPrinter._format`` hook; the parameter
        names mirror that hook and are passed straight through for objects
        that are not ``OrderedDict`` instances.
        """
        if not isinstance(object, OrderedDict):
            return pprint.PrettyPrinter._format(
                self, object, stream, indent, allowance, context, level)
        level = level + 1
        objid = id(object)
        if objid in context:
            # Container that (indirectly) contains itself: emit the recursion
            # marker. The helper lives in the pprint module, not in this file.
            stream.write(pprint._recursion(object))
            self._recursive = True
            self._readable = False
            return
        write = stream.write
        rep = self._repr(object, context, level - 1)
        # When the one-line repr does not fit, later pairs go on their own lines.
        sep_lines = len(rep) > (self._width - 1 - indent - allowance)
        if self._depth and level > self._depth:
            write(rep)
            return
        write('OrderedDict([\n%s' % (' ' * (indent + 1),))
        if self._indent_per_level > 1:
            write((self._indent_per_level - 1) * ' ')
        if len(object):
            context[objid] = 1
            indent = indent + self._indent_per_level
            # list(...) so indexing/slicing also works where items() is a view.
            items = list(object.items())
            key, ent = items[0]
            rep = self._repr(key, context, level)
            write('( ')
            write(rep)
            write(', ')
            self._format(ent, stream, indent + len(rep) + 2,
                         allowance + 1, context, level)
            write(' )')
            for key, ent in items[1:]:
                rep = self._repr(key, context, level)
                if sep_lines:
                    write(',\n%s( %s , ' % (' ' * indent, rep))
                else:
                    write(', ( %s , ' % rep)
                self._format(ent, stream, indent + len(rep) + 2,
                             allowance + 1, context, level)
                write(' )')
            del context[objid]
        write('])')
        return
if __name__ == "__main__":
    pp = MyPP(indent=1)
    # Load JSON objects as OrderedDict so the key order of the file is kept;
    # the with-block closes the file once it has been read.
    with open(sys.argv[1], "r") as handle:
        values = json.load(handle, object_pairs_hook=OrderedDict)
    pp.pprint(values)
I'm supposed to create a namedtuple which has 27 field_names. Since that is too many field_names to write out in the call, I created a list called sub which holds the subject entries for field_names. result is my reference to the namedtuple.
# Subject codes; each one becomes a field of the record type below.
sub = (
    'MA9221 MC9211 MC9212 MC9213 MC9214 '
    'MC9215 MC9222 MC9223 MC9224 MC9225 '
    'MC9231 MC9232 MC9233 MC9234 MC9235 '
    'MC9241 MC9242 MC9243 MC9244 MC9251 '
    'MC9252 MC9273 MC9277 MC9283 MC9285'
).split()
# Record type: 'rollno' and 'name' first, then one field per subject code.
_result_fields = ['rollno', 'name']
_result_fields.extend(sub)
result = namedtuple('result', _result_fields)
Result values:
# Sample values for one record: roll number, name, and one 0/1 entry per
# subject code (25 entries, five per row).
rollno, name = 123123, "Sam"
sub_value = [
    1, 0, 0, 0, 0,
    0, 0, 1, 1, 1,
    1, 1, 1, 0, 0,
    1, 1, 0, 0, 1,
    1, 1, 1, 0, 1,
]
Now, I don't know how to pass the elements of sub_value to result(rollno, name, ...).
This line actually defines the type itself:
result = namedtuple('result', ['rollno', 'name'] + sub)
To create an instance, you now need to call result(...).
>>> result(rollno, name, *sub_value)
result(rollno=123123, name='Sam', MA9221=1, MC9211=0, MC9212=0, MC9213=0, MC9214=0, MC9215=0, MC9222=0, MC9223=1, MC9224=1, MC9225=1, MC9231=1, MC9232=1, MC9233=1, MC9234=0, MC9235=0, MC9241=1, MC9242=1, MC9243=0, MC9244=0, MC9251=1, MC9252=1, MC9273=1, MC9277=1, MC9283=0, MC9285=1)
I'd like to develop a small debugging tool for Python programs. For the "Dynamic Slicing" feature, I need to find the variables that are accessed in a statement, and find the type of access (read or write) for those variables.
But the only disassembly feature that's built into Python is dis.disassemble, and that just prints the disassembly to standard output:
>>> dis.disassemble(compile('x = a + b', '', 'single'))
1 0 LOAD_NAME 0 (a)
3 LOAD_NAME 1 (b)
6 BINARY_ADD
7 STORE_NAME 2 (x)
10 LOAD_CONST 0 (None)
13 RETURN_VALUE
I'd like to be able to transform the disassembly into a dictionary of sets describing which variables are used by each instruction, like this:
>>> my_disassemble('x = a + b')
{'LOAD_NAME': set(['a', 'b']), 'STORE_NAME': set(['x'])}
How can I do this?
Read the source code for the dis module and you'll see that it's easy to do your own disassembly and generate whatever output format you like. Here's some code that generates the sequence of instructions in a code object, together with their arguments:
from opcode import *
def disassemble(co):
    """
    Disassemble a code object and generate its instructions.

    Yields a 1-tuple ``(opname,)`` for an instruction without an argument and
    a 2-tuple ``(opname, arg)`` otherwise. ``arg`` is the resolved constant,
    name, local/free variable or comparison operator where the opcode has
    one, the ``repr`` of the target address for relative jumps, and the raw
    argument (as reported by the decoder in use) for everything else.
    """
    import dis  # local import: the snippet itself only star-imports ``opcode``

    get_instructions = getattr(dis, 'get_instructions', None)
    if get_instructions is not None:
        # Python 3.4+: let the standard library decode the bytecode. The
        # hand-written decoder below assumes every instruction is one opcode
        # byte optionally followed by a two-byte argument.
        for ins in get_instructions(co):
            if ins.arg is None:
                yield ins.opname,
            elif ins.opcode in hasjrel:
                yield ins.opname, repr(ins.argval)
            else:
                yield ins.opname, ins.argval
        return

    # bytearray yields integers whether co_code is a str or a bytes object.
    code = bytearray(co.co_code)
    n = len(code)
    extended_arg = 0
    i = 0
    free = None
    while i < n:
        op = code[i]
        i = i + 1
        if op < HAVE_ARGUMENT:
            yield opname[op],
        else:
            oparg = code[i] + code[i + 1] * 256 + extended_arg
            extended_arg = 0
            i = i + 2
            if op == EXTENDED_ARG:
                # Supplies the high 16 bits of the NEXT instruction's argument.
                extended_arg = oparg * 65536
            if op in hasconst:
                arg = co.co_consts[oparg]
            elif op in hasname:
                arg = co.co_names[oparg]
            elif op in hasjrel:
                arg = repr(i + oparg)
            elif op in haslocal:
                arg = co.co_varnames[oparg]
            elif op in hascompare:
                arg = cmp_op[oparg]
            elif op in hasfree:
                if free is None:
                    free = co.co_cellvars + co.co_freevars
                arg = free[oparg]
            else:
                arg = oparg
            yield opname[op], arg
And here's an example disassembly.
>>> def f(x):
... return x + 1
...
>>> list(disassemble(f.func_code))
[('LOAD_FAST', 'x'), ('LOAD_CONST', 1), ('BINARY_ADD',), ('RETURN_VALUE',)]
You can easily transform this into the dictionary-of-sets data structure you want:
>>> from collections import defaultdict
>>> d = defaultdict(set)
>>> for op in disassemble(f.func_code):
... if len(op) == 2:
... d[op[0]].add(op[1])
...
>>> d
defaultdict(<type 'set'>, {'LOAD_FAST': set(['x']), 'LOAD_CONST': set([1])})
(Or you could generate the dictionary-of-sets data structure directly.)
Note that in your application you probably don't actually need look up the name for each opcode. Instead, you could look up the opcodes you need in the opcode.opmap dictionary and create named constants, perhaps like this:
LOAD_FAST = opmap['LOAD_FAST'] # actual value is 124
...
for var in disassembly[LOAD_FAST]:
...
Update: in Python 3.4 you can use the new dis.get_instructions:
>>> def f(x):
... return x + 1
>>> import dis
>>> list(dis.get_instructions(f))
[Instruction(opname='LOAD_FAST', opcode=124, arg=0, argval='x',
argrepr='x', offset=0, starts_line=1, is_jump_target=False),
Instruction(opname='LOAD_CONST', opcode=100, arg=1, argval=1,
argrepr='1', offset=3, starts_line=None, is_jump_target=False),
Instruction(opname='BINARY_ADD', opcode=23, arg=None, argval=None,
argrepr='', offset=6, starts_line=None, is_jump_target=False),
Instruction(opname='RETURN_VALUE', opcode=83, arg=None, argval=None,
argrepr='', offset=7, starts_line=None, is_jump_target=False)]
I think the challenge here is to capture the output of a dis rather than parsing the output and create a dictionary. The reason I will not cover the second part is, the format and the fields (key, value) of the dictionary is not mentioned and its trivial.
As I mentioned, the reason its a challenge to capture the OP of dis is, its a print rather than a return, but this can be captured through context manager
def foo(co):
    """Return, as a string, the text ``dis.disassemble`` prints for ``co``.

    Args:
        co: A function object; its code object is disassembled.

    Returns:
        Everything written to ``sys.stdout`` while disassembling.
    """
    import dis
    import sys
    from contextlib import contextmanager
    try:
        from cStringIO import StringIO  # Python 2
    except ImportError:
        from io import StringIO  # Python 3

    @contextmanager
    def captureStdOut(output):
        # Swap sys.stdout for ``output`` and always put the original back,
        # even if the body raises.
        stdout = sys.stdout
        sys.stdout = output
        try:
            yield
        finally:
            sys.stdout = stdout

    out = StringIO()
    with captureStdOut(out):
        # ``__code__`` is available on both Python 2.6+ and Python 3 functions.
        dis.disassemble(co.__code__)
    return out.getvalue()
import dis
import re
# Map each opcode name found in foo's own disassembly to the remainder of its
# line; when an opcode occurs more than once, the last occurrence wins.
# The pattern is a raw string so that ``\s`` is not treated as a string escape.
dict(re.findall(r"^.*?([A-Z_]+)\s+(.*)$", line)[0]
     for line in foo(foo).splitlines()
     if line.strip())
{'LOAD_CONST': '0 (None)', 'WITH_CLEANUP': '', 'SETUP_WITH': '21 (to 107)', 'STORE_DEREF': '0 (sys)', 'POP_TOP': '', 'LOAD_FAST': '4 (out)', 'MAKE_CLOSURE': '0', 'STORE_FAST': '4 (out)', 'IMPORT_FROM': '4 (StringIO)', 'LOAD_GLOBAL': '5 (dis)', 'END_FINALLY': '', 'RETURN_VALUE': '', 'LOAD_CLOSURE': '0 (sys)', 'BUILD_TUPLE': '1', 'CALL_FUNCTION': '0', 'LOAD_ATTR': '8 (getvalue)', 'IMPORT_NAME': '3 (cStringIO)', 'POP_BLOCK': ''}
>>>
This is what I have so far:
# Read the whole file, closing it afterwards, then split the text into lines.
with open('ex1.txt') as EX1:
    EX1READ = EX1.read()
# splitlines(0): keepends is false, so the line endings are dropped.
EX1READ.splitlines(0)
['jk43:23 Marfield Lane:Plainview:NY:10023',
'axe99:315 W. 115th Street, Apt. 11B:New York:NY:10027',
'jab44:23 Rivington Street, Apt. 3R:New York:NY:10002',
'ap172:19 Boxer Rd.:New York:NY:10005',
'jb23:115 Karas Dr.:Jersey City:NJ:07127',
'jb29:119 Xylon Dr.:Jersey City:NJ:07127',
'ak9:234 Main Street:Philadelphia:PA:08990']
I'd like to be able to just grab the userId from this list and print it alphabetized. Any hints would be great.
# Collect [user id, number] pairs from the first space-delimited token of
# every line, e.g. "jk43:23 Marfield Lane:..." -> ["jk43", "23"].
userIds = []
with open('ex1.txt') as EX1:
    X1READ = EX1.readlines()
for line in X1READ:
    parts = line.split(" ")[0].split(":")
    useridname = parts[0]
    userid = parts[1]
    userIds.append([useridname, userid])
I'm sure there are more Pythonic ways to do this, but my method will return an list of lists, where each child list in the parent list is formatted like this:
["jk43", "23"]
So to get the first user id and id number, you'd do this:
firstUserId = userIds[0][0] + ": " + userIds[0][1]
Which would output
"jk43: 23"
To sort the list of IDs, you'd do something like this:
# Sort the [user id, number] pairs by the user id (first element of each pair).
userIds = sorted(userIds, key=lambda pair: pair[0])
Assuming the part before the first ":" is the userID you could do it in a more pythonic way like that:
# The user ID is everything before the first ":" on each line.
with open("ex1.txt") as f:
    userIDs = [line.split(":", 1)[0] for line in f]
# Single-argument print(...) behaves the same on Python 2 and Python 3.
print("\n".join(sorted(userIDs)))
This does it:
# Text mode: splitting on the str ':' fails when lines are read as bytes.
with open('ex1.txt', 'r') as f:
    IDs = [line.split(':')[0] for line in f]
print(sorted(IDs))
Prints:
['ak9', 'ap172', 'axe99', 'jab44', 'jb23', 'jb29', 'jk43']
If your user id's like jk43:23 use IDs.append(line.split(' ')[0]) and that prints:
['ak9:234', 'ap172:19', 'axe99:315', 'jab44:23', 'jb23:115', 'jb29:119', 'jk43:23']
If your user ids are the number only, use IDs.append(int(line.split(' ')[0].split(':')[1])) which prints:
[19, 23, 23, 115, 119, 234, 315]