Python equivalent of unix cksum function - python

I've been looking for the equivalent python method for the unix cksum command:
http://pubs.opengroup.org/onlinepubs/7990989775/xcu/cksum.html
$ cksum ./temp.bin
1605138151 712368 ./temp.bin
So far I have found the zlib.crc32() function
>>> import zlib
>>> f = open('./temp.bin','rb')
>>> data = f.read()
>>> zlib.crc32(data)
1128751837
However this code appears to produce different results. As far as I can tell this should be using the same crc polynomial but I imagine there must be some difference in byte order or initial values. Can anyone shed some light on this for me?

Found a snippet here that implements a compatible cksum in python:
"""
This module implements the cksum command found in most UNIXes in pure
python.
The constants and routine are cribbed from the POSIX man page
"""
import sys
crctab = [ 0x00000000, 0x04c11db7, 0x09823b6e, 0x0d4326d9, 0x130476dc,
0x17c56b6b, 0x1a864db2, 0x1e475005, 0x2608edb8, 0x22c9f00f,
0x2f8ad6d6, 0x2b4bcb61, 0x350c9b64, 0x31cd86d3, 0x3c8ea00a,
0x384fbdbd, 0x4c11db70, 0x48d0c6c7, 0x4593e01e, 0x4152fda9,
0x5f15adac, 0x5bd4b01b, 0x569796c2, 0x52568b75, 0x6a1936c8,
0x6ed82b7f, 0x639b0da6, 0x675a1011, 0x791d4014, 0x7ddc5da3,
0x709f7b7a, 0x745e66cd, 0x9823b6e0, 0x9ce2ab57, 0x91a18d8e,
0x95609039, 0x8b27c03c, 0x8fe6dd8b, 0x82a5fb52, 0x8664e6e5,
0xbe2b5b58, 0xbaea46ef, 0xb7a96036, 0xb3687d81, 0xad2f2d84,
0xa9ee3033, 0xa4ad16ea, 0xa06c0b5d, 0xd4326d90, 0xd0f37027,
0xddb056fe, 0xd9714b49, 0xc7361b4c, 0xc3f706fb, 0xceb42022,
0xca753d95, 0xf23a8028, 0xf6fb9d9f, 0xfbb8bb46, 0xff79a6f1,
0xe13ef6f4, 0xe5ffeb43, 0xe8bccd9a, 0xec7dd02d, 0x34867077,
0x30476dc0, 0x3d044b19, 0x39c556ae, 0x278206ab, 0x23431b1c,
0x2e003dc5, 0x2ac12072, 0x128e9dcf, 0x164f8078, 0x1b0ca6a1,
0x1fcdbb16, 0x018aeb13, 0x054bf6a4, 0x0808d07d, 0x0cc9cdca,
0x7897ab07, 0x7c56b6b0, 0x71159069, 0x75d48dde, 0x6b93dddb,
0x6f52c06c, 0x6211e6b5, 0x66d0fb02, 0x5e9f46bf, 0x5a5e5b08,
0x571d7dd1, 0x53dc6066, 0x4d9b3063, 0x495a2dd4, 0x44190b0d,
0x40d816ba, 0xaca5c697, 0xa864db20, 0xa527fdf9, 0xa1e6e04e,
0xbfa1b04b, 0xbb60adfc, 0xb6238b25, 0xb2e29692, 0x8aad2b2f,
0x8e6c3698, 0x832f1041, 0x87ee0df6, 0x99a95df3, 0x9d684044,
0x902b669d, 0x94ea7b2a, 0xe0b41de7, 0xe4750050, 0xe9362689,
0xedf73b3e, 0xf3b06b3b, 0xf771768c, 0xfa325055, 0xfef34de2,
0xc6bcf05f, 0xc27dede8, 0xcf3ecb31, 0xcbffd686, 0xd5b88683,
0xd1799b34, 0xdc3abded, 0xd8fba05a, 0x690ce0ee, 0x6dcdfd59,
0x608edb80, 0x644fc637, 0x7a089632, 0x7ec98b85, 0x738aad5c,
0x774bb0eb, 0x4f040d56, 0x4bc510e1, 0x46863638, 0x42472b8f,
0x5c007b8a, 0x58c1663d, 0x558240e4, 0x51435d53, 0x251d3b9e,
0x21dc2629, 0x2c9f00f0, 0x285e1d47, 0x36194d42, 0x32d850f5,
0x3f9b762c, 0x3b5a6b9b, 0x0315d626, 0x07d4cb91, 0x0a97ed48,
0x0e56f0ff, 0x1011a0fa, 0x14d0bd4d, 0x19939b94, 0x1d528623,
0xf12f560e, 0xf5ee4bb9, 0xf8ad6d60, 0xfc6c70d7, 0xe22b20d2,
0xe6ea3d65, 0xeba91bbc, 0xef68060b, 0xd727bbb6, 0xd3e6a601,
0xdea580d8, 0xda649d6f, 0xc423cd6a, 0xc0e2d0dd, 0xcda1f604,
0xc960ebb3, 0xbd3e8d7e, 0xb9ff90c9, 0xb4bcb610, 0xb07daba7,
0xae3afba2, 0xaafbe615, 0xa7b8c0cc, 0xa379dd7b, 0x9b3660c6,
0x9ff77d71, 0x92b45ba8, 0x9675461f, 0x8832161a, 0x8cf30bad,
0x81b02d74, 0x857130c3, 0x5d8a9099, 0x594b8d2e, 0x5408abf7,
0x50c9b640, 0x4e8ee645, 0x4a4ffbf2, 0x470cdd2b, 0x43cdc09c,
0x7b827d21, 0x7f436096, 0x7200464f, 0x76c15bf8, 0x68860bfd,
0x6c47164a, 0x61043093, 0x65c52d24, 0x119b4be9, 0x155a565e,
0x18197087, 0x1cd86d30, 0x029f3d35, 0x065e2082, 0x0b1d065b,
0x0fdc1bec, 0x3793a651, 0x3352bbe6, 0x3e119d3f, 0x3ad08088,
0x2497d08d, 0x2056cd3a, 0x2d15ebe3, 0x29d4f654, 0xc5a92679,
0xc1683bce, 0xcc2b1d17, 0xc8ea00a0, 0xd6ad50a5, 0xd26c4d12,
0xdf2f6bcb, 0xdbee767c, 0xe3a1cbc1, 0xe760d676, 0xea23f0af,
0xeee2ed18, 0xf0a5bd1d, 0xf464a0aa, 0xf9278673, 0xfde69bc4,
0x89b8fd09, 0x8d79e0be, 0x803ac667, 0x84fbdbd0, 0x9abc8bd5,
0x9e7d9662, 0x933eb0bb, 0x97ffad0c, 0xafb010b1, 0xab710d06,
0xa6322bdf, 0xa2f33668, 0xbcb4666d, 0xb8757bda, 0xb5365d03,
0xb1f740b4 ]
UNSIGNED = lambda n: n & 0xffffffff
def memcrc(b):
n = len(b)
i = c = s = 0
for ch in b:
c = ord(ch)
tabidx = (s>>24)^c
s = UNSIGNED((s << 8)) ^ crctab[tabidx]
while n:
c = n & 0377
n = n >> 8
s = UNSIGNED(s << 8) ^ crctab[(s >> 24) ^ c]
return UNSIGNED(~s)
if __name__ == '__main__':
fname = sys.argv[-1]
buffer = open(fname, 'rb').read()
print "%d\t%d\t%s" % (memcrc(buffer), len(buffer), fname)
Tested briefly, seems to work well.

Same for python3
import sys
crctab = [ 0x00000000, 0x04c11db7, 0x09823b6e, 0x0d4326d9, 0x130476dc,
0x17c56b6b, 0x1a864db2, 0x1e475005, 0x2608edb8, 0x22c9f00f,
0x2f8ad6d6, 0x2b4bcb61, 0x350c9b64, 0x31cd86d3, 0x3c8ea00a,
0x384fbdbd, 0x4c11db70, 0x48d0c6c7, 0x4593e01e, 0x4152fda9,
0x5f15adac, 0x5bd4b01b, 0x569796c2, 0x52568b75, 0x6a1936c8,
0x6ed82b7f, 0x639b0da6, 0x675a1011, 0x791d4014, 0x7ddc5da3,
0x709f7b7a, 0x745e66cd, 0x9823b6e0, 0x9ce2ab57, 0x91a18d8e,
0x95609039, 0x8b27c03c, 0x8fe6dd8b, 0x82a5fb52, 0x8664e6e5,
0xbe2b5b58, 0xbaea46ef, 0xb7a96036, 0xb3687d81, 0xad2f2d84,
0xa9ee3033, 0xa4ad16ea, 0xa06c0b5d, 0xd4326d90, 0xd0f37027,
0xddb056fe, 0xd9714b49, 0xc7361b4c, 0xc3f706fb, 0xceb42022,
0xca753d95, 0xf23a8028, 0xf6fb9d9f, 0xfbb8bb46, 0xff79a6f1,
0xe13ef6f4, 0xe5ffeb43, 0xe8bccd9a, 0xec7dd02d, 0x34867077,
0x30476dc0, 0x3d044b19, 0x39c556ae, 0x278206ab, 0x23431b1c,
0x2e003dc5, 0x2ac12072, 0x128e9dcf, 0x164f8078, 0x1b0ca6a1,
0x1fcdbb16, 0x018aeb13, 0x054bf6a4, 0x0808d07d, 0x0cc9cdca,
0x7897ab07, 0x7c56b6b0, 0x71159069, 0x75d48dde, 0x6b93dddb,
0x6f52c06c, 0x6211e6b5, 0x66d0fb02, 0x5e9f46bf, 0x5a5e5b08,
0x571d7dd1, 0x53dc6066, 0x4d9b3063, 0x495a2dd4, 0x44190b0d,
0x40d816ba, 0xaca5c697, 0xa864db20, 0xa527fdf9, 0xa1e6e04e,
0xbfa1b04b, 0xbb60adfc, 0xb6238b25, 0xb2e29692, 0x8aad2b2f,
0x8e6c3698, 0x832f1041, 0x87ee0df6, 0x99a95df3, 0x9d684044,
0x902b669d, 0x94ea7b2a, 0xe0b41de7, 0xe4750050, 0xe9362689,
0xedf73b3e, 0xf3b06b3b, 0xf771768c, 0xfa325055, 0xfef34de2,
0xc6bcf05f, 0xc27dede8, 0xcf3ecb31, 0xcbffd686, 0xd5b88683,
0xd1799b34, 0xdc3abded, 0xd8fba05a, 0x690ce0ee, 0x6dcdfd59,
0x608edb80, 0x644fc637, 0x7a089632, 0x7ec98b85, 0x738aad5c,
0x774bb0eb, 0x4f040d56, 0x4bc510e1, 0x46863638, 0x42472b8f,
0x5c007b8a, 0x58c1663d, 0x558240e4, 0x51435d53, 0x251d3b9e,
0x21dc2629, 0x2c9f00f0, 0x285e1d47, 0x36194d42, 0x32d850f5,
0x3f9b762c, 0x3b5a6b9b, 0x0315d626, 0x07d4cb91, 0x0a97ed48,
0x0e56f0ff, 0x1011a0fa, 0x14d0bd4d, 0x19939b94, 0x1d528623,
0xf12f560e, 0xf5ee4bb9, 0xf8ad6d60, 0xfc6c70d7, 0xe22b20d2,
0xe6ea3d65, 0xeba91bbc, 0xef68060b, 0xd727bbb6, 0xd3e6a601,
0xdea580d8, 0xda649d6f, 0xc423cd6a, 0xc0e2d0dd, 0xcda1f604,
0xc960ebb3, 0xbd3e8d7e, 0xb9ff90c9, 0xb4bcb610, 0xb07daba7,
0xae3afba2, 0xaafbe615, 0xa7b8c0cc, 0xa379dd7b, 0x9b3660c6,
0x9ff77d71, 0x92b45ba8, 0x9675461f, 0x8832161a, 0x8cf30bad,
0x81b02d74, 0x857130c3, 0x5d8a9099, 0x594b8d2e, 0x5408abf7,
0x50c9b640, 0x4e8ee645, 0x4a4ffbf2, 0x470cdd2b, 0x43cdc09c,
0x7b827d21, 0x7f436096, 0x7200464f, 0x76c15bf8, 0x68860bfd,
0x6c47164a, 0x61043093, 0x65c52d24, 0x119b4be9, 0x155a565e,
0x18197087, 0x1cd86d30, 0x029f3d35, 0x065e2082, 0x0b1d065b,
0x0fdc1bec, 0x3793a651, 0x3352bbe6, 0x3e119d3f, 0x3ad08088,
0x2497d08d, 0x2056cd3a, 0x2d15ebe3, 0x29d4f654, 0xc5a92679,
0xc1683bce, 0xcc2b1d17, 0xc8ea00a0, 0xd6ad50a5, 0xd26c4d12,
0xdf2f6bcb, 0xdbee767c, 0xe3a1cbc1, 0xe760d676, 0xea23f0af,
0xeee2ed18, 0xf0a5bd1d, 0xf464a0aa, 0xf9278673, 0xfde69bc4,
0x89b8fd09, 0x8d79e0be, 0x803ac667, 0x84fbdbd0, 0x9abc8bd5,
0x9e7d9662, 0x933eb0bb, 0x97ffad0c, 0xafb010b1, 0xab710d06,
0xa6322bdf, 0xa2f33668, 0xbcb4666d, 0xb8757bda, 0xb5365d03,
0xb1f740b4 ]
UNSIGNED = lambda n: n & 0xffffffff
def memcrc(b):
n = len(b)
i = c = s = 0
for c in b:
tabidx = (s>>24)^c
s = UNSIGNED((s << 8)) ^ crctab[tabidx]
while n:
c = n & 0o0377
n = n >> 8
s = UNSIGNED(s << 8) ^ crctab[(s >> 24) ^ c]
return UNSIGNED(~s)
if __name__ == '__main__':
fname = sys.argv[-1]
buffer = open(fname, 'rb').read()
print("%d\t%d\t%s" % (memcrc(buffer), len(buffer), fname))

Related

Converting unicode to gsm encoding in golang

I am working on migrating my project in python to golang and I have a use case for converting utf-8 encoding to corresponding gsm ones if possible. I am very new to go, it will be really helpful to get some documentation or examples around it.
For example: Python snippet
ằ as unicode -> a after gsm encoding
for character in text:
if is_gsm(character):
transliterated_text += character.encode('utf-8')
continue
if is_nonascii_utf8(character):
transliterated_char = unidecode.unidecode(character)
if transliterated_char == '?' or transliterated_char == '':
gsm = False
break
if transliterated_char != rc:
character = transliterated_char
transliterated_text += character
else:
transliterated_text += character.encode('utf-8')
if gsm and is_gsm(transliterated_text.decode('utf-8')):
text = transliterated_text.decode('utf-8')
Thanks
You can do it in this way:
package main
import (
"fmt"
"regexp"
"strings"
)
var utf8GsmChars = map[string]string{
`#`: "\x00", `£`: "\x01", `$`: "\x02",
`¥`: "\x03", `è`: "\x04", `é`: "\x05",
`ù`: "\x06", `ì`: "\x07", `ò`: "\x08",
`Ç`: "\x09", `Ø`: "\x0B", `ø`: "\x0C",
`Å`: "\x0E", `Δ`: "\x10", `_`: "\x11",
`Φ`: "\x12", `Γ`: "\x13", `Λ`: "\x14",
`Ω`: "\x15", `Π`: "\x16", `Ψ`: "\x17",
`Σ`: "\x18", `Θ`: "\x19", `Ξ`: "\x1A",
`Æ`: "\x1C", `æ`: "\x1D", `ß`: "\x1E",
`É`: "\x1F", `Ä`: "\x5B", `Ö`: "\x5C",
`Ñ`: "\x5D", `Ü`: "\x5E", `§`: "\x5F",
`¿`: "\x60", `ä`: "\x7B", `ö`: "\x7C",
`ñ`: "\x7D", `ü`: "\x7E", `à`: "\x7F",
`^`: "\x1B\x14", `{`: "\x1B\x28",
`}`: "\x1B\x29", `\`: "\x1B\x2F",
`[`: "\x1B\x3C", `~`: "\x1B\x3D",
`]`: "\x1B\x3E", `|`: "\x1B\x40",
`€`: "\x1B\x65",
}
var gsmUtf8Chars = map[string]string{
"\x00": "\x40",
"\x01": "\xC2\xA3",
"\x02": "\x24",
"\x03": "\xC2\xA5",
"\x04": "\xC3\xA8",
"\x05": "\xC3\xA9",
"\x06": "\xC3\xB9",
"\x07": "\xC3\xAC",
"\x08": "\xC3\xB2",
"\x09": "\xC3\x87",
"\x0B": "\xC3\x98",
"\x0C": "\xC3\xB8",
"\x0E": "\xC3\xB8",
"\x0F": "\xC3\xA5",
"\x10": "\xCE\x94",
"\x11": "\x5F",
"\x12": "\xCE\xA6",
"\x13": "\xCE\x93",
"\x14": "\xCE\xA0",
"\x15": "\xCE\xA9",
"\x16": "\xCE\xA0",
"\x17": "\xCE\xA8",
"\x18": "\xCE\xA3",
"\x19": "\xCE\x98",
"\x1A": "\xCE\x9E",
"\x1C": "\xC3\x86",
"\x1D": "\xC3\xA6",
"\x1E": "\xC3\x9F",
"\x1F": "\xC3\x89",
"\x20": "\x20",
"\x24": "\xC2\xA4",
"\x40": "\xC2\xA1",
"\x5B": "\xC3\x84",
"\x5C": "\xC3\x96",
"\x5D": "\xC3\x91",
"\x5E": "\xC3\x9C",
"\x5F": "\xC2\xA7",
"\x60": "\xC2\xBF",
"\x7B": "\xC3\xA8",
"\x7C": "\xC3\xB6",
"\x7D": "\xC3\xB1",
"\x7E": "\xC3\xBC",
"\x7F": "\xC3\xA0",
}
func UTF8ToGsm0338(text string) string {
var s string = text
for k, v := range utf8GsmChars {
s = strings.Replace(s, k, v, -1)
}
re := regexp.MustCompile("[\\x{0080}-\\x{10FFFF}]")
s = re.ReplaceAllString(s, "?")
return s
}
func GSM0338ToUTF8(text string) string {
var s string = text
for k, v := range gsmUtf8Chars {
s = strings.Replace(s, k, v, -1)
}
return s
}
func main() {
s := "Hello World"
gsm := UTF8ToGsm0338(s)
utf8 := GSM0338ToUTF8(gsm)
fmt.Printf("word before: %s\nword after gsm: %s\nword after utf8: %s\n", s, gsm, utf8)
}

Custom encoding and decoding UTF special characters

Just for fun I've been embedding text in images. The following code is a distillation and demonstration of the encoding and decoding mechanism I am using.
class encChar:
def __init__(self,char):
self.p = self.enc(char)
def enc(self,char):
d = bin(ord(char)).split('b')[1]
while len(d)<8:
d = "0"+d
rdif = int(d[0])*4 + int(d[1])*2 + int(d[2])*1
gdif = int(d[3])*2 + int(d[4])*1
bdif = int(d[5])*4 + int(d[6])*2 + int(d[7])*1
return (rdif,gdif,bdif)
def dec(self):
dmap = {0:"000",1:"001",2:"010",3:"011",4:"100",5:"101",6:"110",7:"111"}
r = dmap[self.p[0]]
g = dmap[self.p[1]][1:]
b = dmap[self.p[2]]
n = int(r+g+b,2)
return chr(int(r+g+b,2))
testStr = """Languages
Deutsch
Español
Français
한국어
Italiano
Русский
Tagalog
Tiếng Việt
中文"""
result= ""
for line in testStr.split("\n"):
result+=line+"\n"
print(line)
print("".join([encChar(k).dec() for k in line]))
result+="".join([encChar(k).dec() for k in line])+"\n"
print()
result+="\n"
with open("errorop.txt","w",encoding="utf8") as op:
op.write(result)
Which produces the following document:
Languages
Languages
Deutsch
Deutsch
Español
Español
Français
Français
한국어
Õ­Å
Italiano
Italiano
Русский
Tagalog
Tagalog
Tiếng Việt
Tiõng Viöt
中文
Ë
As you can see several runes are altered by the process and I'm wondering how I can preserve them through this process.

python regex gives 1 instead of 01

Taken reference Why doesn't [01-12] range work as expected?
m = re.search(r"(\w+)\[([0-9]+)\:([0-9]+)\]", DUNESX[01:44])
or
m = re.search(r"(\w+)\[(0[1-9]|1[0-9]|2[0-9]|3[0-9]|4[0-9])\:([0-9]+)\]", DUNESX[01:44])
or
m = re.search(r"(\w+)\[(0?[1-9]|1[0-9]|2[0-9]|3[0-9]|4[0-9])\:([0-9]+)\]", DUNESX[01:44])
or
m = re.search(r"(\w+)\[([0-1][0-9]+)\:([0-9]+)\]", DUNESX[01:44])
But output from above expressions are
['DUNESX1', 'DUNESX2', 'DUNESX3', 'DUNESX4', 'DUNESX5', 'DUNESX6', 'DUNESX7', 'DUNESX8', 'DUNESX9', 'DUNESX10', 'DUNESX11', 'DUNESX12', 'DUNESX13', 'DUNESX14', 'DUNESX15', 'DUNESX16', 'DUNESX17', 'DUNESX18', 'DUNESX19', 'DUNESX20', 'DUNESX21', 'DUNESX22', 'DUNESX23', 'DUNESX24', 'DUNESX25', 'DUNESX26', 'DUNESX27', 'DUNESX28', 'DUNESX29', 'DUNESX30', 'DUNESX31', 'DUNESX32', 'DUNESX33', 'DUNESX34', 'DUNESX35', 'DUNESX36', 'DUNESX37', 'DUNESX38', 'DUNESX39', 'DUNESX40', 'DUNESX41', 'DUNESX42', 'DUNESX43', 'DUNESX44']
Doesn't provide desired output like
['DUNESX01', 'DUNESX02', 'DUNESX03', 'DUNESX04', 'DUNESX05', 'DUNESX06', 'DUNESX07', 'DUNESX08', 'DUNESX09', 'DUNESX10', 'DUNESX11', 'DUNESX12', 'DUNESX13', 'DUNESX14', 'DUNESX15', 'DUNESX16', 'DUNESX17', 'DUNESX18', 'DUNESX19', 'DUNESX20', 'DUNESX21', 'DUNESX22', 'DUNESX23', 'DUNESX24', 'DUNESX25', 'DUNESX26', 'DUNESX27', 'DUNESX28', 'DUNESX29', 'DUNESX30', 'DUNESX31', 'DUNESX32', 'DUNESX33', 'DUNESX34', 'DUNESX35', 'DUNESX36', 'DUNESX37', 'DUNESX38', 'DUNESX39', 'DUNESX40', 'DUNESX41', 'DUNESX42', 'DUNESX43', 'DUNESX44']
Complete code is
import re
group_list = ['DUNESX[01:44]']
host_list = getgrandchild(group_list)
def getgrandchild(child):
nodelist = []
if child is None:
return
for nodes in child:
print(nodes)
if re.match(r".*(\[[0-1][0-9]+\:[0-9]+\])", nodes):
m = re.search(r"(\w+)\[([0-9]+)\:([0-9]+)\]", nodes)
lb = int(m.group(2))
ub = int(m.group(3))
for i in range(lb, ub+1):
nodelist.append(m.group(1)+str(i))
elif re.match(r"(\w+)", nodes):
m = re.search(r"(\w+)", nodes)
nodelist.append(m.group(1))
I think I understand what you are trying to do. Here is a code that helps:
def getgrandchild(child):
nodelist = []
for nodes in child:
m = re.search("(\w+)\[([0-9]+)\:([0-9]+)\]",nodes)
for i in range(int(m.group(2)),int(m.group(3))+1):
nodelist.append(m.group(1)+str(i).zfill(len(m.group(2))))
return nodelist
You can see i've skipped some steps, but you can have them. I've focused on the main part.
We use zfill to add numbers of the form '001' or '01', which is explained here.
So, for this code, if you give:
getgrandchild(['DUNESX[01:44]'])
you get:
['DUNESX01', 'DUNESX02', 'DUNESX03', 'DUNESX04', 'DUNESX05', 'DUNESX06', 'DUNESX07', 'DUNESX08', 'DUNESX09', 'DUNESX10', 'DUNESX11', 'DUNESX12', 'DUNESX13', 'DUNESX14', 'DUNESX15', 'DUNESX16', 'DUNESX17', 'DUNESX18', 'DUNESX19', 'DUNESX20', 'DUNESX21', 'DUNESX22', 'DUNESX23', 'DUNESX24', 'DUNESX25', 'DUNESX26', 'DUNESX27', 'DUNESX28', 'DUNESX29', 'DUNESX30', 'DUNESX31', 'DUNESX32', 'DUNESX33', 'DUNESX34', 'DUNESX35', 'DUNESX36', 'DUNESX37', 'DUNESX38', 'DUNESX39', 'DUNESX40', 'DUNESX41', 'DUNESX42', 'DUNESX43', 'DUNESX44']
Also, if you give:
getgrandchild(['PYTHON[001:025]'])
you get:
['PYTHON001', 'PYTHON002', 'PYTHON003', 'PYTHON004', 'PYTHON005', 'PYTHON006', 'PYTHON007', 'PYTHON008', 'PYTHON009', 'PYTHON010', 'PYTHON011', 'PYTHON012', 'PYTHON013', 'PYTHON014', 'PYTHON015', 'PYTHON016', 'PYTHON017', 'PYTHON018', 'PYTHON019', 'PYTHON020', 'PYTHON021', 'PYTHON022', 'PYTHON023', 'PYTHON024', 'PYTHON025']

Iterating through a yaml file

I am trying to iterate through a yaml file. I want to extract the contents
ipv6: "2031:31:31:31:: 2032:32:32:32:: 2033:33:33:33:: 2034:34:34:34:: 2035:35:35:35::"
Below is my code. I get the below error :
for x in self.dhcp_dict['subnets'][sub]['ipv4'].split():
TypeError: string indices must be integers, not str
Can anyone tell em where i am going wrong? Thanks
Jessi
Code:
dict = yaml.load(fd)
self.server_dict = dict['server_configs']
self.interface_dict = self.server_dict['interface']
self.dhcp_dict = self.server_dict['dhcp_config']
def configureDhcpv6(self):
pdb.set_trace()
log.info ("Writing the dhcp.conf file")
infile = open('v6.txt', 'w+')
self.lease_time = self.dhcp_dict['lease_time']
infile.write("default-lease-time %s; \n" %(self.lease_time))
infile.write("preferred-lifetime 604800;\noption dhcp-renewal-time 604800;\noption dhcp-rebinding-time 7200;\noption dhcp6.domain-search cisco.com;\noption dhcp6.preference 255;\noption dhcp6.rapid-commit;\noption dhcp6.info-refresh-time 21600;\ndhcpv6-lease-file-name /var/lib/dhcpd/dhcpd6.leases;\nauthoritative;\nlog-facility local7;\n\n")
for sub in self.dhcp_dict['subnets']:
if (sub == 'relay'):
for x in self.dhcp_dict['subnets'][sub]['ipv6'].split():
range6 = sub + "11" + " " + sub + "254"
infile.write("Subnet 6 %s/64 {\n" % (sub))
infile.write(" range6 %s;\n}\n\n" % (range6))
infile.close()
Yaml file:
dhcp_config:
lease_time: "300"
relay_server: "5.5.5.0"
subnets:
relay:
ipv4: "30.30.30.0 31.31.31.0 32.32.32.0 33.33.33.0 34.34.34.0 35.35.35.0"
ipv6: "2031:31:31:31:: 2032:32:32:32:: 2033:33:33:33:: 2034:34:34:34:: 2035:35:35:35::"
smart_relay: "31.1.1.0 32.1.1.0 33.1.1.0 34.1.1.0 35.1.1.0"
snoop: "36.36.36.0 37.37.37.0 38.38.38.0 39.39.39.0 30.30.30.0"

Disassemble Python code to dictionary

I'd like to develop a small debugging tool for Python programs. For the "Dynamic Slicing" feature, I need to find the variables that are accessed in a statement, and find the type of access (read or write) for those variables.
But the only disassembly feature that's built into Python is dis.disassemble, and that just prints the disassembly to standard output:
>>> dis.disassemble(compile('x = a + b', '', 'single'))
1 0 LOAD_NAME 0 (a)
3 LOAD_NAME 1 (b)
6 BINARY_ADD
7 STORE_NAME 2 (x)
10 LOAD_CONST 0 (None)
13 RETURN_VALUE
I'd like to be able to transform the disassembly into a dictionary of sets describing which variables are used by each instruction, like this:
>>> my_disassemble('x = a + b')
{'LOAD_NAME': set(['a', 'b']), 'STORE_NAME': set(['x'])}
How can I do this?
Read the source code for the dis module and you'll see that it's easy to do your own disassembly and generate whatever output format you like. Here's some code that generates the sequence of instructions in a code object, together with their arguments:
from opcode import *
def disassemble(co):
"""
Disassemble a code object and generate its instructions.
"""
code = co.co_code
n = len(code)
extended_arg = 0
i = 0
free = None
while i < n:
c = code[i]
op = ord(c)
i = i+1
if op < HAVE_ARGUMENT:
yield opname[op],
else:
oparg = ord(code[i]) + ord(code[i+1])*256 + extended_arg
extended_arg = 0
i = i+2
if op == EXTENDED_ARG:
extended_arg = oparg*65536L
if op in hasconst:
arg = co.co_consts[oparg]
elif op in hasname:
arg = co.co_names[oparg]
elif op in hasjrel:
arg = repr(i + oparg)
elif op in haslocal:
arg = co.co_varnames[oparg]
elif op in hascompare:
arg = cmp_op[oparg]
elif op in hasfree:
if free is None:
free = co.co_cellvars + co.co_freevars
arg = free[oparg]
else:
arg = oparg
yield opname[op], arg
And here's an example disassembly.
>>> def f(x):
... return x + 1
...
>>> list(disassemble(f.func_code))
[('LOAD_FAST', 'x'), ('LOAD_CONST', 1), ('BINARY_ADD',), ('RETURN_VALUE',)]
You can easily transform this into the dictionary-of-sets data structure you want:
>>> from collections import defaultdict
>>> d = defaultdict(set)
>>> for op in disassemble(f.func_code):
... if len(op) == 2:
... d[op[0]].add(op[1])
...
>>> d
defaultdict(<type 'set'>, {'LOAD_FAST': set(['x']), 'LOAD_CONST': set([1])})
(Or you could generate the dictionary-of-sets data structure directly.)
Note that in your application you probably don't actually need look up the name for each opcode. Instead, you could look up the opcodes you need in the opcode.opmap dictionary and create named constants, perhaps like this:
LOAD_FAST = opmap['LOAD_FAST'] # actual value is 124
...
for var in disassembly[LOAD_FAST]:
...
Update: in Python 3.4 you can use the new dis.get_instructions:
>>> def f(x):
... return x + 1
>>> import dis
>>> list(dis.get_instructions(f))
[Instruction(opname='LOAD_FAST', opcode=124, arg=0, argval='x',
argrepr='x', offset=0, starts_line=1, is_jump_target=False),
Instruction(opname='LOAD_CONST', opcode=100, arg=1, argval=1,
argrepr='1', offset=3, starts_line=None, is_jump_target=False),
Instruction(opname='BINARY_ADD', opcode=23, arg=None, argval=None,
argrepr='', offset=6, starts_line=None, is_jump_target=False),
Instruction(opname='RETURN_VALUE', opcode=83, arg=None, argval=None,
argrepr='', offset=7, starts_line=None, is_jump_target=False)]
I think the challenge here is to capture the output of a dis rather than parsing the output and create a dictionary. The reason I will not cover the second part is, the format and the fields (key, value) of the dictionary is not mentioned and its trivial.
As I mentioned, the reason its a challenge to capture the OP of dis is, its a print rather than a return, but this can be captured through context manager
def foo(co):
import sys
from contextlib import contextmanager
from cStringIO import StringIO
#contextmanager
def captureStdOut(output):
stdout = sys.stdout
sys.stdout = output
yield
sys.stdout = stdout
out = StringIO()
with captureStdOut(out):
dis.disassemble(co.func_code)
return out.getvalue()
import dis
import re
dict(re.findall("^.*?([A-Z_]+)\s+(.*)$", line)[0] for line in foo(foo).splitlines()
if line.strip())
{'LOAD_CONST': '0 (None)', 'WITH_CLEANUP': '', 'SETUP_WITH': '21 (to 107)', 'STORE_DEREF': '0 (sys)', 'POP_TOP': '', 'LOAD_FAST': '4 (out)', 'MAKE_CLOSURE': '0', 'STORE_FAST': '4 (out)', 'IMPORT_FROM': '4 (StringIO)', 'LOAD_GLOBAL': '5 (dis)', 'END_FINALLY': '', 'RETURN_VALUE': '', 'LOAD_CLOSURE': '0 (sys)', 'BUILD_TUPLE': '1', 'CALL_FUNCTION': '0', 'LOAD_ATTR': '8 (getvalue)', 'IMPORT_NAME': '3 (cStringIO)', 'POP_BLOCK': ''}
>>>

Categories