Convert "little endian" hex string to IP address in Python - python

What's the best way to turn a string in this form into an IP address: "0200A8C0". The "octets" present in the string are in reverse order, i.e. the given example string should generate 192.168.0.2.

Network address manipulation is provided by the socket module.
socket.inet_ntoa(packed_ip)
Convert a 32-bit packed IPv4 address (a string four characters in length) to its standard dotted-quad string representation (for example, ‘123.45.67.89’). This is useful when conversing with a program that uses the standard C library and needs objects of type struct in_addr, which is the C type for the 32-bit packed binary data this function takes as an argument.
You can translate your hex string to packed ip using struct.pack()
and the little endian, unsigned long format.
import socket
import struct

# Hex text whose octets are stored in reverse (little-endian) order.
s = "0200A8C0"

# Parse the hex digits as one integer, then pack that integer as a
# little-endian unsigned 32-bit value so the octets land in network order.
addr_long = int(s, 16)
packed_ip = struct.pack("<L", addr_long)

print(hex(addr_long)) # '0x200a8c0'
print(packed_ip) # '\xc0\xa8\x00\x02'
print(socket.inet_ntoa(packed_ip)) # '192.168.0.2'

>>> s = "0200A8C0"
>>> bytes = ["".join(x) for x in zip(*[iter(s)]*2)]
>>> bytes
['02', '00', 'A8', 'C0']
>>> bytes = [int(x, 16) for x in bytes]
>>> bytes
[2, 0, 168, 192]
>>> print ".".join(str(x) for x in reversed(bytes))
192.168.0.2
It is short and clear; wrap it up in a function with error checking to suit your needs.
Handy grouping functions:
def group(iterable, n=2, missing=None, longest=True):
    """Group from a single iterable into groups of n.

    Derived from http://bugs.python.org/issue1643

    Args:
        iterable: Source of items; it is consumed lazily.
        n: Size of every group; must be at least 1.
        missing: Fill value used to pad the final group when ``longest``
            is true and the input does not divide evenly.
        longest: When true, keep a padded trailing group; when false,
            drop any incomplete trailing group.

    Returns:
        A lazy iterator of n-tuples.

    Raises:
        ValueError: If ``n`` is smaller than 1.
    """
    import itertools

    if n < 1:
        raise ValueError("invalid n")
    # The same iterator repeated n times: each output tuple pulls n
    # consecutive items from the source.
    args = (iter(iterable),) * n
    if longest:
        # Python 3 calls it zip_longest; Python 2 calls it izip_longest.
        zip_longest = (getattr(itertools, "zip_longest", None)
                       or itertools.izip_longest)
        return zip_longest(*args, fillvalue=missing)
    # Python 3's built-in zip is already lazy; Python 2 needs izip.
    lazy_zip = getattr(itertools, "izip", zip)
    return lazy_zip(*args)
def group_some(iterable, n=2):
    """Yield successive lists of up to n items taken from iterable.

    Every list except possibly the last holds exactly n items; the last
    holds whatever remains. Nothing is yielded for an empty input.
    Because this is a generator, the ValueError for ``n < 1`` surfaces
    when iteration starts, not when the function is called.
    """
    if n < 1:
        raise ValueError("invalid n")
    source = iter(iterable)
    # iter(callable, sentinel) keeps calling until the callable returns
    # the sentinel -- here, an empty chunk once the source is exhausted.
    for chunk in iter(lambda: list(itertools.islice(source, n)), []):
        yield chunk

You could do something like this:
>>> s = '0200A8C0'
>>> octets = [s[i:i+2] for i in range(0, len(s), 2)]
>>> ip = [int(i, 16) for i in reversed(octets)]
>>> ip_formatted = '.'.join(str(i) for i in ip)
>>> print ip_formatted
192.168.0.2
The octet splitting could probably be done more elegantly, but I can't think of a simpler way off the top of my head.
EDIT: Or on one line:
>>> s = '0200A8C0'
>>> print '.'.join(str(int(i, 16)) for i in reversed([s[i:i+2] for i in range(0, len(s), 2)]))
192.168.0.2

My try:
a = '0200A8C0'
# Start offsets of the four two-digit hex octets.
indices = range(0, 8, 2)
# Decimal text for each octet, still in the order they appear in `a`.
data = list(map(lambda start: str(int(a[start:start + 2], 16)), indices))
# Reversing the octets gives the dotted-quad form (value shown in a REPL).
'.'.join(data[::-1])

A simple def you can make to convert from hex to decimal-ip:
def hex2ip(iphex, little_endian=False):
    """Convert a string of hex digits to a dotted-decimal address.

    Args:
        iphex: Hex digits, two per octet (e.g. ``"ac10fc40"``). The value
            is passed through ``str()`` first. A trailing unpaired digit
            is ignored.
        little_endian: When true, the octets are reversed before
            formatting, for input stored in reverse order such as
            ``"0200A8C0"``. Defaults to false, which keeps the octets in
            the order they are written.

    Returns:
        The octets as decimal numbers joined with dots.

    Raises:
        ValueError: If a digit pair is not valid hexadecimal.
    """
    digits = str(iphex)
    # Stop one short of the end so an unpaired trailing digit is skipped.
    octets = [int(digits[i:i + 2], 16) for i in range(0, len(digits) - 1, 2)]
    if little_endian:
        octets.reverse()
    return ".".join(str(octet) for octet in octets)
# And to use it:
iphex = "ac10fc40"
ip = hex2ip(iphex)
print(ip)

Related

how to convert a bytes to character in python

I have a bytes type data like this:
b"6D4B8BD5"
the data is from a chinese character using unicode-escape code.
it can be generate like this:
'测试'.encode('unicode-escape')
result:
b'\\u6d4b\\u8bd5'
how can I convert b"6D4B8BD5" to b'\u6d4b\u8bd5' or
how can I convert b"6D4B8BD5" to '测试'?
unhexlify is a function to get the bytes, then decode with the right encoding:
>>> from binascii import unhexlify
>>> s = b'6D4B8BD5'
>>> unhexlify(s).decode('utf-16be')
'测试'
>>> str = b"6D4B8BD5"
>>> chr(int(str[0:4], 16))
'测'
>>> chr(int(str[4:8], 16))
'试'
The working solution which returns the correct result and works for any string :)
Python 3.x
def convert(chars):
    """Turn a run of 4-digit hex code points into the text they spell.

    ``chars`` may be ``str`` or ASCII ``bytes``. Digits are consumed four
    at a time; an incomplete trailing group is ignored.
    """
    text = chars.decode('ascii') if isinstance(chars, bytes) else chars
    # One shared iterator handed to zip four times yields consecutive
    # 4-tuples and silently stops at the first incomplete group.
    cursor = iter(text)
    code_points = (int(''.join(quad), 16)
                   for quad in zip(cursor, cursor, cursor, cursor))
    return "".join(map(chr, code_points))
print(convert(b"6D4B8BD5"))
+++++++
#> python test123.py
测试
Second solution without using lists & etc. Easier and faster.
def convert(chars):
    """Decode consecutive 4-digit hex code points into a string.

    Accepts ``str`` or ASCII ``bytes``. Only complete groups of four
    digits are used; leftover digits at the end are ignored.
    """
    text = chars
    if isinstance(text, bytes):
        text = text.decode('ascii')
    # Highest offset that still starts a complete 4-digit group.
    usable = 4 * (len(text) // 4)
    return ''.join(
        chr(int(text[start:start + 4], 16)) for start in range(0, usable, 4)
    )
print(convert(b"6D4B8BD5"))
++++++++
#> python test123.py
测试

Python: Reversibly encode alphanumeric string to integer

I want to convert a string (composed of alphanumeric characters) into an integer and then convert this integer back into a string:
string --> int --> string
In other words, I want to represent an alphanumeric string by an integer.
I found a working solution, which I included in the answer, but I do not think it is the best solution, and I am interested in other ideas/methods.
Please don't tag this as duplicate just because a lot of similar questions already exist, I specifically want an easy way of transforming a string into an integer and vice versa.
This should work for strings that contain alphanumeric characters, i.e. strings containing numbers and letters.
Here's what I have so far:
First define a string
m = "test123"
string -> bytes
mBytes = m.encode("utf-8")
bytes -> int
mInt = int.from_bytes(mBytes, byteorder="big")
int -> bytes
mBytes = mInt.to_bytes(((mInt.bit_length() + 7) // 8), byteorder="big")
bytes -> string
m = mBytes.decode("utf-8")
All together
m = "test123"
# string -> bytes -> int
mBytes = m.encode("utf-8")
mInt = int.from_bytes(mBytes, "big")
# int -> bytes: use the fewest whole bytes that hold every bit of mInt
byte_count = (mInt.bit_length() + 7) // 8
mBytes2 = mInt.to_bytes(byte_count, "big")
# bytes -> string
m2 = mBytes2.decode("utf-8")
print(m == m2)
Here is an identical reusable version of the above:
class BytesIntEncoder:
    """Reversibly map a bytes object to a non-negative integer.

    The bytes are read as one big-endian unsigned number. Because the
    decoded length is derived from the integer's bit length, leading
    zero bytes are not recovered by ``decode`` (``b''`` and ``b'\\x00'``
    both encode to 0).
    """

    @staticmethod
    def encode(b: bytes) -> int:
        """Return the big-endian integer value of ``b``."""
        return int.from_bytes(b, byteorder='big')

    @staticmethod
    def decode(i: int) -> bytes:
        """Return the shortest big-endian bytes whose value is ``i``.

        Raises:
            OverflowError: If ``i`` is negative.
        """
        # Round the bit length up to a whole number of bytes.
        return i.to_bytes(((i.bit_length() + 7) // 8), byteorder='big')
If you're using Python <3.6, remove the optional type annotations.
Test:
>>> s = 'Test123'
>>> b = s.encode()
>>> b
b'Test123'
>>> BytesIntEncoder.encode(b)
23755444588720691
>>> BytesIntEncoder.decode(_)
b'Test123'
>>> _.decode()
'Test123'
Recall that a string can be encoded to bytes, which can then be encoded to an integer. The encodings can then be reversed to get the bytes followed by the original string.
This encoder uses binascii to produce an identical integer encoding to the one in the answer by charel-f. I believe it to be identical because I extensively tested it.
Credit: this answer.
from binascii import hexlify, unhexlify
class BytesIntEncoder:
    """Reversibly map a bytes object to a non-negative integer via hex.

    The integer is the value of the bytes' hexadecimal representation.
    Leading zero bytes are not recovered by ``decode``, since the integer
    carries no length information.
    """

    @staticmethod
    def encode(b: bytes) -> int:
        """Return the integer value of ``b``; empty input encodes to 0."""
        return int(hexlify(b), 16) if b != b'' else 0

    @staticmethod
    def decode(i: int) -> bytes:
        """Return the shortest bytes whose integer value is ``i``.

        ``0`` decodes to ``b''``, mirroring ``encode``.
        """
        if i == 0:
            return b''
        hex_digits = '%x' % i
        # unhexlify needs two digits per byte; a value whose top byte is
        # below 0x10 formats to an odd number of digits.
        if len(hex_digits) % 2:
            hex_digits = '0' + hex_digits
        return unhexlify(hex_digits)
If you're using Python <3.6, remove the optional type annotations.
Quick test:
>>> s = 'Test123'
>>> b = s.encode()
>>> b
b'Test123'
>>> BytesIntEncoder.encode(b)
23755444588720691
>>> BytesIntEncoder.decode(_)
b'Test123'
>>> _.decode()
'Test123'
Assuming the character set is merely alphanumeric, i.e. a-z A-Z 0-9, this requires 6 bits per character. As such, using an 8-bit byte-encoding is theoretically an inefficient use of memory.
This answer converts the input bytes into a sequence of 6-bit integers. It encodes these small integers into one large integer using bitwise operations. Whether this actually translates into real-world storage efficiency is measured by sys.getsizeof, and is more likely for larger strings.
This implementation customizes the encoding for the choice of character set. If for example you were working with just string.ascii_lowercase (5 bits) rather than string.ascii_uppercase + string.digits (6 bits), the encoding would be correspondingly efficient.
Unit tests are also included.
import string
class BytesIntEncoder:
    """Pack bytes drawn from a fixed alphabet into one integer and back.

    Each alphabet byte is assigned a code from 1 to ``len(chars)`` in
    alphabet order; code 0 is never used, so the bit length of the
    encoded integer identifies where the data ends. Codes are stored in
    fixed-width fields, first input byte in the lowest bits.
    """

    def __init__(self, chars: bytes = (string.ascii_letters + string.digits).encode()):
        """Build translation tables for the alphabet ``chars``.

        Raises:
            ValueError: If the alphabet has more than 255 bytes (codes
                must fit in one byte and 0 is reserved).
        """
        num_chars = len(chars)
        if num_chars > 255:
            raise ValueError("alphabet must contain at most 255 bytes")
        # Build the codes directly as bytes; going through str.encode()
        # turns codes above 127 into multi-byte UTF-8 sequences.
        translation = bytes(range(1, num_chars + 1))
        self._alphabet = frozenset(chars)
        self._translation_table = bytes.maketrans(chars, translation)
        self._reverse_translation_table = bytes.maketrans(translation, chars)
        self._num_bits_per_char = (num_chars + 1).bit_length()

    def encode(self, chars: bytes) -> int:
        """Return the integer that packs ``chars``.

        Raises:
            ValueError: If ``chars`` contains a byte that is not in the
                alphabet; such bytes would otherwise pass through
                untranslated and corrupt the result.
        """
        if not self._alphabet.issuperset(chars):
            raise ValueError("input contains bytes outside the encoder's alphabet")
        num_bits_per_char = self._num_bits_per_char
        output, bit_idx = 0, 0
        for chr_idx in chars.translate(self._translation_table):
            output |= (chr_idx << bit_idx)
            bit_idx += num_bits_per_char
        return output

    def decode(self, i: int) -> bytes:
        """Return the bytes that ``encode`` packed into ``i``."""
        # Mask selecting a single fixed-width field.
        maxint = (2 ** self._num_bits_per_char) - 1
        output = bytes(((i >> offset) & maxint) for offset in range(0, i.bit_length(), self._num_bits_per_char))
        return output.translate(self._reverse_translation_table)
# Test
import itertools
import random
import unittest
class TestBytesIntEncoder(unittest.TestCase):
    """Round-trip tests: decode(encode(x)) must give x back."""

    chars = string.ascii_letters + string.digits
    encoder = BytesIntEncoder(chars.encode())

    def _test_encoding(self, b_in: bytes):
        """Encode b_in, decode the result, and check types and equality."""
        encoded = self.encoder.encode(b_in)
        self.assertIsInstance(encoded, int)
        decoded = self.encoder.decode(encoded)
        self.assertIsInstance(decoded, bytes)
        self.assertEqual(b_in, decoded)

    def test_thoroughly_with_small_str(self):
        """Check every multiset of alphabet characters of length 0 to 3."""
        for s_len in range(4):
            for combo in itertools.combinations_with_replacement(self.chars, s_len):
                self._test_encoding(''.join(combo).encode())

    def test_randomly_with_large_str(self):
        """Check random strings of every length from 0 to 255."""
        for s_len in range(256):
            # Sample count per length band, so long strings stay cheap.
            if s_len <= 16:
                num_samples = 2 ** s_len
            elif s_len <= 32:
                num_samples = s_len ** 2
            elif s_len <= 64:
                num_samples = s_len * 2
            elif s_len <= 128:
                num_samples = s_len
            else:
                num_samples = 2
            for _ in range(num_samples):
                picked = random.choices(self.chars, k=s_len)
                self._test_encoding(''.join(picked).encode())


if __name__ == '__main__':
    unittest.main()
Usage example:
>>> encoder = BytesIntEncoder()
>>> s = 'Test123'
>>> b = s.encode()
>>> b
b'Test123'
>>> encoder.encode(b)
3908257788270
>>> encoder.decode(_)
b'Test123'
I needed to transfer a dictionary as a number.
It may look kind of ugly, but it is efficient in that every character (English letters) takes exactly two digits, and it is capable of transferring any kind of Unicode character.
import json
myDict = {
"le key": "le Valueue",
2 : {
"heya": 1234569,
"3": 4
},
'Α α, Β β, Γ γ' : 'שלום'
}
def convertDictToNum(toBeConverted):
    """Serialize a JSON-compatible object to a single integer.

    The object is dumped to ASCII JSON text and every character is
    written as the two-digit decimal number ``ord(c) - 26``.

    Args:
        toBeConverted: Any object ``json.dumps`` accepts.

    Returns:
        The integer formed by concatenating the two-digit codes.
    """
    text = json.dumps(toBeConverted)
    # '~' (126) and DEL (127) would map to 100 and 101 -- three digits --
    # and break the fixed two-digit layout. Both can only occur inside
    # JSON strings, so swap them for their \uXXXX escapes, which
    # json.loads turns back into the same characters.
    text = text.replace('~', '\\u007e').replace('\x7f', '\\u007f')
    return int(''.join('%02d' % (ord(c) - 26) for c in text))
def loadDictFromNum(toBeDecoded):
    """Rebuild an object from an integer of two-digit character codes.

    Each pair of decimal digits is read as ``ord(c) - 26`` for one
    character of JSON text, which is then parsed.

    Args:
        toBeDecoded: The integer (or its decimal string) to decode.

    Returns:
        The object parsed from the recovered JSON text.
    """
    digits = str(toBeDecoded)
    # An integer cannot keep a leading zero, so a first code below 10
    # arrives one digit short; restore it to keep the pairs aligned.
    if len(digits) % 2:
        digits = '0' + digits
    text = ''.join(
        chr(int(digits[cut:cut + 2]) + 26) for cut in range(0, len(digits), 2)
    )
    return json.loads(text)
numbersDict = convertDictToNum(myDict)
print(numbersDict)
# 9708827506817595083206088....
recoveredDict = loadDictFromNum(numbersDict)
print(recoveredDict)
# {'le key': 'le Valueue', '2': {'heya': 1234569, '3': 4}, 'Α α, Β β, Γ γ': 'שלום'}

Issue while trying to convert ascii characters to a binary value [duplicate]

Using this code to take a string and convert it to binary:
bin(reduce(lambda x, y: 256*x+y, (ord(c) for c in 'hello'), 0))
this outputs:
0b110100001100101011011000110110001101111
Which, if I put it into this site (on the right-hand side), I get my message of hello back. I'm wondering what method it uses. I know I could split the string of binary into groups of 8 and then match each to the corresponding value of bin(ord(character)), or some other way. Really looking for something simpler.
For ASCII characters in the range [ -~] on Python 2:
>>> import binascii
>>> bin(int(binascii.hexlify('hello'), 16))
'0b110100001100101011011000110110001101111'
In reverse:
>>> n = int('0b110100001100101011011000110110001101111', 2)
>>> binascii.unhexlify('%x' % n)
'hello'
In Python 3.2+:
>>> bin(int.from_bytes('hello'.encode(), 'big'))
'0b110100001100101011011000110110001101111'
In reverse:
>>> n = int('0b110100001100101011011000110110001101111', 2)
>>> n.to_bytes((n.bit_length() + 7) // 8, 'big').decode()
'hello'
To support all Unicode characters in Python 3:
def text_to_bits(text, encoding='utf-8', errors='surrogatepass'):
    """Return the encoded bytes of ``text`` as a string of '0'/'1' digits.

    The result is left-padded with zeros to a multiple of eight digits.
    Empty text gives ``'00000000'``.
    """
    payload = text.encode(encoding, errors)
    digits = format(int.from_bytes(payload, 'big'), 'b')
    # -len % 8 is the number of zeros needed to reach the next multiple of 8.
    return '0' * (-len(digits) % 8) + digits
def text_from_bits(bits, encoding='utf-8', errors='surrogatepass'):
    """Decode a string of binary digits back to text.

    The digits are read as one big-endian number, converted to the
    fewest bytes that hold it, and decoded. A value of zero yields
    ``'\\0'`` rather than an empty string.
    """
    number = int(bits, 2)
    byte_count = (number.bit_length() + 7) // 8
    decoded = number.to_bytes(byte_count, 'big').decode(encoding, errors)
    if decoded:
        return decoded
    return '\0'
Here's single-source Python 2/3 compatible version:
import binascii
def text_to_bits(text, encoding='utf-8', errors='surrogatepass'):
    """Return the encoded bytes of ``text`` as a string of '0'/'1' digits.

    The result is left-padded with zeros to a multiple of eight digits.
    Empty text gives ``'00000000'``.
    """
    # hexlify of empty input is empty, which int() rejects; use a single
    # zero digit so empty text is handled instead of raising ValueError.
    hex_digits = binascii.hexlify(text.encode(encoding, errors)) or b'0'
    bits = bin(int(hex_digits, 16))[2:]
    return bits.zfill(8 * ((len(bits) + 7) // 8))
def text_from_bits(bits, encoding='utf-8', errors='surrogatepass'):
    """Decode a string of binary digits back to text.

    The digits are parsed as a base-2 integer, converted to bytes with
    ``int2bytes``, and decoded with the given encoding and error handler.
    """
    raw = int2bytes(int(bits, 2))
    return raw.decode(encoding, errors)
def int2bytes(i):
    """Return the big-endian bytes of the non-negative integer ``i``.

    Zero becomes a single zero byte.
    """
    digits = '%x' % i
    # unhexlify needs an even number of digits: pad odd lengths by one.
    if len(digits) & 1:
        digits = digits.zfill(len(digits) + 1)
    return binascii.unhexlify(digits)
Example
>>> text_to_bits('hello')
'0110100001100101011011000110110001101111'
>>> text_from_bits('110100001100101011011000110110001101111') == u'hello'
True
Built-in only python
Here is a pure python method for simple strings, left here for posterity.
def string2bits(s=''):
    """Return one binary-digit string per character of ``s``.

    Each entry is the character's code point in base 2, zero-padded on
    the left to at least eight digits.
    """
    return list(map(lambda ch: format(ord(ch), '08b'), s))
def bits2string(b=None):
    """Join binary-digit strings back into the text they represent.

    Args:
        b: Iterable of strings of '0'/'1' digits, one per character.
            ``None`` (the default) is treated as no input.

    Returns:
        The string formed from the character for each entry's value.
    """
    if b is None:
        # The default used to reach the loop and raise TypeError.
        return ''
    return ''.join(chr(int(x, 2)) for x in b)
s = 'Hello, World!'
b = string2bits(s)
s2 = bits2string(b)
print 'String:'
print s
print '\nList of Bits:'
for x in b:
print x
print '\nString:'
print s2
String:
Hello, World!
List of Bits:
01001000
01100101
01101100
01101100
01101111
00101100
00100000
01010111
01101111
01110010
01101100
01100100
00100001
String:
Hello, World!
I'm not sure how you think you can do it other than character-by-character -- it's inherently a character-by-character operation. There is certainly code out there to do this for you, but there is no "simpler" way than doing it character-by-character.
First, you need to strip the 0b prefix and left-zero-pad the string so its length is divisible by 8, which makes dividing the bitstring up into characters easy:
bitstring = bitstring[2:]
bitstring = -len(bitstring) % 8 * '0' + bitstring
Then you divide the string up into blocks of eight binary digits, convert them to ASCII characters, and join them back into a string:
string_blocks = (bitstring[i:i+8] for i in range(0, len(bitstring), 8))
string = ''.join(chr(int(char, 2)) for char in string_blocks)
If you actually want to treat it as a number, you still have to account for the fact that the leftmost character will be at most seven digits long if you want to go left-to-right instead of right-to-left.
This is my way to solve your task:
# Binary literal text, as produced by bin(); named `bits` rather than
# `str` so the built-in str type is not shadowed.
bits = "0b110100001100101011011000110110001101111"
# Drop the '0b' prefix, then left-pad with zeros to a whole number of
# 8-digit groups (bin() omits leading zeros of the first character).
bits = bits[2:]
bits = "0" * (-len(bits) % 8) + bits
# Each 8-digit group is the code of one character.
message = "".join(chr(int(bits[i:i + 8], 2)) for i in range(0, len(bits), 8))
print(message)
If you don't want to import any modules, you can use this:
with open("Test1.txt", "r") as File1:
St = (' '.join(format(ord(x), 'b') for x in File1.read()))
StrList = St.split(" ")
to convert a text file to binary.
and you can use this to convert it back to string:
StrOrgList = StrOrgMsg.split(" ")
for StrValue in StrOrgList:
if(StrValue != ""):
StrMsg += chr(int(str(StrValue),2))
print(StrMsg)
hope that is helpful, i've used this with some custom encryption to send over TCP.
Are you looking for the code to do it or understanding the algorithm?
Does the binascii module do what you need? Specifically a2b_uu and b2a_uu? There are LOTS of other options in there in case those aren't what you want.
(NOTE: Not a Python guy but this seemed like an obvious answer)
Convert binary to its equivalent character.
# Exponent of the weight given to the first digit of every entry.
k = 7
new = []
item = input("Enter 8bit binary number with , seprator").split(",")
for i in item:
    # The first character weighs 2**7 and each following one half as
    # much; only "1" characters contribute, anything else counts as zero.
    new.append(sum(2 ** (k - pos) for pos, bit in enumerate(i) if bit == "1"))
print(new)
# Print the character for each value on one line, with no separators.
for code in new:
    print(chr(code), end="")
This is a spruced up version of J.F. Sebastian's. Thanks for the snippets though J.F. Sebastian.
import binascii, sys
def goodbye():
    """Exit the program with a farewell banner."""
    sys.exit("\n"+"*"*43+"\n\nGood Bye! Come use again!\n\n"+"*"*43+"")


# Interactive menu loop (Python 2: relies on raw_input). It runs only when
# the file is executed as a script and ends through goodbye().
while __name__ == '__main__':
    print("[A]scii to Binary, [B]inary to Ascii, or [E]xit:")
    # The prompt shows upper-case letters, so accept either case.
    var1 = raw_input('>>> ').strip().lower()
    if var1 == 'a':
        string = raw_input('String to convert:\n>>> ')
        # hexlify of empty input is empty, which int() rejects.
        hex_digits = binascii.hexlify(string) or '0'
        # bin() prefixes its result with '0b'; slice that off.
        convert = bin(int(hex_digits, 16))[2:]
        print('\n'+'*'*84+'\n\n'+convert+'\n\n'+'*'*84+'\n')
    if var1 == 'b':
        binary = raw_input('Binary to convert:\n>>> ')
        try:
            n = int(binary, 2)
        except ValueError:
            print('Not a binary number.')
            continue
        hex_digits = '%x' % n
        # unhexlify needs two hex digits per byte.
        if len(hex_digits) % 2:
            hex_digits = '0' + hex_digits
        done = binascii.unhexlify(hex_digits)
        print('\n'+'*'*84+'\n\n'+done+'\n\n'+'*'*84+'\n')
    if var1 == 'e':
        aus = raw_input('Are you sure? (y/n)\n>>> ')
        if aus == 'y':
            goodbye()

How do I concatenate an escape to a string?

My goal is to convert a binary value into the "bytestring" format Python interprets it as. Example: 1111111111111111 would be 0xffff and, when interpreted, be represented as \xff\xff. If there is a more direct method of converting it to this format please let me know, as that would be ideal; as of now I'm using brute force with this solution:
hexnum = hex(int("11110100111100001110110101111011",2))
hexstring = str(hexnum)[2:]
finalstr = ""
i = 0
while(i<=len(hexstring)):
finalstr+= hexstring[i:i+2]
finalstr+= "\x"
i=i+2
My problem is when:
print repr(finalstr)
I receive the error
ValueError: invalid \x escape
How do I properly concatenate the escape or how do I convert a binary string into the hex bytearray format python uses?
You can use binascii.unhexlify like this:
>>> import binascii
>>> s = "11110100111100001110110101111011"
>>> binascii.unhexlify(format(int(s, 2), 'x'))
'\xf4\xf0\xed{'
In Python 3:
v = int("11110100111100001110110101111011",2)
v.to_bytes((v.bit_length() + 7) // 8, 'big')
produces a bytes value represented by the bits:
>>> v = int("11110100111100001110110101111011",2)
>>> v.to_bytes((v.bit_length() + 7) // 8, 'big')
b'\xf4\xf0\xed{'
You can't just prepend the \x syntax; that only works in string literals.
In Python 2, you could use a bytearray() instead, as it takes a list of integers in the range 0-255:
v = int("11110100111100001110110101111011",2)
bytes_list = []
while v:
v, one_byte = divmod(v, 256)
bytes_list.append(one_byte)
str(bytearray(bytes_list[::-1]))
Demo:
>>> v = int("11110100111100001110110101111011",2)
>>> bytes_list = []
>>> while v:
... v, one_byte = divmod(v, 256)
... bytes_list.append(one_byte)
...
>>> bytearray(bytes_list[::-1])
bytearray(b'\xf4\xf0\xed{')
>>> str(bytearray(bytes_list[::-1]))
'\xf4\xf0\xed{'

Python efficient obfuscation of string

I need to obfuscate lines of Unicode text to slow down those who may want to extract them. Ideally this would be done with a built in Python module or a small add-on library; the string length will be the same or less than the original; and the "unobfuscation" be as fast as possible.
I have tried various character swaps and XOR routines, but they are slow. Base64 and hex encoding increase the size considerably. To date the most efficient method I've found is compressing with zlib at the lowest setting (1). Is there a better way?
How about the old ROT13 trick?
Python 3:
>>> import codecs
>>> x = 'some string'
>>> y = codecs.encode(x, 'rot13')
>>> y
'fbzr fgevat'
>>> codecs.decode(y, 'rot13')
'some string'
Python 2:
>>> x = 'some string'
>>> y = x.encode('rot13')
>>> y
'fbzr fgevat'
>>> y.decode('rot13')
u'some string'
For a unicode string:
>>> x = u'國碼'
>>> print x
國碼
>>> y = x.encode('unicode-escape').encode('rot13')
>>> print y
\h570o\h78op
>>> print y.decode('rot13').decode('unicode-escape')
國碼
This uses a simple, fast encryption scheme on bytes objects.
# For Python 3 - strings are Unicode, print is a function
def obfuscate(byt):
    """XOR every byte of ``byt`` with a repeating key and return bytes.

    XOR is its own inverse, so applying the function to its own output
    restores the original data.
    """
    key = b'keyword'
    key_len = len(key)
    scrambled = bytearray()
    for position, value in enumerate(byt):
        # Wrap around the key once the data is longer than it.
        scrambled.append(value ^ key[position % key_len])
    return bytes(scrambled)
def test(s):
    """Obfuscate ``s``, print the lengths and result, then check the round trip.

    Prints the length of ``s``, the length of the obfuscated bytes and
    the bytes themselves, followed by whether de-obfuscating gives back
    the original string.
    """
    scrambled = obfuscate(s.encode())
    print(len(s), len(scrambled), scrambled)
    restored = obfuscate(scrambled).decode()
    print(restored == s)
simple_string = 'Just plain ASCII'
unicode_string = ('sensei = \N{HIRAGANA LETTER SE}\N{HIRAGANA LETTER N}'
'\N{HIRAGANA LETTER SE}\N{HIRAGANA LETTER I}')
test(simple_string)
test(unicode_string)
Python 2 version:
# For Python 2
mask = 'keyword'
# Key characters as integers, computed once so the loop only does XOR.
nmask = list(map(ord, mask))
lmask = len(mask)
def obfuscate(s):
    """XOR each character of ``s`` with the repeating key ``mask``.

    The same call both obfuscates and restores, because XOR undoes
    itself. Written for Python 2 strings.
    """
    pieces = []
    for i, c in enumerate(s):
        pieces.append(chr(ord(c) ^ nmask[i % lmask]))
    return ''.join(pieces)
def test(s):
data = obfuscate(s.encode('utf-8'))
print len(s), len(data), repr(data)
newdata = obfuscate(data).decode('utf-8')
print newdata == s
simple_string = u'Just plain ASCII'
unicode_string = (u'sensei = \N{HIRAGANA LETTER SE}\N{HIRAGANA LETTER N}'
'\N{HIRAGANA LETTER SE}\N{HIRAGANA LETTER I}')
test(simple_string)
test(unicode_string)
It depends on the size of your input: if it's over 1K, then using numpy is about 60x faster (it runs in less than 2% of the time of the naïve Python code).
import time
import numpy as np
mask = b'We are the knights who say "Ni"!'
mask_length = len(mask)


def mask_python(val: bytes) -> bytes:
    """XOR ``val`` with the repeating key ``mask`` using a Python loop."""
    return bytes(c ^ mask[i % mask_length] for i, c in enumerate(val))


def mask_numpy(val: bytes) -> bytes:
    """XOR ``val`` with the repeating key ``mask`` using numpy.

    Produces the same bytes as ``mask_python``.
    """
    arr = np.frombuffer(val, dtype=np.uint8)
    # Use the argument's own length; the earlier code read a global
    # named `value` here and only worked when that happened to match.
    length = len(val)
    # Ceiling division: enough copies of the key to cover the input.
    repeats = -(-length // mask_length)
    np_mask = np.tile(np.frombuffer(mask, dtype=np.uint8), repeats)[:length]
    return (arr ^ np_mask).tobytes()
value = b'0123456789'
for i in range(9):
start_py = time.perf_counter()
masked_py = mask_python(value)
end_py = time.perf_counter()
start_np = time.perf_counter()
masked_np = mask_numpy(value)
end_np = time.perf_counter()
assert masked_py == masked_np
print(f"{i+1} {len(value)} {end_py-start_py} {end_np-start_np}")
value = value * 10
Note: I'm a novice with numpy, if anyone has any comments on my code I would be very happy to hear about it in comments.
use codecs with hex encoding , like :
>>> codecs.encode(b'test/jimmy', 'hex')
b'746573742f6a696d6d79'
>>> codecs.decode(b'746573742f6a696d6d79', 'hex')
b'test/jimmy'

Categories