convert string to binary and take one's complement - python

I'm trying to convert string to binary and take one's complement, after that display the string again. i have seen a couples of related post such as here and here and i'm follow the official work where have been posted in here, in the below code after run the code its showing error AttributeError: 'bytes' object has no attribute 'encode'. i'm using python 3.6
the below code is :
import binascii
def text_to_bits(text, encoding='utf-8', errors='surrogatepass'):
bits = bin(int(binascii.hexlify(text.encode(encoding, errors)), 16))[2:]
return bits.zfill(8 * ((len(bits) + 7) // 8))
def text_from_bits(bits, encoding='utf-8', errors='surrogatepass'):
n = int(bits, 2)
return int2bytes(n).decode(encoding, errors)
def int2bytes(i):
hex_string = '%x' % i
n = len(hex_string)
return binascii.unhexlify(hex_string.zfill(n + (n & 1)))
your_string='hello'
b=your_string.encode('ascii', 'strict')
text_to_bits(b)
is there a way after convert it to binary to take one's complement of it and display the string again?

No need to convert your string into unicode (encode).
Your functions work very well.
Look at the code below:
your_string='hello'
#b=your_string.encode('ascii', 'strict')
b = text_to_bits(your_string)
print(b)
t = text_from_bits(b)
print(t)
Result:
0110100001100101011011000110110001101111
hello

If supporting ASCII is enough, you can do this:
a="Hello World!"
b="".join(bin(ord(x)^255)[2:] for x in a)
print(b)
c="".join(chr(int(b[x:x+8],2)^255) for x in range(0,len(b),8))
print(c)
101101111001101010010011100100111001000011011111101010001001000010001101100100111001101111011110
Hello World!
since ASCII codes are below 128, someASCII ^ 255 (the one's complement) is always going to be a 8-bit number (the most significant bit gets set). bin() prepends a 0b prefix, that is what the [2:] gets rid of.
If you need it for generic bytes, some padding magic has to be applied, like
b="".join(("0000000"+bin(ord(x))[2:])[-8:] for x in a)

I feel like you could do this easier:
st = "hello world"
my_binary = ' '.join(format(ord(x), 'b') for x in st)
print(my_binary)
original = ''.join(chr(int(X[:8], 2)) for X in my_binary.split())
print(original)
References:
Convert Binary String to String
Convert String to Binary
Then just do twos-compliment on the string with something like:
def binary_str_twos(bin_str):
twos = []
first_one = True
# twos compliment
for char in reversed(bin_str):
if char == ' ':
twos.append(char)
elif char == '1':
twos.append('1' if first_one else '0')
if first_one:
first_one = False
else:
twos.append('0' if first_one else '1')
return ''.join(reversed(twos))
Note that this isn't as efficient as working with binary only.
-- Edit --
Working in 8-bit binary without spaces:
st = "hello world"
my_binary = ''.join(format(ord(x), '08b') for x in st)
print(my_binary)
original = ''.join(chr(int(my_binary[i:i+8], 2)) for i in range(0, len(my_binary), 8))
print(original)

Related

how to convert a bytes to character in python

I have a bytes type data like this:
b"6D4B8BD5"
the data is from a chinese character using unicode-escape code.
it can be generate like this:
'测试'.encode('unicode-escape')
result:
b'\\u6d4b\\u8bd5'
how can I convert b"6D4B8BD5" to b'\u6d4b\u8bd5' or
how can I convert b"6D4B8BD5" to '测试'?
unhexlify is a function to get the bytes, then decode with the right encoding:
>>> from binascii import unhexlify
>>> s = b'6D4B8BD5'
>>> unhexlify(s).decode('utf-16be')
'测试'
>>> str = b"6D4B8BD5"
>>> chr(int(str[0:4], 16))
'测'
>>> chr(int(str[4:8], 16))
'试'
The working solution which returns the correct result and works for any string :)
Python 3.x
def convert(chars):
if isinstance(chars, bytes):
chars = chars.decode('ascii')
chars = [''.join(c) for c in zip(chars[::4], chars[1::4], chars[2::4], chars[3::4])]
return "".join([chr(int(c, 16)) for c in chars])
print(convert(b"6D4B8BD5"))
+++++++
#> python test123.py
测试
Second solution without using lists & etc. Easier and faster.
def convert(chars):
if isinstance(chars, bytes):
chars = chars.decode('ascii')
result = ''
for i in range(len(chars) // 4):
result += chr(int(chars[4 * i:4 * (i + 1)], 16))
return result
print(convert(b"6D4B8BD5"))
++++++++
#> python test123.py
测试

Python: Reversibly encode alphanumeric string to integer

I want to convert a string (composed of alphanumeric characters) into an integer and then convert this integer back into a string:
string --> int --> string
In other words, I want to represent an alphanumeric string by an integer.
I found a working solution, which I included in the answer, but I do not think it is the best solution, and I am interested in other ideas/methods.
Please don't tag this as duplicate just because a lot of similar questions already exist, I specifically want an easy way of transforming a string into an integer and vice versa.
This should work for strings that contain alphanumeric characters, i.e. strings containing numbers and letters.
Here's what I have so far:
First define an string
m = "test123"
string -> bytes
mBytes = m.encode("utf-8")
bytes -> int
mInt = int.from_bytes(mBytes, byteorder="big")
int -> bytes
mBytes = mInt.to_bytes(((mInt.bit_length() + 7) // 8), byteorder="big")
bytes -> string
m = mBytes.decode("utf-8")
All together
m = "test123"
mBytes = m.encode("utf-8")
mInt = int.from_bytes(mBytes, byteorder="big")
mBytes2 = mInt.to_bytes(((mInt.bit_length() + 7) // 8), byteorder="big")
m2 = mBytes2.decode("utf-8")
print(m == m2)
Here is an identical reusable version of the above:
class BytesIntEncoder:
#staticmethod
def encode(b: bytes) -> int:
return int.from_bytes(b, byteorder='big')
#staticmethod
def decode(i: int) -> bytes:
return i.to_bytes(((i.bit_length() + 7) // 8), byteorder='big')
If you're using Python <3.6, remove the optional type annotations.
Test:
>>> s = 'Test123'
>>> b = s.encode()
>>> b
b'Test123'
>>> BytesIntEncoder.encode(b)
23755444588720691
>>> BytesIntEncoder.decode(_)
b'Test123'
>>> _.decode()
'Test123'
Recall that a string can be encoded to bytes, which can then be encoded to an integer. The encodings can then be reversed to get the bytes followed by the original string.
This encoder uses binascii to produce an identical integer encoding to the one in the answer by charel-f. I believe it to be identical because I extensively tested it.
Credit: this answer.
from binascii import hexlify, unhexlify
class BytesIntEncoder:
#staticmethod
def encode(b: bytes) -> int:
return int(hexlify(b), 16) if b != b'' else 0
#staticmethod
def decode(i: int) -> int:
return unhexlify('%x' % i) if i != 0 else b''
If you're using Python <3.6, remove the optional type annotations.
Quick test:
>>> s = 'Test123'
>>> b = s.encode()
>>> b
b'Test123'
>>> BytesIntEncoder.encode(b)
23755444588720691
>>> BytesIntEncoder.decode(_)
b'Test123'
>>> _.decode()
'Test123'
Assuming the character set is merely alphanumeric, i.e. a-z A-Z 0-9, this requires 6 bits per character. As such, using an 8-bit byte-encoding is theoretically an inefficient use of memory.
This answer converts the input bytes into a sequence of 6-bit integers. It encodes these small integers into one large integer using bitwise operations. Whether this actually translates into real-world storage efficiency is measured by sys.getsizeof, and is more likely for larger strings.
This implementation customizes the encoding for the choice of character set. If for example you were working with just string.ascii_lowercase (5 bits) rather than string.ascii_uppercase + string.digits (6 bits), the encoding would be correspondingly efficient.
Unit tests are also included.
import string
class BytesIntEncoder:
def __init__(self, chars: bytes = (string.ascii_letters + string.digits).encode()):
num_chars = len(chars)
translation = ''.join(chr(i) for i in range(1, num_chars + 1)).encode()
self._translation_table = bytes.maketrans(chars, translation)
self._reverse_translation_table = bytes.maketrans(translation, chars)
self._num_bits_per_char = (num_chars + 1).bit_length()
def encode(self, chars: bytes) -> int:
num_bits_per_char = self._num_bits_per_char
output, bit_idx = 0, 0
for chr_idx in chars.translate(self._translation_table):
output |= (chr_idx << bit_idx)
bit_idx += num_bits_per_char
return output
def decode(self, i: int) -> bytes:
maxint = (2 ** self._num_bits_per_char) - 1
output = bytes(((i >> offset) & maxint) for offset in range(0, i.bit_length(), self._num_bits_per_char))
return output.translate(self._reverse_translation_table)
# Test
import itertools
import random
import unittest
class TestBytesIntEncoder(unittest.TestCase):
chars = string.ascii_letters + string.digits
encoder = BytesIntEncoder(chars.encode())
def _test_encoding(self, b_in: bytes):
i = self.encoder.encode(b_in)
self.assertIsInstance(i, int)
b_out = self.encoder.decode(i)
self.assertIsInstance(b_out, bytes)
self.assertEqual(b_in, b_out)
# print(b_in, i)
def test_thoroughly_with_small_str(self):
for s_len in range(4):
for s in itertools.combinations_with_replacement(self.chars, s_len):
s = ''.join(s)
b_in = s.encode()
self._test_encoding(b_in)
def test_randomly_with_large_str(self):
for s_len in range(256):
num_samples = {s_len <= 16: 2 ** s_len,
16 < s_len <= 32: s_len ** 2,
s_len > 32: s_len * 2,
s_len > 64: s_len,
s_len > 128: 2}[True]
# print(s_len, num_samples)
for _ in range(num_samples):
b_in = ''.join(random.choices(self.chars, k=s_len)).encode()
self._test_encoding(b_in)
if __name__ == '__main__':
unittest.main()
Usage example:
>>> encoder = BytesIntEncoder()
>>> s = 'Test123'
>>> b = s.encode()
>>> b
b'Test123'
>>> encoder.encode(b)
3908257788270
>>> encoder.decode(_)
b'Test123'
so I needed transfer a dictionary in terms of numbers,
it may look kinda ugly but it's efficient in the way that every char (english letters) is exactly 2 numbers but it's capable of transfering any kind of unicode char
import json
myDict = {
"le key": "le Valueue",
2 : {
"heya": 1234569,
"3": 4
},
'Α α, Β β, Γ γ' : 'שלום'
}
def convertDictToNum(toBeConverted):
return int(''.join([(lambda c: c if len(c) ==2 else '0'+c )(str(ord(c) - 26)) for c in str(json.dumps(toBeConverted))]))
def loadDictFromNum(toBeDecoded):
toBeDecoded = str(toBeDecoded)
return json.loads(''.join([chr(int(toBeDecoded[cut:cut + 2]) + 26) for cut in range(0, len(toBeDecoded), 2)]))
numbersDict = convertDictToNum(myDict)
print(numbersDict)
# 9708827506817595083206088....
recoveredDict = loadDictFromNum(numbersDict)
print(recoveredDict)
# {'le key': 'le Valueue', '2': {'heya': 1234569, '3': 4}, 'Α α, Β β, Γ γ': 'שלום'}

Python : Increase a string

I am not really familiar with Python yet.
I have a string like "11223300". Now I want to increase the last byte of that string ( from "00" to "FF"). I tried to convert the string into an integer ( integer=int(string,16)) and increase it, and convert it back later, but that does not work for me. Maybe one of you guys has a better idea.
string = "11223300"
counter = int(string, 16)
for i in range(255):
counter = counter + 1
IV = hex(counter)
Now I want to convert the IV from hex into a string
Thanks!
You can use format to convert int to your hex string, which will not keep the 0x prefix:
string = "11223300"
counter = int(string, 16)
for i in range(255):
counter = counter + 1
IV = format(counter, 'X')
print(IV)
Output:
112233FF
The function below takes a string and increases it by n from a given charset
For example, in the charset ['a','b','c'] :
"aaa" + 1 = "aab"
"aac" + 1 = "aba"
"acc" + 2 = "bab"
def str_increaser(mystr, charset, n_increase):
# Replaces a char in given string & index
def replace_chr_in_str(mystr, index, char):
mystr = list(mystr)
mystr[index] = char
return ''.join(mystr)
# Increases the char last (n) and possibly its left neighboor (n-1).
def local_increase(mystr, charset):
l_cs = len(charset)
# increasing 1 to last char if it's not the last char in charset (e.g. 'z' in lowercase ascii).
if (charset.index(mystr[-1]) < l_cs - 1):
mystr = replace_chr_in_str(mystr, -1, charset[charset.index(mystr[-1]) + 1])
# else, reset last char to the first one in charset and increase the its left char, using a reducted string for recursion
else:
mystr = replace_chr_in_str(mystr, -1, charset[0])
mystr = local_increase(mystr[:-1], charset) + mystr[-1]
return mystr
# Case if input = "zz...zz" in an ascii lowercase charset for instance
if (mystr == charset[-1] * len(mystr)):
print("str_increaser(): Input already max in charset")
else:
for i in range(n_increase):
mystr = local_increase(mystr, charset)
return mystr
Here's an exemple :
# In bash : $ man ascii
# charset = map(chr, range(97, 123)) + map(chr, range(65, 91))
import string
charset = string.lowercase + string.uppercase
print(str_increaser("RfZ", charset, 2)) # outputs "Rgb"
This function might be used to get all permutations in some charsets.
So are you literally wanting to change the last two bits of the string to "FF"? If so, easy
string = "11223300"
modified_string = string[:-2] + "FF"
Edit from comments
hex_str = "0x11223300"
for i in range(256):
hex_int = int(hex_str, 16)
new_int = hex_int + 0x01
print(hex(hex_int))
hex_str = str(hex(new_int))

Issue while trying to convert ascii characters to a binary value [duplicate]

Using this code to take a string and convert it to binary:
bin(reduce(lambda x, y: 256*x+y, (ord(c) for c in 'hello'), 0))
this outputs:
0b110100001100101011011000110110001101111
Which, if I put it into this site (on the right hand site) I get my message of hello back. I'm wondering what method it uses. I know I could splice apart the string of binary into 8's and then match it to the corresponding value to bin(ord(character)) or some other way. Really looking for something simpler.
For ASCII characters in the range [ -~] on Python 2:
>>> import binascii
>>> bin(int(binascii.hexlify('hello'), 16))
'0b110100001100101011011000110110001101111'
In reverse:
>>> n = int('0b110100001100101011011000110110001101111', 2)
>>> binascii.unhexlify('%x' % n)
'hello'
In Python 3.2+:
>>> bin(int.from_bytes('hello'.encode(), 'big'))
'0b110100001100101011011000110110001101111'
In reverse:
>>> n = int('0b110100001100101011011000110110001101111', 2)
>>> n.to_bytes((n.bit_length() + 7) // 8, 'big').decode()
'hello'
To support all Unicode characters in Python 3:
def text_to_bits(text, encoding='utf-8', errors='surrogatepass'):
bits = bin(int.from_bytes(text.encode(encoding, errors), 'big'))[2:]
return bits.zfill(8 * ((len(bits) + 7) // 8))
def text_from_bits(bits, encoding='utf-8', errors='surrogatepass'):
n = int(bits, 2)
return n.to_bytes((n.bit_length() + 7) // 8, 'big').decode(encoding, errors) or '\0'
Here's single-source Python 2/3 compatible version:
import binascii
def text_to_bits(text, encoding='utf-8', errors='surrogatepass'):
bits = bin(int(binascii.hexlify(text.encode(encoding, errors)), 16))[2:]
return bits.zfill(8 * ((len(bits) + 7) // 8))
def text_from_bits(bits, encoding='utf-8', errors='surrogatepass'):
n = int(bits, 2)
return int2bytes(n).decode(encoding, errors)
def int2bytes(i):
hex_string = '%x' % i
n = len(hex_string)
return binascii.unhexlify(hex_string.zfill(n + (n & 1)))
Example
>>> text_to_bits('hello')
'0110100001100101011011000110110001101111'
>>> text_from_bits('110100001100101011011000110110001101111') == u'hello'
True
Built-in only python
Here is a pure python method for simple strings, left here for posterity.
def string2bits(s=''):
return [bin(ord(x))[2:].zfill(8) for x in s]
def bits2string(b=None):
return ''.join([chr(int(x, 2)) for x in b])
s = 'Hello, World!'
b = string2bits(s)
s2 = bits2string(b)
print 'String:'
print s
print '\nList of Bits:'
for x in b:
print x
print '\nString:'
print s2
String:
Hello, World!
List of Bits:
01001000
01100101
01101100
01101100
01101111
00101100
00100000
01010111
01101111
01110010
01101100
01100100
00100001
String:
Hello, World!
I'm not sure how you think you can do it other than character-by-character -- it's inherently a character-by-character operation. There is certainly code out there to do this for you, but there is no "simpler" way than doing it character-by-character.
First, you need to strip the 0b prefix, and left-zero-pad the string so it's length is divisible by 8, to make dividing the bitstring up into characters easy:
bitstring = bitstring[2:]
bitstring = -len(bitstring) % 8 * '0' + bitstring
Then you divide the string up into blocks of eight binary digits, convert them to ASCII characters, and join them back into a string:
string_blocks = (bitstring[i:i+8] for i in range(0, len(bitstring), 8))
string = ''.join(chr(int(char, 2)) for char in string_blocks)
If you actually want to treat it as a number, you still have to account for the fact that the leftmost character will be at most seven digits long if you want to go left-to-right instead of right-to-left.
This is my way to solve your task:
str = "0b110100001100101011011000110110001101111"
str = "0" + str[2:]
message = ""
while str != "":
i = chr(int(str[:8], 2))
message = message + i
str = str[8:]
print message
if you don'y want to import any files you can use this:
with open("Test1.txt", "r") as File1:
St = (' '.join(format(ord(x), 'b') for x in File1.read()))
StrList = St.split(" ")
to convert a text file to binary.
and you can use this to convert it back to string:
StrOrgList = StrOrgMsg.split(" ")
for StrValue in StrOrgList:
if(StrValue != ""):
StrMsg += chr(int(str(StrValue),2))
print(StrMsg)
hope that is helpful, i've used this with some custom encryption to send over TCP.
Are you looking for the code to do it or understanding the algorithm?
Does this do what you need? Specifically a2b_uu and b2a_uu? There are LOTS of other options in there in case those aren't what you want.
(NOTE: Not a Python guy but this seemed like an obvious answer)
Convert binary to its equivalent character.
k=7
dec=0
new=[]
item=[x for x in input("Enter 8bit binary number with , seprator").split(",")]
for i in item:
for j in i:
if(j=="1"):
dec=2**k+dec
k=k-1
else:
k=k-1
new.append(dec)
dec=0
k=7
print(new)
for i in new:
print(chr(i),end="")
This is a spruced up version of J.F. Sebastian's. Thanks for the snippets though J.F. Sebastian.
import binascii, sys
def goodbye():
sys.exit("\n"+"*"*43+"\n\nGood Bye! Come use again!\n\n"+"*"*43+"")
while __name__=='__main__':
print "[A]scii to Binary, [B]inary to Ascii, or [E]xit:"
var1=raw_input('>>> ')
if var1=='a':
string=raw_input('String to convert:\n>>> ')
convert=bin(int(binascii.hexlify(string), 16))
i=2
truebin=[]
while i!=len(convert):
truebin.append(convert[i])
i=i+1
convert=''.join(truebin)
print '\n'+'*'*84+'\n\n'+convert+'\n\n'+'*'*84+'\n'
if var1=='b':
binary=raw_input('Binary to convert:\n>>> ')
n = int(binary, 2)
done=binascii.unhexlify('%x' % n)
print '\n'+'*'*84+'\n\n'+done+'\n\n'+'*'*84+'\n'
if var1=='e':
aus=raw_input('Are you sure? (y/n)\n>>> ')
if aus=='y':
goodbye()

Python efficient obfuscation of string

I need to obfuscate lines of Unicode text to slow down those who may want to extract them. Ideally this would be done with a built in Python module or a small add-on library; the string length will be the same or less than the original; and the "unobfuscation" be as fast as possible.
I have tried various character swaps and XOR routines, but they are slow. Base64 and hex encoding increase the size considerably. To date the most efficient method I've found is compressing with zlib at the lowest setting (1). Is there a better way?
How about the old ROT13 trick?
Python 3:
>>> import codecs
>>> x = 'some string'
>>> y = codecs.encode(x, 'rot13')
>>> y
'fbzr fgevat'
>>> codecs.decode(y, 'rot13')
u'some string'
Python 2:
>>> x = 'some string'
>>> y = x.encode('rot13')
>>> y
'fbzr fgevat'
>>> y.decode('rot13')
u'some string'
For a unicode string:
>>> x = u'國碼'
>>> print x
國碼
>>> y = x.encode('unicode-escape').encode('rot13')
>>> print y
\h570o\h78op
>>> print y.decode('rot13').decode('unicode-escape')
國碼
This uses a simple, fast encryption scheme on bytes objects.
# For Python 3 - strings are Unicode, print is a function
def obfuscate(byt):
# Use same function in both directions. Input and output are bytes
# objects.
mask = b'keyword'
lmask = len(mask)
return bytes(c ^ mask[i % lmask] for i, c in enumerate(byt))
def test(s):
data = obfuscate(s.encode())
print(len(s), len(data), data)
newdata = obfuscate(data).decode()
print(newdata == s)
simple_string = 'Just plain ASCII'
unicode_string = ('sensei = \N{HIRAGANA LETTER SE}\N{HIRAGANA LETTER N}'
'\N{HIRAGANA LETTER SE}\N{HIRAGANA LETTER I}')
test(simple_string)
test(unicode_string)
Python 2 version:
# For Python 2
mask = 'keyword'
nmask = [ord(c) for c in mask]
lmask = len(mask)
def obfuscate(s):
# Use same function in both directions. Input and output are
# Python 2 strings, ASCII only.
return ''.join([chr(ord(c) ^ nmask[i % lmask])
for i, c in enumerate(s)])
def test(s):
data = obfuscate(s.encode('utf-8'))
print len(s), len(data), repr(data)
newdata = obfuscate(data).decode('utf-8')
print newdata == s
simple_string = u'Just plain ASCII'
unicode_string = (u'sensei = \N{HIRAGANA LETTER SE}\N{HIRAGANA LETTER N}'
'\N{HIRAGANA LETTER SE}\N{HIRAGANA LETTER I}')
test(simple_string)
test(unicode_string)
It depends on the size of your input, if it's over 1K then using numpy is about 60x faster (runs in less than 2% of the naïve Python code).
import time
import numpy as np
mask = b'We are the knights who say "Ni"!'
mask_length = len(mask)
def mask_python(val: bytes) -> bytes:
return bytes(c ^ mask[i % mask_length] for i, c in enumerate(val))
def mask_numpy(val: bytes) -> bytes:
arr = np.frombuffer(val, dtype=np.int8)
length = len(value)
np_mask = np.tile(np.frombuffer(mask, dtype=np.int8), round(length/mask_length+0.5))[:length]
masked = arr ^ np_mask
return masked.tobytes()
value = b'0123456789'
for i in range(9):
start_py = time.perf_counter()
masked_py = mask_python(value)
end_py = time.perf_counter()
start_np = time.perf_counter()
masked_np = mask_numpy(value)
end_np = time.perf_counter()
assert masked_py == masked_np
print(f"{i+1} {len(value)} {end_py-start_py} {end_np-start_np}")
value = value * 10
Note: I'm a novice with numpy, if anyone has any comments on my code I would be very happy to hear about it in comments.
use codecs with hex encoding , like :
>>> codecs.encode(b'test/jimmy', 'hex')
b'746573742f6a696d6d79'
>>> codecs.decode(b'746573742f6a696d6d79', 'hex')
b'test/jimmy'

Categories