Convert utf-8 string to base64

Convert utf-8 string to base64 - python

I converted base64 str1= eyJlbXBsb3llciI6IntcIm5hbWVcIjpcInVzZXJzX2VtcGxveWVyXCIsXCJhcmdzXCI6XCIxMDQ5NTgxNjI4MzdcIn0ifQ
to str2={"employer":"{\"name\":\"users_employer\",\"args\":\"104958162837\"}"}
with help of http://www.online-decoder.com/ru
I want to convert str2 to str1 with help of python. My code:
import base64
data = """{"employer":"{"name":"users_employer","args":"104958162837"}"}"""
encoded_bytes = base64.b64encode(data.encode("utf-8"))
encoded_str = str(encoded_bytes, "utf-8")
print(encoded_str)
The code prints str3=
eyJlbXBsb3llciI6InsibmFtZSI6InVzZXJzX2VtcGxveWVyIiwiYXJncyI6IjEwNDk1ODE2MjgzNyJ9In0=
What should I change in code to print str1 instead of str3 ?
I tried
{"employer":"{\"name\":\"users_employer\",\"args\":\"104958162837\"}"}
and
{"employer":"{"name":"users_employer","args":"104958162837"}"}
but result is the same

The problem is that \" is a python escape sequence for the single character ". The fix is to keep the backslashes and use a raw string (note the "r" at the front).
data = r"""{"employer":"{\"name\":\"users_employer\",\"args\":\"104958162837\"}"}"""
This will be the original string, except that python pads base64 numbers to 4 character multiples with the "=" sign. Strip the padding and you have the original.
import base64
str1 = "eyJlbXBsb3llciI6IntcIm5hbWVcIjpcInVzZXJzX2VtcGxveWVyXCIsXCJhcmdzXCI6XCIxMDQ5NTgxNjI4MzdcIn0ifQ"
str1_decoded = base64.b64decode(str1 + "="*divmod(len(str1),4)[1]).decode("ascii")
print("str1", str1_decoded)
data = r"""{"employer":"{\"name\":\"users_employer\",\"args\":\"104958162837\"}"}"""
encoded_bytes = base64.b64encode(data.encode("utf-8"))
encoded_str = str(encoded_bytes.rstrip(b"="), "utf-8")
print("my encoded", encoded_str)
print("same?", str1 == encoded_str)

The results you want are just base64.b64decode(str1 + "=="). See this post for more information on the padding.

Related

String from file to string utf-8 in python

So I am reading and manipulate a file with :
base_file = open(path+'/'+base_name, "r")
lines = base_file.readlines()
After this I search and find the "raw_data" start of line.
if re.match("\s{0,100}raw_data: ",line):
split_line = line.split("raw_data:")
print(split_line)
raw_string = split_line[1]
One example of raw_data is:
raw_data: "&\276!\300\307 =\277\"O\271\277vH9?j?\345?#\243\264=\350\034\345\277\260\345\033\300\023\017(#z|\273\277L\}\277\210\\031\300\213\263z\277\302\241\033\300\000\207\323\277\247Oh>j\354\215#\364\305\201\276\361+\202#t:\304\277\344\231\243#\225k\002\300vw\262\277\362\220j\300\"(\337\276\354b8\300\230\347H\300\201\320\204\300S;N\300Z0G\300>j\210\000#\034\014\220#\231\330J#\223\025\236#\006\332\230\276\227\273\n\277\353#,#\202\205\215\277\340\356\022\300/\223\035\277\331\277\362\276a\350\013#)\353\276\277v6\316\277K\326\207\300`2)\300\004\014Q\300\340\267\271\300MV\305\300\327\010\207\300j\346o\300\377\260\216\300[\332g\300\336\266\003\300\320S\272?6\300Y#\356\250\034\300\367\277&\300\335Uq>o\010&\300r\277\252\300U\314\243\300\253d\377\300"
And raw_string will be
print(raw_data)
"&\276!\300\307
=\277\"O\271\277vH9?j?\345?#\243\264=\350\034\345\277\260\345\033\300\023\017(#z|\273\277L\}\277\210\\031\300\213\263z\277\302\241\033\300\000\207\323\277\247Oh>j\354\215#\364\305\201\276\361+\202#t:\304\277\344\231\243#\225k\002\300vw\262\277\362\220j\300\"(\337\276\354b8\300\230\347H\300\201\320\204\300S;N\300Z0G\300>j\210\000#\034\014\220#\231\330J#\223\025\236#\006\332\230\276\227\273\n\277\353#,#\202\205\215\277\340\356\022\300/\223\035\277\331\277\362\276a\350\013#)\353\276\277v6\316\277K\326\207\300`2)\300\004\014Q\300\340\267\271\300MV\305\300\327\010\207\300j\346o\300\377\260\216\300[\332g\300\336\266\003\300\320S\272?6\300Y#\356\250\034\300\367\277&\300\335Uq>o\010&\300r\277\252\300U\314\243\300\253d\377\300"
If I tried to read this file I will obtain one char to one char even for escape characters.
So, my question is how to transform this plain text to utf-8 string so that I can have one character when reading \300 and not 4 characters.
I tried to pass "encondig =utf-8" in open file method but does not work.
I have made the same example passing raw_data as variable and it works properly.
RAW_DATA = "&\276!\300\307 =\277\"O\271\277vH9?j?\345?#\243\264=\350\034\345\277\260\345\033\300\023\017(#z|\273\277L\\}\277\210\\\031\300\213\263z\277\302\241\033\300\000\207\323\277\247Oh>j\354\215#\364\305\201\276\361+\202#t:\304\277\344\231\243#\225k\002\300vw\262\277\362\220j\300\"(\337\276\354b8\300\230\347H\300\201\320\204\300S;N\300Z0G\300<I>>j\210\000#\034\014\220#\231\330J#\223\025\236#\006\332\230\276\227\273\n\277\353#,#\202\205\215\277\340\356\022\300/\223\035\277\331\277\362\276a\350\013#)\353\276\277v6\316\277K\326\207\300`2)\300\004\014Q\300\340\267\271\300MV\305\300\327\010\207\300j\346o\300\377\260\216\300[\332g\300\336\266\003\300\320S\272?6\300Y#\356\250\034\300\367\277&\300\335Uq>o\010&\300r\277\252\300U\314\243\300\253d\377\300"
print(f"Qnt -> {len(RAW_DATA)}") # Qnt -> 256
print(type(RAW_DATA))
at = 0
total = 0
while at < len(RAW_DATA):
fin = at+4
substrin = RAW_DATA[at:fin]
resu = FourString_float(substrin)
at = fin
For this example \300 is only one char.
Hope someone can help me.

The problem is that on the read file the escape \ symbols are coming in as \, but in the example you've provided they are being evaluated as part of the numerics that follow it. ie, \276 is read as a single character.
If you run:
RAW_DATA = r"&\276!\300\307 =\277\"O\271\277vH9?j?\345?#\243\264=\350\034\345\277\260\345\033\300\023\017(#z|\273\277L\\}\277\210\\\031\300\213\263z\277\302\241\033\300\000\207\323\277\247Oh>j\354\215#\364\305\201\276\361+\202#t:\304\277\344\231\243#\225k\002\300vw\262\277\362\220j\300\"(\337\276\354b8\300\230\347H\300\201\320\204\300S;N\300Z0G\300<I>>j\210\000#\034\014\220#\231\330J#\223\025\236#\006\332\230\276\227\273\n\277\353#,#\202\205\215\277\340\356\022\300/\223\035\277\331\277\362\276a\350\013#)\353\276\277v6\316\277K\326\207\300`2)\300\004\014Q\300\340\267\271\300MV\305\300\327\010\207\300j\346o\300\377\260\216\300[\332g\300\336\266\003\300\320S\272?6\300Y#\356\250\034\300\367\277&\300\335Uq>o\010&\300r\277\252\300U\314\243\300\253d\377\300"
print(f"Qnt -> {len(RAW_DATA)}") # Qnt -> 256
print(type(RAW_DATA))
at = 0
total = 0
while at < len(RAW_DATA):
fin = at+4
substrin = RAW_DATA[at:fin]
resu = FourString_float(substrin)
at = fin
You would should be getting the same error that you were getting originally. Notice that we are using the raw-string literal instead of regular string literal. This will ensure that the \ don't get escaped.
You would need to evaluate the RAW_DATA to force it to evaluate the \.
You can do something like RAW_DATA = eval(f'"{RAW_DATA}"') or
import ast
RAW_DATA = ast.literal_eval(f'"{RAW_DATA}"')
Note, the second option is a bit more secure that doing a straight eval as you are limiting the scope of what can be executed.

encoding decoding string in python 3.6 doesn't work?

I have this code:
import base64
words = ('word1',"word2")
for word in words: #for loop
str_encoded = base64.b64encode(word.encode()) # encoding it
print(str_encoded) #print encoded
str_decoded = str_encoded.decode('utf-8')
print(str_decoded)
back = base64.standard_b64decode(str_decoded) # testing if it worked
print(word, "," ,"{{" , str_decoded , "}}" , "," , str_decoded, back) #print test
when i print the test i see the b' wasn't removed.
how can i remove it? thanks!

You tried to decode your data in the wrong order, you have to go backwards compared to the encoding order:
import base64
words = ('word€',"word2") # Added some non-ascii characters for testing
for word in words:
# Encoding
print("Word:", word)
utf8_encoded = word.encode('utf8') # Encoding in utf8, gives a bytes object
print('utf8 encoded:', utf8_encoded)
str_encoded = base64.b64encode(utf8_encoded) # Encoding it in B64
print("Base64 encoded:", str_encoded)
# Decoding
b64_decoded = base64.standard_b64decode(str_encoded) # Decoding from B64, we get a bytes object
print("Decoded from base64:", b64_decoded)
str_decoded = b64_decoded.decode('utf-8') # and decode it (as utf8) to get a string
print("Decoded string:", str_decoded, '\n')
Output:
Word: word€
utf8 encoded: b'word\xe2\x82\xac'
Base64 encoded: b'd29yZOKCrA=='
Decoded from base64: b'word\xe2\x82\xac'
Decoded string: word€
Word: word2
utf8 encoded: b'word2'
Base64 encoded: b'd29yZDI='
Decoded from base64: b'word2'
Decoded string: word2

You have to decode "back" variable (which in your case is bytes) with:
back.decode("utf-8")
print(str(word), "," ,"{{" , str_decoded , "}}" , "," , str_decoded, back.decode("utf-8") )

Python adds extra to crypt result

I'm trying to create an API with token to communicate between an Raspberry Pi and a Webserver. Right now i'm tring to generate an Token with Python.
from Crypto.Cipher import AES
import base64
import os
import time
import datetime
import requests
BLOCK_SIZE = 32
BLOCK_SZ = 14
#!/usr/bin/python
salt = "123456789123" # Zorg dat de salt altijd even lang is! (12 Chars)
iv = "1234567891234567" # Zorg dat de salt altijd even lang is! (16 Chars)
currentDate = time.strftime("%d%m%Y")
currentTime = time.strftime("%H%M")
PADDING = '{'
pad = lambda s: s + (BLOCK_SIZE - len(s) % BLOCK_SIZE) * PADDING
EncodeAES = lambda c, s: base64.b64encode(c.encrypt(pad(s)))
DecodeAES = lambda c, e: c.decrypt(base64.b64decode(e)).rstrip(PADDING)
secret = salt + currentTime
cipher=AES.new(key=secret,mode=AES.MODE_CBC,IV=iv)
encode = currentDate
encoded = EncodeAES(cipher, encode)
print (encoded)
The problem is that the output of the script an exta b' adds to every encoded string.. And on every end a '
C:\Python36-32>python.exe encrypt.py
b'Qge6lbC+SulFgTk/7TZ0TKHUP0SFS8G+nd5un4iv9iI='
C:\Python36-32>python.exe encrypt.py
b'DTcotcaU98QkRxCzRR01hh4yqqyC92u4oAuf0bSrQZQ='
Hopefully someone can explain what went wrong.
FIXED!
I was able to fix it to decode it to utf-8 format.
sendtoken = encoded.decode('utf-8')

You are running Python 3.6, which uses Unicode (UTF-8) for string literals. I expect that the EncodeAES() function returns an ASCII string, which Python is indicating is a bytestring rather than a Unicode string by prepending the b to the string literal it prints.
You could strip the b out of the output post-Python, or you could print(str(encoded)), which should give you the same characters, since ASCII is valid UTF-8.
EDIT:
What you need to do is decode the bytestring into UTF-8, as mentioned in the answer and in a comment above. I was wrong about str() doing the conversion for you, you need to call decode('UTF-8') on the bytestring you wish to print. That converts the string into the internal UTF-8 representation, which then prints correctly.

Find, decode and replace all base64 values in text file

I have a SQL dump file that contains text with html links like:
<a href="http://blahblah.org/kb/getattachment.php?data=NHxUb3Bjb25fZGF0YS1kb3dubG9hZF9ob3d0by5wZGY=">attached file</a>
I'd like to find, decode and replace the base64 part of the text in each of these links.
I've been trying to use Python w/ regular expressions and base64 to do the job. However, my regex skills are not up to the task.
I need to select any string that starts with
'getattachement.php?data='
and ends with
'"'
I then need to decode the part between 'data=' and '&quot' using base64.b64decode()
results should look something like:
<a href="http://blahblah.org/kb/4/Topcon_data-download_howto.pdf">attached file</a>
I think the solution will look something like:
import re
import base64
with open('phpkb_articles.sql') as f:
for line in f:
re.sub(some_regex_expression_here, some_function_here_to_decode_base64)
Any ideas?
EDIT: Answer for anyone who's interested.
import re
import base64
import sys
def decode_base64(s):
"""
Method to decode base64 into ascii
"""
# fix escaped equal signs in some base64 strings
base64_string = re.sub('%3D', '=', s.group(1))
decodedString = base64.b64decode(base64_string)
# substitute '|' for '/'
decodedString = re.sub('\|', '/', decodedString)
# escape the spaces in file names
decodedString = re.sub(' ', '%20', decodedString)
# print 'assets/' + decodedString + '&quot' # Print for debug
return 'assets/' + decodedString + '&quot'
count = 0
pattern = r'getattachment.php\?data=([^&]+?)&quot'
# Open the file and read line by line
with open('phpkb_articles.sql') as f:
for line in f:
try:
# globally substitute in new file path
edited_line = re.sub(pattern, decode_base64, line)
# output the edited line to standard out
sys.stdout.write(edited_line)
except TypeError:
# output unedited line if decoding fails to prevent corruption
sys.stdout.write(line)
# print line
count += 1

you already have it, you just need the small pieces:
pattern: r'data=([^&]+?)&quot' will match anything after data= and before &quot
>>> pat = r'data=([^&]+?)&quot'
>>> line = '<a href="http://blahblah.org/kb/getattachment.php?data=NHxUb3Bjb25fZGF0YS1kb3dubG9hZF9ob3d0by5wZGY=">attached file</a>'
>>> decodeString = re.search(pat,line).group(1) #because the b64 string is capture by grouping, we only want group(1)
>>> decodeString
'NHxUb3Bjb25fZGF0YS1kb3dubG9hZF9ob3d0by5wZGY='
you can then use str.replace() method as well as base64.b64decode() method to finish the rest. I dont want to just write your code for you but this should give you a good idea of where to go.

decode base64 like string with different index table(s)

My problem is, that I have something encoded (base64 like) with a differnet index table:
0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz+/
instead of
ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/
so when I use base64.b64decode() it gives me a wrong result.
Is there a way to set this table durring conversion (as a parameter maybe)?
Or should I "convert" the wrong base64 string, I mean replace 0 to A, 1 to B, etc... and than use base64decode? if so what is the best and fast workaround for this?
update1: I use this, which works, but looks a bit slow, and unprofessional. :)
def correctbase64(str):
dicta = [ ['0','A'], ['1','B'], ['2','C'], ['3','D'], ['4','E'], ['5','F'], ['6','G'], ['7','H'], ['8','I'], ['9','J'], ['A','K'], ['B','L'], ['C','M'], ['D','N'], ['E','O'], ['F','P'], ['G','Q'], ['H','R'], ['I','S'], ['J','T'], ['K','U'], ['L','V'], ['M','W'], ['N','X'], ['O','Y'], ['P','Z'], ['Q','a'], ['R','b'], ['S','c'], ['T','d'], ['U','e'], ['V','f'], ['W','g'], ['X','h'], ['Y','i'], ['Z','j'], ['a','k'], ['b','l'], ['c','m'], ['d','n'], ['e','o'], ['f','p'], ['g','q'], ['h','r'], ['i','s'], ['j','t'], ['k','u'], ['l','v'], ['m','w'], ['n','x'], ['o','y'], ['p','z'], ['q','0'], ['r','1'], ['s','2'], ['t','3'], ['u','4'], ['v','5'], ['w','6'], ['x','7'], ['y','8'], ['z','9'] ]
l = list(str)
for i in range(len(l)):
for c in dicta:
if l[i] == c[0]:
l[i] = c[1]
break
return "".join(l)

Something like this should work (WARNING: untested code; may be full of mistakes):
import string
my_base64chars = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz+/"
std_base64chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"
s = s.translate(string.maketrans(my_base64chars, std_base64chars))
data = base64.b64decode(s)
It isn't possible to make the standard base64 functions (or the lower-level ones in binascii that they call) use a custom table.

You can use translate() and maketrans():
from string import maketrans
base64fixTable = maketrans("0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz+/", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/");
def correctbase64(str):
return str.translate(base64fixTable)

print "Hello Reverse Engineering!\n"
import string
import base64
my_base64chars = "WXYZlabcd3fghijko12e456789ABCDEFGHIJKL+/MNOPQRSTUVmn0pqrstuvwxyz"
std_base64chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"
s = 'whatever encoded message you have that used my_base64chars index'
c = s.translate(string.maketrans(my_base64chars, std_base64chars))
data = base64.b64decode(c)
print (data)

Use maketrans to build a translation table and then translate from the first alphabet to the second. Then base64 decode.
import string
import base64
def decode(str):
#make a translation table.
table = string.maketrans(
#your alphabet
string.digits + string.uppercase + string.lowercase + "+/",
#the original alphabet
string.uppercase + string.lowercase + string.digits + "+/"
)
#translate
str.translate(s, table)
#finally decode
return base64.b64decode(str)

this will handle error TypeError: Incorrect padding
from string import maketrans
import base64
STANDARD_ALPHABET = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/'
CUSTOM_ALPHABET = '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz+/'
def correctbase64(input):
DECODE_TRANS = maketrans(CUSTOM_ALPHABET, STANDARD_ALPHABET)
newStr = input.translate(DECODE_TRANS)
# Add '=' char at the end of the string
newStr += '='
return base64.b64decode(newStr)
print custom_base64decode('x/Tcw/g') # hello

We Keep Coding

Python is a programming language that lets you work quickly and integrate systems more effectively.

Convert utf-8 string to base64 - python

The results you want are just base64.b64decode(str1 + "=="). See this post for more information on the padding.

Related

String from file to string utf-8 in python

encoding decoding string in python 3.6 doesn't work?

Python adds extra to crypt result

Find, decode and replace all base64 values in text file

decode base64 like string with different index table(s)

Categories

Resources