Related
I have a string as given below
dit ='{p_d: {a:3, what:3.6864e-05, s:lion, sst:{c:-20, b:6, p:panther}}}'
And I have a list of elements which I want to search for in the above string and wrap in double quotes.
['', 'p_d', '', '', 'a', '3', '', 'what', '3.6864e-05', '', 's', 'lion', '', 'sst', '', 'c', '-20', '', 'b', '6', '', 'p', 'panther', '', '', '']
If I do the search and replace with a simple .replace, it doesn't work as expected, and I can understand why: short elements such as 'a' or '3' also match inside longer ones.
import yaml
import ast
import json
import re
rep = {":": " ", "'":" ", "{":" ", "}":" ", ",": " "}
quot = "\""
dit = '{p_d: {a:3, what:3.6864e-05, s:lion, sst:{c:-20, b:6, p:panther}}}'
def replace_all(text, dic):
for i, j in dic.items():
text = text.replace(i, j)
print("replace_all: text {}".format(text))
return text
element_list_temp = replace_all(dit, rep)
element_list = element_list_temp.split(" ")
for z in element_list:
if z != "" and z in dit:
dit = dit.replace(z, quot+z+quot)
print(dit)
Output:
{""p"_d": {"a":"3", wh"a"t:"3"."6"8"6"4e-05, "s":"lion", "s""s"t:{"c":"-20", "b":"6", "p":"p""a"nther}}}
Desired Output:
'{"p_d": {"a":"3", "what":"3.6864e-05", "s":"lion", "sst":{"c":"-20", "b":"6", "p":"panther"}}}'
How to exactly match the string in the list one by one and replace them with double quotes.
Updates:
Different input
import yaml
import ast
import json
import re
rep = {":": " ", "'":" ", "{":" ", "}":" ", ",": " "}
quot = "\""
# dit = '{p_d: {a:3, what:3.6864e-05, s:lion, sst:{c:-20, b:6, p:panther}}}'
dit = "'{p_d: '{a:3, what:3.6864e-05, s:lion, vec_mode:'{2.5, -2.9, 3.4, 5.6, -8.9, -5.67, 2, 2, 2, 2, 5.4, 2, 2, 6.545, 2, 2}, sst:'{c:-20, b:6, p:panther}}}"
seps = ":'{}, "
val_strings = re.findall(f"[^{seps}]+", dit)
print("val_strings: {}".format(val_strings))
sep_strings = re.findall(f"[{seps}]+", dit)
print("sep_strings: {}".format(sep_strings))
seq = [f'{b}"{v}"' for b, v in zip(sep_strings, val_strings)] + sep_strings[-1:]
print("sep: {}".format(seq))
dit = "".join(seq)
print(dit)
Dict = json.loads(dit)
print(Dict)
result = yaml.dump(Dict)
print(result)
print(result.replace("'",""))
Output from above code
Think its failing because of the key:value pair of the dictionary. Checking at my end as well if there is a way to print them as arrays.
val_strings: ['p_d', 'a', '3', 'what', '3.6864e-05', 's', 'lion', 'vec_mode', '2.5', '-2.9', '3.4', '5.6', '-8.9', '-5.67', '2', '2', '2', '2', '5.4', '2', '2', '6.545', '2', '2', 'sst', 'c', '-20', 'b', '6', 'p', 'panther']
sep_strings: ["'{", ": '{", ':', ', ', ':', ', ', ':', ', ', ":'{", ', ', ', ', ', ', ', ', ', ', ', ', ', ', ', ', ', ', ', ', ', ', ', ', ', ', ', ', ', ', '}, ', ":'{", ':', ', ', ':', ', ', ':', '}}}']
sep: ['\'{"p_d"', ': \'{"a"', ':"3"', ', "what"', ':"3.6864e-05"', ', "s"', ':"lion"', ', "vec_mode"', ':\'{"2.5"', ', "-2.9"', ', "3.4"', ', "5.6"', ', "-8.9"', ', "-5.67"', ', "2"', ', "2"', ', "2"', ', "2"', ', "5.4"', ', "2"', ', "2"', ', "6.545"', ', "2"', ', "2"', '}, "sst"', ':\'{"c"', ':"-20"', ', "b"', ':"6"', ', "p"', ':"panther"', '}}}']
'{"p_d": '{"a":"3", "what":"3.6864e-05", "s":"lion", "vec_mode":'{"2.5", "-2.9", "3.4", "5.6", "-8.9", "-5.67", "2", "2", "2", "2", "5.4", "2", "2", "6.545", "2", "2"}, "sst":'{"c":"-20", "b":"6", "p":"panther"}}}
Traceback (most recent call last):
File "./ditoyaml_new.py", line 36, in <module>
Dict = json.loads(dit)
File "/usr/lib64/python3.6/json/__init__.py", line 354, in loads
return _default_decoder.decode(s)
File "/usr/lib64/python3.6/json/decoder.py", line 339, in decode
obj, end = self.raw_decode(s, idx=_w(s, 0).end())
File "/usr/lib64/python3.6/json/decoder.py", line 357, in raw_decode
raise JSONDecodeError("Expecting value", s, err.value) from None
json.decoder.JSONDecodeError: Expecting value: line 1 column 1 (char 0)
Expected Output with the json.load and dump as dictionary and if the key: value dictionary pair isnt available and put something like list or array. Checking at my end as well.
p_d:
a: 3
s: lion
sst:
b: 6
c: -20
p: panther
vec_mode:
[-8.9,
-5.67,
-2.9,
2,
2.5,
3.4,
5.4,
5.6,
6.545]
what: 3.6864e-05
Here is one way using regular expressions
import re
# Pseudo-dict text whose bare keys and values need to be wrapped in quotes.
dit = '{p_d: {a:3, what:3.6864e-05, s:lion, sst:{c:-20, b:6, p:panther}}}'
# Every character that can appear between two keys/values.
seps = ":'{}, "
value_pattern = re.compile(fr"[^{seps}]+")
separator_pattern = re.compile(fr"[{seps}]+")
# Runs of non-separator characters are the keys/values ...
val_strings = value_pattern.findall(dit)
# ... and runs of separator characters are everything in between.
sep_strings = separator_pattern.findall(dit)
# Re-assemble the text: each separator run followed by its quoted value,
# then the trailing separator run that has no value after it.
seq = []
for before, value in zip(sep_strings, val_strings):
    seq.append(before + '"' + value + '"')
seq.extend(sep_strings[-1:])
dit = "".join(seq)
print(dit)
Output:
{"p_d": {"a":"3", "what":"3.6864e-05", "s":"lion", "sst":{"c":"-20", "b":"6", "p":"panther"}}}
JSON test:
import json
print(json.loads(dit))
Output:
{'p_d': {'a': '3', 'what': '3.6864e-05', 's': 'lion', 'sst': {'c': '-20', 'b': '6', 'p': 'panther'}}}
This is word-processing code for a chatbot; it removes some articles and prepositions to make the input easier for the bot to read.
import json
from random import choice
class ChatterMessage:
def __init__(self, raw):
self.raw = str(raw).lower()
self.processed_str = self.reduce()
self.responses = self.get_responses()
self.data = self.process_response()
self.response = choice(self.data['response'])
def remove_unwanted_chars(self, string):
list_of_chars = ['?', ".", ",", "!", "#", "[", "]", "{", "}", "#", "$", "%", "*", "&", "(", ")", "-", "_", "+", "="]
new_str = ""
for char in string:
if char not in list_of_chars:
new_str += str(char)
return new_str
def get_responses(self, response_file="info.json"):
with open(response_file, 'r') as file:
return json.loads(file.read())
def reduce(self):
stopwords = ['de', 'a', 'o', 'que', 'e', 'é', 'do', 'da', 'em', 'um', 'para', 'com', 'não', 'uma', 'os', 'no', 'se', 'na', 'por', 'mais', 'as', 'dos', 'como', 'mas', 'ao', 'ele', 'das', 'à', 'seu', 'sua', 'ou', 'quando', 'muito', 'nos', 'já', 'eu', 'também', 'só', 'pelo', 'pela', 'até', 'isso', 'ela', 'entre', 'depois', 'sem', 'mesmo', 'aos', 'seus', 'quem', 'nas', 'me', 'esse', 'eles', 'você', 'essa', 'num', 'nem', 'suas', 'meu', 'às', 'minha', 'numa', 'pelos', 'elas', 'qual', 'nós', 'lhe', 'deles', 'essas', 'esses', 'pelas', 'este', 'dele', 'tu', 'te', 'vocês', 'vos', 'lhes', 'meus', 'minhas', 'teu', 'tua', 'teus', 'tuas', 'nosso', 'nossa', 'nossos', 'nossas', 'dela', 'delas', 'esta', 'estes', 'estas', 'aquele', 'aquela', 'aqueles', 'aquelas', 'isto', 'aquilo', 'estou', 'está', 'estamos', 'estão', 'estive', 'esteve', 'estivemos', 'estiveram', 'estava', 'estávamos', 'estavam', 'estivera', 'estivéramos', 'esteja', 'estejamos', 'estejam', 'estivesse', 'estivéssemos', 'estivessem', 'estiver', 'estivermos', 'estiverem', 'hei', 'há', 'havemos', 'hão', 'houve', 'houvemos', 'houveram', 'houvera', 'houvéramos', 'haja', 'hajamos', 'hajam', 'houvesse', 'houvéssemos', 'houvessem', 'houver', 'houvermos', 'houverem', 'houverei', 'houverá', 'houveremos', 'houverão', 'houveria', 'houveríamos', 'houveriam', 'sou', 'somos', 'são', 'era', 'éramos', 'eram', 'fui', 'foi', 'fomos', 'foram', 'fora', 'fôramos', 'seja', 'sejamos', 'sejam', 'fosse', 'fôssemos', 'fossem', 'for', 'formos', 'forem', 'serei', 'será', 'seremos', 'serão', 'seria', 'seríamos', 'seriam', 'tenho', 'tem', 'temos', 'tém', 'tinha', 'tínhamos', 'tinham', 'tive', 'teve', 'tivemos', 'tiveram', 'tivera', 'tivéramos', 'tenha', 'tenhamos', 'tenham', 'tivesse', 'tivéssemos', 'tivessem', 'tiver', 'tivermos', 'tiverem', 'terei', 'terá', 'teremos', 'terão', 'teria', 'teríamos', 'teriam']
custom_filter = []
keywords_list = []
strlist = self.raw.split(" ")
for x in strlist:
if x not in stopwords and x not in custom_filter:
keywords_list.append(self.remove_unwanted_chars(x))
return keywords_list
def process_response(self):
percentage = lambda x, y: (100 * y) / x
total = sum(len(x['keywords']) for x in self.responses)
most_acc = 0
response_data = None
acc = 0
for value in self.responses:
c = 0
for x in value['keywords']:
if str(x).lower() in self.processed_str:
c += 1
if c > most_acc:
most_acc = c
acc = percentage(total, most_acc)
print(acc)
response_data = value
if acc < 6:
return {"response": "Sorry, I do not understand. Be more clear please"}
for x in self.processed_str:
if x not in response_data['keywords']:
response_data['keywords'].append(x)
return response_data
if __name__ == '__main__':
    # Minimal chat loop: read a line, build a ChatterMessage from it and
    # print the reply it selected. Runs until the process is interrupted.
    while True:
        k = input("Você: ")
        # The attribute access must stay on the same logical line as the call;
        # a line starting with ".response" is a syntax error.
        res = ChatterMessage(k).response
        print("Bot:", res)
How to remove accents from keyword strings to "make it easier" for chatbot to read? I found this explanation: How to remove string accents using Python 3? But I don't know how it would be applied to this code as the bot always stops responding
You could use the Python package unidecode that replaces special characters with ASCII equivalents.
from unidecode import unidecode
text = "Björn, Łukasz and Σωκράτης."
print(unidecode(text))
# ==> Bjorn, Lukasz and Sokrates.
You could apply this to both the input and keywords.
# In the function definition of reduce(), place this line of code after
# stopwords = ['de', 'a', 'o', .....])
stopwords = [unidecode(s) for s in stopwords]
# In "__main__": replace k = input("Você: ") with the following line of code.
k = unidecode(input("Você: "))
If it makes sense, you could also force the strings to be all lowercase. This will make your string comparisons even more robust.
k = unidecode(input("Você: ").lower())
Because you requested the entire code:
import json
from random import choice
from unidecode import unidecode
class ChatterMessage:
    """Turn one raw user message into a bot reply.

    Construction runs the whole pipeline: the message is lower-cased, reduced
    to keywords, matched against the entries loaded from the responses file,
    and one reply is picked at random from the best-matching entry.

    Attributes set by ``__init__``:
        raw: the lower-cased input text.
        processed_str: list of keywords produced by ``reduce()``.
        responses: data loaded by ``get_responses()``; the code iterates it
            and reads ``entry['keywords']`` and ``entry['response']``.
        data: the entry returned by ``process_response()``.
        response: one element of ``data['response']`` chosen at random.
    """

    # Punctuation removed from every token.
    _UNWANTED_CHARS = '?.,!#[]{}$%*&()-_+='
    _STRIP_TABLE = str.maketrans('', '', _UNWANTED_CHARS)

    # Minimum match percentage below which the fallback reply is used.
    MIN_SCORE = 6
    FALLBACK_RESPONSE = "Sorry, I do not understand. Be more clear please"

    # Extra words to drop in addition to the stop-words (empty by default).
    CUSTOM_FILTER = ()

    # Portuguese stop-words, stored with accents; reduce() folds them to
    # ASCII with unidecode before comparing.
    STOPWORDS = (
        'de', 'a', 'o', 'que', 'e', 'é', 'do', 'da', 'em', 'um', 'para',
        'com', 'não', 'uma', 'os', 'no', 'se', 'na', 'por', 'mais', 'as',
        'dos', 'como', 'mas', 'ao', 'ele', 'das', 'à', 'seu', 'sua', 'ou',
        'quando', 'muito', 'nos', 'já', 'eu', 'também', 'só', 'pelo', 'pela',
        'até', 'isso', 'ela', 'entre', 'depois', 'sem', 'mesmo', 'aos',
        'seus', 'quem', 'nas', 'me', 'esse', 'eles', 'você', 'essa', 'num',
        'nem', 'suas', 'meu', 'às', 'minha', 'numa', 'pelos', 'elas', 'qual',
        'nós', 'lhe', 'deles', 'essas', 'esses', 'pelas', 'este', 'dele',
        'tu', 'te', 'vocês', 'vos', 'lhes', 'meus', 'minhas', 'teu', 'tua',
        'teus', 'tuas', 'nosso', 'nossa', 'nossos', 'nossas', 'dela',
        'delas', 'esta', 'estes', 'estas', 'aquele', 'aquela', 'aqueles',
        'aquelas', 'isto', 'aquilo', 'estou', 'está', 'estamos', 'estão',
        'estive', 'esteve', 'estivemos', 'estiveram', 'estava', 'estávamos',
        'estavam', 'estivera', 'estivéramos', 'esteja', 'estejamos',
        'estejam', 'estivesse', 'estivéssemos', 'estivessem', 'estiver',
        'estivermos', 'estiverem', 'hei', 'há', 'havemos', 'hão', 'houve',
        'houvemos', 'houveram', 'houvera', 'houvéramos', 'haja', 'hajamos',
        'hajam', 'houvesse', 'houvéssemos', 'houvessem', 'houver',
        'houvermos', 'houverem', 'houverei', 'houverá', 'houveremos',
        'houverão', 'houveria', 'houveríamos', 'houveriam', 'sou', 'somos',
        'são', 'era', 'éramos', 'eram', 'fui', 'foi', 'fomos', 'foram',
        'fora', 'fôramos', 'seja', 'sejamos', 'sejam', 'fosse', 'fôssemos',
        'fossem', 'for', 'formos', 'forem', 'serei', 'será', 'seremos',
        'serão', 'seria', 'seríamos', 'seriam', 'tenho', 'tem', 'temos',
        'tém', 'tinha', 'tínhamos', 'tinham', 'tive', 'teve', 'tivemos',
        'tiveram', 'tivera', 'tivéramos', 'tenha', 'tenhamos', 'tenham',
        'tivesse', 'tivéssemos', 'tivessem', 'tiver', 'tivermos', 'tiverem',
        'terei', 'terá', 'teremos', 'terão', 'teria', 'teríamos', 'teriam',
    )

    def __init__(self, raw):
        self.raw = str(raw).lower()
        self.processed_str = self.reduce()
        self.responses = self.get_responses()
        self.data = self.process_response()
        self.response = choice(self.data['response'])

    def remove_unwanted_chars(self, string):
        """Return *string* with every character of ``_UNWANTED_CHARS`` removed."""
        return string.translate(self._STRIP_TABLE)

    def get_responses(self, response_file="info.json"):
        """Load and return the parsed JSON content of *response_file*."""
        # Explicit UTF-8 so accented text does not depend on the platform's
        # default encoding.
        with open(response_file, 'r', encoding='utf-8') as file:
            return json.load(file)

    def reduce(self):
        """Return the keywords of ``self.raw`` as a list of strings.

        Each whitespace-separated token is folded to ASCII with ``unidecode``
        and stripped of punctuation *before* it is compared with the
        stop-words, so "não," and "nao" are both recognised. Tokens that end
        up empty are dropped.
        """
        stopwords = {unidecode(word) for word in self.STOPWORDS}
        keywords_list = []
        for token in self.raw.split():
            word = self.remove_unwanted_chars(unidecode(token))
            if not word or word in stopwords or word in self.CUSTOM_FILTER:
                continue
            keywords_list.append(word)
        return keywords_list

    def process_response(self):
        """Return the response entry whose keywords best match the message.

        The score is the number of matched keywords of the best entry as a
        percentage of all keywords across all entries. When nothing matches,
        or the score is below ``MIN_SCORE``, a fallback entry is returned
        whose ``'response'`` is a one-element list, so that ``choice()``
        yields the whole sentence rather than a single character.

        Side effect: every keyword of the message that the chosen entry does
        not already list is appended to that entry's ``'keywords'`` (in
        memory only; nothing is written back to the file here).
        """
        # NOTE(review): keywords are compared as stored (only lower-cased);
        # presumably the responses file holds unaccented keywords, otherwise
        # they cannot match the ASCII-folded input - confirm.
        total = sum(len(entry['keywords']) for entry in self.responses)
        best_hits = 0
        best_entry = None
        for entry in self.responses:
            hits = sum(
                1 for keyword in entry['keywords']
                if str(keyword).lower() in self.processed_str
            )
            if hits > best_hits:
                best_hits = hits
                best_entry = entry
        score = (100 * best_hits) / total if total else 0
        if best_entry is None or score < self.MIN_SCORE:
            return {"response": [self.FALLBACK_RESPONSE]}
        for word in self.processed_str:
            if word not in best_entry['keywords']:
                best_entry['keywords'].append(word)
        return best_entry
if __name__ == '__main__':
while True:
k = unidecode(input("Você: "))
res = ChatterMessage(k).response
print("Bot:", res)
I'm looking for simple password-based obfuscation/security of strings.
I've pretty much gone over each example of > Simple way to encode a string according to a password?
And none of them work with my python 3.7.
I got an error with ord(), so I updated the code, but even after that it's still broken. For example:
from itertools import cycle
def encode_zip_cycle(key, clear):
enc = [chr((ord(clear_char) + ord(key_char)) % 256)
for clear_char, key_char in zip(clear, cycle(key))]
return base64.urlsafe_b64encode("".join(enc).encode())
def decode_zip_cycle(key, enc):
enc = base64.urlsafe_b64decode(enc)
dec = [chr((256 + enc_char - ord(key_char)) % 256)
for enc_char, key_char in zip(enc, cycle(key))]
print(dec)
return "".join(dec)
text = "ATTACKATONCEfor Live 2154125-21-512^!££613-123!"
s = "1235348udgfjff"
print("Text : " + text)
print("Shift : " + str(s))
print("Cipher: ", encode_zip_cycle(s, text)) # , type(encode(s, text)))
print("Original text: ", decode_zip_cycle(s, encode_zip_cycle(s, text)))
Gives me
Text : ATTACKATONCEfor Live 2154125-21-512^!££613-123!
Shift : 1235348udgfjff
Cipher: b'csKGwod2dn95w4nCs8K1wqnCr8OMw5XCo1J_wp7CqcKZWMKVwoTCmcKXwp_CmsKXY2dgZ2RhbcKmwpbDhcKHDQnCnGJlYGZlZ1k='
['A', '\x90', 'S', '\x8d', 'T', 'B', '>', '\n', '\x15', '\\', '#', 'X', 'M', '\\', '\x84', '\x90', 'v', '\x8d', '|', '\x8f', 'T', 'N', '1', '[', '=', 'è', '\x19', '\\', 'm', '\x90', 'v', '\x8d', 'f', '$', '\x8a', ' ', '^', '\x1d', '\\', '/', '\\', '1', '\x91', 'm', '\x8f', 'e', '\x8f', 'c', '+', 'ò', 'ü', '\x00', 'þ', '÷', '\x07', '\\', 'u', '\x90', 'c', '\x8e', 'R', '\x8e', 'O', '\x98', '¥', '[', '6', 'ø', 'ÿ', 'ú', '5', '3', '4', '$']
Original text: ASTB>
\#XM\v|TN1[=è\mvf$ ^\/\1mec+òü þ÷\ucRO¥[6øÿú534$
In encode_zip_cycle you encode the "encrypted" string into utf-8 before doing the second encoding into base64. Yet, you don't revert this operation later in decode_zip_cycle.
This is the correct decode_zip_cycle function:
import base64
from itertools import cycle


def decode_zip_cycle(key, enc):
    """Reverse ``encode_zip_cycle``: return the clear text for *enc*.

    Args:
        key: the password string used for encoding; its characters are
            cycled over the message.
        enc: URL-safe base64 data (bytes or ASCII str) whose decoded payload
            is the UTF-8 encoding of the shifted characters.

    Returns:
        The decoded text as a ``str``.
    """
    # Undo both encoding layers: base64 first, then UTF-8, so that we are
    # back to one shifted character per original character.
    shifted = base64.urlsafe_b64decode(enc).decode()
    # Python's % always yields a non-negative result for a positive modulus,
    # so no "+ 256" is needed before subtracting the key character.
    return "".join(
        chr((ord(enc_char) - ord(key_char)) % 256)
        for enc_char, key_char in zip(shifted, cycle(key))
    )
I have a nested list:
Table=[['','','','',''],
['','','','',''],
['','','','',''],
['','','','',''],
['','','','',''],
['','','','','']]
I have randomly placed some values in Table and now I want to place other things in the 2D neighbours of those values. E.g.:
Table=[['','','','',''],
['','','','',''],
['','','','',''],
['','','value','',''],
['','','','',''],
['','','','','']]
Then i want to add:
Table=[['','','','',''],
['','','','',''],
['','','1','',''],
['','1','value','1',''],
['','','1','',''],
['','','','','']]
Under is all my code i don't know why but it would accept it in any other format sorry :/
def add_nukes():
pos=j.index('nuke')
if "nuke" not in j[0]:j[pos+1]='1'
if "nuke" not in j[-1]:
j[pos-1] = "1"
board[pos][i-1]="1"
board[i+1][pos]="1"
import random
size=150
if size%2==1:
size+=1
board = [[" "]*size for i in range(size)]
bombs = 25
all_cells = ["nuke"] * bombs + [" "] * (size - bombs)
random.shuffle(all_cells)
board = [all_cells[i:i+10] for i in range(0, size, 10)]
count=0
for j in board:
for i in range(len(j)):
count+=1
if "nuke" in j[i]:
add_nukes()
elif "nuke" in j[i]:
add_nukes()
for item in board:
print item
Any value in Table is identified uniquely by its x and y coordinates, i.e. the element in the 2nd column (x == 1 because 0-indexed) and 3rd row (y == 2) is Table[y][x] == Table[2][1].
The four immediate neighbours of any cell A are the cells with x one away from A OR with y one away from A. If A is Table[y][x], then the neighbours are [Table[y - 1][x], Table[y + 1][x], Table[y][x - 1], Table[y][x + 1]].
Just like #Aurel Bílý mentioned, there are four neighbouring coordinates to which you need to add the value in this case: [Table[y - 1][x], Table[y + 1][x], Table[y][x - 1], Table[y][x + 1]].
In order to do that, you must first ensure that these coordinates are valid and do not throw an IndexError exception. Once you have made sure that the coordinates are valid, you can safely write to them in your table.
The code below demonstrates this:
Table=[['','','','',''],
['','','','',''],
['','','','',''],
['','','value','',''],
['','','','',''],
['','','','','']]
def isInBounds(Table, x, y):
    """Return True when ``Table[x][y]`` is a valid, non-negative position.

    Note that here *x* is the row index and *y* the column index. The column
    bound is taken from the row actually being addressed, so tables whose
    rows differ in length are handled too.
    """
    return 0 <= x < len(Table) and 0 <= y < len(Table[x])


def addValue(Table, x, y, value, skip=()):
    """Store *value* at ``Table[x][y]`` if that position exists.

    Out-of-range positions are ignored silently. A cell whose current
    content is a member of *skip* (a collection of values) is left as is.
    """
    if isInBounds(Table, x, y) and Table[x][y] not in skip:
        Table[x][y] = value


def addValuesAround(Table, x, y, value, skip=()):
    """Store *value* in the four direct neighbours of ``Table[x][y]``.

    Neighbours outside the table, or currently holding a value listed in
    *skip*, are left untouched; the cell itself is never modified.
    """
    for dx, dy in ((-1, 0), (0, -1), (1, 0), (0, 1)):
        addValue(Table, x + dx, y + dy, value, skip)
addValuesAround(Table,3,2,1)
for elem in Table:
print(elem)
This will return:
['', '', '', '', '']
['', '', '', '', '']
['', '', 1, '', '']
['', 1, 'value', 1, '']
['', '', 1, '', '']
['', '', '', '', '']
EDIT:
I think I got it, using both of our codes. Just be sure to change the syntax of the print function, because you're using Python 2.7 and I use Python 3.6:
import random


def isInBounds(Table, x, y):
    """Return True when ``Table[x][y]`` is a valid, non-negative position.

    *x* is the row index and *y* the column index; the column bound comes
    from the addressed row, so rows of different lengths are handled.
    """
    return 0 <= x < len(Table) and 0 <= y < len(Table[x])


def addValue(Table, x, y, value, skip=()):
    """Store *value* at ``Table[x][y]`` unless the position is out of range
    or the cell currently holds one of the values in *skip*."""
    if isInBounds(Table, x, y) and Table[x][y] not in skip:
        Table[x][y] = value


def addValuesAround(Table, x, y, value, skip=()):
    """Store *value* in the four direct neighbours of ``Table[x][y]``."""
    for dx, dy in ((-1, 0), (0, -1), (1, 0), (0, 1)):
        addValue(Table, x + dx, y + dy, value, skip)


size = 150      # total number of cells
if size % 2 == 1:
    size += 1
width = 10      # cells per row
bombs = 25

# Shuffle a flat list of cells, then cut it into rows of `width` cells.
all_cells = ["nuke"] * bombs + [" "] * (size - bombs)
random.shuffle(all_cells)
board = [all_cells[i:i + width] for i in range(0, size, width)]

for i, row in enumerate(board):
    for j, cell in enumerate(row):
        if cell == 'nuke':
            # Protect neighbouring nukes: without `skip`, marking the cells
            # around one nuke would overwrite any nuke adjacent to it.
            addValuesAround(board, i, j, "1", skip=("nuke",))

for item in board:
    print(item)
This will give an instance of a board like this:
[' ', ' ', ' ', ' ', '1', ' ', '1', ' ', '1', ' ']
[' ', ' ', ' ', '1', 'nuke', '1', 'nuke', '1', 'nuke', '1']
['1', ' ', ' ', ' ', '1', ' ', '1', ' ', '1', '1']
['nuke', '1', '1', '1', 'nuke', '1', ' ', ' ', '1', 'nuke']
['1', '1', 'nuke', '1', '1', ' ', '1', ' ', ' ', '1']
[' ', ' ', '1', ' ', ' ', '1', 'nuke', '1', ' ', ' ']
[' ', ' ', '1', ' ', ' ', '1', '1', ' ', ' ', ' ']
[' ', '1', 'nuke', '1', '1', 'nuke', '1', ' ', ' ', ' ']
['1', 'nuke', '1', ' ', '1', '1', '1', ' ', '1', ' ']
[' ', '1', 'nuke', '1', 'nuke', '1', 'nuke', '1', 'nuke', '1']
['1', 'nuke', '1', ' ', '1', ' ', '1', ' ', '1', ' ']
[' ', '1', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ']
[' ', ' ', '1', ' ', ' ', ' ', ' ', ' ', ' ', ' ']
[' ', '1', 'nuke', '1', ' ', '1', ' ', '1', ' ', ' ']
[' ', ' ', '1', ' ', '1', 'nuke', '1', 'nuke', '1', ' ']
I made a program which convert infix to postfix in python. The problem is when I introduce the arguments.
If i introduce something like this: (this will be a string)
( ( 73 + ( ( 34 - 72 ) / ( 33 - 3 ) ) ) + ( 56 + ( 95 - 28 ) ) )
it will split it with .split() and the program will work correctly.
But I want the user to be able to introduce something like this:
((73 + ( (34- 72 ) / ( 33 -3) )) + (56 +(95 - 28) ) )
As you can see I want that the blank spaces can be trivial but the program continue splitting the string by parentheses, integers (not digits) and operands.
I try to solve it with a for but I don't know how to catch the whole number (73 , 34 ,72) instead one digit by digit (7, 3 , 3 , 4 , 7 , 2)
To sum up, what I want is split a string like ((81 * 6) /42+ (3-1)) into:
[(, (, 81, *, 6, ), /, 42, +, (, 3, -, 1, ), )]
Tree with ast
You could use ast to get a tree of the expression :
import ast
source = '((81 * 6) /42+ (3-1))'
node = ast.parse(source)
def show_children(node, level=0):
    """Print the AST rooted at *node*, one node per line.

    Each line is prefixed with one space per nesting *level*. Numeric
    literals are printed as their value; every other node is printed using
    its default ``str()`` and followed by its children one level deeper.
    """
    indent = ' ' * level
    # ast.Num / node.n are deprecated; numeric literals are ast.Constant
    # nodes whose .value is a number (bool is excluded explicitly because it
    # is a subclass of int).
    is_number = (
        isinstance(node, ast.Constant)
        and isinstance(node.value, (int, float, complex))
        and not isinstance(node.value, bool)
    )
    if is_number:
        print(indent + str(node.value))
    else:
        print(indent + str(node))
        for child in ast.iter_child_nodes(node):
            show_children(child, level + 1)
show_children(node)
It outputs :
<_ast.Module object at 0x7f56abbc5490>
<_ast.Expr object at 0x7f56abbc5350>
<_ast.BinOp object at 0x7f56abbc5450>
<_ast.BinOp object at 0x7f56abbc5390>
<_ast.BinOp object at 0x7f56abb57cd0>
81
<_ast.Mult object at 0x7f56abbd0dd0>
6
<_ast.Div object at 0x7f56abbd0e50>
42
<_ast.Add object at 0x7f56abbd0cd0>
<_ast.BinOp object at 0x7f56abb57dd0>
3
<_ast.Sub object at 0x7f56abbd0d50>
1
As #user2357112 wrote in the comments : ast.parse interprets Python syntax, not mathematical expressions. (1+2)(3+4) would be parsed as a function call and list comprehensions would be accepted even though they probably shouldn't be considered a valid mathematical expression.
List with a regex
If you want a flat structure, a regex could work :
import re
# Raw string so that "\d" reaches the regex engine instead of being treated
# as an (invalid) string escape. A token is either a run of digits or any
# single character that is neither a digit nor whitespace.
number_or_symbol = re.compile(r'(\d+|[^\s\d])')
print(number_or_symbol.findall(source))
# ['(', '(', '81', '*', '6', ')', '/', '42', '+', '(', '3', '-', '1', ')', ')']
It looks for either :
multiple digits
or any character which isn't a digit or a space
Once you have a list of elements, you could check if the syntax is correct, for example with a stack to check if parentheses are matching, or if every element is a known one.
You need to implement a very simple tokenizer for your input. You have the following types of tokens:
(
)
+
-
*
/
\d+
You can find them in your input string separated by all sorts of white space.
So a first step is to process the string from start to finish, and extract these tokens, and then do your parsing on the tokens, rather than on the string itself.
A nifty way to do this is to use the following regular expression: '\s*([()+*/-]|\d+)'. You can then:
import re
# Optional leading whitespace, then exactly one token: a parenthesis, an
# operator, or a run of digits. Only the token itself is captured.
tokenizer = re.compile(r'\s*([()+*/-]|\d+)')


def tokenize(text):
    """Split an arithmetic expression into a list of token strings.

    Whitespace between tokens and at either end of *text* is ignored.

    Raises:
        ValueError: if something other than whitespace, digits,
            parentheses or one of ``+ - * /`` is encountered.
    """
    found = []
    # Trailing whitespace is not followed by a token, so the pattern could
    # never consume it; stop scanning where the real content ends.
    end = len(text.rstrip())
    current_pos = 0
    while current_pos < end:
        match = tokenizer.match(text, current_pos)
        if match is None:
            raise ValueError(f'Syntax error at position {current_pos}')
        found.append(match.group(1))
        current_pos = match.end()
    return found


the_input = '(3+(2*5))'
tokens = tokenize(the_input)
print(tokens)
This will print ['(', '3', '+', '(', '2', '*', '5', ')', ')']
You could also use re.findall or re.finditer, but then you'd be skipping non-matches, which are syntax errors in this case.
If you don't want to use re module, you can try this:
s="((81 * 6) /42+ (3-1))"
r=[""]
for i in s.replace(" ",""):
if i.isdigit() and r[-1].isdigit():
r[-1]=r[-1]+i
else:
r.append(i)
print(r[1:])
Output:
['(', '(', '81', '*', '6', ')', '/', '42', '+', '(', '3', '-', '1', ')', ')']
It would actually be pretty trivial to hand-roll a simple expression tokenizer. And I think you'd learn more that way as well.
So for the sake of education and learning, here is a trivial expression tokenizer implementation which can be extended. It works based upon the "maximal munch" rule. This means it acts "greedy", trying to consume as many characters as it can to construct each token.
Without further ado, here is the tokenizer:
class ExpressionTokenizer:
    """Greedy tokenizer for simple arithmetic expressions.

    Args:
        expression: the text to tokenize.
        operators: a container of single-character strings that are
            accepted as tokens (operators, parentheses, ...).

    Tokens are runs of digits or single characters from *operators*;
    whitespace between tokens is skipped.
    """

    def __init__(self, expression, operators):
        self.buffer = expression
        self.pos = 0
        self.operators = operators

    def _next_token(self):
        """Return the next token, or None once the buffer is exhausted.

        Raises:
            SyntaxError: if the next non-space character is neither a digit
                nor one of the operators.
        """
        self._skip_whitespace()
        atom = self._get_atom()
        if atom is None:
            return None
        if atom.isdigit():
            return self._tokenize_number()
        if atom in self.operators:
            return self._tokenize_operator()
        raise SyntaxError(
            f"unexpected character {atom!r} at position {self.pos}"
        )

    def _skip_whitespace(self):
        """Advance ``self.pos`` past any whitespace characters."""
        atom = self._get_atom()
        while atom is not None and atom.isspace():
            self.pos += 1
            atom = self._get_atom()

    def _tokenize_number(self):
        """Consume and return the run of digits starting at ``self.pos``."""
        endpos = self.pos + 1
        while True:
            atom = self._get_atom(endpos)
            if atom is None or not atom.isdigit():
                break
            endpos += 1
        number = self.buffer[self.pos:endpos]
        self.pos = endpos
        return number

    def _tokenize_operator(self):
        """Consume and return the single character at ``self.pos``."""
        operator = self.buffer[self.pos]
        self.pos += 1
        return operator

    def _get_atom(self, pos=None):
        """Return the character at *pos* (default: the current position),
        or None when that index is past the end of the buffer."""
        # Compare against None explicitly: "pos or self.pos" would silently
        # replace an explicit pos=0 with the current position.
        index = self.pos if pos is None else pos
        try:
            return self.buffer[index]
        except IndexError:
            return None

    def tokenize(self):
        """Yield tokens one by one until the buffer is exhausted."""
        while True:
            token = self._next_token()
            if token is None:
                break
            yield token
Here is a demo of the usage:
tokenizer = ExpressionTokenizer('((81 * 6) /42+ (3-1))', {'+', '-', '*', '/', '(', ')'})
for token in tokenizer.tokenize():
print(token)
Which produces the output:
(
(
81
*
6
)
/
42
+
(
3
-
1
)
)
Quick regex answer:
re.findall(r"\d+|[()+\-*\/]", str_in)
Demonstration:
>>> import re
>>> str_in = "((81 * 6) /42+ (3-1))"
>>> re.findall(r"\d+|[()+\-*\/]", str_in)
['(', '(', '81', '*', '6', ')', '/', '42', '+', '(', '3', '-', '1',
')', ')']
For the nested parentheses part, you can use a stack to keep track of the level.
This does not provide quite the result you want but might be of interest to others who view this question. It makes use of the pyparsing library.
# Stolen from http://pyparsing.wikispaces.com/file/view/simpleArith.py/30268305/simpleArith.py
# Copyright 2006, by Paul McGuire
# ... and slightly altered
from pyparsing import *
integer = Word(nums).setParseAction(lambda t:int(t[0]))
variable = Word(alphas,exact=1)
operand = integer | variable
expop = Literal('^')
signop = oneOf('+ -')
multop = oneOf('* /')
plusop = oneOf('+ -')
factop = Literal('!')
expr = operatorPrecedence( operand,
[("!", 1, opAssoc.LEFT),
("^", 2, opAssoc.RIGHT),
(signop, 1, opAssoc.RIGHT),
(multop, 2, opAssoc.LEFT),
(plusop, 2, opAssoc.LEFT),]
)
print (expr.parseString('((81 * 6) /42+ (3-1))'))
Output:
[[[[81, '*', 6], '/', 42], '+', [3, '-', 1]]]
Using grako:
start = expr $;
expr = calc | value;
calc = value operator value;
value = integer | "(" #:expr ")" ;
operator = "+" | "-" | "*" | "/";
integer = /\d+/;
grako transpiles to python.
For this example, the return value looks like this:
['73', '+', ['34', '-', '72', '/', ['33', '-', '3']], '+', ['56', '+', ['95', '-', '28']]]
Normally you'd use the generated semantics class as a template for further processing.
To provide a more verbose regex approach that you could easily extend:
import re
solution = []
# A number is a run of digits with an optional fractional part. Written as a
# raw string so the backslashes reach the regex engine, and strict enough
# that a lone "." or something like "1.2.3" is not passed on as one number.
pattern = re.compile(r'(\d+(?:\.\d+)?)')
s = '((73 + ( (34- 72 ) / ( 33 -3) )) + (56 +(95 - 28) ) )'
# Splitting on a capturing group keeps the numbers in the result, so the
# pieces alternate between "everything else" and a number.
for token in pattern.split(s):
    token = token.strip()
    if pattern.fullmatch(token):
        # Keep integers as int so the result reads 73 rather than 73.0.
        solution.append(float(token) if '.' in token else int(token))
        continue
    # Anything else is a run of single-character symbols, possibly with
    # whitespace in between.
    solution.extend("".join(token.split()))
Which will give you the result:
solution = ['(', '(', 73, '+', '(', '(', 34, '-', 72, ')', '/', '(', 33, '-', 3, ')', ')', ')', '+', '(', 56, '+', '(', 95, '-', 28, ')', ')', ')']
Similar to #McGrady's answer, you can do this with a basic queue implementation.
As a very basic implementation, here's what your Queue class can look like:
from collections import deque


class Queue:
    """Minimal first-in, first-out queue."""

    EMPTY_QUEUE_ERR_MSG = "Cannot do this operation on an empty queue."

    def __init__(self):
        # deque removes from the left in O(1); list.pop(0) shifts every
        # remaining element on each call.
        self._items = deque()

    def __len__(self) -> int:
        return len(self._items)

    def is_empty(self) -> bool:
        """Return True when the queue holds no items."""
        return not self._items

    def enqueue(self, item):
        """Add *item* at the back of the queue."""
        self._items.append(item)

    def dequeue(self):
        """Remove and return the item at the front of the queue.

        Raises:
            RuntimeError: if the queue is empty.
        """
        try:
            return self._items.popleft()
        except IndexError:
            # The IndexError is an implementation detail; hide it from the
            # traceback the caller sees.
            raise RuntimeError(Queue.EMPTY_QUEUE_ERR_MSG) from None

    def peek(self):
        """Return the item at the front of the queue without removing it.

        Raises:
            RuntimeError: if the queue is empty.
        """
        try:
            return self._items[0]
        except IndexError:
            raise RuntimeError(Queue.EMPTY_QUEUE_ERR_MSG) from None
Using this simple class, you can implement your parse function as:
def tokenize_with_queue(exp: str) -> list:
    """Split an arithmetic expression into a list of token strings.

    Spaces are ignored. Consecutive digits are accumulated into one number
    token; each of ``( ) + - / *`` is a token of its own. Tokens are pushed
    onto a ``Queue`` and drained in order at the end.

    Raises:
        ValueError: if *exp* contains any other character.
    """
    # The return annotation is the builtin ``list``: ``List`` would need an
    # import from ``typing`` and otherwise fails when the def is executed.
    queue = Queue()
    cum_digit = ""
    for c in exp.replace(" ", ""):
        if c in "()+-/*":
            # A symbol ends whatever number was being accumulated.
            if cum_digit:
                queue.enqueue(cum_digit)
                cum_digit = ""
            queue.enqueue(c)
        elif c.isdigit():
            cum_digit += c
        else:
            raise ValueError(f"Unexpected character {c!r} in expression")
    if cum_digit:  # one last sweep in case there are any digits waiting
        queue.enqueue(cum_digit)
    return [queue.dequeue() for _ in range(len(queue))]
Testing it like below:
exp = "((73 + ( (34- 72 ) / ( 33 -3) )) + (56 +(95 - 28) ) )"
# The original call had a stray double quote before the closing parenthesis.
print(tokenize_with_queue(exp))
would give you the token list as:
['(', '(', '73', '+', '(', '(', '34', '-', '72', ')', '/', '(', '33', '-', '3', ')', ')', ')', '+', '(', '56', '+', '(', '95', '-', '28', ')', ')', ')']