Insert operators from string into expression string - python

Given a string with variables and parentheses:
'a((bc)((de)f))'
and a string of operators:
'+-+-+'
I would like to insert each operator (in order) into the first string between the following patterns (where char is defined as a character that is not an open or close parenthesis):
char followed by char
char followed by '('
')' followed by '('
')' followed by char
To give the result:
'a+((b-c)+((d-e)+f))'
Edit: I got it to work with the following code, but is there a more elegant way to do this, i.e. without a for loop?
x = 'a((bc)((de)f))'
operators = '+-+-+'

# Walk over every adjacent pair of characters in x. An operator belongs in the
# gap when the left character can end an operand (a variable or ')') and the
# right character can start one (a variable or '('), i.e. when the left one is
# not '(' and the right one is not ')'.
pieces = []
z = 0  # index of the next operator to insert
for left, right in zip(x, x[1:]):
    pieces.append(left)
    if left != '(' and right != ')':
        pieces.append(operators[z])
        # Never index past the end of `operators`: once they run out, the
        # last operator is reused for any remaining gaps.
        if z + 1 < len(operators):
            z += 1
# zip() stops one character early; x[-1:] adds the final character back and is
# simply '' when x is empty.
pieces.append(x[-1:])
y = ''.join(pieces)
print(y)

initialExpr = 'a((bc)((de)f))'
operators = '+-+-+'
countOp = 0  # number of operators printed so far

# Print the expression one character at a time; after each character that can
# end an operand (a letter or ')') and is followed by one that can start an
# operand (a letter or '('), print the next operator. Nothing is printed after
# the last character, and no trailing newline is emitted.
lastIndex = len(initialExpr) - 1
for index, char in enumerate(initialExpr):
    print(char, end='')
    if index == lastIndex:
        break
    following = initialExpr[index + 1]
    if (char == ')' or char.isalpha()) and (following == '(' or following.isalpha()):
        print(operators[countOp], end='')
        countOp += 1
This should do the job.
This assumes that the variables, parentheses and operators are in the right order and number.

One-liner using re:
import re
s = "a((bc)((de)f))"
o = "+-+-+"
# Match the zero-width gap between a character that can end an operand
# (letter or ')') and one that can start an operand (letter or '('). Because
# nothing is consumed, overlapping pairs such as the two gaps in "abc" or in
# ")c(" each receive their own operator. The iterator bound as a default
# argument hands out the operators in order (StopIteration if they run out).
result = re.sub(
    r"(?<=[a-z)])(?=[a-z(])",
    lambda _gap, i=iter(o): next(i),
    s,
)
print(result)
Prints:
a+((b-c)+((d-e)+f))

You can use a regex matching the pairs of characters inside which you want to insert an operator.
Then, you can use re.sub with a replacement function that joins the two characters with the next operator.
We can use a class with a __call__ method, that uses an iterator on the operators:
import re
# Match a character that can end an operand (a letter or ')') only when the
# character right after it can start one (a letter or '('). The second
# character is merely looked at, not consumed, so it can in turn be the first
# character of the next pair: "abc" yields two matches, not one.
rules = re.compile(r'[a-z)](?=[a-z(])')


class Replace:
    """Replacement callable for ``rules.sub`` that hands out operators in order.

    Args:
        operators: iterable of operator strings, consumed one per match.
    """

    def __init__(self, operators):
        self.it_operators = iter(operators)

    def __call__(self, match):
        """Return the matched character followed by the next operator.

        Raises:
            StopIteration: if there are more matches than operators.
        """
        return match.group() + next(self.it_operators)


variables = 'a((bc)((de)f))'
operators = '+-+-+'
print(rules.sub(Replace(operators), variables))
# a+((b-c)+((d-e)+f))
Replace(operators) returns a callable Replace instance with an it_operators attribute that is an iterator, ready to iterate on the operators.
For each matching pair of characters, sub calls this instance, and its __call__ method returns the replacement for the two characters, that it builds by joining them with the next operator.

Related

How to remove leading zeros from the calculator expression in a string? python

I have a doubt, in python
the string is, Z = "00123+0567*29/03-7"
how to convert it to "123+567*29/3-7"
I tried re.split('[+]|[*]|-|/', Z) followed by for i in res: i = i.lstrip("0").
This splits the string correctly, but I do not know how to join the pieces back together with the same operators as in "Z" to get Z = "123+567*29/3-7".
How can I resolve this?
import re


def cut_zeroes(Z):
    """Return the expression ``Z`` with leading zeros removed from each number.

    A run of zeros is dropped only when it starts a number (it is not preceded
    by a digit or a decimal point) and at least one digit follows it, so "0",
    "100" and "1.05" are left untouched while "007" becomes "7" and "000"
    becomes "0". Operators, signs and any other characters are copied as-is,
    which also makes empty input, a leading sign and a trailing operator safe.

    Args:
        Z: expression string such as "00123+0567*29/03-7".

    Returns:
        The expression as a string, without redundant leading zeros.
    """
    return re.sub(r'(?<![\d.])0+(?=\d)', '', Z)


Z = "00123+0567*29/03-700"
print(cut_zeroes(Z))
import re

Z = "00123+0567*29/03-7"
print(Z)
# The capturing group makes re.split keep every separator in the result, so
# joining the pieces afterwards restores the operators in their places.
res = re.split(r'(\D)', Z)
print(res)
empty_lst = []
previous = ''  # token handled just before the current one
for i in res:
    # Digits right after a '.' are a fractional part; their zeros matter.
    if previous != '.':
        # `or i[:1]` keeps a single "0" when the token consists only of zeros
        # (and leaves an empty token empty).
        i = i.lstrip("0") or i[:1]
    empty_lst.append(i)
    print(i)
    previous = i
print(empty_lst)
new_str = ''.join(empty_lst)
print(new_str)
def zero_simplify(Z):
    """Collapse each run of zeros that starts a number into a single "0".

    Only runs that are not preceded by a digit or a decimal point are touched,
    so zeros inside or at the end of a number ("100", "1.005") are preserved.

    Args:
        Z: expression string.

    Returns:
        The simplified expression as a list of single characters.
    """
    from re import sub
    return list(sub(r"(?<![\d.])0{2,}", "0", Z))


Z = "00123+0567*29/03-7+0-000"
Z = zero_simplify(Z)
pos = len(Z) - 1
# Scan from the end so that deleting a character never shifts the positions
# that are still to be visited.
while pos > -1:
    if Z[pos] == "0":
        starts_number = pos == 0 or not (Z[pos - 1].isdigit() or Z[pos - 1] == ".")
        has_digit_after = pos + 1 < len(Z) and Z[pos + 1].isdigit()
        # A zero is unnecessary only when it opens a number and more digits
        # follow; a lone "0" or the zero in "0.5" has to stay.
        if starts_number and has_digit_after:
            del Z[pos]
    pos -= 1
Z = "".join(Z)
print(Z)
What this does is set Z, 'listify' it, and set pos to the last position in Z. Then it removes all of the unnecessary 0s using the loop and Z = "".join(Z). Then it prints Z at the end. If you want a function to remove the zeroes, you can have it this way:
def zero_simplify(Z):
    """Collapse each run of zeros that starts a number into a single "0".

    Runs preceded by a digit or a decimal point are left alone, so "100" and
    "1.005" keep all of their zeros.

    Args:
        Z: expression string.

    Returns:
        The simplified expression as a list of single characters.
    """
    from re import sub
    return list(sub(r"(?<![\d.])0{2,}", "0", Z))
def remove_unnecessary_zeroes(Z):
    """Return the expression ``Z`` without redundant leading zeros.

    Zeros are removed only where they open a number (no digit or decimal point
    in front of them) and another digit follows, so "0", "100", "0.5" and
    "1.05" are unchanged while "007" becomes "7" and "000" becomes "0".

    Args:
        Z: expression string such as "00123+0567*29/03-7+0-000".

    Returns:
        The cleaned-up expression as a string.
    """
    from re import sub
    return sub(r"(?<![\d.])0+(?=\d)", "", Z)


Z = "00123+0567*29/03-7+0-000"
print(remove_unnecessary_zeroes(Z))
Try it yourself and tell me in the comments if it worked for you!
Here's a concise (if you strip all the comments from the code) and elegant way to achieve this:
import re
Z = "00123+0567*29/03-7"
operators = re.findall(r'\D', Z)  # List all the operators used in the string
nums = re.split(r'\D', Z)  # List all the numbers in the string
operators.append('')  # Add an empty operator at the end
# Strip the leading zeroes from each number. `or num[:1]` keeps a single "0"
# for a number made only of zeroes, and digits that follow a '.' are left
# alone because zeroes in a fractional part are significant.
nums = [
    num if before == '.' else (num.lstrip('0') or num[:1])
    for before, num in zip([''] + operators, nums)
]
# Create a list with the operands (numbers) concatenated by operators
num_operator_list = [num + operator for num, operator in zip(nums, operators)]
# Join all the intermediate expressions to create a final expression
final_expression = ''.join(num_operator_list)
print(final_expression)
Output
123+567*29/3-7
Explanation
First of all you need to separate the operators and the operands and then lstrip the zeroes from each operands. After this add an additional empty operator at the end of operator list. Then concatenate each operand with corresponding operator (the empty operator is concatenated with the last operand). Finally, join the list to get a final expression.
It can be done with regular expressions:
import re
Z = "00123+0567*29/03-7"
# [^\d.] rather than \D: a decimal point must not count as the character in
# front of "leading" zeroes, otherwise "1.05" would be turned into "1.5".
r1=r"([^\d.])0+(\d+)"
# Anchored at the start of the string (a bare \b would also match after '.').
r2=r"^0+(\d+)"
#substitute non-digit,leading zeroes, digits with non-digit and digits
sub1=re.sub(r1,r"\1\2",Z)
#substitute start of string, leading zeroes, digits with digits
sub2=re.sub(r2,r"\1",sub1)
print(sub2)
It is done in two passes (to handle the leading zeroes in the beginning of the string), I don't know if it's possible to do in one pass.

Write a recursive function matching_bracket(string, idx) to find the index of the close bracket matching the open bracket at string[idx]

While there are many questions on stackoverflow to check if the string is balanced, what I need is to find the index of the closing bracket of string[idx]. For example:
>>> matching_bracket('([])', 0)
3
>>> matching_bracket('([])', 1)
2
There are 3 conditions that will return -1:
the closing bracket is not of the same type
the nested brackets are not matched [IMPORTANT]
there are no more brackets available
Here is what I have so far:
# Asker's attempt, kept exactly as posted. It is meant to return the index of
# the bracket that closes string[idx], or -1 when there is no valid match.
# As reported right below, matching_bracket('([])', 0) returns 2 instead of 3.
def matching_bracket(string, idx):
open_tup = ("(", "{", "<", "[")
close_tup = (")", "}", ">", "]")
chosen = string[idx]
# Position of the opener inside open_tup; close_tup holds its partner at the
# same position. tuple.index raises ValueError if string[idx] is not an opener.
b_index = open_tup.index(chosen)
n = len(string) - 1
# Recursing with idx + 1 replaces the bracket being searched for by the nested
# one, so the result describes the nested bracket rather than the original.
if string[idx + 1] in open_tup: # Case 1: Check if nested brackets match
return matching_bracket(string, idx + 1)
# Otherwise drop the last character and retry until the string ends with the
# expected closer.
elif string[n] != close_tup[b_index]: # Case 2: Closing bracket not the same
return matching_bracket(string[0 : n], idx)
elif len(string) == 1: # Case 3: No more available brackets
return -1
else:
return n
While I am running a recursive function to check if the nested brackets are closed as well, I am having difficulty getting the correct output as I end up returning the index of the closing bracket that is nested instead. See below:
>>> matching_bracket('([])', 0)
2
How should I modify my code?
In the code above, the first if condition checks whether the next bracket is an opening bracket. If it is, you call matching_bracket with the index of that next bracket, and so you lose the index of the original opening bracket whose closing bracket you want.
Check out the following solution, which uses a stack:
def matching_bracket(string, idx):
    """Return the index of the bracket that closes ``string[idx]``, or -1.

    Openers met on the way are pushed on a stack and every closer must match
    the opener on top of it; the answer is the position at which the stack
    becomes empty again. Characters that are not brackets are ignored.

    Args:
        string: text containing the brackets ``() {} <> []``.
        idx: index of the opening bracket to match.

    Returns:
        The index of the matching closing bracket, or -1 when ``idx`` is out
        of range, ``string[idx]`` is not an opening bracket, a nested pair is
        mismatched, or the string ends before the bracket is closed.
    """
    dict_brackets = {"{": "}", "(": ")", "<": ">", "[": "]"}
    close_set = set(dict_brackets.values())
    # Validate idx before indexing so an out-of-range idx cannot raise.
    if not 0 <= idx < len(string) or string[idx] not in dict_brackets:
        return -1
    stack = [string[idx]]
    for t in range(idx + 1, len(string)):
        char = string[t]
        if char in dict_brackets:
            stack.append(char)
        elif char in close_set:
            # The stack is never empty here: the function returns as soon as
            # the bracket pushed first has been closed.
            if char != dict_brackets[stack.pop()]:
                return -1
            if not stack:
                return t
    return -1
It's a little convoluted, but should do:
def matching_bracket(string, idx):
    """Return the index of the bracket that closes ``string[idx]``, or -1.

    The search is recursive: whenever another opening bracket is met, its own
    closing bracket is located first and the scan resumes right after it, so
    sibling groups such as ``([]{})`` are handled as well as nested ones.
    Characters that are not brackets are skipped.

    Args:
        string: text containing the brackets ``() {} <> []``.
        idx: index of the opening bracket to match.

    Returns:
        The index of the matching closing bracket, or -1 when ``idx`` is out
        of range, ``string[idx]`` is not an opening bracket, a nested pair is
        mismatched, or no closing bracket is left.
    """
    bracket_dict = {'[': ']', '(': ')', '{': '}', '<': '>'}
    closers = set(bracket_dict.values())

    # Actual recursive function
    def inner_func(ix_open):
        """Return the index closing the opener at ``ix_open``, or -1."""
        ix = ix_open + 1
        while ix < len(string):
            char = string[ix]
            if char in bracket_dict:
                # Jump over the whole nested group in one step.
                ix = inner_func(ix)
                if ix == -1:
                    return -1
            elif char in closers:
                # The first closer outside any nested group must be ours.
                return ix if char == bracket_dict[string[ix_open]] else -1
            ix += 1
        return -1

    if not 0 <= idx < len(string) or string[idx] not in bracket_dict:
        return -1
    return inner_func(idx)
PS: Wrote down the solution way back, forgot to post :p
If your goal is to match open delimiters and close them with corresponding delimiters, take a look at this library I made, perhaps the algorithm can help you, though it is in java.
Here's how it works-
First the class needs to know which opening delimiter matches which closing delimiter - you can use a dictionary for this in python
delim_dict = {}
delim_dict['('] = ')'
.....
Now if you're only interested in checking whether the closing and opening delimiters don't match - take a look at this function.
Simply put, you have to count the number of each closing delimiter and each opening delimiter while iterating over the string in reverse. If the counts don't match, you know the delimiters are not matched either.
Now if you want to find the index of your desired delimiter - take a look at this function
It's designed to find the mathematical function in an expression, given its closing delimiter, but you can modify it to match your usecase. Since you want to find a closing delimiter, given opening delimiter, you should be iterating the expression in normal order, instead of reverse
# opening_delim is given as parameter
closing_delim = get_corresponding_delimiter(opening_delim)
closing_delim_count, opening_delim_count = 0, 0
i = 0
for item in expression:
if expression[i] == opening_delim:
opening_delim_count += 1
elif expression[i] == closing_delim:
closing_delim_count+= 1
if opening_delim_count == closing_delim_count:
return i
i += 1
Of course, this code is only for the first index's delimiter and it also assumes the delimiters are matched correctly

Repair one string relative to another in Python

Query "AAAAA-AAACAAA-AAAAAA"
Reference "AA-AATAAAAAAATAAAAAA"
In Python,how do I repair a string (Query) relative to a Reference string where dashes in the query are substituted for the reference character, and dashes in the Reference string result in deletions in the corresponding Query character?
"AAAAA-AAACAAA-AAAAAA" should become
"AAAATAAACAAATAAAAAA"
(where parentheses here "AA()AA(T)AAACAAA(T)AAAAAA" highlight the modified characters)
Below is code that can repair the dashes in the Query relative to the reference which may or may not be helpful(line numbers are specific to the file, not relevant here, I apologize for the non-pythonic code!), but I cannot modify the Query according to dashes in the reference....
if "Query identifier" in line:
Query = line[24:-12]
if "-" in Query:
indices = [i for i, x in enumerate(Query) if x == "-"]
QueryStringUntilFirstDash = Query[:indices[0]]
found = 2
if found ==2 and "Reference identifier" in line:
Ref = line[24:-12]
if len(indices) == 1:
QueryDashToEnd.append(Query[indices[0]+1:])
print QueryStringUntilFirstDash+Ref[indices[0]]+str(QueryDashToEnd[0])
del(A[:])
else:
while y < len(indices):
y+=1
if y < len(indices):
DashesMiddleofQuery.append(Query[indices[y-1]:indices[y]])
DashesMiddleofQuerySubstitution = [B.replace('-', Ref[indices[y-1]]) for B in B]
Concat= ''.join(B)
del(B[:])
print UID
print Beg+str(Concat)+Query[indices[-1]+1:]+">1"
found = 0
y = 0
IIUC, something like this might work:
>>> query = "AAAAA-AAACAAA-AAAAAA"
>>> ref = "AA-AATAAAAAAATAAAAAA"
>>> fixed = ''.join(r if q == '-' else '' if r == '-' else q
... for q,r in zip(query, ref))
>>>
>>> fixed
'AAAATAAACAAATAAAAAA'
Or if you want to push the logic into a function:
>>> def fixer(q,r):
... if q == '-':
... return r
... if r == '-':
... return ''
... return q
...
>>> fixed = ''.join(map(fixer, query, ref))
>>> fixed
'AAAATAAACAAATAAAAAA'
I think it's easier to think in terms of pairs of characters, and what to do with those directly, rather than indices.

How to process character by character in a line

I have a file that has sequence on line 2 and variable called tokenizer, which give me an old position value. I am trying to find the new position.. For example tokenizer for this line give me position 12, which is E by counting letters only until 12. So i need to figure out the new position by counting dashes...
---------------LL---NE--HVKTHTEEK---PF-ICTVCR-KS----------
This is what i have so far it still doesn't work.
with open(filename) as f:
countletter = 0
countdash = 0
for line, line2 in itertools.izip_longest(f, f, fillvalue=''):
tokenizer=line.split()[4]
print tokenizer
for i,character in enumerate(line2):
for countletter <= tokenizer:
if character != '-':
countletter += 1
if character == '-':
countdash +=1
my new position should be 32 for this example
First answer, edited by Chad D to make it 1-indexed (but incorrect):
def get_new_index(string, char_index):
    """Return the 1-based position in ``string`` of its ``char_index``-th letter.

    Dashes are gaps and do not count as letters. Both ``char_index`` and the
    returned position are 1-based.

    Args:
        string: gapped sequence such as "--AB-C".
        char_index: 1-based index among the non-dash characters.

    Returns:
        The 1-based position including dashes, or None when the string holds
        fewer than ``char_index`` letters (or ``char_index`` is below 1).
    """
    chars = 0
    for i, char in enumerate(string):
        if char == '-':
            continue
        chars += 1
        # Compare only after counting a letter, so a dash can never be
        # reported as the answer (e.g. for char_index == 0).
        if chars == char_index:
            return i + 1
    return None
Rewritten version:
import re
def get(st, char_index):
    """Return the 0-based position in ``st`` of its ``char_index``-th letter.

    Dashes are gaps and do not count as letters. ``char_index`` is 0-based.

    Returns:
        The 0-based position including dashes, or None if ``st`` has no such
        letter.
    """
    chars = -1
    for i, char in enumerate(st):
        if char == '-':
            continue
        chars += 1
        # Compare only after counting a letter so a dash is never returned.
        if chars == char_index:
            return i
    return None


def test():
    """Print each letter next to the character found at its computed position."""
    st = '---------------LL---NE--HVKTHTEEK---PF-ICTVCR-KS----------'
    initial = st.replace('-', '')
    for i, char in enumerate(initial):
        # The 0-based index goes to the 0-based helper; the last two columns
        # must show the same letter.
        print(i, char, st[get(st, i)])


def get_1_indexed(st, char_index):
    """1-based variant of ``get``: both the argument and the result start at 1.

    Returns:
        The 1-based position including dashes, or None if there is no such
        letter.
    """
    found = get(st, char_index - 1)
    return None if found is None else found + 1


def test_1_indexed():
    """Same check as ``test`` but going through the 1-based helper."""
    st = '---------------LL---NE--HVKTHTEEK---PF-ICTVCR-KS----------'
    initial = st.replace('-', '')
    for i, char in enumerate(initial):
        print(i + 1, char, st[get_1_indexed(st, i + 1) - 1])
my original text looks like this and the position i was interested in was 12 which is 'E'
Actually, it's K, assuming you're using zero indexed strings. Python uses zero indexing so unless you're jumping through hoops to 1-index things (and you're not) it will give you K. If you were running into issues, try addressing this.
Here's some code for you that does what you need it to (albeit with 0-indexing, not 1-indexing). This can be found online here:
def get_new_index(oldindex, str):
    """Map a 0-based index among the letters of ``str`` to its real position.

    Dashes are gaps and are skipped while counting letters.

    Args:
        oldindex: 0-based index counting only the non-dash characters.
        str: gapped sequence such as "--AB-C".

    Returns:
        The 0-based position in ``str`` (dashes included) of that letter.

    Raises:
        ZeroDivisionError: if ``str`` has no letter at ``oldindex``. The
            exception type is the one the original ``1 / 0`` produced, so
            existing callers keep working.
    """
    for newindex, c in enumerate(str):
        if c != '-':
            if oldindex == 0:
                return newindex
            oldindex -= 1
    raise ZeroDivisionError("no non-dash character at the requested index")
This is a silly way to get the second line, it would be clearer to use an islice, or next(f)
for line, line2 in itertools.izip_longest(f, f, fillvalue=''):
Here countletter is an int while tokenizer is a str, which is probably not what you expect.
for countletter <= tokenizer:
It's also a syntax error, so I think this isn't the code you are running
Perhaps you should have
tokenizer = int(line.split()[4])
to make tokenizer into an int
print tokenizer can be misleading because int and str look identical, so you see what you expect to see. Try print repr(tokenizer) instead when you are debugging.
once you make sure tokenizer is an int, you can change this line
for i,character in enumerate(line2[:tokenizer]):

Shift cipher in Python: error using ord

I want to replace each character of a string by a different one, shifted over in the alphabet. I'm shifting by 2 in the example below, so a -> c, b -> d, etc.
I'm trying to use a regular expression and the sub function to accomplish this, but I'm getting an error.
This is the code that I have:
p = re.compile(r'(\w)')
test = p.sub(chr(ord('\\1') + 2), text)
print test
where the variable text is an input string.
And I'm getting this error:
TypeError: ord() expected a character, but string of length 2 found
I think the problem is that the ord function is being called on the literal string "\1" and not on the \w character matched by the regular expression. What is the right way to do this?
It won't work like this. Python first evaluates chr(ord('\\1') + 2) and only then passes the result to p.sub.
You need to put it in a separate function or use an anonymous function (lambda):
p = re.compile(r'(\w)')
test = p.sub(lambda m: chr(ord(m.group(1)) + 2), text)
print test
Or better yet use maketrans instead of regular expressions:
import string
shift = 2
# Translation table mapping every lowercase ASCII letter to the letter `shift`
# places further on, wrapping around at 'z'. str.maketrans and str.translate
# are the Python 3 replacements for string.maketrans and string.translate.
t = str.maketrans(string.ascii_lowercase, string.ascii_lowercase[shift:] +
                  string.ascii_lowercase[:shift])
text.translate(t)
Full version
def shouldShift(char):
    """Return True if ``char`` is a lowercase ASCII letter.

    A plain range check is used instead of ``string.lowercase``, which exists
    only in Python 2 and depends on the locale.
    """
    return 'a' <= char <= 'z'


def caesarShift(string, n):
    """Return ``string`` lowercased, with each ASCII letter shifted by ``n``.

    The shift wraps around the alphabet, so any integer ``n`` (including
    negative values) is accepted. Characters that are not ASCII letters are
    copied unchanged.

    Args:
        string: text to encode; it is lowercased first.
        n: number of places to shift each letter.

    Returns:
        The shifted, lowercased text.
    """
    def letterToNum(char):
        return ord(char) - ord('a')

    def numToLetter(num):
        return chr(num + ord('a'))

    def shiftByN(char):
        return numToLetter((letterToNum(char) + n) % 26)

    return ''.join((shiftByN(c) if shouldShift(c) else c) for c in string.lower())
One-liner
If you really want a one-liner, it would be this, but I felt it was uglier:
''.join(chr((ord(c)-ord('a')+n)%26 + ord('a')) for c in string)
Demo
>>> caesarShift(string.lowercase, 3)
'defghijklmnopqrstuvwxyzabc'
Try this, using list comprehensions:
input = 'ABC'
''.join(chr(ord(c)+2) for c in input)
> 'CDE'
It's simpler than using regular expressions.
def CaesarCipher(s1,num):
    """Return ``s1`` with every ASCII letter shifted ``num`` places.

    Upper- and lowercase letters keep their case and wrap around within their
    own alphabet; the modulo makes shifts larger than 26 and negative shifts
    wrap correctly too. All other characters are copied unchanged.

    Args:
        s1: text to encode.
        num: number of places to shift each letter (any integer).

    Returns:
        The encoded text.
    """
    shifted = []
    for i in s1:
        asc_V = ord(i)
        if 65 <= asc_V <= 90:        # 'A'..'Z'
            base = 65
        elif 97 <= asc_V <= 122:     # 'a'..'z'
            base = 97
        else:
            shifted.append(i)
            continue
        shifted.append(chr(base + (asc_V - base + num) % 26))
    return ''.join(shifted)


print (CaesarCipher("HEllo", 4))
print (CaesarCipher("xyzderBYTE", 2))

Categories