Related
I'm attempting to implement Python's split() function using recursion with no additional parameters and no loops.
For a given input string, this is the desired output
mySplit('hello,there,world', ',')
=> ['hello', 'there', 'world']
Here is my current attempt. It only removes the delimiter and places the whole string in a list; I cannot figure out how to append separate items to the list!
def mySplit(string, delim):
    """Recursively strip ``delim`` from ``string`` and wrap the result in a list.

    The result always has exactly one element: delimiter characters are
    dropped, but no new list element is ever started, so the pieces end up
    concatenated.  A one-character string is returned unchanged (even when it
    is the delimiter itself); an empty string raises ``IndexError``.
    """
    if len(string) == 1:
        return [string]
    first = string[0]
    # The recursive call only ever yields a single merged string.
    merged_rest = mySplit(string[1:], delim)[0]
    return [merged_rest if first == delim else first + merged_rest]
This code results in ['hellothereworld']
I'd write something like:
def my_split(s, delim):
    """Split ``s`` on the single character ``delim`` and return the pieces.

    The first occurrence of ``delim`` is located, everything before it becomes
    the first piece, and the remainder is split recursively.  When ``delim``
    does not occur, the whole string is the only piece.
    """
    cut = next((pos for pos, ch in enumerate(s) if ch == delim), None)
    if cut is None:
        return [s]
    return [s[:cut], *my_split(s[cut + 1:], delim)]
EDIT: Oops, skipped over a crucial part of your question. I think this works.
def my_split(s, delim, i=0):
    """Split ``s`` on the single character ``delim`` without using a loop.

    ``i`` is the position currently being examined.  The function recurses
    with ``i + 1`` until it either reaches the end of ``s`` (no more
    delimiters) or finds a delimiter, in which case the text before it becomes
    a piece and the text after it is split from position 0 again.
    """
    if i == len(s):
        return [s]
    if s[i] != delim:
        return my_split(s, delim, i + 1)
    head, remainder = s[:i], s[i + 1:]
    return [head] + my_split(remainder, delim)
EDIT 2: It's a tricky one for sure. Really interesting problem. Hopefully I don't hit any more constraints with this one:
def my_split(s, delim):
    """Split ``s`` on the single character ``delim`` using recursion only.

    No loops and no extra parameters are used.  The remainder of the string
    is split first; the current character then either starts a new (empty)
    leading piece, when it is the delimiter, or is prepended to the first
    piece of the remainder.

    Always returns a list.  (The earlier form returned a tuple whenever the
    string began with the delimiter, so the return type depended on the
    input.)
    """
    if not s:
        return [""]
    first, *rest = my_split(s[1:], delim)
    if s[0] == delim:
        return ["", first, *rest]
    return [s[0] + first, *rest]


assert my_split("hello,there,world", ",") == ["hello", "there", "world"]
assert my_split("hello world!", ",") == ["hello world!"]
assert my_split("hello world!", " ") == ["hello", "world!"]
def mySplit(string, delim):
    """Recursively split ``string`` on ``delim`` and return the list of pieces.

    ``delim`` may be longer than one character; the whole delimiter is
    skipped after each match.

    Raises:
        ValueError: if ``delim`` is empty (mirrors ``str.split``), since an
            empty delimiter would otherwise never stop matching.
    """
    if not delim:
        raise ValueError("empty separator")
    idx = string.find(delim)
    if idx == -1:
        return [string]
    # Skip the full delimiter, not just one character.
    return [string[:idx]] + mySplit(string[idx + len(delim):], delim)


print(mySplit('hello,there,world', ','))
I've implemented the following 'tree sizer', but it fails under certain conditions: the example below returns size 2 when it should return size 4. Can anyone help me out? I've rewritten this several times to no avail; it keeps failing.
def getRPNdepth(expression):
    """Evaluate an RPN token list while tracking a running operator/operand balance.

    Tokens starting with ``'x'`` are treated as the value 1.  Every operand
    lowers the balance by one and every operator raises it by one; the
    absolute value of the lowest balance reached is returned.  The evaluated
    formula is printed first.  If anything goes wrong during evaluation
    (too few operands, a token that is not a number, ...) an error line is
    printed and 0 is returned.
    """
    formula = [1 if token[0] == 'x' else token for token in expression]
    print(formula)
    balance = 0
    lowest = 0
    try:
        operands = []
        for token in formula:
            if token in ('-', '+', '*', '/'):
                right = operands.pop()
                left = operands.pop()
                if token == '-':
                    value = left - right
                elif token == '+':
                    value = left + right
                elif token == '*':
                    value = left * right
                else:
                    # Division by zero is replaced by the value 1.
                    value = 1 if right == 0 else left / right
                operands.append(value)
                balance += 1
            else:
                operands.append(float(token))
                balance -= 1
            lowest = min(lowest, balance)
        return abs(lowest)
    except BaseException:
        # Deliberately catches everything, like the bare ``except`` it replaces.
        print('error validate rpn>' + str(expression))
        return 0


xxxx = ['x6', 'x7', '+', 'x7', '+', 'x7', '+', 'x7', '+']
print(getRPNdepth(xxxx))
a couple of examples :
['1','1' ,'+','1','1' ,'+' ,'+']
['1','1','1' ,'+' ,'+']
both give the result of 3, which is correct, but.
['1','1' ,'+','1','1' ,'+' ,'+']
returns 3 when it should be 4
All in all, I need to know the depth of the RPN from its string representation.
Calculating the tree depth is similar to evaluating the expression, but the operators calculate resulting depths instead of resulting values:
def getRPNdepth(expression):
    """Return the depth of the expression tree described by an RPN token list.

    Works like RPN evaluation, except that the stack holds subtree depths:
    an operand is a leaf of depth 1, and a binary operator produces a node
    one level deeper than the deeper of its two operands.  Raises
    ``IndexError`` when the stack runs out of operands (including for an
    empty expression).
    """
    depths = []
    for token in expression:
        if token not in ('-', '+', '*', '/'):
            depths.append(1)
            continue
        right_depth = depths.pop()
        left_depth = depths.pop()
        depths.append(1 + max(right_depth, left_depth))
    return depths.pop()
Well, I did a little 'cheating' and used my RPN-to-infix converter to achieve the same goal. I'm posting it here in case anyone needs it.
def getRPNdepth(expression):
    """Return the deepest parenthesis nesting of the infix form of an RPN list.

    The RPN tokens are converted to a fully parenthesised infix string (every
    operator application adds one pair of parentheses; tokens starting with
    ``'x'`` become 1 and ``' '`` tokens are skipped).  The result is the
    maximum number of simultaneously open parentheses in the first string left
    on the stack, so a lone operand yields 0.
    """
    formula = [1 if token[0] == 'x' else token for token in expression]
    pending = []
    for token in formula:
        if token == ' ':
            continue
        if token in ('-', '+', '*', '/'):
            top = pending.pop()
            below = pending.pop()
            pending.append('({}{}{})'.format(top, token, below))
        else:
            pending.append(str(token))
    depth = 0
    deepest = 0
    for ch in pending[0]:
        if ch == '(':
            depth += 1
        elif ch == ')':
            depth -= 1
        deepest = max(deepest, depth)
    return deepest
Thanks all !
I'm trying to insert delimiters into a string that was created by a previous function (ifw(in_list)). I'm not having any issues with \n or \t, but once my code gets to the "," join it breaks down. I've tried a few different solutions and looked through similar questions/answers on the site, but I keep getting "TypeError: can only join an iterable". Any help you can provide would be very much appreciated.
#! /usr/bin/env python
import os
import sys
import re
# NOTE(review): the name `delim` is rebound by the `def delim(...)` that
# follows, so the environment value stored here is not reachable afterwards.
delim = os.getenv("QWIKIFWLISTMGR_DELIMITER")
# Raw command-line arguments; passed to the functions below as `in_list`.
in_list = sys.argv
def delim(in_list):
    """Return the values selected by ``ifw`` joined with the configured delimiter.

    The delimiter is chosen by the ``QWIKIFWLISTMGR_DELIMITER`` environment
    variable, matched by substring against BLANK, TAB, NL, COMMA, SEMICOLON,
    COLON and SLASH.  The helper functions return their values as one
    space-separated string, so that string is split into tokens before being
    re-joined; joining the string directly would put the delimiter between
    individual characters.

    When the variable is unset or names no known delimiter, the tokens are
    joined with single spaces.
    """
    # Checked in order with a substring test.  SEMICOLON must come before
    # COLON because 'COLON' is itself a substring of 'SEMICOLON'.
    separators = (
        ('BLANK', ' '),
        ('TAB', '\t'),
        ('NL', '\n'),
        ('COMMA', ','),
        ('SEMICOLON', ';'),
        ('COLON', ':'),
        ('SLASH', '/'),
    )
    # An unset variable yields None; treat it as "no delimiter requested".
    requested = os.getenv('QWIKIFWLISTMGR_DELIMITER') or ''
    values = ifw(in_list).split()
    for name, separator in separators:
        if name in requested:
            return separator.join(values)
    return ' '.join(values)


def ifw(in_list):
    """Dispatch to the integer, float or word handler and return its result.

    ``in_list[1]`` selects the handler: ``'i'``, ``'f'`` or ``'w'``.  The
    handler's string result is returned to the caller rather than printed, so
    that it can be post-processed.  An unrecognised selector yields an empty
    string.
    """
    usr_choice = in_list[1]
    if usr_choice == 'i':
        return int_sort(in_list)
    if usr_choice == 'f':
        return float_sort(in_list)
    if usr_choice == 'w':
        return word_sort(in_list)
    return ''
def float_sort(in_list):
    """Return every number found in ``in_list[2:]`` as one space-separated string.

    Both integers (``12``) and decimals (``1.5``, ``.75``) are recognised, and
    they are returned in the order in which they appear.  The arguments are
    joined with a space before scanning so that digits from adjacent
    arguments are not fused into a single number.
    """
    text = ' '.join(in_list[2:])
    return ' '.join(re.findall(r"\d*\.\d+|\d+", text))
#print (float_sort(in_list))
def word_sort(in_list):
    """Return the words of ``in_list[2:]`` sorted alphabetically, space-separated.

    ``in_list[2:]`` is used because the first two entries are the program name
    and the mode selector.  Entries that are empty, that consist solely of one
    of the punctuation tokens below, or that contain any digit are dropped.
    Sorting is case-insensitive; when two words differ only by case, the one
    that sorts first in plain string order (uppercase) comes first.
    """
    punctuation = ('.', ',', ';', '!', ' / ', '"', '?')
    words = [
        word for word in in_list[2:]
        if word and word not in punctuation and not re.search(r"\d", word)
    ]
    words.sort(key=lambda word: (word.lower(), word))
    return " ".join(words)
def int_sort(in_list):
    """Return the stand-alone integers of ``in_list[1:]`` in reverse order.

    The arguments are joined into one space-separated string, and only runs of
    digits that are delimited by whitespace (or the string boundaries) on both
    sides are kept.  The matches are returned last-to-first as a single
    space-separated string.
    """
    text = " ".join(in_list[1:])
    whole_numbers = re.findall(r"(?<!\S)\d+(?!\S)", text)
    whole_numbers.reverse()
    return " ".join(whole_numbers)
#print int_sort(in_list)
#print (delim(in_list))
Your ifw function has no return statement, so it returns None.
So the line:
x = ','.join(str(x) for x in (ifw(in_list)))
becomes
x = ','.join(str(x) for x in None)
and python can't iterate over None.
I would like to split a string similar to
'abc "defg hijk \\"l; mn\\" opqrs"; tuv'
into
(['abc', '"defg hijk \\"l; mn\\" opqrs"'], 33)
i.e. I don't want to break on semicolon inside (nested) quotes. What's the easiest way, tokenize? It doesn't hurt if it's fast, but short is better.
Edit: I forgot one more detail that makes it even more tricky. I need the position of the semicolon that is cutting off the string, or -1 if there is none. (I'm doing changes to legacy code that used to be recursive, but stackoverflowed when the string became very long.)
It's unlikely there is an easy way to solve this without a proper parser. You could probably get away with a hand built parser that doesn't require tokenizing though.
Something like the following should be a good guide:
def parse(s):
    """Split ``s`` into space-separated words, stopping at the first ``;``.

    This is a skeleton for a hand-built parser: backslashes and double quotes
    are routed to ``handle_special_cases``, which is still a placeholder, so
    those characters are currently dropped and quoting is not honoured.

    Returns the list of words collected before the semicolon (or the end of
    the string).  Consecutive or trailing spaces produce empty entries.
    """
    cur_s = []
    strings = []

    def flush_string():
        # Finish the current word and start a new one.  The buffer is cleared
        # in place so the enclosing function keeps seeing the same list.
        strings.append(''.join(cur_s))
        del cur_s[:]

    def handle_special_cases():
        # TODO: Fill this in (escape sequences and quoted sections).
        pass

    for c in s:
        if c == ';':
            break
        elif c in ['\\', '"']:
            handle_special_cases()
        elif c == ' ':
            flush_string()
        else:
            cur_s.append(c)
    flush_string()
    return strings
It's a stateful search, so simple stateless operations are not available. Here's a simple char-by-char stateful evaluator that might meet your "short" without resorting to full tokenization/parsing:
#!/usr/bin/env python
inp="""abc "defg hijk \\"l; mn\\" opqrs"; tuv'`"""


def words_to_semi(inpstr):
    """Split ``inpstr`` into words up to the first semicolon outside quotes.

    A small character-by-character state machine is used: spaces outside
    double quotes separate words, a backslash escapes the next character, and
    a semicolon outside quotes ends the scan.

    Returns a ``(words, pos)`` tuple where ``pos`` is the number of
    characters consumed: the index of the terminating semicolon, or
    ``len(inpstr)`` when there is none.
    """
    ret = ['']
    st8 = 1  # state: 1=reg, 2=in quotes, 3=escaped quote, 4=escaped reg, 0=end
    # Each handler returns (text to emit, next state).  Emitting None starts a
    # new word.  A ``None`` key is the catch-all for that state.
    ops = {
        1: {' ': lambda c: (None, 1),
            '"': lambda c: (c, 2),
            ';': lambda c: ('', 0),
            '\\': lambda c: (c, 4),
            },
        2: {'\\': lambda c: (c, 3),
            '"': lambda c: (c, 1),
            },
        3: {None: lambda c: (c, 2)},
        4: {None: lambda c: (c, 1)},
    }
    pos = 0
    for C in inpstr:
        state_ops = ops[st8]
        handler = state_ops.get(C) or state_ops.get(None)
        if handler is not None:
            oc, st8 = handler(C)
        else:
            # No rule for this character: emit it and stay in the same state.
            oc = C
        if not st8:
            break
        if oc is None:
            ret.append('')
        else:
            ret[-1] += oc
        pos = pos + 1
    return ret, pos


print(str(words_to_semi(inp)))
Just modify the ops dict (and add new states) to handle other cases; everything else is generic code.
Here's the brute-force method I went with. Brrr...
def f(s):
    """Tokenise ``s`` up to the first semicolon that is outside double quotes.

    Whitespace (space, tab, newline) separates tokens.  A double-quoted
    section is kept as a single token including its quotes; inside it a
    backslash escapes the next character, so ``\\"`` does not end the quote.

    Returns ``(tokens, cut_index)`` where ``cut_index`` is the index of the
    terminating semicolon, or -1 when there is none.  Text still being
    collected when the scan ends is kept as the final token whether or not a
    semicolon was found.
    """
    instr = False
    inescape = False
    a = ''
    rs = []
    cut_index = -1
    for idx, ch in enumerate(s):
        if instr:
            a += ch
            if inescape:
                inescape = False
            elif ch == '\\':
                inescape = True
            elif ch == '"':
                rs.append(a)
                a = ''
                instr = False
        elif ch == '"':
            if a:
                rs.append(a)
            a = ch
            instr = True
        elif ch == ';':
            cut_index = idx
            break
        elif ch in ' \t\n':
            if a:
                rs.append(a)
            a = ''
        else:
            a += ch
    # Flush the token in progress; without this the last token was lost
    # whenever the input contained no semicolon.
    if a:
        rs.append(a)
    return rs, cut_index


f('abc "defg hijk \\"l; mn\\" opqrs"; tuv')
I want to be able to pair up all parentheses in a string; if they aren't paired, then they get their index number and False. It seems like it is repeating some values over and over, i.e. cl == pop[1]. I have tried to see where the problem is, but I can't see it no matter how hard I try. So I'm asking if anyone can help me locate the error and maybe even improve my code ;)
def check_parentheses(string):
    '''Checks if parens are paired, otherwise they are bad.

    Returns a list of ``[open_index, close_index]`` pairs for matched
    parentheses and ``[index, False]`` entries for unmatched or mismatched
    ones.  Relies on the module-level ``lparens``, ``rparens`` and ``lrmap``
    (opening character -> closing character) tables.

    Each character's position comes straight from ``enumerate``.  Previously
    the position was only refreshed for characters found in ``lrmap`` (the
    opening ones), so closing parentheses re-used the index of the last
    opening parenthesis.

    Note that index 0 compares equal to ``False`` in Python; use ``is False``
    when inspecting the second element of a result entry.
    '''
    brackets = []
    parenstack = collections.deque()
    for cl, ch in enumerate(string):
        if ch in lparens:
            parenstack.append([ch, cl])
        elif ch in rparens:
            if not parenstack:
                print('no opening parenthesis left in stack')
                brackets.append([cl, False])
                continue
            opener, opened_at = parenstack.pop()
            if lrmap[opener] != ch:
                print('wrong type of parenthesis popped from stack',
                      opener, ch, opened_at, cl)
                brackets.append([opened_at, False])
                brackets.append([cl, False])
            else:
                brackets.append([opened_at, cl])
    # if we are not out of opening parentheses, we have a mismatch
    for p in parenstack:
        brackets.append([p[1], False])
    return brackets
You can adapt my code to a similar question:
def Evaluate(str):
    """Return True when ``str`` consists solely of correctly nested brackets.

    Recognised pairs are ``<>``, ``()``, ``{}`` and ``[]``.  Any character
    that is not one of these brackets makes the result False, as does a
    closing bracket without a matching opener or an opener left unclosed.
    """
    openers, closers = "<({[", ">)}]"
    pending = []
    for symbol in str:
        if symbol in openers:
            pending.append(symbol)
            continue
        if symbol not in closers:
            return False
        if not pending:
            return False
        # Openers and closers share positions in their respective strings.
        expected = openers[closers.index(symbol)]
        if pending.pop() != expected:
            return False
    return not pending
# Consuming the same iterator twice in zip() pairs each opener with the
# character that follows it: '(' -> ')', '{' -> '}', ...
iparens = iter('(){}[]<>')
parens = dict(zip(iparens, iparens))
closing = parens.values()


def balanced(astr):
    """Return True when every bracket in ``astr`` is properly matched.

    Characters that are not brackets are ignored.  For each opener the
    expected closer is remembered; a closer must equal the most recently
    expected one.
    """
    expected = []
    for ch in astr:
        closer = parens.get(ch)
        if closer:
            expected.append(closer)
            continue
        if ch in closing and (not expected or expected.pop() != ch):
            return False
    return not expected
Example:
>>> balanced('[1<2>(3)]')
True
>>> balanced('[1<2(>3)]')
False
BRACES = {'(': ')', '[': ']', '{': '}'}


def group_check(s):
    """Return True when ``s`` is a correctly nested sequence of brackets.

    Every character that is not an opening bracket is treated as a closer and
    must equal the closer expected for the most recent opener, so any other
    character makes the result False.
    """
    pending = []
    for symbol in s:
        closer = BRACES.get(symbol)
        if closer:
            pending.append(closer)
            continue
        if not pending:
            return False
        if pending.pop() != symbol:
            return False
    return not pending
Thanks hughdbrown your code was a breeze to get working and it's really short! You've just saved me a headache :D
I converted it to PEP 8, if that's OK :)
Edit
Added support for comments and strings, it will not match inside them.
Added support for easy language brace checking, modify the charset dict.
Correctly pairs up, i.e. right to left
HTML
# Bracket-checking configuration for HTML.
# `comment` must be a tuple of (start, end) pairs; the trailing comma keeps the
# single HTML pair from collapsing into a flat two-string tuple.
charset = dict(opening='{[(<',
               closing='}])>',
               string=('"', "'"),
               comment=(('<!--', '-->'),))
Python
# Bracket-checking configuration for Python: triple-quoted blocks and
# '#'-to-end-of-line are listed as comment (start, end) pairs.
charset = {
    'opening': '{[(<',
    'closing': '}])>',
    'string': ('"', "'"),
    'comment': (("'''", "'''"), ('"""', '"""'), ('#', '\n')),
}
C++
# Bracket-checking configuration for C++: block comments and
# '//'-to-end-of-line are listed as comment (start, end) pairs.
charset = {
    'opening': '{[(<',
    'closing': '}])>',
    'string': ('"', "'"),
    'comment': (('/*', '*/'), ('//', '\n')),
}
you get the point? :)
# Active configuration: bracket characters, string quotes and comment
# (start, end) pairs.
charset = {
    'opening': '{[(<',
    'closing': '}])>',
    'string': ('"', "'"),
    'comment': (('<!--', '-->'), ('"""', '"""'), ('#', '\n')),
}

# Every character that can start or end something of interest: the first
# character of each comment start/end marker, the string quotes and all
# brackets.
allowed = ''.join(
    [
        ''.join(start[0] + end[0] for start, end in charset['comment']),
        ''.join(charset['string']),
        charset['opening'],
        charset['closing'],
    ]
)
def brace_check(text):
    # Pair up the brackets in `text`, skipping over string and comment
    # sections as configured in the module-level `charset`.
    #
    # Returns `found`, a list of 2-tuples: (open_index, close_index) for
    # brackets that were paired and (index, False) for those that were not.
    #
    # NOTE(review): the original indentation of this function was lost; the
    # nesting below is a best-effort reconstruction and should be confirmed
    # against the author's version before relying on it.
    o = []  # (char, index) of every opening bracket seen outside strings/comments
    c = []  # (char, index) of every closing bracket seen outside strings/comments
    notr = []  # never read or written again in this function
    found = []
    # False while scanning ordinary text; otherwise the terminator being
    # waited for (a comment end marker or the opening quote character).
    busy = False
    last_pos = None
    # NOTE(review): `xrange` exists only in Python 2; under Python 3 this
    # line raises NameError.
    for i in xrange(len(text)):
        ch = text[i]
        if not busy:
            cont = True
            for comment in charset['comment']:
                if ch == comment[0][0]:
                    # NOTE(review): the slice end is len(comment[0]), an
                    # absolute index rather than i + len(comment[0]), so this
                    # can only equal the start marker when i == 0 - confirm.
                    como = text[i:len(comment[0])]
                    if como == comment[0]:
                        busy = comment[1]
                        if ch in charset['opening']:
                            # The comment start marker itself begins with an
                            # opening bracket; remember where.
                            last_pos = i
                        cont = False
                        break
            if cont:
                if ch in charset['string']:
                    busy = ch
                elif ch in charset['opening']:
                    o.append((ch, i))
                elif ch in charset['closing']:
                    c.append((ch, i))
        else:
            if ch == busy[0]:
                if len(busy) == 1:
                    comc = ch
                else:
                    comc = text[i:i + len(busy)]
                if comc == busy:
                    if last_pos is not None:
                        if busy[-1] in charset['closing']:
                            found.append((last_pos, i))
                        last_pos = None
                    # Overwrite the terminator with newlines in the local
                    # copy of `text`, so its characters are not examined as
                    # brackets on the following iterations.
                    text = text[:i] + '\n' * len(comc) +\
                        text[i + len(comc):]
                    # `busy` is a non-empty string here, so this sets it to False.
                    busy = not busy
            elif busy in charset['string']:
                # A newline also ends an open string.
                if ch == '\n':
                    busy = not busy
    # Match openers from the last one backwards: each takes the first
    # remaining closer of the right kind that lies after it.
    for t, e in reversed(o):
        try:
            n = next((b, v) for b, v in c\
                if b == charset['closing'][\
                    charset['opening'].find(t)] and v > e)
            c.remove(n)
            n = n[1]
            if found != []:
                # Compares the candidate pair against the most recently
                # recorded entry and rejects it when the two ranges straddle
                # each other.
                if e < found[-1][0] and n > found[-1][0] and n < found[-1][1]\
                        or e < found[-1][1] and n > found[-1][1] and e > found[-1][0]:
                    found.append((n, False))
                    n = False
        except StopIteration:
            n = False
        found.append((e, n))
    # Closers that were never claimed by an opener are unpaired.
    for t, e in c:
        found.append((e, False))
    return found
An understandable solution in Python 3:
def check_balanced_string(str):
    """Return True when the brackets ``()``, ``[]`` and ``{}`` in ``str`` balance.

    Non-bracket characters are ignored.  A closing bracket that arrives while
    no bracket is open makes the string unbalanced (this case previously
    raised ``IndexError``).
    """
    stack = []
    dicc = {'(': ')', '[': ']', '{': '}'}
    closers = set(dicc.values())
    for char in str:
        if char in dicc:  # opening char
            stack.append(char)
        elif char in closers:  # closing char
            # Must close the most recently opened bracket.
            if not stack or dicc[stack.pop()] != char:
                return False
    return not stack  # True when nothing is left open


eq = '{1+[3*5+(2+1)]}'
print(check_balanced_string(eq))
Try this:
def matched(s):
    """Return True when the round parentheses in ``s`` are balanced.

    Only ``(`` and ``)`` are considered; every other character is ignored.
    Since there is a single bracket type, tracking the current nesting depth
    is equivalent to keeping a stack of openers.
    """
    depth = 0
    for symbol in s:
        if symbol in "(":
            depth += 1
        if symbol in ")":
            if depth == 0:
                return False
            depth -= 1
    return depth == 0
The code below displays the missing brackets and the number of times each one is missing in the given string.
from collections import Counter
def find_missing(str):
    """Report which brackets are missing from ``str`` and how many of each.

    Openers and closers are counted independently (nesting order is not
    checked) and both tallies are printed.  The returned dict maps each
    missing bracket character to the number of times it is missing: closers
    needed for surplus openers first, then openers needed for surplus closers.
    """
    open_set = '<[{('
    closed_set = '>]})'
    chars = list(str)
    dict1 = Counter(ch for ch in chars if ch in open_set)
    dict2 = Counter(ch for ch in chars if ch in closed_set)
    print(dict1)
    print(dict2)
    res_dict = {}
    # More openers than closers: the closer is what is missing.
    for opener, closer in zip(open_set, closed_set):
        surplus = dict1[opener] - dict2[closer]
        if surplus > 0:
            res_dict[closer] = surplus
    # More closers than openers: the opener is what is missing.
    for opener, closer in zip(open_set, closed_set):
        surplus = dict2[closer] - dict1[opener]
        if surplus > 0:
            res_dict[opener] = surplus
    return res_dict


if __name__ == '__main__':
    str1 = '{This ((()bracket {[function]} <<going> crazy}'
    x = find_missing(str1)
    if x:
        print("Imbalanced")
        print(x)
    else:
        print("Balanced")
First we will scan the string from left to right, and every time we see an opening parenthesis we push it to a stack, because we want the last opening parenthesis to be closed first. (Remember the FILO structure of a stack!)
Then, when we see a closing parenthesis we check whether the last opened one is the corresponding closing match, by popping an element from the stack. If it’s a valid match, then we proceed forward, if not return false.
Code:
https://gist.github.com/i143code/51962bfb1bd5925f75007d4dcbcf7f55
I needed something for a recent project and figured I could build on the OP's solution a bit. It allows for comment patterns, quotes and brackets to be checked, whilst ignoring the surrounding text. I've purposefully made it more generic than it needs to be so that others can take what they want and cut out what they don't.
"""
This module is for testing bracket pairings within a given string
Tested with Python 3.5.4
>>> regexp = getRegexFromList(opening + closing)
>>> print(regexp)
(\\<\\-\\-|\\-\\-\\>|\\/\\*|\\/\\/|\\*\\/|\\#|\\"|\\'|\\(|\\[|\\{|\\<|\\\n|\\\n|\\"|\\'|\\)|\\]|\\}|\\>)
>>> test_string = 'l<--([0])-->1/*{<2>}*/3//<--4 &-->\\n5#"6"\\n7"/*(8)*/"9\'"10"\'11({12\ta})13[<14>]'
>>> patterns = re.findall(regexp, test_string)
>>> print(patterns)
['<--', '(', '[', ']', ')', '-->', '/*', '{', '<', '>', '}', '*/', '//', '<--', '-->', '\\n', '#', '"', '"', '\\n', '"', '/*', '(', ')', '*/', '"', '(', '{', '}', ')', '[', '<', '>', ']']
>>> doBracketsMatch(patterns)
True
>>> doBracketsMatch(['"', ')', '"', '[', ']', '\\''])
False
"""
# Dependencies
import re
# Global Variables
# Provide opening and closing patterns, along with their priorities & whether a priority is nestable
opening = ['<--', '/*', '//', '#', '"', '\'', '(', '[', '{', '<']
closing = ['-->', '*/', '\n', '\n', '"', '\'', ')', ']', '}', '>']
priority = [1, 1, 1, 1, 1, 1, 0, 0, 0, 0]
nestable = {0: True, 1: False}
# Maps every pattern to [its partner pattern, its priority].  Patterns that
# occur more than once (the quotes and '\n') keep the entry written last.
bracket_pairs = {
    pattern: [partner, rank]
    for pattern, partner, rank in zip(
        opening + closing, closing + opening, priority + priority
    )
}
def getRegexFromList(listOfPatterns):
    r"""
    Generate the search term for the regular expression

    Every pattern is matched literally.  Characters other than letters,
    digits and underscore are backslash-escaped; alphanumerics are left
    as they are, because a backslash in front of a letter would turn it into
    a regex escape (``\t`` is a tab, ``\s`` is whitespace, ``\e`` is invalid).

    :param listOfPatterns: the literal patterns to search for
    :return: a single capturing group of alternatives, longest pattern first
    >>> getRegexFromList(['"', '<--', '##', 'test'])
    '(test|\\<\\-\\-|\\#\\#|\\")'
    """
    # Longer patterns first to prevent false negatives
    search_terms = sorted(listOfPatterns, key=len, reverse=True)
    alternatives = []
    for term in search_terms:
        alternatives.append(''.join(
            char if char.isalnum() or char == '_' else '\\' + char
            for char in str(term)))
    # Search pattern = (a|b|c)
    return '(' + '|'.join(alternatives) + ')'
def doBracketsMatch(list_of_brackets):
    """
    Determine if brackets match up

    Uses the module-level ``opening``, ``closing``, ``bracket_pairs`` and
    ``nestable`` tables.

    :param list_of_brackets: the bracket/comment/quote patterns in the order
        in which they occur in the text
    :return: True when every pattern that is not ignored is properly paired
    """
    stack = []
    for bracket in list_of_brackets:
        # Check empty stack conditions
        if not stack:
            # Check for openings first to catch quotes
            if bracket in opening:
                stack.append(bracket)
            elif bracket in closing:
                return False
            continue

        top_partner, top_priority = bracket_pairs[stack[-1]]
        # Check for a matching bracket
        if bracket == top_partner:
            stack.pop()
        # Ignore cases:
        #  - False positives
        #  - Lower priority brackets
        #  - Equal priority brackets if nesting is not allowed
        elif bracket not in bracket_pairs or \
                bracket_pairs[bracket][1] < top_priority or \
                (bracket_pairs[bracket][1] == top_priority and
                 not nestable[bracket_pairs[bracket][1]]):
            continue
        # New open bracket
        elif bracket in opening:
            stack.append(bracket)
        # Otherwise, unpaired close bracket
        else:
            return False
    # If stack isn't empty, then there is an unpaired open bracket
    return not bool(stack)
if __name__ == '__main__':
    # When executed as a script, run the doctests embedded in the docstrings.
    import doctest
    doctest.testmod()