python create slice object from string - python

I'd like to create a slice object from a string; right now the only way seems through a cumbersome hacky eval statement
class getslice:
    """Subscript helper: ``getslice()[a:b, 1]`` hands back the ``a:b`` slice."""

    def __getitem__(self, idx):
        # A subscript with a comma arrives as a tuple; its first element is
        # the slice object built by the interpreter.
        return idx[0]


# The slice text is spliced into a subscript expression and evaluated.
# NOTE: eval() must never be fed untrusted input.
eval("getslice()[%s, 1]" % (":-1"))
thanks in advance.
Edit: Sorry if the original prompt was not clear, the input in this case was ":-1". The point was to parse the string. Ignacio Vazquez-Abrams's response at least solved the problem (and seems to work with reverse indexing as well), but I think my solution above is still more clear if not conceptually clean (and will work correctly if Python ever changes slicing syntax).

# Blank fields become None, everything else is converted with int().
slice(*[int(field.strip()) if field.strip() else None for field in mystring.split(':')])
For single-argument slices such as '-1' or '1', i.e. when len(mystring.split(':')) == 1, you just call int(x).
On request, took it out of comment section.

If you want a slice object, why don't you just instantiate one?
s = slice(start, stop, step)
What are you meaning by "creating it from a string"?

# Pad with empty fields so exactly three arguments reach slice();
# an empty field stands for None.
slice(*[None if x == '' else int(x) for x in (mystring.split(':') + ['', '', ''])[:3]])

I ended up here because I wanted my script to accept a Python-like slice argument and render it into a list of integers. I did it with a function that seems to answer the OP's question:
# create a slice object from a string
def get_slice_obj(slicearg):
    """Build a ``slice`` from text such as ``'1'``, ``'1:3'`` or ``'4:9:2'``.

    Fields are separated by ``:``. An empty field (as in ``':3'``) becomes
    ``None``, matching Python's own slice syntax.

    Raises:
        ValueError: if *slicearg* is blank or a field is not an integer.
        TypeError: if more than three fields are given (raised by ``slice``).
    """
    if not slicearg.strip():
        raise ValueError("empty slice argument")
    fields = [int(f) if f.strip() else None for f in slicearg.split(':')]
    # Argument unpacking replaces the Python-2-only ``apply`` builtin.
    return slice(*fields)
def ints_from_slicearg(slicearg):
    """Expand a slice string such as ``'4:9:2'`` into a list of integers.

    Missing parts fall back to start 0, stop -1 and step 1. The result is
    always a real list, on Python 2 and Python 3 alike.
    """
    slice_obj = get_slice_obj(slicearg)
    start = slice_obj.start or 0
    # Test for None explicitly: an explicit stop of 0 (e.g. '3:0:-1') must
    # not be replaced by the -1 fallback.
    stop = -1 if slice_obj.stop is None else slice_obj.stop
    step = slice_obj.step or 1
    return list(range(start, stop, step))
for t in ['1', '1:3', '4:9:2']:
    # %-formatting prints the same text under Python 2 and Python 3,
    # unlike the Python-2-only print statement.
    print("%s => %s" % (t, ints_from_slicearg(t)))
Output:
1 => [0]
1:3 => [1, 2]
4:9:2 => [4, 6, 8]

Here's another method (just a consolidation of the others posted here):
def make_slice(expr):
    """Turn slice text such as ``':-2'`` or ``'::-1'`` into a ``slice``.

    A lone integer ``n`` yields the one-element slice selecting item ``n``.

    Raises:
        TypeError: if *expr* is empty or has more than three fields.
        ValueError: if a field is not an integer.
    """
    def to_piece(s):
        # Explicit conditional: ``s and int(s) or None`` turned '0' into None.
        return int(s) if s else None

    # A real list (not a lazy map object) so len() works on Python 3.
    pieces = [to_piece(part) for part in expr.split(':')]
    if len(pieces) == 1:
        index = pieces[0]
        if index is None:
            raise TypeError("empty slice expression")
        # -1 + 1 == 0 would give an empty slice; None keeps the last item.
        return slice(index, None if index == -1 else index + 1)
    return slice(*pieces)
Example usages:
In [1]: make_slice(':')
Out[1]: slice(None, None, None)
In [2]: make_slice(':-2')
Out[2]: slice(None, -2, None)
In [3]: x = [1, 2, 3]
In [4]: x[make_slice('::-1')]
Out[4]: [3, 2, 1]

The one-liner from Ignacio Vazquez-Abrams is short but hardly readable and handles a single number inconsistently with slice. This tries to parse it in a cleaner way.
def parse_slice(value):
    """
    Build a `slice()` from text shaped like `start:stop:step`.

    A lone number is treated as `slice(stop)`. Empty input ends up calling
    `slice()` without arguments, which raises TypeError.
    """
    if not value:
        # slice()
        fields = []
    else:
        fields = value.split(':')
        if len(fields) == 1:
            # slice(stop)
            fields.insert(0, None)
        # otherwise: slice(start, stop[, step])
    args = []
    for field in fields:
        args.append(int(field) if field else None)
    return slice(*args)
# unit tests:
# Inputs that must be rejected: no fields at all, or too many fields.
for rejected in ('', '1:2:3:4'):
    try:
        parse_slice(rejected)
        assert False, 'It should raise TypeError'
    except TypeError:
        pass

# (input text, expected slice) pairs.
expectations = [
    ('2', slice(2)),
    ('2:3', slice(2, 3)),
    (':3', slice(None, 3)),
    (':', slice(None, None)),
    ('2:', slice(2, None)),
    ('2:3:4', slice(2, 3, 4)),
    (':3:4', slice(None, 3, 4)),
    ('2::4', slice(2, None, 4)),
    ('2:3:', slice(2, 3, None)),
    ('::4', slice(None, None, 4)),
    ('2::', slice(2, None, None)),
    ('::', slice(None, None, None)),
    ('-12:-13:-14', slice(-12, -13, -14)),
    ('2:3:-4', slice(2, 3, -4)),
]
for text, expected in expectations:
    assert parse_slice(text) == expected

Based on #pprzemak drafted the following function for elaborate parsing:
def parse_slice(v: Text):
    """
    Parses text like python "slice" expression (ie ``-10::2``).

    :param v:
        the slice expression or a lone integer
    :return:
        - None if input is None/empty
        - a ``slice()`` instance (even if input is a lone number)
    :raise trt.TraitError:
        input non-empty but invalid syntax
    """
    orig_v = v
    v = v and v.strip()
    if not v:
        return None
    try:
        if ':' not in v:
            ## A lone number given.
            n = int(v)
            # ``slice(-1, 0)`` selects nothing; an open end keeps the last item.
            return slice(n, None if n == -1 else n + 1)
        return slice(*[int(x) if x.strip() else None for x in v.split(':')])
    except (ValueError, TypeError):
        # ValueError: a field is not an integer; TypeError: more than three
        # fields were handed to slice(). Anything else is a real bug and
        # should not be masked as a syntax error.
        pass
    ## An alternative is to return `slice(None)` here.
    raise trt.TraitError("Syntax-error in '%s' slice!" % orig_v)

How 'bout this (for simple non empty slice intervals) :
sliceStr = "3:8"
# Every colon-separated field must be an integer; blanks are not supported.
mySlice = slice(*[int(field) for field in sliceStr.split(':')])

I just needed to do this 12 years later so here's my answer using regex :)
import re
def parse_slice(string: str) -> slice:
    """
    Parse a string representation of a slice and return a slice object.

    Accepts ``start:stop`` and ``start:stop:step`` where every part is an
    optional, possibly negative, integer. A lone number is not accepted.

    Raises:
        ValueError: if *string* is not a well-formed slice expression.
    """
    # One required colon, then an optional ":step" tail. Requiring \d+ after
    # an optional minus rejects a bare "-", and tying the step to its own
    # colon rejects inputs like "1:2-3" that the looser pattern let through.
    match = re.fullmatch(r"(-?\d+)?:(-?\d+)?(?::(-?\d+)?)?", string)
    if match is None:
        raise ValueError("Could not parse slice")
    return slice(*(int(part) if part else None for part in match.groups()))

A slice object is usually created using subscript notation, this notation uses slice() internally, as stated on the slice() documentation. What you want to do is:
your_string[start:end]
From the python tutorial:
Strings can be subscripted (indexed);
like in C, the first character of a
string has subscript (index) 0. There
is no separate character type; a
character is simply a string of size
one. Like in Icon, substrings can be
specified with the slice notation: two
indices separated by a colon.
>>> word = 'Help' + 'A'
>>> word[4]
'A'
>>> word[0:2]
'He'
>>> word[2:4]
'lp'
Slice indices have useful defaults; an
omitted first index defaults to zero,
an omitted second index defaults to
the size of the string being sliced.
>>> word[:2] # The first two characters
'He'
>>> word[2:] # Everything except the first two characters
'lpA'

My solution to parse numpy style advanced indexing from string: my gist.
Although this is an old post, it's the only one I can find on this topic. Hope it helps.
Upon suggestion, I paste the code here, which could be a little bit long ... The code usage is (assuming a is an array-like object): a[parse_slice('1')] gives a[1]; a[parse_slice('2:,-1')] gives a[2:,-1]; etc.
import re
# Each entry is (name, pattern). parse_slice_group() tries the entries in
# order with fullmatch() and dispatches on the first letter of the name:
# 's' -> integer, 'a' -> full slice, 'r' -> slice built from whichever of the
# named groups i/j/k appear in the name, 'l' -> Ellipsis, 'e' -> list of ints.
SLICE_TEMPLATES = [
    # bare signed integer, e.g. "3"
    ('s', r'(?P<i>[+-]?\d+)'),
    # signed integer wrapped in parentheses, e.g. "(3)"
    ('sp', r'\((?P<i>[+-]?\d+)\)'),
    # ":" or "::"
    ('a', r'::?'),
    # start only: "i:" or "i::"
    ('ri-', r'(?P<i>[+-]?\d+)::?'),
    # start and step: "i::k"
    ('ri-k', r'(?P<i>[+-]?\d+)::(?P<k>[+-]?\d+)'),
    # stop only: ":j" or ":j:"
    ('r-j', r':(?P<j>[+-]?\d+):?'),
    # stop and step: ":j:k"
    ('r-jk', r':(?P<j>[+-]?\d+):(?P<k>[+-]?\d+)'),
    # start and stop: "i:j" or "i:j:"
    ('rij', r'(?P<i>[+-]?\d+):(?P<j>[+-]?\d+):?'),
    # start, stop and step: "i:j:k"
    ('rijk', r'(?P<i>[+-]?\d+):(?P<j>[+-]?\d+):(?P<k>[+-]?\d+)'),
    # step only: "::k"
    ('r--k', r'::(?P<k>[+-]?\d+)'),
    # literal "..."
    ('l', r'\.\.\.'),
    # bracketed integers with optional trailing comma, e.g. "[1,2]"
    ('eb', r'\[(?P<e>[+-]?\d+(,[+-]?\d+)*,?)\]'),
    # parenthesized integers, at least two, e.g. "(1,2)"
    ('ep', r'\((?P<e>[+-]?\d+(,[+-]?\d+)+,?)\)'),
    # single parenthesized integer with trailing comma, e.g. "(1,)"
    ('ep1', r'\((?P<e>[+-]?\d+,)\)'),
]
# Replace every pattern string with its compiled regex object.
SLICE_TEMPLATES = [(k, re.compile(v)) for k, v in SLICE_TEMPLATES]
def tokenize_slice_groups(string):
    """Split an index expression into its comma-separated per-axis groups.

    Spaces and tabs are dropped. A comma inside an open bracket pair is kept
    as part of the group; any other comma ends the current group.

    Raises:
        ValueError: on an illegal character, a second bracket pair within
            one group, or a closing bracket that does not match.
    """
    closer_for = {'(': ')', '[': ']'}
    legal = '0123456789()[]+-:.'
    blanks = ' \t'

    groups = []
    current = []    # characters of the group being built
    brackets = []   # bracket characters seen so far in the current group
    for ch in string:
        if ch in blanks:
            continue
        if ch == ',':
            if len(brackets) in (0, 2):
                # No bracket is open: the comma separates two groups.
                groups.append(''.join(current))
                current.clear()
                brackets.clear()
            else:
                current.append(ch)
            continue
        if ch not in legal:
            raise ValueError('illegal char `{}\''.format(ch))
        current.append(ch)
        if ch in '([':
            if brackets:
                raise ValueError('too many brackets in axis {}'.format(
                    len(groups)))
            brackets.append(ch)
        elif ch in ')]':
            if not brackets or ch != closer_for[brackets[0]]:
                raise ValueError('brackets not match in axis {}'.format(
                    len(groups)))
            brackets.append(ch)
    # Whatever is left forms the last group (possibly an empty string).
    groups.append(''.join(current))
    return groups
def parse_slice_group(string):
    """Convert one tokenized group into an int, slice, Ellipsis or int list.

    The group is tried against every entry of SLICE_TEMPLATES in order; the
    first letter of the matching entry's name selects the result type.

    Raises:
        ValueError: if no template matches *string*.
    """
    for name, tem in SLICE_TEMPLATES:
        matched = tem.fullmatch(string)
        if not matched:
            continue
        kind = name[0]
        if kind == 's':
            return int(matched.group('i'))
        if kind == 'a':
            return slice(None, None, None)
        if kind == 'r':
            # The template name lists which of i/j/k the pattern captures.
            bounds = [int(matched.group(key)) if key in name else None
                      for key in 'ijk']
            return slice(*bounds)
        if kind == 'l':
            return ...
        # Remaining templates are the 'e' family: explicit index lists.
        return [int(tok) for tok in matched.group('e').split(',') if tok]
    raise ValueError('illegal group "{}"'.format(string))
def parse_slice(string):
    """Parse a numpy-style index expression into a tuple of index objects.

    Raises:
        ValueError: if the expression is empty, a group is malformed, or
            more than one ellipsis appears.
    """
    groups = tokenize_slice_groups(string)
    if groups == ['']:
        raise ValueError('index must not be empty')
    # A trailing comma leaves one empty group at the end; drop it.
    if groups[-1:] == ['']:
        groups.pop()
    index = tuple(parse_slice_group(group) for group in groups)
    ellipses = sum(1 for item in index if item is Ellipsis)
    if ellipses > 1:
        raise ValueError('ellipsis may occur at most once')
    return index

Related

rotate a string n characters to the left, except the special characters

Hi I need help rotating a string to the left n amount of times, I have done so: btw Strings is a list of strings:
finaltext = ""
for i in strings:
    # Split at n and swap the halves: a left rotation by n characters.
    first, second = i[:n], i[n:]
    i = second + first
    finaltext = finaltext + i
However, i'm not sure how to do this so that in a given string, say: "The intern", the space or any special characters would not move.
s1 = "The intern"
Right now my output is:
ternThe in
output I want:
ern eThein
any ideas? I currently created a function that indicates when a special character and its index in a string, I used that in a for loop to know that the current character is a special character, but when it comes to rotation how would i avoid that character
An intriguing question. How to rotate a string while ignoring specific characters?
Here we remove, rotate, reinsert characters.
Given
import collections as ct
def index(s):
    """Map every distinct item of *s* to the list of positions it occupies."""
    positions = ct.defaultdict(list)
    for where, item in enumerate(s):
        positions[item] += [where]
    return positions
s1 = "The intern"
s2 = "Hello world!"
Code
def rotate(s, n=0, ignore=""):
    """Return *s* rotated right by *n* places, keeping ignored chars fixed.

    Characters listed in *ignore* stay at their original positions; all
    other characters are rotated among the remaining positions. *n* wraps
    around the number of rotated characters, and a negative *n* rotates left.
    """
    movable = [ch for ch in s if ch not in ignore]
    if movable:
        shift = n % len(movable)
        if shift:
            movable = movable[-shift:] + movable[:-shift]
    # Walk the original string so ignored characters are restored strictly
    # by position. Inserting them per ignore character, as before, put them
    # in the wrong place whenever two different ignored characters
    # interleaved, and doubled them if *ignore* repeated a character.
    rotated = iter(movable)
    return "".join(ch if ch in ignore else next(rotated) for ch in s)
Tests
# (text, n, ignore, expected result)
for text, shift, skipped, expected in [
    (s1, 0, "", "The intern"),
    (s1, 1, "", "nThe inter"),
    (s1, 1, " ", "nTh einter"),
    (s1, 3, " ", "ern Theint"),
    (s2, 12, "", "Hello world!"),
    (s2, 1, "", "!Hello world"),
    (s2, 1, "H !", "Hdell oworl!"),
    (s2, 1, "!", "dHello worl!"),
]:
    assert rotate(text, n=shift, ignore=skipped) == expected

Exchanging characters in a string

I need to exchange the middle character in a numeric string of 15 numbers with the last number of the string.
So I get that this:
def string(str):
    """Return *str* with its first and last characters exchanged.

    Strings shorter than two characters are returned unchanged.
    """
    # NOTE: the parameter name shadows the builtin ``str``; it is kept so
    # existing keyword callers keep working.
    if len(str) < 2:
        # The slicing formula below would duplicate a single character.
        return str
    return str[-1] + str[1:-1] + str[0]
print(string('abcd'))
print(string('12345'))
RESULTS:
dbca
52341
But how can I make it so that in the initial input string, 012345678912345,
where the 7 is exchanged with the last character in the string 5?
Consider
def last_to_mid(s):
    """Swap the middle and last characters of an odd-length string.

    A one-character string is returned as is; an even length (including
    zero) raises ValueError.
    """
    size = len(s)
    if size == 1:
        return s
    if size % 2 == 0:
        raise ValueError('expected string of odd length')
    middle = size // 2
    head, tail = s[:middle], s[middle + 1:-1]
    return f'{head}{s[-1]}{tail}{s[middle]}'
operating like this:
>>> last_to_mid('021')
'012'
>>> last_to_mid('0123x4567')
'01237456x'
>>> last_to_mid('1')
'1'
Assuming you have Python 3.6 or newer for f-strings.
You can have a function for this:
In [178]: def swap_index_values(my_string):
...: l = list(my_string)
...: middleIndex = (len(l) - 1)/2
...: middle_val = l[middleIndex]
...: l[middleIndex] = l[-1]
...: l[-1] = middle_val
...: return ''.join(l)
...:
In [179]:
In [179]: a
Out[179]: '012345678912345'
In [180]: swap_index_values(a)
Out[180]: '012345658912347'
Above, you can see that middle value and last values have been exchanged.
In this very specific context (always the middle and last character of a string of length 15), your initial approach can be extended to:
text[0:7]+text[-1]+text[8:-1]+text[7]
Also try to avoid variable names like str, since they shadow the function of the same name.
s1 = '1243125'
mid = len(s1) // 2
# A real exchange: the last character moves to the middle AND the old
# middle character takes the final position. Previously only the middle was
# overwritten and the last character stayed put.
s2 = s1[:mid] + s1[-1] + s1[mid + 1:-1] + s1[mid]
print(s2)
# prints: 1245123

Find longest unique substring in string python

I am trying that age old question (there are multitudes of versions around) of finding the longest substring of a string which doesn't contain repeated characters. I can't work out why my attempt doesn't work properly:
def findLongest(inputStr):
    """Attempt to return the longest run of inputStr without repeated characters.

    Prints its working state on every step. Returns the longest entry of
    resultSet as a list of characters.
    """
    resultSet = []  # runs that were closed off because the next char repeated
    substr = []  # runs still being extended; each one is a list of characters
    for c in inputStr:
        print ("c: ", c)
        if substr == []:
            # Nothing is being tracked yet: start the first run with this char.
            substr.append([c])
            continue
        print(substr)
        # NOTE(review): substr.remove() below mutates the list that this loop
        # is iterating, so the element following a removed one is skipped.
        # NOTE: the loop variable `str` shadows the builtin inside this function.
        for str in substr:
            print ("c: ",c," - str: ",str,"\n")
            if c in str:
                # c would repeat inside this run: record it and stop tracking it.
                resultSet.append(str)
                substr.remove(str)
            else:
                str.append(c)
        # Every character also starts a new run of its own.
        substr.append([c])
    print("Result set:")
    print(resultSet)
    # max() raises ValueError when resultSet is empty.
    return max(resultSet, key=len)
print (findLongest("pwwkewambb"))
When my output gets to the second 'w', it doesn't iterate over all the substr elements. I think I've done something silly, but I can't see what it is so some guidance would be appreciated! I feel like I'm going to kick myself at the answer...
The beginning of my output:
c: p
c: w
[['p']]
c: w - str: ['p']
c: w
[['p', 'w'], ['w']]
c: w - str: ['p', 'w'] # I expect the next line to say c: w - str: ['w']
c: k
[['w'], ['w']] # it is like the w was ignored as it is here
c: k - str: ['w']
c: k - str: ['w']
...
EDIT:
I replaced the for loop with
for idx, str in enumerate(substr):
print ("c: ",c," - str: ",str,"\n")
if c in str:
resultSet.append(str)
substr[idx] = []
else:
str.append(c)
and it produces the correct result. The only thing is that the empty element arrays get set with the next character. It seems a bit pointless; there must be a better way.
My expected output is kewamb.
e.g.
c: p
c: w
[['p']]
c: w - str: ['p']
c: w
[['p', 'w'], ['w']]
c: w - str: ['p', 'w']
c: w - str: ['w']
c: k
[[], [], ['w']]
c: k - str: []
c: k - str: []
c: k - str: ['w']
c: e
[['k'], ['k'], ['w', 'k'], ['k']]
c: e - str: ['k']
c: e - str: ['k']
c: e - str: ['w', 'k']
c: e - str: ['k']
...
Edit, per comment by #seymour on incorrect responses:
def find_longest(s):
    """Return the longest substring of *s* that has no repeated character.

    When several substrings share the maximum length, the leftmost one is
    returned. An empty input gives an empty string.
    """
    # Sliding window. The groupby/set approach restarted after the repeated
    # character, so it missed windows that begin inside the previous run
    # (e.g. 'dvdf' -> 'vdf') and raised on empty input.
    last_seen = {}
    start = 0
    best_start = best_len = 0
    for pos, ch in enumerate(s):
        previous = last_seen.get(ch)
        if previous is not None and previous >= start:
            # ch is already inside the window: move the left edge past it.
            start = previous + 1
        last_seen[ch] = pos
        length = pos - start + 1
        if length > best_len:
            best_start, best_len = start, length
    return ''.join(s[best_start:best_start + best_len])
And test:
In [101]: assert find_longest('pwwkewambb') == 'kewamb'
In [102]: assert find_longest('abcabcbb') == 'abc'
In [103]: assert find_longest('abczxyabczxya') == 'abczxy'
Old answer:
from itertools import groupby
s = set() ## for mutable access
''.join(max((list(g) for _, g in groupby('pwwkewambb', key=lambda x: not ((s and x == s.pop()) or s.add(x)))), key=len))
'kewamb'
groupby returns an iterator grouped based on the function provided in the key argument, which by default is lambda x: x. Instead of the default we are utilizing some state by using a mutable structure (which could have been done a more intuitive way if using a normal function)
lambda x: not ((s and x == s.pop()) or s.add(x))
What is happening here is since I can't reassign a global assignment in a lambda (again I can do this, using a proper function), I just created a global mutable structure that I can add/remove. The key (no pun) is that I only keep elements that I need by using a short circuit to add/remove items as needed.
max and len are fairly self explanatory, to get the longest list produced by groupby
Another version without the mutable global structure business:
def longest(x):
    """Stateful key function: report whether *x* differs from the previous call.

    The previous argument is remembered on the function object itself as
    ``longest.last``. The very first call always returns True.
    """
    unset = object()
    previous = getattr(longest, 'last', unset)
    longest.last = x
    if previous is unset:
        return True
    return not (previous == x)
''.join(max((list(g) for _, g in groupby('pwwkewambb', key=longest)), key=len))
'kewamb'
Not sure what is wrong in your attempt, but it's complex and in:
for str in substr:
print ("c: ",c," - str: ",str,"\n")
if c in str:
resultSet.append(str)
substr.remove(str)
you're removing elements from a list while iterating on it: don't do that, it gives unexpected results.
Anyway, my solution, not sure it's intuitive, but it's probably simpler & shorter:
slice the string with an increasing index
for each slice, create a set and store letters until you reach the end of the string or a letter is already in the set. Your index is the max length
compute the max of this length for every iteration & store the corresponding string
Code:
def findLongest(s):
    """Return the leftmost longest substring of *s* without repeated characters.

    For every start position, the substring is extended until a character
    repeats or the string ends; the longest such run wins.
    """
    best = ""
    for start in range(len(s)):
        seen = set()
        length = 0
        for ch in s[start:]:
            if ch in seen:
                break
            seen.add(ch)
            length += 1
        # Strictly greater, so an earlier run wins ties.
        if length > len(best):
            best = s[start:start + length]
    return best
print(findLongest("pwwkewambb"))
result:
kewamb
It depends on your definition of repeated characters: if you mean consecutive repeats, then the approved solution is slick, but it does not handle characters that appear more than once non-consecutively (e.g. pwwkewabmb -> 'kewabmb', which contains 'b' twice).
Here's what I came up with (Python 2):
def longest(word):
    """Return the longest substring of *word* with no repeated character.

    When several substrings share the maximum length, the last one is
    returned. An empty *word* gives an empty result.
    """
    last_seen = {}      # character -> index of its latest occurrence
    begin = 0           # left edge of the current repeat-free window
    best = (0, 0)       # (begin, end) of the best window so far
    for i, ch in enumerate(word):
        previous = last_seen.get(ch, -1)
        if previous >= begin:
            # ch already occurs in the window: restart just after it.
            begin = previous + 1
        last_seen[ch] = i
        end = i + 1
        # The window end is advanced on every step. The earlier version
        # left it behind after a repeat, so 'abaa' produced 'baa'.
        if end - begin >= best[1] - best[0]:
            best = (begin, end)
    return word[slice(*best)]
Thus
>>> print longest('pwwkewabmb')
kewabm
>>> print longest('pwwkewambb')
kewamb
>>> print longest('bbbb')
b
My 2-cents:
from collections import Counter
def longest_unique_substr(s: str) -> str:
    """Return the leftmost longest substring of *s* whose characters all differ.

    Candidate substrings are tried from the longest to the shortest, so the
    first one without duplicates is the answer. Empty input returns "".
    """
    total = len(s)
    for width in range(total, 0, -1):
        for begin in range(total - width + 1):
            candidate = s[begin:begin + width]
            # All characters are unique exactly when none collapse in a set.
            if len(set(candidate)) == width:
                return candidate
    return ""
Run:
In : longest_unique_substr('pwwkewambb')
Out: 'kewamb'
# Reads one line from stdin and prints the first recorded repeat-free run of
# greatest length, or -1 when no run of at least 3 characters was recorded.
s=input()
ma=0  # length of the longest recorded run
n=len(s)
l=[]  # characters of the run currently being built
a=[]  # recorded runs (only runs with at least 3 characters are kept)
d={}  # character -> index in the current s
st=0;i=0  # st is never read afterwards
while i<n:
    if s[i] not in d:
        d[s[i]]=i
        l.append(s[i])
    else:
        # s[i] repeats: drop everything up to and including its earlier
        # occurrence, then rescan the shortened string from the start.
        t=d[s[i]]
        d[s[i]]=i
        s=s[t+1:]
        d={}
        n=len(s)
        if len(l)>=3:
            a.append(l)
            ma=max(ma,len(l))
        l=[];i=-1  # i becomes 0 again after the increment below
    i=i+1
# Record the run that was still open when the input ended.
if len(l)!=0 and len(l)>=3:
    a.append(l)
    ma=max(ma,len(l))
if len(a)==0:
    print("-1")
else:
    # Print the first recorded run whose length equals the maximum.
    for i in a:
        if len(i)==ma:
            for j in i:
                print(j,end="")
            break

Getting the first appearance of a any char from a set in a string - python

is there a better way to find the first appearance of one of the chars: 'x','y','z' in someStr?
def findFirstAppearance(someStr):
    """Return the index of the first 'x', 'y' or 'z' in *someStr*.

    When none of the three characters occurs, len(someStr) is returned.
    """
    missing = len(someStr)
    positions = [someStr.find(ch) for ch in 'xyz']
    # find() reports -1 for "absent"; map that to the length so min() ignores it.
    return min(missing if pos == -1 else pos for pos in positions)
for example: for someStr = "axby" it should return 1.
for someStr = "aybx" it should also return 1.
thanks!
Maybe:
>>> s = 'this string x contains y several letters z'
>>> next(i for i,c in enumerate(s) if c in 'xyz')
12
>>> s[12]
'x'
This will raise an exception if it's not found, which could be fixed by using a default value:
>>> next(i for i,c in enumerate(s) if c in 'Q')
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
StopIteration
>>> next((i for i,c in enumerate(s) if c in 'Q'), -1)
-1
You could also pre-construct a set to test membership in:
>>> special = set("vmp")
>>> next((i for i,c in enumerate(s) if c in special), -1)
27
which might be faster if there were a lot of letters to test against; it'll depend a lot on the sizes involved. Easy to experiment if it matters, but (spoiler alert) it probably doesn't.
Here's an alternative using regular expressions.
import re
def find_first_dx(needles, haystack):
    """Return the index of the earliest match of any needle, or -1 if none.

    *needles* may be a string (each character is a needle) or any iterable
    of strings; every needle is matched literally.
    """
    alternation = '|'.join(re.escape(needle) for needle in needles)
    found = re.search(alternation, haystack)
    if found is None:
        return -1
    return found.start()
Examples:
>>> find_first_dx('xyz', 'abcyax')
3
>>> find_first_dx('xyz.', 'a.bcyax')
1
>>> find_first_dx('xyz', 'fee fi foe fum')
-1
>>> find_first_dx(['foe', 'fum'], 'fee fi foe fum')
7
I think this is what you're looking for. This finds the first occurrence of one of many chars (items) in a string. It works just like str.find.
def findany(string, items, start=0, end=-1):
    """Return the index of the first character of *string* found in *items*.

    Only positions ``start <= i < end`` are examined. ``end=-1`` (the
    default) means "up to the end of the string". Returns -1 when nothing
    matches, like ``str.find``.
    """
    # ``start`` now defaults to 0 so the two-argument calls shown in the
    # usage examples work. ``end`` is clamped so an oversized value cannot
    # index past the string.
    if end == -1 or end > len(string):
        end = len(string)
    for i in range(start, end):
        if string[i] in items:
            return i
    return -1
#      01234567
inp = "hellozxy"
# Parenthesized print works as a statement (Python 2) and as a call
# (Python 3). start is passed explicitly.
print(findany(inp, "xyz", 0))  # 5 = z
print(findany(inp, "?", 0))  # -1 = not found
print(findany(inp, ["o", "l"], 3))  # 3, skips the first 'l'
Note: You pass a list of chars (1-character strings) as items. In python, a string is just that. If you pass something like ["x", "y", "blah"], it won't work (it'll ignore "blah").
This should work:
def findany(s1, s2):
    """Return the index of the first item of *s1* contained in *s2*, else -1."""
    hits = (position for position, item in enumerate(s1) if item in s2)
    return next(hits, -1)
Use enumerate(), it yields a tuple for each character of the string.
Tuple's first element is the index and second element is the character itself.
In [103]: def find_first(strs):
.....: for i,x in enumerate(strs):
.....: if x in 'xyz': #if current character is either
#'x' or 'y' or 'z' then return index
.....: return i
.....: return -1 #if the loop completed successfully then return -1
.....:
In [104]: find_first("fooxbaryzx")
Out[104]: 3
In [105]: find_first("qwerty")
Out[105]: 5
In [106]: find_first("qwert")
Out[106]: -1
In [107]: find_first("axby")
Out[107]: 1
In [108]: find_first("aybx")
Out[108]: 1
For a lot of chars, you should seriously think about using a regular expression,
especially if you are doing this in a loop in your application:
import re
def findall(string, chars):
    """Return the index of the first character of *string* that is in *chars*.

    Every character of *chars* is matched literally. Returns -1 when there
    is no match or *chars* is empty.
    """
    if not chars:
        # "[]" is not a valid character class.
        return -1
    # Escape each character so '-', '^', ']' and '\\' keep their literal
    # meaning inside the character class.
    char_class = "[%s]" % "".join(re.escape(c) for c in chars)
    m = re.search(char_class, string)
    if m:
        return m.start()
    return -1
this should be at least 100x faster than a pure-python loop with a call to "find"
for each char.
Just be aware that if you need to find a char that is used for other
purposes inside regexps "[ ]" , you should escape them (like "-", "^")

parsing nested parentheses in python, grab content by level

Apparently this problem comes up fairly often, after reading
Regular expression to detect semi-colon terminated C++ for & while loops
and thinking about the problem for a while, i wrote a function to return the content contained inside an arbitrary number of nested ()
The function could easily be extended to any regular expression object, posting here for your thoughts and considerations.
any refactoring advice would be appreciated
(Note: I'm still new to Python and didn't feel like figuring out how to raise exceptions, so I just had the function return 'fail' if it couldn't figure out what was going on.)
Edited function to take into account comments:
def ParseNestedParen(string, level):
    """
    Return string contained in nested (), indexing i = level

    The level-th "(" from the left is paired with the level-th ")" from the
    right. Unbalanced input is padded first: missing ")" are appended and
    missing "(" are prepended. Raises IndexError when *level* exceeds the
    number of parentheses.
    """
    # Rebalance in one step. Adding one parenthesis per recursive call hit
    # the recursion limit on badly unbalanced input.
    surplus = string.count('(') - string.count(')')
    if surplus > 0:
        string += ')' * surplus
    elif surplus < 0:
        string = '(' * -surplus + string
    starts = [pos + 1 for pos, ch in enumerate(string) if ch == '(']
    ends = [pos for pos, ch in enumerate(string) if ch == ')']
    ends.reverse()
    return string[starts[level]:ends[level]]
You don't make it clear exactly what the specification of your function is, but this behaviour seems wrong to me:
>>> ParseNestedParen('(a)(b)(c)', 0)
['a)(b)(c']
>>> nested_paren.ParseNestedParen('(a)(b)(c)', 1)
['b']
>>> nested_paren.ParseNestedParen('(a)(b)(c)', 2)
['']
Other comments on your code:
Docstring says "generate", but function returns a list, not a generator.
Since only one string is ever returned, why return it in a list?
Under what circumstances can the function return the string fail?
Repeatedly calling re.findall and then throwing away the result is wasteful.
You attempt to rebalance the parentheses in the string, but you do so only one parenthesis at a time:
>>> ParseNestedParen(')' * 1000, 1)
RuntimeError: maximum recursion depth exceeded while calling a Python object
As Thomi said in the question you linked to, "regular expressions really are the wrong tool for the job!"
The usual way to parse nested expressions is to use a stack, along these lines:
def parenthetic_contents(string):
    """Yield (level, contents) for every parenthesized part of *string*.

    Pairs are produced as each group closes, so inner groups come before
    the group enclosing them. A ")" without a matching "(" is ignored.
    """
    open_positions = []
    for pos, ch in enumerate(string):
        if ch == '(':
            open_positions.append(pos)
            continue
        if ch != ')' or not open_positions:
            continue
        opened_at = open_positions.pop()
        # After the pop, the stack height equals this group's nesting level.
        yield (len(open_positions), string[opened_at + 1:pos])
>>> list(parenthetic_contents('(a(b(c)(d)e)(f)g)'))
[(2, 'c'), (2, 'd'), (1, 'b(c)(d)e'), (1, 'f'), (0, 'a(b(c)(d)e)(f)g')]
Parentheses matching requires a parser with a push-down automaton. Some libraries exist, but the rules are simple enough that we can write it from scratch:
def push(obj, l, depth):
    """Append *obj* to the list nested *depth* levels down the last branch of *l*.

    Raises:
        IndexError: if *depth* is negative or a level on the way is empty.
    """
    if depth < 0:
        # ``while depth:`` never reached zero for a negative depth and could
        # spin forever once it descended into a one-character string.
        raise IndexError('negative nesting depth')
    for _ in range(depth):
        l = l[-1]
    l.append(obj)
def parse_parentheses(s):
    """Parse *s* into nested lists, one list per parenthesized group.

    Every character other than a parenthesis becomes a list element at its
    nesting depth.

    Raises:
        ValueError: if the parentheses are unbalanced in either direction.
    """
    groups = []
    stack = [groups]  # stack[-1] is the list currently being filled
    for char in s:
        if char == '(':
            nested = []
            stack[-1].append(nested)
            stack.append(nested)
        elif char == ')':
            if len(stack) == 1:
                # A ")" with nothing open used to be accepted silently, or
                # sent later characters into an endless descent.
                raise ValueError('Parentheses mismatch')
            stack.pop()
        else:
            stack[-1].append(char)
    if len(stack) > 1:
        raise ValueError('Parentheses mismatch')
    return groups
print(parse_parentheses('a(b(cd)f)')) # ['a', ['b', ['c', 'd'], 'f']]
Below is my Python solution with a time complexity of O(N)
str1 = "(a(b(c)d)(e(f)g)hi)"
def content_by_level(str1, l):
    """Collect the text found at each nesting level and return level *l*'s.

    Text is flushed into its level whenever a parenthesis is met. The first
    flush for a level stores the text even when it is empty. The complete
    level mapping is printed before returning. Raises KeyError if level *l*
    never received a flush.
    """
    buckets = {}
    depth = 0
    pending = ''
    for ch in str1:
        if ch != '(' and ch != ')':
            pending += ch
            continue
        # Both kinds of parenthesis flush the pending text the same way.
        if depth not in buckets:
            buckets[depth] = [pending]
        elif pending != '':
            buckets[depth].append(pending)
        pending = ''
        depth += 1 if ch == '(' else -1
    print(buckets)  # {0: [''], 1: ['a', 'hi'], 2: ['b', 'd', 'e', 'g'], 3: ['c', 'f']}
    return buckets[l]
print(content_by_level(str1,0)) # ['']
print(content_by_level(str1,1)) # ['a', 'hi']
print(content_by_level(str1,2)) # ['b', 'd', 'e', 'g']
print(content_by_level(str1,3)) # ['c', 'f']
#!/usr/bin/env python
import re
def ParseNestedParen(string, level):
    """
    Return a one-element list with the string contained in nested (),
    indexing i = level

    The level-th "(" from the left is paired with the level-th ")" from the
    right. Unbalanced input is padded first: missing ")" are appended and
    missing "(" are prepended. Raises IndexError when *level* exceeds the
    number of parentheses.
    """
    # Count once and rebalance in one step. The earlier version re-ran
    # re.findall in every branch and recursed once per missing parenthesis.
    # Its final 'fail' branch could never be reached and is gone.
    surplus = string.count('(') - string.count(')')
    if surplus > 0:
        string += ')' * surplus
    elif surplus < 0:
        string = '(' * -surplus + string
    starts = [pos + 1 for pos, ch in enumerate(string) if ch == '(']
    ends = [pos for pos, ch in enumerate(string) if ch == ')']
    ends.reverse()
    return [string[starts[level]:ends[level]]]
Tests:
if __name__ == '__main__':
    # One balanced sample, one missing closing parens, one missing opening parens.
    teststring = "outer(first(second(third)second)first)outer"
    teststring_2 = "outer(first(second(third)second)"
    teststring_3 = "second(third)second)first)outer"
    for sample in (teststring, teststring_2, teststring_3):
        for depth in range(3):
            print(ParseNestedParen(sample, depth))
output:
Running tool: python3.1
['first(second(third)second)first']
['second(third)second']
['third']
['first(second(third)second)']
['second(third)second']
['third']
['(second(third)second)first']
['second(third)second']
['third']
>>>

Categories