I'm trying to create a function that returns the very last digit in a python string and if there are no digits in the string, it simply returns -5 as a result.
This is what i've gotten but it returns 0 if the string is made up of no digits or if the final character in the string is not a digit.
For example, LastNum("1*2*3*") should return 3, LastNum("****") should return -5. Help is greatly appreciated.
def LastNum(st):
Result = 0
for i in (st):
if i.isdigit():
Result = Result + int(max(st[-1::]))
return Result
It would be a good idea to start searching from the reverse
def lastNum(st):
# st[::-1] is reverse of st
for s in st[::-1]:
if s.isdigit():
return int(s)
return -5
I don't understand the intended logic behind your code, but this simpler one should work:
def LastNum(st):
Result = -5
for i in st:
if i.isdigit():
Result = int(i)
return Result
You can use regex as follows too.
import re
def LastNum(st):
result = -5
regex_res = re.findall('\d+', st)
if regex_res:
result = regex_res[-1]
return result
print LastNum("1*2*3*")
output:
3
A nice one-liner (probably not the most readable way to do it though)
import re
def LastNum(st, default=-5):
return int((re.findall(r'\d', st) or [default])[-1])
Related
I'm trying to write a simple Python algorithm to solve this problem. Can you please help me figure out how to do this?
If any character is repeated more than 4 times, the entire set of
repeated characters should be replaced with a slash '/', followed by a
2-digit number which is the length of this run of repeated characters,
and the character. For example, "aaaaa" would be encoded as "/05a".
Runs of 4 or less characters should not be replaced since performing
the encoding would not decrease the length of the string.
I see many great solutions here but none that feels very pythonic to my eyes. So I'm contributing with a implementation I wrote myself today for this problem.
def run_length_encode(data: str) -> Iterator[Tuple[str, int]]:
"""Returns run length encoded Tuples for string"""
# A memory efficient (lazy) and pythonic solution using generators
return ((x, sum(1 for _ in y)) for x, y in groupby(data))
This will return a generator of Tuples with the character and number of instances, but can easily be modified to return a string as well. A benefit of doing it this way is that it's all lazy evaluated and won't consume more memory or cpu than needed if you don't need to exhaust the entire search space.
If you still want string encoding the code can quite easily be modified for that use case like this:
def run_length_encode(data: str) -> str:
"""Returns run length encoded string for data"""
# A memory efficient (lazy) and pythonic solution using generators
return "".join(f"{x}{sum(1 for _ in y)}" for x, y in groupby(data))
This is a more generic run length encoding for all lengths, and not just for those of over 4 characters. But this could also quite easily be adapted with a conditional for the string if wanted.
Rosetta Code has a lot of implementations, that should easily be adaptable to your usecase.
Here is Python code with regular expressions:
from re import sub
def encode(text):
'''
Doctest:
>>> encode('WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW')
'12W1B12W3B24W1B14W'
'''
return sub(r'(.)\1*', lambda m: str(len(m.group(0))) + m.group(1),
text)
def decode(text):
'''
Doctest:
>>> decode('12W1B12W3B24W1B14W')
'WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW'
'''
return sub(r'(\d+)(\D)', lambda m: m.group(2) * int(m.group(1)),
text)
textin = "WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW"
assert decode(encode(textin)) == textin
Aside for setting a=i after encoding a sequence and setting a width for your int when printed into the string. You could also do the following which takes advantage of pythons groupby. Its also a good idea to use format when constructing strings.
from itertools import groupby
def runLengthEncode (plainText):
res = []
for k,i in groupby(plainText):
run = list(i)
if(len(run) > 4):
res.append("/{:02}{}".format(len(run), k))
else:
res.extend(run)
return "".join(res)
Just observe the behaviour:
>>> runLengthEncode("abcd")
'abc'
Last character is ignored. You have to append what you've collected.
>>> runLengthEncode("abbbbbcd")
'a/5b/5b'
Oops, problem after encoding. You should set a=i even if you found a long enough sequence.
I know this is not the most efficient solution, but we haven't studied functions like groupby() yet so here's what I did:
def runLengthEncode (plainText):
res=''
a=''
count = 0
for i in plainText:
count+=1
if a.count(i)>0:
a+=i
else:
if len(a)>4:
if len(a)<10:
res+="/0"+str(len(a))+a[0][:1]
else:
res+="/" + str(len(a)) + a[0][:1]
a=i
else:
res+=a
a=i
if count == len(plainText):
if len(a)>4:
if len(a)<10:
res+="/0"+str(len(a))+a[0][:1]
else:
res+="/" + str(len(a)) + a[0][:1]
else:
res+=a
return(res)
Split=(list(input("Enter string: ")))
Split.append("")
a = 0
for i in range(len(Split)):
try:
if (Split[i] in Split) >0:
a = a + 1
if Split[i] != Split[i+1]:
print(Split[i],a)
a = 0
except IndexError:
print()
this is much easier and works everytime
def RLE_comp_encode(text):
if text == text[0]*len(text) :
return str(len(text))+text[0]
else:
comp_text , r = '' , 1
for i in range (1,len(text)):
if text[i]==text[i-1]:
r +=1
if i == len(text)-1:
comp_text += str(r)+text[i]
else :
comp_text += str(r)+text[i-1]
r = 1
return comp_text
This worked for me,
You can use the groupby() function combined with a list/generator comprehension:
from itertools import groupby, imap
''.join(x if reps <= 4 else "/%02d%s" % (reps, x) for x, reps in imap(lambda x: (x[0], len(list(x[1]))), groupby(s)))
An easy solution to run-length encoding which I can think of:
For encoding a string like "a4b5c6d7...":
def encode(s):
counts = {}
for c in s:
if counts.get(c) is None:
counts[c] = s.count(c)
return "".join(k+str(v) for k,v in counts.items())
For decoding a string like "aaaaaabbbdddddccccc....":
def decode(s):
return "".join((map(lambda tup: tup[0] * int(tup[1]), zip(s[0:len(s):2], s[1:len(s):2]))))
Fairly easy to read and simple.
text=input("Please enter the string to encode")
encoded=[]
index=0
amount=1
while index<=(len(text)-1):
if index==(len(text)-1) or text[index]!=text[(index+1)]:
encoded.append((text[index],amount))
amount=1
else:
amount=amount+1
index=index+1
print(encoded)
The question is :
Given a string, return a string where for every char in the original, there are two chars.
This is my attempt:
def double_char(str):
n = 0
for x in range(0, len(str)):
return 2*str[n]
n = n+1
When I run it, it only returns 2 versions of the first letter and doesn't loop properly. So for double_char(Hello) it just returns HH.
What is going wrong? Thanks in advance for any help, sorry for the really beginner question.
The return is causing your function to return in the first iteration so it just returns 2 of the first letter.
What you may have intended to write was something like
def double_char(s):
n = 0
r = ''
for x in range(0, len(s)):
r += 2*s[n]
n = n+1
return r
Building a string incrementally that is just 2 of each character.
A neater refactor of that function (without duplicating the other answer by using a comprehension) is
def double_char(s):
r = ''
for c in s:
r += 2*c
return r
You also should not use str as a variable name. It is a built in type and you are hiding that by defining a variable called str.
return returns control to the caller once reached, thus exiting your for loop prematurely.
Here's a simpler way to do that with str.join:
def double_char(s):
return ''.join(i*2 for i in s)
>>> s = 'Hello'
>>> double_char(s)
'HHeelllloo'
Do not use str as name to avoid shadowing the builtin str function.
Here is a different way to solving the question.
def double_char(str):
new_str = ""
for i in range(len(str)):
new_str += (str[i]*2)
return new_str
double_char('Hello')
'HHeelllloo'
def double_char(str):
string = ''
for i in range(len(str)):
string += str[i] * 2
i += 1
return string
I'm trying to write a simple Python algorithm to solve this problem. Can you please help me figure out how to do this?
If any character is repeated more than 4 times, the entire set of
repeated characters should be replaced with a slash '/', followed by a
2-digit number which is the length of this run of repeated characters,
and the character. For example, "aaaaa" would be encoded as "/05a".
Runs of 4 or less characters should not be replaced since performing
the encoding would not decrease the length of the string.
I see many great solutions here but none that feels very pythonic to my eyes. So I'm contributing with a implementation I wrote myself today for this problem.
def run_length_encode(data: str) -> Iterator[Tuple[str, int]]:
"""Returns run length encoded Tuples for string"""
# A memory efficient (lazy) and pythonic solution using generators
return ((x, sum(1 for _ in y)) for x, y in groupby(data))
This will return a generator of Tuples with the character and number of instances, but can easily be modified to return a string as well. A benefit of doing it this way is that it's all lazy evaluated and won't consume more memory or cpu than needed if you don't need to exhaust the entire search space.
If you still want string encoding the code can quite easily be modified for that use case like this:
def run_length_encode(data: str) -> str:
"""Returns run length encoded string for data"""
# A memory efficient (lazy) and pythonic solution using generators
return "".join(f"{x}{sum(1 for _ in y)}" for x, y in groupby(data))
This is a more generic run length encoding for all lengths, and not just for those of over 4 characters. But this could also quite easily be adapted with a conditional for the string if wanted.
Rosetta Code has a lot of implementations, that should easily be adaptable to your usecase.
Here is Python code with regular expressions:
from re import sub
def encode(text):
'''
Doctest:
>>> encode('WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW')
'12W1B12W3B24W1B14W'
'''
return sub(r'(.)\1*', lambda m: str(len(m.group(0))) + m.group(1),
text)
def decode(text):
'''
Doctest:
>>> decode('12W1B12W3B24W1B14W')
'WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW'
'''
return sub(r'(\d+)(\D)', lambda m: m.group(2) * int(m.group(1)),
text)
textin = "WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW"
assert decode(encode(textin)) == textin
Aside for setting a=i after encoding a sequence and setting a width for your int when printed into the string. You could also do the following which takes advantage of pythons groupby. Its also a good idea to use format when constructing strings.
from itertools import groupby
def runLengthEncode (plainText):
res = []
for k,i in groupby(plainText):
run = list(i)
if(len(run) > 4):
res.append("/{:02}{}".format(len(run), k))
else:
res.extend(run)
return "".join(res)
Just observe the behaviour:
>>> runLengthEncode("abcd")
'abc'
Last character is ignored. You have to append what you've collected.
>>> runLengthEncode("abbbbbcd")
'a/5b/5b'
Oops, problem after encoding. You should set a=i even if you found a long enough sequence.
I know this is not the most efficient solution, but we haven't studied functions like groupby() yet so here's what I did:
def runLengthEncode (plainText):
res=''
a=''
count = 0
for i in plainText:
count+=1
if a.count(i)>0:
a+=i
else:
if len(a)>4:
if len(a)<10:
res+="/0"+str(len(a))+a[0][:1]
else:
res+="/" + str(len(a)) + a[0][:1]
a=i
else:
res+=a
a=i
if count == len(plainText):
if len(a)>4:
if len(a)<10:
res+="/0"+str(len(a))+a[0][:1]
else:
res+="/" + str(len(a)) + a[0][:1]
else:
res+=a
return(res)
Split=(list(input("Enter string: ")))
Split.append("")
a = 0
for i in range(len(Split)):
try:
if (Split[i] in Split) >0:
a = a + 1
if Split[i] != Split[i+1]:
print(Split[i],a)
a = 0
except IndexError:
print()
this is much easier and works everytime
def RLE_comp_encode(text):
if text == text[0]*len(text) :
return str(len(text))+text[0]
else:
comp_text , r = '' , 1
for i in range (1,len(text)):
if text[i]==text[i-1]:
r +=1
if i == len(text)-1:
comp_text += str(r)+text[i]
else :
comp_text += str(r)+text[i-1]
r = 1
return comp_text
This worked for me,
You can use the groupby() function combined with a list/generator comprehension:
from itertools import groupby, imap
''.join(x if reps <= 4 else "/%02d%s" % (reps, x) for x, reps in imap(lambda x: (x[0], len(list(x[1]))), groupby(s)))
An easy solution to run-length encoding which I can think of:
For encoding a string like "a4b5c6d7...":
def encode(s):
counts = {}
for c in s:
if counts.get(c) is None:
counts[c] = s.count(c)
return "".join(k+str(v) for k,v in counts.items())
For decoding a string like "aaaaaabbbdddddccccc....":
def decode(s):
return "".join((map(lambda tup: tup[0] * int(tup[1]), zip(s[0:len(s):2], s[1:len(s):2]))))
Fairly easy to read and simple.
text=input("Please enter the string to encode")
encoded=[]
index=0
amount=1
while index<=(len(text)-1):
if index==(len(text)-1) or text[index]!=text[(index+1)]:
encoded.append((text[index],amount))
amount=1
else:
amount=amount+1
index=index+1
print(encoded)
I have a feeling my question is pretty basic, as I am a first semester computer science student.
I have been asked to return the substring formed before a digit in a string similar to "abcd5efgh". The idea is to use a function to give me "abcd". I think I need to use .isdigit, but I'm not sure how to turn it into a function. Thank you in advance!
It could be done with regexp, but if you already discovered isdigit, why not use it in this case?
You can modify the last return s line to return something else if no digit is found:
def string_before_digit(s):
for i, c in enumerate(s):
if c.isdigit():
return s[:i]
return s # no digit found
print(string_before_digit("abcd5efgh"))
I am also currently a student and this is how i would approch this problem:
*For my school we are not allowed to use built in function like that in python :/
def parse(string):
newstring = ""
for i in string:
if i >= "0" and i <= "9":
break
else:
newstring += i
print newstring #Can use return if your needing it in another function
parse("abcd5efgh")
Hope this helps
A functional approach :)
>>> from itertools import compress, count, imap
>>> text = "abcd5efgh"
>>> text[:next(compress(count(), imap(str.isdigit, text)), len(text))]
'abcd'
The code is below will give you the first non digit part by using regular expression.
import re
myPattern=re.compile('[a-zA-Z]*')
firstNonDigitPart=myPattern.match('abcd5efgh')
firstNonDigitPart.group()
>>> 'abcd'
If you are not allowed to use regexes, maybe because they told you to do it explicitly by hand, you can do it like this:
def digit_index(s):
"""Helper function."""
# next(..., -1) asks the given iterator for the next value and returns -1 if there is none.
# This iterator gives the index n of the first "true-giving" element of the asked generator expression. True-giving is any character which is a digit.
return next(
(n for n, i in enumerate(i.isdigit() for i in "abc123") if i),
-1)
def before_digit(s):
di = digit_index(s)
if di == -1: return s
return s[:di]
should give you your wanted result.
A quite simple one-liner, using isdigit :)
>>> s = 'abcd5efgh'
>>> s[:[i for i, j in enumerate([_ for _ in s]) if j.isdigit()][0]]
'abcd'
An itertools approach:
>>> from itertools import takewhile
>>> s="abcd5efgh"
>>> ''.join(takewhile(lambda x: not x.isdigit(), s))
'abcd'
I am trying to replace the Nth appearance of a needle in a haystack. I want to do this simply via re.sub(), but cannot seem to come up with an appropriate regex to solve this. I am trying to adapt: http://docstore.mik.ua/orelly/perl/cookbook/ch06_06.htm but am failing at spanning multilines, I suppose.
My current method is an iterative approach that finds the position of each occurrence from the beginning after each mutation. This is pretty inefficient and I would like to get some input. Thanks!
I think you mean re.sub. You could pass a function and keep track of how often it was called so far:
def replaceNthWith(n, replacement):
def replace(match, c=[0]):
c[0] += 1
return replacement if c[0] == n else match.group(0)
return replace
Usage:
re.sub(pattern, replaceNthWith(n, replacement), str)
But this approach feels a bit hacky, maybe there are more elegant ways.
DEMO
Something like this regex should help you. Though I'm not sure how efficient it is:
#N=3
re.sub(
r'^((?:.*?mytexttoreplace){2}.*?)mytexttoreplace',
'\1yourreplacementtext.',
'mystring',
flags=re.DOTALL
)
The DOTALL flag is important.
I've been struggling for a while with this, but I found a solution that I think is pretty pythonic:
>>> def nth_matcher(n, replacement):
... def alternate(n):
... i=0
... while True:
... i += 1
... yield i%n == 0
... gen = alternate(n)
... def match(m):
... replace = gen.next()
... if replace:
... return replacement
... else:
... return m.group(0)
... return match
...
...
>>> re.sub("([0-9])", nth_matcher(3, "X"), "1234567890")
'12X45X78X0'
EDIT: the matcher consists of two parts:
the alternate(n) function. This returns a generator that returns an infinite sequence True/False, where every nth value is True. Think of it like list(alternate(3)) == [False, False, True, False, False, True, False, ...].
The match(m) function. This is the function that gets passed to re.sub: it gets the next value in alternate(n) (gen.next()) and if it's True it replaces the matched value; otherwise, it keeps it unchanged (replaces it with itself).
I hope this is clear enough. If my explanation is hazy, please say so and I'll improve it.
Could you do it using re.findall with MatchObject.start() and MatchObject.end()?
find all occurences of pattern in string with .findall, get indices of Nth occurrence with .start/.end, make new string with replacement value using the indices?
If the pattern ("needle") or replacement is a complex regular expression, you can't assume anything. The function "nth_occurrence_sub" is what I came up with as a more general solution:
def nth_match_end(pattern, string, n, flags):
for i, match_object in enumerate(re.finditer(pattern, string, flags)):
if i + 1 == n:
return match_object.end()
def nth_occurrence_sub(pattern, repl, string, n=0, flags=0):
max_n = len(re.findall(pattern, string, flags))
if abs(n) > max_n or n == 0:
return string
if n < 0:
n = max_n + n + 1
sub_n_times = re.sub(pattern, repl, string, n, flags)
if n == 1:
return sub_n_times
nm1_end = nth_match_end(pattern, string, n - 1, flags)
sub_nm1_times = re.sub(pattern, repl, string, n - 1, flags)
sub_nm1_change = sub_nm1_times[:-1 * len(string[nm1_end:])]
components = [
string[:nm1_end],
sub_n_times[len(sub_nm1_change):]
]
return ''.join(components)
I have a similar function I wrote to do this. I was trying to replicate SQL REGEXP_REPLACE() functionality. I ended up with:
def sql_regexp_replace( txt, pattern, replacement='', position=1, occurrence=0, regexp_modifier='c'):
class ReplWrapper(object):
def __init__(self, replacement, occurrence):
self.count = 0
self.replacement = replacement
self.occurrence = occurrence
def repl(self, match):
self.count += 1
if self.occurrence == 0 or self.occurrence == self.count:
return match.expand(self.replacement)
else:
try:
return match.group(0)
except IndexError:
return match.group(0)
occurrence = 0 if occurrence < 0 else occurrence
flags = regexp_flags(regexp_modifier)
rx = re.compile(pattern, flags)
replw = ReplWrapper(replacement, occurrence)
return txt[0:position-1] + rx.sub(replw.repl, txt[position-1:])
One important note that I haven't seen mentioned is that you need to return match.expand() otherwise it won't expand the \1 templates properly and will treat them as literals.
If you want this to work you'll need to handle the flags differently (or take it from my github, it's simple to implement and you can dummy it for a test by setting it to 0 and ignoring my call to regexp_flags()).