Match characters of a string - python

If I call matcher.match('bad'), it should return a list of all permutations of 'bad' that exist in a provided list. In this example, the output should be ['abd']. This is what I have tried so far, but I can't get it to match this pattern.
class Matcher():
    # The asker's attempt: meant to return the entries of the stored list that
    # are permutations of the string passed to match().
    def __init__(self, string_list):
        self.string_list = string_list

    def match(self, string_match):
        # NOTE: `string_list` here is the module-level name defined below, not
        # self.string_list, and `in` is a substring test. The comprehension
        # therefore keeps every entry that contains string_match as a
        # substring rather than the entries that are permutations of it.
        matched_list = [string for string in string_list if string_match in string]
        return matched_list


string_list = ['abd', 'abdd', 'fret', 'gilk', 'lokm']
matcher = Matcher(string_list)
print(matcher.match('abd'))

This is an O(n log n) solution:
string_list = ['abd', 'abdd', 'fret', 'gilk', 'lokm']
my_str = 'bad'
# Two strings are permutations of each other exactly when their sorted
# characters are equal. Sort the target once instead of once per candidate.
target = sorted(my_str)
matches = [x for x in string_list if sorted(x) == target]
print(matches)  # ['abd']

You could always use collections.Counter() for this:
from collections import Counter
string_list = ['abd', 'abdd', 'fret', 'gilk', 'lokm']
my_str = 'bad'
# Equal character counts mean the two strings are permutations of each other.
# Build the target's Counter once instead of once per candidate.
target = Counter(my_str)
matches = [x for x in string_list if Counter(x) == target]
print(matches)
# ['abd']

Related

How to construct a string from letters of each word from list?

I am wondering how to construct a string, which takes 1st letter of each word from list. Then it takes 2nd letter from each word etc.
For example :
Input --> my_list = ['good', 'bad', 'father']
Every word has different length (but the words in the list could have equal length)
The output should be: 'gbfoaaodtdher'.
I tried:
def letters(my_list):
    # The asker's attempt at reading the words column by column.
    string = ''
    # NOTE: both loops range over the number of words, so the character
    # position `i` never goes beyond len(my_list) - 1 and the tail of longer
    # words is never visited. my_list[j][i] also raises IndexError if a word
    # is shorter than the number of words.
    for i in range(len(my_list)):
        for j in range(len(my_list)):
            string += my_list[j][i]
    return string


print(letters(['good', 'bad', 'father']))
and I got:
'gbfoaaodt'.
That's a good job for itertools.zip_longest:
from itertools import zip_longest
# Walk the words column by column. Exhausted words are padded with an empty
# string, which contributes nothing to the joined result.
columns = zip_longest(*my_list, fillvalue='')
s = ''.join(c for column in columns for c in column)
print(s)
Or more_itertools.interleave_longest:
# more_itertools is not part of the standard library; it has to be installed
# separately. `my_list` must already be defined before this snippet runs.
from more_itertools import interleave_longest
s = ''.join(interleave_longest(*my_list))
print(s)
Output: gbfoaaodtdher
Used input:
my_list = ['good', 'bad', 'father']
The answer by #mozway is the best approach, but if you want to go along with your original method, this is how
def letters(my_list):
    """Interleave the characters of the words in ``my_list`` column by column.

    The first character of every word is taken, then the second character of
    every word, and so on. Words that are too short for the current position
    are skipped.

    Args:
        my_list: A sequence of strings (lengths may differ).

    Returns:
        The interleaved string, or ``''`` when ``my_list`` is empty.
    """
    # default=0 keeps max() from raising ValueError on an empty list.
    max_len = max((len(s) for s in my_list), default=0)
    # join() builds the result in one pass instead of repeated `+=`.
    return ''.join(
        word[i]
        for i in range(max_len)
        for word in my_list
        if i < len(word)
    )
print(letters(['good', 'bad', 'father']))
Output: gbfoaaodtdher
We can do without zip_longest as well:
l = ['good', 'bad', 'father']
# The longest word determines how many character positions have to be visited.
longest_string = max(l, key=len)
# For each position, take that character from every word long enough to have it.
''.join(
    e[i]
    for i in range(len(longest_string))
    for e in l
    if len(e) > i
)
#'gbfoaaodtdher'

Remove words containing vowels

I am looking for an output string with the words that contain vowels removed.
Input: My name is 123
Output: my 123
I tried below code:
def without_vowels(sentence):
    # The asker's attempt at dropping every word that contains a vowel.
    vowels = 'aeiou'
    word = sentence.split()
    for l in word:
        for k in l:
            if k in vowels:
                # NOTE: this only rebinds the loop variable `l`; the list
                # `word` is not modified.
                l = ''
    # NOTE: there is no return statement, so the function returns None.


without_vowels('my name 123')
Can anyone give me the result using a list comprehension?
You can use a regex search for the pattern 'a|e|i|o|u', calling .lower() on each word first so that upper-case vowels are matched too, like below:
>>> import re
>>> st = 'My nAmE Is 123 MUe'
>>> [s for s in st.split() if not re.search(r'a|e|i|o|u',s.lower())]
['My', '123']
>>> ' '.join(s for s in st.split() if not re.search(r'a|e|i|o|u',s.lower()))
'My 123'
This is one way to do it
def without_vowels(sentence):
    """Print and return ``sentence`` without the words that contain a vowel.

    Words are split on whitespace. A word is dropped when it contains any of
    ``a, e, i, o, u`` in either case. The surviving words are joined with
    single spaces.

    Args:
        sentence: The text to filter.

    Returns:
        The filtered sentence (the same string that is printed).
    """
    vowels = 'aeiou'
    # Test the lower-cased word so that 'AT' is dropped just like 'at'.
    cleaned_words = [
        w for w in sentence.split()
        if not any(v in w.lower() for v in vowels)
    ]
    cleaned_string = ' '.join(cleaned_words)
    print(cleaned_string)
    # Returned as well as printed so callers can reuse the result.
    return cleaned_string
Outputs my 123
def rem_vowel(string):
    """Print ``string`` with every vowel letter (either case) removed."""
    vowels = ['a', 'e', 'i', 'o', 'u']
    kept = []
    for letter in string:
        # Lower-casing first makes the check case-insensitive.
        if letter.lower() in vowels:
            continue
        kept.append(letter)
    print(''.join(kept))


string = "My name is 123"
rem_vowel(string)
import re
def rem_vowel(string):
    """Return ``string`` with every ASCII vowel, upper or lower case, removed."""
    return re.sub("[aeiouAEIOU]", "", string)


# Driver program
string = " I am uma Bhargav "
# print() call form: the Python 2 print statement is a syntax error in Python 3.
print(rem_vowel(string))

Splitting a string with numbers and letters [duplicate]

I'd like to split strings like these
'foofo21'
'bar432'
'foobar12345'
into
['foofo', '21']
['bar', '432']
['foobar', '12345']
Does somebody know an easy and simple way to do this in python?
I would approach this by using re.match in the following way:
import re
# Capture a leading run of letters (re.I makes [a-z] case-insensitive)
# followed by a run of digits.
match = re.match(r"([a-z]+)([0-9]+)", 'foofo21', re.I)
if match:
    items = match.groups()
# NOTE(review): `items` is only bound when the pattern matched. print is
# assumed to sit outside the guard as in the original post — TODO confirm.
print(items)
>> ("foofo", "21")
def mysplit(s):
    """Split ``s`` into ``(head, tail)`` where tail is its trailing ASCII digits."""
    # Everything left after stripping trailing digits is the head, so its
    # length is the cut position.
    cut = len(s.rstrip('0123456789'))
    return s[:cut], s[cut:]
>>> [mysplit(s) for s in ['foofo21', 'bar432', 'foobar12345']]
[('foofo', '21'), ('bar', '432'), ('foobar', '12345')]
Yet Another Option:
>>> [re.split(r'(\d+)', s) for s in ('foofo21', 'bar432', 'foobar12345')]
[['foofo', '21', ''], ['bar', '432', ''], ['foobar', '12345', '']]
>>> r = re.compile("([a-zA-Z]+)([0-9]+)")
>>> m = r.match("foobar12345")
>>> m.group(1)
'foobar'
>>> m.group(2)
'12345'
So, if you have a list of strings with that format:
import re
# Compile once: a run of ASCII letters followed by a run of digits.
r = re.compile("([a-zA-Z]+)([0-9]+)")
strings = ['foofo21', 'bar432', 'foobar12345']
# print() call form: the Python 2 print statement is a syntax error in Python 3.
# r.match() returns None for a string that does not fit the pattern, so every
# entry is expected to be letters followed by digits.
print([r.match(string).groups() for string in strings])
Output:
[('foofo', '21'), ('bar', '432'), ('foobar', '12345')]
I'm always the one to bring up findall() =)
>>> strings = ['foofo21', 'bar432', 'foobar12345']
>>> [re.findall(r'(\w+?)(\d+)', s)[0] for s in strings]
[('foofo', '21'), ('bar', '432'), ('foobar', '12345')]
Note that I'm using a simpler (less to type) regex than most of the previous answers.
Here is a simple function to separate multiple words and numbers from a string of any length; the re-based approaches above only separate the first word and number. I think this will help others in the future.
def seperate_string_number(string):
    """Split ``string`` into consecutive runs of letters and runs of numbers.

    Adjacent alphabetic characters are grouped together, as are adjacent
    numeric characters. Any other character (punctuation, whitespace, ...)
    always forms a group of its own.

    Args:
        string: The text to split.

    Returns:
        A list of the groups in order. An empty string gives ``[]`` and a
        one-character string gives a one-element list.
    """
    groups = []
    # '' is neither alphabetic nor numeric, so the first character always
    # opens a new group.
    previous_character = ''
    for character in string:
        same_kind = (
            (character.isalpha() and previous_character.isalpha())
            or (character.isnumeric() and previous_character.isnumeric())
        )
        if same_kind:
            groups[-1] += character
        else:
            groups.append(character)
        previous_character = character
    return groups
print(seperate_string_number('10in20ft10400bg'))
# outputs : ['10', 'in', '20', 'ft', '10400', 'bg']
import re
# Python 3 form: input() replaces raw_input() and print is a function.
s = input()
m = re.match(r"([a-zA-Z]+)([0-9]+)", s)
# NOTE: m is None when s does not start with letters followed by digits, in
# which case the group() calls below raise AttributeError.
print(m.group(0))  # the whole matched text
print(m.group(1))  # the letters
print(m.group(2))  # the digits
Without using regex, using the built-in isdigit() method; this only works if the starting part is text and the latter part is a number:
def text_num_split(item):
    """Split ``item`` at its first digit into ``[text, rest]``.

    Args:
        item: A string expected to be text followed by a number.

    Returns:
        A two-element list: everything before the first digit, and everything
        from the first digit on. When ``item`` has no digit the second
        element is ``''``.
    """
    for index, letter in enumerate(item):
        if letter.isdigit():
            return [item[:index], item[index:]]
    # No digit found: keep the two-element shape instead of returning None.
    return [item, '']
print(text_num_split("foobar12345"))
OUTPUT :
['foobar', '12345']
This is a little longer, but more versatile for cases where there are multiple, randomly placed, numbers in the string. Also, it requires no imports.
def getNumbers( input ):
    """Return every run of consecutive digit characters found in ``input``.

    Args:
        input: The string to scan. The name shadows the builtin but is kept
            because it is part of the function's signature.

    Returns:
        A list of digit strings in order of appearance; ``[]`` when there
        are none.
    """
    from itertools import groupby

    # groupby splits the text into maximal runs whose characters agree on
    # isdigit(); only the digit runs are kept.
    return [
        ''.join(run)
        for is_digit, run in groupby(input, key=lambda letter: letter.isdigit())
        if is_digit
    ]
Here is simple solution for that problem, no need for regex:
user = input('Input: ') # user = 'foobar12345'
int_list, str_list = [], []
for item in user:
    try:
        digit = int(item) # searching for integers in your string
    except ValueError:
        # Only a failed conversion means "not a digit"; a bare except would
        # also swallow KeyboardInterrupt and real bugs.
        str_list.append(item)
    else: # if there are integers i will add it to int_list but as str, because join function only can work with str
        int_list.append(str(digit))
# Built once after the loop so `string` exists even when the input is all digits.
string = ''.join(str_list)
# int('') raises ValueError, so fall back to None when no digit was entered.
integer = int(''.join(int_list)) if int_list else None # if you want it to be string just do z = ''.join(int_list)
final = [string, integer] # you can also add it to dictionary d = {string: integer}
print(final)
In Addition to the answer of #Evan
If the incoming string is in this pattern 21foofo then the re.match pattern would be like this.
import re
# Same idea with the groups swapped: a leading run of digits followed by a
# run of letters (re.I makes [a-z] case-insensitive).
match = re.match(r"([0-9]+)([a-z]+)", '21foofo', re.I)
if match:
    items = match.groups()
# NOTE(review): `items` is only bound when the pattern matched. print is
# assumed to sit outside the guard as in the original post — TODO confirm.
print(items)
>> ("21", "foofo")
Otherwise, you'll get UnboundLocalError: local variable 'items' referenced before assignment error.

How do I split a string at a separator unless that separator is followed by a certain pattern?

I have a Python string
string = aaa1bbb1ccc1ddd
and I want to split it like this
re.split('[split at all occurrences of "1", unless the 1 is followed by a c]', string)
so that the result is
['aaa', 'bbb1ccc', 'ddd']
How do I do this?
Use negative-lookahead with regex and the re module:
>>> string = 'aaa1bbb1ccc1ddd'
>>> import re
>>> re.split(r"1(?!c)", string)
['aaa', 'bbb1ccc', 'ddd']
def split_by_delim_except(s, delim, bar):
    """Split ``s`` at ``delim`` unless that occurrence is followed by ``bar``.

    Args:
        s: The string to split.
        delim: The separator text (taken literally, not as a regex).
        bar: Text that, when it directly follows ``delim``, protects that
            occurrence from being split on.

    Returns:
        A list of the pieces.

    Raises:
        ValueError: If ``delim`` is empty, as ``str.split`` does.
    """
    import re

    if not delim:
        raise ValueError("empty separator")
    # A negative lookahead expresses "not followed by bar" directly, so no
    # placeholder character has to be smuggled through the input (a '\b'
    # already present in ``s`` used to be rewritten as delim + bar).
    pattern = re.escape(delim) + '(?!' + re.escape(bar) + ')'
    return re.split(pattern, s)
split_by_delim_except('aaa1bbb1ccc1ddd', '1', 'c')
Although not as pretty as regex, my following code returns the same result:
string = 'aaa1bbb1ccc1ddd'
# Split the string at all instances of '1'
p1 = string.split('1')
# Create a new empty list so we can append our desired items to
new_result = []
for j in p1:
    if j.startswith('c') and new_result:
        # The '1' before this piece was followed by 'c', so glue the piece
        # back onto the previous entry. Using the last entry (rather than a
        # separately tracked index) stays valid after earlier merges.
        new_result[-1] += '1' + j
    else:
        # Also reached when the very first piece starts with 'c': there is
        # no previous entry to merge into.
        new_result.append(j)
print (new_result)
Output:
['aaa', 'bbb1ccc', 'ddd']

Python split punctuation but still include it

This is the list of strings that I have:
[
['It', 'was', 'the', 'besst', 'of', 'times,'],
['it', 'was', 'teh', 'worst', 'of', 'times']
]
I need to split the punctuation in times,, to be 'times',','
or another example if I have Why?!? I would need it to be 'Why','?!?'
import string
def punctuation(string):
for word in string:
if word contains (string.punctuation):
word.split()
I know it isn't valid Python at all, but that's what I want it to do.
You can use finditer even if the string is more complex.
>>> r = re.compile(r"(\w+)(["+string.punctuation+"]*)")
>>> s = 'Why?!?Why?*Why'
>>> [x.groups() for x in r.finditer(s)]
[('Why', '?!?'), ('Why', '?*'), ('Why', '')]
>>>
you can use regular expression, for example:
In [1]: import re
In [2]: re.findall(r'(\w+)(\W+)', 'times,')
Out[2]: [('times', ',')]
In [3]: re.findall(r'(\w+)(\W+)', 'why?!?')
Out[3]: [('why', '?!?')]
In [4]:
A generator solution without regex:
import string
from itertools import takewhile, dropwhile
def splitp(s):
    """Yield each word of ``s``, split at its first non-letter character.

    ASCII letters and the apostrophe count as word characters. A word made
    only of those is yielded unchanged; otherwise the leading word part and
    the remainder are yielded as two separate items.
    """
    def is_word_char(c):
        return c in string.ascii_letters+"'" # won't split "don't"

    for token in s:
        trailing = ''.join(dropwhile(is_word_char, token))
        if not trailing:
            yield token
            continue
        yield ''.join(takewhile(is_word_char, token))
        yield trailing
list(splitp(s))
Something like this? (Assumes punct is always at end)
def lcheck(word):
    """Split ``word`` at its first non-alphabetic character.

    Args:
        word: The token to split; punctuation is assumed to be at the end.

    Returns:
        ``[letters, rest]`` when a non-alphabetic character is found,
        otherwise ``[word]``.
    """
    for i, letter in enumerate(word):
        if not letter.isalpha():
            # Slice up to i (exclusive): stopping at i - 1 dropped the last
            # letter ('times,' gave 'time').
            return [word[:i], word[i:]]
    return [word]


value = 'times,'
# print() call form: the Python 2 print statement is a syntax error in Python 3.
print(lcheck(value))

Categories