parse parenthesized numbers to negative numbers - python

How can I parse parenthesized numbers in a list of strings into negative numbers (or strings with a negative sign)?
example
input
list1= ['abcd','(1,234)','Level-2 (2):','(31)%', 'others','(3,102.2)%']
output
['abcd',-1234,'Level-2 (2):','-31%', 'others','-3102.2%']
Only strings that consist solely of a number inside parentheses (digits, optionally with comma/dot separators), optionally followed by a percent sign (%), should be parsed. Other strings, such as 'Level-2 (2):', should be left unchanged.
I have tried
translator = str.maketrans(dict.fromkeys('(),'))
['-'+(x.translate(translator)) for x in list1]
but the output is (every element has a - prepended)
['-abcd', '-1234', '-Level-2 2:', '-31%', '-others', '-3102.2%']

You can try using re.sub, eg:
import re

list1 = ['abcd', '(1,234)', 'Level-2 (2):', '(31)%', 'others', '(3,102.2)%']

# Rewrite only strings that are *entirely* a parenthesized number (digits with
# ',' / '.' separators), optionally followed by '%'.  The character class no
# longer contains a literal '+', so inputs such as '(1+2)' are left alone, and
# the thousands separators are removed as the question asks.
res = [
    re.sub(
        r'^\(([\d.,]+)\)(%?)$',
        lambda m: '-' + m.group(1).replace(',', '') + m.group(2),
        el,
    )
    for el in list1
]
# ['abcd', '-1234', 'Level-2 (2):', '-31%', 'others', '-3102.2%']

Try using re.match
Ex:
import re

list1 = ['abcd', '(1,234)', 'Level-2 (2):', '(31)%', 'others', '(31.2)%']

# A parenthesized number with at most one ',' or '.', optionally followed by '%'.
paren_number = re.compile(r"\((\d+[.,]?\d*)\)(%?)")

result = []
for i in list1:
    # fullmatch (not match) so that text after the closing parenthesis, e.g.
    # '(2) items', prevents the rewrite instead of being silently dropped.
    m = paren_number.fullmatch(i)
    if m:
        result.append("-" + m.group(1) + m.group(2))
    else:
        result.append(i)
print(result)
Output:
['abcd', '-1,234', 'Level-2 (2):', '-31%', 'others', '-31.2%']
Update as per comment
import re

list1 = ['abcd', '(1,234)', 'Level-2 (2):', '(31)%', 'others', '(3,102.2)%']

# Digits, any number of ',ddd' groups, an optional '.ddd' fraction, all inside
# parentheses and optionally followed by '%'.
paren_number = re.compile(r"\((\d+(?:,\d+)*(?:\.\d+)?)\)(%?)")

result = []
for i in list1:
    # fullmatch (not match) so that trailing text after ')' or '%' prevents
    # the rewrite instead of being silently discarded.
    m = paren_number.fullmatch(i)
    if m:
        # Drop the thousands separators and prefix the minus sign.
        result.append("-" + m.group(1).replace(",", "") + m.group(2))
    else:
        result.append(i)
print(result)
Output:
['abcd', '-1234', 'Level-2 (2):', '-31%', 'others', '-3102.2%']

If you do not need to convert the value to int or float, re.match and str.translate should do the trick:
# Raw string: '\(' and '\d' are not valid Python string escapes, so a plain
# literal triggers an invalid-escape warning on current Python versions.
# rx.match anchors at the start and '$' at the end, so only whole-string
# "(number)" / "(number)%" values are accepted.
rx = re.compile(r'\([\d,.]+\)%?$')
# Translation table that deletes the parentheses and the thousands separator.
tab = str.maketrans({i: None for i in '(),'})
output = ['-' + i.translate(tab) if rx.match(i) else i for i in list1]
It gives:
['abcd', '-1234', 'Level-2 (2):', '-31%', 'others', '-3102.2%']

import re

# Only elements that are entirely "(number)" or "(number)%" are rewritten;
# everything else (e.g. 'abcd', 'Level-2 (2):') is left untouched.
paren_number = re.compile(r'\(([\d,.]+)\)(%?)')

# enumerate gives the position directly; list1.index(item) is a linear search
# per element and returns the wrong slot when an already-rewritten value
# equals a later item.
for idx, item in enumerate(list1):
    m = paren_number.fullmatch(item)
    if m:
        list1[idx] = '-' + m.group(1).replace(',', '') + m.group(2)
print(list1)
# output:
# ['abcd', '-1234', 'Level-2 (2):', '-31%', 'others', '-3102.2%']
or just:
import re

list1 = ['abcd', '(1,234)', 'Level-2 (2):', '(31)%', 'others', '(3,102.2)%']

# Matches a whole-string parenthesized number, optionally followed by '%'.
paren_number = re.compile(r'\(([\d,.]+)\)(%?)')


def negate_parenthesized(item):
    """Return *item* as '-number[%]' if it is '(number)[%]', else unchanged."""
    m = paren_number.fullmatch(item)
    if m is None:
        return item
    return '-' + m.group(1).replace(',', '') + m.group(2)


result = [negate_parenthesized(item) for item in list1]
print(result)
# output:
# ['abcd', '-1234', 'Level-2 (2):', '-31%', 'others', '-3102.2%']

Related

How to construct a string from letters of each word from list?

I am wondering how to construct a string, which takes 1st letter of each word from list. Then it takes 2nd letter from each word etc.
For example :
Input --> my_list = ['good', 'bad', 'father']
Every word has different length (but the words in the list could have equal length)
The output should be: 'gbfoaaodtdher'.
I tried:
def letters(my_list):
string = ''
for i in range(len(my_list)):
for j in range(len(my_list)):
string += my_list[j][i]
return string
print(letters(['good', 'bad', 'father']))
and I got:
'gbfoaaodt'.
That's a good job for itertools.zip_longest:
from itertools import zip_longest
s = ''.join([c for x in zip_longest(*my_list) for c in x if c])
print(s)
Or more_itertools.interleave_longest:
from more_itertools import interleave_longest
s = ''.join(interleave_longest(*my_list))
print(s)
Output: gbfoaaodtdher
Used input:
my_list = ['good', 'bad', 'father']
The answer by @mozway is the best approach, but if you want to stick with your original method, this is how to fix it:
def letters(my_list):
    """Interleave the words of *my_list* column by column.

    Takes the 1st letter of every word, then the 2nd letter of every word,
    and so on; words that are too short for a given position are skipped.

    Args:
        my_list: a list of strings (may be empty).

    Returns:
        The interleaved string; '' for an empty list.
    """
    if not my_list:
        # max() on an empty sequence would raise ValueError.
        return ''
    max_len = max(len(word) for word in my_list)
    # Outer loop walks character positions, inner loop walks the words, so the
    # output is ordered position-first.
    return ''.join(
        word[i]
        for i in range(max_len)
        for word in my_list
        if i < len(word)
    )


print(letters(['good', 'bad', 'father']))
Output: gbfoaaodtdher
We can do without zip_longest as well:
l = ['good', 'bad', 'father']
longest_string=max(l,key=len)
''.join(''.join([e[i] for e in l if len(e) > i]) for i in range(len(longest_string)))
#'gbfoaaodtdher'

Splitting a string with numbers and letters [duplicate]

I'd like to split strings like these
'foofo21'
'bar432'
'foobar12345'
into
['foofo', '21']
['bar', '432']
['foobar', '12345']
Does somebody know an easy and simple way to do this in python?
I would approach this by using re.match in the following way:
import re
match = re.match(r"([a-z]+)([0-9]+)", 'foofo21', re.I)
if match:
items = match.groups()
print(items)
>> ("foofo", "21")
def mysplit(s):
    """Split *s* into a (head, tail) tuple at its trailing run of ASCII digits.

    Args:
        s: the string to split.

    Returns:
        (head, tail) where tail is the (possibly empty) run of characters
        0-9 at the end of s and head is everything before it.
    """
    # rstrip with a character set removes every trailing digit in one call.
    head = s.rstrip('0123456789')
    tail = s[len(head):]
    return head, tail
>>> [mysplit(s) for s in ['foofo21', 'bar432', 'foobar12345']]
[('foofo', '21'), ('bar', '432'), ('foobar', '12345')]
Yet Another Option:
>>> [re.split(r'(\d+)', s) for s in ('foofo21', 'bar432', 'foobar12345')]
[['foofo', '21', ''], ['bar', '432', ''], ['foobar', '12345', '']]
>>> r = re.compile("([a-zA-Z]+)([0-9]+)")
>>> m = r.match("foobar12345")
>>> m.group(1)
'foobar'
>>> m.group(2)
'12345'
So, if you have a list of strings with that format:
import re

# One or more ASCII letters followed by one or more digits, captured separately.
r = re.compile("([a-zA-Z]+)([0-9]+)")
strings = ['foofo21', 'bar432', 'foobar12345']
# print() call form works on Python 3 (and on Python 2 for a single argument).
# NOTE: r.match returns None for a string that does not start with letters
# followed by digits, in which case .groups() raises AttributeError.
print([r.match(string).groups() for string in strings])
Output:
[('foofo', '21'), ('bar', '432'), ('foobar', '12345')]
I'm always the one to bring up findall() =)
>>> strings = ['foofo21', 'bar432', 'foobar12345']
>>> [re.findall(r'(\w+?)(\d+)', s)[0] for s in strings]
[('foofo', '21'), ('bar', '432'), ('foobar', '12345')]
Note that I'm using a simpler (less to type) regex than most of the previous answers.
Here is a simple function to separate multiple words and numbers from a string of any length; the re method only separates the first two words and numbers. I think this will help everyone else in the future:
def seperate_string_number(string):
    """Split *string* into alternating runs of letters and numbers.

    Consecutive alphabetic characters form one group and consecutive numeric
    characters form one group; any other character (space, punctuation, ...)
    always becomes a group of its own.

    Args:
        string: the text to split (may be empty).

    Returns:
        A list of the groups in order; [] for an empty string.
    """
    groups = []
    current = ''
    for char in string:
        # current[-1] is the previous character; extend the run only when both
        # characters are letters or both are numeric.
        same_kind = bool(current) and (
            (char.isalpha() and current[-1].isalpha())
            or (char.isnumeric() and current[-1].isnumeric())
        )
        if same_kind:
            current += char
        else:
            if current:
                groups.append(current)
            current = char
    # Flush the final run; this also covers one-character input, which the
    # index-based end-of-string check used to miss.
    if current:
        groups.append(current)
    return groups


print(seperate_string_number('10in20ft10400bg'))
# outputs : ['10', 'in', '20', 'ft', '10400', 'bg']
import re

# raw_input() exists only on Python 2; input() is its Python 3 equivalent.
s = input()
m = re.match(r"([a-zA-Z]+)([0-9]+)", s)
# re.match returns None when s does not start with letters followed by
# digits, so only read the groups when there is a match.
if m:
    print(m.group(0))  # the whole matched text
    print(m.group(1))  # the leading letters
    print(m.group(2))  # the digits that follow
without using regex, using isdigit() built-in function, only works if starting part is text and latter part is number
def text_num_split(item):
    """Split *item* at its first digit.

    Args:
        item: the string to split.

    Returns:
        [text, rest] where rest starts at the first digit of item.  When item
        contains no digit, rest is '' (previously the function fell off the
        end and returned None).
    """
    for index, letter in enumerate(item):
        if letter.isdigit():
            return [item[:index], item[index:]]
    return [item, '']


print(text_num_split("foobar12345"))
OUTPUT :
['foobar', '12345']
This is a little longer, but more versatile for cases where there are multiple, randomly placed, numbers in the string. Also, it requires no imports.
def getNumbers(input):
    """Return every maximal run of digit characters found in *input*.

    Args:
        input: the string to scan.  (The name shadows the builtin but is kept
            because it is part of the function's signature.)

    Returns:
        A list of digit strings in order of appearance; [] if there are none.
    """
    # Imported here so the snippet stays self-contained.
    from itertools import groupby

    # groupby splits the text into consecutive runs sharing the same
    # str.isdigit() result; only the digit runs are kept.
    return [
        ''.join(run)
        for is_digit, run in groupby(input, key=str.isdigit)
        if is_digit
    ]
Here is simple solution for that problem, no need for regex:
user = input('Input: ')  # e.g. user = 'foobar12345'
int_list, str_list = [], []
for item in user:
    try:
        int(item)  # raises ValueError for anything that is not a digit
    except ValueError:
        # Catch only the conversion failure; a bare except would also
        # swallow KeyboardInterrupt and genuine bugs.
        str_list.append(item)
    else:
        # Keep digits as str because str.join only works with strings.
        int_list.append(item)
# Built after the loop so both names exist even if one kind never occurs.
string = ''.join(str_list)
# int('') raises ValueError, so use None when the input has no digits.
integer = int(''.join(int_list)) if int_list else None
final = [string, integer]  # could also be a dict: {string: integer}
print(final)
In addition to the answer of @Evan:
If the incoming string is in this pattern 21foofo then the re.match pattern would be like this.
import re
match = re.match(r"([0-9]+)([a-z]+)", '21foofo', re.I)
if match:
items = match.groups()
print(items)
>> ("21", "foofo")
Otherwise, you'll get UnboundLocalError: local variable 'items' referenced before assignment error.

How do I split a string at a separator unless that separator is followed by a certain pattern?

I have a Python string
string = aaa1bbb1ccc1ddd
and I want to split it like this
re.split('[split at all occurrences of "1", unless the 1 is followed by a c]', string)
so that the result is
['aaa', 'bbb1ccc', 'ddd']
How do I do this?
Use negative-lookahead with regex and the re module:
>>> string = 'aaa1bbb1ccc1ddd'
>>> import re
>>> re.split(r"1(?!c)", string)
['aaa', 'bbb1ccc', 'ddd']
def split_by_delim_except(s, delim, bar):
    """Split *s* at every *delim* that is not immediately followed by *bar*.

    Args:
        s: the string to split.
        delim: the separator text (must be non-empty).
        bar: text that, when it directly follows a separator, protects that
            separator from being split on.

    Returns:
        A list of the pieces.

    Raises:
        ValueError: if delim is empty (same as str.split).
    """
    # Imported here so the snippet stays self-contained.
    import re

    if not delim:
        raise ValueError('empty separator')
    # A negative lookahead replaces the old '\b' placeholder trick, which
    # corrupted any input that already contained a backspace character.
    pattern = re.escape(delim) + '(?!' + re.escape(bar) + ')'
    return re.split(pattern, s)
split_by_delim_except('aaa1bbb1ccc1ddd', '1', 'c')
Although not as pretty as regex, my following code returns the same result:
string = 'aaa1bbb1ccc1ddd'
# Split the string at all instances of '1'
p1 = string.split('1')
# Create a new empty list so we can append our desired items to
new_result = []
for j in p1:
    if j.startswith('c') and new_result:
        # The '1' before this piece was followed by 'c', so undo that split by
        # gluing the piece back onto the previous element.  Using the last
        # element directly avoids a separate counter, which drifts out of
        # step with the list after the first merge.
        new_result[-1] += '1' + j
    else:
        # Also taken for a first piece starting with 'c': no '1' precedes it.
        new_result.append(j)
print(new_result)
Output:
['aaa', 'bbb1ccc', 'ddd']

Combine elements of a list with all possible separators

I have the following requirement.
I have a list which say has 3 elements [X,Y,2]
What I would like to do is to generate strings with a separator (say "-") between (or not) each element. The order of the elements in the array should be preserved.
So the output would be:
'XY2'
'X-Y-2'
'X-Y2'
'XY-2'
is there an elegant way to this in python?
>>> import itertools
>>> for c in itertools.product(' -', repeat=2): print ('X%sY%s2' % c).replace(' ', '')
XY2
XY-2
X-Y2
X-Y-2
Or, with the elements coming from a python list:
import itertools

a = ['X', 'Y', 2]
# Each gap gets either ' ' (no separator, removed below) or '-'.
for c in itertools.product(' -', repeat=2):
    # The formatted string is parenthesized as a whole so that .replace()
    # runs on the string; on Python 3, print(...).replace would be called on
    # print's None return value.
    print(('%s%s%s%s%s' % (a[0], c[0], a[1], c[1], a[2])).replace(' ', ''))
Or, in a slightly different style:
import itertools

a = ['X', 'Y', '2']
# '%s'.join(a) builds a template with one '%s' per gap, so the number of
# separators to pick is len(a) - 1 rather than a hard-coded 2.
for c in itertools.product(' -', repeat=len(a) - 1):
    # Parenthesized so .replace() applies to the string, not to the None
    # that print() returns on Python 3.
    print(('%s'.join(a) % c).replace(' ', ''))
To capture the output to a list:
import itertools

a = ['X', 'Y', '2']
# One choice of ' ' (no separator, stripped afterwards) or '-' per gap;
# '%s'.join(a) is the template with one '%s' placeholder per gap.
output = [
    ('%s'.join(a) % c).replace(' ', '')
    for c in itertools.product(' -', repeat=len(a) - 1)
]
print('output=', output)
A little more generalized but works for any number of separators and hopefully is easy to understand at each step:
import itertools

a = ['X', 'Y', '2']
all_separators = ['', '-', '+']
results = []
# this product puts all separators in all positions for len-1 (spaces between each element)
for this_separators in itertools.product(all_separators, repeat=len(a) - 1):
    this_result = []
    # zip_longest is the Python 3 name of izip_longest.  a is one element
    # longer than this_separators, so the last pair is padded with ''.
    for pair in itertools.zip_longest(a, this_separators, fillvalue=''):
        for element in pair:
            this_result.append(element)
    # if you want it, here it is as a comprehension
    # this_result = [element for pair
    #                in itertools.zip_longest(a, this_separators, fillvalue='')
    #                for element in pair]
    this_result_string = ''.join(this_result)  # check out join docs if it's new to you
    results.append(this_result_string)
print(results)
>>> ['XY2', 'XY-2', 'XY+2', 'X-Y2', 'X-Y-2', 'X-Y+2', 'X+Y2', 'X+Y-2', 'X+Y+2']
These are the results for your case with just '' and '-' as separators:
>>> ['XY2', 'XY-2', 'X-Y2', 'X-Y-2']
If you want everything in one comprehension:
# itertools.zip_longest is the Python 3 name of izip_longest; it pairs each
# element with the separator that follows it and pads the last pair with ''.
results = [''.join(element for pair
                   in itertools.zip_longest(a, this_separators, fillvalue='')
                   for element in pair)
           for this_separators in itertools.product(all_separators, repeat=len(a) - 1)]
I don't know if there is a function in itertools to do that, but I always think it's fun and a good exercise to do this kind of thing. So here is a solution with a recursive generator:
def generate(liste):
    """Yield every way to cut *liste* into consecutive, non-empty groups.

    Args:
        liste: a list of elements (list slicing and concatenation are used).

    Yields:
        A list of groups, each group being a list of adjacent elements in
        their original order.  Nothing is yielded for an empty list.
    """
    if not liste:
        # Without this guard the recursion on liste[1:] never terminates.
        return
    if len(liste) == 1:
        yield [liste]
    else:
        for i in generate(liste[1:]):
            # The first element either forms a group of its own ...
            yield [[liste[0]]] + i
            # ... or joins the first group of the remaining partition.
            yield [[liste[0]] + i[0]] + i[1:]


if __name__ == "__main__":
    for i in generate(["X", "Y", "2"]):
        print("test : " + str(i))
        # Elements within a group are concatenated and the groups are joined
        # with '-'; joining without a separator printed 'XY2' every time.
        print("-".join("".join(group) for group in i))
Something like this?
from itertools import permutations

i = ["X", "Y", "2"]
# With no length argument, permutations() yields full-length orderings, so
# the list can grow or shrink without touching a hard-coded 3.
for result in permutations(i):
    print("-".join(result))
Result:
X-Y-2
X-2-Y
Y-X-2
Y-2-X
2-X-Y
2-Y-X

Partitioning elements in list by \t . Python

my_list = ['1\tMelkor\tMorgoth\tSauronAtDolGoldul','2\tThingols\tHeirIsDior\tSilmaril','3\tArkenstone\tIsProbablyA\tSilmaril']
I'm trying to split this list into sublists separated by \t
output = [['1','Melkor','Morgoth','SauronAtDolGoldul'],['2','Thingols','HeirIsDior','Silmaril'],['3','Arkenstone','IsProbablyA','Silmaril']]
I was thinking something on the lines of
output = []
for k_string in my_list:
temp = []
for i in k_string:
temp_s = ''
if i != '\':
temp_s = temp_s + i
elif i == '\':
break
temp.append(temp_s)
It gets messed up with the t... I'm not sure how else I would go about doing it. I've seen people use .join for similar things, but I don't really understand how to use .join.
You want to use str.split(); a list comprehension lets you apply this to all elements in one line:
output = [sub.split('\t') for sub in my_list]
There is no literal \ in the string; the \t is an escape code that signifies the tab character.
Demo:
>>> my_list = ['1\tMelkor\tMorgoth\tSauronAtDolGoldul','2\tThingols\tHeirIsDior\tSilmaril','3\tArkenstone\tIsProbablyA\tSilmaril']
>>> [sub.split('\t') for sub in my_list]
[['1', 'Melkor', 'Morgoth', 'SauronAtDolGoldul'], ['2', 'Thingols', 'HeirIsDior', 'Silmaril'], ['3', 'Arkenstone', 'IsProbablyA', 'Silmaril']]
>>> import csv
>>> my_list = ['1\tMelkor\tMorgoth\tSauronAtDolGoldul','2\tThingols\tHeirIsDior\tSilmaril','3\tArkenstone\tIsProbablyA\tSilmaril']
>>> list(csv.reader(my_list, delimiter='\t'))
[['1', 'Melkor', 'Morgoth', 'SauronAtDolGoldul'], ['2', 'Thingols', 'HeirIsDior', 'Silmaril'], ['3', 'Arkenstone', 'IsProbablyA', 'Silmaril']]

Categories