parse string into list based on input list - python

I would like to write a function in python3 to parse a string based on the input list element. The following function works but is there a better way to do it?
def func(oStr, s_s):
if not oStr:
return s_s
elif '' in s_s:
return [oStr]
else:
for x in s_s:
st = oStr.find(x)
end = st + len(x)
res.append(oStr[st:end])
oStr = oStr.replace(x, '')
if oStr:
res.append(oStr)
return res
case 1
o_str = 'ABCNew York - Address'
s_str = ['ABC']
return ['ABC', 'New York - Address']
case 2
o_str = 'New York Friend Add | NumberABCNewYork Name | FirstName Last Name | time : Jan-31-2017'
s_str = ['New York Friend Add | Number', 'ABC', 'NewYork Name | FirstName Last Name | time: Jan-31-2017']
return ['New York Friend Add | Number', 'ABC', 'NewYork Name | FirstName Last Name | time: Jan-31-2017']
case 3
o_str = '-'
s_str = ['']
return ['-']
case 4
o_str = '1'
s_str = ['']
return ['1']
case 5
o_str = '1234Family-Name'
s_str = ['1234']
return ['1234', 'Family-Name']
case 6
o_str = ''
s_str = ['12345667', 'name']
return ['12345667', 'name']

To use a string like an array, you would just program it in the same way. For example
myStr="Hello, World!"
myString.insert(len(myString),"""Your character here""")
For your purposes .append() would work exactly the same way. Hope I helped.

Related

How to retrieve information in the first section of the raw data only by regular expressions?

Below is a sample of the raw data which my code will process by regular expressions:
raw_data = '''
name : John
age : 26
gender : male
occupation : teacher
Father
---------------------
name : Bill
age : 52
gender : male
Mother
---------------------
name : Mary
age : 48
gender : female
'''
I want to retrieve the following part of information from the raw data and store it in a dictionary:
dict(name = 'John', age = 26, gender = 'male', occupation = 'teacher')
However, when I run my code as follows, it does not work as I expect:
import re
p = re.compile('[^-]*?^([^:\-]+?):([^\r\n]*?)$', re.M)
rets = p.findall(raw_data)
infoAboutJohnAsDict = {}
if rets != []:
for ret in rets:
infoAboutJohnAsDict[ret[0]] = ret[1]
else:
print("Not match.")
print(f'rets = {rets}')
print(f'infoAboutJohnAsDict = {infoAboutJohnAsDict}')
Can anyone give me any suggestion about how I should modify my code to achieve what I intend to do?
Here is one approach using regular expressions. We can first trim off the latter portion of the input which you don't want using re.sub. Then, use re.findall to find all key value pairs for John, and convert to a dictionary.
raw_data = re.sub(r'\s+\w+\s+-+.*', '', raw_data, flags=re.S)
matches = re.findall(r'(\w+)\s*:\s*(\w+)', raw_data)
d = dict()
for m in matches:
d[m[0]] = m[1]
print(d)
# {'gender': 'male', 'age': '26', 'name': 'John', 'occupation': 'teacher'}

String matching and replace

I have a text file like this and i want in implement in python
Enter the Username"<Username>" and phonenumber"<phonenumber>"
Enter the origin"<origin>" and destination"<destination>"
Examples:
| Username | phonenumber | origin | destination|
| JOHN | 40256786 | NYC | LONDON |
i want to replace the string which are in <> and replace with actual data, and my output will look like this :
Enter the Username "JOHN" and phonenumber "40256786"
Enter the origin "NYC" and destination "LONDON"
Update
Try:
import re
text = []
data = []
with open('data.txt') as fp:
line = ''
for line in fp:
if line.startswith('Examples'):
break
text.append(line)
text = ''.join(text)
headers = re.split('\s*\|\s*', fp.readline())[1:-1]
for line in fp:
values = re.split('\s*\|\s*', line)[1:-1]
data.append(dict(zip(headers, values)))
for d in data:
print(re.sub(r'\<(?P<key>[^>]*)\>', lambda x: d[x.group('key')], text))
Output:
Enter the Username"JOHN" and phonenumber"40256786"
Enter the origin"NYC" and destination"LONDON"
Old answer
You can use plenty of text processors to substitute text by variables: string.Template ($), format strings ({ }), Jinja2 ({{ }}). If you can, change your delimiter:
Here an example of format strings:
text = '''\
Enter the Username "{Username}" and phonenumber "{phonenumber}"
Enter the origin "{origin}" and destination "{destination}"\
'''
data = {'Username': 'John', 'phonenumber': '40256786',
'origin': 'NYC', 'destination': 'LONDON'}
print(text.format(**data))
Output:
Enter the Username "John" and phonenumber "40256786"
Enter the origin "NYC" and destination "LONDON"
One way would be to split each line by the delimiting character |. Then you can set the variables for the string accordingly.
sample_line = '| JOHN | 40256786 | NYC | LONDON |'
sample_line = sample_line.split('|')
data = {
'Username': sample_line[1],
'phonenumber': sample_line[2],
'origin': sample_line[3],
'destination': sample_line[4]
}
text = '''\
Enter the Username "{Username}" and phonenumber "{phonenumber}"
Enter the origin "{origin}" and destination "{destination}"\
'''
print(text.format(**data))
Alternatively, you should be able to use something like csv.reader

Remove punctation from every value in Python dictionary

I have a long dictionary which looks like this:
name = 'Barack.'
name_last = 'Obama!'
street_name = "President Streeet?"
list_of_slot_names = {'name':name, 'name_last':name_last, 'street_name':street_name}
I want to remove the punctation for every slot (name, name_last,...).
I could do it this way:
name = name.translate(str.maketrans('', '', string.punctuation))
name_last = name_last.translate(str.maketrans('', '', string.punctuation))
street_name = street_name.translate(str.maketrans('', '', string.punctuation))
Do you know a shorter (more compact) way to write this?
Result:
>>> print(name, name_last, street_name)
>>> Barack Obama President Streeet
Use a loop / dictionary comprehension
{k: v.translate(str.maketrans('', '', string.punctuation)) for k, v in list_of_slot_names.items()}
You can either assign this back to list_of_slot_names if you want to overwrite existing values or assign to a new variable
You can also then print via
print(*list_of_slot_names.values())
name = 'Barack.'
name_last = 'Obama!'
empty_slot = None
street_name = "President Streeet?"
print([str_.strip('.?!') for str_ in (name, name_last, empty_slot, street_name) if str_ is not None])
-> Barack Obama President Streeet
Unless you also want to remove them from the middle. Then do this
import re
name = 'Barack.'
name_last = 'Obama!'
empty_slot = None
street_name = "President Streeet?"
print([re.sub('[.?!]+',"",str_) for str_ in (name, name_last, empty_slot, street_name) if str_ is not None])
import re, string
s = 'hell:o? wor!d.'
clean = re.sub(rf"[{string.punctuation}]", "", s)
print(clean)
output
hello world

python transform complex list of lists into a string

I have a complex list of lists that looks like that :
[[['MARIA DUPONT',
' infos : ',
[' age = 28',
' yeux = bleus',
' sexe = femme']],
[' + ']],
[['PATRICK MARTIN',
' infos : ',
[' age = 53',
' yeux = marrons',
' sexe = homme']],
[' + ']],
[['JULIE SMITH',
' infos : ',
[' age = 17',
'yeux = verts',
'sexe = femme']],
[' fin ']]]
I am trying to transform it into a string. At the end I want to print that :
MARIA DUPONT,
infos :
age = 28
yeux = bleus
sexe = femme
+
PATRICK MARTIN
infos :
age = 53
yeux = marrons
sexe = homme
+
JULIE SMITH
infos :
age = 17
yeux = verts
sexe = femme
fin
My real data are more complicated and I have lists into level 5.
So I am looking for a way to solve the problem I explained to be able to adapt it and apply it to my real data.
I am trying with
''.join(list)
and
''.join(x for x in list)
But in both cases I have the error TypeError: list indices must be integers or slices, not list
I've tryed other ways but now I'm confused and I didn't found a good solution to reach my goal.
Any help would be appreciated, and thanks in advance. (and sorry for my bad english!)
You can use str.join with a single pass over the lists:
data = [[['MARIA DUPONT', ' infos : ', [' age = 28', ' yeux = bleus', ' sexe = femme']], [' + ']], [['PATRICK MARTIN', ' infos : ', [' age = 53', ' yeux = marrons', ' sexe = homme']], [' + ']], [['JULIE SMITH', ' infos : ', [' age = 17', 'yeux = verts', 'sexe = femme']], [' fin ']]]
r = '\n'.join('\n'.join([a, b, *c, f'\n{k}\n']) for [a, b, c], [k] in data)
Output:
MARIA DUPONT
infos :
age = 28
yeux = bleus
sexe = femme
+
PATRICK MARTIN
infos :
age = 53
yeux = marrons
sexe = homme
+
JULIE SMITH
infos :
age = 17
yeux = verts
sexe = femme
fin
If your lists are arbitrarily nested, then you can use recursion with a generator:
def flatten(d):
if isinstance(d, str):
yield d
else:
yield from [i for b in d for i in flatten(b)]
print('\n'.join(flatten(data)))
.join() won't work with a list in the list. I can offer you a solution based on recursion.
def list_to_str(_list):
result = ""
if isinstance(_list, list):
for l in _list:
result += list_to_str(l)
else:
result += _list
return result
result_string = list_to_str(your_list)
print(result_string)
I can't tell if you have a list with varying levels of lists but if so, you would probably need a conditional to see if the list goes further and recursively iterate the list.
def convert_list(dataset):
result = ''
for element in dataset:
if isinstance(element, list):
result += convert_list(element)
else:
result += str(element)
return result
This will not print the newlines you want but it does return the list as a string.
Write a recursive function to get inside your lists like below:
def print_data(input_list):
for obj in input_list:
if isinstance(obj, list):
print_data(obj)
else:
print(obj)
input_list = [[['MARIA DUPONT',
' infos : ',
[' age = 28',
' yeux = bleus',
' sexe = femme']],
[' + ']],
[['PATRICK MARTIN',
' infos : ',
[' age = 53',
' yeux = marrons',
' sexe = homme']],
[' + ']],
[['JULIE SMITH',
' infos : ',
[' age = 17',
'yeux = verts',
'sexe = femme']],
[' fin ']]]
print_data(input_list)

How to distribute comma separated element to form a list in python

How to extract/split multi-line comment to make a new list
clientInfo="""James,Jose,664 New Avenue,New Orleans,Orleans,LA,8/27/200,123,jjose#gmail.com,;
Shenna,Laureles, 288 Livinghood Heights,Brighton,Livingston,MI,2/19/75,laureles9219#yahoo.com,;
"""
into this kind of list
f_name = ["james","sheena"]
l_name = ["jose","Laureles"]
strt = ["664 New Avenue","288 Livinghood Heights"]
cty = ["New Orleans","Brighton"]
state = ["New Orleans","Livingston"]
If the order is always same. You could do something like this;
f_name = []
l_name = []
strt = []
cty = []
state = []
for client in clientData.split(";\n "):
client_ = client.split(",")
f_name.append(client_[0])
l_name.append(client_[1])
strt.append(client_[2])
cty.append(client_[3])
state.append(client_[4])
I could add some exception handling to handle the ; at the end of your string but, leaving that to you.
You can use split and zip.
def extract(string):
lines = string.split(";")
split_lines = tuple(map(lambda line: line.split(","), lines))
no_space1 = tuple(map(lambda item: item.strip(), split_lines[0]))
no_space2 = tuple(map(lambda item: item.strip(), split_lines[1]))
return list(zip(no_space1, no_space2))
This will produce
[('James', 'Shenna'), ('Jose', 'Laureles'), ('664 New Avenue', '288 Livinghood Heights'), ('New Orleans', 'Brighton'), ('Orleans', 'Living
ston'), ('LA', 'MI'), ('8/27/200', '2/19/75'), ('123', 'laureles9219#yahoo.com'), ('jjose#gmail.com', '')]
It has some tuples at the end you didn't ask for, but its relatively good. The no_space 1 and 2 lines are a bit repetitive, but cramming them into one line is worse in my opinion.
You can try:
clientData = """James,Jose,664 New Avenue,New Orleans,Orleans,LA,8/27/200,123,jjose#gmail.com,;
Shenna,Laureles, 288 Livinghood Heights,Brighton,Livingston,MI,2/19/75,laureles9219#yahoo.com,;
"""
data = clientData.split(";\n")
f_name = []
l_name = []
strt = []
cty = []
state = []
for data_line in data:
data_line = data_line.strip()
if len(data_line) >= 5:
line_info = data_line.split(",")
f_name.append(line_info[0].strip())
l_name.append(line_info[1].strip())
strt.append(line_info[2].strip())
cty.append(line_info[3].strip())
state.append(line_info[4].strip())
print(f_name)
print(l_name)
print(strt)
print(cty)
print(state)
Output:
['James', 'Shenna']
['Jose', 'Laureles']
['664 New Avenue', '288 Livinghood Heights']
['New Orleans', 'Brighton']
['Orleans', 'Livingston']

Categories