Print item of list and another item of list - python

Suppose I have two lists:
list1=[1, 2, 2, 12, 23]
list2=[2, 3, 5, 3, 4]
I want to arrange them side by side list1 and list2 using python
so:
| list1 | list2 |
|---|---|
| 1 | 2 |
| 2 | 3 |
| 2 | 5 |
| 12 | 3 |
| 23 | 4 |
but I want to merge rows that share the same list1 value (here the third row, (2, 5), is folded into the second row) so that it becomes:
| list1 | list2 |
|---|---|
| 1 | 2 |
| 2 | 3,5 |
| 12 | 3 |
| 23 | 4 |

Using itertools.groupby:
from itertools import groupby

list1 = [1, 2, 2, 12, 23]
list2 = [2, 3, 5, 3, 4]

# Pair the two lists element-wise and merge consecutive pairs that share the
# same list1 value. groupby only groups *adjacent* equal keys, so equal list1
# values must already sit next to each other (as they do here).
for key, value in groupby(zip(list1, list2), key=lambda x: x[0]):
    print(f"{key} {','.join(str(y) for x, y in value)}")
or if you prefer using collections.defaultdict:
from collections import defaultdict

list1 = [1, 2, 2, 12, 23]
list2 = [2, 3, 5, 3, 4]

# Collect every list2 value under its list1 key; unlike groupby this also
# merges equal keys that are not adjacent.
result = defaultdict(list)
for key, value in zip(list1, list2):
    result[key].append(value)

for key, value in result.items():
    print(f"{key} {','.join(map(str, value))}")
using only loops and built-in functions:
list1 = [1, 2, 2, 12, 23]
list2 = [2, 3, 5, 3, 4]

# Plain-dict variant: setdefault creates the empty list the first time a key
# is seen and returns the existing list afterwards.
result = {}
for key, value in zip(list1, list2):
    result.setdefault(key, []).append(value)

for key, value in result.items():
    print(f"{key} {','.join(map(str, value))}")
In all three cases the output is
1 2
2 3,5
12 3
23 4

You could zip the two lists together and add the pairs to a dictionary. If a value from list1 already exists as a key in the dictionary, you format the stored value as a string and join the next value onto it under the same key. I am posting this since it might be easier to follow; however, I really like the answer above from @buran.
list1 = [1, 2, 2, 12, 23]
list2 = [2, 3, 5, 3, 4]
combo_dict = {}

# zip both lists together and iterate over them
for key, val in zip(list1, list2):
    if key in combo_dict:
        # The list1 item already exists in combo_dict: format the stored
        # value and the new list2 value as strings and join them with a
        # comma under the same key.
        combo_dict[key] = ",".join([f"{combo_dict[key]}", f"{val}"])
    else:
        # First occurrence: store the list2 value as-is under the list1 key
        # (it could also be formatted as a string here).
        combo_dict[key] = val

# this is just for printing the dict to the console
for key, val in combo_dict.items():
    print(f"{key} {val}")

Related

Aggregate list dataframe in pandas using dedicated function

I have the following dataframe in pandas:
import pandas as pd

data = {
    'ID_1': {0: '10A00', 1: '10B00', 2: '20001', 3: '20001'},
    'ID_2_LIST': {0: [20009, 30006], 1: [20001, 30006],
                  2: [30009, 30006], 3: [20001, 30003]},
    'ID_OCCURRENCY_LIST': {0: [1, 2], 1: [5, 6], 2: [2, 4], 3: [1, 3]},
}
# create df (the inner dict keys 0..3 become the row index)
df = pd.DataFrame(data)
| | ID_1 | ID_2_LIST | ID_OCCURRENCY_LIST |
|---:|:-------|:---------------|:---------------------|
| 0 | 10A00 | [20009, 30006] | [1, 2] |
| 1 | 10B00 | [20001, 30006] | [5, 6] |
| 2 | 20001 | [30009, 30006] | [2, 4] |
| 3 | 20001 | [20001, 30003] | [1, 3] |
I would like to aggregate by the ID_1 field, applying an external function to identify similar ID_1 values (let's say "similarID(ID1,ID2)", which returns ID1 or ID2 according to some internal rules), then regenerate the list of ID2 values and sum the occurrences of all equal ID2 values.
The outcome should be:
**INDEX ID_1 ID_2_LIST ID_OCCURRENCY_LIST**
0 10A00 [20009,30006,20001] [1, 8, 5]
1 10B00 [20001,30006, 30003,20001] [5, 6, 4, 2]
1 20001 [30009,30006, 20001,30003] [2, 4, 1, 3]
EDIT
The code for the function is the following (s1 = first string, c1 = second string, p1 = similarity percentage, l1 = confidence level; pyDamerauLevenschtein is a Damerau–Levenshtein edit-distance function taken from the literature):
def pySimilar(s1, c1, p1, l1):
    """Return 1 if strings *s1* and *c1* are judged similar, otherwise 0.

    Every comparison skips the first three characters of each string.

    Args:
        s1: first string (or None).
        c1: second string (or None).
        p1: similarity percentage used as the threshold centre.
        l1: confidence level, in percent of *p1*, defining the tolerance band
            ``p1 +/- l1 * p1 / 100``.

    Returns:
        1 when the strings share the same tail, or when the average score of
        progressively truncated comparisons reaches the band; 0 otherwise
        (including None input or a stripped length of 5 or less).
    """
    if s1 is None or c1 is None:
        return 0
    # Strip before the length guard: a padded short string would otherwise
    # pass the guard and make ``len(s1) - 3`` zero in the score below.
    s1 = s1.strip()
    c1 = c1.strip()
    if len(s1) <= 5 or len(c1) <= 5:
        return 0

    # Identical tails (everything after the first three characters) match.
    if s1[3:len(s1)] == c1[3:len(c1)]:
        return 1

    # The shorter string sets the iteration count; the first part of its
    # tail must occur somewhere in the longer string, else reject early.
    if len(s1) >= len(c1):
        iteration_length = len(c1) / 2
        a = int(len(c1) / 2) + 1
        if s1.find(c1[3:a]) < 0:
            return 0
    else:
        iteration_length = len(s1) / 2
        b = int(len(s1) / 2) + 1
        if c1.find(s1[3:b]) < 0:
            return 0

    v = []
    cnt = 0
    s = s1
    c = c1
    search = s1
    while cnt < iteration_length:
        # Percentage similarity of the current tails, weighted by how much
        # of s1 is still being compared.
        score = (100 - ((pyDamerauLevenschtein(s[3:len(s)], c[3:len(c)])) * 100) / (len(c) - 3)) * ((len(search) - 3) / (len(s1) - 3))
        v.append(score)
        cnt = cnt + 1
        # Drop one more trailing character from both strings each round.
        search = s1[0:len(s1) - cnt]
        s = s1[0:len(s1) - cnt]
        c = c1[0:len(c1) - cnt]

    average = sum(v) / len(v)
    lower = p1 - (l1 * p1 / 100)
    upper = p1 + (l1 * p1 / 100)
    # Accept when the average lies inside the band or above it.
    if (lower <= average and average <= upper) or average >= upper:
        return 1
    return 0
I have implemented a function to be applied in the dataframe but it is very slow:
def aggregateListAndOccurrencies(list1, list2):
    """Merge similar items of *list1* and sum their counts from *list2*.

    Items are processed in order. Each remaining head item absorbs every
    later item for which ``pyMATCHSIMILARPN(head, item, 65, 20) == 1``, and
    the absorbed items' counts are added to the head's count.

    The inputs are not modified. The previous version popped from both lists
    while iterating over ``list1``, which skipped elements, misaligned the
    index into ``list2`` and emptied the caller's lists.

    Args:
        list1: items to aggregate.
        list2: occurrence counts, parallel to *list1*.

    Returns:
        ``[items, counts]``: two parallel lists of the surviving items and
        their summed counts.
    """
    final = []
    final_cnt = []
    remaining = list(zip(list1, list2))
    while remaining:
        elem, cnt_temp = remaining[0]
        unmatched = []
        for item, cnt in remaining[1:]:
            if pyMATCHSIMILARPN(elem, item, 65, 20) == 1:
                cnt_temp = cnt + cnt_temp
            else:
                unmatched.append((item, cnt))
        final.append(elem)
        final_cnt.append(cnt_temp)
        # Only items that did not match this head are examined again.
        remaining = unmatched
    return [final, final_cnt]
How could I apply this in pandas? Any suggestions?
You can simply do a groupby over your ID_1 and just sum the ID_2_List and ID_OCCURRENCY_LIST columns:
df.groupby('ID_1').agg({'ID_2_LIST': 'sum', 'ID_OCCURRENCY_LIST': 'sum'})
If there's a specific function you'd like the groupby to apply, you can pass it as a lambda in .agg:
# The column holds lists of integers, so str.join cannot be applied to it;
# this lambda flattens each group's lists into one list instead.
df.groupby('ID_1').agg({'ID_2_LIST': 'sum', 'ID_OCCURRENCY_LIST': lambda x: [v for lst in x for v in lst]})

Comparing keys of a dictionary in Python and extracting corresponding values

I have two dictionaries:-
dict_1 = {'a': ["['b','c','d','e']"],'b': ["['a','c','d','e']"],'p': ["['q','r','s']"]}
dict_2 = {'a': ["['x','y','z','n']"],'b': ["['u','v','w','x','y','z','n']"]}
I want to extract the values of every key that exists in both dictionaries, in such a way that each output row holds one value from each dictionary for that key.
Expected output
kindly help me to get the output.
I don't know what's a dataframe. Here is the code and sample output:
dict_one = {1: [1, 2, 3, 4]}
dict_two = {1: [7, 8, 9, 10]}

all_keys = set(dict_one).union(set(dict_two))
rows = []
for key in all_keys:
    # A key missing from either dictionary contributes an empty list, so
    # zip produces no rows for it.
    dict_one_values = dict_one.get(key, [])
    dict_two_values = dict_two.get(key, [])
    # zip stops at the shorter list, pairing values position by position.
    for first, second in zip(dict_one_values, dict_two_values):
        rows.append([key, first, second])

for row in rows:
    print(*row)
output:
1 1 7
1 2 8
1 3 9
1 4 10
I will assume this input:
dict_1 = {'a': ['b','c','d','e'],'b': ['a','c','d','e'],'p': ['q','r','s']}
dict_2 = {'a': ['x','y','z','n'],'b': ['u','v','w','x','y','z','n']}
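If not, you can easily obtain it with dict_i = {k: literal_eval(v[0]) for k, v in dict_i.items()} (literal_eval comes from the ast module; each original value is a one-element list holding the string to parse).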
You can use a comprehension to select the expected data, and use it to build your dataframe:
data = [(k, v1, v2) for k in dict_1 if k in dict_2
for v1, v2 in zip(dict_1[k], dict_2[k])]
df = pd.DataFrame(data)
it gives:
0 1 2
0 a b x
1 a c y
2 a d z
3 a e n
4 b a u
5 b c v
6 b d w
7 b e x

How to format a list in Python as a table

I have two separate lists that I want to format as a table.
keys = ('banana', 'broccoli', 'beans', 'apple')
values = (5, 1, 1, 1)
In the list above, it counts how many fruits there are available. For instance, there are 5 bananas and 1 broccoli. Note that the names of the fruit have different lengths. Preferably, I would like to not have to import any modules. Is there a way that I can format these lists so that it looks like:
'Banana' | 5
'Broccoli' | 1
'Beans' | 1
'Apple' | 1
I have already tried using this code:
row_format ="{:>15}" * (len(items) + 1)
print(row_format.format("", *items))
for keys, values in zip(items, keys):
print(row_format.format(values, *row))
Thanks for the help!
keys = ('banana', 'broccoli', 'beans', 'apple')
values = (5, 1, 1, 1)

# Width of the longest key; the +2 below leaves room for the two quote
# characters that repr() adds around each name.
max_length = len(max(keys, key=len))
for k, v in zip(keys, values):
    print("{: <{width}} | {}".format(repr(k).title(), v, width=max_length + 2))
Prints:
'Banana' | 5
'Broccoli' | 1
'Beans' | 1
'Apple' | 1
You can use:
width = max(len(w) for w in keys)
print(''.join(f'{k:{width}} | {v}\n' for k, v in zip(keys, values)))
# output:
# banana | 5
# broccoli | 1
# beans | 1
# apple | 1

Python: Efficient lookup by interval

I have a large lookup table where the key is an interval:
| min | max | value |
|-----|-----|---------|
| 0 | 3 | "Hello" |
| 4 | 5 | "World" |
| 6 | 6 | "!" |
| ... | ... | ... |
The goal is to create a lookup structure my_lookup that returns a value for each integer, depending on the range the integer is in.
For example: 2 -> "Hello", 3 -> "Hello", 4 -> "World".
Here is an implementation that does what I want:
from typing import Optional

d = {
    (0, 3): "Hello",
    (4, 5): "World",
    (6, 6): "!"
}


def my_lookup(i: int) -> Optional[str]:
    """Return the value whose inclusive (min, max) interval contains *i*.

    Scans every entry of ``d`` in order and returns the first match, or
    None when no interval contains *i*.
    """
    for key, value in d.items():
        if key[0] <= i <= key[1]:
            return value
    return None
But looping over all entries seems inefficient (the actual lookup table contains 400.000 lines). Is there a faster way?
If your intervals are sorted (in ascending order), you can use bisect module (doc). The search is O(log n) instead of O(n):
import bisect

min_lst = [0, 4, 6]
max_lst = [3, 5, 6]
values = ['Hello', 'World', '!']

val = 2
# bisect_left returns the index of the first interval whose upper bound is
# >= val; that is the only interval that can contain val.
idx = bisect.bisect_left(max_lst, val)
# Guard against val lying past the last interval or in a gap between two.
if idx < len(max_lst) and min_lst[idx] <= val <= max_lst[idx]:
    print('Value found ->', values[idx])
else:
    print('Value not found')
Prints:
Value found -> Hello

Counting choice streaks in Python

I have a dataset that looks like the following:
Subject | Session | Trial | Choice
--------+---------+-------+-------
1 | 1 | 1 | A
1 | 1 | 2 | B
1 | 1 | 3 | B
1 | 1 | 4 | B
1 | 1 | 5 | B
1 | 1 | 6 | A
2 | 1 | 1 | A
2 | 1 | 2 | A
2 | 1 | 3 | A
I would like to use a Python script to generate the following table:
Subject | Session | streak_count
--------+---------+-------------
1 | 1 | 3
2 | 1 | 1
Where streak_count is a count of the total number of choice streaks made by a given subject during a given session, and a streak is any number of choices of one particular item in a row (>0).
I've tried using some of the suggestions to similar questions here, but I'm having trouble figuring out how to count these instances, rather than measure their length, etc., which seem to be more common queries.
def count():
love = []
love1 = []
streak = -1
k = 0
session = 1
subject = raw_input("What is your subject? ")
trials = raw_input("How many trials do you wish to do? ")
trial = 0
for i in range(int(trials)):
choice = raw_input("What was the choice? ")
love.append(choice)
love1.append(choice)
trial += 1
print subject, trial, choice
if love[i] == love1[i-1]:
streak += 1
print subject, session, streak
This may be what you want: it takes the number of trials you wish to run and your subject, and adds one whenever there is a streak. The reason streak starts at -1 is that the first answer always adds one, because the negative index i-1 wraps around to the element itself.
I think this is what you are asking for;
import itertools

# Rows are [subject, session, trial, choice].
data = [
    [1, 1, 1, 'A'],
    [1, 1, 2, 'B'],
    [1, 1, 3, 'B'],
    [1, 1, 4, 'B'],
    [1, 1, 5, 'B'],
    [1, 1, 6, 'A'],
    [2, 1, 1, 'A'],
    [2, 1, 2, 'A'],
    [2, 1, 3, 'A']
]

# groupby merges only adjacent rows, so data must be ordered by subject.
grouped = itertools.groupby(data, lambda x: x[0])
results = dict()
this, last = None, None
for key, group in grouped:
    results[key] = 0
    for c, d in enumerate(group):
        this = d
        # A new streak starts on a subject's first row or whenever the
        # choice differs from the previous row's choice.
        streak = c == 0 or this[3] != last[3]
        if streak:
            results[key] += 1
        last = this

print(results)
This yields;
{1: 3, 2: 1}

Categories