Creating a nested/recursive list - python

How can I create a list recursively?
I have this list:
l = ['a', 'b', 'new', 'c', 'd', 'new', 'z', 'x', 'c', 'fin', 'f', 'fin',
'g', 'l', 'new', 'z', 'x', 'c', 'fin', 'j']
The expected output is:
r = ['a', 'b', ['c', 'd', ['z', 'x', 'c'], 'f'], 'g', 'l', ['z', 'x', 'c'], 'j']
What I have tried so far:
# Asker's attempt at a recursive parser (it does not produce the expected output).
# NOTE(review): the loop iterates over the slice `l[index:]` taken on entry, so
# changing `index` after a recursive call does not skip items in this loop.
# NOTE(review): the 'fin' branch returns only `r`, while the final return gives
# `(r, index)`; both call sites unpack two values.
def asd(l, index=0):
r = []
for i in l[index:]:
index += 1
if i == 'new':
i, index = asd(l, index)
r.append(i)
if i == 'fin':
return r
return r, index
r, index = asd(l)
I cannot understand how to make it work. Can anyone help me?

This is a non-recursive solution that can create your list, parsing in one pass without any need for costly index() operations:
# Build a nested list from a flat list in a single pass: 'new' opens a
# sub-list, 'fin' closes the innermost open one.
l = ['a', 'b', 'new', 'c', 'd', 'new', 'f', 'fin', 'g', 'fin', 'j']

rv = []
# Stack of the lists that are currently open; ordinary elements are always
# added to the last one.
curr = [rv]
for elem in l:
    if elem == "new":
        # Open a sub-list inside the current list and make it the new target.
        nested = []
        curr[-1].append(nested)
        curr.append(nested)
    elif elem == "fin":
        # Done with the innermost list; later elements go to its parent.
        curr.pop()
    else:
        curr[-1].append(elem)
print(rv)
Output:
['a', 'b', ['c', 'd', ['f'], 'g'], 'j']
l = ['a', 'b', 'new', '1', '2', '3', 'fin', 'c', 'new', 'x', 'y', 'z', 'fin',]
leads to
['a', 'b', ['1', '2', '3'], 'c', ['x', 'y', 'z']]
You need to foolproof it against unbalanced / incorrect new/fin's
Edited to make it more concise after Matthieu's comment.

Here is a straightforward recursive solution, using a deque as a stack data structure from which you can popleft the leftmost element in O(1).
Algorithm
from collections import deque
def nest(lst):
    """Return *lst* with every 'new' ... 'fin' run folded into a nested list.

    The elements are copied into a deque so the recursive helper can consume
    them from the left in O(1) each; *lst* itself is not modified.

    A 'fin' that has no open 'new' ends processing: everything after it is
    ignored. A 'new' that is never closed simply runs to the end of the input.
    """
    return _nest(deque(lst))


def _nest(deq):
    """Consume *deq* from the left and build one nesting level.

    Stops at the first 'fin' (which is consumed) or when *deq* is empty.
    Every 'new' triggers a recursive call whose result is appended as a
    sub-list.
    """
    result = []
    while deq:
        x = deq.popleft()
        if x == 'fin':
            break
        if x == 'new':
            result.append(_nest(deq))
        else:
            result.append(x)
    return result
Tests
# Sample inputs covering empty input, flat input, sibling groups, empty
# groups and nested groups.
tests = [
    [],
    [1, 2, 3],
    [1, 2, 'new', 3, 4, 'fin', 5],
    [1, 2, 'new', 3, 4, 'fin', 5, 6, 'new', 7, 'fin'],
    ['new', 'fin', 'new', 'fin', 'new', 'new', 'fin', 'fin'],
    ['new', 1, 2, 'fin'],
    [1, 2, 3, 'new', 4, 'new', 5, 6, 'fin', 7, 8, 'fin', 9, 10, 'new', 11, 'fin', 12, 13],
]
for test in tests:
    print(nest(test))
Output
[]
[1, 2, 3]
[1, 2, [3, 4], 5]
[1, 2, [3, 4], 5, 6, [7]]
[[], [], [[]]]
[[1, 2]]
[1, 2, 3, [4, [5, 6], 7, 8], 9, 10, [11], 12, 13]

You can use a stack instead: go through the list and push a new list on "new", pop it on "fin":
def parse(l):
    """Fold every "new" ... "fin" run of *l* into a nested list.

    A stack holds the lists that are currently open. Each sub-list is
    attached to its parent as soon as it is opened, so a group that is
    never closed with "fin" is still present in the result instead of
    being silently dropped.

    Args:
        l: flat iterable of items; "new" opens a sub-list, "fin" closes it.

    Returns:
        The nested list.

    Raises:
        IndexError: if a "fin" appears while no "new" group is open.
    """
    root = []
    stack = [root]
    for i in l:
        if i == "new":
            child = []
            stack[-1].append(child)
            stack.append(child)
        elif i == "fin":
            if len(stack) == 1:
                # Never pop the root list: report the unbalanced marker.
                raise IndexError('"fin" without a matching "new"')
            stack.pop()
        else:
            stack[-1].append(i)
    return root

Recursive alternative :
def asd(l):
    """Recursively fold 'new' ... 'fin' runs of list *l* into nested lists.

    The first 'new' is located, its matching 'fin' is found by tracking the
    nesting depth, and the function recurses on the enclosed slice and on
    the remainder of the list.

    A 'new' with no matching 'fin' is treated as running to the end of the
    list, so no element is lost and an input ending in 'new' no longer fails
    on an unbound index.

    Returns *l* itself (not a copy) when it contains no 'new'.
    """
    if 'new' not in l:
        return l
    index_new = l.index('new')
    depth = 1
    for index_fin, e in enumerate(l[index_new + 1:], index_new + 1):
        if e == 'new':
            depth += 1
        elif e == 'fin':
            depth -= 1
        if not depth:
            break
    else:
        # No matching 'fin': the group extends to the end of the list.
        index_fin = len(l)
    return l[:index_new] + [asd(l[index_new + 1:index_fin])] + asd(l[index_fin + 1:])
Input :
['a', 'b', 'new', 'c', 'd', 'new', 'z', 'x', 'c', 'fin', 'f', 'fin',
'g', 'l', 'new', 'z', 'x', 'c', 'fin', 'j']
Output :
['a', 'b', ['c', 'd', ['z', 'x', 'c'], 'f'], 'g', 'l', ['z', 'x', 'c'], 'j']

Related

find max duplicate values in a list

I have a file containing multiple lines in format student code and followed by some answer. e.g
N00000047,B,,D,C,C,B,D,D,C,C,D,,A,B,D,C,,D,A,C,,D,B,D,C
N00000048,B,A,D,D,C,B,,D,C,C,D,B,A,B,A,D,B,D,A,C,A,A,B,D,D
N00000049,A,,D,D,C,B,D,,C,C,D,B,,B,A,C,C,D,A,C,A,A,B,D,D
N00000050,,C,,D,,D,D,A,C,A,A,B,A,B,A,D,B,D,A,C,D,A,B,D,D
N00000051,B,A,B,,C,B,D,A,C,C,D,D,A,B,A,C,B,C,A,,A,A,B,D,B
N00000052,B,A,D,D,,B,D,A,D,,D,B,A,B,A,C,B,C,A,C,A,A,B,D,D
N00000053,B,A,D,D,C,B,D,A,C,C,D,B,B,B,C,C,B,D,A,C,A,C,A,D,D
Now I have to find which questions were skipped by the most students and report, for each of them, the question number, how many students skipped it, and what percentage of the students skipped it.
I split each line, looped over the answers and added every skipped question to a list, but then got stuck finding the values that occur most often in that list (there can be more than one).
This is some expected output:
Question that most people answer incorrectly: 10 - 4 - 0.20, 14 - 4 - 0.20, 16 - 4 - 0.20, 19 - 4 - 0.20, 22 - 4 - 0.20. In format : a - b - c which a is question number, b is how much student was skipped, c is it take how many percentage of total student in class. There are 3 question have the most skipped is 10, 14, 19 and 22 and they all have 4 skipped.
Edited:
I put all skipped question in a list and count for which question have a largest duplicate like this:
# Asker's attempt: collect the items whose occurrence count is the highest.
# NOTE(review): `max_count = list.count` stores the bound method itself rather
# than `item_count`; the next `item_count > max_count` comparison is then
# int vs. method, which matches the TypeError quoted below.
# NOTE(review): the parameter name `list` shadows the built-in type here.
def find_max_count(list):
item_with_max_count = []
max_count = 0
for item in list:
item_count = list.count(item)
if item_count > max_count:
max_count = list.count
for item1 in list:
if list.count(item1) == max_count:
item_with_max_count.append(item1)
return item_with_max_count
but there is an error:
TypeError: '>' not supported between instances of 'int' and 'builtin_function_or_method'
Start by accumulating a dictionary of all responses to each question and a list of all skipped answers:
from collections import defaultdict
responses = defaultdict(list)  # all responses to a given question
skipped = []  # question number of every skipped answer
for record in data.splitlines():
    # The first field is the student ID; the rest are that student's answers.
    student_id, *answers = record.split(',')
    for question_number, answer in enumerate(answers, start=1):
        responses[question_number].append(answer)
        if answer == '':
            skipped.append(question_number)
Next perform the analysis:
from statistics import multimode
# multimode returns every value tied for the highest frequency.
print('Most skipped questions:', multimode(skipped))
print('Answer for questions with more than two or more skips')
for question, answers in responses.items():
    if answers.count('') >= 2:
        print(f'Question {question}: {answers}')
This outputs:
Most skipped questions: [2, 5]
Answer for questions with more than two or more skips
Question 2: ['', 'A', '', 'C', 'A', 'A', 'A']
Question 5: ['C', 'C', 'C', '', 'C', '', 'C']
I'm not certain this is what you wanted (a target output wasn't shown), but this should get you started with the key techniques for the analysis. In particular, the multimode function is super helpful in identifying the most frequent occurrences, including ties for first place. Also, defaultdict is super useful for transposing the data from answers by student to answers by question.
Let's get a dictionary with the student id and the answers.
data = """
N00000047,B,,D,C,C,B,D,D,C,C,D,,A,B,D,C,,D,A,C,,D,B,D,C
N00000048,B,A,D,D,C,B,,D,C,C,D,B,A,B,A,D,B,D,A,C,A,A,B,D,D
N00000049,A,,D,D,C,B,D,,C,C,D,B,,B,A,C,C,D,A,C,A,A,B,D,D
N00000050,,C,,D,,D,D,A,C,A,A,B,A,B,A,D,B,D,A,C,D,A,B,D,D
N00000051,B,A,B,,C,B,D,A,C,C,D,D,A,B,A,C,B,C,A,,A,A,B,D,B
N00000052,B,A,D,D,,B,D,A,D,,D,B,A,B,A,C,B,C,A,C,A,A,B,D,D
N00000053,B,A,D,D,C,B,D,A,C,C,D,B,B,B,C,C,B,D,A,C,A,C,A,D,D
"""
# Map each student ID (first CSV field) to the list of that student's answers.
info = {
    fields[0]: fields[1:]
    for fields in (row.split(',') for row in data.strip().split('\n'))
}
# {'N00000047': ['B', '', 'D', 'C', 'C', 'B', 'D', 'D', 'C', 'C', 'D', '', 'A', 'B', 'D', 'C', '', 'D', 'A', 'C', '', 'D', 'B', 'D', 'C'],
# 'N00000048': ['B', 'A', 'D', 'D', 'C', 'B', '', 'D', 'C', 'C', 'D', 'B', 'A', 'B', 'A', 'D', 'B', 'D', 'A', 'C', 'A', 'A', 'B', 'D', 'D'],
# 'N00000049': ['A', '', 'D', 'D', 'C', 'B', 'D', '', 'C', 'C', 'D', 'B', '', 'B', 'A', 'C', 'C', 'D', 'A', 'C', 'A', 'A', 'B', 'D', 'D'],
# 'N00000050': ['', 'C', '', 'D', '', 'D', 'D', 'A', 'C', 'A', 'A', 'B', 'A', 'B', 'A', 'D', 'B', 'D', 'A', 'C', 'D', 'A', 'B', 'D', 'D'],
# 'N00000051': ['B', 'A', 'B', '', 'C', 'B', 'D', 'A', 'C', 'C', 'D', 'D', 'A', 'B', 'A', 'C', 'B', 'C', 'A', '', 'A', 'A', 'B', 'D', 'B'],
# 'N00000052': ['B', 'A', 'D', 'D', '', 'B', 'D', 'A', 'D', '', 'D', 'B', 'A', 'B', 'A', 'C', 'B', 'C', 'A', 'C', 'A', 'A', 'B', 'D', 'D'],
# 'N00000053': ['B', 'A', 'D', 'D', 'C', 'B', 'D', 'A', 'C', 'C', 'D', 'B', 'B', 'B', 'C', 'C', 'B', 'D', 'A', 'C', 'A', 'C', 'A', 'D', 'D']}
Now, we can use collections.Counter to count up answers.
from collections import Counter
# Map each student ID (first CSV field) to a Counter of that student's answers.
info = {
    fields[0]: Counter(fields[1:])
    for fields in (row.split(',') for row in data.strip().split('\n'))
}
# {'N00000047': Counter({'D': 8, 'C': 7, 'B': 4, '': 4, 'A': 2}),
# 'N00000048': Counter({'D': 8, 'B': 6, 'A': 6, 'C': 4, '': 1}),
# 'N00000049': Counter({'D': 7, 'C': 6, 'A': 5, 'B': 4, '': 3}),
# 'N00000050': Counter({'D': 8, 'A': 7, 'B': 4, '': 3, 'C': 3}),
# 'N00000051': Counter({'B': 7, 'A': 7, 'C': 5, 'D': 4, '': 2}),
# 'N00000052': Counter({'A': 7, 'D': 7, 'B': 6, 'C': 3, '': 2}),
# 'N00000053': Counter({'D': 7, 'C': 7, 'B': 6, 'A': 5})}
From here, finding the statistical data you're looking for should be much easier. For instance:
(Requires Python 3.8+ for := operator.)
# Per-row summary keyed by the first CSV field: the full answer Counter, the
# number of '' (skipped) answers, and that number divided by the row's answer
# count. The walrus bindings let 'skipped' and 'percentage' reuse `c` and `s`.
# NOTE: `q` is the row's ID field (see the sample output), not a question number.
{q: {'all': (c := Counter(a)),
'skipped': (s := c['']),
'percentage': s / len(a)}
for line in data.strip().split('\n')
for q, *a in [line.split(',')]}
# {'N00000047': {'all': Counter({'D': 8, 'C': 7, 'B': 4, '': 4, 'A': 2}), 'skipped': 4, 'percentage': 0.16},
# 'N00000048': {'all': Counter({'D': 8, 'B': 6, 'A': 6, 'C': 4, '': 1}), 'skipped': 1, 'percentage': 0.04},
# 'N00000049': {'all': Counter({'D': 7, 'C': 6, 'A': 5, 'B': 4, '': 3}), 'skipped': 3, 'percentage': 0.12},
# 'N00000050': {'all': Counter({'D': 8, 'A': 7, 'B': 4, '': 3, 'C': 3}), 'skipped': 3, 'percentage': 0.12},
# 'N00000051': {'all': Counter({'B': 7, 'A': 7, 'C': 5, 'D': 4, '': 2}), 'skipped': 2, 'percentage': 0.08},
# 'N00000052': {'all': Counter({'A': 7, 'D': 7, 'B': 6, 'C': 3, '': 2}), 'skipped': 2, 'percentage': 0.08},
# 'N00000053': {'all': Counter({'D': 7, 'C': 7, 'B': 6, 'A': 5}), 'skipped': 0, 'percentage': 0.0}}
Something like?:
cat skipped.csv
N00000047,B,,D,C,C,B,D,D,C,C,D,,A,B,D,C,,D,A,C,,D,B,D,C
N00000048,B,A,D,D,C,B,,D,C,C,D,B,A,B,A,D,B,D,A,C,A,A,B,D,D
N00000049,A,,D,D,C,B,D,,C,C,D,B,,B,A,C,C,D,A,C,A,A,B,D,D
N00000050,,C,,D,,D,D,A,C,A,A,B,A,B,A,D,B,D,A,C,D,A,B,D,D
N00000051,B,A,B,,C,B,D,A,C,C,D,D,A,B,A,C,B,C,A,,A,A,B,D,B
N00000052,B,A,D,D,,B,D,A,D,,D,B,A,B,A,C,B,C,A,C,A,A,B,D,D
N00000053,B,A,D,D,C,B,D,A,C,C,D,B,B,B,C,C,B,D,A,C,A,C,A,D,D
import csv
from collections import Counter
# Read one row per student and record how many answers were left empty.
with open("skipped.csv", "r" , newline="") as csv_file:
    reader = csv.reader(csv_file)
    l = []
    for line in reader:
        # The first field is the ID; the remaining fields are the answers.
        d = {"q": line.pop(0)}
        ct = Counter(line)
        # In Python 3.10+ you can do ct.total() instead of below.
        q_sum = sum(ct.values())
        skipped = ct['']
        perc = skipped/q_sum
        d.update({"skipped": skipped, "percentage": perc})
        l.append(d)
# Rows with the most skipped answers first.
l.sort(key=lambda x: x['skipped'], reverse=True)
l
[{'q': 'N00000047', 'skipped': 4, 'percentage': 0.16},
{'q': 'N00000049', 'skipped': 3, 'percentage': 0.12},
{'q': 'N00000050', 'skipped': 3, 'percentage': 0.12},
{'q': 'N00000051', 'skipped': 2, 'percentage': 0.08},
{'q': 'N00000052', 'skipped': 2, 'percentage': 0.08},
{'q': 'N00000048', 'skipped': 1, 'percentage': 0.04},
{'q': 'N00000053', 'skipped': 0, 'percentage': 0.0}]

Merge two lists alternatively n elements

I have two lists with same length:
list_1 = [1,2,3,4,5,6]
list_2 = ['a','b','c','d','e','f']
I need to merge these lists based on n as below:
if n = 1: result = [1,'a',2,'b',3,'c',4,'d',5,'e',6,'f']
if n = 2: result = [1,2,'a','b',3,4,'c','d',5,6,'e','f']
if n = 3: result = [1,2,3,'a','b','c',4,5,6,'d','e','f']
if n = 4: result = [1,2,3,4,'a','b','c','d',5,6,'e','f'], and so on
Is there any pythonic way to achieve this?
So far I only know to do if n = 1 with list comprehension:
result = [x for sublist in zip(list_1, list_2) for x in sublist]
I don't know how to do it dynamically.
Try itertools(zip_longest and chain.from_iterable) with a list-comprehension, one liner:
import itertools
def merge(l1, l2, n):
    """Interleave *l1* and *l2* in chunks of *n* elements.

    Both inputs are cut into n-sized chunks with the
    ``zip_longest(*[iter(seq)] * n)`` grouping idiom and the chunks are
    emitted alternately, starting with *l1*.

    A private sentinel pads the last, shorter chunk and is filtered out by
    identity, so legitimate falsy items (0, '', None, False) are kept; the
    previous ``if j`` filter discarded them.

    Pairing stops at the shorter chunk sequence, as before, so the inputs
    are expected to have the same length.
    """
    missing = object()
    chunks1 = itertools.zip_longest(*[iter(l1)] * n, fillvalue=missing)
    chunks2 = itertools.zip_longest(*[iter(l2)] * n, fillvalue=missing)
    return [
        item
        for pair in zip(chunks1, chunks2)
        for item in itertools.chain.from_iterable(pair)
        if item is not missing
    ]
list_1 = [1, 2, 3, 4, 5, 6]
list_2 = ["a", "b", "c", "d", "e", "f"]
print(merge(list_1, list_2, 2))
# [1, 2, 'a', 'b', 3, 4, 'c', 'd', 5, 6, 'e', 'f']
print(merge(list_1, list_2, 3))
# [1, 2, 3, 'a', 'b', 'c', 4, 5, 6, 'd', 'e', 'f']
print(merge(list_1, list_2, 4))
# [1, 2, 3, 4, 'a', 'b', 'c', 'd', 5, 6, 'e', 'f']
A possible reference: How does zip(*[iter(s)]*n) work in Python?
Alternative answer using generators:
list_1 = [1,2,3,4,5,6]
list_2 = ['a','b','c','d','e','f']
def merge(a, b, n):
    """Yield items of *a* and *b* alternately in chunks of *n*.

    Slices are used instead of manual index bookkeeping, so a final chunk
    shorter than *n* is emitted as-is instead of raising IndexError (for
    example n=4 with six-element lists).

    Iteration is driven by ``len(a)``; items of *b* beyond that length are
    not emitted, so the sequences are expected to have the same size.

    Raises:
        ValueError: if *n* is smaller than 1 (raised on first iteration).
    """
    if n < 1:
        raise ValueError("n must be a positive integer")
    for start in range(0, len(a), n):
        yield from a[start:start + n]
        yield from b[start:start + n]
result = [x for x in merge(list_1, list_2, 1)]
assert result == [1, 'a', 2, 'b', 3, 'c', 4, 'd', 5, 'e', 6, 'f']
result = [x for x in merge(list_1, list_2, 2)]
assert result == [1, 2, 'a', 'b', 3, 4, 'c', 'd', 5, 6, 'e', 'f']
result = [x for x in merge(list_1, list_2, 3)]
assert result == [1,2,3,'a','b','c',4,5,6,'d','e','f']
Only works for lists with the same size and probably has some more pitfalls.
Edit: just for fun, here's a version without managing an index.
def merge(a, b, n):
    """Yield items of *a* and *b* alternately in chunks of *n*, index-free.

    Each round takes up to *n* items from each iterable. A short final chunk
    of *a* no longer ends the generator before the matching chunk of *b* has
    been emitted (previously n=4 with six-element inputs lost the tail of
    *b*). Iteration ends once both iterables are exhausted, so leftover items
    of a longer input are emitted rather than dropped.

    With n == 0 nothing is yielded.
    """
    from itertools import islice  # local import keeps the snippet self-contained

    it_a, it_b = iter(a), iter(b)
    while True:
        chunk_a = list(islice(it_a, n))
        chunk_b = list(islice(it_b, n))
        if not (chunk_a or chunk_b):
            return
        yield from chunk_a
        yield from chunk_b
def main(order, iArr, sArr):
    """Merge *iArr* and *sArr* following the pattern given in *order*.

    For every entry of *order*, 'i' takes the next unused item of *iArr*
    and any other value takes the next unused item of *sArr*.

    The inputs are read through iterators, so the caller's lists are left
    untouched (the previous version emptied them with repeated ``remove``
    calls, which was also quadratic).

    Raises:
        IndexError: if *order* asks for more items than a list provides.
    """
    i_items = iter(iArr)
    s_items = iter(sArr)
    arr = []
    for kind in order:
        source = i_items if kind == 'i' else s_items
        try:
            arr.append(next(source))
        except StopIteration:
            # Keep the exception type callers saw from indexing an empty list.
            raise IndexError("list index out of range") from None
    return arr
order1 = ['i', 's', 'i', 's', 'i', 's', 'i', 's', 'i', 's', 'i', 's']
order2 = ['i', 'i', 's', 's', 'i', 'i', 's', 's', 'i', 'i', 's', 's']
order3 = ['i', 'i', 'i', 's', 's', 's', 'i', 'i', 'i', 's', 's', 's']
list_1 = [1,2,3,4,5,6]
list_2 = ['a', 'b', 'c', 'd', 'e', 'f']
print(main(order2, list_1, list_2))

Changing 2-dimensional list to standard matrix form

org = [['A', 'a', 1],
['A', 'b', 2],
['A', 'c', 3],
['B', 'a', 4],
['B', 'b', 5],
['B', 'c', 6],
['C', 'a', 7],
['C', 'b', 8],
['C', 'c', 9]]
I want to change the 'org' to the standard matrix form like below.
transform = [['\t','A', 'B', 'C'],
['a', 1, 4, 7],
['b', 2, 5, 8],
['c', 3, 6, 9]]
I made a small function that converts this.
The code I wrote is below:
import numpy as np
# Asker's conversion of [column_label, row_label, value] triples into a
# labelled table that is printed row by row.
# `column` collects the distinct first fields (after a leading '\t' corner
# cell) and `row` the distinct second fields, both in first-seen order.
# NOTE(review): the values are appended in input order and reshaped row-major
# to (len(row), len(column) - 1), so each output row holds consecutive input
# values - confirm this matches the intended orientation for your data.
def matrix(li):
column = ['\t']
row = []
result = []
rest = []
for i in li:
if i[0] not in column:
column.append(i[0])
if i[1] not in row:
row.append(i[1])
result.append(column)
for i in li:
for r in row:
if r == i[1]:
rest.append([i[2]])
rest = np.array(rest).reshape((len(row),len(column)-1)).tolist()
for i in range(len(rest)):
rest[i] = [row[i]]+rest[i]
result += rest
for i in result:
print(i)
matrix(org)
The result was this:
>>>['\t', 'school', 'kids', 'really']
[72, 0.008962252017017516, 0.04770759762717251, 0.08993156334317577]
[224, 0.004180594204995023, 0.04450803342634945, 0.04195010047081213]
[385, 0.0021807662921382335, 0.023217182598008267, 0.06564858527712682]
I don't think this is efficient since I use so many for loops.
Is there any efficient way to do this?
Since you are using 3rd party libraries, this is a task well suited for pandas.
There is some messy, but not inefficient, work to incorporate index and columns as per your requirement.
import pandas as pd

org = [['A', 'a', 1],
       ['A', 'b', 2],
       ['A', 'c', 3],
       ['B', 'a', 4],
       ['B', 'b', 5],
       ['B', 'c', 6],
       ['C', 'a', 7],
       ['C', 'b', 8],
       ['C', 'c', 9]]
df = pd.DataFrame(org)
# pivot() only reshapes, so the integer values stay integers; pivot_table()
# would aggregate with the mean and return floats (1.0, 4.0, ...).
# Each (column 0, column 1) pair must be unique for pivot().
pvt = df.pivot(index=0, columns=1, values=2)
# Row labels of the result: the corner cell followed by the second-field labels.
cols = ['\t'] + pvt.columns.tolist()
# Transpose so that each row corresponds to one second-field label.
res = pvt.values.T.tolist()
# The header row holds the first-field labels.
res.insert(0, pvt.index.tolist())
res = [[i]+j for i, j in zip(cols, res)]
print(res)
[['\t', 'A', 'B', 'C'],
['a', 1, 4, 7],
['b', 2, 5, 8],
['c', 3, 6, 9]]
Here's another "manual" way using only numpy:
# Manual pivot with numpy only; as noted below the code, it requires the
# input rows to be strictly ordered.
org_arr = np.array(org)
# Distinct labels of the first and of the second column.
key1 = np.unique(org_arr[:,0])
key2 = np.unique(org_arr[:,1])
# Third column reshaped to (len(key1), len(key2)), then transposed so that
# each row corresponds to one key2 label.
values = org_arr[:,2].reshape((len(key1),len(key2))).transpose()
# Assemble the header row and the labelled value rows into one 2-D array.
np.block([
["\t", key1 ],
[key2[:,None], values]
])
""" # alternatively, for numpy < 1.13.0
np.vstack((
np.hstack(("\t", key1)),
np.hstack((key2[:, None], values))
))
"""
For simplicity, it requires the input matrix to be strictly ordered (first col is major and ascending ...).
Output:
Out[58]:
array([['\t', 'A', 'B', 'C'],
['a', '1', '4', '7'],
['b', '2', '5', '8'],
['c', '3', '6', '9']],
dtype='<U1')

Python merge all child list into parent list and remove duplicates

I have a huge list of lists.
I want to Merge all child lists to parent list
and remove duplicates item from parent list after merge.
What is the optimized way to do this?
For Example:
x = [['a', 'b', 'c', 2, 4], ['x', 1, 2, 3, 'z'], ['z', 'b', 'y', 'a', 'x']]
How we can get the value of x like:
['a', 'b', 'c', 1, 2, 3, 4, 'z', 'y', 'x']
Use set and chain:
# The comma between 'a' and 'x' matters: adjacent string literals are
# concatenated, so 'a' 'x' would silently become the single item 'ax'.
x = [['a', 'b', 'c', 2, 4], ['x', 1, 2, 3, 'z'], ['z', 'b', 'y', 'a', 'x']]
from itertools import chain
# chain.from_iterable flattens one level; set() removes the duplicates
# (the order of the resulting list is arbitrary).
result = list(set(chain.from_iterable(x)))
print(result)
Use set
x = [['a', 'b', 'c', 2, 4], ['x', 1, 2, 3, 'z'], ['z', 'b', 'y', 'a', 'x']]
>>> list(set([item for sublist in x for item in sublist]))
[1, 2, 3, 4, 'z', 'a', 'b', 'c', 'x', 'y']
First you can flatten the list of lists into a single list and then apply set to that list.
# The comma between 'a' and 'x' matters: adjacent string literals are
# concatenated, so 'a' 'x' would silently become the single item 'ax'.
x = [['a', 'b', 'c', 2, 4], ['x', 1, 2, 3, 'z'], ['z', 'b', 'y', 'a', 'x']]
new_ls = []
for ls in x:
    # Flatten one level by appending every child list's items.
    new_ls.extend(ls)
print(list(set(new_ls)))
# output (the element order of a set is arbitrary):
# [1, 2, 3, 4, 'b', 'y', 'x', 'c', 'z', 'a']

Natural Join Implementation Python

I am working on implementing natural join in python. The first two lines show the tables attributes and the next two lines each tables' tuples or rows.
Expected Output:
[['A', 1, 'A', 'a', 'A'],
['A', 1, 'A', 'a', 'Y'],
['A', 1, 'Y', 'a', 'A'],
['A', 1, 'Y', 'a', 'Y'],
['S', 2, 'B', 'b', 'S']]
And what I got:
[['A', 1, 'A', 'a', 'A', 'Y'],
['A', 1, 'A', 'a', 'A', 'Y']]
I have looked through the code and everything seems to be right, I would appreciate any help.
t1atts = ('A', 'B', 'C', 'D')
t2atts = ('B', 'D', 'E')
t1tuples = [['A', 1, 'A', 'a'],
['B', 2, 'Y', 'a'],
['Y', 4, 'B', 'b'],
['A', 1, 'Y', 'a'],
['S', 2, 'B', 'b']]
t2tuples = [[1, 'a', 'A'],
[3, 'a', 'B'],
[1, 'a', 'Y'],
[2, 'b', 'S'],
[3, 'b', 'E']]
# Return two parallel lists: for every attribute name present in both
# t1atts and t2atts, its position in t1atts and its position in t2atts.
def findindices(t1atts, t2atts):
t1index=[]
t2index=[]
for index, att in enumerate(t1atts):
for index2, att2 in enumerate(t2atts):
if att == att2:
t1index.append(index)
t2index.append(index2)
return t1index, t2index
# Asker's join loop (it produces the wrong output quoted above).
# NOTE(review): `tpl2` is set to 0 only once, before the outer loop, and is
# never reset, so after the first t1 tuple the inner `while` has nothing left
# to scan.
# NOTE(review): `temp = tpl` binds the same list object, so `tpl += extravals`
# extends the row stored in t1tuples in place and `tpl = temp` does not undo it.
# NOTE(review): `print result` is Python 2 statement syntax.
def main():
tpl=0; tpl2=0; i=0; j=0; count=0; result=[]
t1index, t2index = findindices(t1atts, t2atts)
for tpl in t1tuples:
while tpl2 in range(len(t2tuples)):
i=0; j=0
while (i in range(len(t1index))) and (j in range(len(t2index))):
if tpl[t1index[i]] != t2tuples[tpl2][t2index[j]]:
i=len(t1index)
j=len(t1index)
else:
count+=1
i+=1
j+=1
if count == len(t1index):
extravals = [val for index, val in enumerate(t2tuples[tpl2]) if index not in t2index]
temp = tpl
tpl += extravals
result.append(tpl)
tpl = temp
count=0
tpl2+=1
print result
Here's what I came up with. I'd do some more refactoring, etc before calling it done
import pprint
t1atts = ('A', 'B', 'C', 'D')
t2atts = ('B', 'D', 'E')
t1tuples = [
    ['A', 1, 'A', 'a'],
    ['B', 2, 'Y', 'a'],
    ['Y', 4, 'B', 'b'],
    ['A', 1, 'Y', 'a'],
    ['S', 2, 'B', 'b']]
t2tuples = [
    [1, 'a', 'A'],
    [3, 'a', 'B'],
    [1, 'a', 'Y'],
    [2, 'b', 'S'],
    [3, 'b', 'E']]
t1columns = set(t1atts)
t2columns = set(t2atts)
# Column name -> position within a row of each table.
t1map = {k: i for i, k in enumerate(t1atts)}
t2map = {k: i for i, k in enumerate(t2atts)}
# Columns present in both tables: rows are joined where all of them agree.
join_on = t1columns & t2columns
# Columns that only t2 contributes, kept in t2's declared order so the
# layout of the joined rows is deterministic (a set has no defined order).
diff = [name for name in t2atts if name not in join_on]


def match(row1, row2):
    """Return True when *row1* (t1) and *row2* (t2) agree on every shared column."""
    return all(row1[t1map[rn]] == row2[t2map[rn]] for rn in join_on)


results = []
for t1row in t1tuples:
    for t2row in t2tuples:
        if match(t1row, t2row):
            # Copy the t1 row so the source table is not modified.
            row = t1row[:]
            for rn in diff:
                row.append(t2row[t2map[rn]])
            results.append(row)
pprint.pprint(results)
And I get the expected results:
[['A', 1, 'A', 'a', 'A'],
['A', 1, 'A', 'a', 'Y'],
['A', 1, 'Y', 'a', 'A'],
['A', 1, 'Y', 'a', 'Y'],
['S', 2, 'B', 'b', 'S']]
Ok, here is the solution please verify and let me know if it works for you:
I changed the naming a little bit to make it easier for me to follow:
#!/usr/bin/python
table1 = ('A', 'B', 'C', 'D')
table2 = ('B', 'D', 'E')
row1 = [['A', 1, 'A', 'a'],
['B', 2, 'Y', 'a'],
['Y', 4, 'B', 'b'],
['A', 1, 'Y', 'a'],
['S', 2, 'B', 'b']]
row2 = [[1, 'a', 'A'],
[3, 'a', 'B'],
[1, 'a', 'Y'],
[2, 'b', 'S'],
[3, 'b', 'E']]
def findindices(table1, table2):
    """Return the positions of the shared attribute names in both tables.

    Args:
        table1: attribute names of the first table.
        table2: attribute names of the second table.

    Returns:
        Two parallel lists ``(tup_index1, tup_index2)``: entry k of each
        list is the position of the k-th shared attribute in the
        corresponding table. Shared attributes are listed in *table1*
        order, so the result is deterministic.
    """
    # dict.fromkeys drops duplicate names while keeping first-seen order.
    inter = [name for name in dict.fromkeys(table1) if name in table2]
    tup_index1 = [table1.index(x) for x in inter]
    tup_index2 = [table2.index(x) for x in inter]
    return tup_index1, tup_index2
def main():
    """Natural-join the module-level ``row1`` and ``row2`` tables.

    Rows are combined when they agree on every attribute shared by
    ``table1`` and ``table2``; each joined row is the ``row1`` row followed
    by the ``row2`` values of the attributes that only ``table2`` has.

    Returns:
        The list of joined rows.
    """
    tup_index1, tup_index2 = findindices(table1, table2)
    # Materialise the index pairs: on Python 3 a bare zip() is a one-shot
    # iterator, which would be exhausted after the first row comparison and
    # make every later pair of rows look like a match.
    merge_tup = list(zip(tup_index1, tup_index2))
    # Positions of the table2 columns that are not part of the join key.
    extra_cols = [i for i in range(len(table2)) if i not in tup_index2]
    final_lol = []
    for tup1 in row1:
        for tup2 in row2:
            if all(tup1[i1] == tup2[i2] for i1, i2 in merge_tup):
                final_lol.append(list(tup1) + [tup2[i] for i in extra_cols])
    return final_lol
# Script entry point: pretty-print the joined rows.
if __name__ == '__main__':
    import pprint
    pprint.pprint(main())
Output:
[['A', 1, 'A', 'a', 'A'],
['A', 1, 'A', 'a', 'Y'],
['A', 1, 'Y', 'a', 'A'],
['A', 1, 'Y', 'a', 'Y'],
['S', 2, 'B', 'b', 'S']]

Categories