List of tuples from heirarchical nested lists - python

Having an outer list of inner elements, each said inner element being
a flat/nested list. Each said inner list has a nesting structure that matches the inner list in the preceding outer cell. Meaning that each primitive value in a list either corresponds to a primitive value or to a list - in the following cell list (recursively applied). Thus, each inner list has a depth that is equal to or exceeds by 1 the depth of the element in the preceding cell.
(Notice that the first cell element can start as a nested list of any depth).
Example of the above:
[
[[1, 2, [3, 4]], 1 ],
[[3, [4, 5], [6, 7]], [5, 4] ],
[[5, [6, 7], [8, 9]], [7, [8, 6]] ],
]
It is desired to unfold the nested lists into a list of tuples, where each value is combined either with the parent value, or the corresponding list element if parent is list (with order being maintained). So for the above example list, the output should be:
[
(1, 3, 5),
(2, 4, 6),
(2, 5, 7),
(3, 6, 8),
(4, 7, 9),
(1, 5, 7),
(1, 4, 8),
(1, 4, 6),
]
Note: this question is an expansion on a previous question here, but unlike the linked question, the desired tuples here are flat.

Ok, how about this:
x = [
[[1, 2, [3, 4]], 1 ],
[[3, [4, 5], [6, 7]], [5, 4] ],
[[5, [6, 7], [8, 9]], [7, [8, 6]] ],
]
from collections import defaultdict
def g(x):
paths = defaultdict(lambda: [])
def calculate_paths(item, counts):
if type(item) is list:
for i, el in enumerate(item):
calculate_paths(el, counts + (i,))
else:
paths[counts].append(item)
def recreate_values(k, initial_len, desired_len):
if len(paths[k]) + initial_len == desired_len:
yield paths[k]
else:
for ks in keysets:
if len(ks) > len(k) and ks[0:len(k)] == k:
for ks1 in recreate_values(ks, initial_len + len(paths[k]), desired_len):
yield paths[k] + ks1
for lst in x:
calculate_paths(lst, (0,))
keysets = sorted(list(paths.keys()))
for k in keysets:
yield from recreate_values(k, 0, len(x))
>>> import pprint
>>> pprint.pprint(list(g(x)))
[[1, 3, 5],
[2, 4, 6],
[2, 5, 7],
[3, 6, 8],
[4, 7, 9],
[1, 5, 7],
[1, 4, 8],
[1, 4, 6]]
Works by creating a "path" for each number in the structure, which is a tuple which identifies how it fits in its particular row.
(Original attempt):
If it's always three levels, then something like this?
def listify(lst):
max_len = max(len(item) if type(item) is list else 1 for item in lst)
yield from zip(*[item if type(item) is list else [item] * max_len for item in lst])
def f():
for i in listify(x):
for j in listify(i):
for k in listify(j):
yield k
>>> list(f())

This is one heck of a problem to solve :-)
I did managed to get the solution for different levels as expected. However, I made one assumption to do that:
The last column of the input is the pointer to other columns
If that is no issue, the following solution will work fine :-)
input = [
[[1, 2, [3, 4]], 1 ],
[[3, [4, 5], [6, 7]], [5, 4] ],
[[5, [6, 7], [8, 9]], [7, [8, 6]] ],
]
def level_flatten(level):
"""
This method compares the elements and their types of last column and
makes changes to other columns accordingly
"""
for k, l in level.items():
size = len(l[-1]) if isinstance(l[-1], list) else 1
# Mostly l[-1] is going to be a list; this is for just in case
elements = l[-1]
for i in range(-1, -len(l)-1, -1):
elem = l[i]
if isinstance(l[i], int):
l[i] = [elem] * size
else:
for j in range(len(elem)):
if not isinstance(elem[j], type(elements[j])):
# For a list found in elements[j], there is a int at l[i][j]
elem[j] = [elem[j]] * len(elements[j])
return level
level = {}
for i in range(len(input[0])):
level[i] = []
for j in input:
level[i].append(j[i])
for k, l in level.items():
for i in range(len(l[-1])):
level = level_flatten(level)
total_flat = []
for item in l:
row = []
for x in item:
if isinstance(x, list):
row += x
else:
row.append(x)
total_flat.append(row)
level[k] = total_flat
output_list = []
for i in range(len(level)):# For maintaining the order
output_list += zip(*level[i])
print output_list
I know this is not a pretty solution and could be optimized further. I am trying to think of a better algorithm than this. Will update if I gets to a better solution :-)

I first tried to solve this using a 2d matrix but turned out it's simpler to iterate over the last row dividing the column segments above it:
def unfold(ldata):
'''
ldata: list of hierarchical lists.
technique: repeatedly flatten bottom row one level at a time, unpacking lists or
adding repeats in the column above at the same time.
convention: n=1 are primitives, n>=2 are lists.
'''
has_lists = True
while has_lists:
has_lists = False
for i, elm in enumerate(ldata[-1]):
if type(elm) is list:
has_lists = True
ldata[-1][i:i+1] = ldata[-1][i] # unpack
for k in range(0, len(ldata)-1): # over corresponding items in above column
if type(ldata[k][i]) is list:
ldata[k][i:i+1] = ldata[k][i] # unpack
else:
ldata[k][i:i+1] = [ldata[k][i]]*len(elm) # add repeats
return list(zip(*ldata))
x = [
[[1, 2, [3, 4]], 1 ],
[[3, [4, 5], [6, 7]], [5, 4] ],
[[5, [6, 7], [8, 9]], [7, [8, 6]] ],
]
from pprint import pprint
pprint(unfold(x))
>>>
[(1, 3, 5),
(2, 4, 6),
(2, 5, 7),
(3, 6, 8),
(4, 7, 9),
(1, 5, 7),
(1, 4, 8),
(1, 4, 6)]

Related

List comprehension syntax for pool.startmap_async()

Looking at an example here:
https://www.machinelearningplus.com/python/parallel-processing-python/
There is a function definition which is to be parallelised:
# Step 1: Redefine, to accept `i`, the iteration number
def howmany_within_range2(i, row, minimum, maximum):
"""Returns how many numbers lie within `maximum` and `minimum` in a given `row`"""
count = 0
for n in row:
if minimum <= n <= maximum:
count = count + 1
return (i, count)
The starmap_async example is give as below:
results = pool.starmap_async(howmany_within_range2, [(i, row, 4, 8) for i, row in enumerate(data)]).get()
I am a bit confused by this syntax here, particularly the "i" parameter and how this enumerate syntax works.
Also the apply_asyncy() example uses a pool.join() statement, but the map_async() statement doesn't use one?
Breaking this down a little,
data = [
[1, 2, 3],
[4, 5, 6],
[7, 8, 9],
]
arguments = [(i, row, 4, 8) for i, row in enumerate(data)]
print(arguments)
outputs (formatted)
[
(0, [1, 2, 3], 4, 8),
(1, [4, 5, 6], 4, 8),
(2, [7, 8, 9], 4, 8),
]
which are the tuples howmany_within_range2 will be executed with, i.e.
howmany_within_range2(0, [1, 2, 3], 4, 8)
howmany_within_range2(1, [4, 5, 6], 4, 8)
howmany_within_range2(2, [7, 8, 9], 4, 8)
but in parallel.
enumerate is used here to easily access the row index of the row in the data list; otherwise you'd just get a bunch of results without an easy way to associate them with the original data rows.

Subtract previous list from current list in a list of lists loop

I have a list of dataframes with data duplicating in every next dataframe within list which I need to subtract between themselves
the_list[0] = [1, 2, 3]
the_list[1] = [1, 2, 3, 4, 5, 6, 7]
There are also df headers. Dataframes are only different in number of rows.
Wanted solution:
the_list[0] = [1, 2, 3]
the_list[1] = [4, 5, 6, 7]
Due to the fact that my list of lists, the_list, contains several dataframes, I have to work backward and go from the last df to first with first remaining intact.
My current code (estwin is the_list):
estwin = [df1, df2, df3, df4]
output=([])
estwin.reverse()
for i in range(len(estwin) -1):
difference = Diff(estwin[i], estwin[i+1])
output.append(difference)
return(output)
def Diff(li_bigger, li_smaller):
c = [x for x in li_bigger if x not in li_smaller]
return (c)
Currently, the result is an empty list. I need an updated the_list that contains only the differences (no duplicate values between lists).
You should not need to go backward for this problem, it is easier to keep track of what you have already seen going forward.
Keep a set that gets updated with new items as you traverse through each list, and use it to filter out the items that should be present in the output.
list1 = [1,2,3]
list2 = [1,2,3,4,5,6,7]
estwin = [list1, list2]
lookup = set() #to check which items/numbers have already been seen.
output = []
for lst in estwin:
updated_lst = [i for i in lst if i not in lookup] #only new items present
lookup.update(updated_lst)
output.append(updated_lst)
print(output) #[[1, 2, 3], [4, 5, 6, 7]]
Your code is not runnable, but if I guess what you meant to write, it works, except that you have one bug in your algorithm:
the_list = [
[1, 2, 3],
[1, 2, 3, 4, 5, 6, 7],
[1, 2, 3, 4, 5, 6, 7, 8, 9]
]
def process(lists):
output = []
lists.reverse()
for i in range(len(lists)-1):
difference = diff(lists[i], lists[i+1])
output.append(difference)
# BUGFIX: Always add first list (now last becuase of reverse)
output.append(lists[-1])
output.reverse()
return output
def diff(li_bigger, li_smaller):
return [x for x in li_bigger if x not in li_smaller]
print(the_list)
print(process(the_list))
Output:
[[1, 2, 3], [1, 2, 3, 4, 5, 6, 7], [1, 2, 3, 4, 5, 6, 7, 8, 9]]
[[1, 2, 3], [4, 5, 6, 7], [8, 9]]
One-liner:
from itertools import chain
l = [[1, 2], [1, 2, 3], [1, 2, 3, 4], [1, 2, 3, 4, 5]]
new_l = [sorted(list(set(v).difference(chain.from_iterable(l[:num]))))
for num, v in enumerate(l)]
print(new_l)
# [[1, 2], [3], [4], [5]]

creating combinations from a list of lists

Im trying to figure out how to take a list of lists of integers and create a new list that contains combinations from the list of lists. I want the combination to start with a value from the first list and then respectively take 1 from each of the subsequent lists, only if the value is greater than the previous list.
l=[[1,2,3],[4,8],[5,10]]
# the answer would look like this
correct=[[1,4,5],[1,4,10],[1,8,10],[2,4,5],[2,4,10],[2,8,10],[3,4,5],[3,4,10],[3,8,10]]
>>> from itertools import product
...
...
... def combos(lst):
... result = []
... for p in product(*lst):
... if all(a < b for a, b in zip(p, p[1:])):
... result.append(list(p))
... return result
...
>>> lst = [[1, 2, 3], [4, 8], [5, 10]]
>>> correct = [[1, 4, 5], [1, 4, 10], [1, 8, 10], [2, 4, 5], [2, 4, 10],
... [2, 8, 10], [3, 4, 5], [3, 4, 10], [3, 8, 10]]
>>> combos(lst) == correct
True
List comprehension is probably a great way to go. It works nicely because of your constraints. You probably want something like:
[[i,j,k] for i in l[0] for j in l[1] if j>i for k in l[2] if k>j]
>>> [[1, 4, 5],
[1, 4, 10],
[1, 8, 10],
[2, 4, 5],
[2, 4, 10],
[2, 8, 10],
[3, 4, 5],
[3, 4, 10],
[3, 8, 10]]
This makes a list of lists of the form [i,j,k] for all the i's in l[0] for all the j's in l[1] if j>i and for all the k's in l[2] if k>j (since we already know that j>i at this point)
However, the code above only works for an input list of list of length 3. Took me a little bit, but this recursive approach should work for a input list of any length
def list_of_lists(in_list):
full_list=[]
def recurse(so_far, l):
if l==len(in_list):
return so_far
next_list = in_list[l]
for i in next_list:
if i>so_far[-1]:
new_list = recurse(so_far.copy()+[i], l+1)
if new_list:
full_list.append(new_list)
for i in in_list[0]:
recurse([i],1)
return full_list
l=[[1,2,3],
[4,8],
[5,10]]
ansList = []
for i in range(len(l[0])):
for j in range(len(l[1])):
for k in range(len(l[2])):
if l[0][i]<l[1][j] and l[1][j]<l[2][k]:
ansList.append([l[0][i],l[1][j],l[2][k]])
print(ansList)

Splitting nested list of floats to columns [duplicate]

Let's take:
l = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
The result I'm looking for is
r = [[1, 4, 7], [2, 5, 8], [3, 6, 9]]
and not
r = [(1, 4, 7), (2, 5, 8), (3, 6, 9)]
Python 3:
# short circuits at shortest nested list if table is jagged:
list(map(list, zip(*l)))
# discards no data if jagged and fills short nested lists with None
list(map(list, itertools.zip_longest(*l, fillvalue=None)))
Python 2:
map(list, zip(*l))
[[1, 4, 7], [2, 5, 8], [3, 6, 9]]
Explanation:
There are two things we need to know to understand what's going on:
The signature of zip: zip(*iterables) This means zip expects an arbitrary number of arguments each of which must be iterable. E.g. zip([1, 2], [3, 4], [5, 6]).
Unpacked argument lists: Given a sequence of arguments args, f(*args) will call f such that each element in args is a separate positional argument of f.
itertools.zip_longest does not discard any data if the number of elements of the nested lists are not the same (homogenous), and instead fills in the shorter nested lists then zips them up.
Coming back to the input from the question l = [[1, 2, 3], [4, 5, 6], [7, 8, 9]], zip(*l) would be equivalent to zip([1, 2, 3], [4, 5, 6], [7, 8, 9]). The rest is just making sure the result is a list of lists instead of a list of tuples.
Equivalently to Jena's solution:
>>> l=[[1,2,3],[4,5,6],[7,8,9]]
>>> [list(i) for i in zip(*l)]
... [[1, 4, 7], [2, 5, 8], [3, 6, 9]]
One way to do it is with NumPy transpose. For a list, a:
>>> import numpy as np
>>> np.array(l).T.tolist()
[[1, 4, 7], [2, 5, 8], [3, 6, 9]]
Or another one without zip (python < 3):
>>> map(list, map(None, *l))
[[1, 4, 7], [2, 5, 8], [3, 6, 9]]
Or for python >= 3:
>>> list(map(lambda *x: list(x), *l))
[[1, 4, 7], [2, 5, 8], [3, 6, 9]]
just for fun, valid rectangles and assuming that m[0] exists
>>> m = [[1,2,3],[4,5,6],[7,8,9]]
>>> [[row[i] for row in m] for i in range(len(m[0]))]
[[1, 4, 7], [2, 5, 8], [3, 6, 9]]
Methods 1 and 2 work in Python 2 or 3, and they work on ragged, rectangular 2D lists. That means the inner lists do not need to have the same lengths as each other (ragged) or as the outer lists (rectangular). The other methods, well, it's complicated.
the setup
import itertools
import six
list_list = [[1,2,3], [4,5,6, 6.1, 6.2, 6.3], [7,8,9]]
method 1 — map(), zip_longest()
>>> list(map(list, six.moves.zip_longest(*list_list, fillvalue='-')))
[[1, 4, 7], [2, 5, 8], [3, 6, 9], ['-', 6.1, '-'], ['-', 6.2, '-'], ['-', 6.3, '-']]
six.moves.zip_longest() becomes
itertools.izip_longest() in Python 2
itertools.zip_longest() in Python 3
The default fillvalue is None. Thanks to #jena's answer, where map() is changing the inner tuples to lists. Here it is turning iterators into lists. Thanks to #Oregano's and #badp's comments.
In Python 3, pass the result through list() to get the same 2D list as method 2.
method 2 — list comprehension, zip_longest()
>>> [list(row) for row in six.moves.zip_longest(*list_list, fillvalue='-')]
[[1, 4, 7], [2, 5, 8], [3, 6, 9], ['-', 6.1, '-'], ['-', 6.2, '-'], ['-', 6.3, '-']]
The #inspectorG4dget alternative.
method 3 — map() of map() — broken in Python 3.6
>>> map(list, map(None, *list_list))
[[1, 4, 7], [2, 5, 8], [3, 6, 9], [None, 6.1, None], [None, 6.2, None], [None, 6.3, None]]
This extraordinarily compact #SiggyF second alternative works with ragged 2D lists, unlike his first code which uses numpy to transpose and pass through ragged lists. But None has to be the fill value. (No, the None passed to the inner map() is not the fill value. It means there is no function to process each column. The columns are just passed through to the outer map() which converts them from tuples to lists.)
Somewhere in Python 3, map() stopped putting up with all this abuse: the first parameter cannot be None, and ragged iterators are just truncated to the shortest. The other methods still work because this only applies to the inner map().
method 4 — map() of map() revisited
>>> list(map(list, map(lambda *args: args, *list_list)))
[[1, 4, 7], [2, 5, 8], [3, 6, 9]] // Python 2.7
[[1, 4, 7], [2, 5, 8], [3, 6, 9], [None, 6.1, None], [None, 6.2, None], [None, 6.3, None]] // 3.6+
Alas the ragged rows do NOT become ragged columns in Python 3, they are just truncated. Boo hoo progress.
Three options to choose from:
1. Map with Zip
solution1 = map(list, zip(*l))
2. List Comprehension
solution2 = [list(i) for i in zip(*l)]
3. For Loop Appending
solution3 = []
for i in zip(*l):
solution3.append((list(i)))
And to view the results:
print(*solution1)
print(*solution2)
print(*solution3)
# [1, 4, 7], [2, 5, 8], [3, 6, 9]
import numpy as np
r = list(map(list, np.transpose(l)))
One more way for square matrix. No numpy, nor itertools, use (effective) in-place elements exchange.
def transpose(m):
for i in range(1, len(m)):
for j in range(i):
m[i][j], m[j][i] = m[j][i], m[i][j]
Maybe not the most elegant solution, but here's a solution using nested while loops:
def transpose(lst):
newlist = []
i = 0
while i < len(lst):
j = 0
colvec = []
while j < len(lst):
colvec.append(lst[j][i])
j = j + 1
newlist.append(colvec)
i = i + 1
return newlist
more_itertools.unzip() is easy to read, and it also works with generators.
import more_itertools
l = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
r = more_itertools.unzip(l) # a tuple of generators.
r = list(map(list, r)) # a list of lists
or equivalently
import more_itertools
l = more_itertools.chunked(range(1,10), 3)
r = more_itertools.unzip(l) # a tuple of generators.
r = list(map(list, r)) # a list of lists
matrix = [[1,2,3],
[1,2,3],
[1,2,3],
[1,2,3],
[1,2,3],
[1,2,3],
[1,2,3]]
rows = len(matrix)
cols = len(matrix[0])
transposed = []
while len(transposed) < cols:
transposed.append([])
while len(transposed[-1]) < rows:
transposed[-1].append(0)
for i in range(rows):
for j in range(cols):
transposed[j][i] = matrix[i][j]
for i in transposed:
print(i)
Just for fun: If you then want to make them all into dicts.
In [1]: l = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
...: fruits = ["Apple", "Pear", "Peach",]
...: [dict(zip(fruits, j)) for j in [list(i) for i in zip(*l)]]
Out[1]:
[{'Apple': 1, 'Pear': 4, 'Peach': 7},
{'Apple': 2, 'Pear': 5, 'Peach': 8},
{'Apple': 3, 'Pear': 6, 'Peach': 9}]
Here is a solution for transposing a list of lists that is not necessarily square:
maxCol = len(l[0])
for row in l:
rowLength = len(row)
if rowLength > maxCol:
maxCol = rowLength
lTrans = []
for colIndex in range(maxCol):
lTrans.append([])
for row in l:
if colIndex < len(row):
lTrans[colIndex].append(row[colIndex])
#Import functions from library
from numpy import size, array
#Transpose a 2D list
def transpose_list_2d(list_in_mat):
list_out_mat = []
array_in_mat = array(list_in_mat)
array_out_mat = array_in_mat.T
nb_lines = size(array_out_mat, 0)
for i_line_out in range(0, nb_lines):
array_out_line = array_out_mat[i_line_out]
list_out_line = list(array_out_line)
list_out_mat.append(list_out_line)
return list_out_mat

Transpose this matrix(python3) [duplicate]

This question already has answers here:
Matrix Transpose in Python [duplicate]
(19 answers)
Closed 8 years ago.
I am trying to find a way to transpose a matrix, for example for:
[[1, 2, 3],
[4, 5, 6],
[7, 8, 9]]
it would change the matrix to:
[[1, 4, 7],
[2, 5, 8],
[3, 6, 9]]
So far, I tried several things but it never worked. I tried:
def transpose_matrix(matrix): # this one doesn't change the matrix at all
zip(*matrix)
return matrix
or
def transpose_matrix(matrix):
map(list, zip(*matrix))
return matrix
or
def transpose_matrix(matrix): # this one returns an empty list []
print(list(zip(*matrix)))
I want to code it without using external library such as numpy.
After that, it returns the result to this menu and use the transposed matrix for upcoming option (not shown):
def menu(matrix):
print('choose option')
loop = True
while loop:
print('''
1-display matrix
7-transpose it
8-other option
0-back
''')
choi = input('cchoice:')
if choi =='1':
print('\n'.join([' '.join(map(str, row)) for row in matrix]))
elif choix == '7':
matrix = transpose_matrix(matrix)
else:
print('invalid choice')
You aren't actually assigning the result of your manipulations, and therefore return the original matrix unchanged. For example:
def transpose_matrix(matrix):
zip(*matrix) # ignore result of computation
return matrix # return argument unchanged
Instead, try:
def transpose_matrix(matrix):
matrix = list(map(list, zip(*matrix))) # assign result
return matrix # return transposed matrix
or simply:
def transpose_matrix(matrix):
return list(map(list, zip(*matrix)))
Bear in mind that you will need to assign the return from the function in the calling function, too:
matrix = transpose_matrix(matrix)
In-place modification
Alternatively, you could transpose in-place and implicitly return None:
def transpose_matrix(matrix):
matrix[:] = list(map(list, zip(*matrix)))
This way you don't have to assign back to matrix when you call the function.
Examples
Examples of the functions in action:
>>> def transpose_matrix(matrix):
return list(map(list, zip(*matrix)))
>>> transpose_matrix([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
[[1, 4, 7], [2, 5, 8], [3, 6, 9]]
>>> def in_place_transpose(matrix):
matrix[:] = list(map(list, zip(*matrix)))
>>> m = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
>>> in_place_transpose(m)
>>> m
[[1, 4, 7], [2, 5, 8], [3, 6, 9]]
zip behaviour
It is important to note that zip will truncate its output to the shortest argument iterable, e.g.:
>>> a = [1, 2, 3]
>>> b = [4, 5, 6]
>>> c = [7, 8]
>>> d = [9]
>>> e = []
>>> for l in (b, c, d, e):
print(list(zip(a, l)))
[(1, 4), (2, 5), (3, 6)]
[(1, 7), (2, 8)]
[(1, 9)]
[]
Therefore if any of the rows of your matrix are empty lists, your output will be an empty list. You can use zip_longest from itertools to insert dummy fillvalues (None by default) instead:
>>> from itertools import zip_longest
>>> list(zip_longest([1, 2, 3], [4, 5]))
[(1, 4), (2, 5), (3, None)]
>>> m = [[1, 2, 3], [4, 5, 6], []]
>>> list(zip_longest(*m, fillvalue=0))
[(1, 4, 0), (2, 5, 0), (3, 6, 0)]
You could implement this as:
def transpose_matrix(matrix):
return list(map(list, zip_longest(*matrix, fillvalue=0)))
with from itertools import zip_longest at the top of your script.
There are some problems with the solutions you tried:
def transpose_matrix(matrix):
zip(*matrix)
return matrix
The result from zip(…) is thrown away, and you return the original matrix instead. Write return zip(*matrix).
def transpose_matrix(matrix):
map(list, zip(*matrix))
return matrix
Same as above, the result from map(…) is thrown away. Write return map(list, zip(*matrix)) instead.
def transpose_matrix(matrix):
print(list(zip(*m)))
The argument is named matrix, but you are working on m. So either rename the parameter m, or change the zipped value to *matrix.
Examples:
>>> m = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
>>> list(zip(*m))
[(1, 4, 7), (2, 5, 8), (3, 6, 9)]
>>> list(map(list, zip(*m)))
[[1, 4, 7], [2, 5, 8], [3, 6, 9]]

Categories