How can I turn random matrix into a table? - python

Here is the code I'm given.
import random
def create_random_matrix(rows_min, rows_max, cols_min, cols_max):
    """Build a ragged matrix (a list of lists) of random integers.

    The number of rows is drawn once from ``rows_min..rows_max``.  The number
    of columns is drawn separately for every row from ``cols_min..cols_max``,
    so rows may differ in length.  Every cell holds a random integer between
    1 and 100.

    Notice that ``random.randint`` works differently from similar functions
    you have seen, in that both bounds are inclusive:
    http://docs.python.org/3/library/random.html#random.randint
    """
    matrix = []
    # generate a random number for the number of rows
    rows = random.randint(rows_min, rows_max)
    for _ in range(rows):
        # generate a random number for the number of columns of this row
        cols = random.randint(cols_min, cols_max)
        # add a row to the matrix, with a random number between 1 and 100
        # for each cell of the row
        matrix.append([random.randint(1, 100) for _ in range(cols)])
    # done
    return matrix
def print_matrix(twod_list):
    """Print the given two-dimensional list using its default list representation."""
    print(twod_list)
if __name__ == "__main__":
    # Between 8 and 12 rows, each with between 3 and 7 columns.
    random_matrix = create_random_matrix(8, 12, 3, 7)
    print_matrix(random_matrix)
The code creates a random matrix like this:
[[52, 23, 11, 95, 79], [3, 63, 11], [5, 78, 3, 14, 37], [89, 98, 10], [24, 60, 80, 73, 84, 94], [45, 14, 28], [51, 19, 9], [43, 86, 63, 71, 19], [58, 6, 43, 17, 87, 64, 87], [77, 57, 97], [9, 71, 54, 20], [77, 86, 22]]
But how can I change the code to output something like this instead?
36 83 35 73
28 11 3 45 30 44
39 97 3 10 90 5 42
55 73 56 27 7 37
84 49 35 43
100 20 22 95 75 25
58 81 26 34 41 44 72
32 23 21
31 37 1
95 90 26 6 78 49 22
5 17 31
86 25 73 56 10

This is a simple solution to your problem to print the members of a list of lists:
mymatrix = [[52, 23, 11, 95, 79], [3, 63, 11], [5, 78, 3, 14, 37], [89, 98, 10], [24, 60, 80, 73, 84, 94], [45, 14, 28], [51, 19, 9], [43, 86, 63, 71, 19], [58, 6, 43, 17, 87, 64, 87], [77, 57, 97], [9, 71, 54, 20], [77, 86, 22]]
# Print one matrix row per output line.
for row in mymatrix:
    # Unpacking the row makes print() write its items separated by single spaces,
    # followed by a newline.
    print(*row)
the output would look like:
52 23 11 95 79
3 63 11
5 78 3 14 37
89 98 10
24 60 80 73 84 94
45 14 28
51 19 9
43 86 63 71 19
58 6 43 17 87 64 87
77 57 97
9 71 54 20
77 86 22

just change the way you print it:
>>> for i in random_matrix:
... print " ".join(str(j) for j in i)
...
52 23 11 95 79
3 63 11
5 78 3 14 37
89 98 10
24 60 80 73 84 94
45 14 28
51 19 9
43 86 63 71 19
58 6 43 17 87 64 87
77 57 97
9 71 54 20
77 86 22
And just for fun, in one line:
print("\n".join(" ".join(str(j) for j in i) for i in random_matrix))

Related

convert list of lists in dataframe

I have the following data:
0 [[-0.932, 2.443, -1....
1 [[-1.099, 2.140, -1.4...
2 [[-0.985, -1.654, -1....
3 [[-1.339, 2.070, -0....
4 [[-1.119, 2.788, -2....
...
494 [[-0.023, 2.688, -1...
495 [[1.897, 0.0, -2.249,...
496 [[1.538, 2.349, -0.6...
497 [[-0.141, 2.320, -0...
498 [[-0.483, 1.587, -1....
Length: 499, dtype: object
Each row contains about 80 lists (a list of lists), and I would like to turn them into columns to get the following data:
ID col1 col2 ... col80
1.1.2020 0 -0.932 ...
2.1.2020 0 2.443 ...
3.1.2020 0 -1 ...
1.1.2020 1 -1.099
2.1.2020 1 2.140
3.1.2020 1 -1.4 ...
where the ID column comes from the index of the lists (0, 1, ..., 498). The index column (1.1.2020, 2.1.2020, ...) is saved as another object (date). Is this possible, and if so, how?
Let's say you had data like:
import numpy as np
import pandas as pd
ser = pd.Series(np.arange(90).reshape(10, 3, 3).tolist())
0 [[0, 1, 2], [3, 4, 5], [6, 7, 8]]
1 [[9, 10, 11], [12, 13, 14], [15, 16, 17]]
2 [[18, 19, 20], [21, 22, 23], [24, 25, 26]]
3 [[27, 28, 29], [30, 31, 32], [33, 34, 35]]
4 [[36, 37, 38], [39, 40, 41], [42, 43, 44]]
5 [[45, 46, 47], [48, 49, 50], [51, 52, 53]]
6 [[54, 55, 56], [57, 58, 59], [60, 61, 62]]
7 [[63, 64, 65], [66, 67, 68], [69, 70, 71]]
8 [[72, 73, 74], [75, 76, 77], [78, 79, 80]]
9 [[81, 82, 83], [84, 85, 86], [87, 88, 89]]
dtype: object
then I think you can do the bulk of the work like so:
out = ser.explode().apply(pd.Series).reset_index(names="ID")
ID 0 1 2
0 0 0 1 2
1 0 3 4 5
2 0 6 7 8
3 1 9 10 11
4 1 12 13 14
5 1 15 16 17
6 2 18 19 20
7 2 21 22 23
8 2 24 25 26
9 3 27 28 29
10 3 30 31 32
11 3 33 34 35
12 4 36 37 38
13 4 39 40 41
14 4 42 43 44
15 5 45 46 47
16 5 48 49 50
17 5 51 52 53
18 6 54 55 56
19 6 57 58 59
20 6 60 61 62
21 7 63 64 65
22 7 66 67 68
23 7 69 70 71
24 8 72 73 74
25 8 75 76 77
26 8 78 79 80
27 9 81 82 83
28 9 84 85 86
29 9 87 88 89
but you'll need to rename the columns and change the index yourself (how are you determining those dates?)

pandas merge two dataframe and sort by compared column in adjacent column

I compare two dataframes, and the result is shown below:
import pandas as pd
exam_1 = {
'Name': ['Jonn', 'Tomas', 'Fran', 'Olga', 'Veronika', 'Stephan'],
'Mat': [85, 75, 50, 93, 88, 90],
'Science': [96, 97, 99, 87, 90, 88],
'Reading': [80, 60, 72, 86, 84, 77],
'Wiritng': [78, 82, 88, 78, 86, 82],
'Lang': [77, 79, 77, 72, 90, 92],
}
exam_2 = {
'Name': ['Jonn', 'Tomas', 'Fran', 'Olga', 'Veronika', 'Stephan'],
'Mat': [80, 80, 90, 90, 85, 80],
'Science': [50, 60, 85, 90, 66, 82],
'Reading': [60, 75, 55, 90, 85, 60],
'Wiritng': [56, 66, 90, 82, 60, 80],
'Lang': [80, 78, 76, 90, 77, 66],
}
df_1 = pd.DataFrame(exam_1)
df_2 = pd.DataFrame(exam_2)
cmp = pd.merge(df_1, df_2, how="outer", on=["Name"], suffixes=("_1", "_2"))
print(cmp)
Name Mat_1 Science_1 Reading_1 Wiritng_1 Lang_1 Mat_2 Science_2 Reading_2 Wiritng_2 Lang_2
0 Jonn 85 96 80 78 77 80 50 60 56 80
1 Tomas 75 97 60 82 79 80 60 75 66 78
2 Fran 50 99 72 88 77 90 85 55 90 76
3 Olga 93 87 86 78 72 90 90 90 82 90
4 Veronika 88 90 84 86 90 85 66 85 60 77
5 Stephan 90 88 77 82 92 80 82 60 80 66
But I want to see Mat_1 and Mat_2 in adjacent columns, and likewise for the other subjects.
I tried to do it manually, but is there an easier way to do it, such as an existing built-in function?
You can use pandas.DataFrame.sort_index on axis=1.
Replace this :
cmp = pd.merge(df_1, df_2, how="outer", on=["Name"], suffixes=("_1", "_2"))
By this :
cmp = (
pd.merge(df_1, df_2, how="outer", on=["Name"], suffixes=("_1", "_2"))
.set_index("Name")
.sort_index(axis=1)
.reset_index()
)
​
# Output :
print(cmp.to_string())
​
Name Lang_1 Lang_2 Mat_1 Mat_2 Reading_1 Reading_2 Science_1 Science_2 Wiritng_1 Wiritng_2
0 Jonn 77 80 85 80 80 60 96 50 78 56
1 Tomas 79 78 75 80 60 75 97 60 82 66
2 Fran 77 76 50 90 72 55 99 85 88 90
3 Olga 72 90 93 90 86 90 87 90 78 82
4 Veronika 90 77 88 85 84 85 90 66 86 60
5 Stephan 92 66 90 80 77 60 88 82 82 80

Pandas: calculate weighted average by row using a dataframe and a series

I was trying to compute a weighted average and ran into a problem:
Problem
I wanted to create a new column named answer that calculates the weighted result between each row and a list of weights, named month in this case. If I use df.mean() I would get a simple average by month, and that is not what I want. The idea is to give more importance to the end of the year and less importance to the demand at the beginning of the year. That's why I would like to use a weighted average calculation.
In Excel I would use the formula below. I'm having trouble converting this calculation to a pandas DataFrame.
=SUMPRODUCT( demands[#[1]:[12]] ; month )/SUM(month)
I couldn't find a solution to this problem and I really appreciate help with this subject.
Thank you in advance.
Here's a dummy dataframe that serves as an example:
Example Code
demand = pd.DataFrame({'1': [360, 40, 100, 20, 55],
'2': [500, 180, 450, 60, 50],
'3': [64, 30, 60, 10, 0],
'4': [50, 40, 30, 60, 50],
'5': [40, 24, 45, 34, 60],
'6': [30, 34, 65, 80, 78],
'7': [56, 45, 34, 90, 58],
'8': [32, 12, 45, 55, 66],
'9': [32, 56, 89, 67, 56],
'10': [57, 35, 75, 48, 9],
'11': [56, 33, 11, 6, 78],
'12': [23, 65, 34, 8, 67]
})
months = [i for i in range(1,13)]
Visualization of the problem
Just use numpy.average, specifying weights:
demand["result"]=np.average(demand, weights=months, axis=1)
https://docs.scipy.org/doc/numpy-1.15.1/reference/generated/numpy.average.html
Outputs:
1 2 3 4 5 6 ... 8 9 10 11 12 result
0 360 500 64 50 40 30 ... 32 32 57 56 23 58.076923
1 40 180 30 40 24 34 ... 12 56 35 33 65 43.358974
2 100 450 60 30 45 65 ... 45 89 75 11 34 58.884615
3 20 60 10 60 34 80 ... 55 67 48 6 8 43.269231
4 55 50 0 50 60 78 ... 66 56 9 78 67 55.294872
This can be done by the following:
demand['result'] = (demand * months).sum(axis=1)/sum(months)
You can try this code:
# Normalising constant: the sum of all month weights.
den = np.sum(months)
# Weighted average per row: each month column ('1'..'12') is scaled by
# weight / den, and the scaled columns are then added together.
demand['average'] = sum(demand[str(month)].mul(month / den) for month in months)
The Output:
1 2 3 4 5 6 7 8 9 10 11 12 average
0 360 500 64 50 40 30 56 32 32 57 56 23 58.076923
1 40 180 30 40 24 34 45 12 56 35 33 65 43.358974
2 100 450 60 30 45 65 34 45 89 75 11 34 58.884615
3 20 60 10 60 34 80 90 55 67 48 6 8 43.269231
4 55 50 0 50 60 78 58 66 56 9 78 67 55.294872

Issue with matrix slicing in n-dimensional matrix

I was stuck on a Python function, but later solved it. I have a question regarding Python's n-dimensional indexing notation. The matrix was A(2,4,4,3). What is the difference between accessing the matrix as A[:][0:3, 0:3, 3] and A[:][ 0:3, 0:3 ][3]?
Test array(2,4,4,3):
[[[[ 0 1 2] [[[48 49 50]
[ 3 4 5] [51 52 53]
[ 6 7 8] [54 55 56]
[ 9 10 11]] [57 58 59]]
[[12 13 14] [[60 61 62]
[15 16 17] [63 64 65]
[18 19 20] [66 67 68]
[21 22 23]] [69 70 71]]
[[24 25 26] [[72 73 74]
[27 28 29] [75 76 77]
[30 31 32] [78 79 80]
[33 34 35]] [81 82 83]]
[[36 37 38] [[84 85 86]
[39 40 41] [87 88 89]
[42 43 44] [90 91 92]
[45 46 47]]] [93 94 95]]]
With data[0:4, 0:4, 1] all three indices are applied at once, so you get the element at index 1 along the third axis of each 4x4 block:
[[[ 3 4 5] [[51 52 53]
[15 16 17] [63 64 65]
[27 28 29] [75 76 77]
[39 40 41]] [87 88 89]]]
On the other hand, with data[0:4, 0:4][1] the slice data[0:4, 0:4] first returns the whole array, and [1] then indexes its first axis, so you get the second 4x4x3 block:
[[[48 49 50]
[51 52 53]
[54 55 56]
[57 58 59]]
[[60 61 62]
[63 64 65]
[66 67 68]
[69 70 71]]
[[72 73 74]
[75 76 77]
[78 79 80]
[81 82 83]]
[[84 85 86]
[87 88 89]
[90 91 92]
[93 94 95]]]

How to shuffle groups of rows of a Pandas dataframe?

Let's assume I have a dataframe df:
import numpy as np
import pandas as pd
df = pd.DataFrame(np.random.rand(12,4))
print(df)
0 1 2 3
0 71 64 84 20
1 48 60 83 61
2 48 78 71 46
3 65 88 66 77
4 71 22 42 58
5 66 76 64 80
6 67 28 74 87
7 32 90 55 78
8 80 42 52 14
9 54 76 73 17
10 32 89 42 36
11 85 78 61 12
How do I shuffle the rows of df three-by-three, i.e., how do I randomly shuffle the first three rows (0, 1, 2) with either the second (3, 4, 5), third (6, 7, 8) or fourth (9, 10, 11) group? This could be a possible outcome:
print(df)
0 1 2 3
3 65 88 66 77
4 71 22 42 58
5 66 76 64 80
9 54 76 73 17
10 32 89 42 36
11 85 78 61 12
6 67 28 74 87
7 32 90 55 78
8 80 42 52 14
0 71 64 84 20
1 48 60 83 61
2 48 78 71 46
Thus, the new order has the second group of 3 rows from original dataframe, then the last one, then the third one and finally the first group.
You can reshape into a 3D array splitting the first axis into two with the latter one of length 3 corresponding to the group length and then use np.random.shuffle for such a groupwise in-place shuffle along the first axis, which being of length as the number of groups holds those groups and thus achieves our desired result, like so -
np.random.shuffle(df.values.reshape(-1,3,df.shape[1]))
Explanation
To give it a bit of explanation, let's use np.random.permutation to generate those random indices along the first axis and then index into the 3D array version.
1] Input df :
In [199]: df
Out[199]:
0 1 2 3
0 71 64 84 20
1 48 60 83 61
2 48 78 71 46
3 65 88 66 77
4 71 22 42 58
5 66 76 64 80
6 67 28 74 87
7 32 90 55 78
8 80 42 52 14
9 54 76 73 17
10 32 89 42 36
11 85 78 61 12
2] Get 3D array version :
In [200]: arr_3D = df.values.reshape(-1,3,df.shape[1])
In [201]: arr_3D
Out[201]:
array([[[71, 64, 84, 20],
[48, 60, 83, 61],
[48, 78, 71, 46]],
[[65, 88, 66, 77],
[71, 22, 42, 58],
[66, 76, 64, 80]],
[[67, 28, 74, 87],
[32, 90, 55, 78],
[80, 42, 52, 14]],
[[54, 76, 73, 17],
[32, 89, 42, 36],
[85, 78, 61, 12]]])
3] Get shuffling indices and index into the first axis of 3D version :
In [202]: shuffle_idx = np.random.permutation(arr_3D.shape[0])
In [203]: shuffle_idx
Out[203]: array([0, 3, 1, 2])
In [204]: arr_3D[shuffle_idx]
Out[204]:
array([[[71, 64, 84, 20],
[48, 60, 83, 61],
[48, 78, 71, 46]],
[[54, 76, 73, 17],
[32, 89, 42, 36],
[85, 78, 61, 12]],
[[65, 88, 66, 77],
[71, 22, 42, 58],
[66, 76, 64, 80]],
[[67, 28, 74, 87],
[32, 90, 55, 78],
[80, 42, 52, 14]]])
Then, we are assigning these values back to input dataframe.
With np.random.shuffle, we are just doing everything in-place and hiding away the work needed to explicitly generate shuffling indices and assigning back.
Sample run -
In [181]: df = pd.DataFrame(np.random.randint(11,99,(12,4)))
In [182]: df
Out[182]:
0 1 2 3
0 82 49 80 20
1 19 97 74 81
2 62 20 97 19
3 36 31 14 41
4 27 86 28 58
5 38 68 24 83
6 85 11 25 88
7 21 31 53 19
8 38 45 14 72
9 74 63 40 94
10 69 85 53 81
11 97 96 28 29
In [183]: np.random.shuffle(df.values.reshape(-1,3,df.shape[1]))
In [184]: df
Out[184]:
0 1 2 3
0 85 11 25 88
1 21 31 53 19
2 38 45 14 72
3 82 49 80 20
4 19 97 74 81
5 62 20 97 19
6 36 31 14 41
7 27 86 28 58
8 38 68 24 83
9 74 63 40 94
10 69 85 53 81
11 97 96 28 29
Similar solution to @Divakar's, probably simpler as I directly shuffle the index of the dataframe:
import numpy as np
import pandas as pd
# Demo frame: 12 rows and 4 columns; every cell in a row holds that row's label.
row_labels = np.arange(0, 12)
df = pd.DataFrame([row_labels] * 4).T
len_group = 3

# Work on a writable copy of the row labels so the frame's own index is untouched.
index_list = df.index.to_numpy(copy=True)
# View the labels as (number of groups, len_group). Shuffling that view in place
# along its first axis reorders whole groups inside index_list.
label_groups = index_list.reshape(-1, len_group)
np.random.shuffle(label_groups)
# Select the rows in the shuffled label order.
shuffled_df = df.loc[index_list]
Sample output:
shuffled_df
Out[82]:
0 1 2 3
9 9 9 9 9
10 10 10 10 10
11 11 11 11 11
3 3 3 3 3
4 4 4 4 4
5 5 5 5 5
0 0 0 0 0
1 1 1 1 1
2 2 2 2 2
6 6 6 6 6
7 7 7 7 7
8 8 8 8 8
This is doing the same as the other two answers, but using integer division to create a group column.
nrows_df = len(df)
nrows_group = 3
# Number of groups, rounded up so that a trailing partial group is kept.
# Integer arithmetic is required here: true division ("/") yields a float in
# Python 3, and np.random.permutation only accepts an integer count.
n_groups = -(-nrows_df // nrows_group)
shuffled = (
    df
    # Label every row with its group number (assumes the default 0..n-1 index).
    .assign(group_var=df.index // nrows_group)
    .set_index("group_var")
    # Select the groups in random order; rows keep their order within a group.
    .loc[np.random.permutation(n_groups)]
)

Categories