Table in Matplotlib, can't get two columns? - python

I'm struggling with tables for matplotlib (blume). The table is for an automation project that will produce 22 different maps. The code below produces a table with 49 rows, but some figures will only have 6 rows. When the number of rows exceeds 25, I would like to split the table into two columns.
import pandas as pd
import matplotlib.pyplot as plt
from blume.table import table
# Dataframe
df=pd.DataFrame({'nr': [ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,
18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34,
35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49],
'KnNamn': ['Härryda', 'Partille', 'Öckerö', 'Stenungsund', 'Tjörn', 'Orust',
'Sotenäs', 'Munkedal', 'Tanum', 'Dals-Ed', 'Färgelanda', 'Ale',
'Lerum', 'Vårgårda', 'Bollebygd', 'Grästorp', 'Essunga',
'Karlsborg', 'Gullspång', 'Tranemo', 'Bengtsfors', 'Mellerud',
'Lilla Edet', 'Mark', 'Svenljunga', 'Herrljunga', 'Vara', 'Götene',
'Tibro', 'Töreboda', 'Göteborg', 'Mölndal', 'Kungälv', 'Lysekil',
'Uddevalla', 'Strömstad', 'Vänersborg', 'Trollhättan', 'Alingsås',
'Borås', 'Ulricehamn', 'Åmål', 'Mariestad', 'Lidköping', 'Skara',
'Skövde', 'Hjo', 'Tidaholm', 'Falköping'],
'rel': [0.03650425, 0.05022105, 0.03009109, 0.03966735, 0.02793296,
0.03690838, 0.04757161, 0.05607283, 0.0546372 , 0.05452821,
0.06640368, 0.04252673, 0.03677577, 0.05385784, 0.0407173 ,
0.04024881, 0.05613226, 0.04476127, 0.08543165, 0.04070175,
0.09281077, 0.08711656, 0.06111578, 0.04564958, 0.05058988,
0.04618078, 0.04640402, 0.04826498, 0.08514253, 0.07799246,
0.07829886, 0.04249149, 0.03909206, 0.06835601, 0.08027622,
0.07087295, 0.09013876, 0.1040369 , 0.05004451, 0.06584845,
0.04338739, 0.10570863, 0.0553109 , 0.05024871, 0.06531729,
0.05565605, 0.05041816, 0.04885198, 0.07954831]})
# Table
fig,ax = plt.subplots(1, figsize=(10, 7))
# Hide the axes: the figure is only a canvas for the table.
ax.axis('off')
# Build one label per municipality: "<nr>. <name>: <percent> %".
labels = []
for number, name, share in zip(df.nr, df.KnNamn, df.rel):
    percent = str(round(share * 100, 2)) + ' %'
    labels.append(f"{number!s}. {name!s}: {percent}")
# The table expects a list of rows, each row being a list of cells.
val = [[label] for label in labels]
#val=val[0] + val[1]
tab=table(ax,cellText=val,
#rowLabels=row_lab,
colLabels=['Relativ arbetslöshet'], loc='left', colWidths=[0.3], cellLoc='left')
plt.show()
As I understand it, if I want a table with two columns, my val object has to be structured differently. In the case above, val is a nested list containing 49 single-element lists, so I figure I need to merge those lists pairwise. I tried the pairwise for loop below, but it doesn't work with range objects:
I'm sure there is a simple solution to this problem I have. Help would be much appreciated.
for i, j in zip(range(len(val)), range(len(val))[1:] + range(len(val))[:1]):
print(i, j)

I don't know if this is what you need, but you could use zip() or, better, itertools.zip_longest() on the two halves of the list (val[:25] and val[25:]; the list is called values below):
two_columns = []
for col1, col2 in itertools.zip_longest(values[:25], values[25:], fillvalue=''):
#print(f'{col1:25} | {col2}')
two_columns.append([col1, col2])
Full working example
import pandas as pd
import matplotlib.pyplot as plt
from blume.table import table
import itertools
df = pd.DataFrame({
'nr': [
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,
18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34,
35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49
],
'KnNamn': [
'Härryda', 'Partille', 'Öckerö', 'Stenungsund', 'Tjörn', 'Orust',
'Sotenäs', 'Munkedal', 'Tanum', 'Dals-Ed', 'Färgelanda', 'Ale',
'Lerum', 'Vårgårda', 'Bollebygd', 'Grästorp', 'Essunga',
'Karlsborg', 'Gullspång', 'Tranemo', 'Bengtsfors', 'Mellerud',
'Lilla Edet', 'Mark', 'Svenljunga', 'Herrljunga', 'Vara', 'Götene',
'Tibro', 'Töreboda', 'Göteborg', 'Mölndal', 'Kungälv', 'Lysekil',
'Uddevalla', 'Strömstad', 'Vänersborg', 'Trollhättan', 'Alingsås',
'Borås', 'Ulricehamn', 'Åmål', 'Mariestad', 'Lidköping', 'Skara',
'Skövde', 'Hjo', 'Tidaholm', 'Falköping'
],
'rel': [
0.03650425, 0.05022105, 0.03009109, 0.03966735, 0.02793296,
0.03690838, 0.04757161, 0.05607283, 0.0546372 , 0.05452821,
0.06640368, 0.04252673, 0.03677577, 0.05385784, 0.0407173 ,
0.04024881, 0.05613226, 0.04476127, 0.08543165, 0.04070175,
0.09281077, 0.08711656, 0.06111578, 0.04564958, 0.05058988,
0.04618078, 0.04640402, 0.04826498, 0.08514253, 0.07799246,
0.07829886, 0.04249149, 0.03909206, 0.06835601, 0.08027622,
0.07087295, 0.09013876, 0.1040369 , 0.05004451, 0.06584845,
0.04338739, 0.10570863, 0.0553109 , 0.05024871, 0.06531729,
0.05565605, 0.05041816, 0.04885198, 0.07954831
]
})
# df = df[:25] # test for 25 rows
# ---
fig, ax = plt.subplots(1, figsize=(10, 7))
ax.axis('off')
# --- values ---
# Format every row as "<nr>. <name>: <percent> %".
# The precision must be ".2f" (two decimals). A bare ".2" means two
# significant digits, which turns 3.650425 into "3.7" and pushes values
# of 10 or more into exponent notation.
values = df.apply(
    lambda row: f'{row["nr"]}. {row["KnNamn"]}: {row["rel"] * 100:.2f} %',
    axis=1,
).values
# --- columns ---
if len(values) > 25:
    # Pair entry i of the first 25 labels with entry i of the remainder;
    # zip_longest pads the shorter side with empty cells.
    two_columns = [
        [left, right]
        for left, right in itertools.zip_longest(values[:25], values[25:], fillvalue='')
    ]
    tab = table(
        ax,
        cellText=two_columns,
        colLabels=['Col1', 'Col2'],
        colWidths=[0.3, 0.3],
        loc=-100,
        cellLoc='left',
    )
else:
    # Few enough rows: a single column with one cell per row.
    one_column = [[entry] for entry in values]
    tab = table(
        ax,
        cellText=one_column,
        colLabels=['Col1'],
        colWidths=[0.3],
        loc=-100,
        cellLoc='left',
    )
# --- plot ---
plt.show()
Result:
EDIT:
A more universal version, which can create any number of columns.
This example automatically creates 3 columns for ROWS = 20 (49 entries split as 20 + 20 + 9).
import pandas as pd
import matplotlib.pyplot as plt
from blume.table import table
import itertools
df = pd.DataFrame({
'nr': [
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,
18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34,
35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49
],
'KnNamn': [
'Härryda', 'Partille', 'Öckerö', 'Stenungsund', 'Tjörn', 'Orust',
'Sotenäs', 'Munkedal', 'Tanum', 'Dals-Ed', 'Färgelanda', 'Ale',
'Lerum', 'Vårgårda', 'Bollebygd', 'Grästorp', 'Essunga',
'Karlsborg', 'Gullspång', 'Tranemo', 'Bengtsfors', 'Mellerud',
'Lilla Edet', 'Mark', 'Svenljunga', 'Herrljunga', 'Vara', 'Götene',
'Tibro', 'Töreboda', 'Göteborg', 'Mölndal', 'Kungälv', 'Lysekil',
'Uddevalla', 'Strömstad', 'Vänersborg', 'Trollhättan', 'Alingsås',
'Borås', 'Ulricehamn', 'Åmål', 'Mariestad', 'Lidköping', 'Skara',
'Skövde', 'Hjo', 'Tidaholm', 'Falköping'
],
'rel': [
0.03650425, 0.05022105, 0.03009109, 0.03966735, 0.02793296,
0.03690838, 0.04757161, 0.05607283, 0.0546372 , 0.05452821,
0.06640368, 0.04252673, 0.03677577, 0.05385784, 0.0407173 ,
0.04024881, 0.05613226, 0.04476127, 0.08543165, 0.04070175,
0.09281077, 0.08711656, 0.06111578, 0.04564958, 0.05058988,
0.04618078, 0.04640402, 0.04826498, 0.08514253, 0.07799246,
0.07829886, 0.04249149, 0.03909206, 0.06835601, 0.08027622,
0.07087295, 0.09013876, 0.1040369 , 0.05004451, 0.06584845,
0.04338739, 0.10570863, 0.0553109 , 0.05024871, 0.06531729,
0.05565605, 0.05041816, 0.04885198, 0.07954831
]
})
#df = df[:25] # test for 25 rows
# ---
fig, ax = plt.subplots(1, figsize=(10, 7))
ax.axis('off')
# --- values ---
def convert(row):
    """Return the table label for one row: ``"<nr>. <KnNamn>: <rel in %> %"``.

    ``row`` is any mapping-like object with the keys ``"nr"``, ``"KnNamn"``
    and ``"rel"`` (for example a DataFrame row passed by ``df.apply``).
    ``rel`` is a fraction; it is shown as a percentage with two decimals.
    """
    # ".2f" gives two decimals. A bare ".2" would mean two significant
    # digits (3.650425 -> "3.7") and break for values of 10 or more.
    return f'{row["nr"]}. {row["KnNamn"]}: {row["rel"] * 100:.2f} %'
values = df.apply(convert, axis=1).values
# --- columns ---
# Maximum number of entries per table column.
ROWS = 20
#ROWS = 25
# Cut the flat list of labels into consecutive chunks of at most ROWS items;
# every chunk becomes one table column.
columns = [values[start:start + ROWS] for start in range(0, len(values), ROWS)]
columns_widths = [0.3 for _ in columns]
columns_labels = [f'Col{number}' for number, _ in enumerate(columns, start=1)]
# Transpose columns into table rows, padding short columns with empty cells.
rows = list(itertools.zip_longest(*columns, fillvalue=''))
# --- plot ---
tab = table(ax,
cellText=rows,
#rowLabels=row_lab,
colLabels=columns_labels,
colWidths=columns_widths,
loc=-100,
cellLoc='left')
plt.show()
Result:

Related

Rearrange numpy array by indices on multiple axes

I was wondering how to go about reordering numpy arrays on different axes and with two shared dimensions. For instance, say I have the following (truncated) array of shape (3, 57, 2):
A = array([[[0.93191024, 0.15612787],
[0.71642773, 0.56644261],
[0.56412285, 0.53022111],
[0.08851486, 0.16253565],
[0.34093212, 0.41074817],
[0.02038229, 0.03693136],
[0.09371394, 0.06565686],
[0.93267282, 0.21915236],
[0.96060752, 0.07309468],
[0.82970486, 0.59032961],
[0.3997659 , 0.51113411],
[0.9933917 , 0.1476475 ],
[0.45059107, 0.98992075],
[0.64420259, 0.99047411],
[0.11300111, 0.15220392],
[0.28461117, 0.63130115],
[0.31121419, 0.62935332],
[0.05057692, 0.22276605],
[0.44223485, 0.47280298],
[0.45543663, 0.53295429],
[0.41775544, 0.90341879],
[0.80630092, 0.42294245],
[0.82215799, 0.78450402],
[0.53719004, 0.56694119],
[0.47927382, 0.55033868],
[0.51007433, 0.27610559],
[0.33946435, 0.97437761],
[0.97263574, 0.07449039],
[0.840479 , 0.84473952],
[0.65787646, 0.91151828],
[0.93803356, 0.35804058],
[0.93322294, 0.38717009],
[0.4193876 , 0.54307245],
[0.9016539 , 0.31755714],
[0.66398876, 0.40193374],
[0.65885493, 0.83359592],
[0.32875318, 0.23673417],
[0.87984555, 0.39703699],
[0.69733635, 0.59550783],
[0.81226642, 0.05097729],
[0.12092097, 0.45202225],
[0.17937865, 0.50205434],
[0.32294824, 0.7195445 ],
[0.78753686, 0.69291673],
[0.24804844, 0.30187789],
[0.0446775 , 0.81477633],
[0.505237 , 0.29917803],
[0.67580571, 0.95910518],
[0.08254148, 0.74803897],
[0.43566526, 0.31926868],
[0.4359975 , 0.2017858 ],
[0.19281317, 0.31834615],
[0.89746816, 0.05457466],
[0.3012639 , 0.24771521],
[0.60127988, 0.2070745 ],
[0.01068053, 0.15385391],
[0.36193366, 0.80346892]],
[[0.84691843, 0.64659873],
[0.4138493 , 0.65155583],
[0.1952568 , 0.79107071],
[0.44328237, 0.96547454],
[0.25811086, 0.63944066],
[0.85353904, 0.24630087],
[0.02200961, 0.43702606],
[0.51187364, 0.21604095],
[0.80455184, 0.72895394],
[0.56534909, 0.9832036 ],
[0.56615037, 0.41387091],
[0.15117167, 0.96842998],
[0.45100418, 0.33455142],
[0.76995385, 0.80203815],
[0.09164816, 0.08733553],
[0.0885783 , 0.03341749],
[0.56167238, 0.18389973],
[0.66844602, 0.84802942],
[0.52667165, 0.82921114],
[0.3668573 , 0.79127918],
[0.83358175, 0.2357053 ],
[0.99023907, 0.72416636],
[0.04969955, 0.16445317],
[0.83618043, 0.61106043],
[0.44079159, 0.53960843],
[0.28535309, 0.57542243],
[0.02355444, 0.96444916],
[0.99996202, 0.22699034],
[0.35725371, 0.3284021 ],
[0.24784074, 0.10957504],
[0.68893807, 0.33428212],
[0.19549847, 0.53242997],
[0.37411674, 0.10671197],
[0.38613786, 0.71247659],
[0.58098534, 0.46313774],
[0.94364596, 0.74239641],
[0.94446665, 0.80260609],
[0.15405246, 0.20687026],
[0.06607917, 0.39675446],
[0.49393799, 0.98976068],
[0.90401917, 0.21254029],
[0.39854018, 0.51468104],
[0.57275152, 0.23703318],
[0.28854573, 0.61574602],
[0.33767913, 0.71972114],
[0.29835304, 0.21042103],
[0.53764989, 0.10600488],
[0.93471516, 0.22275683],
[0.46161675, 0.71566021],
[0.60435755, 0.29129393],
[0.93588236, 0.45631744],
[0.68130624, 0.14505727],
[0.26229578, 0.16338432],
[0.61311318, 0.07904725],
[0.42231007, 0.40815435],
[0.18240378, 0.18156717],
[0.32826971, 0.51764318]],
[[0.61310538, 0.70781868],
[0.81069552, 0.40163557],
[0.41225382, 0.55698844],
[0.79904472, 0.57460263],
[0.15534107, 0.14216233],
[0.07721588, 0.00293088],
[0.75088213, 0.15521204],
[0.0505958 , 0.70318609],
[0.40682637, 0.77522382],
[0.65678125, 0.42817843],
[0.98454818, 0.0959132 ],
[0.04875253, 0.33109192],
[0.96330185, 0.01330943],
[0.06014761, 0.90105887],
[0.88935171, 0.82924489],
[0.68668051, 0.92581709],
[0.64966411, 0.33036089],
[0.92391759, 0.13665613],
[0.78596527, 0.01177382],
[0.36857492, 0.97854483],
[0.11493051, 0.63332842],
[0.89560528, 0.24904867],
[0.19925101, 0.78383848],
[0.16796212, 0.86980891],
[0.64345971, 0.87019182],
[0.98074058, 0.66858333],
[0.89239524, 0.68869392],
[0.05101222, 0.06246318],
[0.19412597, 0.58740619],
[0.47544267, 0.66208743],
[0.30888323, 0.03953938],
[0.39305549, 0.24437666],
[0.34944342, 0.94852464],
[0.93839378, 0.30905846],
[0.28667766, 0.18239694],
[0.93015124, 0.23186617],
[0.41991688, 0.6963337 ],
[0.73953573, 0.99268293],
[0.13625265, 0.40126139],
[0.5658131 , 0.81199939],
[0.34594279, 0.90360269],
[0.95083541, 0.97844242],
[0.13066022, 0.16431073],
[0.31765483, 0.41965885],
[0.99760275, 0.77221808],
[0.64802972, 0.71946862],
[0.35160349, 0.64195416],
[0.03926753, 0.27586446],
[0.98373212, 0.55210807],
[0.76304081, 0.62046984],
[0.75019444, 0.4965763 ],
[0.99821063, 0.92457013],
[0.8526248 , 0.0722389 ],
[0.01161104, 0.36656463],
[0.54781289, 0.32044447],
[0.68337198, 0.20499322],
[0.41070141, 0.72194802]]])
And I want to rearrange the elements at dimensions 0 and 1 with an ndarray representing indices of shape (3, 57), i.e. for each row I want to rearrange the 57 entries according to each index list of 57:
B = array([[12, 1, 9, 10, 5, 2, 7, 8, 6, 16, 4, 0, 11, 14, 13, 22,
21, 29, 3, 15, 28, 27, 18, 23, 34, 20, 33, 31, 25, 35, 26, 24,
32, 30, 17, 19, 40, 36, 39, 41, 47, 43, 37, 49, 44, 46, 45, 38,
42, 48, 50, 53, 52, 54, 51, 55, 56],
[46, 42, 55, 52, 31, 24, 43, 48, 44, 49, 35, 51, 33, 53, 16, 41,
10, 4, 7, 13, 22, 8, 40, 15, 5, 12, 1, 9, 25, 3, 30, 26,
6, 0, 34, 14, 32, 2, 28, 27, 39, 11, 17, 21, 20, 19, 23, 18,
45, 36, 50, 38, 37, 29, 47, 54, 56],
[ 4, 1, 14, 12, 6, 8, 0, 11, 13, 7, 2, 3, 9, 5, 16, 10,
27, 28, 20, 22, 32, 25, 15, 37, 17, 34, 24, 18, 19, 43, 39, 26,
23, 29, 30, 47, 45, 21, 31, 33, 35, 38, 44, 46, 40, 50, 36, 48,
49, 51, 41, 42, 52, 53, 54, 55, 56]])
How would I go about transforming array A by indices array B?
If the first dimension is small (3 in your example), a simple list comprehension over it is enough:
np.array([A[i, B[i]] for i in range(len(A))])
#5.13 µs ± 14.7 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)
Otherwise, for longer arrays, you should look for a more performant, fully vectorized solution, for example A[np.arange(len(A))[:, None], B].

Matplotlib scatter plot with array of y values for each x

This is in the similar vein as Python Scatter Plot with Multiple Y values for each X ; that is, I have data which is:
data = [
[1, [15, 16, 17, 18, 19, 20]],
[2, [21, 22, 23, 24, 25, 26]],
[3, [27, 28, 29, 30, 31, 32]],
]
... so first column (0) is x-coordinates, and second column (1) contains arrays of y values corresponding to the single x coordinate. And, I want to plot this as a scatter plot, and the best I could do is this (code below):
Same as in the linked post, I've had to use three ax.scatter plots, and hence we have three colours, one for each column.
So my question is:
Can I issue a single ax.scatter command to get a plot like the above (but with single color/marker) from the data I have (instead of having to issue three commands)?
Alternatively, can I somehow transform the data I have, so I to get a plot like the above (but with single color/marker) with a single ax.scatter command?
Here is the code:
#!/usr/bin/env python3
import sys
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
print("versions: Python {} matplotlib {} numpy {}".format(sys.version.replace('\n', ''), matplotlib.__version__, np.version.version))
data = [
[1, [15, 16, 17, 18, 19, 20]],
[2, [21, 22, 23, 24, 25, 26]],
[3, [27, 28, 29, 30, 31, 32]],
]
ndata = np.asarray(data, dtype=object)
fig = plt.figure()
# Null formatter
ax = fig.add_subplot(1, 1, 1)
print()
print(ndata[1])
print(ndata[:,0].astype(float))
print(ndata[:,1])
datay_2D = np.stack(ndata[:,1], axis=0) # convert numpy array of lists to numpy 2D array
print()
print(datay_2D[:,0])
print(datay_2D[0])
print([ndata[:,0][0]]*len(datay_2D[0]))
# One scatter call per x value. The x coordinate is repeated once for every
# y value of the same row so both arguments have the same length; the length
# must come from the row being plotted.
ax.scatter([ndata[:,0][0]]*len(datay_2D[0]), datay_2D[0], marker="x")
ax.scatter([ndata[:,0][1]]*len(datay_2D[1]), datay_2D[1], marker="x")
ax.scatter([ndata[:,0][2]]*len(datay_2D[2]), datay_2D[2], marker="x")
plt.show()
Printout:
versions: Python 3.6.8 (default, Oct 7 2019, 12:59:55) [GCC 8.3.0] matplotlib 2.1.1 numpy 1.13.3
[2 list([21, 22, 23, 24, 25, 26])]
[ 1. 2. 3.]
[list([15, 16, 17, 18, 19, 20]) list([21, 22, 23, 24, 25, 26])
list([27, 28, 29, 30, 31, 32])]
[15 21 27]
[15 16 17 18 19 20]
[1, 1, 1, 1, 1, 1]
I suppose all lists of y values have the same length? In that case
import numpy as np
import matplotlib.pyplot as plt
data = [
[1, [15, 16, 17, 18, 19, 20]],
[2, [21, 22, 23, 24, 25, 26]],
[3, [27, 28, 29, 30, 31, 32]],
]
x, y = zip(*data)
y = np.array(y)
plt.scatter(np.repeat(x, y.shape[1]), y.flat)
plt.show()

Is there a fast way to shuffle numpy image in segments?

I want to write a function that can take small images and return a permutation of them, block-wise.
Basically I want to turn this:
Into this:
There was an excellent answer in Is there a function in Python that shuffle data by data blocks? that helped me write a solution. However for ~50,000 28x28 images this takes a long time to run.
# blocks of 7x7 shuffling
range1 = np.arange(4)
range2 = np.arange(4)
block_size = int(28 / 4)
print([[x[i*block_size:(i+1)*block_size].shape] for i in range1])
for x in x1:
np.random.shuffle(range1)
x[:] = np.block([[x[i*block_size:(i+1)*block_size]] for i in range1])
for a in x:
np.random.shuffle(range2)
a[:] = np.block([a[i*block_size:(i+1)*block_size] for i in range2])
print("x1", time.time() - begin)
begin = time.time()
Here's one approach based on this post -
def randomize_tiles_3D(x1, H, W):
    """Shuffle the H-by-W tiles of every image in a stack independently.

    ``x1`` is a 3-D array indexed as (image, row, column). ``H`` and ``W``
    are the tile height and width; the reshapes below require the image
    height and width to be multiples of them. Each image gets its own
    random tile permutation, and a new array with the layout of ``x1`` is
    returned.
    """
    n_imgs, n_rows, n_cols = x1.shape
    tiles_down = n_rows // H
    tiles_across = n_cols // W

    # Argsorting uniform noise row by row gives one permutation per image.
    noise = np.random.rand(n_imgs, tiles_down * tiles_across)
    order = noise.argsort(axis=1)
    tile_row, tile_col = np.unravel_index(order, (tiles_down, tiles_across))

    # View each image as a (tile-row, H, tile-col, W) grid and gather whole
    # tiles in permuted order; the result is indexed (image, tile, H, W).
    grid = x1.reshape(-1, tiles_down, H, tiles_across, W)
    image_index = np.arange(n_imgs)[:, None]
    picked = grid[image_index, tile_row, :, tile_col]

    # Lay the tiles back out on the grid and merge them into full images.
    regrid = picked.reshape(-1, tiles_down, tiles_across, H, W)
    return regrid.swapaxes(2, 3).reshape(-1, n_rows, n_cols)
Sample run -
In [46]: x1
Out[46]:
array([[[ 0, 1, 2, 3, 4, 5],
[ 6, 7, 8, 9, 10, 11],
[12, 13, 14, 15, 16, 17],
[18, 19, 20, 21, 22, 23],
[24, 25, 26, 27, 28, 29],
[30, 31, 32, 33, 34, 35]],
[[36, 37, 38, 39, 40, 41],
[42, 43, 44, 45, 46, 47],
[48, 49, 50, 51, 52, 53],
[54, 55, 56, 57, 58, 59],
[60, 61, 62, 63, 64, 65],
[66, 67, 68, 69, 70, 71]]])
In [47]: np.random.seed(0)
In [48]: randomize_tiles_3D(x1, H=3, W=3)
Out[48]:
array([[[21, 22, 23, 0, 1, 2],
[27, 28, 29, 6, 7, 8],
[33, 34, 35, 12, 13, 14],
[18, 19, 20, 3, 4, 5],
[24, 25, 26, 9, 10, 11],
[30, 31, 32, 15, 16, 17]],
[[36, 37, 38, 54, 55, 56],
[42, 43, 44, 60, 61, 62],
[48, 49, 50, 66, 67, 68],
[39, 40, 41, 57, 58, 59],
[45, 46, 47, 63, 64, 65],
[51, 52, 53, 69, 70, 71]]])
I already found a solution that runs much faster. I feel silly because I didn't really need a double for loop, just two separate shuffle indexes. Leaving this solution here in case anyone wants to shuffle an image block-wise in numpy.
If anyone comes up with another good solution, let me know.
# blocks of 7x7 shuffling
range1 = np.arange(4)
range2 = np.arange(4)
block_size = int(28 / 4)
for x in x1:
np.random.shuffle(range1)
np.random.shuffle(range2)
x[:] = np.block([[x[i*block_size:(i+1)*block_size]] for i in range1])
x[:] = np.block([x[:,i*block_size:(i+1)*block_size] for i in range2])
It will be more efficient to use numpy.lib.stride_tricks.as_strided to break 2D matrices into blocks.
import numpy as np
img_width, block_width = 12, 3
n = img_width // block_width
a = np.arange(img_width * img_width).reshape(img_width, img_width)
print(a)
blocks = np.lib.stride_tricks.as_strided(a, \
shape=(n, n, block_width, block_width), \
strides=(a.itemsize * np.array([n * block_width ** 2, block_width, n * block_width, 1])))
print(blocks)
blocks = blocks.reshape((n * n, block_width, block_width)) # flatten for better shuffle
np.random.shuffle(blocks)
print(blocks)
blocks = np.lib.stride_tricks.as_strided(blocks, \
shape=(n, block_width, n, block_width), \
strides=(a.itemsize * np.array([n * block_width ** 2, block_width, block_width ** 2, 1])))
shuffled = np.reshape(blocks, (img_width, img_width))
print(shuffled)
Output can be found here: blocks_shuffle_example.ipynb
Document: numpy.lib.stride_tricks.as_strided
Here's one approach:
Assume that the original image has shape (m, n), and each block has shape (w, h).
import numpy as np
# split image into tiles of w*h blocks with shape = ((m * n) / (w * h), w, h)
tiles = np.array([img_pad[x : x+w, y : y+h] for x in range(0, m, w) for y in range(0, n, h)])
np.random.shuffle(tiles)
# merge back to shape = (m, n)
mb, nb = m // w, n // h
res = np.vstack(np.hstack(tiles[i*nb : (i+1)*nb]) for i in range(mb))
Update:
res = np.vstack(np.hstack(tiles[i*nb : (i+1)*nb]) for i in range(mb))
may cause "FutureWarning: arrays to stack must be passed as a "sequence" type such as list or tuple. Support for non-sequence iterables such as generators is deprecated as of NumPy 1.16 and will raise an error in the future." while running.
Use
res = np.block([[np.hstack(tiles[i*nb : (i+1)*nb])] for i in range(mb)])
instead and there're no warnings.

How to find all possible daughters in a sequence of numbers stored in dataframe

I have a pandas DataFrame in which one column, say column1, contains a series of numbers. Each of these numbers is the result of cell mutation: the cell with number n splits into two cells numbered 2*n and 2*n+1. I want to search this column for all rows that correspond to daughters of a specific number k, i.e. the rows whose column1 value is one of {2*k, 2*k+1, 2*(2*k), 2*(2*k+1), ... }. I don't want to use a tree structure; how can I approach the solution? Thanks.
The two sequences look like the numbers whose binary expansion starts with 10 and the numbers whose binary expansion starts with 11.
Both sequences can be found directly:
import math
def f(n=2):
    """Yield the numbers whose binary expansion starts with ``10``.

    For each counter value ``n`` (2, 3, 4, ...) the generator yields ``n``
    plus the highest power of two not exceeding ``n``. With the default
    start this gives 4, 5, 8, 9, 10, 11, 16, ...

    Raises ``ValueError`` on the first ``next()`` if ``n`` is not positive.
    """
    if n < 1:
        raise ValueError("n must be a positive integer")
    while True:
        # bit_length() gives the exact position of the top bit; the
        # float-based floor(log(n, 2)) is not exact for large n.
        yield n + (1 << (n.bit_length() - 1))
        n += 1
def g(n=2):
    """Yield the numbers whose binary expansion starts with ``11``.

    For each counter value ``n`` (2, 3, 4, ...) the generator yields ``n``
    plus twice the highest power of two not exceeding ``n``. With the
    default start this gives 6, 7, 12, 13, 14, 15, 24, ...

    Raises ``ValueError`` on the first ``next()`` if ``n`` is not positive.
    """
    if n < 1:
        raise ValueError("n must be a positive integer")
    while True:
        # bit_length() gives the exact position of the top bit; the
        # float-based floor(log(n, 2)) is not exact for large n.
        yield n + 2 * (1 << (n.bit_length() - 1))
        n += 1
# Print the first 15 terms of each sequence (Python 3: print() is a
# function and generators are advanced with next()).
a, b = f(), g()
print([next(a) for i in range(15)])
print([next(b) for i in range(15)])
>>> [4, 5, 8, 9, 10, 11, 16, 17, 18, 19, 20, 21, 22, 23, 32]
>>> [6, 7, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31, 48]
EDIT:
For an arbitrary starting point, you can do the following, which I think meets your criteria.
from collections import deque


def f(k):
    """Yield the descendants of cell ``k`` in breadth-first order.

    Cell ``p`` has the two children ``2*p`` and ``2*p + 1``. The generator
    yields the children of ``k``, then the grandchildren, and so on; it
    never terminates by itself.
    """
    # A plain FIFO is enough here; no thread-safe queue is needed.
    pending = deque([k])
    while pending:
        parent = pending.popleft()
        a, b = 2*parent, 2*parent + 1
        # Remember both children so their own children are produced later.
        pending.append(a)
        pending.append(b)
        yield a
        yield b
# First 16 descendants of cell 4 (Python 3 print() and next()).
a = f(4)
print([next(a) for i in range(16)])
>>> [8, 9, 16, 17, 18, 19, 32, 33, 34, 35, 36, 37, 38, 39, 64, 65] # ...
# First 16 descendants of cell 5 (Python 3 print() and next()).
a = f(5)
print([next(a) for i in range(16)])
>>> [10, 11, 20, 21, 22, 23, 40, 41, 42, 43, 44, 45, 46, 47, 80, 81] # ...
Checking those sequences against OEIS:
f(2) - Starting 10 - A004754
f(3) - Starting 11 - A004755
f(4) - Starting 100 - A004756
f(5) - Starting 101 - A004757
f(6) - Starting 110 - A004758
f(7) - Starting 111 - A004759
...
Which means you can simply do:
import math
def f(k, n=2):
    """Yield the descendants of cell ``k`` in increasing order.

    For each counter value ``n`` (2, 3, 4, ...) with highest set bit
    ``2**L``, the generator yields ``n + (k - 1) * 2**L``. This is the
    binary expansion of ``k`` followed by the ``L`` low bits of ``n``, so
    the output is every number whose binary expansion starts with that of
    ``k`` and has at least one more digit.

    Raises ``ValueError`` on the first ``next()`` if ``n`` is not positive.
    """
    if n < 1:
        raise ValueError("n must be a positive integer")
    while True:
        # bit_length() gives the exact position of the top bit; the
        # float-based floor(log(n, 2)) is not exact for large n.
        yield n + (k - 1) * (1 << (n.bit_length() - 1))
        n += 1
# Print the first 15 descendants of cells 2..7, matching the listing below
# (Python 3 print() and next()).
for i in range(2, 8):
    a = f(i)
    print(i, [next(a) for j in range(15)])
>>> 2 [4, 5, 8, 9, 10, 11, 16, 17, 18, 19, 20, 21, 22, 23, 32]
>>> 3 [6, 7, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31, 48]
>>> 4 [8, 9, 16, 17, 18, 19, 32, 33, 34, 35, 36, 37, 38, 39, 64]
>>> 5 [10, 11, 20, 21, 22, 23, 40, 41, 42, 43, 44, 45, 46, 47, 80]
>>> 6 [12, 13, 24, 25, 26, 27, 48, 49, 50, 51, 52, 53, 54, 55, 96]
>>> 7 [14, 15, 28, 29, 30, 31, 56, 57, 58, 59, 60, 61, 62, 63, 112]
# ... where the first number is shown for clarity.
Ugly but seems to work alright. What I think you might have needed to know is the newer yield from construction. Used twice in this code. Never thought I would.
from fractions import Fraction
from itertools import count
def daughters(k):
    """Yield every descendant of cell ``k``, generation by generation.

    A cell ``n`` splits into the cells ``2*n`` and ``2*n + 1``, so the
    descendants ``d`` generations below ``k`` are exactly the ``2**d``
    consecutive integers starting at ``k * 2**d``. Generations are yielded
    in order (children first), each in increasing order.

    The generator is infinite for ``k >= 1`` and yields nothing for
    ``k <= 0``. A header line naming the cell is printed when iteration
    starts.
    """
    print('daughters of cell', k)
    if k <= 0:
        return
    for depth in count(1):
        first = k * 2**depth
        # Generation `depth` occupies one contiguous block of 2**depth cells.
        yield from range(first, first + 2**depth)
for n, d in enumerate(daughters(2)):
print (d)
if n > 15:
break
Couple of representative results:
daughters of cell 2
4
5
8
9
10
11
16
17
18
19
20
21
22
23
32
33
34
daughters of cell 3
6
7
12
13
14
15
24
25
26
27
28
29
30
31
48
49
50

Pandas repeated values

Is there a more idiomatic way of doing this in Pandas?
I want to set-up a column that repeats the integers 1 to 48, for an index of length 2000:
df = pd.DataFrame(np.zeros((2000, 1)), columns=['HH'])
h = 1
for i in range(0,2000) :
df.loc[i,'HH'] = h
if h >=48 : h =1
else : h += 1
Here is more direct and faster way:
pd.DataFrame(np.tile(np.arange(1, 49), 2000 // 48 + 1)[:2000], columns=['HH'])
The detailed step:
np.arange(1, 49) creates an array from 1 to 48 (included)
>>> l = np.arange(1, 49)
>>> l
array([ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,
18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34,
35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48])
np.tile(A, N) repeats the array A N times, so in this case you get [1 2 3 ... 48 1 2 3 ... 48 ... 1 2 3 ... 48]. You should repeat the array 2000 // 48 + 1 times in order to get at least 2000 values.
>>> r = np.tile(l, 2000 // 48 + 1)
>>> r
array([ 1, 2, 3, ..., 46, 47, 48])
>>> r.shape # The array is slightly larger than 2000
(2016,)
[:2000] retrieves the 2000 first values from the generated array to create your DataFrame.
>>> d = pd.DataFrame(r[:2000], columns=['HH'])
df = pd.DataFrame({'HH':np.append(np.tile(range(1,49),int(2000/48)), range(1,np.mod(2000,48)+1))})
That is, appending 2 arrays:
(1) np.tile(range(1,49),int(2000/48))
len(np.tile(range(1,49),int(2000/48)))
1968
(2) range(1,np.mod(2000,48)+1)
len(range(1,np.mod(2000,48)+1))
32
And constructing the DataFrame from a corresponding dictionary.

Categories