Say I have the following DataFrame, which contains repeated observations per individual (column id_ind). Hence, the first two rows belong to the first individual, the third and fourth rows belong to the second individual, and so forth.
import pandas as pd
X = pd.DataFrame.from_dict({'x1_1': {0: -0.1766214634108258, 1: 1.645852185286492, 2: -0.13348860101031038, 3: 1.9681043689968933, 4: -1.7004428240831382, 5: 1.4580091413853749, 6: 0.06504113741068565, 7: -1.2168493676768384, 8: -0.3071304478616376, 9: 0.07121332925591593}, 'x1_2': {0: -2.4207773498298844, 1: -1.0828751040719462, 2: 2.73533787008624, 3: 1.5979611987152071, 4: 0.08835542172064115, 5: 1.2209786277076156, 6: -0.44205979195950784, 7: -0.692872860268244, 8: 0.0375521181289943, 9: 0.4656030062266639}, 'x1_3': {0: -1.548320898226322, 1: 0.8457342014424675, 2: -0.21250514722879738, 3: 0.5292389938329516, 4: -2.593946520223666, 5: -0.6188958526077123, 6: 1.6949245117526974, 7: -1.0271341091035742, 8: 0.637561891142571, 9: -0.7717170035055559}, 'x2_1': {0: 0.3797245517345564, 1: -2.2364391598508835, 2: 0.6205947900678905, 3: 0.6623865847688559, 4: 1.562036259999875, 5: -0.13081282910947759, 6: 0.03914373833251773, 7: -0.995761652421108, 8: 1.0649494418154162, 9: 1.3744782478849122}, 'x2_2': {0: -0.5052556836786106, 1: 1.1464291788297152, 2: -0.5662380273138174, 3: 0.6875729143723538, 4: 0.04653136473130827, 5: -0.012885303852347407, 6: 1.5893672346098884, 7: 0.5464286050059511, 8: -0.10430829457707284, 9: -0.5441755265313813}, 'x2_3': {0: -0.9762973303149007, 1: -0.983731467806563, 2: 1.465827578266328, 3: 0.5325950414202745, 4: -1.4452121324204903, 5: 0.8148816373643869, 6: 0.470791989780882, 7: -0.17951636294180473, 8: 0.7351814781280054, 9: -0.28776723200679066}, 'x3_1': {0: 0.12751822396637064, 1: -0.21926633684030983, 2: 0.15758799357206943, 3: 0.5885412224632464, 4: 0.11916562911189271, 5: -1.6436210334529249, 6: -0.12444368631987467, 7: 1.4618564171802453, 8: 0.6847234328916137, 9: -0.23177118858569187}, 'x3_2': {0: -0.6452955690715819, 1: 1.052094761527654, 2: 0.20190339195326157, 3: 0.6839430295237913, 4: -0.2607691613858866, 5: 0.3315513026670213, 6: 0.015901139336566113, 7: 0.15243420084881903, 8: -0.7604225072161022, 9: -0.4387652927008854}, 
'x3_3': {0: -1.067058994377549, 1: 0.8026914180717286, 2: -1.9868531745912268, 3: -0.5057770735303253, 4: -1.6589569342151713, 5: 0.358172252880764, 6: 1.9238983803281329, 7: 2.2518318810978246, 8: -1.2781475121874357, 9: -0.7103081175166167}})
Y = pd.DataFrame.from_dict({'CHOICE': {0: 1.0, 1: 1.0, 2: 2.0, 3: 2.0, 4: 3.0, 5: 2.0, 6: 1.0, 7: 1.0, 8: 2.0, 9: 2.0}})
Z = pd.DataFrame.from_dict({'z1': {0: 2.4196730570917233, 1: 2.4196730570917233, 2: 2.822802255159467, 3: 2.822802255159467, 4: 2.073171091633643, 5: 2.073171091633643, 6: 2.044165101485163, 7: 2.044165101485163, 8: 2.4001241292606275, 9: 2.4001241292606275}, 'z2': {0: 0.0, 1: 0.0, 2: 0.0, 3: 0.0, 4: 1.0, 5: 1.0, 6: 1.0, 7: 1.0, 8: 0.0, 9: 0.0}, 'z3': {0: 1.0, 1: 1.0, 2: 1.0, 3: 1.0, 4: 2.0, 5: 2.0, 6: 2.0, 7: 2.0, 8: 3.0, 9: 3.0}})
# Identifier columns: id_choice labels each observation, id_ind labels the
# individual it belongs to. Named `ids` so the builtin `id` is not shadowed.
ids = pd.DataFrame.from_dict({'id_choice': {0: 1.0, 1: 2.0, 2: 3.0, 3: 4.0, 4: 5.0, 5: 6.0, 6: 7.0, 7: 8.0, 8: 9.0, 9: 10.0}, 'id_ind': {0: 1.0, 1: 1.0, 2: 2.0, 3: 2.0, 4: 3.0, 5: 3.0, 6: 4.0, 7: 4.0, 8: 5.0, 9: 5.0}} )
# Create a dataframe with all the data (column-wise: ids, X, Z, Y)
data = pd.concat([ids, X, Z, Y], axis=1)
print(data.head(4))
# id_choice id_ind x1_1 x1_2 x1_3 x2_1 x2_2 \
# 0 1.0 1.0 -0.176621 -2.420777 -1.548321 0.379725 -0.505256
# 1 2.0 1.0 1.645852 -1.082875 0.845734 -2.236439 1.146429
# 2 3.0 2.0 -0.133489 2.735338 -0.212505 0.620595 -0.566238
# 3 4.0 2.0 1.968104 1.597961 0.529239 0.662387 0.687573
#
# x2_3 x3_1 x3_2 x3_3 z1 z2 z3 CHOICE
# 0 -0.976297 0.127518 -0.645296 -1.067059 2.419673 0.0 1.0 1.0
# 1 -0.983731 -0.219266 1.052095 0.802691 2.419673 0.0 1.0 1.0
# 2 1.465828 0.157588 0.201903 -1.986853 2.822802 0.0 1.0 2.0
# 3 0.532595 0.588541 0.683943 -0.505777 2.822802 0.0 1.0 2.0
I want to perform two operations.
First, I want to convert the DataFrame data into a dictionary of DataFrames whose keys are the individual identifiers (in this particular case, numbers ranging from 1.0 to 5.0). I've done this below, as suggested here. Unfortunately, I am getting a dictionary of lists of raw numpy values and not a dictionary of DataFrames.
# Map every individual to its observations; each group is flattened into a
# list of rows (plain Python lists), not kept as a DataFrame
data_dict = (
    data.set_index('id_ind')
    .groupby('id_ind')
    .apply(lambda rows: rows.to_numpy().tolist())
    .to_dict()
)
print(data_dict.keys())
# dict_keys([1.0, 2.0, 3.0, 4.0, 5.0])
print(data_dict[1.0])
#[[1.0, -0.1766214634108258, -2.4207773498298844, -1.548320898226322, 0.3797245517345564, -0.5052556836786106, -0.9762973303149007, 0.12751822396637064, -0.6452955690715819, -1.067058994377549, 2.4196730570917233, 0.0, 1.0, 1.0], [2.0, 1.645852185286492, -1.0828751040719462, 0.8457342014424675, -2.2364391598508835, 1.1464291788297152, -0.983731467806563, -0.21926633684030983, 1.052094761527654, 0.8026914180717286, 2.4196730570917233, 0.0, 1.0, 1.0]]
Second, I want to recover the original DataFrame data reversing the previous operation. The naive approach is as follows. However, it is, of course, not producing the expected result.
# Naive approach
# With orient='index' every dictionary key becomes one row, and each
# per-observation list ends up in a single cell, so the original columns
# are not rebuilt (see the printed output below).
res = pd.DataFrame.from_dict(data_dict, orient='index')
print(res)
# 0 1
#1.0 [1.0, -0.1766214634108258, -2.4207773498298844... [2.0, 1.645852185286492, -1.0828751040719462, ...
#2.0 [3.0, -0.13348860101031038, 2.73533787008624, ... [4.0, 1.9681043689968933, 1.5979611987152071, ...
#3.0 [5.0, -1.7004428240831382, 0.08835542172064115... [6.0, 1.4580091413853749, 1.2209786277076156, ...
#4.0 [7.0, 0.06504113741068565, -0.4420597919595078... [8.0, -1.2168493676768384, -0.692872860268244,...
#5.0 [9.0, -0.3071304478616376, 0.0375521181289943,... [10.0, 0.07121332925591593, 0.4656030062266639...
This solution was inspired by @mozway's comments.
# Create a dictionary with the data for each individual.
# Iterating over a GroupBy yields (key, sub-DataFrame) pairs, so every value
# is a DataFrame that still carries the row labels of the original frame.
data_dict = {ind: rows for ind, rows in data.groupby('id_ind')}
# Convert the dictionary back into a single dataframe.
# Concatenating the values (rather than the dict itself) avoids adding the
# keys as an extra index level. Sorting on the preserved row labels then
# restores the original row order and index, even when the rows of one
# individual are not contiguous in `data`.
res = pd.concat(list(data_dict.values()), axis=0).sort_index(kind="stable")
print(res.head(4))
# id_choice id_ind x1_1 x1_2 x1_3 x2_1 x2_2 \
#0 1.0 1.0 -0.176621 -2.420777 -1.548321 0.379725 -0.505256
#1 2.0 1.0 1.645852 -1.082875 0.845734 -2.236439 1.146429
#2 3.0 2.0 -0.133489 2.735338 -0.212505 0.620595 -0.566238
#3 4.0 2.0 1.968104 1.597961 0.529239 0.662387 0.687573
#
# x2_3 x3_1 x3_2 x3_3 z1 z2 z3 CHOICE
#0 -0.976297 0.127518 -0.645296 -1.067059 2.419673 0.0 1.0 1.0
#1 -0.983731 -0.219266 1.052095 0.802691 2.419673 0.0 1.0 1.0
#2 1.465828 0.157588 0.201903 -1.986853 2.822802 0.0 1.0 2.0
#3 0.532595 0.588541 0.683943 -0.505777 2.822802 0.0 1.0 2.0
I am working on two churn-classification datasets. My problem, as you can see in the two graphs below, is that the y-axes are not on the same scale: Bank stops at 0.8 and telco-europa at 1. I would like to force the y-axis to always display 0, 0.2, 0.4, 0.6, 0.8, 1.
I have used the code shown below.
My histogram is based on this tutorial: https://www.kaggle.com/pavanraj159/telecom-customer-churn-prediction and the bank dataset is this one: https://www.kaggle.com/shrutimechlearn/churn-modelling
import plotly.graph_objs as go#visualization
import plotly.offline as py#visualization
def output_tracer(metric, color, model_performances):
    """Build one vertical bar trace for a single metric.

    The bars take their x values from the "Algorithm" column of
    ``model_performances`` and their y values from the ``metric`` column.
    ``metric`` is also used as the trace name, and ``color`` fills the bars.
    """
    marker_style = {"line": {"width": .7}, "color": color}
    return go.Bar(
        x=model_performances["Algorithm"],
        y=model_performances[metric],
        orientation="v",
        name=metric,
        marker=marker_style,
    )
def output_data(model_performances):
    """Return the list of bar traces, one per reported metric.

    The order of the (metric, colour) pairs below fixes the order of the
    traces in the returned list.
    """
    metric_colors = (
        ("1-Precision", "#6699FF"),
        ('1-Recall', "red"),
        ('1-F1-score', "#33CC99"),
        ('Accuracy', "lightgrey"),
        ('AUC', "#FFCC99"),
    )
    return [
        output_tracer(metric, shade, model_performances)
        for metric, shade in metric_colors
    ]
def output_layout(model):
    """Return the figure layout, using ``model`` as the title."""
    background = "rgb(243,243,243)"
    grid_color = 'rgb(255, 255, 255)'

    # Both axes share the same grid styling; only the x-axis sets an
    # (empty) title.
    x_axis = {
        "gridcolor": grid_color,
        "title": "",
        "zerolinewidth": 1,
        "ticklen": 5,
        "gridwidth": 2,
    }
    y_axis = {
        "gridcolor": grid_color,
        "zerolinewidth": 1,
        "ticklen": 5,
        "gridwidth": 2,
    }

    settings = {
        "title": model,
        "plot_bgcolor": background,
        "paper_bgcolor": background,
        "xaxis": x_axis,
        "yaxis": y_axis,
        "margin": {"l": 250},
        "height": 400,
    }
    return go.Layout(settings)
model = "Bank"
# Keep only the scoring rows that belong to the selected dataset
model_performances = report_df_scoring[report_df_scoring["Dataset"] == model]
fig = go.Figure(
    data=output_data(model_performances),
    layout=output_layout(model),
)
py.iplot(fig)
And here you can find the dataframe "report_df_scoring" as a dictionary, for the "Bank" dataset only:
{'Dataset': {0: 'Bank',
1: 'Bank',
2: 'Bank',
3: 'Bank',
4: 'Bank',
5: 'Bank',
6: 'Bank'},
'Algorithm': {0: 'LogisticRegressionNoSMOTE',
1: 'Logistic Regression',
2: 'SVM-linear',
3: 'SVM-rbf',
4: 'xgboost',
5: 'GaussianNB',
6: 'RandomForest'},
'W-Precision': {0: 0.8159638339642141,
1: 0.8229500536388679,
2: 0.8243426658647828,
3: 0.7956512785333915,
4: 0.8288351219512194,
5: 0.8302513223140496,
6: 0.8307514249037228},
'W-Recall': {0: 0.8324,
1: 0.7636,
2: 0.7628,
3: 0.8056,
4: 0.836,
5: 0.8176,
6: 0.8408},
'W-F1-score': {0: 0.810103868755423,
1: 0.7811452562742854,
2: 0.7807117770916884,
3: 0.7997335148514852,
4: 0.831622605929424,
5: 0.7598757585104978,
6: 0.8336474053248425},
'0-Precision': {0: 0.8493518104604381,
1: 0.9187236604455148,
2: 0.9206541490006056,
3: 0.8634596695821186,
4: 0.8834146341463415,
5: 0.8152892561983471,
6: 0.8789473684210526},
'0-Recall': {0: 0.958627648839556,
1: 0.7699293642785066,
2: 0.7669021190716448,
3: 0.8965691220988901,
4: 0.9137235116044399,
5: 0.9954591321897074,
6: 0.9268415741675076},
'0-F1-score': {0: 0.9006873666745674,
1: 0.8377710678012626,
2: 0.8367740159647675,
3: 0.8797029702970298,
4: 0.8983134920634921,
5: 0.8964107223989097,
6: 0.9022593320235756},
'1-Precision': {0: 0.6882129277566539,
1: 0.4564958283671037,
2: 0.4558303886925795,
3: 0.5361990950226244,
4: 0.62,
5: 0.8875,
6: 0.6463414634146342},
'1-Recall': {0: 0.34942084942084944,
1: 0.7393822393822393,
2: 0.747104247104247,
3: 0.4575289575289575,
4: 0.5386100386100386,
5: 0.13706563706563707,
6: 0.5115830115830116},
'1-F1-score': {0: 0.4635083226632522,
1: 0.5644804716285925,
2: 0.5662033650329188,
3: 0.49375,
4: 0.5764462809917356,
5: 0.2374581939799331,
6: 0.5711206896551725},
'Accuracy': {0: 0.8324,
1: 0.7636,
2: 0.7628,
3: 0.8056,
4: 0.836,
5: 0.8176,
6: 0.8408},
'AUC': {0: 0.6540242491302027,
1: 0.754655801830373,
2: 0.7570031830879459,
3: 0.6770490398139237,
4: 0.7261667751072393,
5: 0.5662623846276723,
6: 0.7192122928752596},
'SMOTE': {0: 'No',
1: 'Yes',
2: 'Yes',
3: 'Yes',
4: 'Yes',
5: 'Yes',
6: 'Yes'},
'top3var': {0: "['numofproducts_4', 'numofproducts_3', 'geography_germany']",
1: "['numofproducts_4', 'numofproducts_3', 'geography_germany']",
2: "['numofproducts_4', 'numofproducts_3', 'age']",
3: "['empty']",
4: "['numofproducts_2', 'numofproducts_1', 'isactivemember']",
5: "['empty']",
6: "['age', 'numofproducts_2', 'balance']"}}
You can access and edit the range of any axis of your figure using:
fig['layout']['yaxis']['range']
And set the range like:
# Set the y-axis range to the interval [0, 1]
fig['layout']['yaxis']['range'] = [0, 1]
The same thing goes for your tickvals:
# Place the y-axis ticks at 0, 0.2, 0.4, 0.6, 0.8 and 1
fig['layout']['yaxis']['tickvals'] = [0, 0.2, 0.4, 0.6, 0.8, 1]
You can use:
# Fix the axis range as well as the tick positions: tickvals only says where
# ticks go, it does not extend the axis. Without an explicit range, the 1.0
# tick is not drawn when the plotted values stop below it.
fig.update_yaxes(range=[0, 1], tickvals=[0, 0.2, 0.4, 0.6, 0.8, 1])
Your code example does not work for me because "report_df_scoring" is missing.
I am trying to run a decision tree regressor on my data, but whenever I try to run my code, I get this error:
ValueError: Number of labels=78177 does not match number of samples=312706
# feature selection
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor

target = ['sale_price']

# Hold out 20% of the rows for evaluation; both frames keep every column.
train, test = train_test_split(housing_data, test_size=0.2)

# fit() expects (features, labels) taken from the SAME rows. Passing
# (train, test) gives it the 80% frame as features and the 20% frame as
# labels, which is the source of
# "Number of labels=78177 does not match number of samples=312706".
# Separate the target column from the features within each split instead.
X_train = train.drop(columns=target)
y_train = train[target]
X_test = test.drop(columns=target)
y_test = test[target]

# "entropy" is a classification criterion and is not accepted by
# DecisionTreeRegressor, so the regressor's default criterion is used.
regression_tree = DecisionTreeRegressor(random_state=100,
                                        max_depth=4, min_samples_leaf=5)
regression_tree.fit(X_train, y_train)
I have added a sample of my data; hopefully this gives you more context to better understand my question and problem:
{'Age of House at Sale': {0: 6,
1: 2016,
2: 92,
3: 42,
4: 90,
5: 2012,
6: 89,
7: 3,
8: 2015,
9: 104},
'AreaSource': {0: 2.0,
1: 7.0,
2: 2.0,
3: 2.0,
4: 2.0,
5: 2.0,
6: 2.0,
7: 2.0,
8: 2.0,
9: 2.0},
'AssessLand': {0: 9900.0,
1: 1571850.0,
2: 1548000.0,
3: 36532350.0,
4: 2250000.0,
5: 3110400.0,
6: 2448000.0,
7: 1354500.0,
8: 1699200.0,
9: 1282500.0},
'AssessTot': {0: 34380.0,
1: 1571850.0,
2: 25463250.0,
3: 149792400.0,
4: 27166050.0,
5: 5579990.0,
6: 28309500.0,
7: 23965650.0,
8: 3534300.0,
9: 11295000.0},
'BldgArea': {0: 2688.0,
1: 0.0,
2: 304650.0,
3: 2548000.0,
4: 356000.0,
5: 382746.0,
6: 290440.0,
7: 241764.0,
8: 463427.0,
9: 547000.0},
'BldgClass': {0: 72,
1: 89,
2: 80,
3: 157,
4: 150,
5: 44,
6: 92,
7: 43,
8: 39,
9: 61},
'BldgDepth': {0: 50.0,
1: 0.0,
2: 92.0,
3: 0.0,
4: 100.33,
5: 315.0,
6: 125.0,
7: 100.0,
8: 0.0,
9: 80.92},
'BldgFront': {0: 20.0,
1: 0.0,
2: 335.0,
3: 0.0,
4: 202.0,
5: 179.0,
6: 92.0,
7: 500.0,
8: 0.0,
9: 304.0},
'BsmtCode': {0: 5.0,
1: 5.0,
2: 5.0,
3: 5.0,
4: 2.0,
5: 5.0,
6: 2.0,
7: 2.0,
8: 5.0,
9: 5.0},
'CD': {0: 310.0,
1: 302.0,
2: 302.0,
3: 318.0,
4: 302.0,
5: 301.0,
6: 302.0,
7: 301.0,
8: 301.0,
9: 302.0},
'ComArea': {0: 0.0,
1: 0.0,
2: 304650.0,
3: 2548000.0,
4: 30000.0,
5: 11200.0,
6: 290440.0,
7: 27900.0,
8: 4884.0,
9: 547000.0},
'CommFAR': {0: 0.0,
1: 2.0,
2: 2.0,
3: 2.0,
4: 0.0,
5: 0.0,
6: 10.0,
7: 2.0,
8: 0.0,
9: 2.0},
'Council': {0: 41.0,
1: 33.0,
2: 33.0,
3: 46.0,
4: 33.0,
5: 33.0,
6: 33.0,
7: 33.0,
8: 33.0,
9: 35.0},
'Easements': {0: 0.0,
1: 0.0,
2: 0.0,
3: 1.0,
4: 0.0,
5: 0.0,
6: 0.0,
7: 0.0,
8: 0.0,
9: 0.0},
'ExemptLand': {0: 0.0,
1: 1571850.0,
2: 0.0,
3: 0.0,
4: 2250000.0,
5: 0.0,
6: 0.0,
7: 932847.0,
8: 0.0,
9: 0.0},
'ExemptTot': {0: 0.0,
1: 1571850.0,
2: 0.0,
3: 0.0,
4: 27166050.0,
5: 0.0,
6: 11304900.0,
7: 23543997.0,
8: 0.0,
9: 0.0},
'FacilFAR': {0: 0.0,
1: 6.5,
2: 0.0,
3: 0.0,
4: 4.8,
5: 4.8,
6: 10.0,
7: 3.0,
8: 5.0,
9: 4.8},
'FactryArea': {0: 0.0,
1: 0.0,
2: 0.0,
3: 0.0,
4: 0.0,
5: 0.0,
6: 0.0,
7: 0.0,
8: 0.0,
9: 547000.0},
'GarageArea': {0: 0.0,
1: 0.0,
2: 0.0,
3: 1285000.0,
4: 0.0,
5: 0.0,
6: 0.0,
7: 22200.0,
8: 0.0,
9: 0.0},
'HealthArea': {0: 6410.0,
1: 1000.0,
2: 2300.0,
3: 8822.0,
4: 2300.0,
5: 400.0,
6: 2300.0,
7: 700.0,
8: 500.0,
9: 9300.0},
'HealthCent': {0: 35.0,
1: 36.0,
2: 38.0,
3: 35.0,
4: 38.0,
5: 30.0,
6: 38.0,
7: 30.0,
8: 30.0,
9: 36.0},
'IrrLotCode': {0: 1, 1: 1, 2: 0, 3: 0, 4: 1, 5: 1, 6: 0, 7: 1, 8: 0, 9: 0},
'LandUse': {0: 2.0,
1: 10.0,
2: 5.0,
3: 5.0,
4: 8.0,
5: 4.0,
6: 5.0,
7: 3.0,
8: 3.0,
9: 6.0},
'LotArea': {0: 2252.0,
1: 134988.0,
2: 32000.0,
3: 905000.0,
4: 20267.0,
5: 57600.0,
6: 12500.0,
7: 50173.0,
8: 44704.0,
9: 113800.0},
'LotDepth': {0: 100.0,
1: 275.33,
2: 335.92,
3: 859.0,
4: 100.33,
5: 320.0,
6: 125.0,
7: 200.0,
8: 281.86,
9: 204.0},
'LotFront': {0: 24.0,
1: 490.5,
2: 92.42,
3: 930.0,
4: 202.0,
5: 180.0,
6: 100.0,
7: 521.25,
8: 225.08,
9: 569.0},
'LotType': {0: 5.0,
1: 5.0,
2: 3.0,
3: 3.0,
4: 3.0,
5: 3.0,
6: 3.0,
7: 1.0,
8: 5.0,
9: 3.0},
'NumBldgs': {0: 1.0,
1: 0.0,
2: 1.0,
3: 4.0,
4: 1.0,
5: 1.0,
6: 1.0,
7: 1.0,
8: 2.0,
9: 13.0},
'NumFloors': {0: 2.0,
1: 0.0,
2: 13.0,
3: 2.0,
4: 15.0,
5: 0.0,
6: 37.0,
7: 6.0,
8: 20.0,
9: 8.0},
'OfficeArea': {0: 0.0,
1: 0.0,
2: 264750.0,
3: 0.0,
4: 30000.0,
5: 1822.0,
6: 274500.0,
7: 4200.0,
8: 0.0,
9: 0.0},
'OtherArea': {0: 0.0,
1: 0.0,
2: 39900.0,
3: 0.0,
4: 0.0,
5: 0.0,
6: 0.0,
7: 0.0,
8: 0.0,
9: 0.0},
'PolicePrct': {0: 70.0,
1: 84.0,
2: 84.0,
3: 63.0,
4: 84.0,
5: 90.0,
6: 84.0,
7: 94.0,
8: 90.0,
9: 88.0},
'ProxCode': {0: 0.0,
1: 0.0,
2: 0.0,
3: 0.0,
4: 0.0,
5: 0.0,
6: 0.0,
7: 1.0,
8: 0.0,
9: 0.0},
'ResArea': {0: 2172.0,
1: 0.0,
2: 0.0,
3: 0.0,
4: 0.0,
5: 371546.0,
6: 0.0,
7: 213864.0,
8: 458543.0,
9: 0.0},
'ResidFAR': {0: 2.0,
1: 7.2,
2: 0.0,
3: 0.0,
4: 2.43,
5: 2.43,
6: 10.0,
7: 3.0,
8: 5.0,
9: 0.0},
'RetailArea': {0: 0.0,
1: 0.0,
2: 0.0,
3: 1263000.0,
4: 0.0,
5: 9378.0,
6: 15940.0,
7: 0.0,
8: 4884.0,
9: 0.0},
'SHAPE_Area': {0: 2316.8863224,
1: 140131.577176,
2: 34656.4472405,
3: 797554.847834,
4: 21360.1476315,
5: 58564.8643115,
6: 12947.145471,
7: 50772.624868800005,
8: 47019.5677861,
9: 118754.78573699998},
'SHAPE_Leng': {0: 249.41135038849998,
1: 1559.88914353,
2: 890.718521021,
3: 3729.78685686,
4: 620.761169374,
5: 1006.33799946,
6: 460.03168012300006,
7: 1385.27352839,
8: 992.915660585,
9: 1565.91477261},
'SanitDistr': {0: 10.0,
1: 2.0,
2: 2.0,
3: 18.0,
4: 2.0,
5: 1.0,
6: 2.0,
7: 1.0,
8: 1.0,
9: 2.0},
'SanitSub': {0: 21,
1: 23,
2: 31,
3: 22,
4: 31,
5: 21,
6: 23,
7: 7,
8: 12,
9: 22},
'SchoolDist': {0: 19.0,
1: 13.0,
2: 13.0,
3: 22.0,
4: 13.0,
5: 14.0,
6: 13.0,
7: 14.0,
8: 14.0,
9: 14.0},
'SplitZone': {0: 1, 1: 1, 2: 1, 3: 1, 4: 1, 5: 1, 6: 1, 7: 1, 8: 0, 9: 1},
'StrgeArea': {0: 0.0,
1: 0.0,
2: 0.0,
3: 0.0,
4: 0.0,
5: 0.0,
6: 0.0,
7: 1500.0,
8: 0.0,
9: 0.0},
'UnitsRes': {0: 2.0,
1: 0.0,
2: 0.0,
3: 0.0,
4: 0.0,
5: 522.0,
6: 0.0,
7: 234.0,
8: 470.0,
9: 0.0},
'UnitsTotal': {0: 2.0,
1: 0.0,
2: 0.0,
3: 123.0,
4: 1.0,
5: 525.0,
6: 102.0,
7: 237.0,
8: 472.0,
9: 1.0},
'YearAlter1': {0: 0.0,
1: 0.0,
2: 1980.0,
3: 0.0,
4: 1998.0,
5: 0.0,
6: 2009.0,
7: 2012.0,
8: 0.0,
9: 0.0},
'YearAlter2': {0: 0.0,
1: 0.0,
2: 0.0,
3: 0.0,
4: 2000.0,
5: 0.0,
6: 0.0,
7: 0.0,
8: 0.0,
9: 0.0},
'ZipCode': {0: 11220.0,
1: 11201.0,
2: 11201.0,
3: 11234.0,
4: 11201.0,
5: 11249.0,
6: 11241.0,
7: 11211.0,
8: 11249.0,
9: 11205.0},
'ZoneDist1': {0: 24,
1: 76,
2: 5,
3: 64,
4: 24,
5: 24,
6: 30,
7: 74,
8: 45,
9: 27},
'ZoneMap': {0: 3,
1: 19,
2: 19,
3: 22,
4: 19,
5: 19,
6: 19,
7: 2,
8: 19,
9: 19},
'building_class': {0: 141,
1: 97,
2: 87,
3: 176,
4: 168,
5: 8,
6: 102,
7: 46,
8: 97,
9: 66},
'building_class_at_sale': {0: 143,
1: 98,
2: 89,
3: 179,
4: 171,
5: 7,
6: 103,
7: 49,
8: 98,
9: 69},
'building_class_category': {0: 39,
1: 71,
2: 31,
3: 38,
4: 86,
5: 40,
6: 80,
7: 75,
8: 71,
9: 41},
'commercial_units': {0: 1,
1: 0,
2: 0,
3: 123,
4: 1,
5: 0,
6: 102,
7: 3,
8: 0,
9: 1},
'gross_sqft': {0: 0.0,
1: 0.0,
2: 304650.0,
3: 2548000.0,
4: 356000.0,
5: 0.0,
6: 290440.0,
7: 241764.0,
8: 0.0,
9: 547000.0},
'land_sqft': {0: 0.0,
1: 134988.0,
2: 32000.0,
3: 905000.0,
4: 20267.0,
5: 57600.0,
6: 12500.0,
7: 50173.0,
8: 44704.0,
9: 113800.0},
'neighborhood': {0: 43,
1: 48,
2: 6,
3: 44,
4: 6,
5: 40,
6: 6,
7: 28,
8: 40,
9: 56},
'residential_units': {0: 0,
1: 0,
2: 0,
3: 0,
4: 0,
5: 0,
6: 0,
7: 234,
8: 0,
9: 0},
'sale_date': {0: 2257,
1: 4839,
2: 337,
3: 638,
4: 27,
5: 1458,
6: 2450,
7: 3276,
8: 5082,
9: 1835},
'sale_price': {0: 499401179.0,
1: 345000000.0,
2: 340000000.0,
3: 276947000.0,
4: 202500000.0,
5: 185445000.0,
6: 171000000.0,
7: 169000000.0,
8: 165000000.0,
9: 161000000.0},
'tax_class': {0: 3, 1: 3, 2: 3, 3: 3, 4: 3, 5: 3, 6: 3, 7: 7, 8: 3, 9: 3},
'total_units': {0: 1,
1: 0,
2: 0,
3: 123,
4: 1,
5: 0,
6: 102,
7: 237,
8: 0,
9: 1},
'zip_code': {0: 11201,
1: 11201,
2: 11201,
3: 11234,
4: 11201,
5: 11249,
6: 11241,
7: 11211,
8: 11249,
9: 11205}}