Sklearn - Found input variables with inconsistent numbers of samples: [16512, 4128] - python

Going through the second chapter of Hands-On Machine Learning with Scikit-Learn & TensorFlow, running to the error stated above. This happens when I try to implement the following line:
linReg.fit(housingPrepared, housing_labels)
Researching online it looks like it has to do something with the dimensions of my features and my labels not matching up. Printing the shape of housingPrepared (X) and housing_labels (Y) yields the following result:
(16512, 16) (4128,)
I've spent the last hour going through line by line to see if I missed a line in this chapter, can't find anything. Wondering if someone here might have an intuition on where a potential solution for this problem could be.
Thank you so much in advance. All my code up to the problem line is posted below:
import os
import tarfile
from six.moves import urllib
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from zlib import crc32
from sklearn.model_selection import train_test_split, StratifiedShuffleSplit
from pandas.plotting import scatter_matrix
from sklearn.preprocessing import Imputer, OneHotEncoder, StandardScaler, LabelEncoder
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.pipeline import Pipeline, FeatureUnion
from CategoricalEncoder import CategoricalEncoder
from sklearn.linear_model import LinearRegression
from sklearn.utils.validation import check_array
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import cross_val_score
from sklearn.tree import DecisionTreeRegressor
DOWNLOAD_ROOT = "https://raw.githubusercontent.com/ageron/handson-ml/master/"
HOUSING_PATH = os.path.join("datasets","housing")
HOUSING_URL = DOWNLOAD_ROOT + "datasets/housing/housing.tgz"
def fetchHousingData(housingUrl=HOUSING_URL, housingPath=HOUSING_PATH):
if not os.path.isdir(housingPath):
os.makedirs(housingPath)
tgzPath = os.path.join(housingPath, "housing.tgz")
urllib.request.urlretrieve(housingUrl, tgzPath)
housingTgz = tarfile.open(tgzPath)
housingTgz.extractall(path=housingPath)
housingTgz.close()
def loadHousingData(housingPath=HOUSING_PATH):
return pd.read_csv("https://raw.githubusercontent.com/ageron/handson-ml/master/datasets/housing/housing.csv")
housing = loadHousingData()
#plt.hist(housing['longitude'],bins=50)
#plt.show()
def splitTrainTesT(data, testRatio):
shuffled_indices = np.random.permutation(len(data))
testSetSize = int(len(data)* testRatio)
testIndices = shuffled_indices[:testSetSize]
trainIndices = shuffled_indices[testSetSize:]
return data.iloc[trainIndices], data.iloc[testIndices]
def testSetCheck(identifier, testRatio):
return crc32(np.int64(identifier)) & 0xffffffff < testRatio * 2 ** 32
def splitTrainTestByID(data, testRatio, idColumn):
ids = data[idColumn]
inTestSet = ids.apply(lambda id_: testSetCheck(id_, testRatio))
return data.loc[~inTestSet], data.loc[inTestSet]
#housingWithID = housing.reset_index()
#trainSet, testSet = splitTrainTestByID(housingWithID,0.2,"index")
trainSet, testSet = train_test_split(housing,test_size=0.2,random_state=42)
housing["income_cat"] = np.ceil(housing["median_income"]/1.5)
housing["income_cat"].where(housing["income_cat"] < 5, 5.0, inplace=True)
#plt.hist(housing["income_cat"])
#plt.show()
split = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=42)
for trainIndex, testIndex in split.split(housing, housing["income_cat"]):
stratTrainSet = housing.loc[trainIndex]
stratTestSet = housing.loc[testIndex]
for set in (stratTrainSet, stratTestSet):
set.drop("income_cat", axis=1, inplace=True)
housing = stratTrainSet.copy()
#print(housing)
#plt.scatter(x=housing["latitude"],y=housing["longitude"], alpha=0.4)
#plt.show()
corr_matrix = housing.corr()
#print(corr_matrix["median_house_value"].sort_values(ascending=False))
#attribues = ["median_house_value", "median_income", "total_rooms", "housing_median_age"]
#scatter_matrix(housing[attribues], figsize=(12,8))
#plt.show()
""" PREPARING DATA FOR MACHINE LEARNING ALGORITHMS"""
housing = stratTrainSet.drop("median_house_value", axis=1)
housing_labels = stratTestSet["median_house_value"].copy()
housing.dropna(subset=["total_bedrooms"])
imputer = Imputer(strategy="median")
housingNum = housing.drop("ocean_proximity", axis=1)
imputer.fit(housingNum)
X = imputer.transform(housingNum)
housingTr = pd.DataFrame(X, columns=housingNum.columns)
housingCat = housing["ocean_proximity"]
housingCatEncoded, housingCategories = housingCat.factorize()
encoder = OneHotEncoder()
housingCat1Hot = encoder.fit_transform(housingCatEncoded.reshape(-1,1))
"""Custom Transformers For Rooms Per Household, etc"""
roomsIX, bedroomsIX, populationIX, householdsIX = 3,4,5,6
class CombinedAttributesAdder(BaseEstimator, TransformerMixin):
def __init__(self, addBedroomsPerRoom = True):
self.addBedroomsPerRoom = addBedroomsPerRoom
def fit(self, X, y=None):
return self
def transform(self, X, y=None):
roomsPerHousehold = X[:,roomsIX]/X[:,householdsIX]
populationPerHousehold = X[:,populationIX]/X[:,householdsIX]
if self.addBedroomsPerRoom:
bedroomsPerRoom = X[:,bedroomsIX]/X[:,roomsIX]
return np.c_[X, roomsPerHousehold, populationPerHousehold, bedroomsPerRoom]
else:
return np.c_[X, roomsPerHousehold, populationPerHousehold]
attrAdder = CombinedAttributesAdder(addBedroomsPerRoom=False)
housingExtraAttribs = attrAdder.transform(housing.values)
numPipeline = Pipeline([('imputer', Imputer(strategy='median')),
('attribs_adder', CombinedAttributesAdder()),
('std_scaler', StandardScaler()),
])
housingNumTr = numPipeline.fit_transform(housingNum)
class DataFrameSelector(BaseEstimator, TransformerMixin):
def __init__(self, attributeNames):
self.attributeNames = attributeNames
def fit(self, X, y=None):
return self
def transform(self, X):
return X[self.attributeNames].values
numAttribs = list(housingNum)
catAttribs = ["ocean_proximity"]
numPipeline = Pipeline([('selector', DataFrameSelector(numAttribs)),
('imputer', Imputer(strategy='median')),
('attribs_adder', CombinedAttributesAdder()),
('std_scaler', StandardScaler()),])
"""UPDATE SKLEARN TO INCLUDE CATEGORICAL ENCODER LIBRARY"""
catPipeline = Pipeline([('selector', DataFrameSelector(catAttribs)),
('cat_encoder', CategoricalEncoder(encoding='onehot-dense')),
])
fullPipeline = FeatureUnion(transformer_list=[("num_pipeline", numPipeline), ("cat_pipeline", catPipeline),])
housingPrepared = fullPipeline.fit_transform(housing)
linReg = LinearRegression()
print(housingPrepared.shape, housing_labels.shape)
linReg.fit(housingPrepared, housing_labels)

I believe the problem is in these two lines:
housing = stratTrainSet.drop("median_house_value", axis=1)
housing_labels = stratTestSet["median_house_value"].copy()
Change it to:
housing = stratTrainSet.drop("median_house_value", axis=1)
housing_labels = stratTrainSet["median_house_value"].copy()
and you're good to go.

Related

python eli5 show_weights() show nothing

In my program,
from sklearn import linear_model
from keras.wrappers.scikit_learn import KerasClassifier,KerasRegressor
import eli5
from eli5.sklearn import PermutationImportance
if __name__ == '__main__':
(cnn_tube_par_X_train, cnn_tube_par_X_test, cnn_tube_Y_train, cnn_tube_Y_test) = read_file()
sc_X = StandardScaler()
sc_Y = StandardScaler()
sc_cnn_tube_par_X_train = sc_X.fit_transform(cnn_tube_par_X_train.iloc[:, 1:6].values)
sc_cnn_tube_par_X_test = sc_X.transform(cnn_tube_par_X_test.iloc[:, 1:6].values)
sc_cnn_tube_eff_Y_train = sc_Y.fit_transform(cnn_tube_Y_train.iloc[:, -1:].values)
sc_cnn_tube_eff_Y_test = sc_Y.transform(cnn_tube_Y_test.iloc[:, -1:].values)
MLR_pImportance(sc_cnn_tube_par_X_train,sc_cnn_tube_par_X_test,sc_cnn_tube_eff_Y_train,sc_cnn_tube_eff_Y_test)
def MLR_pImportance(sc_mlr_tube_par_X_train,sc_mlr_tube_par_X_test,sc_mlr_tube_eff_Y_train,sc_mlr_tube_eff_Y_test):
mlr = linear_model.LinearRegression()
mlr.fit(sc_mlr_tube_par_X_train,sc_mlr_tube_eff_Y_train)
perm = PermutationImportance(mlr,random_state=1).fit(sc_mlr_tube_par_X_test,sc_mlr_tube_eff_Y_test)
print(perm.feature_importances_)
print(perm.feature_importances_std_)
eli5.show_weights(perm)
The results show that :
[0.63895352 0.1270582 0.06904505 0.32131836 0.02549574]
[0.02766096 0.01535046 0.01789114 0.02761288 0.01048179]
these are the result of
print(perm.feature_importances_)
print(perm.feature_importances_std_)
but the sebtance:
eli5.show_weights(perm)
show nothing
could you tell the reason,and how to solve it

Upto which part i need to dump my K-mean Clustering Model

Upto which part i need to dump my K-mean Clustering Model,So that the input will be given in recommend songs and the output will be display
This is my below ml program
I used K-means clustering for cluster both genres & songs for music recommendation
import os
import numpy as np
import pandas as pd
import json
import seaborn as sns
import plotly.express as px
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.manifold import TSNE
import sys
import spotipy
import spotipy.util as util
from spotipy.oauth2 import SpotifyClientCredentials
from collections import defaultdict
import sklearn
from sklearn.decomposition import PCA
from sklearn.metrics import euclidean_distances
from scipy.spatial.distance import cdist
from sklearn.decomposition import PCA
import warnings
warnings.filterwarnings("ignore")
from collections import defaultdict
from sklearn.metrics import euclidean_distances
from scipy.spatial.distance import cdist
import difflib
data = pd.read_csv('./data.csv')
genre_data = pd.read_csv("./data_w_genres.csv")
year_data = pd.read_csv("./data_by_year.csv")
artist_data = pd.read_csv("./data_by_artist.csv")
sklearn.cluster.KMeans(n_clusters=8, init='k-means++', n_init=10, max_iter=300, tol=0.0001, verbose=0, random_state=None,copy_x=True, algorithm='auto')
cluster_pipeline = Pipeline([('scaler', StandardScaler()), ('kmeans', KMeans(n_clusters=10))])
X = genre_data.select_dtypes(np.number)
cluster_pipeline.fit(X)
genre_data['cluster'] = cluster_pipeline.predict(X)
tsne_pipeline = Pipeline([('scaler', StandardScaler()), ('tsne', TSNE(n_components=2, verbose=1))])
genre_embedding = tsne_pipeline.fit_transform(X)
projection = pd.DataFrame(columns=['x', 'y'], data=genre_embedding)
projection['genres'] = genre_data['genres']
projection['cluster'] = genre_data['cluster']
song_cluster_pipeline = Pipeline([('scaler', StandardScaler()),
('kmeans', KMeans(n_clusters=20,
verbose=False))
], verbose=False)
X = data.select_dtypes(np.number)
number_cols = list(X.columns)
song_cluster_pipeline.fit(X)
song_cluster_labels = song_cluster_pipeline.predict(X)
data['cluster_label'] = song_cluster_labels
pca_pipeline = Pipeline([('scaler', StandardScaler()), ('PCA', PCA(n_components=2))])
song_embedding = pca_pipeline.fit_transform(X)
projection = pd.DataFrame(columns=['x', 'y'], data=song_embedding)
projection['title'] = data['name']
projection['cluster'] = data['cluster_label']
os.environ['SPOTIPY_CLIENT_ID'] = 'd267a64373c94780b083db4b58cb5c76'
os.environ['SPOTIPY_CLIENT_SECRET'] = '8244aa724749468a8314c7305ea9db44'
sp = spotipy.Spotify(auth_manager=SpotifyClientCredentials(client_id=os.environ["SPOTIPY_CLIENT_ID"],
client_secret=os.environ["SPOTIPY_CLIENT_SECRET"]))
def find_song(name, year):
song_data = defaultdict()
results = sp.search(q= 'track: {} year: {}'.format(name,year), limit=1)
if results['tracks']['items'] == []:
return None
results = results['tracks']['items'][0]
track_id = results['id']
audio_features = sp.audio_features(track_id)[0]
song_data['name'] = [name]
song_data['year'] = [year]
song_data['explicit'] = [int(results['explicit'])]
song_data['duration_ms'] = [results['duration_ms']]
song_data['popularity'] = [results['popularity']]
for key, value in audio_features.items():
song_data[key] = value
return pd.DataFrame(song_data)
number_cols = ['valence', 'year', 'acousticness', 'danceability', 'duration_ms', 'energy', 'explicit','instrumentalness', 'key', 'liveness', 'loudness', 'mode', 'popularity', 'speechiness', 'tempo']
def get_song_data(song, spotify_data):
try:
song_data = spotify_data[(spotify_data['name'] == song['name'])
& (spotify_data['year'] == song['year'])].iloc[0]
return song_data
except IndexError:
return find_song(song['name'], song['year'])
def get_mean_vector(song_list, spotify_data):
song_vectors = []
for song in song_list:
song_data = get_song_data(song, spotify_data)
if song_data is None:
print('Warning: {} does not exist in Spotify or in database'.format(song['name']))
continue
song_vector = song_data[number_cols].values
song_vectors.append(song_vector)
song_matrix = np.array(list(song_vectors))
return np.mean(song_matrix, axis=0)
def flatten_dict_list(dict_list):
flattened_dict = defaultdict()
for key in dict_list[0].keys():
flattened_dict[key] = []
for dictionary in dict_list:
for key, value in dictionary.items():
flattened_dict[key].append(value)
return flattened_dict
def recommend_songs(song_list, data, n_songs=30):
metadata_cols = ['name', 'year', 'artists']
song_dict = flatten_dict_list(song_list)
song_center = get_mean_vector(song_list, data)
scaler = song_cluster_pipeline.steps[0][1]
scaled_data = scaler.transform(data[number_cols])
scaled_song_center = scaler.transform(song_center.reshape(1, -1))
distances = cdist(scaled_song_center, scaled_data, 'cosine')
index = list(np.argsort(distances)[:, :n_songs][0])
rec_songs = data.iloc[index]
rec_songs = rec_songs[~rec_songs['name'].isin(song_dict['name'])]
dictionary = rec_songs[metadata_cols].to_dict(orient='records')
json_ob = json.dumps(dictionary,index = 2);
with open("output.json","w") as outfile:
outfile.write(json_ob);
recommend_songs([{'name': 'Come As You Are', 'year':1991},
{'name': 'Smells Like Teen Spirit', 'year': 1991},
{'name': 'Lithium', 'year': 1992},
{'name': 'All Apologies', 'year': 1993},
{'name': 'Stay Away', 'year': 1993}], data)
I tried joblib but i don't know which part must be pickled so only input must be given

Machine Learning Web Application error in Flask

I am a newbie in machine learning model deployment and have been trying to deploy a simple machine learning model on car price prediction in Flask and Heroku. Have made the model using sklearn pipeline and transformer. The code runs perfectly on the Jupyter Notebook.
Deploying it in Heroku through Github, shows Build succeeded. However on launching the application, its shows application error.
It seems there something is wrong with the app.py file code for Flask. Any help or insight would be really helpful. Thank You
Jupyter Notebook Code:
import numpy as np
import pandas as pd
import matplotlib as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import StandardScaler
data = pd.read_csv('car data.csv')
data['current_year'] = 2020
data['car_age'] = data['current_year'] - data['Year']
num_features = [col for col in data.columns if data[col].dtype != 'O' and col != 'Selling_Price']
cat_features = [col for col in data.columns if data[col].dtype == 'O']
X= data.drop(['Selling_Price'], axis=1)
y = data['Selling_Price']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=42)
num_transformer = Pipeline(steps = [('scaler', StandardScaler())])
cat_transformer = Pipeline(steps =[('OneHot', OneHotEncoder(handle_unknown='ignore'))])
preprocessor = ColumnTransformer(transformers = [('numerical_transformer', num_transformer, num_features),
('categorical_transformer', cat_transformer, cat_features)])
from sklearn.ensemble import RandomForestRegressor
rf = RandomForestRegressor()
grid_param = {'n_estimators' :[100, 200, 500, 800, 1000]}
grid = GridSearchCV(estimator = rf, param_grid= grid_param)
model = Pipeline(steps= [('preprocessor',preprocessor), ('grid_regressor', grid)])
model.fit(X_train, y_train)
y_predict = model.predict(X_test)
from sklearn.metrics import mean_absolute_error, mean_squared_error
MAE = mean_absolute_error(y_test, y_predict)
MSE = mean_squared_error(y_test, y_predict)
RMSE = np.sqrt(MSE)
import pickle
file = open('regression_model.pkl', 'wb')
pickle.dump(model, file)
Code for the app.py file for Flask:
from flask import Flask, render_template, request
import jsonify
import pickle
import numpy as np
import sklearn
model = pickle.load(open('regression_model.pkl', 'rb'))
app = Flask(__name__)
#app.route('/', methods=['GET'])
def Home():
return render_template('index.html')
#app.route("/predict", methods= ['POST']
def predict():
if request.method == 'POST':
Year = int(request.form['Year'])
Selling_Price = float(request.form['Selling_Price'])
Present_Price = float(request.form['Present_Price'])
Kms_Driven = int(request.form['Kms_Driven'])
Owner = int(request.form['Owner'])
car_age = 2020 - Year
Fuel_Type_Petrol = request.form['Fuel_Type_Petrol']
if(Fuel_Type_Petrol=='Petrol'):
Fuel_Type_Petrol=1
Fuel_Type_Diesel=0
elif (Fuel_Type_Petrol =='Diesel'):
Fuel_Type_Petrol=0
Fuel_Type_Diesel=1
else:
Fuel_Type_Petrol=0
Fuel_Type_Diesel=0
Seller_Type_Individual = request.form['Seller_Type_Individual']
if(Seller_Type_Individual =='Individual'):
Seller_Type_Individual=1
else:
Seller_Type_Individual=0
Transmission_Mannual=request.form['Transmission_Mannual']
if(Transmission_Mannual=='Mannual'):
Transmission_Mannual=1
else:
Transmission_Mannual=0
data = [Selling_Price, Present_Price, Kms_Driven, Owner, Fuel_Type_Petrol, Fuel_Type_Diesel, Seller_Type_Individual, Transmission_Mannual, car_age]
prediction = model.predict([data])
output = round(prediction[0], 2)
if output <0:
return render_template('index.html', prediction_texts="Sorry you cannot sell this car")
else:
return render_template('index.html', prediction_texts= "You can sell the car at {}".format(output))
else:
render_template('index.html')
if __name__ == '__main__':
app.run(debug=True)

AttributeError: 'tuple' object has no attribute when selecting model

I want to select model using the following code.
import numpy as np
import pandas as pd
from math import log
from sklearn import model_selection
from sklearn.model_selection import train_test_split as split
from sklearn.metrics import accuracy_score as accuracy
import xgboost as xgb
dataframe_training = pd.read_csv("train.csv")
train_tag=dataframe_training['tags']
train_dummies_tags= train_tag.str.get_dummies(",")
dataframe_training=dataframe_training.filter(items=['review_ratio','log_date_difference','log_price'])
x_train=pd.concat([dataframe_training,train_dummies_tags], axis=1, sort=False)
y_train=dataframe_training.filter(items=['playtime_forever'])
Xtrain, Xtest, Ytrain, Ytest = split(x_train,y_train, test_size=0.25, random_state=7)
from sklearn.ensemble import *
AllRegressorModel = [xgb.XGBRegressor,AdaBoostRegressor,BaggingRegressor,ExtraTreesRegressor,GradientBoostingRegressor,RandomForestRegressor]
def Model_Selection_By_Cross_Valid():
ThisRound_SelectedModel = None,
ThisRound_SelectedModel_Name = None,
ThisRound_SelectedModel_Score = None,
for temp_select_model_name in AllRegressorModel:
kfold = model_selection.KFold(n_splits=10, random_state=7),
print (kfold),
temp_model= temp_select_model_name(),
temp_model.fit(Xtrain, Ytrain.ravel()),
results = model_selection.cross_val_score(temp_model, X_train, Y_train.ravel(), cv=kfold, scoring='neg_mean_squared_error'),
print(temp_select_model_name,results.mean()),
if (ThisRound_SelectedModel == None) or (abs(results.mean()) < ThisRound_SelectedModel_Score):
ThisRound_SelectedModel = temp_model,
ThisRound_SelectedModel_Name = temp_select_model_name,
ThisRound_SelectedModel_Score = abs(results.mean()),
print ("This round Model Name: ", temp_model,"MSE Score: ",abs(results.mean())),
print ("This Model Feature Importance",temp_model.feature_importances_),
print("This Model Do No Have Feature Importance......"),
print ("<----------------------------------->"),
print ("Selected Model Name:", ThisRound_SelectedModel, "MSE Score:",ThisRound_SelectedModel_Score),
return {"ModelName": ThisRound_SelectedModel_Name,"Model": ThisRound_SelectedModel}
When I run the program,
SelectedModel = Model_Selection_By_Cross_Valid()
AttributeError: 'tuple' object has no attribute 'fit' is shown.
How can I solve the problem?
Thank you very much.

Python Connected With SQL Server (HY105 Error) Invalid Parameter Type HY105

I was trying to add a CSV to my python code and use LR, LDA and various other Algorithms to test their score results.
and Then I wanted to show the results in the form of a table in SQL server.
I get the error "Invalid Parameter Type. param-index= 0 and param-type = numpy.ndarray HY105"
Added Knowledge would be appreciated.
Here is the code:
import matplotlib.pyplot as plt
import numpy as np
import pandas
from pandas.tools.plotting import scatter_matrix
import matplotlib.pyplot as plt
from sklearn import model_selection
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
import sqlalchemy
import pypyodbc
import pyodbc
# Load dataset
url = "AirPassengers.csv"
names = ['Serial Number', 'Time', 'Air Passengers']
dataset = pandas.read_csv(url, names=names)
print(dataset.describe())
print(dataset)
#Creating Validation Sets
array = dataset.values
X = array[1:, 0:4]
Y = array[1: ,2]
validation_size = 0.30
seed = 5
X_train, X_validation, Y_train, Y_validation =
model_selection.train_test_split(X,Y,test_size=validation_size,
random_state=seed)
#Test Harness and Evaluation Metrics
seed = 5
scoring = 'accuracy'
#Building Models
models = []
models.append(('LR', LogisticRegression()))
models.append(('LDA', LinearDiscriminantAnalysis()))
models.append(('KNN', KNeighborsClassifier()))
models.append(('CART', DecisionTreeClassifier()))
models.append(('NB', GaussianNB()))
models.append(('SVM', SVC()))
# evaluate each model in turn
results = []
names = []
for name, model in models:
kfold = model_selection.KFold(n_splits=10, random_state=seed)
cv_results = model_selection.cross_val_score(model, X_train, Y_train,
cv=kfold, scoring=scoring)
results.append(cv_results)
names.append(name)
msg = "%s: %f (%f)" % (name, cv_results.mean(), cv_results.std())
print(msg)
conn = pyodbc.connect(
r'DRIVER={ODBC Driver 11 for SQL Server};'
r'SERVER=Q3GN0570\MSSQLSERVER1;'
r'DATABASE=AdventureWorksDW2014;'
r'Trusted_Connection=yes;'
)
cursorexec = conn.cursor()
cursorexec.execute("INSERT INTO pythonTest(LR,LDA,KNN,CART,NB,SVM) VALUES
(?,?,?,?,?,?)", results)
cursorexec.commit()
conn.close()

Categories