I'm trying to run an AI-generated SQL statement against a table loaded from an uploaded CSV file. Everything reads the table fine except the last section, where the SQL is executed. Can anyone please suggest where I'm going wrong?
PandaSQLException: (sqlite3.OperationalError) no such table: df [SQL: select * from df] (Background on this error at: https://sqlalche.me/e/14/e3q8)
import streamlit as st
from pandasql import sqldf
from QG_Backend import AI_backend
def pysqldf(q, env=None):
    '''
    This function allows you to run SQL queries against a pandas dataframe.

    ``sqldf`` is handed a mapping in which it looks up the table names used
    in the query. The default mapping is this module's ``globals()``, which
    only holds module-level names: a DataFrame bound to a local variable of
    the calling function is not in it, and the query then fails with
    ``no such table: <name>``. Pass ``env`` to expose such frames.

    :param q: the query string
    :param env: optional mapping of table name to DataFrame (for example
        ``{"df": df}`` or the caller's ``locals()``); when omitted, this
        module's ``globals()`` is used, exactly as before
    :return: A dataframe.
    '''
    return sqldf(q, globals() if env is None else env)
def app():
    """Render the 'Data' page: upload a table, turn a request into SQL, run it.

    Flow:
      1. The user uploads a CSV/Excel file in the sidebar; it is loaded into a
         DataFrame whose column names have spaces replaced by underscores and
         whose string cells are casefolded.
      2. The text typed into the form is passed to ``backend.spellCheck`` and
         then, together with the column dtypes, to ``backend.generateSQL``.
      3. On submit, the generated statement is executed against the DataFrame
         (exposed to SQL under the table name ``df``) and the result is shown.

    NOTE(review): the widget nesting below was reconstructed from a flattened
    paste; confirm that the placement of the intermediate ``st.write`` calls
    matches the intended layout.
    """
    # Local import: the visible file header does not import pandas, but the
    # upload branch below needs it.
    import pandas as pd

    backend = AI_backend
    df = None
    tables = None
    table_query = None

    st.title('Data')

    # Let the user upload a CSV, XLSX or XLS file.
    uploaded_file = st.sidebar.file_uploader("Upload CSV:", type=['CSV', 'xlsx', 'xls'])

    if uploaded_file is not None:
        try:
            df = pd.read_csv(uploaded_file)
        except Exception:
            # Not parseable as CSV. The failed attempt has already consumed
            # part of the stream, so rewind before retrying as Excel.
            uploaded_file.seek(0)
            try:
                df = pd.read_excel(uploaded_file)
            except Exception as exc:
                df = None
                st.sidebar.error(f"Could not read the file as CSV or Excel: {exc}")
        if df is not None:
            df.columns = df.columns.str.replace(' ', '_')
            df = df.applymap(lambda s: s.casefold() if isinstance(s, str) else s)
    else:
        st.sidebar.info("Upload a file to query")

    st.subheader("File Query")

    # Create the columns/layout
    col1, col2 = st.columns(2)

    with col1:
        # Form in which the user types the plain-text request.
        with st.form(key='query_form'):
            plain_text = st.text_area("Enter your query:")
            submit_text = st.form_submit_button("Execute")

        # Spell-correct the request before it goes into the prompt.
        fixed = backend.spellCheck(plain_text)
        st.write(fixed)

        # The prompt embeds the column names/dtypes, so it can only be built
        # once a file has been loaded (tables would otherwise be None and the
        # string concatenation would raise TypeError).
        if df is not None:
            tables = df.dtypes.to_string()
            prompt = "### Example SQL querys:\nSELECT * FROM df WHERE star LIKE '%Tom Hanks%'\n\n----------\n\nCSV table name: df\n" + tables + "\n### A query to " + fixed + ".\nSELECT"
            output = backend.generateSQL(prompt)
            # The prompt ends with "SELECT", so the completion is the rest of
            # the statement; put the keyword back in front.
            table_query = "SELECT" + output
            st.write(table_query)

    if df is not None:
        with col2:
            if submit_text:
                st.info("Query Submitted")
                st.subheader("SQL Generated Code:")
                st.write(table_query)

            # Collapsible box for the column headers and their types.
            with st.expander("File Columns:"):
                dashed_tables = tables.replace(' ', '-')
                st.write(dashed_tables)

            # Collapsible box for the query result.
            with st.expander("Table:"):
                if submit_text:
                    try:
                        # df is local to this function, so it is not in the
                        # module globals; hand it to sqldf explicitly,
                        # otherwise SQLite reports "no such table: df".
                        output_table = sqldf(table_query.casefold(), {"df": df})
                    except Exception as exc:
                        # Generated SQL can be invalid; show the error
                        # instead of crashing the page.
                        st.error(f"Could not run the generated SQL: {exc}")
                    else:
                        # Explicit write: a bare expression is only rendered
                        # by Streamlit "magic", which does not apply to
                        # imported page modules.
                        st.write(output_table)
Related
I tried a few different ways (below) but am having trouble a) removing the width and b) replacing the \n with a comma. I have a txt file like the one below and I want to take that information and create a table in sqlite (all using python).
"field",width, type
name, 15, string
revenue, 10, decimal
invoice_date, 10, string
amount, 2, integer
Current python code - trying to read in the file, and get the values to pass in the sql statement below
import os
import pandas as pd
dir_path = os.path.dirname(os.path.realpath(__file__))
file = open(str(dir_path) + '/revenue/revenue_table_specifications.txt','r')
lines = file.readlines()
table = lines[2::]
s = ''.join(str(table).split(','))
x = s.replace("\n", ",").strip()
print(x)
sql I want to pass in
c = sqlite3.connect('rev.db') #connnect to DB
try:
c.execute('''CREATE TABLE
revenue_table (information from txt file,
information from txt file,
....)''')
except sqlite3.OperationalError: #i.e. table exists already
pass
This produces something that will work.
def makesql(filename):
    """Build a ``CREATE TABLE revenue_table`` statement from a field-spec file.

    Every data line is read as comma-separated ``name, width, type``. The
    width is dropped and the column is emitted as ``name type``. Lines that
    start with a double quote (the quoted header row) and blank lines are
    skipped. Whitespace around each field is ignored, so ``name,15,string``
    and ``name, 15, string`` are treated the same.

    :param filename: path of the specification file
    :return: the SQL statement as a string
    :raises IndexError: if a data line has fewer than three fields
    """
    columns = []
    # The context manager closes the file even if a line is malformed.
    with open(filename) as spec:
        for row in spec:
            row = row.strip()
            if not row or row.startswith('"'):
                continue
            parts = [part.strip() for part in row.split(",")]
            columns.append(f"{parts[0]} {parts[2]}")
    return "CREATE TABLE revenue_table (\n" + ",\n".join(columns) + ");"
sql = makesql( 'x.csv' )
print(sql)
c.execute( sql )
(Very) beginner python user here. I'm trying to load an xlsx file into a MySQL table using xlrd and pymysql python libraries and I'm getting an error:
pymysql.err.InternalError: (1292, "Incorrect date value: '43500' for column 'invoice_date' at row 1")
The datatype for invoice_date in my table is DATE. The format for this field in my xlsx file is also Date. Things work fine if I change the table datatype to varchar, but I'd prefer to have the data load into my table as a date instead of converting after the fact. Any ideas as to why I'm getting this error? It appears that xlrd or pymysql is reading '2/4/2019' in my xlsx file as '43500' and MySQL is rejecting it due to a datatype mismatch.
import xlrd
import pymysql as MySQLdb
# Open workbook and define first sheet
book = xlrd.open_workbook("2019_Complete.xlsx")
sheet = book.sheet_by_index(0)
# MySQL connection
database = MySQLdb.connect (host="localhost", user="root",passwd="password", db="vendor")
# Get cursor, which is used to traverse the databse, line by line
cursor = database.cursor()
# INSERT INTO SQL query
query = """insert into table values (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)"""
# Create a For loop to iterate through each row in the XLS file, starting at row 2 to skip the headers
for r in range(1, sheet.nrows):
lp = sheet.cell(r,0).value
pallet_lp = sheet.cell(r,1).value
bol = sheet.cell(r,2).value
invoice_date = sheet.cell(r,3).value
date_received = sheet.cell(r,4).value
date_repaired = sheet.cell(r,5).value
time_in_repair = sheet.cell(r,6).value
date_shipped = sheet.cell(r,7).value
serial_number = sheet.cell(r,8).value
upc = sheet.cell(r,9).value
product_type = sheet.cell(r,10).value
product_description = sheet.cell(r,11).value
repair_code = sheet.cell(r,12).value
condition = sheet.cell(r,13).value
repair_cost = sheet.cell(r,14).value
parts_cost = sheet.cell(r,15).value
total_cost = sheet.cell(r,16).value
repair_notes = sheet.cell(r,17).value
repair_cap = sheet.cell(r,18).value
complaint = sheet.cell(r,19).value
delta = sheet.cell(r,20).value
# Assign values from each row
values = (lp, pallet_lp, bol, invoice_date, date_received, date_repaired, time_in_repair, date_shipped, serial_number, upc, product_type, product_description, repair_code, condition, repair_cost, parts_cost, total_cost, repair_notes, repair_cap, complaint, delta)
# Execute sql Query
cursor.execute(query, values)
# Close the cursor
cursor.close()
# Commit the transaction
database.commit()
# Close the database connection
database.close()
# Print results
print ("")
columns = str(sheet.ncols)
rows = str(sheet.nrows)
print ("I just imported " + columns + " columns and " + rows + " rows to MySQL!")
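You can see this answer for a more detailed explanation, but basically Excel stores a date as a serial day number. Day 1 is nominally 1900-01-01, but because Excel also counts a non-existent 1900-02-29, any date from March 1900 onward is effectively the number of days since 1899-12-30. To get a value MySQL will accept, convert that number into an ISO format date; you can do that using date.fromordinal and date.isoformat. The constant 693594 below is the ordinal of 1899-12-30, i.e. date(1899, 12, 30).toordinal(). For example: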
from datetime import date

# Ordinal of 1899-12-30, i.e. date(1899, 12, 30).toordinal(). Adding an Excel
# serial day number to it gives the matching Python date ordinal. This holds
# for serials from March 1900 onward; earlier serials are off by one because
# Excel counts a 1900-02-29 that never existed.
EXCEL_EPOCH_ORDINAL = 693594

dval = 43500
# int(): a cell value may arrive as a float (43500.0), which
# date.fromordinal() rejects with a TypeError.
d = date.fromordinal(int(dval) + EXCEL_EPOCH_ORDINAL)
print(d.isoformat())
Output:
2019-02-04
I am learning Python and am currently working with it to parse a CSV file.
The CSV file has 3 columns:
Full_name, university, and Birth_Year.
I have successfully loaded,read, and printed the content of a given CSV file into Python, but here’s where I am stuck at:
I want to use and parse ONLY the column Full_name to 3 columns: first, middle, and last. If there are only 2 words in the name, then the middle name should be null.
The resulting parsed output should then be inserted to a sql db through Python.
Here’s my code so far:
import csv
if __name__ == '__main__':
if len (sys.argv) != 2:
print("Please enter the csv file too: python name_parsing.py student_info.csv")
sys.exit()
else:
with open(sys.argv[1], "r" ) as file:
reader = csv.DictReader(file) #I use DictReader because the csv file has > 1 col
# Print the names on the cmd
for row in reader:
name = row["Full_name"]
for name in reader:
if len(name) == 2:
print(first_name = name[0])
print(middle_name = None)
print(last_name = name[2])
if len(name) == 3 : # The assumption is that name is either 2 or 3 words only.
print(first_name = name[0])
print(middle_name = name[1])
print(last_name = name[2])
db.execute("INSERT INTO name (first, middle, last) VALUES(?,?,?)",
row["first_name"], row["middle_name"], row["last_name"])
Running the program above gives me no output whatsoever. How to parse my code the right way? Thank you.
I created a sample file based on your description. The content looks as below:
Full_name,University,Birth_Year
Prakash Ranjan Gupta,BPUT,1920
Hari Shankar,NIT,1980
John Andrews,MIT,1950
Arbaaz Aslam Khan,REC,2005
And then I executed the code below. It runs fine in my Jupyter notebook. You can add the command-line check (len(sys.argv) != 2, etc.) to this as you need. I have used an sqlite3 database; I hope this works. If you want the if/main block added to this, let me know and I can edit.
This is going by your code. (Otherwise You can do this using pandas in an easier way I believe.)
import csv
import sqlite3
# Load names_data.csv into the UNIV_DATA table of name_data.sql, splitting
# Full_name into first / middle / last name columns.
con = sqlite3.connect('name_data.sql')  # Make DB connection
try:
    cur = con.cursor()
    # Create the target table if it does not exist yet.
    cur.execute('''CREATE TABLE IF NOT EXISTS UNIV_DATA
                   (FIRSTNAME TEXT,
                    MIDDLE_NAME TEXT,
                    LASTNAME TEXT,
                    UNIVERSITY TEXT,
                    YEAR TEXT)''')

    # newline='' lets the csv module do its own newline handling, as its
    # documentation recommends for files passed to a reader.
    with open('names_data.csv', newline='') as fh:
        read_data = csv.DictReader(fh)
        for uniData in read_data:
            lst_nm = uniData['Full_name'].split()
            if not lst_nm:
                # Empty name: nothing to split, skip the row.
                continue
            # First word -> first name, last word -> last name, anything in
            # between -> middle name. Two- and three-word names give the
            # same result as plain unpacking would; one-word and
            # four-plus-word names no longer raise ValueError.
            fn = lst_nm[0]
            ln = lst_nm[-1] if len(lst_nm) > 1 else None
            mn = ' '.join(lst_nm[1:-1]) or None
            cur.execute('''
                INSERT INTO UNIV_DATA
                (FIRSTNAME, MIDDLE_NAME, LASTNAME, UNIVERSITY, YEAR)
                VALUES(?,?,?,?,?)''',
                (fn, mn, ln, uniData['University'], uniData['Birth_Year'])
            )
    con.commit()
    cur.close()
finally:
    # Release the database file even if reading or inserting failed;
    # uncommitted rows are discarded in that case.
    con.close()
If you want to read the data in the table UNIV_DATA:
Option 1: (prints the rows in the form of tuple)
import sqlite3
con = sqlite3.connect('name_data.sql')  # Make connection to DB and create a connection object
cur = con.cursor()  # Create a cursor object
try:
    # Execute the query; the returned cursor yields one tuple per row.
    results = cur.execute('SELECT * FROM UNIV_DATA')
    # Plain loop: the rows are only printed, so there is no list to build.
    for result in results:
        print(result)
finally:
    cur.close()  # close the cursor
    con.close()  # close the connection, even if the query failed
Option 2: (prints all the rows in the form of a pandas data frame - execute in jupyter ...preferably )
import sqlite3
import pandas as pd
con = sqlite3.connect('name_data.sql')  # Make connection to DB and create a connection object
try:
    # Query the table and store the result in a dataframe: df
    df = pd.read_sql('SELECT * FROM UNIV_DATA', con)
finally:
    con.close()  # the frame is fully loaded, so the connection can be released
df  # in a notebook, a trailing bare expression displays the frame
When you call name = row["Full_name"] it is going to return a string representing the name, e.g. "John Smith".
In python strings can be treated like lists, so in this case if you called len(name) it would return 10 as "John Smith" has 10 characters. As this doesn't equal 2 or 3, nothing will happen in your for loop.
What you need is some way to turn the string into a list containing the first, middle (if any) and last names. You can do this using the split function. If you call name.split(" ") it splits the string wherever there is a space; continuing the above example, this would return ["John", "Smith"], which should make your code work.
I have a list of SQL scripts I have narrowed down and want to execute. The data in the list follows this pattern more or less:
[DROP TABLE ABC ....;, CREATE TABLE ABC ....;, INSERT INTO TABLE ABC .....;,UPDATE TABLE ABC .....;]
Then it repeats itself for the next table. All of this is confined to a single list below:
dfToList_P
The problem I am having is when a table doesn't exist, I want to ignore the error and execute the CREATE TABLE statement that follows it. Here is my code:
def load_test_table(self):
s = self.connection()
df = self.retrieve_sql()
df_P = df.loc[df['STEP_TYPE'] == 'P']
dfToList_P = df_P['SQL_SCRIPT'].tolist()
try:
for sql_script in dfToList_P:
#print(sql_script)
pd.read_sql(sql_script, s)
except teradata.DatabaseError as ex:
sql_state = ex.args[0]
if sql_state == 3807:
print('Tried to DROP table that did not exist:' + sql_script)
else:
print('DatabaseError in SQL Script: ' + sql_script)
I google'd around and added the try/except condition but I don't think that is actually doing anything in this instance.
Running the script errors out with:
pandas.io.sql.DatabaseError: Execution failed on sql 'DROP TABLE ABC;': (3807, "[42S02] [Teradata][ODBC Teradata Driver][Teradata Database](-3807)Object 'ABC' does not exist.")
Any ideas?
I figured it out, thanks to a combo of comments above. First, my Try/Except was in the incorrect location and also I switched from using pandas read_sql to just using regular session execute and it worked as expected. If table exists, drop it first, if not then create it.
Revised code below:
def load_test_table(self):
    """Execute every 'P'-step SQL script in order, one statement at a time.

    The scripts come from the ``SQL_SCRIPT`` column of the frame returned by
    ``self.retrieve_sql()``, restricted to rows whose ``STEP_TYPE`` is 'P'.
    Each script gets its own try/except, so a ``teradata.DatabaseError`` on
    one statement is reported with ``print`` and the loop moves on to the
    next statement.
    """
    session = self.connection()
    frame = self.retrieve_sql()
    p_scripts = frame.loc[frame['STEP_TYPE'] == 'P', 'SQL_SCRIPT'].tolist()

    for sql_script in p_scripts:
        try:
            session.execute(sql_script)
        except teradata.DatabaseError as ex:
            # The first exception argument carries the numeric error code;
            # 3807 is the "object does not exist" case.
            if ex.args[0] == 3807:
                print('Tried to DROP table that did not exist:' + sql_script)
            else:
                print('DatabaseError in SQL Script: ' + sql_script)
I am trying to write code that will allow a user to select specific columns from a sqlite database which will then be transformed into a pandas data frame. I am using a test database titled test_database.db with a table titled test. The table has three columns, id, value_one, and value_two. The function I am showing exists within a class that establishes a connection to the database and in this function the user only needs to pass the table name and a list of columns that they would like to extract. For instance in command line sqlite I might type the command select value_one, value_two from test if I wanted only to read in the columns value_one and value_two from the table test. If I type this command into command line the method works. However, in this case I use python to build the text string which is fed into pandas.read_sql_query() and the method does not work. My code is shown below
class ReadSQL:
def __init__(self, database):
self.database = database
self.conn = sqlite3.connect(self.database)
self.cur = self.conn.cursor()
def query_columns_to_dataframe(table, columns):
query = 'select '
for i in range(len(columns)):
query = query + columns[I] + ', '
query = query[:-2] + ' from ' + table
# print(query)
df = pd.read_sql_query(query, self.conn)
return
def close_database()
self.conn.close
return
test = ReadSQL(test_database.db)
df = query_columns_to_dataframe('test', ['value_one', 'value_two'])
I am assuming my problem has something to do with the way that query_columns_to_dataframe() pre-processes the information, because if I uncomment the print command in query_columns_to_dataframe() I get a text string that looks identical to what works if I just type it directly into command line. Any help is appreciated.
I mopped up a few mistakes in your code to produce this, which works. Note that I inadvertently changed the names of the fields in your test db.
import sqlite3
import pandas as pd
class ReadSQL:
    """Small helper that reads selected columns of a SQLite table into pandas."""

    def __init__(self, database):
        """Open a connection (and a cursor) on the SQLite file ``database``."""
        self.database = database
        self.conn = sqlite3.connect(self.database)
        self.cur = self.conn.cursor()

    def query_columns_to_dataframe(self, table, columns):
        """Return the given ``columns`` of ``table`` as a DataFrame.

        :param table: table name, inserted into the query text as-is
        :param columns: iterable of column names, inserted as-is
        :return: the result of ``pd.read_sql_query`` on the built statement
        """
        # NOTE: table and column names are spliced into the SQL text, so
        # pass only trusted names here.
        query = 'select ' + ', '.join(columns) + ' from ' + table
        return pd.read_sql_query(query, self.conn)

    def close_database(self):
        """Close the underlying connection."""
        # The parentheses matter: a bare `self.conn.close` only looks the
        # method up and leaves the connection open.
        self.conn.close()
test = ReadSQL('test_database.db')
df = test.query_columns_to_dataframe('test', ['value_1', 'value_2'])
print (df)
Output:
value_1 value_2
0 2 3
Your code is full of syntax errors and other issues:
The return in query_columns_to_dataframe should be return df. This is the primary reason why your code does not return anything.
self.cur is not used
Missing self parameter when declaring query_columns_to_dataframe
Missing colon at the end of the line def close_database()
Missing self parameter when declaring close_database
Missing parentheses here: self.conn.close
This df = query_columns_to_dataframe should be df = test.query_columns_to_dataframe
Fix these errors and your code should work.