From http://fuzzytolerance.info/blog/2012/01/13/2012-01-14-updating-google-fusion-table-from-a-csv-file-using-python/ I have edited his code to import the necessary modules; however, I get the following error: "AttributeError: 'module' object has no attribute 'urlencode'". When I run the code I am prompted for my password, I enter my own Google account password, and then the code gives me the error message. Perhaps I need to define a password somewhere?
I wonder if anyone can troubleshoot my code, advise me on how to avoid this error, or suggest an easier way to import a CSV into a Google Fusion Table that I own.
Here is my code:
import csv
from decimal import *
import getpass
from fusiontables.authorization.clientlogin import ClientLogin
from fusiontables import ftclient

nameAgeNick = 'C:\\Users\\User\\Desktop\\NameAgeNickname.txt'

# check to see if something is an integer
def isInt(s):
    try:
        int(s)
        return True
    except ValueError:
        return False

# check to see if something is a float
def isFloat(s):
    try:
        float(s)
        return True
    except ValueError:
        return False

# open the CSV file
ifile = open(nameAgeNick, "rb")
reader = csv.reader(ifile)

# GFT table ID
tableID = "tableid"

# your username
username = "username"

# prompt for your password - you can hardcode it but this is more secure
password = getpass.getpass("Enter your password:")

# Get token and connect to GFT
token = ClientLogin().authorize(username, password)
ft_client = ftclient.ClientLoginFTClient(token)

# Loop through the CSV data and upload
# Assumptions for my data: if it's a float less than 1, it's a percentage
# Floats are being rounded to 1 significant digit
# Non-numbers are wrapped in a single quote for string-type in the update statement
# The first row is the column names and matches exactly the column names in Fusion Tables
# The first column is the unique ID I'll use to select the record for updating in Fusion Tables
rownum = 0
setList = list()
nid = 0
for row in reader:
    # Save header row.
    if rownum == 0:
        header = row
    else:
        colnum = 0
        setList[:] = []
        for col in row:
            thedata = col
            # This bit rounds numbers and turns numbers < 1 into percentages
            if isFloat(thedata):
                if isInt(thedata) is False:
                    if float(thedata) < 1:
                        thedata = float(thedata) * 100
                    thedata = round(float(thedata), 1)
            else:
                thedata = "'" + thedata + "'"
            # make sql set clause for row
            setList.append(header[colnum] + "=" + str(thedata))
            nid = row[0]
            colnum += 1
        # get rowid and update the record
        rowid = ft_client.query("select ROWID from " + tableID + " where ID = " + nid).split("\n")[1]
        print(rowid)
        print(ft_client.query("update " + tableID + " set " + ",".join(map(str, setList)) + " where rowid = '" + rowid + "'"))
    rownum += 1
ifile.close()
And this is the module where the error occurs:
#!/usr/bin/python
#
# Copyright (C) 2010 Google Inc.

""" ClientLogin.
"""

__author__ = 'kbrisbin@google.com (Kathryn Brisbin)'

import urllib, urllib2

class ClientLogin():
    def authorize(self, username, password):
        auth_uri = 'https://www.google.com/accounts/ClientLogin'
        authreq_data = urllib.urlencode({  # <-- HERE IS THE ERROR
            'Email': username,
            'Passwd': password,
            'service': 'fusiontables',
            'accountType': 'HOSTED_OR_GOOGLE'})
        auth_req = urllib2.Request(auth_uri, data=authreq_data)
        auth_resp = urllib2.urlopen(auth_req)
        auth_resp_body = auth_resp.read()
        auth_resp_dict = dict(
            x.split('=') for x in auth_resp_body.split('\n') if x)
        return auth_resp_dict['Auth']
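For what it's worth, urllib.urlencode only exists in Python 2; in Python 3 the function moved to urllib.parse.urlencode and urllib2 was folded into urllib.request, which matches the AttributeError above (the password prompt is unrelated to it). Below is a minimal sketch of what a Python 3 version of this authorize method might look like, assuming the rest of the fusiontables client stays as-is; note also that Google has since retired both ClientLogin and Fusion Tables, so the service may reject the request regardless.

# Hypothetical Python 3 port of the ClientLogin.authorize method shown above.
# urlencode moved to urllib.parse, and Request/urlopen moved to urllib.request.
from urllib.parse import urlencode
from urllib.request import Request, urlopen


class ClientLogin():
    def authorize(self, username, password):
        auth_uri = 'https://www.google.com/accounts/ClientLogin'
        # urlencode returns a str; urlopen expects bytes for POST data
        authreq_data = urlencode({
            'Email': username,
            'Passwd': password,
            'service': 'fusiontables',
            'accountType': 'HOSTED_OR_GOOGLE'}).encode('utf-8')
        auth_req = Request(auth_uri, data=authreq_data)
        auth_resp = urlopen(auth_req)
        auth_resp_body = auth_resp.read().decode('utf-8')
        # the response body is newline-separated key=value pairs
        auth_resp_dict = dict(
            x.split('=', 1) for x in auth_resp_body.split('\n') if x)
        return auth_resp_dict['Auth']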
I have a .sql file full of commands that I want to run through my Python program. When I enter each line into the terminal individually, my program works as expected, since it has methods for parsing an individual line entry and doing what it needs to do. However, when I run the program (pa_2.py) from the terminal as:
python3 pa_2.py < PA2_test.sql
the program doesn't read each line correctly and malfunctions; my guess is that it is unable to parse the SQL file correctly. The expected output is:
-- Database CS457_PA2 created.
-- Using database CS457_PA2.
-- Table Product created.
-- 1 new record inserted.
-- 1 new record inserted.
-- 1 new record inserted.
-- 1 new record inserted.
-- 1 new record inserted.
-- pid int|name varchar(20)|price float
-- 1|Gizmo|19.99
-- 2|PowerGizmo|29.99
-- 3|SingleTouch|149.99
-- 4|MultiTouch|199.99
-- 5|SuperGizmo|49.99
-- 1 record modified.
-- 2 records modified.
-- pid int|name varchar(20)|price float
-- 1|Gizmo|14.99
-- 2|PowerGizmo|29.99
-- 3|SingleTouch|149.99
-- 4|MultiTouch|199.99
-- 5|Gizmo|14.99
-- 2 records deleted.
-- 1 record deleted.
-- pid int|name varchar(20)|price float
-- 2|PowerGizmo|29.99
-- 3|SingleTouch|149.99
-- name varchar(20)|price float
-- SingleTouch|149.99
When I type each command in individually after starting the program with:
python3 pa_2.py
I get the expected output. However, when I run the script in the command line as:
python3 pa_2.py < PA2_test.sql
the output I get is:
created.CS457_PA2
because it does not exist.7_PA2
Created table Product.
1 new record inserted.
1 new record inserted.
1 new record inserted.
1 new record inserted.
1 new record inserted.
because it does not exist.uct
0 records modified.
0 records modified.
because it does not exist.uct
0 records modified.
Traceback (most recent call last):
File "/Users/isaac_reilly/Desktop/College/UNR/Classes/Year 3 2022-2023/Semester 1 2022/CS 457 Database Managemant Systems/Project 3/pa_2.py", line 79, in <module>
tablefunctions.deleteData(user_input, currentdb)
File "/Users/isaac_reilly/Desktop/College/UNR/Classes/Year 3 2022-2023/Semester 1 2022/CS 457 Database Managemant Systems/Project 3/tablefunctions.py", line 114, in deleteData
if float(splitter[4]) > float(searchText):
ValueError: could not convert string to float: '19.99)'
I want the program to know that the end of each command is a semicolon ";". How would I use PA2_test.sql as a command line argument and have each line run as expected? Below is the .sql file, as well as the rest of my program; a note on a likely cause follows the listings.
PA2_test.sql:
CREATE DATABASE CS457_PA2;
USE CS457_PA2;
CREATE TABLE Product (pid int, name varchar(20), price float);
INSERT INTO Product values(1, 'Gizmo', 19.99);
INSERT INTO Product values(2, 'PowerGizmo', 29.99);
INSERT INTO Product values(3, 'SingleTouch', 149.99);
INSERT INTO Product values(4, 'MultiTouch', 199.99);
INSERT INTO Product values(5, 'SuperGizmo', 49.99);
SELECT * FROM Product;
UPDATE Product set name = 'Gizmo' where name = 'SuperGizmo';
UPDATE Product set price = 14.99 where name = 'Gizmo';
SELECT * FROM Product;
DELETE FROM Product where name = 'Gizmo';
DELETE FROM Product where price > 150;
SELECT * FROM Product;
SELECT name, price FROM Product where pid != 2;
.EXIT
pa_2.py:
import fileinput
import sys
import dbfunctions
import tablefunctions
import selections

currentdb = None
user_input = None
TableList = [None]

# Loop continuously prompts the terminal for an input from the user and then decides what to do based on input.
while (user_input != ".EXIT"):
    user_input = input()
    #print(user_input)

    # States that all commands must end with a ';' if user types invalid command
    if ";" not in user_input and user_input != ".EXIT":
        print("Invalid command, all commands must end with ';'")

    # Creates a database
    if "CREATE DATABASE" in user_input:
        dbName = dbfunctions.inputCleaner("CREATE DATABASE ", user_input)
        dbfunctions.create_db(dbName)

    # Deletes a database
    if "DROP DATABASE" in user_input:
        dbName = dbfunctions.inputCleaner("DROP DATABASE ", user_input)
        dbfunctions.remove_db(dbName)

    # Creates a table using attributes inputted by user
    if "CREATE TABLE" in user_input:
        tInput = dbfunctions.inputCleaner("CREATE TABLE ", user_input)
        tableName = tInput.split()[0]
        tablefunctions.createTable(tInput, tableName, currentdb)

    # Deletes a table
    if "DROP TABLE" in user_input:
        tableName = dbfunctions.inputCleaner("DROP TABLE ", user_input)
        tablefunctions.dropTable(tableName, currentdb)

    # Modifies a table using attributes inputted by the user
    if "ALTER TABLE" in user_input:
        rawInput = dbfunctions.inputCleaner("ALTER TABLE ", user_input)
        tablefunctions.alterTable(rawInput, currentdb)

    # Sets current working database
    if "USE" in user_input:
        dbName = dbfunctions.inputCleaner("USE ", user_input)
        #print(dbName)
        currentdb = dbName
        dbfunctions.finddb(currentdb)
        #print("Using database " + currentdb)
    #elif dbfunctions.finddb(currentdb) == 0:
        #print("Unable to use database " + dbName + " because it does not exist.")

    # Selects data from a user specified table and prints contents to terminal
    if "SELECT" in user_input:
        selections.selectSpecified(user_input, currentdb)

    # Inserts given data into a specified table
    if "INSERT INTO" in user_input:
        dataInput = dbfunctions.inputCleaner("INSERT INTO ", user_input)
        tableName = dataInput.split()[0]
        tablefunctions.insertData(dataInput, tableName, currentdb)

    # Changes data in table as specified
    if "UPDATE" in user_input:
        tablefunctions.updateData(user_input, currentdb)

    # Deletes data from table as specified
    if "DELETE FROM" in user_input:
        tablefunctions.deleteData(user_input, currentdb)
dbfunctions.py:
import os
import subprocess
import shlex
import shutil

# Removes semicolon and given phrase from input
def inputCleaner(removePhrase, input):
    cleaned = input.replace(";", "")
    return cleaned.replace(removePhrase, "")

# Function used to create specified database (local directory)
def create_db(dbName):
    try:
        # Tries making directory
        os.makedirs(dbName)
        print("Database " + dbName + " created.")
    except FileExistsError:
        # Checks to see if directory already exists, throws exception if it does
        print("!Failed to create database " + dbName + " because it already exists.")

# Function used to remove specified database (local directory)
def remove_db(dbName):
    # Checks to see if specified directory exists and deletes if it does
    if os.path.exists(dbName):
        shutil.rmtree(dbName)
        print("Database " + dbName + " deleted.")
    # If selected directory does not exist, prints an error message to the screen
    else:
        print("!Failed to delete " + dbName + " because it does not exist.")

# Checks to make sure that specified database exists
def finddb(dbName):
    if dbName in subprocess.run(['ls', '|', 'grep', dbName], capture_output = True, text = True).stdout:
        print("Using database " + dbName)
    else:
        print("Unable to use database", dbName, "because it does not exist.")

def getOperand(op):
    operand = None
    if (op == '='):
        operand = 0
    elif (op == '!='):
        operand = -3
    elif (op == '<'):
        operand = -1
    elif (op == '>'):
        operand = 1
    return operand
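One aside on finddb above (separate from the stdin question): the '|' in the subprocess.run argument list is not a shell pipe, it is handed to ls as a literal argument, so the grep never runs and the check only works by accident. If all that is needed is a directory-existence test, something simpler along these lines would do (a sketch, keeping the same messages):

import os

def finddb(dbName):
    # os.path.isdir avoids spawning a shell entirely; the '|' in the original
    # argument list is passed to ls as a literal argument, not as a pipe.
    if os.path.isdir(dbName):
        print("Using database " + dbName)
    else:
        print("Unable to use database", dbName, "because it does not exist.")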
tablefunctions.py:
import subprocess
import os

# Checks to make sure that specified table exists
def findtable(tableName, currentdb):
    if tableName in subprocess.run(['ls', currentdb, '|', 'grep', tableName], capture_output = True, text = True).stdout:
        return 1
    else:
        return 0

# Creates table with specified headers
def createTable(dataInput, tableName, currentdb):
    unformattedAttributes = dataInput.replace(tableName, "")
    tableAttributes1 = unformattedAttributes[2:]
    tableAttributes2 = tableAttributes1[:-1]
    formattedAttributes = tableAttributes2.split(",")
    if (currentdb != None):
        if findtable(tableName, currentdb) == 0:
            os.system(f'touch {currentdb}/{tableName}.txt')
            filename = currentdb + '/' + tableName + '.txt'
            fedit = open(filename, 'w')
            fedit.write(" |".join(formattedAttributes))
            fedit.close()
            print(f"Created table {tableName}.")
        else:
            print("!Failed to create table " + tableName + " because it already exists.")
    else:
        print("Please specify which database to use.")

# Deletes specified table
def dropTable(tableName, currentdb):
    if (currentdb != None):
        if findtable(tableName, currentdb) != 0:
            os.system(f'rm {currentdb}/{tableName}.txt')
            print("Table " + tableName + " deleted.")
        else:
            print("!Failed to delete " + tableName + " because it does not exist.")
    else:
        print("No specified database, enter 'USE <database_name>;'")

# Inserts data into specified table
def insertData(dataInput, tableName, currentdb):
    unformattedInput = dataInput.replace(tableName, "")
    cleanedInput1 = unformattedInput.replace("'", "")
    cleanedInput2 = cleanedInput1.replace(" ", "")
    unformattedAttributes = cleanedInput2[7:-1]
    formattedAttributes = unformattedAttributes.split(",")
    if (currentdb != None):
        if findtable(tableName, currentdb):
            fedit = open(f'{currentdb}/{tableName}.txt', 'a')
            fedit.write("\n" + " | ".join(formattedAttributes))
            fedit.close()
            print("1 new record inserted.")
        else:
            print("!Failed to insert data into " + tableName + " because it does not exist.")
    else:
        print("No specified database, enter 'USE <database_name>;'")

# Modifies a table using attributes inputted by the user
def alterTable(rawInput, currentdb):
    tableName = rawInput.split()[0]
    alterCmd = rawInput.split()[1]
    alterAttribute1 = rawInput.replace(tableName, "")
    alterAttribute2 = alterAttribute1.replace(alterCmd, "")
    newAttr = alterAttribute2[2:]
    if (currentdb != None):
        if findtable(tableName, currentdb):
            fedit = open(f'{currentdb}/{tableName}.txt', 'a')
            fedit.write(f" | {newAttr}")
            fedit.close()
            print("Table " + tableName + " modified.")
        else:
            print("!Failed to modify " + tableName + " because it does not exist.")
    else:
        print("No specified database, enter 'USE <database_name>;'")

# Removes data from specified table
def deleteData(user_input, currentdb):
    if (currentdb != None):
        cleanedInput1 = user_input[12:-1]
        cleanedInput2 = cleanedInput1.replace("'", "")  # Cleans input
        tableName = cleanedInput2.split()[0]
        if findtable(tableName, currentdb) != 0:
            replaceText = ""
            searchText = cleanedInput2.split()[4]
            searchCategory = cleanedInput2.split()[2]
            with open(f'{currentdb}/{tableName}.txt', 'r') as file:
                count = 0
                replacement = ""
                if cleanedInput2.split()[3] == "=":
                    # Loops line by line for keywords
                    for line in file:
                        line = line.strip()
                        splitter = line.split()  # Puts line into list elements
                        if searchText == splitter[2]:  # If elements matches search text
                            updatedLine = ""  # delete it
                            count += 1  # Keeps track of number of edits
                        else:
                            updatedLine = line + "\n"  # Reads line unchanged if specified data is not present
                        replacement = replacement + updatedLine
                if cleanedInput2.split()[3] == ">":
                    lineCount = 0
                    for line in file:
                        line = line.strip()
                        splitter = line.split()
                        if lineCount == 0:
                            lineCount += 1
                            updatedLine = line + "\n"
                        else:
                            if float(splitter[4]) > float(searchText):
                                updatedLine = ""
                                count += 1
                            else:
                                updatedLine = line + "\n"
                        replacement = replacement + updatedLine
                if cleanedInput2.split()[3] == "<":
                    lineCount = 0
                    for line in file:
                        line = line.strip()
                        splitter = line.split()
                        if lineCount == 0:
                            lineCount += 1
                            updatedLine = line + "\n"
                        else:
                            if float(splitter[4]) < float(searchText):
                                updatedLine = ""
                                count += 1
                            else:
                                updatedLine = line + "\n"
                        replacement = replacement + updatedLine
                file.close()
            with open(f'{currentdb}/{tableName}.txt', 'w') as file:
                file.write(replacement)
                file.close()
            if count == 1:
                print(str(count) + " record modified.")
            else:
                print(str(count) + " records modified.")
        else:
            print("!Failed to update " + tableName + " table because it does not exist.")
    else:
        print("No specified database, enter 'USE <database_name>;'")

def updateData(user_input, currentdb):
    if (currentdb != None):
        cleanedInput1 = user_input[7:-1]
        cleanedInput2 = cleanedInput1.replace("'", "")
        tableName = cleanedInput2.split()[0]
        if findtable(tableName, currentdb) != 0:
            replaceText = cleanedInput2.split()[4]
            searchText = cleanedInput2.split()[8]
            replaceCategory = cleanedInput2.split()[2]
            searchCategory = cleanedInput2.split()[6]
            with open(f'{currentdb}/{tableName}.txt', 'r') as file:
                count = 0
                replacement = ""
                if (replaceCategory == searchCategory):  # if both columns being referenced are the same
                    for line in file:
                        line = line.strip()
                        if searchText in line:
                            updatedLine = line.replace(searchText, replaceText)
                            count += 1
                        else:
                            updatedLine = line
                        replacement = replacement + updatedLine + "\n"
                else:
                    for line in file:
                        splitter = line.split()
                        splitter[4] = replaceText
                        line = line.strip()
                        if searchText == splitter[2]:
                            updatedLine = " ".join(splitter)
                            count += 1
                        else:
                            updatedLine = line
                        replacement = replacement + updatedLine + "\n"
                file.close()
            with open(f'{currentdb}/{tableName}.txt', 'w') as file:
                file.write(replacement)
                file.close()
            if count == 1:
                print(str(count) + " record modified.")
            else:
                print(str(count) + " records modified.")
        else:
            print("!Failed to update " + tableName + " table because it does not exist.")
    else:
        print("No specified database, enter 'USE <database_name>;'")
selections.py:
import tablefunctions
import dbfunctions

def selectAll(tableName, currentdb):
    if currentdb == None:
        print("No specified database, enter 'USE <database_name>;'")
    else:
        if tablefunctions.findtable(tableName, currentdb):
            fedit = open(f'{currentdb}/{tableName}.txt', 'r')
            print(fedit.read())
            fedit.close()
        else:
            print("!Failed to query table " + tableName + " because it does not exist.")

def selectSpecified(user_input, currentdb):
    if "SELECT * FROM" in user_input:
        tableName = dbfunctions.inputCleaner("SELECT * FROM ", user_input)
        selectAll(tableName, currentdb)
    else:
        if "SELECT" in user_input:
            selLower = user_input[7:-1]
            selection = user_input[7:-1]
        elif "select" in user_input:
            selection = user_input[7:-1]

        # Gathering list of variables
        selectColumns = selection.replace(",", "").split()
        selectColumns = selectColumns[:selectColumns.index("FROM")]
        # Table name
        tableName = selection.split()[len(selectColumns)+1]
        # Gathering what to filter by
        whereColumn = selection.split()[len(selectColumns)+3]
        whereRecord = selection.split()[len(selectColumns)+5]
        operand = dbfunctions.getOperand(selection.split()[len(selectColumns)+4])

        if currentdb != None:
            if tablefunctions.findtable(tableName, currentdb):
                f = open(f'{currentdb}/{tableName}.txt', 'r')
                file = f.readlines()
                f.close()
                selectColumnNums = []
                columnNameString = ""
                listToReturn = []
                count = 0
                for line in file:
                    if (count == 0):  # Headers
                        # Finding the indexes of select and where columns
                        columnList = line.split()
                        columnListWithTypes = columnList.copy()
                        del columnListWithTypes[2::3]
                        del columnList[1::3]
                        columnCount = 0
                        # If variable is found in table, record its index
                        for word in columnList:
                            if word in selectColumns:
                                selectColumnNums.append(columnCount)
                            if (word == whereColumn):
                                whereColumnNum = columnCount
                            columnCount += 1
                        # Creating a custom table header for the selected columns
                        for index in selectColumnNums:
                            columnNameString += f"{columnListWithTypes[index]} {columnListWithTypes[index+1]} | "
                        queryHeader = columnNameString[:-3]
                        listToReturn.append(queryHeader)
                    if (count > 0):  # Values
                        tupleDetails = line.split()

                        # Determines what to do with each row
                        def querySpecificHelper():
                            # Creates the row output
                            def queryStringMaker():
                                queryString = ""
                                for index in selectColumnNums:
                                    queryString += f"{tupleDetails[index]} | "
                                queryResult = queryString[:-3]
                                listToReturn.append(queryResult)
                            if (operand == 0):  # Equality
                                # The type checking here handles strings and numbers separately
                                # Ex. 150 or 150.00 would not find 150.00 or 150, respectively
                                if (type(tupleDetails[whereColumnNum]) is str):
                                    if (tupleDetails[whereColumnNum] == whereRecord):
                                        queryStringMaker()
                                elif (type(tupleDetails[whereColumnNum]) is not str):
                                    if (float(tupleDetails[whereColumnNum]) == float(whereRecord)):
                                        queryStringMaker()
                            elif (operand == 1):  # Greater than
                                if (float(tupleDetails[whereColumnNum]) > float(whereRecord)):
                                    queryStringMaker()
                            elif (operand == -1):  # Less than
                                if (float(tupleDetails[whereColumnNum]) < float(whereRecord)):
                                    queryStringMaker()
                            elif (operand == -3):  # Inequality
                                if (type(tupleDetails[whereColumnNum]) is str):
                                    if (tupleDetails[whereColumnNum] != whereRecord):
                                        queryStringMaker()
                                elif (type(tupleDetails[whereColumnNum]) is not str):
                                    if (float(tupleDetails[whereColumnNum]) != float(whereRecord)):
                                        queryStringMaker()

                        querySpecificHelper()
                    count += 1
                for line in listToReturn:  # Prints table
                    print(line)
            else:
                print(f"Could not query table {tableName} because it does not exist.")
        else:
            print("Please specify which database to use.")
The goal here is to write a function called displayPerson that takes in an integer called id as its first parameter, and a dictionary as its second parameter, called personData.
The purpose of the function is to print the name and birthday of a given user identified by the input id. If there is no entry with the given id, then print “No user found with that id” instead.
The format should be "Person #<id> is <name> with a birthday of <date>", where id is the id that was inputted, name is the name of the person (from the file), and date is the birthday of the user (formatted as YYYY-MM-DD).
This is what I have so far:
import argparse
import urllib.request
import datetime
import logging

#url https://s3.amazonaws.com/cuny-is211-spring2015/birthdays100.csv
#url = input('Insert URL here: ')
url = "https://s3.amazonaws.com/cuny-is211-spring2015/birthdays100.csv"

def downloadData(url):
    response = urllib.request.urlopen(url)
    data = response.read().decode('utf-8')
    #print(data)
    return data

def processData(file_content):
    dictionary = {}
    #print(file_content)
    # [
    #   "1,Charles Paige,06/01/1963",
    #   "2,Andrew Bell,29/03/1972",
    #   ...
    #   "99,Alan Wilson,03/04/1960",
    #   "100,Austin Burgess,04/06/1979"
    # ]
    count = 0
    data_items = file_content.splitlines()
    logging.basicConfig(filename='error.log', filemode='w', level=logging.ERROR)
    for line in data_items[1:]:
        data_pieces = line
        data_pieces = data_pieces.split(',')
        # ["1", "Charles Paige", "06/01/1963"]
        count = count + 1
        #print(data_pieces)
        # dictionary[data_pieces[0]] = (data_pieces[1]), datetime.datetime.strptime((data_pieces[2]), '%d/%m/%Y')
        try:
            dictionary[data_pieces[0]] = (data_pieces[1]), datetime.datetime.strptime((data_pieces[2]), '%d/%m/%Y')
        except ValueError:
            logging.error("Error processing line #: " + str(count) + " for ID #: " + str(data_pieces[0]))
    return dictionary

def displayPerson(id, personData):
    #print(personData)
    #return
    try:
        id = input("ID:")
        print("Person #" + id + "is" + dictionary[data_pieces[1]] + "with a birthday of" + datetime.datetime.strptime((data_pieces[2]), '%Y-%m-%d'))
    except:
        print("No user ID found")

def main():
    downloadData(url)
    file_content = downloadData(url)
    values = processData(file_content)
    #print(values)
    displayPerson(id, values)
When I input an ID number, it falls into the except block every time. I'm not sure how to make the code look up the ID number among the values in the dictionary I created in processData.
Your code all seems to work OK except for the print line in your displayPerson function. Replace that line with this, and I think you'll get the behavior you're looking for:
print("Person #" + id + " is " + personData[id][0] + " with a birthday of " + personData[id][1].strftime('%d/%m/%Y'))
When I enter a value of "1", I get the following output:
Person #1 is Charles Paige with a birthday of 06/01/1963
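For completeness, here is one way the whole function could look with that fix in place (a sketch, assuming displayPerson keeps its own input() prompt as in the original code; processData keys the dictionary by the ID string and stores (name, datetime) tuples). The assignment asks for YYYY-MM-DD, so strftime('%Y-%m-%d') is used here; the answer above shows the same idea with '%d/%m/%Y':

def displayPerson(id, personData):
    # personData maps the ID string to a (name, datetime) tuple built in processData
    id = input("ID:")
    try:
        name, birthday = personData[id]
        print("Person #" + id + " is " + name +
              " with a birthday of " + birthday.strftime('%Y-%m-%d'))
    except KeyError:
        # the id was not a key in the dictionary
        print("No user found with that id")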
Problem: houses from pset7 of CS50. The output of my program meets the requirements, but my code fails 5 out of the 6 tests. I have tried a lot but couldn't find the mistake. Please help me out.
import.py
import sys
import cs50
import csv

if (len(sys.argv) != 2):
    print("Error: Incorrect Command-line Arguments")
    exit(1)

db = cs50.SQL("sqlite:///students.db")
db.execute("DROP TABLE students")
db.execute("CREATE TABLE IF NOT EXISTS students (first TEXT, middle TEXT, last TEXT, house TEXT, birth NUMERIC)")

with open("characters.csv", "r") as characters:
    reader = csv.DictReader(characters, delimiter=",")
    for row in reader:
        name = row["name"]
        name_list = name.split()
        if (len(name_list) == 3):
            first = name_list[0]
            middle = name_list[1]
            last = name_list[2]
        elif (len(name_list) == 2):
            first = name_list[0]
            middle = None
            last = name_list[1]
        house = row["house"]
        birth = int(row["birth"])
        db.execute("INSERT INTO students (first,middle,last,house,birth) VALUES (?,?,?,?,?)", first, middle, last, house, birth)
roster.py
import sys
import cs50
import csv
import sqlite3

if (len(sys.argv) != 2):
    print("Error: Incorrect Command-line Arguments")
    exit(1)

user_house = sys.argv[1]
db = cs50.SQL("sqlite:///students.db")
list_dicts = db.execute("SELECT first, middle, last, birth FROM students WHERE house = (?) ORDER BY last, first", (user_house))
for row in list_dicts:
    if (row["middle"] == None):
        print(row["first"] + " " + row["last"] + ", born " + str(row["birth"]))
    else:
        print(row["first"] + " " + row["middle"] + " " + row["last"] + ", born " + str(row["birth"]))
Your students table does not match the schema supplied in the distro code.
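In code form, one way to line import.py up with that advice is to leave the schema that ships in students.db alone and only clear and repopulate the rows, rather than dropping the table and recreating it with a different column layout. A sketch under that assumption (the exact distro schema is not reproduced here; the CSV path is taken from argv, which is presumably why the argument count is checked):

import csv
import sys

import cs50

if len(sys.argv) != 2:
    print("Error: Incorrect Command-line Arguments")
    exit(1)

db = cs50.SQL("sqlite:///students.db")

# Keep the supplied table and its schema; just remove any existing rows.
db.execute("DELETE FROM students")

with open(sys.argv[1], "r") as characters:
    for row in csv.DictReader(characters):
        name_list = row["name"].split()
        first = name_list[0]
        middle = name_list[1] if len(name_list) == 3 else None
        last = name_list[-1]
        db.execute("INSERT INTO students (first, middle, last, house, birth) VALUES (?, ?, ?, ?, ?)",
                   first, middle, last, row["house"], int(row["birth"]))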
This is a piece of code which needs to perform the following functionality:
Dump all table names in a database
From each table search for a column with either Latitude or Longitude in
Store these co-ords as a json file
The code was tested and working on a single database. However, once it was put into another piece of code that calls it with different databases, it no longer enters line 49. There is no error either, so I am struggling to see what the issue is, as I have not changed anything.
Code snippet (line 48 is the bottom line):
cursor.execute("SELECT name FROM sqlite_master WHERE type='table'")
print (cursor)
for tablerow in cursor.fetchall():
I am running this in the /tmp/ directory due to an earlier error with SQLite not working outside of it.
Any questions, please ask.
Thanks!
#!/usr/bin/python
# -*- coding: utf-8 -*-

import sqlite3
import os
import sys

filename = sys.argv[1]

def validateFile(filename):
    filename, fileExt = os.path.splitext(filename)
    print("[Jconsole] Python: Filename being tested - " + filename)
    if fileExt == '.db':
        databases(filename)
    elif fileExt == '.json':
        jsons(fileExt)
    elif fileExt == '':
        blank()
    else:
        print('Unsupported format')
        print(fileExt)

def validate(number):
    try:
        number = float(number)
        if -90 <= number <= 180:
            return True
        else:
            return False
    except ValueError:
        pass

def databases(filename):
    dbName = sys.argv[2]
    print(dbName)
    idCounter = 0
    mainList = []
    lat = 0
    lon = 0
    with sqlite3.connect(filename) as conn:
        conn.row_factory = sqlite3.Row
        cursor = conn.cursor()
        cursor.execute("SELECT name FROM sqlite_master WHERE type='table'")
        print(cursor)
        for tablerow in cursor.fetchall():
            print("YAY1")
            table = tablerow[0]
            cursor.execute('SELECT * FROM {t}'.format(t=table))
            for row in cursor:
                print(row)
                print("YAY")
                tempList = []
                for field in row.keys():
                    tempList.append(str(field))
                    tempList.append(str(row[field]))
                for i in tempList:
                    if i in ('latitude', 'Latitude'):
                        index = tempList.index(i)
                        if validate(tempList[index + 1]):
                            idCounter += 1
                            tempList.append(idCounter)
                            (current_item, next_item) = \
                                (tempList[index], tempList[index + 1])
                            lat = next_item
                    if i in ('longitude', 'Longitude'):
                        index = tempList.index(i)
                        if validate(tempList[index + 1]):
                            (current_item, next_item) = \
                                (tempList[index], tempList[index + 1])
                            lon = next_item
                result = '{ "id": ' + str(idCounter) \
                    + ', "content": "' + dbName + '", "title": "' \
                    + str(lat) + '", "className": "' + str(lon) \
                    + '", "type": "box"},'
                mainList.append(result)
    file = open('appData.json', 'a')
    for item in mainList:
        file.write('%s\n' % item)
    file.close()

# {
#     "id": 1,
#     "content": "<a class='thumbnail' href='./img/thumbs/thumb_IMG_20161102_151122.jpg'>IMG_20161102_151122.jpg</><span><img src='./img/thumbs/thumb_IMG_20161102_151122.jpg' border='0' /></span></a>",
#     "title": "50.7700721944444",
#     "className": "-0.8727045",
#     "start": "2016-11-02 15:11:22",
#     "type": "box"
# },

def jsons(filename):
    print('JSON')

def blank():
    print('blank')

validateFile(filename)
Fixed.
The issue was up here:
filename, fileExt = os.path.splitext(filename)
The filename variable was being overwritten without the file extension, so when SQLite went looking it didn't find the file.
Strangely, no error appeared, but it is fixed now by changing the filename variable to filename1.
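For anyone hitting the same thing: sqlite3.connect() silently creates a brand-new empty database when the given path does not exist, which is why there was no error and no tables to iterate over. The fix described above, in code form, would look something like this (only the extension check uses the split result; the original path is passed on untouched):

import os

def validateFile(filename):
    # Keep the full path in `filename`; only the extension check needs the split.
    filename1, fileExt = os.path.splitext(filename)
    print("[Jconsole] Python: Filename being tested - " + filename1)
    if fileExt == '.db':
        databases(filename)      # pass the original name, extension included
    elif fileExt == '.json':
        jsons(fileExt)
    elif fileExt == '':
        blank()
    else:
        print('Unsupported format')
        print(fileExt)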
Hello to all passionate programmers out there. I need your help with my code.
My Goal: To efficiently move data from Amazon S3 to Amazon Redshift.
Basically I am moving all the CSV files in my S3 bucket to Redshift using the code below. I parse part of each file, build a table structure, and then use the COPY command to load the data into Redshift.
'''
Created on Feb 25, 2015

@author: Siddartha.Reddy
'''

import sys
from boto.s3 import connect_to_region
from boto.s3.connection import Location
import csv
import itertools
import psycopg2

''' ARGUMENTS TO PASS '''

AWS_KEY = sys.argv[1]
AWS_SECRET_KEY = sys.argv[2]
S3_DOWNLOAD_PATH = sys.argv[3]
REDSHIFT_SCHEMA = sys.argv[4]
TABLE_NAME = sys.argv[5]

UTILS = S3_DOWNLOAD_PATH.split('/')

class UTIL():

    global UTILS

    def bucket_name(self):
        self.BUCKET_NAME = UTILS[0]
        return self.BUCKET_NAME

    def path(self):
        self.PATH = ''
        offset = 0
        for value in UTILS:
            if offset == 0:
                offset += 1
            else:
                self.PATH = self.PATH + value + '/'
        return self.PATH[:-1]

def GETDATAINMEMORY():
    conn = connect_to_region(Location.USWest2, aws_access_key_id = AWS_KEY,
                             aws_secret_access_key = AWS_SECRET_KEY,
                             is_secure=False, host='s3-us-west-2.amazonaws.com')
    ut = UTIL()
    BUCKET_NAME = ut.bucket_name()
    PATH = ut.path()
    filelist = conn.lookup(BUCKET_NAME)

    ''' Fetch part of the data from S3 '''
    for path in filelist:
        if PATH in path.name:
            DATA = path.get_contents_as_string(headers={'Range': 'bytes=%s-%s' % (0, 100000000)})
    return DATA

def TRAVERSEDATA():
    DATA = GETDATAINMEMORY()
    CREATE_TABLE_QUERY = 'CREATE TABLE ' + REDSHIFT_SCHEMA + '.' + TABLE_NAME + '( '
    JUNKED_OUT = DATA[3:]
    PROCESSED_DATA = JUNKED_OUT.split('\n')
    CSV_DATA = csv.reader(PROCESSED_DATA, delimiter=',')
    COUNTER, STRING, NUMBER = 0, 0, 0
    COLUMN_TYPE = []

    ''' GET COLUMN NAMES AND COUNT '''
    for line in CSV_DATA:
        NUMBER_OF_COLUMNS = len(line)
        COLUMN_NAMES = line
        break

    ''' PROCESS COLUMN NAMES '''
    a = 0
    for REMOVESPACE in COLUMN_NAMES:
        TEMPHOLDER = REMOVESPACE.split(' ')
        temp1 = ''
        for x in TEMPHOLDER:
            temp1 = temp1 + x
        COLUMN_NAMES[a] = temp1
        a = a + 1

    ''' GET COLUMN DATA TYPES '''
    # print(NUMBER_OF_COLUMNS,COLUMN_NAMES,COUNTER)
    # print(NUMBER_OF_COLUMNS)
    i, j, a = 0, 500, 0
    while COUNTER < NUMBER_OF_COLUMNS:
        for COLUMN in itertools.islice(CSV_DATA, i, j+1):
            if COLUMN[COUNTER].isdigit():
                NUMBER = NUMBER + 1
            else:
                STRING = STRING + 1
        if NUMBER == 501:
            COLUMN_TYPE.append('INTEGER')
            # print('I CAME IN')
            NUMBER = 0
        else:
            COLUMN_TYPE.append('VARCHAR(2500)')
            STRING = 0
        COUNTER = COUNTER + 1
        # print(COUNTER)
    COUNTER = 0

    ''' BUILD SCHEMA '''
    while COUNTER < NUMBER_OF_COLUMNS:
        if COUNTER == 0:
            CREATE_TABLE_QUERY = CREATE_TABLE_QUERY + COLUMN_NAMES[COUNTER] + ' ' + COLUMN_TYPE[COUNTER] + ' NOT NULL,'
        else:
            CREATE_TABLE_QUERY = CREATE_TABLE_QUERY + COLUMN_NAMES[COUNTER] + ' ' + COLUMN_TYPE[COUNTER] + ' ,'
        COUNTER += 1
    CREATE_TABLE_QUERY = CREATE_TABLE_QUERY[:-2] + ')'
    return CREATE_TABLE_QUERY

def COPY_COMMAND():
    S3_PATH = 's3://' + S3_DOWNLOAD_PATH
    COPY_COMMAND = "COPY " + REDSHIFT_SCHEMA + "." + TABLE_NAME + " from '" + S3_PATH + "' credentials 'aws_access_key_id=" + AWS_KEY + ";aws_secret_access_key=" + AWS_SECRET_KEY + "' REGION 'us-west-2' csv delimiter ',' ignoreheader as 1 TRIMBLANKS maxerror as 500"
    return COPY_COMMAND

def S3TOREDSHIFT():
    conn = psycopg2.connect("dbname='xxx' port='5439' user='xxx' host='xxxxxx' password='xxxxx'")
    cursor = conn.cursor()
    cursor.execute('DROP TABLE IF EXISTS ' + REDSHIFT_SCHEMA + "." + TABLE_NAME)
    SCHEMA = TRAVERSEDATA()
    print(SCHEMA)
    cursor.execute(SCHEMA)
    COPY = COPY_COMMAND()
    print(COPY)
    cursor.execute(COPY)
    conn.commit()

S3TOREDSHIFT()
Current Challenges:
Challenges with creating the table structure:
Field lengths: right now I am just hardcoding the VARCHAR fields to 2500. All my files are over 30 GB, and parsing through the whole file to calculate the length of each field takes a lot of processing time.
Determining whether a column is nullable: I am simply hardcoding the first column to NOT NULL using the COUNTER variable (all my files have an ID as the first column). I would like to know if there is a better way of doing it.
Is there any data structure I can use? I am always interested in learning new ways to improve performance, so if you have any suggestions please feel free to comment.
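One low-cost option for both open points is to keep working on the byte range that is already being downloaded and derive column widths and nullability from that sample instead of from the full file. The result is only an estimate (a safety margin or a later ALTER TABLE may still be needed), but it avoids another full pass over 30 GB. A rough sketch reusing the csv module on the sampled rows; the function and parameter names here are illustrative, not from the original script:

import csv
import itertools

def infer_schema(sample_text, sample_rows=500, margin=2):
    """Guess column types, widths and nullability from a sampled CSV chunk."""
    reader = csv.reader(sample_text.split('\n'), delimiter=',')
    header = next(reader)
    n = len(header)
    max_len = [1] * n
    all_digits = [True] * n
    nullable = [False] * n
    for row in itertools.islice(reader, sample_rows):
        if len(row) != n:            # skip the truncated last line of the byte range
            continue
        for i, value in enumerate(row):
            max_len[i] = max(max_len[i], len(value))
            if not value.strip():
                nullable[i] = True   # an empty value in the sample suggests the column can be NULL
            elif not value.isdigit():
                all_digits[i] = False
    cols = []
    for name, length, digits, null in zip(header, max_len, all_digits, nullable):
        col_type = 'INTEGER' if digits else 'VARCHAR(%d)' % (length * margin)
        cols.append('%s %s%s' % (name.replace(' ', ''), col_type,
                                 '' if null else ' NOT NULL'))
    return ', '.join(cols)

Whether the sampled maximum width is trustworthy depends on the data, which is what the margin factor is for; the maxerror setting already on the COPY command gives some additional slack if the estimate is occasionally too tight.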