Update value in SQL table using SqlAlchemy in python? - python

"""
Created on Tue Sep 7 14:06:54 2021
#author: hp
"""
"""BOOK DETAILS IN SQL USING PYTHON----python librarian"""
import pandas as pd
import pyodbc
from sqlalchemy import create_engine,update
"""SERVER="DESKTOP-JKOITFK\SQLEXPRESS"
DATABASE="newdatabase"
DRIVER="SQL SERVER NATIVE CLIENT 11.0"
USERNAME="chitransh"
PASSWORD="reshushrey#2027"
#DATABASE_CONNECTION=f'mssql://{USERNAME}:{PASSWORD}#{SERVER}/{DATABASE}?driver={DRIVER}'"""
table_name="bookdetails"
engine=create_engine("mssql+pyodbc://#DESKTOP-JKOITFK\SQLEXPRESS/newdatabase?driver=SQL SERVER NATIVE CLIENT 11.0")
connection=engine.connect()
details={}
def bookdetails():
print("enter the book details:")
name=input("enter the name of the book:")
author=input("enter the author of the book:")
year=int(input("enter the year of the book:"))
publisher=input("enter the publisher of the book:")
quantities=int(input("enter the quantities of the book:"))
next_action=int(input("details entry completed. Press 1 for another entry, else press 2 for exit:"))
details={"BookName":[name],"Author":[author],"Year":[year],"Publisher":[publisher],"Quantities":[quantities]}
dict_df=pd.DataFrame(details)
#print(dict_df)
create_table=dict_df.to_sql(table_name,connection,if_exists="append",index=False)
if next_action==1:
bookdetails()
else:
authorized()
def issuebooks():
print("issue the books")
issue_book=input("which book to issue:")
frame=pd.read_sql("select Quantities from bookdetails where BookName = '{}'".format(issue_book),connection)
updated_value_query=(update(bookdetails).values(Quantities=(int(frame.values)-1)).where(bookdetails.BookName=='{}'.format(issue_book)))
connection.execute(updated_value_query)
def depositbooks():
print("deposit the books")
def authorized():
action=int(input("enter 1 for entering book details, enter 2 to issue books, enter 3 to deposit the book, enter 4 for exit:"))
if action==1:
bookdetails()
elif action==2:
issuebooks()
elif action==3:
depositbooks()
#else:
# main()
def enter_func(username, password):
    """Check a librarian's credentials and open the action menu on success.

    Looks ``username`` up in the module-level ``librarian`` mapping and
    compares the stored password with ``password``. A message is printed for
    an unknown user or a wrong password; otherwise ``authorized()`` is called.
    """
    # Guard clause: unknown user.
    if username not in librarian:
        print("you are not authorized to enter")
        return
    # Guard clause: known user, wrong password.
    if password != librarian[username]:
        print("password donot match,try again")
        return
    print("enter")
    authorized()
#main()
while True:
first=int(input("press 1 to login, 2 for exit:"))
if (first==1):
librarian={"deepika":"chiku","pragya":"praveen"}
username=input("enter the username:")
password=input("enter the password:")
enter_func(username,password)
else:
break
I am trying to build a book entry system, and for that I am connecting SQL Server and Python. Whenever I try to update a value in the table using an update query, I get the following error:
press 1 to login, 2 for exit:1
enter the username:deepika
enter the password:chiku
enter
enter 1 for entering book details, enter 2 to issue books, enter 3 to deposit the book, enter 4 for exit:2
issue the books
which book to issue:shiva2
Traceback (most recent call last):
File "<ipython-input-1-df831d64649f>", line 1, in <module>
runfile('C:/Users/hp/Desktop/project_part1.py', wdir='C:/Users/hp/Desktop')
File "C:\Users\hp\Anaconda3\lib\site-packages\spyder\utils\site\sitecustomize.py", line 705, in runfile
execfile(filename, namespace)
File "C:\Users\hp\Anaconda3\lib\site-packages\spyder\utils\site\sitecustomize.py", line 102, in execfile
exec(compile(f.read(), filename, 'exec'), namespace)
File "C:/Users/hp/Desktop/project_part1.py", line 101, in <module>
enter_func(username,password)
File "C:/Users/hp/Desktop/project_part1.py", line 89, in enter_func
authorized()
File "C:/Users/hp/Desktop/project_part1.py", line 77, in authorized
issuebooks()
File "C:/Users/hp/Desktop/project_part1.py", line 61, in issuebooks
updated_value_query=update(bookdetails).values(Quantities=frame-1).where("BookName=='{}'".format(issue_book))
File "<string>", line 2, in update
File "C:\Users\hp\Anaconda3\lib\site-packages\sqlalchemy\sql\dml.py", line 735, in __init__
ValuesBase.__init__(self, table, values, prefixes)
File "C:\Users\hp\Anaconda3\lib\site-packages\sqlalchemy\sql\dml.py", line 201, in __init__
self.table = _interpret_as_from(table)
File "C:\Users\hp\Anaconda3\lib\site-packages\sqlalchemy\sql\selectable.py", line 49, in _interpret_as_from
raise exc.ArgumentError("FROM expression expected")
ArgumentError: FROM expression expected
This is the error I am facing. It says a FROM expression is expected, but when I write the query directly in SQL, no FROM clause is needed. I want to write the decremented value back to the SQL table.

I couldn't reproduce your exact error but this line
updated_value_query=(update(bookdetails).values(Quantities=(int(frame.values)-1)).where(bookdetails.BookName=='{}'.format(issue_book)))
needs a couple of changes to work:
the name bookdetails refers to the function bookdetails, so it isn't a valid argument for update, which expects a Table object or similar
in the where clause, the BookName column must be accessed through the table's columns (or c) attribute
import sqlalchemy as sa
...
# Create a table object that maps to the table in the database
bookdetails_table = sa.Table(table_name, sa.MetaData(), autoload_with=engine)
# Use the table object in the query
updated_value_query = (
update(bookdetails_table)
.values(Quantities=(int(frame.values) - 1))
.where(bookdetails_table.c.BookName == '{}'.format(issue_book))
)
You don't really need Pandas for your code, you could replace it with SQLAlchemy Core inserts and updates. For example (assuming SQLAlchemy 1.4+)
from sqlalchemy import create_engine, update
import sqlalchemy as sa
table_name = 'bookdetails'
engine = create_engine(...)
# Create a table and assign it to a global variable.
# Lowercase table and column names cause fewer problems than mixed or upper case
metadata = sa.MetaData()
book_table = sa.Table(
table_name,
metadata,
sa.Column('id', sa.Integer, primary_key=True),
sa.Column('bookname', sa.String(128)),
sa.Column('author', sa.String(128)),
sa.Column('year', sa.Integer),
sa.Column('publisher', sa.String(128)),
sa.Column('quantities', sa.Integer),
)
book_table.create(engine, checkfirst=True)
def bookdetails():
    """Prompt for one book's details and insert them as a new table row.

    After the insert, either starts another entry (when the user answered 1)
    or returns to the ``authorized()`` menu.
    """
    print('enter the book details:')
    # Dict values are evaluated top to bottom, so the prompts appear in the
    # same order as the columns are listed here.
    row = {
        'bookname': input('enter the name of the book:'),
        'author': input('enter the author of the book:'),
        'year': int(input('enter the year of the book:')),
        'publisher': input('enter the publisher of the book:'),
        'quantities': int(input('enter the quantities of the book:')),
    }
    another_entry = int(
        input(
            'details entry completed. Press 1 for another entry, else press 2 for exit:'
        )
    ) == 1
    # engine.begin() commits on success and rolls back if execute() raises.
    with engine.begin() as conn:
        conn.execute(book_table.insert().values(**row))
    follow_up = bookdetails if another_entry else authorized
    follow_up()
def issuebooks():
    """Issue one copy of a book by decrementing its stock in the database.

    Prompts for the book name and runs a single UPDATE that subtracts one
    from ``quantities``. The UPDATE only matches rows that still have at
    least one copy, so the stock can never drop below zero. If no row was
    updated (unknown title or nothing left in stock) a message is printed.
    """
    print('issue the books')
    issue_book = input('which book to issue:')
    # Here we don't need to select and then update: we can express the update
    # as an operation on the column.
    updated_value_query = (
        update(book_table)
        .values(quantities=book_table.c.quantities - 1)
        .where(book_table.c.bookname == issue_book)
        # Successive where() calls are ANDed: skip books already out of stock.
        .where(book_table.c.quantities > 0)
    )
    with engine.begin() as conn:
        # Read rowcount while the connection is still open.
        updated_rows = conn.execute(updated_value_query).rowcount
    if updated_rows == 0:
        print('no copies of that book are available to issue')

Related

Python List Index Ouf Of Range In IF Statement

I have multiple patients' information stored in database.txt and I want to load the data from the file into a list.
The system then prompts for a patient's ID to search for and displays the patient's other information, such as Name, Age, Group and Zone.
However, I'm getting an error on line 12, even though the same expression on line 17 runs without a problem.
search_keyword = input() # Asks for patient's name or id (either one)
with open("database.txt", "r") as database:
for data in database:
for patients in data.split('|'):
patient_details = []
for details in patients.split(','):
patient_details.append(details)
print(patient_details) # test
print(len(patient_details) # test
print(patient_details.index('Patient001')) # test
print(patient_details[4]) # test
if search_keyword == patient_details[0] or search_keyword == patient_details[4]: # error occured here, where it says list index out of range.
print("Name: " + patient_details[0])
print("Age: " + patient_details[1])
print("Group: " + patient_details[2])
print("Zone: " + patient_details[3])
print("ID: " + patient_details[4]) # no error here, patient_details[4] is able to display patient's id
database.txt
John,18,A,1,Patient001|Nick,20,F,9,Patient002
Test output for lines 8, 9, 10 and 11:
Line 8: [John, 18, A, 1, Patient001]
Line 9: 5
Line 10: 4
Line 11: IndexError: list index out of range
Can someone explain why this is happening, and any solutions regarding this issue without using any imported modules? Thank you for any assistance.
Imo a very good use-case for a named tuple:
from collections import namedtuple
text = "John,18,A,1,Patient001|Nick,20,F,9,Patient002"
# build database
Patient = namedtuple('Patient', ['name', 'age', 'group', 'zone', 'id'])
db = [Patient(*patient) for entry in text.split("|") for patient in [entry.split(",")]]
# Asks for patient's id
search_keyword = input("Please give an ID: ")
# query the database
result = [patient for patient in db if patient.id == search_keyword]
# or patient.id.startswith(), etc.
print(result)
Without any imported modules, you could use
text = "John,18,A,1,Patient001|Nick,20,F,9,Patient002"
# build database
db = [entry.split(",") for entry in text.split("|")]
search_keyword = input("Please give an ID: ") # Asks for patient's id
# query the database
result = [patient for patient in db if patient[4] == search_keyword]
print(result)
I see no flaw in the code, although I can point out a way to optimise it:
# Index every patient record by its ID so a lookup is a single dict access.
patient_details = {}
with open("database.txt", "r") as database:
    for data in database:
        # Drop the trailing newline so the last ID on a line is stored as
        # "Patient002" rather than "Patient002\n" (which could never match
        # what the user types).
        for patients in data.strip().split('|'):
            fields = patients.split(',')
            # A blank line or a stray '|' yields a record with fewer than
            # five fields; indexing it would raise IndexError, so skip it.
            if len(fields) < 5:
                continue
            patient_details[fields[4]] = fields[0:4]
search_keyword = input()  # Asks for patient's id
patient_detail = patient_details.get(search_keyword)
if patient_detail:
    print("Name: " + patient_detail[0])
    print("Age: " + patient_detail[1])
    print("Group: " + patient_detail[2])
    print("Zone: " + patient_detail[3])
    print("ID: " + search_keyword)
Using a map (a dict keyed by patient ID) instead of a linear search lets each lookup run in constant time.

Why is this throwing an IndexError?

I'm trying to figure out why it is throwing that error. The length of the tuple should be 4, and it is. Any hints, ideas?
This code is for a swiss-style project I'm working on for the Intro to Programming Udacity nano degree.
Relevant code Python code from tournament.py:
def playerStandings():
"""Returns a list of the players and their win records, sorted by wins.
The first entry in the list should be the player in first place, or a player
tied for first place if there is currently a tie.
Returns:
A list of tuples, each of which contains (id, name, wins, matches):
id: the player's unique id (assigned by the database)
name: the player's full name (as registered)
wins: the number of matches the player has won
matches: the number of matches the player has played
"""
conn = connect()
c = conn.cursor()
c.execute("SELECT COUNT(id) FROM Players;")
total = c.fetchone()[0]
num_of_players = countPlayers()
standings = [None]*num_of_players
c.execute("SELECT * FROM wincounter;")
winners = c.fetchall()
i = 0
for player in winners:
standings[i] = (player[0], player[1], player[2], player[3])
i += 1
conn.close()
return standings
Relevant code from tournament_test.py:
def testStandingsBeforeMatches():
"""
Test to ensure players are properly represented in standings prior
to any matches being reported.
"""
deleteMatches()
deletePlayers()
registerPlayer("Melpomene Murray")
registerPlayer("Randy Schwartz")
standings = playerStandings()
if len(standings) < 2:
raise ValueError("Players should appear in playerStandings even before "
"they have played any matches.")
elif len(standings) > 2:
raise ValueError("Only registered players should appear in standings.")
if len(standings[0]) != 4:
raise ValueError("Each playerStandings row should have four columns.")
[(id1, name1, wins1, matches1), (id2, name2, wins2, matches2)] = standings
if matches1 != 0 or matches2 != 0 or wins1 != 0 or wins2 != 0:
raise ValueError(
"Newly registered players should have no matches or wins.")
if set([name1, name2]) != set(["Melpomene Murray", "Randy Schwartz"]):
raise ValueError("Registered players' names should appear in standings, "
"even if they have no matches played.")
print ("6. Newly registered players appear in the standings with no matches.")
PostgreSQL schema:
DROP DATABASE IF EXISTS tournament;
CREATE DATABASE tournament;
\c tournament;
CREATE TABLE players (
id serial PRIMARY KEY NOT NULL,
Name text
);
CREATE TABLE matches (
match_id serial PRIMARY KEY NOT NULL,
winner int REFERENCES players(id),
loser int REFERENCES players(id)
);
CREATE VIEW wincounter
AS
SELECT players.id,
players.name,
COUNT(matches.winner) AS wins
FROM players
LEFT JOIN matches
ON players.id = matches.winner
GROUP BY players.id;
Error message:
vagrant@vagrant-ubuntu-trusty-32:/vagrant/tournament$ python tournament_test.py
1. countPlayers() returns 0 after initial deletePlayers() execution.
2. countPlayers() returns 1 after one player is registered.
3. countPlayers() returns 2 after two players are registered.
4. countPlayers() returns zero after registered players are deleted.
5. Player records successfully deleted.
Traceback (most recent call last):
File "tournament_test.py", line 152, in <module>
testStandingsBeforeMatches()
File "tournament_test.py", line 54, in testStandingsBeforeMatches
standings = playerStandings()
File "/vagrant/tournament/tournament.py", line 85, in playerStandings
standings[i] = (player[0], player[1], player[2], player[3])
IndexError: tuple index out of range
vagrant@vagrant-ubuntu-trusty-32:/vagrant/tournament$
The error is occurring on the line:
standings[i] = (player[0], player[1], player[2], player[3])
Your player variable is a tuple representing a row from the wincounter view in your database. That view has only three columns (players.id, players.name and wins), so you get an index error when you try to access a fourth value (player[3]).
It's not entirely clear why you have that line that way at all. If you want all the items from the player tuple to be added to standings, you could do standings[i] = player, or even get rid of the explicit loop and just write standings = list(winners) instead.

creating a data base with peewee execute_sql self.commit()

I used some very simple code to create a database with peewee. I am new to Python ORMs, so I can't really tell why I'm getting a whole bunch of errors.
What this code does: first I create a database, 'diary.db'.
The fields used are entries, which is a TextField, and date, which is a DateTimeField. I created some functions: 'initialize' to run basic commands and initialize the database, and 'menu_loop', which shows a menu in an infinite loop and may call the function 'add_entry', which adds new entries to the database.
Here's the code:
#!/usr/bin/env python3
from collections import OrderedDict
from peewee import *
import datetime
import sys
db = SqliteDatabase('diary.db')
class Diary(Model):
entries = TextField()
date = DateTimeField(default = datetime.datetime.now)
class Meta:
database = db
def initialize():
"""initializes the database"""
db.connect()
db.create_tables([Diary], safe = True)
#end of initialize
def menu_loop():
"""show menu"""
choice = 0
while choice != 2:
print("Enter '2' to quit")
print('1) to add an entry')
choice = input()
if choice == 1:
add_entry()
#end of menu_loop
def add_entry():
"""add an entry"""
print("Enter your entry, press ctrl+d when done")
data = sys.stdin.read().strip()
if data:
while(True):
option = input('\nSave entry?[1 = yes 0 = no] ')
if option == 1:
Diary.create(content=data)
print ("Saved sucessfully!")
if option == 0:
print ("Program exited")
break;
#end of add_entry
if __name__ == '__main__':
initialize()
menu_loop()
and the error log
Enter '2' to quit
1) to add an entry
1
Enter your entry, press ctrl+d when done
this is my new entry
hello world^D
Save entry?[1 = yes 0 = no] 1
Traceback (most recent call last):
File "ispythonsmart.py", line 50, in <module>
menu_loop()
File "ispythonsmart.py", line 30, in menu_loop
add_entry()
File "ispythonsmart.py", line 41, in add_entry
Diary.create(content=data)
File "/Library/Python/2.7/site-packages/peewee.py", line 4494, in create
inst.save(force_insert=True)
File "/Library/Python/2.7/site-packages/peewee.py", line 4680, in save
pk_from_cursor = self.insert(**field_dict).execute()
File "/Library/Python/2.7/site-packages/peewee.py", line 3213, in execute
cursor = self._execute()
File "/Library/Python/2.7/site-packages/peewee.py", line 2628, in _execute
return self.database.execute_sql(sql, params, self.require_commit)
File "/Library/Python/2.7/site-packages/peewee.py", line 3461, in execute_sql
self.commit()
File "/Library/Python/2.7/site-packages/peewee.py", line 3285, in __exit__
reraise(new_type, new_type(*exc_args), traceback)
File "/Library/Python/2.7/site-packages/peewee.py", line 3454, in execute_sql
cursor.execute(sql, params or ())
peewee.IntegrityError: NOT NULL constraint failed: diary.entries
You need to set entries to null=True or use a default value for entries:
class Diary(Model):
entries = TextField(null=True)
Output:
Enter '2' to quit
1) to add an entry
1
Enter your entry, press ctrl+d when done
foobar
Save entry?[1 = yes 0 = no] 1
Saved successfully!
You want to see "entries" TEXT in the db, not "entries" TEXT NOT NULL. If a column is set to NOT NULL you must insert a value or you will get an integrity error; an alternative is to give the column a default value, i.e. TextField(default="foo"). Note also that the model's field is named entries, but the code calls Diary.create(content=data), so entries is never given a value; passing entries=data would avoid the error as well. As a side note, you have #!/usr/bin/env python3 as your shebang but your code is written for Python 2, so you may want to correct that.

python-storm orm many-to-many

I'm using python-storm as orm. The many-to-many reference set is giving me headaches :(
These are the relevant objects:
class Author(object):
__storm_table__ = "author"
id = Int(primary=True)
name = Unicode()
institution_id = Int()
institution = Reference(institution_id, Institution.id)
def __init__(self, name):
self.name = name
class Paper(object):
__storm_table__ = "paper"
id = Int(primary=True)
name = Unicode()
conference_id = Int()
conference = Reference(conference_id, Conference.id)
def __init__(self, name):
self.name = name
class AuthorPapers(object):
__storm_table__ = "authorpapers"
__storm_primary__ = "author_id", "paper_id"
author_id = Int()
paper_id = Int()
The respective SQLite tables look like this:
store.execute("CREATE TABLE if not exists author (id INTEGER PRIMARY KEY, name VARCHAR, institution_id INTEGER, FOREIGN KEY (institution_id) REFERENCES institution(id))")
store.execute("CREATE TABLE if not exists paper (id INTEGER PRIMARY KEY, name VARCHAR, conference_id INTEGER, FOREIGN KEY (conference_id) REFERENCES conference(id))")
store.execute("CREATE TABLE if not exists authorpapers (author_id INTEGER, paper_id INTEGER, PRIMARY KEY (author_id, paper_id))")
Now say I have two authors who collaborated on a paper
a = Author(u"Steve Rogers")
b = Author(u"Captain America")
and a paper
p6 = Paper(u"Bunga Bunga")
So now I want to associate both authors with the paper using
Author.papers = ReferenceSet(Author.id, AuthorPapers.author_id, Paper.id, AuthorPapers.paper_id)
and doing this
a.papers.add(p6)
b.papers.add(p6)
This is, by the way, how the Storm tutorial says it is supposed to work... but I get
File "/usr/lib64/python2.7/site-packages/storm/references.py", line 376, in add
self._relation2.link(remote, link, True)
File "/usr/lib64/python2.7/site-packages/storm/references.py", line 624, in link
pairs = zip(self._get_local_columns(local.__class__),
File "/usr/lib64/python2.7/site-packages/storm/references.py", line 870, in _get_local_columns
for prop in self.local_key)
File "/usr/lib64/python2.7/site-packages/storm/references.py", line 870, in <genexpr>
for prop in self.local_key)
File "/usr/lib64/python2.7/site-packages/storm/properties.py", line 53, in __get__
return self._get_column(cls)
File "/usr/lib64/python2.7/site-packages/storm/properties.py", line 97, in _get_column
attr = self._detect_attr_name(cls)
File "/usr/lib64/python2.7/site-packages/storm/properties.py", line 82, in _detect_attr_name
raise RuntimeError("Property used in an unknown class")
RuntimeError: Property used in an unknown class
And I'm not really able to make sense of this right now.
I'm not really familiar with Storm, but looking at the documentation example, it looks like this is just an issue with the order in which the arguments to ReferenceSet are passed. I tried using this:
Author.papers = ReferenceSet(Author.id, AuthorPapers.author_id, AuthorPapers.paper_id, Paper.id)
instead of this:
Author.papers = ReferenceSet(Author.id, AuthorPapers.author_id, Paper.id, AuthorPapers.paper_id)
and no exception was raised.

Error message "MemoryError" in Python

Here's my problem: I'm trying to parse a big text file (about 15,000 KB) and write it to a MySQL database. I'm using Python 2.6, and the script parses about half the file and adds it to the database before freezing up. Sometimes it displays the text:
MemoryError.
Other times it simply freezes. I figured I could avoid this problem by using generators wherever possible, but I was apparently wrong.
What am I doing wrong?
When I press Ctrl + C to keyboard interrupt, it shows this error message:
...
sucessfully added vote # 2281
sucessfully added vote # 2282
sucessfully added vote # 2283
sucessfully added vote # 2284
floorvotes_db.py:35: Warning: Data truncated for column 'vote_value' at row 1
r['bill ID'] , r['last name'], r['vote'])
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "floorvotes_db.py", line 67, in addAllFiles
addFile(file)
File "floorvotes_db.py", line 61, in addFile
add(record)
File "floorvotes_db.py", line 35, in add
r['bill ID'] , r['last name'], r['vote'])
File "build/bdist.linux-i686/egg/MySQLdb/cursors.py", line 166, in execute
File "build/bdist.linux-i686/egg/MySQLdb/connections.py", line 35, in defaulte rrorhandler
KeyboardInterrupt
import os, re, datetime, string
# Data
DIR = '/mydir'
tfn = r'C:\Documents and Settings\Owner\Desktop\data.txt'
rgxs = {
'bill number': {
'rgx': r'(A|S)[0-9]+-?[A-Za-z]* {50}'}
}
# Compile rgxs for speediness
for rgx in rgxs: rgxs[rgx]['rgx'] = re.compile(rgxs[rgx]['rgx'])
splitter = rgxs['bill number']['rgx']
# Guts
class floor_vote_file:
    """Split one floor-vote text file into per-bill sections.

    The file is cut at every match of the module-level ``splitter`` regex and
    each non-empty piece is exposed through ``iterVotes()`` as a ``billvote``.
    """

    def __init__(self, fn):
        # Use a context manager so the file handle is closed as soon as the
        # text has been read. Note the whole file is still read in one go.
        with open(fn) as source:
            text = source.read()
        # ``splitter`` contains a capturing group, so re.split() also returns
        # the captured 'A'/'S' prefix of every match; filter those out along
        # with empty pieces.
        self.iterdata = (section for section in splitter.split(text)
                         if section and section != 'A' and section != 'S')

    def iterVotes(self):
        """Yield a ``billvote`` for every non-empty section of the file."""
        # Iterate the generator built in __init__ (the attribute is
        # ``iterdata``; ``self.data`` is never set on this class).
        for record in self.iterdata:
            if record:
                yield billvote(record)
class billvote(object):
def __init__(self, section):
self.data = [line.strip() for line
in section.splitlines()]
self.summary = self.data[1].split()
self.vtlines = self.data[2:]
self.date = self.date()
self.year = self.year()
self.votes = self.parse_votes()
self.record = self.record()
# Parse summary date
def date(self):
d = [int(str) for str in self.summary[0].split('/')]
return datetime.date(d[2],d[0],d[1]).toordinal()
def year(self):
return datetime.date.fromordinal(self.date).year
def session(self):
"""
arg: 2-digit year int
returns: 4-digit session
"""
def odd():
return divmod(self.year, 2)[1] == 1
if odd():
return str(string.zfill(self.year, 2)) + \
str(string.zfill(self.year + 1, 2))
else:
return str(string.zfill(self.year - 1, 2))+ \
str(string.zfill(self.year, 2))
def house(self):
if self.summary[2] == 'Assembly': return 1
if self.summary[2] == 'Senate' : return 2
def splt_v_line(self, line):
return [string for string in line.split(' ')
if string <> '']
def splt_v(self, line):
return line.split()
def prse_v(self, item):
"""takes split_vote item"""
return {
'vote' : unicode(item[0]),
'last name': unicode(' '.join(item[1:]))
}
# Parse votes - main
def parse_votes(self):
nested = [[self.prse_v(self.splt_v(vote))
for vote in self.splt_v_line(line)]
for line in self.vtlines]
flattened = []
for lst in nested:
for dct in lst:
flattened.append(dct)
return flattened
# Useful data objects
def record(self):
return {
'date' : unicode(self.date),
'year' : unicode(self.year),
'session' : unicode(self.session()),
'house' : unicode(self.house()),
'bill ID' : unicode(self.summary[1]),
'ayes' : unicode(self.summary[5]),
'nays' : unicode(self.summary[7]),
}
def iterRecords(self):
for vote in self.votes:
r = self.record.copy()
r['vote'] = vote['vote']
r['last name'] = vote['last name']
yield r
test = floor_vote_file(tfn)
import MySQLdb as dbapi2
import floorvotes_parse as v
import os
# Initial database crap
db = dbapi2.connect(db=r"db",
user="user",
passwd="XXXXX")
cur = db.cursor()
if db and cur: print "\nConnected to db.\n"
def commit(): db.commit()
def ext():
cur.close()
db.close()
print "\nConnection closed.\n"
# DATA
DIR = '/mydir'
files = [DIR+fn for fn in os.listdir(DIR)
if fn.startswith('fvote')]
# Add stuff
def add(r):
"""add a record"""
cur.execute(
u'''INSERT INTO ny_votes (vote_house, vote_date, vote_year, bill_id,
member_lastname, vote_value) VALUES
(%s , %s , %s ,
%s , %s , %s )''',
(r['house'] , r['date'] , r['year'],
r['bill ID'] , r['last name'], r['vote'])
)
#print "added", r['year'], r['bill ID']
def crt():
"""create table"""
SQL = """
CREATE TABLE ny_votes (openleg_id INT UNSIGNED NOT NULL AUTO_INCREMENT PRIMARY KEY,
vote_house int(1), vote_date int(5), vote_year int(2), bill_id varchar(8),
member_lastname varchar(50), vote_value varchar(10));
"""
cur.execute(SQL)
print "\nCreate ny_votes.\n"
def rst():
SQL = """DROP TABLE ny_votes"""
cur.execute(SQL)
print "\nDropped ny_votes.\n"
crt()
def addFile(fn):
"""parse and add all records in a file"""
n = 0
for votes in v.floor_vote_file(fn).iterVotes():
for record in votes.iterRecords():
add(record)
n += 1
print 'sucessfully added vote # ' + str(n)
def addAllFiles():
for file in files:
addFile(file)
if __name__=='__main__':
rst()
addAllFiles()
Generators are a good idea, but you seem to miss the biggest problem:
(str for str in splitter.split(open(fn).read()) if str and str <> 'A' and str <> 'S')
You're reading the whole file in at once even if you only need to work with bits at a time. Your code is too complicated for me to fix, but you should be able to use the file's iterator for your task:
(line for line in open(fn))
I noticed that you use a lot of split() calls. This is memory-consuming, according to http://mail.python.org/pipermail/python-bugs-list/2006-January/031571.html . You can start by investigating this.
Try commenting out add(record) to see whether the problem is in your code or on the database side. All the records are added in one transaction (if supported), and this may lead to a problem if it gets too many records. If commenting out add(record) helps, you could try calling commit() from time to time.
This isn't a Python memory issue, but perhaps it's worth thinking about. The previous answers make me think you'll sort that issue out quickly.
I wonder about the rollback logs in MySQL. If a single transaction is too large, perhaps you can checkpoint chunks. Commit each chunk separately instead of trying to rollback a 15MB file's worth.

Categories