Here's my problem: I'm trying to parse a big text file (about 15,000 KB) and write it to a MySQL database. I'm using Python 2.6, and the script parses about half the file and adds it to the database before freezing up. Sometimes it displays the text:
MemoryError.
Other times it simply freezes. I figured I could avoid this problem by using generator's wherever possible, but I was apparently wrong.
What am I doing wrong?
When I press Ctrl + C to keyboard interrupt, it shows this error message:
...
sucessfully added vote # 2281
sucessfully added vote # 2282
sucessfully added vote # 2283
sucessfully added vote # 2284
floorvotes_db.py:35: Warning: Data truncated for column 'vote_value' at row 1
r['bill ID'] , r['last name'], r['vote'])
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "floorvotes_db.py", line 67, in addAllFiles
addFile(file)
File "floorvotes_db.py", line 61, in addFile
add(record)
File "floorvotes_db.py", line 35, in add
r['bill ID'] , r['last name'], r['vote'])
File "build/bdist.linux-i686/egg/MySQLdb/cursors.py", line 166, in execute
File "build/bdist.linux-i686/egg/MySQLdb/connections.py", line 35, in defaulte rrorhandler
KeyboardInterrupt
import os, re, datetime, string
# Data
DIR = '/mydir'
tfn = r'C:\Documents and Settings\Owner\Desktop\data.txt'
rgxs = {
'bill number': {
'rgx': r'(A|S)[0-9]+-?[A-Za-z]* {50}'}
}
# Compile rgxs for speediness
for rgx in rgxs: rgxs[rgx]['rgx'] = re.compile(rgxs[rgx]['rgx'])
splitter = rgxs['bill number']['rgx']
# Guts
class floor_vote_file:
def __init__(self, fn):
self.iterdata = (str for str in
splitter.split(open(fn).read())
if str and str <> 'A' and str <> 'S')
def iterVotes(self):
for record in self.data:
if record: yield billvote(record)
class billvote(object):
def __init__(self, section):
self.data = [line.strip() for line
in section.splitlines()]
self.summary = self.data[1].split()
self.vtlines = self.data[2:]
self.date = self.date()
self.year = self.year()
self.votes = self.parse_votes()
self.record = self.record()
# Parse summary date
def date(self):
d = [int(str) for str in self.summary[0].split('/')]
return datetime.date(d[2],d[0],d[1]).toordinal()
def year(self):
return datetime.date.fromordinal(self.date).year
def session(self):
"""
arg: 2-digit year int
returns: 4-digit session
"""
def odd():
return divmod(self.year, 2)[1] == 1
if odd():
return str(string.zfill(self.year, 2)) + \
str(string.zfill(self.year + 1, 2))
else:
return str(string.zfill(self.year - 1, 2))+ \
str(string.zfill(self.year, 2))
def house(self):
if self.summary[2] == 'Assembly': return 1
if self.summary[2] == 'Senate' : return 2
def splt_v_line(self, line):
return [string for string in line.split(' ')
if string <> '']
def splt_v(self, line):
return line.split()
def prse_v(self, item):
"""takes split_vote item"""
return {
'vote' : unicode(item[0]),
'last name': unicode(' '.join(item[1:]))
}
# Parse votes - main
def parse_votes(self):
nested = [[self.prse_v(self.splt_v(vote))
for vote in self.splt_v_line(line)]
for line in self.vtlines]
flattened = []
for lst in nested:
for dct in lst:
flattened.append(dct)
return flattened
# Useful data objects
def record(self):
return {
'date' : unicode(self.date),
'year' : unicode(self.year),
'session' : unicode(self.session()),
'house' : unicode(self.house()),
'bill ID' : unicode(self.summary[1]),
'ayes' : unicode(self.summary[5]),
'nays' : unicode(self.summary[7]),
}
def iterRecords(self):
for vote in self.votes:
r = self.record.copy()
r['vote'] = vote['vote']
r['last name'] = vote['last name']
yield r
test = floor_vote_file(tfn)
import MySQLdb as dbapi2
import floorvotes_parse as v
import os
# Initial database crap
db = dbapi2.connect(db=r"db",
user="user",
passwd="XXXXX")
cur = db.cursor()
if db and cur: print "\nConnected to db.\n"
def commit(): db.commit()
def ext():
cur.close()
db.close()
print "\nConnection closed.\n"
# DATA
DIR = '/mydir'
files = [DIR+fn for fn in os.listdir(DIR)
if fn.startswith('fvote')]
# Add stuff
def add(r):
"""add a record"""
cur.execute(
u'''INSERT INTO ny_votes (vote_house, vote_date, vote_year, bill_id,
member_lastname, vote_value) VALUES
(%s , %s , %s ,
%s , %s , %s )''',
(r['house'] , r['date'] , r['year'],
r['bill ID'] , r['last name'], r['vote'])
)
#print "added", r['year'], r['bill ID']
def crt():
"""create table"""
SQL = """
CREATE TABLE ny_votes (openleg_id INT UNSIGNED NOT NULL AUTO_INCREMENT PRIMARY KEY,
vote_house int(1), vote_date int(5), vote_year int(2), bill_id varchar(8),
member_lastname varchar(50), vote_value varchar(10));
"""
cur.execute(SQL)
print "\nCreate ny_votes.\n"
def rst():
SQL = """DROP TABLE ny_votes"""
cur.execute(SQL)
print "\nDropped ny_votes.\n"
crt()
def addFile(fn):
"""parse and add all records in a file"""
n = 0
for votes in v.floor_vote_file(fn).iterVotes():
for record in votes.iterRecords():
add(record)
n += 1
print 'sucessfully added vote # ' + str(n)
def addAllFiles():
for file in files:
addFile(file)
if __name__=='__main__':
rst()
addAllFiles()
Generators are a good idea, but you seem to miss the biggest problem:
(str for str in splitter.split(open(fn).read()) if str and str <> 'A' and str <> 'S')
You're reading the whole file in at once even if you only need to work with bits at a time. You're code is too complicated for me to fix, but you should be able to use file's iterator for your task:
(line for line in open(fn))
I noticed that you use a lot of slit() calls. This is memory consuming, according to http://mail.python.org/pipermail/python-bugs-list/2006-January/031571.html . You can start investigating this.
Try to comment out add(record) to see if the problem is in your code or on the database side. All the records are added in one transaction (if supported) and maybe this leads to a problem if it get too many records. If commenting out add(record) helps, you could try to call commit() from time to time.
This isn't a Python memory issue, but perhaps it's worth thinking about. The previous answers make me think you'll sort that issue out quickly.
I wonder about the rollback logs in MySQL. If a single transaction is too large, perhaps you can checkpoint chunks. Commit each chunk separately instead of trying to rollback a 15MB file's worth.
Related
I'm searching through a MongoDB database to find a collection and pass it to another function that prints it to a file.
The name of my collection is: ec2_list_01122021
When I pass that collection to mongo db from a function called print_collections(), it complains that the find() command takes at least 1 argument:
Traceback (most recent call last):
File "C:\Users\tdun0002\OneDrive - Company\Desktop\important_folders\Project\git\cloud_scripts\aws_scripts\python\aws_tools\ec2_mongo.py", line 533, in <module>
main()
File "C:\Users\tdun0002\OneDrive - Company\Desktop\important_folders\Project\git\cloud_scripts\aws_scripts\python\aws_tools\ec2_mongo.py", line 522, in main
print_reports(interactive,aws_account,aws_account_number)
File "C:\Users\tdun0002\OneDrive - Company\Desktop\important_folders\Project\git\cloud_scripts\aws_scripts\python\aws_tools\ec2_mongo.py", line 432, in print_reports
mongo_export_to_file(interactive, aws_account, aws_account_number,instance_col)
File "C:\Users\tdun0002\OneDrive - Company\Desktop\important_folders\Project\git\cloud_scripts\aws_scripts\python\aws_tools\ec2_mongo.py", line 279, in mongo_export_to_file
mongo_docs = instance_col.find()
TypeError: find() takes at least 1 argument (0 given)
This is my code for print_collections():
def print_collections():
myclient = connect_db()
message = f"* Print DB Collections *"
banner(message, border="*")
print(f"This command prints the database collection names.\n")
if myclient != None:
# the list_database_names() method returns a list of strings
database_names = myclient.list_database_names()
print ("There are", len(database_names), "databases.")
for db_num, db in enumerate(database_names):
print ("\nGetting collections for database:", db, "--", db_num)
collection_names = myclient[db].list_collection_names()
print ("The MongoDB database returned", len(collection_names), "collections.")
# iterate over the list of collection names
for col_num, col in enumerate(collection_names):
print (col, "--", col_num)
In my output I printed the value of the variable holding the collection name, so i don't know why the find command isn't working:
Enter the date in format 'dd/mm/yyyy': 01/12/2021
Input date is valid: 01/12/2021
Instance Collection: ec2_list_01122021 # <--- the collection that I'm passing to MongoDB
Interactive: 0
If I call the mongo_export_to_file() function directly from within this script and others, it does work. This is my mongo_export_to_file() function:
def set_db():
myclient = connect_db()
today = datetime.today()
today = today.strftime("%m%d%Y")
mydb = myclient["aws_inventories"]
instance_col = "ec2_list_" + today
instance_col = mydb[instance_col]
return mydb, instance_col
def mongo_export_to_file(interactive, aws_account, aws_account_number,instance_col=None):
create_directories()
today = datetime.today()
today = today.strftime("%m-%d-%Y")
if not instance_col:
_, _, instance_col = set_db()
print(f"Instance Collection: {instance_col}\nInteractive: {interactive}")
time.sleep(10)
# make an API call to the MongoDB server
if interactive == 0:
print(f"Instance Collection: {instance_col}\nInteractive: {interactive}")
time.sleep(10)
mongo_docs = instance_col.find()
else:
mongo_docs = instance_col.find({"Account Number": aws_account_number})
docs = pandas.DataFrame(mongo_docs)
docs.pop("_id")
output_dir = os.path.join("..", "..", "output_files", "aws_instance_list", "csv", "")
output_file = os.path.join(output_dir, "aws-instance-master-list-" + today +".csv")
These are the collections in MongoDB:
> use aws_inventories
switched to db aws_inventories
> show collections
ec2_list_01082021
ec2_list_01112021
ec2_list_01122021
I'm naming the collections this way so they can be retrieved by date.
The purpose of the print_reports() function is to print reports for old entries from the database. By default the mongo_export_to_file() function prints a report for today's entry and that works fine.
Why can't mongodb find the collection name if I pass it into the function?
I am using the following line of code for executing and printing data from my sql database. For some reason that is the only command that works for me.
json_string = json.dumps(location_query_1)
My question is that when I print json_string it shows data in the following format:
Actions.py code:
class FindByLocation(Action):
def name(self) -> Text:
return "action_find_by_location"
def run (self, dispatcher: CollectingDispatcher,
tracker: Tracker,
doman: Dict[Text, Any])-> List[Dict[Text,Any]]:
global flag
location = tracker.get_slot("location")
price = tracker.get_slot("price")
cuisine = tracker.get_slot("cuisine")
print("In find by Location")
print(location)
location_query = "SELECT Name FROM Restaurant WHERE Location = '%s' LIMIT 5" % location
location_count_query = "SELECT COUNT(Name) FROM Restaurant WHERE Location = '%s'" % location
location_query_1 = getData(location_query)
location_count_query_1 = getData(location_count_query)
if not location_query_1:
flag = 1
sublocation_view_query = "CREATE VIEW SublocationView AS SELECT RestaurantID, Name, PhoneNumber, Rating, PriceRange, Location, Sublocation FROM Restaurant WHERE Sublocation = '%s'"%(location)
sublocation_view = getData(sublocation_view_query)
dispatcher.utter_message(text="یہ جگہ کس ایریا میں ہے")
else:
flag = 0
if cuisine is None and price is None:
json_string = json.dumps(location_query_1)
print(isinstance(json_string, str))
print("Check here")
list_a=json_string.split(',')
remove=["'",'"','[',']']
for i in remove:
list_a=[s.replace(i, '') for s in list_a]
dispatcher.utter_message(text="Restaurants in Location only: ")
dispatcher.utter_message(list_a)
What should I do se that the data is showed in a vertical list format (new line indentation) and without the bracket and quotation marks? Thank you
First of all, have you tried reading your data into a pandas object? I have done some programs with a sqlite database and this worked for me:
df = pd.read_sql_query("SELECT * FROM {}".format(self.tablename), conn)
But now to the string formatting part:
# this code should do the work for you
# first of all we have our string a like yours
a="[['hallo'],['welt'],['kannst'],['du'],['mich'],['hoeren?']]"
# now we split the string into a list on every ,
list_a=a.split(',')
# this is our list with chars we want to remove
remove=["'",'"','[',']']
# now we replace all elements step by step with nothing
for i in remove:
list_a=[s.replace(i, '') for s in list_a]
print(list_a)
for z in list_a:
print(z)
The output is then:
['hallo', 'welt', 'kannst', 'du', 'mich', 'hoeren?']
hallo
welt
kannst
du
mich
hoeren?
I hope I could help.
new to flask, i'm not so sure why I am getting this name error: 'Nontype' object has no attribute value 'name'.
(PLease ignore: "It looks like your post is mostly code; please add some more details.
It looks like your post is mostly code; please add some more details.
It looks like your post is mostly code; please add some more details.
")
Here is what it looks like in the console
File "/Users/thomashunt/projects/ct-platform-api/apis/student_api.py", line 448, in put
return StudentService.student_WorkShadow(submission)
File "/Users/thomashunt/projects/ct-platform-api/services/students.py", line 234, in student_WorkShadow
AddressService.set_address_info(submission.student_detail.location_address)
File "/Users/thomashunt/projects/ct-platform-api/services/addresses.py", line 18, in set_address_info
address_description = address.address_description(country.name)
AttributeError: 'NoneType' object has no attribute 'name'
services/students
#staticmethod
def student_WorkShadow(submission: StudentWorkShadowEdit) -> Person:
repo = PersonData()
advisor = repo.find_by_email(submission.advisor_email)
email = submission.email.lower()
student = repo.find_by_email(email)
if not student:
raise RecordNotFoundException('No Record with this email in the database')
if not advisor:
raise RecordNotFoundException('No Record with this advisor email in the database')
# Forced re-write of Address entered by Student
student.student_detail.location_address = \
AddressService.set_address_info(submission.student_detail.location_address)
submission.set_model(student)
files = StudentService.promote_student_files(advisor, submission.file_ids, student.id)
# Forced re-write of Address entered by Student
repo.save(student, advisor.id)
repo.session.commit()
student_statement = 'student workshadow details updated'
reference_fields = [EventItemReferenceField('updated_workshadowDetails', 'Updated workshadow Details'),
EventItemReferenceField('form_action', 'confidential_updated')]
reference_content = [student_statement]
MessagingActivityService.create_student_event_for_action(student.id, None, student,
True,
ActionTypes.Student.value.InternalNote,
student_statement,
reference_fields,
reference_content, files, None,
None, None, True, True)
StudentService.re_index(student)
return student
API Endpoints
#ns.route('/StudentWorkShadow')
class StudentWorkShadowEndpoint(Resource):
#SecurityService.requires_system
#ns.expect(student_workshadow_model, validate=True)
#ns.marshal_with(student_person_model)
def put(self):
logging.info('student workshadow details updated')
submission = StudentWorkShadowEdit.from_dict(request.json)
return StudentService.student_WorkShadow(submission)
services/address
import logging
from models import Address
from resources import AddressEdit
from utility import GoogleUtility
from .data import CountryData
class AddressService:
#staticmethod
def set_address_info(address: Address):
countries = CountryData()
country = countries.load_country(address.country_code)
if address.suburb is not None and address.state is not None:
address.location_description = address.suburb + ', ' + address.state
address_description = address.address_description(country.name)
maps_result = GoogleUtility.resolve_coords(address_description)
try:
first_result = maps_result[0]
print(first_result)
address.latitude = first_result['geometry']['location']['lat']
address.longitude = first_result['geometry']['location']['lng']
address.raw_location = first_result
address.formatted_address = first_result['formatted_address']
except TypeError:
print(maps_result.error)
logging.error(maps_result.error)
except IndexError:
logging.error('No result for address resolution')
return address
#staticmethod
def has_address_changed(old_address: Address, new_address: AddressEdit):
if not old_address and new_address:
return True
return not (old_address.line_1 == new_address.line_1
and old_address.line_2 == new_address.line_2
and old_address.suburb == new_address.suburb
and old_address.postcode == new_address.postcode
and old_address.country_code == new_address.country_code)
country/data outputs:
import json
from resources import Country
class CountryData:
with open('services/data/countries.json') as json_data:
source = json.load(json_data)
countries = [Country.from_dict(l) for l in source]
def load_country(self, country_code: str):
result = None
for country in self.countries:
if country.country_code == country_code:
result = country
return result
def load_state(self, country_code: str, short_title: str):
result = None
country = self.load_country(country_code)
for state in country.states:
if state.short_title == short_title:
result = state
return result
def list_states(self, country_code: str):
return self.load_country(country_code).states
My suspicion is that the value you pass for country_code does not match against any country.country_code attribute.
My advice is to put a debug print line in the method like this:
class CountryData:
...
def load_country(self, country_code: str):
result = None
for country in self.countries:
if country.country_code == country_code:
result = country
print(result, country.country_code) # this line added
return result
...
Doing this, you should be able to see if result is ever set to a value other than None, and you can observe exactly which country code triggers it. Moreover, this will print all available country codes (one per line). If your country_code is not one of these, that is the problem.
I am trying to automate this scenario. I have 2 .sql files (add1.sql and add2.sql) which has 1 insert script each.
My goal is to write one record to table1 by executing lines from add1.sql and one record to cm.cl by executing lines from add2.sql, waiting for about 5 mins so a backend service runs. This service writes from DB1 to DB2. I then connect to DB2 to see if the record from DB1 matches what was written to DB2. Depending no the results, an email is sent.
Below is my code. Everything works just fine except that it writes twice to DB1. So, basically 4 records are inserted instead of 2. Any idea why it writes 4 records?
import pypyodbc as pyodbc
import smtplib
sender = 'abc#abc.com'
receivers = ['abc#abc.com','xyz#abc.com']
import unittest
import time
class TestDB1(unittest.TestCase):
def testing_master(self):
Master_Conn = 'Driver=
{SQLServer};Server=server\servername;Database=database;UID=userid;PWD=password'
Master_db = pyodbc.connect(Master_Conn)
Master_Cursor = Master_db.cursor()
try:
#Open, read and execute add_shell.sql
file = open('C:\\aaa\\add1.sql', 'r')
line = file.read()
lines = line.replace('\n', ' ')
file1 = open('C:\\aaa\\add2.sql', 'r')
line1=file1.read()
lines1=line1.replace('\n', ' ')
Master_Cursor.execute(lines)
time.sleep(1)
Master_Cursor.execute(lines1)
Master_db.commit()
file.close()
file1.close()
#Get python object for latest record inserted in DB1
Master_CID=Master_Cursor.execute("select col1 from tablename1 order by sequenceid desc").fetchone()
#convert tuple to srting [0] gives first tuple element.
Master_CID_str=str(Master_CID[0])
#Get GUID by stripping first 2 chars and last char.
Master_CID_str=Master_CID_str[2:len(Master_CID_str)-1]
Master_CLID=Master_Cursor.execute("select col2 from tablename2 order by sequenceid desc").fetchone()
Master_CLID_str=str(Master_CLID[0])
Master_CLID_str=Master_CLID_str[2:len(Master_CLID_str) - 1]
# Wait for service that transfers data from one db to another DB to run
time.sleep(310)
finally:
Master_Cursor.close()
Master_db.close()
return Master_CID,Master_CID_str,Master_CLID,Master_CLID_str
def testing_int_instance(self):
#unpacking return value of tuple from testing_master() function
Master_CID,Master_CID_str,Master_CLID,Master_CLID_str=self.testing_master()
print ("printing from testing_int_instance {0}".format(Master_CID))
Int_Instance_Conn = 'Driver={SQL Server};Server=server2\servername2;Database=database2;UID=uid;PWD=password;'
Int_db = pyodbc.connect(Int_Instance_Conn)
Int_Cursor = Int_db.cursor()
#return Int_db, Int_Cursor
#execute select from db where col matches that of one inserted in master db.
Int_Instance_CID=Int_Cursor.execute("select col1 from table1 where cartridgemodelid = '%s'" %(Master_CID_str)).fetchone()
print(Int_Instance_CID)
smtpObj = smtplib.SMTP('22.101.1.333', 25)
if (Master_CID==Int_Instance_CID):
print("Matched")
content="This email confirms successful data transfer from Master to Instance for col1: \n"
message = "\r\n".join(["From:" + sender,"To:" + str(receivers[:]),"Subject: Test Result","",content +Master_CID_str])
#smtpObj = smtplib.SMTP('22.101.2.222', 25)
smtpObj.sendmail(sender, receivers, message)
elif (Master_CID!=Int_Instance_CID):
print("no match")
content = "This email confirms failure of data transfer from DB1 to DB2 for COL1: \n"
message = "\r\n".join(["From:" + sender, "To:" + str(receivers[:]), "Subject: Test Result", "",content +Master_CID_str])
smtpObj.sendmail(sender, receivers, message)
Int_Instance_CLID=Int_Cursor.execute("select COL2 from table2 where col= '%s'" %(Master_CLID_str)).fetchone()
print (Int_Instance_CLID)
if (Master_CLID == Int_Instance_CLID):
print ("Printing int_instance CLID {0}".format(Int_Instance_CLID))
content = "This email confirms successful data transfer from DB1 to DB2 for COL: \n"
message = "\r\n".join(
["From:" + sender, "To:" + str(receivers[:]), "Subject: Test Result", "", content + Master_CLID_str])
#smtpObj = smtplib.SMTP('22.101.2.222', 25)
smtpObj.sendmail(sender, receivers, message)
print ("Ids Matched")
elif (Master_CLID != Int_Instance_CLID):
DB1 to DB2 for COL: \n"
message = "\r\n".join(
["From:" + sender, "To:" + str(receivers[:]), "Subject: Test Result", "", content + Master_CLID_str])
#smtpObj = smtplib.SMTP('22.101.2.222', 25)
smtpObj.sendmail(sender, receivers, message)
smtpObj.quit()
Int_db.close()
if name == 'main':
unittest.main()
add1.sql is:
DECLARE #Name VARCHAR(2000)
DECLARE #PartNumber VARCHAR(2000)
SELECT #Name='test'+convert(varchar,getdate(),108)
SELECT #PartNumber='17_00001_'+convert(varchar,getdate(),108)
DECLARE #XML XML
DECLARE #FileName VARCHAR(1000)
DECLARE #Id UNIQUEIDENTIFIER
SELECT #Id = NEWID()
SELECT #FileName = 'test.xml'
SELECT #XML='<model>
<xml tags go here>
BEGIN
INSERT INTO table1
(ID,Name,Type,Desc,Number,Revision,Model,status,Modifiedby,Modifiedon)
VALUES(#Id,#Name,'xyz','',#partnumber,'01',#XML,'A','453454-4545-4545-4543-345342343',GETUTCDATE())
add2.sql is:
DECLARE #XML XML
DECLARE #CM_Name VARCHAR(2000)
DECLARE #FileName VARCHAR(1000)
DECLARE #PartNumber VARCHAR(2000)
DECLARE #Id UNIQUEIDENTIFIER
SELECT #Id=NEWID()
DECLARE #Name VARCHAR(2000)
DECLARE #CMId VARCHAR(2000)
DECLARE #CM_PartName VARCHAR(2000)
DECLARE #CM_Partnumber VARCHAR(2000)
SELECT #Name='test'+convert(varchar,getdate(),108)
SELECT #PartNumber='test'+convert(varchar,getdate(),108)
DECLARE #RowCount INT
DECLARE #Message VARCHAR(100);
SELECT #FileName = 'test.xml'
SELECT #CMId = CM.CMID,
#CM_Name = CM.CMName,
#CM_PN = CM.PN
FROM cm.Model CM
WHERE CM.MName LIKE 'test%'
ORDER BY CM.ModifiedBy DESC
SELECT #XML='<Layout>
other xml tags...
BEGIN
INSERT INTO cm.CL(ID, ModelID, Layout, Description, PN, Revision, CLayout, Status, ModifiedBy, ModifiedOn)
SELECT TOP 1 #Id, #CMId, #Name, '', #PartNumber, '01', #XML, 'A', '453454-345-4534-4534-4534543545', GETUTCDATE()
FROM cm.table1 CM
WHERE CM.Name=#CM_Name
AND CM.Partnumber=#CM_Partnumber
Currently, you are calling test_master() twice! First as your named method and then in second method when you unpack the returned values. Below is a demonstration of defined methods outside of the Class object. If called as is, testing_master will run twice.
Consider also using a context manager to read .sql scripts using with() which handles open and close i/o operations shown below:
# FIRST CALL
def testing_master():
#...SAME CODE...
try:
with open('C:\\aaa\\add1.sql', 'r') as file:
lines = file.read().replace('\n', ' ')
Master_Cursor.execute(lines)
Master_db.commit()
time.sleep(1)
with open('C:\\aaa\\add2.sql', 'r') as file1:
lines1 = file1.read().replace('\n', ' ')
Master_Cursor.execute(lines1)
Master_db.commit()
#...SAME CODE...
return Master_CID, Master_CID_str, Master_CLID, Master_CLID_str
def testing_int_instance():
# SECOND CALL
Master_CID, Master_CID_str, Master_CLID, Master_CLID_str = testing_master()
#...SAME CODE...
if __name__ == "__main__":
testing_master()
testing_int_instance()
Commenting out the time(310) seems like it works but as you mention the background Windows service does not effectively run and so interrupts database transfer.
To resolve, consider calling the second method at the end of the first by passing the values as parameters without any return and remove unpacking line. Then, in the main global environment, only run testing_master(). Of course qualify with self when inside a Class definition.
def testing_master():
#...SAME CODE...
testing_int_instance(Master_CID, Master_CID_str, Master_CLID, Master_CLID_str)
def testing_int_instance(Master_CID, Master_CID_str, Master_CLID, Master_CLID_str):
#...SKIP UNPACK LINE
#...CONTINUE WITH SAME CODE...
if __name__ == "__main__":
testing_master()
Due to your unittest, consider slight adjustment to original setup where you qualify every variable with self:
def testing_master():
...
self.Master_CID=Master_Cursor.execute("select col1 from tablename1 order by sequenceid desc").fetchone()
self.Master_CID_str=str(Master_CID[0])
self.Master_CID_str=Master_CID_str[2:len(Master_CID_str)-1]
self.Master_CLID=Master_Cursor.execute("select col2 from tablename2 order by sequenceid desc").fetchone()
self.Master_CLID_str=str(Master_CLID[0])
self.Master_CLID_str=Master_CLID_str[2:len(Master_CLID_str) - 1]
def testing_int_instance(self):
# NO UNPACK LINE
# ADD self. TO EVERY Master_* VARIABLE
...
The purpose of this form is to let users enter a lot of places (comma separated) and it'll retrieve the phone, name, website. Have it working in a python IDE, no problem, but having issues putting it into my webapp.
I'm getting the error Exception Value: Can't pickle local object 'GetNums.<locals>.get_data' at the line where a is assigned. I checked the type of inputText and verified that it is indeed a list. So, I'm not sure why it won't pickle.
def GetNums(request):
form = GetNumsForm(request.POST or None)
if form.is_valid():
inputText = form.cleaned_data.get('getnums')
# all experimental
inputText = inputText.split(',')
def get_data(i):
#DON'T FORGET TO MOVE THE PRIMARY KEY LATER TO SETTINGS
r1 = requests.get('https://maps.googleapis.com/maps/api/place/textsearch/json?query=' + i + '&key=GET_YOUR_OWN')
a = r1.json()
pid = a['results'][0]['place_id']
r2 = requests.get('https://maps.googleapis.com/maps/api/place/details/json?placeid=' + pid + '&key=GET_YOUR_OWN')
b = r2.json()
phone = b['result']['formatted_phone_number']
name = b['result']['name']
try:
website = b['result']['website']
except:
website ='No website found'
return ' '.join((phone, name, website))
v = str(type(inputText))
with Pool(5) as p:
a = (p.map(get_data, inputText))
# for line in p.map(get_data, inputText):
# print(line)
#code assist by http://stackoverflow.com/a/34512870/5037442
#end experimental
return render(request, 'about.html', {'v': a})
It's actually barfing when trying to pickle get_data, which is a nested function/closure.
Move get_data out of GetNums (and agh rename it to snake_case please) and it should work.