I'm getting an operand error from the MySQL connector complaining that it wants 4 columns, and I can't see which operands it's looking for. The query works for some case types and not others. Here is the traceback (trimmed at the bottom, where it descends into the MySQL library):
Traceback (most recent call last):
File "/Users/christoph/PycharmProjects/physicianWorkQueueProject/physicianWorkQueueProject.py", line 158, in <module>
parse()
File "/Users/christoph/PycharmProjects/physicianWorkQueueProject/physicianWorkQueueProject.py", line 138, in parse
c.execute(getPhysiciansql_cmd)
...
mysql.connector.errors.DataError: 1241 (21000): Operand should contain 4 column(s)
#!/usr/bin/python
##################################################
# This is a prototype pathologist work queue management system
# The program calls out to a database of pathologists, their specialities,
# and an index of case types and their intended specialists
#
# Usage: Run at your command line. You will then enter case numbers (which aren't validated (currently))
# and case types (which are validated). The program will distribute the entered case as follows:
#
# If the case is intended for generalists, a system that amounts to names being pulled from a hat
# is employed. When a name is selected (at random), the case is entered into that pathologist's work queue,
# and that pathologist's name is not returned to the pool.
# The cycle then repeats, with a random name picked each time for generalist-requiring cases,
# until all of the names have been pulled. At that point, all names are returned to the pool and the whole cycle
# begins again.
#
# For specialist-requiring cases, the name-draw system is bypassed and the case is entered directly
# into that pathologist's work queue.
# 16 Aug 2018
# My Real Name
##################################################
import mysql.connector as mariaDB
import time
import pandas as pd
from random import choice
def distributefairly(inputCaseNumber, inputcasetype):
# distributefairly draws names out of a hat, with each pulled name shrinking the pot until none are
# left, at which point all names are added back
c.execute("SELECT physicianName FROM physicianNamesSpecialties;")
originalPhysicianList = c.fetchall()
physicianList = originalPhysicianList
# print("counter at:",count)
chosenPhysician = choice(physicianList)
pos = physicianList.index(chosenPhysician)
physicianList.pop(pos)
cleanedUpChosenPhysician = chosenPhysician[0]
insert(cleanedUpChosenPhysician)
print("This case is going to", cleanedUpChosenPhysician + ".")
select()
increment()
if len(physicianList) == 0:
reset()
def reset():
# this resets the counter the distributefairly module calls
global count
global physicianList
count = 0
c.execute("SELECT physicianName FROM physicianNamesSpecialties;")
physicianList = c.fetchall()
def increment():
# increments the counter of the distributefairly module
global count
count += 1
def print_count():
print(count)
def insert(cleanedUpPhysResult):
# adds inputted cases into the workQueue table
global inputCaseNumber
global inputCaseType
ts = time.gmtime()
readableTs = time.strftime("%Y-%m-%d %H:%M:%S", ts)
c2.execute("INSERT INTO workQueue (name, caseNumber, timestamp, tableCaseType) values (%s,%s,%s,%s)", (cleanedUpPhysResult, inputCaseNumber, readableTs, str(inputCaseType)))
conn2.commit()
def select():
# this outputs the workQueue after every addition
sql = "SELECT * FROM workQueue"
c2.execute(sql)
rWQ = c2.fetchall()
print(pd.DataFrame(rWQ, columns= ['Name','Case Number','Time Stamp','Specialty','Row ID'])) # .set_index('Row ID')
def startup():
# create()
global inputCaseType
global inputCaseNumber
inputCaseNumber = input("Enter Case Number: ")
inputCaseType = input("Enter case type (use proper abbreviations): ")
def parse():
global inputCaseNumber
global inputCaseType
caseInputsql_cmd = "SELECT specialtyRequiredToProcess,Description FROM caseTypes WHERE caseTypeName='{}'".format(inputCaseType)
c.execute(caseInputsql_cmd)
rows_returned = c.fetchall()
if not rows_returned:
print("No match to table of specimen types returned. Check the case type abbreviation and try again.")
return
else:
for row in rows_returned:
r = row[0]
d = row[1]
print("This is a", r, "service case. It is a", d,"type case.")
if r != "GENERALIST":
getPhysiciansql_cmd = "SELECT physicianName FROM physicianNamesSpecialties WHERE (specialty, specialty2, specialty3, specialty4) ='{}'".format(r)
c.execute(getPhysiciansql_cmd)
physResult = choice(c.fetchall())
cleanedUpPhysResult = physResult[0]
print("This case is going to", cleanedUpPhysResult+".")
insert(cleanedUpPhysResult)
select()
else:
distributefairly(inputCaseNumber, inputCaseType)
conn = mariaDB.connect(host='xxxxx', user='xxxxx',password='xxxxxx',db='lookupDB')
conn2 = mariaDB.connect(host='xxxxxxl', user='xxxxx',password='xxxxxxx',db='workQueue')
c = conn.cursor()
c2 = conn2.cursor()
count = 0
while True:
startup()
parse()
This is the .execute(...) call that fails:
getPhysiciansql_cmd = "SELECT physicianName FROM physicianNamesSpecialties WHERE (specialty, specialty2, specialty3, specialty4) ='{}'".format(r)
c.execute(getPhysiciansql_cmd)
My variable r holds this data:
print("This is a", r, "service case. It is a", d,"type case.")
This is a THORACIC service case. It is a TRANSBRONCHIAL WANG NEEDLE ASPIRATION type case.
My variable getPhysiciansql_cmd holds this data:
getPhysiciansql_cmd = SELECT physicianName FROM physicianNamesSpecialties WHERE (specialty, specialty2, specialty3, specialty4) ='THORACIC'
I think this is just a matter of how I'm using the WHERE clause. I saw somewhere that when a WHERE clause covers multiple fields, you put parentheses around the fields. I've experimented with reducing this to the bare minimum and using the OR operator in the WHERE clause, and that way I do receive a proper response.
import mysql.connector as mariaDB
conn = mariaDB.connect(host='xxxxx', user='xxxx',password='xxxxx',db='lookupDB')
conn2 = mariaDB.connect(host='xxxxx', user='xxxx',password='xxxxxx',db='workQueue')
c = conn.cursor()
c2 = conn2.cursor()
inputSpecialty = input("specialty? ")
r = inputSpecialty
c.execute("SELECT physicianName FROM physicianNamesSpecialties WHERE specialty = %s OR specialty2 = %s OR specialty3 = %s OR specialty4 = %s", (r,r,r,r))
physResult = c.fetchall()
cleanedUpPhysResult = physResult
print(cleanedUpPhysResult)
Output:
specialty? THORACIC
[('Song',), ('Han',), ('He',), ('Goldfischer',)]
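Building on that experiment, here is a minimal sketch of how the physician lookup in parse() could use the same parameterized OR conditions; the table and column names are taken from the question, and the IN form mentioned in the comment is an untested alternative rather than something from the original code.

def get_physicians_for_specialty(cursor, r):
    # same query shape that worked in the bare-minimum experiment above
    sql = ("SELECT physicianName FROM physicianNamesSpecialties "
           "WHERE specialty = %s OR specialty2 = %s OR specialty3 = %s OR specialty4 = %s")
    # alternative (assumed equivalent): "... WHERE %s IN (specialty, specialty2, specialty3, specialty4)" with (r,) as the parameter tuple
    cursor.execute(sql, (r, r, r, r))
    return cursor.fetchall()

Passing the value as a parameter also avoids building the SQL with .format(), which keeps quoting problems and SQL injection out of the picture.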
Related
I have a list of data to process on a 64-core CPU (plus 500 GB RAM).
The processing sorts strings and builds a result set of millions of records; that part runs just fine and takes only a few seconds with multiprocessing.
But I also need to store the result somewhere, either as txt or csv output or in a database. So far I haven't found a viable solution: after the first part (process), the insert either errors out when I try it with MySQL pooling, or takes an insanely long time when writing the txt output.
What I've tried so far: simple txt output, printing to a txt file, and the csv, pandas and numpy libraries. Nothing seems to speed it up. Any help would be greatly appreciated!
My code right now:
import os
import re
import sys  # used for sys.argv below
import datetime
import time
import csv
import mysql.connector as connector
from mysql.connector.pooling import MySQLConnectionPool
import mysql
import numpy as np
from tqdm import tqdm
from time import sleep
import multiprocessing as mp
import numpy
pool = MySQLConnectionPool( pool_name="sql_pool",
pool_size=32,
pool_reset_session=True,
host="localhost",
port="3306",
user="homestead",
password="secret",
database="homestead")
# # sql connection
db = mysql.connector.connect(
host="localhost",
port="3306",
user="homestead",
password="secret",
database="homestead"
)
sql_cursor = db.cursor()
delete_statement = "DELETE FROM statistics"
sql_cursor.execute(delete_statement)
db.commit()
sql_statement = "INSERT INTO statistics (name, cnt) VALUES (%s, %s)"
list = []
domains = mp.Manager().list()
unique_list = mp.Manager().list()
invalid_emails = mp.Manager().list()
result = mp.Manager().list()
regex_email = '^(\w|\.|\_|\-)+[#](\w|\_|\-|\.)+[.]\w{2,3}$'
# check email validity
def check(list, email):
if(re.search(regex_email, email)):
domains.append(email.lower().split('#')[1])
return True
else:
invalid_emails.append(email)
return False
#end of check email validity
# execution time converter
def convertTime(seconds):
seconds = seconds % (24 * 3600)
hour = seconds // 3600
seconds %= 3600
minutes = seconds // 60
seconds %= 60
if(hour == 0):
if(minutes == 0):
return "{0} sec".format(seconds)
else:
return "{0}min {1}sec".format(minutes, seconds)
else:
return "{0}hr {1}min {2}sec".format(hour, minutes, seconds)
# execution time converter end
#process
def process(list):
for item in tqdm(list):
if(check(list, item)):
item = item.lower().split('#')[1]
if item not in unique_list:
unique_list.append(item)
# end of process
def insert(list):
global sql_statement
# Add to db
con = pool.get_connection()
cur = con.cursor()
print("PID %d: using connection %s" % (os.getpid(), con))
#cur.executemany(sql_statement, sorted(map(set_result, list)))
for item in list:
cur.execute(sql_statement, (item, domains.count(item)))
con.commit()
cur.close()
con.close()
# def insert_into_database(list):
#sql_cursor.execute(sql_statement, (unique_list, 1), multi=True)
# sql_cursor.executemany(sql_statement, sorted(map(set_result, list)))
# db.commit()
# statistics
def statistics(list):
for item in tqdm(list):
if(domains.count(item) > 0):
result.append([domains.count(item), item])
# end of statistics
params = sys.argv
filename = ''
process_count = -1
for i, item in enumerate(params):
if(item.endswith('.txt')):
filename = item
if(item == '--top'):
process_count = int(params[i+1])
def set_result(item):
return item, domains.count(item)
# main
if(filename):
try:
start_time = time.time()
now = datetime.datetime.now()
dirname = "email_stats_{0}".format(now.strftime("%Y%m%d_%H%M%S"))
os.mkdir(dirname)
list = open(filename).read().split()
if(process_count == -1):
process_count = len(list)
if(process_count > 0):
list = list[:process_count]
#chunking list
n = int(len(list) / mp.cpu_count())
chunks = [list[i:i + n] for i in range(0, len(list), n)]
processes = []
print('Processing list on {0} cores...'.format(mp.cpu_count()))
for chunk in chunks:
p = mp.Process(target=process, args=[chunk])
p.start()
processes.append(p)
for p in processes:
p.join()
# insert(unique_list)
## step 2 - write sql
## Clearing out db before new data insert
con = pool.get_connection()
cur = con.cursor()
delete_statement = "DELETE FROM statistics"
cur.execute(delete_statement)
u_processes = []
#Maximum pool size for sql is 32, so maximum chunk number should be that too.
if(mp.cpu_count() < 32):
n2 = int(len(unique_list) / mp.cpu_count())
else:
n2 = int(len(unique_list) / 32)
u_chunks = [unique_list[i:i + n2] for i in range(0, len(unique_list), n2)]
for u_chunk in u_chunks:
p = mp.Process(target=insert, args=[u_chunk])
p.start()
u_processes.append(p)
for p in u_processes:
p.join()
for p in u_processes:
p.close()
# sql_cursor.executemany(sql_statement, sorted(map(set_result, unique_list)))
# db.commit()
# for item in tqdm(unique_list):
# sql_val = (item, domains.count(item))
# sql_cursor.execute(sql_statement, sql_val)
#
# db.commit()
## numpy.savetxt('saved.txt', sorted(map(set_result, unique_list)), fmt='%s')
# with(mp.Pool(mp.cpu_count(), initializer = db) as Pool:
# Pool.map_async(insert_into_database(),set(unique_list))
# Pool.close()
# Pool.join()
print('Creating statistics for {0} individual domains...'.format(len(unique_list)))
# unique_list = set(unique_list)
# with open("{0}/result.txt".format(dirname), "w+") as f:
# csv.writer(f).writerows(sorted(map(set_result, unique_list), reverse=True))
print('Writing final statistics...')
print('OK.')
f = open("{0}/stat.txt".format(dirname),"w+")
f.write("Number of processed emails: {0}\r\n".format(process_count))
f.write("Number of valid emails: {0}\r\n".format(len(list) - len(invalid_emails)))
f.write("Number of invalid emails: {0}\r\n".format(len(invalid_emails)))
f.write("Execution time: {0}".format(convertTime(int(time.time() - start_time))))
f.close()
except FileNotFoundError:
print('File not found, path or file broken.')
else:
print('Wrong file format, should be a txt file.')
# main
See my comments regarding some changes you might wish to make, one of which might improve performance. But I think one area where performance could really be improved is in your use of managed lists. These are represented by proxies, and each operation on such a list is essentially a remote procedure call and thus very slow. You cannot avoid this, given that you need multiple processes updating a common, shared list (or dict, if you take my suggestion). But in the main process you might be trying, for example, to construct a set from a shared list as follows:
Pool.map_async(insert_into_database(),set(unique_list))
(by the way, that should be Pool.map(insert_into_database, set(unique_list)), i.e. you have an extra set of () and you can then get rid of the calls to pool.close() and pool.join() if you wish)
The problem is that you are iterating every element of unique_list through a proxy, which might be what is taking a very long time. I say "might" because I would think the use of managed lists would prevent the code as is, i.e. without outputting the results, from completing in "a few seconds" if we are talking about "millions" of records and thus millions of remote procedure calls. But this number could certainly be reduced if you could somehow get the underlying list as a native list.
First, you need to heed my comment about having declared a variable named list, which makes it impossible to create native lists or subclasses of list. Once you have renamed that variable to something more reasonable, we can create our own managed class MyList that will expose the underlying list on which it is built. Note that you can do the same thing with a MyDict class that subclasses dict. I have defined both classes for you. Here is a benchmark showing the difference between constructing a native list from a managed list versus creating a native list from a MyList:
import multiprocessing as mp
from multiprocessing.managers import BaseManager
import time
class MyManager(BaseManager):
pass
class MyList(list):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
def get_underlying_list(self):
return self
class MyDict(dict):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
def get_underlying_dict(self):
return self
# required for windows, which I am running on:
if __name__ == '__main__':
l = mp.Manager().list()
for i in range(100_000):
l.append(i)
t = time.time()
l2 = list(l)
print(time.time() - t, l2[0:5], l2[-5:])
MyManager.register('MyList', MyList)
MyManager.register('MyDict', MyDict)
my_manager = MyManager()
# must explicitly start the manager or use: with MyManager() as manager:
my_manager.start()
l = my_manager.MyList()
for i in range(100_000):
l.append(i)
t = time.time()
l2 = list(l.get_underlying_list())
print(time.time() - t, l2[0:5], l2[-5:])
Prints:
7.3949973583221436 [0, 1, 2, 3, 4] [99995, 99996, 99997, 99998, 99999]
0.007997751235961914 [0, 1, 2, 3, 4] [99995, 99996, 99997, 99998, 99999]
I'm trying to speed up my code with multiprocessing by making multiple API calls at once. Currently I make an API call, get the data I need from it, and then insert it into the database. It works, but it's very slow. I need to have about 700-800 million users in the database, and at this current speed it will take about 200-250 days. How can I make multiple API calls at once?
import traceback
import requests
import json
import sys
from time import time, sleep
from multiprocessing import Process, Queue
from io import BytesIO
import imagehash
from PIL import Image
import sqlite3
from multiprocessing import Process
from multiprocessing import Pool as ThreadPool
min = 7960265729
max = 9080098567
database_location = 'D:/Script/steam_database.db'
key = []
pool_size = 32
image_hashes = []
def queue_flusher(queue, flush_limit=80, temp = 0):
connection = sqlite3.connect(database_location)
cursor = connection.cursor()
cursor.execute("CREATE TABLE IF NOT EXISTS user (id INTEGER PRIMARY KEY AUTOINCREMENT, hash TEXT, profile TEXT)")
connection.commit()
while True:
if(queue.qsize() < flush_limit):
sleep(.1)
else:
temp += 80
print(f"Flushing {flush_limit} out of queue {temp}")
queue_input = [queue.get() for _ in range(0, flush_limit)]
cursor = connection.cursor()
for row in queue_input:
if row['image'] not in image_hashes:
print(f"Inserting Row: {repr(row)}")
cursor.execute("INSERT INTO user (hash, profile) VALUES (?, ?);", (row['image'], row['profileUrl']))
image_hashes.append(row['image'])
connection.commit()
connection.close()
def databaseFiller(queue, min = 0, max = 0):
while True:
try:
for i in range(min, max):
r = requests.get(f'http://api.steampowered.com/ISteamUser/GetPlayerSummaries/v0002/?key={key[3]}&steamids=7656119{i}').json()
pool = ThreadPool(8)
all = pool.map(databaseFiller, i)
response = r
player = None
steamid = None
response = response.get('response', None)
if response is None or not response.get('players', None):
continue
player = response['players'][0]
pfp = player.get('avatar', None)
profileUrl = player.get('profileurl', None)
if pfp != "https://steamcdn-a.akamaihd.net/steamcommunity/public/images/avatars/fe/fef49e7fa7e1997310d705b2a6158ff8dc1cdfeb.jpg":
img = requests.get(pfp)
img = Image.open(BytesIO(img.content))
image = str(imagehash.average_hash(img))
queue.put({'image': image, 'profileUrl': profileUrl})
except Exception as e:
# print(f'Received Response: {response}')
print("Printing only the traceback above the current stack frame")
print("".join(traceback.format_exception(sys.exc_info()[0], sys.exc_info()[1], sys.exc_info()[2])))
print("Printing the full traceback as if we had not caught it here...")
print(format_exception(e))
def format_exception(e):
exception_list = traceback.format_stack()
exception_list = exception_list[:-2]
exception_list.extend(traceback.format_tb(sys.exc_info()[2]))
exception_list.extend(traceback.format_exception_only(
sys.exc_info()[0], sys.exc_info()[1]))
exception_str = "Traceback (most recent call last):\n"
exception_str += "".join(exception_list)
exception_str = exception_str[:-1]
return exception_str
if __name__ == '__main__':
database_connection = sqlite3.connect("steam_database.db")
data_queue = Queue()
data_flush_process = Process(target=queue_flusher, args=([data_queue]))
data_flush_process.start()
total_nums = max - min
nums_per_process = total_nums // pool_size
for i in range(pool_size):
new_min = min + (nums_per_process * i)
new_max = max if i == (pool_size-1) else new_min + nums_per_process
Process(target=databaseFiller, args=([data_queue, new_min, new_max])).start()
Thanks.
This will not solve 100% of your problem, but I see you are inserting rows into the SQLite file one at a time; you should collect the whole thing first (e.g. into a CSV or a list) and then use cursor.executemany instead of cursor.execute. That kind of insertion is faster.
How long does it take to make one download?
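A minimal sketch of what that could look like for the user table from the question, assuming the rows have already been collected into a plain list of (hash, profile) tuples; the function name and batching approach are illustrative, not from the original code.

import sqlite3

def flush_rows(db_path, rows):
    # rows is assumed to be a list of (image_hash, profile_url) tuples collected beforehand
    connection = sqlite3.connect(db_path)
    cursor = connection.cursor()
    cursor.execute("CREATE TABLE IF NOT EXISTS user (id INTEGER PRIMARY KEY AUTOINCREMENT, hash TEXT, profile TEXT)")
    # one executemany call replaces one execute call per row
    cursor.executemany("INSERT INTO user (hash, profile) VALUES (?, ?);", rows)
    connection.commit()
    connection.close()

A single executemany call avoids the per-row Python/DB-API overhead of calling execute in a loop, while the commit still happens once per batch.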
I store QueryText within a pandas dataframe. Once I've loaded all the queries into it, I want to run an analysis against each query. Currently I have ~50k to evaluate, so doing it one by one will take a long time.
So I wanted to implement concurrent.futures. How do I take the individual QueryText values stored within fullAnalysis, pass them to concurrent.futures, and return the output as a variable?
Here is my entire code:
import pandas as pd
import time
import gensim
import sys
import warnings
from concurrent.futures import ThreadPoolExecutor
from concurrent.futures import as_completed
fullAnalysis = pd.DataFrame()
def fetch_data(jFile = 'ProcessingDetails.json'):
print("Fetching data...please wait")
#read JSON file for latest dictionary file name
baselineDictionaryFileName = 'Dictionary/Dictionary_05-03-2020.json'
#copy data to pandas dataframe
labelled_data = pd.read_json(baselineDictionaryFileName)
#Add two more columns to get the most similar text and score
labelled_data['SimilarText'] = ''
labelled_data['SimilarityScore'] = float()
print("Data fetched from " + baselineDictionaryFileName + " and there are " + str(labelled_data.shape[0]) + " rows to be evalauted")
return labelled_data
def calculateScore(inputFunc):
warnings.filterwarnings("ignore", category=DeprecationWarning)
model = gensim.models.Word2Vec.load('w2v_model_bigdata')
inp = inputFunc
print(inp)
out = dict()
strEvaluation = inp.split("most_similar ",1)[1]
#while inp != 'quit':
split_inp = inp.split()
try:
if split_inp[0] == 'help':
pass
elif split_inp[0] == 'similarity' and len(split_inp) >= 3:
pass
elif split_inp[0] == 'most_similar' and len(split_inp) >= 2:
for pair in model.most_similar(positive=[split_inp[1]]):
out.update({pair[0]: pair[1]})
except KeyError as ke:
#print(str(ke) + "\n")
inp = input()
return out
def main():
with ThreadPoolExecutor(max_workers=5) as executor:
for i in range(len(fullAnalysis)):
text = fullAnalysis['QueryText'][i]
arg = 'most_similar'+ ' ' + text
#for item in executor.map(calculateScore, arg):
output = executor.map(calculateScore, arg)
return output
if __name__ == "__main__":
fullAnalysis = fetch_data()
results = main()
print(f'results: {results}')
The Python Global Interpreter Lock or GIL allows only one thread to hold control of the Python interpreter. Since your function calculateScore might be cpu-bound and requires the interpreter to execute its byte code, you may be gaining little by using threading. If, on the other hand, it were doing mostly I/O operations, it would be giving up the GIL for most of its running time allowing other threads to run. But that does not seem to be the case here. You probably should be using the ProcessPoolExecutor from concurrent.futures (try it both ways and see):
def main():
with ProcessPoolExecutor(max_workers=None) as executor:
the_futures = {}
for i in range(len(fullAnalysis)):
text = fullAnalysis['QueryText'][i]
arg = 'most_similar'+ ' ' + text
future = executor.submit(calculateScore, arg)
the_futures[future] = i # map future to request
for future in as_completed(the_futures): # results as they become available not necessarily the order of submission
i = the_futures[future] # the original index
result = future.result() # the result
If you omit the max_workers parameter (or specify a value of None) from the ProcessPoolExecutor constructor, the default will be the number of processors you have on your machine (not a bad default). There is no point in specifying a value larger than the number of processors you have.
If you do not need to tie the future back to the original request, then the_futures can just be a list to which the futures are appended. But simplest yet is to not even bother with the as_completed method:
def main():
with ProcessPoolExecutor(max_workers=5) as executor:
the_futures = []
for i in range(len(fullAnalysis)):
text = fullAnalysis['QueryText'][i]
arg = 'most_similar'+ ' ' + text
future = executor.submit(calculateScore, arg)
the_futures.append(future)
# wait for the completion of all the results and return them all:
results = [f.result() for f in the_futures] # results in creation order
return results
It should be mentioned that code that launches the ProcessPoolExecutor functions should be in a block governed by if __name__ == '__main__':. If it isn't, you will get into a recursive loop with each subprocess launching the ProcessPoolExecutor. But that seems to be the case here. Perhaps you meant to use the ProcessPoolExecutor all along?
Also:
I don't know what the line ...
model = gensim.models.Word2Vec.load('w2v_model_bigdata')
... in function calculateScore does. It may be the one I/O-bound statement. But this appears to be something that does not vary from call to call. If that is the case and model is not being modified in the function, shouldn't this statement be moved out of the function and computed just once? Then this function would clearly run faster (and be clearly cpu-bound).
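One way to do that with ProcessPoolExecutor is to load the model once per worker process via the executor's initializer argument; the sketch below is only an illustration, assuming the model file name from the question is correct and the model is read-only inside calculateScore.

import gensim
from concurrent.futures import ProcessPoolExecutor

model = None  # populated once per worker process

def init_worker():
    # runs once when each worker process starts, instead of on every call
    global model
    model = gensim.models.Word2Vec.load('w2v_model_bigdata')

def calculateScore(arg):
    # reuses the global model loaded in init_worker
    split_inp = arg.split()
    out = {}
    if split_inp[0] == 'most_similar' and len(split_inp) >= 2:
        for word, score in model.most_similar(positive=[split_inp[1]]):
            out[word] = score
    return out

def main():
    with ProcessPoolExecutor(max_workers=None, initializer=init_worker) as executor:
        # submit calls as in the examples above
        ...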
Also:
The exception block ...
except KeyError as ke:
#print(str(ke) + "\n")
inp = input()
... is puzzling. You are inputting a value that will never be used right before returning. If this is to pause execution, there is no error message being output.
With Booboo's assistance, I was able to update the code to include ProcessPoolExecutor. Here is my updated code. Overall, processing has been sped up by more than 60%.
I did run into a processing issue and found this topic, BrokenPoolProcess, which addresses it.
output = {}
thePool = {}
def main(labelled_data, dictionaryRevised):
args = sys.argv[1:]
with ProcessPoolExecutor(max_workers=None) as executor:
for i in range(len(labelled_data)):
text = labelled_data['QueryText'][i]
arg = 'most_similar'+ ' '+ text
output = winprocess.submit(
executor, calculateScore, arg
)
thePool[output] = i #original index for future to request
for output in as_completed(thePool): # results as they become available not necessarily the order of submission
i = thePool[output] # the original index
text = labelled_data['QueryText'][i]
result = output.result() # the result
maximumKey = max(result.items(), key=operator.itemgetter(1))[0]
maximumValue = result.get(maximumKey)
labelled_data['SimilarText'][i] = maximumKey
labelled_data['SimilarityScore'][i] = maximumValue
return labelled_data, dictionaryRevised
if __name__ == "__main__":
start = time.perf_counter()
print("Starting to evaluate Query Text for labelling...")
output_Labelled_Data, output_dictionary_revised = preProcessor()
output,dictionary = main(output_Labelled_Data, output_dictionary_revised)
finish = time.perf_counter()
print(f'Finished in {round(finish-start, 2)} second(s)')
I have been trying to retrieve data from an Oracle DB using the cx_Oracle library. When I hard-code the values, c.execute(v_sql,['02502-9001-12','SS']) returns the right data. However, when I pass the variables to the call, c.execute(v_sql,[Part_No,Rev_no]) does not seem to recognize the values and the output is (None).
What I tried to do is pass the values in as Part_No and Rev_no.
Only when I hard-code the data do I receive the right result. I even print Part_No and Rev_no right before c.execute() runs, and they are printed correctly. The actual problem is that I am not receiving the right data (None); I am supposed to receive AAAW07AAFAAF5/PAAq. It looks like the values passed to def why__(Part_No, Rev_no) do not get through to c.execute(v_sql,[Part_No,Rev_no]).
c.execute(v_sql,[Part_No,Rev_no])
c.execute(v_sql,['02502-9001-12','SS'])
import cx_Oracle
import os
def why__(Part_No, Rev_no):
dsn_tns = cx_Oracle.makedsn('XXXXX', 'XXXX', service_name='XXXXX')
conn = cx_Oracle.connect(user=r'XXXX', password='XXXX', dsn=dsn_tns)
c = conn.cursor()
#u_sql =('SELECT PART_REV_CFP.GET_OBJKEY(:Part_No,:Rev_no) from dual')
v_sql =('SELECT PART_REV_CFP.GET_OBJKEY(:Part_No,:Rev_no) from dual')
print(Part_No)
print(Rev_no)
c.execute(v_sql,[Part_No,Rev_no])
#c.execute(v_sql,['02502-9001-12','SS'])
R = c.fetchone()
print('before the loop')
i=0
for row in R:
print('inside the loop')
i+=1
print(i)
print(str(row))
print('after the loop')
conn.close()
return row
Part_No = input("Enter the obs part num: ")
Rev_no = input("Enter the obs part rev: ")
why__(Part_No,Rev_no)
print('outside of the method')
print(why__(Part_No,Rev_no))
Output when using c.execute(v_sql,[Part_No,Rev_no]) -- in other words, not hard-coding the values of Part_No and Rev_no:
Enter the obsolete part number: 02502-9001-12
Enter the obsolete part revision: ss
before the loop
inside the loop
1
(None,)
after the loop
***Repl Closed***
Output when using c.execute(v_sql,['02502-9001-12','SS']):
Enter the obsolete part number: 02502-9001-12
Enter the obsolete part revision: ss
02502-9001-12
ss
before the loop
inside the loop
1
AAAW07AAFAAF5/PAAq
after the loop
outside of the method
02502-9001-12
ss
before the loop
inside the loop
1
AAAW07AAFAAF5/PAAq
after the loop
AAAW07AAFAAF5/PAAq
***Repl Closed***
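For reference, cx_Oracle also accepts the bind variables as a dict keyed by the placeholder names used in the statement, which makes it explicit which Python value goes to which bind variable. This is only a sketch of the bind mechanics (the connection is assumed to exist already), not a diagnosis of the behaviour above.

import cx_Oracle

def get_objkey(conn, part_no, rev_no):
    c = conn.cursor()
    v_sql = 'SELECT PART_REV_CFP.GET_OBJKEY(:Part_No, :Rev_no) FROM dual'
    # named binds: the dict keys match the :Part_No / :Rev_no placeholders in the statement
    c.execute(v_sql, {'Part_No': part_no, 'Rev_no': rev_no})
    return c.fetchone()[0]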
My issue is that I'm receiving values from a serial port.
The values could be either of the following:
BASE_RAD: NEW,ATC001#T1412010472R-77,ATC005:T1412010460R-70,SU0003;Q6V8.9S0C11.5*xx
BASE_RAD: NEW,ATC001#T1413824282R-102,ATC003:T1413824274R-98,SU001G;Q0V14.0D00*x
There are minor changes in the output, but the biggest difference is that the second line has the value D00 instead of S0.
This serial output updates me with changes to sensors; D00 is for the digital output, while S0 is for the fan speed.
My question is: I have written a regular expression that handles the first serial output, the one with the S0 value, but if I then receive the D00 variant, the regular expression breaks.
I want to write it so that if the line doesn't have the S0 value, it looks for the D00 value instead.
Thank you in advance for any help or advice. I'm not sure where I should be looking or what direction I should be taking.
The code below reads the serial output and then runs the regular expression; if it finds a match, it inserts the values into the database.
CODE BELOW IS PYTHON
import serial, string, MySQLdb, re
db = MySQLdb.connect(host="localhost", user="root", passwd="", db="walnut_farm")
cur = db.cursor()
serialPort = 'COM4' # BAUD Rate is 9600 as default
ser = serial.Serial()
ser.setPort(serialPort)
#ser.setBaudrate(115200) Enable if BAUD is not default value
try:
ser.open()
except:
print('Port Error!')
else:
while True:
try:
ardString = ser.readline()
Serial_Output = ardString
p = re.compile(ur'^BASE_RAD: NEW,(.*)#T(\d*)R-(\d*),(.*):T(\d*)R-(\d*),(.*);Q(\d*)V(\d*\.?\d*)S(\d*)C(\d*\.?\d*)(.*)') # here is the regular expressions i created from this link http://regex101.com/r/dP6fE1/1
Serial_Results = re.match(p, Serial_Output)
# Assigning variables to the array values
Base_ID = Serial_Results.group(1)
Base_Time_Stamp = Serial_Results.group(2)
Base_Signal = Serial_Results.group(3)
Repeater_ID = Serial_Results.group(4)
Repeater_Time_Stamp = Serial_Results.group(5)
Repeater_Signal = Serial_Results.group(6)
Sensor_ID = Serial_Results.group(7)
Sensor_Sequence = Serial_Results.group(8)
Sensor_Input_Voltage = Serial_Results.group(9)
Sensor_Wind_Speed = Serial_Results.group(10)
Sensor_Temperature = Serial_Results.group(11)
Checksum = Serial_Results.group(12)
# Execute the SQL query to INSERT the above variables into the Database
cur.execute('INSERT INTO serial_values (Base_ID, Base_Time_Stamp, Base_Signal, Repeater_ID, Repeater_Time_Stamp, Repeater_Signal, Sensor_ID, Sensor_Sequence, Sensor_Input_Voltage, Sensor_Wind_Speed, Sensor_Temperature, Checksum) VALUES ("'+Base_ID+'", "'+Base_Time_Stamp+'", "'+Base_Signal+'", "'+Repeater_ID+'", "'+Repeater_Time_Stamp+'", "'+Repeater_Signal+'", "'+Sensor_ID+'", "'+Sensor_Sequence+'", "'+Sensor_Input_Voltage+'", "'+Sensor_Wind_Speed+'", "'+Sensor_Temperature+'", "'+Checksum+'")')
db.commit()
#ser.close()
except Exception:
pass
Take a look at this, if my interpretation is right. It's a starting point; you then have to add your MySQL insert into the database.
import re
def get_output_parameters(serial_output):
p = re.compile(ur'^BASE_RAD: NEW,(.*)#T(\d*)R-(\d*),(.*):T(\d*)R-(\d*),(.*);Q(\d*)V(\d*\.?\d*)S(\d*)C(\d*\.?\d*)(.*)') # here is the regular expressions i created from this link http://regex101.com/r/dP6fE1/1
p2 = re.compile(ur'^BASE_RAD: NEW,(.*)#T(\d*)R-(\d*),(.*):T(\d*)R-(\d*),(.*);Q(\d*)V(\d*\.?\d*)D(\d*)(.*)')
Serial_Results = re.match(p, serial_output)
digital_out = False
if not Serial_Results:
Serial_Results = re.match(p2, serial_output)
digital_out = True
# Assigning variables to the array values
Base_ID = Serial_Results.group(1)
Base_Time_Stamp = Serial_Results.group(2)
Base_Signal = Serial_Results.group(3)
Repeater_ID = Serial_Results.group(4)
Repeater_Time_Stamp = Serial_Results.group(5)
Repeater_Signal = Serial_Results.group(6)
Sensor_ID = Serial_Results.group(7)
Sensor_Sequence = Serial_Results.group(8)
Sensor_Input_Voltage = Serial_Results.group(9)
Sensor_Wind_Speed = Serial_Results.group(10)
Sensor_Temperature = Serial_Results.group(11)
if not digital_out:
Checksum = Serial_Results.group(12)
print Sensor_Temperature
Serial_Output = "BASE_RAD: NEW,ATC001#T1412010472R-77,ATC005:T1412010460R-70,SU0003;Q6V8.9S0C11.5*xx"
Serial_Output2 = "BASE_RAD: NEW,ATC001#T1413824282R-102,ATC003:T1413824274R-98,SU001G;Q0V14.0D00*x"
get_output_parameters(Serial_Output)
get_output_parameters(Serial_Output2)