from arcgis.gis import GIS
from IPython.display import display
gis = GIS("portal url", 'username', 'password')
#search for the feature layer named Ports along west coast
search_result = gis.content.search('title:Ports along west coast')
#access the item's feature layers
ports_item = search_result[0]
ports_layers = ports_item.layers
#query all the features and display it on a map
ports_fset = ports_layers[0].query() #an empty query string will return all
ports_flayer = ports_layers[0]
ports_features = ports_fset.features
# select San Francisco feature
sfo_feature = [f for f in ports_features if f.attributes['port_name']=='SAN FRANCISCO'][0]
sfo_feature.attributes
try:
update_result = ports_flayer.edit_features(updates=[sfo_edit])
except:
pass
This is the example that I have shown in which I am trying to update feature layer. Actually I am updating the records in a loop so there are many records. The problem is in the case "Internet Connection" is just shot down It get stuck to the function edit_features.
So there is no way It could go to except and continue the flow.
I just have to ctrl+c to stop script execution because it got hanged and edit_features() function. What can I do?
If I am in your situation i would search arcgis API docs for setting connection timeout, if you can't find any i suggest:
use threading module to run update function in separate thread, it is not the efficient way but in case if it gets stuck, you can continue running your remaining code.
use python requests library to check any website and you check for response code before making the update.
code for threading will look like:
from threading import Thread
from time import sleep
def update():
global update_result
update_result = ports_flayer.edit_features(updates=[sfo_edit])
try:
update_result = None
t1 = Thread(target=update)
t1.daemon = True # mark our thread as a daemon
t1.start()
sleep(10) # wait 10 seconds then timeout, adjust according to your needs
if update_result == None:
print('failed to update')
except Exception as e:
print(e)
Related
I'm having a problem with my multiprocessing and I'm afraid it's a rather simple fix and I'm just not properly implementing the multiprocessing correctly. I've been researching the things that can cause the problem, but all I'm really finding is people recommending the use of a queue to prevent this, but that doesn't seem to be stopping it (again, I may just be implementing the queue incorrectly) I've been at this a couple of days now and I was hoping I could get some help.
Thanks in advance!
import csv
import multiprocessing as mp
import os
import queue
import sys
import time
import connections
import packages
import profiles
def execute_extract(package, profiles, q):
# This is the package execution for the extract
# It fires fine and will print the starting message below
started_at = time.monotonic()
print(f"Starting {package.packageName}")
try:
oracle_connection = connections.getOracleConnection(profiles['oracle'], 1)
engine = connections.getSQLConnection(profiles['system'], 1)
path = os.path.join(os.getcwd(), 'csv_data', package.packageName + '.csv')
cursor = oracle_connection.cursor()
if os.path.exists(path):
os.remove(path)
f = open(path, 'w')
chunksize = 100000
offset = 0
row_total = 0
csv_writer = csv.writer(f, delimiter='^', lineterminator='\n')
# I am having to do some data cleansing. I know this is not the most efficient way to do this, but currently
# it is what I am limited too
while True:
cursor.execute(package.query + f'\r\n OFFSET {offset} ROWS\r\n FETCH NEXT {chunksize} ROWS ONLY')
test = cursor.fetchone()
if test is None:
break
else:
while True:
row = cursor.fetchone()
if row is None:
break
else:
new_row = list(row)
new_row.append(package.sourceId[0])
new_row.append('')
i = 0
for item in new_row:
if type(item) == float:
new_row[i] = int(item)
elif type(item) == str:
new_row[i] = item.encode('ascii', 'replace')
i += 1
row = tuple(new_row)
csv_writer.writerow(row)
row_total += 1
offset += chunksize
f.close()
# I know that execution is at least reaching this point. I can watch the CSV files grow as more and more
# rows are added to the for all the packages What I never get are either the success message or error message
# below, and there are never any entries placed in the tables
query = f"BULK INSERT {profiles['system'].database.split('_')[0]}_{profiles['system'].database.split('_')[1]}_test_{profiles['system'].database.split('_')[2]}.{package.destTable} FROM \"{path}\" WITH (FIELDTERMINATOR='^', ROWTERMINATOR='\\n');"
engine.cursor().execute(query)
engine.commit()
end_time = time.monotonic() - started_at
print(
f"{package.packageName} has completed. Total rows inserted: {row_total}. Total execution time: {end_time} seconds\n")
os.remove(path)
except Exception as e:
print(f'An error has occured for package {package.packageName}.\r\n {repr(e)}')
finally:
# Here is where I am trying to add an item to the queue so the get method in the main def will pick it up and
# remove it from the queue
q.put(f'{package.packageName} has completed')
if oracle_connection:
oracle_connection.close()
if engine:
engine.cursor().close()
engine.close()
if __name__ == '__main__':
# Setting mp creation type
ctx = mp.get_context('spawn')
q = ctx.Queue()
# For the Etl I generate a list of class objects that hold relevant information profs contains a list of
# connection objects (credentials, connection strings, etc) packages contains the information to run the extract
# (destination tables, query string, package name for logging, etc)
profs = profiles.get_conn_vars(sys.argv[1])
packages = packages.get_etl_packages(profs)
processes = []
# I'm trying to track both individual package execution time and overall time so I can get an estimate on rows
# per second
start_time = time.monotonic()
sqlConn = connections.getSQLConnection(profs['system'])
# Here I'm executing a SQL command to truncate all my staging tables to ensure they are empty and will not
# generate any key violations
sqlConn.execute(
f"USE[{profs['system'].database.split('_')[0]}_{profs['system'].database.split('_')[1]}_test_{profs['system'].database.split('_')[2]}]\r\nExec Sp_msforeachtable #command1='Truncate Table ?',#whereand='and Schema_Id=Schema_id(''my_schema'')'")
# Here is where I start generating a process per package to try and get all packages to run simultaneously
for package in packages:
p = ctx.Process(target=execute_extract, args=(package, profs, q,))
processes.append(p)
p.start()
# Here is my attempt at managing the queue. This is a monstrosity of fixes I've tried to get this to work
results = []
while True:
try:
result = q.get(False, 0.01)
results.append(result)
except queue.Empty:
pass
allExited = True
for t in processes:
if t.exitcode is None:
allExited = False
break
if allExited & q.empty():
break
for p in processes:
p.join()
# Closing out the end time and writing the overall execution time in minutes.
end_time = time.monotonic() - start_time
print(f'Total execution time of {end_time / 60} minutes.')
I can't be sure why you are experiencing a deadlock (I am not at all convinced it is related to your queue management), but I can say for sure that you can simplify your queue management logic if you do one of either two things:
Method 1
Ensure that your worker function, execute_extract will put something on the results queue even in the case of an exception (I would recommend placing the Exception object itself). Then your entire main process loop that begins with while True: that attempts to get the results can be replaced with:
results = [q.get() for _ in range(len(processes))]
You are guaranteed that there will be a fixed number of messages on the queue equal to the number of processes created.
Method 2 (even simpler)
Simply reverse the order in which you wait for the subprocesses to complete and you process the results queue. You don't know how many messages will be on the queue but you aren't processing the queue until all the processes have returned. So however many messages are on the queue is all you will ever get. Just retrieve them until the queue is empty:
for p in processes:
p.join()
results = []
while not q.empty():
results.append(q.get())
At this point I would normally suggest that you use a multiprocessing pool class such as multiprocessing.Pool which does not require an explicit queue to retrieve results. But make either of these changes (I suggest Method 2, as I cannot see how it can cause a deadlock since only the main process is running at this point) and see if your problem goes away. I am not, however, guaranteeing that your issue is not somewhere else in your code. While your code is overly complicated and inefficient, it is not obviously "wrong." At least you will know whether your problem is elsewhere.
And my question for you: What does it buy you to do everything using a context acquired with ctx = mp.get_context('spawn') instead of just calling the methods on the multiprocessing module itself? If your platform had support for a fork call, which would be the default context, would you not want to use that?
I am trying to simulate an environment with vms and trying to run an object method in background thread. My code looks like the following.
hyper_v.py file :
import random
from threading import Thread
from virtual_machine import VirtualMachine
class HyperV(object):
def __init__(self, hyperv_name):
self.hyperv_name = hyperv_name
self.vms_created = {}
def create_vm(self, vm_name):
if vm_name not in self.vms_created:
vm1 = VirtualMachine({'vm_name': vm_name})
self.vms_created[vm_name] = vm1
vm1.boot()
else:
print('VM:', vm_name, 'already exists')
def get_vm_stats(self, vm_name):
print('vm stats of ', vm_name)
print(self.vms_created[vm_name].get_values())
if __name__ == '__main__':
hv = HyperV('temp')
vm_name = 'test-vm'
hv.create_vm(vm_name)
print('getting vm stats')
th2 = Thread(name='vm1_stats', target=hv.get_vm_stats(vm_name) )
th2.start()
virtual_machine.py file in the same directory:
import random, time, uuid, json
from threading import Thread
class VirtualMachine(object):
def __init__(self, interval = 2, *args, **kwargs):
self.vm_id = str(uuid.uuid4())
#self.vm_name = kwargs['vm_name']
self.cpu_percentage = 0
self.ram_percentage = 0
self.disk_percentage = 0
self.interval = interval
def boot(self):
print('Bootingup', self.vm_id)
th = Thread(name='vm1', target=self.update() )
th.daemon = True #Setting the thread as daemon thread to run in background
print(th.isDaemon()) #This prints true
th.start()
def update(self):
# This method needs to run in the background simulating an actual vm with changing values.
i = 0
while(i < 5 ): #Added counter for debugging, ideally this would be while(True)
i+=1
time.sleep(self.interval)
print('updating', self.vm_id)
self.cpu_percentage = round(random.uniform(0,100),2)
self.ram_percentage = round(random.uniform(0,100),2)
self.disk_percentage = round(random.uniform(0,100),2)
def get_values(self):
return_json = {'cpu_percentage': self.cpu_percentage,
'ram_percentage': self.ram_percentage,
'disk_percentage': self.disk_percentage}
return json.dumps(return_json)
The idea is to create a thread that keeps on updating the values and on request, we read the values of the vm object by calling the vm_obj.get_values() we would be creating multiple vm_objects to simulate multiple vms running in parallel and we need to get the information from a particular vm on request.
The problem, that I am facing, is that the update() function of the vm doesnot run in the background (even though the thread is set as daemon thread).
The method call hv.get_vm_stats(vm_name) waits until the completion of vm_object.update() (which is called by vm_object.boot()) and then prints the stats. I would like to get the stats of the vm on request by keeping the vm_object.update() running in the background forever.
Please share your thoughts if I am overlooking anything related to the basics. I tried looking into the issues related to the python threading library but I could not come to any conclusion. Any help is greatly appreciated. The next steps would be to have a REST api to call these functions to get the data of any vm but I am struck with this problem.
Thanks in advance,
As pointed out by #Klaus D in the comments, my mistake was using the braces when specifying the target function in the thread definition, which resulted in the function being called right away.
target=self.update() will call the method right away. Remove the () to
hand the method over to the thread without calling it.
I have below code to download a file inside a loop,
import wget
try:
wget.download(url)
except:
pass
But if the Internet goes down, it doesn't return!
So my whole loop is stuck.
I want to repeat the same download if internet goes down. So I wanna know does any error happen.
How can i mitigate this?
One simple solution is to move your download code to a thread and make it a separate process which can be interrupted.
You can use python Thread and Timer module to achieve it.
from threading import Thread, Timer
from functools import partial
import time
import urllib
def check_connectivity(t):
try:
urllib.request.urlopen("http://google.com", timeout=2)
except Exception as e:
t._Thread__stop()
class Download(Thread):
def run(self):
print("Trying to download file....")
con = partial(check_connectivity, self)
while True:
t = Timer(5, con) # Checks the connectivity every 5 second or less.
t.start()
# your download code....
def main():
down = Download()
down.start()
down.join()
You code move your main download loop inside the thread's run method. And start a timer inside which listens for the network connectivity.
The next script I'm using is used to listen to IMAP connection using IMAP IDLE and depends heavily on threads. What's the easiest way for me to eliminate the treads call and just use the main thread?
As a new python developer I tried editing def __init__(self, conn): method but just got more and more errors
A code sample would help me a lot
#!/usr/local/bin/python2.7
print "Content-type: text/html\r\n\r\n";
import socket, ssl, json, struct, re
import imaplib2, time
from threading import *
# enter gmail login details here
USER="username#gmail.com"
PASSWORD="password"
# enter device token here
deviceToken = 'my device token x x x x x'
deviceToken = deviceToken.replace(' ','').decode('hex')
currentBadgeNum = -1
def getUnseen():
(resp, data) = M.status("INBOX", '(UNSEEN)')
print data
return int(re.findall("UNSEEN (\d)*\)", data[0])[0])
def sendPushNotification(badgeNum):
global currentBadgeNum, deviceToken
if badgeNum != currentBadgeNum:
currentBadgeNum = badgeNum
thePayLoad = {
'aps': {
'alert':'Hello world!',
'sound':'',
'badge': badgeNum,
},
'test_data': { 'foo': 'bar' },
}
theCertfile = 'certif.pem'
theHost = ('gateway.push.apple.com', 2195)
data = json.dumps(thePayLoad)
theFormat = '!BH32sH%ds' % len(data)
theNotification = struct.pack(theFormat, 0, 32,
deviceToken, len(data), data)
ssl_sock = ssl.wrap_socket(socket.socket(socket.AF_INET,
socket.SOCK_STREAM), certfile=theCertfile)
ssl_sock.connect(theHost)
ssl_sock.write(theNotification)
ssl_sock.close()
print "Sent Push alert."
# This is the threading object that does all the waiting on
# the event
class Idler(object):
def __init__(self, conn):
self.thread = Thread(target=self.idle)
self.M = conn
self.event = Event()
def start(self):
self.thread.start()
def stop(self):
# This is a neat trick to make thread end. Took me a
# while to figure that one out!
self.event.set()
def join(self):
self.thread.join()
def idle(self):
# Starting an unending loop here
while True:
# This is part of the trick to make the loop stop
# when the stop() command is given
if self.event.isSet():
return
self.needsync = False
# A callback method that gets called when a new
# email arrives. Very basic, but that's good.
def callback(args):
if not self.event.isSet():
self.needsync = True
self.event.set()
# Do the actual idle call. This returns immediately,
# since it's asynchronous.
self.M.idle(callback=callback)
# This waits until the event is set. The event is
# set by the callback, when the server 'answers'
# the idle call and the callback function gets
# called.
self.event.wait()
# Because the function sets the needsync variable,
# this helps escape the loop without doing
# anything if the stop() is called. Kinda neat
# solution.
if self.needsync:
self.event.clear()
self.dosync()
# The method that gets called when a new email arrives.
# Replace it with something better.
def dosync(self):
print "Got an event!"
numUnseen = getUnseen()
sendPushNotification(numUnseen)
# Had to do this stuff in a try-finally, since some testing
# went a little wrong.....
while True:
try:
# Set the following two lines to your creds and server
M = imaplib2.IMAP4_SSL("imap.gmail.com")
M.login(USER, PASSWORD)
M.debug = 4
# We need to get out of the AUTH state, so we just select
# the INBOX.
M.select("INBOX")
numUnseen = getUnseen()
sendPushNotification(numUnseen)
typ, data = M.fetch(1, '(RFC822)')
raw_email = data[0][1]
import email
email_message = email.message_from_string(raw_email)
print email_message['Subject']
#print M.status("INBOX", '(UNSEEN)')
# Start the Idler thread
idler = Idler(M)
idler.start()
# Sleep forever, one minute at a time
while True:
time.sleep(60)
except imaplib2.IMAP4.abort:
print("Disconnected. Trying again.")
finally:
# Clean up.
#idler.stop() #Commented out to see the real error
#idler.join() #Commented out to see the real error
#M.close() #Commented out to see the real error
# This is important!
M.logout()
As far as I can tell, this code is hopelessly confused because the author used the "imaplib2" project library which forces a threading model which this code then never uses.
Only one thread is ever created, which wouldn't need to be a thread but for the choice of imaplib2. However, as the imaplib2 documentation notes:
This module presents an almost identical API as that provided by the standard python library module imaplib, the main difference being that this version allows parallel execution of commands on the IMAP4 server, and implements the IMAP4rev1 IDLE extension. (imaplib2 can be substituted for imaplib in existing clients with no changes in the code, but see the caveat below.)
Which makes it appear that you should be able to throw out much of class Idler and just use the connection M. I recommend that you look at Doug Hellman's excellent Python Module Of The Week for module imaplib prior to looking at the official documentation. You'll need to reverse engineer the code to find out its intent, but it looks to me like:
Open a connection to GMail
check for unseen messages in Inbox
count unseen messages from (2)
send a dummy message to some service at gateway.push.apple.com
Wait for notice, goto (2)
Perhaps the most interesting thing about the code is that it doesn't appear to do anything, although what sendPushNotification (step 4) does is a mystery, and the one line that uses an imaplib2 specific service:
self.M.idle(callback=callback)
uses a named argument that I don't see in the module documentation. Do you know if this code ever actually ran?
Aside from unneeded complexity, there's another reason to drop imaplib2: it exists independently on sourceforge and PyPi which one maintainer claimed two years ago "An attempt will be made to keep it up-to-date with the original". Which one do you have? Which would you install?
Don't do it
Since you are trying to remove the Thread usage solely because you didn't find how to handle the exceptions from the server, I don't recommend removing the Thread usage, because of the async nature of the library itself - the Idler handles it more smoothly than a one thread could.
Solution
You need to wrap the self.M.idle(callback=callback) with try-except and then re-raise it in the main thread. Then you handle the exception by re-running the code in the main thread to restart the connection.
You can find more details of the solution and possible reasons in this answer: https://stackoverflow.com/a/50163971/1544154
Complete solution is here: https://www.github.com/Elijas/email-notifier
I have an FTP function that traces the progress of running upload but my understanding of threading is limited and i have been unable to implement a working solution... I'd like to add a GUI progress bar to my current Application by using threading. Can someone show me a basic function using asynchronous threads that can be updated from another running thread?
def ftpUploader():
BLOCKSIZE = 57344 # size 56 kB
ftp = ftplib.FTP()
ftp.connect(host)
ftp.login(login, passwd)
ftp.voidcmd("TYPE I")
f = open(zipname, 'rb')
datasock, esize = ftp.ntransfercmd(
'STOR %s' % os.path.basename(zipname))
size = os.stat(zipname)[6]
bytes_so_far = 0
print 'started'
while 1:
buf = f.read(BLOCKSIZE)
if not buf:
break
datasock.sendall(buf)
bytes_so_far += len(buf)
print "\rSent %d of %d bytes %.1f%%\r" % (
bytes_so_far, size, 100 * bytes_so_far / size)
sys.stdout.flush()
datasock.close()
f.close()
ftp.voidresp()
ftp.quit()
print 'Complete...'
Here's a quick overview of threading, just in case :) I won't go into too much detail into the GUI stuff, other than to say that you should check out wxWidgets. Whenever you do something that takes a long time, like:
from time import sleep
for i in range(5):
sleep(10)
You'll notice that to the user, the entire block of code seems to take 50 seconds. In those 5 seconds, your application can't do anything like update the interface, and so it looks like it's frozen. To solve this problem, we use threading.
Usually there are two parts to this problem; the overall set of things you want to process, and the operation that takes a while, that we'd like to chop up. In this case, the overall set is the for loop and the operation we want chopped up is the sleep(10) function.
Here's a quick template for the threading code, based on our previous example. You should be able to work your code into this example.
from threading import Thread
from time import sleep
# Threading.
# The amount of seconds to wait before checking for an unpause condition.
# Sleeping is necessary because if we don't, we'll block the os and make the
# program look like it's frozen.
PAUSE_SLEEP = 5
# The number of iterations we want.
TOTAL_ITERATIONS = 5
class myThread(Thread):
'''
A thread used to do some stuff.
'''
def __init__(self, gui, otherStuff):
'''
Constructor. We pass in a reference to the GUI object we want
to update here, as well as any other variables we want this
thread to be aware of.
'''
# Construct the parent instance.
Thread.__init__(self)
# Store the gui, so that we can update it later.
self.gui = gui
# Store any other variables we want this thread to have access to.
self.myStuff = otherStuff
# Tracks the paused and stopped states of the thread.
self.isPaused = False
self.isStopped = False
def pause(self):
'''
Called to pause the thread.
'''
self.isPaused = True
def unpause(self):
'''
Called to unpause the thread.
'''
self.isPaused = False
def stop(self):
'''
Called to stop the thread.
'''
self.isStopped = True
def run(self):
'''
The main thread code.
'''
# The current iteration.
currentIteration = 0
# Keep going if the job is active.
while self.isStopped == False:
try:
# Check for a pause.
if self.isPaused:
# Sleep to let the os schedule other tasks.
sleep(PAUSE_SLEEP)
# Continue with the loop.
continue
# Check to see if we're still processing the set of
# things we want to do.
if currentIteration < TOTAL_ITERATIONS:
# Do the individual thing we want to do.
sleep(10)
# Update the count.
currentIteration += 1
# Update the gui.
self.gui.update(currentIteration,TOTAL_ITERATIONS)
else:
# Stop the loop.
self.isStopped = True
except Exception as exception:
# If anything bad happens, report the error. It won't
# get written to stderr.
print exception
# Stop the loop.
self.isStopped = True
# Tell the gui we're done.
self.gui.stop()
To call this thread, all you have to do is:
aThread = myThread(myGui,myOtherStuff)
aThread.start()