Using .hdf5 files only once they are finished writing - python

I am trying to use .hdf5 files once they are done writing (in my case, trying to emit them). But the problem is that I don't have a way to 1) test if they are finished writing and 2) then send them. The code that I have been trying to work with is follows:
while True:
event = self._q.get()
while True:
try:
file = h5py.File(event.src_path, "r")
file.close()
self.new_file.emit(event.src_path, os.path.basename(event.src_path))
break
except OSError:
if retry_count < max_retry_count:
retry_count += 1
print(f"h5 file <{event.src_path}> is locked, retrying {retry_count}/{max_retry_count}")
time.sleep(retry_interval_seconds)
else:
print(f"h5 file <{event.src_path}> reached max retry count, skipping")
except Exception as err:
print(f"Got unexpected Error <{type(err).__name__}> while opening <{event.src_path}> ")
traceback.print_exc()
Obviously this is problematic with the break. But without the break, the try stays in the loop and emits the same file over and over again. This code tests if they are done writing perfectly but the ability to send them and continue to take in new files does not work. Any insight is greatly appreciated.

I solved this by the following code:
while True:
event = self._q.get()
max_retry_count = 350 # for test purposes now but want to set an upper bound on verifying a file is finished.
retry_interval_seconds = .01 # every hundreth it will try the file to see if it finished writing
retry_count = 0
if event.event_type == "created" and event.src_path.lower().endswith(".hdf5"):
while True:
try:
file = h5py.File(event.src_path, "r")
file.close()
except OSError:
if retry_count < max_retry_count:
retry_count += 1
print(f"h5 file <{event.src_path}> is locked, retrying {retry_count}/{max_retry_count}")
time.sleep(retry_interval_seconds)
else:
print(f"h5 file <{event.src_path}> reached max retry count, skipping")
break # <--- looks useful here
except Exception as err:
print(f"Got unexpected Error <{type(err).__name__}> while opening <{event.src_path}> ")
traceback.print_exc()
else:
self.new_file.emit(event.src_path, os.path.basename(event.src_path))
break

Related

Error with a valor on For loop with a path with concatenation to read a xml file

I've done a for loop with a valor who takes +1 at end of my loop.
My for loop is to read a xml file. It have to do an update when reading tag CP, on the file. I have 4 instructs with different tag. But my for loop stop to works after the first loop.
Sorry for my bad english.
The incrementation works. I checked it with print.
for cmd in root.iterfind('./instructs[#name="'+functions.str_instruct_number+'"]/command'):
print('./instructs[#name="'+functions.str_instruct_number+'"]/command')
print("cmd >",cmd.text)
if cmd.text == "CP":
try:
for file in root.iterfind('./instructs[#name="'+functions.str_instruct_number+'"]/file'):
print("file >", file.text)
name_file = file.text
tags_treatement_functions.CP_tag(name_file)
except:
print("for instruct morte")
finally:
print("finally CP pour voir")
functions.instruct_number += 1
functions.str_instruct_number = str(functions.instruct_number)
print("INCREMENTATION", functions.instruct_number)
print("INCREMENTATION STR", functions.str_instruct_number)
tags_treatement_functions.RLD_tag(config.name_script)
print(functions.instruct_number)
print(functions.str_instruct_number)
print(root.iterfind('./instructs[#name="'+functions.str_instruct_number+'"]/command'))
print('./instructs[#name="'+functions.str_instruct_number+'"]/command')
print("for finished")
print("for stopped")
that's my CP_tag function :
def CP_tag(name_file):
bool_copy = True
for update_file in glob.glob(config.video_path+"*.*"):
if name_file == update_file:
print("pas de téléchargement")
bool_copy = False
break
if bool_copy == True:
ftp = ftplib.FTP(config.FTP_adress, timeout=5)
ftp.login(config.FTP_user, config.FTP_pwd)
print("download ftp")
try:
ftp.retrbinary('RETR ' + name_file, open(config.local_temp_path+name_file, 'wb').write)
print("download OK")
except:
print("download failed")
bool_copy = True
Expect that my loop would done 4 loops, but it does only one
i dont know why because after one loop the valor is:
INCREMENTATION 2
INCREMENTATION STR 2
Thank you for your help :)

How to handle socket timeout in Python

I'm using Python 3.4 on an RaspberryPi to read and upload data to Weather Underground. It works great most of the time, but occasionally either my internet connections is poor or Weather Underground servers are slow The other day I got this error:
socket.timeout: _ssl.c:584: The handshake operation timed out
I have try/except code, but it didn't match with any of the exceptions. I assumed the last "except:" would have caught the error, but I guess not. Should I just add "except socket.timeout:"?
try:
r = requests.get(full_URL, timeout=5) # send data to WU
# If uploaded successfully, website will reply with 200
if r.status_code == 200:
return(True)
else:
print('Upload Error: {} {}'.format(r.status_code, r.text))
return(False)
except requests.exceptions.ConnectionError:
print("Upload Error in upload2WU() - ConnectionError")
return(False)
except requests.exceptions.NewConnectionError:
print("Upload Error in upload2WU() - NewConnectionError")
return(False)
except requests.exceptions.MaxRetryError:
print("Upload Error in upload2WU() - MaxRetryError")
return(False)
except socket.gaierror:
print("Upload Error in upload2WU() - socket.gaierror")
return(False)
except:
print("Upload Error in upload2WU() - other")
return(False)
I do have two other places I'm using requests.get(), but they both use try: and except:
try:
response = requests.get(getUrl, timeout=5).json()
if len(response) > 1:
if isNumber(response['current_observation']['precip_today_in']):
daily_rain = float(response['current_observation']['precip_today_in'])
print('Suntec station daily rain={}'.format(daily_rain))
return(daily_rain)
return(ERR_INVALID_DATA)
except:
print("Error in WU_download.py getDailyRain() - failed get() request")
return(ERR_FAILED_GET)
Here's the other one:
try:
response = requests.get(getUrl, timeout=5).json()
if len(response) > 1: # valid response returns 3, if there's an error, the len() is 1
if isNumber(response['current_observation']['pressure_in']):
nearby_pressure = float(response['current_observation']['pressure_in'])
nearby_last_update_time = int(response['current_observation']['observation_epoch'])
if(nearby_pressure) > 25: # a pressure less than 25 inHg isn't gonna be valid
return(nearby_pressure)
# Didn't get a valid pressure. Try the next station in WU_STATIONS tuple
print("Couldn't get pressure data from {}".format(WU_STATIONS[i]))
nearby_pressure = ERR_INVALID_DATA
nearby_last_update_time = 0
i = i + 1
time.sleep(10)
except:
print("Error in WU_download.py getPressure(), failed get request for station {}".format(WU_STATIONS[i]))
i = i + 1
if (i >= len(WU_STATIONS)):
return(ERR_FAILED_GET)

OSX [Error 32] Broken pipe on python app

I am using mac osx 10.9 and python 2.7.
Made app using py2app which is basically continuously checking for save time of a file.
Monitoring any changes made in the file in a while loop with small sleep in each iteration.
Process should not stop at all, but it is exiting with error-32 Broken pipe after 15-20 minutes.
How to resolve it.
try:
while True:
app_log.debug("while true")
time.sleep(5)
configProp.read(propfile)
fileNameList=configProp.sections()
if len(fileNameList)!=0:
app_log.debug("fileNameList is not zero")
for i in range(0,len(fileNameList)):
tempnameinfile=configProp.options(fileNameList[i])
openTimeLive = configProp.get(fileNameList[i], "openTimeLive")
openTimeLive = float(openTimeLive)
openTime=float(openTime)
configureTime = 3600*float(configureTime)
monitorTime=float(openTimeLive + configureTime)
if monitorTime > time.time():
lastSavedTime = os.path.getmtime(str(tempname))
app_log.debug(lastSavedTime)
aa = abs((float(openTime)) - (float(lastSavedTime)))
if abs(aa) > 1 :
app_log.debug("file modified")
t = ThreadClass(fileNameList[i])
# t.setDaemon(True)
t.start()
time.sleep(5)
configProp.set(fileNameList[i], str(tempnameinfile[0]),lastSavedTime)
with open(propfile, 'wb') as propFile:
configProp.write(propFile)
app_log.debug("completed")
except Exception as e:
app_log.error(e)
print e

python "local variable referenced before assignment" with hundreds of threads

I am having a problem with a piece of code that is executed inside a thread in python. Everything works fine until I start using more than 100 or 150 threads, then I get the following error in several threads:
resp.read(1)
UnboundLocalError: local variable 'resp' referenced before assignment.
The code is the following:
try:
resp = self.opener.open(request)
code = 200
except urllib2.HTTPError as e:
code = e.code
#print e.reason,_url
#sys.stdout.flush()
except urllib2.URLError as e:
resp = None
code = None
try:
if code:
# ttfb (time to first byte)
resp.read(1)
ttfb = time.time() - start
# ttlb (time to last byte)
resp.read()
ttlb = time.time() - start
else:
ttfb = 0
ttlb = 0
except httplib.IncompleteRead:
pass
As you can see if "resp" is not assigned due to an exception, it should raise the exception and "code" coundn't be assigned so it couldn't enter in "resp.read(1)".
Anybody has some clue on wht it is failing? I guess it is related to scopes but I don't know how to avoid this or how to implement it differently.
Thanks and regards.
Basic python:
If there is a HttpError during the open call, resp will not be set, but code will be set to e.code in the exception handler.
Then code is tested and resp.read(1) is called.
This has nothing to do with threads directly, but maybe the high number of threads caused the HTTPError.
Defining and using resp variable are not is same code block. One of them in a try/except, the other is in another try/except block. Try to merge them:
Edited:
ttfb = 0
ttlb = 0
try:
resp = self.opener.open(request)
code = 200
resp.read(1)
ttfb = time.time() - start
resp.read()
ttlb = time.time() - start
except urllib2.HTTPError as e:
code = e.code
#print e.reason,_url
#sys.stdout.flush()
except urllib2.URLError as e:
pass
except httplib.IncompleteRead:
pass

Python urllib2 resume download doesn't work when network reconnects

I'm using urllib2 to make a resuming downloader, roughly based on this method. I can end the program and re-start it, and it starts downloading where it left off, downloading the file that ends up the same size as if it were downloaded all at once.
However, I have tested it when disabling and reenabling network, and it doesn't download correctly. The file size ends up longer than the file should be, and the file doesn't work correctly. Is there something I missed, or could this be a urllib2 bug?
import urllib2
opener = urllib2.build_opener();
self.count = 0 # Counts downloaded size.
self.downloading = True
while (not(self.success) and self.downloading):
try:
self.Err = ""
self._netfile = self.opener.open(self.url)
self.filesize = float(self._netfile.info()['Content-Length'])
if (os.path.exists(self.localfile) and os.path.isfile(self.localfile)):
self.count = os.path.getsize(self.localfile)
print self.count,"of",self.filesize,"downloaded."
if self.count >= self.filesize:
#already downloaded
self.downloading = False
self.success = True
self._netfile.close()
return
if (os.path.exists(self.localfile) and os.path.isfile(self.localfile)):
#File already exists, start where it left off:
#This seems to corrupt the file sometimes?
self._netfile.close()
req = urllib2.Request(self.url)
print "file downloading at byte: ",self.count
req.add_header("Range","bytes=%s-" % (self.count))
self._netfile = self.opener.open(req)
if (self.downloading): #Don't do it if cancelled, downloading=false.
next = self._netfile.read(1024)
self._outfile = open(self.localfile,"ab") #to append binary
self._outfile.write(next)
self.readsize = desc(self.filesize) # get size mb/kb
self.count += 1024
while (len(next)>0 and self.downloading):
next = self._netfile.read(1024)
self._outfile.write(next)
self.count += len(next)
self.success = True
except IOError, e:
print e
self.Err=("Download error, retrying in a few seconds: "+str(e))
try:
self._netfile.close()
except Exception:
pass
time.sleep(8) #Then repeat
I added self._outfile.close() with the self._netfile.close() in the IOError handler, that seems to have fixed it. I guess this error was caused by opening for appending again without closing it.

Categories