upload photos to s3 from raspberry pi without delay - python

I am writing a program to capture motion, take a picture, sleep, blink etc. Here is the code so far without the functions because those aren't the problem.
try:
    starting()
    while duration < 3:
        previous_state = current_state
        current_state = GPIO.input(sensor)
        if current_state != previous_state:
            new_state = "High" if current_state else "low"
            if current_state:
                blink_led(5)
                pic = '/home/pi/pic%s.jpg' % num
                num += 1
                camera.capture(pic)
                data = open(pic, 'rb')
                s3.Bucket('bucket').put_object(Key=pic, Body=data)
                sleep(5)
                duration += 1
The problem is that I want the loop to sleep for 5 seconds, but if the upload takes a while the loop effectively pauses for much longer than 5 seconds, depending on my internet connection. I have tried
while s3.Bucket('bucket').put_object(Key=pic, Body=data):
    sleep(5)
    duration += 1
but then it gets stuck in an infinite loop. If I instead put an if clause at the end like:
if duration == 3:
    s3.Bucket('bucket').put_object(Key=pic, Body=data)
it will only upload the last photo.
I have also tried doing this at the end:
if duration == 3:
    s3.Bucket('bucket').put_object(Key='/home/pi/pic1.jpg', Body=data)
    s3.Bucket('bucket').put_object(Key='/home/pi/pic2.jpg', Body=data)
    s3.Bucket('bucket').put_object(Key='/home/pi/pic3.jpg', Body=data)
It only uploads the first photo and then hangs. How should I run the upload as a background process so the loop doesn't sleep longer than 5 seconds? I feel like I have tried everything other than asynchronous uploads, which I thought were specific to the Raspberry Pi.
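A minimal sketch of one way this is commonly handled, assuming the boto3 s3 resource, camera, and sleep from the snippet above: hand each finished picture to a background thread so the main loop only captures and sleeps. The bucket name and paths are the placeholders from the question, and start_upload is a helper invented for this sketch.

import threading

def upload(path):
    # Runs in a worker thread, so a slow connection no longer delays the main loop.
    with open(path, 'rb') as body:
        s3.Bucket('bucket').put_object(Key=path, Body=body)

def start_upload(path):
    t = threading.Thread(target=upload, args=(path,))
    t.daemon = True  # don't let a hung upload keep the program from exiting
    t.start()

# Inside the motion loop, replace the blocking upload with:
#     camera.capture(pic)
#     start_upload(pic)
#     sleep(5)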

Related

Multiple Python threads writing to single JSON file

I am adapting the Python script in this project (expanded below) so that it updates a JSON file's elements instead of the InitialState streamer. However, with the multiple threads the script opens, there is no clean way to write the data from each thread back to the file: it would be read, changed, and written back by all threads at the same time, and since there is only one file, no version would ever be accurate because the last thread to write would overwrite all the others.
Question: How can I update the states in the JSON file from each thread (simultaneously) without it affecting the other threads' writes or locking up the file?
The JSON file contains the occupants' statuses that I would like to manipulate with the Python script:
{
    "janeHome": "false",
    "johnHome": "false",
    "jennyHome": "false",
    "jamesHome": "false"
}
This is the python script:
import subprocess
import json
from time import sleep
from threading import Thread
# Edit these for how many people/devices you want to track
occupant = ["Jane","John","Jenny","James"]
# MAC addresses for our phones
address = ["11:22:33:44:55:66","77:88:99:00:11:22","33:44:55:66:77:88","99:00:11:22:33:44"]
# Sleep once right when this script is called to give the Pi enough time
# to connect to the network
sleep(60)
# Some arrays to help minimize streaming and account for devices
# disappearing from the network when asleep
firstRun = [1] * len(occupant)
presentSent = [0] * len(occupant)
notPresentSent = [0] * len(occupant)
counter = [0] * len(occupant)
# Function that checks for device presence
def whosHere(i):
    # 30 second pause to allow main thread to finish arp-scan and populate output
    sleep(30)
    # Loop through checking for devices and counting if they're not present
    while True:
        # Exits thread if Keyboard Interrupt occurs
        if stop == True:
            print("Exiting Thread")
            exit()
        else:
            pass
        # If a listed device address is present, print
        if address[i] in output:
            print(occupant[i] + "'s device is connected")
            if presentSent[i] == 0:
                # TODO: UPDATE THIS OCCUPANT'S STATUS TO TRUE
                # Reset counters so another stream isn't sent if the device
                # is still present
                firstRun[i] = 0
                presentSent[i] = 1
                notPresentSent[i] = 0
                counter[i] = 0
                sleep(900)
            else:
                # If a stream's already been sent, just wait for 15 minutes
                counter[i] = 0
                sleep(900)
        # If a listed device address is not present, print and stream
        else:
            print(occupant[i] + "'s device is not connected")
            # Only consider a device offline if its counter has reached 30
            # This is the same as 15 minutes passing
            if counter[i] == 30 or firstRun[i] == 1:
                firstRun[i] = 0
                if notPresentSent[i] == 0:
                    # TODO: UPDATE THIS OCCUPANT'S STATUS TO FALSE
                    # Reset counters so another stream isn't sent if the device
                    # is still present
                    notPresentSent[i] = 1
                    presentSent[i] = 0
                    counter[i] = 0
                else:
                    # If a stream's already been sent, wait 30 seconds
                    counter[i] = 0
                    sleep(30)
            # Count how many 30 second intervals have happened since the device
            # disappeared from the network
            else:
                counter[i] = counter[i] + 1
                print(occupant[i] + "'s counter at " + str(counter[i]))
                sleep(30)

# Main thread
try:
    # Initialize a variable to trigger threads to exit when True
    global stop
    stop = False
    # Start the thread(s)
    # It will start as many threads as there are values in the occupant array
    for i in range(len(occupant)):
        t = Thread(target=whosHere, args=(i,))
        t.start()
    while True:
        # Make output global so the threads can see it
        global output
        # Reads existing JSON file into buffer
        with open("data.json", "r") as jsonFile:
            data = json.load(jsonFile)
            jsonFile.close()
        # Assign list of devices on the network to "output"
        output = subprocess.check_output("arp-scan -interface en1 --localnet -l", shell=True)
        temp = data["janeHome"]
        data["janeHome"] = # RETURNED STATE
        data["johnHome"] = # RETURNED STATE
        data["jennyHome"] = # RETURNED STATE
        data["jamesHome"] = # RETURNED STATE
        with open("data.json", "w") as jsonFile:
            json.dump(data, jsonFile)
            jsonFile.close()
        # Wait 30 seconds between scans
        sleep(30)
except KeyboardInterrupt:
    # On a keyboard interrupt signal threads to exit
    stop = True
    exit()
I think we can all agree that the best approach would be to return the data from each thread to the main thread and write it to the file in one place. But here is where it gets confusing: with each thread checking for a different person, how can the state be passed back to main for writing?
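One common pattern, shown here only as a minimal sketch under the assumption that the detection logic stays inside whosHere(): each worker thread pushes its (key, state) result onto a thread-safe queue.Queue, and only the main loop ever reads or writes data.json, so the threads never contend for the file. The apply_updates helper name is invented for this sketch.

import json
import queue  # "Queue" on Python 2

# Shared, thread-safe queue: workers report results, they never touch the file.
updates = queue.Queue()

# Inside whosHere(i), instead of writing the file directly:
#     updates.put(("janeHome", "true"))   # or "false" when the device drops off

def apply_updates(json_path="data.json"):
    # Called from the main while-True loop: drain everything the threads have
    # reported since the last scan and write the file exactly once.
    changed = {}
    while not updates.empty():
        key, state = updates.get_nowait()
        changed[key] = state
    if changed:
        with open(json_path, "r") as f:
            data = json.load(f)
        data.update(changed)
        with open(json_path, "w") as f:
            json.dump(data, f)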

Creating multiprocessing.Pool pools every hour

I have a program in place that follows logic like this:
At the start of every hour, multiple directories receive a file that is continuously fed data. I'm developing a simple program that can read all the files simultaneously; I've abstracted the tailing/reading part into a function, let's call it 'tail' for now. The external program feeding the data doesn't always run smoothly: sometimes a file will come in late, and sometimes the next hour will hit while the program is still feeding data into the stale file. I can't afford to lose any data. My solution looks something like this, using multiprocessing.Pool, with pseudocode in parts of it:
# imports implied by the rest of the snippet
import glob
import multiprocessing
import os
import sys
import time
from datetime import datetime, timedelta

def process_data(logfile):
    num_retries = 5
    while num_retries > 0:
        if os.path.isfile(logfile):
            for record in tail(logfile):
                do_something(record)
        else:
            num_retries -= 1
            time.sleep(30)

def tail(logfile):
    logfile = open(logfile, 'r')
    logfile.seek(0, 2)
    wait_time = 0  # initialized before the loop so the first empty read doesn't fail
    while True:
        line = logfile.readline()
        if line:
            wait_time = 0
            yield line
        else:
            if wait_time >= 360:
                break
            wait_time += 1
            time.sleep(1)
            continue

if __name__ == '__main__':
    start_time = sys.argv[1]
    next_hour = None
    while True:
        logdirs = glob.glob("/opt/logs/plog*")
        current_date = datetime.now()
        current_hour = current_date.strftime('%H')
        current_format = datetime.now().strftime("%Y%m%d%H")
        logfiles = [logdir + '/some/custom/path/tofile.log' for logdir in logdirs]
        if not next_hour:
            next_hour = current_date + timedelta(hours=1)
        if current_hour == next_hour.strftime('%H') or current_hour == start_time:
            start_time = None
            pool = multiprocessing.Pool()
            pool.map(process_data, logfiles)
            pool.close()
            pool.join()
            next_hour = current_date + timedelta(hours=1)
        time.sleep(30)
Here's what I'm observing when I have logging implemented at the process level:
- all files in each directory are getting read appropriately
- when the next hour hits, there's a delay of 360 s (6 minutes) before the next set of files gets read
- so if hour 4 ends, a new pool doesn't get created for hour 5 until the processes for hour 4 finish
What I'm looking for: I'd like to keep using multiprocessing, but I can't figure out why the code inside the main while loop doesn't proceed until the previous Pool of processes finishes. I have tried the hourly logic in other examples without multiprocessing and had it work fine. I'm led to believe that this has to do with the Pool class, and I'm hoping to get advice on how to make it so that even while the previous Pool is active, I can create a new Pool for the new hour and begin processing the new files, even if it means creating a ton of processes.
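The blocking comes from pool.map and pool.join, which both wait for every task to finish. A minimal sketch of one way around that, assuming the process_data function above: launch each hour with map_async so the main loop keeps running, and reap older pools only once their results are ready. The launch_hour and reap_finished helpers are invented for this sketch.

import multiprocessing

active = []  # (pool, async_result) pairs for hours still being processed

def launch_hour(logfiles):
    pool = multiprocessing.Pool()
    result = pool.map_async(process_data, logfiles)  # returns immediately
    pool.close()                                     # no more tasks for this pool
    active.append((pool, result))

def reap_finished():
    # Join only the pools whose work is done; leave the others running.
    for pool, result in active[:]:
        if result.ready():
            pool.join()
            active.remove((pool, result))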

Python script is causing time drift

I have the following Python script:
#!/usr/bin/env python
# coding: utf-8
import time
import serial
import datetime
from datetime import timedelta
import os.path
PATH = '/home/pi/test/'
Y = datetime.datetime.now().strftime('%Y')[3]
def get_current_time():
    return datetime.datetime.now()

def get_current_time_f1():
    return datetime.datetime.now().strftime('%Y/%m/%d %H:%M:%S')

def get_current_time_f2():
    return datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')

def compare_time():
    current_time = get_current_time()
    if current_time.minute == 59 and current_time.second >= 45:
        return "new"

def file_date():
    if compare_time() == "new":
        plusonehour = datetime.datetime.now() + timedelta(hours=1)
        return plusonehour.strftime('Z' + Y + '%m%d%H')
    else:
        return datetime.datetime.now().strftime('Z' + Y + '%m%d%H')

def createCeilFile():
    filename = os.path.join(PATH, file_date() + ".dat")
    fid = open(filename, "w")
    fid.writelines(["-Ceilometer Logfile", "\n", "-File created: " + get_current_time_f1(), "\n"])
    return fid

# open the first file at program start
fid = createCeilFile()

# serial port settings
ser = serial.Serial(
    port='/dev/ttyUSB0',
    baudrate=19200,
    parity=serial.PARITY_NONE,
    stopbits=serial.STOPBITS_ONE,
    bytesize=serial.EIGHTBITS,
)

counter = 0

# read first byte, grab date string, read rest of string, print both in file
while 1:
    tdata = ser.read()
    time.sleep(3)
    data_left = ser.inWaiting()
    tdata += ser.read(data_left)
    fid.writelines(["-" + get_current_time_f2(), "\n", tdata, "\n"])
    # should have ~10 secs before next message needs to come in
    # if next string will go into the next hour
    if compare_time() == "new":
        # close old file
        fid.writelines(["File closed: " + get_current_time_f2(), "\n"])
        fid.close()
        # open new file
        fid = createCeilFile()
    # then it goes back to 'tdata = ser.read()' and waits again.
It works fine and stores all the data I need in the correct format and so on.
A data message from the device comes through every 15 seconds. The Python script runs indefinitely and reads those messages. At the beginning of each message the script adds the time at which the message was written to the file and therefore received. The time is the problem with this script: I see a drift of about 3 to 4 seconds over 24 hours. The weird thing is that the time drifts backwards. So if I start with data messages coming in at 11, 26, 41 and 56 seconds of the minute, after 24 hours the messages seem to come in at 8, 23, 38 and 53 seconds of the minute.
Does anyone have an explanation for that, or maybe a way to compensate for it? I have thought about restarting the program every hour, after it saves the hourly file. Maybe that would help reset the weird time drift?
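Without claiming to diagnose the drift itself, one thing worth noting is that the script stamps each record only after the 3 second sleep and the follow-up read, so any variation in that part of the loop ends up in the logged time. Below is a minimal sketch of the read loop, assuming the same ser, fid, get_current_time_f2, compare_time and createCeilFile as above, that takes the timestamp the moment the first byte arrives instead.

while True:
    first = ser.read()               # blocks until a message starts arriving
    arrival = get_current_time_f2()  # timestamp taken immediately on arrival
    time.sleep(3)                    # give the rest of the message time to arrive
    tdata = first + ser.read(ser.inWaiting())
    fid.writelines(["-" + arrival, "\n", tdata, "\n"])
    if compare_time() == "new":      # hourly file rotation as in the original
        fid.writelines(["File closed: " + get_current_time_f2(), "\n"])
        fid.close()
        fid = createCeilFile()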

Why the code stops before its expected completion

I'm using the following function for a scraping job that starts at 9 AM and is expected to stop at 4 PM. But it stops much earlier than that, contrary to what I expect it to do.
Let's say the scraping job is run at t = 1 and finishes at t = T. If T < 1 sec, it sleeps for (1 - T) sec. After the sleep, it repeats the scrape cycle. The loop starts at 9 AM and is supposed to stop at 4 PM.
With nosleep = False it doesn't terminate immediately; instead it runs for some time and eventually stops writing any new data to the CSV, though the function doesn't exit.
If I remove the sleep for (1 - T) sec by setting nosleep = True, then it runs fine.
Please let me know what is going wrong with this fragment.
Edit: the same code works fine when ported to Python 3.4, i.e. it doesn't stop scraping when nosleep = False. Earlier it was executing under Python 2.7.
def getOCdata(ed='25FEB2016', start=540*60, stop=960*60, waitFlag=True, nosleep=True):
    ptime = datetime.now()
    while True:
        if (ptime.hour*60*60 + ptime.minute*60 + ptime.second) > stop:
            break
        else:
            try:
                ptime = datetime.now()
                scrapedata = pd.DataFrame(scrapefn(ed))
                scrapedata = scrapedata.ix[:, 1:21]
                scrapedata['timestamp'] = ptime
                scrapedata.to_csv(datafile, index=False, mode='a', header=False)
                if nosleep == False:
                    end = datetime.now()
                    tdiff = end - ptime
                    time.sleep(1 - (tdiff.seconds + tdiff.microseconds*1.0/10**6))
            except:
                continue
    return 0
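One detail that stands out, offered as an assumption rather than a confirmed diagnosis: whenever a scrape cycle takes longer than one second, the value passed to time.sleep() goes negative, which typically raises an exception that the bare except then silently swallows. A minimal sketch of a guarded version of that branch, keeping the names from the function above:

if not nosleep:
    elapsed = (datetime.now() - ptime).total_seconds()
    # Never pass a negative duration to time.sleep(); skip the pause instead.
    time.sleep(max(0.0, 1.0 - elapsed))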

SerialTimeoutException crashes script

This script crashes with a serial timeout exception after an extended period of time. It's been making it to 5000 loops or so before it fails with the following error:
Traceback (most recent call last):
  File "C:\Users\3d Exposure\Desktop\venderFix.py", line 69, in <module>
    A.setHigh(12)
  File "C:\Python27\lib\arduino\arduino.py", line 30, in setHigh
    self.__sendData('1')
  File "C:\Python27\lib\arduino\arduino.py", line 58, in __sendData
    self.serial.write(str(serial_data))
  File "C:\Python27\lib\site-packages\serial\serialwin32.py", line 270, in write
    raise writeTimeoutError
SerialTimeoutException: Write timeout
Why would it time out after working perfectly for so long? How do I stop it doing so?
import json
import urllib
from pprint import pprint
import time
from arduino import Arduino
##############################################################
#Change to suit the Mars Bars, currently at 0.2 of a second #
vendtime = 0.2 #
#
#Delay Time Between each Search (never below 15 seconds) #
delayTime = 15 #
#This is the search term for the URL. (%23 = #) #
searchTerm = '%23happy' #
#
A = Arduino('COM3') #
A.output([13]) #Output on pin 12 #
##############################################################
#to collect the first tweet without vending
countTweet = 0
#To test Twitter for consistancy
tweet= 0
noTweet= 0
#the infinate loop
while True:
    # j contains the JSON we load from the URL
    j = json.loads(urllib.urlopen('http://search.twitter.com/search.json?q=' + searchTerm + '&result_type=recent&rpp=1&filter:retweets').read())
    # Debug JSON from twitter (for faults on the Twitter end or possible GET limit if below 15 seconds per request)
    # pprint(j)  # needed for debugging only
    # find the text and the tweet id
    if 'results' in j and j['results']:
        text = j['results'][0]['text']
        id = j['results'][0]['id']
        # how many times the JSON is complete
        tweet += 1
    else:
        # How many times the JSON is incomplete (sometimes twitter malfunctions. About 0.1 in 100 are broken)
        noTweet += 1
    # print the text and id to the screen
    pprint(text)  # needed for debugging only
    pprint(id)  # needed for debugging only
    # to get the existing tweet from before we power on, if the first ID has been stored already (count == 1)
    if countTweet != 0:  # if countTweet is not equal to 0 then it's not the first tweet
        # pprint("new loop")  # needed for debugging only
        # if lastID is not equal to ID
        if lastID != id:
            # Tell Arduino to Vend
            # pin 12 HIGH
            A.setHigh(13)
            # Sleep for the time specified in vendtime
            time.sleep(vendtime)
            # pin 12 LOW
            A.setLow(13)
            # Display the tweet that triggered the vend
            # pprint(text)  # needed for debugging only
            # pprint(id)  # needed for debugging only
            # Make lastID equal to ID so that next time we can compare it
            lastID = id
            # pprint('lastID updated')  # needed for debugging only
        # if no new tweets, print
        else:  # needed for debugging only
            pprint('no new tweets')  # needed for debugging only
    # If it's the first loop, confirm by printing to the screen
    else:
        pprint("First loop complete")
        pprint(text)
        pprint(id)
        lastID = id
        pprint(lastID)
        countTweet += 1  # Add 1 to countTweet
    pprint('Number of Tweets')
    pprint(countTweet)
    pprint('Working JSON')
    pprint(tweet)
    pprint('Broken JSON')
    pprint(noTweet)
    pprint('waiting')
    time.sleep(delayTime)
For communication with the Arduino I am using the Python Arduino Prototyping API v2. It works well, this is the first and only issue in serial I have had with this script.
I imagine I need to set a timeout and handle it myself, although I am not 100% sure how to do that without running into the same or a similar problem.
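A minimal sketch, offered as an assumption rather than a tested fix, of one way to keep the script alive when a write times out: wrap the vend in a helper that catches pyserial's SerialTimeoutException and retries a few times before giving up. The vend() helper is invented for this sketch; the other names mirror the script above.

import time
from pprint import pprint
import serial  # pyserial, for SerialTimeoutException

def vend(arduino, pin=13, hold=0.2, retries=3):
    for attempt in range(retries):
        try:
            arduino.setHigh(pin)
            time.sleep(hold)
            arduino.setLow(pin)
            return True
        except serial.SerialTimeoutException:
            pprint('write timed out, retrying (%d of %d)' % (attempt + 1, retries))
            time.sleep(1)
    return False

# In the main loop, replace the setHigh/sleep/setLow sequence with:
#     if not vend(A, pin=13, hold=vendtime):
#         pprint('vend skipped after repeated write timeouts')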
