aws s3 boto retrieve objects by date - python

The format that I stored them in was:
website/website/objecthash/xcord/ycord/likelyhood/year/month/datenumber/hour/minutes
Right now I have the bucket I want to pull them out of.
Say I want the most recent 10 stored objects. What is an efficient way to do this?
I have the bucket, what do I do with it?
My solution was something like this for get todays however I'm not sure about the logic for get most recent:
def getKeys():
b = bucket.list()
theKeys=[]
for key in b:
theKeys.append(key)
return theKeys
def getDecompiledToday():
time = datetime.datetime.now()
year =time.strftime("%Y")
month = time.strftime("%m")
day = time.strftime("%D")
keys = getKeys()
objects = []
for k in keys:
splitK= k.split("/")
if splitK[6]==year and splitK[7]==month and splitK[8]==day:
objets.append(bucket.get_key(k))
return

The solution that I came up with.
def getPastAmountDecompiledFromFile(number):
if bucketKeys.__len__() > 0:
Found=[]
latest=bucketKeys[0]
while Found.__len__() < number:
laterFound = False
for k in bucketKeys:
if latest in Found:
latest=k
current = k.split("/")
best = k.split("/")
if k not in Found and latest != k:
if int(current[6]) > int(best[6]):
laterFound=True
if int(current[6]) == int(best[6]) and int(current[7]) > int(best[7]):
laterFound=True
if int(current[6]) == int(best[6]) and int(current[7]) == int(best[7]) and int(current[8]) > int(best[8]):
laterFound=True
if int(current[6]) == int(best[6]) and int(current[7]) == int(best[7]) and int(current[8]) == int(best[8]) and int(current[9]) > int(best[9]):
laterFound=True
if laterFound:
latest = k
if laterFound:
Found.append(latest)
return getKeyFromKeyNames(Found)
else:
getKeysInFile()
getPastAmountDecompiledFromFile(number)
return

Related

How to count somekind of results of a request?

I am pursuing a MSc in Data Science and in the subject of Python I have the statement below:
Call 100 times the following URL and count how many calls have three or less participants.
The API is: http://www.boredapi.com/api/activity/
After I understood the statement I build up the function below:
import requests
total_calls = 100
call_0 = 0
def calls(total_calls, call_0):
while total_calls > call_0:
activity = ('http://www.boredapi.com/api/activity/')
call_n = requests.get(activity)
print(call_n.text)
call_0 += 1
if total_calls - call_0 < 0:
print(call_0)
elif total_calls - call_0 == 0:
break
return output_call
output_call = calls(total_calls, call_0)
output_call
I am stuck up because I don't know how to count how many times "output_call" have <= 3 participants.
If I run for example 9 times instead of 100 there is a result:
{"activity":"Hold a yard sale","type":"social","participants":1,"price":0,"link":"","key":"1432113","accessibility":0.1}
{"activity":"Meditate for five minutes","type":"relaxation","participants":1,"price":0,"link":"","key":"3699502","accessibility":0.05}
{"activity":"Draw and color a Mandala","type":"relaxation","participants":1,"price":0.05,"link":"https://en.wikipedia.org/wiki/Mandala","key":"4614092","accessibility":0.1}
{"activity":"Go to a local thrift shop","type":"recreational","participants":1,"price":0.1,"link":"","key":"8503795","accessibility":0.2}
{"activity":"Organize your basement","type":"busywork","participants":1,"price":0,"link":"","key":"8203595","accessibility":0.9}
{"activity":"Back up important computer files","type":"busywork","participants":1,"price":0.2,"link":"","key":"9081214","accessibility":0.2}
{"activity":"Fix something that's broken in your house","type":"diy","participants":1,"price":0.1,"link":"","key":"6925988","accessibility":0.3}
{"activity":"Clean out your closet and donate the clothes you've outgrown","type":"charity","participants":1,"price":0,"link":"","key":"9026787","accessibility":0.1}
{"activity":"Go to the gym","type":"recreational","participants":1,"price":0.2,"link":"","key":"4387026","accessibility":0.1}
{}
Following should work fine for you:
import requests, json
total_calls = 100
call_0 = 0
def calls(total_calls, call_0):
less_than_3_count = 0
while total_calls > call_0:
# Check break condition in the beginning
if total_calls - call_0 == 0:
break
activity = 'http://www.boredapi.com/api/activity/'
response = requests.get(activity)
call_0 += 1
print(call_0, response.text)
json_object = json.loads(response.text)
if json_object['participants'] <= 3:
less_than_3_count += 1
return less_than_3_count
output_call = calls(total_calls, call_0)
output_call
You could proceed like that.
Design your calls() function to return the number of calls you're interested in.
Then initialize a counter to 0 and increment it when number of participants lte 3.
range() is the builtin Python function you use very often to loop n times.
From the request result you'd better ask for JSON instead of text, which gives you a Python dictionary (equivalent to its JSON counterpart).
Access the value number of participants by using the participant key.
activity = "http://www.boredapi.com/api/activity/"
total_calls = 100
def calls(total_calls: int) -> int:
counter = 0
for _ in range(total_calls):
r = requests.get(activity)
if r.ok:
if r.json()["participants"] <= 3:
counter += 1
return counter
calls(total_calls)
import requests
def calls(total_calls):
output_calls = 0
activity = 'http://www.boredapi.com/api/activity/'
while total_calls > 0:
call_n = requests.get(activity).json()
if call_n['participants'] <= 3:
output_calls += 1
total_calls -= 1
return output_calls
output_call = calls(total_calls=100) # change the total call value here
print(output_call)
Assuming you will handle exceptions for requests.
Here is the simplified and pythonic version;
import requests
from pprint import pp
def calls(count=100):
bigger_then_3 = 0
bigger_then_3_calls = []
for cal in range(count):
resp = requests.get("http://www.boredapi.com/api/activity/").json()
if resp["participants"] <= 3:
bigger_then_3_calls.append(resp)
bigger_then_3 += 1
return bigger_then_3, bigger_then_3_calls
if __name__ == '__main__':
bigger_count, bigger_calls = calls(10)
print(f"Bigger count: {bigger_count}")
pp(bigger_calls)

Python issue with return statement

The code takes a list as input for example:
[1995, 1750, 2018]
and I am expecting it to give back
Basically, this code searches for the closest leap year for each year in a list of years
1996
1948
2016
all in a separate line.
The output I get back with the return statement is:
1996 1748 2016
But the thing is I must use return because I use a map thing to write it to file, but I get
map argument #1 must support iteration
Is there a solution to my problem?
#!/bin/python3
import math
import os
import random
import re
import sys
def is_leap(year):
leap = False
if year % 4 == 0:
if year % 100 != 0 or year % 400 == 0:
leap = True
return leap
forward_list = {}
back_list = {}
newLst = []
def year_forward(yearBounds):
for item in yearBounds:
counter = 0
# forwad list
while not is_leap(item):
item = item + 1
counter += 1
#forward_list.append(item)
forward_list[item] = counter
return forward_list
def year_backward(yearBounds):
# back_list
for item in yearBounds:
counter = 0
while not is_leap(item):
item = item - 1
counter -= 1
#back_list.append(item)
back_list[item] = counter
return back_list
def findLastLeapYears(yearBounds):
forward = (year_forward(yearBounds))
backward = (year_backward(yearBounds))
tuple_forward = list(forward.items())
tuple_backward = list(backward.items())
counter = 0
for item in tuple_forward:
if abs(item[1]) < abs(tuple_backward[counter][1]):
newLst.append (item[0])
counter+=1
elif abs(item[1]) == abs(tuple_backward[counter][1]):
if item[0] < tuple_backward[counter][0]:
newLst.append (item[0])
counter += 1
else:
newLst.append (tuple_backward[counter][0])
counter += 1
else:
newLst.append (tuple_backward[counter][0])
counter+=1
return newLst
The call:
leapYears = findLastLeapYears(years)
fptr.write(' '.join(map(str, leapYears)))
fptr.write('\n')
fptr.close()
Your code runs fine, if you want it to be on separate line use '\n'.join(...) instead.
For me, using your code, I can't reproduce the error and everything works fine.
The error map argument #1 must support iteration suggests that you're using str as a variable or function that overwrites the default str.

Undefined dictionaries in my main function

def monday_availability(openhours_M): #openhours_M = number hours pool is open
hourone = int(input('Input the first hour in the range of hours the guard can work'))
hourlast = int(input('Input the last hour in the range of hours the guard'))
hour = 1
availability_M = []
while hour <= openhours_M:
if hour >= hourone & hour <= hourlast:
availability_M.append(1)
else:
availability_M.append(0)
return availability_M
Above is a function gathering the availability of a lifeguard and storing the hours a guard can work as a 1 in availability list or a 0 if they cannot. I return this list with the intent of adding it to a dictionary in the function below.
def guard_availability(guards, openhours_M, openhours_T, openhours_W,
openhours_R, openhours_F, openhours_S, openhours_Su):
continueon = 1
while continueon == 1:
name = input('Input guards name of lifeguard to update availability' )
availability = {}
days = {}
if openhours_M != 0:
monday_availability(openhours_M)
if openhours_T != 0:
tuesday_availability(openhours_T)
if openhours_W != 0:
wednesday_availability(openhours_W)
if openhours_R != 0:
thursday_availability(openhours_R)
if openhours_F != 0:
friday_availability(openhours_F)
if openhours_S != 0:
saturday_availability(openhours_S)
if openhours_Su != 0:
sunday_availability(openhours_Su)
days['Monday'] = availability_M
days['Tuesday'] = availability_T
days['Wednesday'] = availability_W
days['Thursday'] = availability_R
days['Friday'] = availability_F
days['Saturday'] = availability_S
days['Sunday'] = availability_Su
availability[name]= days
continueon = input('Enter 1 to add availability for another guard, 0 to stop: ')
return days
When I run this code, I get an error saying my availability lists are undefined even though I returned them in the functions above. Where is the error in my understanding of returning in functions, and how can I remedy this problem.
monday_availability(openhours_M) returns a value.
Returning a variable does not assign it to anything outside the scope of that function.
If you renamed return availability_M to use return foo and update the other uses only within that function accordingly, would the error make more sense?
Now, actually capture the result
availability_M = monday_availability(openhours_M)
Or even just
days['Monday'] = monday_availability(openhours_M)
Also, not seeing how that function has anything to do with Mondays. Try to write DRY code
You return the dic value in your function but don't assign it to any variable. You should do it like this:
if openhours_M != 0:
availability_M=monday_availability(openhours_M)
if openhours_T != 0:
availability_T=tuesday_availability(openhours_T)
if openhours_W != 0:
availability_W=wednesday_availability(openhours_W)
if openhours_R != 0:
availability_R=thursday_availability(openhours_R)
if openhours_F != 0:
availability_F=friday_availability(openhours_F)
if openhours_S != 0:
availability_S=saturday_availability(openhours_S)
if openhours_Su != 0:
availability_Su=sunday_availability(openhours_Su)

Inteviewstreet Median in python. Fails on all but the first test case

So i wrote this code and it passes the first test case, and fails all the rest. However, I can't seem to find an input that breaks it. Maybe it's because I've been staring at the code too long, but i would appreciate any help.
The algorithm uses two priority queues for the smallest and largest halves of the current list. Here's the code:
#!/bin/python
import heapq
def fix(minset, maxset):
if len(maxset) > len(minset):
item = heapq.heappop(maxset)
heapq.heappush(minset, -item)
elif len(minset) > (len(maxset) + 1):
item = heapq.heappop(minset)
heapq.heappush(maxset, -item)
N = int(raw_input())
s = []
x = []
for i in range(0, N):
tmp = raw_input()
a, b = [xx for xx in tmp.split(' ')]
s.append(a)
x.append(int(b))
minset = []
maxset = []
for i in range(0, N):
wrong = False
if s[i] == "a":
if len(minset) == 0:
heapq.heappush(minset,-x[i])
else:
if x[i] > minset[0]:
heapq.heappush(maxset, x[i])
else:
heapq.heappush(minset, -x[i])
fix(minset, maxset)
elif s[i] == "r":
if -x[i] in minset:
minset.remove(-x[i])
heapq.heapify(minset)
elif x[i] in maxset:
maxset.remove(x[i])
heapq.heapify(maxset)
else:
wrong = True
fix(minset, maxset)
if len(minset) == 0 and len(maxset) == 0:
wrong = True
if wrong == False:
#Calculate median
if len(minset) > len(maxset):
item = - minset[0]
print int(item)
else:
item = ((-float(minset[0])) + float(maxset[0])) / 2
if item.is_integer():
print int(item)
continue
out = str(item)
out.rstrip('0')
print out
else:
print "Wrong!"
Your original was not so legible, so first I made it object-oriented:
MedianHeapq supports methods rebalance(), add(), remove(), size(), median(). We seriously want to hide the members minset,maxset from the client code, for all sorts of sensible reasons: prevent client from swapping them, modifying them etc. If client needs to see them you just write an accessor.
We also added a __str__() method which we will use to debug visually and make your life easier.
Also added legibility changes to avoid the indexing with [i] everywhere, rename s,x arrays to op,val, add prompts on the raw_input(), reject invalid ops at the input stage.
Your actual computation of the median confuses me (when do you want float and when integer? the rstrip('0') is a bit wack), so I rewrote it, change that if you want something else.
A discussion of the algorithm is here.
Now it is legible and self-contained. Also makes it testable.
You might be making sign errors in your code, I don't know, I'll look at that later.
Next we will want to automate it by writing some PyUnit testcases. doctest is also a possibility. TBC.
Ok I think I see a bug in the sloppiness about locating the median. Remember the minset and maxset can have a size mismatch of +/-1. So take more care about precisely where the median is located.
#!/bin/python
import heapq
class MedianHeapq(object):
def __init__(self):
self.minset = []
self.maxset = []
def rebalance(self):
size_imbalance = len(self.maxset) - len(self.minset)
if len(self.maxset) > len(self.minset):
#if size_imbalance > 0:
item = heapq.heappop(self.maxset)
heapq.heappush(self.minset, -item)
#elif size_imbalance < -1:
elif len(self.minset) > (len(self.maxset) + 1):
item = heapq.heappop(self.minset)
heapq.heappush(self.maxset, -item)
def add(self, value, verbose=False):
if len(self.minset) == 0:
heapq.heappush(self.minset,-value)
else:
if value > self.minset[0]:
heapq.heappush(self.maxset, value)
else:
heapq.heappush(self.minset, -value)
self.rebalance()
if verbose: print self.__str__()
return False
def remove(self,value,verbose=False):
wrong = False
if -value in self.minset:
minset.remove(-value)
heapq.heapify(self.minset)
elif value in maxset:
maxset.remove(value)
heapq.heapify(self.maxset)
else:
wrong = True
self.rebalance()
if verbose: print self.__str__()
return wrong
def size(self):
return len(self.minset)+len(self.maxset)
def median(self):
if len(self.minset) > len(self.maxset):
item = - self.minset[0]
return int(item)
else:
item = (-self.minset[0] + self.maxset[0]) / 2.0
# Can't understand the intent of your code here: int, string or float?
if item.is_integer():
return int(item)
# continue # intent???
else:
return item
# The intent of this vv seems to be round floats and return '%.1f' % item ??
#out = str(item)
#out.rstrip('0') # why can't you just int()? or // operator?
#return out
def __str__(self):
return 'Median: %s Minset:%s Maxset:%s' % (self.median(), self.minset,self.maxset)
# Read size and elements from stdin
N = int(raw_input('Size of heap? '))
op = []
val = []
while(len(val)<N):
tmp = raw_input('a/r value : ')
op_, val_ = tmp.split(' ')
if op_ not in ['a','r']: # reject invalid ops
print 'First argument (operation) must be a:Add or r:Remove! '
continue
op.append(op_)
val.append(int(val_))
mhq = MedianHeapq()
for op_,val_ in zip(op,val): # use zip to avoid indexing with [i] everywhere
wrong = False
if op_ == 'a':
wrong = mhq.add(val_)
elif op_ == 'r':
wrong = mhq.remove(val_)
assert (mhq.size()>0), 'Heap has zero size!'
assert (not wrong), 'Heap structure is wrong!'
if not wrong:
print mhq.__str__()

Map actors and their movie onto a dictionary

def parse_actor_data(actor_data):
while 1:
line = actor_data.readline().strip()
if line.count('-') > 5:
break
actor_movie = {}
values = []
actor_name = ''
running_list = []
movie = []
for line in actor_data:
position = line.find(')')
running = line[:position + 1]
value = running.split('\t')
for k in value:
if k != '':
running_list.append(k)
actor_name_list = value[0].split(',')
actor_name = actor_name_list[0] + actor_name_list[-1]
for i in range(len(running_list)):
if value[0] == running_list[i]:
position2 = i
movie = running_list[position2+1:]
actor_movie[actor_name] = movie
check = actor_movie.keys()
for c in range(len(check)):
if len(check[c]) < 1:
actor_movie.pop(check[c])
return actor_movie
Problem I'm having now is that only the first item of movie is added into the actor_movie anyone can help? i tried so long for this already i seriously have no idea why isn't this working...
Every time you run:
actor_movie[actor_name] = movie
you're overwriting the last movie that was associated with that actor. Try something like this instead where you're storing a list of movies, not just a single value:
try:
actor_movie[actor_name].append(movie)
except KeyError:
actor_movie[actor_name] = [movie]
There are other ways (defaultdict, dict.setdefault, etc.) to do the same thing but that should get you up and running.

Categories