I want to transfer label_count and card_m to my main Flask Python file. How do I do that? I already tried importing them, but it didn't work. Also, if there is any solution for card_m, I don't want to repeat the request so many times.
import requests
import json
from itertools import chain
from collections import Counter

url = "https://api.trello.com/1/boards/OIeEN1vG/cards"
query = {
    'key': 'e8cac9f95a86819d54194324e95d4db8',
    'token': 'aee28b52f9f8486297d8656c82a467bb4991a1099e23db539604ac35954d5633'
}

response = requests.request(
    "GET",
    url,
    params=query
)
data = response.json()

# Count how many times each label colour appears across all cards
card_labels_string = list(chain.from_iterable([d['labels'] for d in data]))
card_labels = [c["color"] for c in card_labels_string]
label_count = dict((i, card_labels.count(i)) for i in card_labels)

# Map card name -> shortLink, then fetch the members of each card (one request per card)
cards = dict(zip([d['name'] for d in data], [d['shortLink'] for d in data]))
card_m = {}
for key, value in cards.items():
    url_card = "https://api.trello.com/1/cards/{}/members".format(value)
    res = requests.request(
        "GET",
        url_card,
        params=query
    )
    names = [f['fullName'] for f in res.json()]
    card_m.update({key: names})
print(label_count, card_m)
OK, based on your comments I think I can help you out now. There are two things you should do to make this as clean as possible and to avoid bugs later on.
Right now your code is in the global scope. You should avoid doing this at all costs unless there is literally no other option. So the first thing you should do is create a static class for holding this data. Maybe something like this.
import requests
from itertools import chain


class LabelHelper(object):
    card_m = {}
    label_count = None

    @classmethod
    def startup(cls):
        url = "https://api.trello.com/1/boards/OIeEN1vG/cards"
        query = {
            'key': 'e8cac9f95a86819d54194324e95d4db8',
            'token': 'aee28b52f9f8486297d8656c82a467bb4991a1099e23db539604ac35954d5633'
        }
        response = requests.request(
            "GET",
            url,
            params=query
        )
        data = response.json()
        card_labels_string = list(chain.from_iterable([d['labels'] for d in data]))
        card_labels = [c["color"] for c in card_labels_string]
        cls.label_count = dict((i, card_labels.count(i)) for i in card_labels)
        cards = dict(zip([d['name'] for d in data], [d['shortLink'] for d in data]))
        for key, value in cards.items():
            url_card = "https://api.trello.com/1/cards/{}/members".format(value)
            res = requests.request(
                "GET",
                url_card,
                params=query
            )
            names = [f['fullName'] for f in res.json()]
            cls.card_m.update({key: names})

    @classmethod
    def get_data(cls):
        return cls.label_count, cls.card_m
Now we need to run that startup method before we start Flask via app.run. It can look something like this...
if __name__ == '__main__':
    LabelHelper.startup()
    app.run("your interface", your_port)
That populates the static variables with the data. Now you just need to import the static class in whatever file you want and call get_data, and you will get what you want. So, like this...
from labelhelper import LabelHelper


def some_function():
    label_count, card_m = LabelHelper.get_data()
FYI, labelhelper is lowercase in the import because, by convention, you would name the file containing that class labelhelper.py.
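If it helps, here is a minimal sketch of how the Flask side could consume this; the file name, route, and port are illustrative assumptions, not part of the original question:

# app.py -- hypothetical layout: labelhelper.py sits next to this file
from flask import Flask, jsonify
from labelhelper import LabelHelper

app = Flask(__name__)


@app.route("/labels")
def labels():
    # No Trello requests happen here; the data was fetched once at startup
    label_count, card_m = LabelHelper.get_data()
    return jsonify({"label_count": label_count, "card_members": card_m})


if __name__ == "__main__":
    LabelHelper.startup()  # populate the class attributes before serving
    app.run("0.0.0.0", 5000)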
What do you mean, "transfer"? If you want to use them in another function, do this:
from main_python import other_function
print(label_count, card_m)
other_function(label_count, card_m)
What am I doing wrong? I have an extractor that works great, but writing the test is stumping me and it's failing. Can anyone help me figure out where I'm going wrong?
from unittest.mock import MagicMock, patch

import pandas as pd
import requests
from my_project.task import extractor
from my_project.tests import utils
from prefect.logging import disable_run_logger

CONTACT_RECORD = utils.TEST_CONTACT_RECORD
PAGED_CONTACT_RECORD = utils.TEST_PAGED_CONTACT_RECORD
EXPECTED_CONTACT_RECORD = utils.EXPECTED_CONTACT_RECORD


@patch("requests.get")
def test_contact_extractor(get: MagicMock):
    """
    Should call "requests.get" once and return a json
    containing contact data.
    """
    get.return_value.json.return_value = CONTACT_RECORD
    with disable_run_logger():
        result = extractor.get_contacts()
    assert get.call_count == 1
    assert result == pd.DataFrame(EXPECTED_CONTACT_RECORD)


@patch("my_project.extractor.get_contacts")
def test_get_paged_contacts(get_contacts: MagicMock):
    """
    Should run "requests.get" until ['has-more'] is False
    and there is no offset value.
    """
    get_contacts.return_value.json.side_effect = [
        PAGED_CONTACT_RECORD,
        PAGED_CONTACT_RECORD,
        PAGED_CONTACT_RECORD,
        CONTACT_RECORD,
    ]
    with disable_run_logger():
        data = extractor.get_paged_contacts(
            endpoint=MagicMock, query_string=MagicMock, df=MagicMock
        )
    assert get_contacts.call_count == 4
    assert data == pd.DataFrame(EXPECTED_CONTACT_RECORD)
Some errors I'm getting are:
requests imported but not used
Callable[[Union[str, bytes], ...], Response] has no attribute "return_value"
EDIT:
No longer getting the second error because I realized I had a typo, but currently getting:
AttributeError: 'NoneType' object has no attribute 'client'
EDIT:
Here is my get_paged_contacts() function:
import json
import time
import typing

import pandas as pd
import requests
from pandas import json_normalize

# `header` and `logger` are module-level objects defined elsewhere in the project


def get_paged_contacts(
    endpoint: str, query_string: typing.Dict[str, typing.Any], df: pd.DataFrame
) -> pd.DataFrame:
    """
    Return the results of the get request.
    Loops over the api response and appends the results of a while loop for pagination,
    then merges the results with the previously extracted dataframe.
    """
    url = endpoint
    contacts = []
    response = requests.request("GET", url, headers=header, params=query_string).json()
    has_more = response["has-more"]
    offset = response["vid-offset"]
    while has_more is True:
        querystring = {"limit": "100", "archived": "false", "offset": offset}
        try:
            response = requests.request(
                "GET", url, headers=header, params=querystring
            ).json()
            time.sleep(10)
        except (requests.exceptions.ConnectionError, json.decoder.JSONDecodeError) as j:
            logger.error(f"Error occurred: {j}.")
            break
        for x in range(len(response["contacts"])):
            contacts.append(response["contacts"][x])
    contacts = json_normalize(contacts)
    merged = pd.concat([df, contacts])
    return merged
After checking the edited question, here is a possible approach. The code under test could be the following:
def get_paged_contacts(endpoint: str,
                       query_string: typing.Dict[str, typing.Any],
                       df: pd.DataFrame) -> pd.DataFrame:
    """
    Return the results of the get request.
    Loops over api response and appends the results of a while loop
    for pagination, then merges the results with the previously
    extracted dataframe.
    """
    url = endpoint
    contacts = []
    response = requests.request("GET", url,
                                headers=header,
                                params=query_string).json()
    has_more = response["has-more"]
    offset = response["vid-offset"]
    # Get the contacts coming from the first response
    contacts.extend(response['contacts'])
    while has_more:
        querystring = {"limit": "100",
                       "archived": "false", "offset": offset}
        try:
            response = requests.request("GET", url,
                                        headers=header,
                                        params=querystring).json()
            # Update the looping condition in every response
            has_more = response["has-more"]
            # Also advance the offset, assuming each page returns vid-offset
            # like the first one; otherwise every request fetches the same page
            offset = response["vid-offset"]
            contacts.extend(response['contacts'])
            time.sleep(10)
        except (requests.exceptions.ConnectionError, json.decoder.JSONDecodeError) as j:
            logger.error(f"Error occurred: {j}.")
            break
    contacts = pd.json_normalize(contacts)
    merged = pd.concat([df, contacts])
    # Reset the dataframe index after concatenating
    merged.reset_index(drop=True, inplace=True)
    return merged
It can be refactored by having all requests inside the while loop, to avoid duplication, but it is not clear how you want to handle the query_string parameter, so I left it as it is. Then, the test code could be something like this:
@patch('my_project.task.extractor.requests.request')
def test_get_paged_contacts(request_mock):
    request_mock.return_value.json.side_effect = [
        PAGED_CONTACT_RECORD,
        PAGED_CONTACT_RECORD,
        PAGED_CONTACT_RECORD,
        CONTACT_RECORD,
    ]
    expected_df = pd.DataFrame(EXPECTED_CONTACT_RECORD)
    input_df = pd.DataFrame()
    res = get_paged_contacts('dummy_endpoint', None, input_df)
    assert request_mock.call_count == 4
    assert_frame_equal(res, expected_df)
The assert_frame_equal function is a utility provided by pandas to check two dataframes for equality, and it is particularly useful for unit testing with pandas dataframes; see the pandas documentation for details. Of course, you need to import it with from pandas.testing import assert_frame_equal.
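To make the difference concrete, here is a minimal, self-contained sketch; the column names are invented for illustration:

import pandas as pd
from pandas.testing import assert_frame_equal

left = pd.DataFrame({"vid": [1, 2], "email": ["a@x.com", "b@x.com"]})
right = pd.DataFrame({"vid": [1, 2], "email": ["a@x.com", "b@x.com"]})

# A plain `left == right` compares element-wise and returns a DataFrame of
# booleans (or raises if the frames are not identically labeled), so it is
# unusable in an assert. assert_frame_equal returns None on equality and
# raises an AssertionError with a readable diff on any mismatch in values,
# dtypes, index, or columns.
assert_frame_equal(left, right)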
I'm trying to get a dataframe from an API response.
For optimization I run parallel threads, but the time is really high.
A code example:
def parall_func(tuple):
    output = pd.DataFrame()
    list_caracts = list(map(str, tuple[2]))
    item = [(tuple[1])]
    q = len(list_caracts)
    headers = {
        'Content-Type': 'application/json'
    }
    raw_data = json.dumps(
        {"item": item, "list_caracts": list_caracts, "sizePage": q, "numberPage": 1}
    )
    try:
        url = "https://thisisaurl.com/rep/store"
        response = requests.get(url, headers=headers, data=raw_data)
        resp_to_json = json.loads(response.text)
        for i in resp_to_json['tag']:
            output = output.append([i])
    except:
        print("Error: ", sys.exc_info()[0])
        raise
    return output

pool = Threads(cpu_count())
df_parall = list(pool.imap(parall_func, df_queries.itertuples(name=None)))
pool.close()
Final = pd.concat(df_parall, ignore_index=True)
Can you help me correct this, or suggest another logic or structure different from pandas?
The final response has about 3 million records.
After I get the structure I need, I do some calculations and then connect to a database with pyodbc to save the data.
The two things I would try are:
Create a requests.Session instance and use that to issue your GET requests. According to the documentation for this:
The Session object allows you to persist certain parameters across requests. It also persists cookies across all requests made from the Session instance, and will use urllib3’s connection pooling. So if you’re making several requests to the same host, the underlying TCP connection will be reused, which can result in a significant performance increase (see HTTP persistent connection).
Since you are using multithreading, limiting yourself to a number of threads equal to the number of cores you have will result in underperformance, because the threads spend most of their time waiting on network I/O rather than using the CPU. Try creating 500 threads. The only issue becomes whether the website will complain that too many requests per second are being made.
By the way, your source had an indentation error. I have supplied missing import statements as I suppose they should be, and I have renamed the argument tuple to tpl, since tuple is a built-in type and you should not shadow built-in names without a good reason.
from multiprocessing.pool import ThreadPool as Threads
from requests import Session
from functools import partial
import pandas as pd
import json
import sys


def parall_func(session, tpl):
    output = pd.DataFrame()
    list_caracts = list(map(str, tpl[2]))
    item = [(tpl[1])]
    q = len(list_caracts)
    raw_data = json.dumps(
        {"item": item, "list_caracts": list_caracts, "sizePage": q, "numberPage": 1}
    )
    try:
        url = "https://thisisaurl.com/rep/store"
        response = session.get(url, data=raw_data)
        resp_to_json = json.loads(response.text)
        for i in resp_to_json['tag']:
            output = output.append([i])
    except:
        print("Error: ", sys.exc_info()[0])
        raise
    return output


with Session() as session:
    headers = {
        'Content-Type': 'application/json'
    }
    session.headers = headers
    pool = Threads(500)
    df_parall = list(pool.imap(partial(parall_func, session), df_queries.itertuples(name=None)))
    pool.close()

Final = pd.concat(df_parall, ignore_index=True)
Update
One additional thing you can try is to replace building output through multiple append operations with a single concat:
def parall_func(session, tpl):
    list_caracts = list(map(str, tpl[2]))
    item = [(tpl[1])]
    q = len(list_caracts)
    raw_data = json.dumps(
        {"item": item, "list_caracts": list_caracts, "sizePage": q, "numberPage": 1}
    )
    try:
        url = "https://thisisaurl.com/rep/store"
        response = session.get(url, data=raw_data)
        resp_to_json = json.loads(response.text)
        dataframes = [pd.DataFrame([i]) for i in resp_to_json['tag']]
        output = pd.concat(dataframes)
    except:
        print("Error: ", sys.exc_info()[0])
        raise
    return output
If the above doesn't improve performance, one last thing to try is to have the creation of the dataframes done using multiprocessing:
from multiprocessing.pool import ThreadPool as Threads, Pool as MultiProcessingPool
from requests import Session
from functools import partial
import pandas as pd
import json
import sys


def create_data_frames(response):
    resp_to_json = json.loads(response.text)
    dataframes = [pd.DataFrame([i]) for i in resp_to_json['tag']]
    # Perhaps you might want to specify ignore_index=True on the following:
    output = pd.concat(dataframes)
    return output


def parall_func(session, multiprocessing_pool, tpl):
    list_caracts = list(map(str, tpl[2]))
    item = [(tpl[1])]
    q = len(list_caracts)
    raw_data = json.dumps(
        {"item": item, "list_caracts": list_caracts, "sizePage": q, "numberPage": 1}
    )
    try:
        url = "https://thisisaurl.com/rep/store"
        response = session.get(url, data=raw_data)
        output = multiprocessing_pool.apply(create_data_frames, args=(response,))
    except:
        print("Error: ", sys.exc_info()[0])
        raise
    return output


with Session() as session:
    headers = {
        'Content-Type': 'application/json'
    }
    session.headers = headers
    multiprocessing_pool = MultiProcessingPool()
    pool = Threads(500)
    df_parall = list(pool.imap(partial(parall_func, session, multiprocessing_pool), df_queries.itertuples(name=None)))
    multiprocessing_pool.close()
    multiprocessing_pool.join()
    pool.close()
    pool.join()

Final = pd.concat(df_parall, ignore_index=True)
The output should be way longer than it is here.
I start with a GET request and parse a JSON list to extract an id, which I then pass to the second function; that gives me a second id, which I then use to call the third function. But I am only getting one entry, whereas I should be getting many more entries.
The code is the following:
from requests.auth import HTTPBasicAuth
import requests
import json
import urllib3

urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)


def countries():
    data = requests.get("https://localhost:8543/api/netim/v1/countries/", verify=False, auth=HTTPBasicAuth("admin", "admin"))
    rep = data.json()
    return [elem.get("id", "") for elem in rep['items']]


def regions():
    for c in countries():
        url = requests.get("https://localhost:8543/api/netim/v1/countries/{}/regions".format(c), verify=False, auth=HTTPBasicAuth("admin", "admin"))
        response = url.json()
        return [cid.get("id", "") for cid in response['items']]


def city():
    for r in regions():
        api = requests.get("https://localhost:8543/api/netim/v1/regions/{}/cities".format(r), verify=False, auth=HTTPBasicAuth("admin", "admin"))
        resolt = api.json()
        return json.dumps([{"name": r.get("name", ""), "id": r.get("id", "")} for r in resolt['items']], indent=4)

city()
print(city())
The output is the following:
[
    {
        "name": "Herat",
        "id": "AF~HER~Herat"
    }
]
I should have a huge list, so I am not sure what I am missing.
You need to go through all the iterations of your loop and collect the results, then serialize them to JSON and return them.
data = []
for r in regions():
    api = requests.get("https://localhost:8543/api/netim/v1/regions/{}/cities".format(r), verify=False, auth=HTTPBasicAuth("admin", "admin"))
    resolt = api.json()
    data.extend([{"name": r.get("name", ""), "id": r.get("id", "")} for r in resolt['items']])
return json.dumps(data, indent=4)
This would be a fix for city(), but you have the same problem in all your functions: return immediately exits the function and does nothing else afterwards, so effectively each of your for loops does only one iteration. The same collect-then-return fix, shown below for regions(), applies to the other functions as well.
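Here is a sketch of that fix applied to regions(); the endpoint and auth are copied from the question, and collecting the ids into a list is the only change:

def regions():
    # Collect the region ids of ALL countries instead of returning
    # after the first iteration
    region_ids = []
    for c in countries():
        url = requests.get(
            "https://localhost:8543/api/netim/v1/countries/{}/regions".format(c),
            verify=False,
            auth=HTTPBasicAuth("admin", "admin"),
        )
        response = url.json()
        region_ids.extend(cid.get("id", "") for cid in response['items'])
    return region_ids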
I'll update my example here to give you a better idea of what's occurring.
Your functions are basically this:
def test_fn():
    for i in [1, 2, 3, 4]:
        return i

# output:
1
# We never see 2 or 3 or 4 because we return before looping on them.
What you want:
def test_fn():
    results = []
    for i in [1, 2, 3, 4]:
        results.append(i)
    return results

# output
[1, 2, 3, 4]
It seems like you understand that the for loop is going to take some action once for each element in the list. What you're not understanding is that return ends the function NOW. No more for loop, no more actions, and in your code, you immediately return inside the for loop, stopping any further action.
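As an aside (not part of the original answer): if you do want to hand results out one at a time as the loop produces them, a generator is the idiomatic middle ground, since yield pauses the function instead of ending it the way return does.

def test_fn_generator():
    for i in [1, 2, 3, 4]:
        # yield suspends the function here and resumes it on the next request,
        # rather than terminating it like return
        yield i

print(list(test_fn_generator()))  # [1, 2, 3, 4]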
I am a beginner when it comes to writing tests and mocking.
I have created two modules. One module's object (Site) creates an object from my second module (Item) on init. The Item object makes a call to an API endpoint to get some data using requests.
I want to mock the API call I am making so I can test things like a bad response and, importantly, have control over the response data.
I have simplified my code and put it below. When I run the test I get back the actual response data, not what I have mocked.
I have a feeling I am not putting my mock in the right place. Also, I have seen lots of people saying to use the @patch decorator from unittest.mock. I am not clear on whether I should be using that here.
So I am looking for how to get _get_range_detail to actually return a mocked response from requests, and also just general feedback on whether I am approaching this the right way.
# hello_world.py
from mymodule.site import Site

sites = [
    dict(
        name="site1",
        ranges=[
            "range1",
            "range2"
        ]
    )
]

site_object = Site(sites[0]['name'], sites[0]['ranges'])

for i in site_object.get_ranges_objects():
    print(i.range_detail)
# site.py
from mymodule.item import Item


class Site:
    def __init__(self, name, ranges):
        self.name = name
        self.ranges = ranges
        self.ranges_objects = []
        for my_range in ranges:
            self.ranges_objects.append(Item(my_range))

    def get_ranges_objects(self):
        return self.ranges_objects
# item.py
import requests


class Item:
    def __init__(self, range_name):
        self.range_name = range_name
        self.range_detail = self._get_range_detail(self.range_name)

    def _get_range_detail(self, range_name):
        uri = "https://postman-echo.com/get?some_cool_value=real_value"
        try:
            r = requests.get(uri)
            if r.status_code == 200:
                return r.json()['args']
            else:
                return None
        except Exception as e:
            print(e)
            exit(1)
# test_site.py
import pytest
from mymodule.site import Site
from unittest import mock
from mymodule.item import requests


def test_get_ranges_objects():
    sites = [
        dict(
            name="site1",
            ranges=[
                "range1",
                "range2"
            ]
        )
    ]
    requests = mock.Mock()
    requests.status_code = 200
    requests.json.return_value = {
        'args': {'some_mock_value': 'mocky'}
    }
    site_object = Site(sites[0]['name'], sites[0]['ranges'])
    assert site_object.name == "site1"
    assert isinstance(site_object.ranges_objects, list)
    assert site_object.ranges_objects[0].range_detail == dict(some_mock_value='mocky')
You can use pytest-mock. It makes mocking in pytest simple (pip install pytest-mock).
You should replace requests.get. Simply
requests_get = mock.patch('requests.get').start()
If you use pytest-mock,
requests_get = mocker.patch('requests.get')
Here is the test case rewritten using pytest-mock:
# test_site.py
import pytest
from mymodule.site import Site
from unittest import mock


@pytest.fixture
def requests_get(mocker):
    requests_get = mocker.patch('requests.get')
    yield requests_get


def test_get_ranges_objects(mocker, requests_get):
    response = mocker.MagicMock()
    response.status_code = 200
    response.json.return_value = {'args': {'some_mock_value': 'mocky'}}
    requests_get.return_value = response
    sites = [
        dict(
            name="site1",
            ranges=[
                "range1",
                "range2"
            ]
        )
    ]
    site_object = Site(sites[0]['name'], sites[0]['ranges'])
    assert site_object.name == "site1"
    assert isinstance(site_object.ranges_objects, list)
    assert site_object.ranges_objects[0].range_detail == {'some_mock_value': 'mocky'}
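One general mocking caveat, offered as background rather than something from the original answer: patching 'requests.get' works here because item.py does import requests and then calls requests.get, so the lookup goes through the requests module at call time. If item.py instead used from requests import get, you would have to patch the name where it is looked up:

# Hypothetical: only needed if item.py used `from requests import get`
requests_get = mocker.patch('mymodule.item.get')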
Hello, I designed a Python script that works locally, but I would like to push it to AWS Lambda. I'm having some issues, specifically with creating the handler within a class. I have figured out how to get rid of the 'handler error' in Lambda by creating the handler function outside of the class, but unfortunately that doesn't run the rest of my code. My goal is to place the lambda_handler function either inside my class or have the function call the class. Any advice is really appreciated!
#!/usr/bin/python
import requests
import sys
import os
import json
import time
from datetime import datetime, timedelta

key = 'OKTA_AUTH'
### key = os.environ['OKTA_AUTH'] #####
outcome = 'outcome.result eq "FAILURE"'
event_type = 'eventType eq "application.provision.user.deactivate"'
app_id = 'target.id eq "SOME OKTA APP ID"'
all_params = f'{event_type} and {app_id} and {outcome}'
api_url = 'https://domain.okta.com/api/v1/logs'
slack_url = "SLACK URL"
last_hour_date_time = datetime.utcnow() - timedelta(days=10)
since = str(last_hour_date_time.strftime('%Y-%m-%dT%H:%M:%S.000Z'))
actor_list = []
unique_list = []


class Events:
    def lambda_handler(event, context):
        okta_auth()

    def okta_auth(self):
        event_list = []
        url = api_url.format()
        params = {
            'filter': all_params,
            'since': since
        }
        response = requests.get(url, params=params,
                                headers={'Accept': 'application/json',
                                         'authorization': key})
        response_json = response.json()
        for event_data in response_json:
            events = event_data['outcome']['reason']
            event_list.append(events)
            actors = event_data['actor']['alternateId']
            actor_list.append(actors)
            unique_set = set(actor_list)
            unique_list.append(unique_set)
        if event_list != []:
            self.post_slack()
        else:
            sys.exit(0)

    def post_slack(self):
        url = slack_url.format()
        payload = "{\"text\": \" Twilio Flex provisioning failure. Please check the following users %s \"}" % (unique_list)
        requests.post(url, headers={'Accept': 'application/json'}, data=payload)


### newly added code
if __name__ == "__main__":
    Events().lambda_handler()
### end

#### ORIGINAL CODE USED TO BE
# if __name__ == "__main__":
#     Events().okta_auth()
After some solid studying, I discovered I was running into two issues with my code and how AWS Lambda works. The first issue was how I was calling the class in Lambda. I thought that you had to have the handler function inside the class, but instead I created a function that runs the class.
def lambda_handler(event, context):
    Events().okta_auth()  # the function (okta_auth) within the class (Events)
My second issue was deployment via inline code. Lambda does not have the requests module installed by default, so I created a local directory, pip3 installed requests into it, and moved the Python script there. You can then zip the folder contents and upload the zip to AWS Lambda.
mkdir lambda_deploy
pip3 install --target ./lambda_deploy/ requests
cd lambda_deploy/
zip -r9 ${OLDPWD}/function.zip .
Here's the final code below for reference.
#!/usr/bin/python
import requests
import sys
import os
import json
import time
from datetime import datetime, timedelta

key = os.environ['OKTA_AUTH']
outcome = 'outcome.result eq "FAILURE"'
event_type = 'eventType eq "application.provision.user.deactivate"'
target_type = 'target.type eq "User"'
app_id = 'target.id eq "SOME APP ID"'
all_params = f'{event_type} and {target_type} and {app_id} and {outcome}'
api_url = f'https://domain.okta.com/api/v1/logs'
slack_url = "some slack WEBHOOK URL"
last_hour_date_time = datetime.utcnow() - timedelta(days=1)
since = str(last_hour_date_time.strftime('%Y-%m-%dT%H:%M:%S.000Z'))
unique_set = []


def lambda_handler(event, context):
    Events().okta_auth()


class Events:
    def okta_auth(self):
        event_list = []
        url = api_url.format()
        params = {
            'filter': all_params,
            'since': since
        }
        response = requests.get(url, params=params,
                                headers={'Accept': 'application/json',
                                         'authorization': key})
        response_json = response.json()
        for event_data in response_json:
            events = event_data['outcome']['reason']
            targets = event_data['target']
            parse = list(map(lambda x: x['alternateId'], targets))
            target_list = []
            event_list.append(events)
            target_list.append(parse[1])
            for item in target_list:
                if item not in unique_set:
                    unique_set.append(item)
        if event_list != []:
            self.post_slack()
        else:
            print("no errors today")

    def post_slack(self):
        url = slack_url.format()
        payload = "{\"text\": \" Twilio Flex provisioning failure. Please check the following users: \n %s \"}" % '\n'.join(unique_set)
        requests.post(url, headers={'Accept': 'application/json'}, data=payload)
Your function
def lambda_handler(event, context):
    print(event)
only prints the event and does not execute anything else. I guess that's why the Lambda is not doing anything. The lambda_handler is the entry point of your Lambda.
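Also worth checking (a general Lambda detail, not something stated in the question): the function's Handler setting must name that entry point as module_name.function_name. Assuming the file from the accepted answer is saved as lambda_function.py, the setting would be lambda_function.lambda_handler, and the module-level handler simply delegates to the class:

# lambda_function.py -- the Lambda Handler setting would be: lambda_function.lambda_handler
def lambda_handler(event, context):
    # Delegate to the class method so the real work actually runs
    Events().okta_auth()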