zoom api cmeeting created with wrong start date - python

I'm working on an elearning website and I'm trying to integrate Zoom meetings using the API
According to the official documentation, the start_time must be set to the yyyy-MM-ddTH:M:S.
Example : 2020-10-02T18:00:00
Based on that, this is the code I'm using.
class Zoom:
...
def parse_date(self, date):
parts = date.strip().split(' ')
part1 = parts[0]
part2 = parts[1]
parts1 = part1.split('/')
day = parts1[0]
month = parts1[1]
year = parts1[2]
parts2 = part2.split(':')
h = parts2[0]
m = parts2[1]
formatted_date = year + '-' + month + '-' + day + 'T' + h + ':' + m + ':00Z'
return formatted_date
def create_meeting(self, topic, start_date, password):
token = self.get_token()
conn = http.HTTPSConnection(Zoom.ZOOM_API_URL)
headers = {'authorization': "Bearer " + token, 'content-type': "application/json"}
data = {'topic': topic, 'type': 2, 'start_time': self.parse_date(start_date), 'timezone': 'Africa/Casablanca', 'password': password}
conn.request("POST", "/v2/users/me/meetings", json.dumps(data), headers)
response = json.loads(conn.getresponse().read().decode('utf-8'))
return response
zoom = Zoom('API_KEY', 'API_SECRET')
meeting = zoom.create_meeting(topic='Learning test', start_date='02/10/2020 18:00', password='123456')
The meeting is created but the start date is ignored as shown in the image
As you can see I specified 6 PM as a start date but it's 7 PM.

It seems the problem was caused by the Z at the end of the date. After removing it the date hour is no longer incremented.

Related

Separate column items by pipe instead of comma

I made a scraper for yellow pages in python. There is a table with working hours of the businesses listed. I scrape that into a list and save it in a csv using scrapy. These different items are seperated by a comma by default like
Mon,Closed,Tue - Fri ,9 00 am - 6:00 pm,Sat ,9 00 am - 1:00 pm,Sun,Closed
I want to use a pipe (|) instead of commas. So the final list be like this: Mon,Closed| Tue - Fri ,9 00 am - 6:00 pm|Sat ,9 00 am - 1:00 pm|Sun,Closed
Any help on how should i implement this would be appreciated.Following is my parse method:
def parse_item(self, response):
item = YellowItem()
item['keyword'] = category_i
item['title'] = response.xpath('//h1/text()').extract_first()
item['phone'] = response.xpath('//p[#class="phone"]/text()').extract_first()
addr = response.xpath('//h2[#class="address"]/text()').extract_first()
item['street_address'] = addr
email = response.xpath('//a[#class="email-business"]/#href').extract_first()
try:
item['email'] = email.replace("mailto:", '')
except AttributeError:
pass
item['website'] = response.xpath('//a[#class="primary-btn website-link"]/#href').extract_first()
item['Description'] = response.xpath('//dd[#class="general-info"]/text()').extract_first()
hours = response.xpath(
'//div[#class="open-details"]/descendant-or-self::*/text()[not(ancestor::*['
'#class="hour-category"])]').extract()
t_f_h = []
for hour in hours:
data = re.findall(r'(\d{1,2}:\d{2})\s(AM|PM|am|pm)', hour)
if data:
time = data[0][0] + " " + data[0][1]
time_t = data[1][0] + " " + data[1][1]
d = time
t = pd.to_datetime(d).strftime('%H:%M')
start = t
d_t = time_t
time_d = pd.to_datetime(d_t).strftime('%H:%M')
end = time_d
fin_t = hour.replace(time, start)
m_f_t = fin_t.replace(time_t, end)
t_f_h.append(m_f_t)
if not data:
t_f_h.append(hour)
item['t_hour_format'] = t_f_h
try:
clean_l = []
for hour in hours:
clean_st = hour.replace(":", " ", 1)
clean_l.append(clean_st)
item['Hours'] = clean_l
except AttributeError:
pass
item['Other_info'] = response.xpath(
'//dd[#class="other-information"]/descendant-or-self::*/text()').extract()
category_ha = response.xpath('//dd[#class="categories"]/descendant-or-self::*/text()').extract()
item['Categories'] = " ".join(category_ha)
item['Years_in_business'] = response.xpath('//div[#class="number"]/text()').extract_first()
year = item['Years_in_business']
if year:
opened = 2020 - int(year) # change the year here
item['year_opened'] = 'Year Opened: ' + str(opened)
neighborhood = response.xpath('//dd[#class="neighborhoods"]/descendant-or-self::*/text()').extract()
item['neighborhoods'] = ' '.join(neighborhood)
item['other_links'] = response.xpath('//dd[#class="weblinks"]/descendant-or-self::*/text()').extract()
item['BBB_Grade'] = response.xpath('//span[#class="bbb-no-link"]/text()').extract_first()
item['link_to_the_listing'] = response.url
adress = str(addr)
data = usaddress.tag(adress)
if "PlaceName" in data[0].keys():
item["City"] = data[0]["PlaceName"]
if "StateName" in data[0].keys():
item["State"] = data[0]["StateName"]
if "ZipCode" in data[0].keys():
item["Zip"] = data[0]["ZipCode"]
return item
You could write your own exporter based on CsvItemExporter.
from scrapy.exporters import CsvItemExporter
class MyExporter(CsvItemExporter):
def __init__(self, *args, **kwargs):
kwargs['delimiter'] = '|'
super(MyExporter, self).__init__(*args, **kwargs)
You can then set your new ItemExporter in your project's settings.py.
FEED_EXPORTERS = {
'csv': 'my_project.file_containing_exporter.MyExporter'
}

"if' and "else" being ignored

As per the title, my if/else below are not being considered — not sure why.
Here is my code:
cursor.execute("SELECT epic, MAX(timestamp) FROM market_data GROUP BY epic")
epics=(
"KA.D.MXUSLN.DAILY.IP",
"CS.D.BITCOIN.TODAY.IP",
"CS.D.CRYPTOB10.TODAY.IP")
for row in cursor:
for epic in epics:
# If epic exists in the market_data table then take the max timestamp and request new data with date1=maxtimestamp+1min and date2=now()
if epic in row['epic']:
date1 = row['max'] + datetime.timedelta(minutes=1)
date2 = datetime.datetime.now()
else:
# if epic not already in market_data table then fresh new request with date1=now() and date2=now()+1min
date1 = datetime.datetime.now()
date2 = datetime.datetime.now() + datetime.timedelta(minutes=1)
# URL PRODUCTION/LIVE Enviroment - demo most likely throttled and limited
fmt = "https://example.com/" + str(epic) + "/1/MINUTE/batch/start/{date1:%Y/%m/%d/%H/%M/0/0}/end/{date2:%Y/%m/%d/%H/%M/%S/0}?format=json"
# while date1 <= date2:
url = fmt.format(epic, date1=date1, date2=date2)
resp = requests.get(url, headers=headers)
print(url)
The output of cursor is:
CS.D.BITCOIN.TODAY.IP 2019-05-01 00:00:00
KA.D.MXUSLN.DAILY.IP 2020-02-14 14:26:00
The code above outputs this:
https://example.com/CS.D.BITCOIN.TODAY.IP/start/2019/05/01/00/01/0/0/end/2020/02/14/15/10/44/0?format=json
https://example.com/CS.D.CRYPTOB10.TODAY.IP/start/2020/02/14/15/10/0/0/end/2020/02/14/15/11/44/0?format=json
https://example/KA.D.MXUSLN.DAILY.IP/start/2020/02/14/14/27/0/0/end/2020/02/14/15/10/44/0?format=json
https://example.com/CS.D.BITCOIN.TODAY.IP/start/2020/02/14/15/10/0/0/end/2020/02/14/15/11/44/0?format=json
https://example.com/CS.D.CRYPTOB10.TODAY.IP/start/2020/02/14/15/10/0/0/end/2020/02/14/15/11/44/0?format=json
Note - as, epics "KA.D.MXUSLN.DAILY.IP" and "CS.D.BITCOIN.TODAY.IP are already in cursor, I expect the output to just be:
https://example.com/CS.D.BITCOIN.TODAY.IP/start/2019/05/01/00/01/0/0/end/2020/02/14/15/10/44/0?format=json
https://example.com/CS.D.CRYPTOB10.TODAY.IP/start/2020/02/14/15/10/0/0/end/2020/02/14/15/11/44/0?format=json
https://example/KA.D.MXUSLN.DAILY.IP/start/2020/02/14/14/27/0/0/end/2020/02/14/15/10/44/0?format=json
Why aren't my if and else being considered?
It is considered, but then you continue to iterate over the other epics anyway and print those too. You could use next instead of your inner for loop, if you find a match, remove it from the list of epics. and then any remaining epics can be handled afterwards as required
for row in cursor:
epic = next(epic for epic in epics if epic in row["epic"])
if epic is not None:
date1 = row['max'] + datetime.timedelta(minutes=1)
date2 = datetime.datetime.now()
epics.remove(epic)
else:
date1 = datetime.datetime.now()
date2 = datetime.datetime.now() + datetime.timedelta(minutes=1)
# URL PRODUCTION/LIVE Enviroment - demo most likely throttled and limited
fmt = "https://example.com/" + str(epic) + "/1/MINUTE/batch/start/{date1:%Y/%m/%d/%H/%M/0/0}/end/{date2:%Y/%m/%d/%H/%M/%S/0}?format=json"
# while date1 <= date2:
url = fmt.format(epic, date1=date1, date2=date2)
resp = requests.get(url, headers=headers)
print(url)
Note: This leaves an issue where your fmt url will contain None, if there are no matches, not sure how you wish to handle this.

Proper way to format date for Fedex API XML

I have a Django application where I am trying to make a call to Fedex's API to send out a shipping label for people wanting to send in a product for cash. When I try to make the call though it says there is a data validation issue with the Expiration field in the XML I am filling out. I swear this has worked in the past with me formatting the date as "YYYY-MM-DD", but now it is not. I read that with Fedex, you need to format the date as ISO, but that is also not passing the data validation. I am using a python package created to help with tapping Fedex's API.
Django view function for sending API Call
def Fedex(request, quote):
label_link = ''
expiration_date = datetime.datetime.now() + datetime.timedelta(days=10)
# formatted_date = "%s-%s-%s" % (expiration_date.year, expiration_date.month, expiration_date.day)
formatted_date = expiration_date.replace(microsecond=0).isoformat()
if quote.device_type != 'laptop':
box_length = 9
box_width = 12
box_height = 3
else:
box_length = 12
box_width = 14
box_height = 3
logging.basicConfig(stream=sys.stdout, level=logging.INFO)
## Page 411 of FedEx Dev Guide - 20.14 Email Labels
CONFIG_OBJ = FedexConfig(key=settings.FEDEX_KEY, password=settings.FEDEX_PASSWORD, account_number=settings.FEDEX_ACCOUNT,
meter_number=settings.FEDEX_METER, use_test_server=settings.USE_FEDEX_TEST)
fxreq = FedexCreatePendingShipRequestEmail(CONFIG_OBJ, customer_transaction_id='xxxxxx id:01')
fxreq.RequestedShipment.ServiceType = 'FEDEX_GROUND'
fxreq.RequestedShipment.PackagingType = 'YOUR_PACKAGING'
fxreq.RequestedShipment.DropoffType = 'REGULAR_PICKUP'
fxreq.RequestedShipment.ShipTimestamp = datetime.datetime.now()
# Special fields for the email label
fxreq.RequestedShipment.SpecialServicesRequested.SpecialServiceTypes = ('RETURN_SHIPMENT', 'PENDING_SHIPMENT')
fxreq.RequestedShipment.SpecialServicesRequested.PendingShipmentDetail.Type = 'EMAIL'
fxreq.RequestedShipment.SpecialServicesRequested.PendingShipmentDetail.ExpirationDate = formatted_date
email_address = fxreq.create_wsdl_object_of_type('EMailRecipient')
email_address.EmailAddress = quote.email
email_address.Role = 'SHIPMENT_COMPLETOR'
# RETURN SHIPMENT DETAIL
fxreq.RequestedShipment.SpecialServicesRequested.ReturnShipmentDetail.ReturnType = ('PENDING')
fxreq.RequestedShipment.SpecialServicesRequested.ReturnShipmentDetail.ReturnEMailDetail = fxreq.create_wsdl_object_of_type(
'ReturnEMailDetail')
fxreq.RequestedShipment.SpecialServicesRequested.ReturnShipmentDetail.ReturnEMailDetail.MerchantPhoneNumber = 'x-xxx-xxx-xxxx'
fxreq.RequestedShipment.SpecialServicesRequested.PendingShipmentDetail.EmailLabelDetail.Recipients = [email_address]
fxreq.RequestedShipment.SpecialServicesRequested.PendingShipmentDetail.EmailLabelDetail.Message = "Xxxxxx Xxxxxx"
fxreq.RequestedShipment.LabelSpecification = {'LabelFormatType': 'COMMON2D', 'ImageType': 'PDF'}
fxreq.RequestedShipment.Shipper.Contact.PersonName = quote.first_name + ' ' + quote.last_name
fxreq.RequestedShipment.Shipper.Contact.CompanyName = ""
fxreq.RequestedShipment.Shipper.Contact.PhoneNumber = quote.phone
fxreq.RequestedShipment.Shipper.Address.StreetLines.append(quote.address)
fxreq.RequestedShipment.Shipper.Address.City = quote.city
fxreq.RequestedShipment.Shipper.Address.StateOrProvinceCode = quote.state
fxreq.RequestedShipment.Shipper.Address.PostalCode = quote.zip
fxreq.RequestedShipment.Shipper.Address.CountryCode = settings.FEDEX_COUNTRY_CODE
fxreq.RequestedShipment.Recipient.Contact.PhoneNumber = settings.FEDEX_PHONE_NUMBER
fxreq.RequestedShipment.Recipient.Address.StreetLines = settings.FEDEX_STREET_LINES
fxreq.RequestedShipment.Recipient.Address.City = settings.FEDEX_CITY
fxreq.RequestedShipment.Recipient.Address.StateOrProvinceCode = settings.FEDEX_STATE_OR_PROVINCE_CODE
fxreq.RequestedShipment.Recipient.Address.PostalCode = settings.FEDEX_POSTAL_CODE
fxreq.RequestedShipment.Recipient.Address.CountryCode = settings.FEDEX_COUNTRY_CODE
fxreq.RequestedShipment.Recipient.AccountNumber = settings.FEDEX_ACCOUNT
fxreq.RequestedShipment.Recipient.Contact.PersonName = ''
fxreq.RequestedShipment.Recipient.Contact.CompanyName = 'Xxxxxx Xxxxxx'
fxreq.RequestedShipment.Recipient.Contact.EMailAddress = 'xxxxxx#xxxxxxxxx'
# Details of Person Who is Paying for the Shipping
fxreq.RequestedShipment.ShippingChargesPayment.PaymentType = 'SENDER'
fxreq.RequestedShipment.ShippingChargesPayment.Payor.ResponsibleParty.AccountNumber = settings.FEDEX_ACCOUNT
fxreq.RequestedShipment.ShippingChargesPayment.Payor.ResponsibleParty.Contact.PersonName = 'Xxxxx Xxxxx'
fxreq.RequestedShipment.ShippingChargesPayment.Payor.ResponsibleParty.Contact.CompanyName = 'Xxxxx Xxxxxx'
fxreq.RequestedShipment.ShippingChargesPayment.Payor.ResponsibleParty.Contact.PhoneNumber = 'x-xxx-xxx-xxxx'
fxreq.RequestedShipment.ShippingChargesPayment.Payor.ResponsibleParty.Contact.EMailAddress = 'xxxxxxx#xxxxxxxxx'
fxreq.RequestedShipment.ShippingChargesPayment.Payor.ResponsibleParty.Address.StreetLines = 'Xxxxx N. xXxxxxx'
fxreq.RequestedShipment.ShippingChargesPayment.Payor.ResponsibleParty.Address.City = 'Xxxxxxx'
fxreq.RequestedShipment.ShippingChargesPayment.Payor.ResponsibleParty.Address.StateOrProvinceCode = 'XX'
fxreq.RequestedShipment.ShippingChargesPayment.Payor.ResponsibleParty.Address.PostalCode = 'xxxxx'
fxreq.RequestedShipment.ShippingChargesPayment.Payor.ResponsibleParty.Address.CountryCode = 'US'
# Package Info
package1 = fxreq.create_wsdl_object_of_type('RequestedPackageLineItem')
package1.SequenceNumber = '1'
package1.Weight.Value = 1
package1.Weight.Units = "LB"
package1.Dimensions.Length = box_length
package1.Dimensions.Width = box_width
package1.Dimensions.Height = box_height
package1.Dimensions.Units = "IN"
package1.ItemDescription = 'Phone'
fxreq.RequestedShipment.RequestedPackageLineItems.append(package1)
fxreq.RequestedShipment.PackageCount = '1'
try:
fxreq.send_request()
label_link = str(fxreq.response.CompletedShipmentDetail.AccessDetail.AccessorDetails[0].EmailLabelUrl)
except Exception as exc:
print('Fedex Error')
print('===========')
print(exc)
print('==========')
return label_link
Error Log
Error:cvc-datatype-valid.1.2.1: \\'2017-11-3\\' is not a valid value for \\'date\\'.\\ncvc-type.3.1.3: The value \\'2017-11-3\\' of element \\'ns0:ExpirationDate\\' is not valid."\\n }\\n }' (Error code: -1)

getting an empty array from a request to eventful api

Im trying to use the eventful api to get information about only music events (concerts) between two dates. For example I want to get the below information about each concert from 20171012 to 20171013:
- city
- performer
- country
- latitude
- longitude
- genre
- title
- image
- StarTime
Im using a python example available online and change it to get the data above. But for now its not working Im just able to get this information:
{'latitude': '40.4',
'longitude': '-3.68333',
'start_time': '2017-10-12 20:00:00',
'city_name': 'Madrid', 'title': 'Kim Waters & Maysa Smooth en Hot Jazz Festival'}
But the performer, genre country and image url its not working. Do you know how to get that information? When I change the python example below to get this information it returns always a empty array.
python example working: (However, without getting the performer, genre, country and image url, if I add theese elements to the event_features I get an empty array)
import requests
import datetime
def get_event(user_key, event_location , start_date, end_date, event_features, fname):
data_lst = [] # output
start_year = int(start_date[0:4])
start_month = int(start_date[4:6])
start_day = int(start_date[6:])
end_year = int(end_date[0:4])
end_month = int(end_date[4:6])
end_day = int(end_date[6:])
start_date = datetime.date(start_year, start_month, start_day)
end_date = datetime.date(end_year, end_month, end_day)
step = datetime.timedelta(days=1)
while start_date <= end_date:
date = str(start_date.year)
if start_date.month < 10:
date += '0' + str(start_date.month)
else:
date += str(start_date.month)
if start_date.day < 10:
date += '0' + str(start_date.day)
else:
date += str(start_date.day)
date += "00"
date += "-" + date
url = "http://api.eventful.com/json/events/search?"
url += "&app_key=" + user_key
url += "&location=" + event_location
url += "&date=" + date
url += "&page_size=250"
url += "&sort_order=popularity"
url += "&sort_direction=descending"
url += "&q=music"
url+= "&c=music"
data = requests.get(url).json()
try:
for i in range(len(data["events"]["event"])):
data_dict = {}
for feature in event_features:
data_dict[feature] = data["events"]["event"][i][feature]
data_lst.append(data_dict)
except:
pass
print(data_lst)
start_date += step
def main():
user_key = ""
event_location = "Madrid"
start_date = "20171012"
end_date = "20171013"
event_location = event_location.replace("-", " ")
start_date = start_date
end_date = end_date
event_features = ["latitude", "longitude", "start_time"]
event_features += ["city_name", "title"]
event_fname = "events.csv"
get_event(user_key, event_location, start_date, end_date, event_features, event_fname)
if __name__ == '__main__':
main()
You should debug your problem and not to ignore all exceptions.
Replace lines try: ... except: pass by:
data = requests.get(url).json()
if "event" in data.get("event", {}):
for row in data["events"]["event"]:
# print(row) # you can look here what are the available data, while debugging
data_dict = {feature: row[feature] for feature in features}
data_lst.append(data_dict)
else:
pass # a problem - you can do something here
You will see a KeyError with a name of the missing feature that is not present in "row". You should fix missing features and read documentation about API of that service. Country feature is probably "country_name" similarly to "city_name". Maybe you should set the "include" parameter to specify more sections of details in search than defaults only.
An universal try: ... except: pass should never used, because "Errors should never pass silently." (The Zen of Python)
Read Handling Exceptions:
... The last except clause may omit the exception name(s), to serve as a wildcard. Use this with extreme caution, since it is easy to mask a real programming error in this way! ...
A more important command where unexpected exceptions are possible is requests.get(url).json(), e.g. TimeoutException. Anyway you should not continue the "while" loop if there is a problem.
If you look at the data returned by eventful.com, a few things are clear:
For country, the field to be used is country_name. This was missing from your "event_features" list
There can be multiple performers for each event. To get all the performers, you need to add "performers" to your "event_features" list
There is no field named Genre and hence you cannot find Genre
The "image" field is always None. This means there is no image available.
Here is modified code. Hopefully it works much better and it will help you move forward.
import datetime
import requests
data_lst = [] # output
event_features = ["latitude", "longitude", "start_time", "city_name",
"country_name", "title", "image", "performers"]
def get_event(user_key, event_location, start_date, end_date):
start_year = int(start_date[0:4])
start_month = int(start_date[4:6])
start_day = int(start_date[6:])
end_year = int(end_date[0:4])
end_month = int(end_date[4:6])
end_day = int(end_date[6:])
start_date = datetime.date(start_year, start_month, start_day)
end_date = datetime.date(end_year, end_month, end_day)
step = datetime.timedelta(days=1)
while start_date <= end_date:
date = str(start_date.year)
if start_date.month < 10:
date += '0' + str(start_date.month)
else:
date += str(start_date.month)
if start_date.day < 10:
date += '0' + str(start_date.day)
else:
date += str(start_date.day)
date += "00"
date += "-" + date
url = "http://api.eventful.com/json/events/search?"
url += "&app_key=" + user_key
url += "&location=" + event_location
url += "&date=" + date
url += "&page_size=250"
url += "&sort_order=popularity"
url += "&sort_direction=descending"
url += "&q=music"
url += "&c=music"
data = requests.get(url).json()
print "==== Data Returned by eventful.com ====\n", data
try:
for i in range(len(data["events"]["event"])):
data_dict = {}
for feature in event_features:
data_dict[feature] = data["events"]["event"][i][feature]
data_lst.append(data_dict)
except IndexError:
pass
print "===================================="
print data_lst
start_date += step
def main():
user_key = "Enter Your Key Here"
event_location = "Madrid"
start_date = "20171012"
end_date = "20171013"
event_location = event_location.replace("-", " ")
start_date = start_date
end_date = end_date
#event_fname = "events.csv"
get_event(user_key, event_location, start_date, end_date)
if __name__ == '__main__':
main()
I was able to successfully pull data from the Eventful API for the performer, image, and country fields. However, I don't think the Eventful Search API supports genre - I don't see it in their documentation.
To get country, I added "country_name", "country_abbr" to your event_features array. That adds these values to the resulting JSON:
'country_abbr': u'ESP',
'country_name': u'Spain'
Performer also can be retrieved by adding "performers" to event_features. That will add this to the JSON output:
'performers': {
u'performer': {
u'name': u'Kim Waters',
u'creator': u'evdb',
u'url': u'http://concerts.eventful.com/Kim-Waters?utm_source=apis&utm_medium=apim&utm_campaign=apic',
u'linker': u'evdb',
u'short_bio': u'Easy Listening / Electronic / Jazz', u'id': u'P0-001-000333271-4'
}
}
To retrieve images, add image to the event_features array. Note that not all events have images, however. You will either see 'image': None or
'image': {
u'medium': {
u'url': u'http://d1marr3m5x4iac.cloudfront.net/store/skin/no_image/categories/128x128/other.jpg',
u'width': u'128',
u'height': u'128'
},
u'thumb': {
u'url': u'http://d1marr3m5x4iac.cloudfront.net/store/skin/no_image/categories/48x48/other.jpg',
u'width': u'48',
u'height': u'48'
}
}
Good luck! :)

Read data from OECD API into python (and pandas)

I'm trying to download data from OECD API (https://data.oecd.org/api/sdmx-json-documentation/) into python.
I managed to download data in SDMX-JSON format (and transform it to JSON) so far:
OECD_ROOT_URL = "http://stats.oecd.org/SDMX-JSON/data"
def make_OECD_request(dsname, dimensions, params = None, root_dir = OECD_ROOT_URL):
"""Make URL for the OECD API and return a response"""
"""4 dimensions: location, subject, measure, frequency"""
if not params:
params = {}
dim_args = ['+'.join(d) for d in dimensions]
dim_str = '.'.join(dim_args)
url = root_dir + '/' + dsname + '/' + dim_str + '/all'
print('Requesting URL ' + url)
return rq.get(url = url, params = params)
response = make_OECD_request('MEI'
, [['USA', 'CZE'], [], [], ['M']]
, {'startTime': '2009-Q1', 'endTime': '2010-Q1'})
if (response.status_code == 200):
json = response.json()
How can I transform the data set into pandas.DataFrame? I tried pandas.read_json() and pandasdmx library, but I was not able to solve this.
The documentation the original question points to does not (yet?) mention that the API accepts the parameter contentType, which may be set to csv. That makes it trivial to use with Pandas.
import pandas as pd
def get_from_oecd(sdmx_query):
return pd.read_csv(
f"https://stats.oecd.org/SDMX-JSON/data/{sdmx_query}?contentType=csv"
)
print(get_from_oecd("MEI_FIN/IRLT.AUS.M/OECD").head())
Update:
The function to automatically download the data from OECD API is now available in my Python library CIF (abbreviation for the Composite Indicators Framework, installable via pip):
from cif import cif
data, subjects, measures = cif.createDataFrameFromOECD(countries = ['USA'], dsname = 'MEI', frequency = 'M')
Original answer:
If you need your data in Pandas DataFrame format, it is IMHO better to send your request to OECD with additional parameter 'dimensionAtObservation': 'AllDimensions', which results in more comprehensive JSON file.
Use following functions to download the data:
import requests as rq
import pandas as pd
import re
OECD_ROOT_URL = "http://stats.oecd.org/SDMX-JSON/data"
def make_OECD_request(dsname, dimensions, params = None, root_dir = OECD_ROOT_URL):
# Make URL for the OECD API and return a response
# 4 dimensions: location, subject, measure, frequency
# OECD API: https://data.oecd.org/api/sdmx-json-documentation/#d.en.330346
if not params:
params = {}
dim_args = ['+'.join(d) for d in dimensions]
dim_str = '.'.join(dim_args)
url = root_dir + '/' + dsname + '/' + dim_str + '/all'
print('Requesting URL ' + url)
return rq.get(url = url, params = params)
def create_DataFrame_from_OECD(country = 'CZE', subject = [], measure = [], frequency = 'M', startDate = None, endDate = None):
# Request data from OECD API and return pandas DataFrame
# country: country code (max 1)
# subject: list of subjects, empty list for all
# measure: list of measures, empty list for all
# frequency: 'M' for monthly and 'Q' for quarterly time series
# startDate: date in YYYY-MM (2000-01) or YYYY-QQ (2000-Q1) format, None for all observations
# endDate: date in YYYY-MM (2000-01) or YYYY-QQ (2000-Q1) format, None for all observations
# Data download
response = make_OECD_request('MEI'
, [[country], subject, measure, [frequency]]
, {'startTime': startDate, 'endTime': endDate, 'dimensionAtObservation': 'AllDimensions'})
# Data transformation
if (response.status_code == 200):
responseJson = response.json()
obsList = responseJson.get('dataSets')[0].get('observations')
if (len(obsList) > 0):
print('Data downloaded from %s' % response.url)
timeList = [item for item in responseJson.get('structure').get('dimensions').get('observation') if item['id'] == 'TIME_PERIOD'][0]['values']
subjectList = [item for item in responseJson.get('structure').get('dimensions').get('observation') if item['id'] == 'SUBJECT'][0]['values']
measureList = [item for item in responseJson.get('structure').get('dimensions').get('observation') if item['id'] == 'MEASURE'][0]['values']
obs = pd.DataFrame(obsList).transpose()
obs.rename(columns = {0: 'series'}, inplace = True)
obs['id'] = obs.index
obs = obs[['id', 'series']]
obs['dimensions'] = obs.apply(lambda x: re.findall('\d+', x['id']), axis = 1)
obs['subject'] = obs.apply(lambda x: subjectList[int(x['dimensions'][1])]['id'], axis = 1)
obs['measure'] = obs.apply(lambda x: measureList[int(x['dimensions'][2])]['id'], axis = 1)
obs['time'] = obs.apply(lambda x: timeList[int(x['dimensions'][4])]['id'], axis = 1)
obs['names'] = obs['subject'] + '_' + obs['measure']
data = obs.pivot_table(index = 'time', columns = ['names'], values = 'series')
return(data)
else:
print('Error: No available records, please change parameters')
else:
print('Error: %s' % response.status_code)
You can create requests like these:
data = create_DataFrame_from_OECD(country = 'CZE', subject = ['LOCOPCNO'])
data = create_DataFrame_from_OECD(country = 'USA', frequency = 'Q', startDate = '2009-Q1', endDate = '2010-Q1')
data = create_DataFrame_from_OECD(country = 'USA', frequency = 'M', startDate = '2009-01', endDate = '2010-12')
data = create_DataFrame_from_OECD(country = 'USA', frequency = 'M', subject = ['B6DBSI01'])
data = create_DataFrame_from_OECD(country = 'USA', frequency = 'Q', subject = ['B6DBSI01'])
You can recover the data from the source using code like this.
from urllib.request import urlopen
import json
URL = 'http://stats.oecd.org/SDMX-JSON/data/MEI/USA+CZE...M/all'
response = urlopen(URL).read()
responseDict = json.loads(str(response)[2:-1])
print (responseDict.keys())
print (len(responseDict['dataSets']))
Here is the output from this code.
dict_keys(['header', 'structure', 'dataSets'])
1
If you are curious about the appearance of the [2:-1] (I would be) it's because for some reason unknown to me the str function leaves some extraneous characters at the beginning and end of the string when it converts the byte array passed to it. json.loads is documented to require a string as input.
This is the code I used to get to this point.
>>> from urllib.request import urlopen
>>> import json
>>> URL = 'http://stats.oecd.org/SDMX-JSON/data/MEI/USA+CZE...M/all'
>>> response = urlopen(URL).read()
>>> len(response)
9886387
>>> response[:50]
b'{"header":{"id":"1975590b-346a-47ee-8d99-6562ccc11'
>>> str(response[:50])
'b\'{"header":{"id":"1975590b-346a-47ee-8d99-6562ccc11\''
>>> str(response[-50:])
'b\'"uri":"http://www.oecd.org/contact/","text":""}]}}\''
I understand that this is not a complete solution as you must still crack into the dataSets structure for the data to put into pandas. It's a list but you could explore it starting with this sketch.
The latest release of pandasdmx (pandasdmx.readthedocs.io) fixes previous issues accessing OECD data in sdmx-json.

Categories