How to output plot graphs as jpeg images in python

I use the latest version of Spyder to code. I made a simple graph with this code:
import pandas as pd
import sys
import os
import plotly.express as px

filepath = input('Enter filepath: ')
assert os.path.exists(filepath), "I did not find the file at: " + str(filepath)
f = open(filepath, 'r+')
print("Hooray, we found your file!")
f.close()

file = pd.read_csv(filepath, encoding='latin1', delimiter=',')
fig = px.histogram(file, x='Idade', color='Categoria')
print(fig)
Idade means age, and Categoria shows who has cancelled the service and who has not ('Cliente' and 'Cancelado', I'm sure you know what each means). The idea is that the graph should show the ratio of cancellations across different ages. Ex: in a group of people of 20 years of age, 50 cancelled but 120 are still clients.
But when I try to run it, Spyder shows me this weird output... I don't even know what to call it:
Figure({
    'data': [{'alignmentgroup': 'True',
              'hovertemplate': 'Categoria=Cliente<br>Idade=%{x}<br>index=%{y}<extra></extra>',
              'legendgroup': 'Cliente',
              'marker': {'color': '#636efa'},
              'name': 'Cliente',
              'offsetgroup': 'Cliente',
              'orientation': 'h',
              'showlegend': True,
              'textposition': 'auto',
              'type': 'bar',
              'x': array([45, 49, 51, ..., 54, 56, 50], dtype=int64),
              'xaxis': 'x',
              'y': array([    0,     1,     2, ..., 10120, 10121, 10122], dtype=int64),
              'yaxis': 'y'},
             {'alignmentgroup': 'True',
              'hovertemplate': 'Categoria=Cancelado<br>Idade=%{x}<br>index=%{y}<extra></extra>',
              'legendgroup': 'Cancelado',
              'marker': {'color': '#EF553B'},
              'name': 'Cancelado',
              'offsetgroup': 'Cancelado',
              'orientation': 'h',
              'showlegend': True,
              'textposition': 'auto',
              'type': 'bar',
              'x': array([62, 66, 54, ..., 44, 30, 43], dtype=int64),
              'xaxis': 'x',
              'y': array([   21,    39,    51, ..., 10124, 10125, 10126], dtype=int64),
              'yaxis': 'y'}],
    'layout': {'barmode': 'relative',
               'legend': {'title': {'text': 'Categoria'}, 'tracegroupgap': 0},
               'margin': {'t': 60},
               'template': '...',
               'xaxis': {'anchor': 'y', 'domain': [0.0, 1.0], 'title': {'text': 'Idade'}},
               'yaxis': {'anchor': 'x', 'domain': [0.0, 1.0], 'title': {'text': 'index'}}}
})
How do I get an actual image instead of this? By the way, I can't use Jupyter or Google Colab, since I have to make an executable program that generates said images as JPEG or whatever.
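The text you are seeing is just the figure's textual repr, which is what print(fig) produces. As a minimal sketch of writing the figure to a static file instead (this assumes the code above, plus the kaleido package, plotly's static-image export engine, installed via pip; the output filename here is hypothetical):
# Requires: pip install kaleido
fig.write_image("histogram.jpeg")  # writes a static JPEG next to the script

# Or, to open the interactive figure in the default browser instead:
# fig.show()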

Related

google dataflow job cost optimization

I have run the code below on 522 gzip files totalling 100 GB (around 320 GB after decompression), with the data in protobuf format, writing the output to GCS. I used n1-standard machines, and the regions for input and output are all taken care of. The job cost me around $17, and that is for half an hour of data, so I really badly need to do some cost optimization here.
The cost comes from the query below:
SELECT l.value AS JobID, ROUND(SUM(cost),3) AS JobCost
FROM `PROJECT.gcp_billing_data.gcp_billing_export_v1_{}` bill,
UNNEST(bill.labels) l
WHERE service.description = 'Cloud Dataflow' and l.key = 'goog-dataflow-job-id' and
extract(date from _PARTITIONTIME) > "2020-12-31"
GROUP BY 1
Complete code
import time
import sys
import argparse
import apache_beam as beam
from apache_beam.options.pipeline_options import PipelineOptions
from apache_beam.options.pipeline_options import SetupOptions
from apache_beam.io.gcp.bigquery_tools import RetryStrategy  # needed for insert_retry_strategy below
import csv
import base64
from google.protobuf import timestamp_pb2
from google.protobuf.json_format import MessageToDict
from google.protobuf.json_format import MessageToJson
import io
import logging
from io import StringIO
from google.cloud import storage
import json

### PROTOBUF CLASS
from otherfiles import processor_pb2


class ConvertToJson(beam.DoFn):
    def process(self, message, *args, **kwargs):
        import base64
        from otherfiles import processor_pb2
        from google.protobuf.json_format import MessageToDict
        from google.protobuf.json_format import MessageToJson
        import json
        if len(message) >= 4:
            b64ProtoData = message[2]
            totalProcessorBids = int(message[3] if message[3] else 0)
            b64ProtoData = b64ProtoData.replace('_', '/')
            b64ProtoData = b64ProtoData.replace('*', '=')
            b64ProtoData = b64ProtoData.replace('-', '+')
            finalbunary = base64.b64decode(b64ProtoData)
            log = processor_pb2.ProcessorLogProto()
            log.ParseFromString(finalbunary)
            #print(log)
            jsonObj = MessageToDict(log, preserving_proto_field_name=True)
            jsonObj["totalProcessorBids"] = totalProcessorBids
            #wjdata = json.dumps(jsonObj)
            print(jsonObj)
            return [jsonObj]
        else:
            pass


class ParseFile(beam.DoFn):
    def process(self, element, *args, **kwargs):
        import csv
        for line in csv.reader([element], quotechar='"', delimiter='\t',
                               quoting=csv.QUOTE_ALL, skipinitialspace=True):
            #print(line)
            return [line]


def run():
    parser = argparse.ArgumentParser()
    parser.add_argument("--input", dest="input", required=False)
    parser.add_argument("--output", dest="output", required=False)
    parser.add_argument("--bucket", dest="bucket", required=True)
    parser.add_argument("--bfilename", dest="bfilename", required=True)
    app_args, pipeline_args = parser.parse_known_args()
    #pipeline_args.extend(['--runner=DirectRunner'])
    pipeline_options = PipelineOptions(pipeline_args)
    pipeline_options.view_as(SetupOptions).save_main_session = True
    bucket_input = app_args.bucket
    bfilename = app_args.bfilename
    storage_client = storage.Client()
    bucket = storage_client.get_bucket(bucket_input)
    blob = bucket.blob(bfilename)
    blob = blob.download_as_string()
    blob = blob.decode('utf-8')
    blob = StringIO(blob)
    pqueue = []
    names = csv.reader(blob)
    for i, filename in enumerate(names):
        if filename and filename[0]:
            pqueue.append(filename[0])
    with beam.Pipeline(options=pipeline_options) as p:
        if len(pqueue) > 0:
            input_list = app_args.input
            output_list = app_args.output
            events = (p | "create PCol from list" >> beam.Create(pqueue)
                        | "read files" >> beam.io.textio.ReadAllFromText()
                        | "Transform" >> beam.ParDo(ParseFile())
                        | "Convert To JSON" >> beam.ParDo(ConvertToJson())
                        | "Write to BQ" >> beam.io.WriteToBigQuery(
                            table='TABLE',
                            dataset='DATASET',
                            project='PROJECT',
                            schema="dataevent:STRING",
                            create_disposition=beam.io.BigQueryDisposition.CREATE_IF_NEEDED,
                            write_disposition=beam.io.BigQueryDisposition.WRITE_APPEND,
                            insert_retry_strategy=RetryStrategy.RETRY_ON_TRANSIENT_ERROR,
                            custom_gcs_temp_location='gs://BUCKET/gcs-temp-to-bq/',
                            method='FILE_LOADS'))
            ## bigquery failed rows NOT WORKING so commented
            #(events[beam.io.gcp.bigquery.BigQueryWriteFn.FAILED_ROWS] | "Bad lines" >> beam.io.textio.WriteToText("error_log.txt"))
            ## WRITING TO GCS
            #printFileConetent | "Write Text" >> beam.io.WriteToText(output_list + "file_", file_name_suffix=".json", num_shards=1, append_trailing_newlines=True)


if __name__ == '__main__':
    logging.getLogger().setLevel(logging.INFO)
    run()
The job took around 49 minutes.
Things I tried:
1) For Avro, I generated the schema (which needs to be in JSON) from the proto file and tried the code below to convert each dictionary to an Avro message, but it takes time because the dictionaries are large.
schema_separated is an Avro JSON schema, and it works fine.
# WriteToAvro and parse_schema are used below; in the full job they would come from
# apache_beam.io.avroio and fastavro, e.g.:
from apache_beam.io.avroio import WriteToAvro
from fastavro import parse_schema

with beam.Pipeline(options=pipeline_options) as p:
    if len(pqueue) > 0:
        input_list = app_args.input
        output_list = app_args.output
        p1 = p | "create PCol from list" >> beam.Create(pqueue)
        readListofFiles = p1 | "read files" >> beam.io.textio.ReadAllFromText()
        parsingProtoFile = readListofFiles | "Transform" >> beam.ParDo(ParseFile())
        printFileConetent = parsingProtoFile | "Convert To JSON" >> beam.ParDo(ConvertToJson())
        compressIdc = True
        use_fastavro = True
        printFileConetent | 'write_fastavro' >> WriteToAvro(
            output_list + "file_",
            # '/tmp/dataflow/{}/{}'.format('demo', 'output'),
            # parse_schema(json.loads(SCHEMA_STRING)),
            parse_schema(schema_separated),
            use_fastavro=use_fastavro,
            file_name_suffix='.avro',
            codec=('deflate' if compressIdc else 'null'),
        )
In the main code, I tried to insert the JSON record as a string into the BigQuery table, so that I could use JSON functions in BigQuery to extract the data. That didn't go well either, and I get the error below:
message: 'Error while reading data, error message: JSON table encountered too many errors,
giving up. Rows: 1; errors: 1. Please look into the errors[] collection for more details.'
reason: 'invalid'> [while running 'Write to
BQ/BigQueryBatchFileLoads/WaitForDestinationLoadJobs']
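A hedged guess at the mismatch, based on the schema above: with schema="dataevent:STRING", WriteToBigQuery expects each row to be a dict whose keys match that one-column schema, so the JSON dict coming out of ConvertToJson would first need to be serialized into that single field. A minimal runnable sketch (the stand-in record and step name are hypothetical):
import json
import apache_beam as beam

with beam.Pipeline() as p:
    (p
     | beam.Create([{"timestamp": "1609286400", "totalProcessorBids": 1}])  # stand-in record
     | "To BQ row" >> beam.Map(lambda d: {"dataevent": json.dumps(d)})  # one STRING column
     | beam.Map(print))  # in the real job this would feed WriteToBigQuery(schema="dataevent:STRING")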
Inserting the above JSON dictionary into BigQuery with a proper JSON schema on the table works fine, by the way.
The challenge now is that the data size doubles after deserialising the proto into a JSON dict, and Dataflow cost is calculated by how much data is processed.
I'm reading and trying a lot to make this work; if it does, I can make it stable for production.
Sample JSON record.
{'timestamp': '1609286400', 'bidResponseId': '5febc300000115cd054b9fd6840a5af1', 'aggregatorId': '1', 'userId': '7567d74e-2e43-45f4-a42a-8224798bb0dd', 'uniqueResponseId': '', 'adserverId': '1002418', 'dataVersion': '1609285802', 'geoInfo': {'country': '101', 'region': '122', 'city': '11605', 'timezone': '420'}, 'clientInfo': {'os': '4', 'browser': '1', 'remoteIp': '36.70.64.0'}, 'adRequestInfo': {'requestingPage': 'com.opera.mini.native', 'siteId': '557243954', 'foldPosition': '2', 'adSlotId': '1', 'isTest': False, 'opType': 'TYPE_LEARNING', 'mediaType': 'BANNER'}, 'userSegments': [{'id': '2029660', 'weight': -1.0, 'recency': '1052208'}, {'id': '2034588', 'weight': -1.0, 'recency': '-18101'}, {'id': '2029658', 'weight': -1.0, 'recency': '744251'}, {'id': '2031067', 'weight': -1.0, 'recency': '1162398'}, {'id': '2029659', 'weight': -1.0, 'recency': '862833'}, {'id': '2033498', 'weight': -1.0, 'recency': '802749'}, {'id': '2016729', 'weight': -1.0, 'recency': '1620540'}, {'id': '2034584', 'weight': -1.0, 'recency': '111571'}, {'id': '2028182', 'weight': -1.0, 'recency': '744251'}, {'id': '2016726', 'weight': -1.0, 'recency': '1620540'}, {'id': '2028183', 'weight': -1.0, 'recency': '744251'}, {'id': '2028178', 'weight': -1.0, 'recency': '862833'}, {'id': '2016722', 'weight': -1.0, 'recency': '1675814'}, {'id': '2029587', 'weight': -1.0, 'recency': '38160'}, {'id': '2028177', 'weight': -1.0, 'recency': '862833'}, {'id': '2016719', 'weight': -1.0, 'recency': '1675814'}, {'id': '2027404', 'weight': -1.0, 'recency': '139031'}, {'id': '2028172', 'weight': -1.0, 'recency': '1052208'}, {'id': '2028173', 'weight': -1.0, 'recency': '1052208'}, {'id': '2034058', 'weight': -1.0, 'recency': '1191459'}, {'id': '2016712', 'weight': -1.0, 'recency': '1809526'}, {'id': '2030025', 'weight': -1.0, 'recency': '1162401'}, {'id': '2015235', 'weight': -1.0, 'recency': '139031'}, {'id': '2027712', 'weight': -1.0, 'recency': '139031'}, {'id': '2032447', 'weight': -1.0, 'recency': '7313670'}, {'id': '2034815', 'weight': -1.0, 'recency': '586825'}, {'id': '2034811', 'weight': -1.0, 'recency': '659366'}, {'id': '2030004', 'weight': -1.0, 'recency': '139031'}, {'id': '2027316', 'weight': -1.0, 'recency': '1620540'}, {'id': '2033141', 'weight': -1.0, 'recency': '7313670'}, {'id': '2034736', 'weight': -1.0, 'recency': '308252'}, {'id': '2029804', 'weight': -1.0, 'recency': '307938'}, {'id': '2030188', 'weight': -1.0, 'recency': '3591519'}, {'id': '2033449', 'weight': -1.0, 'recency': '1620540'}, {'id': '2029672', 'weight': -1.0, 'recency': '1441083'}, {'id': '2029664', 'weight': -1.0, 'recency': '636630'}], 'perfInfo': {'timeTotal': '2171', 'timeBidInitialize': '0', 'timeProcessDatastore': '0', 'timeGetCandidates': '0', 'timeAdFiltering': '0', 'timeEcpmComputation': '0', 'timeBidComputation': '0', 'timeAdSelection': '0', 'timeBidSubmit': '0', 'timeTFQuery': '0', 'timeVWQuery': '8'}, 'learningPercent': 0.10000000149011612, 'pageLanguageId': '0', 'sspUserId': 'CAESECHFlNeuUm16IYThguoQ8ck_1', 'minEcpm': 0.12999999523162842, 'adSpotId': '1', 'creativeSizes': [{'width': '7', 'height': '7'}], 'pageTypeId': '0', 'numSlots': '0', 'eligibleLIs': [{'type': 'TYPE_OPTIMIZED', 'liIds': [{'id': 44005, 'reason': '12', 'creative_id': 121574, 'bid_amount': 8.403361132251052e-08}, {'id': 46938, 'reason': '12', 'creative_id': 124916, 'bid_amount': 8.403361132251052e-06}, {'id': 54450, 'reason': '12', 'creative_id': 124916, 'bid_amount': 2.0117618771650174e-05}, {'id': 54450, 'reason': '12', 'creative_id': 135726, 'bid_amount': 
2.4237295484638312e-05}]}, {'type': 'TYPE_LEARNING'}], 'bidType': 4, 'isSecureRequest': True, 'sourceType': 3, 'deviceBrand': 82, 'deviceModel': 1, 'sellerNetworkId': 12814, 'interstitialRequest': False, 'nativeAdRequest': True, 'native': {'mainImg': [{'w': 0, 'h': 0, 'wmin': 1200, 'hmin': 627}, {'w': 0, 'h': 0, 'wmin': 1200, 'hmin': 627}, {'w': 0, 'h': 0, 'wmin': 1200, 'hmin': 627}, {'w': 0, 'h': 0, 'wmin': 1200, 'hmin': 627}], 'iconImg': [{'w': 0, 'h': 0, 'wmin': 0, 'hmin': 0}, {'w': 0, 'h': 0, 'wmin': 100, 'hmin': 100}, {'w': 0, 'h': 0, 'wmin': 0, 'hmin': 0}, {'w': 0, 'h': 0, 'wmin': 100, 'hmin': 100}], 'logoImg': [{'w': 0, 'h': 0, 'wmin': 100, 'hmin': 100}, {'w': 0, 'h': 0, 'wmin': 0, 'hmin': 0}, {'w': 0, 'h': 0, 'wmin': 100, 'hmin': 100}, {'w': 0, 'h': 0, 'wmin': 0, 'hmin': 0}]}, 'throttleWeight': 1, 'isSegmentReceived': False, 'viewability': 46, 'bannerAdRequest': False, 'videoAdRequest': False, 'mraidAdRequest': True, 'jsonModelCallCount': 0, 'totalProcessorBids': 1}
Can someone help me here?
My advice here would be to use Java to perform your transformations.
In Java, you can convert the Protobuf into Avro like this: Writing protobuf object in parquet using apache beam
And once you've done that, you can use AvroIO to write the data to files.
Java is much more performant than Python and will save you computing resources. Since this job does something very simple and does not require any special Python libraries, I strongly encourage you to try going with Java.
Just wanted to bring your attention to FlexRS, if you haven't checked it yet. It uses preemptible virtual machine (VM) instances, and that way you can reduce your cost.
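If it helps, a sketch of what enabling FlexRS might look like for the job above; the flexrs_goal flag and the Dataflow-runner requirement are the assumptions here, REGION is a placeholder, and FlexRS applies to batch jobs only:
from apache_beam.options.pipeline_options import PipelineOptions

# Hypothetical extra flags for the pipeline options built in run()
pipeline_args = [
    '--runner=DataflowRunner',
    '--region=REGION',
    '--flexrs_goal=COST_OPTIMIZED',  # lets Dataflow schedule preemptible VMs
]
pipeline_options = PipelineOptions(pipeline_args)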

Drag and move the vertical line on the time series data graph to mark and hold

I want to use Python plotly to mark a vertical line that can be dragged around on a time series graph.
I remember browsing plotly or Dash web pages that described this feature, but I couldn't find them when I searched again.
If I am mistaken about it existing, please let me know how to realize this function.
One approach is to use a shapes object in a dcc.Graph. You have to configure the graph to be editable in order to move the shape. You can then use the relayoutData property of the dcc.Graph as an input to the callback function to get the position of the shape on the graph. This is explained in the link below. I don't think there is a way to restrict the movement of the shape, unfortunately, so in your case there is no way to force the vertical line to stay vertical; a user would be able to alter its angle, for example.
https://community.plotly.com/t/moving-the-location-of-a-graph-point-interactively/7161/2
I've also included some starter code as an example of a movable vertical line on a dash plot.
import json
from textwrap import dedent as d
import dash
from dash.dependencies import Input, Output
import dash_core_components as dcc
import dash_html_components as html

app = dash.Dash(__name__)
app.css.append_css({'external_url': 'https://codepen.io/chriddyp/pen/dZVMbK.css'})
styles = {'pre': {'border': 'thin lightgrey solid', 'overflowX': 'scroll'}}

app.layout = html.Div(className='row', children=[
    dcc.Graph(
        id='basic-interactions',
        className='six columns',
        figure={
            'data': [{
                'x': [1, 2, 3, 4],
                'y': [4, 1, 3, 5],
                'text': ['a', 'b', 'c', 'd'],
                'customdata': ['c.a', 'c.b', 'c.c', 'c.d'],
                'name': 'Trace 1',
                'mode': 'markers',
                'marker': {'size': 12}
            }, {
                'x': [1, 2, 3, 4],
                'y': [9, 4, 1, 4],
                'text': ['w', 'x', 'y', 'z'],
                'customdata': ['c.w', 'c.x', 'c.y', 'c.z'],
                'name': 'Trace 2',
                'mode': 'markers',
                'marker': {'size': 12}
            }],
            'layout': {
                'shapes': [{
                    'type': 'line',
                    'x0': 0.5,
                    'x1': 0.5,
                    'xref': 'paper',
                    'y0': 0,
                    'y1': 9,
                    'yref': 'y',
                    'line': {
                        'width': 4,
                        'color': 'rgb(30, 30, 30)',
                        'dash': 'dashdot'
                    }
                }]
            }
        },
        config={
            'editable': True,
            'edits': {'shapePosition': True}
        }
    ),
    html.Div(
        className='six columns',
        children=[
            html.Div([
                dcc.Markdown(d("""
                    **Zoom and Relayout Data**
                """)),
                html.Pre(id='relayout-data', style=styles['pre']),
            ])
        ]
    )
])


@app.callback(
    Output('relayout-data', 'children'),
    [Input('basic-interactions', 'relayoutData')])
def display_selected_data(relayoutData):
    print("relayoutData: " + str(relayoutData))
    return json.dumps(relayoutData, indent=2)


if __name__ == '__main__':
    app.run_server(debug=True)
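As a hedged follow-up to the starter code: when the shape is dragged, relayoutData contains keys such as 'shapes[0].x0' and 'shapes[0].x1' (the index refers to the first shape in the layout), so a callback could recover the line's new x-position along these lines:
def shape_x_position(relayoutData):
    """Return the dragged line's new x position, if this relayout event moved it."""
    if relayoutData and 'shapes[0].x0' in relayoutData:
        return relayoutData['shapes[0].x0']
    return None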

Iterate on figures in plotly to create an animation

My function to create figures looks as follows:
def bar_plot_plotly(self, frame=None):
    md = self.get_book()
    plotly.offline.init_notebook_mode(connected=True)
    fig = go.Figure()
    if md:
        for step in range(2):
            fig.add_trace(go.Indicator(
                mode="number+gauge+delta", value=md.qasks[step],
                delta={'reference': md.qasks[step]},
                domain={'x': [0, 0.4], 'y': [0.2*step, 0.2*step+0.1]},
                title={'text': str(md.asks[step])},
                gauge={
                    'shape': "bullet",
                    'axis': {'range': [min(md.qasks), max(md.qasks)]},
                    'threshold': {
                        'line': {'color': "red", 'width': 2},
                        'thickness': 0.75,
                        'value': md.qasks[step]},
                    'bar': {'color': "red"}}))
            fig.add_trace(go.Indicator(
                mode="number+gauge+delta", value=md.qbids[step],
                delta={'reference': md.qbids[step]},
                domain={'x': [0.6, 1], 'y': [0.2*step, 0.2*step+0.1]},
                title={'text': str(md.bids[step])},
                gauge={
                    'shape': "bullet",
                    'axis': {'range': [min(md.qbids), max(md.qbids)]},
                    'threshold': {
                        'line': {'color': "green", 'width': 2},
                        'thickness': 0.75,
                        'value': md.qbids[step]},
                    'bar': {'color': "green"}}))
So at each iteration it creates a figure; my goal is to iterate over each of those figures in order to create an animation. The idea is to do the same as in the documentation, https://plot.ly/python/animations/#animated-bar-charts-with-plotly-express, but there they use a pandas DataFrame, and I would like to do it per figure, basically.
I used to use FuncAnimation in matplotlib, so I was wondering whether it is possible to use the above code to do the same? Any advice is more than welcome!
Thanks
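A hedged sketch of one way to do this: collect the per-step figures in a list, use the first figure's traces as the initial data, and wrap each figure's traces in a go.Frame. The bar traces below are stand-ins for the Indicator figures produced by bar_plot_plotly, and whether Indicator traces animate as smoothly as bars is worth verifying.
import plotly.graph_objects as go

# Stand-in figures, one per step (replace with the figures your function builds)
figures = [go.Figure(go.Bar(x=['a', 'b'], y=[step, 2 * step]))
           for step in range(1, 4)]

# The first figure supplies the initial traces; every figure becomes a frame
animated = go.Figure(
    data=figures[0].data,
    frames=[go.Frame(data=f.data, name=str(i)) for i, f in enumerate(figures)],
)
animated.update_layout(updatemenus=[{
    'type': 'buttons',
    'buttons': [{'label': 'Play', 'method': 'animate', 'args': [None]}],
}])
animated.show()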

plotly colormap addition to the chart

import plotly
import plotly.graph_objs as go
import matplotlib.pyplot as plt
import numpy as np

plotly.offline.init_notebook_mode(connected=True)

colorscale = 'Earth'
#print(" : Min : Q1 : Median : Q3 : Max : Mean : Good Mean : Good STD : Outliers :")
data = [
    {
        'x': ['Min', 'Min', 'Min', 'Min', 'Min'],
        'y': ['config1', 'config2', 'config3', 'config4', 'config5'],
        'mode': 'markers',
        'marker': {
            'color': [0.89, 0.892, 0.886, 0.901, 0.869],
            'size': [30, 30, 30, 30, 30],
            'showscale': False,
            'colorscale': colorscale,
            'reversescale': True,
            'cmin': 0.865,
            'cmax': 0.901,
        }
    },
    {
        'x': ['Q1', 'Q1', 'Q1', 'Q1', 'Q1'],
        'y': ['config1', 'config2', 'config3', 'config4', 'config5'],
        'mode': 'markers',
        'marker': {
            'color': [0.912, 0.908, 0.892, 0.915, 0.889],
            'size': [30, 30, 30, 30, 30],
            'showscale': False,
            'colorscale': colorscale,
            'reversescale': True,
            'cmin': 0.885,
            'cmax': 0.915,
        }
    },
    {
        'x': ['Median', 'Median', 'Median', 'Median', 'Median'],
        'y': ['config1', 'config2', 'config3', 'config4', 'config5'],
        'mode': 'markers',
        'marker': {
            'color': [0.919, 0.912, 0.914, 0.917, 0.9],
            'size': [30, 30, 30, 30, 30],
            'showscale': False,
            'colorscale': colorscale,
            'reversescale': True,
            'cmin': 0.89,
            'cmax': 0.919,
        }
    },
    {
        'x': ['Q3', 'Q3', 'Q3', 'Q3', 'Q3'],
        'y': ['config1', 'config2', 'config3', 'config4', 'config5'],
        'mode': 'markers',
        'marker': {
            'color': [0.929, 0.919, 0.925, 0.922, 0.909],
            'size': [30, 30, 30, 30, 30],
            'showscale': False,
            'colorscale': colorscale,
            'reversescale': True,
            'cmin': 0.90,
            'cmax': 0.929,
        }
    },
    {
        'x': ['Max', 'Max', 'Max', 'Max', 'Max'],
        'y': ['config1', 'config2', 'config3', 'config4', 'config5'],
        'mode': 'markers',
        'marker': {
            'color': [0.95, 0.932, 0.933, 0.935, 0.935],
            'size': [30, 30, 30, 30, 30],
            'showscale': False,
            'colorscale': colorscale,
            'reversescale': True,
            'cmin': 0.93,
            'cmax': 0.95,
        }
    },
    {
        'x': ['Mean', 'Mean', 'Mean', 'Mean', 'Mean'],
        'y': ['config1', 'config2', 'config3', 'config4', 'config5'],
        'mode': 'markers',
        'marker': {
            'color': [0.921, 0.912, 0.91, 0.918, 0.9],
            'size': [30, 30, 30, 30, 30],
            'showscale': False,
            'colorscale': colorscale,
            'reversescale': True,
            'cmin': 0.898,
            'cmax': 0.921,
        }
    }
]

layout = go.Layout(title='Parameters of evaluation',
                   xaxis=dict(range=[-0.5, 10]),
                   yaxis=dict(range=[-0.5, 5.5]))
fig = go.Figure(data=data, layout=layout)
#plotly.offline.iplot(data, filename='scatter-colorscale')
plotly.offline.iplot(fig)
Hello all, I am working on a visualization; I just started with plotly today, and it is so cool. Anyway, I want to add a colormap at the side of the diagram for whatever color scheme I choose. For me it is probably impossible to find a global colormap, because my color-gradient start and end points are different for each column. But how can I at least add a colormap at the side that shows which color is the highest and which is the lowest? Is there an easy way?
I want to add a colormap like this, at the right-hand side:
{
    'x': ['Mean', 'Mean', 'Mean', 'Mean', 'Mean'],
    'y': ['config1', 'config2', 'config3', 'config4', 'config5'],
    'mode': 'markers,text',
    'marker': {
        'colorbar': dict(nticks=3, tickmode='array', tickvals=[2, 4.5, 7],
                         showticklabels=True, ticktext=['lowest', 'middle', 'highest']),
        'size': [30, 30, 30, 30, 30],
        'showscale': True,
        'colorscale': colorscale,
        'reversescale': True,
        'opacity': 0
    }
}
I used a hack like the trace above to solve my problem for now; others can suggest a better way to do it. I added an extra trace with opacity zero and attached a custom colorbar to it. That let me add a colorbar that does not interfere with the other data. I could have added this colorbar to any of the existing traces, too.

Looping through Get Request in Python

I am trying to scrape some ticketing inventory info using StubHub's API, but I cannot figure out how to loop through the GET request.
I basically want to loop through multiple events. eventid_list is a list of event ids. The code I have is below:
inventory_url = 'https://api.stubhub.com/search/inventory/v2'
for eventid in eventid_list:
    data = {'eventid': eventid, 'rows': 500}
    inventory = requests.get(inventory_url, headers=headers, params=data)
    inv = inventory.json()
    print(inv)
    listing_df = pd.DataFrame(inv['listing'])
When I run this, the dataframe only returns results for one event, instead of multiple. What am I doing wrong?
EDIT: print(inv) outputs something like this:
{
    'eventId': 102994860,
    'totalListings': 82,
    'totalTickets': 236,
    'minQuantity': 1,
    'maxQuantity': 6,
    'listing': [
        {
            'listingId': 1297697413,
            'currentPrice': {'amount': 108.58, 'currency': 'USD'},
            'listingPrice': {'amount': 88.4, 'currency': 'USD'},
            'sectionId': 1638686,
            'row': 'E',
            'quantity': 6,
            'sellerSectionName': 'FRONT MEZZANINE RIGHT',
            'sectionName': 'Front Mezzanine Sides',
            'seatNumbers': '2,4,6,8,10,12',
            'zoneId': 240236,
            'zoneName': 'Front Mezzanine',
            'deliveryTypeList': [5],
            'deliveryMethodList': [23, 24, 25],
            'isGA': 0,
            'dirtyTicketInd': False,
            'splitOption': '2',
            'ticketSplit': '1',
            'splitVector': [1, 2, 3, 4, 6],
            'sellerOwnInd': 0,
            'score': 0.0
        },
        ...
        {
            'listingId': 1297697417,
            'currentPrice': {'amount': 108.58, 'currency': 'USD'},
            'listingPrice': {'amount': 88.4, 'currency': 'USD'},
            'sectionId': 1638686,
            'row': 'D',
            'quantity': 3,
            'sellerSectionName': 'FRONT MEZZANINE RIGHT',
            'sectionName': 'Front Mezzanine Sides',
            'seatNumbers': '2,4,6',
            'zoneId': 240236,
            'zoneName': 'Front Mezzanine',
            'deliveryTypeList': [5],
            'deliveryMethodList': [23, 24, 25],
            'isGA': 0,
            'dirtyTicketInd': False,
            'splitOption': '2',
            'ticketSplit': '1',
            'splitVector': [1, 3],
            'sellerOwnInd': 0,
            'score': 0.0
        },
    ]
}
I'm guessing inventory.json()['listing'] is the list of listings for one event. If so, you can try this:
import itertools

import pandas as pd
import requests

inventory_url = 'https://api.stubhub.com/search/inventory/v2'

def get_event(eventid):
    """Given an event id, return inventory['listing']."""
    # headers and eventid_list come from your existing code
    data = {'eventid': eventid, 'rows': 500}
    inventory = requests.get(inventory_url, headers=headers, params=data)
    return inventory.json().get('listing', [])

# Concatenate the output of all events
# (itertools has no flatten; chain.from_iterable does that job)
events = itertools.chain.from_iterable(get_event(eventid) for eventid in eventid_list)
listing_df = pd.DataFrame(list(events))
This is just a starting point: you will still have to deal with cases where inventory.status_code != 200. The resulting frame may also not be very useful as-is, so you may have to flatten some of the nested attributes of the listing items, like currentPrice and listingPrice.
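For flattening those nested price dicts, one option is pandas' json_normalize (available as pd.json_normalize since pandas 1.0), which expands nested dicts into dotted column names. A small sketch using values copied from the sample response above:
import pandas as pd

listings = [{'listingId': 1297697413,
             'currentPrice': {'amount': 108.58, 'currency': 'USD'},
             'listingPrice': {'amount': 88.4, 'currency': 'USD'}}]
listing_df = pd.json_normalize(listings)
# columns: listingId, currentPrice.amount, currentPrice.currency,
#          listingPrice.amount, listingPrice.currency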
