Data output is not the same as inside the function - Python

I am currently having an issue trying to store data in a list (using dataclasses). When I print the data inside the list from within the function (PullIncursionData()), it outputs a set of distinct numbers (never the same from run to run, which is expected given the data's nature). But when I call the function, store its return value in a variable, and print from that, it somehow prints only the same number repeated.
I cannot share the exact numbers, as they update with EVE Online's API, so the only way to see them is to run it locally and read the first list yourself.
The repository is here: https://github.com/AtherActive/EVEAPI-Demo
Heads up! main.py (the file with the issue; a snippet is below) contains more functions. Only the functions from line 90 onward matter for this question; the rest can be ignored, as they do not interact with these.
def PullIncursionData():
    # Pulls data from the URL and converts it into JSON
    url = 'https://esi.evetech.net/latest/incursions/?datasource=tranquility'
    data = rq.get(url)
    jsData = data.json()
    # Init var to store incursions
    incursions = []
    # Set length for the loop
    length = len(jsData)
    # Every loop, incursion data is read by __parseIncursionData() and added to incursions.
    for i in range(length):
        # Add data to var incursions.
        incursions.append(__parseIncursionData(jsData, i))
        # If dev mode, print some debug. Can be toggled in settings.py
        if settings.developerMode == 1:
            print(incursions[i].constellation_id)
    return incursions
# Basically parses the input data in a decent manner. No comments needed really.
def __parseIncursionData(jsData, i):
    icstruct = stru.Incursion
    icstruct.constellation_id = jsData[i]['constellation_id']
    icstruct.constellation_name = 'none'
    icstruct.staging = jsData[i]['staging_solar_system_id']
    icstruct.region_name = ResolveSystemNames(icstruct.constellation_id, 'con-reg')
    icstruct.status = jsData[i]['state']
    icstruct.systems_id = jsData[i]['infested_solar_systems']
    icstruct.systems_names = ResolveSystemNames(jsData[i]['infested_solar_systems'], 'system')
    return icstruct
# Resolves names for systems, regions and constellations. Still WIP.
def ResolveSystemNames(id, mode='constellation'):
    # init value
    output_name = 'none'
    # If constellation, pull data and find the region name.
    if mode == 'con-reg':
        url = 'https://www.fuzzwork.co.uk/api/mapdata.php?constellationid={}&format=json'.format(id)
        data = rq.get(url)
        jsData = data.json()
        output_name = jsData[0]['regionname']
    # Pulls system names from Fuzzwork.co.uk.
    elif mode == 'system':
        # Convert output to a list.
        output_name = []
        length = len(id)
        for i in range(length):
            url = 'https://www.fuzzwork.co.uk/api/mapdata.php?solarsystemid={}&format=json'.format(id[i])
            data = rq.get(url)
            jsData = data.json()
            output_name.append(jsData[i]['solarsystemname'])
    return output_name
icdata = PullIncursionData()
print('external data check:')
length = len(icdata)
for i in range(length):
    print(icdata[i].constellation_id)
structures.py (custom file)
#dataclass
class Incursion:
    constellation_id = int
    constellation_name = str
    staging = int
    staging_name = str
    systems_id = list
    systems_names = list
    region_name = str
    status = str

    def ___init___(self):
        self.constellation_id = -1
        self.constellation_name = 'undefined'
        self.staging = -1
        self.staging_name = 'undefined'
        self.systems_id = []
        self.systems_names = []
        self.region_name = 'undefined'
        self.status = 'unknown'
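For context, the symptom here is consistent with a class-vs-instance mix-up: __parseIncursionData assigns stru.Incursion, the class object itself (it is never called), to icstruct, so every loop iteration mutates the same shared class attributes. The inner print shows each freshly written value, while the final list holds one object repeated. A minimal sketch of that pitfall, independent of the EVE data:

class Box:
    value = -1

a = Box        # binds the class itself; Box() would create an instance
a.value = 10
b = Box        # the very same class object again
b.value = 99
print(a.value, b.value)  # 99 99 -- one shared attribute, not two objects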


How to convert a Hive schema to a BigQuery schema using Python?

What I get from the API:
"name":"reports"
"col_type":"array<struct<imageUrl:string,reportedBy:string>>"
So in the Hive schema I get:
reports array<struct<imageUrl:string,reportedBy:string>>
Note: I receive the Hive array schema as a string from the API.
My target:
bigquery.SchemaField("reports", "RECORD", mode="NULLABLE",
    fields=(
        bigquery.SchemaField('imageUrl', 'STRING'),
        bigquery.SchemaField('reportedBy', 'STRING')
    )
)
Note: I would like to create universal code that can handle any number of struct fields inside the array.
Any tips are welcome.
I tried creating a script that parses your input, which is reports array<struct<imageUrl:string,reportedBy:string>>. It converts your input to a dictionary that can be used as the schema when creating a table. The main idea of the approach is that instead of using SchemaField(), you create a dictionary, which is much easier than constructing SchemaField() objects with parameters from your example input.
NOTE: The script is only tested against your input, and it can parse additional fields if they are added inside struct<.
import re
from google.cloud import bigquery

def is_even(number):
    return (number % 2) == 0

def clean_string(str_value):
    return re.sub(r'[\W_]+', '', str_value)

def convert_to_bqdict(api_string):
    """
    This only works for a struct with multiple fields.
    This could give you an idea on constructing a schema dict for BigQuery.
    """
    main_dict = {}
    struct_dict = {}
    field_arr = []
    schema_arr = []
    # Hard coded this since not sure what the string will look like if there are more inputs
    init_struct = api_string.split(' ')  # was `sample.split(' ')`, which ignored the parameter
    main_dict["name"] = init_struct[0]
    main_dict["type"] = "RECORD"
    main_dict["mode"] = "NULLABLE"
    cont_struct = init_struct[1].split('<')
    num_elem = len(cont_struct)
    # parse fields inside of struct<
    for i in range(num_elem):
        # fields are seen on even indices
        if is_even(i) and i != 0:
            temp = list(filter(None, cont_struct[i].split(',')))  # remove blank elements
            for elem in temp:
                fields = list(filter(None, elem.split(':')))
                struct_dict["name"] = clean_string(fields[0])
                # "type" works for STRING as of the moment; refer to
                # https://cloud.google.com/bigquery/docs/schemas#standard_sql_data_types
                # for the accepted data types
                struct_dict["type"] = clean_string(fields[1]).upper()
                struct_dict["mode"] = "NULLABLE"
                field_arr.append(struct_dict)
                struct_dict = {}
            main_dict["fields"] = field_arr  # assign dict to array of fields
    schema_arr.append(main_dict)
    return schema_arr
sample = "reports array<struct<imageUrl:string,reportedBy:string,newfield:bool>>"
bq_dict = convert_to_bqdict(sample)
client = bigquery.Client()
project = client.project
dataset_ref = bigquery.DatasetReference(project, '20211228')
table_ref = dataset_ref.table("20220203")
table = bigquery.Table(table_ref, schema=bq_dict)
table = client.create_table(table)
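Traced by hand for the sample string above (an illustration, not captured program output), the function returns a list with one nested dict; the BigQuery client accepts such mappings in a schema in place of SchemaField objects:

[
    {
        'name': 'reports', 'type': 'RECORD', 'mode': 'NULLABLE',
        'fields': [
            {'name': 'imageUrl', 'type': 'STRING', 'mode': 'NULLABLE'},
            {'name': 'reportedBy', 'type': 'STRING', 'mode': 'NULLABLE'},
            {'name': 'newfield', 'type': 'BOOL', 'mode': 'NULLABLE'},
        ],
    }
]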

Unable to iterate through a csv.DictReader object

I'm having trouble iterating through a DictReader object. I'm completely stumped, because the object gets returned from another function with the same properties (size and fieldnames), yet I cannot figure out for the life of me how to iterate through it. I know there have been some changes to this object type, so I'll mention that this is Python 3.7. Is there another way to access the data in a DictReader object?
Anyway, here's the relevant code. I cleaned it up to post here, so I hope it's not too confusing:
def schedchange():
    data = otherfunction.data_get(referer_url=setup.url)
    logging.info(type(data))  # <class 'csv.DictReader'>
    import sys
    logging.info(sys.getsizeof(data))  # logs a value of 56
    logging.info(data.fieldnames)  # logs the fieldnames properly
    count = 0
    for row in data:
        count += 1
    logging.info('aspen count: ')
    logging.info(count)  # logs a value of: 37337
    return data
def main():
    rosters = client.schedchange()
    logger.info(rosters.fieldnames)  # Prints/logs fieldnames properly
    import sys
    logging.info(sys.getsizeof(rosters))  # Logs 56, the same size as in schedchange
    logging.info(type(rosters))  # <class 'csv.DictReader'>
    count = 0
    for i in rosters:  # I've also tried enumerate(rosters) here
        count += 1
        logger.info(count)  # nothing gets logged
    logger.info('end count: ')
    logger.info(count)  # Count never gets incremented, so a 0 is logged here
def data_get(self, referer_url):
    url = self.baseurl + 'quickReportMenu.do' + ';jessionid={}'.format(self.sessionid)
    r1 = get_retry(self.s, url, headers={'Referer': referer_url},
                   params={'format': 'simple',
                           'extension': '0',
                           'deploymentId': 'did'})
    url = self.baseurl + 'quickReportMenu.do'
    self.payload['org.apache.struts.taglib.html.TOKEN'] = findstruts(r1.text)
    self.payload['userEvent'] = '930'
    self.payload['deploymentId'] = 'did'
    self.payload['extension'] = '0'
    p1 = post_retry(self.s, url, headers={'Referer': r1.url}, data=self.payload)
    # Extract the url for the csv from the text of the response
    popupid = findurl(p1.text)
    # Get csv
    url = self.baseurl + popupid
    d1 = get_retry_simple(self.s, url, headers={'Referer': p1.url})
    # Break text into iterable lines on each \n
    d1.encoding = 'utf-8-sig'
    iterdat = str.splitlines(d1.text)
    # Parse csv
    data = csv.DictReader(iterdat)
    return data
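For what it's worth, the symptoms above are consistent with iterator exhaustion: csv.DictReader is a one-pass iterator, so the counting loop inside schedchange() consumes all 37337 rows, and the loop in main() finds nothing left (fieldnames and getsizeof still report fine because the object itself is intact). A minimal sketch of the behavior, plus one workaround (materializing the rows into a list):

import csv, io

reader = csv.DictReader(io.StringIO("a,b\n1,2\n3,4\n"))
print(sum(1 for _ in reader))  # 2 -- the first pass consumes the iterator
print(sum(1 for _ in reader))  # 0 -- a second pass finds nothing left

# Workaround: materialize once, then iterate the list as often as needed.
rows = list(csv.DictReader(io.StringIO("a,b\n1,2\n3,4\n")))
print(len(rows), rows[0])  # 2 {'a': '1', 'b': '2'}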

Iterating through the Python and pandas loop

What is the best way to look into how the loop works through its iterations?
I have defined 2 functions which have to run one after another (the 2nd gets the result of the 1st and works it through).
Ultimately I need a 2-row pandas DataFrame as the output.
Sample code below.
def candle_data(
    figi,
    int1=candle_resolution,
    from1=previous_minutemark,
    to1=last_minutemark
):
    response = market_api.market_candles_get(figi=ticker_figi_test, from_=from1, to=to1, interval=int1)
    if response.status_code == 200:
        return response.parse_json().dict()
    else:
        return print(response.parse_error())

def response_to_pandas_df(response):
    df_candles = pd.DataFrame(response['payload'])
    df_candles = pd.json_normalize(df_candles['candles'])
    df_candles = df_candles[df_candles['time'] >= previous_minutemark]
    df_candles = df_candles[['c', 'figi', 'time']]
    df_candles_main = df_candles_template.append(df_candles)
    return df_candles_main
Then I call the functions in a loop:
ticker_figi_list = ["BBG000CL9VN6", "BBG000R7Z112"]
df_candles_main = df_candles_template

for figi in ticker_figi_list:
    response = candle_data(figi)
    df_candles_main = response_to_pandas_df(response)

But in return I get only 1 row of data, for the 1st FIGI in the list.
I suppose that defining candle_data() to call the API with ticker_figi_test, which contains only 1 value, may be the cause, but I'm not sure how to work around this.
Thank you in advance.
It looks like the problem is that you are calling the API with figi=ticker_figi_test. I assume ticker_figi_test is equal to the first figi in your list, so you aren't actually calling the API with a different figi on each iteration. Try changing it to the following:
response = market_api.market_candles_get(figi=figi, from_=from1, to=to1, interval=int1)
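A second thing worth checking (an observation on the posted loop, not part of the original answer): df_candles_main is reassigned on every iteration, so even with the fix above only the last FIGI's rows would survive. A sketch that accumulates every FIGI instead, assuming candle_data and response_to_pandas_df as defined in the question:

import pandas as pd

frames = []
for figi in ticker_figi_list:
    response = candle_data(figi)
    # collect each FIGI's rows instead of overwriting the previous ones
    frames.append(response_to_pandas_df(response))
df_candles_main = pd.concat(frames, ignore_index=True)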

Exporting data from Python to a text file using the petl package

I am trying to extract raw data from a text file and, after processing it, export it to another text file. Below is the Python code I have written for this process. I am using the petl package with Python 3. 'locations.txt' is the raw data file.
import glob, os
from petl import *

class ETL():
    def __init__(self, input):
        self.list = input

    def parse_P(self):
        personids = None
        for term in self.list:
            if term.startswith('P'):
                personids = term[1:]
        personid = personids.split(',')
        return personid

    def return_location(self):
        location = None
        for term in self.list:
            if term.startswith('L'):
                location = term[1:]
        return location

    def return_location_id(self, location):
        location = self.return_location()
        locationid = None

    def return_country_id(self):
        countryid = None
        for term in self.list:
            if term.startswith('C'):
                countryid = term[1:]
        return countryid

    def return_region_id(self):
        regionid = None
        for term in self.list:
            if term.startswith('R'):
                regionid = term[1:]
        return regionid

    def return_city_id(self):
        cityid = None
        for term in self.list:
            if term.startswith('I'):
                cityid = term[1:]
        return cityid
print(os.getcwd())
os.chdir(r"D:\ETL-IntroductionProject")  # raw string avoids backslash-escape issues
print(os.getcwd())

final_location = [['L', 'P', 'C', 'R', 'I']]
new_location = fromtext('locations.txt', encoding='Latin-1')
stored_list = []

for identifier in new_location:
    if identifier[0].startswith('L'):
        identifier = identifier[0]
        info_list = identifier.split('_')
        stored_list.append(info_list)

for lst in stored_list:
    tabling = ETL(lst)
    location = tabling.return_location()
    country = tabling.return_country_id()
    city = tabling.return_city_id()
    region = tabling.return_region_id()
    person_list = tabling.parse_P()
    for person in person_list:
        table_new = [location, person, country, region, city]
        final_location.append(table_new)

totext(final_location, 'l1.txt')
However, when I use the totext function of petl, it throws an AssertionError:
AssertionError: template is required
I am unable to understand what the fault is. Can someone please explain the problem I am facing and what I should be doing?
The template parameter to the totext function is not optional; there is no default format for how the rows are written, so you must provide a template. Check the docs for totext for an example: https://petl.readthedocs.io/en/latest/io.html#text-files
The template describes the format of each row that it writes out, using the field headers to name the values; you can optionally pass in a prologue to write the header too. A basic template in your case (note the trailing newline, so each row ends a line) would be:
table_new_template = "{L} {P} {C} {R} {I}\n"
totext(final_location, 'l1.txt', template=table_new_template)
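If you also want a header line in the output file, the prologue parameter supplies it (a small sketch following the same field names; the literal header text is just an example):

table_new_template = "{L} {P} {C} {R} {I}\n"
totext(final_location, 'l1.txt',
       template=table_new_template,
       prologue="L P C R I\n")  # written once, before the data rows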

local variable 'CompletelyUniqueName' referenced before assignment

I've looked through as many answers on this subject as I could find, and all suggested that it's a global/local conflict. I can't see how this would apply in my case, but please do explain. Here's the error:
"local variable 'CompletelyUniqueName' referenced before assignment"
And here is the code, a function I call from another script:
def geopixsum(filename):
    # register all of the GDAL drivers
    gdal.AllRegister()
    # Check file type (in this case GeoTIFF)
    if filename.endswith('.tif'):
        # open the image
        try:
            inDs = gdal.Open(filename)
        except:
            print 'Could not open ', file, '\n'
        # get image size
        rows = inDs.RasterYSize
        cols = inDs.RasterXSize
        # read band 1 into data
        band1 = inDs.GetRasterBand(1)
        data = band1.ReadAsArray(0, 0, cols, rows)
        # get nodata value
        nandat = band1.GetNoDataValue()
        sumvals = data[np.where(np.logical_not(data == nandat))]
        CompletelyUniqueName = sumvals.sum()
        print 'sum = ', CompletelyUniqueName
        inDs = None
    return CompletelyUniqueName
This code worked when it was not a function but just a script on its own. Again, I know this makes it seem like a global/local issue, but given the name I've assigned the variable, I think I've gone to great enough lengths to avoid a conflict.
You should either define a default value for CompletelyUniqueName (for the case where filename.endswith('.tif') is False):
def geopixsum(filename):
    CompletelyUniqueName = 0
    if filename.endswith('.tif'):
        ...
        CompletelyUniqueName = sumvals.sum()
    return CompletelyUniqueName
Or return inside the if statement:
def geopixsum(filename):
    if filename.endswith('.tif'):
        ...
        CompletelyUniqueName = sumvals.sum()
        return CompletelyUniqueName
The simplest fix:
def geopixsum(filename):
    # default return value if the file is not a tif; could be None
    # or anything else you want to return in that case
    CompletelyUniqueName = 0
    # register all of the GDAL drivers
    gdal.AllRegister()
    # Check file type (in this case GeoTIFF)
    if filename.endswith('.tif'):
        # open the image
        try:
            inDs = gdal.Open(filename)
        except:
            print 'Could not open ', file, '\n'
        # get image size
        rows = inDs.RasterYSize
        cols = inDs.RasterXSize
        # read band 1 into data
        band1 = inDs.GetRasterBand(1)
        data = band1.ReadAsArray(0, 0, cols, rows)
        # get nodata value
        nandat = band1.GetNoDataValue()
        sumvals = data[np.where(np.logical_not(data == nandat))]
        CompletelyUniqueName = sumvals.sum()
        print 'sum = ', CompletelyUniqueName
        inDs = None
    return CompletelyUniqueName
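The underlying rule, shown in a minimal sketch: as soon as a name is assigned anywhere in a function body, Python treats it as local for the whole function, and reading it before an assignment has actually executed raises exactly this error, no matter how unique the name is.

def f(flag):
    if flag:
        x = 1
    return x  # x is local because of the assignment above

print(f(True))  # 1
f(False)        # UnboundLocalError: local variable 'x' referenced before assignment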
