How to set Variable Width and Decimals in SPSS with Python? - python

I'm trying to manipulate some .sav files with SavReaderWriter. What I already have is this:
# Open the input .sav once and capture the cases together with all the
# variable metadata exposed by the reader.
with savReaderWriter.SavReader(dirIn, ioUtf8=True) as reader:
    # Cases go into a DataFrame whose columns come from reader.header.
    df = pd.DataFrame(reader.all(), columns=list(reader.header))

    # Per-variable metadata, kept for writing the file back out.
    varTypes = reader.varTypes
    varLabels = reader.varLabels
    valueLabels = reader.valueLabels
    varMissingValues = reader.missingValues
    varMeasure = reader.measureLevels
    varAlignments = reader.alignments
    varColumnWidths = reader.columnWidths
    varWidth = reader.varWids  # <------------- This guy
and:
# Collect the writer options first so every piece of metadata is visible
# in one place, then hand them to SavWriter as keyword arguments.
writerOptions = dict(
    savFileName=dirOut,
    varNames=varNames,
    varTypes=varTypes,
    varLabels=varLabels,
    valueLabels=valueLabels,
    measureLevels=varMeasure,
    columnWidths=varColumnWidths,
    alignments=varAlignments,
    missingValues=varMissingValues,
    ioUtf8=True,
)

with SavWriter(**writerOptions) as writer:
    # Write the cases one row at a time.
    for row in records:
        writer.writerow(row)
The problem is that I don't know how to set the variable width that I read from the .sav file in the first snippet when writing with SavWriter in the second one. Does anyone know what I can do?

I actually got it working!
First I had to get the formats when reading the .sav:
varFormats = reader.formats
Then just add this param when opening the savWriter:
formats = varFormats
I kind of had to find my own way, since the docs don't help that much, but this page gave me an idea of how the formats work:
https://pythonhosted.org/savReaderWriter/index.html#formats

Related

Data output is not the same as inside function

I am currently having an issue where I am trying to store data in a list (using dataclasses). When I print the data in the list inside the function (PullIncursionData()), it prints a series of different numbers (never the same; repeats are not possible due to the nature of the data). When I print it after the call, once the return value has been stored in a variable, it somehow prints the same number over and over.
I cannot share the numbers, as they update with EVE Online's API, so the only way is to run it locally and read the first list yourself.
The repository is Here: https://github.com/AtherActive/EVEAPI-Demo
Heads up! Inside the main.py (the file with issues) (a snippet of code is down below) are more functions. All functions from line 90 and forward are important, the rest can be ignored for this question, as they do not interact with the other functions.
def PullIncursionData():
    """Fetch the incursion list from ESI and return the parsed entries.

    Requests the incursions endpoint, passes every element of the JSON
    response to ``__parseIncursionData`` and collects the results in a list,
    which is returned. When ``settings.developerMode`` is 1 the
    ``constellation_id`` of each entry is printed as it is added.
    """
    endpoint = 'https://esi.evetech.net/latest/incursions/?datasource=tranquility'
    payload = rq.get(endpoint).json()

    parsed = []
    for index in range(len(payload)):
        entry = __parseIncursionData(payload, index)
        parsed.append(entry)
        # Debug output; can be toggled in settings.py.
        if settings.developerMode == 1:
            print(entry.constellation_id)
    return parsed
# Basically parses the input data in a decent manner. No comments needed really.
def __parseIncursionData(jsData, i):
    """Copy the fields of ``jsData[i]`` onto ``stru.Incursion`` and return it.

    Region and system names are looked up through ``ResolveSystemNames``.
    """
    # NOTE(review): this binds stru.Incursion itself -- it is never called --
    # so every invocation writes to, and returns, the very same object.
    # Every entry the caller collects therefore shows the values of the last
    # incursion parsed. Calling stru.Incursion() would give a fresh instance
    # per call.
    target = stru.Incursion
    raw = jsData[i]

    target.constellation_id = raw['constellation_id']
    target.constellation_name = 'none'
    target.staging = raw['staging_solar_system_id']
    target.region_name = ResolveSystemNames(target.constellation_id, 'con-reg')
    target.status = raw['state']
    target.systems_id = raw['infested_solar_systems']
    target.systems_names = ResolveSystemNames(raw['infested_solar_systems'], 'system')
    return target
# Resolves names for systems, regions and constellations. Still WIP.
def ResolveSystemNames(id, mode='constellation'):
    """Resolve names through the Fuzzwork map-data API. Still WIP.

    mode 'con-reg': ``id`` is a constellation id; returns the ``regionname``
        of the first row of the response.
    mode 'system': ``id`` is a list of solar-system ids; returns a list with
        one ``solarsystemname`` per id.
    Any other mode (including the default) returns the string 'none'.
    """
    if mode == 'con-reg':
        lookup = 'https://www.fuzzwork.co.uk/api/mapdata.php?constellationid={}&format=json'.format(id)
        return rq.get(lookup).json()[0]['regionname']

    if mode == 'system':
        names = []
        for position, system_id in enumerate(id):
            lookup = 'https://www.fuzzwork.co.uk/api/mapdata.php?solarsystemid={}&format=json'.format(system_id)
            rows = rq.get(lookup).json()
            # NOTE(review): the response is indexed with the loop position,
            # whereas the 'con-reg' branch uses [0] -- confirm this is intended.
            names.append(rows[position]['solarsystemname'])
        return names

    return 'none'
# Fetch the incursions, then print every constellation id again from outside
# the function so it can be compared with the in-function debug output.
icdata = PullIncursionData()
print('external data check:')
for incursion in icdata:
    print(incursion.constellation_id)
structures.py (custom file)
from dataclasses import dataclass, field


@dataclass
class Incursion:
    """Record describing a single incursion.

    Every field carries a placeholder default, so ``Incursion()`` yields a
    fully initialised record. The two list fields use ``default_factory`` so
    that each instance owns its own lists instead of sharing one.
    """

    # Fields are declared as annotations with defaults; binding them to the
    # type objects (``= int``) would not create dataclass fields at all.
    constellation_id: int = -1
    constellation_name: str = 'undefined'
    staging: int = -1
    staging_name: str = 'undefined'
    systems_id: list = field(default_factory=list)
    systems_names: list = field(default_factory=list)
    region_name: str = 'undefined'
    status: str = 'unknown'

Python sql returning list

I have some functions that run SQL statements. My first function is fine because I only get one result.
My second function returns a large list of error codes, and I don't know how to return them in the response.
TypeError: <sqlalchemy.engine.result.ResultProxy object at 0x7f98b85ef910> is not JSON serializable
Tried everything need help.
My Code:
def topalarms():
    """Return the error codes of the machine named in the query string.

    Reads ``customer_name`` and ``machine_serial`` from the request
    arguments; if either is absent the result of ``missing_param()`` is
    returned. Otherwise the response is filled in by ``form_response`` and
    delivered through ``do_response``.
    """
    customer_name = request.args.get('customer_name')
    machine_serial = request.args.get('machine_serial')
    if customer_name is None or machine_serial is None:
        return missing_param()

    def form_response(response, session):
        """Put the machine's error codes, or an error text, into ``response``."""
        machines = machine_id(session, machine_serial)
        if not machines:
            response['Error'] = 'Seriennummer nicht vorhanden/gefunden'
            return
        response['ErrorCodes'] = error_codes(session, machines[0]["id"])

    return do_response(customer_name, form_response)
def machine_id(session, machine_serial):
    """Look up the ``machine`` rows whose serial equals ``machine_serial``.

    Returns a list with one dict per matching row, holding the row's ``id``
    together with ``ts`` (the current UTC time) and ``ts_start`` (the UTC
    time 30 days earlier), both as ISO-formatted strings.
    """
    # The statement text is kept flush-left so the string is unchanged.
    stmt_raw = '''
SELECT
id
FROM
machine
WHERE
machine.serial = :machine_serial_arg
'''
    now_utc = datetime.datetime.utcnow()
    ts = pytz.utc.localize(now_utc).isoformat()
    window_start = datetime.datetime.utcnow() - datetime.timedelta(days=30)
    ts_start = pytz.utc.localize(window_start).isoformat()

    stmt = text(stmt_raw).columns()
    rows = session.execute(stmt, {'machine_serial_arg': machine_serial})
    return [{'id': row[0], 'ts': ts, 'ts_start': ts_start} for row in rows]
def error_codes(session, machineid):
    """Query the ``name`` column of the ``identifier`` rows for one machine.

    Executes the statement with ``machineid`` bound to ``:machineid_arg``
    and returns the list ``errors`` built from the result.
    """
    stmt_raw = '''
SELECT
name
FROM
identifier
WHERE
identifier.machine_id = :machineid_arg
'''
    stmt_args = {
        'machineid_arg': machineid,
    }
    stmt = text(stmt_raw).columns(
        #ts_insert = ISODateTime
    )
    result = session.execute(stmt, stmt_args)
    errors = []
    # NOTE(review): the loop appends `result` (the whole result object) once
    # per row instead of anything taken from `row`.
    for row in result:
        errors.append(result)
    # Earlier attempts, left commented out:
    #({'result': [dict(row) for row in result]})
    #errors = {i: result[i] for i in range(0, len(result))}
    #errors = dict(result)
    return errors
My problem is in the function error_codes: something is wrong with how I build the result.
my Output should be like this:
ABCNormal
ABCSafety
Alarm_G01N01
Alarm_G01N02
Alarm_G01N03
Alarm_G01N04
Alarm_G01N05
I think you need to take a closer look at what you are doing correctly with your working function and compare that to your non-working function.
Firstly, what do you think this code does?
for row in result:
    # Appends the whole result object; `row` is never used.
    errors.append(result)
This adds to errors one copy of the result object for each row in result. So if you have six rows in result, errors contains six copies of result. I suspect this isn't what you are looking for. You want to be doing something with the row variable.
Taking a closer look at your working function, you are taking the first value out of the row, using row[0]. So, you probably want to do the same in your non-working function:
for row in result:
    # Keep the first column of each row.
    errors.append(row[0])
I don't have SQLAlchemy set up so I haven't tested this: I have provided this answer based solely on the differences between your working function and your non-working function.
You need a json serializer. I suggest using Marshmallow: https://marshmallow.readthedocs.io/en/stable/
There are some great tutorials online on how to do this.

How skip to another loop in python if no data returned by the API?

I have Python code that loops through multiple locations and pulls data from a third-party API. Below is the code; sublocation_ids are the location ids, which come from a directory.
As you can see from the code, the data gets converted to a data frame and then saved to an Excel file. The issue I am currently facing is that if the API does not return publication_timestamp data for a certain location, the loop stops instead of proceeding, and I get the error shown below the code.
How do I avoid this and skip to another loop if no data is returned by the API?
# For each sub-location: query the API, normalise the returned articles into
# a DataFrame and write them to a sheet named after the location.
for sub in sublocation_ids:
    city_num_int = sub['id']
    city_num_str = str(city_num_int)
    city_name = sub['name']
    filter_text_new = filter_text.format(city_num_str)
    data = json.dumps({"filters": [filter_text_new], "sort_by":"created_at", "size":2})
    r = requests.post(url = api_endpoint, data = data).json()
    articles_list = r["articles"]
    articles_list_normalized = json_normalize(articles_list)
    df = articles_list_normalized
    # Raises KeyError when the frame has no 'publication_timestamp' column.
    df['publication_timestamp'] = pd.to_datetime(df['publication_timestamp'])
    # NOTE(review): x.now() looks like it yields the current time rather than
    # the parsed value -- confirm this is intended.
    df['publication_timestamp'] = df['publication_timestamp'].apply(lambda x: x.now().strftime('%Y-%m-%d'))
    df.to_excel(writer, sheet_name = city_name)
# NOTE(review): assumed to run once after the loop; the original indentation
# is not visible here.
writer.save()
Key Error: publication_timestamp
Change this bit of code:
df = articles_list_normalized
if 'publication_timestamp' in df.columns:
df['publication_timestamp'] = pd.to_datetime(df['publication_timestamp'])
df['publication_timestamp'] = df['publication_timestamp'].apply(lambda x: x.now().strftime('%Y-%m-%d'))
df.to_excel(writer, sheet_name = city_name)
else:
continue
If the API literally returns no data i.e. {} then you might even do the check before normalizing it:
if articles_list:
df = json_normalize(articles_list)
# ... rest of code ...
else:
continue

python byte array print() gives wrong values

Using Python 3, I made the following script, consisting of a Model class containing hundreds of bytearrays; in the same script, outside of the Model class, I print some of them out to verify they are correct. When I print the values, some of them are not what I expected. (I put comments in the code below to identify these.)
Here is a shortend version of my script with some of the bytearrays
`
class Model:
    """Holds a set of fixed bytearray values, one per attribute."""

    def __init__(self):
        # weird values:
        # (the final byte of each, 0x3b-0x50, is in the printable ASCII range)
        self.bp_diastole_118 = bytearray(b'\xff\x01\x02\x01\x01\x00\x02\x3b')
        self.bp_diastole_120 = bytearray(b'\xff\x01\x02\x01\x01\x00\x02\x3c')
        self.bp_diastole_122 = bytearray(b'\xff\x01\x02\x01\x01\x00\x02\x3d')
        self.bp_diastole_124 = bytearray(b'\xff\x01\x02\x01\x01\x00\x02\x3e')
        self.bp_diastole_126 = bytearray(b'\xff\x01\x02\x01\x01\x00\x02\x3f')
        self.bp_diastole_128 = bytearray(b'\xff\x01\x02\x01\x01\x00\x02\x40')
        self.bp_diastole_160 = bytearray(b'\xff\x01\x02\x01\x01\x00\x02\x50')
        # correct values:
        # (the final byte of each, 0xc3-0xc7, is outside the ASCII range)
        self.pupil_r_normal = bytearray(b'\xff\x01\x02\x01\x01\x00\x03\xc3')
        self.pupil_r_dilated = bytearray(b'\xff\x01\x02\x01\x01\x00\x03\xc4')
        self.pupil_r_constriced = bytearray(b'\xff\x01\x02\x01\x01\x00\x03\xc5')
        self.pupil_r_reaction_on = bytearray(b'\xff\x01\x02\x01\x01\x00\x03\xc6')
        self.pupil_r_reaction_off = bytearray(b'\xff\x01\x02\x01\x01\x00\x03\xc7')
m = Model()

# Print "<attribute> = <value>" for each attribute, grouped as in the class.
print('--------------weird value------------------')
for name in (
    'bp_diastole_118',
    'bp_diastole_120',
    'bp_diastole_122',
    'bp_diastole_124',
    'bp_diastole_126',
    'bp_diastole_128',
    'bp_diastole_160',
):
    print('{} = {}'.format(name, getattr(m, name)))

print('-------------correct value--------------------')
for name in (
    'pupil_r_normal',
    'pupil_r_dilated',
    'pupil_r_constriced',
    'pupil_r_reaction_on',
    'pupil_r_reaction_off',
):
    print('{} = {}'.format(name, getattr(m, name)))
here is what is printed to the console:
`
--------------weird value------------------
bp_diastole_118 = bytearray(b'\xff\x01\x02\x01\x01\x00\x02;')
bp_diastole_120 = bytearray(b'\xff\x01\x02\x01\x01\x00\x02<')
bp_diastole_122 = bytearray(b'\xff\x01\x02\x01\x01\x00\x02=')
bp_diastole_124 = bytearray(b'\xff\x01\x02\x01\x01\x00\x02>')
bp_diastole_126 = bytearray(b'\xff\x01\x02\x01\x01\x00\x02?')
bp_diastole_128 = bytearray(b'\xff\x01\x02\x01\x01\x00\x02@')
bp_diastole_160 = bytearray(b'\xff\x01\x02\x01\x01\x00\x02P')
-------------correct value--------------------
pupil_r_normal = bytearray(b'\xff\x01\x02\x01\x01\x00\x03\xc3')
pupil_r_dilated = bytearray(b'\xff\x01\x02\x01\x01\x00\x03\xc4')
pupil_r_constriced = bytearray(b'\xff\x01\x02\x01\x01\x00\x03\xc5')
pupil_r_reaction_on = bytearray(b'\xff\x01\x02\x01\x01\x00\x03\xc6')
pupil_r_reaction_off = bytearray(b'\xff\x01\x02\x01\x01\x00\x03\xc7')
As you can see the good values print exactly what i would expect and are identical to the values i initialized. However if you look at what was printed from the weird values you can see the last 3 characters do not match the values i initialized.
i.e.
initialized:
self.bp_diastole_118 = bytearray(b'\xff\x01\x02\x01\x01\x00\x02\x3b')
is not the same as printed:
bp_diastole_118 = bytearray(b'\xff\x01\x02\x01\x01\x00\x02;')
Does anybody know why this is happening and how I could remedy the problem?
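You are seeing the ASCII representation of the byte value you set: the repr of a bytes or bytearray object shows bytes in the printable ASCII range as characters, so for example chr(0x3b) == ';'. The stored values are unchanged.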
What I found is that the bytes \x3b through \x50 are in the printable ASCII range, so the last byte is displayed as its character (\x50 is 'P'). If you test with bytearray(b'\x3b') on its own, you will see it printed as ';'.

local variable 'CompletelyUniqueName' referenced before assignment

I've looked through as many answers on this subject as I could find and all suggested that it's a global - local conflict. I can't see how this would apply in my case but please do explain. Here's the error :
"local variable 'CompletelyUniqueName' referenced before assignment"
and here is the code, a function I call from another script :
def geopixsum(filename):
    """Sum the band-1 pixels of a GeoTIFF that differ from its nodata value."""
    # register all of the GDAL drivers
    gdal.AllRegister()
    # Check file type (in this case Geotiff)
    if filename.endswith('.tif'):
        # open the image
        try:
            inDs = gdal.Open(filename)
        except:
            # NOTE(review): this prints the name `file`, not the `filename`
            # argument, and execution then continues with `inDs` regardless.
            print 'Could not open ',file,'\n'
        # get image size
        rows = inDs.RasterYSize
        cols = inDs.RasterXSize
        # read band 1 into data
        band1 = inDs.GetRasterBand(1)
        data = band1.ReadAsArray(0,0,cols,rows)
        # get nodata value
        nandat = band1.GetNoDataValue()
        # keep only the pixels that are not equal to the nodata value
        sumvals = data[np.where(np.logical_not(data == nandat))]
        CompletelyUniqueName = sumvals.sum()
        print 'sum = ',CompletelyUniqueName
        inDs = None
    # NOTE(review): CompletelyUniqueName is assigned only inside the branch
    # above, so for a filename that does not end in '.tif' this return
    # references a local that was never assigned.
    return CompletelyUniqueName
This code worked when not a function but rather just a script on its own. Again, I know this would make it it seem like a global - local issue but given the name I've assigned the variable I think I've gone to great enough lengths to avoid a conflict.
You should either define default value for CompletelyUniqueName (for case if filename.endswith('.tif') == False)
def geopixsum(filename):
    # Default that is returned when the filename does not end in '.tif'.
    CompletelyUniqueName = 0
    if filename.endswith('.tif'):
        ...
        CompletelyUniqueName = sumvals.sum()
    return CompletelyUniqueName
Or return inside if statement
def geopixsum(filename):
    if filename.endswith('.tif'):
        ...
        CompletelyUniqueName = sumvals.sum()
        # Returning inside the branch: the name is always assigned here.
        return CompletelyUniqueName
The simplest fix:
def geopixsum(filename):
    """Return the sum of the band-1 pixels that differ from the nodata value.

    Parameters:
        filename: path of the image; only names ending in '.tif' are read.

    Returns:
        The pixel sum, or 0 when ``filename`` is not a '.tif' file or the
        file cannot be opened.
    """
    # Default result (or None, or anything you want to return) for the cases
    # where no sum can be computed.
    CompletelyUniqueName = 0

    # register all of the GDAL drivers
    gdal.AllRegister()

    # Check file type (in this case Geotiff)
    if not filename.endswith('.tif'):
        return CompletelyUniqueName

    # open the image
    # gdal.Open reports failure by returning None, or by raising RuntimeError
    # when gdal.UseExceptions() is active; treat both the same way instead of
    # carrying on with an unusable dataset.
    try:
        inDs = gdal.Open(filename)
    except RuntimeError:
        inDs = None
    if inDs is None:
        print('Could not open {}\n'.format(filename))
        return CompletelyUniqueName

    # get image size
    rows = inDs.RasterYSize
    cols = inDs.RasterXSize

    # read band 1 into data
    band1 = inDs.GetRasterBand(1)
    data = band1.ReadAsArray(0, 0, cols, rows)

    # get nodata value and keep only the pixels that differ from it
    nandat = band1.GetNoDataValue()
    sumvals = data[np.where(np.logical_not(data == nandat))]
    CompletelyUniqueName = sumvals.sum()

    # Two spaces after '=' reproduce the output of the original
    # comma-separated print statement.
    print('sum =  {}'.format(CompletelyUniqueName))

    # Drop the reference to the dataset.
    inDs = None
    return CompletelyUniqueName

Categories