I am reading timestamp strings of format '2014-11-09T01:00Z' from mongo and storing in a python dict. But while retrieving same from the dict I see some of the strings got automatically converted to proto timestamp like seconds: 1511047800.
I have no clue how could this happen. Any insights appreciated.
class SomeInfo:
__school_id_info = defaultdict(
lambda: {'banned': -1,
'school_boy': False,
'teacher': False})
def __init__(self):
mongo_connection_str = os.environ['MONGO_CONNECTION_STRING']
mongo_db_name = os.environ.get('MONGO_DB_NAME', 'coll')
mongo_dao = MongoDao(mongo_connection_str, mongo_db_name)
mongo_collection_name = 'school'
school_records = mongo_dao.find_records(mongo_collection_name)
for school_details in school_records:
self.__school_id_info['date'] = school_details['date']
#If I print the __school_id_info here I see date as '2014-11-09T01:00Z' string format which is same as how it is in Mongo.
self.__dict__ = self.__school_id_info
def get_info_for_both_students(self) -> dict:
#while returning from here I see some as seconds: 1235192400 protobuff timestamp format and some still as it is
# .I am not doing any conversion to protobuf timestamp
return self.__school_id_info
Related
What i get from api:
"name":"reports"
"col_type":"array<struct<imageUrl:string,reportedBy:string>>"
So in hive schema I got:
reports array<struct<imageUrl:string,reportedBy:string>>
Note: I got hive array schema as string from api
My target:
bigquery.SchemaField("reports", "RECORD", mode="NULLABLE",
fields=(
bigquery.SchemaField('imageUrl', 'STRING'),
bigquery.SchemaField('reportedBy', 'STRING')
)
)
Note: I would like to create universal code that can handle when i receive any number of struct inside of the array.
Any tips are welcome.
I tried creating a script that parses your input which is reports array<struct<imageUrl:string,reportedBy:string>>. This converts your input to a dictionary that could be used as schema when creating a table. The main idea of the apporach is instead of using SchemaField(), you can create a dictionary which is much easier than creating SchemaField() objects with parameters using your example input.
NOTE: The script is only tested based on your input and it can parse more fields if added in struct<.
import re
from google.cloud import bigquery
def is_even(number):
if (number % 2) == 0:
return True
else:
return False
def clean_string(str_value):
return re.sub(r'[\W_]+', '', str_value)
def convert_to_bqdict(api_string):
"""
This only works for a struct with multiple fields
This could give you an idea on constructing a schema dict for BigQuery
"""
num_even = True
main_dict = {}
struct_dict = {}
field_arr = []
schema_arr = []
# Hard coded this since not sure what the string will look like if there are more inputs
init_struct = sample.split(' ')
main_dict["name"] = init_struct[0]
main_dict["type"] = "RECORD"
main_dict["mode"] = "NULLABLE"
cont_struct = init_struct[1].split('<')
num_elem = len(cont_struct)
# parse fields inside of struct<
for i in range(0,num_elem):
num_even = is_even(i)
# fields are seen on even indices
if num_even and i != 0:
temp = list(filter(None,cont_struct[i].split(','))) # remove blank elements
for elem in temp:
fields = list(filter(None,elem.split(':')))
struct_dict["name"] = clean_string(fields[0])
# "type" works for STRING as of the moment refer to
# https://cloud.google.com/bigquery/docs/schemas#standard_sql_data_types
# for the accepted data types
struct_dict["type"] = clean_string(fields[1]).upper()
struct_dict["mode"] = "NULLABLE"
field_arr.append(struct_dict)
struct_dict = {}
main_dict["fields"] = field_arr # assign dict to array of fields
schema_arr.append(main_dict)
return schema_arr
sample = "reports array<struct<imageUrl:string,reportedBy:string,newfield:bool>>"
bq_dict = convert_to_bqdict(sample)
client = bigquery.Client()
project = client.project
dataset_ref = bigquery.DatasetReference(project, '20211228')
table_ref = dataset_ref.table("20220203")
table = bigquery.Table(table_ref, schema=bq_dict)
table = client.create_table(table)
Output:
When I try to return documents based on the date created, I get an empty list when I know for a fact that there are documents in the database that meet the criteria. I used postman to send the request which would be a string input from the user eg. "Tue Apr 28 2020". This string input would then be converted to a datetime object like so:
def get(self):
try:
body = request.get_json()
search_field = datetime.datetime.strptime(body, '%a %b %d %Y') #format string to datetime object
next_day = search_field
next_day += relativedelta(days=1) #Set the end of the range to the next day
search_field = search_field.replace(tzinfo=datetime.timezone.utc).isoformat()
next_day = next_day.replace(tzinfo=datetime.timezone.utc).isoformat()
print(search_field) #Verify the fields are correct : 2020-04-28T00:00:00+00:00
print(next_day) #2020-04-29T00:00:00+00:00
date_search = Reports.objects.filter(__raw__={'creation_timestamp' : {'$gte' : search_field, '$lte' : next_day}}).to_json() #This is where the documents should be filtered for return
print(date_search)
return Response(date_search, mimetype="application/json", status=200) #The document/s should be returned here as a JSON array.
except Exception as e:
print(e)
return make_response(jsonify(message='Something went wrong :('), 401)
Here is the partial database model:
class Reports(db.Document):
creation_timestamp = db.DateTimeField(default=datetime.utcnow, required=True)
When the document is created, it is stored in the database and the time is stored as isoformat(). The user can only input the search field in the format stated above with a date picker so I format the date to fit the format Mongodb would understand.
Using the above code, I get an empty list and the 200 status code. Checking the database shows I have documents that would fit the criteria, can anyone help figure out what is wrong? Thanks.
If you can have your search_field and nextday in datetime format then you can write the query. I also suggest using Q for pymongo queries in mongoengine.
Your query :
import Q from mongoengine
search_time=datetime.datetime(2017, 11, 8)
nextday=datetime.datetime(2017, 11, 9)
date_search=Report.objects(Q(creation_timestamp__gte=search_field) & Q(timestamp__lte=nextday)).to_json()
The following code is giving me:
Runtime.MarshalError: Unable to marshal response: {'Yes'} is not JSON serializable
from calendar import monthrange
def time_remaining_less_than_fourteen(year, month, day):
a_year = int(input['year'])
b_month = int(input['month'])
c_day = int(input['day'])
days_in_month = monthrange(int(a_year), int(b_month))[1]
time_remaining = ""
if (days_in_month - c_day) < 14:
time_remaining = "No"
return time_remaining
else:
time_remaining = "Yes"
return time_remaining
output = {time_remaining_less_than_fourteen((input['year']), (input['month']), (input['day']))}
#print(output)
When I remove {...} it then throws: 'unicode' object has no attribute 'copy'
I encountered this issue when working with lambda transformation blueprint kinesis-firehose-process-record-python for Kinesis Firehose which led me here. Thus I will post a solution to anyone who also finds this questions when having issues with the lambda.
The blueprint is:
from __future__ import print_function
import base64
print('Loading function')
def lambda_handler(event, context):
output = []
for record in event['records']:
print(record['recordId'])
payload = base64.b64decode(record['data'])
# Do custom processing on the payload here
output_record = {
'recordId': record['recordId'],
'result': 'Ok',
'data': base64.b64encode(payload)
}
output.append(output_record)
print('Successfully processed {} records.'.format(len(event['records'])))
return {'records': output}
The thing to note is that the Firehose lambda blueprints for python provided by AWS are for Python 2.7, and they don't work with Python 3. The reason is that in Python 3, strings and byte arrays are different.
The key change to make it work with lambda powered by Python 3.x runtime was:
changing
'data': base64.b64encode(payload)
into
'data': base64.b64encode(payload).decode("utf-8")
Otherwise, the lambda had an error due to inability to serialize JSON with byte array returned from base64.b64encode.
David here, from the Zapier Platform team.
Per the docs:
output: A dictionary or list of dictionaries that will be the "return value" of this code. You can explicitly return early if you like. This must be JSON serializable!
In your case, output is a set:
>>> output = {'Yes'}
>>> type(output)
<class 'set'>
>>> json.dumps(output)
Object of type set is not JSON serializable
To be serializable, you need a dict (which has keys and values). Change your last line to include a key and it'll work like you expect:
# \ here /
output = {'result': time_remaining_less_than_fourteen((input['year']), (input['month']), (input['day']))}
In python, I receive JSON data. The data looks like the following stub:
{
"id": 1,
"naam": "4.13",
"ruimte_temperatuur_sensor": {...},
// etc
}
I map this json to an object (note the sensor is mapped already):
ruimte = Ruimte(id=id,
naam=naam,
ruimte_temperatuur_sensor=temperatuur_sensor,
ruimte_humiditeit_sensor=humiditeit_sensor,
ruimte_beweging_sensor=beweging_sensor,
airco_temperatuur_sensor=airco_sensor,
radiator_temperatuur_sensor=radiator_sensor)
The strangest thing happens:
The id field in JSON is an integer, but Python maps it to a tuple. In my debugger, you can clearly see that the id=id maps to an integer, but then all of a sudden my object contains a tuple:
The object's constructor should not cause that:
class Ruimte:
def __init__(self,
id: int,
naam: str,
ruimte_temperatuur_sensor: Sensor,
ruimte_humiditeit_sensor: Sensor,
ruimte_beweging_sensor: Sensor,
airco_temperatuur_sensor: Sensor,
radiator_temperatuur_sensor: Sensor):
self.id = id,
self.naam = naam,
self.ruimte_temperatuur_sensor = ruimte_temperatuur_sensor
self.ruimte_humiditeit_sensor = ruimte_humiditeit_sensor
self.ruimte_beweging_sensor = ruimte_beweging_sensor
self.airco_temperatuur_sensor = airco_temperatuur_sensor
self.radiator_temperatuur_sensor = radiator_temperatuur_sensor
In the sub-objects the id is not parsed to a tuple, for exampe ruimte.airco_temperatuur_sensor.id is an integer:
but that JSON is parsed the same way:
def _parse_json_to_sensor(self, json: dict) -> Sensor:
id = json["id"]
type = SensorType(json["type"])
meet_interval_sec = json["sensorInstelling"]["meetIntervalSec"]
opslaan_interval_sec = json["sensorInstelling"]["opslaanIntervalSec"]
sensor = Sensor(id=id,
type=type,
meet_interval_sec=meet_interval_sec,
opslaan_interval_sec=opslaan_interval_sec)
I'm totally lost on this. What could cause this?
You have commas after the lines where you assign self.id and self.naam. Remove them.
a_string = 'string',
type(a_string)
>>> tuple
The comma in the line:
self.id = id,
leads to the creation of a tuple. See this example:
a = 1
b = 1
c = 1,
print(b)
print(c)
So I'm trying to compare a dict that I have created to a dict response returned by a boto3 call.
The response is a representation of a JSON document and I want to check they are the same.
Boto3 always returned the strings as unicode. Here's the response:
{u'Version': u'2012-10-17', u'Statement': [{u'Action': u'sts:AssumeRole', u'Principal': {u'Service': u'ec2.amazonaws.com'}, u'Effect': u'Allow', u'Sid': u''}]}
I initially created my dict like this:
default_documment = {}
default_documment['Version'] = '2012-10-17'
default_documment['Statement'] = [{}]
default_documment['Statement'][0]['Sid'] = ''
default_documment['Statement'][0]['Effect'] = 'Allow'
default_documment['Statement'][0]['Principal'] = {}
default_documment['Statement'][0]['Principal']['Service'] = 'ec2.amazonaws.com'
default_documment['Statement'][0]['Action'] = 'sts:AssumeRole'
However, when i compare these two dicts with == they are not equal.
So then I tried adding u to all the strings when I create the dict:
# Default document for a new role
default_documment = {}
default_documment[u'Version'] = u'2012-10-17'
default_documment[u'Statement'] = [{}]
default_documment[u'Statement'][0][u'Sid'] = u''
default_documment[u'Statement'][0][u'Effect'] = u'Allow'
default_documment[u'Statement'][0][u'Principal'] = {}
default_documment[u'Statement'][0][u'Principal'][u'Service'] = u'ec2.amazonaws.com'
default_documment[u'Statement'][0][u'Action'] = u'sts:AssumeRole'
This doesn't work either. The dicts are not equally and if i do a print of my dict it doesn't show u'somestring' it just shows 'somestring'.
How can I compare my dict to what boto3 has returned?
Your second attempt works correctly in Python 2.7 and 3.3. Below is just a cut-and-paste of your Boto3 response and your code (with document spelling corrected :)
D = {u'Version': u'2012-10-17', u'Statement': [{u'Action': u'sts:AssumeRole', u'Principal': {u'Service': u'ec2.amazonaws.com'}, u'Effect': u'Allow', u'Sid': u''}]}
default_document = {}
default_document[u'Version'] = u'2012-10-17'
default_document[u'Statement'] = [{}]
default_document[u'Statement'][0][u'Sid'] = u''
default_document[u'Statement'][0][u'Effect'] = u'Allow'
default_document[u'Statement'][0][u'Principal'] = {}
default_document[u'Statement'][0][u'Principal'][u'Service'] = u'ec2.amazonaws.com'
default_document[u'Statement'][0][u'Action'] = u'sts:AssumeRole'
print(D == default_document)
Output:
True