I am trying to search my database to see if a date range I am about to add overlaps with a date range that already exists in the database.
Using this question: Determine Whether Two Date Ranges Overlap
I came up with firstDay <= :end and lastDay >= :start for my FilterExpression.
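As a quick sanity check of that condition, with dates encoded as YYYYMMDD numbers (the values here are made up for illustration):

# Range already in the table:
first_day, last_day = 20171001, 20171015
# Range about to be added:
start, end = 20171010, 20171020

# firstDay <= :end and lastDay >= :start
print(first_day <= end and last_day >= start)  # True - the ranges share 20171010..20171015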
def create(self, start=None, days=30):
    # Create the start/end times
    if start is None:
        start = datetime.utcnow()
    elif isinstance(start, datetime) is False:
        raise ValueError('Start time must either be "None" or a "datetime"')
    end = start + timedelta(days=days)

    # Format the start and end string "YYYYMMDD"
    start = str(start.year) + str('%02d' % start.month) + str('%02d' % start.day)
    end = str(end.year) + str('%02d' % end.month) + str('%02d' % end.day)

    # Search the database for overlap
    days = self.connection.select(
        filter='firstDay <= :end and lastDay >= :start',
        attributes={
            ':start': {'N': start},
            ':end': {'N': end}
        }
    )

    # If we get one or more days then there is overlap
    if len(days) > 0:
        raise ValueError('There looks to be a time overlap')

    # Add the item to the database
    self.connection.insert({
        "firstDay": {"N": start},
        "lastDay": {"N": end}
    })
I am then calling the function like this:
seasons = dynamodb.Seasons()
seasons.create(start=datetime.utcnow() + timedelta(days=50))
As requested, the method looks like this:
def select(self, conditions='', filter='', attributes={}, names={}, limit=1, select='ALL_ATTRIBUTES'):
    """
    Select one or more items from dynamodb
    """
    # Create the condition; it should contain the datatype hash
    conditions = self.hashKey + ' = :hash and ' + conditions if len(conditions) > 0 else self.hashKey + ' = :hash'
    attributes[':hash'] = {"S": self.hashValue}
    limit = max(1, limit)
    args = {
        'TableName': self.table,
        'Select': select,
        'ScanIndexForward': True,
        'Limit': limit,
        'KeyConditionExpression': conditions,
        'ExpressionAttributeValues': attributes
    }
    if len(names) > 0:
        args['ExpressionAttributeNames'] = names
    if len(filter) > 0:
        args['FilterExpression'] = filter
    return self.connection.query(**args)['Items']
When I run the above, it keeps inserting the start and end dates into the database because it never finds any overlap. Why is this happening?
The table structure looks like this (JavaScript):
{
    TableName: 'test-table',
    AttributeDefinitions: [{
        AttributeName: 'dataType',
        AttributeType: 'S'
    }, {
        AttributeName: 'created',
        AttributeType: 'S'
    }],
    KeySchema: [{
        AttributeName: 'dataType',
        KeyType: 'HASH'
    }, {
        AttributeName: 'created',
        KeyType: 'RANGE'
    }],
    ProvisionedThroughput: {
        ReadCapacityUnits: 5,
        WriteCapacityUnits: 5
    },
}
It looks like you are setting Limit=1, probably to mean "just return the first match found". In fact, setting Limit to 1 means the Query reads only the first item in the partition, and the FilterExpression is applied after that read, so at most one item is ever evaluated for an overlap. You probably need to remove the limit, so that each item in the partition range is evaluated.
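As illustration, a hedged sketch of how the select wrapper might be adjusted so callers can opt out of the cap entirely; treating limit=None as "no Limit" is my assumption, not existing behavior of your class:

def select(self, conditions='', filter='', attributes={}, names={},
           limit=None, select='ALL_ATTRIBUTES'):
    conditions = self.hashKey + ' = :hash and ' + conditions if conditions else self.hashKey + ' = :hash'
    attributes[':hash'] = {"S": self.hashValue}
    args = {
        'TableName': self.table,
        'Select': select,
        'ScanIndexForward': True,
        'KeyConditionExpression': conditions,
        'ExpressionAttributeValues': attributes
    }
    if limit is not None:  # only cap the read when explicitly asked
        args['Limit'] = max(1, limit)
    if names:
        args['ExpressionAttributeNames'] = names
    if filter:
        args['FilterExpression'] = filter
    return self.connection.query(**args)['Items']

Note that even without a Limit, a single Query call returns at most 1 MB of data; a fully robust version would also follow LastEvaluatedKey to paginate through larger partitions.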
Working on a biotech research project where a robot is doing the dilution, and I was having trouble configuring the code so that the pipette stops at a specific column. Ideally we want the code to ask the user which column the pipette should stop at.
More info on the API: https://protocols.opentrons.com/protocol/customizable_serial_dilution_ot2
def get_values(*names):
    import json
    _all_values = json.loads("""{"pipette_type":"p300_single_gen2","mount_side":"right","tip_type":"standard","trough_type":"nest_12_reservoir_15ml","plate_type":"nest_96_wellplate_200ul_flat","dilution_factor":3,"num_of_dilutions":10,"total_mixing_volume":150,"blank_on":true,"tip_use_strategy":"never","air_gap_volume":10}""")
    return [_all_values[n] for n in names]


"""DETAILS."""

metadata = {
    'protocolName': 'Customizable Serial Dilution',
    'author': 'Opentrons <protocols#opentrons.com>',
    'source': 'Protocol Library',
    'apiLevel': '2.11'
}


def run(protocol_context):
    """PROTOCOL BODY."""
    [pipette_type, mount_side, tip_type, trough_type, plate_type,
     dilution_factor, num_of_dilutions, total_mixing_volume,
     blank_on, tip_use_strategy, air_gap_volume] = get_values(  # noqa: F821
        'pipette_type', 'mount_side', 'tip_type', 'trough_type',
        'plate_type', 'dilution_factor', 'num_of_dilutions',
        'total_mixing_volume', 'blank_on',
        'tip_use_strategy', 'air_gap_volume'
    )

    # check for bad setup here
    if not 1 <= num_of_dilutions <= 11:
        raise Exception('Enter a number of dilutions between 1 and 11')

    if num_of_dilutions == 11 and blank_on == 1:
        raise Exception('No room for blank with 11 dilutions')

    pip_range = pipette_type.split('_')[0].lower()
    tiprack_map = {
        'p10': {
            'standard': 'opentrons_96_tiprack_10ul',
            'filter': 'opentrons_96_filtertiprack_20ul'
        },
        'p20': {
            'standard': 'opentrons_96_tiprack_20ul',
            'filter': 'opentrons_96_filtertiprack_20ul'
        },
        'p50': {
            'standard': 'opentrons_96_tiprack_300ul',
            'filter': 'opentrons_96_filtertiprack_200ul'
        },
        'p300': {
            'standard': 'opentrons_96_tiprack_300ul',
            'filter': 'opentrons_96_filtertiprack_200ul'
        },
        'p1000': {
            'standard': 'opentrons_96_tiprack_1000ul',
            'filter': 'opentrons_96_filtertiprack_1000ul'
        }
    }

    # labware
    trough = protocol_context.load_labware(trough_type, '2')
    plate = protocol_context.load_labware(plate_type, '3')
    tip_name = tiprack_map[pip_range][tip_type]
    tipracks = [
        protocol_context.load_labware(tip_name, slot)
        for slot in ['1', '4']
    ]
    print(mount_side)

    # pipette
    pipette = protocol_context.load_instrument(
        pipette_type, mount_side, tipracks)

    # reagents
    diluent = trough.wells()[0]

    transfer_volume = total_mixing_volume / dilution_factor
    diluent_volume = total_mixing_volume - transfer_volume

    if 'multi' in pipette_type:
        dilution_destination_sets = [
            [row] for row in plate.rows()[0][1:num_of_dilutions]]
        dilution_source_sets = [
            [row] for row in plate.rows()[0][:num_of_dilutions - 1]]
        blank_set = [plate.rows()[0][num_of_dilutions + 1]]
    else:
        dilution_destination_sets = plate.columns()[1:num_of_dilutions]
        dilution_source_sets = plate.columns()[:num_of_dilutions - 1]
        blank_set = plate.columns()[num_of_dilutions + 1]

    all_diluent_destinations = [
        well for set in dilution_destination_sets for well in set]

    pipette.pick_up_tip()
    for dest in all_diluent_destinations:
        # Distribute diluent across the plate to the number of samples,
        # and add diluent to one column after the number of samples for a blank
        pipette.transfer(
            diluent_volume,
            diluent,
            dest,
            air_gap=air_gap_volume,
            new_tip='never')
    pipette.drop_tip()

    # Dilution of samples across the 96-well flat bottom plate
    if tip_use_strategy == 'never':
        pipette.pick_up_tip()
    for source_set, dest_set in zip(dilution_source_sets,
                                    dilution_destination_sets):
        for s, d in zip(source_set, dest_set):
            pipette.transfer(
                transfer_volume,
                s,
                d,
                air_gap=air_gap_volume,
                mix_after=(5, total_mixing_volume / 2),
                new_tip=tip_use_strategy)
    if tip_use_strategy == 'never':
        pipette.drop_tip()

    if blank_on:
        pipette.pick_up_tip()
        for blank_well in blank_set:
            pipette.transfer(
                diluent_volume,
                diluent,
                blank_well,
                air_gap=air_gap_volume,
                new_tip='never')
        pipette.drop_tip()
Any help is very much appreciated. Thank you!
Currently the robot just goes through all the columns, but we want to find a way to have it stop at a specific column.
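For what it's worth, a hedged sketch of one way to do this, following the pattern the protocol already uses for num_of_dilutions (single-channel case shown). The stop_column parameter is hypothetical, not part of the current protocol; it would have to be added to the _all_values JSON that get_values reads:

# Hypothetical parameter: which column the pipette should stop at (1-12).
# It would need to be added to the _all_values JSON above.
stop_column = get_values('stop_column')[0]

if not 1 <= stop_column <= 12:
    raise Exception('Enter a stop column between 1 and 12')

# Bound the slices by the user's stop column instead of num_of_dilutions,
# so every later transfer loop only ever sees columns up to that point.
dilution_destination_sets = plate.columns()[1:stop_column]
dilution_source_sets = plate.columns()[:stop_column - 1]

Since protocols of this kind are customized through the parameter form that fills in _all_values (not through interactive input() at run time), that form is where the "ask the user" step would live.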
I'm working on a database that handles a lot of data: one invoice can have 7482 different articles. Validating an invoice takes a very long time; it took 26 minutes to validate one with 7482 articles. I found the method that takes all that time: it is "action_move_create" inside "odoo\addons\account\models\account_invoice.py".
@api.multi
def action_move_create(self):
    """ Creates invoice related analytics and financial move lines """
    account_move = self.env['account.move']

    for inv in self:
        if not inv.journal_id.sequence_id:
            raise UserError(_('Please define sequence on the journal related to this invoice.'))
        if not inv.invoice_line_ids.filtered(lambda line: line.account_id):
            raise UserError(_('Please add at least one invoice line.'))
        if inv.move_id:
            continue

        if not inv.date_invoice:
            inv.write({'date_invoice': fields.Date.context_today(self)})
        if not inv.date_due:
            inv.write({'date_due': inv.date_invoice})
        company_currency = inv.company_id.currency_id

        # create move lines (one per invoice line + eventual taxes and analytic lines)
        iml = inv.invoice_line_move_line_get()
        iml += inv.tax_line_move_line_get()

        diff_currency = inv.currency_id != company_currency
        # create one move line for the total and possibly adjust the other lines amount
        total, total_currency, iml = inv.compute_invoice_totals(company_currency, iml)

        name = inv.name or ''
        if inv.payment_term_id:
            totlines = inv.payment_term_id.with_context(currency_id=company_currency.id).compute(total, inv.date_invoice)[0]
            res_amount_currency = total_currency
            for i, t in enumerate(totlines):
                if inv.currency_id != company_currency:
                    amount_currency = company_currency._convert(t[1], inv.currency_id, inv.company_id, inv._get_currency_rate_date() or fields.Date.today())
                else:
                    amount_currency = False

                # last line: add the diff
                res_amount_currency -= amount_currency or 0
                if i + 1 == len(totlines):
                    amount_currency += res_amount_currency

                iml.append({
                    'type': 'dest',
                    'name': name,
                    'price': t[1],
                    'account_id': inv.account_id.id,
                    'date_maturity': t[0],
                    'amount_currency': diff_currency and amount_currency,
                    'currency_id': diff_currency and inv.currency_id.id,
                    'invoice_id': inv.id
                })
        else:
            iml.append({
                'type': 'dest',
                'name': name,
                'price': total,
                'account_id': inv.account_id.id,
                'date_maturity': inv.date_due,
                'amount_currency': diff_currency and total_currency,
                'currency_id': diff_currency and inv.currency_id.id,
                'invoice_id': inv.id
            })

        part = self.env['res.partner']._find_accounting_partner(inv.partner_id)
        line = [(0, 0, self.line_get_convert(l, part.id)) for l in iml]
        line = inv.group_lines(iml, line)
        line = inv.finalize_invoice_move_lines(line)

        date = inv.date or inv.date_invoice
        move_vals = {
            'ref': inv.reference,
            'line_ids': line,
            'journal_id': inv.journal_id.id,
            'date': date,
            'narration': inv.comment,
        }
        move = account_move.create(move_vals)
        # Pass invoice in method post: used if you want to get the same
        # account move reference when creating the same invoice after a cancelled one:
        move.post(invoice=inv)
        # make the invoice point to that move
        vals = {
            'move_id': move.id,
            'date': date,
            'move_name': move.name,
        }
        inv.write(vals)
    return True
Could you suggest some solutions? Assume that the hardware is sufficient to run Odoo correctly.
I optimized it by using raw SQL queries. I added the following code to the account.invoice model.
The first one is the definition of _mock_create_move_line (called in action_move_create).
def _mock_create_move_line(self, model, values, move):
    bad_names = ["analytic_line_ids", "tax_ids", "analytic_tag_ids"]
    other_fields = [
        "currency_id", "debit", "credit", "balance",
        "debit_cash_basis", "credit_cash_basis", "balance_cash_basis",
        "company_currency_id", "amount_residual",
        "amount_residual_currency", "tax_base_amount", "reconciled",
        "company_id", "counterpart"
    ]
    cr = self.env.cr
    quote = '"{}"'.format
    columns = []
    columns1 = []
    for i, v in enumerate(values):
        v = model._add_missing_default_values(v)
        account_id = self.env['account.account'].browse(v['account_id'])
        # compulsory columns and some stored related columns
        # (related fields are not triggered by a raw insert)
        v.update({
            'move_id': move.id,
            'date_maturity': move.date,
            'company_id': account_id.company_id.id,
            'date': move.date,
            'journal_id': move.journal_id.id,
            'user_type_id': account_id.user_type_id.id,
            'create_uid': self.env.uid,
            'create_date': fields.Datetime.now()
        })
        temp_column = []
        for name, val in sorted(v.items()):
            if name in bad_names:
                continue
            field = model._fields[name]
            if field.column_type:
                col_val = field.convert_to_column(val, model, v)
                temp_column.append(col_val)
                if not i:
                    columns1.append((name, field.column_format, col_val))
        columns.append(tuple(temp_column))

    model.check_access_rule('create')
    try:
        # one "%s" per row: psycopg2 adapts each Python tuple to a SQL row literal
        query = "INSERT INTO {} ({}) VALUES {} RETURNING id".format(
            quote(model._table),
            ", ".join(quote(name) for name, fmt, val in columns1),
            ", ".join('%s' for fmt in columns),
        )
        cr.execute(query, columns)
        ids = [r[0] for r in cr.fetchall()]
        # clear the model cache to take account of the new insertion;
        # if not executed, relational fields will not be updated
        model.invalidate_cache()
        account_move_line_ids = model.browse(ids)
        account_move_line_ids.modified(other_fields)
        account_move_line_ids.recompute()
        # update parent_path
        account_move_line_ids._parent_store_create()
    except Exception as e:
        _logger.info(e)
        cr.rollback()
    return
The second one overrides the native method action_move_create. I made some modifications so that it calls _mock_create_move_line when 'raw_sql' is in the context.
@api.multi
def action_move_create(self):
    """ Creates invoice related analytics and financial move lines """
    # TODO : make choice between ORM or raw sql according to the context
    account_move = self.env['account.move']

    for inv in self:
        if not inv.journal_id.sequence_id:
            raise UserError(_('Please define sequence on the journal related to this invoice.'))
        if not inv.invoice_line_ids.filtered(lambda line: line.account_id):
            raise UserError(_('Please add at least one invoice line.'))
        if inv.move_id:
            continue

        if not inv.date_invoice:
            inv.write({'date_invoice': fields.Date.context_today(self)})
        if not inv.date_due:
            inv.write({'date_due': inv.date_invoice})
        company_currency = inv.company_id.currency_id

        # create move lines (one per invoice line + eventual taxes and analytic lines)
        iml = inv.invoice_line_move_line_get()
        iml += inv.tax_line_move_line_get()

        diff_currency = inv.currency_id != company_currency
        # create one move line for the total and possibly adjust the other lines amount
        total, total_currency, iml = inv.compute_invoice_totals(company_currency, iml)

        name = inv.name or ''
        if inv.payment_term_id:
            totlines = \
                inv.payment_term_id.with_context(currency_id=company_currency.id).compute(total, inv.date_invoice)[0]
            res_amount_currency = total_currency
            for i, t in enumerate(totlines):
                if inv.currency_id != company_currency:
                    amount_currency = company_currency._convert(t[1], inv.currency_id, inv.company_id,
                                                                inv._get_currency_rate_date() or fields.Date.today())
                else:
                    amount_currency = False

                # last line: add the diff
                res_amount_currency -= amount_currency or 0
                if i + 1 == len(totlines):
                    amount_currency += res_amount_currency

                iml.append({
                    'type': 'dest',
                    'name': name,
                    'price': t[1],
                    'account_id': inv.account_id.id,
                    'date_maturity': t[0],
                    'amount_currency': diff_currency and amount_currency,
                    'currency_id': diff_currency and inv.currency_id.id,
                    'invoice_id': inv.id
                })
        else:
            iml.append({
                'type': 'dest',
                'name': name,
                'price': total,
                'account_id': inv.account_id.id,
                'date_maturity': inv.date_due,
                'amount_currency': diff_currency and total_currency,
                'currency_id': diff_currency and inv.currency_id.id,
                'invoice_id': inv.id
            })

        part = self.env['res.partner']._find_accounting_partner(inv.partner_id)
        line = [(0, 0, self.line_get_convert(l, part.id)) for l in iml]
        line = inv.group_lines(iml, line)
        line = inv.finalize_invoice_move_lines(line)

        date = inv.date or inv.date_invoice

        if self.env.context.get('raw_sql', None):
            move_vals = {
                'ref': inv.reference,
                'journal_id': inv.journal_id.id,
                'date': date,
                'narration': inv.comment,
            }
            # remove the (0, 0, ...) wrappers;
            # override the group_lines method to avoid looping in the next instruction
            new_lines = [nl[2] for nl in line]
            # TODO do not call compute here, add with ...norecompute()
            move = account_move.create(move_vals)
            move.env.cr.commit()
            self._mock_create_move_line(self.env['account.move.line'], new_lines, move)
            # Pass invoice in method post: used if you want to get the same
            # account move reference when creating the same invoice after a cancelled one.
            # Computed fields are not triggered automatically by the raw SQL insertion,
            # so recompute them here (is it correct to call them like this? find a better way)
            move._amount_compute()
            move._compute_partner_id()
            move._compute_matched_percentage()
        else:
            # default behaviour
            move_vals = {
                'ref': inv.reference,
                'line_ids': line,
                'journal_id': inv.journal_id.id,
                'date': date,
                'narration': inv.comment,
            }
            move = account_move.create(move_vals)
            move.post(invoice=inv)

        # make the invoice point to that move
        vals = {
            'move_id': move.id,
            'date': date,
            'move_name': move.name,
        }
        inv.write(vals)
    return True
Now the execution time is less than one minute for about 7000 records inserted into account.move.line.
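For reference, a small usage sketch (assuming invoices is an account.invoice recordset); the raw-SQL path is only taken when the flag is present in the context:

# Default ORM path: behaviour unchanged.
invoices.action_move_create()

# Opt in to the raw-SQL fast path via the context flag checked above.
invoices.with_context(raw_sql=True).action_move_create()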
This is my code. I'm looking for a more efficient way to write it: I have multiple variables that get inserted into a dictionary.
Please feel free to suggest; other options like arrays etc. will do.
import math

def momentEndSpan(span_type, max_combo, length):
    if "simply supported" == span_type:
        q = max_combo
        force = {}
        RA = {"PA": q*length/2}
        RB = {"PB": q*length/2}
        RA_moment = {"MA": 0}
        R_mid_moment = {"Mmid": (q*math.pow(length, 2))/8}
        RB_moment = {"MB": 0}
        force.update(RA)
        force.update(RB)
        force.update(RA_moment)
        force.update(R_mid_moment)
        force.update(RB_moment)
        return force
    elif "one end continuous" == span_type:
        q = max_combo
        x = (3/8)*length
        force = {}
        RA = {"Phinge": 3*q*length/8}
        RB = {"Pfixed": 5*q*length/8}
        RA_moment = {"Mhinge": 0}
        R_mid_moment = {"Mmid": (q*math.pow(length, 2))*(9/128)}
        RB_moment = {"MB": -1*(q*math.pow(length, 2))/8}
        force.update(RA)
        force.update(RB)
        force.update(RA_moment)
        force.update(R_mid_moment)
        force.update(RB_moment)
        return force
Thank you very much
The "More Pythonic" way is to create one dictionary and update once.
q = max_combo
force = {}
if "simply supported" == span_type:
    new = {"PA": q*length/2,
           "PB": q*length/2,
           "MA": 0,
           "Mmid": (q*math.pow(length, 2))/8,
           "MB": 0}
elif "one end continuous" == span_type:
    x = (3/8)*length
    new = {"Phinge": 3*q*length/8,
           "Pfixed": 5*q*length/8,
           "Mhinge": 0,
           "Mmid": (q*math.pow(length, 2))*(9/128),
           "MB": -1*(q*math.pow(length, 2))/8}
force.update(new)
Also, note that if the force dictionary doesn't contain any previously defined items, you can simply return new, and/or just keep updating new in your later operations if there are any. Or just use the name force instead of new:
q = max_combo
if "simply supported" == span_type:
    force = {...}
elif "one end continuous" == span_type:
    x = (3/8)*length
    force = {...}
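Putting it together, a minimal runnable sketch of the simplified function; the {...} placeholders stand for the dictionary literals shown earlier, and raising on an unknown span_type is my addition so the function never falls through with force undefined:

import math

def momentEndSpan(span_type, max_combo, length):
    q = max_combo
    if span_type == "simply supported":
        return {"PA": q*length/2,
                "PB": q*length/2,
                "MA": 0,
                "Mmid": q*math.pow(length, 2)/8,
                "MB": 0}
    elif span_type == "one end continuous":
        return {"Phinge": 3*q*length/8,
                "Pfixed": 5*q*length/8,
                "Mhinge": 0,
                "Mmid": q*math.pow(length, 2)*(9/128),
                "MB": -q*math.pow(length, 2)/8}
    raise ValueError("unknown span_type: %r" % span_type)

# Example: a 6 m simply supported span under q = 10 kN/m
print(momentEndSpan("simply supported", 10, 6))
# {'PA': 30.0, 'PB': 30.0, 'MA': 0, 'Mmid': 45.0, 'MB': 0}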
I'm trying to use the Eventful API to get information about music events (concerts) only, between two dates. For example, I want to get the below information about each concert from 20171012 to 20171013:
- city
- performer
- country
- latitude
- longitude
- genre
- title
- image
- StartTime
I'm using a Python example available online, which I changed to get the data above. But for now it's not working; I'm only able to get this information:

{'latitude': '40.4',
 'longitude': '-3.68333',
 'start_time': '2017-10-12 20:00:00',
 'city_name': 'Madrid',
 'title': 'Kim Waters & Maysa Smooth en Hot Jazz Festival'}

But the performer, genre, country, and image URL are not working. Do you know how to get that information? When I change the Python example below to get this information, it always returns an empty array.
Python example (working, but without the performer, genre, country, and image URL; if I add these elements to event_features I get an empty array):
import requests
import datetime


def get_event(user_key, event_location, start_date, end_date, event_features, fname):
    data_lst = []  # output
    start_year = int(start_date[0:4])
    start_month = int(start_date[4:6])
    start_day = int(start_date[6:])
    end_year = int(end_date[0:4])
    end_month = int(end_date[4:6])
    end_day = int(end_date[6:])
    start_date = datetime.date(start_year, start_month, start_day)
    end_date = datetime.date(end_year, end_month, end_day)
    step = datetime.timedelta(days=1)
    while start_date <= end_date:
        date = str(start_date.year)
        if start_date.month < 10:
            date += '0' + str(start_date.month)
        else:
            date += str(start_date.month)
        if start_date.day < 10:
            date += '0' + str(start_date.day)
        else:
            date += str(start_date.day)
        date += "00"
        date += "-" + date

        url = "http://api.eventful.com/json/events/search?"
        url += "&app_key=" + user_key
        url += "&location=" + event_location
        url += "&date=" + date
        url += "&page_size=250"
        url += "&sort_order=popularity"
        url += "&sort_direction=descending"
        url += "&q=music"
        url += "&c=music"
        data = requests.get(url).json()

        try:
            for i in range(len(data["events"]["event"])):
                data_dict = {}
                for feature in event_features:
                    data_dict[feature] = data["events"]["event"][i][feature]
                data_lst.append(data_dict)
        except:
            pass

        print(data_lst)
        start_date += step


def main():
    user_key = ""
    event_location = "Madrid"
    start_date = "20171012"
    end_date = "20171013"

    event_location = event_location.replace("-", " ")
    start_date = start_date
    end_date = end_date
    event_features = ["latitude", "longitude", "start_time"]
    event_features += ["city_name", "title"]
    event_fname = "events.csv"

    get_event(user_key, event_location, start_date, end_date, event_features, event_fname)


if __name__ == '__main__':
    main()
You should debug your problem, not ignore all exceptions.
Replace the lines try: ... except: pass with:
data = requests.get(url).json()
if "event" in data.get("events", {}):
    for row in data["events"]["event"]:
        # print(row)  # while debugging, you can inspect the available data here
        data_dict = {feature: row[feature] for feature in event_features}
        data_lst.append(data_dict)
else:
    pass  # a problem - you can do something here
You will see a KeyError with the name of the missing feature that is not present in row. You should fix the missing features and read the documentation of that service's API. The country feature is probably "country_name", similar to "city_name". Maybe you should also set the "include" parameter to request more sections of detail in the search results than the defaults.
A universal try: ... except: pass should never be used, because "Errors should never pass silently." (The Zen of Python)
Read Handling Exceptions:
... The last except clause may omit the exception name(s), to serve as a wildcard. Use this with extreme caution, since it is easy to mask a real programming error in this way! ...
A more important command where unexpected exceptions are possible is requests.get(url).json(), e.g. a timeout. In any case, you should not continue the while loop if there is a problem.
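A hedged sketch of what narrower handling could look like inside that while loop; the 10-second timeout is an arbitrary choice, and requests.RequestException is the base class covering timeouts and connection errors:

import requests

while start_date <= end_date:
    try:
        # fail fast instead of hanging forever on a slow response
        data = requests.get(url, timeout=10).json()
    except requests.RequestException as exc:
        print('request failed:', exc)
        break  # stop the loop instead of silently continuing
    ...  # process data as before
    start_date += step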
If you look at the data returned by eventful.com, a few things are clear:
- For country, the field to be used is country_name. This was missing from your event_features list.
- There can be multiple performers for each event. To get all the performers, you need to add "performers" to your event_features list.
- There is no field named Genre, so you cannot get the genre.
- The "image" field is always None. This means there is no image available.
Here is the modified code. Hopefully it works much better and will help you move forward.
import datetime
import requests

data_lst = []  # output
event_features = ["latitude", "longitude", "start_time", "city_name",
                  "country_name", "title", "image", "performers"]


def get_event(user_key, event_location, start_date, end_date):
    start_year = int(start_date[0:4])
    start_month = int(start_date[4:6])
    start_day = int(start_date[6:])
    end_year = int(end_date[0:4])
    end_month = int(end_date[4:6])
    end_day = int(end_date[6:])
    start_date = datetime.date(start_year, start_month, start_day)
    end_date = datetime.date(end_year, end_month, end_day)
    step = datetime.timedelta(days=1)
    while start_date <= end_date:
        date = str(start_date.year)
        if start_date.month < 10:
            date += '0' + str(start_date.month)
        else:
            date += str(start_date.month)
        if start_date.day < 10:
            date += '0' + str(start_date.day)
        else:
            date += str(start_date.day)
        date += "00"
        date += "-" + date

        url = "http://api.eventful.com/json/events/search?"
        url += "&app_key=" + user_key
        url += "&location=" + event_location
        url += "&date=" + date
        url += "&page_size=250"
        url += "&sort_order=popularity"
        url += "&sort_direction=descending"
        url += "&q=music"
        url += "&c=music"
        data = requests.get(url).json()
        print("==== Data Returned by eventful.com ====\n", data)

        try:
            for i in range(len(data["events"]["event"])):
                data_dict = {}
                for feature in event_features:
                    data_dict[feature] = data["events"]["event"][i][feature]
                data_lst.append(data_dict)
        except IndexError:
            pass

        print("====================================")
        print(data_lst)
        start_date += step


def main():
    user_key = "Enter Your Key Here"
    event_location = "Madrid"
    start_date = "20171012"
    end_date = "20171013"

    event_location = event_location.replace("-", " ")
    start_date = start_date
    end_date = end_date
    # event_fname = "events.csv"

    get_event(user_key, event_location, start_date, end_date)


if __name__ == '__main__':
    main()
I was able to successfully pull data from the Eventful API for the performer, image, and country fields. However, I don't think the Eventful Search API supports genre - I don't see it in their documentation.
To get country, I added "country_name", "country_abbr" to your event_features array. That adds these values to the resulting JSON:
'country_abbr': u'ESP',
'country_name': u'Spain'
Performers can also be retrieved by adding "performers" to event_features. That will add this to the JSON output:
'performers': {
    u'performer': {
        u'name': u'Kim Waters',
        u'creator': u'evdb',
        u'url': u'http://concerts.eventful.com/Kim-Waters?utm_source=apis&utm_medium=apim&utm_campaign=apic',
        u'linker': u'evdb',
        u'short_bio': u'Easy Listening / Electronic / Jazz',
        u'id': u'P0-001-000333271-4'
    }
}
To retrieve images, add "image" to the event_features array. Note that not all events have images, however. You will either see 'image': None or:
'image': {
u'medium': {
u'url': u'http://d1marr3m5x4iac.cloudfront.net/store/skin/no_image/categories/128x128/other.jpg',
u'width': u'128',
u'height': u'128'
},
u'thumb': {
u'url': u'http://d1marr3m5x4iac.cloudfront.net/store/skin/no_image/categories/48x48/other.jpg',
u'width': u'48',
u'height': u'48'
}
}
Good luck! :)
I have files with incorrect JSON that I want to start fixing by getting it into properly grouped chunks.
The brace grouping {{ {} {} } } {{}} {{{}}} should already be correct
How can I grab all the top-level braces, correctly grouped, as separate strings?
If you don't want to install any extra modules, a simple function will do:
def top_level(s):
    depth = 0
    start = -1
    for i, c in enumerate(s):
        if c == '{':
            if depth == 0:
                start = i
            depth += 1
        elif c == '}' and depth:
            depth -= 1
            if depth == 0:
                yield s[start:i+1]

print(list(top_level('{{ {} {} } } {{}} {{{}}}')))
Output:
['{{ {} {} } }', '{{}}', '{{{}}}']
It will skip invalid braces but could be easily modified to report an error when they are spotted.
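For instance, a hedged sketch of an error-reporting variant; raising ValueError is my choice of policy, and any other reporting would fit the same two branches:

def top_level_strict(s):
    depth = 0
    start = -1
    for i, c in enumerate(s):
        if c == '{':
            if depth == 0:
                start = i
            depth += 1
        elif c == '}':
            if depth == 0:
                # closing brace with no matching opener
                raise ValueError('unmatched "}" at index %d' % i)
            depth -= 1
            if depth == 0:
                yield s[start:i+1]
    if depth:
        # ran out of input with a group still open
        raise ValueError('unclosed "{" at index %d' % start)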
Using the third-party regex module (its recursive pattern (?R) is not available in the standard library re module):
In [1]: import regex
In [2]: braces = regex.compile(r"\{(?:[^{}]++|(?R))*\}")
In [3]: braces.findall("{{ {} {} } } {{}} {{{}}}")
Out[3]: ['{{ {} {} } }', '{{}}', '{{{}}}']
pyparsing can be really helpful here. It will handle pathological cases where you have braces inside strings, etc. It might be a little tricky to do all of this work yourself, but fortunately, somebody (the author of the library) has already done the hard stuff for us.... I'll reproduce the code here to prevent link-rot:
# jsonParser.py
#
# Implementation of a simple JSON parser, returning a hierarchical
# ParseResults object support both list- and dict-style data access.
#
# Copyright 2006, by Paul McGuire
#
# Updated 8 Jan 2007 - fixed dict grouping bug, and made elements and
# members optional in array and object collections
#
json_bnf = """
object
    { members }
    {}
members
    string : value
    members , string : value
array
    [ elements ]
    []
elements
    value
    elements , value
value
    string
    number
    object
    array
    true
    false
    null
"""
from pyparsing import *

TRUE = Keyword("true").setParseAction(replaceWith(True))
FALSE = Keyword("false").setParseAction(replaceWith(False))
NULL = Keyword("null").setParseAction(replaceWith(None))

jsonString = dblQuotedString.setParseAction(removeQuotes)
jsonNumber = Combine(Optional('-') + ('0' | Word('123456789', nums)) +
                     Optional('.' + Word(nums)) +
                     Optional(Word('eE', exact=1) + Word(nums + '+-', nums)))

jsonObject = Forward()
jsonValue = Forward()
jsonElements = delimitedList(jsonValue)
jsonArray = Group(Suppress('[') + Optional(jsonElements) + Suppress(']'))
jsonValue << (jsonString | jsonNumber | Group(jsonObject) | jsonArray | TRUE | FALSE | NULL)
memberDef = Group(jsonString + Suppress(':') + jsonValue)
jsonMembers = delimitedList(memberDef)
jsonObject << Dict(Suppress('{') + Optional(jsonMembers) + Suppress('}'))

jsonComment = cppStyleComment
jsonObject.ignore(jsonComment)


def convertNumbers(s, l, toks):
    n = toks[0]
    try:
        return int(n)
    except ValueError:
        return float(n)

jsonNumber.setParseAction(convertNumbers)
Phew! That's a lot ... Now how do we use it? The general strategy here will be to scan the string for matches and then slice those matches out of the original string. Each scan result is a tuple of the form (lex-tokens, start_index, stop_index). For our use, we don't care about the lex-tokens, just the start and stop. We could write string[result[1]:result[2]] and it would work. We can also do string[slice(*result[1:])] -- take your pick.
results = jsonObject.scanString(testdata)
for result in results:
    print('*' * 80)
    print(testdata[slice(*result[1:])])