For loop only takes the last value in AWS DynamoDB - Python

I am trying to insert records into a table, but only the last record (result data) from the loop is inserted into the table.
Here is the code I tried:
CDates = ['2020-05-10','2020-05-12','2020-05-13','2020-05-16','2020-05-20']
ResultData = {}
for date in CDates:
    filterDate = Key('Date').eq(id)
    appResponse = appTable.scan(FilterExpression = filterDate)
    accResp = table.query(KeyConditionExpression = Key('PrimaryId').eq('Key'),FilterExpression = Key('Date').eq(date))
    if len(accResp['Items']) == 0:
        ResultData['PrimaryId'] = 'Key'
        ResultData['CreatedDate'] = date
        ResultData['Type'] = 'Appt'
        ResultData['Id'] = str(uuid.uuid4())
        print(ResultData)
        table.put_item(Item=ResultData)
I'm not seeing where I went wrong.

You created ResultData once, outside the loop, and overwrote the values for the same keys every time the loop ran. Create a fresh dictionary on each iteration instead. Try this:
CDates = ['2020-05-10', '2020-05-12', '2020-05-13', '2020-05-16', '2020-05-20']
for date in CDates:
    filterDate = Key('Date').eq(id)
    appResponse = appTable.scan(FilterExpression=filterDate)
    accResp = table.query(
        KeyConditionExpression=Key('PrimaryId').eq('Key'),
        FilterExpression=Key('Date').eq(date))
    if len(accResp['Items']) == 0:
        ResultData = {
            'PrimaryId': 'Key',
            'CreatedDate': date,
            'Type': 'Appt',
            'Id': str(uuid.uuid4())
        }
        print(ResultData)
        table.put_item(Item=ResultData)
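As a general illustration of the aliasing pitfall the answer describes, here is a minimal, dependency-free sketch (the field name is illustrative) contrasting one shared dict with a fresh dict per iteration:
dates = ['2020-05-10', '2020-05-12', '2020-05-13']

# Reusing one dict: every list entry points at the same object,
# so all entries end up showing the last values written.
shared = {}
reused = []
for d in dates:
    shared['CreatedDate'] = d
    reused.append(shared)
print(reused)  # three identical entries, all '2020-05-13'

# A fresh dict per iteration keeps each record's own values.
fresh = [{'CreatedDate': d} for d in dates]
print(fresh)   # three distinct dates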

Related

How to append data to a pandas dataframe?

I have a fairly complex sequence of functions calling APIs and appending the result set to a dataframe. The thing is, when I print the dataframe during each loop iteration I see new values, but when the loop finishes I only see one value for final_df. Any thoughts as to why?
df = pd.DataFrame(columns=['repo', 'number', 'title', 'branch', 'merged_at', 'created_at', 'authored_by', 'merged_by', 'from_version', 'to_version'])

def get_prs(repo, pr_number):
    response = requests.request("GET", pgv.github_pr_url + str(repo) + '/pulls/' + str(pr_number), headers=pgv.headers)
    response = response.json()
    return response

# query GitHub to get all commits between from_version and to_version
def get_commits(repo, from_version, to_version):
    response = requests.request("GET", pgv.github_commits_url + str(repo) + '/compare/' + str(from_version) + '...' + str(to_version), headers=pgv.headers)
    response = response.json()
    for i in range(0, len(response['commits'])):
        x = re.search(r"\AMerge pull request #(?P<number>\d+) from/(?P<branch>.*)", response['commits'][i].get('commit').get('message'))
        if x is None:
            pass
        else:
            return x.group('number'), x.group('branch')

def return_deploy_events():
    final_object = []
    response = requests.request('POST', pgv.url, params={'api_key': pgv.key}, json=pgv.query_params)
    response = response.json()
    if "jobs" in response:
        time.sleep(5)
    else:
        for i in range(0, len(response['query_result']['data']['rows'])):
            try:
                repo = response['query_result']['data']['rows'][i].get('REPO')
                from_version = response['query_result']['data']['rows'][i].get('FROM_VERSION')
                to_version = response['query_result']['data']['rows'][i].get('TO_VERSION')
                pull_requests = get_prs(repo, get_commits(repo, from_version, to_version)[0])
                # pack into all one return
                final_df = df.append({
                    'repo': repo,
                    'title': pull_requests.get('title'),
                    'branch': get_commits(repo, from_version, to_version)[1],
                    'created_at': pull_requests.get('created_at'),
                    'merged_at': pull_requests.get('merged_at'),
                    'authored_by': pull_requests.get('user').get('login'),
                    'merged_by': pull_requests.get('merged_by').get('login'),
                    'number': get_commits(repo, from_version, to_version)[0],
                    'from_version': from_version,
                    'to_version': to_version}, ignore_index=True)
                # HERE, WHEN UNCOMMENTED, PRINTS ALL RECORDS I WANT APPENDED
                # print(final_df.head(10))
            except Exception:
                pass
        # BELOW IS WHERE IT PRINTS ONLY 1 RECORD
        print(final_df)
        # final_df = json.loads(final_df.to_json(orient='records'))
        # gec.json_to_s3(final_df, glob_common_vars.s3_resource, glob_common_vars.s3_bucket_name, 'test/test.json.gzip')

return_deploy_events()
I think the problem is that you are assigning each row to the same variable, so only the last row survives at the end. Try appending each row to a result list instead:
def return_deploy_events():
    final_object = []
    result = []
    response = requests.request('POST', pgv.url, params={'api_key': pgv.key}, json=pgv.query_params)
    response = response.json()
    if "jobs" in response:
        time.sleep(5)
    else:
        for i in range(0, len(response['query_result']['data']['rows'])):
            try:
                repo = response['query_result']['data']['rows'][i].get('REPO')
                from_version = response['query_result']['data']['rows'][i].get('FROM_VERSION')
                to_version = response['query_result']['data']['rows'][i].get('TO_VERSION')
                pull_requests = get_prs(repo, get_commits(repo, from_version, to_version)[0])
                # pack into all one return
                final_df = df.append({
                    'repo': repo,
                    'title': pull_requests.get('title'),
                    'branch': get_commits(repo, from_version, to_version)[1],
                    'created_at': pull_requests.get('created_at'),
                    'merged_at': pull_requests.get('merged_at'),
                    'authored_by': pull_requests.get('user').get('login'),
                    'merged_by': pull_requests.get('merged_by').get('login'),
                    'number': get_commits(repo, from_version, to_version)[0],
                    'from_version': from_version,
                    'to_version': to_version}, ignore_index=True)
                result.append(final_df)  # append the current row to result
            except Exception:
                pass
        print(result)  # print the final result
I just added two lines of code, but I hope it works.
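A side note beyond the original answer: DataFrame.append is deprecated since pandas 1.4 and removed in pandas 2.0, so on current pandas the usual pattern is to collect plain dicts and build the frame once after the loop. A minimal sketch (row fields shortened for illustration):
import pandas as pd

rows = []
# inside the loop, collect plain dicts instead of appending to a DataFrame
rows.append({'repo': 'example-repo', 'number': '42', 'title': 'Fix build'})
rows.append({'repo': 'example-repo', 'number': '43', 'title': 'Add tests'})

# build the DataFrame once, after the loop
final_df = pd.DataFrame(rows)
print(final_df)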

Parsing logs to JSON in Python

Folks,
I am trying to parse a log file into JSON format. I have a lot of logs; here is one of them.
How can I parse this?
03:02:03.113 [info] ext_ref = BANK24AOS_cl_reqmarketcreditorderstate_6M8I1NT8JKYD_1591844522410384_4SGA08M8KIXQ reqid = 1253166 type = INREQ channel = BANK24AOS sid = msid_1591844511335516_KRRNBSLH2FS duration = 703.991 req_uri = marketcredit/order/state login = 77012221122 req_type = cl_req req_headers = {"accept-encoding":"gzip","connection":"close","host":"test-mobileapp-api.bank.kz","user-agent":"okhttp/4.4.1","x-forwarded-for":"212.154.169.134","x-real-ip":"212.154.169.134"} req_body = {"$sid":"msid_1591844511335516_KRRNBSLH2FS","$sid":"msid_1591844511335516_KRRNBSLH2FS","app":"bank","app_version":"2.3.2","channel":"aos","colvir_token":"GExPR0lOX1BBU1NXT1JEX0NMRUFSVEVYVFNzrzh4Thk1+MjDKWl/dDu1fQPsJ6gGLSanBp41yLRv","colvir_commercial_id":"-1","colvir_id":"000120.335980","openway_commercial_id":"6247520","openway_id":"6196360","$lang":"ru","ekb_id":"923243","inn":"990830221722","login":"77012221122","bank24_id":"262"} resp_body = {"task_id":"","status":"success","data":{"state":"init","applications":[{"status":"init","id":"123db561-34a3-4a8d-9fa7-03ed6377b44f","name":"Sulpak","amount":101000,"items":[{"name":"Switch CISCO x24","price":100000,"count":1,"amount":100000}]}],"segment":{"range":{"min":6,"max":36,"step":1},"payment_day":{"max":28,"min":1}}}}
Into this type of JSON, or any other format (but I guess JSON is the best one):
{
    "time":"03:02:03.113",
    "class_req":"info",
    "ext_ref":"BANK24AOS_cl_reqmarketcreditorderstate_6M8I1NT8JKYD_1591844522410384_4SGA08M8KIXQ",
    "reqid":"1253166",
    "type":"INREQ",
    "channel":"BANK24AOS",
    "sid":"msid_1591844511335516_KRRNBSLH2FS",
    "duration":"703.991",
    "req_uri":"marketcredit/order/state",
    "login":"77012221122",
    "req_type":"cl_req",
    "req_headers":{
        "accept-encoding":"gzip",
        "connection":"close",
        "host":"test-mobileapp-api.bank.kz",
        "user-agent":"okhttp/4.4.1",
        "x-forwarded-for":"212.154.169.134",
        "x-real-ip":"212.154.169.134"
    },
    "req_body":{
        "$sid":"msid_1591844511335516_KRRNBSLH2FS",
        "$sid":"msid_1591844511335516_KRRNBSLH2FS",
        "app":"bank",
        "app_version":"2.3.2",
        "channel":"aos",
        "colvir_token":"GExPR0lOX1BBU1NXT1JEX0NMRUFSVEVYVFNzrzh4Thk1+MjDKWl/dDu1fQPsJ6gGLSanBp41yLRv",
        "colvir_commercial_id":"-1",
        "colvir_id":"000120.335980",
        "openway_commercial_id":"6247520",
        "openway_id":"6196360",
        "$lang":"ru",
        "ekb_id":"923243",
        "inn":"990830221722",
        "login":"77012221122",
        "bank24_id":"262"
    },
    "resp_body":{
        "task_id":"",
        "status":"success",
        "data":{
            "state":"init",
            "applications":[
                {
                    "status":"init",
                    "id":"123db561-34a3-4a8d-9fa7-03ed6377b44f",
                    "name":"Sulpak",
                    "amount":101000,
                    "items":[
                        {
                            "name":"Switch CISCO x24",
                            "price":100000,
                            "count":1,
                            "amount":100000
                        }
                    ]
                }
            ],
            "segment":{
                "range":{
                    "min":6,
                    "max":36,
                    "step":1
                },
                "payment_day":{
                    "max":28,
                    "min":1
                }
            }
        }
    }
}
I tried splitting the whole text first, but then I hit another problem: matching keys to values based on the '=' sign. Also, some keys may have empty values. For example:
type = INREQ channel = sid = duration = 1.333
(To tell that a value is empty you have to pay attention to the number of spaces; usually there is one space between the previous value and the next key.) So this example should look like this:
{
    "type":"INREQ",
    "channel":"",
    "sid":"",
    "duration":"1.333"
}
Thanks in advance!
Here is one approach. One thing to note: the duplicate "$sid":"msid_1591844511335516_KRRNBSLH2FS" key is collapsed into a single entry, since a Python dict cannot hold duplicate keys.
import re
import json

text = """03:02:03.113 [info] ext_ref = reqid = 1253166 type = INREQ channel = BANK24AOS sid = msid_1591844511335516_KRRNBSLH2FS duration = 703.991 req_uri = marketcredit/order/state login = 77012221122 req_type = cl_req req_headers = {"accept-encoding":"gzip","connection":"close","host":"test-mobileapp-api.bank.kz","user-agent":"okhttp/4.4.1","x-forwarded-for":"212.154.169.134","x-real-ip":"212.154.169.134"} req_body = {"$sid":"msid_1591844511335516_KRRNBSLH2FS","$sid":"msid_1591844511335516_KRRNBSLH2FS","app":"bank","app_version":"2.3.2","channel":"aos","colvir_token":"GExPR0lOX1BBU1NXT1JEX0NMRUFSVEVYVFNzrzh4Thk1+MjDKWl/dDu1fQPsJ6gGLSanBp41yLRv","colvir_commercial_id":"-1","colvir_id":"000120.335980","openway_commercial_id":"6247520","openway_id":"6196360","$lang":"ru","ekb_id":"923243","inn":"990830221722","login":"77012221122","bank24_id":"262"} resp_body = {"task_id":"","status":"success","data":{"state":"init","applications":[{"status":"init","id":"123db561-34a3-4a8d-9fa7-03ed6377b44f","name":"Sulpak","amount":101000,"items":[{"name":"Switch CISCO x24","price":100000,"count":1,"amount":100000}]}],"segment":{"range":{"min":6,"max":36,"step":1},"payment_day":{"max":28,"min":1}}}}"""

index1 = text.index('[')
index2 = text.index(']')
new_text = 'time = ' + text[:index1-1] + ' class_req = ' + text[index1+1:index2] + text[index2+2:]
lst = re.findall(r'\S+? = |\S+? = \{.*?\} |\S+? = \{.*?\}$|\S+? = \S+? ', new_text)
res = {}
for item in lst:
    key, equal, value = item.partition('=')
    key, value = key.strip(), value.strip()
    if value.startswith('{'):
        try:
            value = json.loads(value)
        except:
            print(value)
    res[key] = value
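The pattern above does not directly cover the empty-value case from the question (e.g. channel = followed immediately by the next key). One possible approach, sketched here on a short synthetic line, is to split on the key pattern itself so that empty values fall out naturally; this assumes values never contain a bare ' = ' sequence, which holds for this log format:
import re

line = "type = INREQ channel = sid = duration = 1.333"

# re.split with a capture group keeps the keys; the pieces after the first
# alternate key, value, key, value..., and values may be empty strings.
parts = re.split(r'(\S+) = ', line)
keys = parts[1::2]
values = [v.strip() for v in parts[2::2]]
print(dict(zip(keys, values)))
# {'type': 'INREQ', 'channel': '', 'sid': '', 'duration': '1.333'}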
You can try regular expressions in Python.
Here is what I wrote; it works for your problem.
For convenience I deleted the part of the string before "ext_ref..."; you can truncate the raw string directly.
import re
import json

string = 'ext_ref = BANK24AOS_cl_reqmarketcreditorderstate_6M8I1NT8JKYD_1591844522410384_4SGA08M8KIXQ reqid = 1253166 type = INREQ channel = BANK24AOS sid = msid_1591844511335516_KRRNBSLH2FS duration = 703.991 req_uri = marketcredit/order/state login = 77012221122 req_type = cl_req req_headers = {"accept-encoding":"gzip","connection":"close","host":"test-mobileapp-api.bank.kz","user-agent":"okhttp/4.4.1","x-forwarded-for":"212.154.169.134","x-real-ip":"212.154.169.134"} req_body = {"$sid":"msid_1591844511335516_KRRNBSLH2FS","$sid":"msid_1591844511335516_KRRNBSLH2FS","app":"bank","app_version":"2.3.2","channel":"aos","colvir_token":"GExPR0lOX1BBU1NXT1JEX0NMRUFSVEVYVFNzrzh4Thk1+MjDKWl/dDu1fQPsJ6gGLSanBp41yLRv","colvir_commercial_id":"-1","colvir_id":"000120.335980","openway_commercial_id":"6247520","openway_id":"6196360","$lang":"ru","ekb_id":"923243","inn":"990830221722","login":"77012221122","bank24_id":"262"} resp_body = {"task_id":"","status":"success","data":{"state":"init","applications":[{"status":"init","id":"123db561-34a3-4a8d-9fa7-03ed6377b44f","name":"Sulpak","amount":101000,"items":[{"name":"Switch CISCO x24","price":100000,"count":1,"amount":100000}]}],"segment":{"range":{"min":6,"max":36,"step":1},"payment_day":{"max":28,"min":1}}}}'

position = re.search("req_headers", string)  # position of req_headers
resp_body_pos = re.search("resp_body", string)
resp_body = string[resp_body_pos.span()[0]:]
res1 = {}
res1.setdefault(resp_body.split("=")[0], resp_body.split("=")[1])
print(res1)
before = string[:position.span()[0]]
after = string[position.span()[0]:resp_body_pos.span()[0]]  # handle req_body separately
res2 = re.findall(r"(\S+) = (\S+)", before)
print(res2)
res3 = re.findall(r"(\S+) = ({.*?})", after)
print(res3)
# res1 type: dict {'resp_body': '...'}  content of resp_body
# res2 type: list [(), ()...]  content before req_headers
# res3 type: list [(), ()...]  the rest of the content
And now you can do whatever you want with the data (e.g. transform each part into JSON).
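For instance, a possible way to merge the three partial results into a single dict (res2 and res3 are lists of (key, value) tuples; res1 is already a dict, with stray spaces left over from the '=' split that need stripping):
combined = {}
for key, value in res2 + res3:
    combined[key] = value
combined.update({k.strip(): v.strip() for k, v in res1.items()})
print(json.dumps(combined, ensure_ascii=False)[:120])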
Hope this is helpful

Expected singleton error occurs when returning more than one record in Odoo

This method gets the product price from the PO. It works well if the PO has only one record; otherwise I get this error:
raise ValueError("Expected singleton: %s" % self)
This is the method:
@api.multi
def create_refund_invoice(self):
    inv_obj = self.env['account.invoice']
    for pick in self.filtered(lambda x: x.return_type):
        type = 'in_refund' if pick.return_type == 'purchase' else 'out_refund'
        inv_lines = {'type': type, 'partner_id': pick.partner_id.id, 'invoice_line_ids': []}
        account = pick.return_type == 'sale' and pick.partner_id.property_account_receivable_id.id or pick.partner_id.property_account_payable_id.id
        inv_lines['account_id'] = account
        inv_lines['origin'] = pick.name
        inv_lines['name'] = pick.origin
        for line in pick.move_lines:
            name = line.product_id.partner_ref
            for rec in self:
                rec.order_id = line.env['purchase.order'].search([('name', '=', line.origin)]).order_line
                rec.price = rec.order_id.price_unit
                inv_lines['invoice_line_ids'] += [(0, None, {
                    'product_id': line.product_id.id,
                    'name': name,
                    'quantity': line.quantity_done,
                    'price_unit': rec.price,
                    'account_id': line.product_id.product_tmpl_id.get_product_accounts()['income'].id})]
        if inv_lines['invoice_line_ids']:
            inv_id = inv_obj.create(inv_lines)
            pick.invoice_id = inv_id.id
In Odoo, when a recordset holds more than one record, you cannot access its field values directly.
In your code you fetch the order_line of a purchase.order, and a single order may contain many lines. Iterate over them instead:
def create_refund_invoice(self):
    purchase_order_obj = self.env['purchase.order']
    inv_obj = self.env['account.invoice']
    for pick in self.filtered(lambda x: x.return_type):
        type = 'in_refund' if pick.return_type == 'purchase' else 'out_refund'
        inv_lines = {'type': type, 'partner_id': pick.partner_id.id, 'invoice_line_ids': []}
        account = pick.return_type == 'sale' and pick.partner_id.property_account_receivable_id.id or pick.partner_id.property_account_payable_id.id
        inv_lines['account_id'] = account
        inv_lines['origin'] = pick.name
        inv_lines['name'] = pick.origin
        for line in pick.move_lines:
            name = line.product_id.partner_ref
            for rec in self:
                order_lines = purchase_order_obj.search([('name', '=', line.origin)]).order_line
                # iterate the lines instead of reading a field on the multi-record set
                for pol in order_lines:
                    price = pol.price_unit
                    inv_lines['invoice_line_ids'] += [(0, None, {
                        'product_id': line.product_id.id,
                        'name': name,
                        'quantity': line.quantity_done,
                        'price_unit': price,
                        'account_id': line.product_id.product_tmpl_id.get_product_accounts()['income'].id})
                    ]
        if inv_lines['invoice_line_ids']:
            inv_id = inv_obj.create(inv_lines)
            pick.invoice_id = inv_id.id
I have updated the code above; test it and adjust as per your requirements.
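As a general note, not part of the original answer: when you only need one field from every record in a recordset, Odoo's mapped() avoids the singleton error without an explicit loop. A sketch, assuming it runs inside a model method with line in scope:
# 'orders' may hold any number of purchase.order records
orders = self.env['purchase.order'].search([('name', '=', line.origin)])
# orders.order_line.price_unit  # would raise "Expected singleton" for >1 line
prices = orders.mapped('order_line.price_unit')  # always returns a list of floats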

Django/Python Multiple records

I have a program that compares values from the database and from CSV files. My program works like this:
Database has values.
User uploads a file (multiple users, multiple files).
The program compares the values from the database and the CSV files and gives an output, which tells me that a particular value was found in this user's file.
But I want the program to also show me whether the value was found in the other users' files or not.
Here is a working example.
DB Values = [1,23,33,445,6656,88]
Example values of the CSV files:
File 1 values = [1,23,445,77,66,556,54]
File 2 values = [1,23,45,77,366]
File 3 values = [1,23,5,77,5356,524]
Output needed:
{'1':[(user1, some value),(user2, some value)...]}
Here is my code:
def LCR(request):
    template = "LCR\LCRGen.html"
    dest = Destination.objects.values_list('dest_num', flat=True)
    ratelist = {}
    csv_file = {}
    data_set = {}
    io_string = {}
    vendor = RateFile.objects.values_list()
    v_count = vendor.count()
    for v_id, v_name, v_file in vendor:
        vendor_name = str(v_name)
        vendornames = str(v_name)
        vendornames = {}
        for desNum in dest:
            desNum = str(desNum)
            for countvar in range(v_count):
                csv_file[vendor_name] = RateFile.objects.get(id=v_id).ven_file
                data_set[vendor_name] = csv_file[vendor_name].read().decode("UTF-8")
                io_string[vendor_name] = io.StringIO(data_set[vendor_name])
                next(io_string[vendor_name])
                for column in csv.reader(io_string[vendor_name], delimiter=str(u",")):
                    vendornames[column[0]] = column[1]
            for venNum, venValue in vendornames.items():
                venlen = len(venNum)
                deslen = len(desNum)
                if venlen >= deslen or venlen <= deslen:
                    if desNum[:-1] == venNum[:-1] and desNum[:-2] == venNum[:-2] and desNum[:-3] == venNum[:-3]:
                        ratelist[desNum] = [(vendor_name, venValue),]
                        if (vendor_name, venValue) in ratelist[desNum]:
                            ratelist[desNum] = [(vendor_name, venValue),]
                    elif desNum[:-1] == venNum[:-2] and desNum[:-2] == venNum[:-3] and desNum[:-3] == venNum[:-4]:
                        ratelist[desNum] = [(vendor_name, venValue),]
                        if (vendor_name, venValue) in ratelist[desNum]:
                            ratelist[desNum] = [(vendor_name, venValue),]
                    elif desNum[:-1] == desNum[:-3] and desNum[:-2] == venNum[:-4] and desNum[:-3] == venNum[:-5]:
                        ratelist[desNum] = [(vendor_name, venValue),]
                    elif desNum[:-1] == venNum[:-5] and desNum[:-2] == venNum[:-6]:
                        ratelist[desNum] = [(vendor_name, venValue),]
                        if (vendor_name, venValue) in ratelist[desNum]:
                            ratelist[desNum] = [(vendor_name, venValue),]
                    else:
                        pass
    print(ratelist)
    return render(request, template, {"ratelist": ratelist})
Output
Zong and Tata are usernames, and the float values are their respective values for each key of the dictionary.
{'12': [('Zong', ' 0.026')], '213': [('Tata', ' 4.150')], '49': [('Tata', ' 0.531')], '30': [('Zong', ' 0.87')], '454': [('Tata', ' 0.531')], '374': [('Zong', ' 0.87')],
This is what you asked for:
### your data example
db = [1, 23, 33, 445, 66, 556, 88]
us1 = [1, 23, 445, 77, 66, 556, 54]
us2 = [1, 23, 45, 77, 366]

### create a list of usernames (to use the string name in the dictionary)
userlist = ["us1", "us2"]

### initialize the dict for results
values_dict = {}

### open the loop on DB values
for value in db:
    # open loop on userlist
    for user in userlist:
        # if value is found in user list of values
        if value in eval(user):
            # if value is still NOT a key of the results dictionary, create the key with the tuple list as value
            if value not in values_dict:
                values_dict.update({value: [(user, value)]})
            # else just append the tuple (username, value) to the results dictionary for the corresponding DB value key
            else:
                values_dict[value].append([(user, value)])

values_dict
### OUTPUT:
{1: [('us1', 1), [('us2', 1)]], 23: [('us1', 23), [('us2', 23)]], 445: [('us1', 445)], 66: [('us1', 66)], 556: [('us1', 556)]}
but it makes little sense, because it simply checks whether a value is in the user's list of values and adds a tuple just to confirm it; that doesn't require all this code and could be simplified a lot. But I am thinking that I misunderstood your question (please review the English); probably you need to use the DB value as the key to retrieve another value from the user. Please review and update.
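For what it's worth, here is a minimal sketch of the simplification hinted at above, without eval(), assuming the user files have already been parsed into lists (the dict-of-lists shape and names are illustrative):
db = [1, 23, 33, 445, 66, 556, 88]
user_files = {
    "us1": [1, 23, 445, 77, 66, 556, 54],
    "us2": [1, 23, 45, 77, 366],
}

values_dict = {}
for value in db:
    for user, values in user_files.items():
        if value in values:
            # setdefault creates the list on the first hit, then appends to it
            values_dict.setdefault(value, []).append((user, value))

print(values_dict)
# {1: [('us1', 1), ('us2', 1)], 23: [('us1', 23), ('us2', 23)], 445: [('us1', 445)], ...}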

Issue with MongoDB bulk load using the upsert method

I am trying to load an 8M-row by 1K-column Python dataframe into Mongo. For performance I planned to use Mongo's bulk operations. I also have to apply daily updates to the collections, so I used the upsert method of the bulk operation. Below is the code I have prepared:
def BulkLoad(self, Dataset):
    counter = 0
    empty = '000'
    columns = []
    records = []
    DataDict = {}
    for col in Dataset.columns:
        columns.append(col)
    try:
        db = self.getDatabase()
        bulk = db.collection.initialize_ordered_bulk_op()
        for j in range(len(Dataset)):
            records.clear()
            DataDict.clear()
            DataDict.update(
                {'CreatedBy': empty, 'ModifiedBy': empty, 'Value': Score})
            for column in columns:
                colValue = str(Dataset[column][j])
                if (colValue == 'nan'):
                    colValue = colValue.replace('nan', '')
                DataDict.update({column: colValue})
            records.append(DataDict)
            print("list is ", records)
            Id = DataDict['Id']
            Number = DataDict['Number']
            print(DataDict)
            bulk.find(
                {'Id': Id, 'Number': Number}).upsert().update(
                {
                    '$set': {'Id': Id, 'Number': Number, 'Events': records}
                })
            counter += 1
            if counter % 1000 == 0:
                result = bulk.execute()
                logging.info(pprint(result))
                bulk = db.coll.initialize_ordered_bulk_op()
        if counter % 1000 != 0:
            result = bulk.execute()
            logging.info(pprint(result))
    except BulkWriteError as bre:
        logging.error(pprint(bre.details))
    except Exception as e:
        logging.exception(e)
If I load a sample of 10 rows into the Mongo collection, all the documents end up with the values of the 10th row. I know it's because of the Python dictionary reference problem.
Can anyone please give me a suggestion on that?
The fix is to create a new DataDict inside the loop, so each document gets its own dictionary instead of a reference to the same one:
def BulkLoad(self, Dataset):
    counter = 0
    empty = '000'
    columns = []
    records = []
    for col in Dataset.columns:
        columns.append(col)
    try:
        db = self.getDatabase()
        bulk = db.collection.initialize_ordered_bulk_op()
        for j in range(len(Dataset)):
            DataDict = {}  # a fresh dict per row, instead of reusing one
            DataDict.update(
                {'CreatedBy': empty, 'ModifiedBy': empty, 'Value': Score})
            for column in columns:
                colValue = str(Dataset[column][j])
                if (colValue == 'nan'):
                    colValue = colValue.replace('nan', '')
                DataDict.update({column: colValue})
            records.append(DataDict)
            print("list is ", records)
            Id = DataDict['Id']
            Number = DataDict['Number']
            print(DataDict)
            bulk.find(
                {'Id': Id, 'Number': Number}).upsert().update(
                {
                    '$set': {'Id': Id, 'Number': Number, 'Events': records}
                })
            counter += 1
            if counter % 1000 == 0:
                result = bulk.execute()
                logging.info(pprint(result))
                bulk = db.coll.initialize_ordered_bulk_op()
        if counter % 1000 != 0:
            result = bulk.execute()
            logging.info(pprint(result))
    except BulkWriteError as bre:
        logging.error(pprint(bre.details))
    except Exception as e:
        logging.exception(e)
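A side note beyond the original code: initialize_ordered_bulk_op was deprecated in PyMongo 3.5 and removed in PyMongo 4.0, so on current drivers the same batched upsert pattern is usually written with bulk_write and UpdateOne. A minimal sketch (connection details and field names are illustrative):
from pymongo import MongoClient, UpdateOne
from pymongo.errors import BulkWriteError

client = MongoClient()                 # assumes a local mongod; adjust as needed
coll = client["mydb"]["mycollection"]  # illustrative database/collection names

def bulk_upsert(docs, batch_size=1000):
    ops = []
    for doc in docs:
        ops.append(UpdateOne(
            {'Id': doc['Id'], 'Number': doc['Number']},  # match keys as above
            {'$set': doc},
            upsert=True))
        if len(ops) == batch_size:      # flush in batches, as in the code above
            coll.bulk_write(ops, ordered=True)
            ops = []
    if ops:                             # flush the final partial batch
        coll.bulk_write(ops, ordered=True)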
