I have a query
fromDate = '2021-10-01'
toDate = '2021-10-10'
dataType = 'location'
typeId = '60dd6d303da6c17209d5ef68'
workerType = 'Employee'
start_date = datetime.strptime(str(fromDate)+" 00:00:00", '%Y-%m-%d %H:%M:%S')
end_date = datetime.strptime(str(toDate)+" 00:00:00", '%Y-%m-%d %H:%M:%S')
if dataType == 'location':
found_location = prodLocationCollection.find_one({"_id":ObjectId(typeId)})
match_filter = {'Date':{"$gte":start_date,"$lte":end_date},
'Location':found_location["locationName"],
'locationId':{'$exists':True},
'workerType':workerType
}
elif dataType == 'user':
match_filter = {'Date':{"$gte":start_date,"$lte":end_date},
'employeeId':ObjectId(typeId),
'locationId':{'$exists':True},
'workerType':workerType
}
output = list(prodAttendanceCollection.aggregate([{'$match': match_filter},
{"$group":{'_id':{
'employeeId':'$employeeId',
'workerId':'$workerId',
'workerFullName':'$workerFullName'
},
'dailyPointsArray':{
'$push':{
'Date':'$Date',
'createdAs':'$createdAs',
'Points':'$shiftPoints'
}
},
'total_shift_points':{'$sum':'$shiftPoints'},
'total_duration':{'$sum':'$duration'},
}
},
{ '$lookup':
{
'from': "users",
'localField': "employeeId",
'foreignField': "_id",
'as': "userInfo"
}
}
]))
This is giving output
[{'_id': {'employeeId': ObjectId('60dd78184524e6c116e22a44'),
'workerId': '1008',
'workerFullName': 'RADHIKA GOTHIVREKAR'},
'dailyPointsArray': [{'Date': datetime.datetime(2021, 10, 1, 0, 0),
'createdAs': 'ABSENT',
'Points': None},
{'Date': datetime.datetime(2021, 10, 2, 0, 0),
'createdAs': 'ABSENT',
'Points': None},
{'Date': datetime.datetime(2021, 10, 3, 0, 0),
'createdAs': 'ABSENT',
'Points': None},
{'Date': datetime.datetime(2021, 10, 4, 0, 0),
'createdAs': 'ABSENT',
'Points': None},
{'Date': datetime.datetime(2021, 10, 5, 0, 0),
'createdAs': 'IN-TIME PUNCHED',
'Points': 0},
{'Date': datetime.datetime(2021, 10, 6, 0, 0),
'createdAs': 'FULL-TIME PUNCHED',
'Points': 1},
{'Date': datetime.datetime(2021, 10, 7, 0, 0),
'createdAs': 'FULL-TIME PUNCHED',
'Points': 1},
{'Date': datetime.datetime(2021, 10, 8, 0, 0),
'createdAs': 'FULL-TIME PUNCHED',
'Points': 1},
{'Date': datetime.datetime(2021, 10, 9, 0, 0),
'createdAs': 'FULL-TIME PUNCHED',
'Points': 1},
{'Date': datetime.datetime(2021, 10, 10, 0, 0),
'createdAs': 'ABSENT',
'Points': None}],
'total_shift_points': 4,
'total_duration': 0,
'userInfo': []},
{'_id': {'employeeId': ObjectId('60dd77e34524e6c116e1b27d'),
'workerId': '1365',
'workerFullName': 'HARISON NIKALJE'},
'dailyPointsArray': [{'Date': datetime.datetime(2021, 10, 1, 0, 0),
'createdAs': 'ABSENT',
'Points': None},
{'Date': datetime.datetime(2021, 10, 2, 0, 0),
'createdAs': 'ABSENT',
'Points': None},
{'Date': datetime.datetime(2021, 10, 3, 0, 0),
'createdAs': 'ABSENT',
'Points': None},
{'Date': datetime.datetime(2021, 10, 4, 0, 0),
'createdAs': 'IN-TIME PUNCHED',
'Points': 0},
{'Date': datetime.datetime(2021, 10, 5, 0, 0),
'createdAs': 'IN-TIME PUNCHED',
'Points': 0},
{'Date': datetime.datetime(2021, 10, 6, 0, 0),
'createdAs': 'IN-TIME PUNCHED',
'Points': 0},
{'Date': datetime.datetime(2021, 10, 7, 0, 0),
'createdAs': 'ABSENT',
'Points': None},
{'Date': datetime.datetime(2021, 10, 8, 0, 0),
'createdAs': 'ABSENT',
'Points': None},
{'Date': datetime.datetime(2021, 10, 9, 0, 0),
'createdAs': 'ABSENT',
'Points': None},
{'Date': datetime.datetime(2021, 10, 10, 0, 0),
'createdAs': 'ABSENT',
'Points': None}],
'total_shift_points': 0,
'total_duration': 0,
'userInfo': []}]
As you can see, everything is working except the lookup part, which returns an empty array ([]).
This is sample data of attendance & user
ATTENDANCE
{
"_id": {
"$oid": "60dd7d723fcb2eb7df248af3"
},
"workerId": "2073",
"workerFullName": "ARUN NAIR",
"workerType": "Employee",
"workerAadharCardNumber": "xxxxxxxxxx",
"Date": {
"$date": "2021-07-01T00:00:00.000Z"
},
"employeeId": {
"$oid": "60dd77c14524e6c116e1692c"
},
"Location": "HEAD OFFICE 2",
"sourceUnitType": null,
"duration": null,
"shiftPoints": null,
"createdAs": "ABSENT",
"ruleApplied": null,
"detections": [],
"locationId": {
"$oid": "60dd6d303da6c17209d5ef68"
},
"workerFaceRegistered": true
}
USER
{
"_id": {
"$oid": "60dd77c14524e6c116e1692c"
},
"workerFirstName": "ARUN",
"workerSurname": "NAIR",
"workerPhoneNumber": "xxxxxxxxxx",
"workerId": "2073",
"locationType": "HEAD OFFICE",
"locationName": "HEAD OFFICE 2",
"workerDesignation": "EXECUTIVE",
"workerDepartment": "SALES",
"workerAadharCardNumber": "xxxxxxxxxx",
"workerType": "Employee",
"workerEmail": "xxxxxxxxxxxx",
"workerStatus": "REGULAR",
"workerUsername": "2073",
"workerPassword": "$2b$12$wBrDRYgPbRxrLG8N9Wxs2eDxGVFauZMraosrkePnYWWpZmPE1ljY2",
"totalLoginAttemptLeft": 3,
"lastLocationId": {
"$oid": "60dd6d303da6c17209d5ef68"
},
"allowFencing": true,
"workerFaceRegistered": true,
"isActive": false
}
After grouping, the original fields are no longer present, so $employeeId does not exist and the lookup won't match anything.
Since employeeId was moved to _id.employeeId in the group stage, in the lookup, use localField: "_id.employeeId"
Related
I need to fill a db with fake data. To do so I'm creating a list of objects. I generate 10 timestamps, and for each timestamp I create an object, add the timestamp and some randomly assigned data, then it is appended to the list 5 times; each time given a unique id ('tag'). This format is quite specific as I am trying to simulate what the real data will be like.
Every time I append a value, I run a print to show that the data is correct. Then, when the entire operation is finished, I print the entire list to double-check. The first print output is perfect. Yet the second shows 10 runs of identical data: my tag value is the same for 5 instances, then jumps up by 5, and this repeats.
What am I missing?
cultures = [
{
'name': 'Pink Oyster',
'scientific_name': 'Pleurotus djamor'
},
{
'name': 'Brown Oyster',
'scientific_name': 'Pleurotus ostreatus'
},
{
'name': 'Blue Oyster',
'scientific_name': 'Pleurotus Columbinus'
},
{
'name': 'Chestnut',
'scientific_name': 'Pholiota Adiposa'
},
{
'name': 'Shiitake',
'scientific_name': 'Lentinula edodes'
},
{
'name': "Lion's Mane",
'scientific_name': 'Hericium erinaceus'
},
]
times = pd.date_range(start="2022-01-01",end="2022-07-20", periods=10).to_pydatetime().tolist()
data = []
count = 0
for time in times:
raw = {}
for key, value in cultures[randrange(0,5)].items():
raw[key] = value
raw['generation'] = randrange(0, 10)
raw['stage'] = randrange(0, 3)
raw['user_id'] = 1
raw['created_at'] = time
raw['updated_at'] = time
for i in range(5):
raw['tag'] = count
count += 1
data.append(raw)
#shows my data has been appended correctly
print(data[-1])
#???????
print(data)
example first print - tag value increases by 1
{'name': 'Blue Oyster', 'scientific_name': 'Pleurotus Columbinus', 'generation': 5, 'stage': 1, 'user_id': 1, 'created_at': datetime.datetime(2022, 1, 1, 0, 0), 'updated_at': datetime.datetime(2022, 1, 1, 0, 0), 'tag': 0}
{'name': 'Blue Oyster', 'scientific_name': 'Pleurotus Columbinus', 'generation': 5, 'stage': 1, 'user_id': 1, 'created_at': datetime.datetime(2022, 1, 1, 0, 0), 'updated_at': datetime.datetime(2022, 1, 1, 0, 0), 'tag': 1}
{'name': 'Blue Oyster', 'scientific_name': 'Pleurotus Columbinus', 'generation': 5, 'stage': 1, 'user_id': 1, 'created_at': datetime.datetime(2022, 1, 1, 0, 0), 'updated_at': datetime.datetime(2022, 1, 1, 0, 0), 'tag': 2}
{'name': 'Blue Oyster', 'scientific_name': 'Pleurotus Columbinus', 'generation': 5, 'stage': 1, 'user_id': 1, 'created_at': datetime.datetime(2022, 1, 1, 0, 0), 'updated_at': datetime.datetime(2022, 1, 1, 0, 0), 'tag': 3}
{'name': 'Blue Oyster', 'scientific_name': 'Pleurotus Columbinus', 'generation': 5, 'stage': 1, 'user_id': 1, 'created_at': datetime.datetime(2022, 1, 1, 0, 0), 'updated_at': datetime.datetime(2022, 1, 1, 0, 0), 'tag': 4}
example second print - tag is stuck at 4
[{'name': 'Blue Oyster', 'scientific_name': 'Pleurotus Columbinus', 'generation': 5, 'stage': 1, 'user_id': 1, 'created_at': datetime.datetime(2022, 1, 1, 0, 0), 'updated_at': datetime.datetime(2022, 1, 1, 0, 0), 'tag': 4}, {'name': 'Blue Oyster', 'scientific_name': 'Pleurotus Columbinus', 'generation': 5, 'stage': 1, 'user_id': 1, 'created_at': datetime.datetime(2022, 1, 1, 0, 0), 'updated_at': datetime.datetime(2022, 1, 1, 0, 0), 'tag': 4}, {'name': 'Blue Oyster', 'scientific_name': 'Pleurotus Columbinus', 'generation': 5, 'stage': 1, 'user_id': 1, 'created_at': datetime.datetime(2022, 1, 1, 0, 0), 'updated_at': datetime.datetime(2022, 1, 1, 0, 0), 'tag': 4}, {'name': 'Blue Oyster', 'scientific_name': 'Pleurotus Columbinus', 'generation': 5, 'stage': 1, 'user_id': 1, 'created_at': datetime.datetime(2022, 1, 1, 0, 0), 'updated_at': datetime.datetime(2022, 1, 1, 0, 0), 'tag': 4}, {'name': 'Blue Oyster', 'scientific_name': 'Pleurotus Columbinus', 'generation': 5, 'stage': 1, 'user_id': 1, 'created_at': datetime.datetime(2022, 1, 1, 0, 0), 'updated_at': datetime.datetime(2022, 1, 1, 0, 0), 'tag': 4},
How am I changing the values of previous appends when I change the value after I append the data?
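In Python, variables and containers hold references to objects; nothing is copied implicitly. Immutable types (int, str, float) only look as if they were passed by value because they cannot be changed in place, whereas mutable types (list, dict, most objects) can be modified through any reference to them. So if you add raw to the list, you actually add a reference to the same dict. You then change its contents and add it again, so every entry appended in that inner loop shows the last tag you assigned.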
Instead of doing data.append(raw) try data.append(raw.copy()).
You need a fresh copy of raw each time round the loop:
# Append an independent copy on every pass. Appending `raw` itself would store
# five references to one dict, so all five entries would show the last tag.
for i in range(5):
    # A shallow copy is enough here: the values are str/int/datetime objects,
    # which are never modified in place.
    raw1 = raw.copy()
    raw1['tag'] = count
    count += 1
    data.append(raw1)
result data :
<QuerySet [{'disk_available': 26, 'hostname': '2', 'day': datetime.datetime(2020, 2, 11, 0, 0, tzinfo=<UTC>), 'c': 354}, {'disk_available': 27, 'hostname': '2', 'day': datetime.datetime(2020, 2, 10, 0, 0, tzinfo=<UTC>), 'c': 273}, {'disk_available': 19, 'hostname': '2', 'day': datetime.datetime(2020, 2, 12, 0, 0, tzinfo=<UTC>), 'c': 12}, {'disk_available': 26, 'hostname': '2', 'day': datetime.datetime(2020, 2, 12, 0, 0, tzinfo=<UTC>), 'c': 45}, {'disk_available': 26, 'hostname': 'tes', 'day': datetime.datetime(2020, 2, 11, 0, 0, tzinfo=<UTC>), 'c': 1945}, {'disk_available': 19, 'hostname': 'tes', 'day': datetime.datetime(2020, 2, 12, 0, 0, tzinfo=<UTC>), 'c': 53}, {'disk_available': 1, 'hostname': 'tes', 'day': datetime.datetime(2020, 2, 11, 0, 0, tzinfo=<UTC>), 'c': 1}, {'disk_available': 26, 'hostname': 'tes', 'day': datetime.datetime(2020, 2, 12, 0, 0, tzinfo=<UTC>), 'c': 45}, {'disk_available': 27, 'hostname': 'tes', 'day': datetime.datetime(2020, 2, 10, 0, 0, tzinfo=<UTC>), 'c': 291}]>
SocketClient.objects.annotate(day=TruncDay('create')).values('day').annotate(c=Count('id')).values('day', 'disk_available', 'hostname', 'c').order_by('hostname')
.
From the results above, I want to display the hourly average for each hostname.
so I have a list of dicts that looks like this:
[{
'field': {
'data': 'F1'
},
'value': F1Value1,
'date': datetime.datetime(2019, 3, 1, 0, 0)
}, {
'field': {
'data': 'F2'
},
'value': F2Value1,
'date': datetime.datetime(2019, 2, 5, 0, 0)
}, {
'field': {
'data': 'F2'
},
'value': F2Value2,
'date': datetime.datetime(2019, 2, 7, 0, 0)
}]
And I want an output that looks like this:
[
{
'F1': [
{
'value': F1Value1,
'date': datetime.datetime(2019, 3, 1, 0, 0)
}
]
},
{
'F2': [
{
'value': F2Value1,
'date': datetime.datetime(2019, 2, 5, 0, 0)
},
{
'value': F2Value2,
'date': datetime.datetime(2019, 2, 7, 0, 0)
},
]
}
]
That is, I want every field.data to be the key and have it append the value and date if it belongs to the same field.
Note: I want to do this WITHOUT using a for loop (apart from the loop to iterate through the list). I want to use python dict functions like update() and append() etc.
Any optimized solutions would be really helpful.
You could just iterate through the list of dicts and use defaultdict from collections to group the items under their field key:
>>> from collections import defaultdict
>>> d = defaultdict(list)
>>>
>>> for items in x:
... d[items['field']['data']].append({
... 'value': items['value'],
... 'date': items['date']
... })
...
>>>
>>> import pprint
>>> pprint.pprint(x)
[{'date': datetime.datetime(2019, 3, 1, 0, 0),
'field': {'data': 'F1'},
'value': 'F1Value1'},
{'date': datetime.datetime(2019, 2, 5, 0, 0),
'field': {'data': 'F2'},
'value': 'F2Value1'},
{'date': datetime.datetime(2019, 2, 7, 0, 0),
'field': {'data': 'F2'},
'value': 'F2Value2'}]
>>>
>>> pprint.pprint(list(d.items()))
[('F1', [{'date': datetime.datetime(2019, 3, 1, 0, 0), 'value': 'F1Value1'}]),
('F2',
[{'date': datetime.datetime(2019, 2, 5, 0, 0), 'value': 'F2Value1'},
{'date': datetime.datetime(2019, 2, 7, 0, 0), 'value': 'F2Value2'}])]
Use itertools.groupby:
from itertools import groupby
from pprint import pprint
def _field_name(element):
    """Return the grouping key (the nested ``field.data`` value) of one record."""
    return element['field']['data']


# itertools.groupby only merges *adjacent* items with equal keys, so the input
# is sorted by the same key first; otherwise a field that reappears later in
# `data` would produce a second dict with a duplicate key. sorted() is stable,
# so records keep their original relative order inside each group.
# Each output dict maps one field name to its records with 'field' stripped.
result = [
    {key: [{k: v for k, v in element.items() if k != 'field'}
           for element in group]}
    for key, group in groupby(sorted(data, key=_field_name), key=_field_name)
]
pprint(result)
Output:
[{'F1': [{'date': datetime.datetime(2019, 3, 1, 0, 0), 'value': 'F1Value1'}]},
{'F2': [{'date': datetime.datetime(2019, 2, 5, 0, 0), 'value': 'F2Value1'},
{'date': datetime.datetime(2019, 2, 7, 0, 0), 'value': 'F2Value2'}]}]
Only using dict, list, and set:
# Build one single-key dict per distinct field.data value, i.e. a list of
# {field_name: [records without the 'field' entry]} dicts.
# The `for field_data ...` clause belongs to the outer list: placed inside the
# braces it would build a single dict holding every field name instead.
# sorted() makes the result order deterministic (set iteration order is not).
[
    {
        field_data: [
            {k: v for k, v in thing.items() if k != 'field'}
            for thing in things
            if thing['field']['data'] == field_data
        ]
    }
    for field_data in sorted({thing['field']['data'] for thing in things})
]
I have this list:
list_users= [[{'points': 9, 'values': 1, 'division': 1, 'user_id': 3}], [{'points': 3, 'values': 0, 'division': 1, 'user_id': 1}], [{'points': 2, 'values': 0, 'division': 1, 'user_id': 4}], [{'points': 9, 'values': 0, 'division': 1, 'user_id': 11}], [{'points': 3, 'values': 0, 'division': 1, 'user_id': 10}], [{'points': 100, 'values': 4, 'division': 1, 'user_id': 2}], [{'points': 77, 'values': 2, 'division': 1, 'user_id': 5}], [{'points': 88, 'values': 3, 'division': 1, 'user_id': 6}], [{'points': 66, 'values': 1, 'division': 1, 'user_id': 7}], [{'points': 2, 'values': 0, 'division': 1, 'user_id': 8}]]
I need to sort the list by points and values.
How can I sort it if dict is inside a list inside the main list?
I generated this list from a query and then just appended each result to list_users.
Access the dictionary containing points and values by indexing on the inner list:
list_users_sorted = sorted(list_users, key=lambda x: (x[0]['points'], x[0]['values']))
# ^ ^
Sort using a key function for sorted that builds a tuple of points and values for each dict in each list.
def kf(x):
    """Return the sort key ``(points, values)`` for one inner list.

    ``x`` is a list whose first item is a dict containing the 'points' and
    'values' keys. Tuples compare element by element, so sorting with this
    key orders by points first and breaks ties by values.
    """
    record = x[0]
    return (record["points"], record["values"])
s = sorted(list_users, key=kf)
print(s)
Output:
[[{'division': 1, 'points': 2, 'user_id': 4, 'values': 0}],
[{'division': 1, 'points': 2, 'user_id': 8, 'values': 0}],
[{'division': 1, 'points': 3, 'user_id': 1, 'values': 0}],
[{'division': 1, 'points': 3, 'user_id': 10, 'values': 0}],
[{'division': 1, 'points': 9, 'user_id': 11, 'values': 0}],
[{'division': 1, 'points': 9, 'user_id': 3, 'values': 1}],
[{'division': 1, 'points': 66, 'user_id': 7, 'values': 1}],
[{'division': 1, 'points': 77, 'user_id': 5, 'values': 2}],
[{'division': 1, 'points': 88, 'user_id': 6, 'values': 3}],
[{'division': 1, 'points': 100, 'user_id': 2, 'values': 4}]]
mydata = [{'date': datetime.datetime(2009, 1, 31, 0, 0), 'value': 14, 'year': u'2009'},
{'date': datetime.datetime(2009, 2, 28, 0, 0), 'value': 84, 'year': u'2009'},
{'date': datetime.datetime(2009, 3, 31, 0, 0), 'value': 77, 'year': u'2009'},
{'date': datetime.datetime(2009, 4, 30, 0, 0), 'value': 80, 'year': u'2009'},
{'date': datetime.datetime(2009, 5, 31, 0, 0), 'value': 6, 'year': u'2009'},
{'date': datetime.datetime(2009, 6, 30, 0, 0), 'value': 16, 'year': u'2009'},
{'date': datetime.datetime(2009, 7, 31, 0, 0), 'value': 16, 'year': u'2009'},
{'date': datetime.datetime(2009, 8, 31, 0, 0), 'value': 1, 'year': u'2009'},
{'date': datetime.datetime(2009, 9, 30, 0, 0), 'value': 9, 'year': u'2009'},
{'date': datetime.datetime(2008, 1, 31, 0, 0), 'value': 77, 'year': u'2008'},
{'date': datetime.datetime(2008, 2, 29, 0, 0), 'value': 60, 'year': u'2008'},
{'date': datetime.datetime(2008, 3, 31, 0, 0), 'value': 28, 'year': u'2008'},
{'date': datetime.datetime(2008, 4, 30, 0, 0), 'value': 9, 'year': u'2008'},
{'date': datetime.datetime(2008, 5, 31, 0, 0), 'value': 74, 'year': u'2008'},
{'date': datetime.datetime(2008, 6, 30, 0, 0), 'value': 70, 'year': u'2008'},
{'date': datetime.datetime(2008, 7, 31, 0, 0), 'value': 75, 'year': u'2008'},
{'date': datetime.datetime(2008, 8, 31, 0, 0), 'value': 7, 'year': u'2008'},
{'date': datetime.datetime(2008, 9, 30, 0, 0), 'value': 10, 'year': u'2008'},
{'date': datetime.datetime(2008, 10, 31, 0, 0), 'value': 54, 'year': u'2008'},
{'date': datetime.datetime(2008, 11, 30, 0, 0), 'value': 55, 'year': u'2008'},
{'date': datetime.datetime(2008, 12, 31, 0, 0), 'value': 40, 'year': u'2008'},
{'date': datetime.datetime(2007, 12, 31, 0, 0), 'value': 93, 'year': u'2007'},]
In 'mydata', I get list of sequential monthly data. I wrote some code to group them on year.
partial_req_data = dict([(k,[f for f in v]) for k,v in itertools.groupby(mydata, key=lambda x : x.get('year'))])
Now I further need some efficient code to fill the missing months with {}, i.e. empty dict. There are bad ways to do that, but am looking for good ones.
# Desired result: each year maps to a 12-slot list (January..December) in which
# a month without data is represented by an empty dict.
required_data = {
    "2009": [{'date': datetime.datetime(2009, 1, 31, 0, 0), 'value': 14, 'year': u'2009'},
             {'date': datetime.datetime(2009, 2, 28, 0, 0), 'value': 84, 'year': u'2009'},
             {'date': datetime.datetime(2009, 3, 31, 0, 0), 'value': 77, 'year': u'2009'},
             {'date': datetime.datetime(2009, 4, 30, 0, 0), 'value': 80, 'year': u'2009'},
             {'date': datetime.datetime(2009, 5, 31, 0, 0), 'value': 6, 'year': u'2009'},
             {'date': datetime.datetime(2009, 6, 30, 0, 0), 'value': 16, 'year': u'2009'},
             {'date': datetime.datetime(2009, 7, 31, 0, 0), 'value': 16, 'year': u'2009'},
             {'date': datetime.datetime(2009, 8, 31, 0, 0), 'value': 1, 'year': u'2009'},
             {'date': datetime.datetime(2009, 9, 30, 0, 0), 'value': 9, 'year': u'2009'},
             {}, {}, {}],
    "2008": [{'date': datetime.datetime(2008, 1, 31, 0, 0), 'value': 77, 'year': u'2008'},
             {'date': datetime.datetime(2008, 2, 29, 0, 0), 'value': 60, 'year': u'2008'},
             {'date': datetime.datetime(2008, 3, 31, 0, 0), 'value': 28, 'year': u'2008'},
             {'date': datetime.datetime(2008, 4, 30, 0, 0), 'value': 9, 'year': u'2008'},
             {'date': datetime.datetime(2008, 5, 31, 0, 0), 'value': 74, 'year': u'2008'},
             {'date': datetime.datetime(2008, 6, 30, 0, 0), 'value': 70, 'year': u'2008'},
             {'date': datetime.datetime(2008, 7, 31, 0, 0), 'value': 75, 'year': u'2008'},
             {'date': datetime.datetime(2008, 8, 31, 0, 0), 'value': 7, 'year': u'2008'},
             {'date': datetime.datetime(2008, 9, 30, 0, 0), 'value': 10, 'year': u'2008'},
             {'date': datetime.datetime(2008, 10, 31, 0, 0), 'value': 54, 'year': u'2008'},
             {'date': datetime.datetime(2008, 11, 30, 0, 0), 'value': 55, 'year': u'2008'},
             {'date': datetime.datetime(2008, 12, 31, 0, 0), 'value': 40, 'year': u'2008'}],
    "2007": [{}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {},
             {'date': datetime.datetime(2007, 12, 31, 0, 0), 'value': 93, 'year': u'2007'}],
}
import datetime
from itertools import groupby
from pprint import pprint
# Build {year: [12 monthly slots]}; a month with no record becomes {}.
# NOTE: itertools.groupby only merges adjacent items, so each year's rows must
# be contiguous in mydata; a year that reappears later would overwrite its
# earlier entry in required_data.
required_data = {}
for k, g in groupby(mydata, key=lambda x: x.get('year')):
    # Index this year's records by calendar month number (1-12).
    partial = {datum.get('date').month: datum for datum in g}
    # partial.get(m, {}) returns the record when it exists, {} otherwise.
    required_data[k] = [partial.get(m, {}) for m in range(1, 13)]
pprint(required_data)
For each year k, partial is a dict whose keys are months.
The trick is to use partial.get(m,{}) since this will return the datum when it exists, or {} when it does not.