Apply json patch to a Mongoengine document - python

I'm trying to apply a json-patch to a Mongoengine Document.
I'm using these json-patch library: https://github.com/stefankoegl/python-json-patch and mongoengine 0.14.3 with python 3.6.3
This is my actual code:
json_patch = JsonPatch.from_string(jp_string)
document = Document.objects(id=document_id)
json_documents = json.loads(document.as_pymongo().to_json())
json_patched_document = json_patch.apply(json_documents[0])
Document.objects(id=document_id).first().delete()
Document
.from_json(json.dumps(json_patched_document))
.save(force_insert=True)
Is there a better way to save an edited json document?
I've enhanced a little bit the code:
json_patch = JsonPatch.from_string(jp_string)
document = Document.objects(id=document_id)
json_document = json.loads(document.as_pymongo().to_json())
json_patched_document = json_patch.apply(json_documents[0])
Document
.from_json(json.dumps(json_patched_document), created=True)
.save()
but, is there a way to not convert the document to json?

I had slightly similar problem, the part that I dont wanted the complete Document for saving, I just wanted to update fields which are modified/added.
heres the code I tests on below inputs:
def tryjsonpatch():
doc_in_db = {'foo': 'bar', "name": "aj", 'numbers': [1, 3, 7, 8]}
input = {'foo': 'bar', "name": "dj", 'numbers': [1, 3, 4, 8]}
input2 = {'foo': 'bar', "name": "aj", 'numbers': [1, 3, 7, 8], "extera": "12"}
input3 = {'foo': 'bar', "name": "dj", 'numbers': [1, 3, 4, 8], "extera": "12"}
patch = jsonpatch.JsonPatch.from_diff(doc_in_db, input3)
print("\n***patch***\n", patch)
doc = get_minimal_doc(doc_in_db, patch)
result = patch.apply(doc, in_place=True)
print("\n###result###\n", result,
"\n###present###\n", doc_in_db)
def get_minimal_doc(present, patch):
cur_dc = {}
for change in patch.patch:
if change['op'] not in ("add"):
keys = change['path'].split("/")[1:]
present_move = {}
old_key = 1
first = True
for key in keys:
if key.isdigit(): # old_key represented a array
cur_dc[old_key] = present_move
else:
if first:
cur_dc[key] = {}
first = False
else:
cur_dc[old_key][key] = {}
old_key = key
present_move = present[old_key]
return cur_dc
tryjsonpatch()

Related

How to compare between two dictionaries using threads

Im currently working on a comparison where I am trying to solve on how I am able to compare between two dictionaries where the first requests does a GET and scrapes the data to a dictionary and then I want to compare to for the next request using the same method and see if there has been any changes on the webpage. I have currently done:
import random
import threading
import time
from concurrent.futures import as_completed
from concurrent.futures.thread import ThreadPoolExecutor
import requests
from bs4 import BeautifulSoup
URLS = [
'https://github.com/search?q=hello+world',
'https://github.com/search?q=python+3',
'https://github.com/search?q=world',
'https://github.com/search?q=i+love+python',
'https://github.com/search?q=sport+today',
'https://github.com/search?q=how+to+code',
'https://github.com/search?q=banana',
'https://github.com/search?q=android+vs+iphone',
'https://github.com/search?q=please+help+me',
'https://github.com/search?q=batman',
]
def doRequest(url):
response = requests.get(url)
time.sleep(random.randint(10, 30))
return response, url
def doScrape(response):
soup = BeautifulSoup(response.text, 'html.parser')
return {
'title': soup.find("input", {"name": "q"})['value'],
'repo_count': soup.find("span", {"data-search-type": "Repositories"}).text.strip()
}
def checkDifference(parsed, url):
def threadPoolLoop():
with ThreadPoolExecutor(max_workers=1) as executor:
future_tasks = [
executor.submit(
doRequest,
url
) for url in URLS]
for future in as_completed(future_tasks):
response, url = future.result()
if response.status_code == 200:
checkDifference(doScrape(response), url)
while True:
t = threading.Thread(target=threadPoolLoop, )
t.start()
print('Joining thread and waiting for it to finish...')
t.join()
My problem is that I do not know how I can print out whenever there has been a change for either title or/and repo_count? (The whole point will be that I will run this script 24/7 and I always want it to print out whenever there has been a change)
If you're looking for a simple method to compare two dictionaries, there are a few different options.
Some good resources to begin:
mCoding: zipping together Python dicts
StackOverflow: Comparing two dictionaries and checking how many (key, value) pairs are equal
Let's start with two dictionaries to compare 👇 Some added elements, some removed, some changed, some same.
dict1 = {
"value_2": 2,
"value_3": 3,
"value_4": 4,
"value_5": "five",
"value_6": "six",
}
dict2 = {
"value_1": 1,
"value_2": 2,
"value_4": 4
}
You could probably use the unittest library. Like this:
>>> from unittest import TestCase
>>> TestCase().assertDictEqual(dict1, dict1) # <-- No output, because they are the same
>>> TestCase().assertDictEqual(dict1, dict2) # <-- Will raise error and display elements which are different
AssertionError: {'value_2': 2, 'value_3': 3, 'value_4': 4, 'value_5': 'five', 'value_6': 'six'} != {'value_1': 1, 'value_2': 3, 'value_4': 4}
- {'value_2': 2, 'value_3': 3, 'value_4': 4, 'value_5': 'five', 'value_6': 'six'}
+ {'value_1': 1, 'value_2': 3, 'value_4': 4}
But the challenge there is that it will raise an error when they are different; which is probably not what you're looking for. You simply want to see when they are different.
Another method is the deepdiff library. Like this:
>>> from deepdiff import DeepDiff
>>> from pprint import pprint
>>> pprint(DeepDiff(dict1, dict2))
{'dictionary_item_added': [root['value_1']],
'dictionary_item_removed': [root['value_3'], root['value_5'], root['value_6']],
'values_changed': {"root['value_2']": {'new_value': 3, 'old_value': 2}}}
Or, you could easily craft your own functions. Like this 👇 (functions copied from here)
>>> from pprint import pprint
>>> def compare_dict(d1, d2):
... return {k: d1[k] for k in d1 if k in d2 and d1[k] == d2[k]}
>>> pprint(compare_dict(dict1, dict2))
{'value_4': 4}
>>> def dict_compare(d1, d2):
... d1_keys = set(d1.keys())
... d2_keys = set(d2.keys())
... shared_keys = d1_keys.intersection(d2_keys)
... added = d1_keys - d2_keys
... removed = d2_keys - d1_keys
... modified = {o: {"old": d1[o], "new": d2[o]} for o in shared_keys if d1[o] != d2[o]}
... same = set(o for o in shared_keys if d1[o] == d2[o])
... return {"added": added, "removed": removed, "modified": modified, "same": same}
>>> pprint(dict_compare(dict1, dict2))
{'added': {'value_6', 'value_3', 'value_5'},
'modified': {'value_2': {'old': 2, 'new': 3}},
'removed': {'value_1'},
'same': {'value_4'}}

Nested dictionary replacing previous value + key instead of appending

I am working on vector space model, data set consists of 50 text files. Iterating through them splitting into words and saving them in dictionary. Now i want to use nested dictionary like:
dictionary = { {someword: {Doc1:23},{Doc21:2},{Doc34:3}},
{someword: {Doc1:23},{Doc21:2},{Doc34:3}},
{someword: {Doc1:23},{Doc21:2},{Doc34:3}}
}
but when i am running my program it replaces not only the document but also it does not calculates frequency by adding that how many times 'someword' occurred in a particular document.
for iterator in range(1, 51):
f = open(directory + str(iterator) + ext, "r")
for line in f.read().lower().split():
line = getwords(line)
for word in line:
if check(word, stopwords) == 0:
if existence(word, terms, iterator) != 1:
terms[word] = {}
terms[word]["Doc"+str(iterator)] = 1
else:
terms[word]["Doc"+str(iterator)] = int(terms[word]["Doc"+str(iterator)]) + 1
f.close()
existence function is :
def existence(tok, diction, iteration):
if tok in diction:
temp = "Doc"+str(iteration)
if temp in diction:
return 1
else:
return 0
else:
return 0
Result Somewhat like this.
{'blunder': {'Doc1': 1}, 'by': {'Doc50': 1}, 'anton': {'Doc27': 1}, 'chekhov': {'Doc27': 1}, 'an': {'Doc50': 1}, 'illustration': {'Doc48': 1}, 'story': {'Doc48': 1}, 'author': {'Doc48': 1}, 'portrait'...
Do you want to know how many times each word appears in each file? This is easily accomplished with a defaultdict of Counters, courtesy of the collections module.
You've got the right idea I think, looping over the files, reading line by line and splitting into words. It's the counting part you need help with.
from collections import defaultdict, Counter
from string import punctuation
fnames = ['1.txt', '2.txt', '3.txt', '4.txt', '5.txt']
word_counter = defaultdict(Counter)
for fname in fnames:
with open(fname, 'r') as txt:
for line in txt:
words = line.lower().strip().split()
for word in words:
word = word.strip(punctuation)
if word:
word_counter[word][fname] += 1
The data look will like this inside word_counter:
{
'within': {
'1.txt': 2,
},
'we': {
'1.txt': 3,
'2.txt': 2,
'3.txt': 2,
'4.txt': 2,
'5.txt': 4,
},
'do': {
'1.txt': 7,
'2.txt': 8,
'3.txt': 8,
'4.txt': 6,
'5.txt': 5,
},
...
}

Python Cerberus how to check dynamic root keys

I have a dict with IDs as its root keys that I want to validate. In other words, the root keys of the dict I want to validate are dynamic. Is there a way to run keyschema against the root keys?
e.g. https://repl.it/#crunk1/cerberusrootkeys
import cerberus
v = cerberus.validator.Validator()
schema = {'keyschema': {'type': 'string'}}
d = {'foo': 'bar', 'baz': 'gaz'}
print('I want this to be true.')
print(v.validate(d, schema))
### Output:
# I want this to be true.
# False
I know I could do the following:
wrapper = {'nested': d}
schema = {'nested': {'keyschema': {'type': 'string'}}}
v.validate(wrapper, schema)
but the current structure of my project doesn't easily allow for that.
Any solutions/tips/suggestions?
I managed to hack something together (https://repl.it/#crunk1/Cerberus-root-types) subclassing Validator and overriding validate():
class V(cerberus.Validator):
def validate(self, document, schema=None, update=False, normalize=True):
doc = None
wrapped = False
if schema is not None:
root_schema = schema.get('__root__', None)
wrapped = root_schema is not None
if wrapped:
doc = {'__root__': document}
schema = {'__root__': root_schema}
elif self.schema is not None:
root_schema = self.schema.get('__root__', None)
wrapped = root_schema is not None
if wrapped:
doc = {'__root__': document}
schema = {'__root__': root_schema}
doc = doc or document
result = super(V, self).validate(doc, schema, update, normalize)
if wrapped:
# Unwrap.
self.document = self.document['__root__']
for e in self._errors:
e.schema_path = tuple(e.schema_path[1:])
if len(e.document_path) > 1:
e.document_path = tuple(e.document_path[1:])
return result
This allows you to treat the root document as a 'type': 'dict' or 'type': 'list'.
v = V()
d = {'1': '1', '2': '2'}
schema = {'__root__': {
'type': 'dict',
'keyschema': {'coerce': int},
'valueschema': {'coerce': int},
}}
print(v.validate(d, schema), v.document, v.errors)
l = ['1', '2']
schema = {'__root__': {
'type': 'list',
'schema': {'coerce': int},
}}
print(v.validate(l, schema), v.document, v.errors)
l = ['1', 'b']
print(v.validate(l, schema), v.document, v.errors)
Output:
True {1: 1, 2: 2} {}
True [1, 2] {}
False [1, 'b'] {1: ["field '1' cannot be coerced: invalid literal for int() with base 10: 'b'"]}

Merge three dictionaries in Python

I am trying to merge three dictionaries together.
I am receiving an unsupported operand types error.
Here is my code:
def add_student():
global Snumber
global iCode
global kCode
Snumber = Student_number.get()
Sname = Student_name.get()
Ssurnname = Student_surname.get()
Sdetail = Student_detail.get()
i = Students(Snumber,Sname,Ssurnname,Sdetail)
Sinfo[Snumber]=[Sname,Ssurnname,Sdetail]
iName = Student_subject.get()
iCode = Student_code.get()
iMark1 = Student_Mark1.get()
iMark2 = Student_Mark2.get()
iMark3 = Student_Mark3.get()
iProject = Student_project.get()
j = Subjects(iName,iCode,iMark1,iMark2,iMark3,iProject)
SSubject[iCode]=[iName,iMark1,iMark2,iMark3,iProject]
kCourse = Degree_course.get()
kCode = Degree_code.get()
kYear = Degree_year.get()
v = Degrees(kCourse,kCode,kYear)
SDegree[kCode]=[kCourse,kYear]
popup_add()
student_list = (Sinfo.items() + SSubject.items() + SDegree.items())
print(student_list)
I believe my problem is in:
student_list = (Sinfo.items() + SSubject.items() + SDegree.items())
print(student_list)
you can use dict.update()
>>> a = {1:1,2:2,3:3}
>>> a
{1: 1, 2: 2, 3: 3}
>>> b = {4:4,5:5}
>>> c = {6:6,7:7}
>>> a.update(b)
>>> a.update(c)
>>> a
{1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7}
if you dont want to modify the original you can use the following to copy it into a new variable
>>> new_dict = dict(a)
To merge multiple dictionary, lets say we have dict Sinfo, SSubject and SDegree
student_list = dict(Sinfo.items() + SSubject.items() + SDegree.items())
code above will works with python 2 only. for python 3, need to add list to convert dict.items into list first as shown below
student_list = dict(list(Sinfo.items()) + list(SSubject.items()) + list(SDegree.items()))

Get information from different dict by dict name

I have a data/character_data.py:
CHARACTER_A = { 1: {"level": 1, "name":"Ann", "skill_level" : 1},
2: {"level": 2, "name":"Tom", "skill_level" : 1}}
CHARACTER_B = { 1: {"level": 1, "name":"Kai", "skill_level" : 1},
2: {"level": 2, "name":"Mel", "skill_level" : 1}}
In main.py, I can do this:
from data import character_data as character_data
print character_data.CHARACTER_A[1]["name"]
>>> output: Ann
print character_data.CHARACTER_B[2]["name"]
>>> output: Mel
How do I achieve this?
from data import character_data as character_data
character_type = "CHARACTER_A"
character_id = 1
print character_data.character_type[character_id]["name"]
>>> correct output should be: Ann
I get AttributeError when try use character_type as "CHARACTER_A".
How about this
In [38]: from data import character_data as character_data
In [39]: character_type = "CHARACTER_A"
In [40]: character_id = 1
In [41]: getattr(character_data, character_type)[character_id]["name"]
Out[41]: 'Ann'
You can use locals():
>>> from data.character_data import CHARACTER_A, CHARACTER_B
>>> character_id = 1
>>> character_type = "CHARACTER_A"
>>> locals()[character_type][character_id]["name"]
Ann
Though, think about merging CHARACTER_A and CHARACTER_B into one dict and access this dict instead of locals().
Also, see Dive into Python: locals and globals.
You need to structure your data properly.
characters = {}
characters['type_a'] = {1: {"level": 1, "name":"Ann", "skill_level" : 1},
2: {"level": 2, "name":"Tom", "skill_level" : 1}}
characters['type_b'] = ...
Or, the better solution is to create your own "character" type, and use that instead:
class Character(object):
def __init__(self, type, level, name, skill):
self.type = type
self.level = level
self.name = name
self.skill = skill
characters = []
characters.append(Character('A',1,'Ann',1))
characters.append(Character('A',2,'Tom',1))
characters.append(Character('B',2,'Kai',1)) # and so on
Then,
all_type_a = []
looking_for = 'A'
for i in characters:
if i.type == looking_for:
all_type_a.append(i)
Or, the shorter way:
all_type_a = [i for i in characters if i.type == looking_for]

Categories