I currently have A JSON file saved containing some data I want to convert to CSV. Here is the data sample below, please note, I have censored the actual value in there for security and privacy reasons.
{
"ID value1": {
"Id": "ID value1",
"TechnischContactpersoon": {
"Naam": "Value",
"Telefoon": "Value",
"Email": "Value"
},
"Disclaimer": [
"Value"
],
"Voorzorgsmaatregelen": [
{
"Attributes": {},
"FileId": "value",
"FileName": "value",
"FilePackageLocation": "value"
},
{
"Attributes": {},
"FileId": "value",
"FileName": "value",
"FilePackageLocation": "value"
},
]
},
"ID value2": {
"Id": "id value2",
"TechnischContactpersoon": {
"Naam": "Value",
"Telefoon": "Value",
"Email": "Value"
},
"Disclaimer": [
"Placeholder"
],
"Voorzorgsmaatregelen": [
{
"Attributes": {},
"FileId": "value",
"FileName": "value",
"FilePackageLocation": "value"
}
]
},
Though I know how to do this (because I already have a function to handle a JSON to CSV convertion) with a simple JSON string without issues. I do not know to this with this kind of JSON file that this kind of a structure layer. Aka a second layer beneath the first. Also you may have noticed that there is an ID value above
Because as may have noticed from structure is actually another layer inside the JSON file. So in total I need to have two kinds of CSV files:
The main CSV file just containing the ID, Disclaimer. This CSV file
is called utility networks and contains all possible ID value's and
the value
A file containing the "Voorzorgsmaatregelen" value's. Because there are multiple values in this section, one CSV file per unique
ID file is needed and needs to be named after the Unique value id.
Deleted this part because it was irrelevant.
Data_folder = "Data"
Unazones_file_name = "UnaZones"
Utilitynetworks_file_name = "utilityNetworks"
folder_path_JSON_BS_JSON = folder_path_creation(Data_folder)
pkml_file_path = os.path.join(folder_path_JSON_BS_JSON,"pmkl.json")
print(pkml_file_path)
json_object = json_open(pkml_file_path)
json_content_unazones = json_object.get("mapRequest").get("UnaZones")
json_content_utility_Networks = json_object.get("utilityNetworks")
Unazones_json_location = json_to_save(json_content_unazones,folder_path_JSON_BS_JSON,Unazones_file_name)
csv_file_location_unazones = os.path.join(folder_path_CSV_file_path(Data_folder),(Unazones_file_name+".csv"))
csv_file_location_Utilitynetwork = os.path.join(folder_path_CSV_file_path(Data_folder),(Unazones_file_name+".csv"))
json_content_utility_Networks = json_object.get("utilityNetworks")
Utility_networks_json_location = json_to_save(json_content_utility_Networks,folder_path_JSON_BS_JSON,Utilitynetworks_file_name)
def json_to_csv_convertion(json_file_path: str, csv_file_location: str):
loaded_json_data = json_open(json_file_path)
# now we will open a file for writing
data_file = open(csv_file_location, 'w', newline='')
# # create the csv writer object
csv_writer = csv.writer(data_file,delimiter = ";")
# Counter variable used for writing
# headers to the CSV file
count = 0
for row in loaded_json_data:
if count == 0:
# Writing headers of CSV file
header = row.keys()
csv_writer.writerow(header)
count += 1
# Writing data of CSV file
csv_writer.writerow(row.values())
data_file.close()
def folder_path_creation(path: str):
if not os.path.exists(path):
os.makedirs(path)
return path
def json_open(complete_folder_path):
with open(complete_folder_path) as f:
json_to_load = json.load(f) # Modified "objectids" to "object_ids" for readability -sg
return json_to_load
def json_to_save(input_json, folder_path: str, file_name: str):
json_save_location = save_file(input_json, folder_path, file_name, "json")
return json_save_location
So how do I this starting from this?
for obj in json_content_utility_Networks:
Go from there?
Keep in mind that is JSON value has already one layer above every object for every object I need to start one layer below it.
So how do I this?
I am trying to achieve the below JSON format and store it in a json file:
{
"Name": "Anurag",
"resetRecordedDate": false,
"ED": {
"Link": "google.com"
}
}
I know how to create a simple JSON file using JSON dumps but not really sure how to add something similar to a dictionary for one of the records within the JSON file.
Assuming the input json content is
{
"Name": "Anurag",
"resetRecordedDate": False
}
Program
import json
# read file
with open('example.json', 'r') as infile:
data=infile.read()
# parse file
parsed_json = json.loads(data)
# Add dictionary element
parsed_json["ED"] = {
"Link": "google.com"
}
# print(json.dumps(parsed_json, indent=4))
# write to json
with open('data.json', 'w') as outfile:
json.dump(parsed_json, outfile)
o/p
{
"Name": "Anurag",
"resetRecordedDate": false,
"ED": {
"Link": "google.com"
}
}
I have a small json file, with the following lines:
{
"IdTitulo": "Jaws",
"IdDirector": "Steven Spielberg",
"IdNumber": 8,
"IdDecimal": "2.33"
}
An there is a schema in my db collection, named test_dec. This is what I've used to create the schema:
db.createCollection("test_dec",
{validator: {
$jsonSchema: {
bsonType: "object",
required: ["IdTitulo","IdDirector"],
properties: {
IdTitulo: {
"bsonType": "string",
"description": "string type, nombre de la pelicula"
},
IdDirector: {
"bsonType": "string",
"description": "string type, nombre del director"
},
IdNumber : {
"bsonType": "int",
"description": "number type to test"
},
IdDecimal : {
"bsonType": "decimal",
"description": "decimal type"
}
}
}}
})
I've made multiple attempts to insert the data. The problem is in the IdDecimal field value.
Some of the trials, replacing the IdDecimal line by:
"IdDecimal": 2.33
"IdDecimal": {"$numberDecimal": "2.33"}
"IdDecimal": NumberDecimal("2.33")
None of them work. The second one is the formal solution provided by MongoDB manuals (mongodb-extended-json) adn the error is the output I've placed in my question: bson.errors.InvalidDocument: key'$numberDecimal' must not start with '$'.
I am currently using a python to load the json. I've been playing around with this file:
import os,sys
import re
import io
import json
from pymongo import MongoClient
from bson.raw_bson import RawBSONDocument
from bson.json_util import CANONICAL_JSON_OPTIONS,dumps,loads
import bsonjs as bs
#connection
client = MongoClient('localhost',27018,document_class=RawBSONDocument)
db = client['myDB']
coll = db['test_dec']
other_col = db['free']
for fname in os.listdir('/mnt/win/load'):
num = re.findall("\d+", fname)
if num:
with io.open(fname, encoding="ISO-8859-1") as f:
doc_data = loads(dumps(f,json_options=CANONICAL_JSON_OPTIONS))
print(doc_data)
test = '{"idTitulo":"La pelicula","idRelease":2019}'
raw_bson = bs.loads(test)
load_raw = RawBSONDocument(raw_bson)
db.other_col.insert_one(load_raw)
client.close()
I am using a json file. If I try to parse anything like Decimal128('2.33') the output is "ValueError: No JSON object could be decoded", because my json has an invalid format.
The result of
db.other_col.insert_one(load_raw)
Is that the content of "test" is inserted.
But I cannot use doc_data with RawBSONDocument, because it goes like that. It says:
TypeError: unpack_from() argument 1 must be string or buffer, not list:
When I manage to parse the json directly to the RawBSONDocument I got all the trash within and the record in database looks like the sample here:
{
"_id" : ObjectId("5eb2920a34eea737626667c2"),
"0" : "{\n",
"1" : "\t\"IdTitulo\": \"Gremlins\",\n",
"2" : "\t\"IdDirector\": \"Joe Dante\",\n",
"3" : "\t\"IdNumber\": 6,\n",
"4" : "\"IdDate\": {\"$date\": \"2010-06-18T:00.12:00Z\"}\t\n",
"5" : "}\n"
}
It seems it is not that simple to load a extended json into MongoDB. The extended version is because I want to use schema validation.
Oleg pointed out that is numberDecimal and not NumberDecimal as I had it before. I've fixed the json file, but nothing changed.
Executed:
with io.open(fname, encoding="ISO-8859-1") as f:
doc_data = json.load(f)
coll.insert(doc_data)
And the json file:
{
"IdTitulo": "Gremlins",
"IdDirector": "Joe Dante",
"IdNumber": 6,
"IdDecimal": {"$numberDecimal": "3.45"}
}
One more roll of the dice from me. If you are using schema validation as you are, I would recommend defining a class and being explicit with defining each field and how you propose to convert the field to the relevant python datatypes. While your solution is generic, the data structure has to be rigid to match the validation.
IMO this is clearer and you have control over any errors etc within the class.
Just to confirm I ran the schema validation and this works with the supplied validation.
from pymongo import MongoClient
import bson.json_util
import dateutil.parser
import json
class Film:
def __init__(self, file):
data = file.read()
loaded = json.loads(data)
self.IdTitulo = loaded.get('IdTitulo')
self.IdDirector = loaded.get('IdDirector')
self.IdDecimal = bson.json_util.Decimal128(loaded.get('IdDecimal'))
self.IdNumber = int(loaded.get('IdNumber'))
self.IdDateTime = dateutil.parser.parse(loaded.get('IdDateTime'))
def insert_one(self, collection):
collection.insert_one(self.__dict__)
client = MongoClient()
mycollection = client.mydatabase.test_dec
with open('c:/temp/1.json', 'r') as jfile:
film = Film(jfile)
film.insert_one(mycollection)
gives:
> db.test_dec.findOne()
{
"_id" : ObjectId("5eba79eabf951a15d32843ae"),
"IdTitulo" : "Jaws",
"IdDirector" : "Steven Spielberg",
"IdDecimal" : NumberDecimal("2.33"),
"IdNumber" : 8,
"IdDateTime" : ISODate("2020-05-12T10:08:21Z")
}
>
JSON file used:
{
"IdTitulo": "Jaws",
"IdDirector": "Steven Spielberg",
"IdNumber": 8,
"IdDecimal": "2.33",
"IdDateTime": "2020-05-12T11:08:21+0100"
}
JSON with type information is called Extended JSON. Following the examples, construct extended json for your data:
ext_json = '''
{
"IdTitulo": "Jaws",
"IdDirector": "Steven Spielberg",
"IdNumber": 8,
"IdDecimal": {"$numberDecimal":"2.33"}
}
'''
In Python, use json_util to load extended json into a Python dictionary:
from bson.json_util import loads
doc = loads(ext_json)
print(doc)
# {u'IdTitulo': u'Jaws', u'IdDirector': u'Steven Spielberg', u'IdDecimal': Decimal128('2.33'), u'IdNumber': 8}
The result of this load is sometimes referred to as a "BSON document" but it is not BSON, which is binary. "BSON" in this context really means that some values are not of python standard library types. The "document" part basically means the object is a dictionary.
You will notice that IdNumber is of a non-standard library type:
print type(doc['IdDecimal'])
# <class 'bson.decimal128.Decimal128'>
To insert this dictionary into MongoDB, follow pymongo tutorial:
from pymongo import MongoClient
client = MongoClient('localhost', 14420)
db = client.test_database
collection = db.test_collection
collection.insert_one(doc)
print(doc)
Finally, I've got the solution and it is using RawBSONDocument.
First the json file:
{
"IdTitulo": "Dead Snow",
"IdDirector": "Tommy Wirkola",
"IdNumber": 11,
"IdDecimal": {"$numberDecimal": "2.22"}
}
& the validation schema file:
db.createCollection("test_dec",
{validator: {
$jsonSchema: {
bsonType: "object",
required: ["IdTitulo","IdDirector"],
properties: {
IdTitulo: {
"bsonType": "string",
"description": "string type, nombre de la pelicula"
},
IdDirector: {
"bsonType": "string",
"description": "string type, nombre del director"
},
IdNumber : {
"bsonType": "int",
"description": "number type to test"
},
IdDecimal : {
"bsonType": "decimal",
"description": "decimal type"
}
}
}}
})
So, the collection in this case is "test_dec".
And the python script that opens the file ".json", reads it and parses it to be imported into MongoDB.
import json
from bson.raw_bson import RawBSONDocument
from pymongo import MongoClient
import bsonjs
#connection
client = MongoClient('localhost',27018)
db = client['movieDB']
coll = db['test_dec']
#open an read file
with open('1.json', 'r') as jfile:
data = jfile.read()
loaded = json.loads(data)
dumped = json.dumps(loaded, indent=4)
bson_bytes = bsonjs.loads(dumped)
coll.insert_one(RawBSONDocument(bson_bytes))
client.close()
The inserted document:
{
"_id" : ObjectId("5eb971ec6fbab859dfae8a6f"),
"IdTitulo" : "Dead Snow",
"IdDirector" : "Toomy Wirkola",
"IdDecimal" : NumberDecimal("2.22"),
"IdNumber" : 11
}
I don't know how it flipped the fields IdDecimal and IdNumber, but it passes the validation and I am really happy.
I tried a document with 'hello' instead of a number in NumberDecimal and the insertion resulted in:
{
"_id" : ObjectId("5eb973b76fbab859dfae8ecd"),
"IdTitulo" : "Shining",
"IdDirector" : "Stanley Kubrick",
"IdDecimal" : NumberDecimal("NaN"),
"IdNumber" : 19
}
Thanks to all that tried to help. Specially Oleg!!! Thank you for being so patient.
Could you not just use bson.decimal128.Decimal128? Ot am I missing something?
from pymongo import MongoClient
from bson.decimal128 import Decimal128
db = MongoClient()['mydatabase']
data = {
"IdTitulo": "Jaws",
"IdDirector": "Steven Spielberg",
"IdNumber": 8,
"IdDecimal": "2.33"
}
data['IdDecimal'] = Decimal128(data['IdDecimal'])
db.other_col.insert_one(data)
I am trying to build a database-backed system using Python as an engine and JSON as the DB file. Currently, there are three files:
# functions.py
import json
from pprint import pprint
with open('db.json') as data_file:
data = json.load(data_file)
def getStudentByUsername(username, record):
detail = json.load(open('db.json'))["students"][username][record]
pprint(detail)
# main.py
import functions
functions.getStudentByUsername('youngh', 'age')
{ // db.json
"students": {
"youngh": {
"name": "Hayden Young",
"age": 13,
"email": "user#school.county.sch.uk"
}
}
}
But, I can't find a way to change my students object like this and still query by username:
{ //db.json "students": [
{
"username": "youngh",
"name": "Hayden Young"
}]
}
Any ideas? I'm new to this type of use of Python 3.
In your second example, you have the user attributes in a dictionary inside a list.
You will need to iterate over your list and get the dictionary with the username you seek.
with open('db.json') as dbfile:
for student in json.load(dbfile)["students"]:
if student["username"] == username:
return(student)
Keep in mind that because the usernames are contained in their own separate dictionaries, you can now have duplicate usernames, and the for loop method will only return the first match.
I think you can use lambda:
#let this var be your file output after parsing to json
users = {
"students":
[
{
"username": "youngh",
"name": "Hayden Young"
}
]
}
def getStudentByUsername(username, record):
detail= filter(lambda user: username == user['username'], users["students"])
print (next(detail)[record])
For duplicate usernames, you can do something like this:
def getStudentByUsername(username, record):
detail= filter(lambda user: username.lower() == user['username'].lower(), users["students"])
while True:
try:
print (next(detail)[record])
except StopIteration:
return
Hey so I have some hash ids in a csv file like
XbRPhe65YbC+xtgGQ8ukeZEr9xFOC4MEs9Z0wUidGSec=
XbRPhe65YbC+xtgGQ8uksrqSUJ/HhTPj1d2pL0/vuGrHM=
and I want to parse them into python wrap them in some additional code like
{"id" :"XbRPshe65YbC+xtGQ8ukqR2u2btfNeNe2gtcs72QbxPA=", "timestamp":"20150831"},
and then wrap all of that in some JSON syntax. This is then sent as a post request. Problem is I cannot seem to make it JSON readable. Everything seems to be ordered wrong and I am getting extra \.
import os
import pandas as pd
from pprint import pprint
df=pd.read_csv('test.csv',sep=',',header=None)
df[0] = '{"id" :"' + df[0].astype(str) + '", "timestamp":"20150831"}, '
df = df[:-1] # removes last comma
test = 'hello'
data =[ { "ids":[ df[0]],
"attributes":[
{
"name":"girl"
},
{
"name":"size"
}
]
}
]
json1 = data.to_json()
print(json1)
I agree that pandas doesn't seem to be the simplest tool for the job here. The built-in libraries will work great:
import csv
import json
with open('test.csv', newline='') as csvfile:
csvreader = csv.reader(csvfile)
data = {
"ids": [{"id": row[0], "timestamp": "20150831"} for row in csvreader],
"attributes": [
{"name": "girl"},
{"name": "size"}
]
}
json1 = json.dumps(data)
print(json1)