I am trying to use csv file to read data and convert them into nested array using python.
my column values of csv are
"hallticket_Number ","student_name","gender","course_name","university_course_code ","university_college_code","caste","course_year","semester_yearly_exams","subject_name1","subject_code1","marks_or_grade_points_obtained1","maximum_marks_or_grade_points1","pass_mark1","no_of_credits1","pass_fail_absent1","subject_name2","subject_code2","marks_or_grade_points_obtained2","maximum_marks_or_grade_points2","no_of_credits2","pass_fail_absent2" ,"subject_name3","subject_code3", "marks_or_grade_points_obtained3","maximum_marks_or_grade_points3","no_of_credits3", "pass_fail_absent3" ,"subject_name4" ,"subject_code4" ,"marks_or_grade_points_obtained4","maximum_marks_or_grade_points4","no_of_credits4" , "pass_fail_absent4" ,"subject_code5", "marks_or_grade_points_obtained5" ,"maximum_marks_or_grade_points5","no_of_credits5","pass_fail_absent5","subject_name6","marks_or_grade_points_obtained6","maximum_marks_or_grade_points6", "no_of_credits6","pass_fail_absent","final_result_pass_fail","marks_or_sgpa_
The output i need in JSON is
{
"hallticket_": 22342,
"student_name": "abc",
"gender": "m",
"course_name":" fgd",
"course_code":52,
"college_code ":521,
"caste":"open",
"year":55,
"exam":"s1",
"subject": [ {
"subject_name1":"hh",
"subject_code1":52,
"marks_or_grade_points_obtained1":85,
"maximum_marks_or_grade_points1":50,
"pass_mark1":52,
"no_of_credits1":85,
"pass_fail_absent1":"pass"},]
"subject": [ {
"subject_name2":"hh",
"subject_code2":52,
"marks_or_grade_points_obtained2":85,
"maximum_marks_or_grade_points2":50,
"pass_mark2":52,
"no_of_credits2":85,
"pass_fail_absent2":"pass"},]
"subject": [ {
"subject_name3":"hh",
"subject_code3":52,
"marks_or_grade_points_obtained3":85,
"maximum_marks_or_grade_points3":50,
"pass_mark3":52,
"no_of_credits3":85,
"pass_fail_absent3":"pass"},]
"subject": [ {
"subject_name4":"hh",
"subject_code4":52,
"marks_or_grade_points_obtained4":85,
"maximum_marks_or_grade_points4":50,
"pass_mark4":52,
"no_of_credits4":85,
"pass_fail_absent4":"pass"},]
"subject": [ {
"subject_name5":"hh",
"subject_code5":52,
"marks_or_grade_points_obtained5":85,
"maximum_marks_or_grade_points5":50,
"pass_mark5":52,
"no_of_credits5":85,
"pass_fail_absent5":"pass"},]
"subject": [ {
"subject_name6":"hh",
"subject_code6":52,
"marks_or_grade_points_obtained6":85,
"maximum_marks_or_grade_points6":50,
"pass_mark6":52,
"no_of_credits6":85,
"pass_fail_absent6":"pass"},]
"final_result_pass_fail":"pass",
" marks_or_sgpa_obtained":"8.00",
"maximum_marks_sgpa":"10",
"total_credits":"135"
}
import csv
import json
# Open the CSV
f = open('data.csv', 'r')
reader = csv.DictReader(f)
# Parse the CSV into JSON
out = json.dumps([row for row in reader])
print(out)
Hopefully this will work as your expectations!
Related
I have been trying to convert a JSON fie to CSV in python but the obtained csv is very vague with each letter being separated with comma rather than the word as a whole from the key - value pair. The code which I have tried and the obtained csv output are given below.
SAMPLE JSON FILE
"details":[
{
"name": "sreekumar, ananthu",
"type": "faculty/academician",
"personal": {
"age": "28",
"address": [
{
"street": "xyz",
"city": "abc",
}
]
}
SAMPLE CODE
import json
import csv
with open("json_data.json","r") as f:
data = json.loads(f)
csv_file = open("csv_file.csv","w")
csv_writer = csv.writer(csv_file)
for details in data['detail'];
for detail_key, detail_value in details.items():
if detail_key == 'name':
csv_writer.writerow(detail_value)
if detail_key == 'personal':
for personal_key, personal_value in detail_value.items():
if personal_key == 'age'
csv_writer.writerow(personal_value)
csv_file.close()
SAMPLE OUTPUT
s,r,e,e,k,u,m,a,ra,n,a,n,t,h,u,2,8
I'm having trouble to generate a well formatted CSV file out of some data i fetched from the leadfeeder API. In the csv file that is currently being created, not all values are in one row, id and leads are one column higher then the rest. Like here:
CSV Output
I later also like to load another json file and use it to map some values over the id and then put also the visits per lead into my csv file.
Do you also have some advice for this?
This is my code so far:
import json
import csv
csv_columns = ['name', 'industry', 'website_url', 'status', 'crm_lead_id', 'crm_organization_id', 'employee_count', 'id', 'type' ]
with open('data.json', 'r') as d:
d = json.load(d)
csv_file = 'lead_daten.csv'
try:
with open('leads.csv', 'w', newline='') as csvfile:
writer = csv.DictWriter(csvfile, fieldnames=csv_columns, extrasaction='ignore')
writer.writeheader()
for item in d['data']:
writer.writerow(item)
writer.writerow(item['attributes'])
except IOError:
print("I/O error")
My json data has the following structure:
I need also some of the nested values like the id in relationships!
{
"data": [
{
"attributes": {
"crm_lead_id": null,
"crm_organization_id": null,
"employee_count": 5000,
"facebook_url": null,
"first_visit_date": "2019-01-31",
"industry": "Furniture",
"last_visit_date": "2019-01-31",
"linkedin_url": null,
"name": "Example Inc",
"phone": null,
"status": "new",
"twitter_handle": "example",
"website_url": "http://www.example.com"
},
"id": "s7ybF6VxqhQqVM1m1BCnZT_8SRo9XnuoxSUP5ChvERZS9",
"relationships": {
"location": {
"data": {
"id": "8SRo9XnuoxSUP5ChvERZS9",
"type": "locations"
}
}
},
"type": "leads"
},
{
"attributes": {
"crm_lead_id": null,
When you write to a csv, you must write one full row at a time. You current code writes one row with only id and type, and then a different row with the other fields.
The correct way is to first fully build a dictionary containing all the fields and only then write it in one single operation. Code could be:
...
writer.writeheader()
for item in d['data']:
item.update(item["attributes"])
writer.writerow(item)
...
I have different Json Response like below sample :
In My case keys is in form of column Name and data is in rows Node.
{
"count": 2,
"name": "Report",
"columnNames": [
"Name",
"Address",
"Account",
"Completed"
],
"rows": [
[
"'ABC'",
Xyz,
"'Admin'",
"'Yes'"
],
[
"'ABC1'",
Xyz,
"'Admin'",
"'Yes'"
],
[
"'ABC2'",
Xyz,
"'Admin'",
"'Yes'"
]
]
}
and then i want to convert these json into csv format like this
Name,Address,Account, Completed
"'ABC'",Xyz,"'Admin'","'Yes'"
"'ABC1'",Xyz,"'Admin'","'Yes'"
"'ABC2'",Xyz,"'Admin'","'Yes'"
You can use the csv module and some formatting, say you have a json object
import csv
csv_arr = [json["columnNames"]]
for row in json["rows"]:
row_csv_arr = []
csv_arr.append(row)
Finally, you can write to a file
with open("output.csv",'wb') as csv_file:
wr = csv.writer(csv_file)
wr.writerows(csv_arr)
Or get it as a string
# Join the arrays
for i, val in enumerate(csv_arr):
csv_arr[i] = ", ".join(val)
csv_string = "\n".join(csv_arr)
With the following simple Python script:
import json
file = 'toy.json'
data = json.loads(file)
print(data['gas']) # example
My data generates the error ...is not JSON serializable.
With this, slightly more sophisticated, Python script:
import json
import sys
#load the data into an element
data = open('transactions000000000029.json', 'r')
#dumps the json object into an element
json_str = json.dumps(data)
#load the json to a string
resp = json.loads(json_str)
#extract an element in the response
print(resp['gas'])
The same.
What I'd like to do is extract all the values of a particular index, so ideally I'd like to render the input like so:
...
"hash": "0xf2b5b8fb173e371cbb427625b0339f6023f8b4ec3701b7a5c691fa9cef9daf63",
"gasUsed": "21000",
"hash": "0xf8f2a397b0f7bb1ff212b6bcc57e4a56ce3e27eb9f5839fef3e193c0252fab26"
"gasUsed": "21000"
...
The data looks like this:
{
"blockNumber": "1941794",
"blockHash": "0x41ee74e34cbf9ef4116febea958dbc260e2da3a6bf6f601bfaeb2cd9ab944a29",
"hash": "0xf2b5b8fb173e371cbb427625b0339f6023f8b4ec3701b7a5c691fa9cef9daf63",
"from": "0x3c0cbb196e3847d40cb4d77d7dd3b386222998d9",
"to": "0x2ba24c66cbff0bda0e3053ea07325479b3ed1393",
"gas": "121000",
"gasUsed": "21000",
"gasPrice": "20000000000",
"input": "",
"logs": [],
"nonce": "14",
"value": "0x24406420d09ce7440000",
"timestamp": "2016-07-24 20:28:11 UTC"
}
{
"blockNumber": "1941716",
"blockHash": "0x75e1602cad967a781f4a2ea9e19c97405fe1acaa8b9ad333fb7288d98f7b49e3",
"hash": "0xf8f2a397b0f7bb1ff212b6bcc57e4a56ce3e27eb9f5839fef3e193c0252fab26",
"from": "0xa0480c6f402b036e33e46f993d9c7b93913e7461",
"to": "0xb2ea1f1f997365d1036dd6f00c51b361e9a3f351",
"gas": "121000",
"gasUsed": "21000",
"gasPrice": "20000000000",
"input": "",
"logs": [],
"nonce": "1",
"value": "0xde0b6b3a7640000",
"timestamp": "2016-07-24 20:12:17 UTC"
}
What would be the best way to achieve that?
I've been thinking that perhaps the best way would be to reformat it as valid json?
Or maybe to just treat it like regex?
Your json file is not valid. This data should be a list of dictionaries. You should then separate each dictionary with a comma, Like this:
[
{
"blockNumber":"1941794",
"blockHash": "0x41ee74bf9ef411d9ab944a29",
"hash":"0xf2ef9daf63",
"from":"0x3c0cbb196e3847d40cb4d77d7dd3b386222998d9",
"to":"0x2ba24c66cbff0bda0e3053ea07325479b3ed1393",
"gas":"121000",
"gasUsed":"21000",
"gasPrice":"20000000000",
"input":"",
"logs":[
],
"nonce":"14",
"value":"0x24406420d09ce7440000",
"timestamp":"2016-07-24 20:28:11 UTC"
},
{
"blockNumber":"1941716",
"blockHash":"0x75e1602ca8d98f7b49e3",
"hash":"0xf8f2a397b0f7bb1ff212e193c0252fab26",
"from":"0xa0480c6f402b036e33e46f993d9c7b93913e7461",
"to":"0xb2ea1f1f997365d1036dd6f00c51b361e9a3f351",
"gas":"121000",
"gasUsed":"21000",
"gasPrice":"20000000000",
"input":"",
"logs":[
],
"nonce":"1",
"value":"0xde0b6b3a7640000",
"timestamp":"2016-07-24 20:12:17 UTC"
}
]
Then use this to open the file:
with open('toy.json') as data_file:
data = json.load(data_file)
You can then render the desired output like:
for item in data:
print item['hash']
print item['gasUsed']
If each block is valid JSON data you can parse them seperatly:
data = []
with open('transactions000000000029.json') as inpt:
lines = []
for line in inpt:
if line.startswith('{'): # block starts
lines = [line]
else:
lines.append(line)
if line.startswith('}'): # block ends
data.append(json.loads(''.join(lines)))
for block in data:
print("hash: {}".format(block['hash']))
print("gasUsed: {}".format(block['gasUsed']))
I have a bunch of Avro files that I would like to read one by one from S3. I have no problem reading the files as bytes but I am wondering how can you iterate over the entires after that. Current code:
conn = boto.s3.connect_to_region("us-east-1")
my_bucket=boto.s3.bucket.Bucket(conn, "my_bucket")
my_key = my_bucket.get_key("folder/file.avro")
raw_bytes = my_key.read()
test_schema = '''
{
"namespace": "com.company",
"type": "record",
"name": "MimeMessage_v2",
"fields": [
{
"name": "record_timestamp",
"type": "long"
},
{
"name": "contents",
"type": "bytes"
}
],
"message_id": 2
}
'''
schema = avro.schema.Parse(test_schema)
#this is the problematic section
dreader = DatumReader(schema, schema)
v = dreader.read(raw_bytes)
I am wondering how to read a variable containing bytes of a Avro file properly.
Here is one of the ways that worked for me in Python 3:
from avro.datafile import DataFileReader
avro_bytes = io.BytesIO(raw_bytes)
reader = DataFileReader(avro_bytes, avro.io.DatumReader())
for line in reader:
print(line)