I am trying to convert an XML file to Markdown, but I can't find an example of converting a Python dict. Here is my code:
import pypandoc
import xmltodict
import json
datadict = open("/home/.../FIX44.xml")
dic = xmltodict.parse(datadict.read())
jsondump = json.dumps(dic)
#print jsondump
output = pypandoc.convert(jsondump, 'md', format='json', outputfile = 'test.md')
assert output == ""
When I try to run this I get:
RuntimeError: Pandoc died with exitcode "1" during conversion
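For what it's worth, Pandoc's json input format is Pandoc's own document AST, not arbitrary JSON, which is why feeding it the xmltodict/json.dumps output makes Pandoc die with exit code 1. If a readable Markdown outline of the XML is enough, a minimal sketch that skips Pandoc entirely (the dict_to_md helper below is hypothetical, not part of pypandoc or xmltodict) could look like this:
import xmltodict

def dict_to_md(node, level=1):
    # Emit a heading per element name and a bullet per text value.
    lines = []
    if isinstance(node, dict):
        for key, value in node.items():
            lines.append("%s %s" % ("#" * min(level, 6), key))
            lines.append(dict_to_md(value, level + 1))
    elif isinstance(node, list):
        for item in node:
            lines.append(dict_to_md(item, level))
    else:
        lines.append("- %s" % node)
    return "\n".join(lines)

with open("/home/.../FIX44.xml") as fh:  # same elided path as above
    doc = xmltodict.parse(fh.read())

with open("test.md", "w") as out:
    out.write(dict_to_md(doc))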
I pass the following JSON via the command prompt as:
$python new.py {'scenarioId':'null','scenarioName':'EC_02','scenarioDesc':'EC_02','riskEngine':'null'}
and when I run the following:
import sys
import json
str_json = sys.argv[1].replace("'", '"')
try:
    b = json.dumps(str_json)
    dat = json.loads(b)
    print("Stress JSON is as follows \n", dat)
except json.decoder.JSONDecodeError:
    print("Not a valid JSON")
print(dat['scenarioId'])
The JSON itself is valid, but the last line raises TypeError: string indices must be integers.
It works fine when I initialize str_json directly in the code.
How can I solve this?
In your code, when you call a_json = json.loads(b), b is still a string: json.dumps(str_json) does not parse the JSON, it wraps the already-JSON text in one more layer of encoding, so a single json.loads only gives you back the original string. Decode it twice: a_json = json.loads(json.loads(b))
Code:
import sys
import json
str_json = sys.argv[1].replace("'", '"')
try:
    b = json.dumps(str_json)
    a_json = json.loads(json.loads(b))
    print("Stress JSON is as follows \n", a_json)
except json.decoder.JSONDecodeError:
    print("Not a valid JSON")
print(a_json['scenarioId'])
Output:
Stress JSON is as follows
{'scenarioId': 'null', 'scenarioName': 'EC_02', 'scenarioDesc': 'EC_02', 'riskEngine': 'null'}
null
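A simpler variant, assuming the JSON argument reaches the script intact (quoting the whole argument on the command line keeps the shell from splitting it on spaces or braces), skips the extra dumps/loads round trip and decodes the argument once:
import sys
import json

str_json = sys.argv[1].replace("'", '"')
try:
    a_json = json.loads(str_json)
    print("Stress JSON is as follows \n", a_json)
    print(a_json['scenarioId'])
except json.decoder.JSONDecodeError:
    print("Not a valid JSON")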
I am trying to convert XLS to JSON, but when I execute the code it doesn't give me the data inside the XLS sheet, only the JSON structure.
Below is the code I am running; I can't work out what further modification I should make so that I get a proper JSON file.
Please note: the input is a binary stream, and the output is also a stream, not a file.
#!/usr/bin/python -u
import sys
import xlrd
import simplejson
from collections import OrderedDict
wb = xlrd.open_workbook(file_contents=sys.stdin.read())
for sheet_index in range(wb.nsheets):
    # print sheet_index
    sh = wb.sheet_by_index(sheet_index)
    # print "Processing sheet no ", sheet_index
    attributes = sh.row_values(0)
    #print attributes
    rows_list = []
    attr_list = []
    # print attr_list[0]
    for rownum in range(1,sh.nrows):
        row_val_list = sh.row_values(rownum)
        row_dict = OrderedDict()
        for index in range(len(attr_list)):
            row_dict[attr_list[index]] = row_val_list[index]
        #row_dict['ID'] = row_val_list[0]
        #row_dict['Name'] = row_val_list[1]
        #rows_list.append(row_dict)
        #json_data = simplejson.dumps(rows_list)
        #sys.stdout.write(json_data)
        rows_list.append(row_dict)
    json_data = simplejson.dumps(rows_list)
    sys.stdout.write(json_data)
    # json_data = simplejson.dumps(rows_list)
    #sys.stdout.write(json_data)
Any help is much appreciated
Here is the corrected, working Python code:
#!/usr/bin/python -u
import sys
import xlrd
import simplejson
from collections import OrderedDict
wb = xlrd.open_workbook(file_contents=sys.stdin.read())
#print "Sheets are .... ", wb.nsheets
for sheet_index in range(wb.nsheets):
    sh = wb.sheet_by_index(sheet_index)
    if sh.nrows == 0:
        continue
    attr_list = sh.row_values(0)
    rows_list = []
    for rownum in range(1, sh.nrows):
        row_values = sh.row_values(rownum)
        row_dict = OrderedDict()
        for index in range(len(attr_list)):
            row_dict[attr_list[index]] = row_values[index]
        rows_list.append(row_dict)
    json_data = simplejson.dumps(rows_list)
    sys.stdout.write(json_data)
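A shorter sketch of the same stdin-to-stdout conversion, assuming pandas is available (recent xlrd releases only read legacy .xls workbooks, so pandas with an engine such as openpyxl is also the practical route for .xlsx input):
import sys
from io import BytesIO

import pandas as pd

# Read the workbook from the binary stream and emit one JSON array per sheet.
sheets = pd.read_excel(BytesIO(sys.stdin.buffer.read()), sheet_name=None)
for name, frame in sheets.items():
    sys.stdout.write(frame.to_json(orient="records"))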
I'm trying to convert a JSON file to XML using a small Python script, but for some reason the loop only seems to read the first line of the JSON file.
from xml.dom import minidom
from json import JSONDecoder
import json
import sys
import csv
import os
import re
import dicttoxml
from datetime import datetime, timedelta
from functools import partial
reload(sys)
sys.setdefaultencoding('utf-8')
nav = 'navigation_items.bson.json'
df = 'testxmloutput.txt'
def json2xml(json_obj, line_padding=""):
    result_list = list()
    json_obj_type = type(json_obj)
    if json_obj_type is list:
        for sub_elem in json_obj:
            result_list.append(json2xml(sub_elem, line_padding))
        return "\n".join(result_list)
    if json_obj_type is dict:
        for tag_name in json_obj:
            sub_obj = json_obj[tag_name]
            result_list.append("%s<%s>" % (line_padding, tag_name))
            result_list.append(json2xml(sub_obj, "\t" + line_padding))
            result_list.append("%s</%s>" % (line_padding, tag_name))
        return "\n".join(result_list)
    return "%s%s" % (line_padding, json_obj)

def json_parse(fileobj, decoder=JSONDecoder(), buffersize=2048):
    buffer = ''
    for chunk in iter(partial(fileobj.read, buffersize), ''):
        buffer += chunk
        while buffer:
            try:
                result, index = decoder.raw_decode(buffer)
                yield result
                buffer = buffer[index:]
            except ValueError:
                # Not enough data to decode, read more
                break

def converter(data):
    f = open(df, 'w')
    data = open(nav)
    for line in json_parse(data):
        f.write(dicttoxml.dicttoxml(line, attr_type=False))
    f.close()

converter(nav)
I was under the assumption that the iterator would read the first object into memory and then move on to the next. The converted output looks great, but I'm not sure where to look to make it loop through to the next line in the file.
Try json.load to load the file into a dict and then iterate the dict for your output.
import sys
import json
json_file = sys.argv[1]
data = {}
with open(json_file) as data_file:
    data = json.load(data_file)
for key in data:
    pass  # do your things with data[key]
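Applied to the original script, a minimal sketch along those lines, assuming the whole file is a single JSON document (if it is actually one JSON object per line, read it line by line with json.loads instead), with a binary write because dicttoxml returns bytes:
import json
import dicttoxml

with open('navigation_items.bson.json') as data_file:
    data = json.load(data_file)

with open('testxmloutput.txt', 'wb') as out:
    out.write(dicttoxml.dicttoxml(data, attr_type=False))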
My specs:
Python 3.4.3
Windows 7
IDE is Jupyter Notebooks
What I have referenced:
how-to-properly-escape-single-and-double-quotes
python-escaping-strings-for-use-in-xml
escaping-characters-in-a-xml-file-with-python
Here is the data and script, respectively, below (I have tried variations on serializing Column 'E' using both Sax and ElementTree):
Data
A,B,C,D,E,F,G,H,I,J
"3","8","1","<Request TransactionID="3" RequestType="FOO"><InstitutionISO /><CallID>23</CallID><MemberID>12</MemberID><MemberPassword /><RequestData><AccountNumber>2</AccountNumber><AccountSuffix>85</AccountSuffix><AccountType>S</AccountType><MPIAcctType>Checking</MPIAcctType><TransactionCount>10</TransactionCount></RequestData></Request>","<Response TransactionID="2" RequestType="HoldInquiry"><ShareList>0000',0001,0070,</ShareList></Response>","1967-12-25 22:18:13.471000","2005-12-25 22:18:13.768000","2","70","0"
Script
#!/usr/bin/python
# -*- coding: utf-8 -*-
import os.path
import sys
import csv
from io import StringIO
import xml.etree.cElementTree as ElementTree
from xml.etree.ElementTree import XMLParser
import xml
import xml.sax
from xml.sax import ContentHandler
class MyHandler(xml.sax.handler.ContentHandler):
    def __init__(self):
        self._charBuffer = []
        self._result = []

    def _getCharacterData(self):
        data = ''.join(self._charBuffer).strip()
        self._charBuffer = []
        return data.strip()  # remove strip() if whitespace is important

    def parse(self, f):
        xml.sax.parse(f, self)
        return self._result

    def characters(self, data):
        self._charBuffer.append(data)

    def startElement(self, name, attrs):
        if name == 'Response':
            self._result.append({})

    def endElement(self, name):
        if not name == 'Response':
            self._result[-1][name] = self._getCharacterData()

def read_data(path):
    with open(path, 'rU', encoding='utf-8') as data:
        reader = csv.DictReader(data, delimiter=',', quotechar="'", skipinitialspace=True)
        for row in reader:
            yield row

if __name__ == "__main__":
    empty = ''
    Response = 'sample.csv'
    for idx, row in enumerate(read_data(Response)):
        if idx > 10:
            break
        data = row['E']
        print(data)  # The before
        data = data[1:-1]
        data = ""'{}'"".format(data)
        print(data)  # Sanity check
        # data = '<Response TransactionID="2" RequestType="HoldInquiry"><ShareList>0000',0001,0070,</ShareList></Response>'
        try:
            root = ElementTree.XML(data)
            # print(root)
        except StopIteration:
            raise
            pass
        # xmlstring = StringIO(data)
        # print(xmlstring)
        # Handler = MyHandler().parse(xmlstring)
Specifically, due to the quoting in the CSV file (which is beyond my control), I have had to resort to slicing the string (data = data[1:-1]) and then re-wrapping it with the format() call on the next line.
However, the printout from the above attempt is as follows:
"<Response TransactionID="2" RequestType="HoldInquiry"><ShareList>0000'
<Response TransactionID="2" RequestType="HoldInquiry"><ShareList>0000
File "<string>", line unknown
ParseError: no element found: line 1, column 69
Interestingly, if I instead assign the commented-out literal to "data" directly, I receive this:
File "<ipython-input-80-7357c9272b92>", line 56
data = '<Response TransactionID="2" RequestType="HoldInquiry"><ShareList>0000',0001,0070,</ShareList></Response>'
^
SyntaxError: invalid token
I'm looking for feedback on the most Pythonic way to address this; ideally, is there a method that can still leverage ElementTree? Thank you in advance for your feedback and guidance.
It seems that you have badly formatted (well, badly quoted) CSV data.
If the CSV file is beyond your control, I suggest not using the csv reader to read it;
instead, if you can rely on each field being quoted consistently, split the fields yourself.
with open(Response, 'rU', encoding='utf-8') as data:
    separated = data.read().split('","')
    try:
        x = ElementTree.XML(separated[3])
        print(x)
        xml.etree.ElementTree.dump(x)
        y = ElementTree.XML(separated[4])
        xml.etree.ElementTree.dump(y)
    except Exception as e:
        print(e)
outputs
<Element 'Request' at 0xb6d973b0>
<Request RequestType="FOO" TransactionID="3"><InstitutionISO /><CallID>23</CallID><MemberID>12</MemberID><MemberPassword /><RequestData><AccountNumber>2</AccountNumber><AccountSuffix>85</AccountSuffix><AccountType>S</AccountType><MPIAcctType>Checking</MPIAcctType><TransactionCount>10</TransactionCount></RequestData></Request>
<Response RequestType="HoldInquiry" TransactionID="2"><ShareList>0000',0001,0070,</ShareList></Response>
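If you need every row rather than a one-off check, a minimal extension of the same idea, assuming one record per line and that the embedded XML never contains a newline or a literal "," sequence:
with open(Response, 'rU', encoding='utf-8') as data:
    next(data)  # skip the header row
    for line in data:
        fields = line.rstrip('\n').split('","')
        try:
            request_xml = ElementTree.XML(fields[3])
            response_xml = ElementTree.XML(fields[4])
            print(request_xml.tag, response_xml.tag)
        except ElementTree.ParseError as e:
            print(e)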
Does anyone know how can I convert JSON to XLS in Python?
I know that it is possible to create xls files using the package xlwt in Python.
What if I want to convert JSON data to an XLS file directly?
Is there a way to achieve this?
Using pandas (0.15.1) and openpyxl (1.8.6):
import pandas
pandas.read_json("input.json").to_excel("output.xlsx")
I usually use tablib for this. It's pretty simple to use:
https://pypi.python.org/pypi/tablib/
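A minimal sketch with tablib, assuming the JSON is a flat list of objects and the Excel extra is installed (pip install "tablib[xlsx]"); the input/output filenames are just placeholders:
import tablib

with open("input.json") as f:
    data = tablib.Dataset().load(f.read(), format="json")

with open("output.xlsx", "wb") as f:
    f.write(data.export("xlsx"))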
If your JSON file is stored on disk, then:
import pandas as pd
pd.read_json("/path/to/json/file").to_excel("output.xlsx")
If you have your JSON within the code, you can simply use a DataFrame:
json_file = {'name':["aparna", "pankaj", "sudhir", "Geeku"],'degree': ["MBA", "BCA", "M.Tech", "MBA"],'score':[90, 40, 80, 98]}
df = pd.DataFrame(json_file).to_excel("excel.xlsx")
In case someone wants to output the Excel file as a stream using Flask-REST.
Pandas version:
# Inside a Flask view; assumes `from io import BytesIO`, `from tempfile import
# NamedTemporaryFile`, `import pandas`, and `from flask import request`.
json_payload = request.get_json()
with NamedTemporaryFile(suffix='.xlsx') as tmp:
    pandas.DataFrame(json_payload).to_excel(tmp.name)
    buf = BytesIO(tmp.read())
response = app.make_response(buf.getvalue())
response.headers['content-type'] = 'application/octet-stream'
return response
and OpenPyXL version:
# Same idea with openpyxl; assumes `from openpyxl import Workbook` plus the
# BytesIO / NamedTemporaryFile / request imports from the pandas version above.
keys = []
wb = Workbook()
ws = wb.active
json_data = request.get_json()
with NamedTemporaryFile() as tmp:
    for i in range(len(json_data)):
        sub_obj = json_data[i]
        if i == 0:
            # First object: write the header row from its keys.
            keys = list(sub_obj.keys())
            for k in range(len(keys)):
                ws.cell(row=(i + 1), column=(k + 1), value=keys[k])
        for j in range(len(keys)):
            ws.cell(row=(i + 2), column=(j + 1), value=sub_obj[keys[j]])
    wb.save(tmp.name)
    buf = BytesIO(tmp.read())
response = app.make_response(buf.getvalue())
response.headers['content-type'] = 'application/octet-stream'
return response
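With either version, it is worth adding a Content-Disposition header before returning, so browsers save the stream under a sensible filename (the filename below is only an example), and optionally the real .xlsx MIME type instead of application/octet-stream:
response.headers['Content-Disposition'] = 'attachment; filename=output.xlsx'
response.headers['content-type'] = 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'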