How to convert JSON to XLS in Python - python

Does anyone know how can I convert JSON to XLS in Python?
I know that it is possible to create xls files using the package xlwt in Python.
What if I want convert a JSON data convert to XLS file directly?
Is there a way to archive this?

Using pandas (0.15.1) and openpyxl (1.8.6):
import pandas
pandas.read_json("input.json").to_excel("output.xlsx")

I usually use tablib for this use. Its pretty simple to use:
https://pypi.python.org/pypi/tablib/

If your json file is stored in some directory then,
import pandas as pd
pd.read_json("/path/to/json/file").to_excel("output.xlsx")
If you have your json within the code then, you can simply use DataFrame
json_file = {'name':["aparna", "pankaj", "sudhir", "Geeku"],'degree': ["MBA", "BCA", "M.Tech", "MBA"],'score':[90, 40, 80, 98]}
df = pd.DataFrame(json_file).to_excel("excel.xlsx")

In case someone wants to do output to Excel as a stream using Flask-REST
Pandas versions:
json_payload = request.get_json()
with NamedTemporaryFile(suffix='.xlsx') as tmp:
pandas.DataFrame(json_payload).to_excel(tmp.name)
buf = BytesIO(tmp.read())
response = app.make_response(buf.getvalue())
response.headers['content-type'] = 'application/octet-stream'
return response
and OpenPyXL version:
keys = []
wb = Workbook()
ws = wb.active
json_data = request.get_json()
with NamedTemporaryFile() as tmp:
for i in range(len(json_data)):
sub_obj = json_data[i]
if i == 0:
keys = list(sub_obj.keys())
for k in range(len(keys)):
ws.cell(row=(i + 1), column=(k + 1), value=keys[k]);
for j in range(len(keys)):
ws.cell(row=(i + 2), column=(j + 1), value=sub_obj[keys[j]]);
wb.save(tmp.name)
buf = BytesIO(tmp.read())
response = app.make_response(buf.getvalue())
response.headers['content-type'] = 'application/octet-stream'
return response

Related

how to convert json file to csv with "success":true

I have problem with convert json file to csv file on python
and i think it will be the nested json file but i don't know how to handle it!
import json, requests
url = requests.get("https://####/api/food_orders")
text = url.text
data = json.load(text)
order_data = data['data']
# now we will open a file for writing
data_file = open('ordersJsonToCsv.csv', 'w', newline='')
# create the csv writer object
csv_writer = csv.writer(data_file)
# Counter variable used for writing
# headers to the CSV file
count = 0
for ord in order_data:
if count == 0:
# Writing headers of CSV file
header = ord.keys()
csv_writer.writerow(header)
count += 1
# Writing data of CSV file
csv_writer.writerow(ord.values())
data_file.close()
And Json file look like
This code will solve the problem to get data only
import pandas as pd
import json, requests
url = requests.get("https://##/api/orders?
text = url.text
info = json.loads(text)
df = pd.json_normalize(info['data'])
df.to_csv("samplecsv.csv")

how to download excel file in python and streamlit?

I have a Python script that read files and convert it to dataframe using Python and streamlit. Then I want to create a function to allows the user to download this dataframe as an Excel file with extension .xls.
So I tried to read the dataframe and convert it to an Excel file using these two functions:
pd.ExcelWriter
df.to_excel
But when I try to download the file using a link the file doesn't download and displays this error:
Failed-Network error
Code:
import pandas as pd
import streamlit as st
writer = pd.ExcelWriter('update2.xlsx')
df.to_excel(writer, index = False, header=True,encoding='utf-8')
with open(writer,'rb') as f:
b64 = base64.b64encode(f.read())
href = f'Download {extension}'
st.write(href, unsafe_allow_html=True)
With the streamlit latest release(above 1.0.0):
Use
st.download_button
Displays a download button widget.
This is useful when you would like to provide a way for your users to download a file directly from your app.
Note that the data to be downloaded is stored in memory while the user is connected, so it's a good idea to keep file sizes under a couple of hundred megabytes to conserve memory.
Here is a sample code from the discussion, that can be helpful to download excel files...
import pandas as pd
from io import BytesIO
from pyxlsb import open_workbook as open_xlsb
import streamlit as st
def to_excel(df):
output = BytesIO()
writer = pd.ExcelWriter(output, engine='xlsxwriter')
df.to_excel(writer, index=False, sheet_name='Sheet1')
workbook = writer.book
worksheet = writer.sheets['Sheet1']
format1 = workbook.add_format({'num_format': '0.00'})
worksheet.set_column('A:A', None, format1)
writer.save()
processed_data = output.getvalue()
return processed_data
df_xlsx = to_excel(df)
st.download_button(label='📥 Download Current Result',
data=df_xlsx ,
file_name= 'df_test.xlsx')
This worked for me
import pandas as pd
from io import BytesIO
import streamlit as st
def to_excel(df: pd.DataFrame):
in_memory_fp = BytesIO()
df.to_excel(in_memory_fp)
# Write the file out to disk to demonstrate that it worked.
in_memory_fp.seek(0, 0)
return in_memory_fp.read()
cols = ["col1", "col2"]
df = pd.DataFrame.from_records([{k: 0.0 for k in cols} for _ in range(25)])
excel_data = to_excel(df)
file_name = "excel.xlsx"
st.download_button(
f"Click to download {file_name}",
excel_data,
file_name,
f"text/{file_name}",
key=file_name
)
line 5 can't be executed since you havent assigned any excel to the DataFrame df.
try something like this in your code:
df = pd.read_csv('update2.xlsx')
I hope, this helped.
Take care
def get_binary_file_downloader_html(bin_file, file_label='File'):
with open(bin_file, 'rb') as f:
data = f.read()
bin_str = base64.b64encode(data).decode()
href = f'Descargar {file_label}'
return href
st.markdown(get_binary_file_downloader_html('Wip_QRY.xlsx', 'Excel'), unsafe_allow_html=True)

convert xls to json in python

I am trying to convert xls to json and but when I am executing the code it's not giving me the data inside xls sheet, it's only giving me the json structure.
Below is the code which I am running, I am not able to understand what modification I should further make in this so that I can get a perfect json file.
Please note - input is in the form of binary stream and output is also in the form of a stream and not file.
#!/usr/bin/python -u
import sys
import xlrd
import simplejson
from collections import OrderedDict
wb = xlrd.open_workbook(file_contents=sys.stdin.read())
for sheet_index in range(wb.nsheets):
# print sheet_index
sh = wb.sheet_by_index(sheet_index)
# print "Processing sheet no ", sheet_index
attributes = sh.row_values(0)
#print attributes
rows_list = []
attr_list = []
# print attr_list[0]
for rownum in range(1,sh.nrows):
row_val_list = sh.row_values(rownum)
row_dict = OrderedDict()
for index in range(len(attr_list)):
row_dict[attr_list[index]] = row_val_list[index]
#row_dict['ID'] = row_val_list[0]
#row_dict['Name'] = row_val_list[1]
#rows_list.append(row_dict)
#json_data = simplejson.dumps(rows_list)
#sys.stdout.write(json_data)
rows_list.append(row_dict)
json_data = simplejson.dumps(rows_list)
sys.stdout.write(json_data)
# json_data = simplejson.dumps(rows_list)
#sys.stdout.write(json_data)
~
Any help is much appreciated
here is the correct working python code
#!/usr/bin/python -u
import sys
import xlrd
import simplejson
from collections import OrderedDict
wb = xlrd.open_workbook(file_contents=sys.stdin.read())
#print "Sheets are .... ", wb.nsheets
for sheet_index in range(wb.nsheets):
sh = wb.sheet_by_index(sheet_index)
if sh.nrows == 0:
continue
attr_list = sh.row_values(0)
rows_list = []
for rownum in range(1,sh.nrows):
row_values = sh.row_values(rownum)
row_dict = OrderedDict()
for index in range(len(attr_list)):
row_dict[attr_list[index]] = row_values[index]
rows_list.append(row_dict)
json_data = simplejson.dumps(rows_list)
sys.stdout.write(json_data)

python: get full formula from excel, using xlrd

I'm trying to get the full formula from Excel file
I tried many ways, but all get for me the value
I need the full formula that is in the cell, not the value itself
I'm using python with xlrd
is there any function I can use ?
or is there anyway to ?
Thanks alot
So I know this is a very old post, but I found a decent way of getting the formulas from all the sheets in a workbook as well as having the newly created workbook retain all the formatting.
First step is to save a copy of your .xlsx file as .xls
-- Use the .xls as the filename in the code below
Using Python 2.7
from lxml import etree
from StringIO import StringIO
import xlsxwriter
import subprocess
from xlrd import open_workbook
from xlutils.copy import copy
from xlsxwriter.utility import xl_cell_to_rowcol
import os
file_name = '<YOUR-FILE-HERE>'
dir_path = os.path.dirname(os.path.realpath(file_name))
subprocess.call(["unzip",str(file_name+"x"),"-d","file_xml"])
xml_sheet_names = dict()
with open_workbook(file_name,formatting_info=True) as rb:
wb = copy(rb)
workbook_names_list = rb.sheet_names()
for i,name in enumerate(workbook_names_list):
xml_sheet_names[name] = "sheet"+str(i+1)
sheet_formulas = dict()
for i, k in enumerate(workbook_names_list):
xmlFile = os.path.join(dir_path,"file_xml/xl/worksheets/{}.xml".format(xml_sheet_names[k]))
with open(xmlFile) as f:
xml = f.read()
tree = etree.parse(StringIO(xml))
context = etree.iterparse(StringIO(xml))
sheet_formulas[k] = dict()
for _, elem in context:
if elem.tag.split("}")[1]=='f':
cell_key = elem.getparent().get(key="r")
cell_formula = elem.text
sheet_formulas[k][cell_key] = str("="+cell_formula)
sheet_formulas
Structure of Dictionary 'sheet_formulas'
{'Worksheet_Name': {'A1_cell_reference':'cell_formula'}}
Example results:
{u'CY16': {'A1': '=Data!B5',
'B1': '=Data!B1',
'B10': '=IFERROR(Data!B12,"")',
'B11': '=IFERROR(SUM(B9:B10),"")',

From password-protected Excel file to pandas DataFrame

I can open a password-protected Excel file with this:
import sys
import win32com.client
xlApp = win32com.client.Dispatch("Excel.Application")
print "Excel library version:", xlApp.Version
filename, password = sys.argv[1:3]
xlwb = xlApp.Workbooks.Open(filename, Password=password)
# xlwb = xlApp.Workbooks.Open(filename)
xlws = xlwb.Sheets(1) # counts from 1, not from 0
print xlws.Name
print xlws.Cells(1, 1) # that's A1
I'm not sure though how to transfer the information to a pandas dataframe. Do I need to read cells one by one and all, or is there a convenient method for this to happen?
Simple solution
import io
import pandas as pd
import msoffcrypto
passwd = 'xyz'
decrypted_workbook = io.BytesIO()
with open(i, 'rb') as file:
office_file = msoffcrypto.OfficeFile(file)
office_file.load_key(password=passwd)
office_file.decrypt(decrypted_workbook)
df = pd.read_excel(decrypted_workbook, sheet_name='abc')
pip install --user msoffcrypto-tool
Exporting all sheets of each excel from directories and sub-directories to seperate csv files
from glob import glob
PATH = "Active Cons data"
# Scaning all the excel files from directories and sub-directories
excel_files = [y for x in os.walk(PATH) for y in glob(os.path.join(x[0], '*.xlsx'))]
for i in excel_files:
print(str(i))
decrypted_workbook = io.BytesIO()
with open(i, 'rb') as file:
office_file = msoffcrypto.OfficeFile(file)
office_file.load_key(password=passwd)
office_file.decrypt(decrypted_workbook)
df = pd.read_excel(decrypted_workbook, sheet_name=None)
sheets_count = len(df.keys())
sheet_l = list(df.keys()) # list of sheet names
print(sheet_l)
for i in range(sheets_count):
sheet = sheet_l[i]
df = pd.read_excel(decrypted_workbook, sheet_name=sheet)
new_file = f"D:\\all_csv\\{sheet}.csv"
df.to_csv(new_file, index=False)
Assuming the starting cell is given as (StartRow, StartCol) and the ending cell is given as (EndRow, EndCol), I found the following worked for me:
# Get the content in the rectangular selection region
# content is a tuple of tuples
content = xlws.Range(xlws.Cells(StartRow, StartCol), xlws.Cells(EndRow, EndCol)).Value
# Transfer content to pandas dataframe
dataframe = pandas.DataFrame(list(content))
Note: Excel Cell B5 is given as row 5, col 2 in win32com. Also, we need list(...) to convert from tuple of tuples to list of tuples, since there is no pandas.DataFrame constructor for a tuple of tuples.
from David Hamann's site (all credits go to him)
https://davidhamann.de/2018/02/21/read-password-protected-excel-files-into-pandas-dataframe/
Use xlwings, opening the file will first launch the Excel application so you can enter the password.
import pandas as pd
import xlwings as xw
PATH = '/Users/me/Desktop/xlwings_sample.xlsx'
wb = xw.Book(PATH)
sheet = wb.sheets['sample']
df = sheet['A1:C4'].options(pd.DataFrame, index=False, header=True).value
df
Assuming that you can save the encrypted file back to disk using the win32com API (which I realize might defeat the purpose) you could then immediately call the top-level pandas function read_excel. You'll need to install some combination of xlrd (for Excel 2003), xlwt (also for 2003), and openpyxl (for Excel 2007) first though. Here is the documentation for reading in Excel files. Currently pandas does not provide support for using the win32com API to read Excel files. You're welcome to open up a GitHub issue if you'd like.
Based on the suggestion provided by #ikeoddy, this should put the pieces together:
How to open a password protected excel file using python?
# Import modules
import pandas as pd
import win32com.client
import os
import getpass
# Name file variables
file_path = r'your_file_path'
file_name = r'your_file_name.extension'
full_name = os.path.join(file_path, file_name)
# print(full_name)
Getting command-line password input in Python
# You are prompted to provide the password to open the file
xl_app = win32com.client.Dispatch('Excel.Application')
pwd = getpass.getpass('Enter file password: ')
Workbooks.Open Method (Excel)
xl_wb = xl_app.Workbooks.Open(full_name, False, True, None, pwd)
xl_app.Visible = False
xl_sh = xl_wb.Worksheets('your_sheet_name')
# Get last_row
row_num = 0
cell_val = ''
while cell_val != None:
row_num += 1
cell_val = xl_sh.Cells(row_num, 1).Value
# print(row_num, '|', cell_val, type(cell_val))
last_row = row_num - 1
# print(last_row)
# Get last_column
col_num = 0
cell_val = ''
while cell_val != None:
col_num += 1
cell_val = xl_sh.Cells(1, col_num).Value
# print(col_num, '|', cell_val, type(cell_val))
last_col = col_num - 1
# print(last_col)
ikeoddy's answer:
content = xl_sh.Range(xl_sh.Cells(1, 1), xl_sh.Cells(last_row, last_col)).Value
# list(content)
df = pd.DataFrame(list(content[1:]), columns=content[0])
df.head()
python win32 COM closing excel workbook
xl_wb.Close(False)
Adding to #Maurice answer to get all the cells in the sheet without having to specify the range
wb = xw.Book(PATH, password='somestring')
sheet = wb.sheets[0] #get first sheet
#sheet.used_range.address returns string of used range
df = sheet[sheet.used_range.address].options(pd.DataFrame, index=False, header=True).value

Categories