I am unable to write all the output to the Excel file - Python

I am unable to write all of the output to the Excel file.
Please guide me.
I am using pd.DataFrame and openpyxl.
I am fetching data from an Excel file, and I want to write all of the output data into columns and rows.
from Bio import SeqIO
import csv
from openpyxl import load_workbook
import pandas as pd
from Bio.SeqUtils.ProtParam import ProteinAnalysis
# Reads records from Sequence.fasta and computes ProtParam features for each sequence.
# NOTE(review): indentation was lost when this snippet was extracted, and the stray
# backtick after the `for` header is markup residue, so it does not run as shown.
input_file = open ("Sequence.fasta","r")
for record in SeqIO.parse(input_file,"fasta"):`my_sec= str(record.seq).rstrip('\\')
# Run the ProteinAnalysis calculations on the sequence string (trailing backslashes removed).
analyse= ProteinAnalysis(my_sec)
mol_weight = analyse.molecular_weight()
count_amino= analyse.count_amino_acids()
epsilon_prot = analyse.molar_extinction_coefficient()
iso_point=analyse.isoelectric_point()
ist_index=analyse.instability_index()
aromati=analyse.aromaticity()
gra_vy=analyse.gravy()
flex=analyse.flexibility()
# A new ExcelWriter and a one-row DataFrame are built from the current values.
# NOTE(review): presumably this part sat inside the loop, in which case every record
# rewrites the same file and only the last one remains - TODO confirm.
writer = pd.ExcelWriter('protein_feature_data.xlsx',engine='openpyxl')
wb= writer.book
df = pd.DataFrame({'sequence':[my_sec],
'Molecular_Weight':[mol_weight],
'Amino_Acid_Count':[count_amino],
'molar_extinction_coefficient':[epsilon_prot],
'isoelectric_point':[iso_point],
'instability_index':[ist_index],
'aromaticity':[aromati],
'Gravy':[gra_vy],
'Flexibility':[flex]})
df.to_excel(writer)
# Saves through the underlying workbook object rather than through the writer.
wb.save('protein_feature_data.xlsx')
`

Please use this updated code. You can save the file with just to_excel. Also, the DataFrame creation and the write to the file should be outside the for loop. If the DataFrame is created correctly (I cannot confirm this, as the input file has not been shared), the data should be written to the output file correctly.
from Bio import SeqIO
import csv
from openpyxl import load_workbook
import pandas as pd
from Bio.SeqUtils.ProtParam import ProteinAnalysis
# Compute ProtParam features for every record in the FASTA file and write
# them to a single Excel sheet, one row per sequence.
columns = [
    'sequence',
    'Molecular_Weight',
    'Amino_Acid_Count',
    'molar_extinction_coefficient',
    'isoelectric_point',
    'instability_index',
    'aromaticity',
    'Gravy',
    'Flexibility',
]
rows = []
# The context manager closes the FASTA handle once parsing is finished.
with open("Sequence.fasta", "r") as input_file:
    for record in SeqIO.parse(input_file, "fasta"):
        # Drop any trailing backslash left on the sequence text.
        my_sec = str(record.seq).rstrip('\\')
        analyse = ProteinAnalysis(my_sec)
        # Collect one row per record; building a one-row DataFrame from the
        # loop variables would keep only the last sequence.
        rows.append({
            'sequence': my_sec,
            'Molecular_Weight': analyse.molecular_weight(),
            'Amino_Acid_Count': analyse.count_amino_acids(),
            'molar_extinction_coefficient': analyse.molar_extinction_coefficient(),
            'isoelectric_point': analyse.isoelectric_point(),
            'instability_index': analyse.instability_index(),
            'aromaticity': analyse.aromaticity(),
            'Gravy': analyse.gravy(),
            'Flexibility': analyse.flexibility(),
        })

# Build the table once, after the loop, and write it in a single call.
# Passing `columns` keeps the header row even when the file has no records.
df = pd.DataFrame(rows, columns=columns)
df.to_excel('protein_feature_data.xlsx')

Related

how to download excel file in python and streamlit?

I have a Python script that read files and convert it to dataframe using Python and streamlit. Then I want to create a function to allows the user to download this dataframe as an Excel file with extension .xls.
So I tried to read the dataframe and convert it to an Excel file using these two functions:
pd.ExcelWriter
df.to_excel
But when I try to download the file using a link the file doesn't download and displays this error:
Failed-Network error
Code:
import pandas as pd
import streamlit as st
# Attempts to write `df` to update2.xlsx and expose it as a download link.
# NOTE(review): `df`, `base64` and `extension` are used below but are not defined
# or imported anywhere in this snippet.
writer = pd.ExcelWriter('update2.xlsx')
df.to_excel(writer, index = False, header=True,encoding='utf-8')
# NOTE(review): open() is handed the ExcelWriter object here, not a file path.
with open(writer,'rb') as f:
b64 = base64.b64encode(f.read())
# NOTE(review): the link text contains no anchor markup and `b64` is never used;
# the markup may have been lost in extraction - TODO confirm.
href = f'Download {extension}'
st.write(href, unsafe_allow_html=True)
With the streamlit latest release(above 1.0.0):
Use
st.download_button
Displays a download button widget.
This is useful when you would like to provide a way for your users to download a file directly from your app.
Note that the data to be downloaded is stored in memory while the user is connected, so it's a good idea to keep file sizes under a couple of hundred megabytes to conserve memory.
Here is a sample code from the discussion, that can be helpful to download excel files...
import pandas as pd
from io import BytesIO
from pyxlsb import open_workbook as open_xlsb
import streamlit as st
def to_excel(df):
    """Serialize *df* to an in-memory ``.xlsx`` workbook and return its bytes.

    The frame is written without its index to a sheet named ``Sheet1`` using
    the ``xlsxwriter`` engine, and a two-decimal number format is applied to
    column A.

    Args:
        df: The pandas DataFrame to export.

    Returns:
        The raw bytes of the generated workbook.
    """
    output = BytesIO()
    # Leaving the ``with`` block closes the writer, which flushes the workbook
    # into ``output``. ``ExcelWriter.save()`` is not available in pandas 2.x.
    with pd.ExcelWriter(output, engine='xlsxwriter') as writer:
        df.to_excel(writer, index=False, sheet_name='Sheet1')
        workbook = writer.book
        worksheet = writer.sheets['Sheet1']
        format1 = workbook.add_format({'num_format': '0.00'})
        # Apply the number format to column A.
        worksheet.set_column('A:A', None, format1)
    return output.getvalue()
# Build the workbook bytes and offer them through a Streamlit download button.
# NOTE(review): `df` is not defined in this snippet; it must exist before this runs.
df_xlsx = to_excel(df)
st.download_button(label='📥 Download Current Result',
data=df_xlsx ,
file_name= 'df_test.xlsx')
This worked for me
import pandas as pd
from io import BytesIO
import streamlit as st
def to_excel(df: pd.DataFrame):
    """Return *df* rendered as an Excel workbook, as raw bytes.

    The workbook is written to an in-memory buffer; nothing is saved to disk.
    """
    buffer = BytesIO()
    df.to_excel(buffer)
    # The buffer now holds the finished workbook; hand back its full contents.
    return buffer.getvalue()
# Demo: build a 25-row frame of zeros and offer it as an .xlsx download.
cols = ["col1", "col2"]
df = pd.DataFrame.from_records([{k: 0.0 for k in cols} for _ in range(25)])
excel_data = to_excel(df)
file_name = "excel.xlsx"
st.download_button(
    f"Click to download {file_name}",
    excel_data,
    file_name,
    # Registered media type for .xlsx workbooks; "text/<file name>" is not a
    # valid MIME type.
    "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
    key=file_name,
)
Line 5 can't be executed since you haven't assigned any data to the DataFrame df.
Try something like this in your code:
# update2.xlsx is an Excel workbook, so it is read with read_excel;
# read_csv expects delimited text.
df = pd.read_excel('update2.xlsx')
I hope, this helped.
Take care
def get_binary_file_downloader_html(bin_file, file_label='File'):
    """Return an HTML download link that embeds *bin_file* as base64.

    Args:
        bin_file: Path of the file to embed.
        file_label: Text appended to "Descargar" in the link caption.

    Returns:
        An ``<a>`` element whose ``href`` is a base64 ``data:`` URI holding the
        file contents and whose ``download`` attribute is the file's base name.
    """
    # Imported locally because the snippet has no module-level imports for them.
    import base64
    import os

    with open(bin_file, 'rb') as f:
        data = f.read()
    bin_str = base64.b64encode(data).decode()
    # The encoded payload must be placed inside the anchor; without it the
    # returned text is a plain caption rather than a working link.
    href = (
        f'<a href="data:application/octet-stream;base64,{bin_str}" '
        f'download="{os.path.basename(bin_file)}">Descargar {file_label}</a>'
    )
    return href
# Show the link returned for Wip_QRY.xlsx in the app, passing unsafe_allow_html=True.
st.markdown(get_binary_file_downloader_html('Wip_QRY.xlsx', 'Excel'), unsafe_allow_html=True)

Updating excel sheet without overwriting using openpyxl :Pandas

I am trying to update sheet without overwriting the complete data but my code is creating a new sheet instead.
import csv
import openpyxl
import pandas as pd
from openpyxl import load_workbook
# Filter the CSV down to the football rows and try to add them to an existing workbook.
df1 = pd.read_csv(r'C:\Users\name\Desktop\Data_Sj.csv')
# Keep only the rows whose 'Sports' column equals 'Football'.
ddf = df1[
(df1['Sports'] == 'Football')
]
print(ddf)
# Open a writer on the workbook path and attach the workbook loaded from that same path.
writer = pd.ExcelWriter(r'C:\Users\name\Desktop\check\Checklist1.xlsx', engine= 'openpyxl')
book = load_workbook(r'C:\Users\name\Desktop\check\Checklist1.xlsx')
writer.book = book
# NOTE(review): to_excel is given the file path rather than `writer`, so this write
# does not go through the writer configured above.
ddf.to_excel(r'C:\Users\name\Desktop\check\Checklist1.xlsx')
writer.save()

Writing large data to a excel column cell with looping

I am parsing a .ts file and extracting strings from it. I want to write each string to an Excel column. Can anyone help me?
from xml.dom import minidom
import sys, xlsxwriter
import pandas as pd
import numpy as np
# Python 2 snippet: parses english.ts and tries to write each message's source text to new.xlsx.
# NOTE(review): indentation was lost in extraction, so the extent of the loop body is not visible.
reload(sys)
sys.setdefaultencoding('utf-8')
doc = minidom.parse("english.ts")
def main():
writer = pd.ExcelWriter('new.xlsx', engine='xlsxwriter')
messages = doc.getElementsByTagName("message")
for message in messages:
# Take the text of the first <source> child of the message.
source = message.getElementsByTagName("source")[0]
ori_string = source.firstChild.data
print ori_string
# A one-row DataFrame holding only the current string.
df = pd.DataFrame({'TString': [ori_string]})
# NOTE(review): a second ExcelWriter for the same file is created here, replacing
# the one assigned at the top of main().
writer = pd.ExcelWriter('new.xlsx', engine='xlsxwriter')
df.to_excel(writer, sheet_name='Sheet1')
writer.save()
if __name__ == '__main__':
main()
I am not getting an error, and "ori_string" is printed, but nothing is written to the Excel sheet.
I just want to write the string to the Excel column. How do I iterate over rows, and how do I give the row and column number inside the iteration?
from xml.dom import minidom
import sys, xlsxwriter
reload(sys)
sys.setdefaultencoding('utf-8')
doc = minidom.parse("english.ts")
def main():
    """Write the text of each message's first <source> element to data3.xlsx.

    Every string goes into the first column of a new worksheet, one row per
    message, in document order. The messages are read from the module-level
    ``doc`` object.
    """
    messages = doc.getElementsByTagName("message")
    # The context manager closes the workbook when the block exits, including
    # when reading one of the messages raises.
    with xlsxwriter.Workbook('data3.xlsx') as workbook:
        worksheet = workbook.add_worksheet()
        for row, message in enumerate(messages):
            source = message.getElementsByTagName("source")[0]
            ori_string = source.firstChild.data
            # The parenthesised form prints the same under Python 2 and Python 3.
            print(ori_string)
            worksheet.write(row, 0, ori_string)
Now it's working. Thank you @DavidG for your suggestion.

Exception: Exception('Exception caught in workbook destructor. Explicit close() may be required for workbook.',)

I am trying to put a bunch of CSV files into one workbook and here is my code:
import csv
import glob
import openpyxl
import os, sys
import pandas as pd
import xlsxwriter as xlwr
# Collects every CSV in the current directory and writes each one to its own sheet
# of non_concussed_game_logs.xlsx.
# NOTE(review): indentation was lost in extraction, so loop bodies are not visible here.
def main():
list_of_files = []
names = []
for csv_file in glob.glob(os.path.join('.', '*.csv')):
# Drop the first two characters (the directory prefix added by the join above)
# and the last four (the ".csv" suffix) to get the sheet name.
bleh = csv_file[2:]
name = bleh[:-4]
names.append(name)
df = pd.read_csv(csv_file, index_col=None, header=0)
list_of_files.append(df)
writer = pd.ExcelWriter('non_concussed_game_logs.xlsx')
for n, df in enumerate(list_of_files):
df.to_excel(writer, '%s' % names[n])
# NOTE(review): `writer.save` is only referenced here, not called (no parentheses).
writer.save
if __name__ == "__main__":
main()
I am getting the error mentioned in the title of my post but I am unsure as to why I'm getting it. I have used this script before and it has worked but I'm not sure why it is not now. Any help is appreciated!
I figured it out, my CSV files were encoded in utf-8 so I had to make the read_csv() call
df = pd.read_csv(csv_file, index_col=None, header=0, encoding='utf-8')
and also add the parenthesis to the writer.save line.

python: get full formula from excel, using xlrd

I'm trying to get the full formula from an Excel file.
I tried many ways, but they all give me the value.
I need the full formula that is in the cell, not the value itself.
I'm using Python with xlrd.
Is there any function I can use?
Or is there any other way to do this?
Thanks a lot.
So I know this is a very old post, but I found a decent way of getting the formulas from all the sheets in a workbook as well as having the newly created workbook retain all the formatting.
First step is to save a copy of your .xlsx file as .xls
-- Use the .xls as the filename in the code below
Using Python 2.7
from lxml import etree
from StringIO import StringIO
import xlsxwriter
import subprocess
from xlrd import open_workbook
from xlutils.copy import copy
from xlsxwriter.utility import xl_cell_to_rowcol
import os
# Python 2.7 snippet: collects cell formulas per worksheet by reading the sheet XML
# files unpacked from the .xlsx archive.
# NOTE(review): indentation was lost in extraction; the nesting of the loops and
# `with` bodies is not visible here.
file_name = '<YOUR-FILE-HERE>'
dir_path = os.path.dirname(os.path.realpath(file_name))
# Unpack file_name + "x" (the .xlsx counterpart of the .xls name) into a
# file_xml directory using the external `unzip` command.
subprocess.call(["unzip",str(file_name+"x"),"-d","file_xml"])
xml_sheet_names = dict()
with open_workbook(file_name,formatting_info=True) as rb:
# NOTE(review): `wb` is not used again in the code shown.
wb = copy(rb)
workbook_names_list = rb.sheet_names()
# Map each sheet name to "sheet<N>", where N is its 1-based position in the list.
for i,name in enumerate(workbook_names_list):
xml_sheet_names[name] = "sheet"+str(i+1)
sheet_formulas = dict()
for i, k in enumerate(workbook_names_list):
# Path of this sheet's XML file under the unpacked archive.
xmlFile = os.path.join(dir_path,"file_xml/xl/worksheets/{}.xml".format(xml_sheet_names[k]))
with open(xmlFile) as f:
xml = f.read()
# NOTE(review): `tree` is not used again in the code shown.
tree = etree.parse(StringIO(xml))
context = etree.iterparse(StringIO(xml))
sheet_formulas[k] = dict()
for _, elem in context:
# Keep only elements whose tag, after the "}" that ends the namespace part, is 'f'.
if elem.tag.split("}")[1]=='f':
# The parent element's "r" attribute is used as the key for this formula.
cell_key = elem.getparent().get(key="r")
cell_formula = elem.text
# Store the formula text with a leading "=".
sheet_formulas[k][cell_key] = str("="+cell_formula)
sheet_formulas
Structure of Dictionary 'sheet_formulas'
{'Worksheet_Name': {'A1_cell_reference':'cell_formula'}}
Example results:
{u'CY16': {'A1': '=Data!B5',
'B1': '=Data!B1',
'B10': '=IFERROR(Data!B12,"")',
'B11': '=IFERROR(SUM(B9:B10),"")',

Categories