Writing large data to an Excel column cell in a loop - Python

I am parsing a .ts file and extracting strings from it. I want to write each string to an Excel column. Can anyone help me?
from xml.dom import minidom
import sys, xlsxwriter
import pandas as pd
import numpy as np
reload(sys)
sys.setdefaultencoding('utf-8')
doc = minidom.parse("english.ts")
# Question snippet: walks every <message> element of the parsed .ts document,
# prints the text of its first <source> element and sends it to new.xlsx via pandas.
# NOTE(review): indentation was lost when this snippet was pasted, so which
# statements belong to the loop body is not visible here - confirm against the original post.
def main():
writer = pd.ExcelWriter('new.xlsx', engine='xlsxwriter')
messages = doc.getElementsByTagName("message")
for message in messages:
# Take the first <source> element of this message and read .data from its first child node.
source = message.getElementsByTagName("source")[0]
ori_string = source.firstChild.data
print ori_string
# The DataFrame holds only the current string (a single row).
df = pd.DataFrame({'TString': [ori_string]})
# NOTE(review): `writer` is rebound to a second ExcelWriter on the same path;
# if this runs once per message, rows written earlier would presumably be discarded.
writer = pd.ExcelWriter('new.xlsx', engine='xlsxwriter')
df.to_excel(writer, sheet_name='Sheet1')
writer.save()
if __name__ == '__main__':
main()
I am not getting an error and "ori_string" is printed, but nothing is written to the Excel sheet.
I just want to write the string to an Excel column. How do I iterate over the rows, and how do I specify the row and column number inside the loop?

from xml.dom import minidom
import sys, xlsxwriter
reload(sys)
sys.setdefaultencoding('utf-8')
doc = minidom.parse("english.ts")
def main():
    """Write the <source> text of every <message> in ``doc`` to data3.xlsx.

    Each string goes into column 0 of its own row, starting at row 0, and is
    also printed. ``doc`` is the module-level document parsed from english.ts.
    """
    messages = doc.getElementsByTagName("message")
    # The context manager closes (and thereby saves) the workbook even if one
    # of the messages raises part-way through the loop.
    with xlsxwriter.Workbook('data3.xlsx') as workbook:
        worksheet = workbook.add_worksheet()
        # enumerate supplies the row index, so no manual counter is needed.
        for row, message in enumerate(messages):
            source = message.getElementsByTagName("source")[0]
            ori_string = source.firstChild.data
            # Parenthesised form prints the same text on Python 2 and Python 3.
            print(ori_string)
            worksheet.write(row, 0, ori_string)
Now it's working. Thank you #DavidG for your suggestion.

Related

How to save transformed file into new excel file using Openpyxl Python?

I currently have 3 Excel files in my working directory. All 3 files have names that end with "_Updated.xlsx". I want to transform the files so that all empty rows in each file are deleted. I have created a function for this, but the only issue is that I cannot save all the transformed files using the code below. I am not sure what is wrong. The reason for creating new files is that I would like to keep my raw files.
Python code
import openpyxl
import os
from openpyxl import load_workbook,Workbook
import glob
from pathlib import Path
Excel_file_path="/Excel"
# Question snippet: loads every workbook matching *_Updated.xlsx under
# Excel_file_path, trims each sheet, then tries to copy the sheets into new files.
# NOTE(review): indentation was lost when this snippet was pasted, so the
# nesting of the statements below is not visible here.
for file in Path(Excel_file_path).glob('*_Updated.xlsx'):
wb=load_workbook(file)
wb_modified = False
for sheet in wb.worksheets:
max_row_in_sheet = sheet.max_row
max_col_in_sheet = sheet.max_column
sheet_modified = False
if max_row_in_sheet > 1:
# NOTE(review): nonempty_row and del_rows_before are not defined in this
# snippet, and nonempty_row is called without arguments - confirm their signatures.
first_nonempty_row = nonempty_row() # Function to find nonempty row
sheet_modified = del_rows_before(first_nonempty_row) #Function to delete nonempty row
wb_modified = wb_modified or sheet_modified
if wb_modified:
# NOTE(review): `workbooks` is never assigned anywhere in this snippet.
for workbook in workbooks:
for sheet in wb.worksheets:
# A fresh workbook is created and the sheet is copied into it cell by cell.
new_wb = Workbook()
ws = new_wb.active
for row_data in sheet.iter_rows():
for row_cell in row_data:
ws[row_cell.coordinate].value = row_cell.value
# The output file is named after the sheet title, not after the source file.
new_wb.save("/Excel/"+sheet.title+"_Transformed.xlsx")
In case anyone is still looking for an answer to my question above, below is the code that worked for me.
import openpyxl
import os
from openpyxl import load_workbook
import glob
from pathlib import Path
Excel_file_path="/Excel"
# Trim every sheet of each "*_Updated.xlsx" workbook and save the result under
# a new "_Transformed" name, so the raw input files are left untouched.
for file in Path(Excel_file_path).glob('*_Updated.xlsx'):
    wb = load_workbook(file)
    for sheet in wb.worksheets:
        # A sheet with at most one row is left as it is.
        if sheet.max_row > 1:
            # NOTE(review): both helpers are defined outside this snippet and
            # are called exactly as in the original post (the first one takes
            # no arguments here) - confirm their real signatures.
            first_nonempty_row = get_first_nonempty_row()  # find the first non-empty row
            del_rows_before(first_nonempty_row)  # delete the rows before it
    # file.stem is the file name without its ".xlsx" suffix.
    # NOTE(review): the output goes to the relative directory "Excel/", while
    # the input is read from Excel_file_path - confirm this is intended.
    wb.save("Excel/" + file.stem + "_Transformed.xlsx")
    wb.close()

I am unable to print all the output in the Excel file

I am unable to print all of the output to the Excel file.
Please guide me.
I am using pd.DataFrame and openpyxl.
I am fetching data from an Excel file and I want to write all the output data in columns and rows.
from Bio import SeqIO
import csv
from openpyxl import load_workbook
import pandas as pd
from Bio.SeqUtils.ProtParam import ProteinAnalysis
# Question snippet: computes ProteinAnalysis features for records parsed from
# Sequence.fasta and writes them to protein_feature_data.xlsx.
# NOTE(review): indentation was lost when this snippet was pasted, so which
# statements belong to the loop body is not visible here.
input_file = open ("Sequence.fasta","r")
# NOTE(review): the backtick after the colon looks like a leftover markdown
# fence character; as pasted, the loop header and the first body statement share one line.
for record in SeqIO.parse(input_file,"fasta"):`my_sec= str(record.seq).rstrip('\\')
analyse= ProteinAnalysis(my_sec)
mol_weight = analyse.molecular_weight()
count_amino= analyse.count_amino_acids()
epsilon_prot = analyse.molar_extinction_coefficient()
iso_point=analyse.isoelectric_point()
ist_index=analyse.instability_index()
aromati=analyse.aromaticity()
gra_vy=analyse.gravy()
flex=analyse.flexibility()
# A new ExcelWriter and a one-row DataFrame are built from the current values.
# NOTE(review): if this runs once per record, each record would presumably
# replace the previously written file - confirm against the original post.
writer = pd.ExcelWriter('protein_feature_data.xlsx',engine='openpyxl')
wb= writer.book
df = pd.DataFrame({'sequence':[my_sec],
'Molecular_Weight':[mol_weight],
'Amino_Acid_Count':[count_amino],
'molar_extinction_coefficient':[epsilon_prot],
'isoelectric_point':[iso_point],
'instability_index':[ist_index],
'aromaticity':[aromati],
'Gravy':[gra_vy],
'Flexibility':[flex]})
df.to_excel(writer)
wb.save('protein_feature_data.xlsx')
`
Please use this updated code. You can save the file with just to_excel. Also, the df creation and writing to file should be outside the for loop. If the df is created correctly (I cannot confirm this, as the input file has not been shared), the data should be written to the output file correctly.
from Bio import SeqIO
import csv
from openpyxl import load_workbook
import pandas as pd
from Bio.SeqUtils.ProtParam import ProteinAnalysis
# Column order of the output sheet; also used when the FASTA file has no
# records, so the header row is still written.
feature_columns = [
    'sequence',
    'Molecular_Weight',
    'Amino_Acid_Count',
    'molar_extinction_coefficient',
    'isoelectric_point',
    'instability_index',
    'aromaticity',
    'Gravy',
    'Flexibility',
]

# Collect one row of features per FASTA record. Building the DataFrame from
# only the loop variables left after the loop would keep just the last record.
feature_rows = []
with open("Sequence.fasta", "r") as input_file:  # closed automatically after parsing
    for record in SeqIO.parse(input_file, "fasta"):
        # Strip trailing backslashes from the sequence text before analysing it.
        my_sec = str(record.seq).rstrip('\\')
        analyse = ProteinAnalysis(my_sec)
        feature_rows.append({
            'sequence': my_sec,
            'Molecular_Weight': analyse.molecular_weight(),
            'Amino_Acid_Count': analyse.count_amino_acids(),
            'molar_extinction_coefficient': analyse.molar_extinction_coefficient(),
            'isoelectric_point': analyse.isoelectric_point(),
            'instability_index': analyse.instability_index(),
            'aromaticity': analyse.aromaticity(),
            'Gravy': analyse.gravy(),
            'Flexibility': analyse.flexibility(),
        })

# The DataFrame is created and written once, outside the loop, with one row
# per record.
df = pd.DataFrame(feature_rows, columns=feature_columns)
df.to_excel('protein_feature_data.xlsx')

Python - How to convert CSV to XLSX?

I have code to convert CSV to XLSX; the only problem is that after the conversion some numeric columns are stored as text, which makes SQL unable to convert from nvarchar to float.
Code:
import csv, os
from glob import glob
from xlsxwriter.workbook import Workbook
import pandas as pd
import numpy as np
# Question snippet: copies every field of the matched CSV file into a
# worksheet of FILE.xlsx, then reopens the workbook with openpyxl to rename the sheet.
# NOTE(review): indentation was lost when this snippet was pasted, so the
# nesting of the statements below is not visible here.
for csvfile in glob('FILE.CSV'):
# NOTE(review): `name` is computed but not used anywhere in this snippet.
name = os.path.basename(csvfile).split('.')[-2]
# NOTE(review): strings_to_numbers is documented by XlsxWriter as converting
# strings to numbers where float() can parse them; values it cannot parse
# (for example decimal commas) would presumably stay text - check the CSV data.
workbook = Workbook('FILE.xlsx', {'strings_to_numbers': True, 'constant_memory': True})
worksheet = workbook.add_worksheet()
with open(csvfile, 'r') as f:
# Fields in the CSV are separated by semicolons.
r = csv.reader(f, delimiter=';')
for row_index, row in enumerate(r):
for col_index, data in enumerate(row):
worksheet.write(row_index, col_index, data)
# NOTE(review): currency_format is created but never passed to a write call in this snippet.
currency_format = workbook.add_format({'num_format': '$#,##0.00'})
workbook.close()
import openpyxl
# NOTE(review): the workbook is reopened as "file.xlsx" although it was written
# as 'FILE.xlsx' above - the two names differ in case.
ss = openpyxl.load_workbook("file.xlsx")
# Look up the worksheet named 'Sheet1'
ss_sheet = ss['Sheet1']
# Rename it to 'plan1' and save the workbook again.
ss_sheet.title = 'plan1'
ss.save("file.xlsx")
print("-------------------------------------------")
print(" .CSV to .XLSX Conversion Successful")
print("-------------------------------------------")

how to download excel file in python and streamlit?

I have a Python script that reads files and converts them to a dataframe using Python and Streamlit. I then want to create a function that allows the user to download this dataframe as an Excel file with the .xls extension.
So I tried to read the dataframe and convert it to an Excel file using these two functions:
pd.ExcelWriter
df.to_excel
But when I try to download the file using a link, the file doesn't download and this error is displayed:
Failed-Network error
Code:
import pandas as pd
import streamlit as st
# Question snippet: writes df through an ExcelWriter and tries to expose the
# result as a base64 download link.
# NOTE(review): `df`, `base64` and `extension` are used below but are not
# defined or imported in this snippet.
writer = pd.ExcelWriter('update2.xlsx')
df.to_excel(writer, index = False, header=True,encoding='utf-8')
# NOTE(review): open() is given the ExcelWriter object rather than a file
# path, and the writer has not been saved or closed at this point.
with open(writer,'rb') as f:
b64 = base64.b64encode(f.read())
# NOTE(review): b64 is not interpolated into href; the link markup appears to
# be missing from this string - confirm against the original post.
href = f'Download {extension}'
st.write(href, unsafe_allow_html=True)
With recent Streamlit releases (1.0.0 and above):
Use
st.download_button
Displays a download button widget.
This is useful when you would like to provide a way for your users to download a file directly from your app.
Note that the data to be downloaded is stored in memory while the user is connected, so it's a good idea to keep file sizes under a couple of hundred megabytes to conserve memory.
Here is a sample code from the discussion, that can be helpful to download excel files...
import pandas as pd
from io import BytesIO
from pyxlsb import open_workbook as open_xlsb
import streamlit as st
def to_excel(df):
    """Serialize ``df`` to an in-memory .xlsx workbook and return its bytes.

    The frame is written to sheet 'Sheet1' without the index, and column A
    gets the number format '0.00'.
    """
    output = BytesIO()
    # ExcelWriter.save() was deprecated in pandas 1.5 and removed in 2.0.
    # Leaving the with-block closes the writer, which flushes the finished
    # workbook into ``output``.
    with pd.ExcelWriter(output, engine='xlsxwriter') as writer:
        df.to_excel(writer, index=False, sheet_name='Sheet1')
        workbook = writer.book
        worksheet = writer.sheets['Sheet1']
        format1 = workbook.add_format({'num_format': '0.00'})
        # Width None keeps the default column width; only the format changes.
        worksheet.set_column('A:A', None, format1)
    return output.getvalue()
df_xlsx = to_excel(df)
st.download_button(label='📥 Download Current Result',
data=df_xlsx ,
file_name= 'df_test.xlsx')
This worked for me
import pandas as pd
from io import BytesIO
import streamlit as st
def to_excel(df: pd.DataFrame) -> bytes:
    """Serialize ``df`` to an Excel workbook held in memory and return its bytes.

    Nothing is written to disk; the workbook exists only in the BytesIO buffer.
    """
    in_memory_fp = BytesIO()
    df.to_excel(in_memory_fp)
    # getvalue() returns the whole buffer regardless of the current position,
    # so no seek back to the start is needed.
    return in_memory_fp.getvalue()
# Demo data: 25 rows of zeros in two columns.
cols = ["col1", "col2"]
df = pd.DataFrame.from_records([{k: 0.0 for k in cols} for _ in range(25)])
excel_data = to_excel(df)
file_name = "excel.xlsx"
# The fourth positional argument of st.download_button is the MIME type.
# "text/<file name>" is not a valid one, so the registered type for .xlsx
# workbooks is passed instead.
st.download_button(
    f"Click to download {file_name}",
    excel_data,
    file_name,
    "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
    key=file_name,
)
Line 5 can't be executed because you haven't loaded any Excel data into the DataFrame df.
Try something like this in your code:
# read_csv cannot parse an .xlsx workbook; read_excel is the pandas reader for it.
df = pd.read_excel('update2.xlsx')
I hope, this helped.
Take care
def get_binary_file_downloader_html(bin_file, file_label='File'):
    """Return an HTML link that downloads ``bin_file`` through a base64 data URI.

    Args:
        bin_file: Path of the file whose bytes are embedded in the link.
        file_label: Text appended to "Descargar" as the visible link label.

    Returns:
        An ``<a>`` element as a string; the suggested download name is the
        base name of ``bin_file``.
    """
    import base64
    import html
    import os

    with open(bin_file, 'rb') as f:
        data = f.read()
    bin_str = base64.b64encode(data).decode()
    # Escape the file name because it is placed inside a quoted HTML attribute.
    download_name = html.escape(os.path.basename(bin_file), quote=True)
    # The encoded bytes must be part of the href; without the anchor element
    # the returned text is only a label and nothing can be downloaded.
    href = (
        f'<a href="data:application/octet-stream;base64,{bin_str}" '
        f'download="{download_name}">Descargar {file_label}</a>'
    )
    return href
st.markdown(get_binary_file_downloader_html('Wip_QRY.xlsx', 'Excel'), unsafe_allow_html=True)

Exception: Exception('Exception caught in workbook destructor. Explicit close() may be required for workbook.',)

I am trying to put a bunch of CSV files into one workbook and here is my code:
import csv
import glob
import openpyxl
import os, sys
import pandas as pd
import xlsxwriter as xlwr
# Question snippet: reads every *.csv in the current directory and writes each
# one to its own sheet of non_concussed_game_logs.xlsx.
# NOTE(review): indentation was lost when this snippet was pasted, so the
# nesting of the statements below is not visible here.
def main():
list_of_files = []
names = []
for csv_file in glob.glob(os.path.join('.', '*.csv')):
# Drop the two-character directory prefix produced by os.path.join('.', ...)
# and then the four-character ".csv" suffix; the remainder is the sheet name.
bleh = csv_file[2:]
name = bleh[:-4]
names.append(name)
df = pd.read_csv(csv_file, index_col=None, header=0)
list_of_files.append(df)
writer = pd.ExcelWriter('non_concussed_game_logs.xlsx')
for n, df in enumerate(list_of_files):
# The second positional argument of to_excel is the sheet name.
df.to_excel(writer, '%s' % names[n])
# NOTE(review): `writer.save` is only referenced here, not called - there are no parentheses.
writer.save
if __name__ == "__main__":
main()
I am getting the error mentioned in the title of my post but I am unsure as to why I'm getting it. I have used this script before and it has worked but I'm not sure why it is not now. Any help is appreciated!
I figured it out: my CSV files were encoded in UTF-8, so I had to change the read_csv() call to
df = pd.read_csv(csv_file, index_col=None, header=0, encoding='utf-8')
and also add the parentheses to the writer.save line.

Categories