I have data in Excel where the last column contains text with line breaks (entered with Enter inside the cell). Here is an example of my data:
If I convert using python to csv, my data looks like this:
I need the TEXT column to look like this:
This is my script:
import pandas as pd
import os
import numpy as np
# Export every sheet of every .xlsx workbook in WD to its own
# pipe-separated CSV file named '<sheet>_<workbook>.csv'.
WD = r'XXX'
os.chdir(WD)
for file in os.listdir(WD):
    if not file.endswith('.xlsx'):
        continue
    FILE = file
    sheet_names = pd.ExcelFile(FILE).sheet_names
    for sn in sheet_names:
        OUTPUT_FILE = '{}_{}'.format(sn, FILE.replace('.xlsx', '.csv'))
        # Read the sheet that is being exported. Without sheet_name,
        # read_excel returns the first sheet on every iteration, so all
        # CSV files of a workbook would contain the same data.
        df = pd.read_excel(FILE, sheet_name=sn)
        print(FILE, sn)
        for col in df.columns.to_list():
            # Replace True/False cells with an empty string; values that
            # the mapping does not cover are restored by fillna.
            df[col] = df[col].map({True: '', False: ''}).fillna(df[col])
        # Keep only these columns, in this order.
        cn = ['IN', 'NAME', 'TEXT']
        df = df.reindex(columns=cn)
        df.to_csv(OUTPUT_FILE, sep='|', encoding='utf-8-sig', index=False)
Do you have any idea?
I hope this works for you. Install xlsxwriter (pip install xlsxwriter) before running the code.
Excel to csv:
import pandas as pd
df = pd.read_excel('./keep_enter.xlsx')
def replace_custom_func(x):
    """Turn multi-line text into an Excel formula that rebuilds its line breaks.

    Each line of ``x`` becomes a quoted string literal and the literals are
    joined with ``&CHAR(10)&``, e.g. ``"a\\nb"`` -> ``="a"&CHAR(10)&"b"``.

    Args:
        x: Cell value. Only non-empty strings are converted.

    Returns:
        The formula string, or ``x`` unchanged when it is empty or not a
        string (e.g. the NaN that pandas uses for blank cells).
    """
    # Blank cells arrive as NaN (a float) and have no len(); pass them through
    # instead of raising TypeError.
    if not isinstance(x, str) or not x:
        return x
    # A double quote inside an Excel string literal must be doubled, otherwise
    # the generated formula is invalid.
    quoted_lines = ('"{}"'.format(line.replace('"', '""')) for line in x.split('\n'))
    return "=" + "&CHAR(10)&".join(quoted_lines)
# Convert every value of the Text column to its formula form, then write the
# frame as a pipe-separated CSV without the index column.
df['Text'] = df['Text'].apply(replace_custom_func)
df.to_csv('keep_enter1.csv', index=False, sep='|')
CSV to Excel:
df = pd.read_csv('./keep_enter1.csv', sep='|')
# The context manager saves and closes the workbook on exit; it replaces
# writer.save(), which was removed in pandas 2.0.
with pd.ExcelWriter('new_excel_replace12345.xlsx', engine='xlsxwriter') as writer:
    # Convert the dataframe to an XlsxWriter Excel object.
    df.to_excel(writer, sheet_name='Sheet1', index=False)
    # Get the xlsxwriter workbook and worksheet objects.
    workbook = writer.book
    worksheet = writer.sheets['Sheet1']
    # Enable text wrapping on columns C:D so the CHAR(10) line breaks are
    # displayed inside the cells. Named wrap_format to avoid shadowing the
    # builtin format().
    wrap_format = workbook.add_format({'text_wrap': True})
    worksheet.set_column('C:D', None, wrap_format)
    # Write the first record's Text value as a formula at row 1, column 2
    # (zero-based), i.e. the first data row below the header.
    worksheet.write_formula(1, 2, df['Text'][0])
Output:
Related
from docx.api import Document
import pandas as pd
# Copy the text of every table cell in the Word document into a two-column
# DataFrame and export it to Excel.
document = Document("D:/tmp/test.docx")
tables = document.tables
# Gather all rows first and build the frame once: DataFrame.append was removed
# in pandas 2.0 and copied the whole frame on every call.
rows = []
for table in tables:
    for row in table.rows:
        rows.append([cell.text for cell in row.cells])
# Passing the column names here also keeps an empty document from failing on
# the column assignment.
df = pd.DataFrame(rows, columns=["Column1", "Column2"])
df.to_excel("D:/tmp/test.xlsx")
print(df)
Output
`>>>
Column1 Column2
0 Hello TEST
1 Est Ting
2 Gg ff
How can I remove the row and column labels (0, 1, 2), and how can I add some images with this code?
You can remove the index and header when exporting to Excel by simply passing the following arguments:
df.to_excel("test.xlsx", header = None, index = False)
It can be done like this.
import pandas as pd
# Write a small dataset to the 'Data' sheet and place an image on a separate
# 'Images' sheet of the same workbook.
dataset = pd.DataFrame({'A': [1, 2, 3, 4], 'B': [5, 6, 7, 8]})
sheet_name = 'Images'  # Sheet name in which the image will be generated
cell = 'B2'  # Position of the image in w.r.t cell value
# The context manager saves and closes the workbook exactly once. The original
# called workbook.close() and then writer.save(), closing the file twice, and
# writer.save() was removed in pandas 2.0.
with pd.ExcelWriter('test.xlsx', engine='xlsxwriter') as writer:
    dataset.to_excel(writer, sheet_name='Data', index=False, header=False)
    workbook = writer.book
    worksheet = workbook.add_worksheet(sheet_name)
    worksheet.insert_image(cell, 'Tmp.jpg')  # Add image
I am having a problem appending data to an existing sheet of an Excel file.
The following code picks up the date of the last record in the Excel file and creates a new df with the incremental records; I am trying to append these new records to the same sheet of the Excel file.
import pandas as pd
import datetime as dt
import yfinance as yf
from openpyxl import load_workbook
# Intended to append the newest daily quotes for `ticker` to the 'Daily' sheet
# of the ticker's workbook.
ticker = 'AXISBANK.NS'
# Existing records; the newest value in the Date column decides where the
# download starts.
ef = pd.read_excel('D:/YProject/'+ticker+'.xlsx', sheet_name = 'Daily')
# Download window: from the day after the last stored date up to tomorrow.
en = dt.datetime.today()+ dt.timedelta(days=1)
st = ef.Date.max()+ dt.timedelta(days=1)
# Fetch the daily rows, drop the 'Adj Close' column and turn the index into a
# regular column.
df = yf.download(ticker, start =st, end =en, interval ='1d').drop(['Adj Close'], axis = 1).reset_index()
# Open the same workbook with the openpyxl engine in append mode.
writer = pd.ExcelWriter('D:/YProject/'+ticker+'.xlsx',engine='openpyxl', mode='a')
writer.book = load_workbook('D:/YProject/'+ticker+'.xlsx')
# NOTE(review): header='false' is a non-empty string, not the boolean False,
# and no startrow is passed, so nothing here positions df below the rows that
# are already in the sheet.
df.to_excel(writer, sheet_name = 'Daily',header='false')
# NOTE(review): save() followed by close() looks redundant - confirm against
# the installed pandas version.
writer.save()
writer.close()
print(df)
This code creates a new sheet instead of appending to the existing sheet.
Trying to find out how to print to a specific column/row similar to how
pd.to_excel(startcol = 1, startrow = 1) works. I have to do this in an open excel workbook, and found the library xlwings. I'm currently using openpyxl, how would I do this in xlwings? I read the documentation printing to specific cells like A1, but not by specifying columns/rows.
#Write to Excel
# Load the existing workbook and attach it to an openpyxl-backed ExcelWriter
# for the same file.
book = load_workbook('Test.xlsx')
writer = pd.ExcelWriter('Test.xlsx', engine='openpyxl')
writer.book = book
# Map each worksheet title to its worksheet object.
writer.sheets = dict((ws.title, ws) for ws in book.worksheets)
# Write df to the "test" sheet, starting at row `row` and at a column offset
# derived from the data itself.
# NOTE(review): the original indentation of this function was lost; the
# nesting of the statements below must be confirmed before running it.
def addtoexcel(df, row):
i = 0
# Convert the val1/val2 columns to numeric values.
df[["val1", "val2"]] = df[["val1", "val2"]].apply(pd.to_numeric)
# Element 1 of the row labelled 0, as an int; used below as the loop bound
# and as part of the start column.
line = int(df.loc[i][1])
for i in range(1, line+1):
if line ==i:
# presumably the transpose and the write only happen once i reaches line - TODO confirm
df = df.T
df.to_excel(writer, "test", index = False, header = False, startcol = line+2, startrow = row)
How can I print in xlwings by specifying column/row like (1,1)?
You can easily print a pandas dataframe to excel using xlwings. The range object takes a row and a column number as arguments (or just a cell reference as a string). Consider the following code:
import xlwings as xw
import pandas as pd
# Row and column number of the cell that receives the dataframe.
row = 1
column = 2
path = 'your/path/file.xlsx'
# Sample dataframe to write.
df = pd.DataFrame({'A' : [5,5,5],
'B' : [6,6,6]})
# Open the workbook and pick the target sheet.
wb = xw.Book(path)
sht = wb.sheets["Sheet1"]
# Address the range by (row, column) numbers and assign the dataframe to it.
sht.range(row, column).value = df
You can also add options to include index/header:
sht.range(row, column).options(index=False, header=False).value = df
I have a df reading in multiple .xlsx files. I have manipulated what I need in the files and the export view is exact. However, I need the data to export into one larger 2 column file rather than multiple individual files.
Any help is appreciated. I haven't been able to figure the problem out on my own.
import os
import glob
import pandas as pd
# Folder, under the Desktop path below, that holds the .xlsx files to process.
folder = input('Enter the folder name: ')
os.chdir('C:/Users/PCTR261010/Desktop/' + folder)
FileList = glob.glob('*.xlsx')
for fname in FileList:
# Add a 'New' column holding 'mpcc_' plus the text that follows the first '#'
# in the part of the file name before the first '-'.
df = pd.read_excel(fname).assign(New=os.path.basename('mpcc_' + (fname.split('-', 1)[0]).split('#', 1)[1]))
# Keep only the two columns that are exported.
df1 = df[['New', '<ID>']]
# NOTE(review): the original indentation was lost. If the writer is created
# inside the loop, every file rewrites ParttoMPCC_Import.xlsx; if it is created
# after the loop, only the last df1 is written - confirm.
writer = pd.ExcelWriter('ParttoMPCC_Import.xlsx', engine='xlsxwriter')
df1.to_excel(writer, sheet_name='Import', index=False, header=False)
writer.save()
You can append desired columns in a single DataFrame and write that DataFrame to an excel file. Below code should do the job.
import os
import glob
import pandas as pd
# Collect the 'New' and '<ID>' columns of every workbook in the folder into
# one DataFrame and write it to a single Excel file.
folder = input('Enter the folder name: ')
os.chdir('C:/Users/PCTR261010/Desktop/' + folder)
FileList = glob.glob('*.xlsx')
df1 = pd.DataFrame()  # create an empty df
for fname in FileList:
    # 'New' holds 'mpcc_' plus the text that follows the first '#' in the part
    # of the file name before the first '-'.
    df = pd.read_excel(fname).assign(New=os.path.basename('mpcc_' + (fname.split('-', 1)[0]).split('#', 1)[1]))
    # DataFrame.append was removed in pandas 2.0; pd.concat is its replacement.
    df1 = pd.concat([df1, df[['New', '<ID>']]])  # append columns data to the df1
# The context manager saves and closes the file; writer.save() was removed in
# pandas 2.0.
with pd.ExcelWriter('ParttoMPCC_Import.xlsx', engine='xlsxwriter') as writer:
    df1.to_excel(writer, sheet_name='Import', index=False, header=False)
You can use pd.concat as follows:
# Gather the two export columns of every workbook, concatenate them once and
# write the result to a single Excel file.
data = []
for fname in FileList:
    # 'New' holds 'mpcc_' plus the text that follows the first '#' in the part
    # of the file name before the first '-'.
    df = pd.read_excel(fname).assign(New=os.path.basename('mpcc_' + (fname.split('-', 1)[0]).split('#', 1)[1]))
    df1 = df[['New', '<ID>']]
    data.append(df1)
# pd.concat raises ValueError on an empty list, so fall back to an empty frame
# when the folder contains no .xlsx files.
df = pd.concat(data) if data else pd.DataFrame(columns=['New', '<ID>'])
# The context manager saves and closes the file; writer.save() was removed in
# pandas 2.0.
with pd.ExcelWriter('ParttoMPCC_Import.xlsx', engine='xlsxwriter') as writer:
    df.to_excel(writer, sheet_name='Import', index=False, header=False)
I'm attempting to copy from column Range AP:AR of workbook 1 to Range A:C of workbook 2 through Pandas data frames.
I have successfully read the data frame below in workbook 1, I then want to write this into workbook 2 of the specified range. So AP:AR to AQ:AS.
I have tried:
#df.to_excel(writer, 'AP')
I have also tried the following:
#df = pd.write_excel(filename, skiprows = 2, parse_cols = 'AP:AR')
pd.writer = pd.ExcelWriter('output.xlsx', columns = 'AP:AR')
pd.writer.save()
For example:
# Read three columns from workbook 1 and write them into workbook 2.
filename ='C:/ workbook 1.xlsx'
# Skip the first two rows of the sheet.
# NOTE(review): parse_cols looks like the older spelling of usecols in
# pandas.read_excel - confirm against the installed pandas version.
df = pd.read_excel(filename, skiprows = 2, parse_cols = 'A:C')
# NOTE(review): pandas is imported only here, after pd is already used above.
import pandas as pd
writer = pd.ExcelWriter('C:/DRAX/ workbook 2.xlsx')
# NOTE(review): the second positional argument of to_excel appears to be the
# sheet name, so 'AQ' would name the sheet rather than select a column - confirm.
df.to_excel(writer, 'AQ')
writer.save()
print(df)
It reads correctly, but writes to Cell column ‘B’ instead of AQ.
You have to specify the column at which the dataframe should start with the startcol parameter, which is a zero-based integer:
So you should change the line
df.to_excel(writer, 'AQ')
to
df.to_excel(writer, startcol=42) # AQ has the index of 42
Results: