Copying Specific Column from One Workbook to Another using Openpyxl - python

I am fairly new to Python and am trying to copy the data in columns A, D, and G of one workbook and paste it into columns A, B, and C of a new workbook. I know how to copy one column or the whole worksheet, but I can't figure out how to copy only specific columns.
from openpyxl import load_workbook
# Copy every cell value of 'Sheet1' in test1.xlsx onto the same coordinates
# of 'Sheet1' in test2.xlsx, then save test2.xlsx in place.
wb1 = load_workbook('test1.xlsx')
wb2 = load_workbook('test2.xlsx')
# Index the workbook by sheet title; get_sheet_by_name() is deprecated.
ws1 = wb1['Sheet1']
ws2 = wb2['Sheet1']
mr = ws1.max_row
mc = ws1.max_column
# openpyxl rows and columns are 1-based, hence the range(1, n + 1) bounds.
for i in range(1, mr + 1):
    for j in range(1, mc + 1):
        ws2.cell(row=i, column=j).value = ws1.cell(row=i, column=j).value
wb2.save('test2.xlsx')
That is the code I am using to copy the whole worksheet

I think you're looking for something like this
# (source column in ws1, destination column in ws2)
column_pairs = (('A', 'A'), ('D', 'B'), ('G', 'C'))
for row_number in range(1, mr + 1):
    row_label = str(row_number)
    for source_column, target_column in column_pairs:
        ws2[target_column + row_label].value = ws1[source_column + row_label].value

Related

Copy range from multiple workbooks to new worksheets in a master workbook using python

I have a bunch of files and I'm running a loop to do some calculations. This loop is already done, but I am saving the results to new files, according to the original one.
file1.xlsx
file2.xlsx
after running the code:
results/file1_results.xlsx
results/file2_results.xlsx
This is the loop through files:
# Add four score columns to the active sheet of every .xlsx file in the
# current working directory and save each workbook as
# results/<original name>_results.xlsx.

# (target column, header text, source columns). Each score is the sum of the
# source cells of a row multiplied by 50 / <number of source columns>.
score_columns = (
    ("CX", "Humanas", ("R", "U", "X", "AA", "AD", "AG", "AJ", "AM")),
    ("CY", "Exatas", ("AP", "AS", "AV", "AY")),
    ("CZ", "Linguagens", ("BB", "BE", "BH", "BK", "BN", "BQ",
                          "BT", "BW", "BZ", "CC", "CF", "CI")),
    ("DA", "Biológicas", ("CL", "CO", "CR", "CU")),
)

directory = os.getcwd()
for filename in os.listdir(directory):
    if not filename.endswith(".xlsx"):
        continue
    wb = load_workbook(filename)
    ws = wb.active
    max_row = ws.max_row
    for target, header, sources in score_columns:
        ws["{}1".format(target)] = header
        # Row 1 holds the header, so data starts at row 2.
        for row in range(2, max_row + 1):
            total = sum(ws["{}{}".format(col, row)].value for col in sources)
            ws["{}{}".format(target, row)] = round(total * (50 / len(sources)))
    wb.save('results/' + os.path.splitext(filename)[0] + '_results.xlsx')
    # The original wrote `wb.close` without parentheses, which only looked
    # the method up and never called it.
    wb.close()
The data is a bunch of dummies (0 or 1)
I need to adjust the results to a single file.xlsx.
I need to get multiple worksheets(named as original files or close to this).
I don't want to merge into a single ws
I am trying to copy the range of every results.xlsx file and place it into the new file, but with no success.
A good alternative could be to skip the creation of all these files and place the results directly into the final one, but I cannot figure out how to do this.
Edit 1 - I succeeded in joining all of the results into specific worksheets, but now I need to clean all of them up to keep only the results.
# Collect the active sheet of every workbook found under `path` into one
# destination workbook, one worksheet per source file.
import os

from openpyxl import Workbook
from openpyxl import load_workbook

# Workbook has to be imported before it is instantiated; the original created
# dest_wb one line above the import.
dest_wb = Workbook()
# NOTE(review): `path` is not defined in this snippet; it must be set by the
# surrounding code.
for root, _dirs, filenames in os.walk(path):
    for source_name in filenames:
        # splitext only strips the extension; split('.')[0] cut the name at
        # its first dot, so 'a.1.xlsx' and 'a.2.xlsx' both became 'a'.
        file_name = os.path.splitext(source_name)[0]
        file_path = os.path.abspath(os.path.join(root, source_name))
        # Create new sheet in destination Workbook and keep the returned
        # worksheet instead of looking it up again by title.
        dest_ws = dest_wb.create_sheet(file_name)
        source_wb = load_workbook(file_path)
        source_sheet = source_wb.active
        for row in source_sheet.rows:
            for cell in row:
                dest_ws[cell.coordinate] = cell.value
dest_wb.save("results/report.xlsx")
Did this to the results:
# Stack every sheet of the combined report, keep the selected columns and
# write one worksheet per distinct value of the second kept column.
dados = pd.read_excel("results/report.xlsx", sheet_name=None)
# sheet_name=None returns a dict of DataFrames keyed by sheet name.
df = pd.concat(list(dados.values()))
lista_colunas = [7, 10, 101, 102, 103, 104]
filtro = df.columns[lista_colunas]
final_df = df[filtro]
grouped_df = final_df.groupby(final_df.columns[1])
salas = grouped_df.groups.keys()
# The context manager writes and closes the file on exit; it replaces the
# explicit writer.save() call, which newer pandas releases no longer provide.
with pd.ExcelWriter('results/resultado.xlsx', engine='xlsxwriter') as writer:
    for sala in salas:
        splitdf = grouped_df.get_group(sala)
        splitdf.to_excel(writer, sheet_name=str(sala))

TypeError: 'Sheet' object is not callable when combining data across excel

I am now working on combining data from 2 Excel files. My approach is to use xlwt to create a new Excel file with the corresponding sheets, and then use a for loop to write into the newly created file. My code is below:
#......
# Create an empty two-sheet .xls file, reopen it as a writable copy, and copy
# the first sheet of `filename0` into the second sheet of that copy.
wbk = xlwt.Workbook()
sheet_0 = wbk.add_sheet('A')
sheet_1 = wbk.add_sheet('B')
# Build the output path once; it is used for both the save and the reopen.
merged_path = os.path.join(currentPath, gsheet.cell(gsr, 2).value) + FullName + " " + time_now + ".xls"
wbk.save(merged_path)
wkbk = xlrd.open_workbook(merged_path)
# NOTE(review): `copy` is presumably xlutils.copy.copy; confirm the import.
wb_merge = copy(wkbk)
wsheet_0 = wb_merge.get_sheet(0)
wsheet_1 = wb_merge.get_sheet(1)
workbook_merge_0 = xlrd.open_workbook(os.path.join(currentPath, filename0), formatting_info=True)
sheet_merge_0 = workbook_merge_0.sheet_by_index(0)
for r in range(0, total_row_0):
    for c in range(0, total_col_0):
        # An xlrd Sheet is not callable; read the cell through
        # Sheet.cell(row, col). Calling sheet_merge_0(r, c) raised
        # "TypeError: 'Sheet' object is not callable".
        wsheet_1.write(r, c, sheet_merge_0.cell(r, c).value)
#......
When I run the whole code, it raises TypeError: 'Sheet' object is not callable, pointing to the last line, i.e. wsheet_1.write(r, c, sheet_merge_0(r, c).value)
Any suggestions? Thanks in advance!

python reading numbers from xlsx as different types

I have Python reading the following xlsx. I noticed that when I run the code again on a similar xlsx located in a different directory, the types of the numbers change. I tried to format the cells in the xlsx files so they are the same, but it doesn't seem to work.
On the first xlsx I see the value in B1 as long and B15 as float, but in the second xlsx I see them as numpy.float64.
from openpyxl import load_workbook
import pandas as pd
import xlrd
import string as st
from string import ascii_uppercase # allows creation of Excel "A1" reference
import numpy as np
#address_1='C:/Users/user/Desktop/target/new version/xlsx/new colour.xlsx'#new version/xlsx/new colour.xlsx'
address_1 = 'C:/Users/user/Desktop/target/new/xlsx/colour.xlsx'
# The same file is loaded twice: once keeping formulas (data_only=False) and
# once with data_only=True.
book_formula = load_workbook(address_1, data_only=False)  # location of file
book = load_workbook(address_1, data_only=True)  # location of file
# Column letters A..Z followed by AA..ZZ, indexed by zero-based column number.
l = list(st.ascii_uppercase) + [letter1 + letter2 for letter1 in ascii_uppercase for letter2 in ascii_uppercase]
#reference data i.e. =
# Workbook.sheetnames replaces the deprecated get_sheet_names().
sheets_formula = book_formula.sheetnames
name = []
ref_equal_dup = []
ref_cell_dup = []  # this has duplicates this goes through each worksheet to get the cells in each
index_1 = 0
def equal():
    """Append four '='-prefixed spellings of the current cell to ref_equal_dup.

    Reads the module-level ``l``, ``col`` and ``row``. The spellings are, in
    order: no ``$``, ``$`` before the row, ``$`` before both column and row,
    and ``$`` before the column.
    """
    column_letter = l[col]
    row_number = str(row + 1)
    ref_equal_dup.extend([
        '=' + column_letter + row_number,
        '=' + column_letter + '$' + row_number,
        '=' + '$' + column_letter + '$' + row_number,
        '=' + '$' + column_letter + row_number,
    ])
def cell():
    """Append four spellings of the current cell address to ref_cell_dup.

    Reads the module-level ``l``, ``col`` and ``row``. The spellings are, in
    order: no ``$``, ``$`` before the row, ``$`` before both column and row,
    and ``$`` before the column.
    """
    column_letter = l[col]
    row_number = str(row + 1)
    ref_cell_dup.extend([
        column_letter + row_number,
        column_letter + '$' + row_number,
        '$' + column_letter + '$' + row_number,
        '$' + column_letter + row_number,
    ])
# For each sheet, record its '=<sheet>!' prefix and let equal() / cell()
# register the address spellings of its cells.
while index_1 < len(sheets_formula):
    sheet_name = sheets_formula[index_1]
    name.append('=' + str(sheet_name) + '!')
    df = pd.DataFrame(book_formula[sheet_name].values)
    rows = len(df.index) - 1
    cols = len(df.columns) - 1
    # equal() and cell() read the loop variables `col` and `row` as globals,
    # so these names must not change.
    for col in range(cols):
        for row in range(rows):
            equal()
            cell()
    index_1 += 1
# removes the dup from ref_cell_dup and ref_equal_dup:
# reshape(-1, 1) lets numpy infer the row count. The original passed
# len(...)/1, which is a float under true division and is rejected by reshape.
ref_equal_dup_table = pd.DataFrame(np.array(ref_equal_dup).reshape(-1, 1), columns=['Cell'])
ref_cell_dup_table = pd.DataFrame(np.array(ref_cell_dup).reshape(-1, 1), columns=['Cell'])
# drops dups and keeps the first occurrence
ref_cell_flat = ref_cell_dup_table.drop_duplicates(keep='first')
ref_equal_flat = ref_equal_dup_table.drop_duplicates(keep='first')
ref_cell = list(ref_cell_flat.values.flatten())
ref_equal = list(ref_equal_flat.values.flatten())
# gets the worksheet!cell: every sheet prefix combined with every address
wrk_cell = [x + y for x in name for y in ref_cell]
# Workbook.sheetnames replaces the deprecated get_sheet_names().
sheets_formula = book_formula.sheetnames
# gets the cell value and formula
index = 0
formula = []
# Classify the current cell and append a 4-item record
# [action or type, sheet name, cell address, value] to the global `formula`.
# Reads the globals thecell (taken from the data_only=False workbook),
# thecell_0 (taken from the data_only=True workbook), index, row, col, l,
# sheets_formula, wrk_cell and ref_equal.
# NOTE(review): Python 2 only as written (`<>`, `unicode`, `long`).
# NOTE(review): indentation was lost in this copy, so the nesting of the
# branches below is presumed - confirm against the original file.
def if_statements():
# Case 1: the string forms of the two views differ.
if str(thecell) <> str(thecell_0):
# Substring test against str() of the concatenated reference lists.
if (thecell) in str(wrk_cell + ref_equal):
# [1:] drops the first character of the text (presumably the leading '=').
formula.append(['Cell Reference',sheets_formula[index].encode('utf-8'),l[col] + str(row + 1), str(thecell)[1:]])
# Unlike the test above, these are list-membership tests.
if (thecell) not in wrk_cell and thecell not in ref_equal and thecell is not None and thecell <> 'nan':
formula.append(['Formula',sheets_formula[index].encode('utf-8'),l[col] + str(row + 1), str(thecell)[1:]])
# Case 2: both views hold the same value; record it by type.
elif thecell == thecell_0:
# unicode values are tagged 'u' and stored unchanged.
if type(thecell) == unicode:
formula.append(['u',sheets_formula[index].encode('utf-8'),l[col] + str(row + 1), thecell])
# long values are tagged with their type and stored as float.
elif type(thecell) == long:
formula.append([type(thecell),sheets_formula[index].encode('utf-8'),l[col] + str(row + 1), float(thecell)])
# elif str(type(thecell)) == "<type 'numpy.float64'>":
# formula.append(['f',sheets_formula[index].encode('utf-8'),l[col] + str(row + 1), thecell])
# Any other non-unicode value is tagged with its type and stored as str.
elif type(thecell) <> unicode:# and type(thecell) <> long: #and str(type(thecell)) <> "<type 'numpy.float64'>":
formula.append([type(thecell),sheets_formula[index].encode('utf-8'),l[col] + str(row + 1), str(thecell)])
# Walk every sheet cell by cell, pairing the value from the formula workbook
# with the value from the data_only workbook, and let if_statements() record
# each non-None cell. if_statements() reads index, row, col, thecell and
# thecell_0 as globals, so these names must not change.
# NOTE(review): the cells are read back through a pandas DataFrame, so their
# Python/numpy type presumably depends on the dtype pandas infers for each
# column - verify whether that explains the differing number types.
while index < len(sheets_formula):
    df = pd.DataFrame(book_formula[(sheets_formula[index])].values)
    df_0 = pd.DataFrame(book[(sheets_formula[index])].values)
    rows, cols = len(df.index), len(df.columns)
    for row in range(rows):
        for col in range(cols):
            thecell = df.iloc[row, col]
            thecell_0 = df_0.iloc[row, col]
            if thecell is not None:
                if_statements()
    index = index + 1
# reshape(-1, 4): one row per 4-item record. The original `len(formula)-1/4`
# parses as len(formula) - (1/4), which equals len(formula) only under
# Python 2 integer division and is a float otherwise.
new_version = pd.DataFrame(np.array(formula).reshape(-1, 4), columns=['ACTION','SHEET_NAME','CELL_ADDRESS','CELL_VALUE'])
out put from python
xlsx format
The idea behind this is to compare a data set and then store it; if a new version pops up, I want to compare the old one with the new one. Is this issue caused by using the numpy library?

Python - copy and paste values from a formula into another column (openpyxl or other libs)

I am attempting to clean up an excel file and essentially what I want to do is this: I have inserted a formula in column B that references column A and would like to then take all the values in column B and paste special as values over column A. After, I would like to delete column B.
Does anyone know of any way to do this via the openpyxl library or another library?
Here is a copy of my code thus far:
import openpyxl as xl
import pandas as pd
import datetime as dt
import os
def left(s, amount):
    """Return the leading ``amount`` items of ``s`` using slice semantics."""
    return s[0:amount]
def right(s, amount):
    """Return the trailing ``amount`` items of ``s``.

    An ``amount`` of 0 yields an empty result of the same type as ``s``; an
    ``amount`` larger than ``len(s)`` yields all of ``s``.
    """
    if amount == 0:
        # s[-0:] is s[0:], so the plain negative slice would return all of s.
        return s[:0]
    return s[-amount:]
def mid(s, offset, amount):
    """Return ``amount`` items of ``s`` starting at zero-based ``offset``."""
    end = offset + amount
    return s[offset:end]
filepath = r'...desktop\Scripts' + '\\'
filename = 'file.xlsx'
fn = filepath + filename

# Step 1: unmerge every merged range on the second worksheet and save a copy.
WB = xl.load_workbook(fn, data_only = True)
WS = WB.worksheets[1]
# Iterate over a copy because unmerging changes the collection.
# merged_cells.ranges replaces the deprecated merged_cell_ranges.
for merged_range in list(WS.merged_cells.ranges):
    WS.unmerge_cells(str(merged_range))
WB.save(filepath + 'unmerged.xlsx')

# Step 2: add two empty helper columns with pandas and write a new file.
# `sheet_name` is the current spelling of the keyword (formerly `sheetname`).
unmerged = pd.read_excel(filepath + 'unmerged.xlsx', sheet_name=1)
unmerged.insert(1, 'ifIsBlank', '')
unmerged.insert(5, 'trimSubst', '')
# to_excel() returns None, so its result is not kept.
unmerged.to_excel(filepath + 'inserted.xlsx', index = False)

# Step 3: fill the helper columns with formulas and rename the headers.
WB = xl.load_workbook(filepath + 'inserted.xlsx')
WS = WB.worksheets[0]
WS['A'][0].value = 'BU'
WS['E'][0].value = 'em name'
# Data rows only ([1:] skips row 1): row 1 holds the headers set below, and
# the original first wrote a formula referencing the invalid cell B0 there.
for cell in WS['B'][1:]:
    cell.value = '=IF(ISBLANK(A{0}), B{1}, A{0})'.format(cell.row, cell.row - 1)
for cell in WS['F'][1:]:
    cell.value = '=TRIM(SUBSTITUTE(E{0}, "~", "_"))'.format(cell.row)
WS['B'][0].value = 'Category Name'
WS['F'][0].value = 'Item Name'
WB.save(filepath + 'formula.xlsx')
# NOTE: openpyxl does not evaluate formulas. data_only=True returns the value
# cached by the application that last saved the file, so these formula cells
# read as None until the workbook has been opened and saved in Excel.
valWB = xl.load_workbook(filepath + 'formula.xlsx', data_only = True)
So the issue I'm running into is that when I read formula.xlsx as a .csv (the desired file type of my final output), it shows nothing in the cells that contain the formula. Is there a way to essentially copy and paste the values that come out of the formula into another column?
Thanks!

Openpyxl reads only formula not cell value

For the past day I've been trying to get openpyxl or any other Python Excel library to read the cell value. But every time I run the program it returns None if I set data_only = True. If I don't add data_only, it only returns the formula I'm using. This is my code:
# Build the dated file name once so the load, save and reload all target the
# same file even if the clock passes midnight in between.
workbook_name = tm.strftime("%d-%m-%Y - %A") + '.xlsx'
wb = xl.load_workbook(workbook_name)
ws = wb.active
bag = float(self.entryBags.get())
ws['B3'] = bag
wb.save(workbook_name)
# NOTE: openpyxl does not evaluate formulas. data_only=True returns the value
# cached by the application that last saved the file, so after an openpyxl
# save a formula cell reads as None until the file is re-saved in Excel.
wb2 = xl.load_workbook(workbook_name, data_only = True)
ws2 = wb2.active
total_till = ws2['B13']
print(total_till.value)
If any one can give any suggestions on how to read the cell value, using openpyxl or any other python library, it would be very helpful.
Thanks

Categories