Read specific columns from excel for python

Read specific columns from excel for python - python

import xlrd
# Open the workbook and take its first worksheet.
workbook = xlrd.open_workbook(filename)
sheet = workbook.sheet_by_index(0)

# xlrd row/column indices are zero-based: rows 2..8 are spreadsheet rows 3-9
# and columns 2..4 are spreadsheet columns C, D and E.
array = []
for i in range(2, 9):
    # One inner list per row, holding that row's values from all three columns.
    array.append([sheet.cell(i, j).value for j in range(2, 5)])
Excel Image
I have this code and it works fine, but it's not doing what I want it to do. It is pulling the data from all the three columns of that excel file (see excel image). I only want it to pull data from column C and column E, and store that as a pair in the array. How to do that? I know there is something like skip columns and skip rows in python, but not sure how to embed that in the code I have.

Using openpyxl :-
def iter_rows(ws):
    """Return the worksheet contents as a list of rows of cell values.

    Args:
        ws: An object exposing ``iter_rows()`` that yields rows of cells,
            each cell carrying a ``value`` attribute (e.g. an openpyxl
            worksheet).

    Returns:
        A list with one inner list per row; each inner list holds the
        ``value`` of every cell in that row, in order.
    """
    return [[cell.value for cell in row] for row in ws.iter_rows()]
from openpyxl import load_workbook

wb = load_workbook(filename='/home/piyush/testtest.xlsx')

# Work on the first sheet of the workbook.
first_sheet = wb.sheetnames[0]
print(first_sheet)
worksheet = wb[first_sheet]

# Every row of the sheet as a list of cell values (see iter_rows above).
fileList = iter_rows(worksheet)

# Positions inside each row list are zero-based and start at column A,
# so 2 is column C and 4 is column E (the two columns the question asks for).
# NOTE(review): assumes every row spans at least five columns - confirm.
col1 = [row[2] for row in fileList]
col2 = [row[4] for row in fileList]

# Store the values as (C, E) pairs and show them.
pairs = list(zip(col1, col2))
for a in pairs:
    print(a)
using pandas:-
import pandas as pd

xl = pd.ExcelFile("/home/piyush/testtest.xlsx")
df = xl.parse("Sheet1")

# Zero-based positions of the two wanted columns inside df.
# NOTE(review): 2 and 4 correspond to columns C and E only if the parsed
# frame starts at spreadsheet column A - adjust for your sheet.
col1Index, col2Index = 2, 4

# Select two *different* columns by position and keep the result.
selected = df.iloc[:, [col1Index, col2Index]]

Related

Exporting to Excel Using Openpyxl - specific rows and columns

I have successfully populated a QTableWidget with data using Pandas.
Now I want to export it to specific rows and columns of an existing Excel file, so that I do not lose the styles and other data already in that file. Please help me find a solution that runs properly.
The goal is to write into specific rows and columns (rows 7 to 30 and columns 1 to 13) using openpyxl, modifying only the values of the existing Excel file. I know that "append" adds the whole data table at the bottom of the sheet, and I don't know which function to use instead.
# Asker's original attempt: copy the contents of self.ui.tableWidget into a
# DataFrame and write it to the sheet "DETALE wyceniane osobno" of
# OFERTA_SZABLON.xlsx.
# NOTE(review): indentation was lost in this paste; the nesting described in
# the comments below is inferred from statement order - confirm.
def kalkuacje_exportuj(self):
columnHeaders = []
# Collect the header text of every table column.
for j in range(self.ui.tableWidget.model().columnCount()):
columnHeaders.append(self.ui.tableWidget.horizontalHeaderItem(j).text())
df = pd.DataFrame(columns=columnHeaders)
# Fill the DataFrame cell by cell with the text of each table item.
for row in range(self.ui.tableWidget.rowCount()):
for col in range(self.ui.tableWidget.columnCount()):
df.at[row, columnHeaders[col]] = self.ui.tableWidget.item(row, col).text()
from openpyxl import Workbook
# NOTE(review): this fresh Workbook is replaced by the load_workbook() result
# on the next line.
wb = Workbook()
# NOTE(review): load_workbook is not imported anywhere in this snippet.
wb = load_workbook ('OFERTA_SZABLON.xlsx')
# ws1 = wb.sheetnames()
ws1 = wb["DETALE wyceniane osobno"]
# for row in ws1.iter_rows(min_row=7,
# max_row=30,
# min_col=1,
# max_col=13):
# NOTE(review): `row` and `col` are not used by the statements that follow;
# presumably the whole DataFrame is appended to the bottom of the sheet once
# per (row, col) pair, which is the behaviour the question complains about.
# dataframe_to_rows is not imported in this snippet either.
for row in range(7, 30):
for col in range(1, 13):
for r in dataframe_to_rows(df, index=False, header=False):
ws1.append(r)
# for cell in row:
# print(cell)
# Overwrite the template file with the modified workbook.
wb.save('OFERTA_SZABLON.xlsx')

I solved the problem like this:
from openpyxl import load_workbook
from openpyxl.utils.dataframe import dataframe_to_rows

# Open the existing template so its styles and other content are preserved.
wb = load_workbook('OFERTA_SZABLON.xlsx')
ws1 = wb["DETALE wyceniane osobno"]

# The first DataFrame row lands on sheet row 1 + offset_row and the first
# value on sheet column 1 + offset_col (openpyxl cells are 1-based).
offset_row = 5
offset_col = 0

# Write each DataFrame value into its own cell instead of appending rows
# at the bottom of the sheet.
for row, row_data in enumerate(
        dataframe_to_rows(df, index=False, header=False), start=1):
    for col, cell_data in enumerate(row_data, start=1):
        ws1.cell(row=row + offset_row, column=col + offset_col,
                 value=cell_data)

wb.save('OFERTA_SZABLON.xlsx')

I cannot figure this out for the life of me.
The answer above fails for me at >>> load_workbook ('OFERTA_SZABLON.xlsx').
It makes no sense, and Workbook.load_workbook('') isn't a thing anyway.
dataframe_to_rows doesn't seem to exist either.

How can i write a list to a new excel workbook?

So I have a problem: I have a list full of data from other Excel workbooks and I want to write it to a new Excel file. I was only half successful: my code only writes the list's first element. How can I write the data row by row?
This is my code so far:
import glob as gl
from openpyxl import load_workbook
import xlsxwriter
# Asker's original attempt: read three cells from every matching workbook and
# write the collected values to a new .xlsx file.
# NOTE(review): indentation was lost in this paste, so it is unclear which of
# the statements below belong to the `for file in files:` loop - confirm.
files = gl.glob(r"<path>*.xlsm")
for file in files:
# Values read from the current workbook.
lst = []
wb = load_workbook(filename=file)
s = wb.active
# Read cells C2, E2 and C3 of the active sheet, in that order.
a = s['C2'].value
lst.append(a)
b = s['E2'].value
lst.append(b)
c = s['C3'].value
lst.append(c)
# Create the output workbook with a single worksheet.
workbook = xlsxwriter.Workbook('<name>.xlsx')
worksheet = workbook.add_worksheet()
# `row` is set once and never changed below; only `column` is incremented,
# so every value of `lst` is written to the same output row.
row = 1
column = 0
for item in lst:
worksheet.write(row,column,item)
column += 1
workbook.close()

First of all you need to gather all files in a list, so:
# One inner list per input workbook, holding the values of cells C2, E2 and
# C3 of its active sheet. Named all_files to match the snippets that follow.
all_files = []
for file in files:
    wb = load_workbook(filename=file)
    s = wb.active
    all_files.append([s[ref].value for ref in ('C2', 'E2', 'C3')])
After the above snippet you'll have all the read files in all_files
Now as an example, suppose:
# Example data: one inner list per source workbook.
all_files = [
    ['heyho', 'hohoo', 'juhuu'],
    ['heyho1', 'hohoo1', 'juhuu1'],
    ['heyho2', 'hohoo2', 'juhuu2'],
]
You can write each element in a row by the following snippet:
workbook = xlsxwriter.Workbook('myfile.xlsx')
worksheet = workbook.add_worksheet()

# Each inner list becomes one output row (starting at row 0); the column
# index restarts at 0 for every row.
for row, item in enumerate(all_files):
    for column, value in enumerate(item):
        worksheet.write(row, column, value)

# The file is only written to disk when the workbook is closed.
workbook.close()

How to filter column data using openpyxl

I am trying to apply a filter to an existing Excel file, and export it to another Excel file. I would like to extract rows that only contain the value 16, then export the table to another excel file (as shown in the picture below).
I have tried reading the openpyxl documentation multiple times and googling for solutions but I still can't make my code work. I have also attached the code and files below
import openpyxl
# Asker's original attempt: set an auto-filter on the source sheet, then copy
# every cell of that sheet into the second workbook.
# NOTE(review): indentation was lost in this paste; loop nesting described
# below is inferred from statement order - confirm.
# Load both workbooks.
wb1 = openpyxl.load_workbook('test_data.xlsx')
wb2 = openpyxl.load_workbook('test_data_2.xlsx')
# Get the worksheets to read from (sh1) and write to (sh2).
sh1 = wb1["data_set_1"]
sh2 = wb2["Sheet1"]
# Configure an auto-filter on column A for the value "16" plus a sort
# condition; nothing below reads these settings when copying.
sh1.auto_filter.ref = "A:A"
sh1.auto_filter.add_filter_column(0, ["16"])
sh1.auto_filter.add_sort_condition("B2:D6")
sh1_row_number = sh1.max_row
sh1_col_number = sh1.max_column
# Collect the cell objects of sh1, one list per row (rows and columns are
# 1-based).
rangeSelected = []
for i in range(1, sh1_row_number+1, 1):
rowSelected = []
for j in range(1, sh1_col_number+1, 1):
rowSelected.append(sh1.cell(row = i, column = j))
rangeSelected.append(rowSelected)
del rowSelected
# Copy every collected value into the same position of sh2; no row is
# skipped here.
for i in range(1, sh1_row_number+1, 1):
for j in range(1, sh1_col_number+1, 1):
sh2.cell(row = i, column = j).value = rangeSelected[i-1][j-1].value
wb1.save("test_data.xlsx")
wb2.save("test_data_2.xlsx")
The picture shows the desired result.

The auto filter doesn't actually filter the data, it is just for visualization.
You probably want to filter while looping through the workbook. Please note with this code I assume you have the table headers already in the second workbook. It does not overwrite the data, it appends to the table.
import openpyxl
# Is use to create a reference of the Excel to wb
# Source workbook (read only) and destination workbook (appended to).
wb1 = openpyxl.load_workbook('test_data.xlsx')
wb2 = openpyxl.load_workbook('test_data_2.xlsx')

# Same sheet name in both workbooks.
sh1 = wb1["data_set_1"]
sh2 = wb2["data_set_1"]

# Copy only the rows whose first column holds the number 16; each match is
# appended after the rows already present in the destination sheet.
for row in sh1.iter_rows():
    if row[0].value == 16:
        sh2.append([cell.value for cell in row])

# Only the destination changed, so the source workbook is not re-saved.
wb2.save("test_data_2.xlsx")

Python-Excel: How to write lines of a row to an existing excel file?

I have searched the site but I could not find anything related to the following question.
I have an existing spreadsheet that I am going to pull data from on a daily basis, the information in the spreadsheet will change everyday.
What I want to do is create a file that tracks certain information from this cell, I want it to pull the data from the spreadsheet and write it to another spreadsheet. The adding of the data to a new spreadsheet should not overwrite the existing data.I would really appreciate the help on this. See code below:
import os
import openpyxl
import xlrd
# Asker's original attempt: read the values of row 2, columns 1-9, of the
# active sheet into a list.
# NOTE(review): indentation was lost in this paste; loop nesting is inferred
# from statement order - confirm.
# NOTE(review): 'r' is passed as the second positional argument; presumably
# it is not interpreted as a file mode - verify against load_workbook's
# signature.
wb=openpyxl.load_workbook('Test_shorts.xlsx','r')
sheet = wb.active
rows = sheet.max_row
col = sheet.max_column
rows = rows+1
# Python 2 print statement (a syntax error under Python 3).
print rows
new =[]
# range(2, 3) yields only x == 2, so a single sheet row is read.
for x in range (2, 3):
for y in range(1,10):
z= sheet.cell(row=x,column=y).value
new.append(z)
print(new)

If you want to copy the whole worksheet, you can use copy_worksheet() function directly. It will create a copy of your active worksheet.
I don't know your data, but I am sure you can finish it by yourself. Hope this may help
from openpyxl import load_workbook
file_name = "Test_shorts.xlsx"
wb = load_workbook(file_name)
sheet = wb.active

# copy_worksheet() returns the new sheet, so no lookup by name is needed.
target = wb.copy_worksheet(sheet)

# Append new data to the copy: Worksheet.append() takes one whole row
# (an iterable of values), so collect the row's cells first.
for x in range(2, 3):
    target.append([sheet.cell(row=x, column=y).value for y in range(1, 10)])

wb.save(file_name)
As noted in the comments, a loop over the cells is required, so I altered your code a little.
from openpyxl import load_workbook
file_name = "Test_shorts.xlsx"
wb = load_workbook(file_name)
current_sheet = wb.active

# Insert the new sheet at index 1 of the workbook.
new_sheet = wb.create_sheet("New", 1)

# Copy every cell value to the same position in the new sheet. The column
# is counted with enumerate (1-based) rather than read from cell.column.
for row in current_sheet.rows:
    for col, cell in enumerate(row, start=1):
        new_sheet.cell(row=cell.row, column=col, value=cell.value)

wb.save(file_name)

Iterate through worksheets adding data to each iteration

I have an excel file in which all data is listed in rows(first Image), I need to take this data and list it in column A of individual worksheets in a newly created workbook(Needs to look like the 2nd image). I am having issues getting the proper 'for' loop, so the data is written each separate worksheet. My code now writes that data all on the same worksheet.
import openpyxl
import os
import time
# Asker's original attempt: create one output sheet per value in column A of
# the input sheet, then copy the input data across.
# NOTE(review): indentation was lost in this paste; loop nesting described
# below is inferred from statement order - confirm.
wb = openpyxl.load_workbook('IP-Results.xlsx') #load input file
sheet = wb.get_sheet_by_name('IP-Results-32708') #get sheet from input file
wbOutput = openpyxl.Workbook() #open a new workbook
wbOutput.remove_sheet(wbOutput.get_sheet_by_name('Sheet')) #remove initial worksheet named 'sheet'
for cell in sheet['A']: #iterate through firewall names in column A and make those the title of the sheets in new workbook
value = cell.value
wbOutput.create_sheet(title=cell.value)
inputwb = wb
inputsheet = inputwb.active
outputwb = wbOutput
# outputsheet is bound once, here, and is never reassigned below.
outputsheet = outputwb.active
maxRow = inputsheet.max_row
maxCol = inputsheet.max_column
for i in range(1, max(maxRow, maxCol) +1):
for j in range(1, min(maxRow, maxCol) + 1):
# The loop variable `sheet` is not used by the two assignments below; both
# write to `outputsheet`.
for sheet in outputwb.get_sheet_names():
outputsheet.cell(row=i, column=j).value = inputsheet.cell(row=j, column=i).value
outputsheet.cell(row=j, column=i).value = inputsheet.cell(row=i, column=j).value
# Save under a name containing today's date (month-day-year).
wbOutput.save("Decom-" + time.strftime("%m-%d-%Y")+ ".xlsx")

'outputsheet' is assigned to refer to the first (the default) sheet in wbOutput:
outputwb = wbOutput
outputsheet = outputwb.active
Then the main loop writes to outputsheet which always refers to the same original worksheet, causing all your data to appear on the same sheet:
for i in range(1, max(maxRow, maxCol) +1):
for j in range(1, min(maxRow, maxCol) + 1):
for sheet in outputwb.get_sheet_names():
**outputsheet**.cell(row=i, column=j).value = inputsheet.cell(row=j, column=i).value
**outputsheet**.cell(row=j, column=i).value = inputsheet.cell(row=i, column=j).value
The easiest solution would be dropping the third inner loop and using get_sheet_by_name:
# Each input row goes to the output sheet named after its first cell, and
# that row's values are listed downwards in column A of the sheet.
for i in range(1, maxRow + 1):
    sheet_name = inputsheet.cell(row=i, column=1).value
    if sheet_name is None:
        # An empty first cell gives no sheet name to look up; skip the row.
        continue
    a_sheet = outputwb[sheet_name]
    for j in range(1, maxCol + 1):
        # Input (row i, column j) becomes output (row j, column A).
        a_sheet.cell(row=j, column=1).value = inputsheet.cell(row=i, column=j).value
I can't test the code at the moment but the general idea should work.
edit
Although it might be worth redesigning to something like this pseudo code:
for each inputwb_row in inputworkbook:
new_sheet = create a new_sheet in outputworkbook
set new_sheet.title = inputworkbook.cell[row,1].value
for each column in inputwb_row:
new_sheet.cell[column, 1].value = inputworkbook.cell[inputwb_row ,column].value

We Keep Coding

Python is a programming language that lets you work quickly and integrate systems more effectively.

Read specific columns from excel for python - python

Related

Exporting to Excel Using Openpyxl - specific rows and columns

How can i write a list to a new excel workbook?

How to filter column data using openpyxl

Python-Excel: How to write lines of a row to an existing excel file?

Iterate through worksheets adding data to each iteration

Categories

Resources