How to loop through rows of the Excel sheet using openpyxl? - python

I am using Python, Selenium, openpyxl in order to fill a form online.
To fill the form I am taking values from specific cells on excel (.xlsx).
(To test the code, you can just create an Excel file with 2 columns, and insert some names under column A and some ages under column B.)
From the cell A2, I take the NAME of the person and insert it into the online form
From the cell B2, I take the LASTNAME of the person and insert it into the online form
Then I click 'Reset' (This is an example but in the real code I will click save as a draft).
I would like to create a loop in which the code will start again from driver.get("https://www.roboform.com/filling-test-all-fields") to go again to the page where I need to fill out the form, but this time I would like to take:
From the cell A3, the NAME of the person and insert it into the online form
From the cell B3, the LASTNAME of the person and insert it into the online form
And click 'Send as a draft' again
Then again, another loop to insert the data from row 4, so I would like to program to read again my code from driver.get("https://www.roboform.com/filling-test-all-fields") but this time take values from A4 and B4, and so on, until the row on excel is empty.
With the following code I can insert the data to the online form:
from selenium import webdriver
from selenium.webdriver.chrome.webdriver import WebDriver
from selenium.common.exceptions import NoSuchElementException
import openpyxl
# Start Chrome through the local chromedriver binary and maximise the window.
driver: WebDriver = webdriver.Chrome(
    "/Users/HHHHH/PycharmProjects/excel/driver/chromedriver"
)
driver.maximize_window()

# data_only=True asks openpyxl for the stored values of formula cells rather
# than the formula text.
excel_document = openpyxl.load_workbook(
    r"/Users/XPATH OF THE EXCEL FILE YOU CREATE TO TEST THIS CODE",
    data_only=True,
)
sheet = excel_document["Sheet1"]
driver.get("https://www.roboform.com/filling-test-all-fields")

# Insert in the form the Name of the person (skipped when the cell is empty)
prevsymbol = sheet["A2"].value
if prevsymbol is not None:
    try:
        driver.find_element_by_name("02frstname").send_keys(prevsymbol)
    except NoSuchElementException:
        print("A2:(name) Not Found")

# Insert in the form the Last Name of the person (skipped when the cell is empty)
prevsymbol = sheet["B2"].value
if prevsymbol is not None:
    try:
        driver.find_element_by_name("04lastname").send_keys(prevsymbol)
    except NoSuchElementException:
        print("B2:(Lastname) Not Found")

# click Save as a draft
# XPath attribute tests are written with "@"; "#value" is not valid XPath.
driver.find_element_by_xpath("//*[@value='Reset']").click()

I have created a helper class; please check whether it fulfils your purpose. This code was written for an old version of openpyxl, so please update it if needed.
class OpenpyxlImport(object):
    """Load an uploaded spreadsheet and expose its rows as dictionaries.

    ``file`` must be a file-like object with a ``name`` attribute. Files whose
    name ends in ``.xls`` are read with xlrd and copied cell by cell into a new
    openpyxl workbook; anything else is handed straight to ``load_workbook``.
    """

    def __init__(self, file):
        self.file = file
        if self.file.name.endswith('.xls'):
            self.wb = self.xls_to_xlsx(self.file)
        else:
            self.wb = load_workbook(self.file)
        self.sheets = self.wb.worksheets

    def to_camelcase(self, string):
        """Replace inner ``_x`` with `` X`` and upper-case the whole text.

        Despite the name, the result is upper-case words separated by spaces
        (``"first_name"`` -> ``"FIRST NAME"``).
        """
        text = re.sub(r'(?!^)_([a-zA-Z])', lambda m: ' ' + m.group(1).upper(), str(string))
        return text.upper()

    def to_snake_case(self, string):
        """Replace every whitespace character with ``_`` and lower-case the text."""
        text = re.sub(r'\s', '_', str(string))
        return text.lower()

    def xls_to_xlsx(self, content):
        """Copy every sheet of a legacy ``.xls`` file into a new openpyxl workbook."""
        xls_book = xlrd.open_workbook(file_contents=content.read())
        workbook = openpyxlWorkbook()
        for i in range(0, xls_book.nsheets):
            xls_sheet = xls_book.sheet_by_index(i)
            # A new workbook already has one active sheet; reuse it for sheet 0.
            sheet = workbook.active if i == 0 else workbook.create_sheet()
            sheet.title = xls_sheet.name
            for row in range(0, xls_sheet.nrows):
                for col in range(0, xls_sheet.ncols):
                    # xlrd indexes from 0, openpyxl from 1.
                    sheet.cell(row=row + 1, column=col + 1).value = xls_sheet.cell_value(row, col)
        return workbook

    def tally_header(self, row, fields):
        """Return True when the values of ``row`` equal ``fields``.

        Trailing whitespace is stripped from text cells in place before the
        comparison. Empty or non-text cells are compared as they are instead
        of raising ``AttributeError``.
        """
        for cell in row:
            if hasattr(cell.value, 'rstrip'):
                cell.value = cell.value.rstrip()
        return [cell.value for cell in row] == fields

    def row_to_dict(self, row):
        """Map each cell of ``row`` to the snake_cased header of its column.

        The header is read from row 1 of the first sheet.
        """
        first_sheet = self.get_first_sheet()
        dct = {}
        for cell in row:
            # Newer openpyxl exposes the letter as ``column_letter`` and makes
            # ``column`` an int; older releases only had the letter in ``column``.
            column_letter = getattr(cell, 'column_letter', None) or cell.column
            dct[self.to_snake_case(first_sheet[column_letter + '1'].value)] = cell.value
        return dct

    def get_sheets(self):
        """Return all worksheets of the workbook."""
        return self.sheets

    def get_first_sheet(self):
        """Return the first worksheet of the workbook."""
        return self.sheets[0]

    def get_sheet_rows(self):
        """Return every row of the first worksheet as a tuple."""
        return tuple(self.get_first_sheet().iter_rows())
# Usage
# Wrap the uploaded file; the constructor inspects `file.name` to pick a loader.
excel = OpenpyxlImport(file)
# Every row of the first worksheet, as a tuple.
rows = excel.get_sheet_rows()
# Only import when the first row matches the expected column titles.
# NOTE(review): `self.fields` suggests this fragment sits inside a method of another class - confirm.
if excel.tally_header(rows[0], self.fields):
for row in rows[1:]:
# Each remaining row becomes a dict keyed by the snake_cased header text.
params = excel.row_to_dict(row)

You can get the number of rows in the sheet using the max_row property. So, the code becomes:
from selenium import webdriver
from selenium.webdriver.chrome.webdriver import WebDriver
from selenium.common.exceptions import NoSuchElementException
import openpyxl
driver: WebDriver = webdriver.Chrome(
    "/Users/HHHHH/PycharmProjects/excel/driver/chromedriver"
)
driver.maximize_window()
excel_document = openpyxl.load_workbook(
    r"/Users/HHHHH/Desktop/testtesttest1.xlsx",
    data_only=True,
)
sheet = excel_document["Sheet1"]

# One entry per form input: (Excel column, element id, label for the message).
# Adding a field to the form only needs a new entry here.
FORM_FIELDS = (
    ("A", "name", "name"),
    ("B", "age", "Age"),
)

# Row 1 holds the column titles, so the data starts on row 2.
for i in range(2, sheet.max_row + 1):
    # max_row can include trailing rows that are formatted but empty, so stop
    # at the first row in which none of the form columns has a value.
    if all(sheet[column + str(i)].value is None for column, _, _ in FORM_FIELDS):
        break

    driver.get("https://XXXXXXXXXX")
    for column, element_id, label in FORM_FIELDS:
        cell = column + str(i)
        prevsymbol = sheet[cell].value
        if prevsymbol is None:
            # Nothing to type for this field on this row.
            continue
        try:
            driver.find_element_by_id(element_id).send_keys(prevsymbol)
        except NoSuchElementException:
            print(cell + ":(" + label + ") Not Found")

    # Click Save as a draft
    driver.find_element_by_xpath("xpath_save_as_draft").click()

Related

Python Selenium | How can I remove these repetitive code every time I add let's say a new "item"

I'm designing an Automated Acceptance Test using Python Selenium to fill in a web form, check the errors that are shown on the webpage, and compare them to my expected results to give me some sort of "Success" or "Fail" result.
Currently, I am testing on a website with only 3 fields.
It only requires IC, Postcode and Vehicle Registration Number. However, let's say if there was another field "Name". I'd have to add these repetitive code into my code
# Enter Name
input_Name = driver.find_element(By.ID, "name")
input_Name.send_keys(entry['Name'])
time.sleep(1)
and to test the "error". It varies between testing if the button is working or if the element is visible, etc.
So, how can I do this without having to repeat this process over and over, every time a new field is added? I was thinking of adding the fields into an excel sheet and use a for loop on it but I can't really figure out where to place it in my code.
Here is my code:
import datetime
import os.path
import time
import numpy as np
import openpyxl
import pandas as pd
import selenium
from selenium import webdriver
from selenium.common.exceptions import (ElementClickInterceptedException,
ElementNotInteractableException,
NoSuchElementException)
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
ProjectName = 'Automated UAT'
# Excel Path
ExcelPath = 'Excel Path'
# Convert Excel Data Type
ExcelConverters = {'IC': str, 'Postcode': str, 'VehRegNum': str}
# Web Form url
url = 'url'
# Set ChromeDriver path
DriverPath = 'Driver Path'

# One entry per form input: (Excel column, id of the input element).
# A new field only needs a new entry here (plus a converter above).
FormFields = [
    ('IC', 'icno'),
    ('Postcode', 'postcode'),
    ('VehRegNum', 'registerno'),
]
# One entry per expected message: (result column, label for the log, page text).
ExpectedErrors = [
    ('e1', 'IC_error1', 'IC number is mandatory'),
    ('e2', 'IC_error2', 'Invalid IC format'),
    ('e3', 'Postcode_error', 'Postcode number is mandatory'),
    ('e4', 'RegNo_error', 'Vehicle Registration Number is mandatory'),
]
ResultColumns = [column for column, _, _ in ExpectedErrors]


def _error_is_shown(message):
    """Return True when some element on the current page contains *message*."""
    try:
        driver.find_element(By.XPATH, "//*[contains(text(), '" + message + "')]")
    except NoSuchElementException:
        return False
    return True


driver = webdriver.Chrome(executable_path=DriverPath)
# Empty Result Data Frame
result_df = pd.DataFrame(columns=ResultColumns)
result_rows = []
try:
    df1 = pd.read_excel(ExcelPath, header=0, converters=ExcelConverters)
    df = df1.replace(np.nan, '', regex=True)
    driver.maximize_window()

    # Iterate over the rows pandas actually read instead of counting past the
    # end of the sheet and waiting for a KeyError to stop the loop.
    for i, entry in df.iterrows():
        print("attempt row " + str(i))
        driver.get(url)

        for column, element_id in FormFields:
            driver.find_element(By.ID, element_id).send_keys(entry[column])
            time.sleep(1)

        # Click on Button, then give the page time to show its messages
        driver.find_element(By.NAME, 'btnGenQuote').click()
        time.sleep(5)

        # Each message is checked on its own, so one missing message can no
        # longer affect the checks that follow it.
        outcome = {}
        for result_column, label, message in ExpectedErrors:
            shown = _error_is_shown(message)
            outcome[result_column] = "T" if shown else "F"
            print(label + (" found" if shown else " not found"))

        result_rows.append(outcome)
        result_df = pd.DataFrame(result_rows, columns=ResultColumns)
        print(result_df)

        # For screenshot
        driver.get_screenshot_as_file(ProjectName + '_' + str(i) + '.png')

    print("Process is COMPLETED")
finally:
    try:
        # Written even when a row fails, so the results gathered so far are kept.
        # Ensure "Result" workbook is closed to update
        result_df.to_excel('Result.xlsx', sheet_name='result')
    finally:
        driver.quit()
If you have any ideas on how to enhance my code even more. I'm open to suggestions :")
You can put it into a function to remove the repeating code. The function can have two parameters.
An enum corresponding to the search type (i.e. whether you want to search for the element by name or by id)
The search value
Example:
import enum
class idOrName(enum.Enum):
    """Locator strategies understood by ``findAndSendKey``."""

    # Real members are needed: the bare annotations ``id: Id`` / ``name: Name``
    # define no members and fail on the undefined names.
    id = "id"
    name = "name"


def findAndSendKey(searchType, searchString, entryValue):
    """Locate a form input and type one value of the current ``entry`` into it.

    searchType   -- ``idOrName.id`` or ``idOrName.name``
    searchString -- the id / name attribute of the element to find
    entryValue   -- key of the value to read from the module-level ``entry``

    Relies on the module-level ``driver`` and ``entry``.
    Raises ValueError for any other ``searchType``.
    """
    if searchType == idOrName.id:
        locator = By.ID
    elif searchType == idOrName.name:
        locator = By.NAME
    else:
        # Fail with a clear message instead of calling send_keys on a str.
        raise ValueError("Unsupported search type: %r" % (searchType,))

    element = driver.find_element(locator, searchString)
    element.send_keys(entry[entryValue])
    time.sleep(1)
Now you can run it as:
findAndSendKey(idOrName.id, "registerno", "VehRegNum")
And call this function where ever required

Extract Header and Table text from a .docx file

I'm trying to extract page and header data from a docx file. The file is several hundred pages, each with a table and a header. The header has pertinent information that needs to be paired with each table. I'm able to extract the header and table data, I just can't reliably pair them together.
Using win32com this is what I've got so far
# getting the table page number
app = Dispatch("Word.Application")
doc = app.Documents.Open(filename)
table_1_page = doc.Tables(1).Range.Information(3) # 3 == wdActiveEndPageNumber
The problem occurs because the headers are TextFrames and are duplicated on multiple pages, so when I call:
# getting the header page number
doc.Sections(1).Headers(1).Shapes(1).TextFrame.TextRange.Information(3)
I get one of the pages that the TextFrame occurs on. The page it chooses seems somewhat arbitrary: sometimes it's the first, other times it's the last, but it's not predictable.
I've spent a bit of time reading over the object model here. Ultimately it would be nice to capture all of the items displayed per page without reinventing the wheel.
EDIT 10/25/16 per request, here is some minimum working code**
# filename docx_parser.py
import pythoncom
class OpenDoc(object):
    """Open a .docx file through Word's COM interface and read tables/headers.

    Creating an instance starts (or attaches to) Word, keeps it hidden and
    opens ``docx_path``. Word is asked to quit when the instance is collected.
    """

    def __init__(self, docx_path):
        import os
        import win32com.client as win32

        # Defined first so __del__ is safe even if the lines below fail.
        self.word = None
        self.path = docx_path
        self.word = win32.Dispatch("Word.Application")
        self.word.Visible = 0
        # The original passed the undefined name ``p`` here. An absolute path
        # is used so the file is resolved from this script's working directory.
        self.doc = self.word.Documents.Open(os.path.abspath(docx_path))

    def get_table_count(self):
        """Return the number of tables in the document."""
        return self.doc.Tables.Count

    def count_table_rows(self, table):
        """Return the number of rows of ``table``."""
        return table.Rows.Count

    def count_table_columns(self, table):
        """Return the number of columns of ``table``."""
        return table.Columns.Count

    def get_headers(self):
        """Yield ``(text, page_number)`` for each shape in the first header of section 1."""
        headers = self.doc.Sections(1).Headers(1)
        shape_count = headers.Shapes.Count
        # COM collections are 1-based.
        for shape_num in range(1, shape_count + 1):
            t_range = headers.Shapes(shape_num).TextFrame.TextRange
            text = t_range.Text
            page_num = t_range.Information(3)  # 3 == wdActiveEndPageNumber
            yield text, page_num

    def get_table_text(self, table):
        """Yield one list of cell texts per row of ``table``.

        A cell that cannot be read (COM error) is reported as ``""``.
        """
        col_count = self.count_table_columns(table)
        row_count = self.count_table_rows(table)
        for row in range(1, row_count + 1):
            row_data = []
            for col in range(1, col_count + 1):
                try:
                    # Strip the trailing chr(13)/chr(7) characters from the cell text.
                    row_data.append(table.Cell(Row=row, Column=col).Range.Text.strip(chr(7) + chr(13)))
                except pythoncom.com_error:
                    row_data.append("")
            yield row_data

    def get_all_table_text(self):
        """Yield, for every table, the list of its rows (see ``get_table_text``)."""
        for table in self.get_tables():
            yield list(self.get_table_text(table))

    def get_tables(self):
        """Yield every table of the document."""
        for table in self.doc.Tables:
            yield table

    def __del__(self):
        # ``word`` is missing or None when __init__ did not get far enough.
        word = getattr(self, "word", None)
        if word is not None:
            word.Quit()
if __name__ == "__main__":
    # Print every table of sample.docx, then every header shape with the page
    # number Word reports for it. Any failure is printed as a traceback.
    try:
        path = r"sample.docx"
        open_doc = OpenDoc(path)
        for table_num, table_text in enumerate(open_doc.get_all_table_text()):
            print("\n-------------- Table %s ----------------" % (table_num + 1))
            for row_data in table_text:
                print(", ".join(row_data))
        for header_text, page_num in open_doc.get_headers():
            print("header page number: %s, text: %s" % (page_num, header_text))
    except Exception:
        from traceback import format_exc
        print(format_exc())

    # Wait for Enter before exiting. ``raw_input`` only exists on Python 2;
    # on Python 3 the same function is called ``input``.
    # NOTE(review): the original nesting of this call was lost - confirm it
    # should run after both the success and the error path.
    try:
        wait_for_enter = raw_input
    except NameError:
        wait_for_enter = input
    wait_for_enter("")

read Chinese character from excel file python3

I have an Excel file that contains two columns, first one in Chinese and the second is just a link.
I tried two methods I found here. but it didn't work and I can't print the value in the console, I changed my encoding variable in settings (pycharm) to U8, still doesn't work.
I used Pandas & xlrd libs, both didn't work while it worked for others who posted.
this is my current code :
from xlrd import open_workbook
class Arm(object):
    """Plain record holding an id and a display name."""

    def __init__(self, id, dsp_name):
        self.id, self.dsp_name = id, dsp_name

    def __str__(self):
        # Three-line, human-readable dump of the two attributes.
        template = "Arm object:\n Arm_id = {0}\n DSPName = {1}\n"
        return template.format(self.id, self.dsp_name)
# Script entry point: dump the cells of every sheet of test.xls to the console.
# NOTE(review): the original indentation of this block was lost; the nesting
# described in the comments below is inferred - confirm.
if __name__ == '__main__':
wb = open_workbook('test.xls')
for sheet in wb.sheets():
print(sheet)
number_of_rows = sheet.nrows
number_of_columns = sheet.ncols
# `items` and `rows` are created here; nothing visible appends to either.
items = []
rows = []
# Starts at index 1, so the sheet's first row (index 0) is never read.
for row in range(1, number_of_rows):
values = []
for col in range(number_of_columns):
# Every cell value is converted to str before use.
value = str(sheet.cell(row, col).value)
# Prints the text one character per print call.
for a in value:
print('\n'.join([a]))
values.append(value)
print(value)
# `items` is still empty at this point, so this loop body does not run.
for item in items:
print (item)
print("Accessing one single value (eg. DSPName): {0}".format(item.dsp_name))
# A bare `print` is only a name reference under Python 3 and prints nothing.
print
obviously it's not working, I was just messing around with it after giving up.
File : http://www59.zippyshare.com/v/UxITFjis/file.html
It's not about encoding; you are not accessing the right rows.
On the line 24
for row in range(1, number_of_rows):
why do you want to start with 1 instead of 0?
Try: for row in range(number_of_rows):
Well, the problem I had wasn't actually in reading the Chinese characters! My problem was in printing to the console.
I thought that the print encoder worked fine and that I just wasn't reading the characters, but this code works fine:
from xlrd import open_workbook
# Collect the GBK-encoded cell values of test.xls: the first column of every
# data row goes to `messages`, every other column goes to `links`.
wb = open_workbook('test.xls')
messages, links = [], []
for sheet in wb.sheets():
    # Row index 0 is skipped.
    for row_idx in range(1, sheet.nrows):
        for col_idx in range(sheet.ncols):
            encoded = sheet.cell(row_idx, col_idx).value.encode('gbk')
            target = messages if col_idx == 0 else links
            target.append(encoded)
print(links)
to check it, I paste the first result in selenium driver (since I was going to use it anyway)
element = driver.find_element_by_class_name('email').send_keys(str(messages[0],'gbk'))
and it works like a charm!

Import a CSV to Google Fusion Table with python

From http://fuzzytolerance.info/blog/2012/01/13/2012-01-14-updating-google-fusion-table-from-a-csv-file-using-python/ I have edited his code to import the necessary modules; however, I get the following error: "AttributeError: 'module' object has no attribute 'urlencode'". I run the code and I am prompted to enter my password, I enter my own Google account password, and then the code gives me the error message. Perhaps I need to define a password somewhere?
I wonder if anyone can please trouble shoot my code or advise me on how to avoid this error or even advise me of an EASIER way to import a CSV into a GOOGLE FUSION TABLE that I OWN
Here is my code
import csv
from decimal import *
import getpass
from fusiontables.authorization.clientlogin import ClientLogin
from fusiontables import ftclient
nameAgeNick = 'C:\\Users\\User\\Desktop\\NameAgeNickname.txt'
# check to see if something is an integer
def isInt(s):
    """Return True when ``int(s)`` succeeds, False when it raises ValueError."""
    try:
        int(s)
    except ValueError:
        return False
    else:
        return True
# check to see if something is a float
def isFloat(s):
    """Return True when ``float(s)`` succeeds, False when it raises ValueError."""
    try:
        float(s)
    except ValueError:
        return False
    else:
        return True
# GFT table ID
tableID = "tableid"
# your username
username = "username"
# prompt for your password - you can hardcode it but this is more secure
password = getpass.getpass("Enter your password:")
# Get token and connect to GFT
token = ClientLogin().authorize(username, password)
ft_client = ftclient.ClientLoginFTClient(token)


def _format_value(raw):
    """Turn one CSV field into the text used in the update statement.

    Non-numbers are wrapped in single quotes, integers are passed through and
    other numbers are rounded to one decimal place, after being multiplied by
    100 when they are below 1 (treated as a percentage).
    """
    if not isFloat(raw):
        return "'" + raw + "'"
    if isInt(raw):
        return raw
    number = float(raw)
    if number < 1:
        number = number * 100
    return round(number, 1)


# Loop through the CSV data and upload
# Assumptions for my data:
# The first row is the column names and matches exactly the column names in Fusion Tables
# The first column is the unique ID I'll use to select the record for updating in Fusion Tables
#
# NOTE(review): the statements below are built by string concatenation from
# file contents; a field containing a quote breaks the query. Confirm the
# input is trusted or escape it.
#
# The csv module needs a text-mode file opened with newline="" on Python 3;
# the `with` block also closes the file when a query fails.
with open(nameAgeNick, newline="") as ifile:
    reader = csv.reader(ifile)
    header = next(reader, None)
    for row in reader:
        if not row:
            # A blank line has no ID to select on.
            continue
        # make the "column=value" pairs for the set clause of this row
        setList = [
            name + "=" + str(_format_value(col))
            for name, col in zip(header, row)
        ]
        nid = row[0]
        # get rowid and update the record
        rowid = ft_client.query("select ROWID from " + tableID + " where ID = " + nid).split("\n")[1]
        print(rowid)
        print(ft_client.query("update " + tableID + " set " + ",".join(setList) + " where rowid = '" + rowid + "'"))
And this is the module where the error occurs:
#!/usr/bin/python
#
# Copyright (C) 2010 Google Inc.
""" ClientLogin.
"""
__author__ = 'kbrisbin#google.com (Kathryn Brisbin)'
# urlencode/Request/urlopen moved in Python 3; calling ``urllib.urlencode``
# there is what raises "module object has no attribute 'urlencode'".
try:  # Python 3
    from urllib.parse import urlencode
    from urllib.request import Request, urlopen
except ImportError:  # Python 2
    from urllib import urlencode
    from urllib2 import Request, urlopen


class ClientLogin():
    """Obtain a Fusion Tables auth token through Google's ClientLogin endpoint.

    NOTE(review): Google has announced the retirement of ClientLogin - confirm
    the endpoint still answers before relying on this class.
    """

    def authorize(self, username, password):
        """POST the credentials and return the value of the ``Auth`` field.

        Raises KeyError when the response carries no ``Auth`` line; network
        and HTTP errors from ``urlopen`` propagate unchanged.
        """
        auth_uri = 'https://www.google.com/accounts/ClientLogin'
        # The request body must be bytes on Python 3.
        authreq_data = urlencode({
            'Email': username,
            'Passwd': password,
            'service': 'fusiontables',
            'accountType': 'HOSTED_OR_GOOGLE'}).encode('ascii')
        auth_req = Request(auth_uri, data=authreq_data)
        auth_resp = urlopen(auth_req)
        try:
            auth_resp_body = auth_resp.read().decode('utf-8')
        finally:
            auth_resp.close()
        return self._parse_response(auth_resp_body)['Auth']

    @staticmethod
    def _parse_response(body):
        """Parse ``key=value`` lines into a dict, ignoring empty lines."""
        # Split on the first "=" only: the value may itself contain "=".
        return dict(line.split('=', 1) for line in body.split('\n') if line)
​

How do I write a python macro in libreoffice calc to cope with merged cells when inserting external data

The premise: I am working in libreoffice calc and need to send an instruction to another program that I know to be listening on a TCP port, via a python macro.
I am expecting a list of invoice line data from the listening program and want to insert the lines into the libreoffice spreadsheet which may or may not have merged cells.
Having been helped many times over by searching stackoverflow, I thought that I would post a solution to a problem which took much effort to resolve.
The code splits the data into lines and each line into data items delimited by the sending program, by tab. The data is inserted, starting from the cell in which the cursor is presently positioned. Each subsequent data item is inserted into the next column and for each line of subsequent data increments the row for the next set of inserts.
Finding the merged cell "range" was a particularly difficult thing to discover how to do and I have not found this documented elsewhere.
Finally each data item is tested to see if it should be inserted as a numeric or text, this is vital if you wish the spreadsheet to perform calculations on the inserted data.
The last line of data is marked with the word "END". This final line of data contains, in this example, an Invoice number ( at position 1) and the specific Cell Name (at position 4) into which it should be put. If there is an error the data is written into the next row down as text so the user can cut and paste the data.
Configobj is a package that reads parameters from a flat file. In this example, I am using that file to store the TCP port to be used. Both the listening program and this code are reading the port number from the same configuration file. It could have been hard coded.
Here is a python macro that works for me, I trust that it will point others in the right direction
def fs2InvoiceLinesCalc(*args):
    """Fetch invoice lines over TCP and insert them at the current selection.

    Sends ``invoice`` to the program listening on localhost (port read from
    ``$HOME/fs2.cfg``), splits the reply into lines and tab-separated items,
    and writes them into the active sheet starting at the selected cell,
    stepping over merged cells. A line starting with ``END`` ends the data and
    carries the invoice number (item 1) and its target cell name (item 4).

    Returns None in every case. Returns early, without touching the sheet,
    when the configuration file is unreadable or the exchange with the
    listening program fails. Raises Exception when the current document is not
    a Calc spreadsheet.
    """
    import os, socket, time
    from configobj import ConfigObj

    desktop = XSCRIPTCONTEXT.getDesktop()
    model = desktop.getCurrentComponent()
    try:
        model.getSheets()
    except AttributeError:
        raise Exception("This script is for Calc Spreadsheets only")
    sheet = model.CurrentController.getActiveSheet()
    oArea = model.getCurrentSelection().getRangeAddress()
    first_row = oArea.StartRow
    first_col = oArea.StartColumn

    # get the string from Footswitch2 via a TCP port
    config_filename = os.environ["HOME"] + "/fs2.cfg"
    if not os.access(config_filename, os.R_OK):
        return None
    cfg = ConfigObj(config_filename)
    # define values to use from the configuration file
    tcp_port = int(cfg["control"]["TCP_PORT"])

    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        # Short timeout for the connection attempt, longer one for the exchange.
        sock.settimeout(0.5)
        sock.connect(("localhost", tcp_port))
        sock.settimeout(10)
        sock.sendall(bytes('invoice\n', 'UTF-8'))
        time.sleep(1.0)
        # NOTE(review): a single recv is kept from the original; a reply
        # longer than 4096 bytes would be cut short - confirm the reply size.
        s_list = sock.recv(4096).decode('UTF-8').split("\n")
    except (OSError, UnicodeDecodeError):
        return None
    finally:
        # Closed on every path, including the early returns above.
        sock.close()

    column = list("ABCDEFGHIJKLMNOPQRSTUVWXYZ")
    # merged rows are cumulative
    master_row_merge_adj = 0
    row_merge_adj = 0
    end_index = None
    for x, line in enumerate(s_list):
        if line.startswith("END"):
            end_index = x
            break
        row_merge_adj = master_row_merge_adj
        if line == "":
            continue
        # merged columns are a simple adjustment for each item within x
        column_merge_adj = 0
        row_merge_done = False
        for y, it in enumerate(line.split("\t")):
            cell_name = column[first_col + y + column_merge_adj] + str(x + 1 + first_row + row_merge_adj)
            cell = sheet.getCellRangeByName(cell_name)
            if cell.getIsMerged():
                cellcursor = sheet.createCursorByRange(cell)
                cellcursor.collapseToMergedArea()
                try:
                    # format AbsoluteName $Sheet1.$A$1:$D$2 for a merged cell of A1:D2
                    _, _, cell_range = cellcursor.AbsoluteName.partition(".")
                    start_cell, end_cell = cell_range.split(":")
                    _, start_col, start_row = start_cell.split("$")
                    _, end_col, end_row = end_cell.split("$")
                    column_merge_adj = column_merge_adj + (column.index(end_col) - column.index(start_col))
                    # merged rows are cumulative over each x
                    # merged row increment should only occur once within each x
                    # or data will not be in the top left of the merged cell
                    if not row_merge_done:
                        master_row_merge_adj = row_merge_adj + (int(end_row) - int(start_row))
                        row_merge_done = True
                except Exception:
                    # Deliberately best-effort: unable to compute - insert data
                    # off to the right so it's available for cut and paste
                    column_merge_adj = 10
            # Numbers must be stored as values so the sheet can calculate with them.
            try:
                number = float(it)
            except ValueError:
                cell.String = it
            else:
                cell.Value = number

    if end_index is not None:
        insert_table = s_list[end_index].split("\t")
        # Missing items become "" so the fallback message can still be written.
        raw_invno = insert_table[1] if len(insert_table) > 1 else ""
        passed_cell_name = insert_table[4] if len(insert_table) > 4 else ""
        invno = None
        try:
            invno = int(raw_invno)
            cell = sheet.getCellRangeByName(passed_cell_name)
            cell.Value = invno
        except Exception:
            # The invoice number or the cell_name passed for it is incorrect,
            # attempt to insert it in the next row, first selected column
            shown = raw_invno if invno is None else invno
            cell_name = column[first_col] + str(end_index + 2 + first_row + row_merge_adj)
            cell = sheet.getCellRangeByName(cell_name)
            insert_text = "Invoice Number " + str(shown) + " Pos " + passed_cell_name + " Incorrect"
            cell.String = insert_text
    return None

Categories