Formatting of URL in webbrowser doesn't work - python

Chrome says it can not find the url.
Yet, When I run the program the URL prints out as:
'https://www.gettyimages.com/detail/photo/london-tower-bridge-river-thames-city-skyscrapers-royalty-free-image/860119662?adppopup=true'
If I replace the variable "url" with the actual url above in the webbrowser.open it finds the webpage perfectly.
Here is the actual code:
# Read a URL from an Excel sheet with Python and open it in Chrome
import webbrowser

import xlrd

# Give the location of the file
loc = 'F:\\Documents\\Fun Stuff\\Other\\bridges.xlsx'
# Path to the web browser; webbrowser substitutes the URL for "%s"
chrome_path = 'C:/Program Files (x86)/Google/Chrome/Application/chrome.exe %s'

# Open the workbook and take its first sheet
book = xlrd.open_workbook(loc)
sheet = book.sheet_by_index(0)

# Input is requested, then converted into an integer row index
row_text = input('Row Number = ')
row_number = int(row_text)

# For row "row_number" and the second column (index 1)
cell_value = sheet.cell(row_number, 1)
# str() of an xlrd Cell looks like "text:'https://...'", so slicing off the
# first five characters leaves the surrounding quote characters inside the URL
# and the browser cannot resolve it.  Use the cell's actual value instead.
url = str(cell_value.value).strip()

# Verify the URL extracted from the Excel sheet
print(url)

# Open the web page
webbrowser.get(chrome_path).open(url, new=0)

Related

Problem when trying to run python script from VBA

I'm new to coding. I'm trying to write my first script that is called from VBA. The script works well on its own, but now I want to call it from VBA, passing two variables (the URL and the file name).
I always get an error on the shell line.
I hope you can help me..
thanks
This is the URL containing the table to download: https://jde.erpref.com/?schema=920&table=F4311
#this is my VBA code:
Sub Dw_table()
    ' Ask for a table URL and an output file name, then run the Python script
    ' with those two values as command-line arguments.
    Dim url As String
    Dim file_name As String
    ' Every variable needs its own "As String"; "Dim a, b As String" makes a a Variant
    Dim PythonExe As String, PythonScript As String
    Dim objShell As Object
    Dim cmd As String

    ' Prompt the user to enter the URL and file name
    url = InputBox("Enter the URL of the table:")
    file_name = InputBox("Enter the file name:")
    ' InputBox returns an empty string when the dialog is cancelled
    If Len(url) = 0 Or Len(file_name) = 0 Then Exit Sub

    ' Paths for exe and script (quotes are added when the command is built)
    PythonExe = "C:\Users\Mario Rdz\AppData\Local\Programs\Python\Python311\python.exe"
    PythonScript = "C:\Users\Mario Rdz\PycharmProjects\HelloWorld\JDEREF.py"

    ' The shell object has to be created before its Run method can be called
    Set objShell = CreateObject("WScript.Shell")

    ' Quote each part (both paths contain spaces) and separate the parts with
    ' spaces so that Python receives the URL and the file name as two arguments
    cmd = """" & PythonExe & """ """ & PythonScript & """ """ & url & """ """ & file_name & """"
    objShell.Run cmd
End Sub
this is my python code
from bs4 import BeautifulSoup
import requests
import openpyxl
import sys
# Arguments passed from Excel VBA: the page URL and the output file name
if len(sys.argv) != 3:
    sys.exit(f"usage: {sys.argv[0]} <url> <file_name>")
url = sys.argv[1]
file_name = sys.argv[2]

page = requests.get(url)
# Parse the HTML content of the webpage
soup = BeautifulSoup(page.content, "html.parser")
# Find the table with the ID "columnselectcollection"
table = soup.find(id="columnselectcollection")
if table is None:
    # soup.find() returns None when nothing matches; stop with a clear message
    sys.exit(f'No element with id "columnselectcollection" found at {url}')

# Create a new workbook and use its active worksheet
workbook = openpyxl.Workbook()
worksheet = workbook.active

# Set the values for column headers
worksheet.cell(row=1, column=2).value = "Seq"
worksheet.cell(row=1, column=3).value = "Field"
worksheet.cell(row=1, column=4).value = "Description"
worksheet.cell(row=1, column=5).value = "Data type"
worksheet.cell(row=1, column=6).value = "Edit Type type"
worksheet.cell(row=1, column=7).value = "Lenght"
worksheet.cell(row=1, column=8).value = "Decimals"

# Iterate over the rows of the table and write the data to the worksheet
for row in table.find_all('tr'):
    row_data = [cell.text for cell in row.find_all('td')]
    worksheet.append(row_data)

# Delete row 2, which is blank, and the unneeded column 9
worksheet.delete_cols(9)
worksheet.delete_rows(2)

# Save the workbook to an Excel file
workbook.save(f"{file_name}.xlsx")
I think the issue is in how I'm calling the Python script, or in how I'm passing the URL and file name to it.

How to Stop Override data in Excel by Python

I am saving data in an Excel file, but every time I run the program again it overwrites the file and the previous data is lost. I want each run to append its data below the existing data instead. Sorry for posting the whole code; I don't know which part of it is relevant. Help, please!
//Python Code
from time import time
from turtle import heading, title
import requests
from bs4 import BeautifulSoup
from openpyxl import Workbook, load_workbook
from openpyxl.styles import Font
from openpyxl.utils import get_column_letter
# Pool pages to check (Cardano only)
links = [
    "https://cardanoscan.io/pool/eff96bfccda465d5be2c42e97ab4c6013bd08b29bd8a41feafcd4713",
    "https://cardanoscan.io/pool/54ee46b05aac43c6f692963d731037b1747a4cebfc119ccd2c5ad78b",
    "https://cardanoscan.io/pool/c44a041de1e1e85d5f89a5afac6d3c304e411855b2418ae209178cdc",
    "https://cardanoscan.io/pool/1f79d3dd8366a9d04512e59e3cd65c1342eaa261b641ef93e5ac8f86",
    "https://cardanoscan.io/pool/37776026d64eeb9fb5873caedc6e9c140f66c43ef6b3e396e061c891",
    "https://cardanoscan.io/pool/51897956cbe5c8f4751953c31ce85a1c60f5f27efee16397e6654947",
    "https://cardanoscan.io/pool/27ef7af15a7b854508385bf193acd73f7fb78b72ea6a99e8a413ca68",
    "https://cardanoscan.io/pool/b1b2d76b11afa2fbc6b5f89f078b47a9defa00707975f3fd4ebe1df2",
    "https://cardanoscan.io/pool/9390bd2c8d16935b3bdfeaae6301c203f67806511e492e3cf3dbfc69",
]
print("Searching for Alerts!")

# NOTE: a brand-new workbook is created on every run, so the save() below
# replaces whatever Alert_Checking.xlsx already contains.
wb = Workbook()
ws = wb.active
ws.title = "Links Alerts"
# Data Headings
headings = ['Branch Name', 'Branch Link', 'Threshold', 'Percentage', 'Status']
ws.append(headings)
# Make the five heading cells bold
for col in range(1, 6):
    ws[get_column_letter(col) + '1'].font = Font(bold=True)


def find_alerts():
    """Fetch every page in ``links``, record its saturation text, and save.

    One row is appended to the worksheet per link; the workbook is written to
    Alert_Checking.xlsx once all links have been processed.
    """
    for link in links:
        # Request the page and parse its HTML
        html_text = requests.get(link).text
        soup = BeautifulSoup(html_text, "lxml")
        # Target the element where the percentage lies
        node = soup.find('div', class_='position-absolute font-weight-bold')
        if node is None:
            # find() returns None when the element is missing; skip this link
            # instead of crashing on `.text`
            print(f"Could not find the saturation value in link: {link}")
            continue
        result = node.text.replace(' ', '')
        if "0%" not in result:
            # Condition for change: record the issue
            ws.append(["Cardano", link, "Saturation", result, "Issue found"])
            print(
                f"Issue found in link: {link} because of {result} saturation.")
        else:
            ws.append(["Cardano", link, "Saturation", result, " No Issue found"])
            print("no Issue in this link!")
    ws.insert_rows(11)  # making space for new data
    wb.save('Alert_Checking.xlsx')  # saving file


if __name__ == '__main__':
    # The file's imports bind the name `time` to the time.time function, which
    # has no sleep attribute, so import sleep explicitly here.
    from time import sleep

    while True:
        find_alerts()
        time_wait = 60  # minutes between checks
        print(" ")
        print(f"Wating for {time_wait} minutes to recheck again...")
        print(" ")
        input("Press Enter key to close the Tab!")
        sleep(time_wait * 60)
You are creating a new workbook every time you run your script, so of course you won't retain data when running the script again. You can use a try ... except block to try and open the file, or create if it doesn't exist.
filename = "Alert_Checking.xlsx"
try:
    # File already exists: load it and keep the returned workbook, otherwise
    # `wb` is undefined (or still refers to some other workbook)
    wb = load_workbook(filename)
    ws = wb["Links Alerts"]
except FileNotFoundError:
    # File does not exist: create it and write the header row once
    wb = Workbook()
    ws = wb.active
    ws.title = "Links Alerts"
    # Data Headings
    headings = ['Branch Name', 'Branch Link', 'Threshold', 'Percentage', 'Status']
    ws.append(headings)
    # Make the five heading cells bold
    for col in range(1, 6):
        ws[get_column_letter(col) + '1'].font = Font(bold=True)

Insert hyperlink to a local folder in Excel with Python

This piece of code reads an Excel file. The Excel file holds information such as customer job numbers, customer names, sites, works descriptions, etc.
What this code will do when completed (I hope) is read the last line of the worksheet (this is taken from a counter on the worksheet at cell 'P1'), create folders based on cell content, and create a hyperlink on the worksheet to open the lowest local folder that was created.
I have extracted the info I need from the worksheet to understand what folders need to be created, but I am not able to write a hyperlink to the cell on the row in column B.
#Insert Hyperlink to folder
def folder_hyperlink(last_row_position, destination):
    """Set *destination* as the hyperlink of the column-B cell in a given row.

    Args:
        last_row_position: Row number of the cell to link (converted with str()).
        destination: Link target, e.g. the path of the folder to open.

    The module-level worksheet ``ws`` is modified in memory only; the workbook
    still has to be saved for the link to reach the file.
    """
    final_position = 'B' + str(last_row_position)
    print(final_position)  # This is just to check the value
    # Address the cell by its coordinate string; ws.cell() takes row and
    # column numbers, not an "B12"-style reference.
    ws[final_position].hyperlink = destination
The complete code is below; the section above is the part that is meant to create the hyperlink. I have also tried the 'xlsxwriter' package with no joy. I searched the internet, and the above snippet is the result of what I found.
Anyone know what I am doing wrong?
__author__ = 'Paul'
import os
import openpyxl
from openpyxl import load_workbook
import xlsxwriter
# Root folder that the customer/job folder path (folder_location) is built on
site_info_root = 'C:\\Users\\paul.EPSCONSTRUCTION\\PycharmProjects\\Excel_Jobs\\Site Information\\'
# This function returns the last row on eps.xlsm to be populated
def get_last_row(cell_ref = 'P1'): #P1 contains the count of the used rows
    """Load eps.xlsm and return the value stored in *cell_ref*.

    Side effect: binds the module-level ``wb`` (workbook) and ``ws``
    (worksheet "jobnoeps") that the other functions read.
    """
    global wb
    global ws
    wb = load_workbook("eps.xlsm", data_only = True) #Workbook
    ws = wb["jobnoeps"] #Worksheet
    # Index the sheet by coordinate; ws.cell() takes row/column numbers
    return ws[cell_ref].value #Value of P1 from that worksheet
# This function will read the job number in format EPS-XXXX-YR
def read_last_row_jobno(last_row_position):
    """Return the str() values of columns 1-4 of the row, concatenated."""
    pieces = [
        str(ws.cell(column = col, row = last_row_position).value)
        for col in range(1, 5)
    ]
    return "".join(pieces)
#This function will return the Customer
def read_last_row_cust(last_row_position):
    """Return column 5 of the given row as a string."""
    return str(ws.cell(row = last_row_position, column = 5).value)
#This function will return the Site
def read_last_row_site(last_row_position):
    """Return column 6 of the given row as a string."""
    return str(ws.cell(row = last_row_position, column = 6).value)
#This function will return the Job Discription
def read_last_row_disc(last_row_position):
    """Return column 7 of the given row as a string."""
    return str(ws.cell(row = last_row_position, column = 7).value)
# Read the counter cell, then pull the job details out of that row
last_row = get_last_row()
job_no_details = read_last_row_jobno(last_row)
job_customer = read_last_row_cust(last_row)
job_site = read_last_row_site(last_row)
job_disc = read_last_row_disc(last_row)
# The customer folder is named after the customer
cust_folder = job_customer
# "<job number>\<site> - <description>"
job_dir = "{0}\\{1} - {2}".format(job_no_details, job_site, job_disc)
#Insert Hyperlink to folder
def folder_hyperlink(last_row_position, destination):
    """Set *destination* as the hyperlink of the column-B cell in a given row.

    Args:
        last_row_position: Row number of the cell to link (converted with str()).
        destination: Link target, e.g. the path of the folder to open.

    The module-level worksheet ``ws`` is modified in memory only; the workbook
    still has to be saved for the link to reach the file.
    """
    final_position = 'B' + str(last_row_position)
    print(final_position)  # This is just to check the value
    # Address the cell by its coordinate string; ws.cell() takes row and
    # column numbers, not an "B12"-style reference.
    ws[final_position].hyperlink = destination
# Full path of the job folder: root + customer + job directory
folder_location = site_info_root + job_customer + "\\" + job_dir
print(folder_location)  # This is just to check the value
folder_hyperlink(last_row, folder_location)
Now my hyperlink function looks like this after trying xlsxwriter as advised.
##Insert Hyperlink to folder
def folder_hyperlink(last_row_position, destination):
    """Write a link to *destination* into column B of the given row.

    NOTE: this creates eps.xlsx from scratch with a single 'jobnoeps' sheet,
    so any existing file of that name is replaced.
    """
    import xlsxwriter
    final_position = 'B' + str(last_row_position)
    print(final_position)  # This is just to check the value
    workbook = xlsxwriter.Workbook('eps.xlsx')
    worksheet = workbook.add_worksheet('jobnoeps')
    print(worksheet)
    # Link to the path that was passed in (not the literal text
    # 'folder_location'); local folders take the "external:" prefix.
    worksheet.write_url(final_position, 'external:' + destination)
    workbook.close()
The function overwrites the existing eps.xlsx, creates a jobnoeps sheet and then inserts the hyperlink. I have played with the following lines, but I don't know how to get it to open the existing xlsx and the existing jobnoeps tab and then enter the hyperlink.
# NOTE(review): Workbook() appears to start a new file at this path rather than
# open the existing one — confirm against the XlsxWriter documentation.
workbook = xlsxwriter.Workbook('eps.xlsx')
worksheet = workbook.add_worksheet('jobnoeps')
# NOTE(review): 'folder_location' is a quoted literal here, not the variable of
# that name — confirm this is intended.
worksheet.write_url(final_position, 'folder_location')
The XlsxWriter write_url() method allows you to link to folders or other workbooks and worksheets as well as internal links and links to web urls. For example:
import xlsxwriter
workbook = xlsxwriter.Workbook('links.xlsx')
worksheet = workbook.add_worksheet()
worksheet.set_column('A:A', 50)
# (cell, target) pairs, written in order
link_targets = (
    ('A1', r'external:C:\Temp'),                    # Link to a Folder.
    ('A3', r'external:C:\Temp\Book.xlsx'),          # Link to a workbook.
    ('A5', r'external:C:\Temp\Book.xlsx#Sheet1!C5'),  # Link to a cell in a worksheet.
)
for cell_ref, target in link_targets:
    worksheet.write_url(cell_ref, target)
workbook.close()
See the docs linked to above for more details.
Here is the code that did the trick:-
# Creates hyperlink in existing workbook...
def set_hyperlink(address="hyperlink address", filename="filename.xlsx",
                  sheet_name="sheet_name", row=1, column=1):
    """Attach a hyperlink to one cell of an existing workbook and save it.

    Args:
        address: Link target stored on the cell.
        filename: Workbook to load and to save back to.
        sheet_name: Name of the worksheet holding the cell.
        row: 1-based row number of the cell.
        column: 1-based column number of the cell.
    """
    from openpyxl import load_workbook
    wb = load_workbook(filename)
    # Index the workbook by sheet name (get_sheet_by_name is deprecated)
    ws = wb[sheet_name]
    ws.cell(row=row, column=column).hyperlink = address
    wb.save(filename)
set_hyperlink()
Tried again with openpyxl as advised.

I need to copy data from one workbook into another. But it is giving some error

I am unable to copy the data. It gives an error at the line "sel.Copy()". Please help.
Code:
excel = client.Dispatch("Excel.Application")
currworkingdirectory = os.getcwd()
# Build the path from the directory captured above (`cwd` was never defined)
workbook = excel.Workbooks.Open(currworkingdirectory + "\\test.csv")
print (workbook)
excel.Visible = 1
worksheets = workbook.Worksheets(1)
# Copy required data from sheet
# NOTE: sel receives the return value of Select(), not the Range itself
sel = worksheets.Range("E:E,H:H,I:I").Select()
sel.Copy()
worksheets.Range("E:E,H:H,I:I").Select() does NOT set sel to the range "E:E,H:H,I:I"; it only selects that range.
To fix this, change
sel = worksheets.Range("E:E,H:H,I:I").Select()
To
sel = worksheets.Range("E:E,H:H,I:I")
In the end you are best to not use a copy paste at all, try just setting Range("Destination").Value = sel.Value instead.

From password-protected Excel file to pandas DataFrame

I can open a password-protected Excel file with this:
import sys
import win32com.client
xlApp = win32com.client.Dispatch("Excel.Application")
# print() with a single argument works on both Python 2 and Python 3
print("Excel library version: %s" % xlApp.Version)
# First two command-line arguments: workbook path and its password
filename, password = sys.argv[1:3]
xlwb = xlApp.Workbooks.Open(filename, Password=password)
# xlwb = xlApp.Workbooks.Open(filename)
xlws = xlwb.Sheets(1) # counts from 1, not from 0
print(xlws.Name)
print(xlws.Cells(1, 1)) # that's A1
I'm not sure though how to transfer the information to a pandas dataframe. Do I need to read cells one by one and all, or is there a convenient method for this to happen?
Simple solution
import io
import pandas as pd
import msoffcrypto
passwd = 'xyz'
# In-memory buffer that receives the decrypted copy of the workbook
decrypted_workbook = io.BytesIO()
# NOTE: `i` must hold the path of the encrypted workbook; it is not defined
# in this snippet.
with open(i, 'rb') as file:
    office_file = msoffcrypto.OfficeFile(file)
    office_file.load_key(password=passwd)
    office_file.decrypt(decrypted_workbook)
# Read one sheet of the decrypted workbook into a DataFrame
df = pd.read_excel(decrypted_workbook, sheet_name='abc')
pip install --user msoffcrypto-tool
Exporting all sheets of each excel from directories and sub-directories to seperate csv files
from glob import glob
import os  # os.walk / os.path.join are used below

PATH = "Active Cons data"
# Scanning all the excel files from directories and sub-directories
excel_files = [y for x in os.walk(PATH) for y in glob(os.path.join(x[0], '*.xlsx'))]
for excel_path in excel_files:
    print(str(excel_path))
    decrypted_workbook = io.BytesIO()
    with open(excel_path, 'rb') as file:
        office_file = msoffcrypto.OfficeFile(file)
        office_file.load_key(password=passwd)
        office_file.decrypt(decrypted_workbook)
    # sheet_name=None loads every sheet: {sheet name: DataFrame}
    sheets = pd.read_excel(decrypted_workbook, sheet_name=None)
    print(list(sheets))  # list of sheet names
    # Write each already-loaded sheet; no need to parse the workbook again
    # NOTE: the CSV name depends only on the sheet name, so equally named
    # sheets from different workbooks are written to the same file.
    for sheet, df in sheets.items():
        new_file = f"D:\\all_csv\\{sheet}.csv"
        df.to_csv(new_file, index=False)
Assuming the starting cell is given as (StartRow, StartCol) and the ending cell is given as (EndRow, EndCol), I found the following worked for me:
# Corner cells of the rectangular selection region
top_left = xlws.Cells(StartRow, StartCol)
bottom_right = xlws.Cells(EndRow, EndCol)
# The range's Value is a tuple of tuples
content = xlws.Range(top_left, bottom_right).Value
# Convert to a list of tuples and hand it to pandas
rows = list(content)
dataframe = pandas.DataFrame(rows)
Note: Excel Cell B5 is given as row 5, col 2 in win32com. Also, we need list(...) to convert from tuple of tuples to list of tuples, since there is no pandas.DataFrame constructor for a tuple of tuples.
from David Hamann's site (all credits go to him)
https://davidhamann.de/2018/02/21/read-password-protected-excel-files-into-pandas-dataframe/
Use xlwings, opening the file will first launch the Excel application so you can enter the password.
import pandas as pd
import xlwings as xw
PATH = '/Users/me/Desktop/xlwings_sample.xlsx'
# Open the workbook and pick the sheet by name
wb = xw.Book(PATH)
sheet = wb.sheets['sample']
# Convert the fixed range A1:C4 to a DataFrame, first row as header
cell_range = sheet['A1:C4']
df = cell_range.options(pd.DataFrame, index=False, header=True).value
df
Assuming that you can save the encrypted file back to disk using the win32com API (which I realize might defeat the purpose) you could then immediately call the top-level pandas function read_excel. You'll need to install some combination of xlrd (for Excel 2003), xlwt (also for 2003), and openpyxl (for Excel 2007) first though. Here is the documentation for reading in Excel files. Currently pandas does not provide support for using the win32com API to read Excel files. You're welcome to open up a GitHub issue if you'd like.
Based on the suggestion provided by @ikeoddy, this should put the pieces together:
How to open a password protected excel file using python?
# Import modules
import pandas as pd
import win32com.client
import os
import getpass
# Name file variables (placeholder values — replace with the real folder and file name)
file_path = r'your_file_path'
file_name = r'your_file_name.extension'
# Join folder and file name into the path that is opened in Excel
full_name = os.path.join(file_path, file_name)
# print(full_name)
Getting command-line password input in Python
# You are prompted to provide the password to open the file
# Create the Excel COM object first, then ask for the workbook password
xl_app = win32com.client.Dispatch('Excel.Application')
pwd = getpass.getpass('Enter file password: ')
Workbooks.Open Method (Excel)
# NOTE(review): positional arguments presumably follow Excel's Workbooks.Open
# order (FileName, UpdateLinks, ReadOnly, Format, Password) — confirm in the docs
xl_wb = xl_app.Workbooks.Open(full_name, False, True, None, pwd)
# Keep the Excel window hidden
xl_app.Visible = False
# Worksheet to read (placeholder name)
xl_sh = xl_wb.Worksheets('your_sheet_name')
# Get last_row: walk down column 1 until the first empty cell (Value is None)
row_num = 0
cell_val = ''
while cell_val is not None:
    row_num += 1
    cell_val = xl_sh.Cells(row_num, 1).Value
# The loop stops one past the last filled row
last_row = row_num - 1

# Get last_column: walk along row 1 until the first empty cell
col_num = 0
cell_val = ''
while cell_val is not None:
    col_num += 1
    cell_val = xl_sh.Cells(1, col_num).Value
# The loop stops one past the last filled column
last_col = col_num - 1
ikeoddy's answer:
# Read the whole block from A1 to (last_row, last_col) in one call
first_cell = xl_sh.Cells(1, 1)
last_cell = xl_sh.Cells(last_row, last_col)
content = xl_sh.Range(first_cell, last_cell).Value
# First row holds the column names, the rest is data
header, body = content[0], content[1:]
df = pd.DataFrame(list(body), columns=header)
df.head()
python win32 COM closing excel workbook
# Close the workbook; the False argument presumably means "do not save
# changes" — confirm against the Excel Workbook.Close documentation
xl_wb.Close(False)
Adding to @Maurice's answer: this gets all the cells in the sheet without having to specify the range.
# Open the password-protected workbook and take its first sheet
wb = xw.Book(PATH, password='somestring')
sheet = wb.sheets[0]
# sheet.used_range.address returns string of used range
used_address = sheet.used_range.address
used_cells = sheet[used_address]
df = used_cells.options(pd.DataFrame, index=False, header=True).value

Categories