Changes not written into a CSV file after setting UTF-8 encoding - python

I am trying to write changes to a file but I get the error :
'charmap' codec can't encode character '\u0159' in position 17: character maps to <undefined>
Other people said that you need to set the encoding to UTF-8 and so I set :
with open('ScrapedContent.csv', 'w+', newline='', encoding="utf-8") as write
After this is done the text is no longer being written to the ScrapedContent.csv file and the whole program becomes pretty much useless afterwards. Here is my code :
(I am providing the entire code since I don't know where the issue happens)
Desired solution :
There are "special" characters written to the file such as "č, ř, š". These are not actually special but rather normal in the 21st century but unfortunately it seems like that computers are still having a hard time understanding.
So in any case I need to write those characters to the file so they don't get broken. I don't care about what has to be done as long as the final file provides the result. I have spend about 6 hours trying to fix this now and I got nowhere.
This is the complete error output :
Traceback (most recent call last):
File "E:\Projects\Reality Scrapers\SRealityContentScraper\main.py", line 113, in <module>
writer.writerow([title.text, offers.text, address.text, phone_number, email])
File "C:\Users\workstationone\AppData\Local\Programs\Python\Python39\lib\encodings\cp1252.py", line 19, in encode
return codecs.charmap_encode(input,self.errors,encoding_table)[0]
UnicodeEncodeError: 'charmap' codec can't encode character '\u011b' in position 57: character maps to <undefined>
This is the code :
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as ec
from selenium.webdriver.firefox.firefox_binary import FirefoxBinary
from selenium.webdriver.chrome.options import Options
from selenium.common.exceptions import WebDriverException, TimeoutException
from platform import system
from os import getcwd, getlogin
import csv
cwd = getcwd()
os = system()
user = getlogin()
browser = input("Browser name ex.: Chromium | Chrome | Firefox: ")
if os == "Linux":
if user == "root":
print(
"You are executing the script as root. Make sure that the profile folder is also located in the root directory.")
del user
if browser == "Firefox" or browser == "Firefox ESR" or browser == "Firefox Browser":
try:
if os == "Windows":
driver = webdriver.Firefox(executable_path=cwd + "/geckodriver.exe")
else:
driver = webdriver.Firefox(executable_path=cwd + "/geckodriver")
except WebDriverException:
print("Warning 10: Firefox is not installed in the default location")
bin_location = input("Firefox executable location: ")
binary = FirefoxBinary(bin_location)
if os == "Windows":
driver = webdriver.Firefox(executable_path=cwd + "/geckodriver.exe", firefox_binary=bin_location)
else:
driver = webdriver.Firefox(executable_path=cwd + "/geckodriver", firefox_binary=bin_location)
del bin_location
elif browser == "Chrome" or browser == "Chrome Browser" or browser == "Google Chrome" or browser == "Chromium" or browser == "Chromium Browser":
try:
if os == "Windows":
driver = webdriver.Chrome(executable_path=cwd + "/chromedriver.exe")
else:
driver = webdriver.Chrome(executable_path=cwd + "/chromedriver")
except WebDriverException:
print("Warning 11: Chrome/Chromium is not installed in the default location")
bin_location = input("Chrome/Chromium executable location: ")
options = Options()
options.binary_location = bin_location
if os == "Windows":
driver = webdriver.Chrome(executable_path=cwd + "/chromedriver.exe")
else:
driver = webdriver.Chrome(executable_path=cwd + "/chromedriver")
del bin_location
else:
print("Error 10: Invalid browser selected")
input("Press ENTER to exit: ")
exit()
wait = WebDriverWait(driver, 10)
with open('links.csv', 'w+', newline='', encoding="utf-8") as write:
driver.get("https://www.sreality.cz/adresar")
writer = csv.writer(write)
page_spawn = 0
while page_spawn == 0:
try:
links = wait.until(ec.presence_of_all_elements_located((By.CSS_SELECTOR, "h2.title > a")))
# print(len(links))
for link in links:
print(link.get_attribute("href"))
writer.writerow([link.get_attribute("href")])
wait.until(ec.element_to_be_clickable(
(By.CSS_SELECTOR, "a.btn-paging-pn.icof.icon-arr-right.paging-next:not(.disabled"))).click()
except TimeoutException:
page_spawn = 1
break
with open('links.csv') as read:
reader = csv.reader(read)
link_list = list(reader)
with open('ScrapedContent.csv', 'w+', newline='', encoding="utf-8") as write:
writer = csv.writer(write)
for link in link_list:
driver.get(', '.join(link))
title = wait.until(ec.presence_of_element_located((By.CSS_SELECTOR, "h1.page-title span.text.ng-binding")))
offers = wait.until(ec.presence_of_element_located(
(By.CSS_SELECTOR, "a.switcher.ng-binding.ng-scope span.ng-binding.ng-scope")))
address = wait.until(
ec.presence_of_element_located((By.CSS_SELECTOR, "tr.c-aginfo__table__row td.ng-binding")))
try:
wait.until(
ec.presence_of_element_located((By.CSS_SELECTOR, "button.value.link.ng-binding.ng-scope"))).click()
phone_number = wait.until(ec.presence_of_element_located((By.CSS_SELECTOR, "span.phone.ng-binding")))
except TimeoutException:
pass
try:
wait.until(ec.presence_of_element_located((By.CSS_SELECTOR, "button.value.link.ng-binding"))).click()
email = wait.until(ec.presence_of_element_located((By.CSS_SELECTOR, "a.value.link.ng-binding")))
except TimeoutException:
pass
try:
phone_number = phone_number.text
except AttributeError:
phone_number = " "
pass
try:
email = email.text
except AttributeError:
email = " "
pass
print(title.text, " ", offers.text, " ", address.text, " ", phone_number, " ", email)
try:
writer.writerow([title.text, offers.text, address.text, phone_number, email])
except Exception as e:
print (e)
driver.quit()

This is heavily based on this answer.
Basically, you can't directly write unicode characters using csv.
You need a helper function:
def utf8ify(l):
return [str(s).encode('utf-8') for s d]
Then when you write the row add:
writer.writerow(utf8ify([title.text, offers.text, address.text, phone_number, email]))
The answer I linked to is better than mine in every way. If you want to learn why this works, read that answer.

Related

Whenever I look for an element in a page, even if it exists, it is still coming up as nonexistent

So I just started coding with Selenium and I made this program that goes onto the website JKLM.fun and it plays the game. Lately, I've been trying to type in the chat but I keep getting this error:
selenium.common.exceptions.NoSuchElementException: Message: no such element: Unable to locate element
This is the code I am running:
chat = Driver.find_element(by=By.TAG_NAME, value="textarea")
And this is what I am trying to access:
And before you say to use XPATH or CSS selector or access the DIV above, none of those worked. If you need all my code I'll just put it below this. Can someone please please help me? I have been stuck on this forever!
import random
import time
import re
import keyboard
from selenium import webdriver
from selenium.webdriver.common.action_chains import ActionChains
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
#Variables
Code = "JFEV"
Username = "Glitch BOT"
legitMode = False
totalLegitMode = False
lesslegitmode = False
botmode = False
Word = ""
Driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))
usedWords = []
joinedGame = False
invalid = open("keyboardTest/invalid.txt", "a")
#Functions
def findWord(Prompt):
notvalid = open("keyboardTest/invalid.txt").read().split('\n')
global usedWords
Words = open("keyboardTest/bigtosmall.txt").read().split("\n")
bestword = ""
for o in range(len(Words)):
if Prompt.lower() in Words[o] and Words[o] not in usedWords and Words[o] not in notvalid:
bestword = Words[o]
usedWords.append(bestword)
break
# while not (Prompt.lower() in Word):
# Word = (random.choice(Words))
# if Prompt.lower() in Word and not (Word in usedWords):
# usedWords.append(Word)
# break
if bestword == "":
print('No Word Found For:', Prompt)
return bestword
def joinServer():
global Code
if Code == "":
Driver.get("https://jklm.fun")
while Driver.current_url == "https://jklm.fun/":
pass
return
else:
Driver.get(f"https://jklm.fun/{Code}")
return
def joinGame():
global joinedGame
while joinedGame == False:
try:
joinBox = Driver.find_element(by=By.CLASS_NAME, value="join")
joinButton = joinBox.find_element(by=By.TAG_NAME, value="button")
ActionChains(Driver).move_to_element(joinButton).click(joinButton).perform()
joinedGame = True
except:
pass
#Code
joinServer()
OK = Driver.find_element(by=By.CLASS_NAME, value="line")
Driver.implicitly_wait(5)
usernameBox = OK.find_element(by=By.TAG_NAME, value="input")
Driver.implicitly_wait(5)
while True:
if usernameBox.get_attribute("value") != "":
time.sleep(0.1)
usernameBox.send_keys(Keys.BACK_SPACE)
else:
usernameBox.send_keys(Username)
usernameBox.send_keys(Keys.RETURN)
break
iFrame = Driver.find_element(by=By.TAG_NAME, value="iframe")
Driver.switch_to.frame(iFrame)
joinGame()
print('JOINED THE GAME')
time.sleep(2)
print('AFTER DELAY')
try:
chat = Driver.find_element(by=By.TAG_NAME, value="textarea")
chat.clear()
except:
print('DID NOT WORK!!!! LLLL')
print("DEFINED CHAT!")
chat.send_keys('Testing')
print("SAID TESTING!")
chat.send_keys(Keys.RETURN)
print("PRINTED IT OUT!")
while joinedGame == True:
try:
#time.sleep(0.3)
joinBox = Driver.find_element(by=By.CLASS_NAME, value="join")
if not joinBox.is_displayed():
Player = Driver.find_element(by=By.CLASS_NAME, value="player")
selfTurn = Driver.find_element(by=By.CLASS_NAME, value="selfTurn")
if Player.text == "" and selfTurn.is_displayed():
Input = selfTurn.find_element(by=By.CLASS_NAME, value="styled")
Prompt = Driver.find_element(by=By.CLASS_NAME, value="syllable").text
print("Current Prompt is:",Prompt)
guess = findWord(Prompt)
print("The guess for that prompt is:", guess)
if legitMode:
time.sleep(random.uniform(0.3,0.8))
for i in range(len(guess)):
time.sleep(random.uniform(0.01,.12))
Input.send_keys(guess[i])
elif totalLegitMode:
time.sleep(random.uniform(0.2,1))
for i in range(len(guess)):
time.sleep(random.uniform(0.05,.14))
Input.send_keys(guess[i])
elif lesslegitmode:
time.sleep(random.uniform(0.1,.6))
for i in range(len(guess)):
time.sleep(random.uniform(0.02,.11))
Input.send_keys(guess[i])
else:
Input.send_keys(guess)
Input.send_keys(Keys.RETURN)
usedWords.append(guess)
print("just used word:", guess)
if not botmode:
time.sleep(.2)
if selfTurn.is_displayed() and Driver.find_element(by=By.CLASS_NAME, value="syllable").text == Prompt:
# a = open("keyboardTest/invalid.txt").read().split('\n')
# if guess not in a: #if its not already in the list
invalid.write('\n')
invalid.write(guess) #if word didn't work, put it into invalid list then ill manually check it
invalid.close()
invalid = open("keyboardTest/invalid.txt", "a")
# guess = findWord(Prompt)
# print(guess)
else:
usedWords = []
joinButton = joinBox.find_element(by=By.TAG_NAME, value="button")
ActionChains(Driver).move_to_element(joinButton).click(joinButton).perform()
except Exception as e:
pass
I figured it out. All you have to do is switch to parent frame before accessing chatbox. That is where it is located. It isn't located in the iframe.
Driver.switch_to.parent_frame() #check for commands
And you're good to go! (Don't forget to switch back to Iframe when done accessing chatbox)

Hello world! I am stuck with this error. Can anyone assist me fix this? I would appreciate any form of assessment

This is the code:
#A simple Kahoot bot that joins Kahoot game and sits idle
#Version 1.4.4
#ENTech SS
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
import random
#Asking for info here:
print("Kahoot spammer v 1.4.4")
pin = input("Please enter a game pin:")
name = input("Please enter a name:")
join = input("Please enter a amount of bots to join(Default is 50):")
tab = 0
nameb = str(name)
bot_num = 0
#Start chrome
print("Starting chrome...")
chrome_options = Options()
chrome_options.add_argument("--headless")
driver = webdriver.Chrome(executable_path="/Users/adamkatborg/Desktop/chromedriver-2", options="options")
#driver = webdriver.Chrome(chrome_options=chrome_options)
#If join feild is blank, then default is 50
if join=='':
join=50
def namec():
#Code for clarifying name
global join, bot_num, nameb
num=random.randint(1,999)*3
if join=='1':
nameb=name
bot_num = bot_num + 1
if int(join)>=2:
if bot_num==join:
print("Name generation completed")
nameb=(name + '.' + str(num))
bot_num = bot_num + 1
def bot():
global nameb, driver, tab
if bot_num==1:
print("No new window necessary")
elif bot_num >=2:
print("Opening new window...")
driver.execute_script("window.open('');")
driver.switch_to.window(driver.window_handles[tab])
print("Navigating to Kahoot...")
#Navigate to kahoot.com
driver.get("https://kahoot.it/")
#Wait untill element is available
wait = WebDriverWait(driver, 10)
element = wait.until(EC.element_to_be_clickable((By.ID, 'inputSession')))
#Finding input box
inputb = driver.find_element_by_id('inputSession')
print("Joining game...")
#Inputting pin
inputb.send_keys(pin)
inputb.submit()
#Entering name
element = wait.until(EC.element_to_be_clickable((By.ID, 'username')))
gname = driver.find_element_by_id('username')
namec()
gname.send_keys(nameb)
gname.submit()
#Checking login
print("Checking if login was succesfull...")
try:
content = driver.find_element_by_class_name('ng-binding')
except:
print("Error checking page:\nId could have changed, or connection could have dropped.")
x=input("Press any key to exit...")
print("Success!")
print("Bot [" + bot_num + "] is now in the game ;)")
tab = tab + 1
#Code for running a set amoun of times
for x in range(int(join)):
bot()
And this is the error I am getting:
Traceback (most recent call last):
File "/Users/adamkatborg/Desktop/kbot-master/testbot.py", line 23, in <module>
driver = webdriver.Chrome(executable_path="/Users/adamkatborg/Desktop/chromedriver-2", options="options")
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/selenium/webdriver/chrome/webdriver.py", line 64, in __init__
desired_capabilities = options.to_capabilities()
AttributeError: 'str' object has no attribute 'to_capabilities'
Can someone please help me?
On line 23 you are passing "options" as the options parameter, it looks like it is expecting an object instead of a string.
What you probably meant to do was pass the chrome_options object. The line should look like this:
driver = webdriver.Chrome(executable_path="/Users/adamkatborg/Desktop/chromedriver-2", options=chrome_options)

Error With Selenium: AttributeError: 'WebDriver' object has no attribute 'Firefox'

I'm trying to run the code below, which I believe should work, but I'm actually getting this error:
AttributeError: 'WebDriver' object has no attribute 'Firefox'
def main():
import csv
import os
import selenium
from selenium import webdriver
profile = webdriver.FirefoxProfile()
profile.accept_untrusted_certs = True
wd = webdriver.Firefox(executable_path="C:/Utility/geckodriver.exe", firefox_profile=profile)
ticker = input("Enter your ticker: ")
url = "http://financials.morningstar.com/cash-flow/cf.html?t=" + ticker.upper()
print(url)
browser = wd.Firefox()
browser.get(url)
values_elementticker = browser.find_elements_by_xpath("//span[#class='gry']")
values2 = values = [x.text for x in values_elementticker]
print(values2[0])
values_element = browser.find_elements_by_xpath("//div[#id='data_i97']")
values = [x.text for x in values_element]
print("Cash Flows:")
print(values[0])
with open("C:\\Users\\Excel\\Desktop\\results.xls", "a") as f:
for i in range(len(values2)):
f.write(values2[0] + "\n")
for i in range(len(values)):
f.write(values[0] + "\n")
browser.close()
restart = input("Do you wish to start again?").lower()
if restart == "yes":
main()
else:
exit()
main()
The error is thrown immediately after I enter a valid ticker. Does this work for others? Or, is something wrong with the code? It looks OK to me. Thanks.

How to prevent my Selenium/Python program from crashing?

I made a program which gets one record from Google Sheet process on it then delete it and so on. If I update Google Sheet then the program will deduct record in the next loop and process on it and then delete,
but it runs only 1 or 2 hours and then program gives an error:
What can I add in my program so my program never stops?
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import time
import traceback
import string
import gspread
from oauth2client.service_account import ServiceAccountCredentials
from selenium.common.exceptions import NoAlertPresentException
from selenium.common.exceptions import UnexpectedAlertPresentException
Email=raw_input('Please Enter your Email: ')
password=raw_input('Please Enter Password: ')
print("\n******Don't Interrupt the Script******")
print('#script is Runing............\n')
chrome_options = webdriver.ChromeOptions() #going to chrome options
chrome_options.add_argument("--start-maximized")
prefs = {"profile.default_content_setting_values.notifications" : 2 #turn off all notifications
,"profile.managed_default_content_settings.images": 2} #disable images
chrome_options.add_experimental_option("prefs",prefs)
driver = webdriver.Chrome(chrome_options=chrome_options) # passing paramaters to chrome
driver.get('https://accounts.google.com')
time.sleep(3)
#giving Email-------------------
email = driver.find_element_by_id('Email')
email.send_keys(Email, Keys.RETURN)
#giving password----------------
time.sleep(3)
email = driver.find_element_by_id('Passwd')
email.send_keys(password, Keys.RETURN)
#credentials + attach with googleSheet------------------------------
scope = ['https://spreadsheets.google.com/feeds']
credentials = ServiceAccountCredentials.from_json_keyfile_name('stephens-31d8490b5bd2.json', scope)
google_sheet = gspread.authorize(credentials)
workSheet = google_sheet.open("Video Access Master Sheet").worksheet("Sheet1")
while True:
#fetch Records from Rows 2 to 50 and save on list-----------------
for i in range(2,51):
li_url=[]
li_email=[]
row=workSheet.row_values(i)
for b in row:
if 'youtu' in b:
li_url.append(b)
#find record which you append on list and then delete from googleSheet--------------------
cell = workSheet.find(b)
row = cell.row
col = cell.col
workSheet.update_cell(row,col, '')
print 'Fetching Values From Row '+str(i)+'....'
elif '#' in b:
li_email.append(b)
elif b=='':
continue
else:
continue
#*********************************************************
#getting length list of li_url and apply condition on it-----------------------------------------------
length=len(li_url)
if length==0:
continue
else:
try:
#getting URLs from list and put into driver.get---------------------------------------------------------
for a in li_url:
driver.get(a)
time.sleep(3)
driver.find_element_by_css_selector('.yt-uix-button-icon.yt-uix-button-icon-info.yt-sprite').click()
time.sleep(3)
driver.find_element_by_css_selector('.yt-uix-button.yt-uix-button-size-default.yt-uix-button-default.metadata-share-button').click()
time.sleep(2)
put_email=driver.find_element_by_css_selector('.yt-uix-form-input-textarea.metadata-share-contacts')
#getting emails from email list--------------------------------------------------------------
put_email.send_keys(li_email[0])
time.sleep(2)
driver.find_element_by_css_selector('.yt-uix-button.yt-uix-button-size-default.yt-uix-button-primary.sharing-dialog-button.sharing-dialog-ok').click()
time.sleep(4)
driver.find_element_by_xpath('.//*[#id="video-header"]/div/button[2]/span').click()
time.sleep(10)
#for notifications and alters--------------------------------------------
try:
driver.switch_to.alert.accept()
except NoAlertPresentException:
pass
except UnexpectedAlertPresentException:
pass
except:
traceback.print_exc
pass
print 'Row '+str(i)+' Successfully Updated. \n'
time.sleep(120) #while loop sleep for 20minuts
This is the error I got:
Traceback (most recent call last):
File "<stdin>", line 2, in <module>
File "<string>", line 56, in parse
File "<string>", line 35, in parse
cElementTree.ParseError: no element found: line 1, column 0
For some reason cell = workSheet.find(b) fails. Could be bad data in there; without seeing the input it's anyone's guess.
Since you already know the row number, you can avoid using cell = workSheet.find(b) by simply keeping track of the columns you're searching through and finally calling workSheet.update_cell(i, col, '') after copying the data.

Selenium and PhantomJS Error: "Cannot connect to GhostDriver"

I am running Selenium and PhantomJS to input search terms into a website and retrieve the number of hits for each search term. I have to do this 130,000+ times, so the code has been running nicely for a day until suddenly the program broke with the following error:
Traceback (most recent call last):
File "CBBPlyNwsScrape.py", line 82, in <module>
browser = webdriver.PhantomJS()
File "/Library/Python/2.7/site-packages/selenium/webdriver/phantomjs/webdriver.py", line 50, in __init__
self.service.start()
File "/Library/Python/2.7/site-packages/selenium/webdriver/phantomjs/service.py", line 69, in start
raise WebDriverException("Can not connect to GhostDriver")
selenium.common.exceptions.WebDriverException: Message: 'Can not connect to GhostDriver'
I'm running this on Mac OSX and Python 2.7.3. I have the latests versions of Selenium and PhantomJS installed. Can anyone tell me what is going on and why GhostDriver was working fine for so long and suddenly stopped?
In the ghostdriver.log file, this is all it contains:
PhantomJS is launching GhostDriver...
[ERROR - 2013-12-01T05:14:34.491Z] GhostDriver - Main - Could not start Ghost Driver => {
"message": "Could not start Ghost Driver",
"line": 82,
"sourceId": 4445044288,
"sourceURL": ":/ghostdriver/main.js",
"stack": "Error: Could not start Ghost Driver\n at :/ghostdriver/main.js:82",
"stackArray": [
{
"sourceURL": ":/ghostdriver/main.js",
"line": 82
}
]
}
Thanks
Installing latest phantom js fixed this error, this was happening with default ubuntu 12.04 phantomjs destro
I was having the same problem. I don't know why the program has trouble calling the phantomJS webdriver, but the answer is to write a simple exception WebDriverException. This following code did the trick for me
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import Select
from selenium.common.exceptions import NoSuchElementException, WebDriverException
import unittest, time, re, urllib2
f = open("mother.txt","r") #opens file with name of "test.txt"
l = "1"
m = "2"
n = "3"
aTuple = ( l, m, n ) # create tuple
e = int(0)
for line in f:
e += 1
try:
h = str(e)
j = line
g = open("yes4/" + h + ".txt","w") #opens file with name of "test.txt"
for item in aTuple:
driver = webdriver.PhantomJS('phantomjs')
base_url = j + item
verificationErrors = []
accept_next_alert = True
driver.get(base_url)
elem=driver.find_element_by_id("yelp_main_body")
source_code=elem.get_attribute("outerHTML").encode('utf-8').strip()
g.write(source_code)
driver.quit()
except WebDriverException:
print "e"
h = str(e)
j = line
g = open("yes4/" + h + ".txt","w") #opens file with name of "test.txt"
for item in aTuple:
driver = webdriver.PhantomJS('phantomjs')
base_url = j + item
verificationErrors = []
accept_next_alert = True
driver.get(base_url)
elem=driver.find_element_by_id("yelp_main_body")
source_code=elem.get_attribute("outerHTML").encode('utf-8').strip()
g.write(source_code)
driver.quit()
else:
print h

Categories