I am running Selenium and PhantomJS to input search terms into a website and retrieve the number of hits for each search term. I have to do this 130,000+ times, so the code has been running nicely for a day until suddenly the program broke with the following error:
Traceback (most recent call last):
File "CBBPlyNwsScrape.py", line 82, in <module>
browser = webdriver.PhantomJS()
File "/Library/Python/2.7/site-packages/selenium/webdriver/phantomjs/webdriver.py", line 50, in __init__
self.service.start()
File "/Library/Python/2.7/site-packages/selenium/webdriver/phantomjs/service.py", line 69, in start
raise WebDriverException("Can not connect to GhostDriver")
selenium.common.exceptions.WebDriverException: Message: 'Can not connect to GhostDriver'
I'm running this on Mac OS X with Python 2.7.3. I have the latest versions of Selenium and PhantomJS installed. Can anyone tell me what is going on, and why GhostDriver worked fine for so long and then suddenly stopped?
In the ghostdriver.log file, this is all it contains:
PhantomJS is launching GhostDriver...
[ERROR - 2013-12-01T05:14:34.491Z] GhostDriver - Main - Could not start Ghost Driver => {
"message": "Could not start Ghost Driver",
"line": 82,
"sourceId": 4445044288,
"sourceURL": ":/ghostdriver/main.js",
"stack": "Error: Could not start Ghost Driver\n at :/ghostdriver/main.js:82",
"stackArray": [
{
"sourceURL": ":/ghostdriver/main.js",
"line": 82
}
]
}
Thanks
Installing the latest PhantomJS fixed this error; it was happening with the default PhantomJS package shipped in the Ubuntu 12.04 distro.
I was having the same problem. I don't know why the program has trouble starting the PhantomJS webdriver, but the answer is to catch the WebDriverException and retry. The following code did the trick for me:
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import Select
from selenium.common.exceptions import NoSuchElementException, WebDriverException
import unittest, time, re, urllib2
f = open("mother.txt","r") #opens file with name of "test.txt"
l = "1"
m = "2"
n = "3"
aTuple = ( l, m, n ) # create tuple
e = int(0)
for line in f:
e += 1
try:
h = str(e)
j = line
g = open("yes4/" + h + ".txt","w") #opens file with name of "test.txt"
for item in aTuple:
driver = webdriver.PhantomJS('phantomjs')
base_url = j + item
verificationErrors = []
accept_next_alert = True
driver.get(base_url)
elem=driver.find_element_by_id("yelp_main_body")
source_code=elem.get_attribute("outerHTML").encode('utf-8').strip()
g.write(source_code)
driver.quit()
except WebDriverException:
print "e"
h = str(e)
j = line
g = open("yes4/" + h + ".txt","w") #opens file with name of "test.txt"
for item in aTuple:
driver = webdriver.PhantomJS('phantomjs')
base_url = j + item
verificationErrors = []
accept_next_alert = True
driver.get(base_url)
elem=driver.find_element_by_id("yelp_main_body")
source_code=elem.get_attribute("outerHTML").encode('utf-8').strip()
g.write(source_code)
driver.quit()
else:
print h
Related
the bot enters the profile, accesses the post but when pulling the comments from the txt file it closes, without making any comments.
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from time import sleep
import instaloader
import random
import os.path
driver = webdriver.Chrome(r"C:\\Users\\Windows\\Downloads\\chromedriver.exe")
def sorteioInstagram():
    """Run the whole giveaway flow: log in, ensure followers file, comment."""
    abrindo_instagram("inspiracaobiel", "biel123",
                      "https://www.instagram.com/p/CoI52RJOqnT/")
    # Only scrape the follower list when it has not been saved yet.
    if not os.path.isfile('seguidores.txt'):
        pegar_seguidores('bielbibiel')
    else:
        print("Arquivo de Seguidores já carregado...")
    comentandoPost()
def abrindo_instagram(username, password, url):
    """Log in to Instagram with the module-level driver and open *url*.

    username/password -- Instagram credentials typed into the login form.
    url -- the giveaway post opened after login.
    """
    print("Abrindo Instagram...")
    driver.get("https://www.instagram.com/")
    sleep(2)
    print("Fazendo Login no Instagram...")
    # BUG FIX: XPath attribute tests need '@name' (not '#name'), and the
    # inner quotes must differ from the outer ones — the original
    # "//input[#name="username"]" was both a Python syntax error and an
    # invalid locator.
    driver.find_element(By.XPATH, "//input[@name='username']").send_keys(username)
    driver.find_element(By.XPATH, "//input[@name='password']").send_keys(password)
    driver.find_element(By.XPATH, "//button[@type='submit']").click()
    sleep(10)
    print("Negando Solicitação de Segurança Instagram...")
    # Dismiss the two "save login / notifications" dialogs ("Agora não").
    driver.find_element(By.XPATH, "//button[text()='Agora não']").click()
    sleep(5)
    driver.find_element(By.XPATH, "//button[text()='Agora não']").click()
    sleep(4)
    print("Acessando o Post do sorteio...")
    driver.get(url)
def pegar_seguidores(usuario):
    """Append every follower of *usuario* to seguidores.txt, one '#name' per line.

    Logs into instaloader with the hard-coded account, then streams the
    follower list of *usuario* into the file read later by lendo_arquivo().
    """
    L = instaloader.Instaloader()
    L.login('inspiracaobiel', 'biel123')
    profile = instaloader.Profile.from_username(L.context, usuario)
    print(f"Salvando Seguidores de {usuario}...")
    # BUG FIX (robustness): 'with' guarantees the file is flushed and closed
    # even if the follower iteration raises (e.g. a rate-limit error) —
    # the original left the handle open on any exception.
    with open("seguidores.txt", "a+") as file:
        for followee in profile.get_followers():
            file.write("#" + followee.username + "\n")
def comentandoPost():
    """Comment on the currently open post forever, one random follower per pass.

    Types a username pulled from seguidores.txt into the comment box, submits
    it with ENTER, and sleeps between iterations. Runs until the process is
    killed or an element lookup fails.
    """
    # The same textarea is located repeatedly; keep the locator in one place.
    # BUG FIX: '@id' replaces the invalid '#id' of the original XPaths.
    box_xpath = ('//*[@id="react-root"]/section/main/div/div[1]/article'
                 '/div[3]/section[3]/div/form/textarea')
    z = 0  # number of comments posted so far
    while True:
        cmt = driver.find_element(By.XPATH, box_xpath)
        cmt.click()
        comment = lendo_arquivo()
        # Re-locate after the click: Instagram replaces the textarea node.
        driver.find_element(By.XPATH, box_xpath).send_keys(comment)
        driver.find_element(By.XPATH, box_xpath).send_keys(' ')
        sleep(10)
        driver.find_element(By.XPATH, box_xpath).send_keys(Keys.ENTER)
        sleep(10)
        driver.find_element(By.XPATH, box_xpath).send_keys(Keys.ENTER)
        z += 1
        print(f"{z}")
        sleep(60)  # throttle to avoid an instant spam block
def lendo_arquivo():
    """Return one random whitespace-separated token from seguidores.txt.

    Tokens are the '#username' entries written by pegar_seguidores().
    Raises IndexError if the file is empty.
    """
    with open("seguidores.txt", "r") as file:
        # split() already yields str tokens; the original's
        # list(map(str, ...)) wrapper was a no-op.
        words = file.read().split()
    return random.choice(words)
sorteioInstagram()
```
ERROR:
Traceback (most recent call last):
File "c:\\Users\\gabri\\Downloads\\Codigo Comentarios\\venv\\Scripts\\main.py", line 76, in \<module\>
sorteioInstagram()
File "c:\\Users\\gabri\\Downloads\\Codigo Comentarios\\venv\\Scripts\\main.py", line 17, in sorteioInstagram
pegar_seguidores('bielbibiel')
File "c:\\Users\\gabri\\Downloads\\Codigo Comentarios\\venv\\Scripts\\main.py", line 41, in pegar_seguidores
L.login('inspiracaobiel', 'biel123')
File "C:\\Users\\gabri\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\instaloader\\instaloader.py", line 634, in login
self.context.login(user, passwd)
File "C:\\Users\\gabri\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\instaloader\\instaloadercontext.py", line 239, in login
raise ConnectionException("Login: Checkpoint required. Point your browser to "
instaloader.exceptions.ConnectionException: Login: Checkpoint required. Point your browser to https://www.instagram.com/challenge/action/AXG6u8rHRprRccgzcgLKpQNJzv0wH2s1vajDDBvJi2xj-ypRREOtDgZK3e5Ee8HekhTocA0/AfxgQISXQXjHI8kasJEBP020fZ1GKKqFXXJGeaFzdDei1KKj2Pc3OVuZl5K_J3Og2Mxa0Yx64gubOg/ffc_XxXKP7ukaNEpowIhzsecXIF8lbX5oShdPs03HikkaCikTbJ50ZWn38x98bzTC5ZI/ - follow the instructions, then retry.
I tried to fix the commenting part, but I couldn't resolve the error; the bot keeps closing without making any comment.
I am trying to write changes to a file but I get the error :
'charmap' codec can't encode character '\u0159' in position 17: character maps to <undefined>
Other people said that you need to set the encoding to UTF-8 and so I set :
with open('ScrapedContent.csv', 'w+', newline='', encoding="utf-8") as write
After this is done the text is no longer being written to the ScrapedContent.csv file and the whole program becomes pretty much useless afterwards. Here is my code :
(I am providing the entire code since I don't know where the issue happens)
Desired solution :
There are "special" characters written to the file such as "č, ř, š". These are not actually special but rather normal in the 21st century but unfortunately it seems like that computers are still having a hard time understanding.
So in any case I need to write those characters to the file so they don't get broken. I don't care what has to be done as long as the final file comes out right. I have spent about 6 hours trying to fix this and have gotten nowhere.
This is the complete error output :
Traceback (most recent call last):
File "E:\Projects\Reality Scrapers\SRealityContentScraper\main.py", line 113, in <module>
writer.writerow([title.text, offers.text, address.text, phone_number, email])
File "C:\Users\workstationone\AppData\Local\Programs\Python\Python39\lib\encodings\cp1252.py", line 19, in encode
return codecs.charmap_encode(input,self.errors,encoding_table)[0]
UnicodeEncodeError: 'charmap' codec can't encode character '\u011b' in position 57: character maps to <undefined>
This is the code :
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as ec
from selenium.webdriver.firefox.firefox_binary import FirefoxBinary
from selenium.webdriver.chrome.options import Options
from selenium.common.exceptions import WebDriverException, TimeoutException
from platform import system
from os import getcwd, getlogin
import csv
# --- choose and launch a WebDriver for the requested browser ----------------
cwd = getcwd()
os = system()        # OS name string ("Windows", "Linux", ...), not the os module
user = getlogin()
browser = input("Browser name ex.: Chromium | Chrome | Firefox: ")
if os == "Linux":
    if user == "root":
        print(
            "You are executing the script as root. Make sure that the profile folder is also located in the root directory.")
del user
if browser == "Firefox" or browser == "Firefox ESR" or browser == "Firefox Browser":
    try:
        if os == "Windows":
            driver = webdriver.Firefox(executable_path=cwd + "/geckodriver.exe")
        else:
            driver = webdriver.Firefox(executable_path=cwd + "/geckodriver")
    except WebDriverException:
        print("Warning 10: Firefox is not installed in the default location")
        bin_location = input("Firefox executable location: ")
        binary = FirefoxBinary(bin_location)
        # BUG FIX: pass the FirefoxBinary object; the original handed the raw
        # path string to firefox_binary= and left `binary` unused.
        if os == "Windows":
            driver = webdriver.Firefox(executable_path=cwd + "/geckodriver.exe", firefox_binary=binary)
        else:
            driver = webdriver.Firefox(executable_path=cwd + "/geckodriver", firefox_binary=binary)
        del bin_location
elif browser == "Chrome" or browser == "Chrome Browser" or browser == "Google Chrome" or browser == "Chromium" or browser == "Chromium Browser":
    try:
        if os == "Windows":
            driver = webdriver.Chrome(executable_path=cwd + "/chromedriver.exe")
        else:
            driver = webdriver.Chrome(executable_path=cwd + "/chromedriver")
    except WebDriverException:
        print("Warning 11: Chrome/Chromium is not installed in the default location")
        bin_location = input("Chrome/Chromium executable location: ")
        options = Options()
        options.binary_location = bin_location
        # BUG FIX: the custom binary location only takes effect when the
        # Options object is actually passed to the driver — the original
        # built it and then ignored it.
        if os == "Windows":
            driver = webdriver.Chrome(executable_path=cwd + "/chromedriver.exe", options=options)
        else:
            driver = webdriver.Chrome(executable_path=cwd + "/chromedriver", options=options)
        del bin_location
else:
    print("Error 10: Invalid browser selected")
    input("Press ENTER to exit: ")
    exit()
wait = WebDriverWait(driver, 10)
# --- page through the directory and record every agency link ----------------
with open('links.csv', 'w+', newline='', encoding="utf-8") as write:
    driver.get("https://www.sreality.cz/adresar")
    writer = csv.writer(write)
    page_spawn = 0  # flips to 1 once the "next page" button stops appearing
    while page_spawn == 0:
        try:
            links = wait.until(ec.presence_of_all_elements_located((By.CSS_SELECTOR, "h2.title > a")))
            for link in links:
                print(link.get_attribute("href"))
                writer.writerow([link.get_attribute("href")])
            # BUG FIX: the :not() pseudo-class was missing its closing ')',
            # making the selector invalid.
            wait.until(ec.element_to_be_clickable(
                (By.CSS_SELECTOR, "a.btn-paging-pn.icof.icon-arr-right.paging-next:not(.disabled)"))).click()
        except TimeoutException:
            # No clickable "next" button within 10 s: we are on the last page.
            page_spawn = 1
            break
# Re-load the harvested links for the scraping pass below.
with open('links.csv') as read:
    reader = csv.reader(read)
    link_list = list(reader)
# --- visit each agency page and write one CSV row per page ------------------
with open('ScrapedContent.csv', 'w+', newline='', encoding="utf-8") as write:
    writer = csv.writer(write)
    for link in link_list:
        driver.get(', '.join(link))
        # BUG FIX: reset per-page state.  In the original, phone_number and
        # email survived from the previous iteration, so a page without them
        # silently reused the previous page's contact data.
        phone_number = None
        email = None
        title = wait.until(ec.presence_of_element_located((By.CSS_SELECTOR, "h1.page-title span.text.ng-binding")))
        offers = wait.until(ec.presence_of_element_located(
            (By.CSS_SELECTOR, "a.switcher.ng-binding.ng-scope span.ng-binding.ng-scope")))
        address = wait.until(
            ec.presence_of_element_located((By.CSS_SELECTOR, "tr.c-aginfo__table__row td.ng-binding")))
        try:
            # The phone number is hidden behind a "show" button.
            wait.until(
                ec.presence_of_element_located((By.CSS_SELECTOR, "button.value.link.ng-binding.ng-scope"))).click()
            phone_number = wait.until(ec.presence_of_element_located((By.CSS_SELECTOR, "span.phone.ng-binding")))
        except TimeoutException:
            pass  # page has no phone number
        try:
            wait.until(ec.presence_of_element_located((By.CSS_SELECTOR, "button.value.link.ng-binding"))).click()
            email = wait.until(ec.presence_of_element_located((By.CSS_SELECTOR, "a.value.link.ng-binding")))
        except TimeoutException:
            pass  # page has no e-mail
        # Fall back to a single space so the CSV keeps its column count.
        phone_number = phone_number.text if phone_number is not None else " "
        email = email.text if email is not None else " "
        print(title.text, " ", offers.text, " ", address.text, " ", phone_number, " ", email)
        try:
            writer.writerow([title.text, offers.text, address.text, phone_number, email])
        except Exception as e:
            print(e)
driver.quit()
This is heavily based on this answer.
Basically, you can't directly write unicode characters using csv.
You need a helper function:
def utf8ify(l):
    """Return a copy of *l* with every element stringified and UTF-8 encoded.

    l -- any iterable; each element is passed through str() then .encode().
    Returns a list of bytes objects.
    """
    # BUG FIX: the original read 'for s d]' — a syntax error; the intended
    # comprehension iterates over the argument: 'for s in l'.
    return [str(s).encode('utf-8') for s in l]
Then when you write the row add:
writer.writerow(utf8ify([title.text, offers.text, address.text, phone_number, email]))
The answer I linked to is better than mine in every way. If you want to learn why this works, read that answer.
So I've found this code on GitHub for gathering IPs from: https://free-proxy-list.net/ and rotate them. But I get an error message when I try to run it.
I am using ChromeDriver 2.41 because I was first getting a different error regarding the Socks integer. Using ChromeDriver 2.41 solved that, but I still can't get past this 'pxy' reference.
ALSO, pycharm is alerting me that 'pd' has a redeclared definition without usage. I'd really appreciate some help with the 'pxy' and 'pd' errors!
This is the code :
from selenium import webdriver
from selenium.webdriver.chrome.options import DesiredCapabilities
from selenium.webdriver.common.proxy import Proxy, ProxyType
import time
# Shared headless-Chrome options for both the scraper and the proxy drivers.
co = webdriver.ChromeOptions()
co.add_argument("log-level=3")
co.add_argument("--headless")

def get_proxies(co=co):
    """Scrape free-proxy-list.net and return 'host:port' strings.

    Only rows whose last column is "yes" (HTTPS-capable) are kept.
    """
    driver = webdriver.Chrome(chrome_options=co)
    try:
        driver.get("https://free-proxy-list.net/")
        PROXIES = []
        for p in driver.find_elements_by_css_selector("tr[role='row']"):
            result = p.text.split(" ")
            if result[-1] == "yes":
                PROXIES.append(result[0] + ":" + result[1])
    finally:
        # BUG FIX: close the browser even when scraping raises; the original
        # leaked a headless Chrome on any error.
        driver.close()
    return PROXIES

ALL_PROXIES = get_proxies()
ALL_PROXIES = get_proxies()
def proxy_driver(PROXIES, co=co):
    """Build a Chrome driver routed through the last proxy in *PROXIES*.

    Refills the list via get_proxies() when it is empty.  Returns the new
    webdriver.Chrome instance.
    """
    prox = Proxy()
    if not PROXIES:
        print("--- Proxies used up (%s)" % len(PROXIES))
        PROXIES = get_proxies()
    # BUG FIX: the original assigned pxy only in the non-empty branch, so a
    # refill ended in "UnboundLocalError: local variable 'pxy' referenced
    # before assignment".  Assign it after the (possible) refill instead.
    pxy = PROXIES[-1]
    prox.proxy_type = ProxyType.MANUAL
    prox.http_proxy = pxy
    prox.socks_proxy = pxy
    prox.ssl_proxy = pxy
    capabilities = webdriver.DesiredCapabilities.CHROME
    prox.add_to_capabilities(capabilities)
    return webdriver.Chrome(chrome_options=co, desired_capabilities=capabilities)
# --- YOU ONLY NEED TO CARE FROM THIS LINE ---
# creating new driver to use proxy
pd = proxy_driver(ALL_PROXIES)

# Keep retrying the scraping work, rotating to a fresh proxy on failure.
running = True
while running:
    try:
        mycodehere()   # placeholder: the actual scraping work
        # if statement to terminate loop if code working properly
        something()    # placeholder: expected to set running = False when done
    except Exception:
        # BUG FIX: narrowed from a bare 'except:' so KeyboardInterrupt and
        # SystemExit can still stop the loop.
        new = ALL_PROXIES.pop()
        # reassign driver if fail to switch proxy
        pd = proxy_driver(ALL_PROXIES)
        print("--- Switched proxy to: %s" % new)
This is the error I get:
Traceback (most recent call last):
File "scripts2.py", line 65, in <module>
pd = proxy_driver(ALL_PROXIES)
File "scripts2.py", line 53, in proxy_driver
prox.http_proxy = pxy
UnboundLocalError: local variable 'pxy' referenced before assignment
I'm a little confused because I thought 'pxy' is assigned under if PROXIES?
Try changing this below lines
current code:
if PROXIES:
pxy = PROXIES[-1] # script will fail if this condition not met
else:
print("--- Proxies used up (%s)" % len(PROXIES))
PROXIES = get_proxies()
Updated code:
# make sure to reset pxy to either null or empty
pxy = ''
if (PROXIES is None):
#print("--- Proxies used up (%s)" % len(PROXIES))
PROXIES = get_proxies()
pxy = PROXIES[-1]
# I would check if pxy is empty or not before doing assignment
if (pxy!=''):
#Then do the logic here
I'm running a script to collect information from various pages on a website.
#python2
from __future__ import division
from bs4 import BeautifulSoup
from pyvirtualdisplay import Display
from BeautifulSoup import SoupStrainer
import pandas as pd
import urllib,re,csv,os,urllib2,requests,itertools,pdfkit,time
import smtplib
import math
from selenium import webdriver
import requests.packages.urllib3
import requests
requests.packages.urllib3.disable_warnings()
from selenium.common.exceptions import TimeoutException
from selenium.common.exceptions import WebDriverException
from datetime import datetime
os.environ["LANG"] = "en_US.UTF-8"
start_time = time.time()
os.chdir('DIRECTORY')
#import .csv with variables for fulls list
fulls = zip(orgs, terms, sites, smo_ids, year_i, year_start, year_end)
orgs2 = []
terms2 = []
sites2 = []
results2 = []
smo_ids2 = []
article_number = []
years2 = []
numbers = range(2000001)
numbers = numbers[0::200]
start_time = time.time()
display = Display(visible=0, size=(1600, 1200))
display.start()
otime = datetime.now()
startpoint = 1
for full in fulls:
site = full[2]
org = full[0]
smo_id = full[3]
term = full[1]
year = full[4]
driver = webdriver.Chrome(executable_path="/usr/local/bin/chromedriver")
try:
driver.get(site) #get original site info
except (WebDriverException, TimeoutException) as e:
print(e.Message)
print "REFRESHING PAGE"
driver.refresh(site)
source = driver.page_source
soup = BeautifulSoup(source, "html.parser")
soup2 = soup.encode("utf-8")
try:
resultno = re.findall('<h1 id="pqResultsCount">\n(.*?) result',soup2)
resultno = ''.join(resultno)
resultno = resultno.translate(None, "(){}<>,")
resultno = int(resultno)
except ValueError, e:
resultno = int(0)
no_pages = int(math.ceil(resultno/20))
an = re.findall('{"(.*?)markedlistcheckbox:markallitems',soup2)
an = ''.join(an)
an = re.findall('markAll":false,"formats":{(.*?)},"markURL"',an)
an = ''.join(an)
an = re.sub(r'":.+?,"', '', an)
an = an.translate(None, '"')
an = an.split(':', 1)[0]
an = an.split('MSTAR_')
an.pop(0)
for i in an:
article_number.append(i)
years2.append(year)
sites2.append(site)
orgs2.append(org)
smo_ids2.append(smo_id)
terms2.append(term)
#begin encryption search
encrypt = re.findall('id="searchForm"(.*?)/></div>',soup2)
encrypt = ''.join(encrypt)
encrypt
t_ac = re.findall('name="t:ac" type="hidden" value="(.*?)/',encrypt)
t_ac = ''.join(t_ac)
t_ac
t_formdata = re.findall('name="t:formdata" type="hidden" value="(.*?)"',encrypt)
t_formdata = ''.join(t_formdata)
t_formdata
#start page 2 stuff
for page in range(2,no_pages+1):
site_ = "https://WEBSITE.com/results:gotopage/" + str(page) + "?t:ac=" + t_ac + "/?t:formdata=" + t_formdata + ""
driver.get(site_) #get subsequent page info
source = driver.page_source # Here is your populated data for the page source
soup_ = BeautifulSoup(source, "html.parser")
soup2_ = soup_.encode("utf-8")
an_ = re.findall('{"(.*?)markedlistcheckbox:markallitems',soup2_)
an_ = ''.join(an_)
an_ = re.findall('markAll":false,"formats":{(.*?)},"markURL"',an_)
an_ = ''.join(an_)
an_ = re.sub(r'":.+?,"', '', an_)
an_ = an_.translate(None, '"')
an_ = an_.split(':', 1)[0]
an_ = an_.split('MSTAR_')
an_.pop(0)
for i_ in an_:
article_number.append(i_)
years2.append(year)
sites2.append(site)
orgs2.append(org)
smo_ids2.append(smo_id)
terms2.append(term)
driver.quit()
elapsed_time = time.time() - start_time
try:
ctime_1 = ctime
except:
ctime_1 = otime
m, s = divmod(elapsed_time, 60)
h, m = divmod(m, 60)
ctime = datetime.now()
diftime = ctime - ctime_1
diftime = str(diftime)
diftime = diftime[2:7]
ctime2 = str(ctime)
ctime2 = ctime2[11:19]
print "%d:%02d:%02d | %s | %s" % (h, m, s, ctime2, diftime)
print "%d: Page %d is complete" % (startpoint, startpoint)
if startpoint in numbers:
print "Sleeping for 10 seconds"
time.sleep(10)
startpoint += 1
article_info = zip(article_number, years2, sites2, orgs2, smo_ids2, terms2)
The code runs, but at various points (sometimes 20 mins into the run, sometimes 14 hours into it), I get the following error:
Traceback (most recent call last):
File "<stdin>", line 131, in <module>
File "/usr/local/lib/python2.7/dist-packages/selenium/webdriver/chrome/webdriver.py", line 69, in __init__
desired_capabilities=desired_capabilities)
File "/usr/local/lib/python2.7/dist-packages/selenium/webdriver/remote/webdriver.py", line 151, in __init__
self.start_session(desired_capabilities, browser_profile)
File "/usr/local/lib/python2.7/dist-packages/selenium/webdriver/remote/webdriver.py", line 240, in start_session
response = self.execute(Command.NEW_SESSION, parameters)
File "/usr/local/lib/python2.7/dist-packages/selenium/webdriver/remote/webdriver.py", line 308, in execute
self.error_handler.check_response(response)
File "/usr/local/lib/python2.7/dist-packages/selenium/webdriver/remote/errorhandler.py", line 194, in check_response
raise exception_class(message, screen, stacktrace)
selenium.common.exceptions.WebDriverException: Message: session not created exception
from timeout: Timed out receiving message from renderer: 600.000
(Session info: chrome=64.0.3282.186)
(Driver info: chromedriver=2.35.528139 (47ead77cb35ad2a9a83248b292151462a66cd881),platform=Linux 4.13.0-36-generic x86_64)
I'm using current chrome and chromedriver, and I have tried this using selenium versions 3.9, 3.8, and 3.7. No matter what, I eventually get the above error.
Any ideas how to fix this error?
I made a program which gets one record from Google Sheet process on it then delete it and so on. If I update Google Sheet then the program will deduct record in the next loop and process on it and then delete,
but it runs only 1 or 2 hours and then program gives an error:
What can I add in my program so my program never stops?
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import time
import traceback
import string
import gspread
from oauth2client.service_account import ServiceAccountCredentials
from selenium.common.exceptions import NoAlertPresentException
from selenium.common.exceptions import UnexpectedAlertPresentException
# --- credentials prompt -----------------------------------------------------
Email = raw_input('Please Enter your Email: ')
password = raw_input('Please Enter Password: ')
print("\n******Don't Interrupt the Script******")
print('#script is Runing............\n')

# --- Chrome: maximized window, notifications off, images disabled -----------
chrome_options = webdriver.ChromeOptions()
chrome_options.add_argument("--start-maximized")
chrome_options.add_experimental_option(
    "prefs",
    {"profile.default_content_setting_values.notifications": 2,  # no notifications
     "profile.managed_default_content_settings.images": 2})      # no images
driver = webdriver.Chrome(chrome_options=chrome_options)

# --- interactive Google account login ---------------------------------------
driver.get('https://accounts.google.com')
time.sleep(3)
driver.find_element_by_id('Email').send_keys(Email, Keys.RETURN)
time.sleep(3)
driver.find_element_by_id('Passwd').send_keys(password, Keys.RETURN)

# --- service-account access to the Google Sheet ------------------------------
scope = ['https://spreadsheets.google.com/feeds']
credentials = ServiceAccountCredentials.from_json_keyfile_name('stephens-31d8490b5bd2.json', scope)
google_sheet = gspread.authorize(credentials)
workSheet = google_sheet.open("Video Access Master Sheet").worksheet("Sheet1")
while True:
#fetch Records from Rows 2 to 50 and save on list-----------------
for i in range(2,51):
li_url=[]
li_email=[]
row=workSheet.row_values(i)
for b in row:
if 'youtu' in b:
li_url.append(b)
#find record which you append on list and then delete from googleSheet--------------------
cell = workSheet.find(b)
row = cell.row
col = cell.col
workSheet.update_cell(row,col, '')
print 'Fetching Values From Row '+str(i)+'....'
elif '#' in b:
li_email.append(b)
elif b=='':
continue
else:
continue
#*********************************************************
#getting length list of li_url and apply condition on it-----------------------------------------------
length=len(li_url)
if length==0:
continue
else:
try:
#getting URLs from list and put into driver.get---------------------------------------------------------
for a in li_url:
driver.get(a)
time.sleep(3)
driver.find_element_by_css_selector('.yt-uix-button-icon.yt-uix-button-icon-info.yt-sprite').click()
time.sleep(3)
driver.find_element_by_css_selector('.yt-uix-button.yt-uix-button-size-default.yt-uix-button-default.metadata-share-button').click()
time.sleep(2)
put_email=driver.find_element_by_css_selector('.yt-uix-form-input-textarea.metadata-share-contacts')
#getting emails from email list--------------------------------------------------------------
put_email.send_keys(li_email[0])
time.sleep(2)
driver.find_element_by_css_selector('.yt-uix-button.yt-uix-button-size-default.yt-uix-button-primary.sharing-dialog-button.sharing-dialog-ok').click()
time.sleep(4)
driver.find_element_by_xpath('.//*[#id="video-header"]/div/button[2]/span').click()
time.sleep(10)
#for notifications and alters--------------------------------------------
try:
driver.switch_to.alert.accept()
except NoAlertPresentException:
pass
except UnexpectedAlertPresentException:
pass
except:
traceback.print_exc
pass
print 'Row '+str(i)+' Successfully Updated. \n'
time.sleep(120) #while loop sleep for 20minuts
This is the error I got:
Traceback (most recent call last):
File "<stdin>", line 2, in <module>
File "<string>", line 56, in parse
File "<string>", line 35, in parse
cElementTree.ParseError: no element found: line 1, column 0
For some reason cell = workSheet.find(b) fails. Could be bad data in there; without seeing the input it's anyone's guess.
Since you already know the row number, you can avoid using cell = workSheet.find(b) by simply keeping track of the columns you're searching through and finally calling workSheet.update_cell(i, col, '') after copying the data.