Im trying to translate user comments from tripadvisor.
My code :-
1.]Selects only portuguese comments( from language dropdown),
2.]Then expands each of the comments,
3.]Then saves all these expanded comments in a list
4.]Then translates them into english & prints on screen
from selenium import webdriver
from selenium.webdriver.common.by import By
import time
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
com_=[] # To save translated comments
expanded_comments=[] # To save expanded comments
driver = webdriver.Chrome("C:\Users\shalini\Downloads\chromedriver_win32\chromedriver.exe")
driver.maximize_window()
def expand_reviews(driver):
# TRYING TO EXPAND REVIEWS (& CLOSE A POPUP)
try:
driver.find_element_by_class_name("moreLink").click()
except:
print "err"
try:
driver.find_element_by_class_name("ui_close_x").click()
except:
print "err"
try:
driver.find_element_by_class_name("moreLink").click()
except:
print "err3"
def save_comments(driver):
expand_reviews(driver)
# SELECTING ALL EXPANDED COMMENTS
expanded_com_elements=driver.find_elements_by_class_name("entry")
time.sleep(3)
for i in expanded_com_elements:
expanded_comments.append(i.text)
# SELECTING ALL GOOGLE-TRANSLATOR links
gt= driver.find_elements(By.CSS_SELECTOR,".googleTranslation>.link")
# NOW PRINTING TRANSLATED COMMENTS
for i in gt:
try:
driver.execute_script("arguments[0].click()",i)
#i.click().perform()
com=driver.find_element_by_class_name("ui_overlay").text
com_.append(com)
time.sleep(5)
driver.find_element_by_class_name("ui_close_x").click().perform()
time.sleep(5)
except Exception as e:
pass
#print e
for i in range(282):
page=i*10
url="https://www.tripadvisor.com/Airline_Review-d8729164-Reviews-Cheap-Flights-or"+str(page)+"-TAP-Portugal#REVIEWS"
driver.get(url)
wait = WebDriverWait(driver, 10)
if i==0:
# SELECTING PORTUGUESE COMMENTS ONLY # Run for one time then iterate over pages
try:
langselction = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "span.sprite-date_picker-triangle")))
langselction.click()
driver.find_element_by_xpath("//div[#class='languageList']//li[normalize-space(.)='Portuguese first']").click()
time.sleep(5)
except Exception as e:
print e
save_comments(driver)
================ERROR=================
expanded_comments return empty list. Some comments get saved, some get skipped.
First page is saved properly (all comments expanded), but thereafter only first comment gets saved, without being expanded. But translated comments from all pages get saved properly in com_
I have changed your code and now it's working.
from selenium import webdriver
from selenium.webdriver.common.by import By
import time
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
driver = webdriver.Chrome("./chromedriver.exe")
driver.maximize_window()
url="https://www.tripadvisor.com/Airline_Review-d8729164-Reviews-Cheap-Flights-TAP-Portugal#REVIEWS"
driver.get(url)
wait = WebDriverWait(driver, 10)
# SELECTING PORTUGUESE COMMENTS ONLY
#show_lan = driver.find_element_by_xpath("//div[#class='languageList']/ul/li[contains(text(),'Portuguese first')]")
try:
langselction = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "span.sprite-date_picker-triangle")))
langselction.click()
driver.find_element_by_xpath("//div[#class='languageList']//li[normalize-space(.)='Portuguese first']").click()
time.sleep(5)
except Exception as e:
print e
# TRYING TO EXPAND REVIEWS (& CLOSE A POPUP)
try:
driver.find_element_by_class_name("moreLink").click()
except:
print "err"
try:
driver.find_element_by_class_name("ui_close_x").click()
except:
print "err"
try:
driver.find_element_by_class_name("moreLink").click()
except:
print "err3"
# SELECTING ALL EXPANDED COMMENTS
expanded_com_elements=driver.find_elements_by_class_name("entry")
expanded_comments=[]
time.sleep(3)
for i in expanded_com_elements:
expanded_comments.append(i.text)
# SELECTING ALL GOOGLE-TRANSLATOR links
gt= driver.find_elements(By.CSS_SELECTOR,".googleTranslation>.link")
# NOW PRINTING TRANSLATED COMMENTS
for i in gt:
try:
driver.execute_script("arguments[0].click()",i)
#i.click().perform()
print driver.find_element_by_class_name("ui_overlay").text
time.sleep(5)
driver.find_element_by_class_name("ui_close_x").click().perform()
time.sleep(5)
except Exception as e:
pass
#print e
Related
I don't receive an error or anything to tell me what's wrong, the list I'm appending the value to is just coming up empty. In my code, getting the values for variables page_cards, ticker, and optCriteria all work. I've tried following, following-sibling, and several other variations with no luck.
try:
page_cards = driver.find_elements_by_xpath('//article[#data-testid="tweet"]')
except NoSuchElementException:
continue
for card in page_cards:
try:
ticker = card.find_element_by_xpath('//span/a[starts-with(text(),"$")]').text.replace('$', '')
optCriteria = card.find_element_by_xpath('//span/a[starts-with(text(),"$")]'
'/../following-sibling::span').text.split('\n')[0].replace('-', '').replace('$', '')
emoji = card.find_element_by_xpath('//span/a[starts-with(text(),"$")]/..//'
'following-sibling::img[#alt= "Ox" OR #alt= "Bear face"]/#alt')
tradeCriteria = str(ticker+optCriteria+emoji)
except NoSuchElementException:
continue
from selenium.webdriver.support import expected_conditions as ec
from selenium.webdriver.support.ui import Select, WebDriverWait
def wait_element_visible(self, delay, *locator):
WebDriverWait(driver, delay).until(
ec.visibility_of_element_located(locator)
)
Ok so i have this code to scrape this link.... https://datacentersupport.lenovo.com/gb/en/products/storage/lenovo-storage/s3200/70l8/parts/display/compatible. The code I have scrapes all the details perfectly.......except the substitutes. Here's the full code. Have I missed something?
from selenium import webdriver
from time import sleep
import pandas as pd
from selenium.common.exceptions import NoSuchElementException, TimeoutException
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
# initializing webdriver
driver = webdriver.Chrome(executable_path="~~chromedriver.exe")
url = " https://datacentersupport.lenovo.com/gb/en/products/storage/lenovo-storage/s3200/70l8/parts/display/compatible."
driver.get(url)
sleep(5)
results = []
#getting breadcrumbs
bread1 = driver.find_element_by_xpath("//span[#class='prod-catagory-name']")
bread2 = driver.find_element_by_xpath("//span[#class='prod-catagory-name']/a")
#grabbing table data and navigating
pages = int(driver.find_element_by_xpath("//div[#class='page-container']/span[#class='icon-s-right active']/preceding-sibling::span[1]").text)
num = pages -1
for _ in range(pages):
rows = driver.find_elements_by_xpath("//table/tbody/tr/td[2]/div")
for row in rows:
parts = row.text
results.append([url,bread1.text,parts])
try:
for element in WebDriverWait(driver, 10).until(EC.visibility_of_all_elements_located((By.CSS_SELECTOR, "span[class='icon-s-down']"))):
driver.execute_script("arguments[0].click();", element)
sleep(5)
substitute = WebDriverWait(driver, 10).until(EC.visibility_of_all_elements_located((By.XPATH, "//span[#class='icon-s-up']//following::tr[3]/td[contains(#class,'enabled-border')]//div[text()]")))
for sub in substitute:
subs = sub.text
results.append(subs)
except TimeoutException:
pass
except NoSuchElementException:
break
finally:
try:
pagination = driver.find_element_by_xpath("//div[#class='page-container']/span[#class='icon-s-right active']").click()
sleep(3)
except NoSuchElementException:
break
df = pd.DataFrame(results)
df.to_csv('datacenter2.csv', index=False)
driver.quit()
The results were far from pleasing. I know am missing something within the loops. But am not sure what. Any suggestion would be highly appreciated.
This is the result I get :
I need to print the code alongside the numbers in the last column for each row it scrapes
So I am trying to scrape usernames and comments from multiple posts. Using this code below.
from selenium.webdriver.common.by import By
from selenium import webdriver
import time
import sys
import pandas as pd
from pandas import ExcelWriter
import os.path
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.action_chains import ActionChains
url=['https://www.instagram.com/p/CRLe53_hmMH','https://www.instagram.com/p/CRX7VL1sL54/?utm_medium=share_sheet',
'https://www.instagram.com/p/CRVB7ykM7-R/?utm_medium=share_sheet', 'https://www.instagram.com/p/CRQ9Bq5M6ce/?utm_medium=share_sheet',
'https://www.instagram.com/p/CRQT1BJMmSi/?utm_medium=share_sheet', 'https://www.instagram.com/p/CM8T3HgMQG0/?utm_medium=copy_link'
'https://www.instagram.com/p/COrn5fYs78O/?utm_medium=share_sheet']
user_names = []
user_comments = []
driver = driver = webdriver.Chrome('E:/chromedriver')
driver.get(url[0])
time.sleep(3)
username = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.CSS_SELECTOR, "input[name='username']")))
password = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.CSS_SELECTOR, "input[name='password']")))
username.clear()
username.send_keys('myuname')
password.clear()
password.send_keys('mypassword')
Login_button = WebDriverWait(driver, 2).until(EC.element_to_be_clickable((By.CSS_SELECTOR, "button[type='submit']"))).click()
time.sleep(4)
not_now = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, '//button[contains(text(), "Not Now")]'))).click()
not_now2 = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, '//button[contains(text(), "Not Now")]'))).click()
for n in url:
try:
driver.get(n)
time.sleep(3)
load_more_comment = driver.find_element_by_class_name('glyphsSpriteCircle_add__outline__24__grey_9')
print("Found {}".format(str(load_more_comment)))
i = 0
while load_more_comment.is_displayed() and i < 10:
load_more_comment.click()
time.sleep(1.5)
load_more_comment = driver.find_element_by_class_name('glyphsSpriteCircle_add__outline__24__grey_9')
print("Found {}".format(str(load_more_comment)))
i += 1
user_names.pop(0)
user_comments.pop(0)
except Exception as e:
print(e)
pass
comment = driver.find_elements_by_class_name('gElp9 ')
for c in comment:
container = c.find_element_by_class_name('C4VMK')
name = container.find_element_by_class_name('_6lAjh ').text
content = container.find_element_by_tag_name('span').text
content = content.replace('\n', ' ').strip().rstrip()
user_names.append(name)
user_comments.append(content)
print(content)
user_names.pop(0)
user_comments.pop(0)
#export(user_names, user_comments)
driver.close()
df = pd.DataFrame(list(zip(user_names, user_comments)),
columns =['Name', 'Comments'])
#df.to_excel('ujicoba_gabung_IG_6.xlsx')
print(df)
But somehow instead of returning username and comment, both user_names and user_comments return usernames. Where did I make a mistake?
Here Are My outputs
I think my problem is on the for loop where I declare the container as C4VMK. But I inspected the element on Instagram it is already the same
There are two span in C4VMK class. First in h3 -> first div -> span and second is that one you want.
For getting the second span that is the comment, replace your code with below and get the second element.
content = container.find_elements_by_tag_name('span')[1].text
Your container is correct. However, when you search for a span by tag name like this:
content = container.find_element_by_tag_name('span').text
Selenium will find the first span that is under the content. Which in this case is the username span with the class 'Jv7Aj mArmR MqpiF '.
What you are looking for is the other span that I highlighted in the image, which is a direct child of the container with an empty class.
You can select it like this:
content = container.find_element_by_xpath("/span[#class='']")
I verify if Autocomplte works well or not. I send the keys but he does not select the required element. Finally I want to print the URL of the page that appear after finding the required element and pressing on it. I recieve only this result:
Ran 1 test in 33.110s
OK
Process finished with exit code 0
Message:
def test_autocomplet(self):
try:
driver = webdriver.Chrome()
self.driver=webdriver.Chrome()
url = self.driver.get("http://automationpractice.com/index.php")
self.driver.maximize_window()
Serach_text_box=self.driver.find_element_by_id("search_query_top")
Serach_text_box.send_keys("Printed")
Serach_text_box.send_keys(Keys.ARROW_DOWN)
five_option= WebDriverWait(driver, 10).until(EC.visibility_of_element_located((By.XPATH,"//*[contains(text(),'Dress')]")))
five_option.send_keys(Keys.ENTER)
print self.driver.current_url
self.assertEqual("http://automationpractice.com/index.php?id_product=3&controller=product",self.driver.current_url, "This Test case is fallied")
except NoSuchElementException as e:
print (e)
except AssertionError as e:
print (e)
except TimeoutException as e:
print (e)
I want to know if any thing in the code is wrong and why he does not select and click on the required element and print the URL of the next page that appear after click on the required element.
I would be thanksfull for any help.
I put here code which I used to test this page.
To select item on menu I can use ARROW_DOWN but it doesn't give information about selected item.
Second method is to search
//div[#class='ac_results']//li[contains(text(),'Dress')]
or at least
//li[contains(text(),'Dress')]
eventually
//div[#class='ac_results']//li
to access item in menu. And then I can get full text .text or highlighted part .//strong
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException, TimeoutException
import time
try:
#driver = webdriver.Chrome()
driver = webdriver.Firefox()
url = driver.get("http://automationpractice.com/index.php")
#driver.maximize_window()
search_text_box = driver.find_element_by_id("search_query_top")
search_text_box.send_keys("Printed")
time.sleep(1) # page display (and update) autocompletion when you make little longer delay
# --- select using arrow key ---
# move selection on list and accept it
#search_text_box.send_keys(Keys.ARROW_DOWN)
#search_text_box.send_keys(Keys.ARROW_DOWN)
#search_text_box.send_keys(Keys.ARROW_DOWN)
#search_text_box.send_keys(Keys.ENTER)
# OR
# --- select using tag `<li>` and `text()` in autocompletion ---
# click on first matching item on list
#one_option = WebDriverWait(driver, 10).until(EC.visibility_of_element_located((By.XPATH, "//li[contains(text(),'Dress')]")))
one_option = WebDriverWait(driver, 10).until(EC.visibility_of_element_located((By.XPATH, "//div[#class='ac_results']//li[contains(text(),'Dress')]")))
print(' tag:', one_option.tag_name)
print('text:', one_option.text)
print('bold:', one_option.find_element_by_xpath('.//strong').text)
one_option.click()
# OR
# --- get all elements in autocompletion using `<li>` tag ---
# get many matching items and use [index] to click on some item on list
#one_option = WebDriverWait(driver, 10).until(EC.visibility_of_element_located((By.XPATH, "//li[contains(text(),'Dress')]")))
#all_options = driver.find_elements_by_xpath("//li[contains(text(),'Dress')]")
#for option in all_options:
# print(option.tag_name, ':', option.text)
#all_options[1].click()
print(' current:', driver.current_url)
print('expected:', "http://automationpractice.com/index.php?id_product=3&controller=product")
print('the same:', driver.current_url == "http://automationpractice.com/index.php?id_product=3&controller=product")
assert "http://automationpractice.com/index.php?id_product=3&controller=product" == driver.current_url, "This Test case is fallied"
#assertEqual("http://automationpractice.com/index.php?id_product=3&controller=product", self.driver.current_url, "This Test case is fallied")
except NoSuchElementException as e:
print('NoSuchElementException:', e)
except TimeoutException as e:
print('TimeoutException:', e)
except AssertionError as e:
print('AssertionError:', e)
i am making a whatsapp bot for my friends birthday i want to bombard him with messages :) ,
can u please help me in solving this problem :) ?
i have tried replacing \n with \r\n but was unable to simulate shift+enter
from selenium.webdriver.common.by import By
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.support.ui import WebDriverWait
import socket
import time
handl=open('message.txt','r')
message_text=handl.read().replace('\n','\r\n')
no_of_message=1
filename='contacts.txt'
handle = open(filename,'r')
moblie_no_list=list() # list of phone number can be of any length
for number in handle:
if len(number)==12:
moblie_no_list.append(number)
elif len(number)==13:
moblie_no_list.append(number.replace('+',''))
elif len(number) ==10:
number='91'+number
moblie_no_list.append(number)
else:
pass
def element_presence(by,xpath,time):
element_present = EC.presence_of_element_located((By.XPATH, xpath))
WebDriverWait(driver, time).until(element_present)
def is_connected():
try:
# connect to the host -- tells us if the host is actually
# reachable
socket.create_connection(("www.google.com", 80))
return True
except :
is_connected()
driver = webdriver.Firefox()
driver.get("http://web.whatsapp.com")
time.sleep(10) #wait time to scan the code in second
def send_whatsapp_msg(phone_no,text):
driver.get("https://web.whatsapp.com/send?phone=
{}&source=&data=#".format(phone_no))
try:
driver.switch_to_alert().accept()
except:
pass
try:
element_presence(By.XPATH,'//*
[#id="main"]/footer/div[1]/div[2]/div/div[2]',30)
txt_box=driver.find_element(By.XPATH , '//*
[#id="main"]/footer/div[1]/div[2]/div/div[2]')
global no_of_message
for x in range(no_of_message):
txt_box.send_keys(text)
txt_box.send_keys("\n")
except :
print("invailid phone no :"+str(phone_no))
for moblie_no in moblie_no_list:
try:
send_whatsapp_msg(moblie_no,message_text)
except Exception as e:
time.sleep(10)
is_connected()
I Expected Sending Multiline Messages But It Splits Em Into Single Message each .
txt_box.send_keys(Keys.SHIFT,'\n')
Here's What I Did To Solve The Question
Keys.Shift Will Manipulate Press Of Shift Key Followed By '\n' Carriage Return.
Link To Complete Code On Github
Complete Solved Code Is Here :) :
from selenium.webdriver.common.by import By
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.support.ui import WebDriverWait
import socket
import time
handl=open('message.txt','r', encoding="utf8")
message_text=handl.read()
messages=message_text.split('\n')
print(messages)
no_of_message=1
filename='contacts.txt'
handle = open(filename,'r')
moblie_no_list=list() # list of phone number can be of any length
for number in handle:
if len(number)==12:
moblie_no_list.append(number)
elif len(number)==13:
moblie_no_list.append(number.replace('+',''))
elif len(number) ==10:
number='91'+number
moblie_no_list.append(number)
else:
pass
def element_presence(by,xpath,time):
element_present = EC.presence_of_element_located((By.XPATH, xpath))
WebDriverWait(driver, time).until(element_present)
def is_connected():
try:
# connect to the host -- tells us if the host is actually
# reachable
socket.create_connection(("www.google.com", 80))
return True
except :
is_connected()
driver = webdriver.Firefox()
driver.get("http://web.whatsapp.com")
time.sleep(10) #wait time to scan the code in second
def send_whatsapp_msg(phone_no,text):
driver.get("https://web.whatsapp.com/send?phone={}&source=&data=#".format(phone_no))
try:
driver.switch_to_alert().accept()
except:
pass
try:
element_presence(By.XPATH,'//*[#id="main"]/footer/div[1]/div[2]/div/div[2]',30)
txt_box=driver.find_element(By.XPATH , '//*[#id="main"]/footer/div[1]/div[2]/div/div[2]')
global messages
for message in messages:
txt_box.send_keys(message)
txt_box.send_keys(Keys.SHIFT,'\n')
txt_box.send_keys("\n")
except :
print("invailid phone no :"+str(phone_no))
for moblie_no in moblie_no_list:
try:
send_whatsapp_msg(moblie_no,message_text)
except Exception as e:
time.sleep(10)
is_connected()