from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import Select
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup
import time

driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))
driver.get('https://www.suwon.go.kr/sw-www/www01/www01-01.jsp?q_tabs=open')
driver.implicitly_wait(3)
content = driver.find_element(By.TAG_NAME, 'iframe')
driver.switch_to.frame(content)
dropdown = Select(driver.find_element(By.XPATH, '//*[@id="dateType"]'))
dropdown.select_by_index(4)
driver.find_element(By.XPATH, '//*[@id="searchBtn"]').click()
complaint_list = []
contents_list = []
def complaint_Scraping():
    for i in range(1, 78):
        titles = driver.find_elements(By.CSS_SELECTOR, 'tbody > tr > td.left')
        for complaint in titles:
            name = BeautifulSoup(complaint.text, "html.parser")
            complaint_list.append(name)
        a = driver.find_elements(By.CSS_SELECTOR, 'tbody > tr > td.left > a')
        for content in a:
            content.click()
            time.sleep(2)
            ancient_html = driver.find_elements(By.XPATH, '//*[@id="txt"]/div[1]/div[1]/div/div[2]')
            content = BeautifulSoup(ancient_html.text, "html.parser")
            contents_list.append(content)
            driver.back()
complaint_Scraping()
I don't know what's wrong here.
I can get all the titles, but it doesn't work when I try to get the contents behind them. The first page may work, but the rest don't. Please help me solve this problem.
ancient_html = driver.find_elements(By.XPATH, '//*[@id="txt"]/div[1]/div[1]/div/div[2]')
content = BeautifulSoup(ancient_html.text, "html.parser")
find_elements() returns a list of elements, so ancient_html is a list and has no .text attribute.
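A minimal sketch of the fix, reusing the selectors from your code (treat the iframe re-switching after driver.back() as an assumption about this particular site): use find_element (singular) for the content block, and re-locate the link list on every iteration, since the old references go stale once you navigate away and back.

def complaint_scraping():
    links = driver.find_elements(By.CSS_SELECTOR, 'tbody > tr > td.left > a')
    for index in range(len(links)):
        # Re-locate the links each time round: the previous WebElements go stale
        # after clicking through to a detail page and coming back.
        links = driver.find_elements(By.CSS_SELECTOR, 'tbody > tr > td.left > a')
        links[index].click()
        time.sleep(2)
        # find_element (singular) returns one WebElement, so .text works here.
        detail = driver.find_element(By.XPATH, '//*[@id="txt"]/div[1]/div[1]/div/div[2]')
        contents_list.append(detail.text)
        driver.back()
        time.sleep(2)
        # If the list lives inside the iframe, you may need to switch into it again here:
        # driver.switch_to.frame(driver.find_element(By.TAG_NAME, 'iframe'))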
All the data is populated from the first table. I cannot move to the next div and get the data of the td for each tr.
The site: https://asd.com/page/
Below is the code that I have written.
from selenium import webdriver
from selenium.webdriver.chrome.service import Service as ChromeService
from selenium.webdriver.common.by import By
from webdriver_manager.chrome import ChromeDriverManager
driver = webdriver.Chrome(service=ChromeService(ChromeDriverManager().install()))
my_url= "https://asd.com/page/asd"
driver.get(my_url)
boxes = driver.find_elements(By.CLASS_NAME, "col-md-4")
companies = []
company = {}
for box in boxes:
    header = box.find_element(By.CLASS_NAME, "text-primary.text-uppercase")
    company['name'] = header.text
    td = box
    company['Type'] = td.find_element(By.XPATH, "//div/div/div/table/tbody/tr[1]/td").text
    company['Capital'] = td.find_element(By.XPATH, "//div/div/div/table/tbody/tr[2]/td").text
    company['Address'] = td.find_element(By.XPATH, "//div/div/div/table/tbody/tr[3]/td").text
    company['Owner'] = td.find_element(By.XPATH, "//div/div/div/table/tbody/tr[4]/td").text
    company['Co-Owner'] = td.find_element(By.XPATH, "//div/div/div/table/tbody/tr[5]/td").text
    company['Duration'] = td.find_element(By.XPATH, "//div/div/div/table/tbody/tr[6]/td").text
    company['Place'] = td.find_element(By.XPATH, "//div/div/div/table/tbody/tr[7]/td").text
    company['Company ID'] = td.find_element(By.XPATH, "//div/div/div/table/tbody/tr[8]/td").text
    companies.append(company)
    print(company)
There are several issues here:
You need to add some delay between driver.get(my_url) and boxes = driver.find_elements(By.CLASS_NAME, "col-md-4") to let the elements load before getting the list of all of them.
text-primary.text-uppercase is actually 2 class names, text-primary and text-uppercase, so you should use XPATH or CSS_SELECTOR to locate an element by 2 class names, not CLASS_NAME.
In order to locate elements inside another element you should use an XPATH starting with a dot .
Your locators like //div/div/div/table/tbody/tr[1]/td are absolute, while they should be relative to the parent box element.
No need to define a td element; you can use the existing box element here.
Locators like //div/div/div/table/tbody/tr[1]/td can and should be improved.
You will probably need to scroll to the boxes while iterating over them.
I think company = {} should be defined inside the loop.
This should work better:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service as ChromeService
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.action_chains import ActionChains
import time
from webdriver_manager.chrome import ChromeDriverManager
driver = webdriver.Chrome(service=ChromeService(ChromeDriverManager().install()))
my_url= "https://monentreprise.bj/page/annonces"
driver.get(my_url)
wait = WebDriverWait(driver, 20)
actions = ActionChains(driver)
wait.until(EC.presence_of_element_located((By.CLASS_NAME, "col-md-4")))
time.sleep(2)
boxes = driver.find_elements(By.CLASS_NAME, "col-md-4")
companies = []
for box in boxes:
    actions.move_to_element(box).perform()
    time.sleep(0.3)
    company = {}
    header = box.find_element(By.XPATH, ".//h5[@class='text-primary text-uppercase']")
    company['name'] = header.text
    company['Objet'] = box.find_element(By.XPATH, ".//tr[1]/td").text
    company['Capital'] = box.find_element(By.XPATH, ".//tr[2]/td").text
    company['Siège Social'] = box.find_element(By.XPATH, ".//tr[3]/td").text
    company['Gérant'] = box.find_element(By.XPATH, ".//tr[4]/td").text
    company['Co-Gérant'] = box.find_element(By.XPATH, ".//tr[5]/td").text
    company['Durée'] = box.find_element(By.XPATH, ".//tr[6]/td").text
    company['Dépôt'] = box.find_element(By.XPATH, ".//tr[7]/td").text
    company['Immatriculation RCCM'] = box.find_element(By.XPATH, ".//tr[8]/td").text
    companies.append(company)
    print(company)
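If you want the scraped companies as a table afterwards, one option (just a sketch, assuming pandas is available) is to load the list of dicts into a DataFrame and write it out:

import pandas as pd

df = pd.DataFrame(companies)  # one row per company dict
df.to_csv("companies.csv", index=False, encoding="utf-8")
print(df.head())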
Hello, I wrote this code and it returns a list of li elements, but I want to access each a tag inside them and open it. If you have any recommendation I would be very grateful.
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
import pandas as pd
import time
options = Options()
# Creating our dictionary
all_services = pd.DataFrame(columns=['Motif', 'Description'])
path = "C:/Users/Al4D1N/Documents/ChromeDriver_webscraping/chromedriver.exe"
driver = webdriver.Chrome(options=options, executable_path=path)
driver.get("https://www.mairie.net/national/acte-naissance.htm#plus")
list_of_services = driver.find_elements_by_css_selector(".list-images li")
I know that I need to iterate over each item in list_of_services, but I don't know how I can open each a tag, since none of them have classes or ids that would help me tell them apart.
This is one way to extract all of the href links.
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
import pandas as pd
import time
options = Options()
# Creating our dictionary
all_services = pd.DataFrame(columns=['Motif', 'Description'])
path = "C:/Users/Al4D1N/Documents/ChromeDriver_webscraping/chromedriver.exe"
driver = webdriver.Chrome(options=options, executable_path=path)
driver.get("https://www.mairie.net/national/acte-naissance.htm#plus")
#Get all elements in class 'list-images'
list_of_services = driver.find_elements_by_class_name("list-images")
for service in list_of_services:
    # In each element, select the a tags
    atags = service.find_elements_by_css_selector('a')
    for atag in atags:
        # In each a tag, get the href attribute
        href = atag.get_attribute('href')
        print(href)
Output:
https://www.mairie.net/national/acte-mariage.htm#acte-naissance
https://www.mairie.net/national/acte-deces.htm#acte-naissance
https://www.mairie.net/national/carte-identite.htm#acte-naissance
https://www.mairie.net/national/passeport.htm#acte-naissance
https://www.mairie.net/national/casier-judiciaire.htm#acte-naissance
https://www.mairie.net/national/demande-carte-electorale.htm#acte-naissance
https://www.mairie.net/national/cadastre-plu.htm#acte-naissance
https://www.mairie.net/national/carte-grise-en-ligne-par-internet.htm#acte-naissance
https://www.mairie.net/national/certificat-non-gage.htm#acte-naissance
https://www.mairie.net/national/permis-conduire-delivrance.htm#acte-naissance
https://www.mairie.net/national/changement-adresse.htm#acte-naissance
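If you then want to open each of those links, collect the hrefs into a plain list first and navigate afterwards; calling driver.get inside the element loop would invalidate the WebElement references. A minimal sketch building on the code above (the time.sleep is a crude stand-in for a proper wait):

hrefs = []
for service in list_of_services:
    for atag in service.find_elements_by_css_selector('a'):
        hrefs.append(atag.get_attribute('href'))

for href in hrefs:
    driver.get(href)      # open each collected link in turn
    time.sleep(2)         # crude wait; replace with WebDriverWait as needed
    print(driver.title)   # do whatever scraping you need on the opened page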
Hello, I'm trying to parse all of the star reviews as text (4, 1, 4, 2, etc.).
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
driver = webdriver.Chrome()
list3 = []
driver.get('https://www.amazon.com/gp/new-releases/kitchen/ref=zg_bs_tab_t_bsnr')
elements = WebDriverWait(driver, 20).until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, ".a-icon-alt")))
for i in elements:
    i = i.get_attribute("innerHTML")
    i = i.split(' ')[0]
    list3.append(i)
I want to parse the review stars, and if a review star does not exist, print something in its place.
Try:
from bs4 import BeautifulSoup as bs
from selenium import webdriver
from webdriver_manager.chrome import ChromeDriverManager
import time
driver = webdriver.Chrome(ChromeDriverManager().install())
driver.set_window_size(1024, 600)
driver.maximize_window()
url = "https://www.amazon.com/gp/new-releases/kitchen/ref=zg_bs_tab_t_bsnr"
driver.get(url)
time.sleep(2)
soup=bs(driver.page_source,'html.parser')
for card in soup.findAll('span', {'class': 'aok-inline-block zg-item'}):
    elem = card.find('span', {'class': 'a-icon-alt'})
    if elem:
        print(elem.text.split()[0])
    else:
        print("no")
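If you would rather stay in Selenium instead of switching to BeautifulSoup, the same "print something when the rating is missing" idea works with find_elements, which returns an empty list rather than raising when nothing matches. A sketch assuming the same zg-item cards as above:

from selenium.webdriver.common.by import By

cards = driver.find_elements(By.CSS_SELECTOR, "span.aok-inline-block.zg-item")
for card in cards:
    stars = card.find_elements(By.CSS_SELECTOR, "span.a-icon-alt")
    if stars:
        # innerHTML looks like "4.5 out of 5 stars"; keep only the leading number
        print(stars[0].get_attribute("innerHTML").split()[0])
    else:
        print("no rating")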
I am a beginner in Python/Selenium scraping. I want to scrape the permalinks of all the Quora answers posted under a question. So far I have created the following code snippet, but when I run it, it gives me only one link in the output. I guess this is because the page isn't fully loaded. What should I do to get at least 100 answer permalinks from the page source?
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
driver_option = webdriver.ChromeOptions()
driver_option.add_argument("--incognito")
chromedriver_path = './chromedriver'
def create_webdriver():
    return webdriver.Chrome(executable_path=chromedriver_path, chrome_options=driver_option)
f = open('file_text.txt', 'w')
# Open the website
browser = create_webdriver()
browser.get("https://www.quora.com/How-do-I-prove-the-flat-earth-theory")
projects = browser.find_elements_by_xpath("//a[@class='answer_permalink']")
for proj in projects:
    anslink = proj.get_attribute('href')
    f.write(anslink)
f.close()
import requests
from bs4 import BeautifulSoup
r = requests.get("https://www.quora.com/How-do-I-prove-the-flat-earth-theory")
soup = BeautifulSoup(r.text, 'html.parser')
for item in soup.findAll("a", {'class': 'answer_permalink'}):
    print(item.get("href"))
Output:
/How-do-I-prove-the-flat-earth-theory/answer/Dave-Morgan-14
/How-do-I-prove-the-flat-earth-theory/answer/Ken-Natco
/How-do-I-prove-the-flat-earth-theory/answer/Matthew-Writer
/How-do-I-prove-the-flat-earth-theory/answer/Chance-Thompson-13
/How-do-I-prove-the-flat-earth-theory/answers/27223260
/How-do-I-prove-the-flat-earth-theory/answers/26836797
/How-do-I-prove-the-flat-earth-theory/answer/Frida-Schiess
/How-do-I-prove-the-flat-earth-theory/answer/Pierre-Ripplinger
/How-do-I-prove-the-flat-earth-theory/answer/Jacob-Fu
/How-do-I-prove-the-flat-earth-theory/answer/Mike-Howells-4
/How-do-I-prove-the-flat-earth-theory/answer/Mick-Stute
/How-do-I-prove-the-flat-earth-theory/answer/Jesse-Bridges-III
/How-do-I-prove-the-flat-earth-theory/answer/Renard-Leblanc
/How-do-I-prove-the-flat-earth-theory/answers/26831140
/How-do-I-prove-the-flat-earth-theory/answers/27158717
/How-do-I-prove-the-flat-earth-theory/answer/Chris-Lockwood-4
/How-do-I-prove-the-flat-earth-theory/answer/David-Minger
/How-do-I-prove-the-flat-earth-theory/answer/Rick-Brown-50
/How-do-I-prove-the-flat-earth-theory/answer/Jacques-Malan-4
/How-do-I-prove-the-flat-earth-theory/answer/Robert-Lent-1
/How-do-I-prove-the-flat-earth-theory/answers/79419339
/How-do-I-prove-the-flat-earth-theory/answer/Dave-Consiglio
/How-do-I-prove-the-flat-earth-theory/answers/65113366
/How-do-I-prove-the-flat-earth-theory/answer/Krishnabh-Medhi
Selenium Approach:
from selenium import webdriver
from selenium.webdriver.firefox.options import Options
from bs4 import BeautifulSoup
import time
options = Options()
options.add_argument('--headless')
driver = webdriver.Firefox(options=options)
driver.get('https://www.quora.com/How-do-I-prove-the-flat-earth-theory')
lenOfPage = driver.execute_script(
    "window.scrollTo(0, document.body.scrollHeight);var lenOfPage=document.body.scrollHeight;return lenOfPage;")
match = False
while match == False:
    lastCount = lenOfPage
    lenOfPage = driver.execute_script(
        "window.scrollTo(0, document.body.scrollHeight);var lenOfPage=document.body.scrollHeight;return lenOfPage;")
    if lastCount >= 51000:
        break
soup = BeautifulSoup(driver.page_source, 'html.parser')
count = 0
for item in soup.findAll("a", {'class': 'answer_permalink'}):
    count += 1
    print(item.get("href"))
print(count)
driver.quit()
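One note on the scroll loop above: it stops once the page height passes a hard-coded 51000 pixels. A more general idiom (a sketch, not specific to Quora) is to keep scrolling until the page height stops growing:

last_height = driver.execute_script("return document.body.scrollHeight")
while True:
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    time.sleep(2)  # give lazily loaded answers a moment to render
    new_height = driver.execute_script("return document.body.scrollHeight")
    if new_height == last_height:
        break  # the height stopped changing, so nothing more is loading
    last_height = new_height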
Output:
/How-do-I-prove-the-flat-earth-theory/answer/Dave-Morgan-14
/How-do-I-prove-the-flat-earth-theory/answer/Ken-Natco
/How-do-I-prove-the-flat-earth-theory/answer/Matthew-Writer
/How-do-I-prove-the-flat-earth-theory/answer/Chance-Thompson-13
/How-do-I-prove-the-flat-earth-theory/answers/27223260
/How-do-I-prove-the-flat-earth-theory/answers/26836797
/How-do-I-prove-the-flat-earth-theory/answer/Frida-Schiess
/How-do-I-prove-the-flat-earth-theory/answer/Pierre-Ripplinger
/How-do-I-prove-the-flat-earth-theory/answer/Jacob-Fu
/How-do-I-prove-the-flat-earth-theory/answer/Mike-Howells-4
/How-do-I-prove-the-flat-earth-theory/answer/Mick-Stute
/How-do-I-prove-the-flat-earth-theory/answer/Jesse-Bridges-III
/How-do-I-prove-the-flat-earth-theory/answer/Renard-Leblanc
/How-do-I-prove-the-flat-earth-theory/answers/26831140
/How-do-I-prove-the-flat-earth-theory/answer/Danya-Rose
/How-do-I-prove-the-flat-earth-theory/answer/Chris-Lockwood-4
/How-do-I-prove-the-flat-earth-theory/answer/David-Minger
/How-do-I-prove-the-flat-earth-theory/answer/Rick-Brown-50
/How-do-I-prove-the-flat-earth-theory/answer/Jacques-Malan-4
/How-do-I-prove-the-flat-earth-theory/answer/Robert-Lent-1
/How-do-I-prove-the-flat-earth-theory/answer/John-Lind-22
/How-do-I-prove-the-flat-earth-theory/answer/Dave-Consiglio
/How-do-I-prove-the-flat-earth-theory/answers/65113366
/How-do-I-prove-the-flat-earth-theory/answer/Krishnabh-Medhi
/How-do-I-prove-the-flat-earth-theory/answers/44569062
/How-do-I-prove-the-flat-earth-theory/answer/Abd-Ul-Rahman-Lomax
/How-do-I-prove-the-flat-earth-theory/answer/Helmut-Walle
/How-do-I-prove-the-flat-earth-theory/answer/Ed-Kohlwey-1
/How-do-I-prove-the-flat-earth-theory/answer/Jason-Ree-4
/How-do-I-prove-the-flat-earth-theory/answer/Drew-Curry
/How-do-I-prove-the-flat-earth-theory/answer/Darrel-Blakely-2
/How-do-I-prove-the-flat-earth-theory/answer/Alexander-Kunz-2
/How-do-I-prove-the-flat-earth-theory/answer/Michael-Greenberg-61
/How-do-I-prove-the-flat-earth-theory/answer/Matthew-Schenker
/How-do-I-prove-the-flat-earth-theory/answer/Gregory-Hart-8
/How-do-I-prove-the-flat-earth-theory/answer/Mark-Giammattei
/How-do-I-prove-the-flat-earth-theory/answer/Vernon-Bender
/How-do-I-prove-the-flat-earth-theory/answer/Brett-Evill
/How-do-I-prove-the-flat-earth-theory/answer/Kurt-Mager
/How-do-I-prove-the-flat-earth-theory/answer/Michael-Brenner-13
/How-do-I-prove-the-flat-earth-theory/answer/Luke-Anderson-87
/How-do-I-prove-the-flat-earth-theory/answer/Sassa-Neuf
/How-do-I-prove-the-flat-earth-theory/answer/Spandan-Mallick
/How-do-I-prove-the-flat-earth-theory/answers/58252346
/How-do-I-prove-the-flat-earth-theory/answer/Timothy-Lamothe
/How-do-I-prove-the-flat-earth-theory/answer/Eric-Schwertfeger
/How-do-I-prove-the-flat-earth-theory/answers/70843234
/How-do-I-prove-the-flat-earth-theory/answer/Tony-Flury
/How-do-I-prove-the-flat-earth-theory/answer/Aji-Jijo
/How-do-I-prove-the-flat-earth-theory/answer/Tia-Eastlake
/How-do-I-prove-the-flat-earth-theory/answer/Michael-Grace-53
/How-do-I-prove-the-flat-earth-theory/answer/Ray-Mason-30
/How-do-I-prove-the-flat-earth-theory/answer/Jimmy-May-2
/How-do-I-prove-the-flat-earth-theory/answer/Thomas-Edward-Samuel-Thomas
/How-do-I-prove-the-flat-earth-theory/answer/Alan-Atkinson-4
/How-do-I-prove-the-flat-earth-theory/answer/Joseph-Perkins-11
/How-do-I-prove-the-flat-earth-theory/answer/David-Ridlen
/How-do-I-prove-the-flat-earth-theory/answer/Charles-Li-86
/How-do-I-prove-the-flat-earth-theory/answers/140610748
/How-do-I-prove-the-flat-earth-theory/answer/Corentin-Oger
/How-do-I-prove-the-flat-earth-theory/answer/Jean-Pierre-Choisy
/How-do-I-prove-the-flat-earth-theory/answer/Tom-Kubin
/How-do-I-prove-the-flat-earth-theory/answers/120618033
/How-do-I-prove-the-flat-earth-theory/answer/Charles-Brenchley-1
/How-do-I-prove-the-flat-earth-theory/answer/Jonathan-Johnson-41
/How-do-I-prove-the-flat-earth-theory/answer/Edward-Teach-53
/How-do-I-prove-the-flat-earth-theory/answer/Tony-Price-50
/How-do-I-prove-the-flat-earth-theory/answer/Nathaniel-Day-8
/How-do-I-prove-the-flat-earth-theory/answer/Nuurussubchiy-Fikriy
/How-do-I-prove-the-flat-earth-theory/answers/150581075
/How-do-I-prove-the-flat-earth-theory/answers/87762707
/How-do-I-prove-the-flat-earth-theory/answer/Neil-219
/How-do-I-prove-the-flat-earth-theory/answer/Alex-Frantz-1
/How-do-I-prove-the-flat-earth-theory/answer/Andy-P-Zbinden
/How-do-I-prove-the-flat-earth-theory/answer/Uriel-Anderson-4
/How-do-I-prove-the-flat-earth-theory/answer/Chris-OLeary-19
/How-do-I-prove-the-flat-earth-theory/answer/Daniel-Gerber-7
/How-do-I-prove-the-flat-earth-theory/answer/Roy-Wilson-64
/How-do-I-prove-the-flat-earth-theory/answer/Randy-Wonsowicz-Jr
/How-do-I-prove-the-flat-earth-theory/answer/Leslie-Harrington-4
/How-do-I-prove-the-flat-earth-theory/answer/Eddie-Olsson
/How-do-I-prove-the-flat-earth-theory/answer/Vincent-Emery
/How-do-I-prove-the-flat-earth-theory/answer/Maxwell-Perry-3
/How-do-I-prove-the-flat-earth-theory/answer/Matthew-Granovsky
/How-do-I-prove-the-flat-earth-theory/answers/83259600
/How-do-I-prove-the-flat-earth-theory/answer/Benjamin-Dixon-17
/How-do-I-prove-the-flat-earth-theory/answer/John-Chambers-75
/How-do-I-prove-the-flat-earth-theory/answer/Ryne-Hanz
/How-do-I-prove-the-flat-earth-theory/answer/Eric-Rodriguez-137
/How-do-I-prove-the-flat-earth-theory/answer/Robert-Hopkins-90
/How-do-I-prove-the-flat-earth-theory/answer/Sasha-Maddah
/How-do-I-prove-the-flat-earth-theory/answer/Owen-Lee-126
/How-do-I-prove-the-flat-earth-theory/answer/David-Phillips-133
/How-do-I-prove-the-flat-earth-theory/answer/Hasan-Poonawala-1
/How-do-I-prove-the-flat-earth-theory/answer/Cristiano-Dal-Vi
/How-do-I-prove-the-flat-earth-theory/answer/Rex-Newborn
/How-do-I-prove-the-flat-earth-theory/answer/John-Neumann-9
/How-do-I-prove-the-flat-earth-theory/answer/Josh-D-Davis
/How-do-I-prove-the-flat-earth-theory/answer/Maruthi-Sreenath
/How-do-I-prove-the-flat-earth-theory/answer/Clint-Morgan-2
/How-do-I-prove-the-flat-earth-theory/answer/Nicholas-Volkmuth
/How-do-I-prove-the-flat-earth-theory/answer/Richard-Swim
/How-do-I-prove-the-flat-earth-theory/answers/143504277
/How-do-I-prove-the-flat-earth-theory/answer/Christer-Svanström
/How-do-I-prove-the-flat-earth-theory/answer/Steve-Schlackman-2
/How-do-I-prove-the-flat-earth-theory/answers/147597845
/How-do-I-prove-the-flat-earth-theory/answer/Rene-Dukundane-Felix
/How-do-I-prove-the-flat-earth-theory/answers/148753762
/How-do-I-prove-the-flat-earth-theory/answer/Henk-Schuring
/How-do-I-prove-the-flat-earth-theory/answers/135814117
/How-do-I-prove-the-flat-earth-theory/answer/Emilio-Trampuz
/How-do-I-prove-the-flat-earth-theory/answers/40529643
/How-do-I-prove-the-flat-earth-theory/answer/Karl-Sangree
/How-do-I-prove-the-flat-earth-theory/answer/Ted-Carriker
/How-do-I-prove-the-flat-earth-theory/answer/egi-syahban
/How-do-I-prove-the-flat-earth-theory/answer/Mayank-Dahiya-12
/How-do-I-prove-the-flat-earth-theory/answer/Robert-Jones-741
/How-do-I-prove-the-flat-earth-theory/answer/Jimmi-Carlsson-1
/How-do-I-prove-the-flat-earth-theory/answer/Cole-Johnson-24
/How-do-I-prove-the-flat-earth-theory/answer/Kram-Redarsh
/How-do-I-prove-the-flat-earth-theory/answers/64915389
I have a script that loads a page and saves a bunch of data ids from multiple containers. I then want to open up new urls appending those said data ids onto the end of the urls. For each url I want to locate all the hrefs and compare them to a list of specific links and if any of them match I want to save that link and a few other details to a table.
I have managed to get it to open the url with the appended data-id, but when I try to search for elements on the new page it either pulls them from the first url that was parsed (if I try to findAll from soup again), or I constantly get this error when I try to run another html.parser:
ResultSet object has no attribute 'findAll'. You're probably treating a list of items like a single item. Did you call find_all() when you meant to call find()?
Is it not possible to run another parser or am I just doing something wrong?
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from bs4 import BeautifulSoup as soup
from selenium.webdriver.common.action_chains import ActionChains
url = "http://csgo.exchange/id/76561197999004010#x"
driver = webdriver.Firefox()
driver.get(url)
import time
time.sleep(15)
html = driver.page_source
soup = soup(html, "html.parser")
containers = soup.findAll("div",{"class":"vItem"})
print(len(containers))
data_ids = [] # Make a list to hold the data-id's
for container in containers:
    test = container.attrs["data-id"]
    data_ids.append(test)  # add data-id's to the list
    print(str(test))

for id in data_ids:
    url2 = "http://csgo.exchange/item/" + id
    driver.get(url2)
    import time
    time.sleep(2)
    soup2 = soup(html, "html.parser")
    containers2 = soup2.findAll("div", {"class": "bar"})
    print(str(containers2))

with open('scraped.txt', 'w', encoding="utf-8") as file:
    for id in data_ids:
        file.write(str(id) + '\n')  # write every data-id to a new line
Not sure exactly what you want from each page. You should add waits. Below I add waits looking for hrefs in the flow history section of each page (if present); it should illustrate the idea.
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
url = 'http://csgo.exchange/id/76561197999004010'
driver = webdriver.Chrome()
driver.get(url)
ids = [item.get_attribute('data-id') for item in WebDriverWait(driver,10).until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, "[data-id]")))]
results = []
baseURL = 'http://csgo.exchange/item/'
for id in ids:
    url = baseURL + id
    driver.get(url)
    try:
        flowHistory = [item.get_attribute('href') for item in WebDriverWait(driver, 10).until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, "#tab-history-flow [href]")))]
        results.append([id, flowHistory])
    except:
        print(url)
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
url = 'http://csgo.exchange/id/76561197999004010'
profile = webdriver.FirefoxProfile()
profile.set_preference("permissions.default.image", 2) # Block all images to load websites faster.
driver = webdriver.Firefox(firefox_profile=profile)
driver.get(url)
ids = [item.get_attribute('data-id') for item in WebDriverWait(driver,30).until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, "[data-id]")))]
results = []
baseURL = 'http://csgo.exchange/item/'
for id in ids:
    url = baseURL + id
    driver.get(url)
    try:
        pros = ['http://csgo.exchange/profiles/76561198149324950']
        flowHistory = [item.get_attribute('href') for item in WebDriverWait(driver, 3).until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, "#tab-history-flow [href]")))]
        if flowHistory in pros:
            results.append([url, flowHistory])
            print(results)
    except:
        print()
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
urls = ['http://csgo.exchange/id/76561197999004010']
profile = webdriver.FirefoxProfile()
profile.set_preference("permissions.default.image", 2) # Block all images to load websites faster.
driver = webdriver.Firefox(firefox_profile=profile)
for url in urls:
    driver.get(url)
    ids = [item.get_attribute('data-id') for item in WebDriverWait(driver, 30).until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, "[data-id]")))]
    results = []
    pros = ['http://csgo.exchange/profiles/76561198149324950', 'http://csgo.exchange/profiles/76561198152970370']
    baseURL = 'http://csgo.exchange/item/'
    for id in ids:
        url = baseURL + id
        driver.get(url)
        try:
            flowHistory = [item.get_attribute('href') for item in WebDriverWait(driver, 2).until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, "#tab-history-flow [href]")))]
            match = []
            for string in pros:
                if string in flowHistory:
                    match = string
                    break
            if match:
                results.append([url, match])
            print(results)
        except:
            print()
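Since the goal was to save the matching links and a few details to a table, a small follow-up (a sketch, assuming the results list built above) is to write it out as CSV once the loop finishes:

import csv

with open('matches.csv', 'w', newline='', encoding='utf-8') as f:
    writer = csv.writer(f)
    writer.writerow(['item_url', 'matched_profile'])  # header row
    writer.writerows(results)                         # one row per [url, match] pair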