Can a while loop print the current time on each iteration? - Python

I have a bot that automates tasks for me using Selenium. I have a counter, but I need the date and time too. My code is:
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.action_chains import ActionChains
import time
import datetime

i = 1
actions = ActionChains(OMS)
now = datetime.datetime.now()
while i <= amount:
    time.sleep(1)
    bot.find_element_by_class_name('classname').click()
    actions.send_keys("Lorem ipsum dolor sit amet, consectetlerisque." + Keys.RETURN)
    actions.perform()
    time.sleep(3)
    print(i)
    print(str(now))
    i = i + 1
OMS.description(500)
Output:
1
2020-11-09 13:30:09
2
2020-11-09 13:30:09
3
2020-11-09 13:30:09
4
2020-11-09 13:30:09
...
Can my code print the date and time at which the keys were sent?
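Yes. The likely issue is that now is assigned once, before the loop starts, so the same timestamp is printed on every iteration. Calling datetime.datetime.now() inside the loop gives a fresh timestamp each time the keys are sent. A minimal sketch of the idea (the fixed value 3 stands in for your amount, and the Selenium calls are elided):

import datetime
import time

i = 1
while i <= 3:  # 3 stands in for your `amount`
    time.sleep(1)
    # ... click the element and send the keys here ...
    print(i)
    # taken inside the loop, so each iteration prints the current time
    print(datetime.datetime.now())
    i = i + 1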

Related

Importing CSV with time losing format

I am trying to import a CSV file containing times, but when I import it the times do not show in the correct format. How can I fix the format of the imported times?
import pandas as pd
# load the data from the CSV file
data = pd.read_csv('H_data - Copy.csv')
print(data.head(5))
Result :
0 0 24:39.5
1 1 25:20.4
2 2 25:56.1
3 3 26:36.1
4 4 27:21.0
The CSV looks like this when I copy and paste it (I'm not sure how to upload it here):
time
0 24:39.5
1 25:20.4
2 25:56.1
3 26:36.1
4 27:21.0
5 27:57.1
6 28:34.2
7 29:11.0
8 29:47.6
9 30:27.4
10 31:06.6
11 31:46.9
12 32:22.9
13 32:58.4
14 33:30.3
15 34:13.2
16 34:51.8
17 35:32.8
18 36:04.5
19 36:46.4
20 37:27.0
21 37:58.2
22 38:43.1
23 39:23.5
24 39:54.6
25 40:39.5
26 41:15.1
27 41:55.6
28 42:27.8
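Since the values look like minutes:seconds.tenths, pandas reads them as plain strings. One option, sketched under that assumption, is to convert them to Timedelta values after loading by prefixing the missing hours field:

import pandas as pd

data = pd.read_csv('H_data - Copy.csv')
# values like "24:39.5" are MM:SS.f; prefixing "00:" lets pandas
# parse them as HH:MM:SS.f timedeltas
data['time'] = pd.to_timedelta('00:' + data['time'])
print(data.head(5))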

How to get a table and its elements with Python/Selenium

I'm trying to get all the prices in the table at this URL:
https://www.skyscanner.it/trasporti/voli/bud/rome/?adults=1&adultsv2=1&cabinclass=economy&children=0&childrenv2=&destinationentityid=27539793&inboundaltsenabled=true&infants=0&iym=2208&originentityid=27539604&outboundaltsenabled=true&oym=2208&preferdirects=false&ref=home&rtn=1&selectedoday=01&selectediday=01
The table elements are the days with the related price.
This is what I'm trying to do to get the table:
# Attempt 1
week = table.find_element(By.CLASS_NAME, "BpkCalendarGrid_bpk-calendar-grid__NzBmM month-view-grid--data-loaded")
# Attempt 2
table = driver.find_element(by=By.XPATH, value="XPath copied using Chrome inspector")
However, I cannot get it.
What is the correct way to extract all the prices from this table? Thanks!
You can grab the table data, meaning all the prices, using Selenium with a pandas DataFrame. There are two tables on the page that contain the price data.
import pandas as pd
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
option = webdriver.ChromeOptions()
option.add_argument("start-maximized")
#chrome to stay open
option.add_experimental_option("detach", True)
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()),options=option)
driver.get('https://www.skyscanner.it/trasporti/voli/bud/rome/?adults=1&adultsv2=1&cabinclass=economy&children=0&childrenv2=&destinationentityid=27539793&inboundaltsenabled=true&infants=0&iym=2208&originentityid=27539604&outboundaltsenabled=true&oym=2208&preferdirects=false&ref=home&rtn=1&selectedoday=01&selectediday=01')
table = WebDriverWait(driver, 20).until(EC.visibility_of_element_located((By.XPATH, '(//table)[1]'))).get_attribute("outerHTML")
table_2 = WebDriverWait(driver, 20).until(EC.visibility_of_element_located((By.XPATH, '(//table)[2]'))).get_attribute("outerHTML")
WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.XPATH, '//*[@id="acceptCookieButton"]'))).click()
df1 = pd.read_html(table)[0]
print(df1)
df2 = pd.read_html(table_2)[0]
print(df2)
Output:
lun mar mer gio ven sab dom
0 1€ 40 2€ 28 3€ 32 4€ 37 5€ 34 6€ 35 7€ 34
1 8€ 34 9€ 28 10€ 27 11€ 26 12€ 26 13€ 46 14€ 35
2 15€ 35 16€ 40 17€ 36 18€ 51 19€ 28 20€ 33 21€ 36
3 22€ 38 23€ 38 24€ 30 25€ 50 26€ 43 27€ 50 28€ 51
4 29€ 38 30€ 36 31€ 58 1- 2- 3- 4-
5 5- 6- 7- 8- 9- 10- 11-
lun mar mer gio ven sab dom
0 1€ 40 2€ 28 3€ 32 4€ 37 5€ 34 6€ 35 7€ 34
1 8€ 34 9€ 28 10€ 27 11€ 26 12€ 26 13€ 46 14€ 35
2 15€ 35 16€ 40 17€ 36 18€ 51 19€ 28 20€ 33 21€ 36
3 22€ 38 23€ 38 24€ 30 25€ 50 26€ 43 27€ 50 28€ 51
4 29€ 38 30€ 36 31€ 58 1- 2- 3- 4-
5 5- 6- 7- 8- 9- 10- 11-
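As a side note, pandas 2.1+ emits a FutureWarning when a literal HTML string is passed to read_html and asks for it to be wrapped first; if you see that warning, a small adjustment is enough:

from io import StringIO

df1 = pd.read_html(StringIO(table))[0]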
Alternative solution (Table 1): the same approach lets you extract the prices from table two as well.
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
option = webdriver.ChromeOptions()
option.add_argument("start-maximized")
#chrome to stay open
option.add_experimental_option("detach", True)
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()),options=option)
driver.get('https://www.skyscanner.it/trasporti/voli/bud/rome/?adults=1&adultsv2=1&cabinclass=economy&children=0&childrenv2=&destinationentityid=27539793&inboundaltsenabled=true&infants=0&iym=2208&originentityid=27539604&outboundaltsenabled=true&oym=2208&preferdirects=false&ref=home&rtn=1&selectedoday=01&selectediday=01')
WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.XPATH, '//*[@id="acceptCookieButton"]'))).click()
table = WebDriverWait(driver, 20).until(EC.visibility_of_all_elements_located((By.XPATH, '(//table)[1]/tbody/tr/td')))
for i in table:
    price = i.find_element(By.XPATH, './/div[@class="price"]').text.replace('€', '').strip()
    print(price)
Output:
39
30
32
37
34
35
34
34
28
27
26
26
46
35
35
40
36
52
29
34
37
39
39
30
50
44
50
52
38
36
58

Python find_element in find_elements problem?

Good day everyone:
I'd like to get the basketball game data from the web, including league, date, time, and score.
The first-level for loop works fine to get every league title:
for league in leagues:
But in the second-level for loop:
for row in _rows:
I always get all leagues' rows; I just need the data league by league.
What should I do to fix it?
Any help will be greatly appreciated.
from selenium import webdriver
#from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import Select
from selenium.webdriver.common.action_chains import ActionChains
import time
from selenium.common.exceptions import NoSuchElementException
driver = webdriver.Chrome()
driver.set_window_size(1500, 1350)
# open url (sorry for the split url, the system always reports it as spam)
driver.get("https://"+"we"+"b2."+"sa8"+"8"+"88.n"+"et"+"/sp"+"ort/Ga"+"mes.aspxdevice=pc")
# jump to basketball
locator = (By.XPATH, '//*[@id="menuList"]/div/ul/li[3]/div[2]/a[1]')
pointer = WebDriverWait(driver, 10).until(
    EC.presence_of_element_located(locator),
    "element not found"
)
actions = ActionChains(driver)
actions.click(pointer).perform()
time.sleep(1)
# date menu
locator = (By.XPATH, '//*[@id="chooseDate"]')
pointer = WebDriverWait(driver, 10).until(
    EC.presence_of_element_located(locator),
    "element not found"
)
actions = ActionChains(driver)
actions.click(pointer).perform()
# jump to date 1
locator = (By.XPATH, '//*[@id="dateOption"]/a[1]/span[1]')
pointer = WebDriverWait(driver, 10).until(
    EC.presence_of_element_located(locator),
    "element not found"
)
actions = ActionChains(driver)
actions.click(pointer).perform()
# close AD by double click
locator = (By.ID, 'btn_close')
pointer = WebDriverWait(driver, 10).until(
    EC.presence_of_element_located(locator),
    "element not found"
)
actions = ActionChains(driver)
actions.click(pointer).perform()
actions = ActionChains(driver)
actions.click(pointer).perform()
# list all leagues schedule
leagues = []
leagues = driver.find_elements(By.XPATH, '//*[@id="scheduleBottom"]/table[*]')
for league in leagues:
    #print("Block.text=", Block.text, "\n")
    #_rows = Block.find_elements(By.TAG_NAME, "tr")
    league_Title = league.find_element(By.TAG_NAME, 'caption')
    _rows = []
    _rows = league.find_elements(By.XPATH, "//*[contains(@id, '_mainRow') or contains(@id, '_secondRow')]")
    print("\nleague : ", league_Title.text, 'len(_rows)=', len(_rows))
    for row in _rows:
        print(league_Title, row.text)  #," / _rows=",_rows)
        # first_rows = Block.find_element(By.XPATH, "//*[contains(@id, '_mainRow')]")
        # second_rows = Block.find_element(By.XPATH, "//*[contains(@id, '_secondRow')]")
        print("\trow : ", row.text)
    time.sleep(1)
time.sleep(120)
driver.quit()
I can't run the code because the page shows Error 404.
EDIT: in the question you forgot the ? in Games.aspx?device=pc, and this caused the 404 problem.
You have to use a dot . at the beginning of the XPath to make the path relative to league:
_rows = league.find_elements(By.XPATH, ".//...rest...")  # <-- dot before `//`
Your XPath is absolute, so it searches the full HTML.
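For example, these two calls behave very differently even though they look almost the same:

# absolute: starts from the document root, ignoring `league`
rows = league.find_elements(By.XPATH, "//tr")
# relative: starts from the `league` element itself
rows = league.find_elements(By.XPATH, ".//tr")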
EDIT:
Partial result with the dot in the XPath:
I used lang=3 to get English text, and I used a[2] to select the second date (03 / 06 (Sun)) because the first date (03 / 05 (Sat)) was empty (no matches).
url: https://web2.sa8888.net/sport/Games.aspx?lang=3&device=pc
len(leagues): 115
league: NBA len(_rows)= 12
row: 06:05 Finished Dallas Mavericks 26 25 34 29 114 | Live Update
row: Sacramento Kings 36 29 27 21 113
row: 08:05 Finished Charlotte Hornets 31 31 37 24 123 | Live Update
row: San Antonio Spurs 30 30 37 20 117
row: 09:05 Finished Miami Heat 22 32 19 26 99 | Live Update
row: Philadelphia 76ers 14 26 28 14 82
row: 09:05 Finished Memphis Grizzlies 31 37 29 27 124 | Live Update
row: Orlando Magic 29 16 29 22 96
row: 09:05 Finished Minnesota Timberwolves 32 31 46 26 135 | Live Update
row: Portland Trail Blazers 34 30 37 20 121
row: 09:35 Finished Los Angeles Lakers 32 30 27 35 124 | Live Update
row: Golden State Warriors 25 42 27 22 116
---
league: NBA GATORADE LEAGUE len(_rows)= 8
row: 08:00 Finished Delaware Blue Coats 42 34 37 33 146 | Live Update
row: Westchester Knicks 28 28 24 31 111
row: 09:00 Finished Austin Spurs 35 21 23 31 110 | Live Update
row: Salt Lake City Stars 30 32 21 17 100
row: 09:00 Finished Wisconsin Herd 26 30 20 38 114 | Live Update
row: Capital City Go-Go 27 31 32 38 128
row: 11:00 Finished Santa Cruz Warriors 36 19 17 27 99 | Live Update
row: Memphis Hustle 26 29 22 30 107
---
league: CHINA PROFESSIONAL BASKETBALL LEAGUE len(_rows)= 12
row: 11:00 Finished Fujian Sturgeons 37 21 27 32 117 | Live Update
row: Ningbo Rockets 24 28 34 25 111
row: 11:00 Finished Sichuan Blue Whales 12 21 27 20 80 | Live Update
row: Zhejiang Lions 23 27 35 25 110
row: 15:00 Finished Shenzhen Leopards 23 32 30 33 118 | Live Update
row: Shandong Hi Speed 29 25 32 29 115
row: 15:30 Finished Jilin Northeast Tigers 36 39 25 18 118 | Live Update
row: Shanghai Sharks 15 25 32 36 108
row: 19:35 Finished Beijing Ducks 24 20 17 22 83 | Live Update
row: Beijing Royal Fighters 18 18 21 22 79
row: 20:00 Finished Nanjing Monkey King 23 24 23 25 95 | Live Update
row: Jiangsu Dragons 18 17 21 24 80
---
Full working code:
I also added a WebDriverWait to wait for the leagues, and row.text.replace('\n', ' | ') to display each row on one line.
from selenium import webdriver
#from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import Select
from selenium.webdriver.common.action_chains import ActionChains
import time
from selenium.common.exceptions import NoSuchElementException
driver = webdriver.Chrome()
driver.set_window_size(1500, 1350)
# open url
driver.get("https://web2.sa8888.net/sport/Games.aspx?lang=3&device=pc")  # lang=3 for English
# jump to basketball
locator = (By.XPATH, '//*[@id="menuList"]/div/ul/li[3]/div[2]/a[1]')
pointer = WebDriverWait(driver, 10).until(
    EC.presence_of_element_located(locator),
    "element not found"
)
actions = ActionChains(driver)
actions.click(pointer).perform()
time.sleep(1)
# date menu
locator = (By.XPATH, '//*[@id="chooseDate"]')
pointer = WebDriverWait(driver, 10).until(
    EC.presence_of_element_located(locator),
    "element not found"
)
actions = ActionChains(driver)
actions.click(pointer).perform()
# jump to date 1
locator = (By.XPATH, '//*[@id="dateOption"]/a[2]/span[1]')  # a[2] for second date, because first has no matches
pointer = WebDriverWait(driver, 10).until(
    EC.presence_of_element_located(locator),
    "element not found"
)
actions = ActionChains(driver)
actions.click(pointer).perform()
# close AD by double click
locator = (By.ID, 'btn_close')
pointer = WebDriverWait(driver, 10).until(
    EC.presence_of_element_located(locator),
    "element not found"
)
actions = ActionChains(driver)
actions.click(pointer).perform()
actions = ActionChains(driver)
actions.click(pointer).perform()
# wait for leagues
locator = (By.XPATH, '//*[@id="scheduleBottom"]/table[*]')
pointer = WebDriverWait(driver, 10).until(
    EC.presence_of_element_located(locator),
    "element not found"
)
# list all leagues schedule
leagues = driver.find_elements(By.XPATH, '//*[@id="scheduleBottom"]/table[*]')
print('len(leagues):', len(leagues))
for league in leagues:
    #print("Block.text=", Block.text, "\n")
    #_rows = Block.find_elements(By.TAG_NAME, "tr")
    league_Title = league.find_element(By.TAG_NAME, 'caption')
    _rows = league.find_elements(By.XPATH, ".//*[contains(@id, '_mainRow') or contains(@id, '_secondRow')]")
    print("\nleague:", league_Title.text, 'len(_rows)=', len(_rows))
    for row in _rows:
        #print(league_Title, row.text)  #," / _rows=",_rows)
        # first_rows = Block.find_element(By.XPATH, "//*[contains(@id, '_mainRow')]")
        # second_rows = Block.find_element(By.XPATH, "//*[contains(@id, '_secondRow')]")
        print("\trow:", row.text.replace('\n', ' | '))  # <- clean text
    time.sleep(1)
    print('---')
time.sleep(120)
driver.quit()
find_element() returns only one element: if several elements match, you get just the first one. find_elements() returns all matching elements as a list (in other libraries the equivalents are find() and findAll()).
Hope this helps you some.
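A quick sketch of the difference (assuming an open driver):

from selenium.webdriver.common.by import By

# find_element: only the first match
# (raises NoSuchElementException when nothing matches)
first_row = driver.find_element(By.TAG_NAME, "tr")

# find_elements: every match, as a (possibly empty) list
all_rows = driver.find_elements(By.TAG_NAME, "tr")
print(len(all_rows))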

How to change a list of synsets to list elements?

I have tried out the following snippet of code for my project:
import pandas as pd
import nltk
from nltk.corpus import wordnet as wn
nltk.download('wordnet')
df=[]
hypo = wn.synset('science.n.01').hyponyms()
hyper = wn.synset('science.n.01').hypernyms()
mero = wn.synset('science.n.01').part_meronyms()
holo = wn.synset('science.n.01').part_holonyms()
ent = wn.synset('science.n.01').entailments()
df = df+hypo+hyper+mero+holo+ent
df_agri_clean = pd.DataFrame(df)
df_agri_clean.columns=["Items"]
print(df_agri_clean)
pd.set_option('display.expand_frame_repr', False)
It gives me this DataFrame as output:
Items
0 Synset('agrobiology.n.01')
1 Synset('agrology.n.01')
2 Synset('agronomy.n.01')
3 Synset('architectonics.n.01')
4 Synset('cognitive_science.n.01')
5 Synset('cryptanalysis.n.01')
6 Synset('information_science.n.01')
7 Synset('linguistics.n.01')
8 Synset('mathematics.n.01')
9 Synset('metallurgy.n.01')
10 Synset('metrology.n.01')
11 Synset('natural_history.n.01')
12 Synset('natural_science.n.01')
13 Synset('nutrition.n.03')
14 Synset('psychology.n.01')
15 Synset('social_science.n.01')
16 Synset('strategics.n.01')
17 Synset('systematics.n.01')
18 Synset('thanatology.n.01')
19 Synset('discipline.n.01')
20 Synset('scientific_theory.n.01')
21 Synset('scientific_knowledge.n.01')
Printing df itself shows the same data as a list:
[Synset('agrobiology.n.01'), Synset('agrology.n.01'), Synset('agronomy.n.01'), Synset('architectonics.n.01'), Synset('cognitive_science.n.01'), Synset('cryptanalysis.n.01'), Synset('information_science.n.01'), Synset('linguistics.n.01'), Synset('mathematics.n.01'), Synset('metallurgy.n.01'), Synset('metrology.n.01'), Synset('natural_history.n.01'), Synset('natural_science.n.01'), Synset('nutrition.n.03'), Synset('psychology.n.01'), Synset('social_science.n.01'), Synset('strategics.n.01'), Synset('systematics.n.01'), Synset('thanatology.n.01'), Synset('discipline.n.01'), Synset('scientific_theory.n.01'), Synset('scientific_knowledge.n.01')]
I wish to change every word under "Items" like so :
Synset('agrobiology.n.01') => agrobiology.n.01
or
Synset('agrobiology.n.01') => 'agrobiology'
Any help will be appreciated! Thanks!
To access the name of each of these items, call its .name() method. You could use a list comprehension to update the items as follows:
df_agri_clean['Items'] = [df_agri_clean['Items'][i].name() for i in range(len(df_agri_clean))]
df_agri_clean
The output will be as you expected
Items
0 agrobiology.n.01
1 agrology.n.01
2 agronomy.n.01
3 architectonics.n.01
4 cognitive_science.n.01
5 cryptanalysis.n.01
6 information_science.n.01
7 linguistics.n.01
8 mathematics.n.01
9 metallurgy.n.01
10 metrology.n.01
11 natural_history.n.01
12 natural_science.n.01
13 nutrition.n.03
14 psychology.n.01
15 social_science.n.01
16 strategics.n.01
17 systematics.n.01
18 thanatology.n.01
19 discipline.n.01
20 scientific_theory.n.01
21 scientific_knowledge.n.01
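Equivalently, pandas' apply method expresses the same transformation without an explicit index loop (run on the original Synset column, before any conversion):

df_agri_clean['Items'] = df_agri_clean['Items'].apply(lambda syn: syn.name())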
To also strip ".n.01" from the string, you could do the following:
df_agri_clean['Items'] = [df_agri_clean['Items'][i].name().replace('.n.01', '') for i in range(len(df_agri_clean))]
df_agri_clean
Output (just like your second expected output)
Items
0 agrobiology
1 agrology
2 agronomy
3 architectonics
4 cognitive_science
5 cryptanalysis
6 information_science
7 linguistics
8 mathematics
9 metallurgy
10 metrology
11 natural_history
12 natural_science
13 nutrition.n.03
14 psychology
15 social_science
16 strategics
17 systematics
18 thanatology
19 discipline
20 scientific_theory
21 scientific_knowledge
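One caveat visible in the output above: replace('.n.01', '') leaves nutrition.n.03 untouched, because its sense number is .03 rather than .01. Splitting on the first dot drops the suffix regardless of the sense number (df here is the synset list built in the question):

df_agri_clean['Items'] = [syn.name().split('.')[0] for syn in df]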

How to extract a table using requests/pandas in Python

I tried the code below for extracting the product name, year, and values from the table, but somewhere I am getting issues.
My code:
import requests
import pandas as pd
import pymysql

try:
    df = []
    dates1 = []
    try:
        url = 'http://cpmaindia.com/fiscal_custom_duty.php'
        html = requests.get(url).content
        tab_list = pd.read_html(html)
        tab = tab_list[0]
        tab.apply(lambda x: x.tolist(), axis=1)
        tab = tab.values.tolist()
        print(tab)
    except Exception as e:
        raise e
except Exception as e:
    raise e
This is what I tried, but I am not getting the desired output.
I want to parse the table only.
Thanks.
tab_list[0] produces the following:
print (tab)
0
0 <!-- function MM_swapImgRestore() { //v3.0 va...
1 Custom Duty Import Duty on Petrochemicals (%)...
2 <!-- body { \tmargin-left: 0px; \tmargin-top: ...
Did you mean to grab tab_list[8]?
Also, if you're using pandas to read in the table from html, there is no need to use requests:
import pandas as pd
url = 'http://cpmaindia.com/fiscal_custom_duty.php'
tab_list = pd.read_html(url)
table = tab_list[8]
table.columns = table.iloc[0,:]
table = table.iloc[1:,2:-1]
Output:
print (table)
0 Import Duty on Petrochemicals (%) ... Import Duty on Petrochemicals (%)
1 Product / Year - ... 16/17
2 Naphtha ... 5
3 Ethylene ... 2.5
4 Propylene ... 2.5
5 Butadiene ... 2.5
6 Benzene ... 2.5
7 Toluene ... 2.5
8 Mixed Xylene ... 2.5
9 Para Xylene ... 0
10 Ortho Xylene ... 0
11 LDPE ... 7.5
12 LLDPE ... 7.5
13 HDPE ... 7.5
14 PP ... 7.5
15 PVC ... 7.5
16 PS ... 7.5
17 EDC ... 2
18 VCM ... 2
19 Styrene ... 2
20 SBR ... 10
21 PBR ... 10
22 MEG ... 5
23 DMT ... 5
24 PTA ... 5
25 ACN ... 5
[25 rows x 7 columns]
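As a follow-up, instead of hard-coding index 8, read_html accepts a match argument that keeps only tables containing the given text, which is more robust if the page layout changes. A sketch using 'Naphtha', a product name visible in the target table:

import pandas as pd

url = 'http://cpmaindia.com/fiscal_custom_duty.php'
# keep only tables whose text contains 'Naphtha'
tab_list = pd.read_html(url, match='Naphtha')
table = tab_list[0]
print(table.head())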
