How to get $0 value in Webbot using Python

Please find my code below. I am trying to get the Seller Proceeds value from a website. In the browser console, $0.value returns 598.08, but when I try the following I get the placeholder text "Calculate" instead:
sel_proc = web.find_elements(id="afn-seller-proceeds")[0].text
Full code:
import pandas as pd
from webbot import Browser
from bs4 import BeautifulSoup

web = Browser()
##web.set_window_position(-10000,0)
df = pd.read_excel('sample.xlsx')
soafees = []
fulfees = []
selproc = []
for ind in df.index:
    web.go_to('https://somelink')
    ## web.set_window_position(-10000,0)
    web.click(id='link_continue')
    print("Login Successful")
    asin = df['ASIN'][ind]
    sp = int(df['Selling Price'][ind])
    print(sp)
    cp = int(df['Cost of Product'][ind])
    print(cp)
    web.type(df['ASIN'][ind], into='Enter your product name, UPC, EAN, ISBN or ASIN', clear=True)
    web.click(id='a-autoid-0')
    web.type(sp, tag='input', id='afn-pricing', clear=True)
    web.type(cp, tag='input', id='afn-cost-of-goods', clear=True)
    web.click(id='update-fees-link')
    res = web.find_elements(id="afn-selling-fees")[0].text
    ful_fees = web.find_elements(id="afn-amazon-fulfillment-fees")[0].text
    sel_proc = web.find_elements(id="afn-seller-proceeds")[0].text
    ## sel_proc = web.execute_script('return arguments[0].value;', element)
    print("soa fees : " + res)
    print("Fulfillment fees : " + ful_fees)
    print("Seller Proceeds : " + sel_proc)
    soafees.append(res)
    fulfees.append(ful_fees)
    selproc.append(sel_proc)

print(soafees)
print(fulfees)
print(selproc)
df_soa = pd.DataFrame(soafees, columns=['SOA Fees'])
df_ful = pd.DataFrame(fulfees, columns=['FBA Fees'])
df_sel = pd.DataFrame(selproc, columns=['Seller Proceeds'])
print(df)
print(df_soa)
print(df_ful)
print(df_sel)
Thanks in advance for your support.

In the sel_proc variable you are storing the element's text. Instead, you should read the attribute that holds the value; in this case, I believe it is the "value" attribute:
sel_proc = web.find_elements(id="afn-seller-proceeds")[0].get_attribute(<attribute_name>)
Your code will look something like this:
sel_proc = web.find_elements(id="afn-seller-proceeds")[0].get_attribute("value")
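If the attribute still comes back empty or as the placeholder text, a fallback is to read the live DOM property with JavaScript. A minimal sketch, assuming webbot forwards execute_script to the underlying Selenium driver (the commented-out line in the question suggests it does; if not, web.driver.execute_script should reach the driver directly, though that too is an assumption):

element = web.find_elements(id="afn-seller-proceeds")[0]
sel_proc = element.get_attribute("value")
# fall back to the live JS property if the attribute is empty or stale
if not sel_proc or sel_proc == "Calculate":
    sel_proc = web.execute_script("return arguments[0].value;", element)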

Related

Extracting data from web with selenium and inserting it in a pandas dataframe

I have a problem: I cannot take the data I have extracted with Selenium and store it somewhere so I can manipulate or persist it. I am grabbing the data like so:
try:
    books = WebDriverWait(driver, 10).until(
        EC.presence_of_all_elements_located((By.ID, "titleitem"))
    )
finally:
    driver.quit()
Inside the try block I extract the data like this:
for i, book in enumerate(books):
    splited = books[i].text.split("\n")
    writer = str(splited[0])
    title = str(splited[1])
    publiser = str(splited[2])
    country = str(splited[3])
    ISBN = str(splited[4])
So in the end I have this code to extract exactly the data I want:
try:
    books = WebDriverWait(driver, 10).until(
        EC.presence_of_all_elements_located((By.ID, "titleitem"))
    )
    for i, book in enumerate(books):
        splited = books[i].text.split("\n")
        writer = str(splited[0])
        title = str(splited[1])
        publiser = str(splited[2])
        country = str(splited[3])
        ISBN = str(splited[4])
finally:
    driver.quit()
Those variables are the things I want to grab. When I print them, they appear normal (as they are on the website). But then I try to insert them into a pandas DataFrame like this (fake_books is declared as a pd.DataFrame()):
tmp = pd.Series({'title' : title, 'author': writer, 'publiser': ekdoths})
fake_books = fake_books.append(tmp)
I have also tried a list of dictionaries:
books = [{}]
...
for i, book in enumerate(books):
    splited = books[i].text.split("\n")
    books[i]['writer'] = str(splited[0])
    books[i]['title'] = str(splited[1])
    books[i]['ekdoths'] = str(splited[2])
    books[i]['polh'] = str(splited[3])
    books[i]['ISBN'] = str(splited[4])
Neither of those approaches works; the program just hangs and prints an empty DataFrame or list.
I always use this method: create a list of dictionaries, then pass it into pd.DataFrame.
# create an empty list at the beginning of the code
df_list = []

for i, book in enumerate(books):
    splited = books[i].text.split("\n")
    writer = str(splited[0])
    title = str(splited[1])
    publiser = str(splited[2])
    country = str(splited[3])
    ISBN = str(splited[4])
    # add the scraped data into a dictionary, then append it to df_list
    df_list.append({"writer": writer, "title": title, "publiser": publiser, "country": country, "ISBN": ISBN})

# at the end of your code, after scraping all you want
df = pd.DataFrame(df_list)
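As a side note on why the list-of-dicts pattern is preferable: DataFrame.append copies the whole frame on every call (it was deprecated in pandas 1.4 and removed in 2.0), while appending to a plain list is cheap and pd.DataFrame builds the frame once at the end. A self-contained sketch of the pattern, using hypothetical sample strings in place of the Selenium elements:

import pandas as pd

# hypothetical stand-ins for books[i].text
raw_rows = [
    "Author One\nTitle One\nPublisher A\nGreece\n1111111111",
    "Author Two\nTitle Two\nPublisher B\nGreece\n2222222222",
]

df_list = []
for raw in raw_rows:
    writer, title, publiser, country, isbn = raw.split("\n")
    # one dictionary per row, appended to the list
    df_list.append({"writer": writer, "title": title, "publiser": publiser,
                    "country": country, "ISBN": isbn})

df = pd.DataFrame(df_list)  # build the frame once at the end
print(df)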

Print the shop name repeatedly using Selenium python

It prints all the details, but the problem is that it prints "Master Juice" repeatedly, as highlighted in the picture. This is my link, from which I scrape: https://www.foodpanda.pk/restaurants/new?lat=24.9414896&lng=67.1676002&vertical=restaurants
from selenium import webdriver

driver = webdriver.Chrome('F:/chromedriver')
driver.get("https://www.foodpanda.pk/restaurants/new?lat=24.9414896&lng=67.1676002&vertical=restaurants")
# response = scrapy.Selector(text=driver.page_source)
list = driver.find_elements_by_css_selector("ul.vendor-list li")
length = len(driver.find_elements_by_css_selector("ul.vendor-list li"))
for i in range(length):
    try:
        name = driver.find_elements_by_css_selector(".headline .name")[i].text
        time = driver.find_elements_by_css_selector(".badge-info")[i].text.strip()
        rating = driver.find_elements_by_css_selector(".rating")[i].text
        dealtag = driver.find_elements_by_css_selector(".multi-tag")[i].text
    except:
        pass
    print(name, time, rating, dealtag)
It's because it prints on every iteration, even when there's an error, so it prints whatever was previously stored in your name, time, etc. variables. Try moving your print statement inside your try: block:
from selenium import webdriver

driver = webdriver.Chrome('F:/chromedriver')
driver.get("https://www.foodpanda.pk/restaurants/new?lat=24.9414896&lng=67.1676002&vertical=restaurants")
# response = scrapy.Selector(text=driver.page_source)
list = driver.find_elements_by_css_selector("ul.vendor-list li")
length = len(driver.find_elements_by_css_selector("ul.vendor-list li"))
for i in range(length):
    try:
        name = driver.find_elements_by_css_selector(".headline .name")[i].text
        time = driver.find_elements_by_css_selector(".badge-info")[i].text.strip()
        rating = driver.find_elements_by_css_selector(".rating")[i].text
        dealtag = driver.find_elements_by_css_selector(".multi-tag")[i].text
        print(name, time, rating, dealtag)
    except:
        pass
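A further, optional refinement: the bare except also swallows real errors, and re-querying the DOM for every field on every iteration is slow. A sketch that queries each list once and zips them, assuming the four selectors return parallel lists (zip stops at the shortest list, so a card missing one field no longer raises IndexError, though it can silently misalign rows):

names = driver.find_elements_by_css_selector(".headline .name")
times = driver.find_elements_by_css_selector(".badge-info")
ratings = driver.find_elements_by_css_selector(".rating")
dealtags = driver.find_elements_by_css_selector(".multi-tag")

# query once, then iterate the element lists in lockstep
for name, time, rating, dealtag in zip(names, times, ratings, dealtags):
    print(name.text, time.text.strip(), rating.text, dealtag.text)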

TypeError: list indices must be integers or slices, not Tag. One of my loops isn't working

The ultimate goal is to output selected data columns to a .csv. I had it working once, but it only got the first table on the page and I needed both; now it raises this error. I'm quite new to Python and don't know how I got to this point in the first place. I needed the call and the put table, but on the web page the calls come first, and when I used .find I only got the calls. I am working on this with a friend, and he wrote the last two functions; he could get the columns I wanted, but now we only get the calls. I tried to fix it, and now it raises the error in the title.
import bs4
import requests
import pandas as pd
import csv
from bs4 import BeautifulSoup

# sets desired ticker. in the future you could make this long
def ticker():
    ticker = ['GME', 'NYMT']
    return ticker

# creates list of urls for scraper to grab
def ticker_site():
    ticker_site = ['https://finance.yahoo.com/quote/' + x + '/options?p=' + x for x in ticker()]
    return ticker_site

optionRows = []
for i in range(len(ticker_site())):
    optionRows.append([])

def ticker_gets():
    option_page = ticker_site()
    requested_page = requests.get(option_page[i])
    ticker_soup = BeautifulSoup(requested_page.text, 'html.parser')
    return ticker_soup

def soup_search():
    table = ticker_gets()
    both_tables = table.find_all('table')
    call_table = both_tables[0]
    put_table = both_tables[1]
    call_rows = call_table.find('tr')
    put_rows = put_table.find('tr')
    # makes the call table
    for call in call_rows:
        whole_call_table = call.find_all('td')
        call_row = [y.text for y in whole_call_table]
        optionRows[call].append(call_row)
    # makes the put table
    for put in put_rows:
        whole_put_table = put.find_all('td')
        put_row = [z.text for z in whole_put_table]
        optionRows[put].append(put_row)
    for i in range(len(optionRows)):
        optionRows[i] = optionRows[i][1:len(optionRows[i])]
    return optionRows

def getColumns(columnIndexes=[2, 4, 5]):
    newList = []
    for tickerIndex in range(len(soup_search())):
        newList.append([])
        indexCount = 0
        for j in soup_search()[tickerIndex]:
            newList[tickerIndex].append([])
            for i in columnIndexes:
                newList[tickerIndex][indexCount].append(j[i])
            indexCount += 1
    return newList

def csvOutputer():
    rows = getColumns()
    fields = ["Ticker", "Strike", "Bid", "Ask"]
    with open('newcsv', 'w') as f:
        write = csv.writer(f)
        write.writerow(fields)
        for i in range(len(ticker())):
            for j in rows[i]:
                j.insert(0, ticker()[i])
                write.writerow(j)

csvOutputer()
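No answer was posted for this one, but the traceback points at optionRows[call].append(call_row): call is a bs4 Tag, and a Tag cannot be used as a list index. A hedged sketch of the two most likely fixes inside soup_search(), assuming the Yahoo page still serves the calls table first and the puts table second:

# 1. find('tr') returns a single Tag; use find_all('tr') to get the list of rows
call_rows = call_table.find_all('tr')
put_rows = put_table.find_all('tr')

# 2. index optionRows by the ticker counter (the module-level i), not by the Tag
for call in call_rows:
    call_row = [y.text for y in call.find_all('td')]
    optionRows[i].append(call_row)
for put in put_rows:
    put_row = [z.text for z in put.find_all('td')]
    optionRows[i].append(put_row)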

python requests-html get id value of element

I've been playing with an example taken from here: https://stackoverflow.com/a/61408325
It is working and was very helpful, but I'm struggling with the requests-html documentation. In this example, is it possible to get the id value of the element?
from requests_html import AsyncHTMLSession
from collections import defaultdict
import pandas as pd

url = 'https://www.flashscore.com/football/england/premier-league-2018-2019/results/'
asession = AsyncHTMLSession()

async def get_scores():
    r = await asession.get(url)
    await r.html.arender()
    return r

results = asession.run(get_scores)
results = results[0]

times = results.html.find("div.event__time")
home_teams = results.html.find("div.event__participant.event__participant--home")
scores = results.html.find("div.event__scores.fontBold")
away_teams = results.html.find("div.event__participant.event__participant--away")
event_part = results.html.find("div.event__part")

dict_res = defaultdict(list)
for ind in range(len(times)):
    dict_res['times'].append(times[ind].text)
    dict_res['home_teams'].append(home_teams[ind].text)
    dict_res['scores'].append(scores[ind].text)
    dict_res['away_teams'].append(away_teams[ind].text)
    dict_res['event_part'].append(event_part[ind].text)

df_res = pd.DataFrame(dict_res)
I managed to get the id in a way I'm not sure is the most suitable. What I did was search for the div of the entire match:
match_div = results.html.find("div.event__match")
and then get the id from its atributes
for ind in range(len(times)):
    id = match_div[ind].attrs['id']
I think there must be a more 'direct' way of doing this, but I'm not getting there.
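For what it's worth, a slightly more direct variant with the same requests-html API (a sketch, untested against the live page): iterate over the match containers and read both the id and the child fields from each container, so the id stays aligned with the row it belongs to:

matches = results.html.find("div.event__match")
dict_res = defaultdict(list)
for match in matches:
    dict_res['id'].append(match.attrs.get('id'))
    # find(..., first=True) returns the first matching child Element, or None
    time_el = match.find("div.event__time", first=True)
    dict_res['times'].append(time_el.text if time_el else None)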

Parsing a JSON using specific key words using Python

I'm trying to parse a JSON of a site's stock.
The JSON: https://www.ssense.com/en-us/men/sneakers.json
So I want to take some keywords from the user, then parse the JSON using those keywords to find the name of the item and (in this specific case) return the ID, SKU, and URL.
So, for example, if I input "Black Fennec", I want to parse the JSON and find the ID, SKU, and URL of the Black Fennec Sneakers (which have an ID of 3297299, a SKU of 191422M237006, and a URL of /men/product/ps-paul-smith/black-fennec-sneakers/3297299).
I have never attempted anything like this. Based on some guides that show how to parse a JSON, I started out with this:
r = requests.Session()
stock = r.get("https://www.ssense.com/en-us/men/sneakers.json", headers=headers)
obj_json_data = json.loads(stock.text)
However, I am now confused. How do I find the product based on the keywords, and how do I get its ID, URL, and SKU?
There are a number of ways to handle the output; not sure what you want to do with it, but this should get you going.
EDIT 1:
import requests

r = requests.Session()
obj_json_data = r.get("https://www.ssense.com/en-us/men/sneakers.json").json()
products = obj_json_data['products']

keyword = input('Enter a keyword: ')
for product in products:
    if keyword.upper() in product['name'].upper():
        name = product['name']
        id_var = product['id']
        sku = product['sku']
        url = product['url']
        print('Product: %s\nID: %s\nSKU: %s\nURL: %s' % (name, id_var, sku, url))
        # if you only want to return the first match, uncomment the next line
        #break
I also have it set up to store the results in a DataFrame and/or a list, just to give some options of where to go with it.
import requests
import pandas as pd

r = requests.Session()
obj_json_data = r.get("https://www.ssense.com/en-us/men/sneakers.json").json()
products = obj_json_data['products']

keyword = input('Enter a keyword: ')
products_found = []
results = pd.DataFrame()
for product in products:
    if keyword.upper() in product['name'].upper():
        name = product['name']
        id_var = product['id']
        sku = product['sku']
        url = product['url']
        temp_df = pd.DataFrame([[name, id_var, sku, url]], columns=['name', 'id', 'sku', 'url'])
        results = results.append(temp_df)
        products_found.append(name)  # list.append mutates in place; assigning its None return would break the check below
        print('Product: %s\nID: %s\nSKU: %s\nURL: %s' % (name, id_var, sku, url))
if products_found == []:
    print('Nothing found')
EDIT 2: Here is another way to do it: convert the JSON to a DataFrame, then filter to the rows that have the keyword in the name (this is actually a better solution, in my opinion).
import requests
import pandas as pd
from pandas.io.json import json_normalize

r = requests.Session()
obj_json_data = r.get("https://www.ssense.com/en-us/men/sneakers.json").json()
products = obj_json_data['products']
products_df = json_normalize(products)

keyword = input('Enter a keyword: ')
results = products_df[products_df['name'].str.contains(keyword, case=False)]
#print(results[['name', 'id', 'sku', 'url']])
products_found = list(results['name'])

if products_found == []:
    print('Nothing found')
else:
    print('Found: ' + str(products_found))
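One small modernization note: since pandas 1.0, json_normalize is exposed at the top level and the pandas.io.json import path is deprecated. A sketch of the same filter with the current API:

import requests
import pandas as pd

data = requests.get("https://www.ssense.com/en-us/men/sneakers.json").json()
products_df = pd.json_normalize(data['products'])  # top-level since pandas 1.0

keyword = "Black Fennec"  # hypothetical example input
results = products_df[products_df['name'].str.contains(keyword, case=False)]
print(results[['name', 'id', 'sku', 'url']])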
