i am new to python as a matter of fact, this is my first python project. I am using ebaysdk to search for electronics on ebay and i want it to return multiple results because my app is for comparing prices but it returns only one result.
Someone please help me to make the code return multiple results.
Here is my code snippet.
#app.route('/ebay_page_post', methods=['GET', 'POST'])
def ebay_page_post():
if request.method == 'POST':
#Get json format of the text sent by Ajax
search = request.json['search']
try:
#ebaysdk code starts here
api = finding(appid='JohnOkek-hybridse-PRD-5c2330105-9bbb62f2', config_file = None)
api_request = {'keywords':search, 'outputSelector': 'SellerInfo', 'categoryId': '293'}
response = api.execute('findItemsAdvanced', api_request)
soup = BeautifulSoup(response.content, 'lxml')
totalentries = int(soup.find('totalentries').text)
items = soup.find_all('item')
for item in items:
cat = item.categoryname.string.lower()
title = item.title.string.lower().strip()
price = int(round(float(item.currentprice.string)))
url = item.viewitemurl.string.lower()
seller = item.sellerusername.text.lower()
listingtype = item.listingtype.string.lower()
condition = item.conditiondisplayname.string.lower()
print ('____________________________________________________________')
#return json format of the result for Ajax processing
return jsonify(cat + '|' + title + '|' + str(price) + '|' + url + '|' + seller + '|' + listingtype + '|' + condition)
except ConnectionError as e:
return jsonify(e)
Based on the code you provided, added the key value pair collection example you could use :
#app.route('/ebay_page_post', methods=['GET', 'POST'])
def ebay_page_post():
if request.method == 'POST':
#Get json format of the text sent by Ajax
search = request.json['search']
try:
#ebaysdk code starts here
api = finding(appid='JohnOkek-hybridse-PRD-5c2330105-9bbb62f2', config_file = None)
api_request = {'keywords':search, 'outputSelector': 'SellerInfo', 'categoryId': '293'}
response = api.execute('findItemsAdvanced', api_request)
soup = BeautifulSoup(response.content, 'lxml')
totalentries = int(soup.find('totalentries').text)
items = soup.find_all('item')
# This will be returned
itemsFound = {}
# This index will be incremented
# each time an item is added
index = 0
for item in items:
cat = item.categoryname.string.lower()
title = item.title.string.lower().strip()
price = int(round(float(item.currentprice.string)))
url = item.viewitemurl.string.lower()
seller = item.sellerusername.text.lower()
listingtype = item.listingtype.string.lower()
condition = item.conditiondisplayname.string.lower()
# Adding the item found in the collection
# index is the key and the item json is the value
itemsFound[index] = jsonify(cat + '|' + title + '|' + str(price) + '|' + url + '|' + seller + '|' + listingtype + '|' + condition)
# Increment the index for the next items key
index++
for key in itemsFound:
print key, ':', itemsFound[key
# return itemsFound
except ConnectionError as e:
return jsonify(e)
Once the first item is found, add it to the collection. After your for loop finishes, then return the collection.
Right now you are returning (breaking the iteration) once you have found the first
I was able to solve the problem.
Click here to see how i did it
Thanks to every contributor, i am most grateful to you all.
Related
I have the following json that I open to extract the content and retrieve the id that I store in a list.
f = open('content/dataProcessing_2022_02_23.json')
Item = json.load(f)['results']
df_Item = pd.DataFrame (Item, columns = ['id'])
List_Item = df_Item[['id'][0]].values.tolist()
List_Item
Then I create the dictionary where I am going to store the content I will generate in my following script
structureDictItem = {
"Item":[
"dataProcessingItem"
]
}
Here is my script:
def writeContentToFile(mode, customername, workspacename, category, endpoint, jsonContent):
path = os.path.join(os.getcwd(), customername, workspacename, category)
Path(path).mkdir(parents=True, exist_ok=True)
index = 1
while os.path.exists(path + "/" + (endpoint+ f'_{date}' if index == 1 else endpoint + f'_({index})_{date}') + '.json'): index += 1
with open(path + "/" + (endpoint+ f'_{date}' if index == 1 else endpoint + f'_({index})_{date}') + '.json', mode, encoding='utf-8') as f:
json.dump(jsonContent, f, ensure_ascii=False, indent=4)
f.close()
for categoryItem in structureDictItem:
for endpointItem in structureDictItem[categoryItem]:
endpointFilenameItem = endpointItem
url = DataGalaxy_url + endpointFilenameItem
params = {"versionId":Workspace_id,
"includeAccessData":"true",
"includeAttributes":"true",
"includeLinks":"true",
"limit":5000
}
jsonResponse = requests.get(url, params=params, headers={"Authorization":accessToken}).json()
writeContentToFile('a', customername, workspacename, categoryItem, endpointFilenameItem, jsonResponse)
try:
for item in List_Item:
params["dataProcessingId"] = item
jsonResponse = requests.get(url=url, params = params, headers={"Authorization":accessToken}).json()['results']
writeContentToFile('a', customername, workspacename, categoryItem, endpointFilenameItem, jsonResponse)
except:
print(endpointItem)
next
However the following result I get is not really the final result I am expecting. Indeed I wish to have all the content in the same JSON. I understand why I am getting this output it is because I have 17 ID so its generating 17 different JSON. I would like help to see how I am able to generate into a single JSON. Can someone give me a hint on it or have an idea what I need to add to my script ?
dataProcessingItem_(10)_2022_02_23.json
dataProcessingItem_(11)_2022_02_23.json
dataProcessingItem_(12)_2022_02_23.json
dataProcessingItem_(13)_2022_02_23.json
dataProcessingItem_(14)_2022_02_23.json
dataProcessingItem_(15)_2022_02_23.json
dataProcessingItem_(16)_2022_02_23.json
dataProcessingItem_(17)_2022_02_23.json
dataProcessingItem_(18)_2022_02_23.json
dataProcessingItem_(2)_2022_02_23.json
dataProcessingItem_(3)_2022_02_23.json
dataProcessingItem_(4)_2022_02_23.json
dataProcessingItem_(5)_2022_02_23.json
dataProcessingItem_(6)_2022_02_23.json
dataProcessingItem_(7)_2022_02_23.json
dataProcessingItem_(8)_2022_02_23.json
dataProcessingItem_(9)_2022_02_23.json
Desired output :
dataProcessingItem_2022_02_23.json
I have a string https://www.exampleurl.com/
How would I insert a word in the middle of a string so it could look like this: https://www.subdomain.exampleulr.com/
I know I can insert the word if I did this:
url = 'https://www.exampleurl.com/'
url[:12] + 'subdomain'
It prints me https://www.subdomain, but I can't figure out how to print the rest of the string dynamically so it would adjust to the subdomain that is being appended to the string.
My goal is for the end result to look like the following https://www.subdomain.exampleurl.com/
url = 'https://www.exampleurl.com/'
content = url.split("www.")
url = content[0] + "www." + "subdomain." + content[1]
url = 'https://www.exampleurl.com/'
text = url.split(".")
url = text[0] + '.subdomain.' + text[1] + '.' + text[2]
Final output : https://www.subdomain.exampleurl.com/
Better split on the first .:
l = url.split('.', 1)
l[0] + '.subdomain.' + l[1]
## OR if subdomain is a variable:
f'{l[0]}.{subdomain}.{l[1]}'
output: 'https://www.subdomain.exampleurl.com/'
Using replace (once)
url = 'https://www.exampleurl.com/'
url = url.replace(".", ".subdomain.", 1) # only replaces first "." to
# get desured result
I am trying to scrape pickels.com.au.
I am trying to update the pickels_dataset.csv file if the link is the same and if the price is not the same them I am removing the list and inserting the new row to the CSV file, but it doesn't remove the old entry from the CSV file.
What would be the best way to remove and update the row in the CSV file.
Below is my code...
import requests
from scrapy.selector import Selector
import csv
import re
from tqdm import tqdm
from time import sleep
with open('pickels_dataset.csv', 'a+', newline='', encoding='utf-8') as auction_csv_file:
auction_csv_writer = csv.writer(auction_csv_file)
live_auctions_api = 'https://www.pickles.com.au/PWR-Web/services/api/sales/future'
api_request = requests.get(url=live_auctions_api)
for auctions in api_request.json():
auction_link = auctions.get('viewSaleListingLink')
if 'cars/item/search/-/listing/listSaleItems/' in auction_link:
auction_request = requests.get(url=auction_link)
response = Selector(text=auction_request.text)
sales_id_re = response.xpath('//script[contains(text(), "Product_Type_Sequence")]/text() | //script[contains(text(), "lot_number_suffix_sequence")]/text()').get()
sales_id = re.findall(r'"Product_Type_Sequence";var n="(.*?)"', sales_id_re) or re.findall(r'"lot_number_suffix_sequence";var n="(.*?)"', sales_id_re)
if sales_id == []:
continue
auction_sale_link = f'https://www.pickles.com.au/v4/caradvert/saleid-{sales_id[0]}-public?count=true&inav=Car%7Cbc%7Cha%7Cu&q=(And.ProductType.Vehicles._.Year.range(2010..2021).)&sr=%7Clot_number_suffix_sequence%7C0%7C30'
auction_sale_link_requests = requests.get(url=auction_sale_link)
auctions_data = auction_sale_link_requests.json().get('SearchResults')
if auctions_data == []:
print("NO RESULTS")
for auction_data in auctions_data:
if int(auction_data.get('MinimumBid')) > 0:
ids = auction_data.get('TargetId')
main_title = auction_data.get('Title')
short_title = str(auction_data.get('Year')) + ' ' + str(auction_data.get('Make')) + ' ' + str(auction_data.get('Model'))
make = auction_data.get('M ake')
model = auction_data.get('Model')
variant = auction_data.get('Series')
transmission = auction_data.get('Transmission')
odometer = auction_data.get('Odometer')
state = auction_data.get('Location').get('State')
sale_price = auction_data.get('MinimumBid')
link_path = main_title.replace(' ', '-').replace('/', '-').replace(',', '-') + '/' + str(ids)
link = f'https://www.pickles.com.au/cars/item/-/details/{link_path}'
sale_date = auction_data.get('SaleEndString')
auction_values = [
main_title, short_title, make,
model, variant, transmission, odometer,
state, "${:,.2f}".format(sale_price).strip() ,
link, sale_date
]
with open('pickels_dataset.csv', 'r+') as csv_read:
auction_reader = list(csv.reader(csv_read))
for each in auction_reader:
if link in each:
each_link, each_price = each[9], each[0]
if (link == each_link) and (sale_price != each_price):
auction_reader.clear()
print('New list found, old list deleted')
auction_csv_writer.writerow(auction_values)
print('New value added')
continue
elif (link == each[9]) and (sale_price == each[0]):
print('Same result already exist in the file')
continue
else:
auction_csv_writer.writerow(auction_values)
print('Unique result found and added.')
break
Your current script is opening your auction CSV file for appending, and then whilst it is still open, attempting to open it again for reading. This is probably why it is not updating as expected.
A better approach would be to first read the entire contents of your existing saved auction file into a dictionary. The key could be the link which would then make it easy to determine if you have already seen an existing auction.
Next scrape the current auctions and update the saved_auctions dictionary as needed.
Finally at the end, write the contents of saved_auctions back to the CSV file.
For example:
import requests
from scrapy.selector import Selector
import csv
import re
auction_filename = 'pickels_dataset.csv'
# Load existing auctions into a dictionary with link as key
saved_auctions = {}
with open(auction_filename, newline='', encoding='utf-8') as f_auction_file:
for row in csv.reader(f_auction_file):
saved_auctions[row[9]] = row # dictionary key is link
live_auctions_api = 'https://www.pickles.com.au/PWR-Web/services/api/sales/future'
api_request = requests.get(url=live_auctions_api)
for auctions in api_request.json():
auction_link = auctions.get('viewSaleListingLink')
if 'cars/item/search/-/listing/listSaleItems/' in auction_link:
auction_request = requests.get(url=auction_link)
response = Selector(text=auction_request.text)
sales_id_re = response.xpath('//script[contains(text(), "Product_Type_Sequence")]/text() | //script[contains(text(), "lot_number_suffix_sequence")]/text()').get()
sales_id = re.findall(r'"Product_Type_Sequence";var n="(.*?)"', sales_id_re) or re.findall(r'"lot_number_suffix_sequence";var n="(.*?)"', sales_id_re)
if sales_id == []:
continue
auction_sale_link = f'https://www.pickles.com.au/v4/caradvert/saleid-{sales_id[0]}-public?count=true&inav=Car%7Cbc%7Cha%7Cu&q=(And.ProductType.Vehicles._.Year.range(2010..2021).)&sr=%7Clot_number_suffix_sequence%7C0%7C30'
auction_sale_link_requests = requests.get(url=auction_sale_link)
auctions_data = auction_sale_link_requests.json().get('SearchResults')
if auctions_data == []:
print("NO RESULTS")
for auction_data in auctions_data:
if int(auction_data.get('MinimumBid')) > 0:
ids = auction_data.get('TargetId')
main_title = auction_data.get('Title')
short_title = str(auction_data.get('Year')) + ' ' + str(auction_data.get('Make')) + ' ' + str(auction_data.get('Model'))
make = auction_data.get('Make')
model = auction_data.get('Model')
variant = auction_data.get('Series')
transmission = auction_data.get('Transmission')
odometer = auction_data.get('Odometer')
state = auction_data.get('Location').get('State')
minimum_bid = auction_data.get('MinimumBid')
sale_price = "${:,.2f}".format(minimum_bid).strip()
link_path = main_title.replace(' ', '-').replace('/', '-').replace(',', '-') + '/' + str(ids)
link = f'https://www.pickles.com.au/cars/item/-/details/{link_path}'
sale_date = auction_data.get('SaleEndString')
auction_values = [
main_title, short_title, make,
model, variant, transmission, odometer,
state, sale_price,
link, sale_date
]
if link in saved_auctions:
if saved_auctions[link][8] == sale_price:
print('Same result already exists in the file')
else:
print('New value updated')
saved_auctions[link] = auction_values # Updated the entry
else:
print('New auction added')
saved_auctions[link] = auction_values
# Update the saved auction file
with open(auction_filename, 'w', newline='', encoding='utf-8') as f_auction_file:
csv_auction_file = csv.writer(f_auction_file)
csv_auction_file.writerows(saved_auctions.values())
If you want to also remove auctions that are no longer active, then it would probably be best to simply ignore the saved file and just write all current entries as is.
i'm an absolut beginner, but with youtube and some websites i've written a crawler for the german website Immoscout24.
My problem: the crawler works fine, if all attributes are excisting. But if one site hasn't any attribute (e.g. "pre" in "beschreibung_container"), i'll get "NameError: name 'beschreibung' is not defined". How can i do, that it writes nothing ("") into my result list (csv), if the attribute not exists ans continues crawling?
for number in numbers:
my_url = "https://www.immobilienscout24.de/expose/%s#/" %number
uClient = uReq(my_url)
page_html = uClient.read()
uClient.close()
page_soup = soup(page_html, "html.parser")
containers = page_soup.find_all("div", {"id":"is24-content"})
filename = "results_"+current_datetime+".csv"
f = open(filename, "a")
headers = "Objekt-ID##Titel##Adresse##Merkmale##Kosten##Bausubstanz und Energieausweis##Beschreibung##Ausstattung##Lage\n"
f.write(headers)
for container in containers:
try:
objektid_container = container.find_all("div", {"class":"is24-scoutid__content padding-top-s"})
objektid = objektid_container[0].get_text().strip()
titel_container = container.find_all("h1", {"class":"font-semibold font-xl margin-bottom margin-top-m palm-font-l"})
titel = titel_container[0].get_text().strip()
adresse_container = container.find_all("div", {"class":"address-block"})
adresse = adresse_container[0].get_text().strip()
criteria_container = container.find_all("div", {"class":"criteriagroup criteria-group--two-columns"})
criteria = criteria_container[0].get_text().strip()
preis_container = container.find_all("div", {"class":"grid-item lap-one-half desk-one-half padding-right-s"})
preis = preis_container[0].get_text().strip()
energie_container = container.find_all("div", {"class":"criteriagroup criteria-group--border criteria-group--two-columns criteria-group--spacing"})
energie = energie_container[0].get_text().strip()
beschreibung_container = container.find_all("pre", {"class":"is24qa-objektbeschreibung text-content short-text"})
beschreibung = beschreibung_container[0].get_text().strip()
ausstattung_container = container.find_all("pre", {"class":"is24qa-ausstattung text-content short-text"})
ausstattung = ausstattung_container[0].get_text().strip()
lage_container = container.find_all("pre", {"class":"is24qa-lage text-content short-text"})
lage = lage_container[0].get_text().strip()
except:
print("some mistake")
pass
f.write(objektid + "##" + titel + "##" + adresse + "##" + criteria.replace(" ", ";") + "##" + preis.replace(" ", ";") + "##" + energie.replace(" ", ";") + "##" + beschreibung.replace("\n", " ") + "##" + ausstattung.replace("\n", " ") + "##" + lage.replace("\n", " ") + "\n")
f.close()
EDIT
First problem is solved. Another problem: my result list shows in each column like:
look here
How can i do, that "Objekt-ID" and the other headlines are only in row No. 1?
For each variable, you can simply just do the following
obj = container.find_all("div", {"class":"xxxxx"}) or ""
objid = obj[0].get_text().strip() if obj else ""
The first line will default the value into "" empty string if find_all returns empty list or none. The second also does the same thing but check for the existence of value first then apply the if else condition.
I think you need to encapsulate each variable in try-except block.
E.g:
try:
objektid_container = container.find_all("div", {"class":"is24-scoutid__content padding-top-s"})
objektid = objektid_container[0].get_text().strip()
except:
objektid = ""
Do this for all variables
For Second issue Move your headers outside loop
Remove this code:
filename = "results_"+current_datetime+".csv"
f = open(filename, "a")
headers = "Objekt-ID##Titel##Adresse##Merkmale##Kosten##Bausubstanz und Energieausweis##Beschreibung##Ausstattung##Lage\n"
f.write(headers)
And add it before:
for number in numbers:
I have an interesting behavior happening with my program.
i have the following methods:
def getMarket(self, Currency):
return self.public_api('GetMarket/' + Currency + '_BTC')
def getBalance(self, Currency):
self.api_params.clear()
self.api_params['Currency'] = Currency
return self.private_api('GetBalance')
my_api = buyBot(API_KEY, API_SECRET)
pumpCoin = my_api.getMarket('OSC')
pumpRawRate = pumpCoin['Data']['High']
pumpRawQty = .02
pumpBuyRate = my_api.calculateBuy(pumpRawRate)
pumpQty = float(pumpRawQty)/float(pumpBuyRate)
pumpSellRate = pumpCoin['Data']['Low']
pumpSellCoin = my_api.getBalance('OSC')
pumpSellAmount = pumpSellCoin["Data"]["Total"]
print str(pumpRawRate) + '\n' + str(pumpBuyRate) + '\n' + str(pumpSellRate) + '\n' + str(pumpQty) + '\n' + str(pumpSellAmount)`
From section: pumpCoin = my_api.getMarket('OSC') to pumpSellRate = pumpCoin['Data']['Low'], i have no problems getting the information and working with it.
Problem seems to be starting with line: pumpSellCoin = my_api.getBalance('OSC')
I get the following Error message:
Traceback (most recent call last):
File "C:\XXXXXX.py", line 92, in <module>
pumpSellAmount = pumpSellCoin["Data"]["Total"]
TypeError: string indices must be integers, not str
if i run: print (my_api.getBalance('OSC'), i am able to see all the private API information that is retrieved by that call, however i am not sure why it is giving me a problem when i try to call 1 specific item in the stack.
Let me know if you need any more information on this.
Any help will be greatly appreciated.
I have looked at the other posts and so far i can't seem to figure out the exact cause.
This is the private_api code
def private_api(self, meth):
time.sleep(1)
params = self.api_params
url = self.apisite + meth
nonce = str(int(time.time()))
post_data = json.dumps(params)
hash = hashlib.md5()
hash.update(post_data)
base64hash = base64.b64encode(hash.digest())
sig = self.apikey + "POST" + urllib.quote_plus(url).lower() + nonce + base64hash
hmacsig = base64.b64encode(hmac.new(base64.b64decode(self.apisecret), sig, hashlib.sha256).digest())
hdr = "amx " + self.apikey + ":" + hmacsig + ":" + nonce
headers = { 'Authorization': hdr, 'Content-Type':'application/json; charset=utf-8' }
request = urllib2.Request(url, data=post_data, headers=headers)
return urllib2.urlopen(request).read()
Please add this to your code:
print('pumpSellCoin', type(pumpSellCoin["Data"]), type(pumpSellCoin["Data"]["Total"]))
pumpSellAmount = pumpSellCoin["Data"]["Total"]
This will show you that one of your variables is a list or a string and not a dictionary and you need to access is using a number and not a name like "Data" or "Total"
Try this example:
test = 'abcde'
print(type(test))
print(test[0])
print(test[2:4])
print(test['whatever']) # this results in TypeError: string indices must be integers
if i run the program as follows:
my_api = buyBot(API_KEY, API_SECRET)
pumpCoin = my_api.getMarket('OSC')
pumpRawRate = pumpCoin['Data']['High']
pumpRawQty = .02
pumpBuyRate = my_api.calculateBuy(pumpRawRate)
pumpQty = float(pumpRawQty)/float(pumpBuyRate)
pumpSellRate = pumpCoin['Data']['Low']
pumpSellBal = my_api.getBalance('OSC')
print pumpSellBal
#print('pumpSellBal', type(pumpSellBal["Data"]), type(pumpSellBal["Data"]["Total"]))
#pumpSellAmount = pumpSellBal['Data']['Total']
print str(pumpRawRate) + '\n' + str(pumpBuyRate) + '\n' + str(pumpSellRate) + '\n' + str(pumpQty) #+ '\n' + str(pumpSellAmount)
i get the following results:
{"Success":true,"Error":null,"Data":[{"CurrencyId":235,"Symbol":"OSC","Total":8561.03652012,"Available":0.00000000,"Unconfirmed":0.00000000,"HeldForTrades":8561.03652012,"PendingWithdraw":0.00000000,"Address":null,"Status":"OK","StatusMessage":null,"BaseAddress":null}]}
1.61e-06
2.415e-06
1.25e-06
8281.57349896
So i am definitely able to communicate back and forward, however the issue only seems to be when i try to work with a single piece of information from pumpSellBal = my_api.getBalance('OSC')