I would like to get the json data from for instance https://app.weathercloud.net/d0838117883#current using python requests module.
I tried:
import re
import requests
device='0838117883'
URL='https://app.weathercloud.net'
URL1=URL+'/d'+device
URL2=URL+'/device/stats'
headers={'Content-Type':'text/plain; charset=UTF-8',
'Referer':URL1,
'User-Agent':'Mozilla/5.0 (X11; Linux i686) AppleWebKit/537.36 (KHTML, like Gecko) Ubuntu Chromium/48.0.2564.82 Chrome/48.0.2564.82 Safari/537.36',
'Accept':'application/json, text/javascript,*/*'}
with requests.Session() as s:
#get html from URL1 in order to get the CSRF token
page = s.get(URL1)
CSRF=re.findall('WEATHERCLOUD_CSRF_TOKEN:"(.*)"},',page.text)[0]
#create parameters for URL2, in order to get the json file
params={'code':device,'WEATHERCLOUD_CSRF_TOKEN':CSRF}
page_stats=requests.get(URL2,params=params,headers=headers)
print(page_stats.url)
print(page_stats) #<Response [200]>
print(page_stats.text) #empty
print(page_stats.json()) #error
But the page_stats is empty.
How can I get the stats data from weathercloud?
Inspecting the page with DevTools, you'll find a useful endpoint:
https://app.weathercloud.net/device/stats
You can "replicate" the original web request made by your browser with requests library:
import requests
cookies = {
'PHPSESSID': '************************',
'WEATHERCLOUD_CSRF_TOKEN':'***********************',
'_ga': '**********',
'_gid': '**********',
'__gads': 'ID=**********',
'WeathercloudCookieAgreed': 'true',
'_gat': '1',
'WEATHERCLOUD_RECENT_ED3C8': '*****************',
}
headers = {
'Connection': 'keep-alive',
'sec-ch-ua': '^\\^Google',
'Accept': 'application/json, text/javascript, */*; q=0.01',
'X-Requested-With': 'XMLHttpRequest',
'sec-ch-ua-mobile': '?0',
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.82 Safari/537.36',
'sec-ch-ua-platform': '^\\^Windows^\\^',
'Sec-Fetch-Site': 'same-origin',
'Sec-Fetch-Mode': 'cors',
'Sec-Fetch-Dest': 'empty',
'Referer': 'https://app.weathercloud.net/d0838117883',
'Accept-Language': 'it-IT,it;q=0.9,en-US;q=0.8,en;q=0.7,es;q=0.6',
}
params = (
('code', '0838117883'),
('WEATHERCLOUD_CSRF_TOKEN', '****************'),
)
response = requests.get('https://app.weathercloud.net/device/stats', headers=headers, params=params, cookies=cookies)
# Serializing json
json_object = json.loads(response.text)
json Output:
{'last_update': 1632842172,
'bar_current': [1632842172, 1006.2],
'bar_day_max': [1632794772, 1013.4],
'bar_day_min': [1632845772, 1006.2],
'bar_month_max': [1632220572, 1028],
'bar_month_min': [1632715572, 997.3],
'bar_year_max': [1614418512, 1038.1],
'bar_year_min': [1615434432, 988.1],
'wdir_current': [1632842172, 180],
..............}
That's it.
Related
I would like to limit the data they receive to the first 8 links on the website. As shown in the picture, there is no data available beyond the 8th link, as seen in the CSV file. How can I apply this limit so that they only receive data from the first 8 links? The website link is https://www.linkedin.com/learning/search?keywords=data%20science,
JSON API
CSV File
Code part
import requests
import pandas as pd
url = "https://www.linkedin.com/learning-api/searchV2?keywords=data%20science&q=keywords&searchRequestId=RW4AuZRJT22%2BUeXnsZJGQA%3D%3D"
payload={}
headers = {
'authority': 'www.linkedin.com',
'accept': 'application/vnd.linkedin.normalized+json+2.1',
'accept-language': 'en-GB,en-US;q=0.9,en;q=0.8,pt;q=0.7',
'cookie': 'bscookie="v=1&202108281231498ed9b977-a15a-4647-83ff-d0ef12adfbfbAQFdf9p_GSaBPrFkmyztJ8zyOnqVND-D"; li_theme=light; li_theme_set=app; li_sugr=4752e3dd-9232-4bb9-9dbb-b29c1a127f77; bcookie="v=2&9fb3a4d0-1139-4e2b-89ba-e5374eeb9735"; aam_uuid=08800810176251362264578372297522883472; _gcl_au=1.1.240501668.1664707206; li_rm=AQELLfU3ZqmMhAAAAYQ_tPjGK8ONpN3EEUxH1P4M6Czq5fk6EXaEXSzKwoNSXoSZ7KgO5uSTE9iZ30fuhs6ju1rLH1VgXYyRM3nNuiTQEx1k2ca6SR0Hk1d5-NBafeE0zv65QetFY5Yrx2ufzRlfEXUkJJSoO9Z2o7MeuX-3Go7P4dI-m5HQM7VOKLiK_TD-ZWzj_OkdkR75K31QKGq8bxPLa0JpkGUzhDIVGWzl6vqkcl6BJEK2s-keIZjsiH5MZ9sbLXEVOxLg4vD21TTJBNshE6zaiWrSnxx_PEm44eDPqjvXRMVWFeX7VZfIe2KFshWXLRc4SY8hAQINymU; visit=v=1&M; G_ENABLED_IDPS=google; JSESSIONID="ajax:7673827752327651374"; timezone=Asia/Karachi; _guid=0f0d3402-80be-4bef-9baf-18d281f68921; mbox=session^#965dfb20b29e4f2688eedcf643d2e5ab^#1671620169|PC^#965dfb20b29e4f2688eedcf643d2e5ab.38_0^#1687170309; __ssid=db28305b-28da-4f8b-ad3a-54dea10b9eb9; dfpfpt=da2e5dde482a41b09cf7178ba1bcec7e; g_state={"i_l":0}; liap=true; li_at=AQEDATKxuC8DTVh9AAABhaytidQAAAGGZN5q6E0AdHv14xrDnsngkfFuMyIIbGYccHR15UrPQ8rb3qpS0_-mpCFm9pXQkoNYGdk87LiGVIqiw4oXuJ9tqflCEOev71_L83JoJ-fkbOfZwdG0RICtuIHn; AnalyticsSyncHistory=AQKUIualgILMBgAAAYZHP2t3mvejt25dMqUMRmrpyhaQMe1cucNiAMliFNRUf4cu4aKnZ1z1kQ_FGeqFr2m04Q; lms_ads=AQEr9ksNAL4kugAAAYZHP2z8QK26stPkoXe2TgJZW3Fnrl4dCzbC2DtithS1-zp5Ve85QwxzRhPvP9okaC0kbu40FYX7EqIk; lms_analytics=AQEr9ksNAL4kugAAAYZHP2z8QK26stPkoXe2TgJZW3Fnrl4dCzbC2DtithS1-zp5Ve85QwxzRhPvP9okaC0kbu40FYX7EqIk; fid=AQGWcXnO5AffyAAAAYZRr6tph6cekZ9ZD66e1xdHhumlVvJ3cKYzZLwfK-I3nJyeRyLQs3LRnowKjQ; lil-lang=en_US; lang=v=2&lang=en-us; _dd_l=1; _dd=ff90da3c-aa07-4491-9106-b226eba1c09c; AMCVS_14215E3D5995C57C0A495C55%40AdobeOrg=1; AMCV_14215E3D5995C57C0A495C55%40AdobeOrg=-637568504%7CMCIDTS%7C19403%7CMCMID%7C09349215808923073694559483836331055195%7CMCAAMLH-1677084815%7C3%7CMCAAMB-1677084815%7CRKhpRz8krg2tLO6pguXWp5olkAcUniQYPHaMWWgdJ3xzPWQmdj0y%7CMCOPTOUT-1676487215s%7CNONE%7CMCCIDH%7C1076847823%7CvVersion%7C5.1.1; s_cc=true; UserMatchHistory=AQJJ3j-efkcQeQAAAYZWAETxBE44VVBGzo_i-gr5nEGPOK85mS3kDScLdGC24_GeNx-GEeCNDrPOjkQde_MGT4iPc7vJV4sT_nPL8Tv4WMTLarIEliLYPkCvou8zFlb3dFNkbXZjVV_KTVeDvUSJ5WJTeStLNXmzV3_EV5mI9dbSRpoTFlJ94vi_zxcCmnLTaGAYGQAdymMv4SbaMgtnt3QcY8Zj9-hnwxdsIEmJloq47_QTP7sfl-SG-vw8xvhl9KYb0ZPKCnQ6ioJhu3G4cFpKJiSUbULkYMADSo0; lidc="b=VB23:s=V:r=V:a=V:p=V:g=4060:u=105:x=1:i=1676480108:t=1676566269:v=2:sig=AQEz2UktgVcQuJwMoVRgKgnUuKtCEm9C"; s_sq=%5B%5BB%5D%5D; gpv_pn=www.linkedin.com%2Flearning%2Fsearch; s_ips=615; s_plt=7.03; s_pltp=www.linkedin.com%2Flearning%2Fsearch; s_tp=6116; s_ppv=www.linkedin.com%2Flearning%2Fsearch%2C47%2C10%2C2859%2C7%2C18; s_tslv=1676480356388',
'csrf-token': 'ajax:7673827752327651374',
'referer': 'https://www.linkedin.com/learning/search?keywords=data%20science',
'sec-ch-ua': '"Chromium";v="110", "Not A(Brand";v="24", "Google Chrome";v="110"',
'sec-ch-ua-mobile': '?0',
'sec-ch-ua-platform': '"Windows"',
'sec-fetch-dest': 'empty',
'sec-fetch-mode': 'cors',
'sec-fetch-site': 'same-origin',
'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36',
'x-li-lang': 'en_US',
'x-li-page-instance': 'urn:li:page:d_learning_search;gNOg2MJoSqWv2XNAh4ukiQ==',
'x-li-pem-metadata': 'Learning Exp - Search=search',
'x-li-track': '{"clientVersion":"1.1.2236","mpVersion":"1.1.2236","osName":"web","timezoneOffset":5,"timezone":"Asia/Karachi","mpName":"learning-web","displayDensity":1,"displayWidth":1366,"displayHeight":768}',
'x-lil-intl-library': 'en_US',
'x-restli-protocol-version': '2.0.0'
}
res = requests.request("GET", url, headers=headers, data=payload).json()
product=[]
items=res['included']
for item in items:
try:
title=item['headline']['title']['text']
except:
title=''
try:
url='https://www.linkedin.com/learning/'+item['slug']
except:
url=''
try:
rating=item['rating']['ratingCount']
except:
rating=''
wev={
'title':title,
'instructor':name,
'review':rating,
'url':url
}
product.append(wev)
df=pd.DataFrame(product)
df.to_csv('learning.csv')
To filter the rows that contain empty columns, specifically those with an empty title column, you can simply add the following code:
df=pd.DataFrame(product)
filter = df["title"] != ""
dfNew = df[filter]
dfNew.to_csv('learning.csv')
The entire code will be:
import requests
import pandas as pd
url = "https://www.linkedin.com/learning-api/searchV2?keywords=data%20science&q=keywords&searchRequestId=RW4AuZRJT22%2BUeXnsZJGQA%3D%3D"
payload={}
headers = {
'authority': 'www.linkedin.com',
'accept': 'application/vnd.linkedin.normalized+json+2.1',
'accept-language': 'en-GB,en-US;q=0.9,en;q=0.8,pt;q=0.7',
'cookie': 'bscookie="v=1&202108281231498ed9b977-a15a-4647-83ff-d0ef12adfbfbAQFdf9p_GSaBPrFkmyztJ8zyOnqVND-D"; li_theme=light; li_theme_set=app; li_sugr=4752e3dd-9232-4bb9-9dbb-b29c1a127f77; bcookie="v=2&9fb3a4d0-1139-4e2b-89ba-e5374eeb9735"; aam_uuid=08800810176251362264578372297522883472; _gcl_au=1.1.240501668.1664707206; li_rm=AQELLfU3ZqmMhAAAAYQ_tPjGK8ONpN3EEUxH1P4M6Czq5fk6EXaEXSzKwoNSXoSZ7KgO5uSTE9iZ30fuhs6ju1rLH1VgXYyRM3nNuiTQEx1k2ca6SR0Hk1d5-NBafeE0zv65QetFY5Yrx2ufzRlfEXUkJJSoO9Z2o7MeuX-3Go7P4dI-m5HQM7VOKLiK_TD-ZWzj_OkdkR75K31QKGq8bxPLa0JpkGUzhDIVGWzl6vqkcl6BJEK2s-keIZjsiH5MZ9sbLXEVOxLg4vD21TTJBNshE6zaiWrSnxx_PEm44eDPqjvXRMVWFeX7VZfIe2KFshWXLRc4SY8hAQINymU; visit=v=1&M; G_ENABLED_IDPS=google; JSESSIONID="ajax:7673827752327651374"; timezone=Asia/Karachi; _guid=0f0d3402-80be-4bef-9baf-18d281f68921; mbox=session^#965dfb20b29e4f2688eedcf643d2e5ab^#1671620169|PC^#965dfb20b29e4f2688eedcf643d2e5ab.38_0^#1687170309; __ssid=db28305b-28da-4f8b-ad3a-54dea10b9eb9; dfpfpt=da2e5dde482a41b09cf7178ba1bcec7e; g_state={"i_l":0}; liap=true; li_at=AQEDATKxuC8DTVh9AAABhaytidQAAAGGZN5q6E0AdHv14xrDnsngkfFuMyIIbGYccHR15UrPQ8rb3qpS0_-mpCFm9pXQkoNYGdk87LiGVIqiw4oXuJ9tqflCEOev71_L83JoJ-fkbOfZwdG0RICtuIHn; AnalyticsSyncHistory=AQKUIualgILMBgAAAYZHP2t3mvejt25dMqUMRmrpyhaQMe1cucNiAMliFNRUf4cu4aKnZ1z1kQ_FGeqFr2m04Q; lms_ads=AQEr9ksNAL4kugAAAYZHP2z8QK26stPkoXe2TgJZW3Fnrl4dCzbC2DtithS1-zp5Ve85QwxzRhPvP9okaC0kbu40FYX7EqIk; lms_analytics=AQEr9ksNAL4kugAAAYZHP2z8QK26stPkoXe2TgJZW3Fnrl4dCzbC2DtithS1-zp5Ve85QwxzRhPvP9okaC0kbu40FYX7EqIk; fid=AQGWcXnO5AffyAAAAYZRr6tph6cekZ9ZD66e1xdHhumlVvJ3cKYzZLwfK-I3nJyeRyLQs3LRnowKjQ; lil-lang=en_US; lang=v=2&lang=en-us; _dd_l=1; _dd=ff90da3c-aa07-4491-9106-b226eba1c09c; AMCVS_14215E3D5995C57C0A495C55%40AdobeOrg=1; AMCV_14215E3D5995C57C0A495C55%40AdobeOrg=-637568504%7CMCIDTS%7C19403%7CMCMID%7C09349215808923073694559483836331055195%7CMCAAMLH-1677084815%7C3%7CMCAAMB-1677084815%7CRKhpRz8krg2tLO6pguXWp5olkAcUniQYPHaMWWgdJ3xzPWQmdj0y%7CMCOPTOUT-1676487215s%7CNONE%7CMCCIDH%7C1076847823%7CvVersion%7C5.1.1; s_cc=true; UserMatchHistory=AQJJ3j-efkcQeQAAAYZWAETxBE44VVBGzo_i-gr5nEGPOK85mS3kDScLdGC24_GeNx-GEeCNDrPOjkQde_MGT4iPc7vJV4sT_nPL8Tv4WMTLarIEliLYPkCvou8zFlb3dFNkbXZjVV_KTVeDvUSJ5WJTeStLNXmzV3_EV5mI9dbSRpoTFlJ94vi_zxcCmnLTaGAYGQAdymMv4SbaMgtnt3QcY8Zj9-hnwxdsIEmJloq47_QTP7sfl-SG-vw8xvhl9KYb0ZPKCnQ6ioJhu3G4cFpKJiSUbULkYMADSo0; lidc="b=VB23:s=V:r=V:a=V:p=V:g=4060:u=105:x=1:i=1676480108:t=1676566269:v=2:sig=AQEz2UktgVcQuJwMoVRgKgnUuKtCEm9C"; s_sq=%5B%5BB%5D%5D; gpv_pn=www.linkedin.com%2Flearning%2Fsearch; s_ips=615; s_plt=7.03; s_pltp=www.linkedin.com%2Flearning%2Fsearch; s_tp=6116; s_ppv=www.linkedin.com%2Flearning%2Fsearch%2C47%2C10%2C2859%2C7%2C18; s_tslv=1676480356388',
'csrf-token': 'ajax:7673827752327651374',
'referer': 'https://www.linkedin.com/learning/search?keywords=data%20science',
'sec-ch-ua': '"Chromium";v="110", "Not A(Brand";v="24", "Google Chrome";v="110"',
'sec-ch-ua-mobile': '?0',
'sec-ch-ua-platform': '"Windows"',
'sec-fetch-dest': 'empty',
'sec-fetch-mode': 'cors',
'sec-fetch-site': 'same-origin',
'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36',
'x-li-lang': 'en_US',
'x-li-page-instance': 'urn:li:page:d_learning_search;gNOg2MJoSqWv2XNAh4ukiQ==',
'x-li-pem-metadata': 'Learning Exp - Search=search',
'x-li-track': '{"clientVersion":"1.1.2236","mpVersion":"1.1.2236","osName":"web","timezoneOffset":5,"timezone":"Asia/Karachi","mpName":"learning-web","displayDensity":1,"displayWidth":1366,"displayHeight":768}',
'x-lil-intl-library': 'en_US',
'x-restli-protocol-version': '2.0.0'
}
res = requests.request("GET", url, headers=headers, data=payload).json()
product=[]
items=res['included']
for item in items:
try:
title=item['headline']['title']['text']
except:
title=''
try:
url='https://www.linkedin.com/learning/'+item['slug']
except:
url=''
try:
rating=item['rating']['ratingCount']
except:
rating=''
name = item.get("description", {}).get("text", "")
wev={
'title':title,
'instructor':name,
'review':rating,
'url':url
}
product.append(wev)
df=pd.DataFrame(product)
filter = df["title"] != ""
dfNew = df[filter]
dfNew.to_csv('learning.csv')
However, this is solution works because the web is structured. For complex/irregular websites I prefer to use scrapy as we use in my job.
i try to use the api-endpoint from this site:
https://horoguides.com/hk/watch_finder
I searched for the api-endpoint in the network-tab and try to rebuild this api-access with the following code:
import requests
url = "https://horoguides.com/hk/ajaj/watch/searchWatches"
payload = {
"addLimit": "LIMIT 0, 20",
"addOrder": "ORDER BY establish DESC",
}
headers = {
'Accept': "application/json, text/javascript, */*; q=0.01",
'Accept-Language': "de-DE,de;q=0.9,en-US;q=0.8,en;q=0.7",
'Connection': "keep-alive",
'Content-Type': "multipart/form-data; boundary=---011000010111000001101001",
'Cookie': "PHPSESSID=siob5k70qu4gh8bkio07qtocv3; _gid=GA1.2.40295814.1663575664; __gads=ID=2fc582d62ff2a986-223e4e8c26ce00a9:T=1663575664:RT=1663575664:S=ALNI_MaTX_1U4CELXasmH0td3MvCRQ5S5Q; _gat_UA-90322481-1=1; _gat_gtag_UA_90322481_1=1; _ga_6Z9E9PKG02=GS1.1.1663594500.3.1.1663594710.0.0.0; _ga=GA1.1.699639573.1663575664",
'Origin': "https://horoguides.com",
'Referer': "https://horoguides.com/hk/watch_finder",
'Sec-Fetch-Dest': "empty",
'Sec-Fetch-Mode': "cors",
'Sec-Fetch-Site': "same-origin",
'User-Agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.0.0 Safari/537.36",
'X-Requested-With': "XMLHttpRequest",
'sec-ch-ua': "^\^Chromium^^;v=^\^104^^, ^\^"
}
resp = requests.request("POST", url, json=payload, headers=headers)
print(resp.status_code)
respJSON = resp.json()
print(respJSON)
But as response i only get:
200
{'status': 'invalid'}
Why is this reponse from the api-endpoint not working?
I also tried to run this in Insomnia and get the same result.
You need to fix the payload. The following code works:
import requests
url = "https://horoguides.com/hk/ajaj/watch/searchWatches"
payload = {
"addLimit": "LIMIT 0, 20",
"addOrder": "ORDER BY establish DESC",
'lang': 'hk',
'ajaxID': 'searchWatches'
}
headers = {
'Accept': "application/json, text/javascript, */*; q=0.01",
'Accept-Language': "de-DE,de;q=0.9,en-US;q=0.8,en;q=0.7",
'Connection': "keep-alive",
'Origin': "https://horoguides.com",
'Referer': "https://horoguides.com/hk/watch_finder",
'Sec-Fetch-Dest': "empty",
'Sec-Fetch-Mode': "cors",
'User-Agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.0.0 Safari/537.36",
'X-Requested-With': "XMLHttpRequest"
}
resp = requests.request("POST", url, data=payload, headers=headers)
print(resp.status_code)
respJSON = resp.json()
print(respJSON)
Result in terminal:
200
{'act': 'watch/searchWatches', 'status': 'success', 'getData': {'a5124': {'id': '5124', 'name': '116610-LN-0001', 'url_name': '116610-ln-97200', 'establish': '2014', 'w_brand_id': '39', 'w_brand_abbr': '', 'w_brand_name': 'ROLEX', 'w_brand_urlname': 'rolex', 'w_brand_localname': '勞力士', 'hype_default_currency': 'NT$', 'w_series_name': 'SUBMARINER', 'w_series_urlname':[....]
For requests documentation, see https://requests.readthedocs.io/en/latest/
I am trying to do some scraping from websites using GET and POST methods, but now I am facing a new challenge.
I am trying to get data from a credit simulator, I found this portuguese site (https://www.bancomontepio.pt/particulares/credito/pessoal/credito-pessoal-online).
As far as I know, I need to use POST method, but I have to specify the data (the Amount value, the Term...). I usually do it by creating a dictionary structure but that is not working.
I'm kinda lost to be fair, maybe the problem is on the header...
Here is my code:
import requests
import warnings
warnings.filterwarnings("ignore")
term=24
amount=5000
url = 'https://simuladores.bancomontepio.pt/ITSCredit.External/Calculator/ITSCredit.Calculator.UI.External/gateway/Calculator/api/Calculator/Calculate?hash=-1359629931'
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.122 Safari/537.36',
'Accept-Language': 'pt-PT,pt;q=0.9,en-US;q=0.8,en;q=0.7'}
payload = {'Amount': amount,'Term': term,'ProductCode':"26B1129900X"}
response = requests.post(url, headers=headers, data=payload, verify=False).json()
If i take off the .json(), I get the error Response [410].
The goal is to get the TAN or TAEG that change when term ("Prazo") or amount ("Montante") values change.
Any ideias?
[EDIT]
headers = {'Accept': 'application/json, text/plain, */*' ,
'Accept-Encoding': 'gzip, deflate, br',
'Accept-Language': 'pt-PT,pt;q=0.9,en-US;q=0.8,en;q=0.7',
'Connection': 'keep-alive',
'Content-Length': '957',
'content-type': 'text/plain',
'Cookie': '_gcl_au=1.1.911606195.1646064658; OptanonAlertBoxClosed=2022-02-28T16:45:11.586Z; _ga=GA1.2.1147601977.1646064657; _ga_8WVEJF7X11=GS1.1.1646305654.3.1.1646309750.0; _ga_63QCVBV1V3=GS1.1.1646305679.1.1.1646309750.0; ASP.NET_SessionId=wlfbf2dx4oatlio0vl1ftinq; _gid=GA1.2.449121330.1646650093; calc-cookie=; OptanonConsent=isGpcEnabled=0&datestamp=Mon+Mar+07+2022+11%3A38%3A48+GMT%2B0000+(Hora+padr%C3%A3o+da+Europa+Ocidental)&version=6.30.0&isIABGlobal=false&consentId=6caccc97-6af1-4b55-9049-5694835d9f7a&interactionCount=2&landingPath=NotLandingPage&groups=C0001%3A1%2CC0002%3A1%2CC0003%3A1%2CC0004%3A1&hosts=H10%3A1%2CH20%3A1%2CH7%3A1%2CH8%3A1%2CH23%3A1%2CH11%3A1%2CH24%3A1%2CH13%3A1%2CH25%3A1&genVendors=&geolocation=ES%3B&AwaitingReconsent=false; _gali=slider-container; _gat_UA-186811106-6=1',
'Host': 'simuladores.bancomontepio.pt',
'Origin': 'https://simuladores.bancomontepio.pt',
'Referer': 'https://simuladores.bancomontepio.pt/',
'sec-ch-ua': '" Not A;Brand";v="99", "Chromium";v="90", "Google Chrome";v="90"',
'sec-ch-ua-mobile': '?0',
'Sec-Fetch-Dest': 'empty',
'Sec-Fetch-Mode': 'cors',
'Sec-Fetch-Site': 'same-origin',
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36'}headers = {'Accept': 'application/json, text/plain, */*' ,
'Accept-Encoding': 'gzip, deflate, br',
'Accept-Language': 'pt-PT,pt;q=0.9,en-US;q=0.8,en;q=0.7',
'Connection': 'keep-alive',
'Content-Length': '957',
'content-type': 'text/plain',
'Cookie': '_gcl_au=1.1.911606195.1646064658; OptanonAlertBoxClosed=2022-02-28T16:45:11.586Z; _ga=GA1.2.1147601977.1646064657; _ga_8WVEJF7X11=GS1.1.1646305654.3.1.1646309750.0; _ga_63QCVBV1V3=GS1.1.1646305679.1.1.1646309750.0; ASP.NET_SessionId=wlfbf2dx4oatlio0vl1ftinq; _gid=GA1.2.449121330.1646650093; calc-cookie=; OptanonConsent=isGpcEnabled=0&datestamp=Mon+Mar+07+2022+11%3A38%3A48+GMT%2B0000+(Hora+padr%C3%A3o+da+Europa+Ocidental)&version=6.30.0&isIABGlobal=false&consentId=6caccc97-6af1-4b55-9049-5694835d9f7a&interactionCount=2&landingPath=NotLandingPage&groups=C0001%3A1%2CC0002%3A1%2CC0003%3A1%2CC0004%3A1&hosts=H10%3A1%2CH20%3A1%2CH7%3A1%2CH8%3A1%2CH23%3A1%2CH11%3A1%2CH24%3A1%2CH13%3A1%2CH25%3A1&genVendors=&geolocation=ES%3B&AwaitingReconsent=false; _gali=slider-container; _gat_UA-186811106-6=1',
'Host': 'simuladores.bancomontepio.pt',
'Origin': 'https://simuladores.bancomontepio.pt',
'Referer': 'https://simuladores.bancomontepio.pt/',
'sec-ch-ua': '" Not A;Brand";v="99", "Chromium";v="90", "Google Chrome";v="90"',
'sec-ch-ua-mobile': '?0',
'Sec-Fetch-Dest': 'empty',
'Sec-Fetch-Mode': 'cors',
'Sec-Fetch-Site': 'same-origin',
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36'}
payload = {'CCRDCalculateInput':{},'MultifunctionsCalculateInput':{},'Device':{'Browser':'chrome','BrowserVersion':'90.0.4430.212','Device':'Desktop','Os':'windows','OsVersion':'windows-10','UserAgent':'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36'},'IsCustomer':'true','Amount':7500,'Term':60,'ConditionCode':'26B1129900X-01-I-129900-F','CreditDestinationCode':'129900','ProductCode':'26B1129900X','FinancedExpenses':'false','FrequencyTrancheCode':'null','GoalCode':'C006','GoalDescription':'PROJETOS PESSOAIS','FrequencyTypeCode':'M','FamilyCode':'CP','Proponents':[{'Position':1,
'Birthday':'1992-03-07T13:03:30.000Z','State':'true','EntityType':{'ID':1,'CompanyID':1,'Code':'P','Description':'Proponente','Value':'null','ValueString':'null','State':'true','Imported':'null'},'ExpenseCodes':['009']}],'Counterparts':0,'OptionalExpenses':[{'Code':'009','Factor':1}],'ResidualValue':0,'InterestOnly':0,'Deferment':0}
Now I'm getting a empty json()... Response 200 but I got this structure:
{'Status': 'Unknown',
'Error': {'VisibleToHuman': False, 'Code': '0', 'Message': ''},
'Result': None}
As far as I know, the status should be "OK" to get some info on the Result.
Cheers
Looks like you need to expand the payload to include more (all) of the parameters (including the cookies, specifically the ASP.NET_SessionId).
import requests
import warnings
warnings.filterwarnings("ignore")
term=24
amount=5000
url = 'https://simuladores.bancomontepio.pt/ITSCredit.External/Calculator/ITSCredit.Calculator.UI.External/gateway/Calculator/api/Calculator/Calculate?hash=-1359629931'
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.122 Safari/537.36',
'Accept-Language': 'pt-PT,pt;q=0.9,en-US;q=0.8,en;q=0.7',
'Cookie':'ASP.NET_SessionId=fhkyn1vn5knlw3uhdnh50nii;'}
payload = {
"CCRDCalculateInput":{},
"MultifunctionsCalculateInput":{},
"Device":{
"Browser":"chrome",
"BrowserVersion":"96.0.4664.110",
"Device":"Desktop",
"Os":"windows",
"OsVersion":"windows-10",
"UserAgent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36"},
"IsCustomer":'true',
"Amount":amount,
"Term":term,
"ConditionCode":"26B1129900X-01-I-129900-F",
"CreditDestinationCode":"129900",
"ProductCode":"26B1129900X",
"FinancedExpenses":'false',
"FrequencyTrancheCode":'null',
"GoalCode":"C006",
"GoalDescription":"PROJETOS PESSOAIS",
"FrequencyTypeCode":"M",
"FamilyCode":"CP",
"Proponents":[{
"Position":'1',
"Birthday":"1992-03-10T13:03:24.000Z",
"State":'true',
"EntityType":{
"ID":'1',
"CompanyID":'1',
"Code":"P",
"Description":"Proponente",
"Value":'null',
"ValueString":'null',
"State":'true',
"Imported":'null'},
"ExpenseCodes":[
"009"]}],
"Counterparts":'0',
"OptionalExpenses":[{
"Code":"009",
"Factor":'1'}],
"ResidualValue":'0',
"InterestOnly":'0',
"Deferment":'0'}
jsonData = requests.post(url, headers=headers, json=payload, verify=False).json()
results = jsonData['Result']
mtic = results['MTIC']
installment = results['PeriodInstallment'][0]['Installment']
taeg = results['TAEG']
tan = results['PeriodInstallment'][0]['TAN']
print(f'Installment: {installment}\nTAEG: {taeg}\nTAN: {tan}\nMTIC: {mtic}')
Output:
Installment: 224.5
TAEG: 14.8
TAN: 7.0
MTIC: 5708.2
I have the first code that is working and printing to csv, however it included a lot of data I didn't need. A paragraph of code was then added to only include the data I wanted. The problem is it prints to the screen correctly but still includes all the data in the CSV file. I've tried everything I could think of in this line but it won't either won't print or still prints everything.
data = pd.DataFrame(stock_info)
Could someone show me where I'm going wrong so it will print only the portion I want it to?
Old Working Code
import requests
import pandas as pd
url = "https://www.stockrover.com/stock_infos/grid?_dc=1644769629231"
def stock_data(stock_info):
data = pd.DataFrame(stock_info)
data.to_csv("data.csv", index=False)
payload = "state=%7B%22sortInfo%22%3A%7B%7D%2C%22columns%22%3A%5B77%2C32%2C498%2C500%2C31%2C27%2C499%2C30%2C578%2C28%2C29%2C544%2C181%2C185%2C186%5D%2C%22view%22%3A281%2C%22priorPrimaryColumn%22%3A170%2C%22filterData%22%3A%5B%5D%2C%22name%22%3A%22New%201%22%2C%22cType%22%3A%22Screener%22%2C%22cNode%22%3A%22s_39%22%2C%22cIsFolder%22%3Afalse%2C%22gridSelection%22%3A%22BTU%22%2C%22lastActive%22%3A1396898415%2C%22primaryColumn%22%3A76%2C%22folderDisabledParams%22%3A%7B%22filterData%22%3A%5B%5D%7D%2C%22mainGridDateRange%22%3A%22ytd%22%2C%22groupState%22%3Anull%2C%22moversGridDateRange%22%3A%221_day%22%2C%22peersGridDateRange%22%3A%221_day%22%2C%22lastGridSelections%22%3A%5B%22BTU%22%5D%2C%22lastQuantNode%22%3A%5B%5D%2C%22includeQuotesInTable%22%3Afalse%2C%22includeAllQuotesLastValue%22%3Afalse%2C%22markets%22%3A%7B%22panel%22%3A%22summary%22%7D%2C%22researchPanel%22%3A%22tablePanel%22%2C%22recentSearchTickers%22%3A%5B%22SPY%22%2C%22AMZN%22%2C%22AAPL%22%2C%22s_32%22%2C%22%5ENDX%22%2C%22AXP%22%2C%22XOM%22%2C%22AFL%22%2C%22%5EDJX%22%2C%22AIT%22%2C%22ADVC%22%5D%2C%22quotesBoxTickers%22%3A%5B%22AMZN%22%2C%22AAPL%22%2C%22SPY%22%5D%2C%22checkedQuotesBoxTickers%22%3A%5B%22AMZN%22%2C%22AAPL%22%2C%22SPY%22%5D%2C%22dashboard%22%3A%7B%22buttonRef%22%3A%22272%22%7D%2C%22tickerSelectedFeeds%22%3A%5B%22Benzinga%20News%22%2C%22Yahoo%20News%22%5D%2C%22marketSelectedFeeds%22%3A%5B%22Google%20News%22%2C%22Stock%20Market%20News%20-%20Investing.com%22%5D%2C%22bondsSelectedFeeds%22%3A%5B%22Bonds%20Strategy%20-%20Investing.com%22%5D%2C%22commoditiesSelectedFeeds%22%3A%5B%22Commodities%20%26%20Futures%20News%20-%20Investing.com%22%2C%22Commodities%20Fundamental%20Analysis%20-%20Investing.com%22%2C%22Commodities%20Strategy%20Analysis%20-%20Investing.com%22%5D%2C%22stocksSelectedFeeds%22%3A%5B%22CNNMoney%20News%22%2C%22Google%20News%22%2C%22Seeking%20Alpha%20Top%20Stories%22%5D%2C%22etfsSelectedFeeds%22%3A%5B%22Economy%20News%20-%20Investing.com%22%2C%22ETF%20Analysis%20-%20Investing.com%22%2C%22Investing%20Ideas%20-%20Investing.com%22%5D%2C%22topPanel%22%3A%22researchPanel%22%2C%22maxRecordsNode%22%3Afalse%2C%22version%22%3A7%2C%22lastGridSelectionsRaw%22%3A%5B%22BTU%22%5D%2C%22lastSelectionScreeners%22%3A%22s_39%22%2C%22quotesDisabled%22%3Atrue%2C%22lastSelectionPortfolios%22%3A%22p_2%22%2C%22comparisonPanels%22%3A%7B%22Portfolio%22%3A%22p_2%22%2C%22Index%22%3A%22%5EDJX%22%2C%22Watchlist%22%3A%22Watchlists%22%2C%22Screener%22%3A%22s_39%22%7D%2C%22lastSelectionWatchlists%22%3A%22w_26%22%2C%22indicesSelectedFeeds%22%3A%5B%22Google%20News%22%2C%22Yahoo%20News%22%5D%2C%22newsActive%22%3A%22tickerNews%22%2C%22recentSearchMetrics%22%3A%5B%22Price%22%2C%22EPS%22%2C%22Sales%22%5D%2C%22editPanel%22%3A%22positionsPanel%22%2C%22newsType%22%3A%22marketNews%22%2C%22tableColumns%22%3A%5B%22ticker%22%2C%22rank%22%2C%22score_rank%22%2C%22filter_score%22%2C%22company%22%2C%22cash%22%2C%22currentassets%22%2C%22netppe%22%2C%22intangibles%22%2C%22totalassets%22%2C%22currentliabilities%22%2C%22longtermdebt%22%2C%22totaldebt%22%2C%22totalliabilities%22%2C%22equity%22%2C%22tangiblebookvalue%22%2C%22cash_short_term_p%22%2C%22net_ppe_p%22%2C%22intangibles_p%22%5D%2C%22last_save%22%3A1644769379%2C%22panels%22%3A%7B%22collapsed%22%3A%7B%22chp%22%3Atrue%2C%22ip%22%3Atrue%2C%22mp%22%3Afalse%2C%22qp%22%3Afalse%2C%22conp%22%3Atrue%2C%22fsp%22%3Afalse%7D%2C%22viewportWidth%22%3A%221920%22%2C%22viewportHeight%22%3A%221069%22%2C%22chartPanelHeight%22%3A483%2C%22controlPanelWidth%22%3A296%2C%22insightPanelWidth%22%3A%22485%22%2C%22quoteBoxHeight%22%3A200%2C%22navigationPanelWidth%22%3A277%7D%7D&updateMarket=true&page=1&start=0&limit=250"
headers = {
'authority': 'www.stockrover.com',
'sec-ch-ua': '" Not A;Brand";v="99", "Chromium";v="98", "Google Chrome";v="98"',
'x-csrf-token': '7yR4pfI0kAArtjJak535+NJrpB0L212PAbXCg0kbyE4SyjFaQ73sMHJLiqAkPb5nGzfC8KvAa3kTADLAEQXyOQ==',
'sec-ch-ua-mobile': '?0',
'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.82 Safari/537.36',
'content-type': 'application/x-www-form-urlencoded; charset=UTF-8',
'accept': 'application/json',
'x-requested-with': 'XMLHttpRequest',
'sec-ch-ua-platform': '"Windows"',
'origin': 'https://www.stockrover.com',
'sec-fetch-site': 'same-origin',
'sec-fetch-mode': 'cors',
'sec-fetch-dest': 'empty',
'referer': 'https://www.stockrover.com/research/table/281/s_39/BTU',
'accept-language': 'en-US,en;q=0.9',
'cookie': 'remember_me_pref=0; user_name=test11964; plan=3; premiumBraintreeKey=MIIBCgKCAQEAzM4LJfrNnBOgRFB1dDJkmqTFCWT2Y%2BksOydD8xDH4R033WUzxbffMZb%2B3dqEyQvOVjLcwFIHByDc4Xwej7enas2E%2FVRyh7Cvyadn7M5zQeRyLcI9Ys5KCozMwxJPc0x76FlXPwiAo1Qlz3RcLb9wGHBag2R51FuTie%2BhVDCgzWajqDCREzRhi%2Fqlt3D%2FxXNo%2FiwJlpOUr%2Fx1QnkkILxgKlq1dD7KJ767O5ojYKXsO%2BV2Bfu7sSD3djsOxQJ1%2FRbaDm2E96EDkWhhOeOpPndQ6IuSl4NmnJg%2Fcq6f8csW8M3Ys%2BMZPFkdxPC4%2FfRM1XC9o76PjpVNBIO%2ByJEELKZedwIDAQAB; lr=1644769628; _Ruby2_session=Q1drcmlhazYvUFZLd0NydnRXUGpoUzArZDlxYWRCcW9sRUx5VDBydWVWRHdGWDZlMnlESURzbldwbFV1L0drbUlKaWt5MXRtaS9iR0ZYZEpPVHQ1N25qRnR1d3FrY0tzQW1qQm9CdTZ3MSs0d2c3MlpuMjRiQWhCOHI1cGNWekZ4cUdJd0ZFcGtpeng3MFlqZjFDUW9RYmpFMU9DeGdGMVZKR1EwMjVhSE9yVHl4VXFtQm9aYVBtNHF5d0pwMjJ1aVlNMUVRUzdnVFZWZ1AxQkY5Q0p6a2RKay9QL05tOWk4cHZiSERtaGRxeTlxTWZnV3Q0cjdwR3RndUtmeUp3QThhMnJaV2dGZjlPUUtjcGRidDhiajRxK2g0RUZTMWNZUDBaeGNCcUVxSDJ1QnZVRlRkWk9tUExJNWN3TDN5T1BQcmhVVGsycStVTzJRaUwvSkk2TnNVZldTOGU3Tm5wQ3RUMy9nazFqbzdrUWtvYzRwQWRpV3dnTVB3YzhodFV2U0FRR3VKdllMY01NZmdOdGtmOEJ4UT09LS1nTXBrYldhQ0pEeWJ3ak9qQjcrTGV3PT0%3D--1439f36a7f9362aee4b5b666747a2d63d72e81bd'
}
response = requests.request("POST", url, headers=headers, data=payload)
stock_info = response.json()['stock_infos']
stock_data(stock_info)
New Non-Working Code
import requests
import pandas as pd
url = "https://www.stockrover.com/stock_infos/grid?_dc=1644769629231"
def stock_data(stock_info):
data = pd.DataFrame(stock_info)
data.to_csv("data.csv", index=False)
payload = "state=%7B%22sortInfo%22%3A%7B%7D%2C%22columns%22%3A%5B77%2C32%2C498%2C500%2C31%2C27%2C499%2C30%2C578%2C28%2C29%2C544%2C181%2C185%2C186%5D%2C%22view%22%3A281%2C%22priorPrimaryColumn%22%3A170%2C%22filterData%22%3A%5B%5D%2C%22name%22%3A%22New%201%22%2C%22cType%22%3A%22Screener%22%2C%22cNode%22%3A%22s_39%22%2C%22cIsFolder%22%3Afalse%2C%22gridSelection%22%3A%22BTU%22%2C%22lastActive%22%3A1396898415%2C%22primaryColumn%22%3A76%2C%22folderDisabledParams%22%3A%7B%22filterData%22%3A%5B%5D%7D%2C%22mainGridDateRange%22%3A%22ytd%22%2C%22groupState%22%3Anull%2C%22moversGridDateRange%22%3A%221_day%22%2C%22peersGridDateRange%22%3A%221_day%22%2C%22lastGridSelections%22%3A%5B%22BTU%22%5D%2C%22lastQuantNode%22%3A%5B%5D%2C%22includeQuotesInTable%22%3Afalse%2C%22includeAllQuotesLastValue%22%3Afalse%2C%22markets%22%3A%7B%22panel%22%3A%22summary%22%7D%2C%22researchPanel%22%3A%22tablePanel%22%2C%22recentSearchTickers%22%3A%5B%22SPY%22%2C%22AMZN%22%2C%22AAPL%22%2C%22s_32%22%2C%22%5ENDX%22%2C%22AXP%22%2C%22XOM%22%2C%22AFL%22%2C%22%5EDJX%22%2C%22AIT%22%2C%22ADVC%22%5D%2C%22quotesBoxTickers%22%3A%5B%22AMZN%22%2C%22AAPL%22%2C%22SPY%22%5D%2C%22checkedQuotesBoxTickers%22%3A%5B%22AMZN%22%2C%22AAPL%22%2C%22SPY%22%5D%2C%22dashboard%22%3A%7B%22buttonRef%22%3A%22272%22%7D%2C%22tickerSelectedFeeds%22%3A%5B%22Benzinga%20News%22%2C%22Yahoo%20News%22%5D%2C%22marketSelectedFeeds%22%3A%5B%22Google%20News%22%2C%22Stock%20Market%20News%20-%20Investing.com%22%5D%2C%22bondsSelectedFeeds%22%3A%5B%22Bonds%20Strategy%20-%20Investing.com%22%5D%2C%22commoditiesSelectedFeeds%22%3A%5B%22Commodities%20%26%20Futures%20News%20-%20Investing.com%22%2C%22Commodities%20Fundamental%20Analysis%20-%20Investing.com%22%2C%22Commodities%20Strategy%20Analysis%20-%20Investing.com%22%5D%2C%22stocksSelectedFeeds%22%3A%5B%22CNNMoney%20News%22%2C%22Google%20News%22%2C%22Seeking%20Alpha%20Top%20Stories%22%5D%2C%22etfsSelectedFeeds%22%3A%5B%22Economy%20News%20-%20Investing.com%22%2C%22ETF%20Analysis%20-%20Investing.com%22%2C%22Investing%20Ideas%20-%20Investing.com%22%5D%2C%22topPanel%22%3A%22researchPanel%22%2C%22maxRecordsNode%22%3Afalse%2C%22version%22%3A7%2C%22lastGridSelectionsRaw%22%3A%5B%22BTU%22%5D%2C%22lastSelectionScreeners%22%3A%22s_39%22%2C%22quotesDisabled%22%3Atrue%2C%22lastSelectionPortfolios%22%3A%22p_2%22%2C%22comparisonPanels%22%3A%7B%22Portfolio%22%3A%22p_2%22%2C%22Index%22%3A%22%5EDJX%22%2C%22Watchlist%22%3A%22Watchlists%22%2C%22Screener%22%3A%22s_39%22%7D%2C%22lastSelectionWatchlists%22%3A%22w_26%22%2C%22indicesSelectedFeeds%22%3A%5B%22Google%20News%22%2C%22Yahoo%20News%22%5D%2C%22newsActive%22%3A%22tickerNews%22%2C%22recentSearchMetrics%22%3A%5B%22Price%22%2C%22EPS%22%2C%22Sales%22%5D%2C%22editPanel%22%3A%22positionsPanel%22%2C%22newsType%22%3A%22marketNews%22%2C%22tableColumns%22%3A%5B%22ticker%22%2C%22rank%22%2C%22score_rank%22%2C%22filter_score%22%2C%22company%22%2C%22cash%22%2C%22currentassets%22%2C%22netppe%22%2C%22intangibles%22%2C%22totalassets%22%2C%22currentliabilities%22%2C%22longtermdebt%22%2C%22totaldebt%22%2C%22totalliabilities%22%2C%22equity%22%2C%22tangiblebookvalue%22%2C%22cash_short_term_p%22%2C%22net_ppe_p%22%2C%22intangibles_p%22%5D%2C%22last_save%22%3A1644769379%2C%22panels%22%3A%7B%22collapsed%22%3A%7B%22chp%22%3Atrue%2C%22ip%22%3Atrue%2C%22mp%22%3Afalse%2C%22qp%22%3Afalse%2C%22conp%22%3Atrue%2C%22fsp%22%3Afalse%7D%2C%22viewportWidth%22%3A%221920%22%2C%22viewportHeight%22%3A%221069%22%2C%22chartPanelHeight%22%3A483%2C%22controlPanelWidth%22%3A296%2C%22insightPanelWidth%22%3A%22485%22%2C%22quoteBoxHeight%22%3A200%2C%22navigationPanelWidth%22%3A277%7D%7D&updateMarket=true&page=1&start=0&limit=250"
headers = {
'authority': 'www.stockrover.com',
'sec-ch-ua': '" Not A;Brand";v="99", "Chromium";v="98", "Google Chrome";v="98"',
'x-csrf-token': '7yR4pfI0kAArtjJak535+NJrpB0L212PAbXCg0kbyE4SyjFaQ73sMHJLiqAkPb5nGzfC8KvAa3kTADLAEQXyOQ==',
'sec-ch-ua-mobile': '?0',
'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.82 Safari/537.36',
'content-type': 'application/x-www-form-urlencoded; charset=UTF-8',
'accept': 'application/json',
'x-requested-with': 'XMLHttpRequest',
'sec-ch-ua-platform': '"Windows"',
'origin': 'https://www.stockrover.com',
'sec-fetch-site': 'same-origin',
'sec-fetch-mode': 'cors',
'sec-fetch-dest': 'empty',
'referer': 'https://www.stockrover.com/research/table/281/s_39/BTU',
'accept-language': 'en-US,en;q=0.9',
'cookie': 'remember_me_pref=0; user_name=test11964; plan=3; premiumBraintreeKey=MIIBCgKCAQEAzM4LJfrNnBOgRFB1dDJkmqTFCWT2Y%2BksOydD8xDH4R033WUzxbffMZb%2B3dqEyQvOVjLcwFIHByDc4Xwej7enas2E%2FVRyh7Cvyadn7M5zQeRyLcI9Ys5KCozMwxJPc0x76FlXPwiAo1Qlz3RcLb9wGHBag2R51FuTie%2BhVDCgzWajqDCREzRhi%2Fqlt3D%2FxXNo%2FiwJlpOUr%2Fx1QnkkILxgKlq1dD7KJ767O5ojYKXsO%2BV2Bfu7sSD3djsOxQJ1%2FRbaDm2E96EDkWhhOeOpPndQ6IuSl4NmnJg%2Fcq6f8csW8M3Ys%2BMZPFkdxPC4%2FfRM1XC9o76PjpVNBIO%2ByJEELKZedwIDAQAB; lr=1644769628; _Ruby2_session=Q1drcmlhazYvUFZLd0NydnRXUGpoUzArZDlxYWRCcW9sRUx5VDBydWVWRHdGWDZlMnlESURzbldwbFV1L0drbUlKaWt5MXRtaS9iR0ZYZEpPVHQ1N25qRnR1d3FrY0tzQW1qQm9CdTZ3MSs0d2c3MlpuMjRiQWhCOHI1cGNWekZ4cUdJd0ZFcGtpeng3MFlqZjFDUW9RYmpFMU9DeGdGMVZKR1EwMjVhSE9yVHl4VXFtQm9aYVBtNHF5d0pwMjJ1aVlNMUVRUzdnVFZWZ1AxQkY5Q0p6a2RKay9QL05tOWk4cHZiSERtaGRxeTlxTWZnV3Q0cjdwR3RndUtmeUp3QThhMnJaV2dGZjlPUUtjcGRidDhiajRxK2g0RUZTMWNZUDBaeGNCcUVxSDJ1QnZVRlRkWk9tUExJNWN3TDN5T1BQcmhVVGsycStVTzJRaUwvSkk2TnNVZldTOGU3Tm5wQ3RUMy9nazFqbzdrUWtvYzRwQWRpV3dnTVB3YzhodFV2U0FRR3VKdllMY01NZmdOdGtmOEJ4UT09LS1nTXBrYldhQ0pEeWJ3ak9qQjcrTGV3PT0%3D--1439f36a7f9362aee4b5b666747a2d63d72e81bd'
}
response = requests.request("POST", url, headers=headers, data=payload)
stock_info = response.json()['stock_infos']
stock_data(stock_info)
for info in stock_info:
key = info[0]
for i, sub in enumerate(info[1:], 1):
if sub == key:
print(info[i:])
break
If you don't need to keep those columns just slice them out by adding a line to your original code:
data = pd.DataFrame(stock_info)
data = data.iloc[:, 4:]
In original code:
import requests
import pandas as pd
url = "https://www.stockrover.com/stock_infos/grid?_dc=1644769629231"
def stock_data(stock_info):
data = pd.DataFrame(stock_info)
data = data.iloc[:, 4:]
data.to_csv("data.csv", index=False)
payload = "state=%7B%22sortInfo%22%3A%7B%7D%2C%22columns%22%3A%5B77%2C32%2C498%2C500%2C31%2C27%2C499%2C30%2C578%2C28%2C29%2C544%2C181%2C185%2C186%5D%2C%22view%22%3A281%2C%22priorPrimaryColumn%22%3A170%2C%22filterData%22%3A%5B%5D%2C%22name%22%3A%22New%201%22%2C%22cType%22%3A%22Screener%22%2C%22cNode%22%3A%22s_39%22%2C%22cIsFolder%22%3Afalse%2C%22gridSelection%22%3A%22BTU%22%2C%22lastActive%22%3A1396898415%2C%22primaryColumn%22%3A76%2C%22folderDisabledParams%22%3A%7B%22filterData%22%3A%5B%5D%7D%2C%22mainGridDateRange%22%3A%22ytd%22%2C%22groupState%22%3Anull%2C%22moversGridDateRange%22%3A%221_day%22%2C%22peersGridDateRange%22%3A%221_day%22%2C%22lastGridSelections%22%3A%5B%22BTU%22%5D%2C%22lastQuantNode%22%3A%5B%5D%2C%22includeQuotesInTable%22%3Afalse%2C%22includeAllQuotesLastValue%22%3Afalse%2C%22markets%22%3A%7B%22panel%22%3A%22summary%22%7D%2C%22researchPanel%22%3A%22tablePanel%22%2C%22recentSearchTickers%22%3A%5B%22SPY%22%2C%22AMZN%22%2C%22AAPL%22%2C%22s_32%22%2C%22%5ENDX%22%2C%22AXP%22%2C%22XOM%22%2C%22AFL%22%2C%22%5EDJX%22%2C%22AIT%22%2C%22ADVC%22%5D%2C%22quotesBoxTickers%22%3A%5B%22AMZN%22%2C%22AAPL%22%2C%22SPY%22%5D%2C%22checkedQuotesBoxTickers%22%3A%5B%22AMZN%22%2C%22AAPL%22%2C%22SPY%22%5D%2C%22dashboard%22%3A%7B%22buttonRef%22%3A%22272%22%7D%2C%22tickerSelectedFeeds%22%3A%5B%22Benzinga%20News%22%2C%22Yahoo%20News%22%5D%2C%22marketSelectedFeeds%22%3A%5B%22Google%20News%22%2C%22Stock%20Market%20News%20-%20Investing.com%22%5D%2C%22bondsSelectedFeeds%22%3A%5B%22Bonds%20Strategy%20-%20Investing.com%22%5D%2C%22commoditiesSelectedFeeds%22%3A%5B%22Commodities%20%26%20Futures%20News%20-%20Investing.com%22%2C%22Commodities%20Fundamental%20Analysis%20-%20Investing.com%22%2C%22Commodities%20Strategy%20Analysis%20-%20Investing.com%22%5D%2C%22stocksSelectedFeeds%22%3A%5B%22CNNMoney%20News%22%2C%22Google%20News%22%2C%22Seeking%20Alpha%20Top%20Stories%22%5D%2C%22etfsSelectedFeeds%22%3A%5B%22Economy%20News%20-%20Investing.com%22%2C%22ETF%20Analysis%20-%20Investing.com%22%2C%22Investing%20Ideas%20-%20Investing.com%22%5D%2C%22topPanel%22%3A%22researchPanel%22%2C%22maxRecordsNode%22%3Afalse%2C%22version%22%3A7%2C%22lastGridSelectionsRaw%22%3A%5B%22BTU%22%5D%2C%22lastSelectionScreeners%22%3A%22s_39%22%2C%22quotesDisabled%22%3Atrue%2C%22lastSelectionPortfolios%22%3A%22p_2%22%2C%22comparisonPanels%22%3A%7B%22Portfolio%22%3A%22p_2%22%2C%22Index%22%3A%22%5EDJX%22%2C%22Watchlist%22%3A%22Watchlists%22%2C%22Screener%22%3A%22s_39%22%7D%2C%22lastSelectionWatchlists%22%3A%22w_26%22%2C%22indicesSelectedFeeds%22%3A%5B%22Google%20News%22%2C%22Yahoo%20News%22%5D%2C%22newsActive%22%3A%22tickerNews%22%2C%22recentSearchMetrics%22%3A%5B%22Price%22%2C%22EPS%22%2C%22Sales%22%5D%2C%22editPanel%22%3A%22positionsPanel%22%2C%22newsType%22%3A%22marketNews%22%2C%22tableColumns%22%3A%5B%22ticker%22%2C%22rank%22%2C%22score_rank%22%2C%22filter_score%22%2C%22company%22%2C%22cash%22%2C%22currentassets%22%2C%22netppe%22%2C%22intangibles%22%2C%22totalassets%22%2C%22currentliabilities%22%2C%22longtermdebt%22%2C%22totaldebt%22%2C%22totalliabilities%22%2C%22equity%22%2C%22tangiblebookvalue%22%2C%22cash_short_term_p%22%2C%22net_ppe_p%22%2C%22intangibles_p%22%5D%2C%22last_save%22%3A1644769379%2C%22panels%22%3A%7B%22collapsed%22%3A%7B%22chp%22%3Atrue%2C%22ip%22%3Atrue%2C%22mp%22%3Afalse%2C%22qp%22%3Afalse%2C%22conp%22%3Atrue%2C%22fsp%22%3Afalse%7D%2C%22viewportWidth%22%3A%221920%22%2C%22viewportHeight%22%3A%221069%22%2C%22chartPanelHeight%22%3A483%2C%22controlPanelWidth%22%3A296%2C%22insightPanelWidth%22%3A%22485%22%2C%22quoteBoxHeight%22%3A200%2C%22navigationPanelWidth%22%3A277%7D%7D&updateMarket=true&page=1&start=0&limit=250"
headers = {
'authority': 'www.stockrover.com',
'sec-ch-ua': '" Not A;Brand";v="99", "Chromium";v="98", "Google Chrome";v="98"',
'x-csrf-token': '7yR4pfI0kAArtjJak535+NJrpB0L212PAbXCg0kbyE4SyjFaQ73sMHJLiqAkPb5nGzfC8KvAa3kTADLAEQXyOQ==',
'sec-ch-ua-mobile': '?0',
'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.82 Safari/537.36',
'content-type': 'application/x-www-form-urlencoded; charset=UTF-8',
'accept': 'application/json',
'x-requested-with': 'XMLHttpRequest',
'sec-ch-ua-platform': '"Windows"',
'origin': 'https://www.stockrover.com',
'sec-fetch-site': 'same-origin',
'sec-fetch-mode': 'cors',
'sec-fetch-dest': 'empty',
'referer': 'https://www.stockrover.com/research/table/281/s_39/BTU',
'accept-language': 'en-US,en;q=0.9',
'cookie': 'remember_me_pref=0; user_name=test11964; plan=3; premiumBraintreeKey=MIIBCgKCAQEAzM4LJfrNnBOgRFB1dDJkmqTFCWT2Y%2BksOydD8xDH4R033WUzxbffMZb%2B3dqEyQvOVjLcwFIHByDc4Xwej7enas2E%2FVRyh7Cvyadn7M5zQeRyLcI9Ys5KCozMwxJPc0x76FlXPwiAo1Qlz3RcLb9wGHBag2R51FuTie%2BhVDCgzWajqDCREzRhi%2Fqlt3D%2FxXNo%2FiwJlpOUr%2Fx1QnkkILxgKlq1dD7KJ767O5ojYKXsO%2BV2Bfu7sSD3djsOxQJ1%2FRbaDm2E96EDkWhhOeOpPndQ6IuSl4NmnJg%2Fcq6f8csW8M3Ys%2BMZPFkdxPC4%2FfRM1XC9o76PjpVNBIO%2ByJEELKZedwIDAQAB; lr=1644769628; _Ruby2_session=Q1drcmlhazYvUFZLd0NydnRXUGpoUzArZDlxYWRCcW9sRUx5VDBydWVWRHdGWDZlMnlESURzbldwbFV1L0drbUlKaWt5MXRtaS9iR0ZYZEpPVHQ1N25qRnR1d3FrY0tzQW1qQm9CdTZ3MSs0d2c3MlpuMjRiQWhCOHI1cGNWekZ4cUdJd0ZFcGtpeng3MFlqZjFDUW9RYmpFMU9DeGdGMVZKR1EwMjVhSE9yVHl4VXFtQm9aYVBtNHF5d0pwMjJ1aVlNMUVRUzdnVFZWZ1AxQkY5Q0p6a2RKay9QL05tOWk4cHZiSERtaGRxeTlxTWZnV3Q0cjdwR3RndUtmeUp3QThhMnJaV2dGZjlPUUtjcGRidDhiajRxK2g0RUZTMWNZUDBaeGNCcUVxSDJ1QnZVRlRkWk9tUExJNWN3TDN5T1BQcmhVVGsycStVTzJRaUwvSkk2TnNVZldTOGU3Tm5wQ3RUMy9nazFqbzdrUWtvYzRwQWRpV3dnTVB3YzhodFV2U0FRR3VKdllMY01NZmdOdGtmOEJ4UT09LS1nTXBrYldhQ0pEeWJ3ak9qQjcrTGV3PT0%3D--1439f36a7f9362aee4b5b666747a2d63d72e81bd'
}
response = requests.request("POST", url, headers=headers, data=payload)
stock_info = response.json()['stock_infos']
stock_data(stock_info)
The added block of code is added after the csv file has already been written.
You have other problems as well but that is the most obvious at the moment.
it should be closer to...
...
for info in stock_info:
key = info[0]
for i, sub in enumerate(info[1:], 1):
if sub == key:
print(info[i:])
break
stock_data(stock_info)
Also the added block of code doesn't actually make any changes to the data, it only changes what ends up getting printed. Whatever changes you want made to the file need to be made to stock_info.
I am trying to scrape "shopee.com.my" top selling products with scrape and also tried with requests but failed in getting valid JSON object. my requests code is given below:
import requests as r
import json
data = {
'authority': 'shopee.com.my',
'method': 'GET',
'accept-encoding': 'gzip, deflate, br',
'accept-language': 'en-US,en;q=0.9',
'scheme': 'https',
'accept': '*/*, application/json',
'user-agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.121 Safari/537.36',
'x-api-source': 'pc',
'x-requested-with': 'XMLHttpRequest',
'x-shopee-language': 'en',
'sec-fetch-dest': 'empty',
'sec-fetch-mode': 'cors',
'sec-fetch-site': 'same-origin',
}
subcat_url = '/Boys-Fashion-cat.27.2427'
id = subcat_url.split('.')[-1]
data['path'] = f'/api/v2/search_items/?by=sales&limit=50&match_id={id}&newest=0&order=desc&page_type=search&version=2'
data['referer'] = f'https://shopee.com.my{subcat_url}?page=0&sortBy=sales'
url = f'https://shopee.com.my/api/v2/search_items/?by=sales&match_id={id}&newest=0&order=desc&page_type=search&version=2'
req = r.get(url, headers=data)
items = req.json()['items']
print(items)
print(f'Items length: {len(items)}')
here is my scrapy code:
import scrapy
import json
from scrapy import Request
from scrapy.http.cookies import CookieJar
header_data = {'authority': 'shopee.com.my',
'method': 'GET',
'scheme': 'https',
'accept': '*/*',
'accept-encoding': 'gzip, deflate, br',
'accept-language': 'en-US,en;q=0.9',
'user-agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.121 Safari/537.36',
# 'cookie': 'SPC_U=-; SPC_IA=-1; SPC_EC=-; SPC_F=7jrWAm4XYNNtyVAk83GPknN8NbCMQEIk; REC_T_ID=476673f8-eeb0-11ea-8919-48df374df85c; _gcl_au=1.1.1197882328.1599225148; _med=refer; _fbp=fb.2.1599225150134.114138691; language=en; _ga=GA1.3.1167355736.1599225151; csrftoken=mu9M72KLd73P9QJusB9zFBP6wV3NGg85; _gid=GA1.3.273342972.1603211749; SPC_SI=yxvc89nmqe97ldvpo6wgeybtc8berzyd; welcomePkgShown=true; AMP_TOKEN=%24NOT_FOUND; REC_MD_41_1000027=1603289427_0_50_0_48; SPC_CT_48918e31="1603289273.lUS7x9IuKN5vNbhzibZCOHrIf6vVQmykU/TXxiOii7w="; SPC_CT_57540430="1603289278.FLT3IdzHC32RmEzFxkOi9pI7qhKIs/yq328elYMuwps="; SPC_CT_50ee4e78="1603289299.gvjW32HwgiQGN/4kj2Ac3YFrpqyHVTO8+UjM+uzxy4E="; _dc_gtm_UA-61915055-6=1; SPC_CT_75d7a2b7="1603289557.t5FvxXhnJacZrKkjnIWCUbAgAxAQ3hG5c1tZBzafwc4="; SPC_R_T_ID="n6Ek85JJY1JZATlhgutfB4KB3qrbmFDYX1+udv1EBAPegPE9xuzM8HFeCy1duskY9+DVLJxe4RqaabhyUuojHQG0NI2TqegihbAge+s3k7w="; SPC_T_IV="SGNXqyZ1jtRYpo5kFeKtYg=="; SPC_R_T_IV="SGNXqyZ1jtRYpo5kFeKtYg=="; SPC_T_ID="n6Ek85JJY1JZATlhgutfB4KB3qrbmFDYX1+udv1EBAPegPE9xuzM8HFeCy1duskY9+DVLJxe4RqaabhyUuojHQG0NI2TqegihbAge+s3k7w="',
'sec-fetch-dest': 'empty',
'sec-fetch-mode': 'cors',
'sec-fetch-site': 'same-origin',
'x-api-source': 'pc',
'x-requested-with': 'XMLHttpRequest',
'x-shopee-language': 'en',
}
class TestSpider(scrapy.Spider):
name = 'test'
allowed_domains = ['shopee.com', 'shopee.com.my', 'shopee.com.my/api/']
def start_requests(self):
subcat_url = '/Baby-Toddler-Play-cat.27.23785'
id = subcat_url.split('.')[-1]
header_data['path'] = f'/api/v2/search_items/?by=sales&limit=50&match_id={id}&newest=0&order=desc&page_type=search&version=2'
header_data['referer'] = f'https://shopee.com.my{subcat_url}?page=0&sortBy=sales'
url = f'https://shopee.com.my/api/v2/search_items/?by=sales&limit=50&match_id={id}&newest=0&order=desc&page_type=search&version=2'
yield Request(url=url, headers=header_data)
def parse_data(self, response):
try:
jdata = json.loads(response.body)
return None
except Exception as e:
print(f'exception: {e}')
print(response.body)
return None
items = jdata['items']
for item in items:
name = item['name']
image_path = item['image']
absolute_image = f'https://cf.shopee.com.my/file/{image_path}_tn'
print(f'this is absolute image {absolute_image}')
monthly_sold = 'pending'
price = float(item['price'])/100000
total_sold = item['sold']
location = item['shop_location']
stock = item['stock']
print(name)
print(price)
print(total_sold)
print(location)
print(stock)
not using cookies now but also tried with fresh cookies but no response.
Here are some example links where some so them responses always valid JSON object but some links not return any response. see below api and direct browser links:
https://shopee.com.my/Kids-Sports-Outdoor-Play-cat.27.21700?page=0&sortBy=sales
https://shopee.com.my/api/v2/search_items/?by=sales&limit=50&match_id=21700&newest=0&order=desc&page_type=search&version=2
https://shopee.com.my/Bath-Toiletries-cat.27.2422
https://shopee.com.my/api/v2/search_items/?by=sales&limit=50&match_id=2422&newest=0&order=desc&page_type=search&version=2
you can also see API links in network tab:
network tab link image
I think you are missing a required header I send them like this and it worked
from pprint import pprint
import requests
headers = {
'authority': 'shopee.com.my',
'pragma': 'no-cache',
'cache-control': 'no-cache',
'x-shopee-language': 'en',
'x-requested-with': 'XMLHttpRequest',
'if-none-match-': '55b03-c3d70d78b473147beeb6551fa9df8ca0',
'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.121 Safari/537.36',
'x-api-source': 'pc',
'accept': '*/*',
'sec-fetch-site': 'same-origin',
'sec-fetch-mode': 'cors',
'sec-fetch-dest': 'empty',
'referer': 'https://shopee.com.my/Kids-Sports-Outdoor-Play-cat.27.21700?page=0&sortBy=sales',
'accept-language': 'es-US,es;q=0.9,en-US;q=0.8,en;q=0.7,es-419;q=0.6',
# 'cookie': '_gcl_au=1.1.1866522785.1603486253; _fbp=fb.2.1603486253254.1114160447; SPC_IA=-1; SPC_EC=-; SPC_U=-; SPC_F=9RO26eJM7IQiFlxki0dAdQCcCsgPwz67; REC_T_ID=71a698d6-1571-11eb-9baf-48df3757c438; SPC_SI=mall.n58BgakbNjCD5RDYlsQJ8EurmBkH5HIY; SPC_CT_c49f0fdc="1603486254.GqWz1BPlfz3MKmUufL3eTwFqgUfdKWcWVf2xiJI7nSk="; SPC_R_T_ID="89vber/2TKnfACAmGbXpxC3BzHc0ajEQMPxgMbAlZnQlgEo7YWmya0sf/KRt1FsoZvaFYKoNDk+Rh9YWLWsNMH324iqgZePbam1q9QpYQlE="; SPC_T_IV="vko6vAtWsyHuqteFHAoPIA=="; SPC_R_T_IV="vko6vAtWsyHuqteFHAoPIA=="; SPC_T_ID="89vber/2TKnfACAmGbXpxC3BzHc0ajEQMPxgMbAlZnQlgEo7YWmya0sf/KRt1FsoZvaFYKoNDk+Rh9YWLWsNMH324iqgZePbam1q9QpYQlE="; AMP_TOKEN=%24NOT_FOUND; _ga=GA1.3.602723004.1603486255; _gid=GA1.3.657631736.1603486255; _dc_gtm_UA-61915055-6=1; language=en',
}
params = (
('by', 'sales'),
('limit', '50'),
('match_id', '21700'),
('newest', '0'),
('order', 'desc'),
('page_type', 'search'),
('version', '2'),
)
response = requests.get('https://shopee.com.my/api/v2/search_items/', headers=headers, params=params)
pprint(response.json())