Scraping data through Api from json - python

I would like to limit the data they receive to the first 8 links on the website. As shown in the picture, there is no data available beyond the 8th link, as seen in the CSV file. How can I apply this limit so that they only receive data from the first 8 links? The website link is https://www.linkedin.com/learning/search?keywords=data%20science,
JSON API
CSV File
Code part
import requests
import pandas as pd
url = "https://www.linkedin.com/learning-api/searchV2?keywords=data%20science&q=keywords&searchRequestId=RW4AuZRJT22%2BUeXnsZJGQA%3D%3D"
payload={}
headers = {
'authority': 'www.linkedin.com',
'accept': 'application/vnd.linkedin.normalized+json+2.1',
'accept-language': 'en-GB,en-US;q=0.9,en;q=0.8,pt;q=0.7',
'cookie': 'bscookie="v=1&202108281231498ed9b977-a15a-4647-83ff-d0ef12adfbfbAQFdf9p_GSaBPrFkmyztJ8zyOnqVND-D"; li_theme=light; li_theme_set=app; li_sugr=4752e3dd-9232-4bb9-9dbb-b29c1a127f77; bcookie="v=2&9fb3a4d0-1139-4e2b-89ba-e5374eeb9735"; aam_uuid=08800810176251362264578372297522883472; _gcl_au=1.1.240501668.1664707206; li_rm=AQELLfU3ZqmMhAAAAYQ_tPjGK8ONpN3EEUxH1P4M6Czq5fk6EXaEXSzKwoNSXoSZ7KgO5uSTE9iZ30fuhs6ju1rLH1VgXYyRM3nNuiTQEx1k2ca6SR0Hk1d5-NBafeE0zv65QetFY5Yrx2ufzRlfEXUkJJSoO9Z2o7MeuX-3Go7P4dI-m5HQM7VOKLiK_TD-ZWzj_OkdkR75K31QKGq8bxPLa0JpkGUzhDIVGWzl6vqkcl6BJEK2s-keIZjsiH5MZ9sbLXEVOxLg4vD21TTJBNshE6zaiWrSnxx_PEm44eDPqjvXRMVWFeX7VZfIe2KFshWXLRc4SY8hAQINymU; visit=v=1&M; G_ENABLED_IDPS=google; JSESSIONID="ajax:7673827752327651374"; timezone=Asia/Karachi; _guid=0f0d3402-80be-4bef-9baf-18d281f68921; mbox=session^#965dfb20b29e4f2688eedcf643d2e5ab^#1671620169|PC^#965dfb20b29e4f2688eedcf643d2e5ab.38_0^#1687170309; __ssid=db28305b-28da-4f8b-ad3a-54dea10b9eb9; dfpfpt=da2e5dde482a41b09cf7178ba1bcec7e; g_state={"i_l":0}; liap=true; li_at=AQEDATKxuC8DTVh9AAABhaytidQAAAGGZN5q6E0AdHv14xrDnsngkfFuMyIIbGYccHR15UrPQ8rb3qpS0_-mpCFm9pXQkoNYGdk87LiGVIqiw4oXuJ9tqflCEOev71_L83JoJ-fkbOfZwdG0RICtuIHn; AnalyticsSyncHistory=AQKUIualgILMBgAAAYZHP2t3mvejt25dMqUMRmrpyhaQMe1cucNiAMliFNRUf4cu4aKnZ1z1kQ_FGeqFr2m04Q; lms_ads=AQEr9ksNAL4kugAAAYZHP2z8QK26stPkoXe2TgJZW3Fnrl4dCzbC2DtithS1-zp5Ve85QwxzRhPvP9okaC0kbu40FYX7EqIk; lms_analytics=AQEr9ksNAL4kugAAAYZHP2z8QK26stPkoXe2TgJZW3Fnrl4dCzbC2DtithS1-zp5Ve85QwxzRhPvP9okaC0kbu40FYX7EqIk; fid=AQGWcXnO5AffyAAAAYZRr6tph6cekZ9ZD66e1xdHhumlVvJ3cKYzZLwfK-I3nJyeRyLQs3LRnowKjQ; lil-lang=en_US; lang=v=2&lang=en-us; _dd_l=1; _dd=ff90da3c-aa07-4491-9106-b226eba1c09c; AMCVS_14215E3D5995C57C0A495C55%40AdobeOrg=1; AMCV_14215E3D5995C57C0A495C55%40AdobeOrg=-637568504%7CMCIDTS%7C19403%7CMCMID%7C09349215808923073694559483836331055195%7CMCAAMLH-1677084815%7C3%7CMCAAMB-1677084815%7CRKhpRz8krg2tLO6pguXWp5olkAcUniQYPHaMWWgdJ3xzPWQmdj0y%7CMCOPTOUT-1676487215s%7CNONE%7CMCCIDH%7C1076847823%7CvVersion%7C5.1.1; s_cc=true; UserMatchHistory=AQJJ3j-efkcQeQAAAYZWAETxBE44VVBGzo_i-gr5nEGPOK85mS3kDScLdGC24_GeNx-GEeCNDrPOjkQde_MGT4iPc7vJV4sT_nPL8Tv4WMTLarIEliLYPkCvou8zFlb3dFNkbXZjVV_KTVeDvUSJ5WJTeStLNXmzV3_EV5mI9dbSRpoTFlJ94vi_zxcCmnLTaGAYGQAdymMv4SbaMgtnt3QcY8Zj9-hnwxdsIEmJloq47_QTP7sfl-SG-vw8xvhl9KYb0ZPKCnQ6ioJhu3G4cFpKJiSUbULkYMADSo0; lidc="b=VB23:s=V:r=V:a=V:p=V:g=4060:u=105:x=1:i=1676480108:t=1676566269:v=2:sig=AQEz2UktgVcQuJwMoVRgKgnUuKtCEm9C"; s_sq=%5B%5BB%5D%5D; gpv_pn=www.linkedin.com%2Flearning%2Fsearch; s_ips=615; s_plt=7.03; s_pltp=www.linkedin.com%2Flearning%2Fsearch; s_tp=6116; s_ppv=www.linkedin.com%2Flearning%2Fsearch%2C47%2C10%2C2859%2C7%2C18; s_tslv=1676480356388',
'csrf-token': 'ajax:7673827752327651374',
'referer': 'https://www.linkedin.com/learning/search?keywords=data%20science',
'sec-ch-ua': '"Chromium";v="110", "Not A(Brand";v="24", "Google Chrome";v="110"',
'sec-ch-ua-mobile': '?0',
'sec-ch-ua-platform': '"Windows"',
'sec-fetch-dest': 'empty',
'sec-fetch-mode': 'cors',
'sec-fetch-site': 'same-origin',
'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36',
'x-li-lang': 'en_US',
'x-li-page-instance': 'urn:li:page:d_learning_search;gNOg2MJoSqWv2XNAh4ukiQ==',
'x-li-pem-metadata': 'Learning Exp - Search=search',
'x-li-track': '{"clientVersion":"1.1.2236","mpVersion":"1.1.2236","osName":"web","timezoneOffset":5,"timezone":"Asia/Karachi","mpName":"learning-web","displayDensity":1,"displayWidth":1366,"displayHeight":768}',
'x-lil-intl-library': 'en_US',
'x-restli-protocol-version': '2.0.0'
}
res = requests.request("GET", url, headers=headers, data=payload).json()
product=[]
items=res['included']
for item in items:
try:
title=item['headline']['title']['text']
except:
title=''
try:
url='https://www.linkedin.com/learning/'+item['slug']
except:
url=''
try:
rating=item['rating']['ratingCount']
except:
rating=''
wev={
'title':title,
'instructor':name,
'review':rating,
'url':url
}
product.append(wev)
df=pd.DataFrame(product)
df.to_csv('learning.csv')

To filter the rows that contain empty columns, specifically those with an empty title column, you can simply add the following code:
df=pd.DataFrame(product)
filter = df["title"] != ""
dfNew = df[filter]
dfNew.to_csv('learning.csv')
The entire code will be:
import requests
import pandas as pd
url = "https://www.linkedin.com/learning-api/searchV2?keywords=data%20science&q=keywords&searchRequestId=RW4AuZRJT22%2BUeXnsZJGQA%3D%3D"
payload={}
headers = {
'authority': 'www.linkedin.com',
'accept': 'application/vnd.linkedin.normalized+json+2.1',
'accept-language': 'en-GB,en-US;q=0.9,en;q=0.8,pt;q=0.7',
'cookie': 'bscookie="v=1&202108281231498ed9b977-a15a-4647-83ff-d0ef12adfbfbAQFdf9p_GSaBPrFkmyztJ8zyOnqVND-D"; li_theme=light; li_theme_set=app; li_sugr=4752e3dd-9232-4bb9-9dbb-b29c1a127f77; bcookie="v=2&9fb3a4d0-1139-4e2b-89ba-e5374eeb9735"; aam_uuid=08800810176251362264578372297522883472; _gcl_au=1.1.240501668.1664707206; li_rm=AQELLfU3ZqmMhAAAAYQ_tPjGK8ONpN3EEUxH1P4M6Czq5fk6EXaEXSzKwoNSXoSZ7KgO5uSTE9iZ30fuhs6ju1rLH1VgXYyRM3nNuiTQEx1k2ca6SR0Hk1d5-NBafeE0zv65QetFY5Yrx2ufzRlfEXUkJJSoO9Z2o7MeuX-3Go7P4dI-m5HQM7VOKLiK_TD-ZWzj_OkdkR75K31QKGq8bxPLa0JpkGUzhDIVGWzl6vqkcl6BJEK2s-keIZjsiH5MZ9sbLXEVOxLg4vD21TTJBNshE6zaiWrSnxx_PEm44eDPqjvXRMVWFeX7VZfIe2KFshWXLRc4SY8hAQINymU; visit=v=1&M; G_ENABLED_IDPS=google; JSESSIONID="ajax:7673827752327651374"; timezone=Asia/Karachi; _guid=0f0d3402-80be-4bef-9baf-18d281f68921; mbox=session^#965dfb20b29e4f2688eedcf643d2e5ab^#1671620169|PC^#965dfb20b29e4f2688eedcf643d2e5ab.38_0^#1687170309; __ssid=db28305b-28da-4f8b-ad3a-54dea10b9eb9; dfpfpt=da2e5dde482a41b09cf7178ba1bcec7e; g_state={"i_l":0}; liap=true; li_at=AQEDATKxuC8DTVh9AAABhaytidQAAAGGZN5q6E0AdHv14xrDnsngkfFuMyIIbGYccHR15UrPQ8rb3qpS0_-mpCFm9pXQkoNYGdk87LiGVIqiw4oXuJ9tqflCEOev71_L83JoJ-fkbOfZwdG0RICtuIHn; AnalyticsSyncHistory=AQKUIualgILMBgAAAYZHP2t3mvejt25dMqUMRmrpyhaQMe1cucNiAMliFNRUf4cu4aKnZ1z1kQ_FGeqFr2m04Q; lms_ads=AQEr9ksNAL4kugAAAYZHP2z8QK26stPkoXe2TgJZW3Fnrl4dCzbC2DtithS1-zp5Ve85QwxzRhPvP9okaC0kbu40FYX7EqIk; lms_analytics=AQEr9ksNAL4kugAAAYZHP2z8QK26stPkoXe2TgJZW3Fnrl4dCzbC2DtithS1-zp5Ve85QwxzRhPvP9okaC0kbu40FYX7EqIk; fid=AQGWcXnO5AffyAAAAYZRr6tph6cekZ9ZD66e1xdHhumlVvJ3cKYzZLwfK-I3nJyeRyLQs3LRnowKjQ; lil-lang=en_US; lang=v=2&lang=en-us; _dd_l=1; _dd=ff90da3c-aa07-4491-9106-b226eba1c09c; AMCVS_14215E3D5995C57C0A495C55%40AdobeOrg=1; AMCV_14215E3D5995C57C0A495C55%40AdobeOrg=-637568504%7CMCIDTS%7C19403%7CMCMID%7C09349215808923073694559483836331055195%7CMCAAMLH-1677084815%7C3%7CMCAAMB-1677084815%7CRKhpRz8krg2tLO6pguXWp5olkAcUniQYPHaMWWgdJ3xzPWQmdj0y%7CMCOPTOUT-1676487215s%7CNONE%7CMCCIDH%7C1076847823%7CvVersion%7C5.1.1; s_cc=true; UserMatchHistory=AQJJ3j-efkcQeQAAAYZWAETxBE44VVBGzo_i-gr5nEGPOK85mS3kDScLdGC24_GeNx-GEeCNDrPOjkQde_MGT4iPc7vJV4sT_nPL8Tv4WMTLarIEliLYPkCvou8zFlb3dFNkbXZjVV_KTVeDvUSJ5WJTeStLNXmzV3_EV5mI9dbSRpoTFlJ94vi_zxcCmnLTaGAYGQAdymMv4SbaMgtnt3QcY8Zj9-hnwxdsIEmJloq47_QTP7sfl-SG-vw8xvhl9KYb0ZPKCnQ6ioJhu3G4cFpKJiSUbULkYMADSo0; lidc="b=VB23:s=V:r=V:a=V:p=V:g=4060:u=105:x=1:i=1676480108:t=1676566269:v=2:sig=AQEz2UktgVcQuJwMoVRgKgnUuKtCEm9C"; s_sq=%5B%5BB%5D%5D; gpv_pn=www.linkedin.com%2Flearning%2Fsearch; s_ips=615; s_plt=7.03; s_pltp=www.linkedin.com%2Flearning%2Fsearch; s_tp=6116; s_ppv=www.linkedin.com%2Flearning%2Fsearch%2C47%2C10%2C2859%2C7%2C18; s_tslv=1676480356388',
'csrf-token': 'ajax:7673827752327651374',
'referer': 'https://www.linkedin.com/learning/search?keywords=data%20science',
'sec-ch-ua': '"Chromium";v="110", "Not A(Brand";v="24", "Google Chrome";v="110"',
'sec-ch-ua-mobile': '?0',
'sec-ch-ua-platform': '"Windows"',
'sec-fetch-dest': 'empty',
'sec-fetch-mode': 'cors',
'sec-fetch-site': 'same-origin',
'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36',
'x-li-lang': 'en_US',
'x-li-page-instance': 'urn:li:page:d_learning_search;gNOg2MJoSqWv2XNAh4ukiQ==',
'x-li-pem-metadata': 'Learning Exp - Search=search',
'x-li-track': '{"clientVersion":"1.1.2236","mpVersion":"1.1.2236","osName":"web","timezoneOffset":5,"timezone":"Asia/Karachi","mpName":"learning-web","displayDensity":1,"displayWidth":1366,"displayHeight":768}',
'x-lil-intl-library': 'en_US',
'x-restli-protocol-version': '2.0.0'
}
res = requests.request("GET", url, headers=headers, data=payload).json()
product=[]
items=res['included']
for item in items:
try:
title=item['headline']['title']['text']
except:
title=''
try:
url='https://www.linkedin.com/learning/'+item['slug']
except:
url=''
try:
rating=item['rating']['ratingCount']
except:
rating=''
name = item.get("description", {}).get("text", "")
wev={
'title':title,
'instructor':name,
'review':rating,
'url':url
}
product.append(wev)
df=pd.DataFrame(product)
filter = df["title"] != ""
dfNew = df[filter]
dfNew.to_csv('learning.csv')
However, this is solution works because the web is structured. For complex/irregular websites I prefer to use scrapy as we use in my job.

Related

Need help isolating results from xhr request

When I run the code below, it's giving me a lot of information I don't want. I only want to capture the data circled starting with 4. Does anyone know how to isolate the data in the request to get rid of everything but what is circled? Also, if anyone knows how to bring the results to a csv or xlsx file, that would be even better.
Working code:
import requests
url = "https://www.stockrover.com/stock_infos/grid?_dc=1644769629231"
data = {
"ticker": 4,
"rank": "5",
}
payload = "state=%7B%22sortInfo%22%3A%7B%7D%2C%22columns%22%3A%5B77%2C32%2C498%2C500%2C31%2C27%2C499%2C30%2C578%2C28%2C29%2C544%2C181%2C185%2C186%5D%2C%22view%22%3A281%2C%22priorPrimaryColumn%22%3A170%2C%22filterData%22%3A%5B%5D%2C%22name%22%3A%22New%201%22%2C%22cType%22%3A%22Screener%22%2C%22cNode%22%3A%22s_39%22%2C%22cIsFolder%22%3Afalse%2C%22gridSelection%22%3A%22BTU%22%2C%22lastActive%22%3A1396898415%2C%22primaryColumn%22%3A76%2C%22folderDisabledParams%22%3A%7B%22filterData%22%3A%5B%5D%7D%2C%22mainGridDateRange%22%3A%22ytd%22%2C%22groupState%22%3Anull%2C%22moversGridDateRange%22%3A%221_day%22%2C%22peersGridDateRange%22%3A%221_day%22%2C%22lastGridSelections%22%3A%5B%22BTU%22%5D%2C%22lastQuantNode%22%3A%5B%5D%2C%22includeQuotesInTable%22%3Afalse%2C%22includeAllQuotesLastValue%22%3Afalse%2C%22markets%22%3A%7B%22panel%22%3A%22summary%22%7D%2C%22researchPanel%22%3A%22tablePanel%22%2C%22recentSearchTickers%22%3A%5B%22SPY%22%2C%22AMZN%22%2C%22AAPL%22%2C%22s_32%22%2C%22%5ENDX%22%2C%22AXP%22%2C%22XOM%22%2C%22AFL%22%2C%22%5EDJX%22%2C%22AIT%22%2C%22ADVC%22%5D%2C%22quotesBoxTickers%22%3A%5B%22AMZN%22%2C%22AAPL%22%2C%22SPY%22%5D%2C%22checkedQuotesBoxTickers%22%3A%5B%22AMZN%22%2C%22AAPL%22%2C%22SPY%22%5D%2C%22dashboard%22%3A%7B%22buttonRef%22%3A%22272%22%7D%2C%22tickerSelectedFeeds%22%3A%5B%22Benzinga%20News%22%2C%22Yahoo%20News%22%5D%2C%22marketSelectedFeeds%22%3A%5B%22Google%20News%22%2C%22Stock%20Market%20News%20-%20Investing.com%22%5D%2C%22bondsSelectedFeeds%22%3A%5B%22Bonds%20Strategy%20-%20Investing.com%22%5D%2C%22commoditiesSelectedFeeds%22%3A%5B%22Commodities%20%26%20Futures%20News%20-%20Investing.com%22%2C%22Commodities%20Fundamental%20Analysis%20-%20Investing.com%22%2C%22Commodities%20Strategy%20Analysis%20-%20Investing.com%22%5D%2C%22stocksSelectedFeeds%22%3A%5B%22CNNMoney%20News%22%2C%22Google%20News%22%2C%22Seeking%20Alpha%20Top%20Stories%22%5D%2C%22etfsSelectedFeeds%22%3A%5B%22Economy%20News%20-%20Investing.com%22%2C%22ETF%20Analysis%20-%20Investing.com%22%2C%22Investing%20Ideas%20-%20Investing.com%22%5D%2C%22topPanel%22%3A%22researchPanel%22%2C%22maxRecordsNode%22%3Afalse%2C%22version%22%3A7%2C%22lastGridSelectionsRaw%22%3A%5B%22BTU%22%5D%2C%22lastSelectionScreeners%22%3A%22s_39%22%2C%22quotesDisabled%22%3Atrue%2C%22lastSelectionPortfolios%22%3A%22p_2%22%2C%22comparisonPanels%22%3A%7B%22Portfolio%22%3A%22p_2%22%2C%22Index%22%3A%22%5EDJX%22%2C%22Watchlist%22%3A%22Watchlists%22%2C%22Screener%22%3A%22s_39%22%7D%2C%22lastSelectionWatchlists%22%3A%22w_26%22%2C%22indicesSelectedFeeds%22%3A%5B%22Google%20News%22%2C%22Yahoo%20News%22%5D%2C%22newsActive%22%3A%22tickerNews%22%2C%22recentSearchMetrics%22%3A%5B%22Price%22%2C%22EPS%22%2C%22Sales%22%5D%2C%22editPanel%22%3A%22positionsPanel%22%2C%22newsType%22%3A%22marketNews%22%2C%22tableColumns%22%3A%5B%22ticker%22%2C%22rank%22%2C%22score_rank%22%2C%22filter_score%22%2C%22company%22%2C%22cash%22%2C%22currentassets%22%2C%22netppe%22%2C%22intangibles%22%2C%22totalassets%22%2C%22currentliabilities%22%2C%22longtermdebt%22%2C%22totaldebt%22%2C%22totalliabilities%22%2C%22equity%22%2C%22tangiblebookvalue%22%2C%22cash_short_term_p%22%2C%22net_ppe_p%22%2C%22intangibles_p%22%5D%2C%22last_save%22%3A1644769379%2C%22panels%22%3A%7B%22collapsed%22%3A%7B%22chp%22%3Atrue%2C%22ip%22%3Atrue%2C%22mp%22%3Afalse%2C%22qp%22%3Afalse%2C%22conp%22%3Atrue%2C%22fsp%22%3Afalse%7D%2C%22viewportWidth%22%3A%221920%22%2C%22viewportHeight%22%3A%221069%22%2C%22chartPanelHeight%22%3A483%2C%22controlPanelWidth%22%3A296%2C%22insightPanelWidth%22%3A%22485%22%2C%22quoteBoxHeight%22%3A200%2C%22navigationPanelWidth%22%3A277%7D%7D&updateMarket=true&page=1&start=0&limit=250"
headers = {
'authority': 'www.stockrover.com',
'sec-ch-ua': '" Not A;Brand";v="99", "Chromium";v="98", "Google Chrome";v="98"',
'x-csrf-token': '7yR4pfI0kAArtjJak535+NJrpB0L212PAbXCg0kbyE4SyjFaQ73sMHJLiqAkPb5nGzfC8KvAa3kTADLAEQXyOQ==',
'sec-ch-ua-mobile': '?0',
'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.82 Safari/537.36',
'content-type': 'application/x-www-form-urlencoded; charset=UTF-8',
'accept': 'application/json',
'x-requested-with': 'XMLHttpRequest',
'sec-ch-ua-platform': '"Windows"',
'origin': 'https://www.stockrover.com',
'sec-fetch-site': 'same-origin',
'sec-fetch-mode': 'cors',
'sec-fetch-dest': 'empty',
'referer': 'https://www.stockrover.com/research/table/281/s_39/BTU',
'accept-language': 'en-US,en;q=0.9',
'cookie': 'remember_me_pref=0; user_name=test11964; plan=3; premiumBraintreeKey=MIIBCgKCAQEAzM4LJfrNnBOgRFB1dDJkmqTFCWT2Y%2BksOydD8xDH4R033WUzxbffMZb%2B3dqEyQvOVjLcwFIHByDc4Xwej7enas2E%2FVRyh7Cvyadn7M5zQeRyLcI9Ys5KCozMwxJPc0x76FlXPwiAo1Qlz3RcLb9wGHBag2R51FuTie%2BhVDCgzWajqDCREzRhi%2Fqlt3D%2FxXNo%2FiwJlpOUr%2Fx1QnkkILxgKlq1dD7KJ767O5ojYKXsO%2BV2Bfu7sSD3djsOxQJ1%2FRbaDm2E96EDkWhhOeOpPndQ6IuSl4NmnJg%2Fcq6f8csW8M3Ys%2BMZPFkdxPC4%2FfRM1XC9o76PjpVNBIO%2ByJEELKZedwIDAQAB; lr=1644769628; _Ruby2_session=Q1drcmlhazYvUFZLd0NydnRXUGpoUzArZDlxYWRCcW9sRUx5VDBydWVWRHdGWDZlMnlESURzbldwbFV1L0drbUlKaWt5MXRtaS9iR0ZYZEpPVHQ1N25qRnR1d3FrY0tzQW1qQm9CdTZ3MSs0d2c3MlpuMjRiQWhCOHI1cGNWekZ4cUdJd0ZFcGtpeng3MFlqZjFDUW9RYmpFMU9DeGdGMVZKR1EwMjVhSE9yVHl4VXFtQm9aYVBtNHF5d0pwMjJ1aVlNMUVRUzdnVFZWZ1AxQkY5Q0p6a2RKay9QL05tOWk4cHZiSERtaGRxeTlxTWZnV3Q0cjdwR3RndUtmeUp3QThhMnJaV2dGZjlPUUtjcGRidDhiajRxK2g0RUZTMWNZUDBaeGNCcUVxSDJ1QnZVRlRkWk9tUExJNWN3TDN5T1BQcmhVVGsycStVTzJRaUwvSkk2TnNVZldTOGU3Tm5wQ3RUMy9nazFqbzdrUWtvYzRwQWRpV3dnTVB3YzhodFV2U0FRR3VKdllMY01NZmdOdGtmOEJ4UT09LS1nTXBrYldhQ0pEeWJ3ak9qQjcrTGV3PT0%3D--1439f36a7f9362aee4b5b666747a2d63d72e81bd'
}
response = requests.request("POST", url, headers=headers, data=payload)
print(response.json())
The data structure in this case is rather unusual but this looks like it might work for you:
import requests
url = "https://www.stockrover.com/stock_infos/grid?_dc=1644769629231"
payload = "state=%7B%22sortInfo%22%3A%7B%7D%2C%22columns%22%3A%5B77%2C32%2C498%2C500%2C31%2C27%2C499%2C30%2C578%2C28%2C29%2C544%2C181%2C185%2C186%5D%2C%22view%22%3A281%2C%22priorPrimaryColumn%22%3A170%2C%22filterData%22%3A%5B%5D%2C%22name%22%3A%22New%201%22%2C%22cType%22%3A%22Screener%22%2C%22cNode%22%3A%22s_39%22%2C%22cIsFolder%22%3Afalse%2C%22gridSelection%22%3A%22BTU%22%2C%22lastActive%22%3A1396898415%2C%22primaryColumn%22%3A76%2C%22folderDisabledParams%22%3A%7B%22filterData%22%3A%5B%5D%7D%2C%22mainGridDateRange%22%3A%22ytd%22%2C%22groupState%22%3Anull%2C%22moversGridDateRange%22%3A%221_day%22%2C%22peersGridDateRange%22%3A%221_day%22%2C%22lastGridSelections%22%3A%5B%22BTU%22%5D%2C%22lastQuantNode%22%3A%5B%5D%2C%22includeQuotesInTable%22%3Afalse%2C%22includeAllQuotesLastValue%22%3Afalse%2C%22markets%22%3A%7B%22panel%22%3A%22summary%22%7D%2C%22researchPanel%22%3A%22tablePanel%22%2C%22recentSearchTickers%22%3A%5B%22SPY%22%2C%22AMZN%22%2C%22AAPL%22%2C%22s_32%22%2C%22%5ENDX%22%2C%22AXP%22%2C%22XOM%22%2C%22AFL%22%2C%22%5EDJX%22%2C%22AIT%22%2C%22ADVC%22%5D%2C%22quotesBoxTickers%22%3A%5B%22AMZN%22%2C%22AAPL%22%2C%22SPY%22%5D%2C%22checkedQuotesBoxTickers%22%3A%5B%22AMZN%22%2C%22AAPL%22%2C%22SPY%22%5D%2C%22dashboard%22%3A%7B%22buttonRef%22%3A%22272%22%7D%2C%22tickerSelectedFeeds%22%3A%5B%22Benzinga%20News%22%2C%22Yahoo%20News%22%5D%2C%22marketSelectedFeeds%22%3A%5B%22Google%20News%22%2C%22Stock%20Market%20News%20-%20Investing.com%22%5D%2C%22bondsSelectedFeeds%22%3A%5B%22Bonds%20Strategy%20-%20Investing.com%22%5D%2C%22commoditiesSelectedFeeds%22%3A%5B%22Commodities%20%26%20Futures%20News%20-%20Investing.com%22%2C%22Commodities%20Fundamental%20Analysis%20-%20Investing.com%22%2C%22Commodities%20Strategy%20Analysis%20-%20Investing.com%22%5D%2C%22stocksSelectedFeeds%22%3A%5B%22CNNMoney%20News%22%2C%22Google%20News%22%2C%22Seeking%20Alpha%20Top%20Stories%22%5D%2C%22etfsSelectedFeeds%22%3A%5B%22Economy%20News%20-%20Investing.com%22%2C%22ETF%20Analysis%20-%20Investing.com%22%2C%22Investing%20Ideas%20-%20Investing.com%22%5D%2C%22topPanel%22%3A%22researchPanel%22%2C%22maxRecordsNode%22%3Afalse%2C%22version%22%3A7%2C%22lastGridSelectionsRaw%22%3A%5B%22BTU%22%5D%2C%22lastSelectionScreeners%22%3A%22s_39%22%2C%22quotesDisabled%22%3Atrue%2C%22lastSelectionPortfolios%22%3A%22p_2%22%2C%22comparisonPanels%22%3A%7B%22Portfolio%22%3A%22p_2%22%2C%22Index%22%3A%22%5EDJX%22%2C%22Watchlist%22%3A%22Watchlists%22%2C%22Screener%22%3A%22s_39%22%7D%2C%22lastSelectionWatchlists%22%3A%22w_26%22%2C%22indicesSelectedFeeds%22%3A%5B%22Google%20News%22%2C%22Yahoo%20News%22%5D%2C%22newsActive%22%3A%22tickerNews%22%2C%22recentSearchMetrics%22%3A%5B%22Price%22%2C%22EPS%22%2C%22Sales%22%5D%2C%22editPanel%22%3A%22positionsPanel%22%2C%22newsType%22%3A%22marketNews%22%2C%22tableColumns%22%3A%5B%22ticker%22%2C%22rank%22%2C%22score_rank%22%2C%22filter_score%22%2C%22company%22%2C%22cash%22%2C%22currentassets%22%2C%22netppe%22%2C%22intangibles%22%2C%22totalassets%22%2C%22currentliabilities%22%2C%22longtermdebt%22%2C%22totaldebt%22%2C%22totalliabilities%22%2C%22equity%22%2C%22tangiblebookvalue%22%2C%22cash_short_term_p%22%2C%22net_ppe_p%22%2C%22intangibles_p%22%5D%2C%22last_save%22%3A1644769379%2C%22panels%22%3A%7B%22collapsed%22%3A%7B%22chp%22%3Atrue%2C%22ip%22%3Atrue%2C%22mp%22%3Afalse%2C%22qp%22%3Afalse%2C%22conp%22%3Atrue%2C%22fsp%22%3Afalse%7D%2C%22viewportWidth%22%3A%221920%22%2C%22viewportHeight%22%3A%221069%22%2C%22chartPanelHeight%22%3A483%2C%22controlPanelWidth%22%3A296%2C%22insightPanelWidth%22%3A%22485%22%2C%22quoteBoxHeight%22%3A200%2C%22navigationPanelWidth%22%3A277%7D%7D&updateMarket=true&page=1&start=0&limit=250"
headers = {
'authority': 'www.stockrover.com',
'sec-ch-ua': '" Not A;Brand";v="99", "Chromium";v="98", "Google Chrome";v="98"',
'x-csrf-token': '7yR4pfI0kAArtjJak535+NJrpB0L212PAbXCg0kbyE4SyjFaQ73sMHJLiqAkPb5nGzfC8KvAa3kTADLAEQXyOQ==',
'sec-ch-ua-mobile': '?0',
'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.82 Safari/537.36',
'content-type': 'application/x-www-form-urlencoded; charset=UTF-8',
'accept': 'application/json',
'x-requested-with': 'XMLHttpRequest',
'sec-ch-ua-platform': '"Windows"',
'origin': 'https://www.stockrover.com',
'sec-fetch-site': 'same-origin',
'sec-fetch-mode': 'cors',
'sec-fetch-dest': 'empty',
'referer': 'https://www.stockrover.com/research/table/281/s_39/BTU',
'accept-language': 'en-US,en;q=0.9',
'cookie': 'remember_me_pref=0; user_name=test11964; plan=3; premiumBraintreeKey=MIIBCgKCAQEAzM4LJfrNnBOgRFB1dDJkmqTFCWT2Y%2BksOydD8xDH4R033WUzxbffMZb%2B3dqEyQvOVjLcwFIHByDc4Xwej7enas2E%2FVRyh7Cvyadn7M5zQeRyLcI9Ys5KCozMwxJPc0x76FlXPwiAo1Qlz3RcLb9wGHBag2R51FuTie%2BhVDCgzWajqDCREzRhi%2Fqlt3D%2FxXNo%2FiwJlpOUr%2Fx1QnkkILxgKlq1dD7KJ767O5ojYKXsO%2BV2Bfu7sSD3djsOxQJ1%2FRbaDm2E96EDkWhhOeOpPndQ6IuSl4NmnJg%2Fcq6f8csW8M3Ys%2BMZPFkdxPC4%2FfRM1XC9o76PjpVNBIO%2ByJEELKZedwIDAQAB; lr=1644769628; _Ruby2_session=Q1drcmlhazYvUFZLd0NydnRXUGpoUzArZDlxYWRCcW9sRUx5VDBydWVWRHdGWDZlMnlESURzbldwbFV1L0drbUlKaWt5MXRtaS9iR0ZYZEpPVHQ1N25qRnR1d3FrY0tzQW1qQm9CdTZ3MSs0d2c3MlpuMjRiQWhCOHI1cGNWekZ4cUdJd0ZFcGtpeng3MFlqZjFDUW9RYmpFMU9DeGdGMVZKR1EwMjVhSE9yVHl4VXFtQm9aYVBtNHF5d0pwMjJ1aVlNMUVRUzdnVFZWZ1AxQkY5Q0p6a2RKay9QL05tOWk4cHZiSERtaGRxeTlxTWZnV3Q0cjdwR3RndUtmeUp3QThhMnJaV2dGZjlPUUtjcGRidDhiajRxK2g0RUZTMWNZUDBaeGNCcUVxSDJ1QnZVRlRkWk9tUExJNWN3TDN5T1BQcmhVVGsycStVTzJRaUwvSkk2TnNVZldTOGU3Tm5wQ3RUMy9nazFqbzdrUWtvYzRwQWRpV3dnTVB3YzhodFV2U0FRR3VKdllMY01NZmdOdGtmOEJ4UT09LS1nTXBrYldhQ0pEeWJ3ak9qQjcrTGV3PT0%3D--1439f36a7f9362aee4b5b666747a2d63d72e81bd'
}
response = requests.request("POST", url, headers=headers, data=payload)
stock_info = response.json()['stock_infos']
for info in stock_info:
key = info[0]
for i, sub in enumerate(info[1:], 1):
if sub == key:
print(info[i:])
break
Output:
['AA', 1452.0, 4285.0, 6679.0, 0.0, 14197.0, 2929.0, 1724.0, 1725.0, 8736.0, 3878.0, 3878.0, 10.23, 47.05, None]
['ACH', 1773.84, 7909.0, 18758.17, 788.41, 30523.38, 9252.6, 8782.1, 14242.28, 18580.33, 9038.55, 8250.17, 5.81, 61.46, 2.58]
...and the values for all observed tickers

Added code and now it's not print correctly to CSV file

I have the first code that is working and printing to csv, however it included a lot of data I didn't need. A paragraph of code was then added to only include the data I wanted. The problem is it prints to the screen correctly but still includes all the data in the CSV file. I've tried everything I could think of in this line but it won't either won't print or still prints everything.
data = pd.DataFrame(stock_info)
Could someone show me where I'm going wrong so it will print only the portion I want it to?
Old Working Code
import requests
import pandas as pd
url = "https://www.stockrover.com/stock_infos/grid?_dc=1644769629231"
def stock_data(stock_info):
data = pd.DataFrame(stock_info)
data.to_csv("data.csv", index=False)
payload = "state=%7B%22sortInfo%22%3A%7B%7D%2C%22columns%22%3A%5B77%2C32%2C498%2C500%2C31%2C27%2C499%2C30%2C578%2C28%2C29%2C544%2C181%2C185%2C186%5D%2C%22view%22%3A281%2C%22priorPrimaryColumn%22%3A170%2C%22filterData%22%3A%5B%5D%2C%22name%22%3A%22New%201%22%2C%22cType%22%3A%22Screener%22%2C%22cNode%22%3A%22s_39%22%2C%22cIsFolder%22%3Afalse%2C%22gridSelection%22%3A%22BTU%22%2C%22lastActive%22%3A1396898415%2C%22primaryColumn%22%3A76%2C%22folderDisabledParams%22%3A%7B%22filterData%22%3A%5B%5D%7D%2C%22mainGridDateRange%22%3A%22ytd%22%2C%22groupState%22%3Anull%2C%22moversGridDateRange%22%3A%221_day%22%2C%22peersGridDateRange%22%3A%221_day%22%2C%22lastGridSelections%22%3A%5B%22BTU%22%5D%2C%22lastQuantNode%22%3A%5B%5D%2C%22includeQuotesInTable%22%3Afalse%2C%22includeAllQuotesLastValue%22%3Afalse%2C%22markets%22%3A%7B%22panel%22%3A%22summary%22%7D%2C%22researchPanel%22%3A%22tablePanel%22%2C%22recentSearchTickers%22%3A%5B%22SPY%22%2C%22AMZN%22%2C%22AAPL%22%2C%22s_32%22%2C%22%5ENDX%22%2C%22AXP%22%2C%22XOM%22%2C%22AFL%22%2C%22%5EDJX%22%2C%22AIT%22%2C%22ADVC%22%5D%2C%22quotesBoxTickers%22%3A%5B%22AMZN%22%2C%22AAPL%22%2C%22SPY%22%5D%2C%22checkedQuotesBoxTickers%22%3A%5B%22AMZN%22%2C%22AAPL%22%2C%22SPY%22%5D%2C%22dashboard%22%3A%7B%22buttonRef%22%3A%22272%22%7D%2C%22tickerSelectedFeeds%22%3A%5B%22Benzinga%20News%22%2C%22Yahoo%20News%22%5D%2C%22marketSelectedFeeds%22%3A%5B%22Google%20News%22%2C%22Stock%20Market%20News%20-%20Investing.com%22%5D%2C%22bondsSelectedFeeds%22%3A%5B%22Bonds%20Strategy%20-%20Investing.com%22%5D%2C%22commoditiesSelectedFeeds%22%3A%5B%22Commodities%20%26%20Futures%20News%20-%20Investing.com%22%2C%22Commodities%20Fundamental%20Analysis%20-%20Investing.com%22%2C%22Commodities%20Strategy%20Analysis%20-%20Investing.com%22%5D%2C%22stocksSelectedFeeds%22%3A%5B%22CNNMoney%20News%22%2C%22Google%20News%22%2C%22Seeking%20Alpha%20Top%20Stories%22%5D%2C%22etfsSelectedFeeds%22%3A%5B%22Economy%20News%20-%20Investing.com%22%2C%22ETF%20Analysis%20-%20Investing.com%22%2C%22Investing%20Ideas%20-%20Investing.com%22%5D%2C%22topPanel%22%3A%22researchPanel%22%2C%22maxRecordsNode%22%3Afalse%2C%22version%22%3A7%2C%22lastGridSelectionsRaw%22%3A%5B%22BTU%22%5D%2C%22lastSelectionScreeners%22%3A%22s_39%22%2C%22quotesDisabled%22%3Atrue%2C%22lastSelectionPortfolios%22%3A%22p_2%22%2C%22comparisonPanels%22%3A%7B%22Portfolio%22%3A%22p_2%22%2C%22Index%22%3A%22%5EDJX%22%2C%22Watchlist%22%3A%22Watchlists%22%2C%22Screener%22%3A%22s_39%22%7D%2C%22lastSelectionWatchlists%22%3A%22w_26%22%2C%22indicesSelectedFeeds%22%3A%5B%22Google%20News%22%2C%22Yahoo%20News%22%5D%2C%22newsActive%22%3A%22tickerNews%22%2C%22recentSearchMetrics%22%3A%5B%22Price%22%2C%22EPS%22%2C%22Sales%22%5D%2C%22editPanel%22%3A%22positionsPanel%22%2C%22newsType%22%3A%22marketNews%22%2C%22tableColumns%22%3A%5B%22ticker%22%2C%22rank%22%2C%22score_rank%22%2C%22filter_score%22%2C%22company%22%2C%22cash%22%2C%22currentassets%22%2C%22netppe%22%2C%22intangibles%22%2C%22totalassets%22%2C%22currentliabilities%22%2C%22longtermdebt%22%2C%22totaldebt%22%2C%22totalliabilities%22%2C%22equity%22%2C%22tangiblebookvalue%22%2C%22cash_short_term_p%22%2C%22net_ppe_p%22%2C%22intangibles_p%22%5D%2C%22last_save%22%3A1644769379%2C%22panels%22%3A%7B%22collapsed%22%3A%7B%22chp%22%3Atrue%2C%22ip%22%3Atrue%2C%22mp%22%3Afalse%2C%22qp%22%3Afalse%2C%22conp%22%3Atrue%2C%22fsp%22%3Afalse%7D%2C%22viewportWidth%22%3A%221920%22%2C%22viewportHeight%22%3A%221069%22%2C%22chartPanelHeight%22%3A483%2C%22controlPanelWidth%22%3A296%2C%22insightPanelWidth%22%3A%22485%22%2C%22quoteBoxHeight%22%3A200%2C%22navigationPanelWidth%22%3A277%7D%7D&updateMarket=true&page=1&start=0&limit=250"
headers = {
'authority': 'www.stockrover.com',
'sec-ch-ua': '" Not A;Brand";v="99", "Chromium";v="98", "Google Chrome";v="98"',
'x-csrf-token': '7yR4pfI0kAArtjJak535+NJrpB0L212PAbXCg0kbyE4SyjFaQ73sMHJLiqAkPb5nGzfC8KvAa3kTADLAEQXyOQ==',
'sec-ch-ua-mobile': '?0',
'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.82 Safari/537.36',
'content-type': 'application/x-www-form-urlencoded; charset=UTF-8',
'accept': 'application/json',
'x-requested-with': 'XMLHttpRequest',
'sec-ch-ua-platform': '"Windows"',
'origin': 'https://www.stockrover.com',
'sec-fetch-site': 'same-origin',
'sec-fetch-mode': 'cors',
'sec-fetch-dest': 'empty',
'referer': 'https://www.stockrover.com/research/table/281/s_39/BTU',
'accept-language': 'en-US,en;q=0.9',
'cookie': 'remember_me_pref=0; user_name=test11964; plan=3; premiumBraintreeKey=MIIBCgKCAQEAzM4LJfrNnBOgRFB1dDJkmqTFCWT2Y%2BksOydD8xDH4R033WUzxbffMZb%2B3dqEyQvOVjLcwFIHByDc4Xwej7enas2E%2FVRyh7Cvyadn7M5zQeRyLcI9Ys5KCozMwxJPc0x76FlXPwiAo1Qlz3RcLb9wGHBag2R51FuTie%2BhVDCgzWajqDCREzRhi%2Fqlt3D%2FxXNo%2FiwJlpOUr%2Fx1QnkkILxgKlq1dD7KJ767O5ojYKXsO%2BV2Bfu7sSD3djsOxQJ1%2FRbaDm2E96EDkWhhOeOpPndQ6IuSl4NmnJg%2Fcq6f8csW8M3Ys%2BMZPFkdxPC4%2FfRM1XC9o76PjpVNBIO%2ByJEELKZedwIDAQAB; lr=1644769628; _Ruby2_session=Q1drcmlhazYvUFZLd0NydnRXUGpoUzArZDlxYWRCcW9sRUx5VDBydWVWRHdGWDZlMnlESURzbldwbFV1L0drbUlKaWt5MXRtaS9iR0ZYZEpPVHQ1N25qRnR1d3FrY0tzQW1qQm9CdTZ3MSs0d2c3MlpuMjRiQWhCOHI1cGNWekZ4cUdJd0ZFcGtpeng3MFlqZjFDUW9RYmpFMU9DeGdGMVZKR1EwMjVhSE9yVHl4VXFtQm9aYVBtNHF5d0pwMjJ1aVlNMUVRUzdnVFZWZ1AxQkY5Q0p6a2RKay9QL05tOWk4cHZiSERtaGRxeTlxTWZnV3Q0cjdwR3RndUtmeUp3QThhMnJaV2dGZjlPUUtjcGRidDhiajRxK2g0RUZTMWNZUDBaeGNCcUVxSDJ1QnZVRlRkWk9tUExJNWN3TDN5T1BQcmhVVGsycStVTzJRaUwvSkk2TnNVZldTOGU3Tm5wQ3RUMy9nazFqbzdrUWtvYzRwQWRpV3dnTVB3YzhodFV2U0FRR3VKdllMY01NZmdOdGtmOEJ4UT09LS1nTXBrYldhQ0pEeWJ3ak9qQjcrTGV3PT0%3D--1439f36a7f9362aee4b5b666747a2d63d72e81bd'
}
response = requests.request("POST", url, headers=headers, data=payload)
stock_info = response.json()['stock_infos']
stock_data(stock_info)
New Non-Working Code
import requests
import pandas as pd
url = "https://www.stockrover.com/stock_infos/grid?_dc=1644769629231"
def stock_data(stock_info):
data = pd.DataFrame(stock_info)
data.to_csv("data.csv", index=False)
payload = "state=%7B%22sortInfo%22%3A%7B%7D%2C%22columns%22%3A%5B77%2C32%2C498%2C500%2C31%2C27%2C499%2C30%2C578%2C28%2C29%2C544%2C181%2C185%2C186%5D%2C%22view%22%3A281%2C%22priorPrimaryColumn%22%3A170%2C%22filterData%22%3A%5B%5D%2C%22name%22%3A%22New%201%22%2C%22cType%22%3A%22Screener%22%2C%22cNode%22%3A%22s_39%22%2C%22cIsFolder%22%3Afalse%2C%22gridSelection%22%3A%22BTU%22%2C%22lastActive%22%3A1396898415%2C%22primaryColumn%22%3A76%2C%22folderDisabledParams%22%3A%7B%22filterData%22%3A%5B%5D%7D%2C%22mainGridDateRange%22%3A%22ytd%22%2C%22groupState%22%3Anull%2C%22moversGridDateRange%22%3A%221_day%22%2C%22peersGridDateRange%22%3A%221_day%22%2C%22lastGridSelections%22%3A%5B%22BTU%22%5D%2C%22lastQuantNode%22%3A%5B%5D%2C%22includeQuotesInTable%22%3Afalse%2C%22includeAllQuotesLastValue%22%3Afalse%2C%22markets%22%3A%7B%22panel%22%3A%22summary%22%7D%2C%22researchPanel%22%3A%22tablePanel%22%2C%22recentSearchTickers%22%3A%5B%22SPY%22%2C%22AMZN%22%2C%22AAPL%22%2C%22s_32%22%2C%22%5ENDX%22%2C%22AXP%22%2C%22XOM%22%2C%22AFL%22%2C%22%5EDJX%22%2C%22AIT%22%2C%22ADVC%22%5D%2C%22quotesBoxTickers%22%3A%5B%22AMZN%22%2C%22AAPL%22%2C%22SPY%22%5D%2C%22checkedQuotesBoxTickers%22%3A%5B%22AMZN%22%2C%22AAPL%22%2C%22SPY%22%5D%2C%22dashboard%22%3A%7B%22buttonRef%22%3A%22272%22%7D%2C%22tickerSelectedFeeds%22%3A%5B%22Benzinga%20News%22%2C%22Yahoo%20News%22%5D%2C%22marketSelectedFeeds%22%3A%5B%22Google%20News%22%2C%22Stock%20Market%20News%20-%20Investing.com%22%5D%2C%22bondsSelectedFeeds%22%3A%5B%22Bonds%20Strategy%20-%20Investing.com%22%5D%2C%22commoditiesSelectedFeeds%22%3A%5B%22Commodities%20%26%20Futures%20News%20-%20Investing.com%22%2C%22Commodities%20Fundamental%20Analysis%20-%20Investing.com%22%2C%22Commodities%20Strategy%20Analysis%20-%20Investing.com%22%5D%2C%22stocksSelectedFeeds%22%3A%5B%22CNNMoney%20News%22%2C%22Google%20News%22%2C%22Seeking%20Alpha%20Top%20Stories%22%5D%2C%22etfsSelectedFeeds%22%3A%5B%22Economy%20News%20-%20Investing.com%22%2C%22ETF%20Analysis%20-%20Investing.com%22%2C%22Investing%20Ideas%20-%20Investing.com%22%5D%2C%22topPanel%22%3A%22researchPanel%22%2C%22maxRecordsNode%22%3Afalse%2C%22version%22%3A7%2C%22lastGridSelectionsRaw%22%3A%5B%22BTU%22%5D%2C%22lastSelectionScreeners%22%3A%22s_39%22%2C%22quotesDisabled%22%3Atrue%2C%22lastSelectionPortfolios%22%3A%22p_2%22%2C%22comparisonPanels%22%3A%7B%22Portfolio%22%3A%22p_2%22%2C%22Index%22%3A%22%5EDJX%22%2C%22Watchlist%22%3A%22Watchlists%22%2C%22Screener%22%3A%22s_39%22%7D%2C%22lastSelectionWatchlists%22%3A%22w_26%22%2C%22indicesSelectedFeeds%22%3A%5B%22Google%20News%22%2C%22Yahoo%20News%22%5D%2C%22newsActive%22%3A%22tickerNews%22%2C%22recentSearchMetrics%22%3A%5B%22Price%22%2C%22EPS%22%2C%22Sales%22%5D%2C%22editPanel%22%3A%22positionsPanel%22%2C%22newsType%22%3A%22marketNews%22%2C%22tableColumns%22%3A%5B%22ticker%22%2C%22rank%22%2C%22score_rank%22%2C%22filter_score%22%2C%22company%22%2C%22cash%22%2C%22currentassets%22%2C%22netppe%22%2C%22intangibles%22%2C%22totalassets%22%2C%22currentliabilities%22%2C%22longtermdebt%22%2C%22totaldebt%22%2C%22totalliabilities%22%2C%22equity%22%2C%22tangiblebookvalue%22%2C%22cash_short_term_p%22%2C%22net_ppe_p%22%2C%22intangibles_p%22%5D%2C%22last_save%22%3A1644769379%2C%22panels%22%3A%7B%22collapsed%22%3A%7B%22chp%22%3Atrue%2C%22ip%22%3Atrue%2C%22mp%22%3Afalse%2C%22qp%22%3Afalse%2C%22conp%22%3Atrue%2C%22fsp%22%3Afalse%7D%2C%22viewportWidth%22%3A%221920%22%2C%22viewportHeight%22%3A%221069%22%2C%22chartPanelHeight%22%3A483%2C%22controlPanelWidth%22%3A296%2C%22insightPanelWidth%22%3A%22485%22%2C%22quoteBoxHeight%22%3A200%2C%22navigationPanelWidth%22%3A277%7D%7D&updateMarket=true&page=1&start=0&limit=250"
headers = {
'authority': 'www.stockrover.com',
'sec-ch-ua': '" Not A;Brand";v="99", "Chromium";v="98", "Google Chrome";v="98"',
'x-csrf-token': '7yR4pfI0kAArtjJak535+NJrpB0L212PAbXCg0kbyE4SyjFaQ73sMHJLiqAkPb5nGzfC8KvAa3kTADLAEQXyOQ==',
'sec-ch-ua-mobile': '?0',
'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.82 Safari/537.36',
'content-type': 'application/x-www-form-urlencoded; charset=UTF-8',
'accept': 'application/json',
'x-requested-with': 'XMLHttpRequest',
'sec-ch-ua-platform': '"Windows"',
'origin': 'https://www.stockrover.com',
'sec-fetch-site': 'same-origin',
'sec-fetch-mode': 'cors',
'sec-fetch-dest': 'empty',
'referer': 'https://www.stockrover.com/research/table/281/s_39/BTU',
'accept-language': 'en-US,en;q=0.9',
'cookie': 'remember_me_pref=0; user_name=test11964; plan=3; premiumBraintreeKey=MIIBCgKCAQEAzM4LJfrNnBOgRFB1dDJkmqTFCWT2Y%2BksOydD8xDH4R033WUzxbffMZb%2B3dqEyQvOVjLcwFIHByDc4Xwej7enas2E%2FVRyh7Cvyadn7M5zQeRyLcI9Ys5KCozMwxJPc0x76FlXPwiAo1Qlz3RcLb9wGHBag2R51FuTie%2BhVDCgzWajqDCREzRhi%2Fqlt3D%2FxXNo%2FiwJlpOUr%2Fx1QnkkILxgKlq1dD7KJ767O5ojYKXsO%2BV2Bfu7sSD3djsOxQJ1%2FRbaDm2E96EDkWhhOeOpPndQ6IuSl4NmnJg%2Fcq6f8csW8M3Ys%2BMZPFkdxPC4%2FfRM1XC9o76PjpVNBIO%2ByJEELKZedwIDAQAB; lr=1644769628; _Ruby2_session=Q1drcmlhazYvUFZLd0NydnRXUGpoUzArZDlxYWRCcW9sRUx5VDBydWVWRHdGWDZlMnlESURzbldwbFV1L0drbUlKaWt5MXRtaS9iR0ZYZEpPVHQ1N25qRnR1d3FrY0tzQW1qQm9CdTZ3MSs0d2c3MlpuMjRiQWhCOHI1cGNWekZ4cUdJd0ZFcGtpeng3MFlqZjFDUW9RYmpFMU9DeGdGMVZKR1EwMjVhSE9yVHl4VXFtQm9aYVBtNHF5d0pwMjJ1aVlNMUVRUzdnVFZWZ1AxQkY5Q0p6a2RKay9QL05tOWk4cHZiSERtaGRxeTlxTWZnV3Q0cjdwR3RndUtmeUp3QThhMnJaV2dGZjlPUUtjcGRidDhiajRxK2g0RUZTMWNZUDBaeGNCcUVxSDJ1QnZVRlRkWk9tUExJNWN3TDN5T1BQcmhVVGsycStVTzJRaUwvSkk2TnNVZldTOGU3Tm5wQ3RUMy9nazFqbzdrUWtvYzRwQWRpV3dnTVB3YzhodFV2U0FRR3VKdllMY01NZmdOdGtmOEJ4UT09LS1nTXBrYldhQ0pEeWJ3ak9qQjcrTGV3PT0%3D--1439f36a7f9362aee4b5b666747a2d63d72e81bd'
}
response = requests.request("POST", url, headers=headers, data=payload)
stock_info = response.json()['stock_infos']
stock_data(stock_info)
for info in stock_info:
key = info[0]
for i, sub in enumerate(info[1:], 1):
if sub == key:
print(info[i:])
break
If you don't need to keep those columns just slice them out by adding a line to your original code:
data = pd.DataFrame(stock_info)
data = data.iloc[:, 4:]
In original code:
import requests
import pandas as pd
url = "https://www.stockrover.com/stock_infos/grid?_dc=1644769629231"
def stock_data(stock_info):
data = pd.DataFrame(stock_info)
data = data.iloc[:, 4:]
data.to_csv("data.csv", index=False)
payload = "state=%7B%22sortInfo%22%3A%7B%7D%2C%22columns%22%3A%5B77%2C32%2C498%2C500%2C31%2C27%2C499%2C30%2C578%2C28%2C29%2C544%2C181%2C185%2C186%5D%2C%22view%22%3A281%2C%22priorPrimaryColumn%22%3A170%2C%22filterData%22%3A%5B%5D%2C%22name%22%3A%22New%201%22%2C%22cType%22%3A%22Screener%22%2C%22cNode%22%3A%22s_39%22%2C%22cIsFolder%22%3Afalse%2C%22gridSelection%22%3A%22BTU%22%2C%22lastActive%22%3A1396898415%2C%22primaryColumn%22%3A76%2C%22folderDisabledParams%22%3A%7B%22filterData%22%3A%5B%5D%7D%2C%22mainGridDateRange%22%3A%22ytd%22%2C%22groupState%22%3Anull%2C%22moversGridDateRange%22%3A%221_day%22%2C%22peersGridDateRange%22%3A%221_day%22%2C%22lastGridSelections%22%3A%5B%22BTU%22%5D%2C%22lastQuantNode%22%3A%5B%5D%2C%22includeQuotesInTable%22%3Afalse%2C%22includeAllQuotesLastValue%22%3Afalse%2C%22markets%22%3A%7B%22panel%22%3A%22summary%22%7D%2C%22researchPanel%22%3A%22tablePanel%22%2C%22recentSearchTickers%22%3A%5B%22SPY%22%2C%22AMZN%22%2C%22AAPL%22%2C%22s_32%22%2C%22%5ENDX%22%2C%22AXP%22%2C%22XOM%22%2C%22AFL%22%2C%22%5EDJX%22%2C%22AIT%22%2C%22ADVC%22%5D%2C%22quotesBoxTickers%22%3A%5B%22AMZN%22%2C%22AAPL%22%2C%22SPY%22%5D%2C%22checkedQuotesBoxTickers%22%3A%5B%22AMZN%22%2C%22AAPL%22%2C%22SPY%22%5D%2C%22dashboard%22%3A%7B%22buttonRef%22%3A%22272%22%7D%2C%22tickerSelectedFeeds%22%3A%5B%22Benzinga%20News%22%2C%22Yahoo%20News%22%5D%2C%22marketSelectedFeeds%22%3A%5B%22Google%20News%22%2C%22Stock%20Market%20News%20-%20Investing.com%22%5D%2C%22bondsSelectedFeeds%22%3A%5B%22Bonds%20Strategy%20-%20Investing.com%22%5D%2C%22commoditiesSelectedFeeds%22%3A%5B%22Commodities%20%26%20Futures%20News%20-%20Investing.com%22%2C%22Commodities%20Fundamental%20Analysis%20-%20Investing.com%22%2C%22Commodities%20Strategy%20Analysis%20-%20Investing.com%22%5D%2C%22stocksSelectedFeeds%22%3A%5B%22CNNMoney%20News%22%2C%22Google%20News%22%2C%22Seeking%20Alpha%20Top%20Stories%22%5D%2C%22etfsSelectedFeeds%22%3A%5B%22Economy%20News%20-%20Investing.com%22%2C%22ETF%20Analysis%20-%20Investing.com%22%2C%22Investing%20Ideas%20-%20Investing.com%22%5D%2C%22topPanel%22%3A%22researchPanel%22%2C%22maxRecordsNode%22%3Afalse%2C%22version%22%3A7%2C%22lastGridSelectionsRaw%22%3A%5B%22BTU%22%5D%2C%22lastSelectionScreeners%22%3A%22s_39%22%2C%22quotesDisabled%22%3Atrue%2C%22lastSelectionPortfolios%22%3A%22p_2%22%2C%22comparisonPanels%22%3A%7B%22Portfolio%22%3A%22p_2%22%2C%22Index%22%3A%22%5EDJX%22%2C%22Watchlist%22%3A%22Watchlists%22%2C%22Screener%22%3A%22s_39%22%7D%2C%22lastSelectionWatchlists%22%3A%22w_26%22%2C%22indicesSelectedFeeds%22%3A%5B%22Google%20News%22%2C%22Yahoo%20News%22%5D%2C%22newsActive%22%3A%22tickerNews%22%2C%22recentSearchMetrics%22%3A%5B%22Price%22%2C%22EPS%22%2C%22Sales%22%5D%2C%22editPanel%22%3A%22positionsPanel%22%2C%22newsType%22%3A%22marketNews%22%2C%22tableColumns%22%3A%5B%22ticker%22%2C%22rank%22%2C%22score_rank%22%2C%22filter_score%22%2C%22company%22%2C%22cash%22%2C%22currentassets%22%2C%22netppe%22%2C%22intangibles%22%2C%22totalassets%22%2C%22currentliabilities%22%2C%22longtermdebt%22%2C%22totaldebt%22%2C%22totalliabilities%22%2C%22equity%22%2C%22tangiblebookvalue%22%2C%22cash_short_term_p%22%2C%22net_ppe_p%22%2C%22intangibles_p%22%5D%2C%22last_save%22%3A1644769379%2C%22panels%22%3A%7B%22collapsed%22%3A%7B%22chp%22%3Atrue%2C%22ip%22%3Atrue%2C%22mp%22%3Afalse%2C%22qp%22%3Afalse%2C%22conp%22%3Atrue%2C%22fsp%22%3Afalse%7D%2C%22viewportWidth%22%3A%221920%22%2C%22viewportHeight%22%3A%221069%22%2C%22chartPanelHeight%22%3A483%2C%22controlPanelWidth%22%3A296%2C%22insightPanelWidth%22%3A%22485%22%2C%22quoteBoxHeight%22%3A200%2C%22navigationPanelWidth%22%3A277%7D%7D&updateMarket=true&page=1&start=0&limit=250"
headers = {
'authority': 'www.stockrover.com',
'sec-ch-ua': '" Not A;Brand";v="99", "Chromium";v="98", "Google Chrome";v="98"',
'x-csrf-token': '7yR4pfI0kAArtjJak535+NJrpB0L212PAbXCg0kbyE4SyjFaQ73sMHJLiqAkPb5nGzfC8KvAa3kTADLAEQXyOQ==',
'sec-ch-ua-mobile': '?0',
'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.82 Safari/537.36',
'content-type': 'application/x-www-form-urlencoded; charset=UTF-8',
'accept': 'application/json',
'x-requested-with': 'XMLHttpRequest',
'sec-ch-ua-platform': '"Windows"',
'origin': 'https://www.stockrover.com',
'sec-fetch-site': 'same-origin',
'sec-fetch-mode': 'cors',
'sec-fetch-dest': 'empty',
'referer': 'https://www.stockrover.com/research/table/281/s_39/BTU',
'accept-language': 'en-US,en;q=0.9',
'cookie': 'remember_me_pref=0; user_name=test11964; plan=3; premiumBraintreeKey=MIIBCgKCAQEAzM4LJfrNnBOgRFB1dDJkmqTFCWT2Y%2BksOydD8xDH4R033WUzxbffMZb%2B3dqEyQvOVjLcwFIHByDc4Xwej7enas2E%2FVRyh7Cvyadn7M5zQeRyLcI9Ys5KCozMwxJPc0x76FlXPwiAo1Qlz3RcLb9wGHBag2R51FuTie%2BhVDCgzWajqDCREzRhi%2Fqlt3D%2FxXNo%2FiwJlpOUr%2Fx1QnkkILxgKlq1dD7KJ767O5ojYKXsO%2BV2Bfu7sSD3djsOxQJ1%2FRbaDm2E96EDkWhhOeOpPndQ6IuSl4NmnJg%2Fcq6f8csW8M3Ys%2BMZPFkdxPC4%2FfRM1XC9o76PjpVNBIO%2ByJEELKZedwIDAQAB; lr=1644769628; _Ruby2_session=Q1drcmlhazYvUFZLd0NydnRXUGpoUzArZDlxYWRCcW9sRUx5VDBydWVWRHdGWDZlMnlESURzbldwbFV1L0drbUlKaWt5MXRtaS9iR0ZYZEpPVHQ1N25qRnR1d3FrY0tzQW1qQm9CdTZ3MSs0d2c3MlpuMjRiQWhCOHI1cGNWekZ4cUdJd0ZFcGtpeng3MFlqZjFDUW9RYmpFMU9DeGdGMVZKR1EwMjVhSE9yVHl4VXFtQm9aYVBtNHF5d0pwMjJ1aVlNMUVRUzdnVFZWZ1AxQkY5Q0p6a2RKay9QL05tOWk4cHZiSERtaGRxeTlxTWZnV3Q0cjdwR3RndUtmeUp3QThhMnJaV2dGZjlPUUtjcGRidDhiajRxK2g0RUZTMWNZUDBaeGNCcUVxSDJ1QnZVRlRkWk9tUExJNWN3TDN5T1BQcmhVVGsycStVTzJRaUwvSkk2TnNVZldTOGU3Tm5wQ3RUMy9nazFqbzdrUWtvYzRwQWRpV3dnTVB3YzhodFV2U0FRR3VKdllMY01NZmdOdGtmOEJ4UT09LS1nTXBrYldhQ0pEeWJ3ak9qQjcrTGV3PT0%3D--1439f36a7f9362aee4b5b666747a2d63d72e81bd'
}
response = requests.request("POST", url, headers=headers, data=payload)
stock_info = response.json()['stock_infos']
stock_data(stock_info)
The added block of code is added after the csv file has already been written.
You have other problems as well but that is the most obvious at the moment.
it should be closer to...
...
for info in stock_info:
key = info[0]
for i, sub in enumerate(info[1:], 1):
if sub == key:
print(info[i:])
break
stock_data(stock_info)
Also the added block of code doesn't actually make any changes to the data, it only changes what ends up getting printed. Whatever changes you want made to the file need to be made to stock_info.

python requests not returning json data

I would like to get the json data from for instance https://app.weathercloud.net/d0838117883#current using python requests module.
I tried:
import re
import requests
device='0838117883'
URL='https://app.weathercloud.net'
URL1=URL+'/d'+device
URL2=URL+'/device/stats'
headers={'Content-Type':'text/plain; charset=UTF-8',
'Referer':URL1,
'User-Agent':'Mozilla/5.0 (X11; Linux i686) AppleWebKit/537.36 (KHTML, like Gecko) Ubuntu Chromium/48.0.2564.82 Chrome/48.0.2564.82 Safari/537.36',
'Accept':'application/json, text/javascript,*/*'}
with requests.Session() as s:
#get html from URL1 in order to get the CSRF token
page = s.get(URL1)
CSRF=re.findall('WEATHERCLOUD_CSRF_TOKEN:"(.*)"},',page.text)[0]
#create parameters for URL2, in order to get the json file
params={'code':device,'WEATHERCLOUD_CSRF_TOKEN':CSRF}
page_stats=requests.get(URL2,params=params,headers=headers)
print(page_stats.url)
print(page_stats) #<Response [200]>
print(page_stats.text) #empty
print(page_stats.json()) #error
But the page_stats is empty.
How can I get the stats data from weathercloud?
Inspecting the page with DevTools, you'll find a useful endpoint:
https://app.weathercloud.net/device/stats
You can "replicate" the original web request made by your browser with requests library:
import requests
cookies = {
'PHPSESSID': '************************',
'WEATHERCLOUD_CSRF_TOKEN':'***********************',
'_ga': '**********',
'_gid': '**********',
'__gads': 'ID=**********',
'WeathercloudCookieAgreed': 'true',
'_gat': '1',
'WEATHERCLOUD_RECENT_ED3C8': '*****************',
}
headers = {
'Connection': 'keep-alive',
'sec-ch-ua': '^\\^Google',
'Accept': 'application/json, text/javascript, */*; q=0.01',
'X-Requested-With': 'XMLHttpRequest',
'sec-ch-ua-mobile': '?0',
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.82 Safari/537.36',
'sec-ch-ua-platform': '^\\^Windows^\\^',
'Sec-Fetch-Site': 'same-origin',
'Sec-Fetch-Mode': 'cors',
'Sec-Fetch-Dest': 'empty',
'Referer': 'https://app.weathercloud.net/d0838117883',
'Accept-Language': 'it-IT,it;q=0.9,en-US;q=0.8,en;q=0.7,es;q=0.6',
}
params = (
('code', '0838117883'),
('WEATHERCLOUD_CSRF_TOKEN', '****************'),
)
response = requests.get('https://app.weathercloud.net/device/stats', headers=headers, params=params, cookies=cookies)
# Serializing json
json_object = json.loads(response.text)
json Output:
{'last_update': 1632842172,
'bar_current': [1632842172, 1006.2],
'bar_day_max': [1632794772, 1013.4],
'bar_day_min': [1632845772, 1006.2],
'bar_month_max': [1632220572, 1028],
'bar_month_min': [1632715572, 997.3],
'bar_year_max': [1614418512, 1038.1],
'bar_year_min': [1615434432, 988.1],
'wdir_current': [1632842172, 180],
..............}
That's it.

extracting JSON from from an http request response - Scrapy

I'm building a web scraper to extract product information from the product link.
the web url is the following: https://scrapingclub.com/exercise/detail_header/
I found the HTTP request link for product details with chrome Dev Tools.
This is my code
class quoteSpider(scrapy.Spider):
name = 'Practice'
start_urls = ['https://scrapingclub.com/exercise/detail_header/']
def parse(self,response):
yield scrapy.Request('https://scrapingclub.com/exercise/ajaxdetail_header/', callback = self.parse_detail, headers={'Accept': '*/*',
'Accept-Encoding': 'gzip, deflate, br',
'Accept-Language': 'es-ES,es;q=0.9,pt;q=0.8',
'Connection': 'keep-alive',
'Cookie': '__cfduid=da54d7e9c59cf35860825eabc96d7f1c41612805624; _ga=GA1.2.1229230175.1612805628; _gid=GA1.2.205529574.1613135874',
'Host': 'scrapingclub.com',
'Referer': 'https://scrapingclub.com/exercise/detail_header/',
'sec-ch-ua': '"Chromium";v="88", "Google Chrome";v="88", ";Not A Brand";v="99"',
'sec-ch-ua-mobile': '?0',
'Sec-Fetch-Dest': 'empty',
'Sec-Fetch-Mode': 'cors',
'Sec-Fetch-Site': 'same-origin',
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.150 Safari/537.36',
'X-Requested-With': 'XMLHttpRequest'})
def parse_detail(self, response):
product = ProductClass()
data = response
# im still debugging so im not putting it into an item yet
# data = json.loads(response.text)
# product['product_name'] = data['title']
# product['detail'] = data['description']
# product['price'] = data['price']
yield {
'value' : data
}
When I run
scrapy crawl ProductSpider -O test.json
This is my output file
[
{"value": "<TextResponse 200 https://scrapingclub.com/exercise/ajaxdetail_header/>"}
]
Why isn't returning me the JSON content?
change header data to get the expected output
class quoteSpider(scrapy.Spider):
name = 'Practice'
start_urls = ['https://scrapingclub.com/exercise/detail_header/']
def parse(self,response):
headers = {
'authority': 'scrapingclub.com',
'accept': '*/*',
'user-agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.150 Safari/537.36',
'x-requested-with': 'XMLHttpRequest',
'sec-fetch-site': 'same-origin',
'sec-fetch-mode': 'cors',
'sec-fetch-dest': 'empty',
'referer': 'https://scrapingclub.com/exercise/detail_header/',
'accept-language': 'en-US,en;q=0.9',
'cookie': '__cfduid=d69d9664405f96c6477078a5c1fa78bb41613195439; _ga=GA1.2.523835360.1613195440; _gid=GA1.2.1763722170.1613195440',
}
yield scrapy.Request('https://scrapingclub.com/exercise/ajaxdetail_header/',
callback = self.parse_detail, headers=headers)
def parse_detail(self, response):
product = {}
data = response
# im still debugging so im not putting it into an item yet
data = json.loads(response.text)
product['product_name'] = data['title']
product['detail'] = data['description']
product['price'] = data['price']
yield product

Unable to scrape "shopee.com.my" top selling products page

I am trying to scrape "shopee.com.my" top selling products with scrape and also tried with requests but failed in getting valid JSON object. my requests code is given below:
import requests as r
import json
data = {
'authority': 'shopee.com.my',
'method': 'GET',
'accept-encoding': 'gzip, deflate, br',
'accept-language': 'en-US,en;q=0.9',
'scheme': 'https',
'accept': '*/*, application/json',
'user-agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.121 Safari/537.36',
'x-api-source': 'pc',
'x-requested-with': 'XMLHttpRequest',
'x-shopee-language': 'en',
'sec-fetch-dest': 'empty',
'sec-fetch-mode': 'cors',
'sec-fetch-site': 'same-origin',
}
subcat_url = '/Boys-Fashion-cat.27.2427'
id = subcat_url.split('.')[-1]
data['path'] = f'/api/v2/search_items/?by=sales&limit=50&match_id={id}&newest=0&order=desc&page_type=search&version=2'
data['referer'] = f'https://shopee.com.my{subcat_url}?page=0&sortBy=sales'
url = f'https://shopee.com.my/api/v2/search_items/?by=sales&match_id={id}&newest=0&order=desc&page_type=search&version=2'
req = r.get(url, headers=data)
items = req.json()['items']
print(items)
print(f'Items length: {len(items)}')
here is my scrapy code:
import scrapy
import json
from scrapy import Request
from scrapy.http.cookies import CookieJar
header_data = {'authority': 'shopee.com.my',
'method': 'GET',
'scheme': 'https',
'accept': '*/*',
'accept-encoding': 'gzip, deflate, br',
'accept-language': 'en-US,en;q=0.9',
'user-agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.121 Safari/537.36',
# 'cookie': 'SPC_U=-; SPC_IA=-1; SPC_EC=-; SPC_F=7jrWAm4XYNNtyVAk83GPknN8NbCMQEIk; REC_T_ID=476673f8-eeb0-11ea-8919-48df374df85c; _gcl_au=1.1.1197882328.1599225148; _med=refer; _fbp=fb.2.1599225150134.114138691; language=en; _ga=GA1.3.1167355736.1599225151; csrftoken=mu9M72KLd73P9QJusB9zFBP6wV3NGg85; _gid=GA1.3.273342972.1603211749; SPC_SI=yxvc89nmqe97ldvpo6wgeybtc8berzyd; welcomePkgShown=true; AMP_TOKEN=%24NOT_FOUND; REC_MD_41_1000027=1603289427_0_50_0_48; SPC_CT_48918e31="1603289273.lUS7x9IuKN5vNbhzibZCOHrIf6vVQmykU/TXxiOii7w="; SPC_CT_57540430="1603289278.FLT3IdzHC32RmEzFxkOi9pI7qhKIs/yq328elYMuwps="; SPC_CT_50ee4e78="1603289299.gvjW32HwgiQGN/4kj2Ac3YFrpqyHVTO8+UjM+uzxy4E="; _dc_gtm_UA-61915055-6=1; SPC_CT_75d7a2b7="1603289557.t5FvxXhnJacZrKkjnIWCUbAgAxAQ3hG5c1tZBzafwc4="; SPC_R_T_ID="n6Ek85JJY1JZATlhgutfB4KB3qrbmFDYX1+udv1EBAPegPE9xuzM8HFeCy1duskY9+DVLJxe4RqaabhyUuojHQG0NI2TqegihbAge+s3k7w="; SPC_T_IV="SGNXqyZ1jtRYpo5kFeKtYg=="; SPC_R_T_IV="SGNXqyZ1jtRYpo5kFeKtYg=="; SPC_T_ID="n6Ek85JJY1JZATlhgutfB4KB3qrbmFDYX1+udv1EBAPegPE9xuzM8HFeCy1duskY9+DVLJxe4RqaabhyUuojHQG0NI2TqegihbAge+s3k7w="',
'sec-fetch-dest': 'empty',
'sec-fetch-mode': 'cors',
'sec-fetch-site': 'same-origin',
'x-api-source': 'pc',
'x-requested-with': 'XMLHttpRequest',
'x-shopee-language': 'en',
}
class TestSpider(scrapy.Spider):
name = 'test'
allowed_domains = ['shopee.com', 'shopee.com.my', 'shopee.com.my/api/']
def start_requests(self):
subcat_url = '/Baby-Toddler-Play-cat.27.23785'
id = subcat_url.split('.')[-1]
header_data['path'] = f'/api/v2/search_items/?by=sales&limit=50&match_id={id}&newest=0&order=desc&page_type=search&version=2'
header_data['referer'] = f'https://shopee.com.my{subcat_url}?page=0&sortBy=sales'
url = f'https://shopee.com.my/api/v2/search_items/?by=sales&limit=50&match_id={id}&newest=0&order=desc&page_type=search&version=2'
yield Request(url=url, headers=header_data)
def parse_data(self, response):
try:
jdata = json.loads(response.body)
return None
except Exception as e:
print(f'exception: {e}')
print(response.body)
return None
items = jdata['items']
for item in items:
name = item['name']
image_path = item['image']
absolute_image = f'https://cf.shopee.com.my/file/{image_path}_tn'
print(f'this is absolute image {absolute_image}')
monthly_sold = 'pending'
price = float(item['price'])/100000
total_sold = item['sold']
location = item['shop_location']
stock = item['stock']
print(name)
print(price)
print(total_sold)
print(location)
print(stock)
not using cookies now but also tried with fresh cookies but no response.
Here are some example links where some so them responses always valid JSON object but some links not return any response. see below api and direct browser links:
https://shopee.com.my/Kids-Sports-Outdoor-Play-cat.27.21700?page=0&sortBy=sales
https://shopee.com.my/api/v2/search_items/?by=sales&limit=50&match_id=21700&newest=0&order=desc&page_type=search&version=2
https://shopee.com.my/Bath-Toiletries-cat.27.2422
https://shopee.com.my/api/v2/search_items/?by=sales&limit=50&match_id=2422&newest=0&order=desc&page_type=search&version=2
you can also see API links in network tab:
network tab link image
I think you are missing a required header I send them like this and it worked
from pprint import pprint
import requests
headers = {
'authority': 'shopee.com.my',
'pragma': 'no-cache',
'cache-control': 'no-cache',
'x-shopee-language': 'en',
'x-requested-with': 'XMLHttpRequest',
'if-none-match-': '55b03-c3d70d78b473147beeb6551fa9df8ca0',
'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.121 Safari/537.36',
'x-api-source': 'pc',
'accept': '*/*',
'sec-fetch-site': 'same-origin',
'sec-fetch-mode': 'cors',
'sec-fetch-dest': 'empty',
'referer': 'https://shopee.com.my/Kids-Sports-Outdoor-Play-cat.27.21700?page=0&sortBy=sales',
'accept-language': 'es-US,es;q=0.9,en-US;q=0.8,en;q=0.7,es-419;q=0.6',
# 'cookie': '_gcl_au=1.1.1866522785.1603486253; _fbp=fb.2.1603486253254.1114160447; SPC_IA=-1; SPC_EC=-; SPC_U=-; SPC_F=9RO26eJM7IQiFlxki0dAdQCcCsgPwz67; REC_T_ID=71a698d6-1571-11eb-9baf-48df3757c438; SPC_SI=mall.n58BgakbNjCD5RDYlsQJ8EurmBkH5HIY; SPC_CT_c49f0fdc="1603486254.GqWz1BPlfz3MKmUufL3eTwFqgUfdKWcWVf2xiJI7nSk="; SPC_R_T_ID="89vber/2TKnfACAmGbXpxC3BzHc0ajEQMPxgMbAlZnQlgEo7YWmya0sf/KRt1FsoZvaFYKoNDk+Rh9YWLWsNMH324iqgZePbam1q9QpYQlE="; SPC_T_IV="vko6vAtWsyHuqteFHAoPIA=="; SPC_R_T_IV="vko6vAtWsyHuqteFHAoPIA=="; SPC_T_ID="89vber/2TKnfACAmGbXpxC3BzHc0ajEQMPxgMbAlZnQlgEo7YWmya0sf/KRt1FsoZvaFYKoNDk+Rh9YWLWsNMH324iqgZePbam1q9QpYQlE="; AMP_TOKEN=%24NOT_FOUND; _ga=GA1.3.602723004.1603486255; _gid=GA1.3.657631736.1603486255; _dc_gtm_UA-61915055-6=1; language=en',
}
params = (
('by', 'sales'),
('limit', '50'),
('match_id', '21700'),
('newest', '0'),
('order', 'desc'),
('page_type', 'search'),
('version', '2'),
)
response = requests.get('https://shopee.com.my/api/v2/search_items/', headers=headers, params=params)
pprint(response.json())

Categories