I'm trying to scrape the data from this interactive chart which is located at the bottom of the website below: https://www.vgchartz.com/tools/hw_date.php?reg=USA&ending=Yearly
I've used developer tools in chrome but cannot find the data points in the elements tab.
I would appreciate it if someone could take a look and tell me whether the data points are stored somewhere on the page, or whether there is any way to extract them using Python.
Thank you very much!
The data is included inside a <script> tag on that page. To parse it, you can use the js2py library. For example:
import ast
import re

import js2py
import requests

url = "https://www.vgchartz.com/tools/hw_date.php?reg=USA&ending=Yearly"

# The chart configuration is passed inline as `StockChart({...});` inside a
# <script> tag. Capture the object literal; re.S lets `.` match newlines so
# the multi-line literal is captured as a whole.
match = re.search(
    r"StockChart\(({.*?})\);", requests.get(url).text, flags=re.S
)
if match is None:
    raise ValueError("StockChart(...) configuration not found in the page")
data = match.group(1)

# NOTE: this evaluates JavaScript taken from a remote page; only do this for
# sources you trust.
data = js2py.eval_js("data = " + data + ";")
# Parse the string form of the js2py result back into built-in Python objects.
data = ast.literal_eval(str(data))
print(data)
Prints:
{
"chart": {
"endOnTick": False,
"marginBottom": 90,
"marginLeft": 80,
"marginRight": 20,
"renderTo": "chart_container",
"startOnTick": False,
"zoomType": "y",
},
"legend": {"enabled": True},
"plotOptions": {"series": {"dataGrouping": {"smoothed": True}}},
"rangeSelector": {"selected": 5},
"series": [
{
"cropThreshold": 1,
"data": [
{"x": 1072933200000, "y": 1302609},
{"x": 1104555600000, "y": 2604003},
{"x": 1136091600000, "y": 5074726},
{"x": 1167627600000, "y": 8757992},
{"x": 1199163600000, "y": 10156740},
{"x": 1230786000000, "y": 10369446},
{"x": 1262322000000, "y": 8434877},
{"x": 1293858000000, "y": 4335275},
{"x": 1325394000000, "y": 2164269},
{"x": 1357016400000, "y": 600849},
],
...
The chart data is embedded in the HTML response of a basic GET request to the link. The data points themselves can be found in a JavaScript object defined in a script tag. To pull these points, you will need to use some sort of JavaScript parser to access the object and convert the JS object representation to a Python dictionary. Pure JSON parsing cannot easily be used here, since the object is not written in valid JSON syntax. The code below uses the module pyjsparser, which can be installed via pip: pip3 install pyjsparser.
import requests, pyjsparser
from bs4 import BeautifulSoup as soup
# BeautifulSoup needs the markup itself, so pass the response body (`.text`)
# rather than the `requests.Response` object.
d = soup(requests.get('https://www.vgchartz.com/tools/hw_date.php?reg=USA&ending=Yearly').text, 'html.parser')
# Parse the JavaScript of the first <script> that is a later sibling of
# `.chart_date_selector` directly inside `#chart_body`.
ast = pyjsparser.parse(d.select_one('#chart_body > .chart_date_selector ~ script').text)
def to_json(ast):
    """Convert a parsed JavaScript expression node into plain Python data.

    Args:
        ast: A node dict with a ``'type'`` entry, as produced by the parser.

    Returns:
        A list for ``ArrayExpression``, a dict for ``ObjectExpression``, the
        stored value for ``Literal``, the negated number for a unary minus
        applied to a numeric literal, and ``None`` for any other node type.
    """
    node_type = ast['type']
    if node_type == 'ArrayExpression':
        return [to_json(element) for element in ast['elements']]
    if node_type == 'ObjectExpression':
        result = {}
        for prop in ast['properties']:
            key = prop['key']
            # Unquoted keys are Identifier nodes (``name``); quoted keys are
            # Literal nodes (``value``).
            key_name = key['name'] if 'name' in key else key['value']
            result[key_name] = to_json(prop['value'])
        return result
    if node_type == 'Literal':
        return ast['value']
    if node_type == 'UnaryExpression' and ast.get('operator') == '-':
        # Negative numbers are a minus operator wrapped around a literal.
        operand = to_json(ast['argument'])
        if isinstance(operand, (int, float)) and not isinstance(operand, bool):
            return -operand
    # Anything else (identifiers, calls, ...) has no plain-data equivalent.
    return None
def get_chart_data(ast):
    """Yield the converted value of each ``series`` array property in *ast*.

    Recursively walks nested dicts and lists. When a ``Property`` node whose
    key is ``series`` and whose value is an ``ArrayExpression`` is found, its
    value is converted with ``to_json`` and yielded; that node is not
    descended into any further.

    Args:
        ast: A parsed node (dict), a list of nodes, or any other value
            (other values are ignored).

    Yields:
        The Python representation of each matching ``series`` array.
    """
    if isinstance(ast, dict):
        if ast.get('type') == 'Property':
            key = ast['key']
            # Identifier keys carry ``name``, quoted (Literal) keys carry
            # ``value``; using .get() avoids a KeyError on quoted keys.
            key_name = key.get('name', key.get('value'))
            if key_name == 'series' and ast['value']['type'] == 'ArrayExpression':
                yield to_json(ast['value'])
                # Do not search inside a series that was already emitted.
                return
        for child in ast.values():
            yield from get_chart_data(child)
    elif isinstance(ast, list):
        for item in ast:
            yield from get_chart_data(item)
# Take the first `series` array found in the parsed script; next() raises
# StopIteration if the generator yields nothing.
data = next(get_chart_data(ast))
Output:
[{'showLastLabel': True, 'cropThreshold': 1.0, 'name': 'PSP', 'data': [{'x': 1072933200000.0, 'y': 0.0}, {'x': 1104555600000.0, 'y': 3527367.0}, {'x': 1136091600000.0, 'y': 2952955.0}, {'x': 1167627600000.0, 'y': 3563757.0}, {'x': 1199163600000.0, 'y': 3815680.0}, {'x': 1230786000000.0, 'y': 2452361.0}, {'x': 1262322000000.0, 'y': 1824105.0}, {'x': 1293858000000.0, 'y': 1245169.0}, {'x': 1325394000000.0, 'y': 274023.0}, {'x': 1357016400000.0, 'y': 128378.0}, {'x': 1388552400000.0, 'y': 26999.0}]}, {'showLastLabel': True, 'cropThreshold': 1.0, 'name': 'Wii', 'data': [{'x': 1136091600000.0, 'y': 1075329.0}, {'x': 1167627600000.0, 'y': 6444409.0}, {'x': 1199163600000.0, 'y': 9826502.0}, {'x': 1230786000000.0, 'y': 8989309.0}, {'x': 1262322000000.0, 'y': 7398500.0}, {'x': 1293858000000.0, 'y': 4878060.0}, {'x': 1325394000000.0, 'y': 2042064.0}, {'x': 1357016400000.0, 'y': 773488.0}, {'x': 1388552400000.0, 'y': 216453.0}, {'x': 1420088400000.0, 'y': 58765.0}]}, {'showLastLabel': True, 'cropThreshold': 1.0, 'name': 'X360', 'data': [{'x': 1104555600000.0, 'y': 563282.0}, {'x': 1136091600000.0, 'y': 3832778.0}, {'x': 1167627600000.0, 'y': 4356599.0}, {'x': 1199163600000.0, 'y': 4784134.0}, {'x': 1230786000000.0, 'y': 4691537.0}, {'x': 1262322000000.0, 'y': 6999773.0}, {'x': 1293858000000.0, 'y': 7777810.0}, {'x': 1325394000000.0, 'y': 6488715.0}, {'x': 1357016400000.0, 'y': 3164108.0}, {'x': 1388552400000.0, 'y': 855780.0}, {'x': 1420088400000.0, 'y': 165018.0}, {'x': 1451624400000.0, 'y': 67456.0}, {'x': 1483246800000.0, 'y': 5433.0}]}, {'showLastLabel': True, 'cropThreshold': 1.0, 'name': 'DS', 'data': [{'x': 1072933200000.0, 'y': 1302609.0}, {'x': 1104555600000.0, 'y': 2604003.0}, {'x': 1136091600000.0, 'y': 5074726.0}, {'x': 1167627600000.0, 'y': 8757992.0}, {'x': 1199163600000.0, 'y': 10156740.0}, {'x': 1230786000000.0, 'y': 10369446.0}, {'x': 1262322000000.0, 'y': 8434877.0}, {'x': 1293858000000.0, 'y': 4335275.0}, {'x': 1325394000000.0, 'y': 2164269.0}, {'x': 
1357016400000.0, 'y': 600849.0}]}, {'showLastLabel': True, 'cropThreshold': 1.0, 'name': 'PS3', 'data': [{'x': 1136091600000.0, 'y': 667762.0}, {'x': 1167627600000.0, 'y': 2474435.0}, {'x': 1199163600000.0, 'y': 3547363.0}, {'x': 1230786000000.0, 'y': 4255949.0}, {'x': 1262322000000.0, 'y': 4737437.0}, {'x': 1293858000000.0, 'y': 4486935.0}, {'x': 1325394000000.0, 'y': 3480788.0}, {'x': 1357016400000.0, 'y': 2237467.0}, {'x': 1388552400000.0, 'y': 721523.0}, {'x': 1420088400000.0, 'y': 274884.0}, {'x': 1451624400000.0, 'y': 125072.0}, {'x': 1483246800000.0, 'y': 12482.0}]}, {'showLastLabel': True, 'cropThreshold': 1.0, 'name': '3DS', 'data': [{'x': 1293858000000.0, 'y': 4056029.0}, {'x': 1325394000000.0, 'y': 3542069.0}, {'x': 1357016400000.0, 'y': 3905067.0}, {'x': 1388552400000.0, 'y': 2518536.0}, {'x': 1420088400000.0, 'y': 2499074.0}, {'x': 1451624400000.0, 'y': 2456456.0}, {'x': 1483246800000.0, 'y': 2060575.0}, {'x': 1514782800000.0, 'y': 1534418.0}, {'x': 1546318800000.0, 'y': 615394.0}, {'x': 1577854800000.0, 'y': 249847.0}, {'x': 1609477200000.0, 'y': 2787.0}]}, {'showLastLabel': True, 'cropThreshold': 1.0, 'name': 'PSV', 'data': [{'x': 1293858000000.0, 'y': 0.0}, {'x': 1325394000000.0, 'y': 1227049.0}, {'x': 1357016400000.0, 'y': 509085.0}, {'x': 1388552400000.0, 'y': 324396.0}, {'x': 1420088400000.0, 'y': 247162.0}, {'x': 1451624400000.0, 'y': 108422.0}, {'x': 1483246800000.0, 'y': 20908.0}, {'x': 1514782800000.0, 'y': 3119.0}, {'x': 1546318800000.0, 'y': 0.0}]}, {'showLastLabel': True, 'cropThreshold': 1.0, 'name': 'WiiU', 'data': [{'x': 1325394000000.0, 'y': 899502.0}, {'x': 1357016400000.0, 'y': 1214283.0}, {'x': 1388552400000.0, 'y': 1565573.0}, {'x': 1420088400000.0, 'y': 1361771.0}, {'x': 1451624400000.0, 'y': 460025.0}, {'x': 1483246800000.0, 'y': 9553.0}]}, {'showLastLabel': True, 'cropThreshold': 1.0, 'name': 'PS4', 'data': [{'x': 1357016400000.0, 'y': 2072802.0}, {'x': 1388552400000.0, 'y': 4656798.0}, {'x': 1420088400000.0, 'y': 5802890.0}, 
{'x': 1451624400000.0, 'y': 5077809.0}, {'x': 1483246800000.0, 'y': 5404655.0}, {'x': 1514782800000.0, 'y': 5245644.0}, {'x': 1546318800000.0, 'y': 3824746.0}, {'x': 1577854800000.0, 'y': 2084519.0}, {'x': 1609477200000.0, 'y': 613650.0}, {'x': 1641013200000.0, 'y': 168996.0}]}, {'showLastLabel': True, 'cropThreshold': 1.0, 'name': 'XOne', 'data': [{'x': 1357016400000.0, 'y': 1928445.0}, {'x': 1388552400000.0, 'y': 4325832.0}, {'x': 1420088400000.0, 'y': 4938473.0}, {'x': 1451624400000.0, 'y': 4696984.0}, {'x': 1483246800000.0, 'y': 4307564.0}, {'x': 1514782800000.0, 'y': 4304447.0}, {'x': 1546318800000.0, 'y': 2971031.0}, {'x': 1577854800000.0, 'y': 1508975.0}, {'x': 1609477200000.0, 'y': 503439.0}, {'x': 1641013200000.0, 'y': 22086.0}]}, {'showLastLabel': True, 'cropThreshold': 1.0, 'name': 'NS', 'data': [{'x': 1483246800000.0, 'y': 4880477.0}, {'x': 1514782800000.0, 'y': 5613050.0}, {'x': 1546318800000.0, 'y': 6479134.0}, {'x': 1577854800000.0, 'y': 9027556.0}, {'x': 1609477200000.0, 'y': 8012707.0}, {'x': 1641013200000.0, 'y': 2163947.0}]}, {'showLastLabel': True, 'cropThreshold': 1.0, 'name': 'PS5', 'data': [{'x': 1577854800000.0, 'y': 1940361.0}, {'x': 1609477200000.0, 'y': 4302047.0}, {'x': 1641013200000.0, 'y': 1328693.0}]}, {'showLastLabel': True, 'cropThreshold': 1.0, 'name': 'XS', 'data': [{'x': 1577854800000.0, 'y': 1525675.0}, {'x': 1609477200000.0, 'y': 3893991.0}, {'x': 1641013200000.0, 'y': 1937560.0}]}]
How do I create a list of key-value pairs in Python?
I have these two lists:
x = [1,2,3,4,5]
y = [11,12,13,14,15]
I have tried this code:
# Map each index to a dict holding the x and y values at that position.
l = {}
for i in range(len(x)):
    l[i] = {'x': x[i], 'y': y[i]}
print(l)
output I am getting:
{0: {'x': 1, 'y': 11}, 1: {'x': 2, 'y': 12}, 2: {'x': 3, 'y': 13}, 3: {'x': 4, 'y': 14}, 4: {'x': 5, 'y': 15}}
expected output:
[0: {'x': 1, 'y': 11}, 1: {'x': 2, 'y': 12}, 2: {'x': 3, 'y': 13}, 3: {'x': 4, 'y': 14}, 4: {'x': 5, 'y': 15}]
Maybe you need this
x = [1, 2, 3, 4, 5]
y = [11, 12, 13, 14, 15]
# One single-entry dict per position: {index: {'x': ..., 'y': ...}}
l = [{i: {'x': xi, 'y': yi}} for i, (xi, yi) in enumerate(zip(x, y))]
print(l)
The closest you can get to the expected output is a list that contains dictionaries. It would look like this:
x = [1, 2, 3, 4, 5]
y = [11, 12, 13, 14, 15]
# Pair the values positionally. Iterating over `len(x)` itself would raise
# TypeError because an int is not iterable.
array = [{'x': xi, 'y': yi} for xi, yi in zip(x, y)]
Output will be:
[{'x': 1, 'y': 11}, {'x': 2, 'y': 12}, {'x': 3, 'y': 13}, {'x': 4, 'y': 14}, {'x': 5, 'y': 15}]
You can access an element like this:
# Read individual values by list index, then by dict key.
print(array[0]['x'])  # 1
print(array[1]['y'])  # 12
How can I convert a list of dicts to a list of tuples?
Input:
[{'x': 0.4711900100474648, 'y': 0.6294374442355883}, {'x': 0.4732473419066774, 'y': 0.629306809190704}, {'x': 0.47373722332499346, 'y': 0.6274779185623242}, {'x': 0.47363924704133026, 'y': 0.6273908285324014}, {'x': 0.4731493656230142, 'y': 0.6261715681134813}, {'x': 0.4722349203088243, 'y': 0.6252571227992915}, {'x': 0.47210428526394, 'y': 0.62521357778433}, {'x': 0.4709285698599815, 'y': 0.6253442128292143}, {'x': 0.47024273587433907, 'y': 0.62612802309852}, {'x': 0.4706019822477708, 'y': 0.6283052738465912}]
I want this:
[(0.47..., 0.62...), (..., ...)]
I tried this:
# Fails with the input shown above: the inner loop walks each dict's items, so
# `val` is already a float ('x' or 'y' value) and `val['x']` raises TypeError.
# NOTE: tuple() also accepts at most one argument, so tuple(a, b) would fail too.
tupleList = [tuple(val['x'], val['y']) for dic in listOfDict for key,val in dic.items()]
I get error TypeError: 'float' object is not subscriptable
list_of_points = [
    {'x': 0.4711900100474648, 'y': 0.6294374442355883},
    {'x': 0.4732473419066774, 'y': 0.629306809190704},
    {'x': 0.47373722332499346, 'y': 0.6274779185623242},
    {'x': 0.47363924704133026, 'y': 0.6273908285324014},
    {'x': 0.4731493656230142, 'y': 0.6261715681134813},
    {'x': 0.4722349203088243, 'y': 0.6252571227992915},
    {'x': 0.47210428526394, 'y': 0.62521357778433},
    {'x': 0.4709285698599815, 'y': 0.6253442128292143},
    {'x': 0.47024273587433907, 'y': 0.62612802309852},
    {'x': 0.4706019822477708, 'y': 0.6283052738465912},
]
# Look the coordinates up by key so the result does not depend on key order.
points_tuples = [(point['x'], point['y']) for point in list_of_points]
If you are using Python 3.7+ and inserted the keys in x, y order (as done here), you can simply use dict.values() of each inner dict. Since dicts preserve insertion order, values() returns the values in that same order:
data = [
    {'x': 0.4711900100474648, 'y': 0.6294374442355883},
    {'x': 0.4732473419066774, 'y': 0.629306809190704},
    {'x': 0.47373722332499346, 'y': 0.6274779185623242},
    {'x': 0.47363924704133026, 'y': 0.6273908285324014},
    {'x': 0.4731493656230142, 'y': 0.6261715681134813},
    {'x': 0.4722349203088243, 'y': 0.6252571227992915},
    {'x': 0.47210428526394, 'y': 0.62521357778433},
    {'x': 0.4709285698599815, 'y': 0.6253442128292143},
    {'x': 0.47024273587433907, 'y': 0.62612802309852},
    {'x': 0.4706019822477708, 'y': 0.6283052738465912},
]

# Each tuple holds one dict's values in that dict's insertion order.
tup = [tuple(point.values()) for point in data]
Output:
[(0.4711900100474648, 0.6294374442355883), (0.4732473419066774, 0.629306809190704),
(0.47373722332499346, 0.6274779185623242), (0.47363924704133026, 0.6273908285324014),
(0.4731493656230142, 0.6261715681134813), (0.4722349203088243, 0.6252571227992915),
(0.47210428526394, 0.62521357778433), (0.4709285698599815, 0.6253442128292143),
(0.47024273587433907, 0.62612802309852), (0.4706019822477708, 0.6283052738465912)]
I have a list of dictionaries as follows in Python, and I have to group by 'label' and get the highest value of 'confidence' for each 'label':
[{'label': 'id',
'confidence': 0.11110526,
'topleft': {'x': 0, 'y': 0},
'bottomright': {'x': 187, 'y': 57}},
{'label': 'id',
'confidence': 0.10690566,
'topleft': {'x': 265, 'y': 0},
'bottomright': {'x': 525, 'y': 54}},
{'label': 'name',
'confidence': 0.15541315,
'topleft': {'x': 9, 'y': 24},
'bottomright': {'x': 116, 'y': 58}},
{'label': 'group',
'confidence': 0.12578075,
'topleft': {'x': 53, 'y': 24},
'bottomright': {'x': 153, 'y': 61}},
{'label': 'name',
'confidence': 0.12709439,
'topleft': {'x': 0, 'y': 0},
'bottomright': {'x': 247, 'y': 84}},
{'label': 'group',
'confidence': 0.116156094,
'topleft': {'x': 96, 'y': 23},
'bottomright': {'x': 191, 'y': 61}}]
How do I achieve this efficiently?
You can do this with groupby
from itertools import groupby
from operator import itemgetter

# Assumes `tst` holds the list of dicts shown in the question.
by_label = itemgetter('label')
# groupby() only merges *adjacent* items with equal keys, so the input must be
# sorted by the same key first; otherwise a label can be reported repeatedly.
for label, group in groupby(sorted(tst, key=by_label), key=by_label):
    print(label, max(item['confidence'] for item in group))

# Result:
# group 0.12578075
# id 0.11110526
# name 0.15541315