I want to get latitude and longitude from a webpage using beautifulsoup but they are in a script:
//<![CDATA[
theForm.oldSubmit = theForm.submit;
theForm.submit = WebForm_SaveScrollPositionSubmit;
theForm.oldOnSubmit = theForm.onsubmit;
theForm.onsubmit = WebForm_SaveScrollPositionOnSubmit;
var GMapsProperties={};function getGMapElementById(mapId,GMapElementId){var _mapId=typeof(mapId)=='string'? mapId : mapId.getDiv().id;var overlayArray=GMapsProperties[_mapId]['overlayArray'];for(var i=0;i < overlayArray.length;i++){if(overlayArray[i][0]==GMapElementId){return overlayArray[i][1];}}return null;}function removeGMapElementById(mapId,GMapElementId){var _mapId=typeof(mapId)=='string'? mapId : mapId.getDiv().id;var overlayArray=GMapsProperties[_mapId]['overlayArray'];for(var i=0;i < overlayArray.length;i++){if(overlayArray[i][0]==GMapElementId){overlayArray.splice(i,1);return;}}}function closeWindows(mapId){for(var i=0;i<GMapsProperties[mapId]['windowArray'].length;i++){GMapsProperties[mapId]['windowArray'][i][1].close();}}var _sg=_sg ||{};_sg.cs=(function(){var p={};p.createMarker=function(opt,id){var m=new google.maps.Marker(opt);if(id && m.getMap())GMapsProperties[m.getMap().getDiv().id]['overlayArray'].push([id,m]);return m;};p.createPolyline=function(opt,id){var m=new google.maps.Polyline(opt);if(id && m.getMap())GMapsProperties[m.getMap().getDiv().id]['overlayArray'].push([id,m]);return m;};p.createPolygon=function(opt,id){var m=new google.maps.Polygon(opt);if(id && m.getMap())GMapsProperties[m.getMap().getDiv().id]['overlayArray'].push([id,m]);return m;};return p;})();function addEvent(el,ev,fn){if(el.addEventListener)el.addEventListener(ev,fn,false);else if(el.attachEvent)el.attachEvent('on'+ev,fn);else el['on'+ev]=fn;}GMapsProperties['subgurim_GoogleMapControl'] = {}; var GMapsProperties_subgurim_GoogleMapControl = GMapsProperties['subgurim_GoogleMapControl']; GMapsProperties_subgurim_GoogleMapControl['enableStore'] = false; GMapsProperties_subgurim_GoogleMapControl['overlayArray'] = new Array(); GMapsProperties_subgurim_GoogleMapControl['windowArray'] = new Array();var subgurim_GoogleMapControl;function load_subgurim_GoogleMapControl(){var mapDOM = document.getElementById('subgurim_GoogleMapControl'); if (!mapDOM) 
return;subgurim_GoogleMapControl = new google.maps.Map(mapDOM);function subgurim_GoogleMapControlupdateValues(eventId,value){var item=document.getElementById('subgurim_GoogleMapControl_Event'+eventId);item.value=value;}google.maps.event.addListener(subgurim_GoogleMapControl, 'addoverlay', function(overlay) { if(overlay) { GMapsProperties['subgurim_GoogleMapControl']['overlayArray'].push(overlay); } });google.maps.event.addListener(subgurim_GoogleMapControl, 'clearoverlays', function() { GMapsProperties['subgurim_GoogleMapControl']['overlayArray'] = new Array(); });google.maps.event.addListener(subgurim_GoogleMapControl, 'removeoverlay', function(overlay) { removeGMapElementById('subgurim_GoogleMapControl',overlay.id) });google.maps.event.addListener(subgurim_GoogleMapControl, 'maptypeid_changed', function() { var tipo = subgurim_GoogleMapControl.getMapTypeId(); subgurim_GoogleMapControlupdateValues('0', tipo);});google.maps.event.addListener(subgurim_GoogleMapControl, 'dragend', function() { var lat = subgurim_GoogleMapControl.getCenter().lat(); var lng = subgurim_GoogleMapControl.getCenter().lng(); subgurim_GoogleMapControlupdateValues('2', lat+','+lng); });google.maps.event.addListener(subgurim_GoogleMapControl, 'zoom_changed', function() { subgurim_GoogleMapControlupdateValues('1', subgurim_GoogleMapControl.getZoom()); });subgurim_GoogleMapControl.setOptions({center:new google.maps.LatLng(35.6783546483511,51.4196634292603),disableDefaultUI:true,keyboardShortcuts:false,mapTypeControl:false,mapTypeId:google.maps.MapTypeId.ROADMAP,scrollwheel:false,zoom:14});var marker_subgurim_920435_=_sg.cs.createMarker({position:new google.maps.LatLng(35.6783546483511,51.4196634292603),clickable:true,draggable:false,map:subgurim_GoogleMapControl,raiseOnDrag:true,visible:true,icon:'/images/markers/Site/Tourism/vase.png'}, 'marker_subgurim_920435_');}addEvent(window,'load',load_subgurim_GoogleMapControl);//]]>
and I want information in this part:
{position:new google.maps.LatLng(35.6783546483511,51.4196634292603)
is it possible to access that information by using beautifulsoup or any other web-scraper?
Use a regular expression for this purpose.
import re

# Suppose the page's script text is stored in the variable script_file.
# Capture the two numeric arguments of the first LatLng(...) call. A raw
# string keeps the backslashes in the pattern from being read as (invalid)
# string escapes.
m = re.search(r'LatLng\(\s*(-?[\d.]+)\s*,\s*(-?[\d.]+)\s*\)', script_file)
if m is None:
    raise ValueError('no LatLng(...) call found in script_file')
# Convert with float() instead of eval(): the text comes from a scraped page,
# so it must never be executed as Python code.
latlng = tuple(float(part) for part in m.groups())
print(latlng) #(35.6783546483511, 51.4196634292603)
import re

s = 'position:new google.maps.LatLng(35.6783546483511,51.4196634292603)'
# First group: everything after "(" up to the comma; second group: everything
# after the comma up to the closing parenthesis.
match = re.search(r'\(([^,]+),([^)]+)', s)
lat = float(match.group(1))
lng = float(match.group(2))
If you want to get the latitude and longitude separately, use a regular expression like this:
import re

s = 'position:new google.maps.LatLng(35.6783546483511,51.4196634292603)'
# Allow an optional leading minus sign so that southern latitudes and western
# longitudes (negative values) are matched too.
Lat, Lng = map(float, re.search(r'LatLng\((-?[\d.]+),(-?[\d.]+)\)', s).groups())
Related
I've now tried everything for the past few hours but I can't extract a specific thing from the HTML below. I want to grab the "sessionCartId" but I can't figure out how....
This is what I have tried so far:
sessioncartid = BeautifulSoup(response.text, "html.parser").findAll("script", {"type":"text/javascript"})[2]
data = json.loads(sessioncartid.text)
print(data)
^^ This gives me the correct script tag, but I can't parse it as JSON or get the sessionCartId.
<script type="text/javascript">
/*<![CDATA[*/
var ACC = {config: {}};
ACC.config.contextPath = "";
ACC.config.encodedContextPath = "/de/web";
ACC.config.commonResourcePath = "/_ui/20220811221438/responsive/common";
ACC.config.themeResourcePath = "/_ui/20220811221438/responsive/theme-gh";
ACC.config.siteResourcePath = "/_ui/20220811221438/responsive/site-ghstore";
ACC.config.rootPath = "/_ui/20220811221438/responsive";
ACC.config.CSRFToken = "81b0156a-5a78-4969-b52e-e5080473fb83";
ACC.pwdStrengthVeryWeak = 'password.strength.veryweak';
ACC.pwdStrengthWeak = 'password.strength.weak';
ACC.pwdStrengthMedium = 'password.strength.medium';
ACC.pwdStrengthStrong = 'password.strength.strong';
ACC.pwdStrengthVeryStrong = 'password.strength.verystrong';
ACC.pwdStrengthUnsafePwd = 'password.strength.unsafepwd';
ACC.pwdStrengthTooShortPwd = 'password.strength.tooshortpwd';
ACC.pwdStrengthMinCharText = 'password.strength.minchartext';
ACC.accessibilityLoading = 'aria.pickupinstore.loading';
ACC.accessibilityStoresLoaded = 'aria.pickupinstore.storesloaded';
ACC.config.googleApiKey = "";
ACC.config.googleApiVersion = "3.7";
ACC.autocompleteUrl = '/de/web/search/autocompleteSecure';
ACC.config.loginUrl = '/de/web/login';
ACC.config.authenticationStatusUrl = '/de/web/authentication/status';
/*]]>*/
var OCC =
{
"token": "1799248c-8de0-4199-b5fe-1d610452010a",
"currentUser": "test#gmail.com",
"sessionCartGuid": "2323121232323",
"sessionCartId": "121212123435324",
"sessionLanguageIso": "de",
"sessionCountryIso": "DE",
"urlPosCode": "web",
"isASM": false,
"intermediaryID": "",
"isASMCustomerEmulated": false,
"siteId": "ghstore",
"OCCBaseUrl": "/ghcommercewebservices/v2/ghstore",
"availablePointsOfService": "BUD,FRA,DTM,HAM,GRZ,HAJ,SZG,VIE,WEB,BER",
"primaryPointOfSevice": "WEB",
"clientChannel": "web-eu"
};
</script>
This is how you can extract that dictionary:
from bs4 import BeautifulSoup
import json
import re
html = '''
<script type="text/javascript">
/*<![CDATA[*/
var ACC = {config: {}};
ACC.config.contextPath = "";
ACC.config.encodedContextPath = "/de/web";
ACC.config.commonResourcePath = "/_ui/20220811221438/responsive/common";
ACC.config.themeResourcePath = "/_ui/20220811221438/responsive/theme-gh";
ACC.config.siteResourcePath = "/_ui/20220811221438/responsive/site-ghstore";
ACC.config.rootPath = "/_ui/20220811221438/responsive";
ACC.config.CSRFToken = "81b0156a-5a78-4969-b52e-e5080473fb83";
ACC.pwdStrengthVeryWeak = 'password.strength.veryweak';
ACC.pwdStrengthWeak = 'password.strength.weak';
ACC.pwdStrengthMedium = 'password.strength.medium';
ACC.pwdStrengthStrong = 'password.strength.strong';
ACC.pwdStrengthVeryStrong = 'password.strength.verystrong';
ACC.pwdStrengthUnsafePwd = 'password.strength.unsafepwd';
ACC.pwdStrengthTooShortPwd = 'password.strength.tooshortpwd';
ACC.pwdStrengthMinCharText = 'password.strength.minchartext';
ACC.accessibilityLoading = 'aria.pickupinstore.loading';
ACC.accessibilityStoresLoaded = 'aria.pickupinstore.storesloaded';
ACC.config.googleApiKey = "";
ACC.config.googleApiVersion = "3.7";
ACC.autocompleteUrl = '/de/web/search/autocompleteSecure';
ACC.config.loginUrl = '/de/web/login';
ACC.config.authenticationStatusUrl = '/de/web/authentication/status';
/*]]>*/
var OCC =
{
"token": "1799248c-8de0-4199-b5fe-1d610452010a",
"currentUser": "test#gmail.com",
"sessionCartGuid": "2323121232323",
"sessionCartId": "121212123435324",
"sessionLanguageIso": "de",
"sessionCountryIso": "DE",
"urlPosCode": "web",
"isASM": false,
"intermediaryID": "",
"isASMCustomerEmulated": false,
"siteId": "ghstore",
"OCCBaseUrl": "/ghcommercewebservices/v2/ghstore",
"availablePointsOfService": "BUD,FRA,DTM,HAM,GRZ,HAJ,SZG,VIE,WEB,BER",
"primaryPointOfSevice": "WEB",
"clientChannel": "web-eu"
};
</script>
'''
soup = BeautifulSoup(html, 'html.parser')
# find() applies the `string` filter and returns the <script> whose text
# mentions sessionCartGuid. select_one() only takes a CSS selector, so with it
# the filter was not applied and the first <script> on the page was returned.
info = soup.find('script', string=re.compile('sessionCartGuid'))
# Keep everything after the assignment and let the JSON decoder consume exactly
# one object. raw_decode() stops at the end of that object, so the trailing
# "};" text is not parsed, and unlike split(';') it is not confused by a ';'
# inside one of the string values.
occ_source = info.string.split('var OCC =', 1)[1].lstrip()
json_obj, _ = json.JSONDecoder().raw_decode(occ_source)
# print(json_obj)
print(json_obj['token'])
print(json_obj['currentUser'])
print(json_obj['sessionCartId'])
Result:
1799248c-8de0-4199-b5fe-1d610452010a
test#gmail.com
121212123435324
BeautifulSoup docs: https://beautiful-soup-4.readthedocs.io/en/latest/index.html
I am trying to port some javascript code with opengl to python. But cannot figure out what I am doing wrong in translating prog.uniform[u] = gl.getUniformLocation(prog, u);
Javascript:
let v = buildShader(vert, gl.VERTEX_SHADER);
let f = buildShader(frag, gl.FRAGMENT_SHADER);
let prog = gl.createProgram();
gl.attachShader(prog, v);
gl.attachShader(prog, f);
gl.linkProgram(prog);
prog.uniform = {};
u = ['model','bounds','frac','aspect'];
_.each(u, function(u){ prog.uniform[u] = gl.getUniformLocation(prog, u); });
Python3/PyOpenGl:
v = self.buildShader(vert, GL_VERTEX_SHADER)
f = self.buildShader(frag, GL_FRAGMENT_SHADER)
prog = glCreateProgram()
glAttachShader(prog, v)
glAttachShader(prog, f)
glLinkProgram(prog)
for u in ['model','bounds','frac','aspect']:
loc = glGetUniformLocations(prog,u)
glProgramUniform(prog,loc,u)
glProgramUniform assigns a value to a uniform; its 3rd parameter is the value to assign.
glProgramUniform(prog,loc,u) does not make sense, because u is a string containing the name of the uniform, not a value.
You have to create a dictionary which contains the locations of a uniform for each name:
# Map each uniform name to its location in the linked program so it can be
# looked up later, mirroring `prog.uniform` in the JavaScript version.
uniform = {}
for u in ['model','bounds','frac','aspect']:
    uniform[u] = glGetUniformLocation(prog, u)
or simply
uniform = { u : glGetUniformLocation(prog, u) for u in ['model','bounds','frac','aspect'] }
I'm new to Python and I'm trying to port this C# method:
protected void read(){
string[] attributes = new string[16];
attributes[0] = "ref_num";
attributes[1] = "tenant.name";
attributes[2] = "request_by.first_name";
attributes[3] = "request_by.first_name";
attributes[4] = "customer.first_name";
attributes[5] = "customer.last_name";
attributes[6] = "customer.id";
attributes[7] = "category.sym";
attributes[8] = "status.sym";
attributes[9] = "group.last_name";
attributes[10] = "zreporting_met.sym";
attributes[11] = "assignee.combo_name";
attributes[12] = "open_date";
attributes[13] = "close_date";
attributes[14] = "description";
attributes[15] = "summary";
int sid = soap.login("user", "pass");
string objectType = "cr";
string whereClause = "ref_num = '15967'";
int maxRows = -1;
//Valor de tiempo en Epoch
var epoch = (DateTime.UtcNow - new DateTime(1970, 1, 1, 0, 0, 0, DateTimeKind.Utc)).TotalSeconds;
//Transforma en valor de fecha humana
string _epoch = new DateTime(1970, 1, 1, 0, 0, 0, DateTimeKind.Utc).AddSeconds(epoch).ToShortDateString();
ArrayList resultado = new ArrayList();
XmlDocument xml = new XmlDocument();
try
{
string _selectResponse = soap.doSelect(sid, objectType, whereClause, maxRows, attributes);
xml.LoadXml(_selectResponse);
XmlNodeList nodeList = xml.GetElementsByTagName("AttrValue");
for (int i = 0; i < nodeList.Count; i++)
{
resultado.Add(nodeList[i].InnerXml);
}
soap.logout(sid);
}
catch (Exception l)
{
Console.Write(l.Message);
}
}
This method works perfectly. In Python, this is what I have so far:
class WebService:
soap = 'webservice url'
client = Client(soap)
sid = client.service.login("user","pass")
attributes = ["ref_num", "open_date"]
objectType = "cr"
whereClause = "open_date > 1519862400 AND open_date < 1522368000"
maxRows = -1
tickets = client.service.doSelect(sid, objectType, whereClause, -1, attributes)
print(tickets)
logout = client.service.logout(p)
I am using zeep to connect to the web service.
The error I get is related to the attributes array: the login method works, but when I try to call the doSelect() method it says:
zeep.exceptions.ValidationError: Missing element string (doSelect.attributes)
Can someone help me? Thanks in advance.
I am trying to webscrape the latitude and longitude from the Maps in the URL
The HTML script looks like this:
<div class="map-container"><script>$(document).ready(function() {
var $injector = angular.element(document.body).injector();
var $compile = $injector.get('$compile');
var $rootScope = $injector.get('$rootScope');
var sfMap = $compile('<div data-sf-map data-center="center" data-zoom="15" data-markers="markers" data-scrollwheel="false" data-zoom-level-change="14"></div>');
var scope = $rootScope.$new();
scope.center = {
latitude: 40.7515022729943,
longitude: -74.0071970200125
};
scope.markers = [{
id: 4673,
latitude: 40.7515022729943,
longitude: -74.0071970200125
}];
var $el = sfMap(scope);
$('.listing-map .map-wrapper .map-container').append($el);
});
I tried to use the option of Copy as XPath but it is not working for
this case. How to get the latitude and longitude?
You can try below code to get latitude and longitude values:
import re

# Read the inline <script> text under the map container. XPath selects
# attributes with "@", so the predicate must be @class.
script = driver.find_element_by_xpath('//div[@class="map-container"]/script').get_attribute('textContent')
# The lookbehind anchors on the "latitude: " / "longitude: " label; the number
# itself is an optional minus sign, digits, a literal dot, and more digits.
# Raw strings keep the backslashes intact for the regex engine.
latitude = re.search(r'(?<=latitude: )-?\d+\.\d+', script).group(0)
longitude = re.search(r'(?<=longitude: )-?\d+\.\d+', script).group(0)
I have an HTML string like this:
<SCRIPT LANGUAGE="JavaScript">
prod155920017Matrix = new Array();
prod155920017Matrix[0] = new Array();
prod155920017Matrix[1] = new Array();
prod155920017Matrix[2] = new Array();
prod155920017Matrix[3] = new Array();
prod155920017Matrix[4] = new Array();
prod155920017Matrix[5] = new Array();
prod155920017Matrix[6] = new Array();
prod155920017Matrix[7] = new Array();
prod155920017Matrix[8] = new Array();
prod155920017Matrix[9] = new Array();
prod155920017Matrix[0][0] = new product('0','prod155920017','sku143650081','36.0B/6.0B','BLACK','Joni Snake-Print Thong Sandal',false,0,'',8,'3','/category/images/prod_stock1.gif',new Array(),'','false',9999,'null');
prod155920017Matrix[0][0].vendorRestrictedDates[0] = '20130903';
prod155920017Matrix[0][0].vendorRestrictedDates[1] = '20131129';
prod155920017Matrix[0][0].vendorRestrictedDates[2] = '20130902';
prod155920017Matrix[0][0].vendorRestrictedDates[3] = '20130101';
prod155920017Matrix[0][0].vendorRestrictedDates[4] = '20121225';
prod155920017Matrix[0][0].vendorRestrictedDates[5] = '20130528';
prod155920017Matrix[0][0].vendorRestrictedDates[6] = '20140102';
prod155920017Matrix[0][0].vendorRestrictedDates[7] = '20140101';
prod155920017Matrix[0][0].vendorRestrictedDates[8] = '20131128';
prod155920017Matrix[0][0].vendorRestrictedDates[9] = '20131226';
prod155920017Matrix[0][0].vendorRestrictedDates[10] = '20121226';
prod155920017Matrix[0][0].vendorRestrictedDates[11] = '20130527';
prod155920017Matrix[0][0].vendorRestrictedDates[12] = '20130705';
prod155920017Matrix[0][0].vendorRestrictedDates[13] = '20131225';
prod155920017Matrix[0][0].vendorRestrictedDates[14] = '20130102';
prod155920017Matrix[0][0].vendorRestrictedDates[15] = '20130704';
prod155920017Matrix[0][0].storeFulfillStatus = 'false';
prod155920017Matrix[1][0] = new product('0','prod155920017','sku143650082','36.5B/6.5B','BLACK','Joni Snake-Print Thong Sandal',false,0,'',10,'3','/category/images/prod_stock1.gif',new Array(),'','false',9999,'null');
</SCRIPT>
I tried to use this regex: new product(\([^,]*),([^,]*),([^,]*),([^,]*),([^,]*) to get the data '36.0B/6.0B','BLACK' from the script tag, but it doesn't work.
Please help me get them.
Thanks all very much!
\'\d{2}\.\d[A-Z]\/\d\.\d[A-Z]\',\'[\w]+\'
This should get you the expected result.
Since regex can only do pattern matching, you cannot 'add characters' like ( and ) around the result strings using regex, you will have to do that using python.
The backslash is in the wrong spot:
new product\(([^,]*),([^,]*),([^,]*),([^,]*),([^,]*)