Related
I'm trying to add data from one variable to another.
I have the variable "group_mapping" with the following data in it:
[
{
"providerGroupId": "8263349d",
"role": {
"id": "PROJECT",
"name": "Project",
"isProjectScoped": true
},
"projects": [
{
"id": "186db3eb",
"name": "usr1"
}
]
},
{
"providerGroupId": "b8d2d1ea",
"role": {
"id": "READER",
"name": "Reader",
"isProjectScoped": false
},
"projects": null
},
{
"providerGroupId": "8263349d",
"role": {
"id": "PROJECT",
"name": "Project",
"isProjectScoped": true
},
"projects": [
{
"id": "3c684697",
"name": "usr3"
}
]
}
]
I want to add the content of group_mapping into another variable "mpvariables" and I'm doing the following
def create_saml_mapping(group_mapping):
    """Fill the module-level GraphQL request used to update the SAML provider.

    The ``UpdateSAMLIdentityProvider`` mutation text is stored in the global
    ``mpquery`` and its variables in the global ``mpvariables``.  The
    ``group_mapping`` argument is placed, unchanged, under
    ``input.patch.groupMapping``.  Nothing is returned; callers read the
    two globals afterwards.
    """
    global mpquery, mpvariables

    # Fields of the identity-provider patch, in the order they are sent.
    patch_fields = {
        "name": "SSO",
        "loginURL": "https://login..../saml2",
        "logoutURL": "https://login..../saml2",
        "useProviderManagedRoles": True,
        "groupMapping": group_mapping,
        "mergeGroupsMappingByRole": False,
        "allowManualRoleOverride": False,
    }
    mpvariables = {"input": {"id": "SSO", "patch": patch_fields}}

    # Mutation text, assembled line by line.
    mpquery = (
        "\n"
        "mutation UpdateSAMLIdentityProvider($input: UpdateSAMLIdentityProviderInput!) {\n"
        "updateSAMLIdentityProvider(input: $input) {\n"
        "samlIdentityProvider {\n"
        "id\n"
        "}\n"
        "}\n"
        "}\n"
    )
def update_saml():
    """Build the SAML mapping request and print the builder's return value.

    Calls ``create_saml_mapping`` with the module-level ``group_mapping`` and
    prints whatever it returned, serialised as JSON with a 2-space indent.
    """
    print("Getting token./Updating SAML")
    mapping_result = create_saml_mapping(group_mapping)
    print(json.dumps(mapping_result, indent=2))
When I print "mpvariables" it shows me this:
{
"input": {
"id": "SSO",
"patch": {
"name": "SSO",
"loginURL": "https://login..../saml2",
"logoutURL": "https://login..../saml2",
"useProviderManagedRoles": true,
"mergeGroupsMappingByRole": false,
"allowManualRoleOverride": false,
"groupMapping": "[\n {\n \"providerGroupId\": \"8263349d\",\n \"role\": {\n \"id\": \"PROJECT\",\n \"name\": \"Project\",\n \"isProjectScoped\": true\n },\n \"projects\": [\n {\n \"id\": \"186db3eb\",\n \"name\": \"usr1\"\n }\n ]\n },\n {\n \"providerGroupId\": \"b8d2d1ea\",\n \"role\": {\n \"id\": \"READER\",\n \"name\": \"Reader\",\n \"isProjectScoped\": false\n },\n \"projects\": null\n },\n {\n \"providerGroupId\": \"8263349d\",\n \"role\": {\n \"id\": \"PROJECT\",\n \"name\": \"Project\",\n \"isProjectScoped\": true\n },\n \"projects\": [\n {\n \"id\": \"3c684697\",\n \"name\": \"usr3\"\n }\n ]\n }\n]"
}
}
}
How do I get the added data to keep the same format as the group_mapping variable, without the \n and \" escape sequences?
Closed. This question needs to be more focused. It is not currently accepting answers.
Want to improve this question? Update the question so it focuses on one problem only by editing this post.
Closed 12 months ago.
Improve this question
I believe the string below is in JSON format, but when I try to
load it with json.loads so that I can easily query and extract the data points, it raises an error.
jdata = '<pre>{\n "askId": "AAABB110-000011",\n "dateCreated": "2009-09-01T00:00:00.000Z",\n "dateUpdated": "2021-06-24T00:00:00.000Z",\n "owners": [\n {\n "ownerType": "Service-Level Owner",\n "VendorId": "000111222"\n },\n {\n "ownerType": "Technical Owner",\n "CustomerId": "000333444"\n },\n {\n "ownerType": "Business Owner",\n "ServiceId": "000005556"\n }\n ],\n "createdBy": "SYSTEM",\n "lastUpdatedBy": "000667778",\n "applicationName": "Treasury Bank Data",\n "description": "Process Data",\n "aliases": [\n "Treasury Bank Data",\n "Bank Data",\n "Bank Reconciliation",\n "Bank Rec",\n "SERV-X"\n ],\n "billingBusinessSegmentId": 6,\n "category": {\n "categoryId": 1,\n "categoryName": "Application"\n },\n "lifecycleStage": {\n "lifecycleStageId": 3,\n "lifecycleStageName": "Production"\n },\n "acquiredEntity": {\n "acquiredEntityId": 0,\n "acquiredEntityName": "Not Applicable"\n },\n "enclaveEnvironment": {\n "enclaveEnvironmentId": 0,\n "enclaveEnvironmentName": "General Hosting (Internal, Cloud, or Vendor hosted)"\n },\n "softwareType": {\n "softwareTypeId": 7,\n "softwareTypeName": "Vendor Product"\n },\n "infrastructureRequired": false,\n "uhgHelpdeskRequired": false,\n "references": [\n {\n "referenceType": "disaster-recovery",\n "referenceValue": "APPX009919"\n }\n ]\n}\n</pre>'
# NOTE: this call fails with "Expecting value: line 1 column 1 (char 0)"
# because jdata begins with the HTML tag '<pre>' rather than a JSON value.
jsonData = json.loads(jdata)
When I try to load the data into the jsonData variable, I get the error below.
Expecting value: line 1 column 1 (char 0)
Is there something wrong with my string not allowing me to load as a json variable?
If you strip off the html tags, you get json:
import json
jdata = '<pre>{\n "askId": "AAABB110-000011",\n "dateCreated": "2009-09-01T00:00:00.000Z",\n "dateUpdated": "2021-06-24T00:00:00.000Z",\n "owners": [\n {\n "ownerType": "Service-Level Owner",\n "VendorId": "000111222"\n },\n {\n "ownerType": "Technical Owner",\n "CustomerId": "000333444"\n },\n {\n "ownerType": "Business Owner",\n "ServiceId": "000005556"\n }\n ],\n "createdBy": "SYSTEM",\n "lastUpdatedBy": "000667778",\n "applicationName": "Treasury Bank Data",\n "description": "Process Data",\n "aliases": [\n "Treasury Bank Data",\n "Bank Data",\n "Bank Reconciliation",\n "Bank Rec",\n "SERV-X"\n ],\n "billingBusinessSegmentId": 6,\n "category": {\n "categoryId": 1,\n "categoryName": "Application"\n },\n "lifecycleStage": {\n "lifecycleStageId": 3,\n "lifecycleStageName": "Production"\n },\n "acquiredEntity": {\n "acquiredEntityId": 0,\n "acquiredEntityName": "Not Applicable"\n },\n "enclaveEnvironment": {\n "enclaveEnvironmentId": 0,\n "enclaveEnvironmentName": "General Hosting (Internal, Cloud, or Vendor hosted)"\n },\n "softwareType": {\n "softwareTypeId": 7,\n "softwareTypeName": "Vendor Product"\n },\n "infrastructureRequired": false,\n "uhgHelpdeskRequired": false,\n "references": [\n {\n "referenceType": "disaster-recovery",\n "referenceValue": "APPX009919"\n }\n ]\n}\n</pre>'
# str.lstrip()/str.rstrip() treat their argument as a *set of characters*,
# not as a prefix/suffix: lstrip('<pre>') would also eat any leading
# 'p', 'r', 'e', '<' or '>' that belongs to the payload itself.
# Remove the exact wrapper tags instead.
payload = jdata.strip()
if payload.startswith('<pre>'):
    payload = payload[len('<pre>'):]
if payload.endswith('</pre>'):
    payload = payload[:-len('</pre>')]
jsonData = json.loads(payload)
print(jsonData)
I am trying to scrape this site for job openings:
https://recruiting.ultipro.com/UNI1029UNION/JobBoard/74c2a308-3bf1-4fb1-8a83-f92fa61499d3/?q=&o=postedDateDesc&w=&wc=&we=&wpst=
I looked in dev tools and saw that the page makes an XHR request to this site to retrieve the job opening(s) information which is in the form of a JSON object:
https://recruiting.ultipro.com/UNI1029UNION/JobBoard/74c2a308-3bf1-4fb1-8a83-f92fa61499d3/JobBoardView/LoadSearchResults
So I'm like "Great! I can parse this in two seconds using a python program like this":
from bs4 import BeautifulSoup
import json
import requests


def crawl():
    """Fetch the job board's search-results endpoint and print each job title.

    Issues a plain GET to the LoadSearchResults URL, passes the response
    body through BeautifulSoup's ``html.parser``, converts the result back
    to text, decodes it as JSON and prints the ``'Title'`` of every entry
    under ``'opportunities'``.
    """
    union = requests.get('https://recruiting.ultipro.com/UNI1029UNION/JobBoard/74c2a308-3bf1-4fb1-8a83-f92fa61499d3/JobBoardView/LoadSearchResults').content
    soup = BeautifulSoup(union, 'html.parser')
    newDict = json.loads(str(soup))
    for job in newDict['opportunities']:
        print(job['Title'])


crawl()
Well it turns out that this page only returns 20 job openings out of 62. So I went back to the page and loaded the entirety of the page (clicked "view more opportunities")
And it said that it sent another XHR request to that same link, yet only 20 records are shown when I look.
How can I scrape all of the records from this page? And if someone could explain what is going on behind the scenes that would be great. I am a little new to web scraping so any insight is appreciated.
You don't need to scrape the page. As you say, the API that returns all the JSON is at this link:
https://recruiting.ultipro.com/UNI1029UNION/JobBoard/74c2a308-3bf1-4fb1-8a83-f92fa61499d3/JobBoardView/LoadSearchResults but you need to send these parameters in the request body:
import requests
headers = {
    'Content-Type': 'application/json'
}

# Search criteria sent in the request body. "Top" caps how many openings
# come back in one response (62 here, to fetch the whole listing) and
# "Skip" is the offset of the first one.
# Kept as a dict rather than a hand-escaped JSON string so it is easy to
# edit and cannot become malformed.
data = {
    "opportunitySearch": {
        "Top": 62,
        "Skip": 0,
        "QueryString": "",
        "OrderBy": [
            {
                "Value": "postedDateDesc",
                "PropertyName": "PostedDate",
                "Ascending": False,
            },
        ],
        "Filters": [
            {
                "t": "TermsSearchFilterDto",
                "fieldName": field_name,
                "extra": None,
                "values": [],
            }
            for field_name in (4, 5, 6)
        ],
    },
    "matchCriteria": {
        "PreferredJobs": [],
        "Educations": [],
        "LicenseAndCertifications": [],
        "Skills": [],
        "hasNoLicenses": False,
        "SkippedSkills": [],
    },
}

# json= serialises the dict for us; the timeout stops the script from
# hanging forever on a stalled connection.
response = requests.post(
    'https://recruiting.ultipro.com/UNI1029UNION/JobBoard/74c2a308-3bf1-4fb1-8a83-f92fa61499d3/JobBoardView/LoadSearchResults',
    headers=headers,
    json=data,
    timeout=30,
)
# Fail loudly on an HTTP error instead of printing an error page.
response.raise_for_status()
print(response.text)
And here using pandas (pip install pandas)
import requests
import pandas as pd
pd.set_option('display.width', 1000)
headers = {
'Content-Type': 'application/json'
}
data = '{\n "opportunitySearch": {\n "Top": 62,\n "Skip": 0,\n "QueryString": "",\n "OrderBy": [\n {\n "Value": "postedDateDesc",\n "PropertyName": "PostedDate",\n "Ascending": false\n }\n ],\n "Filters": [\n {\n "t": "TermsSearchFilterDto",\n "fieldName": 4,\n "extra": null,\n "values": [\n \n ]\n },\n {\n "t": "TermsSearchFilterDto",\n "fieldName": 5,\n "extra": null,\n "values": [\n \n ]\n },\n {\n "t": "TermsSearchFilterDto",\n "fieldName": 6,\n "extra": null,\n "values": [\n \n ]\n }\n ]\n },\n "matchCriteria": {\n "PreferredJobs": [\n \n ],\n "Educations": [\n \n ],\n "LicenseAndCertifications": [\n \n ],\n "Skills": [\n \n ],\n "hasNoLicenses": false,\n "SkippedSkills": [\n \n ]\n }\n}'
# The timeout stops the script from hanging forever on a stalled connection.
response = requests.post(
    'https://recruiting.ultipro.com/UNI1029UNION/JobBoard/74c2a308-3bf1-4fb1-8a83-f92fa61499d3/JobBoardView/LoadSearchResults',
    headers=headers,
    data=data,
    timeout=30,
)
# Surface HTTP errors here rather than as a confusing KeyError on
# 'opportunities' further down.
response.raise_for_status()
data = response.json()
# One row per job opening, reduced to the columns of interest.
df = pd.DataFrame.from_dict(data['opportunities'])
df = df[['Id', 'Title', 'RequisitionNumber', 'JobCategoryName', 'PostedDate']]
print(df.head(5))
Here the "Top": 62 entry in data limits how many results are returned:
{
"opportunitySearch": {
"Top": 62,
"Skip": 0,
"QueryString": "",
"OrderBy": [
{
"Value": "postedDateDesc",
"PropertyName": "PostedDate",
"Ascending": false
}
],
"Filters": [
{
"t": "TermsSearchFilterDto",
"fieldName": 4,
"extra": null,
"values": [
]
},
{
"t": "TermsSearchFilterDto",
"fieldName": 5,
"extra": null,
"values": [
]
},
{
"t": "TermsSearchFilterDto",
"fieldName": 6,
"extra": null,
"values": [
]
}
]
},
"matchCriteria": {
"PreferredJobs": [
],
"Educations": [
],
"LicenseAndCertifications": [
],
"Skills": [
],
"hasNoLicenses": false,
"SkippedSkills": [
]
}
}
I am building a geoJson file dynamically: I accept polygon coordinates and generate a geoJson file, which I then either append to a "master" geoJson file or read directly on a map.
I have been able to generate the geoJson file, but it is written in the wrong format. What do I need to do to get correctly formatted output?
I have tried using:
filecontent = [line.rstrip() for line in recentfile]
to eliminate the "\n" characters, but it still writes odd characters to the file.
This is the badly formatted output I get (beginning). There is a "\n" before and after every value on every line:
{
"type": "FeatureCollection",
"features": [
"{\n \"type\": \"Feature\",\n \"geometry\": {\n \"type\": \"Polygon\",\n \"coordinates\": [\n [\n [\n -96.00362,\n 30.09309,\n 0.0\n ],\n [\n -96.00095,\n 30.09731,\n 0.0\n ],\n [\n -95.99735,\n 30.09713,\n 0.0\n ],\n [\n -95.99848,\n 30.09348,\n 0.0\n ],\n [\n -96.00362,\n 30.09309,\n 0.0\n ]\n ]\n ]\n },\n \"properties\": {\n \"name\": \"Pasture 7\",\n \"styleUrl\": \"#poly-4F2682-3000-128\",\n \"styleHash\": \"-50cd947a\",\n \"styleMapHash\": {\n \"normal\": \"#poly-4F2682-3000-128-normal\",\n \"highlight\": \"#poly-4F2682-3000-128-highlight\"\n },\n \"description\": \"Future site for\\u00a0\\u201cComprehensive Hi-Tech Agricultural Complex\\u201d\",\n \"stroke\": \"#4f2682\",\n \"stroke-opacity\": 1,\n \"stroke-width\": 3,\n \"fill\": \"#4f2682\",\n \"fill-opacity\": 0.5019607843137255\n }\n}{\n \"type\": \"Feature\",\n \"geometry\": {\n \"type\": \"Polygon\",\n \"coordinates\": [\n [\n [\n -96.02551,\n 30.09428,\n 0.0\n ],\n [\n -96.02524,\n 30.09953,\n 0.0\n ],\n [\n -96.0188,\n 30.09913,\n 0.0\n ],\n [\n -96.01874,\n 30.0937,\n 0.0\n ],\n [\n -96.02551,\n 30.09428,\n 0.0\n ]\n ]\n ]\n },\n \"properties\": {\n \"name\": \"Pasture 7\",\n \"styleUrl\": \"#poly-4F2682-3000-128\",\n \"styleHash\": \"-50cd947a\",\n \"styleMapHash\": {\n \"normal\": \"#poly-4F2682-3000-128-normal\",\n \"highlight\": \"#poly-4F2682-3000-128-highlight\"\n },\n \"description\": \"Future site for\\u00a0\\u201cComprehensive Hi-Tech Agricultural Complex\\u201d\",\n \"stroke\": \"#4f2682\",\n \"stroke-opacity\": 1,\n \"stroke-width\": 3,\n \"fill\": \"#4f2682\",\n \"fill-opacity\": 0.5019607843137255\n }\n}\n"
]
}
End of wrong output format
Here is my code to read the geoJson files and append to a new variable before writing to a file.
import os
import json
from pathlib import Path
import shutil
# Names of the .json entries listed in `destinationfiles`, each joined onto
# the `sourceoffiles` directory.
json_names = (name for name in os.listdir(destinationfiles) if name.endswith(".json"))
files = [os.path.join(sourceoffiles, name) for name in json_names]

# Pick the candidate with the latest ctime and read it as plain text.
newest = max(files, key=os.path.getctime)
recentfile = open(newest)
recentfilecontent = recentfile.read()

# Empty FeatureCollection skeleton; the text read above is appended to
# 'features' as a single str element.
geojson_file = {"type": "FeatureCollection", "features": []}
geojson_file['features'].append(recentfilecontent)

with open("newarea.js", "w") as newarea:
    json.dump(geojson_file, newarea, indent=4)
This is my expected output:
{
"type": "FeatureCollection",
"features": [
{
"type": "Feature",
"geometry": {
"type": "Polygon",
"coordinates": [
[
[
-95.97203,
30.094557,
0
],
[
-95.970563,
30.094285,
0
],
[
-95.969004,
30.094035,
0
],
[
-95.967678,
30.093836,
0
],
[
-95.967681,
30.095099,
0
],
[
-95.967687,
30.096482,
0
],
[
-95.967699,
30.097665,
0
],
[
-95.970578,
30.097908,
0
],
[
-95.97203,
30.094557,
0
]
]
]
},
"properties": {
"name": "Area 1",
"styleUrl": "#poly-FF0004-3000-128-nodesc",
"styleHash": "-188cd7bd",
"styleMapHash": {
"normal": "#poly-FF0004-3000-128-nodesc-normal",
"highlight": "#poly-FF0004-3000-128-nodesc-highlight"
},
"stroke": "#ff0004",
"stroke-opacity": 1,
"stroke-width": 3,
"fill": "#ff0004",
"fill-opacity": 0.5019607843137255
}
},
{
"type": "Feature",
"geometry": {
"type": "Polygon",
"coordinates": [
[
[
-95.979281,
30.0946,
0
],
[
-95.979115,
30.095331,
0
],
[
-95.980096,
30.095487,
0
],
[
-95.980258,
30.094751,
0
],
[
-95.979281,
30.0946,
0
]
]
]
},
"properties": {
"name": "Area 2",
"styleUrl": "#poly-0000FF-3000-128-nodesc",
"styleHash": "-6fb6f6b9",
"styleMapHash": {
"normal": "#poly-0000FF-3000-128-nodesc-normal",
"highlight": "#poly-0000FF-3000-128-nodesc-highlight"
},
"stroke": "#0000ff",
"stroke-opacity": 1,
"stroke-width": 3,
"fill": "#0000ff",
"fill-opacity": 0.5019607843137255
}
}
]
};
The file you're reading contains JSON-encoded data, which Python sees as a string. When you append that to geojson_file's features array, you're writing a string. You need to parse the JSON before you add the resulting dict to geojson_file:
# The source file can hold several JSON objects written back to back
# ('{...}{...}'), which json.loads() rejects with "Extra data".  Decode
# them one at a time and add each one as its own feature.
_decoder = json.JSONDecoder()
_text = recentfilecontent.strip()
_pos = 0
while _pos < len(_text):
    _feature, _pos = _decoder.raw_decode(_text, _pos)
    geojson_file['features'].append(_feature)
    # raw_decode() does not skip whitespace before a value, so step over
    # any gap between consecutive objects ourselves.
    while _pos < len(_text) and _text[_pos].isspace():
        _pos += 1
The newline representations disappear, being part of the (meaningless) json white space within what is currently represented as json encoded string containing newline.
I'm very new to Python and still learning it.
I have json file which contains the following json data.
{
"document_tone": {
"tone_categories": [
{
"category_id": "emotion_tone",
"tones": [
{
"tone_name": "Anger",
"score": 0.041202,
"tone_id": "anger"
},
{
"tone_name": "Disgust",
"score": 0.054272,
"tone_id": "disgust"
},
{
"tone_name": "Fear",
"score": 0.080706,
"tone_id": "fear"
},
{
"tone_name": "Joy",
"score": 0.813125,
"tone_id": "joy"
},
{
"tone_name": "Sadness",
"score": 0.155878,
"tone_id": "sadness"
}
],
"category_name": "Emotion Tone"
},
{
"category_id": "writing_tone",
"tones": [
{
"tone_name": "Analytical",
"score": 0.0,
"tone_id": "analytical"
},
{
"tone_name": "Confident",
"score": 0.0,
"tone_id": "confident"
},
{
"tone_name": "Tentative",
"score": 0.0,
"tone_id": "tentative"
}
],
"category_name": "Writing Tone"
},
{
"category_id": "social_tone",
"tones": [
{
"tone_name": "Openness",
"score": 0.028,
"tone_id": "openness_big5"
},
{
"tone_name": "Conscientiousness",
"score": 0.314,
"tone_id": "conscientiousness_big5"
},
{
"tone_name": "Extraversion",
"score": 0.944,
"tone_id": "extraversion_big5"
},
{
"tone_name": "Agreeableness",
"score": 0.982,
"tone_id": "agreeableness_big5"
},
{
"tone_name": "Emotional Range",
"score": 0.865,
"tone_id": "neuroticism_big5"
}
],
"category_name": "Social Tone"
}
]
}
}
First I want to extract following fields and after extracting the fields I need "tone_name" with highest score.
"tones": [
{
"tone_name": "Anger",
"score": 0.041202,
"tone_id": "anger"
},
{
"tone_name": "Disgust",
"score": 0.054272,
"tone_id": "disgust"
},
{
"tone_name": "Fear",
"score": 0.080706,
"tone_id": "fear"
},
{
"tone_name": "Joy",
"score": 0.813125,
"tone_id": "joy"
},
{
"tone_name": "Sadness",
"score": 0.155878,
"tone_id": "sadness"
}
]
My Output should be like : joy
My Code is
import json
# Walk data.json one line at a time, echoing and decoding each line on its own.
# NOTE(review): the nesting below is reconstructed; the original indentation
# was lost, so confirm the tone loop really belongs inside the line loop.
with open('data.json', 'r') as f:
    for raw_line in f:
        line = raw_line.strip()
        print(line)
        parsedJson = json.loads(line)
        print(parsedJson)
        # Tones of the first category in the decoded document.
        tones = parsedJson['document_tone']['tone_categories'][0]['tones']
        for tone in tones:
            print(tone['tone_name'])
My data.json file contains
"{\n \"document_tone\": {\n \"tone_categories\": [\n {\n
\"category_id\": \"emotion_tone\", \n \"tones\": [\n
{\n \"tone_name\": \"Anger\", \n \"score\":
0.372974, \n \"tone_id\": \"anger\"\n }, \n
{\n \"tone_name\": \"Disgust\", \n \"score\":
0.114389, \n \"tone_id\": \"disgust\"\n }, \n
{\n \"tone_name\": \"Fear\", \n \"score\":
0.083108, \n \"tone_id\": \"fear\"\n }, \n
{\n \"tone_name\": \"Joy\", \n \"score\":
0.028716, \n \"tone_id\": \"joy\"\n }, \n
{\n \"tone_name\": \"Sadness\", \n \"score\":
0.461562, \n \"tone_id\": \"sadness\"\n }\n
], \n \"category_name\": \"Emotion Tone\"\n }, \n
{\n \"category_id\": \"writing_tone\", \n \"tones\":
[\n {\n \"tone_name\": \"Analytical\", \n
\"score\": 0.722, \n \"tone_id\": \"analytical\"\n
}, \n {\n \"tone_name\": \"Confident\", \n
\"score\": 0.0, \n \"tone_id\": \"confident\"\n
}, \n {\n \"tone_name\": \"Tentative\", \n
\"score\": 0.0, \n \"tone_id\": \"tentative\"\n
}\n ], \n \"category_name\": \"Writing Tone\"\n },
\n {\n \"category_id\": \"social_tone\", \n
\"tones\": [\n {\n \"tone_name\": \"Openness\",
\n \"score\": 0.015, \n \"tone_id\":
\"openness_big5\"\n }, \n {\n
\"tone_name\": \"Conscientiousness\", \n \"score\": 0.045,
\n \"tone_id\": \"conscientiousness_big5\"\n },
\n {\n \"tone_name\": \"Extraversion\", \n
\"score\": 0.722, \n \"tone_id\": \"extraversion_big5\"\n
}, \n {\n \"tone_name\": \"Agreeableness\", \n
\"score\": 0.706, \n \"tone_id\": \"agreeableness_big5\"\n
}, \n {\n \"tone_name\": \"Emotional Range\", \n
\"score\": 0.974, \n \"tone_id\": \"neuroticism_big5\"\n
}\n ], \n \"category_name\": \"Social Tone\"\n }\n
]\n }\n}"
Basically to do what you want here you just have to navigate your way through the dict till you get your list of tones, then loop through each tone, and for that tone print out its tone_name
# Decode the document, then walk down to the first category's list of tones.
parsedJson = json.loads(jsonFile)
first_category = parsedJson['document_tone']['tone_categories'][0]
for tone in first_category['tones']:
    print(tone['tone_name'])
# Anger
# Disgust
# Fear
# Joy
# Sadness
Here's your fixed code
import json


def load_json_document(path):
    """Return the decoded contents of the JSON file at *path*.

    The sample data.json stores the document as a JSON *string* whose text
    is itself JSON (the file starts with a double quote and contains
    escaped quotes and newlines).  Decoding it once therefore yields a
    ``str`` rather than a ``dict``, and indexing it by key raises
    TypeError.  Such a payload is decoded a second time; an ordinary JSON
    file is returned after a single pass.
    """
    with open(path, 'r') as f:
        parsed = json.load(f)
    if isinstance(parsed, str):
        parsed = json.loads(parsed)
    return parsed


def strongest_tone(tones):
    """Return the entry of *tones* whose ``'score'`` is highest.

    Raises ValueError if *tones* is empty.
    """
    return max(tones, key=lambda tone: tone['score'])


if __name__ == '__main__':
    parsedJson = load_json_document('data.json')
    emotion_tones = parsedJson['document_tone']['tone_categories'][0]['tones']
    for tone in emotion_tones:
        print(tone['tone_name'])
    # The question asks for the single tone with the highest score.
    print(strongest_tone(emotion_tones)['tone_name'])