Remove Non-Updated Text/Object In JSON File With Python - python

This script updates the code in the json file with the users input but doesn't replace the placeholder text Screenshot URL X x is a number (1, 5). I want to remove the placeholder text that hasn't been updated by the script with the user input. It was working before but now it isn't and I can find out why. Any help will be welcomed! Thanks!
Code:
# Load the data
file_name = path/to/json/file
with open(file_name) as fh:
full_data = json.load(fh)
# Dig into the data to find the screenshots
screen_shots = full_data['tabs'][0]['views'][1]['screenshots']
# Loop over each screen shot, updating each one
print("Press return/enter to enter another url or press it again with nothing entered to stop asking and continue the script.")
for number, screen_shot in enumerate(screen_shots):
new_url = input("Screnshot URL: ").strip()
if new_url:
# Updating the data here will also update the 'full_data' object
# as we are just referencing a part of it, not making copies
screen_shot.update({"url": new_url, "fullSizeURL": new_url})
else:
break
# Remove all entries which we did not update
screen_shots = screen_shots[:number]
# Save the data
with open(file_name, 'w') as fh:
json.dump(full_data, fh, indent=4)
JSON File:
{
"minVersion": "0.1",
"headerImage": "Header Image URL",
"tintColor": "",
"tabs": [
{
"tabname": "Details",
"views": [
{
"title": "Package Name",
"useBoldText": true,
"useBottomMargin": false,
"class": "DepictionSubheaderView"
},
{
"itemCornerRadius": 6,
"itemSize": "{160, 275.41333333333336}",
"screenshots": [
{
"accessibilityText": "Screenshot",
"url": "Screenshot URL 1",
"fullSizeURL": "Screenshot URL 1"
},
{
"accessibilityText": "Screenshot",
"url": "Screenshot URL 2",
"fullSizeURL": "Screenshot URL 2"
},
{
"accessibilityText": "Screenshot",
"url": "Screenshot URL 3",
"fullSizeURL": "Screenshot URL 3"
},
{
"accessibilityText": "Screenshot",
"url": "Screenshot URL 4",
"fullSizeURL": "Screenshot URL 4"
},
{
"accessibilityText": "Screenshot",
"url": "Screenshot URL 5",
"fullSizeURL": "Screenshot URL 5"
}
],
"class": "DepictionScreenshotsView"
},
{
"markdown": "This is a description.",
"useSpacing": true,
"class": "DepictionMarkdownView"
},
{
"class": "DepictionSeparatorView"
},
{
"title": "Known Issues",
"class": "DepictionHeaderView"
},
{
"markdown": "None",
"useSpacing": true,
"class": "DepictionMarkdownView"
},
{
"class": "DepictionSeparatorView"
},
{
"title": "Latest Version",
"class": "DepictionHeaderView"
},
{
"title": "Version",
"text": "1.2",
"class": "DepictionTableTextView"
},
{
"title": "Released",
"text": "1/1/11",
"class": "DepictionTableTextView"
},
{
"title": "Price",
"text": "Free",
"class": "DepictionTableTextView"
},
{
"title": "Developer",
"text": "Dev",
"class": "DepictionTableTextView"
},
{
"title": "Contact Support",
"action": "",
"class": "DepictionTableButtonView"
},
{
"spacing": 16,
"class": "DepictionSpacerView"
},
{
"spacing": 20,
"class": "DepictionSpacerView"
}
],
"class": "DepictionStackView"
},
{
"tabname": "Changelog",
"views": [
{
"title": "1.2",
"useBoldText": true,
"useBottomMargin": true,
"class": "DepictionSubheaderView"
},
{
"markdown": "\t\n\u2022 Initial Release",
"useSpacing": false,
"class": "DepictionMarkdownView"
},
{
"markdown": "<small style=\"color: #999; margin-top: -8px;\">Released 1/1/11</small>",
"useRawFormat": true,
"class": "DepictionMarkdownView"
}
],
"class": "DepictionStackView"
}
],
"class": "DepictionTabView"
}

Does this work?
import json
file_name = "test.json"
with open(file_name) as fh:
full_data = json.load(fh)
# Dig into the data to find the screenshots
screen_shots = full_data['tabs'][0]['views'][1]['screenshots']
# Loop over each screen shot, updating each one
print("Press return/enter to enter another url or press it again with nothing entered to stop asking and continue the script.")
for number, screen_shot in enumerate(screen_shots):
new_url = input("Screnshot URL: ").strip()
if new_url:
# Updating the data here will also update the 'full_data' object
# as we are just referencing a part of it, not making copies
screen_shot.update({"url": new_url, "fullSizeURL": new_url})
else:
break
# Remove all entries which we did not update
screen_shots = screen_shots[:number]
full_data['tabs'][0]['views'][1]['screenshots'] = screen_shots #-> this lines removes the placeholder text that is not updated by the user
# Save the data
with open(file_name, 'w') as fh:
json.dump(full_data, fh, indent=4)

Related

How can i get this JSON value using BeautifulSoup from window.__INITIAL_STATE__

So i am scraping a website containing window.INITIAL_STATE which is assigned with a huge JSON string. I am looking for the stock info (This item is currently out of stock) which looks like below in JSON grid:
{
"slotType": "WIDGET",
"id": 11,
"parentId": 10002,
"layoutParams": {
"margin": "0,24,0,0",
"orientation": "",
"widgetHeight": 150,
"widgetWidth": 12
},
"dataId": "1230886539",
"elementId": "11-AVAILABILITY",
"hasWidgetDataChanged": true,
"ttl": 3000,
"widget": {
"type": "AVAILABILITY",
"viewType": "brand",
"data": {
"announcementComponent": {
"action": null,
"metaData": null,
"tracking": null,
"trackingData": null,
"value": {
"type": "AnnouncementValue",
"subTitle": "This item is currently out of stock",
"title": "Sold Out"
}
}
}
}
},
I tried like below but does not work:
soup = BeautifulSoup(page.content, features="lxml")
print(soup.find(elementID='11-AVAILABILITY').get_text().strip())
To parse the __INITIAL_STATE__ out of HTML, you can use this example:
import re
import json
import requests
url = 'https://www.flipkart.com/sony-310ap-wired-headset/p/itm0527f8b27c68f'
html_data = requests.get(url).text
data = re.search(r'window\.__INITIAL_STATE__ = ({.*});', html_data).group(1)
data = json.loads(data)
# uncomment this to print all data:
# print(json.dumps(data, indent=4))
for w in data['pageDataV4']['page']['data']['10002']:
if w.get("elementId") == "11-AVAILABILITY":
print(json.dumps(w, indent=4))
break
Prints:
{
"slotType": "WIDGET",
"id": 11,
"parentId": 10002,
"layoutParams": {
"margin": "0,24,0,0",
"orientation": "",
"widgetHeight": 150,
"widgetWidth": 12
},
"dataId": "1230886539",
"elementId": "11-AVAILABILITY",
"hasWidgetDataChanged": true,
"ttl": 3000,
"widget": {
"type": "AVAILABILITY",
"viewType": "brand",
"data": {
"announcementComponent": {
"action": null,
"metaData": null,
"tracking": null,
"trackingData": null,
"value": {
"type": "AnnouncementValue",
"subTitle": "This item is currently out of stock",
"title": "Sold Out"
}
}
}
}
}

Update Key Value In Python In JSON File

How do I change a value in a json file with python? I want to search and find "class": "DepictionScreenshotsView" and replace it with "class": ""
JSON File:
{
"minVersion": "0.1",
"class": "DepictionTabView",
"tintColor": "#2cb1be",
"headerImage": "",
"tabs": [
{
"tabname": "Details",
"class": "DepictionStackView",
"tintColor": "#2cb1be",
"views": [
{
"class": "DepictionSubheaderView",
"useBoldText": true,
"useBottomMargin": false,
"title": "Description"
},
{
"class": "DepictionMarkdownView",
"markdown": "Some dummy text...",
"useRawFormat": true
},
{
"class": "DepictionSeparatorView"
},
{
"class": "DepictionSubheaderView",
"useBoldText": true,
"useBottomMargin": false,
"title": "Screenshots"
},
{
"class": "DepictionScreenshotsView",
"itemCornerRadius": 6,
"itemSize": "{160, 284.44444444444}",
"screenshots": [
{
"accessibilityText": "Screenshot",
"url": "http://example.com/image.png"
}
]
},
{
"class": "DepictionSeparatorView"
},
{
"class": "DepictionSubheaderView",
"useBoldText": true,
"useBottomMargin": false,
"title": "Information"
},
{
"class": "DepictionTableTextView",
"title": "Author",
"text": "User"
},
{
"class": "DepictionSpacerView",
"spacing": 16
},
{
"class": "DepictionStackView",
"views": [
{
"class": "DepictionTableButtonView",
"title": "Contact",
"action": "http://example.com/",
"openExternal": true
}
]
},
{
"class": "DepictionSpacerView",
"spacing": 16
}
]
},
{
"tabname": "History",
"class": "DepictionStackView",
"views": [
{
"class": "DepictionSubheaderView",
"useBoldText": true,
"useBottomMargin": false,
"title": ""
},
{
"class": "DepictionMarkdownView",
"markdown": "<ul>\n<li>Initial release.<\/li>\n<\/ul>",
"useRawFormat": true
}
]
}
]
}
You can read a json file in python as follow:
import json
with open('your_file.json') as f:
data = json.load(f)
Then you access and change the value (for your case):
data['tabs'][0]['views'][4]['class'] = ""
After having changed the data, you can save it :
with open('your_file.json', 'w') as outfile:
json.dump(data, outfile)

Python/JSON KeyError

Please take a look at this post for more info I am trying to find something like this
screen_shots = full_data['tabs'][0]['views'][1]['screenshots']
but for a new JSON file but I keep getting
KeyError: 'screenshots'
I tried a few things to fix it but nothing works so any help will be welcomed
JSON
{
"minVersion":"0.1",
"class":"DepictionTabView",
"tintColor":"#2cb1be",
"headerImage":"",
"tabs":[ {
"tabname":"Details",
"class":"DepictionStackView",
"tintColor":"#2cb1be",
"views":[ {
"class": "DepictionSubheaderView", "useBoldText": true, "useBottomMargin": false, "title": "Description"
}
,
{
"class": "DepictionMarkdownView", "markdown": "<p>This is a description...<\/p>", "useRawFormat": true
}
,
{
"class": "DepictionSeparatorView"
}
,
{
"class": "DepictionSubheaderView", "useBoldText": true, "useBottomMargin": false, "title": "Screenshots"
}
,
{
"class":"DepictionScreenshotsView",
"itemCornerRadius":6,
"itemSize":"{160, 284.44444444444}",
"screenshots":[ {
"accessibilityText": "Screenshot", "url": "Screenshot URL 1"
}
]
}
,
{
"class": "DepictionSeparatorView"
}
,
{
"class": "DepictionSubheaderView", "useBoldText": true, "useBottomMargin": false, "title": "Information"
}
,
{
"class": "DepictionTableTextView", "title": "Author", "text": "User"
}
,
{
"class": "DepictionTableTextView", "title": "Version", "text": "1.0"
}
,
{
"class": "DepictionTableTextView", "title": "Price", "text": "free"
}
,
{
"class": "DepictionSpacerView", "spacing": 16
}
,
{
"class":"DepictionStackView",
"views":[
{
"class": "DepictionTableButtonView", "title": "Support", "action": "", "openExternal": true
}
]
}
,
{
"class": "DepictionSpacerView", "spacing": 16
}
]
}
,
{
"tabname":"Changelog",
"class":"DepictionStackView",
"tintColor":"#2cb1be",
"views":[
{
"class": "DepictionSubheaderView", "useBoldText": true, "useBottomMargin": false, "title": "1.0"
}
,
{
"class": "DepictionMarkdownView", "markdown": "<ul>\n<li>Initial release.<\/li>\n<\/ul>", "useRawFormat": true
}
]
}
]
}
In your JSON, many of the views do not have a screenshots key. The 2nd view (views[1]) definitely does not while the 5th view (views[4]) does.
If you're trying to collect all the screenshots from the views you'll need to use a loop and some conditional logic to find them.
screenshots = [] # an accumulator to collect our screenshots
for view in full_data['tabs'][0]['views']: # loop over each view
if 'screenshots' in view: # only process views with a screenshots key
# there can be multiple screenshots per view, so concatenate them to our accumulator
screenshots += view['screenshots']
You can further simplify this to a list comprehension
screenshots = [*screenshot for screenshot in full_data['tabs'][0]['views'] if 'screenshots' in view]
The *screenshot tells Python to "unroll" the list of screenshots. Adding each item to the list, rather then inserting a list into our list.
In the example you posted:
screen_shots = full_data['tabs'][0]['views'][1]['screenshots']
Goes to:
{
"class": "DepictionMarkdownView", "markdown": "<p>This is a description...<\/p>", "useRawFormat": true
}
Which doesn't have any key called screenshots. Your JSON keys at that level are not guaranteed to have that key. I'm guessing that screenshots will only be there if class is DepictionScreenshotsView.
So you can try:
tab = full_data['tabs'][0]
screen_shots = [tab['views'][i]['screenshots'] for i in range(len(tab)) if tab['view'][i]['class'] == 'DepictionScreenshotsView')

Python | Add 2 of The Same Keys In A Dict

How would I go about adding 2 of the same keys just with screenshoturl2 and screenshoturl1 from a dict to a JSON file?
This is the code I have right now and it works but it just only adds the first screenshoturl1 and not screenshoturl2 and I have no idea on how to make it add it. See replies here for more and maybe helpful info about my topic
#!/usr/bin/env python3
import os
import sys
import json
import fileinput
def NumberofScreenshots():
global numberofscreenshots
while True:
try:
numberofscreenshots = input("Number of Screenshots?: ")
if numberofscreenshots == '':
print("Please enter how much screenshots to include.")
continue
elif numberofscreenshots.isalpha():
print("Please enter a number not a string.")
continue
else:
break
except ValueError:
break
def ScreenshotURL():
global screenshoturl1, screenshoturl2
if numberofscreenshots == "1":
screenshoturl1 = input("Screenshot URL: ")
elif numberofscreenshots == "2":
screenshoturl1 = input("Screenshot URL: ")
screenshoturl2 = input("Screenshot URL: ")
else:
pass
def NumberofScreenshots1():
if numberofscreenshots == "1":
with open('path/to/json/file','r') as f:
data = json.loads(f.read())
data['tabs'][0]['views'][1]['screenshots'][0]
data['tabs'][0]['views'][1]['screenshots'] = data['tabs'][0]['views'][1]['screenshots'][0]
data['tabs'][0]['views'][1]['screenshots'] = {"accessibilityText": "Screenshot","url": screenshoturl1,"fullSizeURL": screenshoturl1}
with open('path/to/json/file', 'w') as f:
f.write(json.dumps(data))
else:
print("Try again.")
def NumberofScreenshots2():
global data
if numberofscreenshots == "2":
with open('path/to/json/file','r') as f:
data = json.loads(f.read())
data['tabs'][0]['views'][1]['screenshots'][0]
print(data)
data['tabs'][0]['views'][1]['screenshots'] = data['tabs'][0]['views'][1]['screenshots'][0]
print(data)
data['tabs'][0]['views'][1]['screenshots'].update({"accessibilityText": "Screenshot","url": screenshoturl1,"fullSizeURL": screenshoturl1, "accessibilityText": "Screenshot","url": screenshoturl2,"fullSizeURL": screenshoturl2})
print(data)
with open('path/to/json/file', 'w') as f:
f.write(json.dumps(data))
else:
print("Try again.")
print("Pick Template:")
print("1. Default")
template = input("Template Name/Number: ")
if (template == "1"):
NumberofScreenshots()
ScreenshotURL()
NumberofScreenshots1()
NumberofScreenshots2()
# Show the user a error if they enter a number for a template that can't be found.
else:
print("The template you are looking for can not be found!")
We are looking at the function called NumberofScreenshots2
JSON File:
{
"tabs": [
{
"tabname": "Details",
"views": [
{
"title": "Some Name",
"useBoldText": true,
"useBottomMargin": false,
"class": "DepictionSubheaderView"
},
{
"itemCornerRadius": 6,
"itemSize": "",
"screenshots": [
{
"accessibilityText": "Screenshot",
"url": "Screenshot URL 1",
"fullSizeURL": "Screenshot URL 1"
},
{
"accessibilityText": "Screenshot",
"url": "Screenshot URL 2",
"fullSizeURL": "Screenshot URL 2"
},
{
"accessibilityText": "Screenshot",
"url": "Screenshot URL 3",
"fullSizeURL": "Screenshot URL 3"
},
{
"accessibilityText": "Screenshot",
"url": "Screenshot URL 4",
"fullSizeURL": "Screenshot URL 4"
},
{
"accessibilityText": "Screenshot",
"url": "Screenshot URL 5",
"fullSizeURL": "Screenshot URL 5"
}
],
"ipad": {
"itemCornerRadius": 9,
"itemSize": "{320, 550.8266666666667}",
"screenshots": [
{
"accessibilityText": "Screenshot",
"url": " Screenshot URL?size=640",
"fullSizeURL": "Screenshot URL"
}
],
"class": "DepictionScreenshotView"
},
"class": "DepictionScreenshotsView"
},
{
"markdown": " Description",
"useSpacing": true,
"class": "DepictionMarkdownView"
},
{
"class": "DepictionSeparatorView"
},
{
"title": "Known Issues",
"class": "DepictionHeaderView"
},
{
"markdown": "None",
"useSpacing": true,
"class": "DepictionMarkdownView"
},
{
"class": "DepictionSeparatorView"
},
{
"title": "Latest Version",
"class": "DepictionHeaderView"
},
{
"title": "1.0",
"text": "Latest Version Number",
"class": "DepictionTableTextView"
},
{
"title": "Released",
"text": "3/10/19",
"class": "DepictionTableTextView"
},
{
"title": "Price",
"text": "Free",
"class": "DepictionTableTextView"
},
{
"title": "Developer",
"text": "TestDev",
"class": "DepictionTableTextView"
},
{
"title": "Contact Support",
"action": "",
"class": "DepictionTableButtonView"
},
{
"spacing": 16,
"class": "DepictionSpacerView"
},
{
"spacing": 20,
"class": "DepictionSpacerView"
}
],
"class": "DepictionStackView"
},
{
"tabname": "Changelog",
"views": [
{
"title": "1.0",
"useBoldText": true,
"useBottomMargin": true,
"class": "DepictionSubheaderView"
},
{
"markdown": "\t\n\u2022 Initial Release",
"useSpacing": false,
"class": "DepictionMarkdownView"
},
{
"markdown": "<small style=\"color: #999; margin-top: -8px;\">Released 3/10/2019</small>",
"useRawFormat": true,
"class": "DepictionMarkdownView"
}
],
"class": "DepictionStackView"
}
],
"class": "DepictionTabView"
}
I want it to remove everything in screenshots and re-add it with the user input that they enter in the screenshoturl1 and screenshoturl2 so it will delete everything like the placeholders in screenshots and only add 2 back with the url the user enters in screenshoturl1 and screenshoturl2 and help will be nice.
Thanks!
If I understand what you are trying to do here, it looks like you would like to update one or two of the URLs, one at a time.
What you could do is go over each screenshot item, one at a time, and either update it or stop. This is easier and also lets you update all of them if you want to. This also means we don't have to ask how many to do up front (we just stop when we don't get any more).
import json
# Load the data
file_name = 'path/to/json/file'
with open(file_name) as fh:
full_data = json.load(fh)
# Dig into the data to find the screenshots
screen_shots = full_data['tabs'][0]['views'][1]['screenshots']
# Loop over each screen shot, updating each one
for number, screen_shot in enumerate(screen_shots):
print("Screenshot", number)
print('\tCurrent data:', screen_shot)
new_url = input(
"\tPlease enter new URL (leave empty and press return to stop): "
).strip()
if new_url:
# Updating the data here will also update the 'full_data' object
# as we are just referencing a part of it, not making copies
screen_shot.update({"url": new_url, "fullSizeURL": new_url})
else:
print("\tAll done!")
break
# Remove all entries which we did not update
screen_shots = screen_shots[:number]
# Save the data
with open(file_name, 'w') as fh:
json.dump(full_data, fh, indent=4)
Something else you may want to look into is returning results from your functions instead of using global variables, as this can quickly get very confusing with bigger scripts.
You can have your value a collection (like a list or a tuple). For added convenience you can use defaultdict. So if you wanted to have two values for a key:
from collections import defaultdict
multiValueDict = defaultdict(list)
multiValueDict[1].append(1)
multiValueDict[1].append(2)
print(multiValueDict[1])
And this outputs:
[1, 2]

How to parse JSON in a Django View

I post some JSON to a view. I want to now parse the data and add it to my database.
I need to get the properties name and theme and iterate over the array pages. My JSON is as follows:
{
"name": "xaAX",
"logo": "",
"theme": "b",
"fullSiteLink": "http://www.hello.com",
"pages": [
{
"id": "1364484811734",
"name": "Page Name",
"type": "basic",
"components": {
"img": "",
"text": ""
}
},
{
"name": "Twitter",
"type": "twitter",
"components": {
"twitter": {
"twitter-username": "zzzz"
}
}
}
]
}
Here is what I have so far:
def smartpage_create_ajax(request):
if request.POST:
# get stuff and loop over each page?
return HttpResponse('done')
python provides json to encode/decode json
import json
json_dict = json.loads(request.POST['your_json_data'])
json_dict['pages']
[
{
"id": "1364484811734",
"name": "Page Name",
"type": "basic",
"components": {
"img": "",
"text": ""
}
},
{
"name": "Twitter",
"type": "twitter",
"components": {
"twitter": {
"twitter-username": "zzzz"
}
}
},
}
]

Categories