looping through JSON data and inserting all bikestations into a dictionary - python

I am trying to loop through a JSON file and add data for all the bike stations listed into the dictionary, instead of only the final bike station, which is 502. If my code worked the way I want it to, the resulting dictionary would have JSON data for all five bike stations. I am very new to this and any help is appreciated. Here is my code so far:
import json
import urllib.request, urllib.parse, urllib.error
import datetime
import pymongo

stations = (123, 258, 290, 501, 502)
chicagoBikesURL = "https://data.cityofchicago.org/resource/eq45-8inv.json?"

for station in stations:
    paramD = dict()
    paramD["id"] = station
    paramD["$order"] = "timeStamp DESC"
    paramD["$limit"] = 2
    params = urllib.parse.urlencode(paramD)

    print(chicagoBikesURL + params)
    document = urllib.request.urlopen(chicagoBikesURL + params)

    # Get all of the text from the document
    text = document.read().decode()
    if document.getcode() != 200:
        print("Error code=", document.getcode(), chicagoBikesURL + params)
        text = "{}"

    # Load the JSON text from the URL into a dictionary using the json library
    js = json.loads(text)

    # Output first record
    print("\nFirst BikeStation")
    print(js[0])

    # Write JSON data to a file
    fdumps = open('bike_data.txt', "w")
    fdumps.write(json.dumps(js).strip())
    bikeStation_list.append(js)

    # Make sure you close the file otherwise data may not be saved
    fdumps.close()

# Process JSON Data
bikeStation_list = []

"w" mode on the file open always creates a new file. So each one overwrites the previous so you will only see the final item.
To append to the file, use:
fdumps = open('bike_data.txt', "a")
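Beyond the file mode, note that bikeStation_list needs to be initialized before the loop, and the file is best written once after the loop. Here is a minimal sketch of that restructuring, keeping the original names and query parameters:

import json
import urllib.request, urllib.parse

stations = (123, 258, 290, 501, 502)
chicagoBikesURL = "https://data.cityofchicago.org/resource/eq45-8inv.json?"

bikeStation_list = []  # initialize BEFORE the loop so append() has a list to work on

for station in stations:
    paramD = {"id": station, "$order": "timeStamp DESC", "$limit": 2}
    params = urllib.parse.urlencode(paramD)
    document = urllib.request.urlopen(chicagoBikesURL + params)
    text = document.read().decode() if document.getcode() == 200 else "{}"
    bikeStation_list.append(json.loads(text))

# One write after the loop captures every station instead of just the last one
with open('bike_data.txt', "w") as fdumps:
    fdumps.write(json.dumps(bikeStation_list))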

Related

How do I assign a value to a JSON key using a variable in Python?

I'm trying to make a feature with a Discord bot where it counts how many commands have been run and stores the count in a JSON file named data.json. Here is the code:
import json

# Read 'data.json'
with open("data.json", "r") as d:
    datar = json.load(d)

com = datar.get("commands-run")

# Functions
def command_increase():
    commands_run = com + 1
    dataw = open("data.json", "w")
    dataw["commands-run"] = commands_run
And here is the JSON file:
{
    "commands-run": 0
}
And this is the error I get when I run a command and it tries to increase the value in the JSON file:
TypeError: '_io.TextIOWrapper' object does not support item assignment
On top of that, it also completely wipes the JSON file; by that, I mean it just clears everything, even the brackets.
When you call json.load, it loads your JSON data into a dict.
You can increase your command counter in that dict and then write the dict back into your JSON file at the end.
import json

# Read 'data.json'
with open("data.json", "r") as d:
    datar = json.load(d)

com = datar.get("commands-run")

# Functions
def command_increase():
    commands_run = com + 1
    datar["commands-run"] = commands_run
    with open("data.json", "w") as dataw:
        dataw.write(json.dumps(datar, indent=4))

command_increase()
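As a side note, json.dump can write the dict to the file handle directly, and re-reading the file inside the function keeps the counter correct across repeated calls. A sketch of that variant:

import json

def command_increase(path="data.json"):
    # Re-read on every call so repeated increments don't reuse a stale value
    with open(path, "r") as f:
        data = json.load(f)
    data["commands-run"] += 1
    with open(path, "w") as f:
        json.dump(data, f, indent=4)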

how to convert json file to csv with "success":true

I have a problem converting a JSON file to a CSV file in Python, and I think the cause is the nested JSON, but I don't know how to handle it!
import json, csv, requests

url = requests.get("https://####/api/food_orders")
text = url.text
data = json.loads(text)
order_data = data['data']

# Now we will open a file for writing
data_file = open('ordersJsonToCsv.csv', 'w', newline='')

# Create the csv writer object
csv_writer = csv.writer(data_file)

# Counter variable used for writing
# headers to the CSV file
count = 0
for ord in order_data:
    if count == 0:
        # Writing headers of CSV file
        header = ord.keys()
        csv_writer.writerow(header)
        count += 1
    # Writing data of CSV file
    csv_writer.writerow(ord.values())

data_file.close()
The JSON file itself is nested, roughly {"success": true, "data": [...]}, with the order fields inside each element of "data".
This code will solve the problem of pulling out the data only:
import pandas as pd
import json, requests

url = requests.get("https://##/api/orders?")
text = url.text
info = json.loads(text)
df = pd.json_normalize(info['data'])
df.to_csv("samplecsv.csv")
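If the individual records are themselves nested, pd.json_normalize flattens them into one column per leaf field, and the sep argument controls how the nested keys are joined. A small sketch with made-up order fields (the "customer" structure here is hypothetical):

import pandas as pd

# Hypothetical nested records, shaped like the API's "data" list
info = {"success": True, "data": [
    {"id": 1, "customer": {"name": "Ann", "city": "Oslo"}},
    {"id": 2, "customer": {"name": "Bob", "city": "Rome"}},
]}

df = pd.json_normalize(info["data"], sep="_")
print(df.columns.tolist())  # ['id', 'customer_name', 'customer_city']
df.to_csv("samplecsv.csv", index=False)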

Error while writing a file

I am trying to write a file, but I am getting the following error: TypeError: a bytes-like object is required, not 'str'
import requests, pandas
from bs4 import BeautifulSoup

r = requests.get("https://www.basketball-reference.com/players/a/")
c = r.content
soup = BeautifulSoup(c, "html.parser")
full_record_heading = soup.findAll("tr")
full_record = soup.findAll("tr")

playerdata = ""
playerdata_saved = ""
for record in full_record:
    playerdata = ""
    for player in record.findAll("td"):
        playerdata = playerdata + "," + player.text
    playerdata_saved = playerdata_saved + playerdata[1:] + "\n"
# print(playerdata_saved)

header = "From,To,Pos,Ht,Wt,Birth Date,College"
file = open("Basketball.csv", "r+b")
file.write(header)
Can anyone tell me the reason for the error? And how can I find the correct syntax and documentation for any given command? I am new to Python.
When you open a file in Python, you must specify its "file mode": read-only, write-only, read AND write, and whether the file is binary. So, in this line:
open("Basketball.csv","r+b")
you opened your file for reading AND writing, and set it to BINARY mode, so write() expects bytes, not str, which is exactly what the TypeError is telling you.
You should have opened the file as:
open("Basketball.csv","w")
as write and as STRING (text) mode.
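Applied to the script above, the tail would look like this (a minimal sketch reusing the playerdata_saved string built in the loop):

header = "From,To,Pos,Ht,Wt,Birth Date,College"
with open("Basketball.csv", "w") as f:  # text mode, so plain str is fine
    f.write(header + "\n")
    f.write(playerdata_saved)           # the rows assembled in the loop above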
Nevertheless, you are manually writing a CSV file - you do not have to do that in Python! Look at this example:
import requests
import pandas  # Always import on separate lines
from bs4 import BeautifulSoup

r = requests.get("https://www.basketball-reference.com/players/a/")
c = r.content
soup = BeautifulSoup(c, "html.parser")
full_record_heading = soup.findAll("tr")
full_record = soup.findAll("tr")

# Initialize your data buffer
my_data = []

# For each observation in your data source
for record in full_record:
    # We extract a row of data
    observation = record.findAll("td")
    # Header rows have no <td> cells, so skip anything too short
    if len(observation) < 7:
        continue
    # Format the row as a dictionary - a "python hashmap"
    dict_observation = {
        "From": observation[0].text,
        "To": observation[1].text,
        "Pos": observation[2].text,
        "Ht": observation[3].text,
        "Wt": observation[4].text,
        "Birth Date": observation[5].text,
        "College": observation[6].text,
    }
    # Add the row to our DataFrame buffer
    my_data.append(dict_observation)

# Now our DataFrame buffer contains all our data.
# We can format it as a Pandas DataFrame
dataframe = pandas.DataFrame().from_dict(my_data)

# Pandas DataFrames can be turned into CSVs seamlessly. Like:
dataframe.to_csv("Basketball.csv", index=False)

# Or even MS Excel:
dataframe.to_excel("Basketball.xlsx")
Use Python data structures as often as you can!
If you want to keep the file in binary mode and write bytes, you can do it like below:
file.write(bytes(header, encoding="UTF-8"))
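Equivalently, str.encode does the same conversion:

file.write(header.encode("UTF-8"))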

write list of paragraph tuples to a csv file

The following code is designed to write tuples, each containing a large paragraph of text followed by two identifiers, one tuple per line.
import urllib2
import json
import csv

base_url = "https://www.eventbriteapi.com/v3/events/search/?page={}"

writer = csv.writer(open("./data/events.csv", "a"))
writer.writerow(["description", "category_id", "subcategory_id"])

def format_event(event):
    return event["description"]["text"].encode("utf-8").rstrip("\n\r"), event["category_id"], event["subcategory_id"]

for x in range(1, 2):
    print "fetching page - {}".format(x)
    formatted_url = base_url.format(str(x))
    resp = urllib2.urlopen(formatted_url)
    data = resp.read()
    j_data = json.loads(data)
    events = map(format_event, j_data["events"])
    for event in events:
        # print event
        writer.writerow(event)
    print "wrote out events for page - {}".format(x)
The ideal format would be to have each line contain a single paragraph, followed by the other fields listed above, yet here is a screenshot of how the data comes out.
If instead I change this line to the following:
writer.writerow([event])
Here is how the file now looks:
It certainly looks much closer to what I want, but it's got parentheses around each entry, which are undesirable.
EDIT
Here is a snippet that contains a sample of the data I'm working with.
Can you try writing to the CSV file directly, without using the csv module? You can write/append comma-delimited strings to the CSV file just like writing to a typical text file. Also, the way you deal with removing \r and \n characters might not be working. You can use a regex to find those characters and replace them with an empty string "":
import urllib2
import json
import re

base_url = "https://www.eventbriteapi.com/v3/events/search/?page={}"

def format_event(event):
    ws_to_strip = re.compile(r"(\r|\n)")
    description = re.sub(ws_to_strip, "", event["description"]["text"].encode("utf-8"))
    return [description, event["category_id"], event["subcategory_id"]]

with open("./data/events.csv", "a") as events_file:
    events_file.write(",".join(["description", "category_id", "subcategory_id"]) + "\n")
    for x in range(1, 2):
        print "fetching page - {}".format(x)
        formatted_url = base_url.format(str(x))
        resp = urllib2.urlopen(formatted_url)
        data = resp.read()
        j_data = json.loads(data)
        events = map(format_event, j_data["events"])
        for event in events:
            events_file.write(",".join(map(str, event)) + "\n")
        print "wrote out events for page - {}".format(x)
Change your csv writer to be DictWriter.
Make a few tweaks:
def format_event(event):
    return {"description": event["description"]["text"].encode("utf-8").rstrip("\n\r"),
            "category_id": event["category_id"],
            "subcategory_id": event["subcategory_id"]}
There may be a few other small things you need to do, but using DictWriter and formatting your data appropriately has been the easiest way to work with CSV files that I've found.
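For completeness, a sketch of the matching writer setup, reusing j_data and format_event from the code above (the field names are assumed to mirror the dict keys):

import csv

fieldnames = ["description", "category_id", "subcategory_id"]
with open("./data/events.csv", "a") as events_file:
    writer = csv.DictWriter(events_file, fieldnames=fieldnames)
    writer.writeheader()
    for event in j_data["events"]:
        writer.writerow(format_event(event))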

Scraping values from HTML header and saving as a CSV file in Python

All,
I've just started using Python (v 2.7.1) and one of my first programs is trying to scrape information from a website containing power station data using the Standard Library and BeautifulSoup to handle the HTML elements.
The data I'd like to access is obtainable in either the 'Head' section of the HTML or as tables within the main body. The website will generate a CSV file from its data if the CSV link is clicked.
Using a couple of sources on this website, I've managed to cobble together the code below, which will pull the data out and save it to a file, but it contains the \n designators. Try as I might, I can't get a correct CSV file to save out.
I am sure it's something simple but need a bit of help if possible!
from BeautifulSoup import BeautifulSoup
import urllib2,string,csv,sys,os
from string import replace
bm_url = 'http://www.bmreports.com/servlet/com.logica.neta.bwp_PanBMDataServlet?param1=T_COTPS-4&param2=&param3=&param4=&param5=2011-02-05&param6=*'
data = urllib2.urlopen(bm_url).read()
soup = BeautifulSoup(data)
data = str(soup.findAll('head',limit=1))
data = replace(data,'[<head>','')
data = replace(data,'<script language="JavaScript" src="/bwx_generic.js"></script>','')
data = replace(data,'<link rel="stylesheet" type="text/css" href="/bwx_style.css" />','')
data = replace(data,'<title>Historic Physical Balancing Mechanism Data</title>','')
data = replace(data,'<script language="JavaScript">','')
data = replace(data,' </script>','')
data = replace(data,'</head>]','')
data = replace(data,'var gs_csv=','')
data = replace(data,'"','')
data = replace(data,"'",'')
data = data.strip()
file_location = 'c:/temp/'
file_name = file_location + 'DataExtract.txt'
file = open(file_name,"wb")
file.write(data)
file.close()
Don't turn it back into a string and then use replace. That completely defeats the point of using BeautifulSoup!
Try starting like this:
scripttag = soup.head.findAll("script")[1]
javascriptdata = scripttag.contents[0]
Then you can use:
partition('=')[2] to cut off the "var gs_csv" bit.
strip(' \n"') to remove unwanted characters at each end (space, newline, ")
replace("\\n","\n") to sort out the new lines.
Incidentally, replace is a string method, so you don't have to import it separately, you can just do data.replace(....
Finally, you need to separate it as csv. You could save it and reopen it, then load it into a csv.reader. You could use the StringIO module to turn it into something you can feed directly to csv.reader (i.e. without saving a file first). But I think this data is simple enough that you can get away with doing:
for line in data.splitlines():
    row = line.split(",")
SOLUTION
from BeautifulSoup import BeautifulSoup
import urllib2,string,csv,sys,os,time
bm_url_stem = "http://www.bmreports.com/servlet/com.logica.neta.bwp_PanBMDataServlet?param1="
bm_station = "T_COTPS-3"
bm_param = "&param2=&param3=&param4=&param5="
bm_date = "2011-02-04"
bm_param6 = "&param6=*"
bm_full_url = bm_url_stem + bm_station + bm_param + bm_date + bm_param6
data = urllib2.urlopen(bm_full_url).read()
soup = BeautifulSoup(data)
scripttag = soup.head.findAll("script")[1]
javascriptdata = scripttag.contents[0]
javascriptdata = javascriptdata.partition('=')[2]
javascriptdata = javascriptdata.strip(' \n"')
javascriptdata = javascriptdata.replace("\\n","\n")
javascriptdata = javascriptdata.strip()
csvwriter = csv.writer(file("c:/temp/" + bm_station + "_" + bm_date + ".csv", "wb"))
for line in javascriptdata.splitlines():
    row = line.split(",")
    csvwriter.writerow(row)
del csvwriter
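As a design note, the del csvwriter releases the last reference to the file object so CPython closes it; a with block makes the close explicit and works the same in this Python 2 environment:

import csv

with open("c:/temp/" + bm_station + "_" + bm_date + ".csv", "wb") as f:
    csvwriter = csv.writer(f)
    for line in javascriptdata.splitlines():
        csvwriter.writerow(line.split(","))
# the file is flushed and closed here, no `del` needed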
