Get data from API and save it to txt with Python

Get data from API and save it to txt with Python - python

I need to get data from this API https://api.storj.io/contacts/f52624d8ef76df81c40853c22f93735581071434 (sample node)
This is my code (python):
import requests
f = requests.get('https://api.storj.io/contacts/f52624d8ef76df81c40853c22f93735581071434')
print f.text
I want to save only protocol, responseTime and reputation in three subsequent lines of the txt file. It's supposed to look something like this::
protocol: 1.2.0
responseTime: 8157.912472694088
reputation: 1377
Unfortunately, I'm stuck at this point and I can not process this data in any way

import requests
f = requests.get('https://api.storj.io/contacts/f52624d8ef76df81c40853c22f93735581071434')
# Store content as json
answer = f.json()
# List of element you want to keep
items = ['protocol', 'responseTime', 'reputation']
# Display
for item in items:
print(item + ':' + str(answer[item]))
# If you want to save in a file
with open("Output.txt", "w") as text_file:
for item in items:
print(item + ':' + str(answer[item]), file=text_file)
Hope it helps! Cheers

You just need to transform to a JSON object to be able to access the keys
import requests
import simplejson as json
f = requests.get('https://api.storj.io/contacts/f52624d8ef76df81c40853c22f93735581071434')
x = json.loads(f.text)
print 'protocol: {}'.format(x.get('protocol'))
print 'responseTime: {}'.format(x.get('responseTime'))
print 'reputation: {}'.format(x.get('reputation'))

This is a very unrefined way to do what you want that you could build off of. You'd need to sub in a path/filename for text.txt.
import requests
import json
f = requests.get('https://api.storj.io/contacts/f52624d8ef76df81c40853c22f93735581071434')
t = json.loads(f.text)
with open('text.txt', 'a') as mfile:
mfile.write("protocol: {0}".format(str(t['protocol'])))
mfile.write("responseTime: {0}".format(str(t['responseTime'])))
mfile.write("reputation: {0}".format(str(t['reputation'])))

Related

Exporting API output to txt file

I have created a simple API with FastAPI and I want to export the output in a text file (txt).
This is a simplified code
import sys
from clases.sequence import Sequence
from clases.read_file import Read_file
from fastapi import FastAPI
app = FastAPI()
#app.get("/DNA_toolkit")
def sum(input: str): # pass the sequence in, this time as a query param
DNA = Sequence(input) # get the result (i.e., 4)
return {"Length": DNA.length(), # return the response
"Reverse": DNA.reverse(),
"complement":DNA.complement(),
"Reverse and complement": DNA.reverse_and_complement(),
"gc_percentage": DNA.gc_percentage()
}
And this is the output
{"Length":36,"Reverse":"TTTTTTTTTTGGGGGGGAAAAAAAAAAAAAAAATAT","complement":"ATATTTTTTTTTTTTTTTTCCCCCCCAAAAAAAAAA","Reverse and complement":"AAAAAAAAAACCCCCCCTTTTTTTTTTTTTTTTATA","gc_percentage":5.142857142857143}
The file I would like to get
Length 36
Reverse TTTTTTTTTTGGGGGGGAAAAAAAAAAAAAAAATAT
complement ATATTTTTTTTTTTTTTTTCCCCCCCAAAAAAAAAA
Reverse and complement AAAAAAAAAACCCCCCCTTTTTTTTTTTTTTTTATA
There is a simple way to do this. This is my first time working with APIs and I don't even know how possible is this

dict1={"Length":36,"Reverse":"TTTTTTTTTTGGGGGGGAAAAAAAAAAAAAAAATAT","complement":"ATATTTTTTTTTTTTTTTTCCCCCCCAAAAAAAAAA","Reverse and complement":"AAAAAAAAAACCCCCCCTTTTTTTTTTTTTTTTATA","gc_percentage":5.142857142857143}
with open("output.txt","w") as data:
for k,v in dict1.items():
append_data=k+" "+str(v)
data.write(append_data)
data.write("\n")
Output:
Length 36
Reverse TTTTTTTTTTGGGGGGGAAAAAAAAAAAAAAAATAT
complement ATATTTTTTTTTTTTTTTTCCCCCCCAAAAAAAAAA
Reverse and complement AAAAAAAAAACCCCCCCTTTTTTTTTTTTTTTTATA
gc_percentage 5.142857142857143

You can use open method to create a new file, and write your output. And as #Blackgaurd told you, this isn't a code-writing service.
Also I wrote this code really quickly so some syntax error may occur
import sys
import datetime
from clases.sequence import Sequence
from clases.read_file import Read_file
from fastapi import FastAPI
app = FastAPI()
#app.get("/DNA_toolkit")
def sum(input: str): # pass the sequence in, this time as a query param
DNA = Sequence(input) # get the result (i.e., 4)
res = {"Length": DNA.length(), # return the response
"Reverse": DNA.reverse(),
"complement":DNA.complement(),
"Reverse and complement": DNA.reverse_and_complement(),
"gc_percentage": DNA.gc_percentage()
}
#with open('result.txt', 'w+') as resFile:
#for i in res:
#resFile.write(i+" "+res[i]+"\n")
#resFile.close()
# Undo the above comment if you don't want to save result into
#file with unique id, else go with the method I wrote below...
filename = str(datetime.datetime.now().date()) + '_' + str(datetime.datetime.now().time()).replace(':', '.')
with open(filename+'.txt', 'w+') as resFile:
for i in res:
resFile.write(i+" "+res[i]+"\n")
resFile.close()
return {"Length": DNA.length(), # return the response
"Reverse": DNA.reverse(),
"complement":DNA.complement(),
"Reverse and complement": DNA.reverse_and_complement(),
"gc_percentage": DNA.gc_percentage()
}

I gonna assume that you have already got your data somehow calling your API.
# data = request.get(...).json()
# save to file:
with open("DNA_insights.txt", 'w') as f:
for k, v in data.items():
f.write(f"{k}: {v}\n")

write list of paragraph tuples to a csv file

The following code is designed to write a tuple, each containing a large paragraph of text, and 2 identifiers behind them, to a single line per each entry.
import urllib2
import json
import csv
base_url = "https://www.eventbriteapi.com/v3/events/search/?page={}
writer = csv.writer(open("./data/events.csv", "a"))
writer.writerow(["description", "category_id", "subcategory_id"])
def format_event(event):
return event["description"]["text"].encode("utf-8").rstrip("\n\r"), event["category_id"], event["subcategory_id"]
for x in range(1, 2):
print "fetching page - {}".format(x)
formatted_url = base_url.format(str(x))
resp = urllib2.urlopen(formatted_url)
data = resp.read()
j_data = json.loads(data)
events = map(format_event, j_data["events"])
for event in events:
#print event
writer.writerow(event)
print "wrote out events for page - {}".format(x)
The ideal format would be to have each line contain a single paragraph, followed by the other fields listed above, yet here is a screenshot of how the data comes out.
If instead I this line to the following:
writer.writerow([event])
Here is how the file now looks:
It certainly looks much closer to what I want, but its got parenthesis around each entry which are undesirable.
EDIT
here is a snippet that contains a sample of the data Im working with.

Can you try writing to the CSV file directly without using using the csv module? You can write/append comma-delimited strings to the CSV file just like writing to typical text files. Also, the way you deal with removing \r and \n characters might not be working. You can use regex to find those characters and replace them with an empty string "":
import urllib2
import json
import re
base_url = "https://www.eventbriteapi.com/v3/events/search/?page={}"
def format_event(event):
ws_to_strip = re.compile(r"(\r|\n)")
description = re.sub(ws_to_strip, "", event["description"]["text"].encode("utf-8"))
return [description, event["category_id"], event["subcategory_id"]]
with open("./data/events.csv", "a") as events_file:
events_file.write(",".join(["description", "category_id", "subcategory_id"]))
for x in range(1, 2):
print "fetching page - {}".format(x)
formatted_url = base_url.format(str(x))
resp = urllib2.urlopen(formatted_url)
data = resp.read()
j_data = json.loads(data)
events = map(format_event, j_data["events"])
for event in events:
events_file.write(",".join(event))
print "wrote out events for page - {}".format(x)

Change your csv writer to be DictWriter.
Make a few tweaks:
def format_event(event):
return {"description": event["description"]["text"].encode("utf-8").rstrip("\n\r"),
"category_id": event["category_id"],
"subcategory_id": event["subcategory_id"]}
May be a few other small things you need to do, but using DictWriter and formatting your data appropriately has been the easiest way to work with csv files that I've found.

Python Search File Returning []

I'm trying to search a JSON file for the username but it is just returning [] rather than tom123.
JSON file contents:
[{"id":"001788fffe48cbdb","username":"tom123"}]
Code:
import json
import re
import requests
f = open("first.json", "r+")
print(f.read())
username = [index["username"] for index in f.read()]
print(username)
f.close()

maybe something like this you want to parse the json so you can use it like a dict
import json
import re
import requests
f = open("first.json", "r+")
data = json.loads(f.read())
username = [index["username"] for index in data]
print(username)
f.close()

f is a file object which is a iterator-like object, which means that when you iterate over it you've consumed it and you cannot use it again. And in this case you consume it first in following line:
print(f.read())
Also for loading a json file you should use json.load() function. Then you can preserve the content after reading then search though the preserved content:
with open("first.json") as f
content = json.load(f)
username = [index["username"] for index in content]
print(username)
Also as a functional-based approach you can use operator.itemgetter and map() in order to get an iterator contain all the usernames (which is more optimized in terms of memory use):
from operator import itemgetter
with open("first.json") as f
content = json.load(f)
usernames = map(itemgetter("username"), content)

Saving Flickrapi response object as text file

import sys
import os
import urllib
from xml.etree.ElementTree import ElementTree
from xml.etree.ElementTree import tostring
import flickrapi
api_key = ' '
api_password = ' '
photo_id='2124494179'
flickr= flickrapi.FlickrAPI(api_key, api_password)
#photos= flickr.photos_getinfo(photo_id='15295705890')
#tree=ElementTree(flickr.photos_getinfo(photo_id))
#image_id=open('photoIds.txt','r')
#Image_data=open('imageinformation','w')
#e=image_id.readlines(10)
#f= [s.replace('\r\n', '') for s in e]
#num_of_lines=len(f)
#image_id.close()
#i=0
#while i<269846:
# term=f[i]
#try:
photoinfo=flickr.photos_getinfo(photo_id=photo_id)
photo_tree=ElementTree(photoinfo)
#photo_tree.write('photo_tree')
#i+=1
#photo=photo_tree.getroot()
#photodata=photo.getiterator()
#for elem in owner.getiterator():
#for elem in photo.getiterator():
for elem in photo_tree.getroot():
farm=elem.attrib['farm']
id=elem.attrib['id']
server=elem.attrib['server']
#title=photo_tree.find('title').txt
#for child in elem.findall():
# username=child.attrib['username']
# location=child.attrib['location']
# user=elem.attrib['username']
print (farm)
print(id)
print(server)
#owner=photo_tree.findall('owner')
# print(username)
#filename="%s.txt"%(farm)
#f=open(filename,'w')
#f.write("%s"%farm)
#for elem in photo_tree.getiterator():
#for child in photo_tree.getiterator():
#print (child.attrib)
#owner=child.attrib['username']
I would like to read data from a file and pass it to flickrapi method to get images' information recursively using pythonand save it in a file as a text: image id=.... user name=... location=... tags=... and so on. I could save the attributes of the first element by using .getroot() but I tried to get the attributes of other element but it returns error. I want to save the attributes into txt file and read the image ids from a file so I can use these data in the algorithm I'm working on.

Since I figured out a away to solve the problem(I'm a beginner and know almost nothing about python), what we need to do is to iterator the object(since it's not saved as xml file) using tags name as follows:
photo_tree=ElementTree(photoinfo)
for elem in photo_tree.getroot():
uploaded=elem.attrib['dateuploaded']
uploaded=datetime.datetime.fromtimestamp(float(uploaded)).strftime('%Y-%m-%d %H:%M:%S')
for elem in photo_tree.getiterator(tag='dates'):
taken_date=elem.attrib['taken']
photo_info = open(head + 'filename/' + ('%d.txt') % (id),'a')
photo_info.write(str(id)+'\t'+uploaded+'\t'+taken_date+'\t'+'\n')
may it helps someone who is seeking a solution for same problem. Or may be there is an efficient way to solve this issue!!

Scraping values from HTML header and saving as a CSV file in Python

All,
I've just started using Python (v 2.7.1) and one of my first programs is trying to scrape information from a website containing power station data using the Standard Library and BeautifulSoup to handle the HTML elements.
The data I'd like to access is obtainable in either the 'Head' section of the HTML or as tables within the main body. The website will generate a CSV file from it data if the CSV link is clicked.
Using a couple of sources on this website I've managed to cobble together the code below which will pull the data out and save it to a file, but, it contains the \n designators. Try as I might, I can't get a correct CSV file to save out.
I am sure it's something simple but need a bit of help if possible!
from BeautifulSoup import BeautifulSoup
import urllib2,string,csv,sys,os
from string import replace
bm_url = 'http://www.bmreports.com/servlet/com.logica.neta.bwp_PanBMDataServlet?param1=T_COTPS-4&param2=&param3=&param4=&param5=2011-02-05&param6=*'
data = urllib2.urlopen(bm_url).read()
soup = BeautifulSoup(data)
data = str(soup.findAll('head',limit=1))
data = replace(data,'[<head>','')
data = replace(data,'<script language="JavaScript" src="/bwx_generic.js"></script>','')
data = replace(data,'<link rel="stylesheet" type="text/css" href="/bwx_style.css" />','')
data = replace(data,'<title>Historic Physical Balancing Mechanism Data</title>','')
data = replace(data,'<script language="JavaScript">','')
data = replace(data,' </script>','')
data = replace(data,'</head>]','')
data = replace(data,'var gs_csv=','')
data = replace(data,'"','')
data = replace(data,"'",'')
data = data.strip()
file_location = 'c:/temp/'
file_name = file_location + 'DataExtract.txt'
file = open(file_name,"wb")
file.write(data)
file.close()

Don't turn it back into a string and then use replace. That completely defeats the point of using BeautifulSoup!
Try starting like this:
scripttag = soup.head.findAll("script")[1]
javascriptdata = scripttag.contents[0]
Then you can use:
partition('=')[2] to cut off the "var gs_csv" bit.
strip(' \n"') to remove unwanted characters at each end (space, newline, ")
replace("\\n","\n") to sort out the new lines.
Incidentally, replace is a string method, so you don't have to import it separately, you can just do data.replace(....
Finally, you need to separate it as csv. You could save it and reopen it, then load it into a csv.reader. You could use the StringIO module to turn it into something you can feed directly to csv.reader (i.e. without saving a file first). But I think this data is simple enough that you can get away with doing:
for line in data.splitlines():
row = line.split(",")

SOLUTION
from BeautifulSoup import BeautifulSoup
import urllib2,string,csv,sys,os,time
bm_url_stem = "http://www.bmreports.com/servlet/com.logica.neta.bwp_PanBMDataServlet?param1="
bm_station = "T_COTPS-3"
bm_param = "&param2=&param3=&param4=&param5="
bm_date = "2011-02-04"
bm_param6 = "&param6=*"
bm_full_url = bm_url_stem + bm_station + bm_param + bm_date + bm_param6
data = urllib2.urlopen(bm_full_url).read()
soup = BeautifulSoup(data)
scripttag = soup.head.findAll("script")[1]
javascriptdata = scripttag.contents[0]
javascriptdata = javascriptdata.partition('=')[2]
javascriptdata = javascriptdata.strip(' \n"')
javascriptdata = javascriptdata.replace("\\n","\n")
javascriptdata = javascriptdata.strip()
csvwriter = csv.writer(file("c:/temp/" + bm_station + "_" + bm_date + ".csv", "wb"))
for line in javascriptdata.splitlines():
row = line.split(",")
csvwriter.writerow(row)
del csvwriter

We Keep Coding

Python is a programming language that lets you work quickly and integrate systems more effectively.

Get data from API and save it to txt with Python - python

Related

Exporting API output to txt file

write list of paragraph tuples to a csv file

Python Search File Returning []

Saving Flickrapi response object as text file

Scraping values from HTML header and saving as a CSV file in Python

Categories

Resources