Exporting API output to txt file - python

I have created a simple API with FastAPI, and I want to export the output to a text file (txt).
This is the simplified code:
import sys
from clases.sequence import Sequence
from clases.read_file import Read_file
from fastapi import FastAPI

app = FastAPI()

@app.get("/DNA_toolkit")
def sum(input: str):  # pass the sequence in, this time as a query param
    DNA = Sequence(input)  # get the result (i.e., 4)
    return {"Length": DNA.length(),  # return the response
            "Reverse": DNA.reverse(),
            "complement": DNA.complement(),
            "Reverse and complement": DNA.reverse_and_complement(),
            "gc_percentage": DNA.gc_percentage()
            }
And this is the output
{"Length":36,"Reverse":"TTTTTTTTTTGGGGGGGAAAAAAAAAAAAAAAATAT","complement":"ATATTTTTTTTTTTTTTTTCCCCCCCAAAAAAAAAA","Reverse and complement":"AAAAAAAAAACCCCCCCTTTTTTTTTTTTTTTTATA","gc_percentage":5.142857142857143}
The file I would like to get
Length 36
Reverse TTTTTTTTTTGGGGGGGAAAAAAAAAAAAAAAATAT
complement ATATTTTTTTTTTTTTTTTCCCCCCCAAAAAAAAAA
Reverse and complement AAAAAAAAAACCCCCCCTTTTTTTTTTTTTTTTATA
Is there a simple way to do this? This is my first time working with APIs, and I don't even know how possible this is.

dict1={"Length":36,"Reverse":"TTTTTTTTTTGGGGGGGAAAAAAAAAAAAAAAATAT","complement":"ATATTTTTTTTTTTTTTTTCCCCCCCAAAAAAAAAA","Reverse and complement":"AAAAAAAAAACCCCCCCTTTTTTTTTTTTTTTTATA","gc_percentage":5.142857142857143}
with open("output.txt","w") as data:
for k,v in dict1.items():
append_data=k+" "+str(v)
data.write(append_data)
data.write("\n")
Output:
Length 36
Reverse TTTTTTTTTTGGGGGGGAAAAAAAAAAAAAAAATAT
complement ATATTTTTTTTTTTTTTTTCCCCCCCAAAAAAAAAA
Reverse and complement AAAAAAAAAACCCCCCCTTTTTTTTTTTTTTTTATA
gc_percentage 5.142857142857143

You can use the open() function to create a new file and write your output. And as @Blackgaurd told you, this isn't a code-writing service.
Also, I wrote this code really quickly, so some syntax errors may occur.
import sys
import datetime
from clases.sequence import Sequence
from clases.read_file import Read_file
from fastapi import FastAPI

app = FastAPI()

@app.get("/DNA_toolkit")
def sum(input: str):  # pass the sequence in, this time as a query param
    DNA = Sequence(input)  # get the result (i.e., 4)
    res = {"Length": DNA.length(),  # the response
           "Reverse": DNA.reverse(),
           "complement": DNA.complement(),
           "Reverse and complement": DNA.reverse_and_complement(),
           "gc_percentage": DNA.gc_percentage()
           }

    # with open('result.txt', 'w+') as resFile:
    #     for i in res:
    #         resFile.write(i + " " + str(res[i]) + "\n")
    # Uncomment the block above if you don't want to save the result into a
    # file with a unique id, else go with the method I wrote below...

    filename = str(datetime.datetime.now().date()) + '_' + str(datetime.datetime.now().time()).replace(':', '.')
    with open(filename + '.txt', 'w+') as resFile:
        for i in res:
            # str() is needed here: some values (Length, gc_percentage) are numbers
            resFile.write(i + " " + str(res[i]) + "\n")

    return res  # return the response
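If you want to try the endpoint end to end, a quick client-side check might look like this (a sketch: it assumes the app is saved as main.py and served locally with uvicorn main:app, which is my assumption, not part of the original answer):

import requests

# Hit the endpoint once; the server side then writes the timestamped .txt file
r = requests.get("http://127.0.0.1:8000/DNA_toolkit",
                 params={"input": "ATATTTTTTTTTTTTTTTTCCCCCCCAAAAAAAAAA"})
print(r.json())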

I'm going to assume that you have already got your data somehow by calling your API.
# data = requests.get(...).json()

# save to file:
with open("DNA_insights.txt", 'w') as f:
    for k, v in data.items():
        f.write(f"{k}: {v}\n")

Related

Read file and format it into dictionary

How can I read function.py and capture each function definition (def step1() and so on) together with the calls it makes (create() and login()) in dictionary format? (The format I want to achieve is below.)
function.py
#!C:\Python\Python39\python.exe
# print ('Content-type: text/html\n\n')

def step1():
    create()
    login()

def step2():
    authenticate()

def step3():
    send()
Expected output
thisdict = {
    'def step1()': ['create(),login()'],
    'def step2():': ['authenticate()'],
    'def step3():': ['send()']
}
You can read the file function.py, split it in order to separate the different functions, and then for each function, split it once more to get the signature as key and the commands as values:
with open('function.py', 'r') as inFile:
    funcs = inFile.read().split('\n\n')[1:]

result = {}
for elem in funcs:
    sign, commands = elem.split(':')
    commands = list(map(str.strip, commands.split('\n')))[1:]
    result.update({sign: commands})

print(result)
This will return:
{'def step1()': ['create()', 'login()'], 'def step2()': ['authenticate()'], 'def step3()': ['send()']}
You could use a regex that finds each method definition and its content: (def \w+\(.*\):)((?:\n[ \t]+.+)+)
(def \w+\(.*\):) matches the method definition
\n[ \t]+.+ matches each method row (with the preceding \n)
import json
import re

with open("function.py") as fic:
    content = fic.read()

groups = re.findall(r"(def \w+\(.*\):)((?:\n[ \t]+.+)+)", content)
result = {key: [",".join(map(str.strip, val.strip().splitlines()))]
          for key, val in groups}
print(json.dumps(result, indent=4))
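For the sample file above, this prints (output derived by hand, not machine-verified):
{
    "def step1():": [
        "create(),login()"
    ],
    "def step2():": [
        "authenticate()"
    ],
    "def step3():": [
        "send()"
    ]
}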
You can do something like this:
with open('function.py', 'r') as f:
    file = f.readlines()

thisdict = {'start': []}
temp = []
a = '_start_'  # just to get the first lines if there is something before the first function
for line in file:
    if line.startswith('def'):  # You might want to add something for the spacing
        thisdict[a] = temp
        a = line[3:]
        temp = []
    else:
        temp.append(line)
thisdict[a] = temp
print(thisdict)
This clearly isn't the best code, but it's easy to understand and easy to implement :)
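If you want something less fragile than string splitting, here is a minimal sketch using the standard-library ast module (assuming function.py parses as valid Python; the key format mirrors the expected output above):

import ast

with open('function.py') as f:
    tree = ast.parse(f.read())

result = {}
for node in ast.walk(tree):
    if isinstance(node, ast.FunctionDef):
        # Collect every plain-name call made inside this function
        calls = [c.func.id + '()' for c in ast.walk(node)
                 if isinstance(c, ast.Call) and isinstance(c.func, ast.Name)]
        result['def {}():'.format(node.name)] = [','.join(calls)]

print(result)
# {'def step1():': ['create(),login()'], 'def step2():': ['authenticate()'], 'def step3():': ['send()']}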

Advice on the best way to find intersection of two dictionaries

I have posted a similar question before, but after reworking the project, I've gotten here:
With two csv files (new.csv, scrapers.csv) -
new.csv contains a single column:
'urls' = whole URLs
scrapers.csv contains two columns:
'scraper_dom' = A simplification of specific URL domains
'scraper_id' = An associated scraper_id that is used to import URLs to a separately managed database
Question
My goal here is to iterate through new.csv (parsing out fnetloc using urlparse) and perform a lookup on scrapers.csv to return the matching 'scraper_id' for a given set of 'urls' (the way a VLOOKUP would work, or a JOIN in SQL), once urlparse does its thing to isolate the netloc within the URL (the result of fnetloc).
My next big issue is that urlparse does not reduce the URLs (from new.csv) to the exact simplification found in the scrapers.csv file, so I'd be reliant on a sort of partial match until I can figure out the regular expressions to use for that part of it.
I've imported pandas because previous attempts found me creating DataFrames and performing a pd.merge, but I couldn't get that to work either...
Current code; the commented-out bits at the bottom are failed attempts, I just thought I'd include what I've tried thus far.
(## marks intermediate print lines I put in to check the output of the program.)
import pandas as pd, re
from urllib.parse import urlparse
import csv

sd = {}
sid = {}
#INT = []

def fnetloc(any):
    try:
        p = urlparse(any)
        return p.netloc
    except IndexError:
        return 'Error'

def dom(any):
    try:
        r = any.split(',')
        return r[0]
    except IndexError:
        return 'Error'

def ids(any):
    try:
        e = any.split(',')
        return e[0]
    except IndexError:
        return 'Error'

with open('scrapers.csv', encoding='utf-8', newline='') as s:
    reader = enumerate(csv.reader(s))
    s.readline()
    for j, row in reader:
        dict1 = dict({'scraper_dom': dom(row[0]), 'scraper_id': ids(row[1])})
        sid[j + 1] = dict1
for di in sid.keys():
    id = di
    ##print(sid[di]['scraper_dom'],sid[di]['scraper_id'])

with open('new.csv', encoding='UTF-8', newline='') as f:
    reader = enumerate(csv.reader(f))
    f.readline()
    for i, row in reader:
        dict2 = dict({'scraper_domain': fnetloc(row[0])})
        sd[i + 1] = dict2
for d in sd.keys():
    id = d
    ##print(sd[d]['scraper_domain'])

#def tryme( ):
#    return filter(sd.has_key, sid)

#print(list(filter(sid, sd.keys())))
Sample of desired output.
You just need a procedure that can take a fnetloc and a list of scrapers and check whether there is a scraper that matches that fnetloc:
def fnetloc_to_scraperid(fnetloc: str, scrapers: List[Scraper]) -> str:
    try:
        return next(x.scraper_id for x in scrapers if x.matches(fnetloc))
    except StopIteration:
        return "[no scraper id found]"
I also recommend that you use some classes instead of keeping everything in csv row objects--it reduces errors in your code, in the long run, and greatly advances your sanity.
This script worked on the sample data I fed it:
import csv
from urllib.parse import urlparse
from typing import List

def fnetloc(any) -> str:
    try:
        p = urlparse(any)
        return p.netloc
    except IndexError:
        return 'Error'

class Scraper:
    def __init__(self, scraper_dom: str, scraper_id: str):
        self.scraper_dom = scraper_dom
        self.scraper_id = scraper_id
    def matches(self, fnetloc: str) -> bool:
        return fnetloc.endswith(self.scraper_dom)

class Site:
    def __init__(self, url: str):
        self.url = url
        self.fnetloc = fnetloc(url)
    def get_scraperid(self, scrapers: List[Scraper]) -> str:
        try:
            return next(x.scraper_id for x in scrapers if x.matches(self.fnetloc))
        except StopIteration:
            return "[no scraper id found]"

sites = [Site(row[0]) for row in csv.reader(open("new.csv"))]
scrapers = [Scraper(row[0], row[1]) for row in csv.reader(open("scrapers.csv"))]

for site in sites:
    print(site.url, site.get_scraperid(scrapers), sep="\t")
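For illustration, with hypothetical data (these rows are made up, not from the question), a scrapers.csv row of example.com,101 and a new.csv row of https://shop.example.com/item/1 would match, because Scraper.matches does a suffix comparison on the netloc:

# Hypothetical data to show the suffix match performed by Scraper.matches
scrapers = [Scraper("example.com", "101"), Scraper("news.org", "202")]
site = Site("https://shop.example.com/item/1")
print(site.fnetloc)                  # shop.example.com
print(site.get_scraperid(scrapers))  # 101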

Get data from API and save it to txt with Python

I need to get data from this API https://api.storj.io/contacts/f52624d8ef76df81c40853c22f93735581071434 (sample node)
This is my code (python):
import requests

f = requests.get('https://api.storj.io/contacts/f52624d8ef76df81c40853c22f93735581071434')
print(f.text)
I want to save only protocol, responseTime and reputation on three subsequent lines of the txt file. It's supposed to look something like this:
protocol: 1.2.0
responseTime: 8157.912472694088
reputation: 1377
Unfortunately, I'm stuck at this point and cannot process this data in any way.
import requests

f = requests.get('https://api.storj.io/contacts/f52624d8ef76df81c40853c22f93735581071434')

# Store the content as JSON
answer = f.json()

# List of the elements you want to keep
items = ['protocol', 'responseTime', 'reputation']

# Display
for item in items:
    print(item + ':' + str(answer[item]))

# If you want to save to a file
with open("Output.txt", "w") as text_file:
    for item in items:
        print(item + ':' + str(answer[item]), file=text_file)
Hope it helps! Cheers
You just need to transform it into a JSON object to be able to access the keys:
import requests
import simplejson as json

f = requests.get('https://api.storj.io/contacts/f52624d8ef76df81c40853c22f93735581071434')
x = json.loads(f.text)

print('protocol: {}'.format(x.get('protocol')))
print('responseTime: {}'.format(x.get('responseTime')))
print('reputation: {}'.format(x.get('reputation')))
This is a very unrefined way to do what you want that you could build off of. You'd need to sub in a path/filename for text.txt.
import requests
import json

f = requests.get('https://api.storj.io/contacts/f52624d8ef76df81c40853c22f93735581071434')
t = json.loads(f.text)

with open('text.txt', 'a') as mfile:
    # "\n" added so each field lands on its own line, as in the desired output
    mfile.write("protocol: {0}\n".format(str(t['protocol'])))
    mfile.write("responseTime: {0}\n".format(str(t['responseTime'])))
    mfile.write("reputation: {0}\n".format(str(t['reputation'])))

How to update line with modified data in Jython?

I have a csv file which contains hundreds of thousands of rows, and below are some sample lines:
1,Ni,23,28-02-2015 12:22:33.2212-02
2,Fi,21,28-02-2015 12:22:34.3212-02
3,Us,33,30-03-2015 12:23:35-01
4,Uk,34,31-03-2015 12:24:36.332211-02
I need to fix the last column of the csv data, which is in the wrong datetime format, by converting it to the default datetime format ("YYYY-MM-DD hh:mm:ss[.nnn]").
I have tried the following script to get lines from it and write them to a flowfile.
import json
import java.io
from org.apache.commons.io import IOUtils
from java.nio.charset import StandardCharsets
from org.apache.nifi.processor.io import StreamCallback

class PyStreamCallback(StreamCallback):
    def __init__(self):
        pass
    def process(self, inputStream, outputStream):
        text = IOUtils.readLines(inputStream, StandardCharsets.UTF_8)
        for line in text[1:]:
            outputStream.write(line + "\n")

flowFile = session.get()
if (flowFile != None):
    flowFile = session.write(flowFile, PyStreamCallback())
    flowFile = session.putAttribute(flowFile, "filename", flowFile.getAttribute('filename'))
    session.transfer(flowFile, REL_SUCCESS)
but I am not able to find a way to convert it to the output below:
1,Ni,23,28-02-2015 12:22:33.221
2,Fi,21,29-02-2015 12:22:34.321
3,Us,33,30-03-2015 12:23:35
4,Uk,34,31-03-2015 12:24:36.332
I have checked for solutions with my friend (Google) and was still not able to find one.
Can anyone guide me to convert those input data into my required output?
In this transformation the unnecessary data is located at the end of each line, so it's really easy to handle the task with a regular expression:
^(.*:\d\d)((\.\d{1,3})(\d*))?(-\d\d)?
Check the regular expression and explanation here:
https://regex101.com/r/sAB4SA/2
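To see the effect outside NiFi, here is a quick check of that regex in plain CPython (note: substituting an unmatched group as an empty string requires Python 3.5+; older versions raise an error):

import re

p = re.compile(r'^(.*:\d\d)((\.\d{1,3})(\d*))?(-\d\d)?')
for line in ["1,Ni,23,28-02-2015 12:22:33.2212-02",
             "3,Us,33,30-03-2015 12:23:35-01"]:
    # Keep group 1 (everything up to the seconds) plus group 3 (at most 3 fraction digits)
    print(p.sub(r'\1\3', line))
# 1,Ni,23,28-02-2015 12:22:33.221
# 3,Us,33,30-03-2015 12:23:35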
Since you have a large file, it's better not to load it all into memory. The following code loads the whole file into memory:
IOUtils.readLines(inputStream, StandardCharsets.UTF_8)
Better to iterate line by line.
So this code is for the NiFi ExecuteScript processor with the Python (Jython) language:
import sys
import re
import traceback
from org.apache.commons.io import IOUtils
from org.apache.nifi.processor.io import StreamCallback
from org.python.core.util import StringUtil
from java.lang import Class
from java.io import BufferedReader
from java.io import InputStreamReader
from java.io import OutputStreamWriter

class TransformCallback(StreamCallback):
    def __init__(self):
        pass
    def process(self, inputStream, outputStream):
        try:
            writer = OutputStreamWriter(outputStream, "UTF-8")
            reader = BufferedReader(InputStreamReader(inputStream, "UTF-8"))
            line = reader.readLine()
            p = re.compile(r'^(.*:\d\d)((\.\d{1,3})(\d*))?(-\d\d)?')
            while line != None:
                # print line
                match = p.search(line)
                writer.write(match.group(1) + (match.group(3) if match.group(3) != None else ''))
                writer.write('\n')
                line = reader.readLine()
            writer.flush()
            writer.close()
            reader.close()
        except:
            traceback.print_exc(file=sys.stdout)
            raise

flowFile = session.get()
if flowFile != None:
    flowFile = session.write(flowFile, TransformCallback())
    # Finish by transferring the FlowFile to an output relationship
    session.transfer(flowFile, REL_SUCCESS)
And since the question is about NiFi, here are alternatives that may be easier.
The same code as above, but in Groovy for the NiFi ExecuteScript processor:
def ff = session.get()
if(!ff)return
ff = session.write(ff, {rawIn, rawOut->
    // ## transform streams into reader and writer
    rawIn.withReader("UTF-8"){reader->
        rawOut.withWriter("UTF-8"){writer->
            reader.eachLine{line, lineNum->
                if(lineNum>1) { // # skip the first line
                    // ## let's use a regular expression to transform each line
                    writer << line.replaceAll( /^(.*:\d\d)((\.\d{1,3})(\d*))?(-\d\d)?/ , '$1$3' ) << '\n'
                }
            }
        }
    }
} as StreamCallback)
session.transfer(ff, REL_SUCCESS)
ReplaceText processor
And if a regular expression is OK, the easiest way in NiFi is a ReplaceText processor, which can do a regular-expression replace line by line.
In this case you don't need to write any code; just build the regular expression and configure your processor correctly.
This uses pure Jython. It is an example that can be adapted to the OP's needs.
Define a datetime parser for this csv file:
from datetime import datetime

def parse_datetime(dtstr):
    mydatestr = '-'.join(dtstr.split('-')[:-1])
    try:
        return datetime.strptime(mydatestr, '%d-%m-%Y %H:%M:%S.%f').strftime('%d-%m-%Y %H:%M:%S.%f')[:-3]
    except ValueError:
        return datetime.strptime(mydatestr, '%d-%m-%Y %H:%M:%S').strftime('%d-%m-%Y %H:%M:%S')
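For example (using rows like the samples below), the parser drops the trailing UTC offset and truncates microseconds to milliseconds:

print(parse_datetime('27-02-2015 12:22:33.2212-02'))  # 27-02-2015 12:22:33.221
print(parse_datetime('30-03-2015 12:23:35-01'))       # 30-03-2015 12:23:35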
My test.csv includes data like this (2015 didn't have 29 Feb, so I had to change the OP's example):
1,Ni,23,27-02-2015 12:22:33.2212-02
2,Fi,21,28-02-2015 12:22:34.3212-02
3,Us,33,30-03-2015 12:23:35-01
4,Uk,34,31-03-2015 12:24:36.332211-02
Now the solution:
with open('test.csv') as fi:
    for line in fi:
        line_split = line.split(',')
        out_line = ', '.join(word if i < 3 else parse_datetime(word) for i, word in enumerate(line_split))
        #print(out_line)
        #you can write this out_line to a file here.
Printing out_line looks like this:
1, Ni, 23, 27-02-2015 12:22:33.221
2, Fi, 21, 28-02-2015 12:22:34.321
3, Us, 33, 30-03-2015 12:23:35
4, Uk, 34, 31-03-2015 12:24:36.332
You can get them with a regex:
(\d\d-\d\d-\d\d\d\d\ \d\d:\d\d:)(\d+(?:\.\d+)*)(-\d\d)$
Then just replace group #2 with a rounded version of itself.
See the regex example at regexr.com.
You could even do it "nicer" by getting every single value with a capturing group and then putting them into a datetime.datetime object and printing it from there, but IMHO that would be overkill in maintainability and lose you too much performance.
I had no way to test this code:
import re
...
pattern = r'^(.{25})(\d+(?:\.\d+)*)(-\d\d)$'  # used a fixed offset for simplicity
....
for line in text[1:]:
    match = re.search(pattern, line)
    # float() is needed before round(); the regex groups are strings
    line = match.group(1) + str(round(float(match.group(2)), 3)) + match.group(3)
    outputStream.write(line + "\n")

Saving and Retrieving Python object Attributes values to a file

I require 2 things to be done.
First, take the request object and save the object attribute values to a file as values of some known keys. This file needs to be editable after saving, i.e., a user can modify the values of the keys (so I used JSON format). This is handled in the function save_auth_params_to_file().
Second, get the file contents in such a format that I can retrieve the values using the keys. This is handled in the function get_auth_params_from_file().
import json
import os

SUCCESS_AUTH_PARAM_FILE = '/auth/success_auth_params.json'

def save_auth_params_to_file(request):
    auth_params = {}
    if request is not None:
        auth_params['token'] = request.token
        auth_params['auth_url'] = request.auth_url
        auth_params['server_cert'] = request.server_cert
        auth_params['local_key'] = request.local_key
        auth_params['local_cert'] = request.local_cert
        auth_params['timeout'] = request.timeout_secs
    with open(SUCCESS_AUTH_PARAM_FILE, 'w') as fout:
        json.dump(auth_params, fout, indent=4)

def get_auth_params_from_file():
    auth_params = {}
    if os.path.exists(SUCCESS_AUTH_PARAM_FILE):
        with open(SUCCESS_AUTH_PARAM_FILE, "r") as fin:
            auth_params = json.load(fin)
    return auth_params
Question:
Is there a more pythonic way to achieve these 2 things?
Any potential issues in the code which I have overlooked?
Any error conditions I have to take care of?
There are some things to be noted, yes:
i) When your request is None for some reason, you are saving an empty JSON object to your file. Maybe you'll want to write to your file only if request is not None?
auth_params = {}
if request is not None:
    auth_params['token'] = request.token
    auth_params['auth_url'] = request.auth_url
    auth_params['server_cert'] = request.server_cert
    auth_params['local_key'] = request.local_key
    auth_params['local_cert'] = request.local_cert
    auth_params['timeout'] = request.timeout_secs
    with open(SUCCESS_AUTH_PARAM_FILE, 'w') as fout:
        json.dump(auth_params, fout, indent=4)
ii) Why not create the dict all at once?
auth_params = {
    'token': request.token,
    'auth_url': request.auth_url,
    'server_cert': request.server_cert,
    'local_key': request.local_key,
    'local_cert': request.local_cert,
    'timeout': request.timeout_secs,
}
iii) Make sure this file is in a SAFE location with SAFE permissions. This is sensitive data, like anything related to authentication.
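A minimal sketch of that (my own suggestion, not part of the original code): create the file with owner-only permissions (0o600) instead of relying on the process umask:

import json
import os

def save_auth_params_securely(auth_params, path):
    # O_TRUNC overwrites any existing content; 0o600 = read/write for owner only
    fd = os.open(path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
    with os.fdopen(fd, 'w') as fout:
        json.dump(auth_params, fout, indent=4)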
iv) You are overwriting your file every time save_auth_params_to_file is called. Maybe you mean to append your JSON to the file instead of overwriting? If that's the case:
with open(SUCCESS_AUTH_PARAM_FILE, 'a') as fout:
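Note that appending whole JSON documents to a single file means json.load can no longer parse it in one call. A common workaround (my suggestion, not from the original answer) is the JSON Lines layout, one object per line:

import json

# Append one JSON object per line (JSON Lines)
with open(SUCCESS_AUTH_PARAM_FILE, 'a') as fout:
    fout.write(json.dumps(auth_params) + '\n')

# Read every saved snapshot back
with open(SUCCESS_AUTH_PARAM_FILE) as fin:
    history = [json.loads(line) for line in fin]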
