I'm new to GAE and Python. I'm trying to build a simple app using the datastore; the script is as follows:
from google.appengine.ext import db
from google.appengine.ext import webapp
from google.appengine.ext.webapp import util

class Pincodes(db.Model):
    city = db.StringProperty()
    code = db.StringProperty()

class MainHandler(webapp.RequestHandler):
    def get(self):
        q = Pincodes.all()
        q = q.filter("city =", "some_city")
        p = q.get()
        r = 'city: %s code: %s' % (pincode.city, pincode.code)
        self.response.out.write(r)
My script also contains the usual def main() and if __name__ == '__main__' block. I'm developing it step by step from the simple hello world app shown in the code.google docs, and that worked fine. I have uploaded sample pincode data containing 10 records to the local datastore, and that is fine too, but I'm not able to query it and display it on the web page. I tried self.response.out.write and the output is "city: code:" and not "city: mumbai code: 400001". What's wrong with my script?
Use

    entity = q.get()  # use get() if you want one entity only
    r = 'city: %s code: %s' % (entity.city, entity.code)
    self.response.out.write(r)

instead of what you have now.
Edit:

def get(self):
    q = Pincodes.all()
    q = q.filter("city =", "some_city")
    entity = q.get()  # use get() if you want one entity only
    r = 'city: %s code: %s' % (entity.city, entity.code)
    self.response.out.write(r)
Edit 2:

def get(self):
    q = Pincodes.all()
    q = q.filter("city =", "some_city")
    entity = q.get()  # use get() if you want one entity only
    if not entity:
        self.response.out.write('sorry, no entities found')
    else:
        r = 'city: %s code: %s' % (entity.city, entity.code)
        self.response.out.write(r)
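If a city can have more than one pincode entry, a small variation of the same handler (a sketch, not something the original question requires) would use fetch() instead of get(); the limit of 10 is an arbitrary choice for illustration:

def get(self):
    q = Pincodes.all().filter("city =", "some_city")
    entities = q.fetch(10)  # fetch up to 10 matching entities (arbitrary limit for this sketch)
    if not entities:
        self.response.out.write('sorry, no entities found')
    else:
        for entity in entities:
            self.response.out.write('city: %s code: %s\n' % (entity.city, entity.code))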
I am new to Redis and RediSearch.
I want to create an autocomplete using Redis in a Flask app.
Below is what I have tried so far:
autocomplete.py:

import redis
import redisearch
from flask import Flask, request, jsonify, render_template

app = Flask("autocomplete")

# creating a redis connection
r = redis.Redis(host='localhost', port=6379, db=0)

@app.route('/')
def home():
    return "This is Home Page"

# route to add a value to autocomplete list
@app.route('/add')
def addValue():
    try:
        name = request.args.get('name')
        n = name.strip()
        for l in range(1, len(n)):
            prefix = n[0:l]
            r.zadd('compl', {prefix: 0})
        r.zadd('compl', {n + "*": 0})
        return "Success"
    except:
        return "Failed"

# route to get the autocomplete
@app.route('/autocomplete')
def autocomplete():
    prefix = request.args.get('prefix')
    results = []
    rangelen = 50
    count = 5
    start = r.zrank('compl', prefix)
    if not start:
        return []
    while (len(results) != count):
        range = r.zrange('compl', start, start + rangelen - 1)
        start += rangelen
        if not range or len(range) == 0:
            break
        for entry in range:
            entry = entry.decode('utf-8')
            minlen = min(len(entry), len(prefix))
            if entry[0:minlen] != prefix[0:minlen]:
                count = len(results)
                break
            if entry[-1] == "*" and len(results) != count:
                results.append(entry[0:-1])
    return jsonify(results)
Currently the values for @app.route('/add') and the prefixes for @app.route('/autocomplete') are fetched through the URL itself.
However, I want the prefixes/text for @app.route('/autocomplete') to come from an input textbox, to create a dynamic autocomplete.
I would be really grateful if anyone could guide me in implementing this.
This is a sample output: [screenshot of the autocomplete suggestions]
I have also referred to https://redis.com/ebook/part-2-core-concepts/chapter-6-application-components-in-redis/6-1-autocomplete/ but was unable to understand how to implement it.
EDIT: I found a solution for this at https://github.com/RediSearch/redisearch-py/blob/master/redisearch/auto_complete.py
You can use RediSearch's autocompleter.
An example using Flask is available on GitHub: https://github.com/Redislabs-Solution-Architects/redisearch_demo_and_preso
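For reference, here is a minimal sketch of the redisearch-py suggestion API; the key name 'ac', the sample strings, and the connection details are placeholders, and the exact constructor signature may vary between library versions:

from redisearch import AutoCompleter, Suggestion

# suggestion dictionary stored under the Redis key 'ac' (placeholder name)
ac = AutoCompleter('ac', host='localhost', port=6379)

# index a few suggestions, optionally weighted by score
ac.add_suggestions(Suggestion('mumbai', 1.0), Suggestion('madrid', 0.5))

# fetch up to 5 suggestions matching a prefix
for s in ac.get_suggestions('m', num=5):
    print(s.string)

On the textbox side, the page only needs to call the /autocomplete endpoint with the current value of the input field (for example via a JavaScript fetch on each keystroke) and render the returned JSON as the suggestion list.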
I'm using the code shown below to retrieve papers from arXiv. I want to retrieve papers that have the words "machine" and "learning" in the title. The number of papers is large, so I want to slice the requests by year (published).
How can I request records from 2020 and 2019 in search_query? Please note that I'm not interested in post-filtering.
import urllib.request
import time
import feedparser

# Base API query URL
base_url = 'http://export.arxiv.org/api/query?'

# Search parameters
search_query = urllib.parse.quote("ti:machine learning")
start = 0
total_results = 5000
results_per_iteration = 1000
wait_time = 3
papers = []

print('Searching arXiv for %s' % search_query)
for i in range(start, total_results, results_per_iteration):
    print("Results %i - %i" % (i, i + results_per_iteration))
    query = 'search_query=%s&start=%i&max_results=%i' % (search_query,
                                                         i,
                                                         results_per_iteration)
    # perform a GET request using the base_url and query
    response = urllib.request.urlopen(base_url + query).read()
    # parse the response using feedparser
    feed = feedparser.parse(response)
    # Run through each entry, and print out information
    for entry in feed.entries:
        #print('arxiv-id: %s' % entry.id.split('/abs/')[-1])
        #print('Title: %s' % entry.title)
        #feedparser v4.1 only grabs the first author
        #print('First Author: %s' % entry.author)
        paper = {}
        paper["date"] = entry.published
        paper["title"] = entry.title
        paper["first_author"] = entry.author
        paper["summary"] = entry.summary
        papers.append(paper)
    # Sleep a bit before calling the API again
    print('Bulk: %i' % 1)
    time.sleep(wait_time)
According to the arXiv API documentation, there is no published or date field you can filter on in search_query.
What you can do is to sort the results by date (by adding &sortBy=submittedDate&sortOrder=descending to your query parameters) and stop making requests when you reach 2018.
Basically your code should be modified like this:
import urllib.request
import time
import feedparser

# Base API query URL
base_url = 'http://export.arxiv.org/api/query?'

# Search parameters
search_query = urllib.parse.quote("ti:machine learning")
i = 0
results_per_iteration = 1000
wait_time = 3
papers = []
year = ""

print('Searching arXiv for %s' % search_query)
while (year != "2018"):  # stop requesting when the papers' date reaches 2018
    print("Results %i - %i" % (i, i + results_per_iteration))
    query = 'search_query=%s&start=%i&max_results=%i&sortBy=submittedDate&sortOrder=descending' % (search_query,
                                                                                                   i,
                                                                                                   results_per_iteration)
    # perform a GET request using the base_url and query
    response = urllib.request.urlopen(base_url + query).read()
    # parse the response using feedparser
    feed = feedparser.parse(response)
    # Run through each entry, and print out information
    for entry in feed.entries:
        #print('arxiv-id: %s' % entry.id.split('/abs/')[-1])
        #print('Title: %s' % entry.title)
        #feedparser v4.1 only grabs the first author
        #print('First Author: %s' % entry.author)
        paper = {}
        paper["date"] = entry.published
        year = paper["date"][0:4]
        paper["title"] = entry.title
        paper["first_author"] = entry.author
        paper["summary"] = entry.summary
        papers.append(paper)
    # Sleep a bit before calling the API again
    print('Bulk: %i' % 1)
    i += results_per_iteration
    time.sleep(wait_time)
for the "post-filtering" approach, once enough results are collected, I'd do something like this:
papers2019 = [item for item in papers if item["date"][0:4] == "2019"]
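Since the question asks for records from both 2020 and 2019, the same post-filter can keep both years together (a small usage sketch, assuming papers has been filled as above):

papers_2019_2020 = [item for item in papers if item["date"][0:4] in ("2019", "2020")]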
I'm struggling to get a Lambda function working. I have a Python script that accesses the Twitter API, pulls information, and exports that information into an Excel sheet. I'm trying to move the script over to AWS Lambda, and I'm having a lot of trouble.
What I've done so far: created an AWS account, set up S3 with a bucket, and poked around trying to get things to work.
I think the main area I'm struggling with is how to go from a Python script that I execute via the local CLI to Lambda-capable code. I'm not sure I understand how the lambda_handler function works, what the event or context arguments actually mean (despite watching half a dozen tutorial videos), or how to integrate my existing functions into Lambda in the context of the lambda_handler. I'm just very confused and hoping someone can help me get some clarity!
Code that I'm using to pull Twitter data (just a sample):
import time
import datetime
import keys
import pandas as pd
from twython import Twython, TwythonError
import pymysql

def lambda_handler(event, context):
    def oauth_authenticate():
        twitter_oauth = Twython(keys.APP_KEY, keys.APP_SECRET, oauth_version=2)
        ACCESS_TOKEN = twitter_oauth.obtain_access_token()
        twitter = Twython(keys.APP_KEY, access_token=ACCESS_TOKEN)
        return twitter

    def get_username():
        """
        Prompts for the screen name of the targeted account
        """
        username = input("Enter the Twitter screenname you'd like information on. Do not include '@':")
        return username

    def get_user_followers(username):
        """
        Returns data on all accounts following the targeted user.
        WARNING: The number of followers can be huge, and the data isn't very valuable
        """
        #username = get_username()
        #import pdb; pdb.set_trace()
        twitter = oauth_authenticate()
        datestamp = str(datetime.datetime.now().strftime("%Y-%m-%d"))
        target = twitter.lookup_user(screen_name=username)
        for y in target:
            target_id = y['id_str']
        next_cursor = -1
        index = 0
        followersdata = {}
        while next_cursor:
            try:
                get_followers = twitter.get_followers_list(screen_name=username,
                                                           count=200,
                                                           cursor=next_cursor)
                for x in get_followers['users']:
                    followersdata[index] = {}
                    followersdata[index]['screen_name'] = x['screen_name']
                    followersdata[index]['id_str'] = x['id_str']
                    followersdata[index]['name'] = x['name']
                    followersdata[index]['description'] = x['description']
                    followersdata[index]['date_checked'] = datestamp
                    followersdata[index]['targeted_account_id'] = target_id
                    index = index + 1
                next_cursor = get_followers["next_cursor"]
            except TwythonError as e:
                print(e)
                remainder = (float(twitter.get_lastfunction_header(header='x-rate-limit-reset'))
                             - time.time()) + 1
                print("Rate limit exceeded. Waiting for:", remainder/60, "minutes")
                print("Current Time is:", time.strftime("%I:%M:%S"))
                del twitter
                time.sleep(remainder)
                twitter = oauth_authenticate()
                continue
        followersDF = pd.DataFrame.from_dict(followersdata, orient="index")
        followersDF.to_excel("%s-%s-follower list.xlsx" % (username, datestamp),
                             index=False, encoding='utf-8')
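To make the confusion around lambda_handler, event, and context a little more concrete: Lambda calls lambda_handler(event, context) for you, event is simply the JSON payload the trigger sends (so anything interactive like input() has to be replaced by reading from it), context carries runtime metadata such as the remaining execution time, and the only writable disk is /tmp, so the Excel file has to be written there and then pushed somewhere durable such as S3. Below is a minimal sketch along those lines; the bucket name, the 'username' key in the event, and the assumption that get_user_followers is refactored to return a DataFrame are all placeholders, not part of the original code:

import os
import boto3

def lambda_handler(event, context):
    # the trigger (manual invoke, API Gateway test event, etc.) supplies the payload;
    # 'username' is an assumed key for this sketch
    username = event.get("username", "some_account")

    # build the DataFrame with the existing logic (sketch assumes get_user_followers
    # returns the DataFrame instead of writing the file itself)
    followers_df = get_user_followers(username)

    # /tmp is the only writable path inside Lambda
    local_path = os.path.join("/tmp", "%s-followers.xlsx" % username)
    followers_df.to_excel(local_path, index=False)

    # copy the result somewhere durable; the bucket name is a placeholder
    s3 = boto3.client("s3")
    s3.upload_file(local_path, "my-follower-exports", os.path.basename(local_path))

    # the return value is what the caller sees
    return {"statusCode": 200, "body": "exported %d followers" % len(followers_df)}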
We have about 5,000 objects of class Domain in Google App Engine, and we want to export the list of domains to CSV. Each domain is linked to an object of class DomainStateData:
class DomainStateData(db.Expando, ExpandoEntity):
    plan = db.ReferenceProperty(Plan)
    plan_expiration = db.DateTimeProperty()
    trial_expiration = db.DateTimeProperty()
    date_created = db.DateTimeProperty(auto_now_add=True, indexed=True)
    last_modified = db.DateTimeProperty(auto_now=True)

class Domain(db.Expando, ExpandoEntity, SocialIconsEntity):
    """
    Domain Model
    """
    domain = db.StringProperty(required=True)
    ...
    _state_data = db.ReferenceProperty(DomainStateData)

    @property
    def state_data(self):
        try:
            if not self._state_data:
                # try to get it, if not, build it
                sd = DomainStateData.get_by_key_name(self.key().name())
                if not sd:
                    sd = DomainStateData(key_name=self.key().name()).put()
                self._state_data = sd
                self.put()
                return self._state_data
            else:
                return self._state_data
        except ReferencePropertyResolveError:
            self._state_data = DomainStateData(key_name=self.key().name()).put()
            self.put()
            return self._state_data
I wrote code which exports 100 domains to CSV (it takes 5 seconds), but if I try to fetch all 5,000 domains I get a timeout (the limit is 60 seconds). Is it possible to fetch all the DomainStateData objects together without a timeout? Here is my code that exports the domains to CSV:
import sys
sys.path.insert(0, 'libs')
import webapp2
import datetime
import csv
from models import Domain

class ExportAllDomainsToCsvHandler(webapp2.RequestHandler):
    def get(self):
        self.response.headers['Content-Type'] = 'text/csv'
        self.response.headers['Content-Disposition'] = 'attachment; filename="All Domains [{0}].csv"'.format(str(datetime.date.today()))
        writer = csv.writer(self.response.out)
        writer.writerow(["Domain", "Current state", "Plan expiration date", "Trial expiration date", "Current oauth user"])
        all_domains = Domain.all().fetch(100)
        all_domains.sort(key=lambda domain: (0 if domain.state_data.plan_expiration is None else 1, domain.state_data.plan_expiration, 0 if domain.state_data.trial_expiration is None else 1, domain.state_data.trial_expiration, domain.domain))
        for domain in all_domains:
            if (domain.state_data.plan_expiration is None):
                domain_plan_expiration = "No plan expiration date"
            else:
                domain_plan_expiration = domain.state_data.plan_expiration.strftime('%Y-%m-%d')
            if (domain.state_data.trial_expiration is None):
                domain_trial_expiration = "No trial expiration date"
            else:
                domain_trial_expiration = domain.state_data.trial_expiration.strftime('%Y-%m-%d')
            writer.writerow([domain.domain, domain.cur_state.name, domain_plan_expiration, domain_trial_expiration, domain.admin])

app = webapp2.WSGIApplication([
    ("/csv/export_all_domains_to_csv", ExportAllDomainsToCsvHandler)
], debug=True)
OK, I found a solution. I fetched all the DomainStateData objects directly from the database, and now it takes 35 seconds to create the CSV with all the domains. Here is my code (I didn't change the models):
import sys
sys.path.insert(0, 'libs')
import webapp2
import datetime
import csv
from models import DomainStateData, Domain

class ExportAllDomainsToCsvHandler(webapp2.RequestHandler):
    def get(self):
        self.response.headers['Content-Type'] = 'text/csv'
        self.response.headers['Content-Disposition'] = 'attachment; filename="All Domains [{0}].csv"'.format(str(datetime.date.today()))
        writer = csv.writer(self.response.out)
        writer.writerow(["Domain", "Current state", "Plan expiration date", "Trial expiration date", "Current oauth user"])
        all_domain_state_data_dict = dict()
        all_domain_state_data = DomainStateData.all().fetch(1000000)
        all_domains = Domain.all().fetch(1000000)
        for domain_state_data in all_domain_state_data:
            all_domain_state_data_dict[domain_state_data.key().name()] = domain_state_data
        for domain in all_domains:
            if (domain.key().name() in all_domain_state_data_dict):
                domain.__state_data = all_domain_state_data_dict[domain.key().name()]
        all_domains.sort(key=lambda domain: (0 if domain.__state_data.plan_expiration is None else 1, domain.__state_data.plan_expiration, 0 if domain.__state_data.trial_expiration is None else 1, domain.__state_data.trial_expiration, domain.domain))
        for domain in all_domains:
            if (domain.__state_data.plan_expiration is None):
                domain_plan_expiration = "No plan expiration date"
            else:
                domain_plan_expiration = domain.__state_data.plan_expiration.strftime('%Y-%m-%d')
            if (domain.__state_data.trial_expiration is None):
                domain_trial_expiration = "No trial expiration date"
            else:
                domain_trial_expiration = domain.__state_data.trial_expiration.strftime('%Y-%m-%d')
            writer.writerow([domain.domain, domain.cur_state.name, domain_plan_expiration, domain_trial_expiration, domain.admin])

app = webapp2.WSGIApplication([
    ("/csv/export_all_domains_to_csv", ExportAllDomainsToCsvHandler)
], debug=True)
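For what it's worth, the same idea (resolve all the references with one batch datastore call instead of one round trip per domain) can be written as a small generic helper. This is a sketch of a commonly used prefetching pattern for ReferenceProperty; the helper name prefetch_refprops and the usage line are illustrative, not part of the handler above:

from google.appengine.ext import db

def prefetch_refprops(entities, *props):
    """Resolve the given ReferenceProperty fields for a batch of entities with one db.get()."""
    fields = [(entity, prop) for entity in entities for prop in props]
    # raw keys stored in the reference properties, read without dereferencing them one by one
    ref_keys = [prop.get_value_for_datastore(entity) for entity, prop in fields]
    fetched = db.get(list(set(k for k in ref_keys if k is not None)))
    ref_entities = dict((e.key(), e) for e in fetched if e is not None)
    for (entity, prop), ref_key in zip(fields, ref_keys):
        if ref_key is not None and ref_key in ref_entities:
            prop.__set__(entity, ref_entities[ref_key])
    return entities

# usage sketch (illustrative): resolve _state_data for a batch of domains up front
# domains = prefetch_refprops(Domain.all().fetch(1000), Domain._state_data)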
I'm trying to save an entry in MongoDB and get its id, then find that entry from a thread. But sometimes I can't.
import pymongo
import bson
import threading

connection = pymongo.Connection("localhost", 27017)
db = connection.test

def set_cache(db):
    cache_id = db.test_collection.save({'test': 'some string'})
    return cache_id

def get_cache(db, cache_id):
    entry = db.test_collection.find_one({'_id': bson.objectid.ObjectId(cache_id)})
    if not entry:
        print('No entry for %s' % cache_id)
    return entry

i = 0
while 1:
    i += 1
    cache_id = set_cache(db)
    t = threading.Thread(target=get_cache, args=(db, cache_id))
    t.start()
    t.join()
    if i > 10000:
        break
So, sometimes I see 'No entry for ...', but I can see the entry in Mongo.
Python 2.6
MongoDB 2.0.6
The problem with your implementation is that you are using unacknowledged writes, which is the default with pymongo.Connection. With unacknowledged writes the client does not wait for the server to apply the write, so if you issue the find request quickly enough, the document may not be visible yet. You are basically being too fast :)
If you use an acknowledged write concern (w:1), or simply use the new pymongo.MongoClient class (which I encourage you to do), you won't get into that situation:
import pymongo
import bson
import threading

connection = pymongo.MongoClient("localhost", 27017)
db = connection.test

def set_cache(db):
    cache_id = db.test_collection.save({'test': 'some string'})
    return cache_id

def get_cache(db, cache_id):
    entry = db.test_collection.find_one({'_id': bson.objectid.ObjectId(cache_id)})
    if not entry:
        print('No entry for %s' % cache_id)
    return entry

i = 0
while 1:
    i += 1
    cache_id = set_cache(db)
    t = threading.Thread(target=get_cache, args=(db, cache_id))
    t.start()
    t.join()
    if i > 10000:
        break
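If for some reason you have to stay on the old Connection class, you can also ask for acknowledged writes explicitly. A small sketch; note that the safe= spelling belongs to pymongo 2.x and was removed in pymongo 3+, while MongoClient already acknowledges writes by default:

import pymongo

# pymongo 2.x: acknowledged writes for the whole connection
connection = pymongo.Connection("localhost", 27017, safe=True)

# pymongo 2.x: acknowledge just this one write
cache_id = connection.test.test_collection.save({'test': 'some string'}, safe=True)

# MongoClient with an explicit write concern (w=1 is already its default)
client = pymongo.MongoClient("localhost", 27017, w=1)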