I trying to save entry in mongodb and get id. Then I want to find this entry in thread. But sometimes I can't do it.
import pymongo
import bson
import threading
connection = pymongo.Connection("localhost", 27017)
db = connection.test
def set_cache(db):
cache_id = db.test_collection.save({'test': 'some string'})
return cache_id
def get_cache(db, cache_id):
entry = db.test_collection.find_one({'_id' : bson.objectid.ObjectId(cache_id)})
if not entry:
print('No entry for %s' % cache_id)
return entry
i = 0
while 1:
i += 1
cache_id = set_cache(db)
t = threading.Thread(target=get_cache, args=(db, cache_id))
t.start()
t.join()
if i > 10000:
break
So, somethimes I see 'No entry for ...'. But I can see this entry in mongo.
python2.6
mongo 2.0.6
The problem with your implementation is that you are using unacknowledged writes with the default usage of pymongo.Connection . By using this you can get into situations that the writes are not confirmed in memory but you receive the confirmation in the client. If you are faster processing the response and emitting the find request you will get into situations like this one. You are basically being too fast :)
Now if you use an acknowledge write concern w:1 or by just using using the new pymongo.MongoClient class (which I encourage you to do so) you won't get into that situation:
import pymongo
import bson
import threading
connection = pymongo.MongoClient("localhost", 27017)
db = connection.test
def set_cache(db):
cache_id = db.test_collection.save({'test': 'some string'})
return cache_id
def get_cache(db, cache_id):
entry = db.test_collection.find_one({'_id' : bson.objectid.ObjectId(cache_id)})
if not entry:
print('No entry for %s' % cache_id)
return entry
i = 0
while 1:
i += 1
cache_id = set_cache(db)
t = threading.Thread(target=get_cache, args=(db, cache_id))
t.start()
t.join()
if i > 10000:
break
N.
Related
I am new to Redis and Redisearch.
I want to create an autocomplete using redis in flask app.
Below is what I have tried so far,
autocomplete.py:
import redis
import redisearch
from flask import Flask,request,jsonify,render_template
app = Flask("autocomplete")
#creating a redis connection
r = redis.Redis(host='localhost', port=6379,db=0)
#app.route('/')
def home():
return "This is Home Page"
#route to add a value to autocomplete list
#app.route('/add')
def addValue():
try:
name = request.args.get('name')
n = name.strip()
for l in range(1,len(n)):
prefix = n[0:l]
r.zadd('compl',{prefix:0})
r.zadd('compl',{n+"*":0})
return "Success"
except:
return "Failed"
#route to get the autocomplete
#app.route('/autocomplete')
def autocomplete():
prefix = request.args.get('prefix')
results = []
rangelen = 50
count=5
start = r.zrank('compl',prefix)
if not start:
return []
while (len(results) != count):
range = r.zrange('compl',start,start+rangelen-1)
start += rangelen
if not range or len(range) == 0:
break
for entry in range:
entry=entry.decode('utf-8')
minlen = min(len(entry),len(prefix))
if entry[0:minlen] != prefix[0:minlen]:
count = len(results)
break
if entry[-1] == "*" and len(results) != count:
results.append(entry[0:-1])
return jsonify(results)
Currently the values for #app.route('/add') and prefixes for #app.route('/autocomplete') is fetched through the URL itself.
However, I want the prefixes/text for #app.route('/autocomplete') to be fetched through an input textbox to create dynamic autocomplete.
I would be really grateful if anyone could guide me in implementing the same.
This is a sample output:
autocomplete
I have also referred to https://redis.com/ebook/part-2-core-concepts/chapter-6-application-components-in-redis/6-1-autocomplete/ but was unable to understand on how to implement it
EDIT : I found a solution for this at https://github.com/RediSearch/redisearch-py/blob/master/redisearch/auto_complete.py
You can use redisearch's autocompleter.
Example using flask is available on Github https://github.com/Redislabs-Solution-Architects/redisearch_demo_and_preso
I am using python 3.9 with IMAPlib in order to retrieve emails and scrape links from them. It works fine but can become quite slow for large amounts of emails (I'm doing ~40,000). In order to speed it up I'd like to use some concurrency I can get all the emails at once.
To do this I get the IDs of all the emails beforehand then assign each ID to a task in my pool. I close the previously used impalib connection before scrape_link_mp() is called. I have tried to use a lock and a manager lock but I still get the same error.
Am I missing something fundamental here? Let me know if anything else needs to be explained, thanks.
My code looks like this:
def scrape_link_mp(self):
self.file_counter = 0
self.login_session.logout()
self.Manager = multiprocessing.Manager()
self.lock = self.Manager.Lock()
futures = []
with concurrent.futures.ProcessPoolExecutor() as Executor:
for self.num_message in self.arr_of_emails[self.start_index:]:
task_params = self.current_user,self.current_password,self.counter,self.imap_url,self.num_message,self.substring_filter,self.link_regex,self.lock
futures.append(
Executor.submit(
self.scrape_link_from_email_single,
*task_params
)
)
for future in concurrent.futures.as_completed(futures):
self.counter+=1
self.timestamp = time.strftime('%H:%M:%S')
print(f'[{self.timestamp}] DONE: {self.counter}/{len(self.num_mails)}')
print(future.result())
def scrape_link_from_email_single(self,current_user,current_password,counter,imap_url,num_message,substring_filter,link_regex,lock):
login_session_mp.logout()
current_user_mp = self.current_user
current_password_mp = self.current_password
self.lock.acquire()
login_session_mp = imaplib.IMAP4_SSL(self.imap_url,993)
login_session_mp.login(current_user_mp,current_password_mp)
self.search_mail_status, self.amount_matching_criteria = login_session_mp.search(Mail.CHARSET,search_criteria)
_,individual_response_data = login_session_mp.fetch(self.num_message,'(RFC822)')
self.lock().release
raw = email.message_from_bytes(individual_response_data[0][1])
scraped_email_value = str(email.message_from_bytes(Mail.scrape_email(raw)))
print(scraped_email_value)
returned_links = str(link_regex.findall(scraped_email_value))
for i in returned_links:
if substring_filter:
self.lock.acquire()
with open('out.txt','a+') as link_file:
link_file.write(i +'\n')
link_file.close()
self.lock.release()
I'm struggling to get a Lambda function working. I have a python script to access twitter API, pull information, and export that information into an excel sheet. I'm trying to transfer python script over to AWS/Lambda, and I'm having a lot of trouble.
What I've done so far: Created AWS account, setup S3 to have a bucket, and poked around trying to get things to work.
I think the main area I'm struggling is how to go from a python script that I'm executing via local CLI and transforming that code into lambda-capable code. I'm not sure I understand how the lambda_handler function works, what the event or context arguments actually mean (despite watching a half dozen different tutorial videos), or how to integrate my existing functions into Lambda in the context of the lambda_handler, and I'm just very confused and hoping someone might be able to help me get some clarity!
Code that I'm using to pull twitter data (just a sample):
import time
import datetime
import keys
import pandas as pd
from twython import Twython, TwythonError
import pymysql
def lambda_handler(event, context):
def oauth_authenticate():
twitter_oauth = Twython(keys.APP_KEY, keys.APP_SECRET, oauth_version=2)
ACCESS_TOKEN = twitter_oauth.obtain_access_token()
twitter = Twython(keys.APP_KEY, access_token = ACCESS_TOKEN)
return twitter
def get_username():
"""
Prompts for the screen name of targetted account
"""
username = input("Enter the Twitter screenname you'd like information on. Do not include '#':")
return username
def get_user_followers(username):
"""
Returns data on all accounts following the targetted user.
WARNING: The number of followers can be huge, and the data isn't very valuable
"""
#username = get_username()
#import pdb; pdb.set_trace()
twitter = oauth_authenticate()
datestamp = str(datetime.datetime.now().strftime("%Y-%m-%d"))
target = twitter.lookup_user(screen_name = username)
for y in target:
target_id = y['id_str']
next_cursor = -1
index = 0
followersdata = {}
while next_cursor:
try:
get_followers = twitter.get_followers_list(screen_name = username,
count = 200,
cursor = next_cursor)
for x in get_followers['users']:
followersdata[index] = {}
followersdata[index]['screen_name'] = x['screen_name']
followersdata[index]['id_str'] = x['id_str']
followersdata[index]['name'] = x['name']
followersdata[index]['description'] = x['description']
followersdata[index]['date_checked'] = datestamp
followersdata[index]['targeted_account_id'] = target_id
index = index + 1
next_cursor = get_followers["next_cursor"]
except TwythonError as e:
print(e)
remainder = (float(twitter.get_lastfunction_header(header = 'x-rate-limit-reset')) \
- time.time())+1
print("Rate limit exceeded. Waiting for:", remainder/60, "minutes")
print("Current Time is:", time.strftime("%I:%M:%S"))
del twitter
time.sleep(remainder)
twitter = oauth_authenticate()
continue
followersDF = pd.DataFrame.from_dict(followersdata, orient = "index")
followersDF.to_excel("%s-%s-follower list.xlsx" % (username, datestamp),
index = False, encoding = 'utf-8')
I know That it is not possible to pickle a pyramid request object, but I cant seem to find where I am sending the Request object.
Consider the following:
#task
def do_consignment_task(store, agent):
print "GOTHERE IN TASK"
s = sqlahelper.get_session()
consign = store.gen_consignment()
ca = Agents.by_id(store.consignment_agents_id)
consign.consignment_agents_id = ca.id
consign.consignment_teamleader_id = ca.ou[0].lead_agents_id
consign.consignment_timestamp = func.now()
consign.created_by_agent_id = agent.id
consign.complete_stamp = func.now()
consign.sims = store.sims
consign.status = "SUCCESS"
print "GOT BEFORE LOOP "
for sim in store.sims:
if sim in consign.sims:
continue
else:
consign.sims.append(sim)
s.add(consign)
transaction.savepoint()
print "GOT AFTER SAVEPOINT"
for sim in consign.sims:
is_reconsign = sim.consignment_agent or sim.consignment_teamlead
if is_reconsign:
if not sim.consignment_history:
sim.consignment_history = []
sim.consignment_history.append(dict(
stamp=sim.consignment_timestamp,
consignment_agent_id=sim.consignment_agents_id,
consignment_teamleader_id=sim.consignment_teamleader_id,
by_agent_id=agent.id
))
s.query(
Sims
).filter(
Sims.iccid == sim.iccid
).update(
{
"consignment_agents_id": consign.consignment_agents_id,
"consignment_history": sim.consignment_history,
"consignment_teamleader_id": ca.ou[0].lead_agents_id,
"consignment_timestamp": func.now(),
"modify_stamp": func.now(),
"consignments_id": consign.id
},
synchronize_session=False
)
print "GOT BEFORE COMMIT"
transaction.savepoint()
print "THIS IS THE ID ID ID ID ID ID : ", consign.id
I call this function like:
if self.store.finalise:
try:
store = self.store
agent = self.agent
do_consignment_task.delay(store, agent)
transaction.commit()
self.check_and_purge()
return "Consignmnet is being processed"
except Exception, exc:
self.check_and_purge()
self.log.exception(exc)
exc_error = "CONSIGNERR:", exc.message
raise USSDFailure(exc_error)
else:
self.store.status = "CANCELLED"
if "fullconfirm" in self.session:
del self.session["fullconfirm"]
self.check_and_purge()
return "CONSIGNMENT Cancelled"
When I run this code I get the following error:
EncodeError: Can't pickle <class 'pyramid.util.Request'>: attribute lookup pyramid.util.Request failed
I am not sending self or request objects - at least not that I can see.
How can solve this problem? Am I sending a request object, because I can not see one?
The traceback can be seen here
EDIT:
okay So I have tried to change the data I send to the function - I am not passing a sqlalchemy object and I am making a copy of the store object, that changes my code to:
#task
def do_consignment_task(agent_id, **store):
print "GOTHERE IN TASK"
s = sqlahelper.get_session()
cObj = USSDConsignmentsObject()
consign = cObj.gen_consignment()
ca = Agents.by_id(store.consignment_agents_id)
consign.consignment_agents_id = ca.id
consign.consignment_teamleader_id = ca.ou[0].lead_agents_id
consign.consignment_timestamp = func.now()
consign.created_by_agent_id = agent_id
# etc
and:
if self.store.finalise:
try:
# del self.service
store = self.store.__dict__.copy()
agent_id = self.agent.id
print store
print agent_id
# print help(store)
do_consignment_task.delay(agent_id, **store)
transaction.commit()
#etc
This however still gives me the same error :|
Try not to serialise a Pyramid request object. When you interact with a celery task you should think of it as an independent process.
Provide it all the information it needs to do it's work. Be aware that you need to serialise that information.
So self.store possibly contains attribute references that may be unrealistic to serialise.
Perhaps create a method on the store object that returns a clean dictionary object.
def serialize(self):
data = {}
data["element1"] = self.element1
data["element2"] = self.element2
data["element3"] = self.element3
return data
Then when you want to call the delay method make sure to use store.serialize() instead of store or the dict.
i m new to gae and python too, i m trying to build simple app using datastore in which script is as follows
from google.appengine.ext import db
from google.appengine.ext import webapp
from google.appengine.ext.webapp import util
class Pincodes(db.Model):
city = db.StringProperty()
code = db.StringProperty()
class MainHandler(webapp.RequestHandler):
def get(self):
q = Pincodes.all()
q = q.filter("city =", "some_city")
p = q.get()
r = 'city: %s code: %s' % (pincode.city, pincode.code)
self.response.out.write(r)
my script also contain usual def main() and if__name, im developing it step by step from simple hello world app shown in code.google docs and it worked fine, i hav uploaded sample pincode data containing 10 records to local datastore and its fine too but im not able to query and display it on webpage i tried self.response.out.write and the output is "city: code: " and not "city: mumbai code:400001" whats wrong with my script
use
entity = q.get() # use get if you want one entity only
r = 'city: %s code: %s' %(entity.city, entity.code)
self.response.out.write(r)
instead of the print
edit:
def get(self):
q = Pincodes.all()
q = q.filter("city =", "some_city")
entity = q.get() # use get if you want one entity only
r = 'city: %s code: %s' %(entity.city, entity.code)
self.response.out.write(r)
edit2:
def get(self):
q = Pincodes.all()
q = q.filter("city =", "some_city")
entity = q.get() # use get if you want one entity only
if not entity:
self.response.out.write('sorry no entities found')
else:
r = 'city: %s code: %s' %(entity.city, entity.code)
self.response.out.write(r)