Import settings.py Scrapy - python

I've got a testmultiple folder which contains an __init__.py file, pipelines, settings, and a core.py file which I use to launch several spiders located in a subfolder (spiders). I noticed that I had to import settings to use pipeline with CrawlerProcess. Here is my code:
import scrapy
from scrapy.crawler import CrawlerProcess
from scrapy.settings import Settings
# NOTE(review): this is the line that raises ModuleNotFoundError -- the
# `settings` module must be importable from the current working directory.
import settings as my_settings
from spiders.DemoSpider import DemoSpider
from spiders.DemoSpider2 import DemoSpider2
# Build a Settings object from the project's settings module so the
# pipelines configured there are picked up by CrawlerProcess.
crawler_settings = Settings()
crawler_settings.setmodule(my_settings)
process = CrawlerProcess(settings=crawler_settings)
# Queue both spiders; they run in the same reactor.
process.crawl(DemoSpider)
process.crawl(DemoSpider2)
process.start() # the script will block here until the crawling is finished
But it fails on the 4th line (the `import settings as my_settings` statement). With this attempt I get:
ModuleNotFoundError: No module named 'testmultiple'
When I try :
from testmultiple.settings import settings as my_settings
I have same error, and with this line as well:
from testmultiple import settings as my_settings
How to import settings.py ?

Related

Unknown parent package

I have looked at all the similar discussions, but I can't find a clue.
I have the following structure
ava folder
main.py
utils folder
__init__.py
collect_env.py
distribution_env.py
gradcam_utils.py
logger.py
misc.py
module_hooks.py
precise_bn.py
setup_env.py
__init__.py has
from .collect_env import collect_env
from .distribution_env import build_ddp, build_dp, default_device
from .gradcam_utils import GradCAM
from .logger import get_root_logger
from .misc import get_random_string, get_shm_dir, get_thread_id
from .module_hooks import register_module_hooks
from .precise_bn import PreciseBNHook
from .setup_env import setup_multi_processes
__all__ = [
'get_root_logger', 'collect_env', 'get_random_string', 'get_thread_id',
'get_shm_dir', 'GradCAM', 'PreciseBNHook', 'register_module_hooks',
'setup_multi_processes', 'build_ddp', 'build_dp', 'default_device'
]
In main.py, tried to import
from ..utils import (collect_env, get_root_logger, register_module_hooks, setup_multi_processes)
Then the error is
ImportError: attempted relative import with no known parent package

Is there a way to manage huge many import statements in Python?

I am working on a Flask-RESTful API where I have many import statements in my app.py file, as shown below.
from flask import Flask
from flask_restful import Api
from apis.ChambAvail import ChambAvail
from apis.ChambAvailBal import ChambAvailBal
from apis.ChambTwInhibit import ChambTwInhibit
from apis.CurrentWip import CurrentWip
from apis.Detail import Detail
from apis.IncomingWip import IncomingWip
from apis.Info import Info
from apis.Manage import Manage
from apis.MfCount import MfCount
from apis.OtherHold import OtherHold
from apis.PqeDue import PqeDue
from apis.QualInhibit import QualInhibit
from apis.ReleaseBef import ReleaseBef
from apis.RfInhibit import RfInhibit
from apis.SketchDetailedLot import SketchDetailedLot
from apis.SketchLotDetail import SketchLotDetail
from apis.SketchMainDetail import SketchMainDetail
from apis.SketchMainRC import SketchMainRC
from apis.SketchDesignInfo import SketchDesignInfo
from apis.SketchOneDetail import SketchOneDetail
from apis.SketchOneLotDetail import SketchOneLotDetail
from apis.SketchOneMain import SketchOneMain
from apis.SketchOneNearestLot import SketchOneNearestLot
from apis.SketchTimeList import SketchTimeList
from apis.SketchWsgList import SketchWsgList
from apis.TakeEquipDetail import TakeEquipDetail
from apis.TakeEquipLotDetail import TakeEquipLotDetail
from apis.TakeEquipMain import TakeEquipMain
from apis.TakeOneDetail import TakeOneDetail
from apis.TakeOneEquipData import TakeOneEquipData
from apis.TakeOneMain import TakeOneMain
from apis.TakeOneRADetail import TakeOneRADetail
from apis.UpdBufInputCapacity import UpdBufInputCapacity
from apis.UpdEohInputCapacity import UpdEohInputCapacity
from dbmanager import datasource
from logmanager.setlogger import logger
from utils import config_reader

# Target environment; reused below for the DB flag and the config lookup.
env = 'test'

app = Flask(__name__)
api = Api(app)

# One add_resource call per endpoint class.
api.add_resource(SketchMainRC, '/SketchMainrc')
api.add_resource(SketchDesignInfo, '/SketchDesigninfo')
api.add_resource(SketchWsgList, '/Sketchwsglist')
api.add_resource(SketchMainDetail, '/Sketchmaindetail')
api.add_resource(SketchLotDetail, '/Sketchlotdetail')
api.add_resource(SketchOneMain, '/SketchOneMain')
api.add_resource(SketchOneDetail, '/SketchOnedetail')
api.add_resource(SketchOneLotDetail, '/SketchOnelotdetail')
api.add_resource(SketchOneNearestLot, '/SketchOnenearestlot')
api.add_resource(SketchDetailedLot, '/Sketchdetailedlot')
api.add_resource(SketchTimeList, '/Sketchtimelist')
api.add_resource(TakeEquipMain, '/TakeequipMain')
api.add_resource(TakeEquipDetail, '/Takeequipdetail')
api.add_resource(TakeEquipLotDetail, '/Takeequiplotdetail')
api.add_resource(TakeOneMain, '/TakeOneMain')
api.add_resource(TakeOneDetail, '/TakeOnedetail')
api.add_resource(TakeOneRADetail, '/TakeOneradetail')
api.add_resource(TakeOneEquipData, '/TakeOneequipData')
api.add_resource(ChambAvail, '/chambavail')
api.add_resource(ChambAvailBal, '/chambavailbal')
api.add_resource(ChambTwInhibit, '/chambtwinhibit')
api.add_resource(CurrentWip, '/currentwip')
api.add_resource(IncomingWip, '/incomingwip')
api.add_resource(Detail, '/detail')
api.add_resource(Manage, '/manage')
api.add_resource(MfCount, '/mfcount')
api.add_resource(OtherHold, '/otherhold')
api.add_resource(PqeDue, '/pqedue')
api.add_resource(QualInhibit, '/qualinhibit')
api.add_resource(ReleaseBef, '/releasebef')
api.add_resource(RfInhibit, '/rfinhibit')
api.add_resource(Info, '/info')
# FIX: the original registered an undefined name `InputCapacity` twice
# (NameError at import time). Register the two imported-but-unregistered
# capacity resources instead, each under its own route.
# TODO(review): confirm the intended route names with the API consumers.
api.add_resource(UpdBufInputCapacity, '/updbufinputcapacity')
api.add_resource(UpdEohInputCapacity, '/updeohinputcapacity')

# Use the single `env` constant instead of repeating the literal.
app.ls_sf_db_env = env
app.ls_config = config_reader.get_config(env)

# Initialise the connection pool
datasource.initial_all_fab_engine(app)

if __name__ == '__main__':
    logger.info('Starting the server')
    app.run(port=5000, debug=True)
Below is my project structure.
These are just some of the APIs I completed & there are 100 more. I am new to Python, particularly to Flask-RESTFul API Development. I have separate classes for each API, I thought code looks cleaner that way. But in my app.py file, these import statements are piling up. Is there a better way to add/manage these import statements & also the add.resource() statements in a better way in my app.py file ? Could anyone let me know if there is a way I can import all these statements differently & keep my app.py cleaner ?
Any help is appreciated.
I'd do some path-based magic.
The following snippet will list all .py files in the apis/ subdir, import them as modules, extract a class that is named same as the file, and register it under /ClassName endpoint.
from importlib import import_module
from pathlib import Path

# Auto-register every resource class in apis/ whose class name matches
# its file name, under the endpoint /<ClassName>.
_API_DIR = Path(__file__).parent

for module_path in _API_DIR.glob("apis/*.py"):
    class_name = module_path.stem  # apis/ChambAvail.py -> ChambAvail
    # Skip special files such as __init__ and __main__.
    if class_name.startswith("__"):
        continue
    # Equivalent to "import apis.<ClassName>" followed by
    # "apis.<ClassName>.<ClassName>".
    resource = getattr(import_module(f"apis.{class_name}"), class_name)
    api.add_resource(resource, f"/{class_name}")

Can't get python to import nba_py that's in a different folder

I am using Python 2.7 and Google App Engine for this project. I am a huge NBA fan and I want to use the nba_py API to retrieve additional information to be displayed on my website, but I can't seem to import it correctly. I am trying to import it inside my blogfront.py, but when I refresh the browser I get an error. Here's my directory structure.
multi-user-blog
- libraries
- nba_py
-nba_py
- __init__.py
- handlers
-blogfront.py
- __init__.py
- mainblog.py
Here's my blogfront.py
from bloghandler import BlogHandler
from models.post import Post
from libraries.nba_py import nba_py
# Render Home Page
class BlogFront(BlogHandler):
def get(self):
# type: () -> object
posts = Post.all().order('-created')
title = "Blog/Scores"
standing = get_standing()
self.render('front.html', posts=posts, title=title)
def get_standing():
scoreboard = nba_py.Scoreboard()
print scoreboard.west_conf_standings_by_day()
Here's my mainblog.py
import sys
import os
import re
import random
import hashlib
import hmac
import webapp2
import jinja2
import time
from handlers.bloghandler import BlogHandler
from handlers.blogfront import BlogFront
from handlers.deletecomment import DeleteComment
from handlers.deletepost import DeletePost
from handlers.likepost import Likes
from handlers.login import Login
from handlers.logout import Logout
from handlers.addcomment import AddComment
from handlers.newpost import NewPost
from handlers.postpage import PostPage
from handlers.signup import Signup
from handlers.signup import Register
from handlers.editcomment import EditComment
from handlers.editpost import EditPost
from models.comment import Comment
from models.user import User
from models.post import Post
from helpers import *
from string import letters
from google.appengine.ext import db
# URL routing table: each tuple maps a path pattern (regex capture groups
# become positional handler arguments, e.g. the post id) to its handler.
app = webapp2.WSGIApplication([('/', BlogFront),
('/blog/?', BlogFront),
('/postpage/([0-9]+)', PostPage),
('/edit/([0-9]+)', EditPost),
('/delete/([0-9]+)', DeletePost),
('/addcomment/([0-9]+)', AddComment),
('/blog/([0-9]+)/editcomment/([0-9]+)',
EditComment),
('/blog/([0-9]+)/deletecomment/([0-9]+)',
DeleteComment),
('/blog/like/([0-9]+)', Likes),
('/blog/newpost', NewPost),
('/signup', Register),
('/login', Login),
('/logout', Logout),
],
debug=True)
The error is: ImportError: No module named libraries.nba_py
Do you have a __init__.py at every step in the directory chain? That always gets me
multi-user-blog
- libraries
- __init__.py <-------
- nba_py
- __init__.py <-------
-nba_py
- __init__.py
How did you install nba_py?
If you install it with pip, you can just do import nba_py.
There is no need for your libraries structure.
From what you described, it's not clear whether you've followed the advice in https://cloud.google.com/appengine/docs/standard/python/tools/using-libraries-python-27 and installed nba_py into your project using using pip install -t, and also added an appengine_config.py to do a vendor.add(). Without those steps, your app will be sad.
Also, since nba_py wants to use pandas, you'll want to also pip install -t it, and since pandas wants numpy, you'll need the following in your app.yaml:
libraries:
- name: numpy
version: "1.6.1"

Running scrapy from python script

I've been trying to run Scrapy from a Python script file because I need to get the data and save it into my database. When I run it with the scrapy command
scrapy crawl argos
the script runs fine
but when I try to run it from a script, following this link
http://doc.scrapy.org/en/latest/topics/practices.html#run-scrapy-from-a-script
i get this error
$ python pricewatch/pricewatch.py update
Traceback (most recent call last):
File "pricewatch/pricewatch.py", line 39, in <module>
main()
File "pricewatch/pricewatch.py", line 31, in main
update()
File "pricewatch/pricewatch.py", line 24, in update
setup_crawler("argos.co.uk")
File "pricewatch/pricewatch.py", line 13, in setup_crawler
settings = get_project_settings()
File "/Library/Python/2.7/site-packages/Scrapy-0.22.2-py2.7.egg/scrapy/utils/project.py", line 58, in get_project_settings
settings_module = import_module(settings_module_path)
File "/System/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/importlib/__init__.py", line 37, in import_module
__import__(name)
ImportError: No module named settings
I am unable to understand why it cannot find get_project_settings(), yet it runs fine with the scrapy command in the terminal.
here is the screen shot of my project
here is the pricewatch.py code:
import commands
import sys
from database import DBInstance
from twisted.internet import reactor
from scrapy.crawler import Crawler
from scrapy import log
from spiders.argosspider import ArgosSpider
from scrapy.utils.project import get_project_settings
import settings
def setup_crawler(domain):
spider = ArgosSpider(domain=domain)
settings = get_project_settings()
crawler = Crawler(settings)
crawler.configure()
crawler.crawl(spider)
crawler.start()
def update():
#print "Enter a product to update:"
#product = raw_input()
#print product
#db = DBInstance()
setup_crawler("argos.co.uk")
log.start()
reactor.run()
def main():
try:
if sys.argv[1] == "update":
update()
elif sys.argv[1] == "database":
#db = DBInstance()
except IndexError:
print "You must select a command from Update, Search, History"
if __name__ =='__main__':
main()
I have fixed it: I just needed to put pricewatch.py in the project's top-level directory; running it from there solved the problem.
This answer is heavily copied from this answer, which I believe answers your question and additionally provides a decent example.
Consider a project with the following structure.
my_project/
main.py # Where we are running scrapy from
scraper/
run_scraper.py #Call from main goes here
scrapy.cfg # deploy configuration file
scraper/ # project's Python module, you'll import your code from here
__init__.py
items.py # project items definition file
pipelines.py # project pipelines file
settings.py # project settings file
spiders/ # a directory where you'll later put your spiders
__init__.py
quotes_spider.py # Contains the QuotesSpider class
Basically, the command
scrapy startproject scraper is executed in the my_project folder, I've added a run_scraper.py file to the outer scraper folder, a main.py file to my root folder, and quotes_spider.py to the spiders folder.
My main file:
from scraper.run_scraper import Scraper
# Instantiate the wrapper and launch the spiders; this call blocks until
# crawling has finished.
scraper = Scraper()
scraper.run_spiders()
My run_scraper.py file:
from scraper.scraper.spiders.quotes_spider import QuotesSpider
from scrapy.crawler import CrawlerProcess
from scrapy.utils.project import get_project_settings
import os


class Scraper:
    """Wraps a CrawlerProcess so spiders can be launched from main.py."""

    def __init__(self):
        # Settings module path as seen from the root folder (my_project),
        # i.e. from where main.py is executed.
        settings_file_path = 'scraper.scraper.settings'
        os.environ.setdefault('SCRAPY_SETTINGS_MODULE', settings_file_path)
        self.process = CrawlerProcess(get_project_settings())
        # FIX: the original stored the class as `self.spiders` but read
        # `self.spider` in run_spiders(), raising AttributeError.
        self.spider = QuotesSpider  # the spider you want to crawl

    def run_spiders(self):
        self.process.crawl(self.spider)
        self.process.start()  # blocks here until the crawling is finished
Also, note that the settings might require a look-over, since the path needs to be according to the root folder (my_project, not scraper).
So in my case:
SPIDER_MODULES = ['scraper.scraper.spiders']
NEWSPIDER_MODULE = 'scraper.scraper.spiders'
etc...

ImportError and Django driving me crazy

OK, I have the following directory structure (it's a django project):
-> project
--> app
and within the app folder, there is a scraper.py file which needs to reference a class defined within models.py
I'm trying to do the following:
import urllib2
import os
import sys
import time
import datetime
import re
import BeautifulSoup
# Make the directory that CONTAINS the `project` package importable, then
# point Django at the settings module BEFORE importing any models --
# importing models without DJANGO_SETTINGS_MODULE set would fail.
sys.path.append('/home/userspace/Development/')
os.environ['DJANGO_SETTINGS_MODULE'] = 'project.settings'
from project.app.models import ClassName
and this code just isn't working. I get an error of:
Traceback (most recent call last):
File "scraper.py", line 14, in
from project.app.models import ClassName
ImportError: No module named project.app.models
This code above used to work, but broke somewhere along the line and I'm extremely confused as to why I'm having problems. On SnowLeopard using python2.5.
import sys
# Put the project directory itself on sys.path so `settings` resolves.
sys.path.append ('/path/to/the/project')
from django.core.management import setup_environ
import settings
# setup_environ configures Django from the settings module (pre-1.4 API);
# app models can be imported only after this call.
setup_environ(settings)
from app.models import MyModel
Whoa whoa whoa. You should never ever have to put your project name in any of your app code. You should be able to reuse app code across multiple projects with no changes. Pinax does this really well and I highly recommend checking it out for a lot of django best practices.
The worst thing you could do here is to hard code your absolute path into your app or settings. You shouldn't do this because it will break during deployment unless you do some import local_settings hacking.
If you have to access the project root directory, try what pinax has in settings.py...
import os.path
PROJECT_ROOT = os.path.abspath(os.path.dirname(__file__))
The thing is that it looks like you are trying to access the models module within the same app and this is waaay easier.
To import models.py inside scraper.py in the same directory just use import models or import models as app_models if you already have something named models in scraper.py (django.db.models for instance). Are you familiar with Python module conventions?
However, the best way is probably to stick with the django idiom, from ... import ... statement:
from app import models
If this doesn't work automatically, then something is wrong in your settings.py.
You don't indicate if project is located in /home/userspace/Development/. I'll assume that it is.
Make sure there's an (empty by default) file named __init__.py in project and another one in app.
EDIT: Next thing to try: Fire up the Python command line in the script's directory and try the following:
import project
import project.app as app
import project.app.models as models
models.__dict__.keys()
Do they all work? If so, what is the last line's output? If not, which dies first?

Categories