I am creating my first spider.
I am following the Scrapy tutorial at docs.scrapy.org and writing the code as shown there:
import scrapy
# Spider from the Scrapy tutorial: requests two quotes.toscrape.com pages and
# writes each response body to a local quotes-<page>.html file.
# NOTE(review): the class and method bodies appear without indentation in this
# listing; Python requires them to be indented before the module can be imported.
class QuotesSpider(scrapy.Spider):
name = "quotes"
# Yields one Request per hard-coded URL; each response is handled by parse().
def start_requests(self):
urls = [
'http://quotes.toscrape.com/page/1/',
'http://quotes.toscrape.com/page/2/',
]
for url in urls:
yield scrapy.Request(url=url, callback=self.parse)
# Saves the raw response body under a file name derived from the URL.
def parse(self, response):
# The URLs end in ".../page/<n>/", so the second-to-last piece is <n>.
page = response.url.split("/")[-2]
filename = 'quotes-%s.html' % page
with open(filename, 'wb') as f:
f.write(response.body)
self.log('Saved file %s' % filename)
I saved the code above in C:\Users\DODAAD\scrapy_goodfriday_A01\scrapy_goodfriday_A01\spiders as quotes_spider.py.
I followed the instructions, but this error pops up in cmd:
(base) C:\Users\DODAAD\scrapy_goodfriday_A01>scrapy crawl quotes
Traceback (most recent call last):
File "C:\Users\DODAAD\Anaconda3\Scripts\scrapy-script.py", line 10, in <module>
sys.exit(execute())
File "C:\Users\DODAAD\Anaconda3\lib\site-packages\scrapy\cmdline.py", line 144, in execute
cmd.crawler_process = CrawlerProcess(settings)
File "C:\Users\DODAAD\Anaconda3\lib\site-packages\scrapy\crawler.py", line 280, in __init__
super(CrawlerProcess, self).__init__(settings)
File "C:\Users\DODAAD\Anaconda3\lib\site-packages\scrapy\crawler.py", line 152, in __init__
self.spider_loader = self._get_spider_loader(settings)
File "C:\Users\DODAAD\Anaconda3\lib\site-packages\scrapy\crawler.py", line 146, in _get_spider_loader
return loader_cls.from_settings(settings.frozencopy())
File "C:\Users\DODAAD\Anaconda3\lib\site-packages\scrapy\spiderloader.py", line 68, in from_settings
return cls(settings)
File "C:\Users\DODAAD\Anaconda3\lib\site-packages\scrapy\spiderloader.py", line 24, in __init__
self._load_all_spiders()
File "C:\Users\DODAAD\Anaconda3\lib\site-packages\scrapy\spiderloader.py", line 51, in
_load_all_spiders
for module in walk_modules(name):
File "C:\Users\DODAAD\Anaconda3\lib\site-packages\scrapy\utils\misc.py", line 78, in walk_modules
submod = import_module(fullpath)
File "C:\Users\DODAAD\Anaconda3\lib\importlib\__init__.py", line 127, in import_module
return _bootstrap._gcd_import(name[level:], package, level)
File "<frozen importlib._bootstrap>", line 1006, in _gcd_import
File "<frozen importlib._bootstrap>", line 983, in _find_and_load
File "<frozen importlib._bootstrap>", line 967, in _find_and_load_unlocked
File "<frozen importlib._bootstrap>", line 677, in _load_unlocked
File "<frozen importlib._bootstrap_external>", line 728, in exec_module
File "<frozen importlib._bootstrap>", line 219, in _call_with_frames_removed
File "C:\Users\DODAAD\scrapy_goodfriday_A01\scrapy_goodfriday_A01\spiders\quotes_spider-
checkpoint.py",
line 33, in <module>
"execution_count": null,
NameError: name 'null' is not defined
NameError: name 'null' is not defined
I don't understand what the NameError about 'null' means.
Fix indents for your class:
import scrapy
class QuotesSpider(scrapy.Spider):
    """Download two quotes.toscrape.com listing pages and save them to disk."""

    name = "quotes"

    def start_requests(self):
        """Yield one request per start URL, each handled by ``parse``."""
        urls = [
            'http://quotes.toscrape.com/page/1/',
            'http://quotes.toscrape.com/page/2/',
        ]
        for url in urls:
            yield scrapy.Request(url=url, callback=self.parse)

    def parse(self, response):
        """Write the raw response body to ``quotes-<page>.html``."""
        # The URLs end in ".../page/<n>/", so the second-to-last piece is <n>.
        page = response.url.split("/")[-2]
        filename = 'quotes-%s.html' % page
        with open(filename, 'wb') as f:
            f.write(response.body)
        self.log('Saved file %s' % filename)
Related
I'm learning to automate some web tasks, and I have this code to fill out a form using Python Scrapy. It essentially takes Mongolian Cyrillic sentences and translates them into traditional Mongolian script. What I want to do is read the sentences into a list from a file ("test.txt"), then get the translations back and print them in the console.
import scrapy
class BichigSpider(scrapy.Spider):
    """Submit each sentence from test.txt to the translation form and print the result."""

    name = "bichig"
    # allowed_domains takes bare domain names, not URLs with a scheme.
    allowed_domains = ["trans.mglip.com"]
    start_urls = ["http://trans.mglip.com/EnglishC2T.aspx"]

    def LoadListofSentences(self):
        """Return the non-empty lines of ``test.txt`` without their line endings.

        The file name is resolved against the current working directory, i.e.
        the directory ``scrapy crawl`` is started from.
        """
        with open('test.txt', 'r', encoding='utf-8') as f:
            # Drop the trailing newline so it is not submitted as form data.
            stripped = (line.rstrip('\n') for line in f)
            return [line for line in stripped if line]

    def parse(self, response):
        """Yield one form submission per sentence, each handled by ``parse1``."""
        for sent in self.LoadListofSentences():
            yield scrapy.http.FormRequest.from_response(
                response,
                formdata={'inputCyrillic_ID': sent},
                clickdata={'name': 'ButtonTran_ID'},
                callback=self.parse1,
            )

    def parse1(self, response):
        """Print the first text node matched by the output selector."""
        # NOTE(review): this selector matches an element *named*
        # outPutTraditonalM_ID; if that is an id attribute on the page it
        # needs a leading '#' — confirm against the page markup.
        print(response.css('outPutTraditonalM_ID::text').get())
Content of file "test.txt", located in the folder spiders with the init file:
Хоёр мянга арван гурван оны өвлийн цагаан будан татсан гэрэлт өдөр Өвөрхангай аймгийн театрт Монгол найргийн дархан цэц Дэндэвийн Пүрэвдорж гуайн нэрэмжит “Болор цом” наадмыг жулдрайхан би эхлүүлж байлаа.
Үндсэндээ түрүү жилийн эзэн дараа жилийнхээ цомыг нээдэг тэрхүү уламжлалын дагуу 30 жилийнх нь тэгш ойд түрүүлсний хувьд Пүрэвдорж гуайнхаа наадмыг “Өвгөд минь өндрийн салхи болохуйд” шүлгээрээ нээж байсан.
Тэрхүү хувь заяагаа би дандаа сүслэн боддог.
Пүрэвдорж гуай өөрөө санаачилж эхлүүлсэн, анхны түрүүг нь хүртсэн авшигтай эл наадмыг 80 насных нь их ойгоор өөрийнх нь нэрэмжит болон хүмүүн биеийг олсон халуун голомт Өврийн хангай нутагт нь болоход нээнэ гэдэг хувь заяа гэхээс өөр яалтай.
Шүлгээ дуудчихаад, шүлгээ уншихаар гараанаас эргэх гэж буй морьд шиг тогтож ядан байгаа найрагчдынхаа дунд орж ирэхэд омог төгөлдөр байсан даа.
Эрдэнэ-Очир ах минь, Хөөдөө ах минь, дархад Мийгаа ах минь, Лхамсүрэнжавын Ганзул ах минь бүгд шүлгээ унших гээд ирийтэл зогсож байсан сан.
Мөн ч алтанхан сайхан он жил байж шүү.
Наадмын урьд өдөр “Уран үгсийн чуулган”-д Монголын яруу найргийн их оргилууд Бавуугийн Лхагвасүрэн, Тангадын Галсан, Пунцагийн Бадарч нарын тоосон дунд орж цомын эзэн хэмээн Ичинхорлоо найрагчаар зарлуулж байснаа одоо эргээд бодох нь ээ, айх ч шиг.
Их найргийн бурхдын сүрд дарагдсан бахдал дүүрэн он цаг минь.
But I get this error after running scrapy crawl bichig on the command line:
Traceback (most recent call last):
File "C:\Users\User\anaconda3\envs\Test\Scripts\scrapy-script.py", line 10, in <module>
sys.exit(execute())
File "C:\Users\User\anaconda3\envs\Test\lib\site-packages\scrapy\cmdline.py", line 124, in execute
cmds = _get_commands_dict(settings, inproject)
File "C:\Users\User\anaconda3\envs\Test\lib\site-packages\scrapy\cmdline.py", line 52, in _get_commands_dict
cmds = _get_commands_from_module('scrapy.commands', inproject)
File "C:\Users\User\anaconda3\envs\Test\lib\site-packages\scrapy\cmdline.py", line 33, in _get_commands_from_module
for cmd in _iter_command_classes(module):
File "C:\Users\User\anaconda3\envs\Test\lib\site-packages\scrapy\cmdline.py", line 20, in _iter_command_classes
for module in walk_modules(module_name):
File "C:\Users\User\anaconda3\envs\Test\lib\site-packages\scrapy\utils\misc.py", line 88, in walk_modules
submod = import_module(fullpath)
File "C:\Users\User\anaconda3\envs\Test\lib\importlib\__init__.py", line 126, in import_module
return _bootstrap._gcd_import(name[level:], package, level)
File "<frozen importlib._bootstrap>", line 1050, in _gcd_import
File "<frozen importlib._bootstrap>", line 1027, in _find_and_load
File "<frozen importlib._bootstrap>", line 1006, in _find_and_load_unlocked
File "<frozen importlib._bootstrap>", line 688, in _load_unlocked
File "<frozen importlib._bootstrap_external>", line 883, in exec_module
File "<frozen importlib._bootstrap>", line 241, in _call_with_frames_removed
File "C:\Users\User\anaconda3\envs\Test\lib\site-packages\scrapy\commands\parse.py", line 10, in <module>
from scrapy.utils import display
File "C:\Users\User\anaconda3\envs\Test\lib\site-packages\scrapy\utils\display.py", line 5, in <module>
import ctypes
File "C:\Users\User\anaconda3\envs\Test\lib\ctypes\__init__.py", line 8, in <module>
from _ctypes import Union, Structure, Array
ImportError: DLL load failed while importing _ctypes:the specified module could not be found
Can somebody help me please with this or any other viable solution?
Solved! I needed to reinstall python that's all.
Closed. This question needs debugging details. It is not currently accepting answers.
Edit the question to include desired behavior, a specific problem or error, and the shortest code necessary to reproduce the problem. This will help others answer the question.
Closed 1 year ago.
Improve this question
I already know that this is going to be a trashy question, as per the SO question guidelines, but I have to keep details down to a minimum. I have a traceback, but I don't know why it's throwing an error.
Traceback:
Exception in thread django-main-thread:
Traceback (most recent call last):
File "...USER...\threading.py", line 954, in _bootstrap_inner
self.run()
File "...USER...\threading.py", line 892, in run
self._target(*self._args, **self._kwargs)
File "...USER...\site-packages\django\utils\autoreload.py", line 64, in wrapper
fn(*args, **kwargs)
File "...USER...\site-packages\django\core\management\commands\runserver.py", line 118, in inner_run
self.check(display_num_errors=True)
File "...USER...\site-packages\django\core\management\base.py", line 419, in check
all_issues = checks.run_checks(
File "...USER...\site-packages\django\core\checks\registry.py", line 76, in run_checks
new_errors = check(app_configs=app_configs, databases=databases)
File "...USER...\site-packages\django\core\checks\urls.py", line 13, in check_url_config
return check_resolver(resolver)
File "...USER...\site-packages\django\core\checks\urls.py", line 23, in check_resolver
return check_method()
File "...USER...\site-packages\django\urls\resolvers.py", line 412, in check
for pattern in self.url_patterns:
File "...USER...\site-packages\django\utils\functional.py", line 48, in __get__
res = instance.__dict__[self.name] = self.func(instance)
File "...USER...\site-packages\django\urls\resolvers.py", line 598, in url_patterns
patterns = getattr(self.urlconf_module, "urlpatterns", self.urlconf_module)
File "...USER...\site-packages\django\utils\functional.py", line 48, in __get__
res = instance.__dict__[self.name] = self.func(instance)
File "...USER...\site-packages\django\urls\resolvers.py", line 591, in urlconf_module
return import_module(self.urlconf_name)
File "...USER...\importlib\__init__.py", line 127, in import_module
return _bootstrap._gcd_import(name[level:], package, level)
File "<frozen importlib._bootstrap>", line 1030, in _gcd_import
File "<frozen importlib._bootstrap>", line 1007, in _find_and_load
File "<frozen importlib._bootstrap>", line 986, in _find_and_load_unlocked
File "<frozen importlib._bootstrap>", line 680, in _load_unlocked
File "<frozen importlib._bootstrap_external>", line 855, in exec_module
File "<frozen importlib._bootstrap>", line 228, in _call_with_frames_removed
File "...USER...\urls.py", line 20, in <module>
path('django_app/', include('django_app.urls')),
File "...USER...\site-packages\django\urls\conf.py", line 34, in include
urlconf_module = import_module(urlconf_module)
File "...USER...\importlib\__init__.py", line 127, in import_module
return _bootstrap._gcd_import(name[level:], package, level)
File "<frozen importlib._bootstrap>", line 1030, in _gcd_import
File "<frozen importlib._bootstrap>", line 1007, in _find_and_load
File "<frozen importlib._bootstrap>", line 986, in _find_and_load_unlocked
File "<frozen importlib._bootstrap>", line 680, in _load_unlocked
File "<frozen importlib._bootstrap_external>", line 855, in exec_module
File "<frozen importlib._bootstrap>", line 228, in _call_with_frames_removed
File "...USER...\urls.py", line 2, in <module>
from . import views
File "...USER...\views.py", line 185
if t > 0:
^
SyntaxError: invalid syntax
The "syntax error" is being thrown inside of a class that I made inside of my views.py file...
A general, crude idea of the code is:
import os
import sys
import numpy
some other imports
class my_constructor:
def __init__(self):
some stuff
def func(self):
some logic
(t, b, l, r) = self.removed_space
(w, h, th) = self.dimensions
if l > 0 or r > 0:
ppi_w = 0
if t > 0 or b > 0:
ppi_h = 0
if t > 0: #this is line 185 from the last line in the traceback
t = round(t * ppi_h)
if len(array) > 2:
t_a = 0
else:
t_a = 0
ar = 0
m = 0
if b > 0:
b = 0
if len(array) > 2:
b_a = 0
else:
b_a = 0
ar = 0
m = 0
I know a true answer for this likely isn't a reasonable request, but I am desperate and just asking for general ideas as to why this error might actually be occurring. I am using Notepad++, so spacing is consistent. I know the syntax for the if statement is right... and apart from that, it's got me beat.
if t > 0:
There's nothing wrong with that line. Check the previous line of code for imbalanced parentheses.
I'm having this error while trying to run my Teonite_project/web_scrapper.py script:
File "C:/Users/kfhei/Desktop/Teonite_project/Teonite_project/web_scrapper.py", line 9, in <module>
django.setup()
File "C:\Users\kfhei\Desktop\Teonite_project\env\lib\site-packages\django\__init__.py", line 19, in setup
configure_logging(settings.LOGGING_CONFIG, settings.LOGGING)
File "C:\Users\kfhei\Desktop\Teonite_project\env\lib\site-packages\django\conf\__init__.py", line 56, in __getattr__
self._setup(name)
File "C:\Users\kfhei\Desktop\Teonite_project\env\lib\site-packages\django\conf\__init__.py", line 43, in _setup
self._wrapped = Settings(settings_module)
File "C:\Users\kfhei\Desktop\Teonite_project\env\lib\site-packages\django\conf\__init__.py", line 106, in __init__
mod = importlib.import_module(self.SETTINGS_MODULE)
File "C:\Users\kfhei\Desktop\Teonite_project\env\lib\importlib\__init__.py", line 126, in import_module
return _bootstrap._gcd_import(name[level:], package, level)
File "<frozen importlib._bootstrap>", line 994, in _gcd_import
File "<frozen importlib._bootstrap>", line 971, in _find_and_load
File "<frozen importlib._bootstrap>", line 955, in _find_and_load_unlocked
File "<frozen importlib._bootstrap>", line 665, in _load_unlocked
File "<frozen importlib._bootstrap_external>", line 678, in exec_module
File "<frozen importlib._bootstrap>", line 219, in _call_with_frames_removed
File "C:\Users\kfhei\Desktop\Teonite_project\Teonite_project\Teonite_project\settings.py", line 15, in <module>
django.setup()
File "C:\Users\kfhei\Desktop\Teonite_project\env\lib\site-packages\django\__init__.py", line 19, in setup
configure_logging(settings.LOGGING_CONFIG, settings.LOGGING)
File "C:\Users\kfhei\Desktop\Teonite_project\env\lib\site-packages\django\conf\__init__.py", line 56, in __getattr__
self._setup(name)
File "C:\Users\kfhei\Desktop\Teonite_project\env\lib\site-packages\django\conf\__init__.py", line 43, in _setup
self._wrapped = Settings(settings_module)
File "C:\Users\kfhei\Desktop\Teonite_project\env\lib\site-packages\django\conf\__init__.py", line 125, in __init__
raise ImproperlyConfigured("The SECRET_KEY setting must not be empty.")
django.core.exceptions.ImproperlyConfigured: The SECRET_KEY setting must not be empty.
My script:
from bs4 import BeautifulSoup
from urllib.request import Request, urlopen
#import simplejson as json
import os
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "Teonite_project.settings")
import django
django.setup()
from words.models import Word
from authors.models import Author
URL_DOMAIN = 'https://teonite.com/blog/'
def get_links(url):
    '''
    Return the absolute URLs of the blog articles linked from ``url``.

    Only hrefs that start with ``/blog/`` are kept; tag and pagination links
    (``/blog/tag...`` and ``/blog/page...``) are skipped.
    '''
    from urllib.parse import urljoin

    html = get_html(url)
    links = []
    for anchor in html.findAll('a'):
        href = anchor.get('href')
        if href is None or not href.startswith('/blog/'):
            continue
        # Look at what follows the "/blog/" prefix (6 characters).
        if href[6:].startswith(('tag', 'page')):
            continue
        # Resolve against the page URL instead of slicing a fixed-length prefix.
        links.append(urljoin(url, href))
    return links
def get_html(url):
    '''
    Fetch ``url`` and return its content parsed with BeautifulSoup's
    "html.parser" backend.
    '''
    req = Request(url)
    # Close the HTTP response as soon as the document has been parsed.
    with urlopen(req) as html_page:
        return BeautifulSoup(html_page, "html.parser")
def get_text(url):
    '''
    Return the concatenated text of every ``.post-content`` element on the
    page at ``url``; an empty string when there is none.
    '''
    html = get_html(url)
    # Return the accumulated text of all matches, not just the last element.
    return ''.join(content.text for content in html.select('.post-content'))
def get_author(url):
    '''
    Return the text of the first ``.author-content`` element on the page at
    ``url``, or None when the page has no such element.
    '''
    html = get_html(url)
    authors = html.select('.author-content')
    return authors[0].text if authors else None
if __name__ == '__main__':
    # Main tasks:
    #   * Extract the necessary data from the website,
    #   * Save it to the database
    from collections import Counter

    links = get_links(URL_DOMAIN)
    # Maps author name -> Counter of that author's lowercase alphabetic words.
    author_dict_database = {}
    for link in links:
        author = get_author(link)
        if author is None:
            # No author element on this page, so its words cannot be attributed.
            continue
        author = author.strip()
        # Build a new list rather than removing items from the one being
        # iterated, and keep the lowercased form of each word.
        wordslist = [word.lower() for word in get_text(link).split()
                     if word.isalpha()]
        # Each author gets its own counter; counts accumulate across articles.
        author_dict_database.setdefault(author, Counter()).update(wordslist)

    # Saving values to postgres database
    for key_author, word_dict in author_dict_database.items():
        database_author = Author(author=key_author)
        database_author.save()
        for key_word, value_count in word_dict.items():
            # NOTE(review): passes the author name as in the original; if
            # Word.author is a ForeignKey, pass database_author instead — confirm
            # against the model definition.
            database_word = Word(author=key_author, words_list=key_word, words_count=value_count)
            database_word.save()
As you can see, I tried different things to make it work. I've read several topics on Stack Overflow and searched different websites.
Also I have configured the wsgi.py file
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "Teonite_project.settings")
Unfortunately I have no idea why this happens, because I have set the SECRET_KEY in my Teonite_project/Teonite_project/settings.py
Basically all I want is to run my script, and add the scrapped values to my postgres database in my authors, and words models.
I cannot locate this error.
The error appeared after I added one field in disease, namely contact, and a taggable manager in the models.
If that is correct, then is this error caused by the Id field that I set manually?
Please help me get out of this!
personal(myapp)/urls.py
# Routes for the "personal" app: any path ending in "doctorsu/" or "disease/"
# is dispatched to the matching class-based view.
urlpatterns = [
url(r'^.*doctorsu/$', views.doctorsu.as_view(), name = 'doctorsu'),
url(r'^.*disease/$', views.AddDisease.as_view(), name = 'AddDisease'),
]
mysite/urls.py
# Project-level routes: delegates to personal.urls and taggit_selectize.urls.
# NOTE(review): r'^$' ends with '$' in front of include(), so only the empty
# path can match this prefix before the included patterns are tried — confirm
# whether r'^' was intended.
urlpatterns = [
url(r'^$',include('personal.urls')),
url(r'^taggit/',include('taggit_selectize.urls')),
]
models.py
class DoctorSignup(models.Model):
    """Doctor record with an e-mail address and a validated phone number."""

    # Accepts exactly ten digits, the first being 7, 8 or 9.
    contact_regex = RegexValidator(regex=r'^[789]\d{9}$',message="Phone number must be start with 7,8 or 9")
    # No default here: a fixed default on an auto-incrementing primary key
    # would give every new row the same id.
    doid = models.AutoField(verbose_name='Doctor Id',primary_key=True)
    email = models.CharField(max_length=50)
    # CharField requires max_length; the validator admits exactly 10 characters.
    contact = models.CharField(max_length=10, validators=[contact_regex])
class TaggedSymptoms(TaggedItemBase):
    """Tagged-item model whose tagged object is a Disease (symptom tags)."""

    # CASCADE was the implicit default before Django 2.0; stating it keeps the
    # same behavior and satisfies versions where on_delete is required.
    content_object = models.ForeignKey("Disease", on_delete=models.CASCADE)
class TaggedMedicine(TaggedItemBase):
    """Tagged-item model whose tagged object is a Disease (medicine tags)."""

    # CASCADE was the implicit default before Django 2.0; stating it keeps the
    # same behavior and satisfies versions where on_delete is required.
    content_object = models.ForeignKey("Disease", on_delete=models.CASCADE)
class Disease(models.Model):
    """Disease record with a name, a type and two tag sets (symptoms, medicines)."""

    # No default here: a fixed default on an auto-incrementing primary key
    # would give every new row the same id.
    did = models.AutoField(verbose_name='Disease Id', primary_key=True)
    dName = models.CharField(max_length=20)
    dtype = models.CharField(max_length=10)
    # Each tag manager uses its own through model; the related name is set to
    # "+" on both relations.
    symptoms = TaggableManager(through=TaggedSymptoms)
    symptoms.rel.related_name = "+"
    medi = TaggableManager(through=TaggedMedicine)
    medi.rel.related_name = "+"
views.py
class doctorsu(TemplateView):
    """Doctor sign-up page: shows the form on GET and saves it on POST."""

    template_name = 'personal/doctorsu.html'

    def get(self, request):
        """Render an empty sign-up form together with all existing doctors."""
        dsform = DoctorSignupForm()
        data = DoctorSignup.objects.all()
        args = {'dsform': dsform,'data': data}
        return render(request,self.template_name,args)

    def post(self, request):
        """Save a valid submission; otherwise re-render the bound form."""
        dsform = DoctorSignupForm(request.POST)
        if not dsform.is_valid():
            # Pass the bound form back so the template can show its errors.
            return render(request, self.template_name, {'dsform': dsform})
        dsform.save()
        args = {'dsform': dsform, 'cd': dsform.cleaned_data}
        return render(request, self.template_name, args)
class AddDisease(TemplateView):
    """Disease entry page: shows the form on GET and saves it on POST."""

    template_name = 'personal/disease.html'

    def get(self, request):
        """Render an empty disease form together with all existing diseases."""
        dform = DiseaseForm()
        ddata = Disease.objects.all()
        args = {'dform': dform,'ddata': ddata}
        return render(request,self.template_name,args)

    def post(self, request):
        """Save a valid submission; otherwise re-render the bound form."""
        dform = DiseaseForm(request.POST)
        if not dform.is_valid():
            # A view must always return a response; hand the bound form back
            # so the template can show its errors.
            return render(request, self.template_name, {'dform': dform})
        dform.save()
        args = {'dform': dform, 'cd': dform.cleaned_data}
        return render(request, self.template_name, args)
forms.py
# ModelForm for DoctorSignup that exposes all model fields and declares three
# form fields explicitly.
class DoctorSignupForm(forms.ModelForm):
dname = forms.CharField()
email = forms.EmailField()
# NOTE(review): error_messages is given a plain string here; the traceback's
# failing call is messages.update(error_messages or {}), which needs a mapping
# such as a dict of error code -> message.
contact = forms.RegexField(regex=r'^[789]\d{9}$',error_messages="Enter valid phone no.")
class Meta:
model = DoctorSignup
fields = "__all__"
class DiseaseForm(forms.ModelForm):
    """ModelForm for Disease exposing all model fields plus tag inputs."""

    dName = forms.CharField(help_text="Enter disease")
    symptoms = TagField(help_text="Enter symptoms separated by comma")
    # NOTE(review): the Disease model names its second tag manager "medi";
    # this field is "medicine", so it may not map to it — confirm.
    medicine = TagField()

    class Meta:
        model = Disease
        fields = "__all__"
Traceback
Traceback (most recent call last):
File "manage.py", line 22, in <module>
execute_from_command_line(sys.argv)
File "C:\Users\Charmi Shah\AppData\Local\Programs\Python\Python35-32\lib\site-packages\django\core\management\__init__.py", line 363, in execute_from_command_line
utility.execute()
File "C:\Users\Charmi Shah\AppData\Local\Programs\Python\Python35-32\lib\site-packages\django\core\management\__init__.py", line 355, in execute
self.fetch_command(subcommand).run_from_argv(self.argv)
File "C:\Users\Charmi Shah\AppData\Local\Programs\Python\Python35-32\lib\site-packages\django\core\management\base.py", line 283, in run_from_argv
self.execute(*args, **cmd_options)
File "C:\Users\Charmi Shah\AppData\Local\Programs\Python\Python35-32\lib\site-packages\django\core\management\base.py", line 327, in execute
self.check()
File "C:\Users\Charmi Shah\AppData\Local\Programs\Python\Python35-32\lib\site-packages\django\core\management\base.py", line 359, in check
include_deployment_checks=include_deployment_checks,
File "C:\Users\Charmi Shah\AppData\Local\Programs\Python\Python35-32\lib\site-packages\django\core\management\base.py", line 346, in _run_checks
return checks.run_checks(**kwargs)
File "C:\Users\Charmi Shah\AppData\Local\Programs\Python\Python35-32\lib\site-packages\django\core\checks\registry.py", line 81, in run_checks
new_errors = check(app_configs=app_configs)
File "C:\Users\Charmi Shah\AppData\Local\Programs\Python\Python35-32\lib\site-packages\django\core\checks\urls.py", line 16, in check_url_config
return check_resolver(resolver)
File "C:\Users\Charmi Shah\AppData\Local\Programs\Python\Python35-32\lib\site-packages\django\core\checks\urls.py", line 26, in check_resolver
return check_method()
File "C:\Users\Charmi Shah\AppData\Local\Programs\Python\Python35-32\lib\site-packages\django\urls\resolvers.py", line 254, in check
for pattern in self.url_patterns:
File "C:\Users\Charmi Shah\AppData\Local\Programs\Python\Python35-32\lib\site-packages\django\utils\functional.py", line 35, in __get__
res = instance.__dict__[self.name] = self.func(instance)
File "C:\Users\Charmi Shah\AppData\Local\Programs\Python\Python35-32\lib\site-packages\django\urls\resolvers.py", line 405, in url_patterns
patterns = getattr(self.urlconf_module, "urlpatterns", self.urlconf_module)
File "C:\Users\Charmi Shah\AppData\Local\Programs\Python\Python35-32\lib\site-packages\django\utils\functional.py", line 35, in __get__
res = instance.__dict__[self.name] = self.func(instance)
File "C:\Users\Charmi Shah\AppData\Local\Programs\Python\Python35-32\lib\site-packages\django\urls\resolvers.py", line 398, in urlconf_module
return import_module(self.urlconf_name)
File "C:\Users\Charmi Shah\AppData\Local\Programs\Python\Python35-32\lib\importlib\__init__.py", line 126, in import_module
return _bootstrap._gcd_import(name[level:], package, level)
File "<frozen importlib._bootstrap>", line 986, in _gcd_import
File "<frozen importlib._bootstrap>", line 969, in _find_and_load
File "<frozen importlib._bootstrap>", line 958, in _find_and_load_unlocked
File "<frozen importlib._bootstrap>", line 673, in _load_unlocked
File "<frozen importlib._bootstrap_external>", line 665, in exec_module
File "<frozen importlib._bootstrap>", line 222, in _call_with_frames_removed
File "E:\IT\project\program\20-8-17\mysite\mysite\urls.py", line 7, in <module>
url(r'^$',include('personal.urls')),
File "C:\Users\Charmi Shah\AppData\Local\Programs\Python\Python35-32\lib\site-packages\django\conf\urls\__init__.py", line 50, in include
urlconf_module = import_module(urlconf_module)
File "C:\Users\Charmi Shah\AppData\Local\Programs\Python\Python35-32\lib\importlib\__init__.py", line 126, in import_module
return _bootstrap._gcd_import(name[level:], package, level)
File "<frozen importlib._bootstrap>", line 986, in _gcd_import
File "<frozen importlib._bootstrap>", line 969, in _find_and_load
File "<frozen importlib._bootstrap>", line 958, in _find_and_load_unlocked
File "<frozen importlib._bootstrap>", line 673, in _load_unlocked
File "<frozen importlib._bootstrap_external>", line 665, in exec_module
File "<frozen importlib._bootstrap>", line 222, in _call_with_frames_removed
File "E:\IT\project\program\20-8-17\mysite\personal\urls.py", line 2, in <module>
from . import views
File "E:\IT\project\program\20-8-17\mysite\personal\views.py", line 3, in <module>
from personal.forms import *
File "E:\IT\project\program\20-8-17\mysite\personal\forms.py", line 50, in <module>
class DoctorSignupForm(forms.ModelForm):
File "E:\IT\project\program\20-8-17\mysite\personal\forms.py", line 90, in DoctorSignupForm
contact = forms.RegexField(regex=r'^[789]\d{9}$',error_messages="Enter valid phone no.")
File "C:\Users\Charmi Shah\AppData\Local\Programs\Python\Python35-32\lib\site-packages\django\forms\fields.py", line 517, in __init__
super(RegexField, self).__init__(max_length, min_length, *args, **kwargs)
File "C:\Users\Charmi Shah\AppData\Local\Programs\Python\Python35-32\lib\site-packages\django\forms\fields.py", line 228, in __init__
super(CharField, self).__init__(*args, **kwargs)
File "C:\Users\Charmi Shah\AppData\Local\Programs\Python\Python35-32\lib\site-packages\django\forms\fields.py", line 122, in __init__
messages.update(error_messages or {})
ValueError: dictionary update sequence element #0 has length 1; 2 is required
error_messages should be a dict, not a string.
See the docs.
This is what the project tree looks like:
rym_chart_scraper
├───scrapy.cfg
├───rym_chart_scraper
│ ├───__init__.py
│ ├───items.py
│ ├───models.py
├───pipelines.py
├───settings.py
├───spiders
├───my_spider.py
├───__init__.py
pipelines.py
from models import TopAlbums, db_connect, create_topalbums_table
from sqlalchemy.orm import sessionmaker
class TopAlbumPipeline:
    """Item pipeline that stores each scraped item as a TopAlbums row."""

    def __init__(self):
        """Connect to the database, create the tables and build a session factory."""
        engine = db_connect()
        create_topalbums_table(engine)
        self.Session = sessionmaker(bind=engine)

    def process_item(self, item, spider):
        """Insert ``item`` in its own session and return it unchanged."""
        session = self.Session()
        topalbums = TopAlbums(**item)
        try:
            session.add(topalbums)
            session.commit()
        except:
            # Undo the failed transaction, then let the error propagate.
            session.rollback()
            raise
        finally:
            session.close()
        return item
models.py
from sqlalchemy import create_engine
from sqlalchemy.engine.url import URL
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import Column, Integer, String, DateTime
import settings
Base = declarative_base()
def db_connect():
    """Return an engine built from the ``DATABASE`` mapping in settings."""
    return create_engine(URL(**settings.DATABASE))
def create_topalbums_table(engine):
    """Create every table registered on ``Base`` using ``engine``."""
    Base.metadata.create_all(engine)
class TopAlbums(Base):
    """Declarative model mapped to the ``top_albums`` table."""

    __tablename__ = 'top_albums'

    id = Column(Integer, primary_key=True)
    Artist = Column('Artist', String)
    Album = Column('Album', String)
    Chart_year = Column('Chart_year', String)
    Genre = Column('Genre', String)
    Ratings = Column('Ratings', Integer)
    Reviews = Column('Reviews', Integer)
    Date = Column('Date', DateTime)
And the spider:
from scrapy import Spider, Request
from rym_chart_scraper.utility import find_between, listToString
from rym_chart_scraper.items import TopAlbumChartItem
from datetime import datetime
class TopAlbumChartSpider(Spider):
    """Crawl the all-time top album chart, following "next" links up to 30 pages."""

    name = "top_music_charts"
    allowed_domains = ['rateyourmusic.com']
    start_urls = [
        "https://rateyourmusic.com/charts/top/album/all-time"
    ]
    # Number of chart pages requested so far, including the start page.
    n_pages = 1

    def parse(self, response):
        """Yield one item per chart entry, then a request for the next page."""
        for album, stats in zip(response.css('div.chart_main'),
                                response.css('div.chart_stats')):
            # Item construction is elided in this listing.
            ...
            yield item

        # extract_first() returns None when no link matches, so a page
        # without a "next" link no longer raises IndexError.
        next_page = response.css('a.navlinknext::attr(href)').extract_first()
        if next_page is not None:
            next_page = response.urljoin(next_page)
            self.n_pages += 1
            if self.n_pages < 31:
                yield Request(next_page, callback=self.parse)
When I run the scraper with:
scrapy crawl top_music_charts
I get the following import error.
2016-12-11 17:46:41 [twisted] CRITICAL:
Traceback (most recent call last):
File "/Users/baasman/anaconda/lib/python3.5/site-packages/twisted/internet/defer.py", line 1299, in _inlineCallbacks
result = g.send(result)
File "/Users/baasman/anaconda/lib/python3.5/site-packages/scrapy/crawler.py", line 72, in crawl
self.engine = self._create_engine()
File "/Users/baasman/anaconda/lib/python3.5/site-packages/scrapy/crawler.py", line 97, in _create_engine
return ExecutionEngine(self, lambda _: self.stop())
File "/Users/baasman/anaconda/lib/python3.5/site-packages/scrapy/core/engine.py", line 69, in __init__
self.scraper = Scraper(crawler)
File "/Users/baasman/anaconda/lib/python3.5/site-packages/scrapy/core/scraper.py", line 71, in __init__
self.itemproc = itemproc_cls.from_crawler(crawler)
File "/Users/baasman/anaconda/lib/python3.5/site-packages/scrapy/middleware.py", line 58, in from_crawler
return cls.from_settings(crawler.settings, crawler)
File "/Users/baasman/anaconda/lib/python3.5/site-packages/scrapy/middleware.py", line 34, in from_settings
mwcls = load_object(clspath)
File "/Users/baasman/anaconda/lib/python3.5/site-packages/scrapy/utils/misc.py", line 44, in load_object
mod = import_module(module)
File "/Users/baasman/anaconda/lib/python3.5/importlib/__init__.py", line 126, in import_module
return _bootstrap._gcd_import(name[level:], package, level)
File "<frozen importlib._bootstrap>", line 986, in _gcd_import
File "<frozen importlib._bootstrap>", line 969, in _find_and_load
File "<frozen importlib._bootstrap>", line 958, in _find_and_load_unlocked
File "<frozen importlib._bootstrap>", line 673, in _load_unlocked
File "<frozen importlib._bootstrap_external>", line 665, in exec_module
File "<frozen importlib._bootstrap>", line 222, in _call_with_frames_removed
File "/Users/baasman/Documents/python-workspace/rym_chart_scraper/rym_chart_scraper/pipelines.py", line 1, in <module>
from models import TopAlbums, db_connect, create_topalbums_table
ImportError: No module named 'models'
Trying to import 'models' interactively from main doesn't give an error, just when running the actual spider from the command line. Is there something wrong with the structure of the project? Or is it some other silly mistake? For some reason I can not get past this.