Getting errors when downloading large files in Python, specifically using pytuve - python

For a long time I was able to use pytube to download any video no matter the length. Recently, I have been getting errors on anything longer than 2 hours. What's weird is if I keep trying (using a while loop) eventually it will download. I would prefer to just get it the first time though.
Here is the error:
Traceback (most recent call last):
File "AppData\Local\Programs\Python\Python310\lib\site-packages\pytube\streams.py", line 314, in download
for chunk in request.stream(
File "AppData\Local\Programs\Python\Python310\lib\site-packages\pytube\request.py", line 157, in stream
response = _execute_request(
File "AppData\Local\Programs\Python\Python310\lib\site-packages\pytube\request.py", line 37, in _execute_request
return urlopen(request, timeout=timeout) # nosec
File "AppData\Local\Programs\Python\Python310\lib\urllib\request.py", line 216, in urlopen
return opener.open(url, data, timeout)
File "AppData\Local\Programs\Python\Python310\lib\urllib\request.py", line 525, in open
response = meth(req, response)
File "AppData\Local\Programs\Python\Python310\lib\urllib\request.py", line 634, in http_response
response = self.parent.error(
File "AppData\Local\Programs\Python\Python310\lib\urllib\request.py", line 563, in error
return self._call_chain(*args)
File "AppData\Local\Programs\Python\Python310\lib\urllib\request.py", line 496, in _call_chain
result = func(*args)
File "AppData\Local\Programs\Python\Python310\lib\urllib\request.py", line 643, in http_error_default
raise HTTPError(req.full_url, code, msg, hdrs, fp)
urllib.error.HTTPError: HTTP Error 500: Internal Server Error
Here is my code:
import pytube, csv
from pytube import YouTube
def yt_video_dl(vid_link):
yt = YouTube(vid_link)
yt_streams = yt.streams
yt_title = yt.title
stream_number = 0
streaminfo = yt.streams.filter(file_extension='mp4')
for s in streaminfo:
if 'itag="22"' in str(s):
stream_number = 22
elif 'itag="18"' in str(s):
stream_number = 18
stream = yt.streams.get_by_itag(stream_number)
stream.download('F:/',filename='downloadedvideo.mp4',timeout=12000)
yt_video_dl('https://www.youtube.com/watch?v=peVjp6uO4JY')

Related

Pytube HTTPError: HTTP Error 429: Too Many Requests

I was going to download youtube video so wrote a code in python using pytube module.
I tried pip install pytube and pip install pytube3 .
Only pytube3 worked for me .
from pytube import YouTube
url = 'https://www.youtube.com/watch?v=lJ08IGp5ulA&t=23s'
yt = YouTube(url)
videos = yt.streams.all()
print(videos)
I used this code but got error like
Traceback (most recent call last):
File "c:\Users\ASUS\Desktop\utube\sunil.py", line 5, in <module>
yt = YouTube(url)
File "C:\Users\ASUS\AppData\Local\Programs\Python\Python38-32\lib\site-packages\pytube\__main__.py", line 91, in __init__
self.prefetch()
File "C:\Users\ASUS\AppData\Local\Programs\Python\Python38-32\lib\site-packages\pytube\__main__.py", line 162, in prefetch
self.watch_html = request.get(url=self.watch_url)
File "C:\Users\ASUS\AppData\Local\Programs\Python\Python38-32\lib\site-packages\pytube\request.py", line 36, in get
return _execute_request(url).read().decode("utf-8")
File "C:\Users\ASUS\AppData\Local\Programs\Python\Python38-32\lib\site-packages\pytube\request.py", line 24, in _execute_request
return urlopen(request) # nosec
File "C:\Users\ASUS\AppData\Local\Programs\Python\Python38-32\lib\urllib\request.py", line 222, in urlopen
return opener.open(url, data, timeout)
File "C:\Users\ASUS\AppData\Local\Programs\Python\Python38-32\lib\urllib\request.py", line 531, in open
response = meth(req, response)
File "C:\Users\ASUS\AppData\Local\Programs\Python\Python38-32\lib\urllib\request.py", line 640, in http_response
response = self.parent.error(
File "C:\Users\ASUS\AppData\Local\Programs\Python\Python38-32\lib\urllib\request.py", line 563, in error
result = self._call_chain(*args)
File "C:\Users\ASUS\AppData\Local\Programs\Python\Python38-32\lib\urllib\request.py", line 502, in _call_chain
result = func(*args)
File "C:\Users\ASUS\AppData\Local\Programs\Python\Python38-32\lib\urllib\request.py", line 755, in http_error_302
return self.parent.open(new, timeout=req.timeout)
File "C:\Users\ASUS\AppData\Local\Programs\Python\Python38-32\lib\urllib\request.py", line 531, in open
response = meth(req, response)
File "C:\Users\ASUS\AppData\Local\Programs\Python\Python38-32\lib\urllib\request.py", line 640, in http_response
response = self.parent.error(
File "C:\Users\ASUS\AppData\Local\Programs\Python\Python38-32\lib\urllib\request.py", line 569, in error
return self._call_chain(*args)
File "C:\Users\ASUS\AppData\Local\Programs\Python\Python38-32\lib\urllib\request.py", line 502, in _call_chain
result = func(*args)
File "C:\Users\ASUS\AppData\Local\Programs\Python\Python38-32\lib\urllib\request.py", line 649, in http_error_default
raise HTTPError(req.full_url, code, msg, hdrs, fp)
urllib.error.HTTPError: HTTP Error 429: Too Many Requests
So help me to fix this.

downloading a CSV with wget

Hi i am newish to python, and working on modeling of the CV-19 outbreak in the UK.
Currently I am writting a program to automatically download the UK governments latest death statistics.
Here is my code so far:
import wget
url = "https://coronavirus.data.gov.uk/downloads/csv/coronavirus-cases_latest.csv"#
wget.download(url, 'C:/Users/Moshe/Downloads/Covid_19_uk_timeseries.csv')
Running this in pycharm I get:
C:\Users\Moshe\PycharmProjects\HelloWorld\venv\Scripts\python.exe C:/Users/Moshe/.PyCharmCE2019.1/config/scratches/Get_CV19_UK_stats.py
Traceback (most recent call last):
File "C:/Users/Moshe/.PyCharmCE2019.1/config/scratches/Get_CV19_UK_stats.py", line 6, in <module>
wget.download(url, 'C:/Users/Moshe/Downloads/Covid_19_uk_timeseries.csv')
File "C:\Users\Moshe\PycharmProjects\HelloWorld\venv\lib\site-packages\wget.py", line 526, in download
(tmpfile, headers) = ulib.urlretrieve(binurl, tmpfile, callback)
File "C:\Users\Moshe\AppData\Local\Programs\Python\Python37\lib\urllib\request.py", line 247, in urlretrieve
with contextlib.closing(urlopen(url, data)) as fp:
File "C:\Users\Moshe\AppData\Local\Programs\Python\Python37\lib\urllib\request.py", line 222, in urlopen
return opener.open(url, data, timeout)
File "C:\Users\Moshe\AppData\Local\Programs\Python\Python37\lib\urllib\request.py", line 531, in open
response = meth(req, response)
File "C:\Users\Moshe\AppData\Local\Programs\Python\Python37\lib\urllib\request.py", line 641, in http_response
'http', request, response, code, msg, hdrs)
File "C:\Users\Moshe\AppData\Local\Programs\Python\Python37\lib\urllib\request.py", line 569, in error
return self._call_chain(*args)
File "C:\Users\Moshe\AppData\Local\Programs\Python\Python37\lib\urllib\request.py", line 503, in _call_chain
result = func(*args)
File "C:\Users\Moshe\AppData\Local\Programs\Python\Python37\lib\urllib\request.py", line 649, in http_error_default
raise HTTPError(req.full_url, code, msg, hdrs, fp)
urllib.error.HTTPError: HTTP Error 308: Permanent Redirect
Process finished with exit code 1
The file I am trying to get can be found on this page:
https://coronavirus.data.gov.uk/?_ga=2.157835066.1251021075.1589887735-1783596499.1585566366
Why won't this work? and what do i need to change?
Can you verify that your output path is valid and writeable?
try
import wget
import os
output_dir = 'C:/Users/Moshe/Downloads'
output_file = 'Covid_19_uk_timeseries.csv'
assert os.path.exists(output_dir)
url = "https://coronavirus.data.gov.uk/downloads/csv/coronavirus-cases_latest.csv"#
wget.download(url, out = os.path.join(output_dir,output_file))

How to download .log files from the server and save it to local disk using Python

I am trying to download error log files such as IIS logs, HTTP API error logs from the server and save it to my disk. These logs contain the .log extension.
Have tried below code to download, it works well for text files but it didn't work for the files which are of type .log:
from urllib.request import urlopen
response = urlopen('https://servername/path/errorlog.txt') #doesn't work for response = urlopen('https://servername/path/errorlog.log')
data = response.read()
# Write data to file
filename = "test.txt"
file_ = open(filename, 'w')
file_.write(data)
file_.close()
This is the error messages i am getting:
Traceback (most recent call last):
File "<pyshell#26>", line 1, in <module>
response = urlopen(url)
File "C:\Python\lib\urllib\request.py", line 223, in urlopen
return opener.open(url, data, timeout)
File "C:\Python\lib\urllib\request.py", line 532, in open
response = meth(req, response)
File "C:\Python\lib\urllib\request.py", line 642, in http_response
'http', request, response, code, msg, hdrs)
File "C:\Python\lib\urllib\request.py", line 570, in error
return self._call_chain(*args)
File "C:\Python\lib\urllib\request.py", line 504, in _call_chain
result = func(*args)
File "C:\Python\lib\urllib\request.py", line 650, in http_error_default
raise HTTPError(req.full_url, code, msg, hdrs, fp)
urllib.error.HTTPError: HTTP Error 404: Not Found
Do you have a solution to this problem?

amazon product api returning HTTP Error 400: Bad Request

I've access data using the amazon product API before and that was easy. All I had to do was make some api keys and use a module in Python and it gave back the data.
however, this was 4-5 months ago and now when I try to use the api keys, its giving me 400 error.
Here is my code
from amazon.api import AmazonAPI
asin = 'B00CAB5ZKC'
AMAZON_ACCESS_KEY='AKIAI4ZP7EZGNSTAWKTA'
AMAZON_SECRET_KEY ='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'
AMAZON_ASSOC_TAG =246152698300
amazon = AmazonAPI(AMAZON_ACCESS_KEY, AMAZON_SECRET_KEY, AMAZON_ASSOC_TAG, region = "DE")
product = amazon.lookup(ItemId=asin, Condition='All', MerchantId='All')
This is the error I get
Traceback (most recent call last):
File "C:\Users\Hari\Documents\Python\data\amazon_test_script.py", line 22, in <module>
product = amazon.lookup(ItemId=asin, Condition='All', MerchantId='All')
File "build\bdist.win-amd64\egg\amazon\api.py", line 173, in lookup
response = self.api.ItemLookup(ResponseGroup=ResponseGroup, **kwargs)
File "C:\Python27\lib\site-packages\bottlenose\api.py", line 265, in __call__
{'api_url': api_url, 'cache_url': cache_url})
File "C:\Python27\lib\site-packages\bottlenose\api.py", line 226, in _call_api
return urllib2.urlopen(api_request, timeout=self.Timeout)
File "C:\Python27\lib\urllib2.py", line 154, in urlopen
return opener.open(url, data, timeout)
File "C:\Python27\lib\urllib2.py", line 437, in open
response = meth(req, response)
File "C:\Python27\lib\urllib2.py", line 550, in http_response
'http', request, response, code, msg, hdrs)
File "C:\Python27\lib\urllib2.py", line 475, in error
return self._call_chain(*args)
File "C:\Python27\lib\urllib2.py", line 409, in _call_chain
result = func(*args)
File "C:\Python27\lib\urllib2.py", line 558, in http_error_default
raise HTTPError(req.get_full_url(), code, msg, hdrs, fp)
HTTPError: HTTP Error 400: Bad Request
>>>
I've been debugging this for 2 days now, with no success. Could anyone tell me what am I missing.

HTTP Error 403 returned on accessing Amazon API via Bottlenose (Python)

Anyway, I'm trying to write a simple request to the Amazon API using the following code:
ak = "***"
sk = "***"
at = "***"
import bottlenose
amazon = bottlenose.Amazon(ak, sk, at, "DE")
response=amazon.ItemLookup(ItemId="B00KWAO4CI")
print(response.price_and_currency)
It should return an XML object. Instead I get the following result:
Traceback (most recent call last):
File "simpleamazon.py", line 7, in <module>
response=amazon.ItemLookup(ItemId="B00KWAO4CI")
File "/Library/Python/2.7/site-packages/bottlenose/api.py", line 251, in __call__
{'api_url': api_url, 'cache_url': cache_url})
File "/Library/Python/2.7/site-packages/bottlenose/api.py", line 212, in _call_api
return urllib2.urlopen(api_request, timeout=self.Timeout)
File "/System/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/urllib2.py", line 154, in urlopen
return opener.open(url, data, timeout)
File "/System/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/urllib2.py", line 437, in open
response = meth(req, response)
File "/System/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/urllib2.py", line 550, in http_response
'http', request, response, code, msg, hdrs)
File "/System/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/urllib2.py", line 475, in error
return self._call_chain(*args)
File "/System/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/urllib2.py", line 409, in _call_chain
result = func(*args)
File "/System/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/urllib2.py", line 558, in http_error_default
raise HTTPError(req.get_full_url(), code, msg, hdrs, fp)
urllib2.HTTPError: HTTP Error 403: Forbidden
Up until recently I received HTTP Error 400 instead. To my knowledge I haven't changed anything. I've also tried using response groups, but it resulted in the same error(s).
Do you have any leads?
Using Python 3.5.2

Categories