The code below returns None. How can I fix it? I'm using Python 2.6.
import urllib

URL = "http://download.finance.yahoo.com/d/quotes.csv?s=%s&f=sl1t1v&e=.csv"
symbols = ('GGP', 'JPM', 'AIG', 'AMZN', 'GGP', 'JPM', 'AIG', 'AMZN')
#symbols = ('GGP')

def fetch_quote(symbols):
    url = URL % '+'.join(symbols)
    fp = urllib.urlopen(url)
    try:
        data = fp.read()
    finally:
        fp.close()

def main():
    data_fp = fetch_quote(symbols)
    # print data_fp

if __name__ == '__main__':
    main()
You have to explicitly return the data from the fetch_quote function. Something like this:
def fetch_quote(symbols):
    url = URL % '+'.join(symbols)
    fp = urllib.urlopen(url)
    try:
        data = fp.read()
    finally:
        fp.close()
    return data  # <======== Return
In the absence of an explicit return statement, Python returns None, which is what you are seeing.
Your method doesn't explicitly return anything, so it returns None.
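With the return in place, the caller receives the CSV text instead of None. A minimal sketch of main() using the returned value (the print line is just for illustration):

def main():
    data = fetch_quote(symbols)
    print data  # Python 2 print statement; data now holds the CSV text, not None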
I am trying to run a multithreaded email checker to see if the emails are Office 365 valid.
Looking over and over my code, I cannot seem to find the reason it's not working correctly.
It should be appending the email to a GOOD or BAD list.
Instead, it's not appending anything!
This is my code:
...
currentDirectory = os.getcwd() # set the current directory - /new/
# Locations
location_emails_goods = currentDirectory + '/contacts/goods/'
location_emails_bads = currentDirectory + '/contacts/bads/'
location_emails = currentDirectory + '/contacts/contacts.txt'
now = datetime.now()
todayString = now.strftime('%d-%m-%Y-%H-%M-%S')
FILE_NAME_DATE_GOODS = None
FILE_NAME_DATE_BADS = None
ALL_EMAILS = get_contacts(location_emails)
url = 'https://login.microsoftonline.com/common/GetCredentialType'
# Get all emails
def get_contacts(filename):
    emails = []
    with open(filename, mode='r', encoding='utf-8') as contacts_file:
        for a_contact in contacts_file:
            emails.append(a_contact.strip())
    return emails
def saveLogs():
    global GOOD_EMAILS_ARRAY, BAD_EMAILS_ARRAY, file_bads, file_goods, FILE_NAME_DATE_GOODS, FILE_NAME_DATE_BADS
    #print(GOOD_EMAILS_ARRAY)
    for good in GOOD_EMAILS_ARRAY:
        file_goods.write(good + '\n')
    file_goods.close()
    for bad in BAD_EMAILS_ARRAY:
        file_bads.write(bad + '\n')
    file_bads.close()
def newChecker(email):
    global url, GOOD_EMAILS_ARRAY, BAD_EMAILS_ARRAY
    s = req.session()
    body = '{"Username":"%s"}' % email
    request = req.post(url, data=body)
    response = request.text
    valid = re.search('"IfExistsResult":0,', response)
    invalid = re.search('"IfExistsResult":1,', response)
    if invalid:
        BAD_EMAILS_ARRAY.append(email)
        if valid:
            GOOD_EMAILS_ARRAY.append(email)
    else:
        if valid:
            GOOD_EMAILS_ARRAY.append(email)
        else:
            BAD_EMAILS_ARRAY.append(email)
    # The following prints an empty array even though I have defined GOOD_EMAILS_ARRAY globally, so it should be updating
    print(GOOD_EMAILS_ARRAY)
def mp_handler(p):
    global ALL_EMAILS
    p.map(newChecker, ALL_EMAILS)
if __name__ == '__main__':
    # For each email, parse it into our checker
    # Define a filename to save to
    FILE_NAME_DATE_GOODS = '{}{}{}'.format(location_emails_goods, todayString, '.txt')
    FILE_NAME_DATE_BADS = '{}{}{}'.format(location_emails_bads, todayString, '.txt')
    file_bads = open(FILE_NAME_DATE_BADS, 'a')
    file_goods = open(FILE_NAME_DATE_GOODS, 'a')
    p = multiprocessing.Pool(500)
    mp_handler(p)
    saveLogs()
    p.close()
As you can see, I am trying to append an email to either GOOD_EMAILS_ARRAY or BAD_EMAILS_ARRAY.
BAD_EMAILS_ARRAY and GOOD_EMAILS_ARRAY are global variables, but for some reason nothing gets appended to them.
I am running this through multiprocessing, if you need to know.
Any ideas, or errors you can spot in my code?
Processes do not share memory; a global variable with the same name in two processes is two different objects.
If you need to share state between processes, see this:
https://docs.python.org/3/library/multiprocessing.html#sharing-state-between-processes
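A minimal sketch of the underlying issue (hypothetical names, not the asker's code): each worker process appends to its own copy of the list, so the parent's copy stays empty:

import multiprocessing

results = []  # plain global list; every process gets its own copy

def worker(n):
    results.append(n)  # modifies the copy inside the child process only

if __name__ == '__main__':
    p = multiprocessing.Pool(4)
    p.map(worker, range(4))
    p.close()
    p.join()
    print(results)  # prints [] in the parent; the children's appends are lost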
Okay so it turns out that I just needed to use the Manager from multiprocessing:
from multiprocessing import Manager, Pool
Then I could use a normal list through the manager, such as:
# Set empty arrays using manager so we can carry it over
manager = Manager()
bad_list = manager.list()
good_list = manager.list()
This allowed me to use my script as it was, just swapping in these new Manager-backed lists, which works just how I wanted :)
...
FILE_NAME_DATE_GOODS = None
FILE_NAME_DATE_BADS = None
# Set empty arrays using manager so we can carry it over
manager = Manager()
bad_list = manager.list()
good_list = manager.list()
# Get all emails
def get_contacts(filename):
    emails = []
    with open(filename, mode='r', encoding='utf-8') as contacts_file:
        for a_contact in contacts_file:
            emails.append(a_contact.strip())
    return emails
ALL_EMAILS = get_contacts(location_emails)
url = 'https://login.microsoftonline.com/common/GetCredentialType'
def saveLogs():
    global file_bads, file_goods, FILE_NAME_DATE_GOODS, FILE_NAME_DATE_BADS, good_list, bad_list
    for good in good_list:
        file_goods.write(good + '\n')
    file_goods.close()
    for bad in bad_list:
        file_bads.write(bad + '\n')
    file_bads.close()
    print('{} => Fully completed email scanning'.format(Fore.CYAN))
    print('{} => Good emails [{}] || Bad emails [{}]'.format(Fore.GREEN, FILE_NAME_DATE_GOODS, FILE_NAME_DATE_BADS))
def newChecker(email):
    global url, good_list, bad_list
    s = req.session()
    body = '{"Username":"%s"}' % email
    request = req.post(url, data=body)
    response = request.text
    valid = re.search('"IfExistsResult":0,', response)
    invalid = re.search('"IfExistsResult":1,', response)
    if invalid:
        bad_list.append(email)
        if valid:
            good_list.append(email)
    else:
        if valid:
            good_list.append(email)
        else:
            bad_list.append(email)
def mp_handler(p):
    global ALL_EMAILS
    p.map(newChecker, ALL_EMAILS)
if __name__ == '__main__':
    # For each email, parse it into our checker
    # Define a filename to save to
    FILE_NAME_DATE_GOODS = '{}{}{}'.format(location_emails_goods, todayString, '.txt')
    FILE_NAME_DATE_BADS = '{}{}{}'.format(location_emails_bads, todayString, '.txt')
    file_bads = open(FILE_NAME_DATE_BADS, 'a')
    file_goods = open(FILE_NAME_DATE_GOODS, 'a')
    p = multiprocessing.Pool(500)
    mp_handler(p)
    saveLogs()
    p.close()
I have put together the below and wanted to test multithreading.
I am trying to make the for loop run threaded, so several URLs in the list can be processed in parallel.
This script doesn't error, but it doesn't do anything and I am not sure why.
If I remove the multithreading pieces, it works fine.
Can anyone help me?
import multiprocessing.dummy as mp
import requests
import pandas as pd
import datetime
urls = [
    'http://google.co.uk',
    'http://bbc.co.uk/'
]

def do_print(s):
    check_data = pd.DataFrame([])
    now = datetime.datetime.now()
    try:
        response = requests.get(url)
    except:
        response = 'null'
    try:
        response_code = response.status_code
    except:
        response_code = 'null'
    try:
        response_content = response.content
    except:
        response_content = 'null'
    try:
        response_text = response.text
    except:
        response_text = 'null'
    try:
        response_content_type = response.headers['Content-Type']
    except:
        response_content_type = 'null'
    try:
        response_server = response.headers['Server']
    except:
        response_server = 'null'
    try:
        response_last_modified = response.headers['Last-Modified']
    except:
        response_last_modified = 'null'
    try:
        response_content_encoding = response.headers['Content-Encoding']
    except:
        response_content_encoding = 'null'
    try:
        response_content_length = response.headers['Content-Length']
    except:
        response_content_length = 'null'
    try:
        response_url = response.url
    except:
        response_url = 'null'
    if int(response_code) < 400:
        availability = 'OK'
    elif int(response_code) > 399 and int(response_code) < 500:
        availability = 'Client Error'
    elif int(response_code) > 499:
        availability = 'Server Error'
    if int(response_code) < 400:
        availability_score = 1
    elif int(response_code) > 399 and int(response_code) < 500:
        availability_score = 0
    elif int(response_code) > 499:
        availability_score = 0
    d = {'check_time': [now], 'code': [response_code], 'type': [response_content_type], 'url': [response_url], 'server': [response_server], 'modified': [response_last_modified], 'encoding': [response_content_encoding], 'availability': [availability], 'availability_score': [availability_score]}
    df = pd.DataFrame(data=d)
    check_data = check_data.append(df, ignore_index=True, sort=False)

if __name__ == "__main__":
    p = mp.Pool(4)
    p.map(do_print, urls)
    p.close()
    p.join()
When I run the code I get an error, because it tries to convert int("null"), all because you have:

except:
    response_code = 'null'
If I use except Exception as ex: print(ex) then I get an error that the variable url doesn't exist. And it is true, because you have def do_print(s): but it should be def do_print(url):
BTW: instead of 'null' you could use the standard None and later check if response_code: before you try to convert it to an integer. Or simply skip the rest of the code when you get an error.
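A minimal sketch of that suggestion (same try/except structure, just with None as the sentinel instead of the string 'null'):

try:
    response_code = response.status_code
except Exception:
    response_code = None

# classify only when a real status code was obtained
if response_code is not None and response_code < 400:
    availability = 'OK'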
Other problem: the worker function should use return df, and you should collect the returned values with results = p.map(...) and then use results to create the DataFrame check_data.
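A minimal sketch of that corrected pattern (assuming the body of do_print builds df exactly as before):

def do_print(url):  # parameter renamed so requests.get(url) resolves
    # ... build the one-row DataFrame df as in the original code ...
    return df

if __name__ == "__main__":
    p = mp.Pool(4)
    results = p.map(do_print, urls)  # one DataFrame per URL
    p.close()
    p.join()
    check_data = pd.concat(results, ignore_index=True, sort=False)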
I would like to modify this script to use offline files. If I download the file from a URL it works, but if I load the same file from my hard drive it does not open. Can someone help me understand why, and how to do it? Thank you.
def INDEX():
    TVLIST('https://www.*********/playlist/*******/test.m3u')

def TVLIST(url):
    try:
        m3u = getHtml(url)
        parsem3u(m3u)
    except:
        addDir('Nothing found', '', '', '', Folder=False)
    xbmcplugin.endOfDirectory(int(sys.argv[1]))
urlopen = urllib2.urlopen
Request = urllib2.Request

def getHtml(url, referer=None, hdr=None, data=None):
    if not hdr:
        req = Request(url, data, headers)
    else:
        req = Request(url, data, hdr)
    if referer:
        req.add_header('Referer', referer)
    if data:
        req.add_header('Content-Length', len(data))
    response = urlopen(req)
    if response.info().get('Content-Encoding') == 'gzip':
        buf = StringIO(response.read())
        f = gzip.GzipFile(fileobj=buf)
        data = f.read()
        f.close()
    else:
        data = response.read()
    response.close()
    return data
def parsem3u(html, sitechk=True):
    match = re.compile('#.+,(.+?)\n(.+?)\n').findall(html)
    txtfilter = GETFILTER()
    txtfilter = txtfilter.split(',') if txtfilter else []
    txtfilter = [f.lower().strip() for f in txtfilter]
    i = 0
    count = 0
    for name, url in match:
        status = ""
        url = url.replace('\r', '')
        if not txtfilter or any(f in name.lower() for f in txtfilter):
            if sitechk:
                if i < 5:
                    try:
                        siteup = urllib.urlopen(url).getcode()
                        status = " [COLOR red]offline[/COLOR]" if siteup != 200 else " [COLOR green]online[/COLOR]"
                    except:
                        status = " [COLOR red]offline[/COLOR]"
                    i += 1
            addPlayLink(name + status, url, 3, uiptvicon)
            count += 1
    return count
I thought it was enough to use the local path:
def INDEX():
    TVLIST(r'c:\Desktop\IPTVLIST\M3U\playlist\test.m3u')
Can anyone explain why it does not work, and how I can make it work? Thank you.
As suggested by @languitar in the comments, you would use a file:// URL, which should of course work on Windows. But moving to a platform like Android you have a different file system there; you don't have a C drive. So make sure you have an alternative location on Android.
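A minimal sketch of that idea (the to_openable helper is an assumption for illustration, not part of the original add-on): convert a bare local path into a file:// URL so the existing urllib2-based getHtml can still open it:

import os
import urllib2

def to_openable(path_or_url):
    # leave real URLs untouched; turn bare local paths into file:// URLs
    if '://' in path_or_url:
        return path_or_url
    return 'file:///' + os.path.abspath(path_or_url).replace('\\', '/')

m3u = urllib2.urlopen(to_openable(r'c:\Desktop\IPTVLIST\M3U\playlist\test.m3u')).read()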
The function below receives file chunks from web requests and assembles them. It works perfectly in Unix (OSX), but on Windows it doesn't. Specifically, the file does assemble; however, it always ends up too small, just a few KB. I cannot figure out what is causing this. No exceptions are raised; it all appears to work, except that the final file is not all there. I've included the entire function for context, but I've marked the section which appears not to be working correctly. (Python 2.7 and Windows Server 2008 R2)
@view_config(route_name='upload', renderer='json')
def upload(request):
    r = request.response
    final_dir = 'w:\\foobar'
    filename = request.params.get('flowFilename')
    chunk_number = request.params.get('flowChunkNumber')
    total_chunks = request.params.get('flowTotalChunks')
    try:
        temp_dir = os.path.join(final_dir, request.params.get('flowIdentifier'))
        file_part = os.path.join(temp_dir, '%s.part.%s' % (filename, chunk_number))
        if not os.path.exists(temp_dir):
            os.makedirs(temp_dir)
    except TypeError:
        pass
    if request.method == 'GET':
        if file_part:
            if os.path.isfile(file_part):
                r.status = 200
            else:
                r.status = 404
        return r
    if request.POST:
        try:
            fo = request.params.get('file').file
            f = open(file_part, 'wb')
            f.write(fo.read())
            f.close()
            if chunk_number == total_chunks:
                final_filename = os.path.join(final_dir, filename)
                temp_filename = filename + '_INCOMPLETE'
                #####################################################################
                # This is where it appears to be going wrong...
                final_file = open(temp_filename, 'a+b')
                try:
                    for i in range(1, int(total_chunks) + 1):
                        ff = open(os.path.join(temp_dir, '%s.part.%s' % (filename, i)))
                        final_file.write(ff.read())
                        ff.close()
                    final_file.close()
                    os.rename(temp_filename, final_filename)  # rename to final filename
                    shutil.rmtree(temp_dir)  # clean up temp part files
                except:
                    raise
                ####################################################################
            r.status = 200
        except Exception, e:
            print 'ERROR', e.message
            r.status = 404
    return r
I am trying to upload a file using multipart_encode to realize the MIME process. However, I met the following error: AttributeError: multipart_yielder instance has no attribute '__len__'. Below is my approach; I would really appreciate it if anyone can give me some suggestions.
url = "https://pi-user-files.s3-external-1.amazonaws.com/"
post_data = {}
#data is a dict
post_data['AWSAccessKeyId']=(data['ticket']['AWSAccessKeyId'])
post_data['success_action_redirect']=(data['ticket']['success_action_redirect'])
post_data['acl']=(data['ticket']['acl'])
post_data['key']=(data['ticket']['key'])
post_data['signature']=(data['ticket']['signature'])
post_data['policy']=(data['ticket']['policy'])
post_data['Content-Type']=(data['ticket']['Content-Type'])
#I would like to upload a text file "new 2"
post_data['file']=open("new 2.txt", "rb")
datagen, headers = multipart_encode(post_data)
request2 = urllib2.Request(url, datagen, headers)
result = urllib2.urlopen(request2)
If you want to send a file, you should wrap the other parameters in MultipartParam objects. Example code for creating a file-upload request:
from poster.encode import multipart_encode, MultipartParam
import urllib2

def postFileRequest(url, paramName, fileObj, additionalHeaders={}, additionalParams={}):
    items = []
    # wrap post parameters
    for name, value in additionalParams.items():
        items.append(MultipartParam(name, value))
    # add file
    items.append(MultipartParam.from_file(paramName, fileObj))
    datagen, headers = multipart_encode(items)
    # add headers
    for item, value in additionalHeaders.iteritems():
        headers[item] = value
    return urllib2.Request(url, datagen, headers)
Also, I think you should execute register_openers() once at the beginning. You can find some details in the docs.
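For reference, a minimal sketch of that one-time setup call (register_openers lives in poster.streaminghttp):

from poster.streaminghttp import register_openers

register_openers()  # installs poster's streaming HTTP handlers into urllib2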
The problem is that in httplib.py, the generator is not detected as such and is treated instead like a string that holds the full data to be sent (and therefore it tries to find its length):
if hasattr(data,'read') and not isinstance(data, array): # generator
    if self.debuglevel > 0: print "sendIng a read()able"
    ....
A solution is to make the generator act like a read()able:
class GeneratorToReadable():
    def __init__(self, datagen):
        self.generator = datagen
        self._end = False
        self.data = ''

    def read(self, n_bytes):
        while not self._end and len(self.data) < n_bytes:
            try:
                next_chunk = self.generator.next()
                if next_chunk:
                    self.data += next_chunk
                else:
                    self._end = True
            except StopIteration:
                self._end = True
        result = self.data[0:n_bytes]
        self.data = self.data[n_bytes:]
        return result
and use it like so:
datagen, headers = multipart_encode(post_data)
readable = GeneratorToReadable(datagen)
req = urllib2.Request(url, readable, headers)
result = urllib2.urlopen(req)