Getting a CSV file from a URL - Python

I am trying to download a CSV file from a web portal. When doing it manually, we log in to the URL and click the Download CSV button, which prompts us to save the file. We are using Python 3.
I am trying to do this via a Python script, but when we execute the script we get the HTML page containing the Download CSV link rather than the CSV file itself.
import urllib.request
import requests

session = requests.session()
payload = {'j_username': 'avinash.reddy', 'j_password': 'password'}
# Log in, then fetch the page that links to the CSV
r = session.post('https://url_of_the_portal/auth/login', data=payload)
r = session.get('URL_of_the_page_where_the_csv_file_exists')
url = 'https://url_of_the_portal/review/download/bm_sis'
print('done')
# Note: urlretrieve opens a fresh, unauthenticated connection; the session cookies are not sent
urllib.request.urlretrieve(url, 'Download CSV')

I think it should look like this, plus your login creds (examples updated for Python 3, since that's what you are using):
import csv
import urllib.request

url = 'http://winterolympicsmedals.com/medals.csv'
response = urllib.request.urlopen(url)
# urlopen returns bytes, so decode each line before handing it to csv.reader
cr = csv.reader(line.decode('utf-8') for line in response)
for row in cr:
    print(row)
Alternatively:
import csv
import requests

url = 'http://winterolympicsmedals.com/medals.csv'
r = requests.get(url)
# iter_lines yields bytes unless asked to decode
text = r.iter_lines(decode_unicode=True)
reader = csv.reader(text, delimiter=',')
Alternatively:
import csv
import requests
from contextlib import closing

url = "http://download-and-process-csv-efficiently/python.csv"
with closing(requests.get(url, stream=True)) as r:
    reader = csv.reader(r.iter_lines(decode_unicode=True), delimiter=',', quotechar='"')
    for row in reader:
        # Handle each row here...
        print(row)
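Putting it together with the original question, here is a minimal sketch that logs in with the session and then streams the CSV to disk through that same session, so the authentication cookie is sent with the download request. The URLs and form field names are taken from the question; whether the portal accepts this form login is an assumption.
import requests

session = requests.Session()
payload = {'j_username': 'avinash.reddy', 'j_password': 'password'}
session.post('https://url_of_the_portal/auth/login', data=payload)

# Download through the same session so the login cookie is reused
url = 'https://url_of_the_portal/review/download/bm_sis'
r = session.get(url, stream=True)
with open('bm_sis.csv', 'wb') as f:
    for chunk in r.iter_content(chunk_size=8192):
        f.write(chunk)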

No connection adapters were found?

I am trying to scrape all the URLs in a CSV file: open the CSV file, read each URL, and open each one to grab the Source, Author, and License info. Then I need to follow the respective git link to see whether there is a license file; if there is, download it and save the details to the CSV file.
I have the code below in place; however, I am receiving the following error upon reading the first URL in my file:
No connection adapters were found for "['https://tools.kali.org/information-gathering/ace-voip']"
Actual Error:
File "ommitted", line 742, in get_adapter
raise InvalidSchema("No connection adapters were found for {!r}".format(url))
InvalidSchema: No connection adapters were found for "['https://tools.kali.org/information-gathering/ace-voip']"
I think this is happening because of the added "[' in front of my URL; however, this doesn't exist in my file of listed URLs.
I am new to Python and appreciate any and all help on this.
import urllib.request, urllib.parse, urllib.error
import ssl
import zlib
from bs4 import BeautifulSoup
import csv
from urllib.request import urlopen
import urllib
import urllib.parse
import requests

# Testing ssl and reading url
# urllib.request.urlopen('https://google.com').read()
ctx = ssl._create_default_https_context()
# Establish chrome driver and go to report site URL
ctx.check_hostname = False
ctx.verify_mode = ssl.CERT_NONE

url = 'https://tools.kali.org/tools-listing'
html = urllib.request.urlopen(url, context=ctx)  # .read().decode('utf-8')
de_data = zlib.decompress(html.read(), 16 + zlib.MAX_WBITS)
print(de_data)

soup = BeautifulSoup(de_data, 'lxml')
data = []
for url in soup.find_all('a', href=True, text=True):
    print(url['href'])
    data.append(url['href'])
print(data)

#### New replacement for above that works, removing spaces ########
with open('kalitools.csv', 'w') as file:
    for url in data:
        file.write(str(url) + '\n')

# loading csv file with URLs and parsing each
###### TESTING reading URLs ########
with open('E:/KaliScrape/kalitools.txt', 'r') as f_urls, open('ommitted/output.txt', 'w', newline='') as f_output:
    csv_urls = csv.reader(f_urls)
    csv_output = csv.writer(f_output)
    csv_output.writerow(['Source', 'Author', 'License'])
    print(csv_urls)
    for line in csv_urls:
        r = requests.get(line)  # .text
        soup = BeautifulSoup(r, 'lxml')
        # r = requests.get(line[0], verify=False)  # .text
        # for line in csv_urls:
        #     line = 'https://' + line if 'https' not in line else line
        #     source = urlopen(line).read()
        src = soup.find('li')
        print('Source:', src.text)
        auth = soup.find('li')
        print('Author:', auth.text)
        lic = soup.find('li')
        print('License:', lic.text)
        csv_output.writerow([src.text, auth.text, lic.text])
So, the problem is that csv.reader gives you each row as a list, and you just need to pick the element at index zero:
for line in csv_urls:
    r = requests.get(line[0])  # .text
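For reference, a minimal sketch of the corrected loop with two defensive additions that are not in the original code: stripping whitespace and skipping rows without an http(s) scheme, which is exactly the condition that triggers the "No connection adapters" error.
import csv
import requests
from bs4 import BeautifulSoup

with open('E:/KaliScrape/kalitools.txt', 'r') as f_urls:
    for row in csv.reader(f_urls):
        if not row:
            continue  # skip blank lines
        target = row[0].strip()  # each row is a list; take the first cell
        if not target.startswith(('http://', 'https://')):
            continue  # requests raises InvalidSchema without a scheme
        r = requests.get(target)
        soup = BeautifulSoup(r.text, 'lxml')  # parse the text, not the Response object
        print(target, '->', r.status_code)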

How to download a CSV file and a ZIP file in Python?

I have been trying to download the CSV and ZIP files from the given links:
https://nseindia.com/content/fo/fo.zip
https://nseindia.com/archives/nsccl/sett/FOSett_prce_17052019.csv
The following code gives the error HTTP Error 403: Forbidden:
import urllib.request
csv_url = 'https://nseindia.com/archives/nsccl/sett/FOSett_prce_17052019.csv'
urllib.request.urlretrieve(csv_url, '17_05.csv')
Your problem is that the website's server blocks the default User-Agent of Python-urllib (Python-urllib/3.7). However, you can bypass the block by changing the User-Agent header:
import urllib.request
opener = urllib.request.build_opener()
opener.addheaders = [('User-agent', 'Mozilla/5.0')]
urllib.request.install_opener(opener)
csv_url = 'https://nseindia.com/archives/nsccl/sett/FOSett_prce_17052019.csv'
urllib.request.urlretrieve(csv_url, '17_05.csv')
Here is how you can get the content of the CSV file and read it row by row:
import csv
import requests
CSV_URL = 'https://nseindia.com/archives/nsccl/sett/FOSett_prce_17052019.csv'
with requests.Session() as s:
    download = s.get(CSV_URL)
    decoded_content = download.content.decode('utf-8')
    cr = csv.reader(decoded_content.splitlines(), delimiter=',')
    my_list = list(cr)
    for row in my_list:
        print(row)
Install the requests package:
pip install requests
Then use the requests.get API to download the file and write it to the desired location:
import requests

csv_url = 'https://nseindia.com/archives/nsccl/sett/FOSett_prce_17052019.csv'
r = requests.get(csv_url, allow_redirects=True)
with open('test.csv', 'wb') as f:
    f.write(r.content)
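Note that if the server blocks the default python-requests User-Agent as well, the same header trick shown above for urllib applies to requests; a sketch (the Mozilla/5.0 value is just an example string):
import requests

csv_url = 'https://nseindia.com/archives/nsccl/sett/FOSett_prce_17052019.csv'
# Present a browser-like User-Agent in case the default is blocked too
r = requests.get(csv_url, headers={'User-Agent': 'Mozilla/5.0'}, allow_redirects=True)
with open('17_05.csv', 'wb') as f:
    f.write(r.content)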

Retrieving data from password-protected webpages with Python 3.3.3

I am trying to use urllib to access a website and then scrape the page source so I can collect some data from it. I know how to do this for public websites, but I don't know how to do it for password-protected webpages. I know the username and password; I am just very confused about how to get urllib to submit the correct credentials and then route me to the correct page that I want to scrape the data from. Currently, my code looks like this. The problem is that it is bringing up the login page's source.
from tkinter import *
from tkinter import filedialog  # needed explicitly; the star import does not pull in filedialog
import csv
from re import findall
import urllib.request

def info():
    file = filedialog.askopenfilename()
    fileR = open(file, 'r')
    hold = csv.reader(fileR, delimiter=',', quotechar='|')
    aList = []
    for item in hold:
        if item[1] and item[2] == "":
            print(item[1])
            url = "www.example.com/id=" + item[1]
            request = urllib.request.urlopen(url)
            html = request.read()
            data = str(html)
            person = findall('''\$MainContent\$txtRecipient\"\stype=\"text\"\svalue=\"([^\"]+)\"''', data)
        else:
            pass
    fileR.close()
Remember, I am using Python 3.3.3. Any help would be appreciated!
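A minimal sketch of the usual urllib approach for this: hold cookies across requests with http.cookiejar, POST the login form once, then fetch the protected page with the same opener. The form URL and the username/password field names are assumptions; inspect the login page's HTML for the real ones.
import urllib.request
import urllib.parse
import http.cookiejar

# Keep the session cookie the site sets after a successful login
cj = http.cookiejar.CookieJar()
opener = urllib.request.build_opener(urllib.request.HTTPCookieProcessor(cj))

# Assumed form action and field names; check the login page for the real ones
creds = urllib.parse.urlencode({'username': 'me', 'password': 'secret'}).encode('ascii')
opener.open('http://www.example.com/login', creds)

# The opener now carries the session cookie, so the protected page comes back
html = opener.open('http://www.example.com/id=123').read()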

How to open a .csv file from a URL with Python?

I'm trying to open a CSV file from a URL, but for some reason I get an error saying that there is an invalid mode or filename. I'm not sure what the issue is. Help?
url = "http://...."
data = open(url, "r")
read = csv.DictReader(data)
Download the stream, then process:
import csv
import urllib2

url = "http://httpbin.org/get"
response = urllib2.urlopen(url)
data = response.read()
# DictReader wants an iterable of lines, not one long string
read = csv.DictReader(data.splitlines())
I recommend pandas for this:
import pandas as pd

read = pd.read_csv("http://....", ...)
Please see the documentation.
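For instance, a hypothetical usage with the medals CSV that appears elsewhere on this page; read_csv accepts a URL directly and returns a DataFrame:
import pandas as pd

df = pd.read_csv('http://winterolympicsmedals.com/medals.csv')
print(df.head())  # first five rows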
You can do the following :
import csv
import urllib2
url = 'http://winterolympicsmedals.com/medals.csv'
response = urllib2.urlopen(url)
cr = csv.reader(response)
for row in cr:
    print row
Slightly tongue in cheek:
>>> import json
>>> for line in file('yourfile.csv'):
...     print json.loads('[' + line + ']')
CSV is not a well-defined format; JSON is, so this will parse a certain type of CSV correctly every time.

How to save a redirection URL to a CSV file in Python

I have a redirection URL:
www.test.com
It will redirect me to:
www.test.com/XXYYXXYY
Every time I open it, it redirects me to a new URL (the XXYYXXYY part changes every time), so I want to save the URLs into a CSV file.
import urllib2
import csv
import sys
url = 'http://www.test.com'
u = urllib2.urlopen(url)
localFile = open('file.csv', 'w')
localFile.write(u.read())
localFile.close()
Is this code correct?
Thank you.
geturl() will give you the final URL:
import urllib2
import csv
import sys
url = 'http://www.test.com'
u = urllib2.urlopen(url)
localFile = open('file.csv', 'w')
localFile.write(u.geturl())
localFile.close()
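One caveat, as a sketch: since the goal is to collect a new URL on every run, note that mode 'w' overwrites file.csv each time, so an append-mode variant with a csv writer may be closer to what's wanted (a hypothetical adaptation of the answer above):
import csv
import urllib2

u = urllib2.urlopen('http://www.test.com')

# 'ab' appends one row per run instead of overwriting (binary mode for Python 2's csv)
with open('file.csv', 'ab') as localFile:
    csv.writer(localFile).writerow([u.geturl()])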
