I have a python script to scan internet with the following structure that I'm trying to adapt to read alist of IPs instead a range of IPs
/ip-ranges
range.txt
/script
loader.py
scanner.py
the scanner.py is the following
# Import modules
from .inspection import Request, InspectPaths, InspectContent, PortIsOpen, GetTitle
from ipaddress import ip_address
from threading import Thread
# Scan result
class __Result:
def __init__(self, name, atype, path, title):
self.name = name
self.type = atype
self.path = path
self.title = title
# Return IPs in IPv4 range, inclusive.
import ipaddress
def process(iptext):
try:
print(ipaddress.ip_interface(iptext).network)
return
except Exception:
print("INVALID")
return
with open('ipaddresses.txt', 'r') as f:
for line in f:
line = "".join(line.split())
process(line)
# Scan IP address range
def ScanRange(ranges):
threads = []
# *-- Scan IP range --*
for address in IPsRange(ranges):
t = Thread(
target=__СheckAddrThreaded,
args=(address,)
)
threads.append(t)
t.start()
for thread in threads:
thread.join()
The loader.py is the following
# Import modules
from os import listdir
from sys import exit
# Select IP ranges from directory
def SelectIPRanges():
path = "ip-ranges/" # Directory path.
files = listdir(path) # Get directory files.
for i, f in enumerate(files): # Enumerate and
print(f" [{i+1}] - {f}") # print files.
# *-- Get user input --*
try:
file = path + files[int(input("\n [?] Please select country to scan --> ")) - 1]
except ValueError:
exit(f" [!] ERROR: Please enter a numerical value!")
except IndexError:
exit(f" [!] ERROR: Please enter value from 1 to {len(files)}!")
else:
# *-- Read file --*
with open(file, "r") as ranges_file: # Open file in reading mode.
ranges = ranges_file.readlines() # Read all lines.
return ranges, file.split("/")[-1]
The changes that I have made on the scanner.py was on this part
# Return IPs in IPv4 range, inclusive.
import ipaddress
def process(iptext):
try:
print(ipaddress.ip_interface(iptext).network)
return
except Exception:
print("INVALID")
return
with open('ipaddresses.txt', 'r') as f:
for line in f:
line = "".join(line.split())
process(line)
the original scanner.py is
# Import modules
from .inspection import Request, InspectPaths, InspectContent, PortIsOpen, GetTitle
from ipaddress import ip_address
from threading import Thread
# Scan result
class __Result:
def __init__(self, name, atype, path, title):
self.name = name
self.type = atype
self.path = path
self.title = title
# Return IPs in IPv4 range, inclusive.
def IPsRange(start='', end=''):
if not start and not end:
return []
if not end and start.__contains__("-"):
start, end = start.split("-")
end = end.replace("\n","")
start = int(ip_address(start).packed.hex(), 16)
end = int(ip_address(end).packed.hex(), 16)
return [ip_address(ip).exploded for ip in range(start, end)]
# Scan IP address range
def ScanRange(ranges):
threads = []
# *-- Scan IP range --*
for address in IPsRange(ranges):
t = Thread(
target=__СheckAddrThreaded,
args=(address,)
)
threads.append(t)
t.start()
for thread in threads:
thread.join()
At this moment I'm getting some hard time to link the loader.py and scanner.py. There are some changes that I know I need to do but can't figure out exactly what. I would requeste some guidance here
Thanks you all
So you load all possible ips with loader, and process them with scanner correct?
Right. I see. You suggest to import loader.py in scanner.py, but in this case I'd rather make a class that holds related functionality:
# Import modules
from .inspection import Request, InspectPaths, InspectContent, PortIsOpen, GetTitle
from ipaddress import ip_address
from threading import Thread
import ipaddress
from os import listdir
from sys import exit
# Scan result
class IP_Scanner():
def __init__(self, name='', atype='', path='', title=''):
self.name = name
self.type = atype
self.path = path
self.title = title
self.processed_range = []
# Return IPs in IPv4 range, inclusive.
def process(self, ip):
try:
line = "".join(ip.split())
if ipaddress.ip_interface(line).network:
self.processed_range.append(line)
except Exception:
print("INVALID: "+line)
return 0
def set_range(self):
# Select IP ranges from directory
path = "ip-ranges/" # Directory path.
files = listdir(path) # Get directory files.
for i, f in enumerate(files): # Enumerate and
print(f" [{i + 1}] - {f}") # print files.
# *-- Get user input --*
try:
file = path + files[int(input("\n [?] Please select country to scan --> ")) - 1]
except ValueError:
exit(f" [!] ERROR: Please enter a numerical value!")
except IndexError:
exit(f" [!] ERROR: Please enter value from 1 to {len(files)}!")
else:
# *-- Read file --*
with open(file, "r") as ranges_file: # Open file in reading mode.
self.range = ranges_file.readlines() # Read all lines.
return self.range, file.split("/")[-1]
# Scan IP address range
def scan_range(self):
if self.range:
threads = []
# *-- Scan IP range --*
for address in self.range:
t = Thread(
target=process, #__СheckAddrThreaded, #I'm assuming this is what your process is doing
args=(address,)
)
threads.append(t)
t.start()
for thread in threads:
thread.join()
if __name__ == '__main__':
x = IP_Scanner()
x.set_range()
x.scan_range()
print('Success on: '+x.processed_range)
Many other ways to do this, but this encapsulates the related info into an object so you can run many instances of it in parallel.
For imports from a different folder with init.py (or package), see: https://realpython.com/absolute-vs-relative-python-imports/
Related
I've got a method to download a bunch of files, and then do things with them.
The multithreaded download methods worked when not in a class, but when I put them inside the class, they cease processing immediately after initiating the first file in the list. There are no errors thrown; the URL call is good, etc. So I am probably missing something related to OOP in python.
from multiprocessing import cpu_count
from multiprocessing.pool import ThreadPool
import os
from requests import Session
import time
class OSM
def __init__(self):
self.url_root = "https://my.site/index.html"
self.s = self._mount_session()
self.data = None # zip object of download links and associated local paths
self.download_path = "C:/Temp"
def _download_parallel(self, args):
results = ThreadPool(cpu_count() - 1).imap_unordered(self._download_url, args)
for result in results:
print(f"URL: {result[0]} | Time (s): {result[1]}")
def _download_url(self, args):
t0 = time.time()
url, fn = args[0], args[1]
try:
r = self.s.get(url)
with open(fn, 'wb') as f:
f.write(r.content)
return (url, time.time() - t0)
except Exception as e:
print(f"Exception in _download_url(): {e}")
pass
def _mount_session(self):
return Session() # placefiller, the session is negotiated in here
def download(self):
# expose this to the user
if not os.path.exists(self.download_path): os.makedirs(self.download_path)
return self._download_parallel(self.data)
def do_stuff_with_files(self):
# process files, etc
return
def get_file_list(self):
dl_links = []
local_files = []
# check the website, get list of links, create list of local files, zip together
self.data = zip(dl_links, local_files)
if __name__ == "__main__":
o = OSM()
o.get_file_list()
o.download()
I am trying to execute below code to get the list of files from 2 nodes parallel and process them:
#!/usr/bin/python
import paramiko
import cffi
from multiprocessing import Process, Pool
key1 = paramiko.RSAKey.from_private_key_file("/Users//Downloads/test1.pem")
key2 = paramiko.RSAKey.from_private_key_file("/Users//Downloads/test2.pem")
client = paramiko.SSHClient()
client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
# Connect/ssh to an instance
file_list = []
host1 = '3.93.XX.X'
host2 = '3.93.XX.X'
filepath="/home/ubuntu/test/"
def readFileNode(host,key):
try:
# Here 'ubuntu' is user name and 'instance_ip' is public IP of EC2
client.connect(hostname=host, username="ubuntu", pkey=key)
sftp = client.open_sftp()
file_list=sftp.listdir_attr(filepath)
client.close()
# break
except Exception:
print("Exception")
return file_list
def compareFilesOnNode(listoffiles):
fileNode1 = []
fileNode1 = listoffiles[0]
fileNode2 = []
fileNode2 = listoffiles[1]
client.connect(hostname=host1, username="ubuntu", pkey=key)
mysftp1 = client.open_sftp()
client.connect(hostname=host2, username="ubuntu", pkey=key)
mysftp2 = client.open_sftp()
print(fileNode1[0].filename)
print(fileNode2[0].filename)
FileCountNode1 = len(fileNode1)
FileCountNode2 = len(fileNode2)
print("Files in Node1:", FileCountNode1, "Total Files in Node2:", FileCountNode2)
# loop should be running number of times that is equal to node having less number of files
counter=min(len(fileNode1),len(fileNode2))
for i in range(0,counter ):
filePath1 = filepath + fileNode1[i].filename
filePath2 = filepath + fileNode2[i].filename
print("Comparing:", filePath1, "From host:", host1, "to", filePath2, "In Host:", host2)
s1 = mysftp1.open(filePath1)
s2 = mysftp2.open(filePath2)
#file metadata validation
print(s1.stat)
print(s2.stat)
if (s1.stat() == s2.stat()):
print("File Stats are Equal")
else:
print("Stats are not Equal")
# check integrity of file
print("Checksum file from Node 1 is", s1.__hash__())
print("Cheksum file from Node2 is ", s2.__hash__())
# match metadata of files from both nodes
if (s1.readlines() == s2.readlines()):
print("File content are Equal")
yield "pass"
else:
print("File Content are Not Equal")
yield "fail"
s1.close()
s2.close()
if __name__ == '__main__':
p = Pool(2)
#call readFileNode in Parallel
#filenames = p.map(readFileNode, [host1, host2])
filenames=p.starmap(readFileNode, [(host1, host2), (key1, key2)])
#Both nodes files will be stored in filenames as list
print(filenames)
#compare files across node and node2 and get the result as pass or fail
for res in compareFilesOnNode(filenames):
print(res)
But, I am getting the below error:
TypeError: cannot pickle '_cffi_backend.FFI' object
I also tried uninstalling several modules and installing it . Some examples are like uninstalling paramiko, cryptography module and reinstalling it, but it doesn't work.
Please help!
I have the following code:
#!/usr/bin/env python
# coding=utf-8
import threading
import requests
import Queue
import sys
import re
#ip to num
def ip2num(ip):
ip = [int(x) for x in ip.split('.')]
return ip[0] << 24 | ip[1] << 16 | ip[2] << 8 | ip[3]
#num to ip
def num2ip(num):
return '%s.%s.%s.%s' % ((num & 0xff000000) >> 24,(num & 0x00ff0000) >> 16,(num & 0x0000ff00) >> 8,num & 0x000000ff)
def ip_range(start, end):
return [num2ip(num) for num in range(ip2num(start), ip2num(end) + 1) if num & 0xff]
def bThread(iplist):
threadl = []
queue = Queue.Queue()
for host in iplist:
queue.put(host)
for x in xrange(0, int(SETTHREAD)):
threadl.append(tThread(queue))
for t in threadl:
t.start()
for t in threadl:
t.join()
#create thread
class tThread(threading.Thread):
def __init__(self, queue):
threading.Thread.__init__(self)
self.queue = queue
def run(self):
while not self.queue.empty():
host = self.queue.get()
try:
checkServer(host)
except:
continue
def checkServer(host):
ports = [80]
for k in ports:
try:
aimurl = "http://"+host+":"+str(k)
response = requests.get(url=aimurl,timeout=3)
serverText = response.headers['server']
if (response.status_code) == 403:
print "-"*50+"\n"+aimurl +" Server: "+serverText
except:
pass
if __name__ == '__main__':
print '\n############# CDN IP #############'
print ' '
print '################################################\n'
global SETTHREAD
try:
SETTHREAD = sys.argv[2]
iplist = []
file = open(sys.argv[1], 'r')
tmpIpList = file.readlines()
for ip in tmpIpList:
iplist.append(ip.rstrip("\n"))
print '\nEscaneando '+str(len(iplist))+" IP's...\n"
bThread(iplist)
except KeyboardInterrupt:
print 'Keyboard Interrupt!'
sys.exit()
This script works as follows, a range of ip is entered:
python2 script.py 104.0.0.0-104.0.1.255 100 (100 is the number of threads)
I want to add support so that it reads the ip of a file, and that the range also works.
python2 script.py ips.txt 100
I tried this:
file = open(sys.argv[1], 'r')
iplist = file.readlines()
But it does not work.
Edit1: added file reading code recommended by user Syed Hasan, the problem seems to be the bThread(iplist) function
I assume you're attempting to use 'iplist' the same way as your CLI input was attempting to parse it. However, the readlines function simply reads the entire file at once and appends a newline (\n) at the end (provided you do format the IPs with a succeeding newline character).
Currently, you should be getting a list of IPs with a succeeding newline character. Try removing it from the rightmost end using rstrip:
file = open(sys.argv[1], 'r')
tmpIpList = file.readlines()
for ip in tmpIpList:
iplist.append(ip.rstrip("\n"))
How you switch between the two modes is a challenge you should attempt to solve. Perhaps use command-line parameter support to identify the mode of operations (look into the argparse library).
I'm trying to get the list of files that are fully uploaded on the FTP server.
I have access to this FTP server where a 3rd party writes data and marker files every 15 minutes. Once the data file is completely uploaded then a marker file gets created. we know once this marker file is there that means data files are ready and we can download it. I'm looking for a way to efficiently approach this problem. I want to check every minute if there are any new stable files on FTP server, if there is then I'll download those files. one preferred way is see if the marker file is 2 minutes old then we are good to download marker file and corresponding data file.
I'm new with python and looking for help.
I have some code till I list out the files
import paramiko
from datetime import datetime, timedelta
FTP_HOST = 'host_address'
FTP_PORT = 21
FTP_USERNAME = 'username'
FTP_PASSWORD = 'password'
FTP_ROOT_PATH = 'path_to_dir'
def today():
return datetime.strftime(datetime.now(), '%Y%m%d')
def open_ftp_connection(ftp_host, ftp_port, ftp_username, ftp_password):
"""
Opens ftp connection and returns connection object
"""
client = paramiko.SSHClient()
client.load_system_host_keys()
try:
transport = paramiko.Transport(ftp_host, ftp_port)
except Exception as e:
return 'conn_error'
try:
transport.connect(username=ftp_username, password=ftp_password)
except Exception as identifier:
return 'auth_error'
ftp_connection = paramiko.SFTPClient.from_transport(transport)
return ftp_connection
def show_ftp_files_stat():
ftp_connection = open_ftp_connection(FTP_HOST, int(FTP_PORT), FTP_USERNAME, FTP_PASSWORD)
full_ftp_path = FTP_ROOT_PATH + "/" + today()
file_attr_list = ftp_connection.listdir_attr(full_ftp_path)
print(file_attr_list)
for file_attr in file_attr_list:
print(file_attr.filename, file_attr.st_size, file_attr.st_mtime)
if __name__ == '__main__':
show_ftp_files_stat()
Sample file name
org-reference-delta-quotes.REF.48C2.20200402.92.1.1.txt.gz
Sample corresponding marker file name
org-reference-delta-quotes.REF.48C2.20200402.92.note.txt.gz
I solved my use case with 2 min stable rule, if modified time is within 2 min of the current time, I consider them stable.
import logging
import time
from datetime import datetime, timezone
from ftplib import FTP
FTP_HOST = 'host_address'
FTP_PORT = 21
FTP_USERNAME = 'username'
FTP_PASSWORD = 'password'
FTP_ROOT_PATH = 'path_to_dir'
logger = logging.getLogger()
logger.setLevel(logging.ERROR)
def today():
return datetime.strftime(datetime.now(tz=timezone.utc), '%Y%m%d')
def current_utc_ts():
return datetime.utcnow().timestamp()
def current_utc_ts_minus_120():
return int(datetime.utcnow().timestamp()) - 120
def yyyymmddhhmmss_string_epoch_ts(dt_string):
return time.mktime(time.strptime(dt_string, '%Y%m%d%H%M%S'))
def get_ftp_connection(ftp_host, ftp_username, ftp_password):
try:
ftp = FTP(ftp_host, ftp_username, ftp_password)
except Exception as e:
print(e)
logger.error(e)
return 'conn_error'
return ftp
def get_list_of_files(ftp_connection, date_to_process):
full_ftp_path = FTP_ROOT_PATH + "/" + date_to_process + "/"
ftp_connection.cwd(full_ftp_path)
entries = list(ftp_connection.mlsd())
entry_list = [line for line in entries if line[0].endswith('.gz') | line[0].endswith('.zip')]
ftp_connection.quit()
print('Total file count', len(entry_list))
return entry_list
def parse_file_list_to_dict(entries):
try:
file_dict_list = []
for line in entries:
file_dict = dict({"file_name": line[0],
"server_timestamp": int(yyyymmddhhmmss_string_epoch_ts(line[1]['modify'])),
"server_date": line[0].split(".")[3])
file_dict_list.append(file_dict)
except IndexError as e:
# Output expected IndexErrors.
logging.exception(e)
except Exception as exception:
# Output unexpected Exceptions.
logging.exception(exception, False)
return file_dict_list
def get_stable_files_dict_list(dict_list):
stable_list = list(filter(lambda d: d['server_timestamp'] < current_utc_ts_minus_120(), dict_list))
print('stable file count: {}'.format(len(stable_list)))
return stable_list
if __name__ == '__main__':
ftp_connection = get_ftp_connection(FTP_HOST, FTP_USERNAME, FTP_PASSWORD)
if ftp_connection == 'conn_error':
logger.error('Failed to connect FTP Server!')
else:
file_list = get_list_of_files(ftp_connection, today())
parse_file_list = parse_file_list_to_dict(file_list)
stable_file_list = get_stable_files_dict_list(parse_file_list)
New in version 3.7 supports ThreadingHTTPServer as mentioned in doc
to run from command line we use
python -m http.server
but its still run normal HTTPServer, is there any way to enable via command line.
EDITED:
python 3.7 runs ThreadingHTTPServer by default, no argument necessary
Simple Python 2 HTTP Server with multi-threading and partial-content support
#!/usr/bin/env python2
# Standard library imports.
from SocketServer import ThreadingMixIn
import BaseHTTPServer
import SimpleHTTPServer
import sys
import json
import os
from os.path import (join, exists, dirname, abspath, isabs, sep, walk, splitext,
isdir, basename, expanduser, split, splitdrive)
from os import makedirs, unlink, getcwd, chdir, curdir, pardir, rename, fstat
from shutil import copyfileobj, copytree
import glob
from zipfile import ZipFile
from urlparse import urlparse, parse_qs
from urllib import urlopen, quote, unquote
from posixpath import normpath
from cStringIO import StringIO
import re
import ConfigParser
import cgi
import threading
import socket
import errno
DATA_DIR = getcwd() # join(expanduser('~'), APP_NAME)
class ThreadingHTTPServer(ThreadingMixIn, BaseHTTPServer.HTTPServer):
pass
class RequestHandler(SimpleHTTPServer.SimpleHTTPRequestHandler):
""" Handler to handle POST requests for actions.
"""
serve_path = DATA_DIR
def do_GET(self):
""" Overridden to handle HTTP Range requests. """
self.range_from, self.range_to = self._get_range_header()
if self.range_from is None:
# nothing to do here
return SimpleHTTPServer.SimpleHTTPRequestHandler.do_GET(self)
print 'range request', self.range_from, self.range_to
f = self.send_range_head()
if f:
self.copy_file_range(f, self.wfile)
f.close()
def copy_file_range(self, in_file, out_file):
""" Copy only the range in self.range_from/to. """
in_file.seek(self.range_from)
# Add 1 because the range is inclusive
bytes_to_copy = 1 + self.range_to - self.range_from
buf_length = 64*1024
bytes_copied = 0
while bytes_copied < bytes_to_copy:
read_buf = in_file.read(min(buf_length, bytes_to_copy-bytes_copied))
if len(read_buf) == 0:
break
out_file.write(read_buf)
bytes_copied += len(read_buf)
return bytes_copied
def send_range_head(self):
"""Common code for GET and HEAD commands.
This sends the response code and MIME headers.
Return value is either a file object (which has to be copied
to the outputfile by the caller unless the command was HEAD,
and must be closed by the caller under all circumstances), or
None, in which case the caller has nothing further to do.
"""
path = self.translate_path(self.path)
f = None
if isdir(path):
if not self.path.endswith('/'):
# redirect browser - doing basically what apache does
self.send_response(301)
self.send_header("Location", self.path + "/")
self.end_headers()
return None
for index in "index.html", "index.htm":
index = join(path, index)
if exists(index):
path = index
break
else:
return self.list_directory(path)
if not exists(path) and path.endswith('/data'):
# FIXME: Handle grits-like query with /data appended to path
# stupid grits
if exists(path[:-5]):
path = path[:-5]
ctype = self.guess_type(path)
try:
# Always read in binary mode. Opening files in text mode may cause
# newline translations, making the actual size of the content
# transmitted *less* than the content-length!
f = open(path, 'rb')
except IOError:
self.send_error(404, "File not found")
return None
if self.range_from is None:
self.send_response(200)
else:
self.send_response(206)
self.send_header("Content-type", ctype)
fs = fstat(f.fileno())
file_size = fs.st_size
if self.range_from is not None:
if self.range_to is None or self.range_to >= file_size:
self.range_to = file_size-1
self.send_header("Content-Range",
"bytes %d-%d/%d" % (self.range_from,
self.range_to,
file_size))
# Add 1 because ranges are inclusive
self.send_header("Content-Length",
(1 + self.range_to - self.range_from))
else:
self.send_header("Content-Length", str(file_size))
self.send_header("Last-Modified", self.date_time_string(fs.st_mtime))
self.end_headers()
return f
def list_directory(self, path):
"""Helper to produce a directory listing (absent index.html).
Return value is either a file object, or None (indicating an
error). In either case, the headers are sent, making the
interface the same as for send_head().
"""
try:
list = os.listdir(path)
except os.error:
self.send_error(404, "No permission to list directory")
return None
list.sort(key=lambda a: a.lower())
f = StringIO()
displaypath = cgi.escape(unquote(self.path))
f.write('<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">')
f.write("<html>\n<title>Directory listing for %s</title>\n" % displaypath)
f.write("<body>\n<h2>Directory listing for %s</h2>\n" % displaypath)
f.write("<hr>\n<ul>\n")
for name in list:
fullname = os.path.join(path, name)
displayname = linkname = name
# Append / for directories or # for symbolic links
if os.path.isdir(fullname):
displayname = name + "/"
linkname = name + "/"
if os.path.islink(fullname):
displayname = name + "#"
# Note: a link to a directory displays with # and links with /
f.write('<li>%s\n'
% (quote(linkname), cgi.escape(displayname)))
f.write("</ul>\n<hr>\n</body>\n</html>\n")
length = f.tell()
f.seek(0)
self.send_response(200)
encoding = sys.getfilesystemencoding()
self.send_header("Content-type", "text/html; charset=%s" % encoding)
self.send_header("Content-Length", str(length))
self.end_headers()
return f
def translate_path(self, path):
""" Override to handle redirects.
"""
path = path.split('?',1)[0]
path = path.split('#',1)[0]
path = normpath(unquote(path))
words = path.split('/')
words = filter(None, words)
path = self.serve_path
for word in words:
drive, word = splitdrive(word)
head, word = split(word)
if word in (curdir, pardir): continue
path = join(path, word)
return path
# Private interface ######################################################
def _get_range_header(self):
""" Returns request Range start and end if specified.
If Range header is not specified returns (None, None)
"""
range_header = self.headers.getheader("Range")
if range_header is None:
return (None, None)
if not range_header.startswith("bytes="):
print "Not implemented: parsing header Range: %s" % range_header
return (None, None)
regex = re.compile(r"^bytes=(\d+)\-(\d+)?")
rangething = regex.search(range_header)
if rangething:
from_val = int(rangething.group(1))
if rangething.group(2) is not None:
return (from_val, int(rangething.group(2)))
else:
return (from_val, None)
else:
print 'CANNOT PARSE RANGE HEADER:', range_header
return (None, None)
def get_server(port=8000, next_attempts=0, serve_path=None):
Handler = RequestHandler
if serve_path:
Handler.serve_path = serve_path
while next_attempts >= 0:
try:
httpd = ThreadingHTTPServer(("", port), Handler)
return httpd
except socket.error as e:
if e.errno == errno.EADDRINUSE:
next_attempts -= 1
port += 1
else:
raise
def main(args=None):
if args is None:
args = sys.argv[1:]
PORT = 8000
if len(args)>0:
PORT = int(args[-1])
serve_path = DATA_DIR
if len(args) > 1:
serve_path = abspath(args[-2])
httpd = get_server(port=PORT, serve_path=serve_path)
print "serving at port", PORT
httpd.serve_forever()
if __name__ == "__main__" :
main()