How can I make Flask wait? - python

I'm running a program that works with requests. I need to write the feedback time to my database. This code works fine, but it updates my DB too often. How can I make index() method wait for 60s? time.sleep(60) doesn't work here.
app = Flask(__name__)
app.config['SECRET_KEY'] = 'secret!'
dbconn = mysql.connector.connect(host="myhost",
database='mydb',
user='root', password='12345')
@app.route('/', methods = ['GET', 'POST'])
def index():
    """Record the time of each POST in ``mytable`` and render the index page.

    On POST, ``response_time`` of the ``my_service`` row is set to the
    current local time through the module-level ``dbconn`` connection.
    Every request, GET or POST, gets ``index.html`` back.
    """
    if request.method == 'POST':
        query = "update mytable set response_time=%s where service_name = 'my_service'"
        cursor = dbconn.cursor()
        try:
            cursor.execute(query, (datetime.datetime.now(),))
            print("sent query")
            dbconn.commit()
        finally:
            # Release the cursor even when execute()/commit() raises.
            cursor.close()
    return render_template('index.html')
if __name__ == '__main__':
app.run(host = "myhostaddress", port = 1010)

As already suggested in the comment, using some dedicated task queue would probably be the best solution. If you don't want to bring any dependency though, you might adapt this simple example:
from queue import Queue
import random
from threading import Thread
import time
from flask import Flask
app = Flask(__name__)
@app.route('/')
def index():
    """Queue a random work item and answer the request immediately.

    The number is put on the module-level queue ``q`` for the background
    worker and is also returned to the client, followed by a newline.
    """
    n = random.randint(0, 100)
    q.put(n)
    return '%s\n' % n
def worker():
    """Process items from the module-level queue ``q`` until ``None`` arrives.

    Each item is printed, followed by a one-second pause, and is then marked
    done so that ``q.join()`` can return. The ``None`` sentinel stops the
    loop and is not marked done.
    """
    # iter(callable, sentinel) keeps calling q.get() until it returns None.
    for item in iter(q.get, None):
        print('Processing %s' % item) # do the work e.g. update database
        time.sleep(1)
        q.task_done()
if __name__ == '__main__':
q = Queue()
t = Thread(target=worker)
t.start()
app.run(host='0.0.0.0')
q.join()
q.put(None)
t.join()
And the test:
pasmen@nyx:~$ for x in 1 2 3 4 5 6 7 8 9 10; do curl http://0.0.0.0:5000; done
1
90
79
25
45
50
77
25
36
99
Output:
(venv) pasmen@nyx:~/tmp/test$ python test.py
* Serving Flask app "test" (lazy loading)
* Environment: production
WARNING: Do not use the development server in a production environment.
Use a production WSGI server instead.
* Debug mode: off
* Running on http://0.0.0.0:5000/ (Press CTRL+C to quit)
127.0.0.1 - - [04/Apr/2019 14:57:57] "GET / HTTP/1.1" 200 -
Processing 1
127.0.0.1 - - [04/Apr/2019 14:57:57] "GET / HTTP/1.1" 200 -
127.0.0.1 - - [04/Apr/2019 14:57:57] "GET / HTTP/1.1" 200 -
127.0.0.1 - - [04/Apr/2019 14:57:57] "GET / HTTP/1.1" 200 -
127.0.0.1 - - [04/Apr/2019 14:57:57] "GET / HTTP/1.1" 200 -
127.0.0.1 - - [04/Apr/2019 14:57:57] "GET / HTTP/1.1" 200 -
127.0.0.1 - - [04/Apr/2019 14:57:57] "GET / HTTP/1.1" 200 -
127.0.0.1 - - [04/Apr/2019 14:57:57] "GET / HTTP/1.1" 200 -
127.0.0.1 - - [04/Apr/2019 14:57:57] "GET / HTTP/1.1" 200 -
127.0.0.1 - - [04/Apr/2019 14:57:57] "GET / HTTP/1.1" 200 -
Processing 90
Processing 79
Processing 25
Processing 45
Processing 50
Processing 77
Processing 25
Processing 36
Processing 99
As you can see, the HTTP requests are processed immediately, while there is a 1-second delay between the pieces of actual work carried out by the worker.

Related

How do I connect to a MySQL5 DB using mysql-connector-python?

I believe the crux of the issue is the version mismatch, but I'm not sure how to get around this.
This is my conda environment file:
channels:
- anaconda
- defaults
- conda-forge
dependencies:
- _libgcc_mutex=0.1=conda_forge
- _openmp_mutex=4.5=2_gnu
- bzip2=1.0.8=h7f98852_4
- ca-certificates=2022.12.7=ha878542_0
- cffi=1.15.1=py311h409f033_3
- cryptography=39.0.1=py311h9b4c7bb_0
- dnspython=2.3.0=pyhd8ed1ab_0
- greenlet=2.0.2=py311hcafe171_0
- idna=3.3=pyhd3eb1b0_0
- ld_impl_linux-64=2.39=hc81fddc_0
- libblas=3.9.0=16_linux64_openblas
- libcblas=3.9.0=16_linux64_openblas
- libffi=3.4.2=h7f98852_5
- libgcc-ng=12.2.0=h65d4601_19
- libgfortran-ng=11.2.0=h00389a5_1
- libgfortran5=11.2.0=h1234567_1
- libgomp=12.2.0=h65d4601_19
- liblapack=3.9.0=16_linux64_openblas
- libnsl=2.0.0=h7f98852_0
- libopenblas=0.3.21=h043d6bf_0
- libprotobuf=3.21.12=h3eb15da_0
- libsqlite=3.40.0=h753d276_0
- libstdcxx-ng=12.2.0=h46fd767_19
- libuuid=2.32.1=h7f98852_1000
- libzlib=1.2.13=h166bdaf_4
- lz4-c=1.9.3=h295c915_1
- mysql-common=8.0.32=ha901b37_0
- mysql-connector-python=8.0.31=py311h0cf059c_2
- mysql-libs=8.0.32=hd7da12d_0
- ncurses=6.3=h27087fc_1
- numpy=1.24.2=py311h8e6699e_0
- openssl=3.0.8=h0b41bf4_0
- pandas=1.5.3=py311h2872171_0
- protobuf=4.21.12=py311hcafe171_0
- pycparser=2.21=pyhd3eb1b0_0
- python=3.11.0=ha86cf86_0_cpython
- python-dateutil=2.8.2=pyhd3eb1b0_0
- python_abi=3.11=3_cp311
- pytz=2021.3=pyhd3eb1b0_0
- readline=8.1.2=h0f457ee_0
- six=1.16.0=pyhd3eb1b0_1
- sqlalchemy=2.0.2=py311h2582759_0
- tk=8.6.12=h27826a3_0
- typing-extensions=4.4.0=hd8ed1ab_0
- typing_extensions=4.4.0=pyha770c72_0
- tzdata=2022f=h191b570_0
- xz=5.2.6=h166bdaf_0
- zlib=1.2.13=h166bdaf_4
- zstd=1.5.2=ha4553b6_0
- pip:
- pip==22.3.1
- setuptools==65.5.1
- wheel==0.38.4
and this is my code:
import pandas as pd
import mysql.connector
database = 'db'
host = 'host'
port = '3306'
user = 'user'
password = 'pass'
con = mysql.connector.connect(user=user,
password=password,
host=host,
database=database,
ssl_disabled=True)
I get the following error:
mysql.connector.errors.OperationalError: 1043 (08S01): Bad handshake
I've tried with ssl_disabled and with it enabled.
I was specifying my ssl settings incorrectly. The following worked:
con = mysql.connector.connect(user=user,
password=password,
host=host,
database=database,
use_pure=True,
charset='latin1')
The charset parameter might not be necessary depending on the DB

Why is a flask app.route causing a class method to loop?

I have a HTML Page with multiple divs containing links. When I click on the link, flask should run the class method and then render that same page again.
@app.route("/<drinkName>/")
def action(drinkName:str):
    """Make every available drink whose name equals ``drinkName``, then render the index page.

    The rule ends with a slash, so Flask answers a request for
    ``/<drinkName>`` with a redirect to ``/<drinkName>/`` before this view
    runs. ``makeDrink`` is called synchronously, so the page is rendered
    only after it returns.
    """
    goodDrinks = initBartender(drink_list)
    for drink in goodDrinks:
        drink: MenuItem
        if drink.name == drinkName:
            Bartender().makeDrink(drink, drink.attributes['ingredients'])
    return render_template('./index.html')
def makeDrink(self, drink:MenuItem, ingredients: dict[str, int]):
    """Pour every ingredient of ``drink`` that has a configured pump.

    One thread running ``self.pour(pin, waitTime)`` is created for each
    pump whose ``'value'`` equals an ingredient name, with
    ``waitTime = amount * FLOW_RATE``. All threads are started before any
    is joined, so the pumps run at the same time instead of one after
    another. ``self.running`` is True for the duration of the call.
    """
    self.running = True
    print('Making your drink...')
    pumpThreads = []
    for ing, amount in ingredients.items():
        for pump in self.pump_configuration.values():
            if ing == pump['value']:
                waitTime = amount * FLOW_RATE
                pump_t = threading.Thread(target=self.pour, args=(pump['pin'], waitTime))
                pumpThreads.append(pump_t)
    print(f"Making {drink.name}!")
    try:
        # Start everything first; joining inside the same loop would
        # serialise the pours.
        for thread in pumpThreads:
            thread.start()
        for thread in pumpThreads:
            thread.join()
    finally:
        # Never leave the flag stuck on True if a thread fails to start.
        self.running = False
I tried swapping the return render template for a redirect to the base url, I tried indenting the render_template to directly below the class method. This is my console log.
127.0.0.1 - - [28/Mar/2022 09:46:07] "GET /Screwdriver HTTP/1.1" 308 -
Making your drink...
Making Screwdriver!
Making your drink...
Making Screwdriver!
127.0.0.1 - - [28/Mar/2022 09:50:07] "GET /Screwdriver/ HTTP/1.1" 200 -
127.0.0.1 - - [28/Mar/2022 09:50:08] "GET /static/styles.css HTTP/1.1" 304 -
127.0.0.1 - - [28/Mar/2022 09:50:08] "GET /static/drinkList.json HTTP/1.1" 304 -
As you can see, it runs the class method twice and takes four minutes to render index.html. I want to make it so that it runs the class method once, then (with as little delay as possible) render index.html

flask socketio send different messages to different clients at the same time

I have a flask-socketio server and several react socket.io-client namespaces connected to it.
Clients are in the same ip and port but in different namespaces like /finalResponse, /algoSignal etc.
Client app
// Lifecycle hook: opens a socket to the /algoSignal namespace, starts polling
// the server and subscribes to incoming 'algo_signal_data' messages.
componentDidMount() {
// Connecting to backend api for data
const { dispatch } = this.props;
// NOTE(review): `socket` is not declared in this method; presumably it is a
// module-level variable - confirm.
socket = io(`http://${IP.clientOrderFeedFlaskIP}:6050/algoSignal`);
dispatch(loadClientOrdersDataSocket(socket));
// Every 2000 ms: ask the server for data and log the connection state.
var interval = setInterval(()=>{
socket.emit('getdata')
console.log(socket.connected)
},2000)
// Keep the timer id in component state; nothing in this method clears it.
this.setState({interval:interval})
// One-off request sent right after mounting.
socket.emit('update')
// Log the parsed payload and dispatch the raw payload on every message.
socket.on("algo_signal_data", (res) => {
console.log(JSON.parse(res))
dispatch(ClientOrdersData(res));
});
}
Here it sets up an interval that pings the server with 'getdata' every 2 seconds. There is an event bucket to handle incoming data in 'algo_signal_data'.
'update' bucket is to get data on refresh.
Similar to this I have around 8-9 clients in different namespaces.
Backend server
@socket.on('getdata', namespace = '/algoSignal')
def algoSignal():
    """Answer a 'getdata' poll on the /algoSignal namespace.

    When the ``lastOrderUpdated`` flag read from ``pingConn`` is not '0',
    the current algo-signal data is broadcast as 'algo_signal_data' and the
    flag is reset to '0'.
    """
    # NOTE(review): the /finalResponse 'getdata' handler reads and resets the
    # same 'lastOrderUpdated' key, so whichever handler is polled first clears
    # the flag for the other one.
    lastOrderUpdated = json.loads(pingConn.get('lastOrderUpdated'))
    if lastOrderUpdated != '0':
        print('---------------------sent algo signal data ---------------------')
        algosig = SymphonyOrderRaw(mongoIp).algoSignal.to_json(orient = 'records')
        emit('algo_signal_data', algosig, broadcast = True)
        pingConn.set('lastOrderUpdated', json.dumps('0'))
    else:
        # NOTE(review): the first argument of emit() is the event name, so this
        # sends an event called "200" with no payload - confirm that is intended.
        emit(json.dumps(200))
@socket.on('getdata', namespace='/finalResponse')
def getfinalResponse():
    """Answer a 'getdata' poll on the /finalResponse namespace.

    When the ``lastOrderUpdated`` flag read from ``pingConn`` is not '0',
    the current final-response data is broadcast as 'response_data' and the
    flag is reset to '0'.
    """
    # NOTE(review): the /algoSignal 'getdata' handler reads and resets the
    # same 'lastOrderUpdated' key, so whichever handler is polled first clears
    # the flag for the other one.
    lastOrderUpdated = json.loads(pingConn.get('lastOrderUpdated'))
    if lastOrderUpdated != '0':
        finalres = FinalResponse(mongoIp).finalResponse.to_json(orient = 'records')
        print('--------------------sent final Response data----------------')
        emit('response_data', finalres, broadcast = True)
        pingConn.set('lastOrderUpdated', json.dumps('0'))
    else:
        # NOTE(review): the first argument of emit() is the event name, so this
        # sends an event called "200" with no payload - confirm that is intended.
        emit(json.dumps(200))
I have event buckets like these that receive the 'getdata' prompt and check whether the database was updated. If it was, the handler sends the data; otherwise it sends a status code.
The expected result is that each client page should receive data when the db is updated.
The problem is that when I open multiple client pages in separate tabs, the server only sends the data to one page. But when the other pages are refreshed, the data is updated in those as well.
Update bucket
@socket.on('update', namespace = '/algoSignal')
def update2():
    """Broadcast the current algo-signal data on every 'update' request."""
    algosig = SymphonyOrderRaw(mongoIp).algoSignal.to_json(orient = 'records')
    emit('algo_signal_data', algosig, broadcast = True)
This is the output at the server terminal
(flask-venv) mockAdmin@python-react:~/RMS-git/FlaskServerCodes/serverFlask$ python3 main.py
Starting
* Restarting with stat
Starting
* Debugger is active!
* Debugger PIN: 113-048-226
(59454) wsgi starting up on http://10.160.0.2:6050
(59454) accepted ('103.62.42.27', 65522)
103.62.42.27 - - [31/Dec/2021 11:22:05] "GET /socket.io/?EIO=3&transport=polling&t=NuF2vTi HTTP/1.1" 200 401 0.000859
algo signal client connected
103.62.42.27 - - [31/Dec/2021 11:22:05] "POST /socket.io/?EIO=3&transport=polling&t=NuF2vVA&sid=65d328eeffb943a5a2ad80ee4216c856 HTTP/1.1" 200 219 0.000986
103.62.42.27 - - [31/Dec/2021 11:22:05] "GET /socket.io/?EIO=3&transport=polling&t=NuF2vVh&sid=65d328eeffb943a5a2ad80ee4216c856 HTTP/1.1" 200 249 0.000353
(59454) accepted ('103.62.42.27', 65523)
103.62.42.27 - - [31/Dec/2021 11:22:05] "POST /socket.io/?EIO=3&transport=polling&t=NuF2vWP&sid=65d328eeffb943a5a2ad80ee4216c856 HTTP/1.1" 200 219 0.001710
(59454) accepted ('103.62.42.27', 65524)
103.62.42.27 - - [31/Dec/2021 11:22:05] "GET /socket.io/?EIO=3&transport=polling&t=NuF2vWQ&sid=65d328eeffb943a5a2ad80ee4216c856 HTTP/1.1" 200 235 0.000171
(59454) accepted ('103.62.42.27', 65525)
103.62.42.27 - - [31/Dec/2021 11:22:05] "GET /socket.io/?EIO=3&transport=polling&t=NuF2vWZ HTTP/1.1" 200 401 0.000345
(59454) accepted ('103.62.42.27', 65526)
103.62.42.27 - - [31/Dec/2021 11:22:05] "GET /socket.io/?EIO=3&transport=polling&t=NuF2vX6&sid=65d328eeffb943a5a2ad80ee4216c856 HTTP/1.1" 200 235 0.000255
103.62.42.27 - - [31/Dec/2021 11:22:05] "POST /socket.io/?EIO=3&transport=polling&t=NuF2vX5&sid=65d328eeffb943a5a2ad80ee4216c856 HTTP/1.1" 200 219 0.005536
final response client connected
103.62.42.27 - - [31/Dec/2021 11:22:05] "POST /socket.io/?EIO=3&transport=polling&t=NuF2vXg&sid=8e278beb8e5a4f77b5b203e28af9b596 HTTP/1.1" 200 219 0.000881
103.62.42.27 - - [31/Dec/2021 11:22:05] "GET /socket.io/?EIO=3&transport=polling&t=NuF2vXl&sid=65d328eeffb943a5a2ad80ee4216c856 HTTP/1.1" 200 235 0.000180
103.62.42.27 - - [31/Dec/2021 11:22:05] "GET /socket.io/?EIO=3&transport=polling&t=NuF2vY4&sid=8e278beb8e5a4f77b5b203e28af9b596 HTTP/1.1" 200 252 0.000208
(59454) accepted ('103.62.42.27', 65527)
103.62.42.27 - - [31/Dec/2021 11:22:05] "GET /socket.io/?EIO=3&transport=polling&t=NuF2vYk&sid=8e278beb8e5a4f77b5b203e28af9b596 HTTP/1.1" 200 260 0.000265
103.62.42.27 - - [31/Dec/2021 11:22:05] "POST /socket.io/?EIO=3&transport=polling&t=NuF2vYj&sid=8e278beb8e5a4f77b5b203e28af9b596 HTTP/1.1" 200 219 0.002399
103.62.42.27 - - [31/Dec/2021 11:22:05] "GET /socket.io/?EIO=3&transport=polling&t=NuF2vZL&sid=8e278beb8e5a4f77b5b203e28af9b596 HTTP/1.1" 200 235 0.000189
103.62.42.27 - - [31/Dec/2021 11:22:05] "POST /socket.io/?EIO=3&transport=polling&t=NuF2vZM&sid=8e278beb8e5a4f77b5b203e28af9b596 HTTP/1.1" 200 219 0.005291
-----------------------------sent final Response data-----------------------
How can I send data to all the open client pages at the same time from a single server?

Cannot read channel message from slack in Python using slack SocketModeHandler

Below is the python code for reading and responding to message from slack channel to python. I wrote this script by using their tutorials and ended up here with the problem. Also I am unable to send message to slack using client.chat_postMessage(channel="XXXXXXXXXXX", text=msg, user="XXXXXXXXXXX")
I don't know why, but when I type the command "/hi" in the channel, the Python script reads the event and prints the data; if I try a keyword such as "check" or "knock knock", the script does not respond.
import os
# Use the package we installed
from slack_bolt import App
from slack_bolt.adapter.socket_mode import SocketModeHandler
from os.path import join, dirname
import time
import re
from datetime import datetime
dotenv_path = join(dirname(__file__), '.env')
load_dotenv(dotenv_path)
# Initializes your app with your bot token and signing secret
app = App(
token=os.environ['SLACK_BOT_TOKEN'],
signing_secret=os.environ['SIGNING_SECRET']
)
# Add functionality here
@app.message("check")
def say_hello(message, client, body, logger):
    """Reply with a fixed greeting when a message matches "check".

    The incoming message, client and body are printed for debugging; a
    failure to post is logged with its traceback and printed.
    """
    print(message)
    print(client)
    print(body)
    msg = "Hi there from Python"
    try:
        # NOTE(review): `user` looks like a chat.postEphemeral argument rather
        # than a chat.postMessage one - confirm against the Slack API reference.
        client.chat_postMessage(channel="XXXXXXXXXXX", text=msg, user="XXXXXXXXXXX")
    except Exception as e:
        logger.exception(f"Failed to post a message {e}")
        print(e)
@app.message("knock knock")
def ask_who(message, say):
    """Answer a "knock knock" message with an italicised "Who's there?"."""
    say("_Who's there?_")
@app.event("message")
def handle_message_events(body, logger):
    """Log and print the body of every "message" event."""
    logger.info(body)
    print("messaging", body)
@app.command("/hi")
def handle_some_command(ack, body, logger):
    """Acknowledge the /hi slash command, then log and print its payload."""
    ack()
    logger.info(body)
    print(body)
# Start your app
if __name__ == "__main__":
SocketModeHandler(app, os.environ["SLACK_APP_TOKEN"]).start()
Here is the manifest of my app from slackbolt
_metadata:
major_version: 1
minor_version: 1
display_information:
name: Hotline App
features:
app_home:
home_tab_enabled: true
messages_tab_enabled: true
messages_tab_read_only_enabled: false
bot_user:
display_name: Hotline Bot
always_online: false
slash_commands:
- command: /hi
description: greets user
should_escape: false
oauth_config:
scopes:
user:
- chat:write
- channels:read
- im:history
- channels:history
- groups:history
bot:
- incoming-webhook
- calls:read
- calls:write
- app_mentions:read
- channels:history
- channels:join
- channels:manage
- channels:read
- chat:write
- chat:write.customize
- chat:write.public
- commands
- dnd:read
- emoji:read
- files:read
- files:write
- groups:history
- groups:read
- groups:write
- im:history
- im:read
- im:write
- links:read
- links:write
- mpim:history
- mpim:read
- mpim:write
- pins:read
- pins:write
- reactions:read
- reactions:write
- reminders:read
- reminders:write
- remote_files:read
- remote_files:share
- remote_files:write
- team:read
- usergroups:write
- usergroups:read
- users.profile:read
- users:read
- users:read.email
- users:write
- workflow.steps:execute
settings:
event_subscriptions:
user_events:
- channel_archive
- channel_created
- channel_deleted
- channel_rename
- message.channels
- message.groups
- message.im
bot_events:
- app_mention
- channel_archive
- channel_created
- channel_deleted
- channel_history_changed
- channel_id_changed
- channel_left
- channel_rename
- channel_shared
- channel_unarchive
- channel_unshared
- dnd_updated_user
- email_domain_changed
- emoji_changed
- file_change
- file_created
- file_deleted
- file_public
- file_shared
- file_unshared
- group_archive
- group_deleted
- group_history_changed
- group_left
- group_rename
- group_unarchive
- im_history_changed
- link_shared
- member_joined_channel
- member_left_channel
- message.channels
- message.groups
- message.im
- message.mpim
- pin_added
- pin_removed
- reaction_added
- reaction_removed
- subteam_created
- subteam_members_changed
- subteam_updated
- team_domain_change
- team_join
- team_rename
- user_change
interactivity:
is_enabled: true
org_deploy_enabled: false
socket_mode_enabled: true
Any help to this problem from experts may reduce my headache and workload, Thanks in advance!
Kind regards,
Gohar
The bot must be a member of the channel where the message is being sent — please make sure to invite the bot into that channel and it should begin receiving those message events.
Also, this is somewhat incidental to your question, but as a security precaution, please only request the scopes necessary for your bot to function. You risk creating a token with far too permissive a number of scopes. You likely don't need user scopes for this app. The same holds true for events — consider only subscribing to events your app actually requires.

How to find how many times a particular ip is pinged to the url?

I have a Python script that extracts the unique IP addresses from a log file and displays how many times each of those IPs pinged the server. The code is as follows.
import sys
def extract_ip(line):
    """Return the first whitespace-separated field of ``line``.

    Raises IndexError when the line contains no fields.
    """
    # Only the first field is needed, so stop splitting after it.
    return line.split(None, 1)[0]
def increase_count(ip_dict, ip_addr):
    """Add one to the count stored for ``ip_addr`` in ``ip_dict``.

    An address that is not yet in the dictionary starts at 1.
    """
    ip_dict[ip_addr] = ip_dict.get(ip_addr, 0) + 1
def read_ips(infilename):
    """Count the IP addresses found in the log file ``infilename``.

    Whitespace-only lines are skipped. Returns a dictionary that maps each
    IP address to the number of lines it appears on.
    """
    res_dict = {}
    # open() replaces the Python 2-only file() builtin; 'with' closes the
    # handle, which the original never did.
    with open(infilename) as log_file:
        for line in log_file:
            if line.isspace():
                continue
            increase_count(res_dict, extract_ip(line))
    return res_dict
def write_ips(outfilename, ip_dict):
    """Write one ``count<TAB>ip`` line per entry of ``ip_dict`` to ``outfilename``.

    The count is right-aligned in a field of width 5.
    """
    # open()/items() work on Python 2 and 3, unlike file()/iteritems();
    # 'with' closes the file even if a write fails.
    with open(outfilename, "w") as out_file:
        for ip_addr, count in ip_dict.items():
            out_file.write("%5d\t%s\n" % (count, ip_addr))
def parse_cmd_line_args():
    """Return ``(infilename, outfilename)`` taken from the command line.

    When the number of arguments is not exactly two, a usage message is
    printed and the program exits with status 1.
    """
    args = sys.argv[1:]
    if len(args) != 2:
        print("Usage: %s [infilename] [outfilename]" % sys.argv[0])
        sys.exit(1)
    return args[0], args[1]
def main():
    """Count the IPs of the input log and write the counts to the output file."""
    infilename, outfilename = parse_cmd_line_args()
    write_ips(outfilename, read_ips(infilename))


if __name__ == "__main__":
    main()
The log file is in the following format with 2L lines. These are the first 30 lines of the log file
220.227.40.118 - - [06/Mar/2012:00:00:00 -0800] "GET /mysidebars/newtab.html HTTP/1.1" 404 0 - -
220.227.40.118 - - [06/Mar/2012:00:00:00 -0800] "GET /hrefadd.xml HTTP/1.1" 204 214 - -
59.95.13.217 - - [06/Mar/2012:00:00:00 -0800] "GET /dbupdates2.xml HTTP/1.1" 404 0 - -
111.92.9.222 - - [06/Mar/2012:00:00:00 -0800] "GET /mysidebars/newtab.html HTTP/1.1" 404 0 - -
120.56.236.46 - - [06/Mar/2012:00:00:00 -0800] "GET /hrefadd.xml HTTP/1.1" 204 214 - -
49.138.106.21 - - [06/Mar/2012:00:00:00 -0800] "GET /add.txt HTTP/1.1" 204 214 - -
117.195.185.130 - - [06/Mar/2012:00:00:00 -0800] "GET /mysidebars/newtab.html HTTP/1.1" 404 0 - -
122.160.166.220 - - [06/Mar/2012:00:00:00 -0800] "GET /mysidebars/newtab.html HTTP/1.1" 404 0 - -
117.214.20.28 - - [06/Mar/2012:00:00:00 -0800] "GET /welcome.html HTTP/1.1" 204 212 - -
117.18.231.5 - - [06/Mar/2012:00:00:00 -0800] "GET /mysidebars/newtab.html HTTP/1.1" 404 0 - -
117.18.231.5 - - [06/Mar/2012:00:00:00 -0800] "GET /mysidebars/newtab.html HTTP/1.1" 404 0 - -
122.169.136.211 - - [06/Mar/2012:00:00:00 -0800] "GET /mysidebars/newtab.html HTTP/1.1" 404 0 - -
203.217.145.10 - - [06/Mar/2012:00:00:00 -0800] "GET /mysidebars/newtab.html HTTP/1.1" 404 0 - -
117.18.231.5 - - [06/Mar/2012:00:00:00 -0800] "GET /hrefadd.xml HTTP/1.1" 204 214 - -
59.95.13.217 - - [06/Mar/2012:00:00:00 -0800] "GET /dbupdates2.xml HTTP/1.1" 404 0 - -
203.217.145.10 - - [06/Mar/2012:00:00:00 -0800] "GET /mysidebars/newtab.html HTTP/1.1" 404 0 - -
117.206.70.4 - - [06/Mar/2012:00:00:00 -0800] "GET /mysidebars/newtab.html HTTP/1.1" 404 0 - -
117.214.20.28 - - [06/Mar/2012:00:00:00 -0800] "GET /css/epic.css HTTP/1.1" 204 214 "http://www.epicbrowser.com/welcome.html" -
117.206.70.4 - - [06/Mar/2012:00:00:00 -0800] "GET /add.txt HTTP/1.1" 204 214 - -
117.206.70.4 - - [06/Mar/2012:00:00:00 -0800] "GET /hrefadd.xml HTTP/1.1" 204 214 - -
118.97.38.130 - - [06/Mar/2012:00:00:00 -0800] "GET /mysidebars/newtab.html HTTP/1.1" 404 0 - -
117.214.20.28 - - [06/Mar/2012:00:00:00 -0800] "GET /js/flash_detect_min.js HTTP/1.1" 304 0 "http://www.epicbrowser.com/welcome.html" -
117.214.20.28 - - [06/Mar/2012:00:00:00 -0800] "GET /images/home-page-bottom.jpg HTTP/1.1" 304 0 "http://www.epicbrowser.com/welcome.html" -
117.214.20.28 - - [06/Mar/2012:00:00:00 -0800] "GET /images/Facebook_Like.png HTTP/1.1" 204 214 "http://www.epicbrowser.com/welcome.html" -
117.214.20.28 - - [06/Mar/2012:00:00:00 -0800] "GET /images/Twitter_Follow.png HTTP/1.1" 204 214 "http://www.epicbrowser.com/welcome.html" -
117.214.20.28 - - [06/Mar/2012:00:00:00 -0800] "GET /images/home-page-top.jpg HTTP/1.1" 304 0 "http://www.epicbrowser.com/welcome.html" -
49.138.106.21 - - [06/Mar/2012:00:00:01 -0800] "GET /dbupdates2.xml HTTP/1.1" 404 0 - -
117.18.231.5 - - [06/Mar/2012:00:00:01 -0800] "GET /mysidebars/newtab.html HTTP/1.1" 404 0 - -
117.18.231.5 - - [06/Mar/2012:00:00:01 -0800] "GET /hrefadd.xml HTTP/1.1" 204 214 - -
120.61.182.186 - - [06/Mar/2012:00:00:01 -0800] "GET /mysidebars/newtab.html HTTP/1.1" 404 0 - -
the output of the file is in the following format
Number of Times IPS
158 111.92.9.222
11 58.97.187.231
30 212.57.209.41
5 119.235.51.66
3 122.168.134.106
5 180.234.220.75
13 115.252.223.243
Here the ip 111.92.9.222 - - [06/Mar/2012:00:00:00 -0800] "GET /mysidebars/newtab.html HTTP/1.1" 404 0 - - is pinged into epic 158 times totally.
Now i want to add a functionality to the code so that if i pass a particular URL, it should return how many times the URL was accessed by which IP addresses(IP address either from log file or from output file).
E.g. if I pass the url as input: http://www.epicbrowser.com/hrefadd.xml
the output should be in the following format
10.10.128.134 4
10.134.222.232 6
I assume your requirement that you want only IPs of one given URL is true. In this case you just have to add an additional filter to the program which filters out the unwanted lines. The structure of the program can be unchanged.
Because the log files do not know anything about hosts, you have to specify only the path part of the URL as the third parameter; example: "/hrefadd.xml"
#!/usr/bin/env python
#
# Counts the IP addresses of a log file.
#
# Assumption: the IP address is logged in the first column.
# Example line: 117.195.185.130 - - [06/Mar/2012:00:00:00 -0800] \
# "GET /mysidebars/newtab.html HTTP/1.1" 404 0 - -
#
import sys
def urlcheck(line, url):
    '''Checks whether the request path of the log line equals url.

    The path is taken to be the seventh whitespace-separated field;
    lines with fewer than seven fields never match.'''
    lsplit = line.split()
    return len(lsplit) >= 7 and lsplit[6] == url
def extract_ip(line):
    '''Extracts the IP address from the line.

    It is assumed that the IP address is logged in the first column and
    that the columns are whitespace separated. Raises IndexError when
    the line contains no columns.'''
    # Only the first column is needed, so stop splitting after it.
    return line.split(None, 1)[0]
def increase_count(ip_dict, ip_addr):
    '''Increases the count of the IP address.

    If an IP address is not in the given dictionary,
    it is created with a count of 1.'''
    ip_dict[ip_addr] = ip_dict.get(ip_addr, 0) + 1
def read_ips(infilename, url):
    '''Read the IP addresses of all requests for url from the file and
    count them in a dictionary - returns the dictionary.

    Whitespace-only lines and lines for other paths are skipped.'''
    res_dict = {}
    # open() replaces the Python 2-only file() builtin; 'with' closes the
    # handle, which the original never did.
    with open(infilename) as log_file:
        for line in log_file:
            if line.isspace() or not urlcheck(line, url):
                continue
            increase_count(res_dict, extract_ip(line))
    return res_dict
def write_ips(outfilename, ip_dict):
    '''Write out the IP addresses and their counts, one "ip<TAB>count"
    line per address, with the count right-aligned to width 5.'''
    # open()/items() work on Python 2 and 3, unlike file()/iteritems();
    # 'with' closes the file even if a write fails.
    with open(outfilename, "w") as out_file:
        for ip_addr, count in ip_dict.items():
            out_file.write("%s\t%5d\n" % (ip_addr, count))
def parse_cmd_line_args():
    '''Return the in file name, the out file name and the url path.

    If the number of parameters is not exactly three, a usage
    message is printed and the program exits with status 1.'''
    args = sys.argv[1:]
    if len(args) != 3:
        print("Usage: %s [infilename] [outfilename] [url]" % sys.argv[0])
        sys.exit(1)
    return args[0], args[1], args[2]
def main():
    '''Count the IPs that requested the given url and write the counts out.'''
    infilename, outfilename, url = parse_cmd_line_args()
    write_ips(outfilename, read_ips(infilename, url))


if __name__ == "__main__":
    main()
IMHO it would be helpful if also the original post was referenced.
IMHO you should leave the comments in place.
Instead of using a database (which might be a better solution in the long run) you can use a dictionary of dictionaries.
urls = {}
def increase_path_count(dict, path, ip_addr):
    '''Increases the count of ip_addr in the per-path dictionary of path.

    dict maps each path to a dictionary of IP address counts; the inner
    dictionary is created on first use of a path.'''
    increase_count(dict.setdefault(path, {}), ip_addr)
Edit
You have to parse the actual contents of the logfile to get the path. This can be done with the regular expression module. A good regular expression to start with might be this:
'GET (?P<path>/[\w.]+)'
Since you only have the paths in the logfile, you need to extract the path from the URL in the command line argument. This can be done with the urlparse module.
Edit 2
import re
# ....
# Captures the complete request path of a GET line. The earlier pattern
# '/[\w.]+' stopped at the second '/', so '/mysidebars/newtab.html' was
# recorded as '/mysidebars'.
_GET_PATH_RE = re.compile(r'GET (?P<path>/\S*)')


def read_ips_and_paths(infilename, url):
    '''Read the IP addresses and paths from the file and store (count)
    them in a dictionary - returns the dictionary.

    The result maps each requested path to a dictionary of IP address
    counts. Whitespace-only lines and lines without a GET request are
    skipped. The url parameter is accepted but not used here.'''
    res_dict = {}
    # open() replaces the Python 2-only file() builtin; 'with' closes it.
    with open(infilename) as log_file:
        for line in log_file:
            if line.isspace():
                continue
            # Get the path from the log entry
            match = _GET_PATH_RE.search(line)
            if match is None:
                # Not a GET request; calling .group() on None would raise
                # AttributeError.
                continue
            # Get the ip address for the log entry
            ip_addr = extract_ip(line)
            increase_path_count(res_dict, match.group('path'), ip_addr)
    return res_dict
Now when you want to get all IP addresses and counts for a specific path, you use urlparse to get the path part of the URL supplied from the command line:
from urlparse import urlparse
# ....
url_path = urlparse(complete_url).path
Now you use the path to print the requested data:
# Print every IP address that requested url_path together with its hit
# count. A path that never occurs in the log prints nothing instead of
# raising KeyError.
for ip_addr, count in url_dict.get(url_path, {}).items():
    print("ip address: %r - %d" % (ip_addr, count))
Your problem cries out for the use of a relational database.
Using a database will let you construct queries like "how many hits did I get from each IP?" as SQL queries like SELECT ip, COUNT(ip) as hits FROM requests GROUP BY ip. The database will then take care of looping through the data and counting things.
Complete solution using an in-memory SQLite database given below. I have tested this and it works. 'logfile.txt' should be a file of precisely the format you gave in your example above.
Edit: Revised to work with imprecisely specified data format - the only requirements now are that each row must consist of at least seven whitespace-separated fields, of which the first field must be an IP in dotted quad format, and the seventh field must be a path starting with '/'.
(Note the use of defensive programming techniques - check that the data you're getting looks the way you expect it to look, and raise an error if the data is malformed. This prevents the bad data from causing your entire program to blow up later.)
import os, sqlite3, re
db = sqlite3.connect(':memory:') #create temporary SQLite database in memory
db.execute("""
CREATE TABLE requests (
ip TEXT,
url TEXT
)
""")
# Load one (ip, url) row per log line; 'with' closes the log file, which
# the original never did.
with open('logfile.txt','r') as fh:
    for line in fh:
        line_split = line.split()
        if len(line_split) < 7:
            raise ValueError ("Not enough fields - need at least seven.")
        ip = line_split[0]
        url = line_split[6]
        # Check that the 'ip' variable really contains four sets of number separated by dots.
        # fullmatch also rejects trailing junk such as '1.2.3.4abc'.
        if re.fullmatch(r'\d+\.\d+\.\d+\.\d+', ip) is None:
            errmsg = "The value %s found in the first column was not an IP address." % ip
            raise ValueError (errmsg)
        # check that the 'url' variable contains a string starting with /
        if not url.startswith("/"):
            errmsg = "The value %s found in the 7th column was not a path beginning with /" % url
            raise ValueError ( errmsg )
        db.execute("INSERT INTO requests VALUES (?,?)",(ip,url) )
db.commit() #save data
# print what's in the database
print("\nData in the database\n")
results = db.execute("SELECT * FROM requests")
for row in results:
    # print() call, like the rest of the script; the bare 'print row'
    # statement is a syntax error on Python 3.
    print(row)
# Count hits from each IP
print ("\nNumber of hits from each IP\n")
results = db.execute("""
SELECT ip, COUNT(ip) AS hits
FROM requests
GROUP BY ip""")
for row in results:
    print(row)
# Count hits from each IP for the particular URL '/mysidebars/newtab.html'
target_url = '/mysidebars/newtab.html'
# Report the URL that is actually queried; 'url' is only the loop variable
# left over from loading the log and holds the path of its last line.
print("\nNumber of hits from each IP for url %s" % target_url)
results = db.execute("""
SELECT ip, COUNT(ip) AS hits
FROM requests
WHERE url=?
GROUP BY ip
""", [target_url])
for row in results:
    print(row)
The output is:
Data in the database
(u'220.227.40.118', u'/mysidebars/newtab.html')
(u'220.227.40.118', u'/hrefadd.xml')
(u'59.95.13.217', u'/dbupdates2.xml')
(u'111.92.9.222', u'/mysidebars/newtab.html')
(u'120.56.236.46', u'/hrefadd.xml')
(u'49.138.106.21', u'/add.txt')
(u'117.195.185.130', u'/mysidebars/newtab.html')
(u'122.160.166.220', u'/mysidebars/newtab.html')
(u'117.214.20.28', u'/welcome.html')
(u'117.18.231.5', u'/mysidebars/newtab.html')
(u'117.18.231.5', u'/mysidebars/newtab.html')
(u'122.169.136.211', u'/mysidebars/newtab.html')
(u'203.217.145.10', u'/mysidebars/newtab.html')
(u'117.18.231.5', u'/hrefadd.xml')
(u'59.95.13.217', u'/dbupdates2.xml')
(u'203.217.145.10', u'/mysidebars/newtab.html')
(u'117.206.70.4', u'/mysidebars/newtab.html')
(u'117.214.20.28', u'/css/epic.css')
(u'117.206.70.4', u'/add.txt')
(u'117.206.70.4', u'/hrefadd.xml')
(u'118.97.38.130', u'/mysidebars/newtab.html')
(u'117.214.20.28', u'/js/flash_detect_min.js')
(u'117.214.20.28', u'/images/home-page-bottom.jpg')
(u'117.214.20.28', u'/images/Facebook_Like.png')
(u'117.214.20.28', u'/images/Twitter_Follow.png')
(u'117.214.20.28', u'/images/home-page-top.jpg')
(u'49.138.106.21', u'/dbupdates2.xml')
(u'117.18.231.5', u'/mysidebars/newtab.html')
(u'117.18.231.5', u'/hrefadd.xml')
(u'120.61.182.186', u'/mysidebars/newtab.html')
Number of hits from each IP
(u'111.92.9.222', 1)
(u'117.18.231.5', 5)
(u'117.195.185.130', 1)
(u'117.206.70.4', 3)
(u'117.214.20.28', 7)
(u'118.97.38.130', 1)
(u'120.56.236.46', 1)
(u'120.61.182.186', 1)
(u'122.160.166.220', 1)
(u'122.169.136.211', 1)
(u'203.217.145.10', 2)
(u'220.227.40.118', 2)
(u'49.138.106.21', 2)
(u'59.95.13.217', 2)
Number of hits from each IP for url /mysidebars/newtab.html
(u'111.92.9.222', 1)
(u'117.18.231.5', 3)
(u'117.195.185.130', 1)
(u'117.206.70.4', 1)
(u'118.97.38.130', 1)
(u'120.61.182.186', 1)
(u'122.160.166.220', 1)
(u'122.169.136.211', 1)
(u'203.217.145.10', 2)
(u'220.227.40.118', 1)
Sidenote: Your existing code is not a good solution to your problem (SQL is a much better way of dealing with "tabular" data.) But if you ever need to count occurrences of a repeated value for another purpose, collections.Counter from the standard library is easier to use and faster than your increase_count() function.

Categories