How to build the simplest simulation in Python using SimPy?

I have been trying to build this simulation with SimPy, but I just can't figure out how it works. Any tips on how to learn it from example code (starting at the bottom and working up through the functions, or the other way around?), or any good sources, would already be a great help.
What I want to simulate:
A bike rental service with S rental stations and T bikes at t=0.
Customer arrivals and rental times are exponentially distributed. When a bike is rented, there is a given probability of it being returned to each of the rental stations. For example, with S=2, the probabilities are [[0.9,0.1],[0.5,0.5]].
I tried to do it without SimPy, but I don't know how to track the number of bikes at each station and handle new arrivals while rentals are in progress.
Any help is more than welcome as I am starting to get kind of desperate.
Thank you!

Here is one way to do it:
"""
Simple simulation of several bike rental stations
Stations are modeled with containers so bikes can be returned
to a station different from where it was rented from
programer: Michael R. Gibbs
"""
import simpy
import random
# scenario attributes
station_names = ['A','B']
rent_probs = [.9,.1]
return_probs = [.5,.5]
bikes_per_station = 5
def rent_proc(env, id, station_names, rent_probs, return_probs, station_map):
"""
Models the process of:
selecting a station
renting a bike
using a bike
returning a bike (can be different station)
"""
#select a station
name = random.choices(station_names,weights=rent_probs)
name = name[0]
station = station_map[name]
print(f'{env.now}: id:{id} has arrived at station {name} q-len:{len(station.get_queue)} and {station.level} bikes')
# get a bike
yield station.get(1)
print(f'{env.now}: id:{id} has rented bike at station {name} q-len:{len(station.get_queue)} and {station.level} bikes')
# use bike
yield env.timeout(random.triangular(1,5,3))
# return bike
name = random.choices(station_names,weights=return_probs)
name = name[0]
station = station_map[name]
yield station.put(1)
print(f'{env.now}: id:{id} has returned bike at station {name} q-len:{len(station.get_queue)} and {station.level} bikes')
def gen_arrivals(env, station_names, rent_probs, return_probs, station_map):
"""
Generates arrivales to the rental stations
"""
cnt = 0
while True:
yield env.timeout(random.expovariate(2.5))
cnt += 1
env.process(rent_proc(env,cnt,station_names,rent_probs,return_probs, station_map))
# set up
env = simpy.Environment()
# create station based on name list
cap = len(station_names) * bikes_per_station
station_map = {
name: simpy.Container(env, init=bikes_per_station, capacity=cap)
for name in station_names
}
# start generation arrivals
env.process(gen_arrivals(env, station_names, rent_probs, return_probs, station_map))
# start sim
env.run(100)
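The answer above uses a single global return_probs list, while the question describes return probabilities that depend on the station the bike was rented from ([[0.9,0.1],[0.5,0.5]] for S=2). A minimal sketch of that variation, reusing the names defined in the script above (the matrix values are the ones from the question):

# sketch: return probabilities that depend on the origin station
return_prob_matrix = {
    'A': [0.9, 0.1],  # a bike rented at A returns to A with prob 0.9, to B with prob 0.1
    'B': [0.5, 0.5],  # a bike rented at B returns to A or B with equal probability
}

def pick_return_station(origin_name):
    # sample the destination station using the matrix row for the origin
    return random.choices(station_names, weights=return_prob_matrix[origin_name])[0]

Inside rent_proc you would then remember the name of the station the bike was rented from and call pick_return_station(rented_name) instead of sampling with the global return_probs.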

Related

Problem generating a list with a numeric qualifier

I am working on a course with low code requirements, and have one step where I am stuck.
I have this code that creates a list of restaurants and the number of reviews each has:
# Filter the rated restaurants
df_rated = df[df['rating'] != 'Not given'].copy()
df_rated['rating'] = df_rated['rating'].astype('int')
df_rating_count = df_rated.groupby(['restaurant_name'])['rating'].count().sort_values(ascending=False).reset_index()
df_rating_count.head()
From there I am supposed to create a list limited to those above 50 reviews, starting from this base:
# Get the restaurant names that have rating count more than 50
rest_names = df_rating_count['______________']['restaurant_name']
# Filter to get the data of restaurants that have rating count more than 50
df_mean_4 = df_rated[df_rated['restaurant_name'].isin(rest_names)].copy()
# Group the restaurant names with their ratings and find the mean rating of each restaurant
df_mean_4.groupby(['_______'])['_______'].mean().sort_values(ascending = False).reset_index().dropna() ## Complete the code to find the mean rating
Where I am stuck is on the first step.
rest_names = df_rating_count['______________']['restaurant_name']
I am pretty confident in the other 2 steps.
df_mean_4 = df_rated[df_rated['restaurant_name'].isin(rest_names)].copy()
df_mean_4.groupby(['restaurant_name'])['rating'].mean().sort_values(ascending = False).reset_index().dropna()
I have frankly tried so many different things I don't even know where to start.
Does anyone have any hints to at least point me in the right direction?
You can index and filter using boolean indexing with []:
# Get the restaurant names that have rating count more than 50
rest_names = df_rating_count[df_rating_count['rating'] > 50]['restaurant_name']
# function to determine the revenue
def compute_rev(x):
    if x > 20:
        return x * 0.25
    elif x > 5:
        return x * 0.15
    else:
        return x * 0

## Write the appropriate column name to compute the revenue
df['Revenue'] = df['________'].apply(compute_rev)
df.head()

Is there a way to filter between two data rows in python?

I am new to Python and I came across a problem in my code. In the code below I am trying to print out a list of flight numbers, destinations, and departure and arrival times, but I'm also trying to filter between two airport codes: origin airports and destination airports.
I ran into the problem of not knowing how to filter between the two codes. Is there a way to do this in Python, and if so, how do I do it?
def flights_from():
    # list of flights for the flights.
    # list of airport nodes
    # list of flight numbers, destination, departure and arrival times
    flights = load_flights('flights_01_01.csv')
    for flight in flights:
        print(
            flight.getOriginairport(),
            flight.getDestinationairport(),
            flight.getFlightnumber(),
            flight.getFlightnumber(),
            flight.getScheduleddeparture(),
            flight.getScheduledarrival()
        )
    # here is where I am stuck. Trying to filter between two airport codes.

flights_from()
For example, the output of the for loop is:
origin airport, destination airports, flight numbers, destination, departure and arrival times
SFO, CLT, 840, 840, 20, 806
I'm also trying to filter between two airport codes: origin airports and destination airports.
If you just want to filter the list based on matching origin and destination, you could do so like this:
def filter_flights(origin, destination):
    flights = load_flights('flights_01_01.csv')
    matching_flights = []
    for flight in flights:
        if (
            flight.getOriginairport() == origin
            and flight.getDestinationairport() == destination
        ):
            matching_flights.append(flight)
    return matching_flights
You could also do this with a list comprehension:
def filter_flights(origin, destination):
    return [
        flight
        for flight in load_flights('flights_01_01.csv')
        if flight.getOriginairport() == origin
        and flight.getDestinationairport() == destination
    ]
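A possible usage sketch (the airport codes are just the example values from the question's output, and it assumes load_flights and the getters behave as shown there):

# print flight number, departure and arrival for all flights from SFO to CLT
for flight in filter_flights('SFO', 'CLT'):
    print(flight.getFlightnumber(), flight.getScheduleddeparture(), flight.getScheduledarrival())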
You may want to try this.
I am not very sure of it myself because I don't know what your file looks like.
import csv

with open('flights_01_01.csv', newline='') as flight_list:
    flights = csv.DictReader(flight_list)
    for flight in flights:
        print(flight['OriginAirport'], flight['DestinationAirport'])
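To filter on a pair of airport codes with this csv approach, you could add a condition inside the loop. A sketch, where the column names (including the flight number and schedule columns) are assumptions about your file, not known from the question:

import csv

origin, destination = 'SFO', 'CLT'  # example values taken from the question's output
with open('flights_01_01.csv', newline='') as flight_list:
    for flight in csv.DictReader(flight_list):
        # keep only rows whose airport codes match; column names are assumed
        if flight['OriginAirport'] == origin and flight['DestinationAirport'] == destination:
            print(flight['FlightNumber'], flight['ScheduledDeparture'], flight['ScheduledArrival'])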

Linear programming question on a Fantasy Premier League fixtures list

My objective is:
to find which teams have the easiest game for each game week
to select 2 teams max in the solution (by permuting score from those 2 teams for each game week)
My data set looks like this: event (1) is the first game week's fixture, event (2) the second game week's, etc.
I am currently able to select the best fixture for each game week using:
for event_id in np.unique(events):
    model += sum(decisions[i] for i in range(event) if events[i] == event_id) == 1  # pick one fixture for each game week
But I do not know how to build a constraint asking the model to pick only 2 teams for the season and to manage the permutation. I have tried a few things with no success.
My LP function is as follows:
def fixtures_analyser(team, events, expected_scores):
    event = len(events)
    objFunction = pulp.LpMaximize
    model = pulp.LpProblem("Constrained value maximisation", objFunction)
    decisions = [
        pulp.LpVariable("x{}".format(i), lowBound=0, upBound=1, cat='Integer')
        for i in range(event)
    ]
    # objective function:
    model += sum((decisions[i]) * (float(expected_scores[i]))
                 for i in range(event)), "Objective"
    # event constraint
    for event_id in np.unique(events):
        model += sum(decisions[i] for i in range(event) if events[i] == event_id) == 1  # pick one fixture per game week
    model.solve()
    print("Total expected score = {}".format(model.objective.value()))
    return decisions
Output is currently like this:
The expected outcome would be to see only 2 teams, e.g. Liverpool and Man City, and not Chelsea etc.
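One common way to express the "at most 2 teams for the season" restriction (a sketch only, not a tested answer: it assumes a fixture_teams list giving the team of fixture i, which is not part of the code above, and it would be added inside fixtures_analyser before model.solve()):

# sketch: one binary indicator per team, linked to the fixture decisions
teams = np.unique(fixture_teams)  # fixture_teams[i] = team playing fixture i (assumed input)
use_team = {
    t: pulp.LpVariable("team_{}".format(t), cat='Binary')
    for t in teams
}

# a fixture can only be selected if its team is one of the chosen teams
for i in range(event):
    model += decisions[i] <= use_team[fixture_teams[i]]

# allow at most 2 distinct teams over the whole season
model += sum(use_team[t] for t in teams) <= 2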

How do I add values in a column with IF statements in Pandas?

I have the following data:
Positions, Department, Salaries
Teacher, 600101, $1000
Janitor, 600230, $500
Principle, 600154, $1500
Secretary, 600342, $750
Manager, 600411, $2000
Teacher, 600105, $1200
Secretary, 600311, $1000
Teacher, 600300, $4000
I am trying to add together all of the salaries for the departments starting with 6001xx, and then also add all the salaries for departments starting 6002xx - 6004xx.
I have this and it kind of works....
def get_salaries(desc):
    lineAmount = df.loc[(df['Position'] == desc) & (df['Department'] < 600200) & (df['Department'] >= 600100)]['Salaries'].values[0]
    return lineAmount

totalSalaries = 0
totalSalaries += get_salaries('Teacher')
print(totalSalaries)
This will only return the first salary. If I take off the .values[0], it will return the salaries for Teachers in the correct departments, but it returns all of them in one output with the index as well, and it will not add them together. I need to be able to add the $1,000 and $1,200 from the teacher salaries in departments starting with 6001xx, and I just cannot figure it out. Any help is appreciated.
You can define a new department_id column for each prefix (6001xx, 6002xx, ...) and then use groupby:
import numpy as np

df['department_id'] = np.floor(df['Department'] / 100)
df.groupby(['department_id'])['Salaries'].sum()
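Note that in the sample data the Salaries column contains strings like $1000, so they need to be converted to numbers before summing. A sketch of the two totals the question asks for (the cleaning step is an assumption based on the sample data shown):

import pandas as pd

# strip the '$' and convert salaries to numbers (format assumed from the sample data)
df['Salaries'] = df['Salaries'].astype(str).str.replace('$', '', regex=False).astype(float)

# total for departments 6001xx
total_6001 = df.loc[df['Department'].between(600100, 600199), 'Salaries'].sum()

# total for departments 6002xx-6004xx
total_6002_6004 = df.loc[df['Department'].between(600200, 600499), 'Salaries'].sum()

print(total_6001, total_6002_6004)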

Natural language processing - extracting data

I need help with processing unstructured data of day-trading/swing-trading/investment recommendations. I have the unstructured data in the form of a CSV.
Following are 3 sample paragraphs from which data needs to be extracted:
Chandan Taparia of Anand Rathi has a buy call on Coal India Ltd. with
an intra-day target price of Rs 338 . The current market
price of Coal India Ltd. is 325.15 . Chandan Taparia recommended to
keep stop loss at Rs 318 .
Kotak Securities Limited has a buy call on Engineers India Ltd. with a
target price of Rs 335 .The current market price of Engineers India Ltd. is Rs 266.05 The analyst gave a year for Engineers
India Ltd. price to reach the defined target. Engineers India enjoys a
healthy market share in the Hydrocarbon consultancy segment. It enjoys
a prolific relationship with few of the major oil & gas companies like
HPCL, BPCL, ONGC and IOC. The company is well poised to benefit from a
recovery in the infrastructure spending in the hydrocarbon sector.
Independent analyst Kunal Bothra has a sell call on Ceat Ltd. with a
target price of Rs 1150 .The current market price of Ceat Ltd. is Rs 1199.6 The time period given by the analyst is 1-3 days
when Ceat Ltd. price can reach the defined target. Kunal Bothra
maintained stop loss at Rs 1240.
It's been a challenge extracting 4 pieces of information from the paragraphs:
Each recommendation is framed differently but essentially has:
Target Price
Stop Loss Price
Current Price
Duration
Not all the information will necessarily be available in every recommendation, but every recommendation will at least have a Target Price.
I was trying to use regular expressions but was not very successful; can anyone guide me on how to extract this information, maybe using nltk?
The code I have so far for cleaning the data:
import pandas as pd
import re
#etanalysis_final.csv has 4 columns with
#0th Column having data time
#1st Column having a simple hint like 'Sell Ceat Ltd. target Rs 1150 : Kunal Bothra,Sell Ceat Ltd. at a price target of Rs 1150 and a stoploss at Rs 1240 from entry point', not all the hints are same, I can rely on it for recommender, Buy or Sell, which stock.
#4th column has the detailed recommendation given.
df = pd.read_csv('etanalysis_final.csv',encoding='ISO-8859-1')
df.DATE = pd.to_datetime(df.DATE)
df.dropna(inplace=True)
df['RECBY'] = df['C1'].apply(lambda x: re.split(':|\x96',x)[-1].strip())
df['ACT'] = df['C1'].apply(lambda x: x.split()[0].strip())
df['STK'] = df['C1'].apply(lambda x: re.split('\.|\,|:| target| has| and|Buy|Sell| with',x)[1])
#Getting the target price - not always correct
df['TGT'] = df['C4'].apply(lambda x: re.findall('\d+.', x)[0])
#Getting the stop loss price - not always correct
df['STL'] = df['C4'].apply(lambda x: re.findall('\d+.\d+', x)[-1])
This is a hard question in that there are different possibilities for how each of the 4 pieces of information might be written. Here is a naive approach that might work, though it would require verification. I'll do the example for the target price, but you can extend this to the others:
CONTEXT = 6

def is_float(x):
    try:
        float(x)
        return True
    except ValueError:
        return False

def get_target_price(s):
    words = s.split()
    n = words.index('target')
    words_in_range = words[n-CONTEXT:n+CONTEXT]
    return float(list(filter(is_float, words_in_range))[0])  # returns the first float found in the window
This is a simple approach to get you started, but you can add extra checks to make it safer. Things to potentially improve:
Make sure that the word just before the proposed float is "Rs".
If no float is found in the context range, expand the context.
Add user verification if there are ambiguities, i.e. more than one instance of "target" or more than one float in the context range, etc.
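As an illustration of the first point, a regex anchored on the Rs marker could look like this (a sketch only; the pattern and the test sentence, taken from one of the sample paragraphs, are my own additions, not part of the answer above):

import re

def get_target_price_regex(text):
    # pull the number that follows "target price of Rs" (pattern is an assumption)
    match = re.search(r'target price of Rs\s*([\d.]+)', text, flags=re.IGNORECASE)
    return float(match.group(1)) if match else None

print(get_target_price_regex(
    'Kotak Securities Limited has a buy call on Engineers India Ltd. with a target price of Rs 335 .'
))  # prints 335.0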
I got the solution:
The code here contains only the solution part of the question asked. It should be possible to greatly improve this solution using the fuzzywuzzy library.
from nltk import word_tokenize

periods = ['year', "year's", 'day', 'days', "day's", 'month', "month's", 'week', "week's", 'intra-day', 'intraday']
stop = ['target', 'current', 'stop', 'period', 'stoploss']

def isfloat(x):
    # same numeric check as is_float above
    try:
        float(x)
        return True
    except ValueError:
        return False

def extractinfo(row):
    if 'intra day' in row.lower():
        row = row.lower().replace('intra day', 'intra-day')
    # keep only keyword tokens and numeric tokens
    tks = [w for w in word_tokenize(row) if any([w.lower() in stop, isfloat(w)])]
    tgt = ''
    crt = ''
    stp = ''
    prd = ''
    if 'target' in tks:
        if len(tks[tks.index('target'):tks.index('target')+2]) == 2:
            tgt = tks[tks.index('target'):tks.index('target')+2][-1]
    if 'current' in tks:
        if len(tks[tks.index('current'):tks.index('current')+2]) == 2:
            crt = tks[tks.index('current'):tks.index('current')+2][-1]
    if 'stop' in tks:
        if len(tks[tks.index('stop'):tks.index('stop')+2]) == 2:
            stp = tks[tks.index('stop'):tks.index('stop')+2][-1]
    prdd = set(periods).intersection(tks)
    if 'period' in tks:
        pdd = tks[tks.index('period'):tks.index('period')+3]
        prr = set(periods).intersection(pdd)
        if len(prr) > 0:
            if len(pdd) > 2:
                prd = ' '.join(pdd[-2::1])
            elif len(pdd) == 2:
                prd = pdd[-1]
    elif len(prdd) > 0:
        prd = list(prdd)[0]
    return (crt, tgt, stp, prd)
The solution is relatively self-explanatory; otherwise please let me know.
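A possible way to apply it to the dataframe from the question (a sketch; C4 is the detailed-recommendation column named in the cleaning code above, pandas is assumed to be imported as pd there, and the new column names are arbitrary):

# apply the extractor row by row and unpack the 4-tuples into new columns
results = df['C4'].apply(extractinfo)
extracted = pd.DataFrame(results.tolist(), index=df.index,
                         columns=['CURRENT', 'TARGET', 'STOPLOSS', 'PERIOD'])
df = df.join(extracted)
df.head()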
