I'm trying to solve a two-step problem. In the first step I run an assignment model that finds the best pickup-and-delivery arcs between nodes, because not all vehicles can transport the same products (among other complications the problem has). The arcs produced by the first model are the input of the second model, a VRP, as data['pickups_deliveries']. The code below is a simple example in which the model works, but a node cannot be a delivery node and a pickup node at the same time, which is what I need to solve.
"""Capacited Vehicles Routing Problem (CVRP)."""
from ortools.constraint_solver import routing_enums_pb2
from ortools.constraint_solver import pywrapcp
def create_data_model():
    """Build the input data for the routing problem.

    Returns:
        dict with the keys
        - 'distance_matrix': square list of lists, entry [i][j] is the
          distance from node i to node j.
        - 'pickups_deliveries': list of [pickup_node, delivery_node] pairs.
        - 'demands': one demand value per node of the distance matrix.
        - 'vehicle_capacities': one capacity value per vehicle.
        - 'num_vehicles': number of vehicles.
        - 'depot': node index of the depot.
    """
    data = {}
    data['distance_matrix'] = [
        [0, 548, 776, 696, 582, 274, 502, 194, 308, 194, 536, 502, 388, 354,
         468, 776, 662],
        [548, 0, 684, 308, 194, 502, 730, 354, 696, 742, 1084, 594, 480, 674,
         1016, 868, 1210],
        [776, 684, 0, 992, 878, 502, 274, 810, 468, 742, 400, 1278, 1164,
         1130, 788, 1552, 754],
        [696, 308, 992, 0, 114, 650, 878, 502, 844, 890, 1232, 514, 628, 822,
         1164, 560, 1358],
        [582, 194, 878, 114, 0, 536, 764, 388, 730, 776, 1118, 400, 514, 708,
         1050, 674, 1244],
        [274, 502, 502, 650, 536, 0, 228, 308, 194, 240, 582, 776, 662, 628,
         514, 1050, 708],
        [502, 730, 274, 878, 764, 228, 0, 536, 194, 468, 354, 1004, 890, 856,
         514, 1278, 480],
        [194, 354, 810, 502, 388, 308, 536, 0, 342, 388, 730, 468, 354, 320,
         662, 742, 856],
        [308, 696, 468, 844, 730, 194, 194, 342, 0, 274, 388, 810, 696, 662,
         320, 1084, 514],
        [194, 742, 742, 890, 776, 240, 468, 388, 274, 0, 342, 536, 422, 388,
         274, 810, 468],
        [536, 1084, 400, 1232, 1118, 582, 354, 730, 388, 342, 0, 878, 764,
         730, 388, 1152, 354],
        [502, 594, 1278, 514, 400, 776, 1004, 468, 810, 536, 878, 0, 114,
         308, 650, 274, 844],
        [388, 480, 1164, 628, 514, 662, 890, 354, 696, 422, 764, 114, 0, 194,
         536, 388, 730],
        [354, 674, 1130, 822, 708, 628, 856, 320, 662, 388, 730, 308, 194, 0,
         342, 422, 536],
        [468, 1016, 788, 1164, 1050, 514, 514, 662, 320, 274, 388, 650, 536,
         342, 0, 764, 194],
        [776, 868, 1552, 560, 674, 1050, 1278, 742, 1084, 810, 1152, 274,
         388, 422, 764, 0, 798],
        [662, 1210, 754, 1358, 1244, 708, 480, 856, 514, 468, 354, 844, 730,
         536, 194, 798, 0],
    ]
    data['pickups_deliveries'] = [
        [1, 6],
        [2, 10],
        [4, 3],
        [5, 9],
        [7, 8],
        [15, 11],
        [13, 12],
        [16, 14],
    ]
    # One unit of demand per node. Derived from the matrix size so the two
    # can never disagree (the hand-written list had 18 entries for 17 nodes).
    data['demands'] = [1] * len(data['distance_matrix'])
    data['vehicle_capacities'] = [1, 1, 1, 1, 1, 1, 1, 1, 1]
    data['num_vehicles'] = 9
    data['depot'] = 0
    return data
def print_solution(data, manager, routing, solution):
    """Print the objective, then each vehicle's route, distance and load.

    Args:
        data: problem dict; 'num_vehicles' and 'demands' are read.
        manager: object used to turn routing indices into node indices.
        routing: routing model queried for route starts, ends and arc costs.
        solution: assignment holding the objective and the successor values.
    """
    print(f'Objective: {solution.ObjectiveValue()}')
    distance_sum = 0
    load_sum = 0
    for vehicle_id in range(data['num_vehicles']):
        pieces = [f'Route for vehicle {vehicle_id}:\n']
        travelled = 0
        carried = 0
        current = routing.Start(vehicle_id)
        # Follow the successor chain until the vehicle's end index.
        while not routing.IsEnd(current):
            node = manager.IndexToNode(current)
            carried += data['demands'][node]
            pieces.append(f' {node} Load({carried}) -> ')
            following = solution.Value(routing.NextVar(current))
            travelled += routing.GetArcCostForVehicle(
                current, following, vehicle_id)
            current = following
        # The end node is printed with the load accumulated so far.
        pieces.append(f' {manager.IndexToNode(current)} Load({carried})\n')
        pieces.append(f'Distance of the route: {travelled}m\n')
        pieces.append(f'Load of the route: {carried}\n')
        print(''.join(pieces))
        distance_sum += travelled
        load_sum += carried
    print(f'Total distance of all routes: {distance_sum}m')
    print(f'Total load of all routes: {load_sum}')
def main():
    """Build the pickup-and-delivery routing model, solve it and print the result.

    Prints the routes when a solution is found and a short message otherwise.
    """
    # Instantiate the data problem.
    data = create_data_model()

    # Create the routing index manager: one node per distance-matrix row.
    manager = pywrapcp.RoutingIndexManager(len(data['distance_matrix']),
                                           data['num_vehicles'], data['depot'])

    # Create Routing Model.
    routing = pywrapcp.RoutingModel(manager)

    # Define cost of each arc.
    def distance_callback(from_index, to_index):
        """Return the distance-matrix entry for the arc between two indices."""
        # Convert from routing variable Index to distance matrix NodeIndex.
        from_node = manager.IndexToNode(from_index)
        to_node = manager.IndexToNode(to_index)
        return data['distance_matrix'][from_node][to_node]

    transit_callback_index = routing.RegisterTransitCallback(distance_callback)
    routing.SetArcCostEvaluatorOfAllVehicles(transit_callback_index)

    # Add Distance constraint.
    dimension_name = 'Distance'
    routing.AddDimension(
        transit_callback_index,
        0,  # no slack
        3000,  # vehicle maximum travel distance
        True,  # start cumul to zero
        dimension_name)
    distance_dimension = routing.GetDimensionOrDie(dimension_name)
    distance_dimension.SetGlobalSpanCostCoefficient(100)

    # Define Transportation Requests.
    for pickup_node, delivery_node in data['pickups_deliveries']:
        pickup_index = manager.NodeToIndex(pickup_node)
        delivery_index = manager.NodeToIndex(delivery_node)
        routing.AddPickupAndDelivery(pickup_index, delivery_index)
        # Both ends of a request must be served by the same vehicle ...
        routing.solver().Add(
            routing.VehicleVar(pickup_index) == routing.VehicleVar(
                delivery_index))
        # ... and the pickup must not come after the delivery on that route.
        routing.solver().Add(
            distance_dimension.CumulVar(pickup_index) <=
            distance_dimension.CumulVar(delivery_index))
    routing.SetPickupAndDeliveryPolicyOfAllVehicles(
        pywrapcp.RoutingModel.PICKUP_AND_DELIVERY_FIFO)

    # Setting first solution heuristic.
    search_parameters = pywrapcp.DefaultRoutingSearchParameters()
    search_parameters.first_solution_strategy = (
        routing_enums_pb2.FirstSolutionStrategy.PATH_CHEAPEST_ARC)
    search_parameters.local_search_metaheuristic = (
        routing_enums_pb2.LocalSearchMetaheuristic.GUIDED_LOCAL_SEARCH)
    search_parameters.time_limit.FromSeconds(1)

    # Solve the problem.
    solution = routing.SolveWithParameters(search_parameters)

    # Print solution on console; say so explicitly when the solver found none
    # instead of exiting silently.
    if solution:
        print_solution(data, manager, routing, solution)
    else:
        print('No solution found !')


if __name__ == '__main__':
    main()
Route for vehicle 0:
0 Load(1) -> 4 Load(2) -> 3 Load(3) -> 5 Load(4) -> 9 Load(5) -> 0 Load(5)
Distance of the route: 1780m
Load of the route: 5
Route for vehicle 1:
0 Load(1) -> 2 Load(2) -> 10 Load(3) -> 0 Load(3)
Distance of the route: 1712m
Load of the route: 3
Route for vehicle 2:
0 Load(1) -> 0 Load(1)
Distance of the route: 0m
Load of the route: 1
Route for vehicle 3:
0 Load(1) -> 0 Load(1)
Distance of the route: 0m
Load of the route: 1
Route for vehicle 4:
0 Load(1) -> 0 Load(1)
Distance of the route: 0m
Load of the route: 1
Route for vehicle 5:
0 Load(1) -> 0 Load(1)
Distance of the route: 0m
Load of the route: 1
Route for vehicle 6:
0 Load(1) -> 1 Load(2) -> 6 Load(3) -> 0 Load(3)
Distance of the route: 1780m
Load of the route: 3
Route for vehicle 7:
0 Load(1) -> 7 Load(2) -> 8 Load(3) -> 16 Load(4) -> 14 Load(5) -> 0 Load(5)
Distance of the route: 1712m
Load of the route: 5
Route for vehicle 8:
0 Load(1) -> 13 Load(2) -> 12 Load(3) -> 15 Load(4) -> 11 Load(5) -> 0 Load(5)
Distance of the route: 1712m
Load of the route: 5
This code works fine for a simple assignment where each pickup node is only a pickup node and each delivery node is only a delivery node. But if I want a node to be both a pickup and a delivery node, I thought I could add it as another arc, for example making node 14, formerly only a delivery node, also the pickup node of the arc [14, 13]. I thought I could force one vehicle to go 16 -> 14 -> 13 -> 12 by adding this arc to data['pickups_deliveries'], but Python crashes and stops working.
# Pickup/delivery pairs from the working example plus one extra pair that
# reuses nodes already present in other pairs.
data['pickups_deliveries'] = [
[1, 6],
[2, 10],
[4, 3],
[5, 9],
[7, 8],
[15, 11],
[13, 12],
[16, 14],
[14,13] # Added: 14 is already the delivery of [16, 14] and 13 the pickup of [13, 12]
]
Mainly what I want to do is be able to add graphs where in one a node can be a pickup node and in another the same node can be a delivery one.
Thanks and sorry for the extension.
You must duplicate the node and adapt your transit callback accordingly.
Then you could merge node id when post processing the solution assignment.
Another way is to hack the transit callback to do the mapping there so you have to recompute a new transit matrix.
e.g.
Create duplicate nodes 17 and 18 for nodes 13 and 14 respectively (17 stands for 13, 18 stands for 14),
so that you can add the new P&D pair [18, 17] in place of [14, 13].
in your transit callback:
def distance_callback(from_index, to_index):
    """Return the distance-matrix entry between two routing indices.

    Duplicate nodes 17 and 18 have no matrix row of their own; they are
    looked up under the nodes they stand for (13 and 14).
    """
    # Duplicate node id -> node id that owns the matrix row/column.
    stands_for = {17: 13, 18: 14}
    # Convert from routing variable Index to distance matrix NodeIndex.
    origin = manager.IndexToNode(from_index)
    origin = stands_for.get(origin, origin)
    target = manager.IndexToNode(to_index)
    target = stands_for.get(target, target)
    return data['distance_matrix'][origin][target]
and also change
# [START index_manager]
# The node count is the matrix size plus 2: the two extra ids are the
# duplicate nodes (17 and 18), which have no row in the distance matrix.
manager = pywrapcp.RoutingIndexManager(len(data['distance_matrix']) + 2,
data['num_vehicles'], data['depot'])
# [END index_manager]
Related
In chapter 2 of "Python Data Science Handbook" by Jake VanderPlas, he computes the sum of squared differences of several 2-d points using the following code:
# Ten random 2-d points, reproducible through the fixed seed.
rand = np.random.RandomState(42)
X = rand.rand(10, 2)
# Broadcasting (10, 1, 2) against (1, 10, 2) yields every pairwise coordinate
# difference; the differences are squared first and then summed over the
# coordinate axis, giving a (10, 10) matrix of squared distances.
dist_sq = np.sum((X[:, np.newaxis, :] - X[np.newaxis, :, :]) ** 2, axis=-1)
Two questions:
Why is a third axis created? What is the best way to visualize what is going on?
Is there a more intuitive way to perform this calculation?
Why is a third axis created? What is the best way to visualize what is going on?
The adding new dimensions before adding/subtracting trick is a relatively common one to generate all pairs, by using broadcasting (None is the same as np.newaxis here):
>>> a = np.arange(10)
>>> a[:,None]
array([[0],
[1],
[2],
[3],
[4],
[5],
[6],
[7],
[8],
[9]])
>>> a[None,:]
array([[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]])
>>> a[:,None] + 100*a[None,:]
array([[ 0, 100, 200, 300, 400, 500, 600, 700, 800, 900],
[ 1, 101, 201, 301, 401, 501, 601, 701, 801, 901],
[ 2, 102, 202, 302, 402, 502, 602, 702, 802, 902],
[ 3, 103, 203, 303, 403, 503, 603, 703, 803, 903],
[ 4, 104, 204, 304, 404, 504, 604, 704, 804, 904],
[ 5, 105, 205, 305, 405, 505, 605, 705, 805, 905],
[ 6, 106, 206, 306, 406, 506, 606, 706, 806, 906],
[ 7, 107, 207, 307, 407, 507, 607, 707, 807, 907],
[ 8, 108, 208, 308, 408, 508, 608, 708, 808, 908],
[ 9, 109, 209, 309, 409, 509, 609, 709, 809, 909]])
Your example does the same, just with 2-vectors instead of scalars at the innermost level:
>>> X[:,np.newaxis,:].shape
(10, 1, 2)
>>> X[np.newaxis,:,:].shape
(1, 10, 2)
>>> (X[:,np.newaxis,:] - X[np.newaxis,:,:]).shape
(10, 10, 2)
Thus we find that the 'magical subtraction' is just all combinations of the coordinate X subtracted from each other.
Is there a more intuitive way to perform this calculation?
Yes, use scipy.spatial.distance.pdist for pairwise distances. To get an equivalent result to your example:
from scipy.spatial.distance import pdist, squareform
# Ask pdist for squared Euclidean distances directly instead of taking the
# square root and squaring it again; squareform expands the condensed
# result into the full symmetric matrix.
dist_sq = squareform(pdist(X, metric='sqeuclidean'))
I've such a square matrix:
[[0, 516, 226, 853, 1008, 1729, 346, 1353, 1554, 827, 226, 853, 1729, 1008],
[548, 0, 474, 1292, 1442, 2170, 373, 1801, 1989, 1068, 474, 1292, 2170, 1442],
[428, 466, 0, 1103, 1175, 1998, 226, 1561, 1715, 947, 0, 1103, 1998, 1175],
[663, 1119, 753, 0, 350, 1063, 901, 681, 814, 1111, 753, 0, 1063, 350],
[906, 1395, 1003, 292, 0, 822, 1058, 479, 600, 1518, 1003, 292, 822, 0],
[1488, 1994, 1591, 905, 776, 0, 1746, 603, 405, 1676, 1591, 905, 0, 776],
[521, 357, 226, 1095, 1167, 1987, 0, 1552, 1705, 1051, 226, 1095, 1987, 1167],
[1092, 1590, 1191, 609, 485, 627, 1353, 0, 422, 1583, 1191, 609, 627, 485],
[1334, 1843, 1436, 734, 609, 396, 1562, 421, 0, 1745, 1436, 734, 396, 609],
[858, 1186, 864, 1042, 1229, 1879, 984, 1525, 1759, 0, 864, 1042, 1879, 1229],
[428, 466, 0, 1103, 1175, 1998, 226, 1561, 1715, 947, 0, 1103, 1998, 1175],
[663, 1119, 753, 0, 350, 1063, 901, 681, 814, 1111, 753, 0, 1063, 350],
[1488, 1994, 1591, 905, 776, 0, 1746, 603, 405, 1676, 1591, 905, 0, 776],
[906, 1395, 1003, 292, 0, 822, 1058, 479, 600, 1518, 1003, 292, 822, 0]]
And I need to remove, say, the rows and columns at indices a1, a2 and a3 at the same time. How can I do this? What is a neat way?
Note that, I need to get another square matrix. Both rows and columns at the same index should be removed. Also note that, when you remove a row/column, indexes get shifted. Either I need to shift e.g. a1, a2, a3 too or do something more clever.
An example case
The square matrix:
[[10,11,12,13],
[14,15,16,17],
[18,19,20,21],
[22,23,24,25]]
remove 1st and 3rd indexes and the result is:
[[10,12],
[18,20]]
If you are open to other packages, pandas can make it easy:
import pandas as pd
# Labels to discard; the same list is applied to the rows and to the columns.
to_drop = [a1, a2, a3]
# Drop along both axes in one call, then return to a plain array.
out = pd.DataFrame(a).drop(index=to_drop, columns=to_drop).to_numpy()
Update: output of the code on sample data
array([[10, 12],
[18, 20]])
If you want numpy only and assuming the array is always squared:
# 4x4 sample matrix holding 10..25 in row-major order.
a = np.arange(10, 26).reshape(4, 4)
# Positions that survive: everything except the removed ones (1 and 3).
removed = (1, 3)
valid = [pos for pos in range(len(a)) if pos not in removed]
# Select the surviving rows first, then the surviving columns.
a[valid][:, valid]
>>>array([[10, 12],
[18, 20]])
Try this method in numpy. np.ix_ creates a meshgrid for you to index the numpy array columns and rows. The list of indexes can simply be created by taking the set.difference between the range of rows in square matrix and the list of indexes of row/columns you want to remove -
sqm = np.array([[10, 11, 12, 13],
                [14, 15, 16, 17],
                [18, 19, 20, 21],
                [22, 23, 24, 25]])
rem = [1, 3]  # Rows/columns to remove
# Set iteration order is not specified, so sort the surviving indices;
# otherwise the kept rows/columns could come out permuted.
idx = sorted(set(range(sqm.shape[0])).difference(rem))
print('Rows/columns to keep:', idx)
# np.ix_ builds the open mesh that picks rows idx and columns idx together.
output = sqm[np.ix_(idx, idx)]
print(output)
Rows/columns to keep: [0, 2]
array([[10, 12],
[18, 20]])
EDIT: Benchmarking results are added below for square matrix 10000X10000 and ~500 row/columns to remove. (macbook pro 13)
# Benchmark input: a 10000 x 10000 matrix of random floats.
sqm = np.random.random((10000,10000))
# 500 random indices in [0, 10000); np.unique removes repeats, so the final
# count can be slightly below 500.
rem = np.unique(np.random.randint(0,10000,size=500))
Quang Hoang's Approach - 841 ms ± 8.19 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
MBeale's Approach - 1.62 s ± 48.9 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
Akshay Sehgal's Approach - 655 ms ± 19.4 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
I have a data frame that looks like this:
Season Dist
0 '14 - '15 [120, 128, 175, 474, 615]
1 '15 - '16 [51, 305, 398, 839, 991, 1093, 1304]
2 '16 - '17 [223, 293, 404, 588, 661, 706, 964, 1049, 1206]
3 '17 - '18 [12, 37, 204, 229, 276, 349, 809, 845, 1072, 1...
4 '18 - '19 [210, 214, 259, 383, 652, 798, 1150]
5 '19 - '20 [182, 206, 221, 282, 283, 297, 1330, 1332]
I'm trying to plot it with matplotlib where the x axis is the range of instances and for each season on the y axis, the plot shows the distribution of the df['Dist']. I've sketched a very crappy graph below to illustrate my point.
Does anyone know how I could do this?
Plot each list individually on the same graph. The list values will work as x-coordinates, so for y-coordinates map each season values to ints. i.e something like this
Season Dist
0 0 [120, 128, 175, 474, 615]
1 1 [51, 305, 398, 839, 991, 1093, 1304]
2 ' 2 [223, 293, 404, 588, 661, 706, 964, 1049, 1206]
Now scatterplot will require y-coordinates for every x-coordinate.
So create something like this
y x
[0,0,0,0,0] [120, 128, 175, 474, 615]
[1,1,1,1,1,1,1] [51, 305, 398, 839, 991, 1093, 1304]
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
df = pd.DataFrame({
    'Season': ['14 - 15', '15 - 16', '16 - 17'],
    'Dist': [
        [120, 128, 175, 474, 615],
        [51, 305, 398, 839, 991, 1093, 1304],
        [223, 293, 404, 588, 661, 706, 964, 1049, 1206],
    ],
})
y = np.arange(len(df))  # map each season to an integer y-position
for y_pos, dists in zip(y, df['Dist']):
    # Every value of a season is drawn at that season's y-position, so the
    # y-list repeats the position once per x-value.
    plt.scatter(dists, [y_pos] * len(dists))
plt.yticks(y, df['Season'])  # show the season names as y tick labels
plt.show()
I have a json data which looks like this:
"rows": [
["2019-08-02", 364, 209, 2, 2],
["2019-08-03", 386, 250, 2, 5],
["2019-08-04", 382, 221, 3, 1],
["2019-08-05", 361, 218, 1, 0],
["2019-08-06", 338, 205, 4, 0],
["2019-08-07", 353, 208, 2, 2],
["2019-08-08", 405, 223, 2, 2],
["2019-08-09", 405, 266, 2, 2],
["2019-08-10", 494, 288, 0, 1],
]
I want the data to have the following headers (they are not included in the JSON file):
["day", "estimatedPeopleVisited", "bought", "gives_pfeedback", "gives_nfeedback"]
I tried following code for reading file:
# Load the JSON file as-is. As the printed output shows, this yields a single
# 'rows' column whose cells are the five-element lists, not five columns.
f = pd.read_json("data1308.json")
print(f)
and this gives output like:
rows
0 [2019-08-02, 364, 209, 2, 2]
1 [2019-08-03, 386, 250, 2, 5]
2 [2019-08-04, 382, 221, 3, 1]
3 [2019-08-05, 361, 218, 1, 0]
4 [2019-08-06, 338, 205, 4, 0]
5 [2019-08-07, 353, 208, 2, 2]
6 [2019-08-08, 405, 223, 2, 2]
7 [2019-08-09, 405, 266, 2, 2]
8 [2019-08-10, 494, 288, 0, 1]
I expect the output in form of:
day est bought gives_pfeedback gives_nfeedback
0 2019-08-02 364 209 2 2
1 2019-08-03 386 250 2 5
2 2019-08-04 382 221 3 1
3 2019-08-05 361 218 1 0
4 2019-08-06 338 205 4 0
. . . . . .
. . . . . .
. . . . . .
I can transform the data into the specified form after reading it in its original format, but is there any way to read the JSON data directly into the specified format?
What about this?
import pandas as pd
# Same structure as the JSON file: one key, "rows", holding a list of records.
data = {"rows": [
    ["2019-08-02", 364, 209, 2, 2],
    ["2019-08-03", 386, 250, 2, 5],
    ["2019-08-04", 382, 221, 3, 1],
    ["2019-08-05", 361, 218, 1, 0],
    ["2019-08-06", 338, 205, 4, 0],
    ["2019-08-07", 353, 208, 2, 2],
    ["2019-08-08", 405, 223, 2, 2],
    ["2019-08-09", 405, 266, 2, 2],
    ["2019-08-10", 494, 288, 0, 1],
]}
cols = ["day", "estimatedPeopleVisited", "bought", "gives_pfeedback", "gives_nfeedback"]
# data["rows"] is a list of records, not a dict, so use the plain constructor
# and name the columns in the same call.
df = pd.DataFrame(data["rows"], columns=cols)
import pandas as pd
import numpy as np
# Read the whitespace-separated table in chunks of 30 rows, skipping the
# first 33 lines of the file, then stitch the chunks back together.
f = pd.read_csv('151101.mnd', skiprows=33, sep=r'\s+', chunksize=30)
data = pd.concat(f)
# Coerce every column to numbers; values that cannot be parsed become NaN.
# (Replaces DataFrame.convert_objects, which current pandas no longer has.)
data = data.apply(pd.to_numeric, errors='coerce')
print(data.head())
print('')
height = data['#']
wspd = data['z']
# np.where returns a tuple holding one array of matching positions.
hub = np.where(height == 80)
print(hub)
Beginning Part of the File:
# z speed dir W sigW bck error
0 30 5.05 333.0 0.23 0.13 144000 0 NaN
1 40 5.05 337.1 -0.02 0.14 7690 0 NaN
2 50 5.03 338.5 0.00 0.15 4830 0 NaN
3 60 6.21 344.3 -0.09 0.18 6130 0 NaN
4 70 5.30 336.5 0.01 0.21 158000 0 NaN
Output (indices Where height column = 80):
(array([ 5, 37, 69, 101, 133, 165, 197, 229, 261, 293, 325,
357, 389, 421, 453, 485, 517, 549, 581, 613, 645, 677,
709, 741, 773, 805, 837, 869, 901, 933, 965, 997, 1029,
1061, 1093, 1125, 1157, 1189, 1221, 1253, 1285, 1317, 1349, 1381,
1413, 1445, 1477, 1509, 1541, 1573, 1605, 1637, 1669, 1701, 1733,
1765, 1797, 1829, 1861, 1893, 1925, 1957, 1989, 2021, 2053, 2085,
2117, 2149, 2181, 2213, 2245, 2277, 2309, 2341, 2373, 2405, 2437,
2469, 2501, 2533, 2565, 2597, 2629, 2661, 2693, 2725, 2757, 2789,
2821, 2853, 2885, 2917, 2949, 2981, 3013, 3045, 3077, 3109, 3141,
3173, 3205, 3237, 3269, 3301, 3333, 3365, 3397, 3429, 3461, 3493,
3525, 3557, 3589, 3621, 3653, 3685, 3717, 3749, 3781, 3813, 3845,
3877, 3909, 3941, 3973, 4005, 4037, 4069, 4101, 4133, 4165, 4197,
4229, 4261, 4293, 4325, 4357, 4389, 4421, 4453, 4485, 4517, 4549,
4581], dtype=int64),)
So I want to find the wspd, data['z'], where the height, data['#'], equals 80 and store that as a variable. How do I do this? I tried np.where(height==80) and stored the result in a variable 'hub', but when I take wspd at the indices of hub, wspd[hub], I get an error: ValueError: Can only tuple-index with a MultiIndex. Is there an easier way to do this?
Example usage :
import pandas as pd
import numpy as np
df1 = pd.DataFrame({'A': [2, 3, 2, 5],
                    'B': ['B0', 'B1', 'B2', 'B3'],
                    'C': ['C0', 'C1', 'C2', 'C3'],
                    'D': ['D0', 'D1', 'D2', 'D3']},
                   index=[0, 1, 2, 3])
print(df1)
c = df1[df1.A == 2].index  # index labels of the rows where column 'A' is 2
# c holds labels, so select with .loc; .iloc would read them as positions
# and only gives the same rows while the index happens to be 0..n-1.
d = df1.loc[c]  # subset of the frame with only those rows
d_values = df1.loc[c, 'B'].values  # array of the column 'B' values of those rows
Output:
array(['B0', 'B2'], dtype=object)
In your case:
hub = data[data['#'] == 80].index  # index labels of the rows whose '#' column is 80
new_data = data.loc[hub]  # labels are selected with .loc; .iloc would treat them as positions
To get the wspd values only, use this instead:
new_data = data.loc[hub, 'z'].values  # wind-speed column picked by name rather than position; returns an array