diff --git a/notebooks/solutions.ipynb b/notebooks/solutions.ipynb
index 779d2d7..0a79c0c 100644
--- a/notebooks/solutions.ipynb
+++ b/notebooks/solutions.ipynb
@@ -296,17 +296,85 @@
"id": "19641daf",
"metadata": {},
"source": [
- "## TSP Exercise: Measure search overhead"
+ "### Exercise: Measure search overhead\n",
+ "Modify the serial and parallel algorithms so that each function returns the number of nodes of the search tree that it visits. You can then compare how many more nodes the parallel algorithm visits than the serial one (this difference is known as _search overhead_). Finally, use the third cell to gather some statistics about the search overhead with your modified functions. "
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "22ccce39",
+ "metadata": {},
+ "source": [
+ "## TSP: Exercise x (Measure search overhead)\n",
+ "The following cells show one way to alter the code so that it measures the search overhead:"
]
},
{
"cell_type": "code",
- "execution_count": null,
- "id": "f00557a0",
+ "execution_count": 13,
+ "id": "a4d5ab70",
"metadata": {},
"outputs": [],
+ "source": [
+ "using Distributed\n",
+ "\n",
+ "if procs() == workers() # holds only while no worker processes exist yet (workers() is then [1])\n",
+ " addprocs(4)\n",
+ "end\n",
+ "\n",
+ "# Report whether `city` occurs among the first `hops` entries of `path`.\n",
+ "@everywhere function visited(city,hops,path)\n",
+ "    # Entries after position `hops` are not inspected.\n",
+ "    for k in 1:hops\n",
+ "        path[k] == city && return true\n",
+ "    end\n",
+ "    return false\n",
+ "end"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "id": "bcee99f0",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "tsp_serial (generic function with 1 method)"
+ ]
+ },
+ "execution_count": 14,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"## TSP serial \n",
+ "# Recursive branch-and-bound step; returns (min_path, min_distance, node_count).\n",
+ "function tsp_serial_impl(connections,hops,path,current_distance, min_path, min_distance, node_count)\n",
+ "    if hops == length(connections)\n",
+ "        # Complete tour: record it only if it beats the best distance so far.\n",
+ "        if current_distance < min_distance\n",
+ "            min_path .= path\n",
+ "            min_distance = current_distance\n",
+ "        end\n",
+ "        return min_path, min_distance, node_count\n",
+ "    end\n",
+ "    from_city = path[hops]\n",
+ "    depth = hops + 1\n",
+ "    for (to_city,step) in connections[from_city]\n",
+ "        visited(to_city,hops,path) && continue\n",
+ "        node_count += 1   # every unvisited neighbor is counted, even if pruned below\n",
+ "        path[depth] = to_city\n",
+ "        total = current_distance + step\n",
+ "        # Prune: descend only while the partial tour is shorter than the best one.\n",
+ "        total < min_distance || continue\n",
+ "        min_path, min_distance, node_count = tsp_serial_impl(connections,depth,path,total,min_path,min_distance,node_count)\n",
+ "    end\n",
+ "    return min_path, min_distance, node_count\n",
+ "end\n",
+ "\n",
"function tsp_serial(connections,city)\n",
" num_cities = length(connections)\n",
" path=zeros(Int,num_cities)\n",
@@ -315,29 +383,40 @@
" min_path = zeros(Int, num_cities)\n",
" current_distance = 0\n",
" min_distance = typemax(Int)\n",
- " # Collect search time \n",
- " search_time = @elapsed min_path, min_distance = tsp_serial_impl(connections,hops,path,current_distance, min_path, min_distance)\n",
- " (;path=min_path,distance=min_distance, search_time)\n",
+ " node_count = 1\n",
+ " # Count the number of nodes visited in recursive function and return\n",
+ " min_path, min_distance, node_count = tsp_serial_impl(connections,hops,path,current_distance, min_path, min_distance, node_count)\n",
+ " (;path=min_path,distance=min_distance, node_count)\n",
"end"
]
},
{
"cell_type": "code",
- "execution_count": null,
- "id": "30784da2",
+ "execution_count": 15,
+ "id": "327f5349",
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "tsp_dist (generic function with 1 method)"
+ ]
+ },
+ "execution_count": 15,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"## TSP distributed\n",
- "@everywhere function tsp_dist_impl(wait_time, connections,hops,path,current_distance,min_dist_chnl, max_hops,jobs_chnl,ftr_result)\n",
+ "@everywhere function tsp_dist_impl(connections,hops,path,current_distance,min_dist_chnl, max_hops,jobs_chnl,ftr_result,node_count)\n",
" num_cities = length(connections)\n",
" if hops == num_cities\n",
" min_distance = fetch(min_dist_chnl)\n",
" if current_distance < min_distance\n",
" take!(min_dist_chnl)\n",
- " # Collect wait time to substract from overall search time \n",
" if ftr_result !== nothing\n",
- " wait_time += @elapsed @spawnat 1 begin\n",
+ " @spawnat 1 begin\n",
" result = fetch(ftr_result)\n",
" result.path .= path\n",
" result.min_distance_ref[] = current_distance\n",
@@ -351,23 +430,21 @@
" for (next_city,distance_increment) in connections[current_city]\n",
" if !visited(next_city,hops,path)\n",
" path[next_hops] = next_city\n",
+ " node_count += 1\n",
" next_distance = current_distance + distance_increment\n",
- " # Collect wait time because fetch may block\n",
- " wait_time += @elapsed min_distance = fetch(min_dist_chnl)\n",
+ " min_distance = fetch(min_dist_chnl)\n",
" if next_distance < min_distance\n",
- " tsp_dist_impl(wait_time, connections,next_hops,path,next_distance,min_dist_chnl,max_hops,jobs_chnl,ftr_result)\n",
+ " node_count = tsp_dist_impl(connections,next_hops,path,next_distance,min_dist_chnl,max_hops,jobs_chnl,ftr_result,node_count)\n",
" end\n",
" end\n",
" end \n",
" else\n",
- " # Collect communication time and add to wait time\n",
- " wait_time += @elapsed if jobs_chnl !== nothing \n",
+ " if jobs_chnl !== nothing \n",
" path_copy = copy(path) \n",
" put!(jobs_chnl,(;hops,path=path_copy,current_distance))\n",
" end\n",
" end\n",
- " # Return wait time\n",
- " wait_time\n",
+ " return node_count\n",
"end\n",
"\n",
"function tsp_dist(connections,city)\n",
@@ -375,19 +452,23 @@
" num_cities = length(connections)\n",
" path=zeros(Int,num_cities)\n",
" result_path=zeros(Int, num_cities)\n",
- " wait_time = 0\n",
- " search_time = 0\n",
" hops = 1\n",
" path[hops] = city\n",
" current_distance = 0\n",
" min_distance = typemax(Int)\n",
+ " node_count = 1\n",
" jobs_chnl = RemoteChannel(()->Channel{Any}(10))\n",
" min_dist_chnl = RemoteChannel(()->Channel{Int}(1))\n",
" put!(min_dist_chnl, min_distance)\n",
" ftr_result = @spawnat 1 (;path=result_path,min_distance_ref=Ref(min_distance))\n",
+ " # Add another future to store number of visited nodes\n",
+ " ftr_node_count = @spawnat 1 node_count_ref = Ref(node_count)\n",
" @async begin\n",
- " # Collect search time from master process\n",
- " search_time += @elapsed wait_time += tsp_dist_impl(wait_time,connections,hops,path,current_distance,min_dist_chnl,max_hops,jobs_chnl,nothing)\n",
+ " ncount = 0\n",
+ " ncount += tsp_dist_impl(connections,hops,path,current_distance,min_dist_chnl,max_hops,jobs_chnl,nothing, ncount)\n",
+ " # Update node counter\n",
+ " node_count_ref = fetch(ftr_node_count)\n",
+ " node_count_ref[] += ncount \n",
" for w in workers()\n",
" put!(jobs_chnl,nothing)\n",
" end\n",
@@ -404,79 +485,229 @@
" hops = job.hops\n",
" path = job.path \n",
" current_distance = job.current_distance\n",
+ " ncount = 0\n",
" min_distance = fetch(min_dist_chnl)\n",
" if current_distance < min_distance\n",
- " # Collect search time from worker processes \n",
- " search_time += @elapsed wait_time += tsp_dist_impl(wait_time,connections,hops,path,current_distance,min_dist_chnl,max_hops,nothing,ftr_result)\n",
+ " ncount += tsp_dist_impl(connections,hops,path,current_distance,min_dist_chnl,max_hops,nothing,ftr_result, ncount)\n",
+ " # Update node counter\n",
+ " @spawnat 1 begin \n",
+ " node_count_ref = fetch(ftr_node_count)\n",
+ " node_count_ref[] += ncount \n",
+ " end\n",
" end\n",
" end\n",
" end\n",
" end \n",
" result = fetch(ftr_result)\n",
- " (;path = result.path, distance = result.min_distance_ref[], search_time, wait_time)\n",
- "end\n"
+ " # Fetch number of visited nodes for return\n",
+ " node_count_ref = fetch(ftr_node_count)\n",
+ " (;path = result.path, distance = result.min_distance_ref[], node_count=node_count_ref[])\n",
+ "end"
]
},
{
"cell_type": "code",
- "execution_count": null,
- "id": "694de934",
+ "execution_count": 16,
+ "id": "706242f2",
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "n = 4\n",
+ "n = 6\n",
+ "n = 8\n",
+ "n = 10\n",
+ "search_overhead_perc = [0.75 0.16666666666666666 0.3333333333333333 0.2727272727272727 0.14285714285714285 0.07692307692307693 0.0 0.16666666666666666 0.4 0.16666666666666666; 0.01092896174863388 0.005154639175257732 0.031578947368421054 0.05384615384615385 0.4672897196261682 0.10434782608695652 0.09917355371900827 0.06666666666666667 -0.0056179775280898875 0.2736842105263158; 0.0 -0.009295120061967466 -0.0032278889606197547 0.01906318082788671 0.028241335044929396 0.0011111111111111111 -0.008201892744479496 0.004958047292143402 -0.005873715124816446 -0.009497336113041464; 3.87551835057939e-5 -0.014818818265230451 7.133685261806249e-5 -0.03200184183262346 -0.01696773663002659 -0.004075168167420009 6.629541235746487e-5 -0.0033114074608037486 -0.016359150396910535 -0.0025293711126468557]\n"
+ ]
+ },
+ {
+ "data": {
+ "image/png": "",
+ "image/svg+xml": [
+ "\n",
+ "\n"
+ ],
+ "text/html": [
+ "\n",
+ "\n"
+ ]
+ },
+ "execution_count": 16,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"using Distributed\n",
- "using RandomMatrix\n",
"using Plots\n",
+ "using Statistics\n",
"\n",
- "function generate_rand_connections(city_range, distance_range)\n",
- " # generate random connections matrix \n",
- " n_cities = rand(city_range)\n",
- " matrix = randTriangular(distance_range, n_cities; Diag=false)\n",
- "\n",
- " connections = Array{Array{Tuple{Int64,Int64},1},1}(undef, n_cities)\n",
- " for i in 1:n_cities\n",
- " connections[i] = Array{Tuple{Int64,Int64},1}(undef, n_cities)\n",
+ "# Build a random symmetric n-by-n cost table; returns the table and the cost used for pairs without a link\n",
+ "function rand_symmetric_distance_table(n)\n",
+ " threshold = 0.2 # a link is created only when rand() exceeds this value\n",
+ " mincost = 3\n",
+ " maxcost = 10\n",
+ " infinity = 10000*maxcost # cost stored for pairs that receive no link\n",
+ " C = fill(infinity,n,n)\n",
+ " for j in 1:n\n",
+ " for i in 1:j # i <= j only; the mirrored entry is written below\n",
+ " if rand() > threshold\n",
+ " C[i,j] = rand(mincost:maxcost)\n",
+ " C[j,i] = C[i,j] # keep the table symmetric\n",
+ " end\n",
+ " end\n",
+ " C[j,j] = 0 # the diagonal is always reset to zero\n",
" end\n",
- " for i in 1:n_cities\n",
- " for j in i:n_cities\n",
- " distance = matrix[i,j]\n",
- " connections[i][j] = (j,distance)\n",
- " connections[j][i] = (i,distance)\n",
+ " C,infinity\n",
+ "end\n",
+ "\n",
+ "# Turn a distance table into per-city lists of (city, distance) pairs in ascending distance order\n",
+ "function sort_neighbors(C)\n",
+ "    return map(1:size(C,1)) do city\n",
+ "        row = C[city,:]\n",
+ "        order = sortperm(row)\n",
+ "        ranked = collect(zip(order,row[order]))\n",
+ "        # Drop the first (smallest) entry of every row.\n",
+ "        ranked[2:end]\n",
+ "    end\n",
+ "end\n",
+ "\n",
+ "n_cities = [4, 6, 8, 10] # problem sizes (number of cities) to test\n",
+ "n_rep = 10 # repetitions per problem size\n",
+ "city = 1 # start city passed to both solvers\n",
+ "node_count_serial = zeros(Union{Missing,Int64},length(n_cities), n_rep) # rows: problem sizes, columns: repetitions\n",
+ "node_count_dist = zeros(Union{Missing,Int64},length(n_cities), n_rep)\n",
+ "for (i, n) in enumerate(n_cities)\n",
+ " @show n\n",
+ " for j in 1:n_rep\n",
+ " # Generate a random distance table and convert it to sorted neighbor lists\n",
+ " C, inf = rand_symmetric_distance_table(n)\n",
+ " C = sort_neighbors(C)\n",
+ " # Run serial algorithm (the returned fields are destructured by position)\n",
+ " path, distance, ncount_serial = tsp_serial(C, city)\n",
+ " # Skip the run when the best distance is at least `inf`, the second value returned by rand_symmetric_distance_table\n",
+ " if distance >= inf\n",
+ " println(\"The input graph size $n, it $j is not connected\")\n",
+ " node_count_serial[i,j] = missing\n",
+ " node_count_dist[i,j] = missing\n",
+ " else\n",
+ " path, distance, ncount_dist = tsp_dist(C, city) # the parallel solver runs only on inputs that were not skipped\n",
+ " node_count_serial[i,j] = ncount_serial\n",
+ " node_count_dist[i,j] = ncount_dist\n",
" end\n",
" end\n",
- " return connections\n",
"end\n",
"\n",
- "# Run once so compile times are not measured\n",
- "distance_range = 1:100\n",
- "connections = generate_rand_connections(4:4, distance_range)\n",
- "tsp_dist(connections,1)\n",
- "tsp_serial(connections,1)\n",
+ "# Search overhead in percent of the serial node count; missing entries (skipped runs) are ignored in mean and 95% CI\n",
+ "search_overhead_perc = 100 .* (node_count_dist .- node_count_serial)./node_count_serial\n",
+ "avg_search_overhead = [mean(skipmissing(search_overhead_perc[i,:])) for i in axes(search_overhead_perc,1)]\n",
+ "conf_int = [1.96*std(skipmissing(search_overhead_perc[i,:]))/\n",
+ "            sqrt(count(!ismissing,search_overhead_perc[i,:])) \n",
+ "            for i in axes(search_overhead_perc,1)]\n",
"\n",
- "# Measure runtimes of serial and parallel algorithm\n",
- "n_it = 5\n",
- "city_ranges = [4:4, 6:6, 8:8, 10:10]\n",
- "search_overhead = zeros(Float64, length(city_ranges), n_it )\n",
- "for (i, n) in enumerate(city_ranges)\n",
- " for k in 1:n_it\n",
- " connections = generate_rand_connections(n, distance_range)\n",
- " @show n, k\n",
- " path_dist, distance_dist, search_time_dist, wait_time_dist = tsp_dist(connections,1)\n",
- " path_serial, distance_serial, search_time_serial = tsp_serial(connections,1)\n",
- " # Compute search overhead as difference between distributed program and serial program\n",
- " # (without time spent communicating or waiting)\n",
- " search_overhead[i, k] = search_time_dist - wait_time_dist - search_time_serial\n",
- " end\n",
- "end\n",
- "\n",
- "min_search_oh = minimum(search_overhead, dims=2)\n",
- "city_sizes = [4,6,8,10]\n",
- "plot(city_sizes, min_search_oh, yaxis=:log, seriestype=:scatter,legend=false)\n",
- "plot!(city_sizes, min_search_oh, yaxis=:log, legend=false)\n",
- "\n",
- "xlabel!(\"Number of cities\")\n",
- "ylabel!(\"Search overhead (s)\")\n",
- "title!(\"Minimum search overhead for different problem sizes\")"
+ "# Plot the mean overhead against the number of cities; the ribbon shows the confidence interval\n",
+ "plot(n_cities, avg_search_overhead, ribbon=conf_int, markershape=:circle, legend=false)\n",
+ "title!(\"Average search overhead (%) and 95%-CI \nin parallel algorithm\")\n",
+ "ylabel!(\"Average % extra nodes visited \n in parallel algorithm\")\n",
+ "xlabel!(\"Number of cities\")"
]
},
{