Merge pull request #25 from fverdugo/francesc

Changes after session on Tuesday 19th 2023
This commit is contained in:
Francesc Verdugo 2023-09-20 09:10:56 +02:00 committed by GitHub
commit 50fb289e5b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 119 additions and 14 deletions

View File

@ -160,4 +160,57 @@ end
msg = 2 msg = 2
@fetchfrom 2 work(msg) @fetchfrom 2 work(msg)
``` ```
## Matrix-matrix multiplication
### Exercise 1
```julia
"""
    matmul_dist_3!(C,A,B)

Overwrite `C` with the product `A*B`, computed in parallel, and return `C`.

The rows of `C` are split into `nworkers()` contiguous blocks of equal size.
Each worker receives the matching rows of `A` together with the whole of `B`,
computes its block of rows with `matmul_seq!`, and the main process copies the
fetched block into `C`.

Requirements checked with `@assert`: the sizes of `C`, `A` and `B` must be
compatible, and `size(C,1)` must be a multiple of `nworkers()`.
`matmul_seq!` has to be defined on every worker (e.g. with `@everywhere`).
"""
function matmul_dist_3!(C,A,B)
    m = size(C,1)
    n = size(C,2)
    l = size(A,2)
    @assert size(A,1) == m
    @assert size(B,2) == n
    @assert size(B,1) == l
    @assert mod(m,nworkers()) == 0
    nrows_w = div(m,nworkers())
    @sync for (iw,w) in enumerate(workers())
        # Row range of C (and A) handled by worker number iw.
        lb = 1 + (iw-1)*nrows_w
        ub = iw*nrows_w
        # Slicing copies, so only these rows of A are captured by the remote closure.
        A_w = A[lb:ub,:]
        ftr = @spawnat w begin
            # The result block has the rows of A_w but the columns of B.
            # `similar(A_w)` would have size(A,2) columns, which is only
            # correct when size(A,2) == size(B,2).
            C_w = similar(A_w, size(A_w,1), size(B,2))
            matmul_seq!(C_w,A_w,B)
            C_w
        end
        # Fetch asynchronously so that waiting for one worker does not delay
        # spawning the work on the next ones; @sync waits for all copies.
        @async C[lb:ub,:] = fetch(ftr)
    end
    C
end
# matmul_seq!(C,A,B)
#
# Sequential matrix-matrix product: overwrite C with A*B and return C.
# Every entry C[i,j] is the sum over k of A[i,k]*B[k,j].
# The sizes of C, A and B are checked with @assert before any entry is written.
# Defined with @everywhere so that the function is available on all processes.
@everywhere function matmul_seq!(C,A,B)
    m = size(C,1)
    n = size(C,2)
    l = size(A,2)
    @assert size(A,1) == m
    @assert size(B,2) == n
    @assert size(B,1) == l
    acc0 = zero(eltype(C))
    # j is the outer loop and i the inner one, so C is filled column by column.
    for j in 1:n, i in 1:m
        acc = acc0
        for k in 1:l
            # The assertions above guarantee that these indices are in bounds.
            @inbounds acc = acc + A[i,k]*B[k,j]
        end
        C[i,j] = acc
    end
    return C
end
```
### Exercise 2
At each call to `@spawnat` we communicate O(N) data and compute O(N) operations in a worker process, just like in algorithm 1. However, each worker does this work N^2/P times on average. Thus, both the total communication and the total computation on a worker will be O(N^3/P). Consequently, the communication over computation ratio will still be O(1), so the communication will dominate in practice, making the algorithm inefficient.

View File

@ -72,9 +72,10 @@
" \"It's not correct. Keep trying! 💪\"\n", " \"It's not correct. Keep trying! 💪\"\n",
" end |> println\n", " end |> println\n",
"end\n", "end\n",
"alg_0_comp_check(answer) = answer_checker(answer, \"d\")\n",
"alg_1_deps_check(answer) = answer_checker(answer,\"b\")\n", "alg_1_deps_check(answer) = answer_checker(answer,\"b\")\n",
"alg_1_comm_overhead_check(answer) = answer_checker(answer, \"c\")\n", "alg_1_comm_overhead_check(answer) = answer_checker(answer, \"b\")\n",
"alg_1_comp_check(answer) = answer_checker(answer, \"a\")\n", "alg_1_comp_check(answer) = answer_checker(answer, \"b\")\n",
"alg_2_complex_check(answer) = answer_checker(answer, \"b\")\n", "alg_2_complex_check(answer) = answer_checker(answer, \"b\")\n",
"alg_2_deps_check(answer) = answer_checker(answer,\"d\")\n", "alg_2_deps_check(answer) = answer_checker(answer,\"d\")\n",
"alg_3_deps_check(answer) = answer_checker(answer, \"c\")\n", "alg_3_deps_check(answer) = answer_checker(answer, \"c\")\n",
@ -88,7 +89,7 @@
"source": [ "source": [
"## Problem Statement\n", "## Problem Statement\n",
"\n", "\n",
"Let us consider the (dense) matrix-matrix product `C=A*B`." "Given $A$ and $B$ two $N$-by-$N$ matrices, compute the matrix-matrix product $C=AB$. Compute it in parallel and efficiently."
] ]
}, },
{ {
@ -157,7 +158,7 @@
"source": [ "source": [
"## Serial implementation\n", "## Serial implementation\n",
"\n", "\n",
"We start by considering the (naive) sequential algorithm:" "We start by considering the (naive) sequential algorithm, which is based on the math definition of the matrix-matrix product $C_{ij} = \\sum_k A_{ik} B_{kj}$"
] ]
}, },
{ {
@ -188,6 +189,30 @@
"end" "end"
] ]
}, },
{
"cell_type": "markdown",
"id": "e3b86457",
"metadata": {},
"source": [
"Run next cell to test the implementation."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c5caf799",
"metadata": {},
"outputs": [],
"source": [
"using Test\n",
"N = 10\n",
"A = rand(N,N)\n",
"B = rand(N,N)\n",
"C = similar(A)\n",
"matmul_seq!(C,A,B)\n",
"@test C ≈ A*B"
]
},
{ {
"cell_type": "markdown", "cell_type": "markdown",
"id": "f967d2ea", "id": "f967d2ea",
@ -216,6 +241,32 @@
"@btime mul!(C,A,B);" "@btime mul!(C,A,B);"
] ]
}, },
{
"cell_type": "markdown",
"id": "0ca2fbd4",
"metadata": {},
"source": [
"<div class=\"alert alert-block alert-success\">\n",
"<b>Question:</b> Which is the complexity (number of operations) of the serial algorithm? Assume that all matrices are $N$-by-$N$ matrices. \n",
"</div>\n",
"\n",
" a) O(1)\n",
" b) O(N)\n",
" c) O(N²)\n",
" d) O(N³)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "078e974e",
"metadata": {},
"outputs": [],
"source": [
"answer = \"x\" # replace x with a, b, c, or d \n",
"alg_0_comp_check(answer)"
]
},
{ {
"cell_type": "markdown", "cell_type": "markdown",
"id": "0eedd28a", "id": "0eedd28a",
@ -489,10 +540,10 @@
"<b>Question:</b> How many scalars are communicated from and to a worker? Assume that matrices A, B, and C are N by N matrices.\n", "<b>Question:</b> How many scalars are communicated from and to a worker? Assume that matrices A, B, and C are N by N matrices.\n",
"</div>\n", "</div>\n",
"\n", "\n",
" a) 3N\n", " a) O(1)\n",
" b) 2N + 2\n", " b) O(N)\n",
" c) 2N + 1\n", " c) O(N²)\n",
" d) N² + 1" " d) O(N³)"
] ]
}, },
{ {
@ -515,9 +566,10 @@
"<b>Question:</b> How many operations are done in a worker? \n", "<b>Question:</b> How many operations are done in a worker? \n",
"</div>\n", "</div>\n",
"\n", "\n",
" a) O(N)\n", " a) O(1)\n",
" b) O(N²)\n", " b) O(N)\n",
" c) O(N³)" " c) O(N²)\n",
" d) O(N³)"
] ]
}, },
{ {
@ -905,9 +957,9 @@
"\n", "\n",
"| Algorithm | Parallelism <br>(#workers) | Communication <br>per worker | Computation <br>per worker | Ratio communication/<br>computation |\n", "| Algorithm | Parallelism <br>(#workers) | Communication <br>per worker | Computation <br>per worker | Ratio communication/<br>computation |\n",
"|---|---|---|---|---|\n", "|---|---|---|---|---|\n",
"| 1 | N² | 2N + 1 | N | O(1) |\n", "| 1 | N² | O(N) | O(N) | O(1) |\n",
"| 2 | N | 2N + N² | N² | O(1) |\n", "| 2 | N | O(N²) | O(N²) | O(1) |\n",
"| 3 | P | N² + 2N²/P | N³/P | O(P/N) |\n", "| 3 | P | O(N²) | O(N³/P) | O(P/N) |\n",
"\n", "\n",
"\n", "\n",
"- Matrix-matrix multiplication is trivially parallelizable (all entries in the result matrix can be computed in parallel, at least in theory)\n", "- Matrix-matrix multiplication is trivially parallelizable (all entries in the result matrix can be computed in parallel, at least in theory)\n",