diff --git a/docs/src/solutions_for_all_notebooks.md b/docs/src/solutions_for_all_notebooks.md index 292903c..80140b3 100644 --- a/docs/src/solutions_for_all_notebooks.md +++ b/docs/src/solutions_for_all_notebooks.md @@ -160,4 +160,57 @@ end msg = 2 @fetchfrom 2 work(msg) ``` +## Matrix-matrix multiplication + +### Exercise 1 + +```julia +function matmul_dist_3!(C,A,B) + m = size(C,1) + n = size(C,2) + l = size(A,2) + @assert size(A,1) == m + @assert size(B,2) == n + @assert size(B,1) == l + @assert mod(m,nworkers()) == 0 + nrows_w = div(m,nworkers()) + @sync for (iw,w) in enumerate(workers()) + lb = 1 + (iw-1)*nrows_w + ub = iw*nrows_w + A_w = A[lb:ub,:] + ftr = @spawnat w begin + C_w = similar(A_w) + matmul_seq!(C_w,A_w,B) + C_w + end + @async C[lb:ub,:] = fetch(ftr) + end + C +end + +@everywhere function matmul_seq!(C,A,B) + m = size(C,1) + n = size(C,2) + l = size(A,2) + @assert size(A,1) == m + @assert size(B,2) == n + @assert size(B,1) == l + z = zero(eltype(C)) + for j in 1:n + for i in 1:m + Cij = z + for k in 1:l + @inbounds Cij = Cij + A[i,k]*B[k,j] + end + C[i,j] = Cij + end + end + C +end +``` + +### Exercise 2 + +Each call to `@spawnat` communicates O(N) data and performs O(N) computation in a worker process, just as in algorithm 1. However, each worker does this work N^2/P times on average. Thus, the total communication and the total computation on a worker are both O(N^3/P). The communication-to-computation ratio is therefore still O(1), so communication will dominate in practice, making the algorithm inefficient. + diff --git a/notebooks/matrix_matrix.ipynb b/notebooks/matrix_matrix.ipynb index 7968e87..1cc299c 100644 --- a/notebooks/matrix_matrix.ipynb +++ b/notebooks/matrix_matrix.ipynb @@ -72,9 +72,10 @@ " \"It's not correct. Keep trying! 
💪\"\n", " end |> println\n", "end\n", + "alg_0_comp_check(answer) = answer_checker(answer, \"d\")\n", "alg_1_deps_check(answer) = answer_checker(answer,\"b\")\n", - "alg_1_comm_overhead_check(answer) = answer_checker(answer, \"c\")\n", - "alg_1_comp_check(answer) = answer_checker(answer, \"a\")\n", + "alg_1_comm_overhead_check(answer) = answer_checker(answer, \"b\")\n", + "alg_1_comp_check(answer) = answer_checker(answer, \"b\")\n", "alg_2_complex_check(answer) = answer_checker(answer, \"b\")\n", "alg_2_deps_check(answer) = answer_checker(answer,\"d\")\n", "alg_3_deps_check(answer) = answer_checker(answer, \"c\")\n", @@ -88,7 +89,7 @@ "source": [ "## Problem Statement\n", "\n", - "Let us consider the (dense) matrix-matrix product `C=A*B`." + "Given two $N$-by-$N$ matrices $A$ and $B$, compute the matrix-matrix product $C=AB$. Compute it in parallel and efficiently." ] }, { @@ -157,7 +158,7 @@ "source": [ "## Serial implementation\n", "\n", - "We start by considering the (naive) sequential algorithm:" + "We start by considering the (naive) sequential algorithm, which is based on the mathematical definition of the matrix-matrix product $C_{ij} = \\sum_k A_{ik} B_{kj}$:" ] }, { @@ -188,6 +189,30 @@ "end" ] }, + { + "cell_type": "markdown", + "id": "e3b86457", + "metadata": {}, + "source": [ + "Run the next cell to test the implementation." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c5caf799", + "metadata": {}, + "outputs": [], + "source": [ + "using Test\n", + "N = 10\n", + "A = rand(N,N)\n", + "B = rand(N,N)\n", + "C = similar(A)\n", + "matmul_seq!(C,A,B)\n", + "@test C ≈ A*B" + ] + }, { "cell_type": "markdown", "id": "f967d2ea", @@ -216,6 +241,32 @@ "@btime mul!(C,A,B);" ] }, + { + "cell_type": "markdown", + "id": "0ca2fbd4", + "metadata": {}, + "source": [ + "