Merge pull request #25 from fverdugo/francesc

Changes after session on Tuesday 19th 2023
2025-12-30 18:58:31 +01:00 · 2023-09-20 09:10:56 +02:00
parent 6cf3b87d0a 173d1ad747
commit 50fb289e5b
2 changed files with 119 additions and 14 deletions
--- a/docs/src/solutions_for_all_notebooks.md
+++ b/docs/src/solutions_for_all_notebooks.md
@@ -160,4 +160,57 @@ end
 msg = 2
@fetchfrom 2 work(msg)
 ```
 ## Matrix-matrix multiplication
 ### Exercise 1
 ```julia
 """
     matmul_dist_3!(C,A,B)

 Compute the matrix-matrix product of `A` and `B`, store it in `C`, and return `C`.

 The rows of `C` are split into `nworkers()` contiguous blocks of equal size.
 Each worker receives the matching block of rows of `A` together with `B`,
 computes its block of the product with `matmul_seq!`, and the result is
 copied back into `C` on the calling process.

 The number of rows of `C` must be a multiple of `nworkers()`, and
 `matmul_seq!` must be defined on all workers.
 """
 function matmul_dist_3!(C,A,B)
    m = size(C,1)
    n = size(C,2)
    l = size(A,2)
    @assert size(A,1) == m
    @assert size(B,2) == n
    @assert size(B,1) == l
    @assert mod(m,nworkers()) == 0
    nrows_w = div(m,nworkers())
    @sync for (iw,w) in enumerate(workers())
        # Range of rows of C (and of A) handled by worker w
        lb = 1 + (iw-1)*nrows_w
        ub = iw*nrows_w
        # Slicing makes a copy, so the closure below captures only this
        # block of rows of A instead of the whole matrix
        A_w = A[lb:ub,:]
        ftr = @spawnat w begin
             # The result block is nrows_w-by-n. Using the shape of A_w
             # (nrows_w-by-l) is only correct when l == n.
             C_w = similar(A_w,nrows_w,n)
             matmul_seq!(C_w,A_w,B)
             C_w
        end
        # Fetch asynchronously so that all workers compute concurrently;
        # the enclosing @sync waits for every block to be copied into C
        @async C[lb:ub,:] = fetch(ftr)
    end
    C
 end
# Sequential (naive) matrix-matrix product: overwrite `C` with the product
# of `A` and `B` and return `C`.
# Sizes must be compatible: `A` is m-by-l, `B` is l-by-n and `C` is m-by-n.
# Defined with `@everywhere` so that it is available on all processes.
@everywhere function matmul_seq!(C,A,B)
    m = size(C,1)
    n = size(C,2)
    l = size(A,2)
    @assert size(A,1) == m
    @assert size(B,2) == n
    @assert size(B,1) == l
    # Visit the entries of C column by column (j is the outer loop)
    for j in 1:n, i in 1:m
        # Accumulate the product of row i of A and column j of B
        acc = zero(eltype(C))
        for k in 1:l
            @inbounds acc += A[i,k]*B[k,j]
        end
        C[i,j] = acc
    end
    return C
 end
 ```
 ### Exercise 2
 At each call to @spawnat we communicate O(N) data and perform O(N) operations in a worker process, just like in algorithm 1. However, each worker does this N^2/P times on average. Thus, the total communication and the total computation on a worker are both O(N^3/P). The communication over computation ratio is therefore still O(1), so communication will dominate in practice, making the algorithm inefficient.
--- a/notebooks/matrix_matrix.ipynb
+++ b/notebooks/matrix_matrix.ipynb
@@ -72,9 +72,10 @@
    "        \"It's not correct. Keep trying! 💪\"\n",
    "    end |> println\n",
    "end\n",
    "alg_0_comp_check(answer) = answer_checker(answer, \"d\")\n",
    "alg_1_deps_check(answer) = answer_checker(answer,\"b\")\n",
-    "alg_1_comm_overhead_check(answer) = answer_checker(answer, \"c\")\n",
+    "alg_1_comm_overhead_check(answer) = answer_checker(answer, \"b\")\n",
-    "alg_1_comp_check(answer) = answer_checker(answer, \"a\")\n",
+    "alg_1_comp_check(answer) = answer_checker(answer, \"b\")\n",
    "alg_2_complex_check(answer) = answer_checker(answer, \"b\")\n",
    "alg_2_deps_check(answer) = answer_checker(answer,\"d\")\n",
    "alg_3_deps_check(answer) = answer_checker(answer, \"c\")\n",
@@ -88,7 +89,7 @@
   "source": [
    "## Problem Statement\n",
    "\n",
-    "Let us consider the (dense) matrix-matrix product `C=A*B`."
+    "Given two $N$-by-$N$ matrices $A$ and $B$, compute the matrix-matrix product $C=AB$. Compute it in parallel and efficiently."
   ]
  },
  {
@@ -157,7 +158,7 @@
   "source": [
    "## Serial implementation\n",
    "\n",
-    "We start by considering the (naive) sequential algorithm:"
+    "We start by considering the (naive) sequential algorithm, which is based on the mathematical definition of the matrix-matrix product, $C_{ij} = \\sum_k A_{ik} B_{kj}$:"
   ]
  },
  {
@@ -188,6 +189,30 @@
    "end"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "e3b86457",
   "metadata": {},
   "source": [
    "Run next cell to test the implementation."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c5caf799",
   "metadata": {},
   "outputs": [],
   "source": [
    "using Test\n",
    "N = 10\n",
    "A = rand(N,N)\n",
    "B = rand(N,N)\n",
    "C = similar(A)\n",
    "matmul_seq!(C,A,B)\n",
    "@test C ≈ A*B"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "f967d2ea",
@@ -216,6 +241,32 @@
    "@btime mul!(C,A,B);"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "0ca2fbd4",
   "metadata": {},
   "source": [
    "<div class=\"alert alert-block alert-success\">\n",
    "<b>Question:</b>  Which is the complexity (number of operations) of the serial algorithm? Assume that all matrices are $N$-by-$N$ matrices.    \n",
    "</div>\n",
    "\n",
    "    a) O(1)\n",
    "    b) O(N)\n",
    "    c) O(N²)\n",
    "    d) O(N³)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "078e974e",
   "metadata": {},
   "outputs": [],
   "source": [
    "answer = \"x\" # replace x with a, b, c, or d \n",
    "alg_0_comp_check(answer)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "0eedd28a",
@@ -489,10 +540,10 @@
    "<b>Question:</b>  How many scalars are communicated from and to a worker?  Assume that matrices A, B, and C are N by N matrices.\n",
    "</div>\n",
    "\n",
-    "    a) 3N\n",
+    "    a) O(1)\n",
-    "    b) 2N + 2\n",
+    "    b) O(N)\n",
-    "    c) 2N + 1\n",
+    "    c) O(N²)\n",
-    "    d) N² + 1"
+    "    d) O(N³)"
   ]
  },
  {
@@ -515,9 +566,10 @@
    "<b>Question:</b>  How many operations are done in a worker?    \n",
    "</div>\n",
    "\n",
-    "    a) O(N)\n",
+    "    a) O(1)\n",
-    "    b) O(N²)\n",
+    "    b) O(N)\n",
-    "    c) O(N³)"
+    "    c) O(N²)\n",
    "    d) O(N³)"
   ]
  },
  {
@@ -905,9 +957,9 @@
    "\n",
    "| Algorithm | Parallelism <br>(#workers) | Communication <br>per worker | Computation <br>per worker | Ratio communication/<br>computation |\n",
    "|---|---|---|---|---|\n",
-    "| 1 | N² | 2N + 1 | N | O(1) |\n",
+    "| 1 | N² | O(N) | O(N) | O(1) |\n",
-    "| 2 | N | 2N + N² | N² | O(1) |\n",
+    "| 2 | N | O(N²) | O(N²) | O(1) |\n",
-    "| 3 | P | N² + 2N²/P | N³/P | O(P/N) |\n",
+    "| 3 | P | O(N²) | O(N³/P) | O(P/N) |\n",
    "\n",
    "\n",
    "- Matrix-matrix multiplication is trivially parallelizable (all entries in the result matrix can be computed in parallel, at least in theory)\n",