using Printf

"""
    answer_checker(answer,solution)

Print a congratulation message when `answer == solution` and an encouragement
to keep trying otherwise. Returns whatever `println` returns (`nothing`).
"""
function answer_checker(answer,solution)
    feedback = answer == solution ? "🥳 Well done! " : "It's not correct. Keep trying! 💪"
    println(feedback)
end
# Checkers for the multiple-choice questions: each one forwards the given
# answer to `answer_checker` together with the expected letter of its question.
function gauss_seidel_1_check(answer)
    answer_checker(answer, "c")
end
function jacobi_1_check(answer)
    answer_checker(answer, "d")
end
function jacobi_2_check(answer)
    answer_checker(answer, "b")
end
function jacobi_3_check(answer)
    answer_checker(answer, "c")
end
# Checker for the fourth Jacobi question (expected letter "d").
# The parameter used to be misspelled (`anwswer`), so the body ignored its
# argument and read the global variable `answer` instead.
jacobi_4_check(answer) = answer_checker(answer, "d")

"""
    jacobi(n,niters)

Run `niters` Jacobi sweeps on a 1D grid with `n` interior cells and return the
resulting vector of length `n+2`.

The first and last entries hold the fixed boundary values `-1` and `1`; the
interior cells start at zero. In each sweep every interior cell becomes the
average of its two neighbors in the previous iterate.
"""
function jacobi(n,niters)
    current = zeros(n+2)
    current[1], current[end] = -1, 1
    # Second buffer: a sweep reads only `current` and writes only `next`.
    next = copy(current)
    interior = 2:(n+1)
    for _ in 1:niters
        @views next[interior] .= 0.5 .* (current[interior .- 1] .+ current[interior .+ 1])
        # Swap roles instead of copying the data back.
        current, next = next, current
    end
    return current
end

# With zero iterations the call returns the initial state: boundary values at
# both ends and zeros in between.
jacobi(5,0)

# Excerpt of the main loop of `jacobi`, repeated on its own for discussion.
# It is not self-contained: it expects `n`, `niters`, `u` and `u_new` to be
# defined as in the body of `jacobi`. The iteration count is named `niters`
# here to match that function (it was `nsteps` before).
for t in 1:niters
    for i in 2:(n+1)
        # Reads only `u` and writes only `u_new`.
        u_new[i] = 0.5*(u[i-1]+u[i+1])
    end
    u, u_new = u_new, u
end

"""
    gauss_seidel(n,niters)

Run `niters` Gauss-Seidel sweeps on a 1D grid with `n` interior cells and
return the resulting vector of length `n+2`.

The first and last entries hold the fixed boundary values `-1` and `1`. The
update is done in place in a single vector, so within a sweep cell `i` already
sees the value just written to cell `i-1`.
"""
function gauss_seidel(n,niters)
    cells = zeros(n+2)
    cells[1], cells[end] = -1, 1
    for _ in 1:niters, i in 2:(n+1)
        cells[i] = 0.5*(cells[i-1]+cells[i+1])
    end
    return cells
end

# Run Gauss-Seidel on 5 interior cells for 1000 sweeps.
gauss_seidel(5,1000)

# Excerpt of the loop of `gauss_seidel`, repeated on its own for discussion.
# It is not self-contained: it expects `n`, `niters` and `u` to be defined as
# in the body of `gauss_seidel`.
for t in 1:niters
    for i in 2:(n+1)
        # In-place update: for i > 2, u[i-1] was already overwritten earlier in
        # this same sweep.
        u[i] = 0.5*(u[i-1]+u[i+1])
    end
end

# Question cell: set `answer` and run the checker to get feedback.
answer = "x" # replace x with a, b, c or d
gauss_seidel_1_check(answer)

"""
    jacobi_2d(n,niters)

Run `niters` Jacobi sweeps on a 2D grid with `n` by `n` interior cells and
return the resulting `(n+2)` by `(n+2)` matrix.

All four edges of the matrix hold the fixed boundary value `1`; the interior
starts at zero. In each sweep every interior cell becomes the average of its
four direct neighbors in the previous iterate.
"""
function jacobi_2d(n,niters)
    grid = zeros(n+2,n+2)
    # Fill the four edges with the boundary value.
    grid[:,end] .= 1
    grid[:,1] .= 1
    grid[end,:] .= 1
    grid[1,:] .= 1
    # Second buffer: a sweep reads only `grid` and writes only `next`.
    next = copy(grid)
    inner = 2:(n+1)
    for _ in 1:niters
        # `j` is the outer loop and `i` the inner one.
        for j in inner, i in inner
            next[i,j] = 0.25*(grid[i,j+1]+grid[i,j-1]+grid[i+1,j]+grid[i-1,j])
        end
        # Swap roles instead of copying the data back.
        grid, next = next, grid
    end
    return grid
end

# Initial state of the 2D problem (zero iterations) with a 10 by 10 interior.
u = jacobi_2d(10,0)

# Excerpt of the main loop of `jacobi_2d`, repeated on its own for discussion.
# It is not self-contained: it expects `n`, `niters` and `u_new` to be defined
# as in the body of `jacobi_2d`.
for t in 1:niters
    for j in 2:(n+1)
        for i in 2:(n+1)
            # The four direct neighbors of cell (i,j) in the previous iterate.
            north = u[i,j+1]
            south = u[i,j-1]
            east = u[i+1,j]
            west = u[i-1,j]
            u_new[i,j] = 0.25*(north+south+east+west)
        end
    end
    u, u_new = u_new, u
end

# Install the packages used by the cells below. The previous `] add ...` form
# is Pkg REPL-mode syntax, which only interactive front ends understand; the
# functional API also works when this file is run as a plain Julia script.
import Pkg
Pkg.add(["MPI", "MPIClusterManagers"])

using MPIClusterManagers 
using Distributed

# Start the MPI-backed workers only once: the lists returned by `procs()` and
# `workers()` are compared to detect a session to which no workers have been
# added yet. `nw` and `manager` are left as globals; `manager` is what the
# `@mpi_do` cells below run on.
if procs() == workers()
    nw = 3 # number of workers requested from the manager
    manager = MPIWorkerManager(nw)
    addprocs(manager)
end

# Test cell, remove me
# Scratch check: builds a one-element view of the last entry of a 6-element
# vector. Note that it rebinds the global `u`.
u = [-1, 0, 0, 0, 0, 1]
view(u, 6:6)

# Define and run an MPI version of the 1D Jacobi method on the processes
# behind `manager`. Everything inside the `begin ... end` is handed to
# `@mpi_do`.
@mpi_do manager begin
    using MPI
    comm = MPI.Comm_dup(MPI.COMM_WORLD)
    nw = MPI.Comm_size(comm)
    iw = MPI.Comm_rank(comm)+1 # 1-based id of this rank
    # jacobi_mpi(n,niters): each rank updates `n/nw` cells for `niters` sweeps
    # and the per-rank pieces are then gathered on rank 0. The local vector `u`
    # has one extra cell on each side of the owned cells (`u[1]` and `u[end]`).
    # Returns the value of the final `if`: `results` where `iw == 1`.
    function jacobi_mpi(n,niters)
        if mod(n,nw) != 0
            println("n must be a multiple of nw")
            MPI.Abort(comm,1)
        end
        n_own = div(n,nw) # number of cells updated by this rank
        u = zeros(n_own+2)
        # Every rank starts with -1 and 1 in its two end cells. On a rank that
        # has a neighbor on that side, the end cell is overwritten by the
        # receive below before the update loop reads it.
        u[1] = -1
        u[end] = 1
        u_new = copy(u)
        for t in 1:niters
            reqs = MPI.Request[]
            # Exchange cell values with neighbors
            if iw != 1
                # Lower-rank neighbor: send the first owned cell u[2] and
                # receive into the end cell u[1].
                neig_rank = (iw-1)-1 # back to a 0-based MPI rank
                req = MPI.Isend(view(u,2:2),comm,dest=neig_rank,tag=0)
                push!(reqs,req)
                req = MPI.Irecv!(view(u,1:1),comm,source=neig_rank,tag=0)
                push!(reqs,req)
            end
            if iw != nw
                # Higher-rank neighbor: send the last owned cell and receive
                # into the end cell right after it.
                neig_rank = (iw+1)-1 # back to a 0-based MPI rank
                s = n_own+1 # index of the cell that is sent
                r = n_own+2 # index of the cell that is received into
                req = MPI.Isend(view(u,s:s),comm,dest=neig_rank,tag=0)
                push!(reqs,req)
                req = MPI.Irecv!(view(u,r:r),comm,source=neig_rank,tag=0)
                push!(reqs,req)
            end
            # Wait for every posted send/receive before the received end cells
            # are read in the update loop.
            MPI.Waitall(reqs)
            for i in 2:(n_own+1)
                u_new[i] = 0.5*(u[i-1]+u[i+1])
            end
            u, u_new = u_new, u
        end
        # NOTE(review): the bare `u` on the next line is an expression statement
        # with no effect.
        u
        @show u
        # Gather results in root process
        # `results` is allocated on every rank with the two boundary values
        # preset; each rank contributes its `n_own` owned cells to entries
        # 2:n+1.
        results = zeros(n+2)
        results[1] = -1
        results[n+2] = 1
        MPI.Gather!(view(u,2:n_own+1), view(results, 2:n+1), root=0, comm)
        if iw == 1
            @show results
        end            
    end
    niters = 100
    load = 4 # owned cells per rank, since n = load*nw
    n = load*nw
    jacobi_mpi(n,niters)
end

# Question cells: in each pair, set `answer` to the chosen letter and run the
# matching checker, which prints whether the answer is correct.
answer = "x" # replace x with a, b, c or d
jacobi_2_check(answer)

answer = "x" # replace x with a, b, c or d
jacobi_3_check(answer)

answer = "x" # replace x with a, b, c or d.
jacobi_4_check(answer)

# Define and run an MPI version of the 1D Jacobi method on the processes
# behind `manager`. Everything inside the `begin ... end` is handed to
# `@mpi_do`.
# NOTE(review): this cell repeats an earlier `@mpi_do` cell of this file
# verbatim; presumably it is the starting point for an exercise (confirm).
@mpi_do manager begin
    using MPI
    comm = MPI.Comm_dup(MPI.COMM_WORLD)
    nw = MPI.Comm_size(comm)
    iw = MPI.Comm_rank(comm)+1 # 1-based id of this rank
    # jacobi_mpi(n,niters): each rank updates `n/nw` cells for `niters` sweeps
    # and the per-rank pieces are then gathered on rank 0. The local vector `u`
    # has one extra cell on each side of the owned cells (`u[1]` and `u[end]`).
    # Returns the value of the final `if`: `results` where `iw == 1`.
    function jacobi_mpi(n,niters)
        if mod(n,nw) != 0
            println("n must be a multiple of nw")
            MPI.Abort(comm,1)
        end
        n_own = div(n,nw) # number of cells updated by this rank
        u = zeros(n_own+2)
        # Every rank starts with -1 and 1 in its two end cells. On a rank that
        # has a neighbor on that side, the end cell is overwritten by the
        # receive below before the update loop reads it.
        u[1] = -1
        u[end] = 1
        u_new = copy(u)
        for t in 1:niters
            reqs = MPI.Request[]
            # Exchange cell values with neighbors
            if iw != 1
                # Lower-rank neighbor: send the first owned cell u[2] and
                # receive into the end cell u[1].
                neig_rank = (iw-1)-1 # back to a 0-based MPI rank
                req = MPI.Isend(view(u,2:2),comm,dest=neig_rank,tag=0)
                push!(reqs,req)
                req = MPI.Irecv!(view(u,1:1),comm,source=neig_rank,tag=0)
                push!(reqs,req)
            end
            if iw != nw
                # Higher-rank neighbor: send the last owned cell and receive
                # into the end cell right after it.
                neig_rank = (iw+1)-1 # back to a 0-based MPI rank
                s = n_own+1 # index of the cell that is sent
                r = n_own+2 # index of the cell that is received into
                req = MPI.Isend(view(u,s:s),comm,dest=neig_rank,tag=0)
                push!(reqs,req)
                req = MPI.Irecv!(view(u,r:r),comm,source=neig_rank,tag=0)
                push!(reqs,req)
            end
            # Wait for every posted send/receive before the received end cells
            # are read in the update loop.
            MPI.Waitall(reqs)
            for i in 2:(n_own+1)
                u_new[i] = 0.5*(u[i-1]+u[i+1])
            end
            u, u_new = u_new, u
        end
        # NOTE(review): the bare `u` on the next line is an expression statement
        # with no effect.
        u
        @show u
        # Gather results in root process
        # `results` is allocated on every rank with the two boundary values
        # preset; each rank contributes its `n_own` owned cells to entries
        # 2:n+1.
        results = zeros(n+2)
        results[1] = -1
        results[n+2] = 1
        MPI.Gather!(view(u,2:n_own+1), view(results, 2:n+1), root=0, comm)
        if iw == 1
            @show results
        end            
    end
    niters = 100
    load = 4 # owned cells per rank, since n = load*nw
    n = load*nw
    jacobi_mpi(n,niters)
end

Programming large-scale parallel systems

Jacobi method

Contents

The Jacobi method for the Laplace equation

Serial implementation

Where can we exploit parallelism?

The Gauss-Seidel method

Parallelization of the Jacobi method

Parallelization strategy

Data dependencies

Efficiency

Extension to 2D

Serial implementation

Where can we exploit parallelism?

Parallelization strategies

1D block partition

2D block partition

2D cyclic partition

Which partition is the best one?

Implementation

Ghost (aka halo) cells

Code

Latency hiding

Exercises

Exercise 1

License