using Printf

"""
    answer_checker(answer, solution)

Print a congratulation message when `answer` equals `solution`, and an
encouragement message otherwise.
"""
function answer_checker(answer,solution)
    is_correct = answer == solution
    feedback = is_correct ? "🥳 Well done! " : "It's not correct. Keep trying! 💪"
    println(feedback)
end
# Checkers for the multiple-choice questions. Each one compares the given
# answer with the expected option letter via `answer_checker`.
function gauss_seidel_1_check(answer)
    answer_checker(answer, "c")
end

function jacobi_1_check(answer)
    answer_checker(answer, "d")
end

function jacobi_2_check(answer)
    answer_checker(answer, "b")
end

function jacobi_3_check(answer)
    answer_checker(answer, "c")
end

"""
    jacobi(n, niters)

Run `niters` Jacobi iterations on a 1D grid with `n` interior points and
return the resulting vector of length `n + 2`. The first and last entries
hold the fixed boundary values `-1` and `1`; interior values start at zero.
"""
function jacobi(n,niters)
    current = zeros(n+2)
    current[1] = -1
    current[end] = 1
    updated = copy(current)
    interior = 2:(n+1)
    left, right = interior .- 1, interior .+ 1
    for _ in 1:niters
        # Every interior value becomes the average of its two neighbours,
        # both read from the previous iterate.
        @views updated[interior] .= 0.5 .* (current[left] .+ current[right])
        # Swap the roles of the two buffers instead of copying data.
        current, updated = updated, current
    end
    return current
end

# With zero iterations the call returns the initial guess: zeros in the
# interior and the boundary values -1 and 1 at the two ends.
jacobi(5,0)

# Main loop of `jacobi`, shown in isolation. `n`, `niters`, `u` and `u_new`
# are expected to be defined as in that function.
for t in 1:niters
    for i in 2:(n+1)
        # Each update reads only the previous iterate `u`.
        u_new[i] = 0.5*(u[i-1]+u[i+1])
    end
    # Swap the buffers so that the new values become the current ones.
    u, u_new = u_new, u
end

"""
    gauss_seidel(n, niters)

Run `niters` Gauss-Seidel sweeps on a 1D grid with `n` interior points and
return the resulting vector of length `n + 2`. The first and last entries
hold the fixed boundary values `-1` and `1`; interior values start at zero.
"""
function gauss_seidel(n,niters)
    values = zeros(n+2)
    values[1] = -1
    values[end] = 1
    interior = 2:(n+1)
    # The vector is updated in place, so the left neighbour read at position
    # `i` has already been updated in the current sweep.
    for _ in 1:niters, i in interior
        values[i] = 0.5*(values[i-1] + values[i+1])
    end
    return values
end

# Run 1000 Gauss-Seidel sweeps on a grid with 5 interior points.
gauss_seidel(5,1000)

# Main loop of `gauss_seidel`, shown in isolation. `n`, `niters` and `u`
# are expected to be defined as in that function.
for t in 1:niters
    for i in 2:(n+1)
        # In-place update: `u[i-1]` was already overwritten in this sweep,
        # while `u[i+1]` still holds the value from the previous sweep.
        u[i] = 0.5*(u[i-1]+u[i+1])
    end
end

# Exercise cell: set `answer` to the chosen option; the checker prints feedback.
answer = "x" # replace x with a, b, c or d
gauss_seidel_1_check(answer)

] add MPI MPIClusterManagers

using MPIClusterManagers 
using Distributed

# Start the MPI workers only once: `procs()` and `workers()` coincide only
# while no worker process has been added, so re-running this cell does not
# add more workers.
if procs() == workers()
    # Number of workers requested from the MPI-based cluster manager.
    nw = 3
    manager = MPIWorkerManager(nw)
    addprocs(manager)
end

@everywhere workers() begin
    using MPI
    # Communicator used by all MPI calls below (a duplicate of MPI.COMM_WORLD).
    comm = MPI.Comm_dup(MPI.COMM_WORLD)
    # Distributed version of the 1D Jacobi iteration.
    # The `n` interior points are split evenly among the ranks of `comm`.
    # Each rank runs `niters` iterations and returns its local vector of
    # length `n_own + 2`: its owned values plus one extra cell at each end.
    function jacobi_mpi(n,niters)
        nranks = MPI.Comm_size(comm)
        rank = MPI.Comm_rank(comm)
        # The even split below needs `n` to be divisible by the number of
        # ranks; otherwise the whole job is aborted.
        if mod(n,nranks) != 0
            println("n must be a multiple of nranks")
            MPI.Abort(comm,1)
        end
        n_own = div(n,nranks)
        # Local vector: entries 2:(n_own+1) are the owned values. Entries 1
        # and n_own+2 hold either a boundary value or a copy of a value
        # owned by a neighbouring rank (ghost cell).
        u = zeros(n_own+2)
        # Set on every rank. On ranks that have a neighbour on that side the
        # entry is overwritten by the receive below before it is read.
        u[1] = -1
        u[end] = 1
        u_new = copy(u)
        for t in 1:niters
            # Requests of the non-blocking communications of this iteration.
            reqs = MPI.Request[]
            if rank != 0
                # Exchange with the previous rank: send the first owned
                # value and receive its last owned value into the left ghost cell.
                neig_rank = rank-1
                req = MPI.Isend(view(u,2:2),comm,dest=neig_rank,tag=0)
                push!(reqs,req)
                req = MPI.Irecv!(view(u,1:1),comm,source=neig_rank,tag=0)
                push!(reqs,req)
            end
            if rank != (nranks-1)
                # Exchange with the next rank: send the last owned value (s)
                # and receive its first owned value into the right ghost cell (r).
                neig_rank = rank+1
                s = n_own+1
                r = n_own+2
                req = MPI.Isend(view(u,s:s),comm,dest=neig_rank,tag=0)
                push!(reqs,req)
                req = MPI.Irecv!(view(u,r:r),comm,source=neig_rank,tag=0)
                push!(reqs,req)
            end
            # All sends and receives must be complete before the ghost cells
            # are read and before the buffers are swapped.
            MPI.Waitall(reqs)
            # Local Jacobi update of the owned entries.
            for i in 2:(n_own+1)
                u_new[i] = 0.5*(u[i-1]+u[i+1])
            end
            # Swap the buffers so that the new values become the current ones.
            u, u_new = u_new, u
        end
        return u
    end
end

# Serial Jacobi iteration, defined on the workers.
# Runs `niters` iterations on a 1D grid with `n` interior points and returns
# the vector of length `n + 2`, with boundary values -1 and 1 at the ends.
@everywhere workers() function jacobi(n,niters)
    current = zeros(n+2)
    current[1] = -1
    current[end] = 1
    updated = copy(current)
    interior = 2:(n+1)
    left, right = interior .- 1, interior .+ 1
    for _ in 1:niters
        # Every interior value becomes the average of its two neighbours,
        # both read from the previous iterate.
        @views updated[interior] .= 0.5 .* (current[left] .+ current[right])
        # Swap the roles of the two buffers instead of copying data.
        current, updated = updated, current
    end
    return current
end

@everywhere workers() begin
    # Call jacobi in parallel
    niters = 10
    # Grid points per rank; n is built as a multiple of nranks, as
    # jacobi_mpi requires.
    load = 4
    nranks = MPI.Comm_size(comm)
    n = load*nranks
    u = jacobi_mpi(n,niters)
    # Gather results in root process and check
    rank = MPI.Comm_rank(comm)
    n_own = div(n,nranks)
    if rank == 0
        # Global vector, allocated on the root only. The boundary values are
        # set by hand and the gathered values fill entries 2:n+1.
        results = zeros(n+2)
        results[1] = -1
        results[n+2] = 1
        rcv = view(results, 2:n+1)
    else
        # Non-root ranks provide no receive buffer.
        rcv = nothing
    end
    # Each rank contributes only its owned entries; the first and last
    # entries of the local vector are left out.
    MPI.Gather!(view(u,2:n_own+1),rcv,comm;root=0)
    if rank == 0
        # Compare the gathered result with the serial implementation.
        @show results ≈ jacobi(n,niters)
    end   
end

# Exercise cell: set `answer` to the chosen option; the checker prints feedback.
answer = "x" # replace x with a, b, c or d
jacobi_2_check(answer)

# Exercise cell: set `answer` to the chosen option; the checker prints feedback.
answer = "x" # replace x with a, b, c or d
jacobi_3_check(answer)

"""
    jacobi_2d(n, niters)

Run `niters` Jacobi iterations on a square grid with `n`×`n` interior points
and return the resulting `(n + 2)`×`(n + 2)` matrix. Boundary entries are
fixed to one and interior entries start at zero.
"""
function jacobi_2d(n,niters)
    grid = zeros(n+2,n+2)
    # Set the four sides of the grid to one.
    grid[:,end] .= 1
    grid[:,1] .= 1
    grid[end,:] .= 1
    grid[1,:] .= 1
    scratch = copy(grid)
    interior = 2:(n+1)
    for _ in 1:niters
        # `i` (first index) is the innermost loop variable.
        for j in interior, i in interior
            # Average of the four neighbours, read from the previous iterate.
            scratch[i,j] = 0.25*(grid[i,j+1] + grid[i,j-1] + grid[i+1,j] + grid[i-1,j])
        end
        # Swap the roles of the two buffers instead of copying data.
        grid, scratch = scratch, grid
    end
    return grid
end

# With zero iterations the call returns the initial state of a grid with
# 10×10 interior points: ones on the boundary and zeros inside.
u = jacobi_2d(10,0)

# Main loop of `jacobi_2d`, shown in isolation. `n`, `niters`, `u` and
# `u_new` are expected to be defined as in that function.
for t in 1:niters
    for j in 2:(n+1)
        for i in 2:(n+1)
            # The four neighbours are read from the previous iterate `u`.
            north = u[i,j+1]
            south = u[i,j-1]
            east = u[i+1,j]
            west = u[i-1,j]
            u_new[i,j] = 0.25*(north+south+east+west)
        end
    end
    # Swap the buffers so that the new values become the current ones.
    u, u_new = u_new, u
end

| Partition | Messages per iteration | Communication per worker | Computation per worker | Ratio communication/computation |
|---|---|---|---|---|
| 1d block | 2 | O(N) | N²/P | O(P/N) |
| 2d block | 4 | O(N/√P) | N²/P | O(√P/N) |
| 2d cyclic | 4 | O(N²/P) | N²/P | O(1) |

Programming large-scale parallel systems¶

Jacobi method¶

Contents¶

The Jacobi method for the Laplace equation¶

Serial implementation¶

Where can we exploit parallelism?¶

The Gauss-Seidel method¶

Parallelization of the Jacobi method¶

Parallelization strategy¶

Data dependencies¶

Communication overhead¶

1D Implementation¶

Ghost (aka halo) cells¶

Code¶

Latency hiding¶

Extension to 2D¶

Serial implementation¶

Where can we exploit parallelism?¶

Parallelization strategies¶

1D block partition¶

2D block partition¶

2D cyclic partition¶

Summary¶

Which partition is the best one?¶

Exercises¶

Exercise 1¶

License¶