# Solutions

## Julia Basics

### Exercise 1

```julia
function ex1(a)
    j = 1
    m = a[j]
    for (i,ai) in enumerate(a)
        if m < ai
            m = ai
            j = i
        end
    end
    (m,j)
end
```

### Exercise 2

```julia
ex2(f,g) = x -> f(x) + g(x)
```

### Exercise 3

```julia
using GLMakie
max_iters = 100
n = 1000
x = LinRange(-1.7,0.7,n)
y = LinRange(-1.2,1.2,n)
heatmap(x,y,(i,j)->mandel(i,j,max_iters))
```

## Asynchronous programming in Julia

## Distributed computing in Julia

### Exercise 1

```julia
f = () -> Channel{Int}(1)
chnls = [ RemoteChannel(f,w) for w in workers() ]
@sync for (iw,w) in enumerate(workers())
    @spawnat w begin
        chnl_snd = chnls[iw]
        if w == 2
            chnl_rcv = chnls[end]
            msg = 2
            println("msg = $msg")
            put!(chnl_snd,msg)
            msg = take!(chnl_rcv)
            println("msg = $msg")
        else
           chnl_rcv = chnls[iw-1]
           msg = take!(chnl_rcv)
           msg += 1
           println("msg = $msg")
           put!(chnl_snd,msg)
        end
    end
end
```

This is another possible solution.

```julia
@everywhere function work(msg)
    println("msg = $msg")
    if myid() != nprocs()
        next = myid() + 1
        @fetchfrom next work(msg+1)
    else
        @fetchfrom 2 println("msg = $msg")
    end
end
msg = 2
@fetchfrom 2 work(msg)
```


## Matrix-matrix multiplication

### Exercise 1

```julia
function matmul_dist_3!(C,A,B)
    m = size(C,1)
    n = size(C,2)
    l = size(A,2)
    @assert size(A,1) == m
    @assert size(B,2) == n
    @assert size(B,1) == l
    @assert mod(m,nworkers()) == 0
    nrows_w = div(m,nworkers())
    @sync for (iw,w) in enumerate(workers())
        lb = 1 + (iw-1)*nrows_w
        ub = iw*nrows_w
        A_w = A[lb:ub,:]
        ftr = @spawnat w begin
             C_w = similar(A_w)
             matmul_seq!(C_w,A_w,B)
             C_w
        end
        @async C[lb:ub,:] = fetch(ftr)
    end
    C
end

@everywhere function matmul_seq!(C,A,B)
    m = size(C,1)
    n = size(C,2)
    l = size(A,2)
    @assert size(A,1) == m
    @assert size(B,2) == n
    @assert size(B,1) == l
    z = zero(eltype(C))
    for j in 1:n
        for i in 1:m
            Cij = z
            for k in 1:l
                @inbounds Cij = Cij + A[i,k]*B[k,j]
            end
            C[i,j] = Cij
        end
    end
    C
end
```

## MPI (Point-to-point)

### Exercise 1

```julia
function matmul_mpi_3!(C,A,B)
    comm = MPI.COMM_WORLD
    rank = MPI.Comm_rank(comm)
    P = MPI.Comm_size(comm)
    if  rank == 0
        N = size(A,1)
        myB = B
        for dest in 1:(P-1)
            MPI.Send(B,comm;dest)
        end
    else
        source = 0
        status = MPI.Probe(comm,MPI.Status;source)
        count = MPI.Get_count(status,eltype(B))
        N = Int(sqrt(count))
        myB = zeros(N,N)
        MPI.Recv!(myB,comm;source)
    end
    L = div(N,P)
    myA = zeros(L,N)
    if  rank == 0
        lb = L*rank+1
        ub = L*(rank+1)
        myA[:,:] = view(A,lb:ub,:)
        for dest in 1:(P-1)
            lb = L*dest+1
            ub = L*(dest+1)
            MPI.Send(view(A,lb:ub,:),comm;dest)
        end
    else
        source = 0
        MPI.Recv!(myA,comm;source)
    end
    myC = myA*myB
    if rank == 0
        lb = L*rank+1
        ub = L*(rank+1)
        C[lb:ub,:] = myC
        for source in 1:(P-1)
            lb = L*source+1
            ub = L*(source+1)
            MPI.Recv!(view(C,lb:ub,:),comm;source)
        end
    else
        dest = 0
        MPI.Send(myC,comm;dest)
    end
    C
end
```

### Exercise 2

```julia
using MPI
MPI.Init()
comm = MPI.COMM_WORLD
rank = MPI.Comm_rank(comm)
nranks = MPI.Comm_size(comm)
buffer = Ref(0)
if rank == 0
    msg = 2
    buffer[] = msg
    println("msg = $(buffer[])")
    MPI.Send(buffer,comm;dest=rank+1,tag=0)
    MPI.Recv!(buffer,comm;source=nranks-1,tag=0)
    println("msg = $(buffer[])")
else
    dest = if (rank != nranks-1)
        rank+1
    else
        0
    end
    MPI.Recv!(buffer,comm;source=rank-1,tag=0)
    buffer[] += 1
    println("msg = $(buffer[])")
    MPI.Send(buffer,comm;dest,tag=0)
end
```
## MPI (collectives)

### Exercise 1

```julia
function matmul_mpi_3!(C,A,B)
    comm = MPI.COMM_WORLD
    rank = MPI.Comm_rank(comm)
    P = MPI.Comm_size(comm)
    root = 0
    if  rank == root
        N = size(A,1)
        Nref = Ref(N)
    else
        Nref = Ref(0)
    end
    MPI.Bcast!(Nref,comm;root)
    N = Nref[]
    if  rank == root
        myB = B
    else
        myB = zeros(N,N)
    end
    MPI.Bcast!(myB,comm;root)
    L = div(N,P)
    # Tricky part
    # Scatter and gather work "row major"
    # while Julia works "col major"
    myAt = zeros(N,L)
    At = collect(transpose(A))
    MPI.Scatter!(At,myAt,comm;root)
    myCt = transpose(myB)*myAt
    Ct = similar(C)
    MPI.Gather!(myCt,Ct,comm;root)
    C .= transpose(Ct)
    C
end
```

This other solution uses a column partition instead of a row partition.
It is more natural to work with column partitions in Julia if possible since matrices are
in "col major" format. Note that we do not need all the auxiliary transposes anymore.

```julia
function matmul_mpi_3!(C,A,B)
    comm = MPI.COMM_WORLD
    rank = MPI.Comm_rank(comm)
    P = MPI.Comm_size(comm)
    root = 0
    if  rank == root
        N = size(A,1)
        Nref = Ref(N)
    else
        Nref = Ref(0)
    end
    MPI.Bcast!(Nref,comm;root)
    N = Nref[]
    if  rank == root
        myA = A
    else
        myA = zeros(N,N)
    end
    MPI.Bcast!(myA,comm;root)
    L = div(N,P)
    myB = zeros(N,L)
    MPI.Scatter!(B,myB,comm;root)
    myC = myA*myB
    MPI.Gather!(myC,C,comm;root)
    C
end
```

## Jacobi method

### Exercise 1

```julia
function jacobi_mpi(n,niters)
    comm = MPI.COMM_WORLD
    nranks = MPI.Comm_size(comm)
    rank = MPI.Comm_rank(comm)
    if mod(n,nranks) != 0
        println("n must be a multiple of nranks")
        MPI.Abort(comm,1)
    end
    load = div(n,nranks)
    u = zeros(load+2)
    u[1] = -1
    u[end] = 1
    u_new = copy(u)
    for t in 1:niters
        reqs = MPI.Request[]
        if rank != 0
            neig_rank = rank-1
            req = MPI.Isend(view(u,2:2),comm,dest=neig_rank,tag=0)
            push!(reqs,req)
            req = MPI.Irecv!(view(u,1:1),comm,source=neig_rank,tag=0)
            push!(reqs,req)
        end
        if rank != (nranks-1)
            neig_rank = rank+1
            s = load+1
            r = load+2
            req = MPI.Isend(view(u,s:s),comm,dest=neig_rank,tag=0)
            push!(reqs,req)
            req = MPI.Irecv!(view(u,r:r),comm,source=neig_rank,tag=0)
            push!(reqs,req)
        end
        for i in 3:load
            u_new[i] = 0.5*(u[i-1]+u[i+1])
        end
        MPI.Waitall(reqs)
        for i in (2,load+1)
            u_new[i] = 0.5*(u[i-1]+u[i+1])
        end
        u, u_new = u_new, u

    end
    # Gather the results
    if rank !=0
        lb = 2
        ub = load+1
        MPI.Send(view(u,lb:ub),comm,dest=0)
        u_all = zeros(0) # This will nevel be used
    else
        u_all = zeros(n+2)
        # Set boundary
        u_all[1] = -1
        u_all[end] = 1
        # Set data for rank 0
        lb = 2
        ub = load+1
        u_all[lb:ub] = view(u,lb:ub)
        # Set data for other ranks
        for other_rank in 1:(nranks-1)
            lb += load
            ub += load
            MPI.Recv!(view(u_all,lb:ub),comm;source=other_rank)
        end
    end
    return u_all
end
```