# Solutions

## Julia Basics

### Exercise 1

```julia
function ex1(a)
    j = 1
    m = a[j]
    for (i,ai) in enumerate(a)
        if m < ai
            m = ai
            j = i
        end
    end
    (m,j)
end
```

### Exercise 2

```julia
ex2(f,g) = x -> f(x) + g(x)
```

### Exercise 3

```julia
using GLMakie
max_iters = 100
n = 1000
x = LinRange(-1.7,0.7,n)
y = LinRange(-1.2,1.2,n)
heatmap(x,y,(i,j)->mandel(i,j,max_iters))
```

## Asynchronous programming in Julia

## Distributed computing in Julia

### Exercise 1

```julia
f = () -> Channel{Int}(1)
chnls = [ RemoteChannel(f,w) for w in workers() ]
@sync for (iw,w) in enumerate(workers())
    @spawnat w begin
        chnl_snd = chnls[iw]
        if w == 2
            chnl_rcv = chnls[end]
            msg = 2
            println("msg = $msg")
            put!(chnl_snd,msg)
            msg = take!(chnl_rcv)
            println("msg = $msg")
        else
            chnl_rcv = chnls[iw-1]
            msg = take!(chnl_rcv)
            msg += 1
            println("msg = $msg")
            put!(chnl_snd,msg)
        end
    end
end
```

This is another possible solution.

```julia
@everywhere function work(msg)
    println("msg = $msg")
    if myid() != nprocs()
        next = myid() + 1
        @fetchfrom next work(msg+1)
    else
        @fetchfrom 2 println("msg = $msg")
    end
end
msg = 2
@fetchfrom 2 work(msg)
```

## Matrix-matrix multiplication

### Exercise 1

```julia
function matmul_dist_3!(C,A,B)
    m = size(C,1)
    n = size(C,2)
    l = size(A,2)
    @assert size(A,1) == m
    @assert size(B,2) == n
    @assert size(B,1) == l
    @assert mod(m,nworkers()) == 0
    nrows_w = div(m,nworkers())
    @sync for (iw,w) in enumerate(workers())
        lb = 1 + (iw-1)*nrows_w
        ub = iw*nrows_w
        A_w = A[lb:ub,:]
        ftr = @spawnat w begin
            C_w = similar(A_w,size(A_w,1),size(B,2)) # block of rows of C owned by this worker
            matmul_seq!(C_w,A_w,B)
            C_w
        end
        @async C[lb:ub,:] = fetch(ftr)
    end
    C
end
@everywhere function matmul_seq!(C,A,B)
    m = size(C,1)
    n = size(C,2)
    l = size(A,2)
    @assert size(A,1) == m
    @assert size(B,2) == n
    @assert size(B,1) == l
    z = zero(eltype(C))
    for j in 1:n
        for i in 1:m
            Cij = z
            for k in 1:l
                @inbounds Cij = Cij + A[i,k]*B[k,j]
            end
            C[i,j] = Cij
        end
    end
    C
end
```

## MPI (Point-to-point)

### Exercise 1

```julia
function matmul_mpi_3!(C,A,B)
    comm = MPI.COMM_WORLD
    rank = MPI.Comm_rank(comm)
    P = MPI.Comm_size(comm)
    if rank == 0
        N = size(A,1)
        myB = B
        for dest in 1:(P-1)
            MPI.Send(B,comm;dest)
        end
    else
        source = 0
        status = MPI.Probe(comm,MPI.Status;source)
        count = MPI.Get_count(status,eltype(B))
        N = Int(sqrt(count))
        myB = zeros(N,N)
        MPI.Recv!(myB,comm;source)
    end
    L = div(N,P)
    myA = zeros(L,N)
    if rank == 0
        lb = L*rank+1
        ub = L*(rank+1)
        myA[:,:] = view(A,lb:ub,:)
        for dest in 1:(P-1)
            lb = L*dest+1
            ub = L*(dest+1)
            MPI.Send(view(A,lb:ub,:),comm;dest)
        end
    else
        source = 0
        MPI.Recv!(myA,comm;source)
    end
    myC = myA*myB
    if rank == 0
        lb = L*rank+1
        ub = L*(rank+1)
        C[lb:ub,:] = myC
        for source in 1:(P-1)
            lb = L*source+1
            ub = L*(source+1)
            MPI.Recv!(view(C,lb:ub,:),comm;source)
        end
    else
        dest = 0
        MPI.Send(myC,comm;dest)
    end
    C
end
```

### Exercise 2

```julia
using MPI
MPI.Init()
comm = MPI.COMM_WORLD
rank = MPI.Comm_rank(comm)
nranks = MPI.Comm_size(comm)
buffer = Ref(0)
if rank == 0
    msg = 2
    buffer[] = msg
    println("msg = $(buffer[])")
    MPI.Send(buffer,comm;dest=rank+1,tag=0)
    MPI.Recv!(buffer,comm;source=nranks-1,tag=0)
    println("msg = $(buffer[])")
else
    dest = if (rank != nranks-1)
        rank+1
    else
        0
    end
    MPI.Recv!(buffer,comm;source=rank-1,tag=0)
    buffer[] += 1
    println("msg = $(buffer[])")
    MPI.Send(buffer,comm;dest,tag=0)
end
```
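The MPI solutions in this and the following sections have to be launched with several MPI ranks (the ring above needs at least two). One possible way to do this from a Julia session, assuming the script has been saved as `ex2.jl` (a hypothetical file name), is the `mpiexec` wrapper provided by MPI.jl:

```julia
# Launch the hypothetical script ex2.jl on 3 MPI ranks using the
# mpiexec wrapper shipped with MPI.jl.
using MPI
mpiexec() do cmd
    run(`$cmd -n 3 $(Base.julia_cmd()) --project ex2.jl`)
end
```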
## MPI (collectives)

### Exercise 1

```julia
function matmul_mpi_3!(C,A,B)
    comm = MPI.COMM_WORLD
    rank = MPI.Comm_rank(comm)
    P = MPI.Comm_size(comm)
    root = 0
    if rank == root
        N = size(A,1)
        Nref = Ref(N)
    else
        Nref = Ref(0)
    end
    MPI.Bcast!(Nref,comm;root)
    N = Nref[]
    if rank == root
        myB = B
    else
        myB = zeros(N,N)
    end
    MPI.Bcast!(myB,comm;root)
    L = div(N,P)
    # Tricky part: Julia stores matrices in column-major order, so we
    # scatter row blocks of A by scattering column blocks of its transpose.
    myAt = zeros(N,L)
    At = collect(transpose(A))
    MPI.Scatter!(At,myAt,comm;root)
    myCt = transpose(myB)*myAt
    Ct = similar(C)
    MPI.Gather!(myCt,Ct,comm;root)
    C .= transpose(Ct)
    C
end
```

This other solution uses a column partition instead of a row partition. It is more natural to work with column partitions in Julia when possible, since Julia matrices are stored in column-major order. Note that we do not need the auxiliary transposes anymore.

```julia
function matmul_mpi_3!(C,A,B)
    comm = MPI.COMM_WORLD
    rank = MPI.Comm_rank(comm)
    P = MPI.Comm_size(comm)
    root = 0
    if rank == root
        N = size(A,1)
        Nref = Ref(N)
    else
        Nref = Ref(0)
    end
    MPI.Bcast!(Nref,comm;root)
    N = Nref[]
    if rank == root
        myA = A
    else
        myA = zeros(N,N)
    end
    MPI.Bcast!(myA,comm;root)
    L = div(N,P)
    myB = zeros(N,L)
    MPI.Scatter!(B,myB,comm;root)
    myC = myA*myB
    MPI.Gather!(myC,C,comm;root)
    C
end
```

## Jacobi method

### Exercise 1

```julia
function jacobi_mpi(n,niters)
    comm = MPI.COMM_WORLD
    u, u_new = init(n,comm)
    load = length(u)-2
    rank = MPI.Comm_rank(comm)
    nranks = MPI.Comm_size(comm)
    nreqs = 2*((rank != 0) + (rank != (nranks-1)))
    reqs = MPI.MultiRequest(nreqs)
    for t in 1:niters
        ireq = 0
        if rank != 0
            neig_rank = rank-1
            u_snd = view(u,2:2)
            u_rcv = view(u,1:1)
            dest = neig_rank
            source = neig_rank
            ireq += 1
            MPI.Isend(u_snd,comm,reqs[ireq];dest)
            ireq += 1
            MPI.Irecv!(u_rcv,comm,reqs[ireq];source)
        end
        if rank != (nranks-1)
            neig_rank = rank+1
            u_snd = view(u,(load+1):(load+1))
            u_rcv = view(u,(load+2):(load+2))
            dest = neig_rank
            source = neig_rank
            ireq += 1
            MPI.Isend(u_snd,comm,reqs[ireq];dest)
            ireq += 1
            MPI.Irecv!(u_rcv,comm,reqs[ireq];source)
        end
        # Update interior cells (they do not depend on the ghost values)
        for i in 3:load
            u_new[i] = 0.5*(u[i-1]+u[i+1])
        end
        # Wait for the communications to finish
        MPI.Waitall(reqs)
        # Update boundary cells
        for i in (2,load+1)
            u_new[i] = 0.5*(u[i-1]+u[i+1])
        end
        u, u_new = u_new, u
    end
    return u
end
```

### Exercise 2

```julia
function jacobi_mpi(n,niters,tol,comm) # new args: tol and comm
    u, u_new = init(n,comm)
    load = length(u)-2
    rank = MPI.Comm_rank(comm)
    nranks = MPI.Comm_size(comm)
    nreqs = 2*((rank != 0) + (rank != (nranks-1)))
    reqs = MPI.MultiRequest(nreqs)
    for t in 1:niters
        ireq = 0
        if rank != 0
            neig_rank = rank-1
            u_snd = view(u,2:2)
            u_rcv = view(u,1:1)
            dest = neig_rank
            source = neig_rank
            ireq += 1
            MPI.Isend(u_snd,comm,reqs[ireq];dest)
            ireq += 1
            MPI.Irecv!(u_rcv,comm,reqs[ireq];source)
        end
        if rank != (nranks-1)
            neig_rank = rank+1
            u_snd = view(u,(load+1):(load+1))
            u_rcv = view(u,(load+2):(load+2))
            dest = neig_rank
            source = neig_rank
            ireq += 1
            MPI.Isend(u_snd,comm,reqs[ireq];dest)
            ireq += 1
            MPI.Irecv!(u_rcv,comm,reqs[ireq];source)
        end
        MPI.Waitall(reqs)
        # Compute the max diff in the current rank
        # while doing the local update
        mydiff = 0.0
        for i in 2:load+1
            u_new[i] = 0.5*(u[i-1]+u[i+1])
            diff_i = abs(u_new[i] - u[i])
            mydiff = max(mydiff,diff_i)
        end
        # Now we need to find the global diff
        diff_ref = Ref(mydiff)
        MPI.Allreduce!(diff_ref,max,comm)
        diff = diff_ref[]
        # If the global diff is below tol, stop!
        if diff < tol
            return u_new
        end
        u, u_new = u_new, u
    end
    return u
end
```
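This function can be driven as sketched below. The sketch assumes that `init` is the helper from the lecture notes that splits the `n` interior grid points evenly among the MPI ranks (so `n` is taken as a multiple of the number of ranks here), and the concrete values of `n`, `niters`, and `tol` are just examples.

```julia
# Hypothetical driver for jacobi_mpi (launch with several MPI ranks).
using MPI
MPI.Init()
comm = MPI.COMM_WORLD
n = 100        # number of interior grid points (assumed a multiple of the number of ranks)
niters = 10000 # maximum number of Jacobi iterations
tol = 1.0e-9   # stop once the largest update is below this value
u = jacobi_mpi(n,niters,tol,comm)
```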
## All pairs of shortest paths

### Exercise 1

```julia
function floyd_iterations!(myC,comm)
    L = size(myC,1)
    N = size(myC,2)
    rank = MPI.Comm_rank(comm)
    P = MPI.Comm_size(comm)
    lb = L*rank+1
    ub = L*(rank+1)
    C_k = similar(myC,N)
    for k in 1:N
        if (lb<=k) && (k<=ub)
            # If I have the row, fill in the buffer
            myk = (k-lb)+1
            C_k[:] = view(myC,myk,:)
        end
        # We need to find out the owner of row k.
        # Easy since N is a multiple of P
        root = div(k-1,L)
        MPI.Bcast!(C_k,comm;root)
        # Now, we have the data dependencies and
        # we can do the updates locally
        for j in 1:N
            for i in 1:L
                myC[i,j] = min(myC[i,j],myC[i,k]+C_k[j])
            end
        end
    end
    myC
end
```
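A driver for this function is not shown above; a possible sketch follows. It assumes a square cost matrix whose size `N` is a multiple of the number of ranks, and it reuses the transpose trick from the matrix-matrix multiplication solution to scatter and gather the row blocks with collectives (Julia matrices are column major). The example matrix is hypothetical; in practice it would be built from the graph.

```julia
# Hypothetical driver for floyd_iterations! (launch with several MPI ranks).
using MPI
MPI.Init()
comm = MPI.COMM_WORLD
rank = MPI.Comm_rank(comm)
P = MPI.Comm_size(comm)
root = 0
N = 8                       # example size, assumed to be a multiple of P
C = zeros(N,N)
if rank == root
    C .= rand(1.0:10.0,N,N) # example cost matrix; build it from the graph in practice
end
L = div(N,P)
# Scatter row blocks of C by scattering column blocks of its transpose,
# as in the matrix-matrix multiplication solution above.
Ct = collect(transpose(C))
myCt = zeros(N,L)
MPI.Scatter!(Ct,myCt,comm;root)
myC = collect(transpose(myCt))
floyd_iterations!(myC,comm)
# Gather the updated row blocks back on the root rank.
myCt .= transpose(myC)
MPI.Gather!(myCt,Ct,comm;root)
if rank == root
    C .= transpose(Ct)
end
```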