Sciences Po CompEcon 2017
@profile
# macro.
"""
    profile_test(n)

Run a fixed numerical workload `n` times so that a profiler has something to
sample. Every intermediate result is deliberately thrown away; only the time
spent producing it matters.

In the profile output recorded further down, the entries `In[2]:3` to
`In[2]:10` refer to the eight statements of the loop body, in order.

# Arguments
- `n`: number of times the workload is repeated; nothing runs when `n < 1`.

# Returns
`nothing`.
"""
function profile_test(n)
    for i = 1:n
        A = randn(100,100,20)          # 100x100x20 array of normal draws
        m = maximum(A)                 # full reduction over A
        Afft = fft(A)                  # discrete Fourier transform of A
        Am = mapslices(sum, A, 2)      # sum over slices along dimension 2
        B = A[:,:,5]                   # copy out the fifth 100x100 slice
        Bsort = mapslices(sort, B, 1)  # sort B along dimension 1
        b = rand(100)
        C = B.*b                       # element-wise product, b broadcast over B
    end
    return nothing
end
# Warm-up call, so that compilation is not part of the measurements below.
profile_test(1) # compile
Profile.clear() # if we have previous profile data
# Report run time and allocation statistics for 10 repetitions.
@time profile_test(10)
# Same workload, but keep the elapsed time in `took` instead of printing it.
took = @elapsed profile_test(10)
# Collect profiler samples over 100 repetitions, then print what was collected.
@profile profile_test(100)
Profile.print()
0.122868 seconds (291.13 k allocations: 87.349 MB, 10.85% gc time) 849 ./task.jl:360; (::IJulia.##13#19)() 849 ...Julia/src/eventloop.jl:8; eventloop(::ZMQ.Socket) 849 ...rc/execute_request.jl:157; execute_request(::ZMQ.Socket, ::... 849 ./loading.jl:441; include_string(::String, ::String) 848 ./<missing>:?; anonymous 848 ./profile.jl:16; macro expansion; 154 ./In[2]:3; profile_test(::Int64) 144 ./random.jl:1208; randn!(::MersenneTwister, ::A... 35 ./random.jl:0; randn(::MersenneTwister, ::Ty... 79 ./random.jl:1202; randn(::MersenneTwister, ::Ty... 29 ./random.jl:1130; randn 29 ./random.jl:263; rand_ui52 29 ./random.jl:125; rand_ui52_raw 9 ./random.jl:124; rand_ui52_raw_inbounds 9 ./random.jl:117; rand_inbounds 9 ./random.jl:104; mt_pop! 20 ./random.jl:111; reserve_1 15 ./random.jl:107; gen_rand 2 ./dSFMT.jl:0; dsfmt_fill_array_close1_op... 13 ./dSFMT.jl:75; dsfmt_fill_array_close1_op... 1 ./random.jl:108; gen_rand 1 ./random.jl:102; mt_setfull! 3 ./random.jl:1131; randn 23 ./random.jl:1133; randn 1 ./random.jl:1134; randn 16 ./random.jl:1135; randn 3 ./random.jl:0; randn_unlikely(::MersenneTwi... 9 ./random.jl:1147; randn_unlikely(::MersenneTwi... 1 ./random.jl:243; rand 1 ./random.jl:122; rand 1 ./random.jl:111; reserve_1 3 ./random.jl:1150; randn_unlikely(::MersenneTwi... 1 ./random.jl:0; randn(::MersenneTwister) 1 ./random.jl:1134; randn(::MersenneTwister) 1 ./random.jl:1135; randn(::MersenneTwister) 1 ./random.jl:1147; randn_unlikely(::MersenneTw... 50 ./In[2]:4; profile_test(::Int64) 50 ./reduce.jl:162; _mapreduce(::Base.#identity, :... 4 ./reduce.jl:272; mapreduce_impl(::Base.#identi... 1 ./reduce.jl:278; mapreduce_impl(::Base.#identi... 45 ./reduce.jl:280; mapreduce_impl(::Base.#identi... 246 ./In[2]:5; profile_test(::Int64) 11 ./dft.jl:43; copy1(::Type{Complex{Float64}}... 34 ./dft.jl:44; copy1(::Type{Complex{Float64}}... 34 ./multidimensional.jl:725; circcopy!(::Array{Complex{Flo... 1 ./abstractarray.jl:0; copy!(::Base.LinearFast, ::Ar... 
6 ./abstractarray.jl:558; copy!(::Base.LinearFast, ::Ar... 27 ./abstractarray.jl:559; copy!(::Base.LinearFast, ::Ar... 201 ./dft.jl:57; fft(::Array{Complex{Float64},3... 20 ./fft/FFTW.jl:585; #plan_fft#5(::UInt32, ::Float... 3 ./fft/FFTW.jl:463; Base.DFT.FFTW.cFFTWPlan{Compl... 1 ./fft/FFTW.jl:414; dims_howmany(::Array{Complex{... 1 ./set.jl:120; unique(::Array{Int64,1}) 1 ./set.jl:6; Type 1 ./dict.jl:344; Type 1 ./array.jl:169; zeros(::Type{T}, ::Int64, ... 2 ./fft/FFTW.jl:423; dims_howmany(::Array{Complex{... 1 ./array.jl:0; hcat(::Array{Int64,1}, ::Arr... 1 ./array.jl:741; hcat(::Array{Int64,1}, ::Arr... 8 ./fft/FFTW.jl:464; Base.DFT.FFTW.cFFTWPlan{Compl... 9 ./fft/FFTW.jl:87; fakesimilar(::UInt32, ::Array... 1 ./fft/FFTW.jl:84; Base.DFT.FFTW.FakeArray{T,N}(... 1 ./fft/FFTW.jl:350; colmajorstrides(::Tuple{Int6... 1 ./arraymath.jl:450; cumprod(::Array{Int64,1}) 9 ./fft/FFTW.jl:617; * 172 ./fft/FFTW.jl:618; * 352 ./In[2]:6; profile_test(::Int64) 1 ./abstractarray.jl:1610; mapslices(::Base.#sum, ::Arra... 1 ./abstractarray.jl:67; indices 1 ./array.jl:20; size 1 ./array.jl:24; _size 2 ./abstractarray.jl:1614; mapslices(::Base.#sum, ::Arra... 1 ./array.jl:1466; setdiff(::Array{Int64,1}, ::A... 1 ./set.jl:10; Type 1 ./set.jl:7; Type 1 ./dict.jl:344; Type 1 ./array.jl:169; zeros(::Type{T}, ::Int64, :... 1 ./array.jl:1468; setdiff(::Array{Int64,1}, ::A... 1 ./set.jl:6; Type 1 ./dict.jl:344; Type 1 ./array.jl:169; zeros(::Type{T}, ::Int64, ::... 1 ./abstractarray.jl:1628; mapslices(::Base.#sum, ::Arra... 1 ./abstractarray.jl:1635; mapslices(::Base.#sum, ::Arra... 23 ./abstractarray.jl:1648; mapslices(::Base.#sum, ::Arra... 1 .../lib/julia/sys.dylib:?; !(::Bool) 1 ./bool.jl:0; !(::Bool) 1 ./abstractarray.jl:1652; mapslices(::Base.#sum, ::Arra... 53 ./abstractarray.jl:1653; mapslices(::Base.#sum, ::Arra... 1 .../lib/julia/sys.dylib:?; getindex(::Tuple{Int64,Int64}, ... 1 .../lib/julia/sys.dylib:?; setindex!(::Array{Any,1}, ::Any... 
99 ./abstractarray.jl:1655; mapslices(::Base.#sum, ::Arra... 18 ./multidimensional.jl:340; _unsafe_getindex!(::Array{Flo... 18 ./multidimensional.jl:348; macro expansion 1 ./cartesian.jl:62; macro expansion 17 ./cartesian.jl:64; macro expansion 17 ./multidimensional.jl:350; macro expansion 169 ./abstractarray.jl:1656; mapslices(::Base.#sum, ::Arra... 66 ./abstractarray.jl:0; setindex!(::Array{Float64,3},... 21 ./abstractarray.jl:832; setindex!(::Array{Float64,3},... 1 ./multidimensional.jl:0; _setindex!(::Base.LinearFast,... 1 ./multidimensional.jl:365; _setindex!(::Base.LinearFast,... 1 ./multidimensional.jl:366; _setindex!(::Base.LinearFast,... 1 ./multidimensional.jl:421; _unsafe_batchsetindex!(::Arra... 1 ./multidimensional.jl:423; macro expansion 1 ./reduce.jl:229; sum(::Array{Float64,1}) 1 ./reduce.jl:162; _mapreduce(::Base.#identity, ... 1 ./reduce.jl:102; mapreduce_impl(::Base.#identi... 1 ./simdloop.jl:66; macro expansion 3 ./In[2]:7; profile_test(::Int64) 3 ./abstractarray.jl:752; getindex 3 ./multidimensional.jl:270; _getindex 3 ./abstractarray.jl:284; checkbounds(::Array{Float64,3... 1 ./abstractarray.jl:270; checkbounds(::Type{Bool}, ::A... 1 ./essentials.jl:0; argtail(::Colon, ::Colon, ::V... 42 ./In[2]:8; profile_test(::Int64) 1 ./abstractarray.jl:1612; mapslices(::Base.#sort, ::Arr... 1 .../lib/julia/sys.dylib:?; trailingsize(::BitArray{2}, ::I... 2 ./abstractarray.jl:1614; mapslices(::Base.#sort, ::Arr... 2 ./array.jl:1466; setdiff(::Array{Int64,1}, ::A... 2 ./set.jl:10; Type 2 ./set.jl:7; Type 2 ./dict.jl:344; Type 2 ./array.jl:169; zeros(::Type{T}, ::Int64, :... 1 ./abstractarray.jl:1617; mapslices(::Base.#sort, ::Arr... 1 ./abstractarray.jl:1622; mapslices(::Base.#sort, ::Arr... 1 ./abstractarray.jl:1623; mapslices(::Base.#sort, ::Arr... 1 ./sort.jl:417; sort(::Array{Float64,1}) 1 ./sort.jl:417; #sort#8(::Array{Any,1}, ::Fun... 1 ./array.jl:65; copy!(::Array{Float64,1}, ::I... 2 ./abstractarray.jl:1637; mapslices(::Base.#sort, ::Arr... 
4 ./abstractarray.jl:1648; mapslices(::Base.#sort, ::Arr... 4 ./abstractarray.jl:1653; mapslices(::Base.#sort, ::Arr... 6 ./abstractarray.jl:1655; mapslices(::Base.#sort, ::Arr... 1 ./multidimensional.jl:340; _unsafe_getindex!(::Array{Floa... 1 ./multidimensional.jl:348; macro expansion 1 ./cartesian.jl:64; macro expansion 1 ./multidimensional.jl:350; macro expansion 20 ./abstractarray.jl:1656; mapslices(::Base.#sort, ::Arr... 1 ./abstractarray.jl:832; setindex!(::Array{Float64,2},... 1 ./multidimensional.jl:421; _unsafe_batchsetindex!(::Arra... 1 ./multidimensional.jl:429; macro expansion 1 ./cartesian.jl:64; macro expansion 1 ./multidimensional.jl:430; macro expansion 14 ./sort.jl:417; sort(::Array{Float64,1}) 14 ./sort.jl:417; #sort#8(::Array{Any,1}, ::Fun... 1 ./abstractarray.jl:653; copymutable 1 ./ordering.jl:0; ord(::Base.#isless, ::Base.#... 12 ./sort.jl:623; sort!(::Array{Float64,1}, ::... 3 ./sort.jl:606; fpsort!(::Array{Float64,1}, ... 3 ./sort.jl:581; nans2right!(::Array{Float64... 3 ./sort.jl:587; nans2right!(::Array{Float64... 2 ./sort.jl:609; fpsort!(::Array{Float64,1}, ... 2 ./sort.jl:614; fpsort!(::Array{Float64,1}, ... 1 ./sort.jl:294; sort!(::Array{Float64,1}, :... 1 ./sort.jl:279; partition!(::Array{Float64,... 1 ./sort.jl:299; sort!(::Array{Float64,1}, :... 1 ./sort.jl:293; sort!(::Array{Float64,1}, :... 1 ./sort.jl:222; sort!(::Array{Float64,1}, ... 5 ./sort.jl:615; fpsort!(::Array{Float64,1}, ... 1 ./sort.jl:293; sort!(::Array{Float64,1}, :... 1 ./sort.jl:222; sort!(::Array{Float64,1}, :... 1 ./sort.jl:294; sort!(::Array{Float64,1}, :... 1 ./sort.jl:279; partition!(::Array{Float64,... 3 ./sort.jl:299; sort!(::Array{Float64,1}, :... 3 ./sort.jl:293; sort!(::Array{Float64,1}, :... 1 ./sort.jl:218; sort!(::Array{Float64,1}, ... 1 ./sort.jl:221; sort!(::Array{Float64,1}, ... 1 ./sort.jl:229; sort!(::Array{Float64,1}, ... 1 ./In[2]:10; profile_test(::Int64) 1 ...rse/sparsematrix.jl:1718; .*(::Array{Float64,2}, ::Array... 
1 ...rse/sparsematrix.jl:1681; broadcast_zpreserving 1 ./broadcast.jl:230; broadcast 1 ./broadcast.jl:228; broadcast_t 1 ./broadcast.jl:172; broadcast! 1 ./broadcast.jl:117; _broadcast!(::Base.#*, ::Ar... 1 ./broadcast.jl:123; macro expansion 1 ./simdloop.jl:73; macro expansion 1 ./broadcast.jl:129; macro expansion
@profile
data
4 ./fft/FFTW.jl:464; Base.DFT.FFTW.cFFTWPlan{Compl...
./fft/FFTW.jl
on line 464
4
above as 4 seconds; you will have to compare this to the overall samples taken (here: 608).
Profile.print(format=:flat)
@profile
data
ProfileView.jl
using ProfileView
ProfileView.view()
Profile
@profile
: the execution time of your program plays a role here, but this is really a statistical problem.
Profile.init() # returns the current settings
Profile.init(n, delay)
Profile.init(delay = 0.01)
julia --track-allocation=user
# Allocation-tracking walkthrough. The bare `julia ...` lines are shell
# commands; everything else is entered at the Julia prompt.
# in your terminal
julia --track-allocation=user
# in julia: include your code
include("NFXP/src/nfxp.jl")
# run once
# NOTE(review): presumably a warm-up so that compilation-time allocations can be discarded — confirm
nfxp.simulate_single_run()
# throw away the allocation data recorded so far, i.e. by the first run
Profile.clear_malloc_data()
# run for measurement
nfxp.simulate_single_run()
# exit
quit()
# restart julia (no options)
julia
# analyze_malloc is called on the current directory (".")
# NOTE(review): presumably it reads the allocation files written when the tracked session exited — confirm against the Coverage package docs
using Coverage
analyze_malloc(".")
addprocs
Base.Threads
addprocs
addprocs
?addprocs
n
processes at startup via julia -p n
addprocs(n)
.
n
processes on your computer, mimicking multithreading.
x
on process 1 is not visible on process 2!
addprocs() # adds maximal amount of workers
workers()
8-element Array{Int64,1}: 2 3 4 5 6 7 8 9
remotecall
@spawn
and @spawnat
parallel
loop and pmap
Here is a simple example:
# save that in a file
"""
    count_heads(n)

Draw `rand(Bool)` once for each element of `1:n` and return, as an `Int`, how
many of the draws were `true`. Returns `0` when `1:n` is empty.
"""
function count_heads(n)
    heads::Int = 0
    for toss in 1:n
        # a `true` draw counts as one head
        if rand(Bool)
            heads += 1
        end
    end
    return heads
end
# then
# include the file via @everywhere so that `count_heads` is defined on all processes
@everywhere include("count_heads.jl")
# launch two independent runs; `a` and `b` are handles to results that may not be ready yet
a = @spawn count_heads(100000000)
b = @spawn count_heads(100000000)
# wait for both results and add them
fetch(a)+fetch(b)
# or, even better
# a parallel loop: the value of each iteration is combined with (+)
# NOTE(review): `@parallel` was renamed `@distributed` in Julia 0.7/1.0 — confirm the Julia version this targets
nheads = @parallel (+) for i=1:200000000
Int(rand(Bool))
end
Threads.nthreads()
JULIA_NUM_THREADS=4 julia
. that is 4 threads.
Threads.threadid()
# example: each slot of `a` ends up holding the id of the thread that ran
# the corresponding loop iteration
a = fill(0, 10)
Threads.@threads for slot in 1:10
    a[slot] = Threads.threadid()
end
println(a)
#10-element Array{Int64,1}:
# 1
# 1
# 1
# 2
# 2
# 2
# 3
# 3
# 4
# 4
[1,1,1,1,1,1,1,1,1,1]