Sciences Po CompEcon 2017
@profile macro.function profile_test(n)
for i = 1:n
A = randn(100,100,20)
m = maximum(A)
Afft = fft(A)
Am = mapslices(sum, A, 2)
B = A[:,:,5]
Bsort = mapslices(sort, B, 1)
b = rand(100)
C = B.*b
end
end
profile_test(1) #Â compile
Profile.clear() # if we have previous profile data
@time profile_test(10)
took = @elapsed profile_test(10)
@profile profile_test(100)
Profile.print()
0.122868 seconds (291.13 k allocations: 87.349 MB, 10.85% gc time)
849 ./task.jl:360; (::IJulia.##13#19)()
849 ...Julia/src/eventloop.jl:8; eventloop(::ZMQ.Socket)
849 ...rc/execute_request.jl:157; execute_request(::ZMQ.Socket, ::...
849 ./loading.jl:441; include_string(::String, ::String)
848 ./<missing>:?; anonymous
848 ./profile.jl:16; macro expansion;
154 ./In[2]:3; profile_test(::Int64)
144 ./random.jl:1208; randn!(::MersenneTwister, ::A...
35 ./random.jl:0; randn(::MersenneTwister, ::Ty...
79 ./random.jl:1202; randn(::MersenneTwister, ::Ty...
29 ./random.jl:1130; randn
29 ./random.jl:263; rand_ui52
29 ./random.jl:125; rand_ui52_raw
9 ./random.jl:124; rand_ui52_raw_inbounds
9 ./random.jl:117; rand_inbounds
9 ./random.jl:104; mt_pop!
20 ./random.jl:111; reserve_1
15 ./random.jl:107; gen_rand
2 ./dSFMT.jl:0; dsfmt_fill_array_close1_op...
13 ./dSFMT.jl:75; dsfmt_fill_array_close1_op...
1 ./random.jl:108; gen_rand
1 ./random.jl:102; mt_setfull!
3 ./random.jl:1131; randn
23 ./random.jl:1133; randn
1 ./random.jl:1134; randn
16 ./random.jl:1135; randn
3 ./random.jl:0; randn_unlikely(::MersenneTwi...
9 ./random.jl:1147; randn_unlikely(::MersenneTwi...
1 ./random.jl:243; rand
1 ./random.jl:122; rand
1 ./random.jl:111; reserve_1
3 ./random.jl:1150; randn_unlikely(::MersenneTwi...
1 ./random.jl:0; randn(::MersenneTwister)
1 ./random.jl:1134; randn(::MersenneTwister)
1 ./random.jl:1135; randn(::MersenneTwister)
1 ./random.jl:1147; randn_unlikely(::MersenneTw...
50 ./In[2]:4; profile_test(::Int64)
50 ./reduce.jl:162; _mapreduce(::Base.#identity, :...
4 ./reduce.jl:272; mapreduce_impl(::Base.#identi...
1 ./reduce.jl:278; mapreduce_impl(::Base.#identi...
45 ./reduce.jl:280; mapreduce_impl(::Base.#identi...
246 ./In[2]:5; profile_test(::Int64)
11 ./dft.jl:43; copy1(::Type{Complex{Float64}}...
34 ./dft.jl:44; copy1(::Type{Complex{Float64}}...
34 ./multidimensional.jl:725; circcopy!(::Array{Complex{Flo...
1 ./abstractarray.jl:0; copy!(::Base.LinearFast, ::Ar...
6 ./abstractarray.jl:558; copy!(::Base.LinearFast, ::Ar...
27 ./abstractarray.jl:559; copy!(::Base.LinearFast, ::Ar...
201 ./dft.jl:57; fft(::Array{Complex{Float64},3...
20 ./fft/FFTW.jl:585; #plan_fft#5(::UInt32, ::Float...
3 ./fft/FFTW.jl:463; Base.DFT.FFTW.cFFTWPlan{Compl...
1 ./fft/FFTW.jl:414; dims_howmany(::Array{Complex{...
1 ./set.jl:120; unique(::Array{Int64,1})
1 ./set.jl:6; Type
1 ./dict.jl:344; Type
1 ./array.jl:169; zeros(::Type{T}, ::Int64, ...
2 ./fft/FFTW.jl:423; dims_howmany(::Array{Complex{...
1 ./array.jl:0; hcat(::Array{Int64,1}, ::Arr...
1 ./array.jl:741; hcat(::Array{Int64,1}, ::Arr...
8 ./fft/FFTW.jl:464; Base.DFT.FFTW.cFFTWPlan{Compl...
9 ./fft/FFTW.jl:87; fakesimilar(::UInt32, ::Array...
1 ./fft/FFTW.jl:84; Base.DFT.FFTW.FakeArray{T,N}(...
1 ./fft/FFTW.jl:350; colmajorstrides(::Tuple{Int6...
1 ./arraymath.jl:450; cumprod(::Array{Int64,1})
9 ./fft/FFTW.jl:617; *
172 ./fft/FFTW.jl:618; *
352 ./In[2]:6; profile_test(::Int64)
1 ./abstractarray.jl:1610; mapslices(::Base.#sum, ::Arra...
1 ./abstractarray.jl:67; indices
1 ./array.jl:20; size
1 ./array.jl:24; _size
2 ./abstractarray.jl:1614; mapslices(::Base.#sum, ::Arra...
1 ./array.jl:1466; setdiff(::Array{Int64,1}, ::A...
1 ./set.jl:10; Type
1 ./set.jl:7; Type
1 ./dict.jl:344; Type
1 ./array.jl:169; zeros(::Type{T}, ::Int64, :...
1 ./array.jl:1468; setdiff(::Array{Int64,1}, ::A...
1 ./set.jl:6; Type
1 ./dict.jl:344; Type
1 ./array.jl:169; zeros(::Type{T}, ::Int64, ::...
1 ./abstractarray.jl:1628; mapslices(::Base.#sum, ::Arra...
1 ./abstractarray.jl:1635; mapslices(::Base.#sum, ::Arra...
23 ./abstractarray.jl:1648; mapslices(::Base.#sum, ::Arra...
1 .../lib/julia/sys.dylib:?; !(::Bool)
1 ./bool.jl:0; !(::Bool)
1 ./abstractarray.jl:1652; mapslices(::Base.#sum, ::Arra...
53 ./abstractarray.jl:1653; mapslices(::Base.#sum, ::Arra...
1 .../lib/julia/sys.dylib:?; getindex(::Tuple{Int64,Int64}, ...
1 .../lib/julia/sys.dylib:?; setindex!(::Array{Any,1}, ::Any...
99 ./abstractarray.jl:1655; mapslices(::Base.#sum, ::Arra...
18 ./multidimensional.jl:340; _unsafe_getindex!(::Array{Flo...
18 ./multidimensional.jl:348; macro expansion
1 ./cartesian.jl:62; macro expansion
17 ./cartesian.jl:64; macro expansion
17 ./multidimensional.jl:350; macro expansion
169 ./abstractarray.jl:1656; mapslices(::Base.#sum, ::Arra...
66 ./abstractarray.jl:0; setindex!(::Array{Float64,3},...
21 ./abstractarray.jl:832; setindex!(::Array{Float64,3},...
1 ./multidimensional.jl:0; _setindex!(::Base.LinearFast,...
1 ./multidimensional.jl:365; _setindex!(::Base.LinearFast,...
1 ./multidimensional.jl:366; _setindex!(::Base.LinearFast,...
1 ./multidimensional.jl:421; _unsafe_batchsetindex!(::Arra...
1 ./multidimensional.jl:423; macro expansion
1 ./reduce.jl:229; sum(::Array{Float64,1})
1 ./reduce.jl:162; _mapreduce(::Base.#identity, ...
1 ./reduce.jl:102; mapreduce_impl(::Base.#identi...
1 ./simdloop.jl:66; macro expansion
3 ./In[2]:7; profile_test(::Int64)
3 ./abstractarray.jl:752; getindex
3 ./multidimensional.jl:270; _getindex
3 ./abstractarray.jl:284; checkbounds(::Array{Float64,3...
1 ./abstractarray.jl:270; checkbounds(::Type{Bool}, ::A...
1 ./essentials.jl:0; argtail(::Colon, ::Colon, ::V...
42 ./In[2]:8; profile_test(::Int64)
1 ./abstractarray.jl:1612; mapslices(::Base.#sort, ::Arr...
1 .../lib/julia/sys.dylib:?; trailingsize(::BitArray{2}, ::I...
2 ./abstractarray.jl:1614; mapslices(::Base.#sort, ::Arr...
2 ./array.jl:1466; setdiff(::Array{Int64,1}, ::A...
2 ./set.jl:10; Type
2 ./set.jl:7; Type
2 ./dict.jl:344; Type
2 ./array.jl:169; zeros(::Type{T}, ::Int64, :...
1 ./abstractarray.jl:1617; mapslices(::Base.#sort, ::Arr...
1 ./abstractarray.jl:1622; mapslices(::Base.#sort, ::Arr...
1 ./abstractarray.jl:1623; mapslices(::Base.#sort, ::Arr...
1 ./sort.jl:417; sort(::Array{Float64,1})
1 ./sort.jl:417; #sort#8(::Array{Any,1}, ::Fun...
1 ./array.jl:65; copy!(::Array{Float64,1}, ::I...
2 ./abstractarray.jl:1637; mapslices(::Base.#sort, ::Arr...
4 ./abstractarray.jl:1648; mapslices(::Base.#sort, ::Arr...
4 ./abstractarray.jl:1653; mapslices(::Base.#sort, ::Arr...
6 ./abstractarray.jl:1655; mapslices(::Base.#sort, ::Arr...
1 ./multidimensional.jl:340; _unsafe_getindex!(::Array{Floa...
1 ./multidimensional.jl:348; macro expansion
1 ./cartesian.jl:64; macro expansion
1 ./multidimensional.jl:350; macro expansion
20 ./abstractarray.jl:1656; mapslices(::Base.#sort, ::Arr...
1 ./abstractarray.jl:832; setindex!(::Array{Float64,2},...
1 ./multidimensional.jl:421; _unsafe_batchsetindex!(::Arra...
1 ./multidimensional.jl:429; macro expansion
1 ./cartesian.jl:64; macro expansion
1 ./multidimensional.jl:430; macro expansion
14 ./sort.jl:417; sort(::Array{Float64,1})
14 ./sort.jl:417; #sort#8(::Array{Any,1}, ::Fun...
1 ./abstractarray.jl:653; copymutable
1 ./ordering.jl:0; ord(::Base.#isless, ::Base.#...
12 ./sort.jl:623; sort!(::Array{Float64,1}, ::...
3 ./sort.jl:606; fpsort!(::Array{Float64,1}, ...
3 ./sort.jl:581; nans2right!(::Array{Float64...
3 ./sort.jl:587; nans2right!(::Array{Float64...
2 ./sort.jl:609; fpsort!(::Array{Float64,1}, ...
2 ./sort.jl:614; fpsort!(::Array{Float64,1}, ...
1 ./sort.jl:294; sort!(::Array{Float64,1}, :...
1 ./sort.jl:279; partition!(::Array{Float64,...
1 ./sort.jl:299; sort!(::Array{Float64,1}, :...
1 ./sort.jl:293; sort!(::Array{Float64,1}, :...
1 ./sort.jl:222; sort!(::Array{Float64,1}, ...
5 ./sort.jl:615; fpsort!(::Array{Float64,1}, ...
1 ./sort.jl:293; sort!(::Array{Float64,1}, :...
1 ./sort.jl:222; sort!(::Array{Float64,1}, :...
1 ./sort.jl:294; sort!(::Array{Float64,1}, :...
1 ./sort.jl:279; partition!(::Array{Float64,...
3 ./sort.jl:299; sort!(::Array{Float64,1}, :...
3 ./sort.jl:293; sort!(::Array{Float64,1}, :...
1 ./sort.jl:218; sort!(::Array{Float64,1}, ...
1 ./sort.jl:221; sort!(::Array{Float64,1}, ...
1 ./sort.jl:229; sort!(::Array{Float64,1}, ...
1 ./In[2]:10; profile_test(::Int64)
1 ...rse/sparsematrix.jl:1718; .*(::Array{Float64,2}, ::Array...
1 ...rse/sparsematrix.jl:1681; broadcast_zpreserving
1 ./broadcast.jl:230; broadcast
1 ./broadcast.jl:228; broadcast_t
1 ./broadcast.jl:172; broadcast!
1 ./broadcast.jl:117; _broadcast!(::Base.#*, ::Ar...
1 ./broadcast.jl:123; macro expansion
1 ./simdloop.jl:73; macro expansion
1 ./broadcast.jl:129; macro expansion
@profile data¶4 ./fft/FFTW.jl:464; Base.DFT.FFTW.cFFTWPlan{Compl...
./fft/FFTW.jl on line 4644 above as 4 seconds; you will have to compare this to the overall samples taken (here: 608).Profile.print(format=:flat)
@profile data¶ProfileView.jlusing ProfileView
ProfileView.view()
Profile¶@profile: the execution time of your program plays a role here, but this is really a statistical problem.Profile.init() # returns the current settings
Profile.init(n, delay)
Profile.init(delay = 0.01)
julia --track-allocation=user# in your terminal
julia --track-allocation=user
# in julia: include your code
include("NFXP/src/nfxp.jl")
# run once
nfxp.simulate_single_run()
Profile.clear_malloc_data()
# run for measurement
nfxp.simulate_single_run()
# exit
quit()
# restart julia (no options)
julia
using Coverage
analyze_malloc(".")
addprocsBase.Threadsaddprocsaddprocs?addprocsn processes at startup via julia -p naddprocs(n).n processes on your computer, mimicking multithreading.x on process 1 is not visible on process 2!addprocs() # adds maximal amount of workers
workers()
8-element Array{Int64,1}:
2
3
4
5
6
7
8
9
remotecall@spawn and @spawnatparallel loop and pmapHere is a simple example:
# save that in a file
function count_heads(n)
c::Int = 0
for i=1:n
c += rand(Bool)
end
c
end
# then
@everywhere include("count_heads.jl")
a = @spawn count_heads(100000000)
b = @spawn count_heads(100000000)
fetch(a)+fetch(b)
# or, even better
nheads = @parallel (+) for i=1:200000000
Int(rand(Bool))
end
Threads.nthreads()JULIA_NUM_THREADS=4 julia. that is 4 threads.Threads.threadid()# example
a = zeros(Int,10)
Threads.@threads for i in 1:10
a[i] = Threads.threadid()
end
println(a)
#10-element Array{Int64,1}:
# 1
# 1
# 1
# 2
# 2
# 2
# 3
# 3
# 4
# 4
[1,1,1,1,1,1,1,1,1,1]