A peak into the Erlang compiler and BEAM bytecode

Compiler

Intermediate representaions:

Erlang source code --> Abstract Syntax Tree ('P') --> expanded AST ('E') --> Core Erlang ('to_core') --> BEAM byte-code

-module(add).

-export([add/2]).

add(A, B) ->
    id(A) + id(B).

id(I) ->
    I.
[{attribute,1,file,{"examples/add.erl",1}},
 {attribute,2,module,add},
 {attribute,4,export,[{add,2}]},
 {function,6,add,2,
           [{clause,6,
                    [{var,6,'A'},{var,6,'B'}],
                    [],
                    [{op,7,'+',
                         {call,7,{atom,7,id},[{var,...}]},
                         {call,7,{atom,7,...},[{...}]}}]}]},
 {function,9,id,1,
           [{clause,9,[{var,9,'I'}],[],[{var,10,'I'}]}]},
 {eof,11}]}
{add,[{add,2},{module_info,0},{module_info,1}],
     [{attribute,1,file,{"add.erl",1}},
      {function,6,add,2,
                [{clause,6,
                         [{var,6,'A'},{var,6,'B'}],
                         [],
                         [{op,7,'+',
                              {call,7,{atom,7,...},[{...}]},
                              {call,7,{atom,...},[...]}}]}]},
      {function,9,id,1,
                [{clause,9,[{var,9,'I'}],[],[{var,10,'I'}]}]},
      {function,0,module_info,0,
                [{clause,0,[],[],
                         [{call,0,
                                {remote,0,{atom,...},{...}},
                                [{atom,0,...}]}]}]},
      {function,0,module_info,1,
                [{clause,0,
                         [{var,0,'X'}],
                         [],
                         [{call,0,
                                {remote,0,{...},...},
                                [{atom,...},{...}]}]}]}]}
module 'add' ['add'/2,
          'module_info'/0,
          'module_info'/1]
    attributes []
'add'/2 =
    %% Line 6
    fun (_cor1,_cor0) ->
    let <_cor3> =
        %% Line 7
        apply 'id'/1
    	(_cor1)
    in  let <_cor2> =
    	%% Line 7
    	apply 'id'/1
    	    (_cor0)
        in  %% Line 7
    	call 'erlang':'+'
    	    (_cor3, _cor2)
'id'/1 =
    %% Line 9
    fun (_cor0) ->
    _cor0
'module_info'/0 =
    fun () ->
    call 'erlang':'get_module_info'
        ('add')
'module_info'/1 =
    fun (_cor0) ->
    call 'erlang':'get_module_info'
        ('add', _cor0)
end
{module, add}.  %% version = 0
{exports, [{add,2},{module_info,0},{module_info,1}]}.
{attributes, []}.
{labels, 9}.

{function, add, 2, 2}.
  {label,1}.
    {line,[{location,"add.erl",6}]}.
    {func_info,{atom,add},{atom,add},2}.
  {label,2}.
    {allocate,1,2}.
    {move,{x,1},{y,0}}.
    {line,[{location,"add.erl",7}]}.
    {call,1,{f,4}}.
    {move,{x,0},{x,1}}.
    {move,{y,0},{x,0}}.
    {move,{x,1},{y,0}}.
    {line,[{location,"add.erl",7}]}.
    {call,1,{f,4}}.
    {line,[{location,"add.erl",7}]}.
    {gc_bif,'+',{f,0},1,[{y,0},{x,0}],{x,0}}.
    {deallocate,1}.
    return.

{function, id, 1, 4}.
  {label,3}.
    {line,[{location,"add.erl",9}]}.
    {func_info,{atom,add},{atom,id},1}.
  {label,4}.
    return.

{function, module_info, 0, 6}.
  {label,5}.
    {line,[]}.
    {func_info,{atom,add},{atom,module_info},0}.
  {label,6}.
    {move,{atom,add},{x,0}}.
    {line,[]}.
    {call_ext_only,1,{extfunc,erlang,get_module_info,1}}.

{function, module_info, 1, 8}.
  {label,7}.
    {line,[]}.
    {func_info,{atom,add},{atom,module_info},1}.
  {label,8}.
    {move,{x,0},{x,1}}.
    {move,{atom,add},{x,0}}.
    {line,[]}.
    {call_ext_only,2,{extfunc,erlang,get_module_info,2}}.

Detailed steps:

> compile:file("examples/add.erl", [time]).
Compiling "examples/add.erl"
 remove_file                   :       0.00 s       1.0 kB %% remove target beam
 parse_module                  :       0.00 s       2.2 kB %% preprocessor
 transform_module              :       0.00 s       2.2 kB
 lint_module                   :       0.00 s       2.2 kB
 expand_module                 :       0.00 s       2.9 kB
 core_module                   :       0.01 s      21.8 kB
 core_fold_module              :       0.00 s      11.3 kB
 core_transforms               :       0.00 s      11.3 kB
 core_dsetel_module            :       0.00 s      11.3 kB
 kernel_module                 :       0.00 s      12.1 kB
 v3_life                       :       0.00 s       7.6 kB
 v3_codegen                    :       0.00 s       5.6 kB
 beam_a                        :       0.00 s       5.3 kB
 beam_block                    :       0.00 s       5.9 kB
 beam_except                   :       0.00 s       5.9 kB
 beam_bool                     :       0.00 s       5.9 kB
 beam_type                     :       0.00 s       5.9 kB
 beam_split                    :       0.00 s       5.7 kB
 beam_dead                     :       0.00 s       5.9 kB
 beam_jump                     :       0.00 s       5.9 kB
 beam_peep                     :       0.00 s       5.9 kB
 beam_clean                    :       0.00 s       5.9 kB
 beam_bsm                      :       0.00 s       5.9 kB
 beam_receive                  :       0.00 s       5.9 kB
 beam_trim                     :       0.00 s       5.9 kB
 beam_flatten                  :       0.00 s       5.3 kB
 beam_z                        :       0.00 s       5.2 kB
 beam_validator                :       0.00 s       5.2 kB
 beam_asm                      :       0.00 s       1.0 kB
 save_binary                   :       0.00 s       1.0 kB

Erlang VM/BEAM Emulator/ERTS

A garbage collecting, reduction counting, non-preemptive, directly threaded, register virtual machine

See the slides from Erik Stenman's talk at Erlang User Conference 2013

BEAM bytecode

Control contructs

if vs case vs guards

if the tested operation obviously has a boolean outcome i/2, c/2, cw/2 and even g/2 are identical (all compiled to a single test is_lt expression)

i(I, J) ->
    if I > J -> I;
       true -> J
    end.

fun i/2
2: test is_lt [x1 x0] ->3:
   return
3: move x1 x0
   return
c(I, J) ->
    case I > J of
        true -> I;
        false -> J
    end.
fun c/2
2: test is_lt [x1 x0] ->3:
   return
3: move x1 x0
   return
cw(I, J) ->
    case I of
        _ when I > J -> I;
        _ -> J
    end.
fun cw/2
2: test is_lt [x1 x0] ->3:
   return
3: move x1 x0
   return
g(I, J) when I > J -> I;
g(_I, J) -> J.



fun g/2
2: test is_lt [x1 x0] ->3:
   return
3: move x1 x0
   return
g2(I, _) when I > 2 -> I;
g2(_, J) -> J.



fun g2/2
2: test is_lt [{integer,2} x0] ->3:
   return
3: move x1 x0
   return

iff I is not an operation with boolean outcome case will do more:

emb_c(I) ->
    a:b(case I of true -> a; false -> b end),
    ok.












fun emb_c/1
 2: allocate 0 1
    test is_atom [x0] ->6:
    select_val x0
        {atom,false} ->3:
        {atom,true} ->4:
        ->6:
 3: move {atom,b} x0
    jump ->5:
 4: move {atom,a} x0
 5: call_ext 1 {extfunc,a,b,1}
    move {atom,ok} x0
    deallocate 0
    return
 6: case_end x0
emb_i(I) ->
    a:b(if I -> a; true -> b end),
    ok.







fun emb_i/1
2: allocate 0 1
   test is_eq_exact [x0 {atom,true}] ->3:
   move {atom,a} x0
   jump ->4:
3: move {atom,b} x0
4: call_ext 1 {extfunc,a,b,1}
   move {atom,ok} x0
   deallocate 0
   return
emb_c2(I) ->
    a:b(case I of true -> a; _ -> b end),
    ok.







fun emb_c2/1
2: allocate 0 1
   test is_eq_exact [x0 {atom,true}] ->3:
   move {atom,a} x0
   jump ->4:
3: move {atom,b} x0
4: call_ext 1 {extfunc,a,b,1}
   move {atom,ok} x0
   deallocate 0
   return

Another example from real life: nested cases

c1(A) ->
    case A > 5 of
        true -> big;
        false ->
            case A > 0 of
                true -> small;
                false -> negative
            end
    end.
fun c1/1
2: test is_lt [{integer,5} x0] ->3:
   move {atom,big} x0
   return
3: test is_lt [{integer,0} x0] ->4:
   move {atom,small} x0
   return
4: move {atom,negative} x0
   return
i1(A) ->
    if A > 5 -> big;
       A > 0 -> small;
       true -> negative
    end.




fun i1/1
2: test is_lt [{integer,5} x0] ->3:
   move {atom,big} x0
   return
3: test is_lt [{integer,0} x0] ->4:
   move {atom,small} x0
   return
4: move {atom,negative} x0
   return
bad_case(A) ->
    case A of
        a -> ok
    end.

fun bad_case/1
2: test is_eq_exact [x0 {atom,a}] ->3:
   move {atom,ok} x0
   return
3: case_end x0
bad_match(A) ->
    a = A,
    ok.


fun bad_match/1
2: test is_eq_exact [x0 {atom,a}] ->3:
   move {atom,ok} x0
   return
3: badmatch x0
good_case(A) ->
    case A of
        a -> ok;
        b -> A;
        c -> ok;
        error -> error
    end.





fun good_case/1
 2: test is_atom [x0] ->5:
    select_val x0
        {atom,c} ->3:
        {atom,a} ->3:
        {atom,error} ->4:
        {atom,b} ->4:
        ->5:
 3: move {atom,ok} x0
    return
 4: return
 5: case_end x0

orelse vs if

orelse is strickter as it throws badarg if the variable is not boolean

orelse_(Bool) ->
    Bool orelse a:b().












fun orelse_/1
 2: test is_atom [x0] ->5:
    select_val x0
        {atom,true} ->3:
        {atom,false} ->4:
        ->5:
 3: return
 4: call_ext_only 0 {extfunc,a,b,0}
 5: test_heap 3 1
    put_tuple 2 x1
    put {atom,badarg}
    put x0
    move x1 x0
    call_ext 1 {extfunc,erlang,error,1}

if the varibale is anything else than the atom true the default clause will be called (no badarg)

if_(Bool) ->
    if Bool ->
            %% note the optimization:
            %% as Bool is in x0 and it is true
            %% we can return x0 without moving anything into it
            true;
       true ->
            a:b()
    end.
fun if_/1
2: test is_eq_exact [x0 {atom,true}] ->3:
   return
3: call_ext_only 0 {extfunc,a,b,0}





if_1b(A, B, Bool) ->
    if A > B -> ok;
       Bool ->
            %% Bool is in x2 so have to move the atom true
            %% into x0 before returning
            true;
       true ->
            a:b()
    end.
fun if_1b/3
2: test is_lt [x1 x0] ->3:
   move {atom,ok} x0
   return
3: test is_eq_exact [x2 {atom,true}] ->4:
   move {atom,true} x0
   return
4: call_ext_only 0 {extfunc,a,b,0}

if the test is not a variable so the outcome is guaranteed to be boolean then these two clauses are identical

orelse_2(A, B) ->
    A > B orelse a:b(),
    ok.




fun orelse_2/2
2: allocate 0 2
   test is_ge [x1 x0] ->3:
   call_ext 0 {extfunc,a,b,0}
3: move {atom,ok} x0
   deallocate 0
   return
if_2(A, B) ->
    if A > B ->
            true;
       true ->
            a:b()
    end,
    ok.
fun if_2/2
2: allocate 0 2
   test is_ge [x1 x0] ->3:
   call_ext 0 {extfunc,a,b,0}
3: move {atom,ok} x0
   deallocate 0
   return

throw, exit, error and stacktrace

th(A) ->
    throw(A).
fun th/1
2: call_ext 1 {extfunc,erlang,throw,1}
ex(A) ->
    erlang:exit(A).
fun ex/1
2: call_ext 1 {extfunc,erlang,exit,1}
err1(_A) ->
   erlang:error(my_error). 

fun err1/1
2: move {atom,my_error} x0
   call_ext 1 {extfunc,erlang,error,1}
err2(A) ->
   erlang:error(my_error, [A]). 



fun err2/1
2: test_heap 2 1
   put_list x0 nil x1
   move {atom,my_error} x0
   call_ext 2 {extfunc,erlang,error,2}
err3(A) ->
   erlang:error(my_error, [A, b]). 



fun err3/1
2: test_heap 2 1
   put_list x0 {literal,[b]} x1
   move {atom,my_error} x0
   call_ext 2 {extfunc,erlang,error,2}

Functions and funs

Funs

these are different this creates a local function calling external a:b/1

fun_remote1() ->
    fun(E) -> a:b(E) end.



fun fun_remote1/0
2: make_fun2 ->8: 0 0 0
   return
fun '-fun_remote1/0-fun-0-'/1
8: call_ext_only 1 {extfunc,a,b,1}

this calls external function erlang:make_fun(a,b,1)

fun_remote2() ->
    fun a:b/1.



fun fun_remote2/0
2: move {atom,b} x1
   move {integer,1} x2
   move {atom,a} x0
   call_ext_only 3 {extfunc,erlang,make_fun,3}

these are the same - both create a local function (so actually one separate each) calling local/1

fun_local1() ->
    fun(E) -> local(E) end.






fun fun_local1/0
 2: make_fun2 ->10: 0 0 0
    return
fun local/1
 4: move {atom,do_nothing} x0
    return
fun '-fun_local1/0-fun-0-'/1
10: call_only 1 ->4:
fun_local2() ->
    fun local/1.






fun fun_local2/0
 2: make_fun2 ->10: 0 0 0
    return
fun local/1
 4: move {atom,do_nothing} x0
    return
fun '-fun_local2/0-fun-0-'/1
10: call_only 1 ->4:

each call creates a separate local function (so these create two)

fun_local3() ->
    F1 = fun local/1,
    F2 = fun local/1,
    {F1, F2}.















fun fun_local3/0
 2: allocate_zero 1 0
    make_fun2 ->12: 0 0 0
    move x0 y0
    make_fun2 ->10: 0 0 0
    test_heap 3 1
    put_tuple 2 x1
    put y0
    put x0
    move x1 x0
    deallocate 1
    return
fun local/1
 4: move {atom,do_nothing} x0
    return
fun '-fun_local3/0-fun-1-'/1
10: call_only 1 ->4:
fun '-fun_local3/0-fun-0-'/1
12: call_only 1 ->4:

EEP-23: Allow variables in fun M:F/A implemented in R15A in commit ff432e262e6524...

eep23(M, F, A) ->
    %% this calls external function erlang:make_fun(M,F,A)
    fun M:F/A.
fun eep23/3
2: call_ext_only 3 {extfunc,erlang,make_fun,3}

Funs and variable scopes

f1(A) ->
    F = fun(B) ->
                A + B
        end,
    F(1).

use_f2(A, B) ->
    F = f2(A),
    F(B).

f2(A) ->
    fun(B) ->
            B + A
    end.












fun f1/1
 2: allocate 0 1
    make_fun2 ->14: 0 0 1
    move x0 x1
    move {integer,1} x0
    call_fun 1
    deallocate 0
    return
fun use_f2/2
 4: allocate 1 2
    move x1 y0
    call 1 ->6:
    move x0 x1
    move y0 x0
    call_fun 1
    deallocate 1
    return
fun f2/1
 6: make_fun2 ->12: 0 0 1
    return
fun '-f2/1-fun-0-'/2
12: gc_bif '+' 2 [x0 x1] x0 ->0:
    return
fun '-f1/1-fun-0-'/2
14: gc_bif '+' 2 [x1 x0] x0 ->0:
    return

foreach vs list comprehension

(when the result is not assign to a variable or not returned as a last call in a function clause)

From Efficiency Guide:List Handling "Since R12B list comprehensions are translated to local recursive functions If the result of the list comprehension will obviously not be used, a list will not be constructed."

(the code is mostly the same in lists:foreach but not using lists:foreach will avoid calling an external function)

lc() ->
    L = [1,2,3],
    [a:b(E)||E <- L],
    ok.












fun lc/0
 2: allocate 0 0
    move {literal,[1,2,3]} x0
    call 1 ->8:
    move {atom,ok} x0
    deallocate 0
    return
fun '-lc/0-lc$^0/1-0-'/1
 8: test is_nonempty_list [x0] ->9:
    allocate 1 1
    get_list x0 x0 y0
    call_ext 1 {extfunc,a,b,1}
    move y0 x0
    call_last 1 ->8: 1
 9: test is_nil [x0] ->7:
    return
lc_local(L) ->
    [local(E)||E <- L],
    ok.















fun lc_local/1
 2: allocate 0 1
    call 1 ->10:
    move {atom,ok} x0
    deallocate 0
    return
fun local/1
 4: move {atom,do_nothing} x0
    return
fun '-lc_local/1-lc$^0/1-0-'/1
10: test is_nonempty_list [x0] ->11:
    allocate 1 1
    get_list x0 x0 y0
    call 1 ->4:
    move y0 x0
    call_last 1 ->10: 1
11: test is_nil [x0] ->9:
    return
fe() ->
    L = [1,2,3],
    lists:foreach(fun(E) -> a:b(E) end, L),
    ok.






fun fe/0
2: allocate 0 0
   make_fun2 ->8: 0 0 0
   move {literal,[1,2,3]} x1
   call_ext 2 {extfunc,lists,foreach,2}
   move {atom,ok} x0
   deallocate 0
   return
fun '-fe/0-fun-0-'/1
8: call_ext_only 1 {extfunc,a,b,1}
fe_local() ->
    L = [1,2,3],
    lists:foreach(fun(E) -> local(E) end, L),
    ok.









fun fe_local/0
 2: allocate 0 0
    make_fun2 ->10: 0 0 0
    move {literal,[1,2,3]} x1
    call_ext 2 {extfunc,lists,foreach,2}
    move {atom,ok} x0
    deallocate 0
    return
fun local/1
 4: move {atom,do_nothing} x0
    return
fun '-fe_local/0-fun-0-'/1
10: call_only 1 ->4:

Recursive vs tail-recursive functions

map1(F, [H|T]) ->
    [F(H)|map1(F, T)];
map1(_, []) ->
    [].

















fun map1/2
2: test is_nonempty_list [x1] ->3:
   allocate 2 2
   get_list x1 x2 y1
   move x0 x1
   move x2 x0
   move x1 y0
   call_fun 1
   move x0 x2
   move y1 x1
   move y0 x0
   move x2 y1
   trim 1 1
   call 2 ->2:
   test_heap 2 1
   put_list y0 x0 x0
   deallocate 1
   return
3: test is_nil [x1] ->1:
   move nil x0
   return
tmap1(F, [H|T], Acc) ->
    tmap1(F, T, [F(H)|Acc]);
tmap1(_, [], Acc) ->
    lists:reverse(Acc).













fun tmap1/3
2: test is_nonempty_list [x1] ->3:
   allocate 3 3
   get_list x1 x3 y2
   move x0 x1
   move x3 x0
   move x2 y0
   move x1 y1
   call_fun 1
   test_heap 2 1
   put_list x0 y0 x2
   move y2 x1
   move y1 x0
   call_last 3 ->2: 3
3: test is_nil [x1] ->1:
   move x2 x0
   call_ext_only 1 {extfunc,lists,reverse,1}

Small compile time optimizations

variable R is eliminated

v1(A) ->
    R = A + 1,
    R.
fun v1/1
2: gc_bif '+' 1 [x0 {integer,1}] x0 ->0:
   return
v2(A) ->
    A + 1.

fun v2/1
2: gc_bif '+' 1 [x0 {integer,1}] x0 ->0:
   return

Compile-time constant literals

c() ->
    {"asd",1}.

fun c/0
2: move {literal,{"asd",1}} x0
   return
minus_zero() ->
    binary_to_term(<<131,70,128,0,0,0,0,0,0,0>>).

fun minus_zero/0
2: move {literal,<<131,70,128,0,0,0,0,0,0,0>>} x0
   call_ext_only 1 {extfunc,erlang,binary_to_term,1}
minus_zero2() ->
    -1 * 0.0.

fun minus_zero2/0
2: move {float,0.0} x0
   return

Constant folding

bifs with constant arguments, guard tests known to succeed or fail

abs_() ->
    abs(-3).             %% 3.

fun abs_/0
2: move {integer,3} x0
   return
element_() ->
    element(2, {a,b}).   %% b.

fun element_/0
2: move {atom,b} x0
   return
atom_to_list_() ->
    atom_to_list(a).     %% "a".

fun atom_to_list_/0
2: move {literal,"a"} x0
   return

All bifs

some functions in the erlang module are called as bifs or gc_bifs but most of them are called as external functions

bif.tab

Record

f() ->
    R1 = id(#r{}),
    R2 = #r{f1 = 1},
    R3 = R1#r{f2 = 2},
    {R2, R3}.
id(A) -> A.

















fun f/0
 2: allocate 0 0
    move {literal,{r,0,undefined}} x0
    call 1 ->5:
    test is_tuple [x0] ->3:
    test test_arity [x0 3] ->3:
    get_tuple_element x0 0 x1
    test is_eq_exact [x1 {atom,r}] ->3:
    move x0 x1
    move {integer,2} x2
    move {integer,3} x0
    call_ext 3 {extfunc,erlang,setelement,3}
    test_heap 3 1
    put_tuple 2 x1
    put {literal,{r,1,undefined}}
    put x0
    move x1 x0
    deallocate 0
    return
 3: move {literal,{badrecord,r}} x0
    call_ext 1 {extfunc,erlang,error,1}
fun id/1
 5: return

get the default value for record field f1

defval() ->
    #r{f1 = V} = #r{},
    V.
defval2() ->
    (#r{})#r.f1.
defval3() ->
    %% after we know how it is implemented
    %% we can use some compile time constant folding
    element(#r.f1, #r{}).














fun defval/0
 2: move {literal,{r,0,undefined}} x0
    test is_tuple [x0] ->3:
    test test_arity [x0 3] ->3:
    get_tuple_element x0 0 x1
    get_tuple_element x0 1 x2
    test is_eq_exact [x1 {atom,r}] ->3:
    move x2 x0
    return
 3: if_end
fun defval2/0
 5: move {literal,{r,0,undefined}} x0
    test is_tuple [x0] ->6:
    test test_arity [x0 3] ->6:
    get_tuple_element x0 0 x1
    get_tuple_element x0 1 x2
    test is_eq_exact [x1 {atom,r}] ->6:
    move x2 x0
    return
 6: if_end
fun defval3/0
 8: move {integer,0} x0
    return

Maps

f() ->
    M = #{a=>1},
    #{1:=a,2:=b,3:="c","4":="d"} = id(#{1=>a,2=>b,3=>"c","4"=>"d"}),
    M.
%% Use this function to avoid compile-time evaluation of an expression.
id(I) -> I.























fun f/0
 2: allocate_zero 1 0
    put_map_assoc ->0: nil x0 0 {list,[{atom,a},{integer,1}]}
    put_map_assoc ->0: nil x1 1 {list,[{integer,1},
       {atom,a},
       {integer,2},
       {atom,b},
       {integer,3},
       {literal,"c"},
       {literal,"4"},
       {literal,"d"}]}
    move x0 y0
    move x1 x0
    call 1 ->5:
    test is_map [x0] ->3:
    get_map_element ->3: x0 {literal,1} x1
    get_map_element ->3: x0 {literal,2} x2
    get_map_element ->3: x0 {literal,3} x3
    get_map_element ->3: x0 {literal,"4"} x4
    test is_eq_exact [x1 {atom,a}] ->3:
    test is_eq_exact [x2 {atom,b}] ->3:
    test is_eq_exact [x3 {literal,"c"}] ->3:
    test is_eq_exact [x4 {literal,"d"}] ->3:
    move y0 x0
    deallocate 1
    return
 3: badmatch x0
fun id/1
 5: return

Some more topics that could be covered