Fibonacci example in OCaml

Here's my translation of the Fibonacci example into OCaml:

open Printf
open Llvm

let build_fib m =
   let fibf =
     define_function "fib" (function_type i32_type [| i32_type |]) m in

   let bb = builder_at_end (entry_block fibf) in

   let one = const_int i32_type 1 and two = const_int i32_type 2 in

   let argx = param fibf 0 in
   set_value_name "AnArg" argx;

   let retbb = append_block "return" fibf in
   let retb = builder_at_end retbb in
   let recursebb = append_block "recurse" fibf in
   let recurseb = builder_at_end recursebb in

   let condinst = build_icmp Icmp_sle argx two "cond" bb in
   ignore(build_cond_br condinst retbb recursebb bb);
   ignore(build_ret one retb);

   let sub = build_sub argx one "arg" recurseb in
   let callfibx1 = build_call fibf [|sub|] "fibx1" recurseb in

   let sub = build_sub argx two "arg" recurseb in
   let callfibx2 = build_call fibf [|sub|] "fibx2" recurseb in

   let sum = build_add callfibx1 callfibx2 "addresult" recurseb in

   build_ret sum recurseb;

   fibf

let main filename =
   let m = create_module filename in

   let puts =
     declare_function "puts"
       (function_type i32_type [|pointer_type i8_type|]) m in

   let fib = build_fib m in

   let main = define_function "main" (function_type i32_type [| |]) m in
   let at_entry = builder_at_end (entry_block main) in
   let n = build_call fib [| const_int i32_type 40 |] "" at_entry in
   ignore (build_ret (const_null i32_type) at_entry);

   if not (Llvm_bitwriter.write_bitcode_file m filename) then exit 1;
   dispose_module m

let () = match Sys.argv with
  > [|_; filename|] -> main filename
  > _ as a -> eprintf "Usage: %s <file>\n" a.(0)

I'd appreciate it if someone could gloss over this and let me know if I'm
going in the correct direction. Also, I have a really stupid question: what
is the easiest way to print the int result!? :slight_smile:

Here is a complete 104-line native code compiler for a tiny subset of OCaml
that is expressive enough to compile an external Fibonacci program:

type expr =
  > Int of int
  > Var of string
  > BinOp of [ `Add | `Sub | `Leq ] * expr * expr
  > If of expr * expr * expr
  > Apply of expr * expr

type defn =
  > LetRec of string * string * expr

open Camlp4.PreCast;;

let expr = Gram.Entry.mk "expr"
let defn = Gram.Entry.mk "defn"
let prog = Gram.Entry.mk "defn"

EXTEND Gram
  expr:
  [ [ "if"; p = expr; "then"; t = expr; "else"; f = expr ->
  If(p, t, f) ]
  > [ e1 = expr; "<="; e2 = expr -> BinOp(`Leq, e1, e2) ]
  > [ e1 = expr; "+"; e2 = expr -> BinOp(`Add, e1, e2)
    > e1 = expr; "-"; e2 = expr -> BinOp(`Sub, e1, e2) ]
  > [ f = expr; x = expr -> Apply(f, x) ]
  > [ v = LIDENT -> Var v
    > n = INT -> Int(int_of_string n)
    > "("; e = expr; ")" -> e ] ];
  defn:
  [ [ "let"; "rec"; f = LIDENT; x = LIDENT; "="; body = expr ->
  LetRec(f, x, body) ] ];
  prog:
  [ [ defns = LIST0 defn; "do"; run = expr -> defns, run ] ];
END

open Printf

let program, run =
  try Gram.parse prog Loc.ghost (Stream.of_channel (open_in "fib.ml")) with
  > Loc.Exc_located(loc, e) ->
      printf "%s at line %d\n" (Printexc.to_string e) (Loc.start_line loc);
      exit 1

open Llvm

let ( |> ) x f = f x

type state =
    { fn: llvalue;
      blk: llbasicblock;
      vars: (string * llvalue) list }

let bb state = builder_at_end state.blk
let new_block state name = append_block name state.fn
let find state v =
  try List.assoc v state.vars with Not_found ->
    eprintf "Unknown variable %s\n" v;
    raise Not_found
let cont (v, state) dest_blk =
  build_br dest_blk (bb state) |> ignore;
  v, state

let rec expr state = function
  > Int n -> const_int i32_type n, state
  > Var x -> find state x, state
  > BinOp(op, f, g) ->
      let f, state = expr state f in
      let g, state = expr state g in
      let build, name = match op with
  > `Add -> build_add, "add"
  > `Sub -> build_sub, "sub"
  > `Leq -> build_icmp Icmp_sle, "leq" in
      build f g name (bb state), state
  > If(p, t, f) ->
      let t_blk = new_block state "pass" in
      let f_blk = new_block state "fail" in
      let k_blk = new_block state "cont" in
      let cond, state = expr state p in
      build_cond_br cond t_blk f_blk (bb state) |> ignore;
      let t, state = cont (expr { state with blk = t_blk } t) k_blk in
      let f, state = cont (expr { state with blk = f_blk } f) k_blk in
      let phi = build_phi [t, t_blk; f, f_blk] "join" (bb state) in
      phi, state
  > Apply(f, arg) ->
      let f, state = expr state f in
      let arg, state = expr state arg in
      build_call f [|arg|] "apply" (bb state), state

let defn m vars = function
  > LetRec(f, arg, body) ->
      let ty = function_type i32_type [| i32_type |] in
      let fn = define_function f ty m in
      let vars' = (arg, param fn 0) :: (f, fn) :: vars in
      let body, state =
  expr { fn = fn; blk = entry_block fn; vars = vars' } body in
      build_ret body (bb state) |> ignore;
      (f, fn) :: vars

let int n = const_int i32_type n

let main filename =
  let m = create_module filename in

  let string = pointer_type i8_type in

  let print =
    declare_function "printf" (var_arg_function_type i32_type [|string|]) m in

  let main = define_function "main" (function_type i32_type [| |]) m in
  let blk = entry_block main in
  let bb = builder_at_end blk in

  let str s = define_global "buf" (const_stringz s) m in
  let int_spec = build_gep (str "%d\n") [| int 0; int 0 |] "int_spec" bb in

  let vars = List.fold_left (defn m) program in
  let n, _ = expr { fn = main; blk = blk; vars = vars } run in

  build_call print [| int_spec; n |] "" bb |> ignore;

  build_ret (int 0) bb |> ignore;

  if not (Llvm_bitwriter.write_bitcode_file m filename) then exit 1;
  dispose_module m

let () = match Sys.argv with
  > [|_; filename|] -> main filename
  > _ as a -> Printf.eprintf "Usage: %s <file>\n" a.(0)

Compile with:

$ ocamlc -dtypes -pp camlp4oof -I +camlp4 dynlink.cma camlp4lib.cma -cc
g++ -I /usr/local/lib/ocaml/ llvm.cma llvm_bitwriter.cma minml.ml -o minml

Run on the following fib.ml file:

  let rec fib n =
    if n <= 2 then 1 else
      fib(n-1) + fib(n-2)

  do fib 40

with:

$ ./minml run.bc
$ llc -f run.bc -o run.s &&
$ gcc run.s -o run
$ ./run

The language already accepts sequences of single-argument function definitions
(only via "let rec") that may refer to each other:

  let rec g n = n-1

  let rec fib n =
    if n <= 2 then g 2 else
      fib(n-1) + fib(n-2)

  do fib 40

The expression at the end after the "do" is compiled into the "main" function
and its int result is printed using printf.

I was kind of hoping that function pointers would just magically work, so
this:

  do (if 1 <= 2 then fib else fib) 40

would run, but instead it produces this error:

$ llc -f run.bc -o run.s
llc: bitcode didn't read correctly.
Reason: Invalid instruction with no BB

I assume I need a real type system that can handle both ints and function
pointers?

After that, I'll add compound types and a memory leak. :wink:

Here is a complete 104-line native code compiler for a tiny subset of OCaml that is expressive enough to compile an external Fibonacci program:

[...]

I was kind of hoping that function pointers would just magically work, so this:

do (if 1 <= 2 then fib else fib) 40

would run, but instead it produces this error:

$ llc -f run.bc -o run.s
llc: bitcode didn't read correctly.
Reason: Invalid instruction with no BB

Try 'Llvm_analysis.assert_valid_module m;' before you write bitcode to figure out where things went awry. ('dump_module m;' may also help.) GIGO applies (but garbage-in/segfault-out is more likely).

Unfortunately, even if the bindings were more strongly typed, it would still be structurally possible to build invalid LLVM code, so you've just got to take care not to violate the invariants, then use the verifier as a cross-check.

Using an asserts build of LLVM is also helpful, but you should have this if you built from SVN.

I assume I need a real type system that can handle both ints and function pointers?

Something like that.

If you're fond of Ocaml's tagged object model (and it does have some nice properties), you'll want to slam every value to some generic value type and use lots of bitcasts. The value type is either the native integer (i32 or i64 depending on target) or a pointer type, probably the recursive pointer type:

   let value_ty =
     let temp_ty = opaque_type () in
     let h = handle_to_type temp_ty in
     refine_type temp_ty (pointer_type temp_ty);
     type_of_handle h

Since LLVM does not have an intptr type, you'd need to know your target and parameterize your codegen accordingly to use the integer types.

The bitcasts have no runtime cost, and LLVM optimization passes can simplify them. Your compiler's semantic analysis phase can be responsible for proving type-safety, so it isn't strictly necessary to propagate type annotations on expressions into the codegen phase so long as the AST nodes themselves are not polymorphic.

However, the potential exists to significantly reduce memory pressure (avoiding boxing) by using a typed object model instead of a tagged one. In this case, propagating types throughout would be necessary. This decision has very significant implications for the compilation of polymorphic functions.

After that, I'll add compound types and a memory leak. :wink:

Indeed.

— Gordon

Try 'Llvm_analysis.assert_valid_module m;' before you write bitcode to
figure out where things went awry. ('dump_module m;' may also help.)
GIGO applies (but garbage-in/segfault-out is more likely).

Ok, thanks for the tip.

Unfortunately, even if the bindings were more strongly typed, it would
still be structurally possible to build invalid LLVM code, so you've
just got to take care not to violate the invariants, then use the
verifier as a cross-check.

I suspect the OCaml bindings to LLVM could express more of these constraints
in the static type system. Has anyone tried to leverage this?

I'd certainly like to do this for a higher-level representation. I have lots
of ideas but I suspect that is OT for this list.

Using an asserts build of LLVM is also helpful, but you should have
this if you built from SVN.

I did, yes.

> I assume I need a real type system that can handle both ints and
> function pointers?

Something like that.

If you're fond of Ocaml's tagged object model (and it does have some
nice properties), you'll want to slam every value to some generic
value type and use lots of bitcasts. The value type is either the
native integer (i32 or i64 depending on target) or a pointer type,
probably the recursive pointer type:

   let value_ty =
     let temp_ty = opaque_type () in
     let h = handle_to_type temp_ty in
     refine_type temp_ty (pointer_type temp_ty);
     type_of_handle h

Since LLVM does not have an intptr type, you'd need to know your
target and parameterize your codegen accordingly to use the integer
types.

The bitcasts have no runtime cost, and LLVM optimization passes can
simplify them. Your compiler's semantic analysis phase can be
responsible for proving type-safety, so it isn't strictly necessary to
propagate type annotations on expressions into the codegen phase so
long as the AST nodes themselves are not polymorphic.

However, the potential exists to significantly reduce memory pressure
(avoiding boxing) by using a typed object model instead of a tagged
one. In this case, propagating types throughout would be necessary.
This decision has very significant implications for the compilation of
polymorphic functions.

So this is the part where I start asking really basic questions. :slight_smile:

As I understand it, the simplest approach is to box all values, which means
that every value is stored as a struct containing the type (an enum) and the
value itself (inline if it is no bigger than a pointer or as a pointer to a
larger data structure otherwise). Something like this:

enum runtype {
  Int;
  Float;
  Array;
}

struct array {
  int length;
  box *a;
}

union value {
  int n; // 64-bit
  float x; // 64-bit
  array* u; // >64-bit
}

struct box {
  runtype t;
  value v;
}

So you'd create an int with:

box make_int(int n) {
  box b;
  b.t = Int;
  b.v.n = n;
  return b;
}

Is that right or are values stored as a box*?

I'd rather box everything rather than tag ints to start with. I'll relegate
that to a potential optimization.

What of this can LLVM's optimizer optimize away for me?

So I have to work out how to generate IR that handles those data structures.

Unfortunately, even if the bindings were more strongly typed, it would still be structurally possible to build invalid LLVM code, so you've just got to take care not to violate the invariants, then use the verifier as a cross-check.

I suspect the OCaml bindings to LLVM could express more of these constraints in the static type system. Has anyone tried to leverage this?

Any specific ideas?

I assume I need a real type system that can handle both ints and
function pointers?

As I understand it, the simplest approach is to box all values, which means that every value is stored as a struct containing the type (an enum) and the value itself (inline if it is no bigger than a pointer or as a pointer to a larger data structure otherwise).

I'd rather box everything rather than tag ints to start with. I'll relegate that to a potential optimization.

You could go about it that way, sure. Memory pressure will be very high.

What of this can LLVM's optimizer optimize away for me?

Not much. LLVM won't really change your memory layout. The features which did perform dramatic memory reorganization were excised from the source tree due to patent infringement issues. (They remain in the repository on a branch for UIUC research.) These were also targeted toward manual memory management.

It should be possible for LLVM to perform intra- or interprocedural escape analysis and lower heap allocations to stack allocations, but I don't believe this is implemented. Garbage collection would also complicate it, since LLVM would need to recognize GC allocator functions as such. This would be an excellent project if you are interested in contributing to LLVM itself.

So I have to work out how to generate IR that handles those data structures.

Should be straightforward. Read the getelementptr FAQ. Unions are handled with pointer bitcasts.

http://llvm.org/docs/GetElementPtr.html

— Gordon

>> Unfortunately, even if the bindings were more strongly typed, it
>> would still be structurally possible to build invalid LLVM code, so
>> you've just got to take care not to violate the invariants, then
>> use the verifier as a cross-check.
>
> I suspect the OCaml bindings to LLVM could express more of these
> constraints in the static type system. Has anyone tried to leverage
> this?

Any specific ideas?

Provide a type enumerating the valid terminators and restrict the last
instruction in a block to be a terminator. Something like this:

  type terminator = [ `ret of llvalue | `br of llvalue ]
  type instruction =
      [ terminator
      > `add of llvalue * llvalue
      > `sub of llvalue * llvalue ]
  type block = instruction list * terminator

If you want to avoid having a data structure representing the input to LLVM
then you can probably achieve the same result using combinators, e.g. by
having the building functions for terminators change an [`incomplete] block
into a [`complete] block. However, that might make the bindings harder to
use.

Use phantom types to track the type of each llvalue:

  type 'a llvalue

This could prevent my OCaml code with a function pointer error from compiling.
For example, the "define_function" function would return a value of the type:

  [ `function ] llvalue

and the build_call function would require a value of that type, so you could
not accidentally pass it an int.

I would use polymorphic variants more, particularly for enums and types that
are only used once (e.g. "linkage" and "visibility"). So types would be `i32
rather than i32_type and int_predicate and real_predicate would become
overlapping sum types, e.g. `ugt is valid for both. I'd also rather see
structuring than identifier bloat, e.g.:

module Linkage = struct
  type linkage =
      [ `External
      > `Link_once
      > `Weak
      > `Appending
      > `Internal
      > `Dllimport
      > `Dllexport
      > `External_weak
      > `Ghost ]

  external linkage : llvalue -> linkage = "llvm_linkage"
end

> I'd rather box everything rather than tag ints to start with. I'll
> relegate that to a potential optimization.

You could go about it that way, sure. Memory pressure will be very high.

Let's ignore that for now and get something up and running. I think I can
unbox within expressions easily enough.

> What of this can LLVM's optimizer optimize away for me?

Not much. LLVM won't really change your memory layout. The features
which did perform dramatic memory reorganization were excised from the
source tree due to patent infringement issues. (They remain in the
repository on a branch for UIUC research.) These were also targeted
toward manual memory management.

That's a shame. I'm not sure how this works but presumably distros are free to
add that functionality back in provided they are not in geographical areas
affected by software patents?

It should be possible for LLVM to perform intra- or interprocedural
escape analysis and lower heap allocations to stack allocations, but I
don't believe this is implemented. Garbage collection would also
complicate it, since LLVM would need to recognize GC allocator
functions as such. This would be an excellent project if you are
interested in contributing to LLVM itself.

Perhaps such optimizations would be better done at a higher-level, above the
level of the GC? So that would be ideal for a higher-level VM on top of LLVM.

> So I have to work out how to generate IR that handles those data
> structures.

Should be straightforward. Read the getelementptr FAQ. Unions are
handled with pointer bitcasts.

http://llvm.org/docs/GetElementPtr.html

Brilliant, thanks.

Provide a type enumerating the valid terminators and restrict the last instruction in a block to be a terminator. Something like this:

type terminator = [ `ret of llvalue | `br of llvalue ]
type instruction =
     [ terminator
     > `add of llvalue * llvalue
     > `sub of llvalue * llvalue ]
type block = instruction list * terminator

If you want to avoid having a data structure representing the input to LLVM then you can probably achieve the same result using combinators, e.g. by having the building functions for terminators change an [`incomplete] block into a [`complete] block. However, that might make the bindings harder to use.

Intermediate states are important, though they might be invalid. This is inherited from the underlying object model. I don't think validity is a concept that can effectively be grafted in through the type system of a binding.

Use phantom types to track the type of each llvalue:

type 'a llvalue

This does not cover the full generality of the IR. The first argument to a call instruction need not be a Function. Rather, the type of the value must be pointer to function.

Still, those phantom types may be a solution for binding the Value hierarchy without introducing gratuitous static casts. (The argument to set_visibility must be a GlobalValue, for instance.) Can you represent multiple-level inheritance? Value -> GlobalValue -> GlobalVariable, say.

I would use polymorphic variants more, particularly for enums and types that are only used once (e.g. "linkage" and "visibility"). So types would be `i32 rather than i32_type

Types are not enums, they're first-class objects.

and int_predicate and real_predicate would become overlapping sum types, e.g. `ugt is valid for both.

These variant types were set up to have a 1:1 correspondence with the C++ enums, and I'd prefer to keep that. There's also no overlap for integer and FP predicates (unsigned greater than is not unordered greater than).

I'd also rather see structuring than identifier bloat, e.g.:

module Linkage = struct
type linkage =
     [ `External
     > ...
     > `Ghost ]

external linkage : llvalue -> linkage = "llvm_linkage"
end

This is a fair idea for grouping enums.

It should be possible for LLVM to perform intra- or interprocedural escape analysis and lower heap allocations to stack allocations, but I don't believe this is implemented. Garbage collection would also complicate it, since LLVM would need to recognize GC allocator functions as such. This would be an excellent project if you are interested in contributing to LLVM itself.

Perhaps such optimizations would be better done at a higher-level, above the level of the GC? So that would be ideal for a higher-level VM on top of LLVM.

Escape analysis is perfectly practical on the LLVM representation.

Reorganizing data structures is probably best done by the language front end. A functional language is the ideal host for such experiments. The closest LLVM does is SROA.

— Gordon

> Use phantom types to track the type of each llvalue:
>
> type 'a llvalue

This does not cover the full generality of the IR.

Yes.

The first argument
to a call instruction need not be a Function. Rather, the type of the
value must be pointer to function.

Still, those phantom types may be a solution for binding the Value
hierarchy without introducing gratuitous static casts. (The argument
to set_visibility must be a GlobalValue, for instance.) Can you
represent multiple-level inheritance? Value -> GlobalValue ->
GlobalVariable, say.

You may be able to by enumerating the members of GlobalValue in
a "global_value" type:

  type global_value = [ `GlobalVariable | `GlobalConstant | ... ]

and then saying that "set_visibility" accepts some subset of those
constructors:

  val set_visibility : [< global_value ] -> ...

I haven't checked that this works though.

> I would use polymorphic variants more, particularly for enums and
> types that are only used once (e.g. "linkage" and "visibility"). So
> types would be `i32 rather than i32_type

Types are not enums, they're first-class objects.

Ok.

> and int_predicate and real_predicate would become overlapping sum
> types, e.g. `ugt is valid for both.

These variant types were set up to have a 1:1 correspondence with the C
++ enums, and I'd prefer to keep that. There's also no overlap for
integer and FP predicates (unsigned greater than is not unordered
greater than).

That's fine then.

> Perhaps such optimizations would be better done at a higher-level,
> above the level of the GC? So that would be ideal for a higher-level
> VM on top of LLVM.

Escape analysis is perfectly practical on the LLVM representation.

Reorganizing data structures is probably best done by the language
front end. A functional language is the ideal host for such
experiments. The closest LLVM does is SROA.

Right.

Hi,

Gordon Henriksen wrote:

> What of this can LLVM's optimizer optimize away for me?

Not much. LLVM won't really change your memory layout. The features
which did perform dramatic memory reorganization were excised from the
source tree due to patent infringement issues. (They remain in the
repository on a branch for UIUC research.) These were also targeted
toward manual memory management.

Concern about software patents is a shame. Do those behind LLVM support
the patent holders in this case, or are they just concerned about being
hassled over infringement. If the latter, could an approach be made to
one of the anti-software patent organisations to see if prior art can be
found, or else look at moving the software outside of the scope of the
applicable patent law?

Cheers,

Ralph.

The patents in question are based on unification points-to analysis held by Microsoft. Working around them is easy, just don't use those techniques. Unfortunately, DSA is designed and built around them, so it can't be "simply modified" to not use them.

Also, as far as software patents go, they are fairly reasonable. I don't think there was any prior art in that area.

-Chris