Zulip Chat Archive

Stream: general

Topic: Best practice to annotate AST?

xiao (Mar 04 2025 at 04:37):

I want to design a DSL, probably with some annotations like line number or doc-string. I may also extend the syntax tree for some specific syntax sugar. What is the best way to do this? In Haskell, there's one example with the Mu-type.

In that example, the basic language is the following.

inductive Exp where
  | Const: Nat -> Exp
  | Add: Exp -> Exp -> Exp


inductive Inst where
  | Ann: String -> Inst
  | Push: Nat -> Inst
  | Add | Mult
  deriving Repr


abbrev Prog := List Inst


def Exp.compile: Exp -> Prog
  | .Const n => [.Push n]
  | .Add a b => a.compile ++ b.compile ++ [.Add]


def Exp.eval: Exp -> Nat
  | .Const n => n
  | .Add a b => a.eval + b.eval

However, it will be a bit painful if you want to add extra fields or constructors to the defined Exp, because you have to repeat the same logic many times.

inductive ExpA where
  | Const: String -> Nat -> ExpA
  | Add: String -> ExpA -> ExpA -> ExpA


def ExpA.compile: ExpA -> Prog
  | .Const s n => [.Ann s, .Push n]
  | .Add s a b => [.Ann s] ++ a.compile ++ b.compile ++ [.Add]


def ExpA.eval: ExpA -> Nat
  | .Const _ n => n
  | .Add _ a b => a.eval + b.eval


inductive ExpM where
  | Const: Nat -> ExpM
  | Add: ExpM -> ExpM -> ExpM
  | Mult: ExpM -> ExpM -> ExpM


def ExpM.compile: ExpM -> Prog
  | .Const n => [.Push n]
  | .Add a b => a.compile ++ b.compile ++ [.Add]
  | .Mult a b => a.compile ++ b.compile ++ [.Mult]


def ExpM.eval: ExpM -> Nat
  | .Const n => n
  | .Add a b => a.eval + b.eval
  | .Mult a b => a.eval * b.eval

I understand that the inductive type is itself the fixed point operator, so I tried the following. I made a functor and fixed it with another inductive type, which gives the catamorphism.

inductive ExpF (T: Type) where
  | Const: Nat -> ExpF T
  | Add: T -> T -> ExpF T


inductive Exp where
  | In: ExpF Exp -> Exp


def Exp.const n := Exp.In (ExpF.Const n)
def Exp.add a b := Exp.In (.Add a b)

def Exp.cata {B} (f: ExpF B -> B): Exp -> B
  | .In (.Const n) => f (.Const n)
  | .In (.Add a b) => f (.Add (a.cata f) (b.cata f))

Then, the compile is defined based on the functor ExpF

abbrev Prog := List Inst


def ExpF.compile: ExpF Prog -> Prog
  | .Const n => [.Push n]
  | .Add a b => a ++ b ++ [.Add]


def ExpF.eval: ExpF Nat -> Nat
  | .Const n => n
  | .Add a b => a + b


def Exp.eval := Exp.cata ExpF.eval
def Exp.compile := Exp.cata ExpF.compile

def e23: Exp := .add (.const 2) (.const 3)

example: e23.eval = 5 := by eq_refl
example: e23.compile = [.Push 2, .Push 3, .Add] := by eq_refl

It works well with ExpA and ExpM

inductive ExpA where
  | Ann: String -> ExpF ExpA -> ExpA


def ExpA.cata
  {B: Type}
  (f: String -> ExpF B -> B)
: ExpA -> B
  | .Ann s (.Const n) => f s (.Const n)
  | .Ann s (.Add a b) => f s (.Add (a.cata f) (b.cata f))


def ExpA.eval := ExpA.cata (fun _ b => ExpF.eval b)
def ExpA.compile := ExpA.cata (fun s b => .Ann s :: ExpF.compile b)


def a2: ExpA := (.Ann "pos1" (.Const 2))
def a3: ExpA := (.Ann "pos2" (.Const 3))
def a23: ExpA := (.Ann "pos3" (.Add a2 a3))

example: a23.eval = 5 := by eq_refl
example: a23.compile = [
  Inst.Ann "pos3",
  Inst.Ann "pos1",
  Inst.Push 2,
  Inst.Ann "pos2",
  Inst.Push 3,
  Inst.Add
] := by eq_refl


inductive ExpM where
  | In: ExpF ExpM -> ExpM
  | Mult: ExpM -> ExpM -> ExpM


def ExpM.mult := ExpM.Mult
def ExpM.const n := ExpM.In (.Const n)
def ExpM.add a b := ExpM.In (.Add a b)

def ExpM.cata {B} (f: ExpF B -> B) (mult: B -> B -> B): ExpM -> B
  | .In (.Const n) => f (.Const n)
  | .In (.Add n1 n2) => f (.Add (ExpM.cata f mult n1) (ExpM.cata f mult n2))
  | .Mult a1 a2 => mult (a1.cata f mult) (a2.cata f mult)

def ExpM.eval := ExpM.cata ExpF.eval Nat.mul
def ExpM.compile := ExpM.cata ExpF.compile (fun a b => a ++ b ++ [.Mult])

def m523 := ExpM.mult (.const 5) (.add (.const 2) (.const 3))
example: m523.eval = 5 * (2 + 3) := by eq_refl
example: m523.compile = [
  Inst.Push 5, Inst.Push 2, Inst.Push 3, Inst.Add, Inst.Mult
] := by eq_refl

This time I don't have to repeat the same logic many times, but the cons are that I have to rewrite the catamorphisms again and again, as well as the inductive principles (recursors).

def Exp.ind.{u}
  {P: Exp -> Sort u}
  (const: forall n, P (.const n))
  (add: forall a b, P a -> P b -> P (.add a b))
:
  forall (e: Exp), P e
:= fun e => match e with
  | .In (.Const a) => const a
  | .In (.Add a b) => add a b
    (Exp.ind const add a) (Exp.ind const add b)

def ExpM.ind.{u}
  {P: ExpM -> Sort u}
  (const: forall n, P (.const n))
  (add: forall a b, P a -> P b -> P (.add a b))
  (mult: forall a b, P a -> P b -> P (.mult a b))
: forall e, P e
  | .const n => const n
  | .add a b => add a b (a.ind const add mult) (b.ind const add mult)
  | .mult a b => mult a b (a.ind const add mult) (b.ind const add mult)

What is the best practice for such a tree-decoration problem?

Some one also suggested the following.

inductive ExpF (O: Type) where
  | Const: Nat -> ExpF O
  | Add: ExpF O -> ExpF O -> ExpF O
  | Out: O -> ExpF O

abbrev Exp := ExpF Empty

def ExpF.compile: ExpF Prog -> Prog
  | .Const n => [.Push n]
  | .Add a b => a.compile ++ b.compile ++ [.Add]
  | .Out p => p

def ExpF.eval: ExpF Nat -> Nat
  | .Const n => n
  | .Add a b => a.eval + b.eval
  | .Out n => n


def ExpF.cata {B} (f: ExpF B -> B): Exp -> B
  | .Const n => f (.Const n)
  | .Add a b => f (.Add (.Out $ a.cata f) (.Out $ b.cata f))
  | .Out o => o.elim


def e23: Exp := .Add (.Const 2) (.Const 3)

def Exp.compile := ExpF.cata ExpF.compile
def Exp.eval := ExpF.cata ExpF.eval

example: e23.compile = [Inst.Push 2, Inst.Push 3, Inst.Add] := by eq_refl
example: e23.eval = 5 := by eq_refl


inductive ExpM where
  | In: ExpF ExpM -> ExpM
  | Mult: ExpM -> ExpM -> ExpM


@[simp] def ExpM.const (n: Nat): ExpM := .In (.Const n)
@[simp] def ExpM.add (a b: ExpM): ExpM := .In (.Add (.Out a) (.Out b))
@[simp] def ExpM.mult := ExpM.Mult


def m523: ExpM := .mult (.const 5) (.add (.const 2) (.const 3))

@[simp]
def ExpM.cata {B} (f: ExpF B -> B) (mult: B -> B -> B): ExpM -> B
  | .In (.Const n) => f (.Const n)
  | .In (.Add a b) =>
    f (.Add
      (.Out $ ExpM.cata f mult (.In a))
      (.Out $ ExpM.cata f mult (.In b))
    )
  | .In (.Out o) => f (.Out $ o.cata f mult)
  | .Mult a b => mult (a.cata f mult) (b.cata f mult)

@[simp]
def ExpM.compile := ExpM.cata ExpF.compile (fun a b => a ++ b ++ [.Mult])
@[simp]
def ExpM.eval := ExpM.cata ExpF.eval (fun a b => a * b)

example: m523.compile
  = [Inst.Push 5, Inst.Push 2, Inst.Push 3, Inst.Add, Inst.Mult]
:= by
  simp [ExpF.compile, m523]

example: m523.eval = 5 * (2 + 3) := by
  simp [ExpF.eval, m523]

Or even

inductive ExpF (C A O: Type) where
  | Const: C -> Nat -> ExpF C A O
  | Add: A -> ExpF C A O -> ExpF C A O -> ExpF C A O
  | Out: O -> ExpF C A O

abbrev Exp := ExpF Unit Unit Empty
def Exp.const (n: Nat): Exp := .Const Unit.unit n
def Exp.add (a b: Exp): Exp := .Add Unit.unit a b

def ExpF.compile {C A: Type}: ExpF C A Prog -> Prog
  | .Const _ n => [.Push n]
  | .Add _ a b => a.compile ++ b.compile ++ [.Add]
  | .Out p => p

Last updated: May 02 2025 at 03:31 UTC