Zulip Chat Archive

Stream: general

Topic: Get rid of `Except` when `.error` is impossible


nrs (Nov 25 2024 at 14:22):

The following is a Pratt parser. I'm trying to figure out whether it is possible to refactor the code to get rid of the extract function whose sole purpose is extracting the value from an EStateM.Result. As you can notice from the pattern match at nexttokmatch, we rule out the possibility that it errors in the places where we call extract nexttokmatch. Is there an alternative way of writing this code that makes this explicit, in some way maybe that requires supplying a proof? i.e., instead of a function like extract which leaves implicit the fact that its branch with default cannot be reached, I'd be looking for a function Except A B -> B that would make it clear an error is impossible (general style/writing suggestions are also welcome!)

set_option allowUnsafeReducibility true
mutual
  partial def step (minBindingPower : Nat) : Lexer Sexp := do
    let nexttokmatch : Except String Sexp := match (<- .next) with
      | .atom c => .ok $ .atom c
      | tok => throw "nexttokmatch: bad token: {tok}"
    ReaderT.run (stepAux minBindingPower) nexttokmatch

  partial abbrev stepAux (minBindingPower : Nat) : ReaderT (Except String Sexp) Lexer Sexp := do
      let extract : Except String Sexp -> Sexp := (match . with | .ok c => c | _ => default) -- how do I get rid of this? it is impossible to reach default
      let nexttokmatch <- .read
      match (<- .peek) with
        | .eof => return extract nexttokmatch
        | .atom c => throw s!"peektokmatch: bad token: {c}"
        | .op c => match infixBindingPower c with
          | .error str => fun _ => .error str
          | .ok (leftBindingPower, rightBindingPower) =>
            if leftBindingPower < minBindingPower then
              return extract nexttokmatch
            else
              discard .next
              let rhs <- step rightBindingPower
              let nextRun := Sexp.cons c #[extract nexttokmatch, rhs]
              stepAux minBindingPower (.ok nextRun)
end

Eric Wieser (Nov 25 2024 at 14:56):

I'm not sure I agree that it is unreachable

nrs (Nov 25 2024 at 15:12):

Eric Wieser said:

I'm not sure I agree that it is unreachable

Either extract nexttokmatch is encountered from a call tree of shape step -> stepAux or it is encountered from a call tree of shape step -> stepAux -> ... -> stepAux. In the first case, the default branch is unreachable by virtue of the pattern match within step. In the second case, it is unreachable by virtue of .ok nextRun

Eric Wieser (Nov 25 2024 at 15:14):

I disagree with your analysis of the first case

Eric Wieser (Nov 25 2024 at 15:15):

The pattern match within step says "if (<- .next) is not an atom, then make .read return .error when it is first called."

nrs (Nov 25 2024 at 15:18):

Eric Wieser said:

The pattern match within step says "if (<- .next) is not an atom, then make .read return .error when it is first called."

ah you're right, I had misinterpreted throw as exiting the recursion

nrs (Nov 25 2024 at 15:19):

hm I have to reconsider the entire code in light of this. tyvm for the comments I would have not realized it otherwise!

nrs (Nov 25 2024 at 15:21):

extract then actually should either extract the value or propagate the error

Eric Wieser (Nov 25 2024 at 15:22):

Without knowing the definition of Lexer, it's hard to know if that is possible

nrs (Nov 25 2024 at 15:31):

Eric Wieser said:

Without knowing the definition of Lexer, it's hard to know if that is possible

Here's the the entire context, if you're interested. We have abbrev Lexer (α : Type) := EStateM String (Array Token) α. The above code gets fixed by letting extract propage the error.

inductive Token
  | atom : Char -> Token
  | op : Char -> Token
  | eof
deriving Repr

def Token.toString : Token -> String
  | .atom c => s!"atom({c})"
  | .op c => s!"op({c})"
  | .eof => "eof"

instance : ToString Token where
  toString := Token.toString

abbrev Lexer (α : Type) := EStateM String (Array Token) α

def mkLexer (ar : Array String) : Lexer Unit := do
  set (ar |>.map (String.data
           |> (List.toArray  .))
          |>.flatten
          |>.reverse
          |>.filter (not  Char.isWhitespace)
          |>.map (fun c => if Char.isAlphanum c then Token.atom c else Token.op c))

def Lexer.next : Lexer Token := do
  let lastState <- get
  set lastState.pop
  if h : 0 < lastState.size then
    return lastState[lastState.size - 1]
  else
    return .eof

def Lexer.peek : Lexer Token := do
  let lastState <- get
  if h : 0 < lastState.size then
    return lastState[lastState.size - 1]
  else
    return .eof

inductive Sexp
  | atom : Char -> Sexp
  | cons : Char -> Array Sexp -> Sexp
deriving Inhabited, Repr

def Sexp.toString : Sexp -> String
  | .atom c => s!" {c} "
  | .cons c ar => s!"({c}" ++ Array.foldr (. ++ .) "" (Array.map Sexp.toString ar) ++ ")"

instance : ToString Sexp where
  toString := Sexp.toString


def Sexp.append : Sexp -> Sexp -> Sexp
  | .atom c, .atom cc => .cons c #[.atom cc]
  | .atom c, .cons cc ar => .cons c (#[.atom cc] ++ ar)
  | .cons c ar, .atom cc => .cons c (ar ++ #[.atom cc])
  | .cons c ar, .cons cc arr => .cons c (ar ++ #[.atom cc] ++ arr)

instance : HAppend Sexp Sexp Sexp where
  hAppend := Sexp.append


def infixBindingPower (c : Char) : Except String (Nat × Nat) :=
      if c == '+' || c == '-' then
        .ok (1, 2)
      else if c == '*' || c == '/' then
        .ok (3, 4)
      else
       throw s!"infixBindingPower: unknown operator: {c}"


set_option allowUnsafeReducibility true
mutual
  partial def step (minBindingPower : Nat) : Lexer Sexp := do
    let nexttokmatch : Except String Sexp := match (<- .next) with
      | Token.atom c => .ok $ Sexp.atom c
      | tok => throw "nexttokmatch: bad token: {tok}"
    ReaderT.run (stepAux minBindingPower) nexttokmatch

  partial abbrev stepAux (minBindingPower : Nat) : ReaderT (Except String Sexp) Lexer Sexp := do
      let extract : Except String Sexp -> Sexp := (match . with | .ok c => c | _ => default)
      let nexttokmatch <- .read
      match (<- .peek) with
        | Token.eof => return extract nexttokmatch
        | .atom c => throw s!"peektokmatch: bad token: {c}"
        | .op c => match infixBindingPower c with
          | .error str => fun _ => .error str
          | .ok (leftBindingPower, rightBindingPower) =>
            if leftBindingPower < minBindingPower then
              return extract nexttokmatch
            else
              discard .next
              let rhs <- step rightBindingPower
              let nextRun := Sexp.cons c #[extract nexttokmatch, rhs]
              stepAux minBindingPower (.ok nextRun)
end

def tempmain : IO Unit := do
  let lex : Lexer Sexp := do mkLexer #["a+b*c*d+e"]; step 0
  match lex #[] with
  | .error ermsg state => .error ermsg
  | .ok val state => .println val

#eval! tempmain

Last updated: May 02 2025 at 03:31 UTC