Zulip Chat Archive

Stream: lean4

Topic: Slowdown in v4.22.0-rc3

pandaman (Jul 12 2025 at 11:27):

I observed that the lean-regex library slowed down by 10-30% for regexes with alternations after upgrading to v4.22.0-rc3. I wonder if the new compiler introduced a regression. I'd appreciate any comments!

According to perf, this SparseSet.insert function seems to spend more time in v4.22.0-rc3. Upon inspecting the generated C code, v4.22.0-rc3 emits more lean_dec calls in the exclusive branch (one per SparseSet field), which might have caused the regression.

Traces

I collected perf samples for the two versions in Pop!_OS 22.04 (Ubuntu-like distribution). You can see the stack traces below:

v4.21.0 perf traces: https://share.firefox.dev/40cYcIZ
v4.22.0-rc3 perf traces: https://share.firefox.dev/44Jq8FE

How to reproduce

Check out the v4.21.0-benchmark and v4.22.0-rc3-benchmark branches of the lean-regex repository. In the regex directory, run lake exe Bench -e 'def|have|push|wf|nfa' -n 1000 Regex/Backtracker/Basic.lean to see how long each regex execution takes. In my environment, v4.22.0-rc3 takes roughly 20% more time.

Markus Himmel (Jul 12 2025 at 11:33):

Yes, it's very likely that this is due to the new compiler. We're also experiencing some regressions of a similar order of magnitude in the standard library. Thanks for preparing these benchmarks and for investigating! cc @Cameron Zwarich

Cameron Zwarich (Jul 12 2025 at 18:47):

Thanks for reporting this! From your description that this specifically affects alt patterns and specifically involves ref-counting operations, this seems very likely to be due to a known regression with the new compiler involving the increased use of join points (essentially non-escaping local functions called in tail position from two distinct places).

The new compiler generates more of these, which is generally speaking more optimal, but the ref-counting passes at the back end of the compiler (which did not change) are a bit pessimistic in the face of join points.

Apologies for the temporary regression. I will probably use lean-regex to generate test cases when I fix this issue.

pandaman (Jul 13 2025 at 02:08):

Thanks. I hope the codegen improves in the newer versions!

I tried to isolate the affected component, but the isolated version shows only a few percent of slowdown and has higher variance. It also depends on some fairly long proofs, so it might only be useful as a starting point.

(note: this reproduction is quadratic in -n. I used -n 30000, but you might need to find a good -n that works in your environment)

Reproducer

-- Minimal theory of injective/surjective/bijective maps, used to show that an injective `Fin n → Fin n` is surjective.
namespace Regex.Data.SparseSet.Bijection

/-- `f` is injective: equal images force equal arguments. -/
def inj {α β} (f : α → β) : Prop := ∀ a b, f a = f b → a = b
/-- `f` is surjective: every element of the codomain has a preimage. -/
def surj {α β} (f : α → β) : Prop := ∀ b, ∃ a, f a = b
/-- `f` is bijective: both injective and surjective. -/
def bij {α β} (f : α → β) : Prop := inj f ∧ surj f

/-- An element of `Fin (n + 1)` whose value is at least `n` is the last element `⟨n, _⟩`. -/
theorem _root_.Fin.eq_of_ge {n} {i : Fin (n + 1)} (h : i ≥ n) : i = ⟨n, Nat.lt_succ_self n⟩ := by
  -- `i.val < n + 1` gives the upper bound; `h` is the matching lower bound.
  have upper : i.val ≤ n := Nat.le_of_succ_le_succ i.isLt
  exact Fin.eq_of_val_eq (Nat.le_antisymm upper h)

/-- An element of `Fin (n + 1)` whose value is not below `n` is the last element `⟨n, _⟩`. -/
theorem _root_.Fin.eq_of_not_lt {n} {i : Fin (n + 1)} (h : ¬ i < n) : i = ⟨n, Nat.lt_succ_self n⟩ :=
  Fin.eq_of_ge <| Nat.ge_of_not_lt h

/-- Every injective map `f : Fin n → Fin n` is surjective.

The proof is by induction on `n`. In the successor case `f` is turned into a map
`f' : Fin n → Fin n`, `f'` is shown to be injective, and the induction hypothesis
applied to `f'` is used to find preimages under `f`. -/
theorem surj_of_inj {n} (f : Fin n → Fin n) (h : inj f) : surj f := by
  induction n with
  | zero =>
    -- Any `y : Fin 0` would satisfy `y.val < 0`, which is impossible.
    intro y
    have : y.val < 0 := y.isLt
    contradiction
  | succ n ih =>
    -- `n'` is the last element of `Fin (n + 1)`.
    let n' : Fin (n + 1) := ⟨n, Nat.lt_succ_self n⟩
    -- `f' x` is `f x` when that value is below `n`; otherwise `f x = n'` and
    -- `f' x` is defined as `f n'`, which injectivity forces to be below `n`.
    let f' (x : Fin n) : Fin n :=
      let x' : Fin (n + 1) := ⟨x.val, Nat.lt_trans x.isLt (Nat.lt_succ_self n)⟩
      let y := f x'
      if isLt : y.val < n then
        ⟨y.val, isLt⟩
      else
        have eqx : f x' = n' := Fin.eq_of_not_lt isLt
        have isLt : f n' < n' := by
          -- If `f n'` were also `n'`, injectivity would give `x' = n'`, i.e. `x.val = n`.
          refine Decidable.byContradiction fun nlt => ?_
          have eqn : f n' = n' := Fin.eq_of_not_lt nlt
          have : f x' = f n' := by rw [eqx, eqn]
          have : x' = n' := h _ _ this
          have : x.val = n := by
            have : x'.val = n'.val := by rw [this]
            exact this
          exact Nat.lt_irrefl _ (this ▸ x.isLt)
        ⟨f ⟨n, Nat.lt_succ_self n⟩, isLt⟩
    -- Injectivity of `f'`, by cases on which branch of `f'` each argument takes.
    have : inj f' := by
      intro x y eq
      let x' : Fin (n + 1) := ⟨x.val, Nat.lt_trans x.isLt (Nat.lt_succ_self n)⟩
      let y' : Fin (n + 1) := ⟨y.val, Nat.lt_trans y.isLt (Nat.lt_succ_self n)⟩

      if hx : f x' < n then
        if hy : f y' < n then
          simp [f', hx, hy, x', y'] at eq
          have := h _ _ (Fin.eq_of_val_eq eq)
          simp at this
          exact Fin.eq_of_val_eq this
        else
          simp [f', hx, hy, x', y'] at eq
          have := h _ _ (Fin.eq_of_val_eq eq)
          simp at this
          exact absurd (this ▸ x.isLt) (Nat.lt_irrefl _)
      else
        if hy : f y' < n then
          simp [f', hx, hy, x', y'] at eq
          have := h _ _ (Fin.eq_of_val_eq eq)
          simp at this
          exact absurd (this.symm ▸ y.isLt) (Nat.lt_irrefl _)
        else
          -- Both `f x'` and `f y'` equal `n'`, so injectivity of `f` applies directly.
          have eqx := Fin.eq_of_not_lt hx
          have eqy := Fin.eq_of_not_lt hy
          have : f x' = f y' := by rw [eqx, eqy]
          have : x' = y' := h _ _ this
          have : x.val = y.val := by
            have : x'.val = y'.val := by rw [this]
            exact this
          exact Fin.eq_of_val_eq this
    -- Induction hypothesis applied to `f'`.
    have surj : surj f' := ih _ this

    intro y
    have : y.val ≤ n := Nat.le_of_succ_le_succ y.isLt

    if isLt : y.val < n then
      -- `y` is below `n`: take a preimage under `f'` and translate it back to `f`.
      let ⟨x, eq⟩ := surj ⟨y.val, isLt⟩
      simp [f'] at eq
      split at eq
      case isTrue =>
        simp at eq
        exact ⟨⟨x.val, Nat.lt_trans x.isLt (Nat.lt_succ_self n)⟩, Fin.eq_of_val_eq eq⟩
      case isFalse =>
        simp at eq
        exact ⟨n', Fin.eq_of_val_eq eq⟩
    else
      -- `y` is the last element `n'`.
      have := Fin.eq_of_not_lt isLt
      simp [this]
      if isLt' : f n' < n then
        let ⟨x, eq⟩ := surj ⟨(f n').val, isLt'⟩
        simp [f'] at eq
        split at eq
        case isTrue =>
          simp at eq
          have := Fin.val_eq_of_eq (h _ _ (Fin.eq_of_val_eq eq))
          simp at this
          exact absurd (this ▸ x.isLt) (Nat.lt_irrefl _)
        case isFalse nlt =>
          have := Fin.eq_of_not_lt nlt
          exact ⟨⟨x.val, Nat.lt_trans x.isLt (Nat.lt_succ_self n)⟩, this⟩
      else
        exact ⟨n', Fin.eq_of_not_lt isLt'⟩

end Regex.Data.SparseSet.Bijection

namespace Regex.Data

/-- A set of `Fin n` values stored as a counter plus two length-`n` vectors, `dense` and
`sparse`, tied together by the `sparse_dense` invariant. -/
structure SparseSet (n : Nat) where
  /-- Number of leading positions of `dense` that are covered by `sparse_dense`. -/
  count : Nat
  /-- Stored elements; only positions below `count` are constrained by the invariant. -/
  dense : Vector (Fin n) n
  /-- For an element stored in `dense` at a position below `count`, that position. -/
  sparse : Vector (Fin n) n
  /-- Looking up in `sparse` the element stored at a `dense` position below `count`
  returns that position. -/
  sparse_dense : ∀ i : Fin n, i < count → sparse[dense[i.val].val] = i
  /-- `count` never exceeds the capacity `n`. -/
  le_count : count ≤ n

namespace SparseSet

variable {n : Nat} {s : SparseSet n} {i j : Fin n}

open Bijection

/-- The sparse set with `count = 0`. Both vectors are filled with the value `0`; the
invariant holds vacuously because no position is below `0`. -/
def empty {n : Nat} : SparseSet n :=
  let zeros : Vector (Fin n) n := Vector.ofFn (fun k : Fin n => ⟨0, k.pos⟩)
  { count := 0
    dense := zeros
    sparse := zeros
    sparse_dense := fun _ _ => by contradiction
    le_count := Nat.zero_le _ }

/-- The `sparse_dense` invariant restated with `Fin`-indexed lookups. -/
theorem sparse_dense_fin (h : i < s.count) : s.sparse[s.dense[i]] = i :=
  SparseSet.sparse_dense s i h

/-- Boolean membership test: `i` is in `s` when `sparse[i]` is a position below `count`
and `dense` at that position holds `i`. -/
@[inline]
def mem (s : SparseSet n) (i : Fin n) : Bool :=
  s.sparse[i] < s.count && s.dense[s.sparse[i]] == i

/-- `i ∈ s` is defined as the Boolean test `s.mem i`. -/
instance : Membership (Fin n) (SparseSet n) where
  mem s i := s.mem i

/-- Unfolds the `∈` notation to the Boolean test `mem`; both sides are definitionally equal. -/
@[simp]
theorem mem_mem : i ∈ s ↔ s.mem i :=
  Iff.intro (fun hmem => hmem) (fun hmem => hmem)

/-- Decides `i ∈ s` by evaluating the Boolean `s.mem i` and matching on the result. -/
@[inline]
instance : Decidable (i ∈ s) :=
  match h : s.mem i with
  | true => isTrue h
  | false => isFalse (by simp [h])

/-- The element stored at a `dense` position below `count` is a member of the set. -/
theorem mem_dense_of_lt (h : i < s.count) : s.dense[i] ∈ s := by
  simp [mem, sparse_dense, h]

/-- `dense` is injective on positions below `count`: applying `sparse` to both sides of
`eq` and using `sparse_dense` recovers the positions. -/
theorem dense_inj (hi : i < s.count) (hj : j < s.count) (eq : s.dense[i] = s.dense[j]) :
  i = j := by
  have : s.sparse[s.dense[i]] = s.sparse[s.dense[j]] := by rw [eq]
  simp [s.sparse_dense i hi, s.sparse_dense j hj] at this
  exact this

/-- Every member `j` is stored at some `dense` position below `count`; the witness is
`s.sparse[j]`. -/
theorem dense_surj (h : j ∈ s) : ∃ i : Fin n, i < s.count ∧ s.dense[i] = j := by
  simp [mem] at h
  exists s.sparse[j]

/-- When the set is full (`n ≤ s.count`), looking up `sparse[j]` in `dense` returns `j`
for every `j`. -/
theorem dense_sparse_of_full (h : n ≤ s.count) : s.dense[s.sparse[j]] = j := by
  -- With `n ≤ count`, every index is below `count`, so `dense` is injective on all of `Fin n`.
  let f (i : Fin n) : Fin n := s.dense[i]
  have inj : inj f := by
    intro x y eq
    exact dense_inj (Nat.lt_of_lt_of_le x.isLt h) (Nat.lt_of_lt_of_le y.isLt h) eq
  -- Injective on `Fin n` implies surjective, so `j` is `dense[i]` for some `i`.
  have surj : surj f := surj_of_inj _ inj
  have ⟨i, eq⟩ := surj j
  simp [f, ←eq, s.sparse_dense i (Nat.lt_of_lt_of_le i.isLt h)]

/-- If some `i` is not a member of `s`, then `s` is not full: `s.count < n`.
(Despite the name, the hypothesis is non-membership.) -/
theorem lt_of_mem (i : Fin n) (h : ¬i ∈ s) : s.count < n := by
  simp [SparseSet.mem] at h
  -- Otherwise `n ≤ count`, and `dense_sparse_of_full` would make `i` a member.
  refine Decidable.byContradiction fun nlt => ?_
  have ge := Nat.le_of_not_lt nlt
  apply h (Nat.lt_of_lt_of_le s.sparse[i].isLt ge)
  exact dense_sparse_of_full ge

/-- Inserts `i` into `s`.

Returns `s` unchanged when `i` is already a member. Otherwise writes `i` to
`dense[count]`, writes `count` to `sparse[i]`, and returns the set with `count + 1`.
The surrounding discussion reports this function as the one that spends more time
under v4.22.0-rc3. -/
def insert (s : SparseSet n) (i : Fin n) : SparseSet n :=
  if mem : i ∈ s then
    s
  else
    let ⟨count, dense, sparse, sparse_dense, _⟩ := s
    -- A non-member exists, so there is still room: `count < n`.
    have isLt : count < n := lt_of_mem i mem
    let dense' := dense.set count i
    let sparse' := sparse.set i ⟨count, isLt⟩
    -- Re-establish the invariant for all positions below `count + 1`.
    have sparse_dense' (j : Fin n) (h : j < count + 1) : sparse'[dense'[j]] = j := by
      have : j ≤ count := Nat.le_of_succ_le_succ h
      cases Nat.eq_or_lt_of_le this with
      | inl eq =>
        -- `j` is the freshly written position `count`.
        simp [dense', sparse', eq, Vector.getElem_set_self]
        exact Fin.eq_of_val_eq eq.symm
      | inr lt =>
        -- `j` is an old position, so `dense` is unchanged there.
        have : dense'[j] = dense[j] := by
          simp [dense']
          rw [Vector.getElem_set_ne isLt (by omega) (by omega)]
        simp [sparse', this]
        rw [Vector.getElem_set]
        split
        case isTrue eq =>
          -- `dense[j] = i` would make `i` a member, contradicting `mem`.
          simp [SparseSet.mem] at mem
          have : sparse[i.val] = j := by
            simp [eq, sparse_dense j lt]
          simp [this, lt] at mem
          exact absurd (Fin.eq_of_val_eq eq.symm) mem
        case isFalse => exact sparse_dense j lt
    ⟨count + 1, dense', sparse', sparse_dense', isLt⟩

end Regex.Data.SparseSet

open Regex.Data (SparseSet)

/-- Inserts the indices of `[:n]` into `s` one after another and returns the result.
NOTE(review): `@[noinline]` is presumably there to keep this loop a separate call in the
benchmark; confirm with the author. -/
@[noinline]
def insertMany (n : Nat) (s : SparseSet n) : IO (SparseSet n) := do
  let mut s := s
  for h : i in [:n] do
    -- `h` is the range-membership proof; it is used to derive the `Fin n` bound for `i`.
    s := s.insert ⟨i, by simp [Membership.mem] at h; omega⟩
  return s

/-- Runs `insertMany size .empty` for every `size` in `[:iterations]`, accumulating the
elapsed monotonic time in nanoseconds, then prints the total and the per-iteration
average in milliseconds. -/
def benchmark (iterations : Nat) : IO Unit := do
  let mut elapsedNanos : Nat := 0
  for size in [:iterations] do
    let t0 ← IO.monoNanosNow
    let _ ← insertMany size .empty
    let t1 ← IO.monoNanosNow
    elapsedNanos := elapsedNanos + (t1 - t0)
  let elapsedMs := elapsedNanos.toFloat / 1_000_000
  let perIterationMs := elapsedMs / iterations.toFloat
  IO.println s!"Ran {iterations} iterations in {elapsedMs}ms"
  IO.println s!"Average time: {perIterationMs}ms per iteration"

/-- Extracts the iteration count from the command-line arguments.

Scans `args` left to right for `-n <value>`:
* returns the parsed value when it is a positive natural number;
* fails with `"Iterations must be a number"` when the value is not a natural number;
* fails with `"Iterations must be a positive number"` when the value is `0`;
* returns the default `1000` when no `-n <value>` pair is present (this includes a
  trailing `-n` with no value after it).

The previous check `n < 0` could never be true for a `Nat`, so `-n 0` was accepted and
the average time was then computed as a division by zero. -/
def parseArgs (args : List String) : Except String Nat := do
  match args with
  | "-n" :: n :: _ =>
    let n ← n.toNat?.getDM (throw "Iterations must be a number")
    -- A `Nat` is never negative; the only non-positive value to reject is zero.
    if n = 0 then
      throw "Iterations must be a positive number"
    else
      return n
  | [] => return 1000
  | _ :: args => parseArgs args

/-- Entry point: parses the arguments and runs the benchmark. Returns exit code `0`
after a run, or prints the parse error to stderr and returns `1`. -/
def main (args : List String) : IO UInt32 :=
  match parseArgs args with
  | .error err => do
    IO.eprintln err
    return 1
  | .ok n => do
    benchmark n
    return 0

Last updated: Feb 28 2026 at 14:05 UTC