Zulip Chat Archive

Stream: new members

Topic: Difficulty proving byte/bitvec conversion theorems

Paula Neeley (Sep 19 2025 at 16:30):

I'm working on some code where I sometimes have to reason about bit vectors, and other times about lists of bytes. I've defined some conversion functions between the two, but I'm having more difficulty than I expected proving that the conversion functions are inverses of each other.

Specifically, I can prove the inverse theorems (which I'm calling list_bytes_to_bitvec_eq and bitvec_to_list_bytes_eq) if I rely on some helper lemmas, but I'm having trouble proving those. If I unfold definitions, things just get really messy.

What proof approaches should I be using on the helper functions? Or should I redefine my conversion functions away from zipIdx, foldl, and reverse to somehow make the reasoning easier to do? Thanks in advance.

import Mathlib

@[reducible] def ByteSize : ℕ := 8

@[reducible] def Byte := BitVec ByteSize
  deriving DecidableEq

inductive Endian
  | l
  | b
  deriving DecidableEq

def BytesToBitvec {n : Nat} (bytes : List Byte) (e : Endian) (_ : bytes.length = n) : BitVec (ByteSize * n) :=
  let ordered := match e with
    | Endian.l => bytes
    | Endian.b => bytes.reverse
  let natVal := ordered.zipIdx.foldl (init := 0) fun acc ⟨b, i⟩ => acc + (b.toNat <<< (ByteSize * i))
  BitVec.ofNat (ByteSize * n) natVal

def BitvecToBytes {n : Nat} (bv : BitVec (ByteSize * n)) (e : Endian) : List Byte :=
  let ordered := match e with
    | Endian.l => List.range n
    | Endian.b => (List.range n).reverse
  ordered.map (λ i =>
    let shift := i * ByteSize
    BitVec.ofNat ByteSize ((bv >>> shift).toNat &&& 0xFF))

lemma BitvecToBytes_length {n : ℕ} (bv : BitVec (ByteSize * n)) (e : Endian) :
  (BitvecToBytes bv e).length = n :=
  by
  unfold BitvecToBytes
  dsimp
  simp_all only [BitVec.ofNat_and, List.length_map]
  split
  next e => simp_all only [List.length_range]
  next e => simp_all only [List.length_reverse, List.length_range]


lemma BitvecToBytes_cons_LE {n : ℕ} (b : Byte) (bs : List Byte) (hbs : bs.length = n) :
  BitvecToBytes (BytesToBitvec (b :: bs) Endian.l (by rfl)) Endian.l =
  b :: BitvecToBytes (BytesToBitvec bs Endian.l hbs) Endian.l :=
  by
  ext m b'
  constructor
  intro H
  subst hbs
  have H1 := @List.getElem?_cons _ b (BitvecToBytes (BytesToBitvec bs Endian.l (by rfl)) Endian.l) m
  rw [H1]
  sorry
  sorry

lemma BitvecToBytes_append_BE {n : ℕ} (b : Byte) (bs : List Byte) (hbs : bs.length = n) :
  BitvecToBytes (BytesToBitvec (b :: bs) Endian.b (by rfl)) Endian.b =
  BitvecToBytes (BytesToBitvec bs Endian.b hbs) Endian.b ++ [b] :=
  by
  ext m b'
  constructor
  intro H
  subst hbs
  sorry
  sorry


theorem list_bytes_to_bitvec_eq (bs : List Byte) (e : Endian) :
  BitvecToBytes (BytesToBitvec bs e (by rfl)) e =
    match e with
    | Endian.l => bs
    | Endian.b => bs.reverse :=
  by
  cases e with
  | l =>
    induction bs with
    | nil =>
      unfold BitvecToBytes
      unfold BytesToBitvec
      simp_all only [List.length_nil, Nat.mul_zero, List.zipIdx_nil, List.foldl_nil, BitVec.zero_ushiftRight,
        BitVec.toNat_ofNat, pow_zero, Nat.zero_mod, Nat.zero_and, List.range_zero, List.map_nil]
    | cons b bs ih =>
      rw [@BitvecToBytes_cons_LE bs.length b bs]
      rw [ih]
  | b =>
    induction bs with
    | nil =>
      unfold BitvecToBytes
      unfold BytesToBitvec
      simp_all only [List.length_nil, Nat.mul_zero, List.reverse_nil, List.zipIdx_nil, List.foldl_nil,
        BitVec.zero_ushiftRight, BitVec.toNat_ofNat, pow_zero, Nat.zero_mod, Nat.zero_and, List.range_zero,
        List.map_nil]
    | cons b bs ih =>
      have H := @BitvecToBytes_append_BE bs.length b bs
      rw [H]
      rw [ih]
      simp_all only [List.length_cons, forall_const, List.reverse_cons]


lemma BitvecToBytes_reconst {x : BitVec (ByteSize * n)} (e : Endian) :
  let bs := BitvecToBytes x e
  let ordered := match e with
    | Endian.l => bs
    | Endian.b => bs.reverse
  ordered.zipIdx.foldl (init := 0) (fun acc ⟨b, i⟩ => acc + (b.toNat <<< (ByteSize * i))) = x.toNat :=
  by
  unfold BitvecToBytes at *
  unfold ByteSize at *
  unfold Byte at *
  intro bs ordered
  simp_all only [Lean.Elab.WF.paramLet, BitVec.toNat_ushiftRight, BitVec.ofNat_and, ordered, bs]
  cases e with
  | l =>
    simp_all only
    sorry
  | b =>
    simp_all only [List.map_reverse, List.reverse_reverse]
    sorry


theorem bitvec_to_list_bytes_eq (x : BitVec (ByteSize * n)) (e : Endian) :
  BytesToBitvec (BitvecToBytes x e) e (by rw [BitvecToBytes_length]) = x :=
  by
  have H : (BytesToBitvec (BitvecToBytes x e) e (by rw [BitvecToBytes_length])) = BitVec.ofNat (ByteSize * n) x.toNat :=
    by
    have H1 := @BitvecToBytes_reconst n x e
    unfold BytesToBitvec at *
    unfold BitvecToBytes at *
    simp only
    simp_all only [BitVec.toNat_ushiftRight, BitVec.ofNat_and, BitVec.ofNat_toNat, BitVec.setWidth_eq]
  rw [H]
  simp_all only [BitVec.ofNat_toNat, BitVec.setWidth_eq]

Robin Arnez (Sep 19 2025 at 18:12):

What about

@[reducible] def byteSize : Nat := 8

@[reducible] def Byte := BitVec byteSize
  deriving DecidableEq

inductive Endian
  | l
  | b
  deriving DecidableEq

def bytesToBitvec {n : Nat} (bytes : List Byte) (e : Endian) (h : bytes.length = n) : BitVec (byteSize * n) :=
  match bytes with
  | [] => 0#_
  | a :: l =>
    match e with
    | .l => (bytesToBitvec l e rfl ++ a).cast (by grind)
    | .b => (a ++ bytesToBitvec l e rfl).cast (by grind)

def bitvecToBytes {n : Nat} (bv : BitVec (byteSize * n)) (e : Endian) : List Byte :=
  match e with
  | .l => List.ofFn (n := n) fun i => bv.extractLsb' (byteSize * i) _
  | .b => List.ofFn (n := n) fun i => bv.extractLsb' (byteSize * (n - i - 1)) _

@[simp]
theorem length_bitvecToBytes (bv : BitVec (byteSize * n)) (e : Endian) : (bitvecToBytes bv e).length = n := by
  cases e <;> simp [bitvecToBytes]

theorem bitvecToBytes_bytesToBitvec (l : List Byte) (e : Endian) : bitvecToBytes (bytesToBitvec l e rfl) e = l := by
  induction l with
  | nil => cases e <;> simp [bytesToBitvec, bitvecToBytes]
  | cons a l ih => cases e <;> simp_all [bitvecToBytes, bytesToBitvec] <;> grind

theorem bytesToBitvec_bitvecToBytes (bv : BitVec (byteSize * n)) (e : Endian) :
    bytesToBitvec (bitvecToBytes bv e) e (by simp) = bv := by
  induction n with
  | zero => apply BitVec.eq_of_zero_length; simp
  | succ k ih =>
    cases e
    · specialize ih (bv.extractLsb' byteSize (byteSize * k))
      simp_all [bitvecToBytes, List.ofFn_succ, bytesToBitvec]
      grind
    · specialize ih (bv.extractLsb' 0 (byteSize * k))
      simp_all [bitvecToBytes, List.ofFn_succ, bytesToBitvec]
      grind

Paula Neeley (Sep 19 2025 at 18:38):

Thanks, this code certainly looks much cleaner. Unfortunately, the grind actually doesn't work for me on bytesToBitvec_bitvecToBytes and bitvecToBytes_bytesToBitvec. Does it depend on what version of Lean I'm running? I'm using v4.22.0.

Last updated: Feb 28 2026 at 14:05 UTC