Zulip Chat Archive

I made #24997 to remove a duplicate lemma that I spotted. This made me wonder if this is a more general problem that can be tackled with automation, so I wrote a program to find pairs of lemmas that are duplicated of eachother. It takes into account that arguments can have different orders. Of course there are many cases where we want multiple names for the same lemma, but many cases also look accidental. In particular we can restrict it to just instances, since those don't have good reasons to be duplicates, and we can still find many duplicates.

Code

import Mathlib

open Lean Meta

def sortHyps (fvars : Array Expr) (e : Expr) : MetaM (Array (Expr × BinderInfo) × Expr) := do
  let n := fvars.size
  let fvars : Vector Expr n := fvars.toVector
  let mut types ← fvars.mapM (some <$> inferType ·)
  let mut e := e
  let mut result := #[]
  for h : j in [:n] do
    let mut ominIdx : Option (Fin n) := none
    let mut minType : Expr := default
    for h : i in [:n] do
      if let some type := types[i] then
        if !type.hasFVar then
          if ominIdx.isNone || type.quickLt minType then
            ominIdx := some ⟨i, by get_elem_tactic⟩
            minType := type
    let some minIdx := ominIdx | throwError "{types.toArray}"
    let fvar := fvars[minIdx]
    result := result.push (minType, ← fvar.fvarId!.getBinderInfo)
    types := types.set minIdx none
    let abst (e : Expr) := e.liftLooseBVars 0 1 |>.abstract #[fvar]
    types := types.map (Option.map abst)
    e := abst e
  return (result, e)

def runLinter : MetaM Unit := do
  _ ← (← getEnv).constants.map₁.foldM (init := ({} : Std.HashMap _ (Name × Expr))) fun (s : Std.HashMap (Array (Expr × BinderInfo) × Expr) (Name × Expr)) (name:Name) cinfo => do
    if name.isInternalDetail then return s
    -- let .thmInfo _ := cinfo | return s
    unless ← Option.isSome <$> isClass? cinfo.type do return s -- only considder type class instances
    if Linter.isDeprecated (← getEnv) name then return s
    if LazyDiscrTree.blacklistInsertion (← getEnv) name then return s
    let type ← instantiateTypeLevelParams cinfo.toConstantVal (cinfo.levelParams.map fun _ => 0)
    forallTelescope type fun fvars e => do
      let result ← sortHyps fvars e
      if let some (name', type') := s[result]? then
        logInfo m! "{name} : {cinfo.type}\n{name'} : {type'}"
        return s
      else
        return s.insert result (name, type)

run_meta runLinter

Jovan Gerbscheid (May 18 2025 at 17:49):

Here's the duplicate instances in lean core:

instLTUInt32 : LT UInt32
instLTUInt32_1 : LT UInt32

Lean.IR.UnreachableBranches.instToFormatValue : ToFormat IR.UnreachableBranches.Value
Lean.IR.UnreachableBranches.Value.instToFormat : ToFormat IR.UnreachableBranches.Value

String.leTrans : Trans (fun x1 x2 => x1 ≤ x2) (fun x1 x2 => x1 ≤ x2) fun x1 x2 => x1 ≤ x2
Char.leTrans : Trans (fun x1 x2 => x1 ≤ x2) (fun x1 x2 => x1 ≤ x2) fun x1 x2 => x1 ≤ x2

float32DecLe : (a b : Float32) → Decidable (a ≤ b)
Float32.decLe : (a b : Float32) → Decidable (a ≤ b)

instDecidableLtInt8 : (a b : Int8) → Decidable (a < b)
Int8.decLt : (a b : Int8) → Decidable (a < b)

UInt64.decLe : (a b : UInt64) → Decidable (a ≤ b)
instDecidableLeUInt64 : (a b : UInt64) → Decidable (a ≤ b)

String.leTotal : Std.Total fun x1 x2 => x1 ≤ x2
Char.leTotal : Std.Total fun x1 x2 => x1 ≤ x2

String.leRefl : Std.Refl fun x1 x2 => x1 ≤ x2
Char.leRefl : Std.Refl fun x1 x2 => x1 ≤ x2

ByteArray.instHashable : Hashable ByteArray
instHashableByteArray : Hashable ByteArray

Int16.decLe : (a b : Int16) → Decidable (a ≤ b)
instDecidableLeInt16 : (a b : Int16) → Decidable (a ≤ b)

instDecidableLtInt32 : (a b : Int32) → Decidable (a < b)
Int32.decLt : (a b : Int32) → Decidable (a < b)

instDecidableLtUInt64 : (a b : UInt64) → Decidable (a < b)
UInt64.decLt : (a b : UInt64) → Decidable (a < b)

Lean.Meta.Grind.instBEqOrigin : BEq Grind.Origin
Lean.Meta.Grind.instBEqOrigin_1 : BEq Grind.Origin

String.ltAsymm : Std.Asymm fun x1 x2 => x1 < x2
Char.ltAsymm : Std.Asymm fun x1 x2 => x1 < x2

instDecidableLeInt8 : (a b : Int8) → Decidable (a ≤ b)
Int8.decLe : (a b : Int8) → Decidable (a ≤ b)

instLEUInt32 : LE UInt32
instLEUInt32_1 : LE UInt32

ISize.decLe : (a b : ISize) → Decidable (a ≤ b)
instDecidableLeISize : (a b : ISize) → Decidable (a ≤ b)

Char.ltIrrefl : Std.Irrefl fun x1 x2 => x1 < x2
String.ltIrrefl : Std.Irrefl fun x1 x2 => x1 < x2

String.leAntisymm : Std.Antisymm fun x1 x2 => x1 ≤ x2
Char.leAntisymm : Std.Antisymm fun x1 x2 => x1 ≤ x2

instDecidableLeUInt32 : (a b : UInt32) → Decidable (a ≤ b)
UInt32.decLe : (a b : UInt32) → Decidable (a ≤ b)

Lean.IR.UnreachableBranches.instToStringValue : ToString IR.UnreachableBranches.Value
Lean.IR.UnreachableBranches.Value.instToString : ToString IR.UnreachableBranches.Value

Prod.lex : {α : Type u} → {β : Type v} → WellFoundedRelation α → WellFoundedRelation β → WellFoundedRelation (α × β)
Prod.rprod : {α β : Type} → WellFoundedRelation α → WellFoundedRelation β → WellFoundedRelation (α × β)

instDecidableLtUInt16 : (a b : UInt16) → Decidable (a < b)
UInt16.decLt : (a b : UInt16) → Decidable (a < b)

Int.instAssociativeNatMax : Std.Associative max
Nat.instAssociativeMax : Std.Associative max

Int16.decLt : (a b : Int16) → Decidable (a < b)
instDecidableLtInt16 : (a b : Int16) → Decidable (a < b)

Lean.ToLevel.max : [ToLevel] → [ToLevel] → ToLevel
Lean.ToLevel.imax : [ToLevel] → [ToLevel] → ToLevel

Std.Tactic.BVDecide.BVExpr.decEq : {w : Nat} → DecidableEq (Std.Tactic.BVDecide.BVExpr w)
Std.Tactic.BVDecide.BVExpr.instDecidableEq : {w : Nat} → DecidableEq (Std.Tactic.BVDecide.BVExpr w)

Lean.instReprMVarId_1 : Repr MVarId
Lean.instReprMVarId : Repr MVarId

floatDecLe : (a b : Float) → Decidable (a ≤ b)
Float.decLe : (a b : Float) → Decidable (a ≤ b)

UInt16.decLe : (a b : UInt16) → Decidable (a ≤ b)
instDecidableLeUInt16 : (a b : UInt16) → Decidable (a ≤ b)

Ord.arrayOrd : {α : Type u_1} → [a : Ord α] → Ord (Array α)
Array.instOrd : {α : Type} → [Ord α] → Ord (Array α)

instDecidableEqVector : {α : Type u_1} → {n : Nat} → [DecidableEq α] → DecidableEq (Vector α n)
Vector.instDecidableEq : {α : Type} → {n : Nat} → [DecidableEq α] → DecidableEq (Vector α n)

instDecidableLtUInt8 : (a b : UInt8) → Decidable (a < b)
UInt8.decLt : (a b : UInt8) → Decidable (a < b)

Lean.Grind.CommRing.instReprPower : Repr Grind.CommRing.Power
Lean.Meta.Grind.Arith.CommRing.instReprPower_lean : Repr Grind.CommRing.Power

instInhabitedProp : Inhabited Prop
instInhabitedSort : Inhabited Prop

instWellFoundedRelationOfSizeOf : {α : Sort u_1} → [SizeOf α] → WellFoundedRelation α
sizeOfWFRel : {α : Prop} → [SizeOf α] → WellFoundedRelation α

Int64.decLt : (a b : Int64) → Decidable (a < b)
instDecidableLtInt64 : (a b : Int64) → Decidable (a < b)

UInt8.decLe : (a b : UInt8) → Decidable (a ≤ b)
instDecidableLeUInt8 : (a b : UInt8) → Decidable (a ≤ b)

Lean.Meta.Grind.Arith.CommRing.instReprMon_lean : Repr Grind.CommRing.Mon
Lean.Grind.CommRing.instReprMon : Repr Grind.CommRing.Mon

instDecidableLeInt64 : (a b : Int64) → Decidable (a ≤ b)
Int64.decLe : (a b : Int64) → Decidable (a ≤ b)

Lean.JsonNumber.instLT : LT JsonNumber
Lean.JsonNumber.ltProp : LT JsonNumber

Int32.decLe : (a b : Int32) → Decidable (a ≤ b)
instDecidableLeInt32 : (a b : Int32) → Decidable (a ≤ b)

instDecidableEqSum : {α : Type u} → {β : Type v} → [DecidableEq α] → [DecidableEq β] → DecidableEq (α ⊕ β)
Sum.instDecidableEq : {α β : Type} → [DecidableEq α] → [DecidableEq β] → DecidableEq (α ⊕ β)

USize.decLt : (a b : USize) → Decidable (a < b)
instDecidableLtUSize : (a b : USize) → Decidable (a < b)

Lean.Widget.instTypeNameMessageData : TypeName MessageData
Lean.instTypeNameMessageData : TypeName MessageData

instInhabitedForInStep : {a : Type u_1} → [Inhabited a] → Inhabited (ForInStep a)
instInhabitedForInStep_1 : {a : Type} → [Inhabited a] → Inhabited (ForInStep a)

Int.instAssociativeNatMin : Std.Associative min
Nat.instAssociativeMin : Std.Associative min

float32DecLt : (a b : Float32) → Decidable (a < b)
Float32.decLt : (a b : Float32) → Decidable (a < b)

Exists.nonempty : ∀ {α : Sort u_1} {p : α → Prop}, (∃ x, p x) → Nonempty α
nonempty_of_exists : ∀ {α : Prop} {p : α → Prop}, (∃ x, p x) → Nonempty α

Float.decLt : (a b : Float) → Decidable (a < b)
floatDecLt : (a b : Float) → Decidable (a < b)

UInt32.decLt : (a b : UInt32) → Decidable (a < b)
instDecidableLtUInt32 : (a b : UInt32) → Decidable (a < b)

instDecidableLtISize : (a b : ISize) → Decidable (a < b)
ISize.decLt : (a b : ISize) → Decidable (a < b)

instDecidableLeUSize : (a b : USize) → Decidable (a ≤ b)
USize.decLe : (a b : USize) → Decidable (a ≤ b)

Jovan Gerbscheid (May 18 2025 at 17:51):

But it also flags some other suspicious lemmas, e.g.

Int8.not_eq_neg_add : ∀ (a : Int8), ~~~a = -a - 1
Array.not_lt_iff_ge {α : Type u_1} [LT α] {l₁ l₂ : List α} : ¬l₁ < l₂ ↔ l₂ ≤ l₁

Michael Rothgang (May 18 2025 at 18:58):

Nice find! If you post a list of duplicate mathlib lemmas (that are not aliasses for discoverability --- is that easy to automate or do we just need to check by hand), I'm sure people with be happy to take a look!

Yakov Pechersky (May 18 2025 at 19:12):

And also not cases where we switched argument type from explicit to implicit, like after defining a structure.

Jovan Gerbscheid (May 18 2025 at 19:37):

I noticed a lot of rfl/refl variations were flagged as the same theorem, so I made it be aware of argument implicitness.

Which kind of changing implicitness do you mean?

Eric Wieser (May 18 2025 at 20:16):

I've sent a PR for docs#Array.not_lt_iff_ge in lean4#8392

Eric Wieser (May 19 2025 at 01:17):

docs#Int.instAssociativeNatMin (and Max) looks like a typo too

Kim Morrison (May 19 2025 at 01:29):

@Jovan Gerbscheid, thanks for this list! I've cleaned up nearly all of it in lean#8397

Kim Morrison (May 19 2025 at 01:29):

I'll note three false positives:

import Lean

#check instInhabitedProp
#check instInhabitedSort

#check Prod.lex
#check Prod.rprod

#check Lean.ToLevel.max
#check Lean.ToLevel.imax

Kim Morrison (May 19 2025 at 01:29):

Finally, @Cameron Zwarich it seems the following pairs are genuinely different implementations: could I ask you to look at these and decide if anything needs cleaning up?

import Lean

#check Lean.IR.UnreachableBranches.instToFormatValue
#check Lean.IR.UnreachableBranches.Value.instToFormat

#check Lean.IR.UnreachableBranches.instToStringValue
#check Lean.IR.UnreachableBranches.Value.instToString

Cameron Zwarich (May 19 2025 at 01:41):

@Kim Morrison I'll take a look and remove one of the pairs.

Yury G. Kudryashov (May 19 2025 at 03:52):

This pair probably can't be found by automation, since it isn't an exact match, but we should deduplicate it: docs#MonoidHom.ofLeftInverse (I guess, it was accidentally moved to the MonoidHom namespace during some code move) and docs#MulEquiv.ofLeftInverse

Jovan Gerbscheid (May 19 2025 at 08:56):

For instInhabitedProp and instInhabitedSort, I'd say instInhabitedProp is redundant. Or do we want the default value in Prop to be True (and give that instance a higher priority)?

Jovan Gerbscheid (May 19 2025 at 09:29):

Here is a list of duplicate theorems that were found in lean core. I filtered out the type class instances, and theorems that are proved directly from another theorem.
duplicate theorems Lean core.txt

A very suspicious lemma is

UInt16.toFin_mod : ∀ (a b : UInt8), (a % b).toFin = a.toFin % b.toFin

Jovan Gerbscheid (May 19 2025 at 10:55):

Int.ofNat_ne_zero : ∀ {n : ℕ}, ↑n ≠ 0 ↔ n ≠ 0

also seems wrong, as it is about Nat.cast instead of ofNat, so it's the same as Int.natCast_ne_zero

Eric Wieser (May 19 2025 at 10:57):

I think the answer there is delete the lemma, Int.ofNat isn't supposed to be mentioned in any lemmas besides the one that translates it to Nat.cast IMO

Eric Wieser (May 19 2025 at 10:57):

I think a lot of lemmas got left with the old wrong name when the coercion was changed

Jovan Gerbscheid (May 19 2025 at 11:14):

Here's a list of lemmas that appear duplicate in Mathlib:
duplicate_theorems_mathlib.txt

For example

Nimber.bot_eq_zero : ⊥ = 0

is about Nat :sweat_smile:

Eric Wieser (May 19 2025 at 11:16):

Maybe we should have a linter that complains about theorem Foo.bar : T if T makes no mention of Foo

Jovan Gerbscheid (May 19 2025 at 11:17):

And I presume it would ignore the case where the namespace isn't a type.

Jovan Gerbscheid (May 19 2025 at 11:19):

Or actually, if the namespace isn't a constant. I kind of assumed it would always be a type.

Kevin Buzzard (May 19 2025 at 11:44):

Here's a gist with Jovan's list of hundreds of duplicated mathlib lemmas (it's less annoying to read that way on e.g. mobile)

Kevin Buzzard (May 19 2025 at 11:46):

Quotient.ind₂' and Quotient.inductionOn₂' have argument inputs in different order and I suspect that the duplication here is intentional?

Kevin Buzzard (May 19 2025 at 11:47):

(off-topic: a regex expert might want to try and figure out why docs#Quotient.ind₂' and docs#Quotient.inductionOn₂' still don't work)

Damiano Testa (May 19 2025 at 11:55):

Where are the linkifiers?

Kevin Buzzard (May 19 2025 at 11:57):

Settings (cog in top right) -> Organization Settings -> Linkifiers

Jovan Gerbscheid (May 19 2025 at 11:57):

For example LinearOrder.max_def and max_def are flagged as duplicate, because one is proved from the other by a rw tactic (instead of a literal application like in an exact tactic), so it doesn't get filtered out.

Kevin Buzzard (May 19 2025 at 11:59):

This thread should be called "naming convention quiz"!

Subtype.val_prop : ∀ {α : Type u_1} {S : Set α} (a : { a // a ∈ S }), ↑a ∈ S
Subtype.coe_prop : ∀ {α : Type u_1} {S : Set α} (a : { a // a ∈ S }), ↑a ∈ S

Int.IsUnit.natAbs_eq : ∀ {u : ℤ}, IsUnit u → u.natAbs = 1
Int.natAbs_of_isUnit : ∀ {u : ℤ}, IsUnit u → u.natAbs = 1

Multiset.sub_add_cancel : ∀ {α : Type u_1} [inst : DecidableEq α] {s t : Multiset α}, t ≤ s → s - t + t = s
Multiset.add_sub_cancel : ∀ {α : Type u_1} [inst : DecidableEq α] {s t : Multiset α}, t ≤ s → s - t + t = s

Multiset.ext'_iff : ∀ {α : Type u_1} [inst : DecidableEq α] {s t : Multiset α},
  s = t ↔ ∀ (a : α), Multiset.count a s = Multiset.count a t
Multiset.ext : ∀ {α : Type u_1} [inst : DecidableEq α] {s t : Multiset α},
  s = t ↔ ∀ (a : α), Multiset.count a s = Multiset.count a t

mul_le_mul_of_nonneg_of_nonpos' : ∀ {R : Type u} [inst : Semiring R] [inst_1 : Preorder R] {a b c d : R}
  [ExistsAddOfLE R] [PosMulMono R] [MulPosMono R] [AddRightMono R] [AddRightReflectLE R],
  c ≤ a → b ≤ d → 0 ≤ a → d ≤ 0 → a * b ≤ c * d
mul_le_mul_of_nonpos_of_nonneg' : ∀ {R : Type u} [inst : Semiring R] [inst_1 : Preorder R] {a b c d : R}
  [ExistsAddOfLE R] [PosMulMono R] [MulPosMono R] [AddRightMono R] [AddRightReflectLE R],
  c ≤ a → b ≤ d → 0 ≤ a → d ≤ 0 → a * b ≤ c * d

etc

Edward van de Meent (May 19 2025 at 12:13):

I have no preference, and don't know if mathlib does
i suspect that Int.IsUnit.natAbs_eq is better (~~due to allowing dot notation~~ for that it should be _root_.IsUnit., as yaël points out), modulo ending on eq. (i have no issues with this, but i recall reading some people do?)
clearly Multiset.add_sub_cancel is wrong
Multiset.ext should be just the right to left implication.
mul_le_mul_of_nonpos_of_nonneg' is wrong, the "of" bits are supposed to be in the same order in the name as they are in the arguments.

Jovan Gerbscheid (May 19 2025 at 12:19):

The first one is about membership in a set, so shouldn't it be called Subtype.val_mem? Or rather, it should be generalized to a general predicate p : α → Prop.

Andrew Yang (May 19 2025 at 12:20):

I think it is fine if lemmas are duplicated for discoverability reasons. As long as the proofs are not duplicated (one is an alias of the other) and they are Prop-valued.

Yaël Dillies (May 19 2025 at 12:22):

Edward van de Meent said:

i suspect that Int.IsUnit.natAbs_eq is better (due to allowing dot notation), modulo ending on eq. (i have no issues with this, but i recall reading some people do?)

I actually think the exact opposite. In fact, it looks like it got misported: mathport didn't record _root_

Jovan Gerbscheid (May 19 2025 at 12:22):

Indeed, but this list has filtered out all theorems that are proven by applying another theorem

Jovan Gerbscheid (Jun 03 2025 at 22:42):

I got back to this and made a PR that makes a start at removing all duplicate instances in Mathlib: #25410

Jovan Gerbscheid (Jun 03 2025 at 22:52):

There's also the file Mathlib.Algebra.Small.Ring, which consists only of instances that already exist.

Ruben Van de Velde (Jun 04 2025 at 00:10):

Nice. Note that you have some lint errors

Bhavik Mehta (Aug 21 2025 at 02:12):

I've ran into quite a lot of duplicate declarations in mathlib recently, is it possible to run this somewhat more regularly so they can be spotted and resolved? Or even running it on PRs to detect duplicates?
The one I just found is:
image.png
but I think I've seen 3 or 4 duplicates in mathlib in the last week or so

Bhavik Mehta (Aug 21 2025 at 02:16):

Another example:
image.png

Ruben Van de Velde (Aug 21 2025 at 06:33):

Seems hard for definitions

Kenny Lau (Aug 21 2025 at 07:22):

we would need to make expressions hashable to run this in linear time

Jovan Gerbscheid (Aug 21 2025 at 07:52):

The program I wrote should do a reasonable job at this. Feel free to pick up this project where I left it :). The problem is that there are 100s of duplicates, and it's a lot of work going through all of them.

Eric Wieser (Aug 21 2025 at 08:56):

docs#Function.Embedding.oneEmbeddingEquiv seems like a bad name (Edit: added in #23386)

Ruben Van de Velde (Aug 21 2025 at 11:53):

Bhavik Mehta said:

Another example:
image.png

#28732

Violeta Hernández (Aug 23 2025 at 19:56):

Found another pair (which is my fault!) docs#Ordinal.deriv_eq_id_of_nfp_eq_id and docs#Ordinal.deriv_id_of_nfp_id

Violeta Hernández (Aug 23 2025 at 19:57):

I'll fix that as part of #28743

Thomas Murrills (Aug 23 2025 at 20:07):

Bhavik Mehta said:

I've ran into quite a lot of duplicate declarations in mathlib recently, is it possible to run this somewhat more regularly so they can be spotted and resolved? Or even running it on PRs to detect duplicates?

Once we get rid of existing ones, we could just have automation loogle all new declarations as part of the PR process, right? (Presumably “directly”, using loogle’s backend, of course; modulo details to ensure matches have exactly the same set of arguments, allow for defs, etc.)

Jovan Gerbscheid (Oct 24 2025 at 16:03):

Here's an up to date list:
252 duplicate instances: https://gist.github.com/JovanGerb/66286dd2ec00a70acc8e302f5cb225fd
1441 duplicate theorems: https://gist.github.com/JovanGerb/f015b098102210136f25b94376924e05

Sébastien Gouëzel (Oct 24 2025 at 16:05):

Just browsing randomly, I see

UInt16.toFin_mod : ∀ (a b : UInt8), (a % b).toFin = a.toFin % b.toFin

How many bugs to uncover in there?

Bhavik Mehta (Oct 24 2025 at 17:06):

Jovan Gerbscheid said:

Here's an up to date list:
252 duplicate instances: https://gist.github.com/JovanGerb/66286dd2ec00a70acc8e302f5cb225fd
1441 duplicate theorems: https://gist.github.com/JovanGerb/f015b098102210136f25b94376924e05

I think some of the ones in this list aren't correctly printing their universe variables, eg the second of the pair in the first and third are printed as being in Type rather than being universe polymorphic

Ruben Van de Velde (Oct 24 2025 at 18:39):

One down: #30862

Ruben Van de Velde (Oct 24 2025 at 18:45):

Do these count?

Ordinal.lift_uzero : ∀ (a : Ordinal.{u}), Ordinal.lift.{0, u} a = a
Ordinal.lift_id' : ∀ (a : Ordinal.{0}), Ordinal.lift.{0, 0} a = a

Bhavik Mehta (Oct 24 2025 at 18:47):

No, Ordinal.lift_id' has more general universe variables and so it can't be simp, but Ordinal.lift_uzero is simp

Ruben Van de Velde (Oct 24 2025 at 18:49):

Another fun one:

Set.Nontrivial.offDiag_nonempty : ∀ {α : Type u_1} {s : Set α}, s.Nontrivial → s.offDiag.Nonempty
Set.Subsingleton.offDiag_eq_empty : ∀ {α : Type} {s : Set α}, s.Nontrivial → s.offDiag.Nonempty

Bhavik Mehta (Oct 24 2025 at 18:52):

Is it possible to filter out those which are aliases already? eg docs#le_antisymm and docs#LE.le.antisymm is an "expected" duplicate, since the latter is easy for dot notation whereas the former is easy when dot notation isn't available

Ruben Van de Velde (Oct 24 2025 at 18:53):

#30863

Ruben Van de Velde (Oct 24 2025 at 18:54):

Also, it would be nice to see or filter out the ones that are in core, as well as the ones in tactic-specific namespaces (like for grind or linear_combination)

Ruben Van de Velde (Oct 24 2025 at 18:57):

Ruben Van de Velde said:

Another fun one:

Set.Nontrivial.offDiag_nonempty : ∀ {α : Type u_1} {s : Set α}, s.Nontrivial → s.offDiag.Nonempty
Set.Subsingleton.offDiag_eq_empty : ∀ {α : Type} {s : Set α}, s.Nontrivial → s.offDiag.Nonempty

As the emoji reaction indicates, this one does date back to Yaël (three years ago, in mathlib3#16803)

Ruben Van de Velde (Oct 24 2025 at 18:59):

IsDenseInducing.toIsInducing : ∀ {α : Type u_1} {β : Type u_2} [inst : TopologicalSpace α] [inst_1 : TopologicalSpace β]
  {i : α → β}, IsDenseInducing i → Topology.IsInducing i
IsDenseInducing.isInducing : ∀ {α β : Type} [inst : TopologicalSpace α] [inst_1 : TopologicalSpace β] {i : α → β},
  IsDenseInducing i → Topology.IsInducing i

the first one is an auto-generated projection to the parent class

Ruben Van de Velde (Oct 24 2025 at 19:04):

#30864

Ruben Van de Velde (Oct 24 2025 at 19:11):

#30865

(Might not be the most efficient approach, but more at once is too daunting for me :))

Andrew Yang (Oct 24 2025 at 20:15):

There's also

Algebra.algHom (R : Type u_1) (A : Type u_2) (B : Type u_3) [CommSemiring R] [CommSemiring A]
  [Semiring B] [Algebra R A] [Algebra R B] [Algebra A B] [IsScalarTower R A B] : A →ₐ[R] B
IsScalarTower.toAlgHom (R : Type u) (S : Type v) (A : Type w) [CommSemiring R] [CommSemiring S]
  [Semiring A] [Algebra R S] [Algebra S A] [Algebra R A] [IsScalarTower R S A] : S →ₐ[R] A

Ruben Van de Velde (Oct 24 2025 at 21:21):

#30868

Jovan Gerbscheid (Oct 25 2025 at 13:53):

Here's an updated list of duplicate theorems: It filters out aliases and metaprogramming theorems.

620 duplicate theorems: https://gist.github.com/JovanGerb/eda537d9748895a28bc0f8527618d293

Aaron Liu (Oct 25 2025 at 13:57):

how do you filter out metaprogramming theorems

Aaron Liu (Oct 25 2025 at 13:57):

immediately I see

forall_congr : ∀ {α : Sort u} {p q : α → Prop}, (∀ (a : α), p a = q a) → (∀ (a : α), p a) = ∀ (a : α), q a
pi_congr : ∀ {α : Sort u} {β β' : α → Sort v}, (∀ (a : α), β a = β' a) → ((a : α) → β a) = ((a : α) → β' a)

so there's also something about universes

Jovan Gerbscheid (Oct 25 2025 at 13:58):

I instantiate all the universes with 0 before comparing theorems

Aaron Liu (Oct 25 2025 at 13:59):

so something stated for Sort u and something stated for Type u won't be unified?

Jovan Gerbscheid (Oct 25 2025 at 13:59):

Unfortunately not :(

Jovan Gerbscheid (Oct 25 2025 at 14:00):

Here is my current code

Code

import Mathlib

open Lean Meta

def sortHyps (fvars : Array Expr) (e : Expr) : MetaM (Array (Expr × BinderInfo) × Expr) := do
  let n := fvars.size
  let fvars : Vector Expr n := fvars.toVector
  let mut types ← fvars.mapM (some <$> inferType ·)
  let mut e := e
  let mut result := #[]
  for h : j in [:n] do
    let mut ominIdx : Option (Fin n) := none
    let mut minType : Expr := default
    for h : i in [:n] do
      if let some type := types[i] then
        if !type.hasFVar then
          if ominIdx.isNone || type.quickLt minType then
            ominIdx := some ⟨i, by get_elem_tactic⟩
            minType := type
    let some minIdx := ominIdx | throwError "{types.toArray}"
    let fvar := fvars[minIdx]
    result := result.push (minType, ← fvar.fvarId!.getBinderInfo)
    types := types.set minIdx none
    let abst (e : Expr) := e.liftLooseBVars 0 1 |>.abstract #[fvar]
    types := types.map (Option.map abst)
    e := abst e
  return (result, e)

def runLinter : MetaM (Array (Name × Name)) := do
  let env ← getEnv
  let mut s : Std.HashMap (Array (Expr × BinderInfo) × Expr) (Name × Expr) := {}
  let mut dups := #[]
  for (name, cinfo) in env.constants.map₁ do
    if name.isInternalDetail || name.isMetaprogramming || !allowCompletion env name || Linter.isDeprecated env name then continue -- filter out bad constants
    if let some v := cinfo.value? then if let .const .. := v.eta then continue -- filter out aliases
    let .thmInfo _ := cinfo | continue -- only consider theorems
    -- unless ← Option.isSome <$> isClass? cinfo.type do continue -- only considder type class instances
    let type ← instantiateTypeLevelParams cinfo.toConstantVal (cinfo.levelParams.map fun _ => 0)
    (s, dups) ← forallTelescope type fun fvars e => do
      let result ← sortHyps fvars e
      if let some (name', _) := s[result]? then
        return (s, dups.push (name, name'))
      else
        return (s.insert result (name, type), dups)
  return dups

def printResults : MetaM MessageData := do
  let dups ← runLinter
  let mut m := m!"Number of duplicates: {dups.size}"
  for (a, b) in dups do
    m := m!"{m}\n\n{a} : {(← getConstInfo a).type}\n{b} : {(← getConstInfo b).type}"
  return m

set_option maxHeartbeats 10000000000000
run_meta do
  logInfo m!"{← printResults}"

Aaron Liu (Oct 25 2025 at 14:01):

what if you instantiate them with mvars

Aaron Liu (Oct 25 2025 at 14:01):

or how does universes work

Aaron Liu (Oct 25 2025 at 14:02):

universes seems a lot harder to work with than local hyps is from my experience

Jovan Gerbscheid (Oct 25 2025 at 14:02):

The current approach for comparison is by looking up in a HashMap. I don't think it is feasible to do pairwise unifications/comparisons of all mathlib theorems.

Jovan Gerbscheid (Oct 25 2025 at 14:03):

But it should be possible to go into the expressions and manually replace every single Sort _ with Sort 0.

Aaron Liu (Oct 25 2025 at 14:09):

oh there aren't that many mathlib theorems surely

Aaron Liu (Oct 25 2025 at 14:09):

oh wait pairwise nvm

Jovan Gerbscheid (Oct 25 2025 at 14:34):

Ok, I've now added code for erasing universes from the theorems. It is quite a bit slower, but it found 8 extra duplicates. I've updated the above gist to have 628 duplicate theorems.

Bryan Gin-ge Chen (Oct 25 2025 at 14:37):

If it's something we want, it wouldn't be hard to add a workflow which runs this and posts a weekly report on Zulip; it might even be possible to adapt some of the deprecation auto-removal infrastructure to generate PRs automatically from this too? cc: @Damiano Testa

Jovan Gerbscheid (Oct 25 2025 at 14:38):

I'm using the infoview to run the code, but it has this stupid property that if I run logInfo "hello world", and ctrl+a ctrl+c in the infoview, it copies the content three times over?? Hence why the gists are 3x as long as they should be :sweat_smile:

Aaron Liu (Oct 25 2025 at 14:41):

You can write it to a file

Jovan Gerbscheid (Oct 25 2025 at 14:41):

Ah sorry, I meant to say that I use Lean web.

Aaron Liu (Oct 25 2025 at 14:41):

This is how I was able to handle 6000 lines of output without crashing the infoview

Aaron Liu (Oct 25 2025 at 14:42):

Jovan Gerbscheid (Oct 25 2025 at 14:42):

The bug doesn't appear locally

Aaron Liu (Oct 25 2025 at 14:42):

yeah that makes more sense

Bryan Gin-ge Chen (Oct 25 2025 at 14:43):

I guess it's just because the message appears in "Messages" and "All messages" as well?

Jovan Gerbscheid (Oct 25 2025 at 14:44):

Even when my cursor is somewhere else (so that the messages can only come from "All messages") I get the 3x duplicate copying

Jovan Gerbscheid (Oct 25 2025 at 14:45):

I get this on my clip-board:

MathlibDemo.lean:11:0
No info found.
MathlibDemo.lean:11:0
No info found.
All Messages (1)
MathlibDemo.lean:4:0

hello world

MathlibDemo.lean:4:0

hello world

All Messages (1)
MathlibDemo.lean:4:0

hello world

Jovan Gerbscheid (Oct 25 2025 at 14:45):

It could be nice to have a weekly report on the duplicate declarations. What do you think the report should include? The amount of duplicates is maybe still a bit too large to show in a Zulip message.

But I don't think a bot should make PRs to remove duplicates automatically.

Bryan Gin-ge Chen (Oct 25 2025 at 14:46):

Ah, right. A bot is probably infeasible since we'd have to decide which to keep.

Jovan Gerbscheid (Oct 25 2025 at 14:47):

Unless we get a bot that fully knows the naming conventions :)

Bryan Gin-ge Chen (Oct 25 2025 at 14:48):

It should be possible for bots to post gists, so we could post a summary (number of duplicates? what else?) and then link to the full report.

Jovan Gerbscheid (Oct 25 2025 at 14:49):

Btw the universe-erasing version found this one:

Subtype.eq : ∀ {α : Type u} {p : α → Prop} {a1 a2 : { x // p x }}, ↑a1 = ↑a2 → a1 = a2
Subtype.ext : ∀ {α : Sort u} {p : α → Prop} {a1 a2 : { x // p x }}, ↑a1 = ↑a2 → a1 = a2

Aaron Liu (Oct 25 2025 at 14:50):

one of these is not maximally universe general

Aaron Liu (Oct 25 2025 at 14:52):

but I wonder if in specific cases it could sometimes be useful to not be maximally universe general

Aaron Liu (Oct 25 2025 at 14:52):

like to help unification

Jovan Gerbscheid (Oct 25 2025 at 14:53):

I would hope not :upside_down:

Aaron Liu (Oct 25 2025 at 14:57):

well, theoretically most of the things stated for Sort (u + 1) (like docs#List) could also be stated for Sort (max 1 u)

Aaron Liu (Oct 25 2025 at 14:58):

this is technically more universe general

Jovan Gerbscheid (Oct 25 2025 at 15:04):

Oh I forgot that Subtype.{u} has type Sort (max 1 u)

Jovan Gerbscheid (Oct 25 2025 at 15:04):

That is so silly

Aaron Liu (Oct 25 2025 at 15:05):

oh that's so silly

Aaron Liu (Oct 25 2025 at 15:09):

the other option is also kind of silly though

set_option genInjectivity false in
set_option genSizeOf false in
set_option bootstrap.inductiveCheckResultingUniverse false in
inductive MySubtype {α : Sort u} (p : α → Prop) : Sort u where
  | mk (val : α) (property : p val) : MySubtype p

you can still get the value out and you can still prove injectivity

Jovan Gerbscheid (Oct 25 2025 at 15:10):

Nice!

Aaron Liu (Oct 25 2025 at 15:10):

but it's really a pain to set it all up manually

Jovan Gerbscheid (Oct 25 2025 at 15:10):

Sure, but this only needs to be done once in core

Aaron Liu (Oct 25 2025 at 15:12):

there's like 12 things here

import Mathlib.Util.WhatsNew

whatsnew in
structure MySubtype {α : Sort u} (p : α → Prop) : Sort (max 1 u) where
  val : α
  property : p val

and we also need to overwrite the default recursors so that we can eliminate into Sort instead of Prop

Aaron Liu (Oct 25 2025 at 15:13):

and all the environment extensions

Aaron Liu (Oct 25 2025 at 15:14):

oh and the same trick works for docs#PProd

Aaron Liu (Oct 25 2025 at 15:14):

and docs#PSigma

Jovan Gerbscheid (Oct 25 2025 at 15:45):

Here's the list of 353 duplicate theorems in just Mathlib

https://gist.github.com/JovanGerb/9167b17b8a2a588cc83e0e397d37b562

Jovan Gerbscheid (Oct 25 2025 at 15:57):

Let me join in on the fun: #30883

Jovan Gerbscheid (Oct 25 2025 at 16:06):

#30884

Jovan Gerbscheid (Oct 25 2025 at 16:32):

It seems Mathlib can't decide whether to spell it as LeOne or LEOne:

RankLeOneStruct, NormLeOne, LeOnePart VS ZeroLEOneClass, Ring.DimensionLEOne

Michael Rothgang (Oct 25 2025 at 16:33):

New style guide addition, after coming to consensus?

Snir Broshi (Oct 25 2025 at 17:12):

docs#SimpleGraph.Walk.mapLe vs docs#OrderEmbedding.ofMapLEIff

Chris Henson (Oct 25 2025 at 21:22):

Jovan Gerbscheid said:

It could be nice to have a weekly report on the duplicate declarations. What do you think the report should include? The amount of duplicates is maybe still a bit too large to show in a Zulip message.

We could also help prevent new duplicates by checking declarations added in a PR. Seems related to making the declarations diff a metaprogram that respects namespaces. (Which I am still planning to work on soon...)

Jovan Gerbscheid (Oct 25 2025 at 22:02):

One nice thing about the declarations diff in its current form is that it works even when CI hasn't passed yet. But I agree it would be better if it ran in Lean, and had direct access to the environments before and after. Then we could also check if there are any new duplicate declarations.

Kim Morrison (Oct 25 2025 at 23:12):

@Jovan Gerbscheid (or anyone else up to speed on running this code), could I please have a list of duplicates where one or more of the theorems is defined in lean4 itself?

Jovan Gerbscheid (Oct 25 2025 at 23:17):

Here's 158 duplicate theorems where both duplicates are in lean4

https://gist.github.com/JovanGerb/99daa102ec8648707e15156fb05ec231

Jovan Gerbscheid (Oct 25 2025 at 23:21):

And here are 27 duplicate non-theorem instances

Number of duplicates: 27

instInhabitedProp : Inhabited Prop
instInhabitedSort : Inhabited (Sort u)

Std.instMaxSubtypeOfMaxEqOr : {α : Type u} → [inst : Max α] → [Std.MaxEqOr α] → {P : α → Prop} → Max (Subtype P)
Subtype.instMax : {α : Type u} → [inst : Max α] → [Std.MaxEqOr α] → {P : α → Prop} → Max (Subtype P)

String.instAppend : Append String
instAppendString : Append String

String.instInhabited_1 : Inhabited String
String.instInhabited : Inhabited String

Prod.rprod : {α : Type u} → {β : Type v} → WellFoundedRelation α → WellFoundedRelation β → WellFoundedRelation (α × β)
Prod.lex : {α : Type u} → {β : Type v} → WellFoundedRelation α → WellFoundedRelation β → WellFoundedRelation (α × β)

Std.DTreeMap.instCoeTypeForall_2 : {α : Type u} → Coe (Type v) (α → Type v)
Std.DTreeMap.instCoeTypeForall_1 : {α : Type u} → Coe (Type v) (α → Type v)

Std.DTreeMap.instCoeTypeForall : {α : Type u} → Coe (Type v) (α → Type v)
Std.DTreeMap.instCoeTypeForall_1 : {α : Type u} → Coe (Type v) (α → Type v)

Std.DTreeMap.Internal.Impl.instCoeTypeForall_2 : {α : Type u} → Coe (Type v) (α → Type v)
Std.DTreeMap.instCoeTypeForall_1 : {α : Type u} → Coe (Type v) (α → Type v)

Std.DTreeMap.Internal.Impl.Const.instCoeTypeForall : {α : Type u} → Coe (Type v) (α → Type v)
Std.DTreeMap.instCoeTypeForall_1 : {α : Type u} → Coe (Type v) (α → Type v)

Std.DTreeMap.Internal.Impl.instCoeTypeForall : {α : Type u} → Coe (Type v) (α → Type v)
Std.DTreeMap.instCoeTypeForall_1 : {α : Type u} → Coe (Type v) (α → Type v)

Std.DTreeMap.Internal.instCoeTypeForall_std : {α : Type u} → Coe (Type v) (α → Type v)
Std.DTreeMap.instCoeTypeForall_1 : {α : Type u} → Coe (Type v) (α → Type v)

Std.DTreeMap.Internal.Impl.instCoeTypeForall_1 : {α : Type u} → Coe (Type v) (α → Type v)
Std.DTreeMap.instCoeTypeForall_1 : {α : Type u} → Coe (Type v) (α → Type v)

Std.DTreeMap.instCoeTypeForall_3 : {α : Type u} → Coe (Type v) (α → Type v)
Std.DTreeMap.instCoeTypeForall_1 : {α : Type u} → Coe (Type v) (α → Type v)

Std.DTreeMap.Raw.instCoeTypeForall : {α : Type u} → Coe (Type v) (α → Type v)
Std.DTreeMap.instCoeTypeForall_1 : {α : Type u} → Coe (Type v) (α → Type v)

Std.DTreeMap.instCoeTypeForall_1 : {α : Type u} → Coe (Type v) (α → Type v)
Std.DTreeMap.Raw.instCoeTypeForall_1 : {α : Type u} → Coe (Type v) (α → Type v)

Std.DTreeMap.instCoeTypeForall_1 : {α : Type u} → Coe (Type v) (α → Type v)
Std.DTreeMap.Raw.WF.instCoeTypeForall : {α : Type u} → Coe (Type v) (α → Type v)

Std.DTreeMap.instCoeTypeForall_1 : {α : Type u} → Coe (Type v) (α → Type v)
Std.ExtDTreeMap.instCoeTypeForall : {α : Type u} → Coe (Type v) (α → Type v)

Std.DTreeMap.instCoeTypeForall_1 : {α : Type u} → Coe (Type v) (α → Type v)
Std.ExtDTreeMap.instCoeTypeForall_1 : {α : Type u} → Coe (Type v) (α → Type v)

Std.Internal.IO.Async.EAsync.instMonadLiftEIO : {ε : Type} → MonadLift (EIO ε) (Std.Internal.IO.Async.EAsync ε)
Std.Internal.IO.Async.EAsync.instMonadLiftEIO_1 : {ε : Type} → MonadLift (EIO ε) (Std.Internal.IO.Async.EAsync ε)

Std.Internal.IO.Async.instMonadAwaitStateTOfMonad_1 : {m t : Type → Type} →
  {s : Type} → [Monad m] → [Std.Internal.IO.Async.MonadAwait t m] → Std.Internal.IO.Async.MonadAwait t (StateT s m)
Std.Internal.IO.Async.instMonadAwaitStateTOfMonad : {m t : Type → Type} →
  {n : Type} → [Monad m] → [Std.Internal.IO.Async.MonadAwait t m] → Std.Internal.IO.Async.MonadAwait t (StateT n m)

Std.Time.instInhabitedPlainDate : Inhabited Std.Time.PlainDate
Std.Time.PlainDate.instInhabited : Inhabited Std.Time.PlainDate

Std.Time.instReprPlainDate : Repr Std.Time.PlainDate
Std.Time.PlainDate.instRepr : Repr Std.Time.PlainDate

Std.Time.instReprPlainDateTime : Repr Std.Time.PlainDateTime
Std.Time.PlainDateTime.instRepr : Repr Std.Time.PlainDateTime

Std.Time.instReprTimestamp_1 : Repr Std.Time.Timestamp
Std.Time.instReprTimestamp : Repr Std.Time.Timestamp

Std.Time.instReprDuration : Repr Std.Time.Duration
Std.Time.instReprDuration_1 : Repr Std.Time.Duration

Std.Time.PlainTime.instRepr : Repr Std.Time.PlainTime
Std.Time.instReprPlainTime : Repr Std.Time.PlainTime

Std.Time.Internal.UnitVal.instLE : {α : Rat} → LE (Std.Time.Internal.UnitVal α)
Std.Time.Internal.instLEUnitVal : {x : Rat} → LE (Std.Time.Internal.UnitVal x)

Kim Morrison (Oct 26 2025 at 06:55):

Jovan Gerbscheid said:

Here's 158 duplicate theorems where both duplicates are in lean4

https://gist.github.com/JovanGerb/99daa102ec8648707e15156fb05ec231

Outside of the Int and Nat namespaces, I've now mostly taken care of these. (And delegated a few parts to others.) We should re-run this in a while and do a second round.

Ruben Van de Velde (Oct 26 2025 at 11:04):

#30922

Ruben Van de Velde (Oct 26 2025 at 11:13):

And #30923

Jovan Gerbscheid (Oct 26 2025 at 11:28):

#30924 golfs a proof in order to convince the linter that max_def and LinearOrder.max_def are aliases.

Snir Broshi (Oct 27 2025 at 01:31):

Jovan Gerbscheid said:

Here's 158 duplicate theorems where both duplicates are in lean4

https://gist.github.com/JovanGerb/99daa102ec8648707e15156fb05ec231

There's a duplicate that differs by an Eq.symm: docs#Nat.le.dest and docs#Nat.exists_eq_add_of_le

Snir Broshi (Oct 27 2025 at 01:34):

Mathlib duplicate of a lean4 theorem with different variable order/optionality: docs#List.getD_eq_getD_getElem? and docs#List.getD_eq_getElem?_getD
I haven't found this duplicate in any list linked above, but I might've missed it somewhere

Jovan Gerbscheid (Oct 27 2025 at 02:21):

Snir Broshi said:

Mathlib duplicate of a lean4 theorem with different variable order/optionality: docs#List.getD_eq_getD_getElem? and docs#List.getD_eq_getElem?_getD
I haven't found this duplicate in any list linked above, but I might've missed it somewhere

Oh right, my algorithm only flags things as duplicate if the binder info is the same. In these two, one has an explicit and one an implicit argument. I don't think there is any good reason for comparing the binder infos :sweat_smile:. So I guess there will be more that I missed.

Ruben Van de Velde (Oct 27 2025 at 07:23):

There's a number where both exist intentionally, like le_refl and le_rfl. But maybe your explicit alias detection would catch those

Johan Commelin (Oct 27 2025 at 13:49):

@Jovan Gerbscheid I think it would be great to have a weekly report for this. Instead of posting the list in a zulip message, I think it's best if the report could link to a gist.
We could include the length of the list in #mathlib4 > Technical Debt Counters .

Bryan Gin-ge Chen (Oct 27 2025 at 13:54):

I'm happy to help with this -- if Jovan or anyone else can get the script in a form where it can be run from the command line and outputs the list of duplicates to stdout or a file; I can do the rest of the actions wrangling.

Ruben Van de Velde (Oct 27 2025 at 23:59):

#30986

Thomas Murrills (Oct 28 2025 at 03:18):

By the way, while this algorithm will catch the vast majority of duplicates (which is awesome!), note that there is a small class of declarations which might be missed by this algorithm! Namely, theorems which have arguments of the same type in a different order.

example

import Mathlib

open Lean Meta

def sortHyps (fvars : Array Expr) (e : Expr) : MetaM (Array (Expr × BinderInfo) × Expr) := do
  let n := fvars.size
  let fvars : Vector Expr n := fvars.toVector
  let mut types ← fvars.mapM (some <$> inferType ·)
  let mut e := e
  let mut result := #[]
  for h : j in [:n] do
    let mut ominIdx : Option (Fin n) := none
    let mut minType : Expr := default
    for h : i in [:n] do
      if let some type := types[i] then
        if !type.hasFVar then
          if ominIdx.isNone || type.quickLt minType then
            ominIdx := some ⟨i, by get_elem_tactic⟩
            minType := type
    let some minIdx := ominIdx | throwError "{types.toArray}"
    let fvar := fvars[minIdx]
    result := result.push (minType, ← fvar.fvarId!.getBinderInfo)
    types := types.set minIdx none
    let abst (e : Expr) := e.liftLooseBVars 0 1 |>.abstract #[fvar]
    types := types.map (Option.map abst)
    e := abst e
  return (result, e)

open Qq

-- #1 < #0 with [ℕ, ℕ]
-- #0 < #1 with [ℕ, ℕ]
run_meta do
  let type₀ := q(∀ n m : Nat, n < m)
  let type₁ := q(∀ m n : Nat, n < m)
  forallTelescope type₀ fun fvars e => do
    let (sorted, abst) ← sortHyps fvars e
    logInfo m!"{abst} with {sorted.map (·.1)}"
  forallTelescope type₁ fun fvars e => do
    let (sorted, abst) ← sortHyps fvars e
    logInfo m!"{abst} with {sorted.map (·.1)}"

Canonicalizing the local context is actually quite a nontrivial problem! :sweat_smile: It's related to certain graph isomorphism problems, but obviously a lot less general.

However, canonicalizing the local context is not our only option here: we can also try to match types to other types, as Loogle does.

The Loogle backend is insensitive to the order of arguments when matching, and so e.g. searching for

∀ (m n : ℕ), n + m = m + n

will still successfully match

Nat.add_comm 📋 Init.Data.Nat.Basic
(n m : ℕ) : n + m = m + n

If we want to be thorough but still efficient, we can possibly get the best of both worlds with a hybrid strategy; perhaps something like:

collapse free variables of equal type into a single free variable, and remember that we did this
match them against the recorded expressions; if we have a match, put them in a "maybe duplicate" set with the match(es) (we're being overzealous in such a duplicate designation)
After creating all duplicate sets, use the loogle backend to match entries in the sets of maybe-duplicates against (only) other entries in the set, to see which are truly duplicates

Jovan Gerbscheid (Oct 28 2025 at 08:18):

Thanks for pointing that out! I think an easier fix could be to make the assumption that in such cases, the variables will appear in the goal. And then we can check in which order they appear in the goal, sorting them based on that. This solution will also not be complete, but I can't think of any real-world cases where it would fail.

Jovan Gerbscheid (Oct 28 2025 at 08:33):

@Kim Morrison, Here are the duplicate declarations that my algorithm had missed due to requiring that the binder info is the same in both declarations:

42 duplicate theorems: https://gist.github.com/JovanGerb/030f4f4f50c83239b150ccb2eb6221d2

3 duplicate instances:

beqOfOrd : {α : Type u_1} → [Ord α] → BEq α
BEq.ofOrd : (α : Type u) → [Ord α] → BEq α

leOfOrd : {α : Type u_1} → [Ord α] → LE α
LE.ofOrd : (α : Type u) → [Ord α] → LE α

ltOfOrd : {α : Type u_1} → [Ord α] → LT α
LT.ofOrd : (α : Type u) → [Ord α] → LT α

Jovan Gerbscheid (Oct 28 2025 at 08:51):

Here's the complete list of 284 duplicate theorems where the binderinfo is different: https://gist.github.com/JovanGerb/eeba89354b53597da94c5a873645e449

And 22 duplicate instances: https://gist.github.com/JovanGerb/2ebdcc7431f228690be8c8a470d1af21

All of these duplicates have not been in any of the above gists, so feel free to have a look.

Jovan Gerbscheid (Oct 28 2025 at 08:54):

What is the best way to sort the entries in the list of duplicate theorems? It probably would be good to have a clear separation between Mathlib, Batteries, Lean and Std. But if the two declarations are defined in different places, which List should it be in? I guess it should be in the further downstream repository, since that is where the duplication actually exists.

Johan Commelin (Oct 28 2025 at 09:19):

Yep, in the downstream list.

Johan Commelin (Oct 28 2025 at 09:19):

If we make this into a regular report, I think we also want to have a little bit of functionality to exclude known cases that we wish to ignore.

Ruben Van de Velde (Oct 28 2025 at 10:39):

#30999 and #31000

Thomas Murrills (Oct 28 2025 at 15:22):

Jovan Gerbscheid said:

Thanks for pointing that out! I think an easier fix could be to make the assumption that in such cases, the variables will appear in the goal. And then we can check in which order they appear in the goal, sorting them based on that. This solution will also not be complete, but I can't think of any real-world cases where it would fail.

I was curious if there were any real-world cases where this had the potential to fail (i.e. where free variables of the same type did not appear in the goal), because I also couldn't think of any! Turns out there are 2110 such cases; the pattern seems to be "we use these variables to construct a necessary hypothesis for the goal".

Simple examples on the list include e.g. And.imp {a c b d : Prop} (f : a → c) (g : b → d) (h : a ∧ b) : c ∧ d and Ne.elim : ∀ {α : Sort u} {a b : α}, a ≠ b → a = b → False (though many are more complicated).

(Note: just to be clear, these are not duplicate theorems, just theorems which would not be caught as duplicates if certain arguments were swapped, even if sorting by appearance in the goal.)

import Mathlib

open Lean Meta

def getConstsWithNonGoalDupHyps : MetaM <| Array (Name × ConstantInfo) := do
  -- code from the duplicate-catching linter!
  let env ← getEnv
  let mut cs : Array (Name × ConstantInfo) := #[]
  for (name, cinfo) in env.constants.map₁ do
    if name.isInternalDetail || name.isMetaprogramming || !allowCompletion env name || Linter.isDeprecated env name then continue -- filter out bad constants
    if let some v := cinfo.value? then if let .const .. := v.eta then continue -- filter out aliases
    let .thmInfo _ := cinfo | continue -- only consider theorems
    let type ← instantiateTypeLevelParams cinfo.toConstantVal (cinfo.levelParams.map fun _ => 0)
    -- check if any two fvars in the type that are not in the goal have the same type
    let hasDups ← forallTelescope type fun fvars e => do
      let (_, { fvarSet .. }) ← liftMetaM <| e.collectFVars.run {}
      let fvars := fvars.filter fun fvar =>
        !fvarSet.contains fvar.fvarId!
      -- quadratic, but fine for demo purposes
      for h : i in 0...fvars.size do
        for h : j in (i+1)...fvars.size do
          if (← inferType fvars[i]) == (← inferType fvars[j]) then
            return true
      return false
    if hasDups then cs := cs.push (name, cinfo)
  return cs

set_option maxHeartbeats 5000000

run_meta do
  let cs ← getConstsWithNonGoalDupHyps
  logInfo m!"{cs.size} declarations:\n{cs.map (MessageData.ofConstName ·.1)}"

Maybe doing something recursive would work? But at that point it might not be significantly simpler than the loogle-esque matching approach I outlined before.

Either way, this is definitely a future refinement (2110 is really not a huge pool in which to find duplicates, given the fraction of all declarations which are likely to be duplicates), and let's not let it get in the way of creating a technical debt report. :)

Kim Morrison (Oct 29 2025 at 03:06):

#31040 deprecates a lean4/Mathlib duplicated instance from the above list.

Kim Morrison (Oct 29 2025 at 03:07):

@Jovan Gerbscheid, could these reports include the module name in which each definition appears? That would be helpful both for triaging and fixing these duplications.

Kim Morrison (Oct 29 2025 at 03:08):

Classical.inhabited_of_nonempty : {α : Sort u} → Nonempty α → Inhabited α
Classical.inhabited_of_nonempty' : {α : Sort u_3} → [h : Nonempty α] → Inhabited α

is an intentional duplication, per the doc-string. I guess we need some way to remove such cases from the reports?

Kim Morrison (Oct 29 2025 at 03:33):

Jovan Gerbscheid said:

Kim Morrison, Here are the duplicate declarations that my algorithm had missed due to requiring that the binder info is the same in both declarations:

42 duplicate theorems: https://gist.github.com/JovanGerb/030f4f4f50c83239b150ccb2eb6221d2

lean#11004 deals with most of these.

Jovan Gerbscheid (Oct 29 2025 at 04:13):

Kim Morrison said:

Jovan Gerbscheid, could these reports include the module name in which each definition appears? That would be helpful both for triaging and fixing these duplications.

What would be the most convenient representation of the results? Something like this?

Module.One, Module.Two
Classical.inhabited_of_nonempty : {α : Sort u} → Nonempty α → Inhabited α
Classical.inhabited_of_nonempty' : {α : Sort u_3} → [h : Nonempty α] → Inhabited α

Jovan Gerbscheid (Oct 29 2025 at 04:18):

Kim Morrison said:

I guess we need some way to remove such cases from the reports?

In this case, one of the two is not actually tagged as an instance. If we want the algorithm to not have false positives, then we can make it only look at type class instances that are actually tagged as an instance.

Kevin Buzzard (Oct 29 2025 at 08:59):

If the number of actually-we-do-want-both-of-these duplicates is small then we could just have an [@yes-we-know-this-is-duplicated] attribute.

Jovan Gerbscheid (Oct 29 2025 at 09:01):

The doc-prime linter has a file somewhere listing all of the exceptions, so we could do the same here I guess?

Damiano Testa (Oct 29 2025 at 09:03):

The file for the doc-prime needed some more work, though, since modifying the file does not corrupt the oleans. This means that after you add an exception, you still have to rebuild, instead of using the cached build.

Ruben Van de Velde (Oct 31 2025 at 08:12):

Not in the list, but: #31106

Snir Broshi (Oct 31 2025 at 19:41):

Init.Data.Nat.Basic contains a lot of duplicates which are intentional according to their docstring, but why? and since alias is in Batteries they're defined manually.
docs#Nat.lt_asymm and docs#Nat.not_lt_of_gt and docs#Nat.not_lt_of_lt
docs#Nat.le_total and docs#Nat.le_or_ge and docs#Nat.le_or_le
docs#Nat.lt_iff_le_not_le and docs#Nat.lt_iff_le_and_not_ge
docs#Nat.ne_iff_lt_or_gt and docs#Nat.lt_or_gt
docs#Nat.zero_lt_one and docs#Nat.one_pos

Snir Broshi (Oct 31 2025 at 19:45):

docs#Nat.pos_of_ne_zero and docs#Nat.zero_lt_of_ne_zero are in the list, but maybe it should also flag duplicates of half an iff, as they're both sides of docs#Nat.pos_iff_ne_zero and docs#Nat.ne_zero_iff_zero_lt which are themselves duplicates up to Iff.symm

Robin Arnez (Nov 01 2025 at 11:24):

I believe most of the aliases about Nat are for discoverability purposes. For example, if you didn't know the formal term "asymm" you could still find docs#Nat.not_lt_of_gt using the usual naming scheme.

Snir Broshi (Nov 22 2025 at 19:41):

docs#List.getElem_zero
docs#List.getElem_zero_eq_head
docs#List.head_eq_getElem
docs#List.get_mk_zero
So many variants of l.head = l[0] :cold_sweat:

Bhavik Mehta (Nov 23 2025 at 05:03):

docs#div_div_cancel and docs#div_div_self' are identical; with the Nat version being named docs#Nat.div_div_self and the group with zero version being docs#div_div_cancel₀

Bhavik Mehta (Nov 23 2025 at 19:34):

Here are some more which don't seem to be in the above lists: docs#Int.div_lt_div_iff_of_dvd_of_pos is an exact match to docs#Int.ediv_lt_ediv_iff_of_dvd_of_pos, as are the emph three lemmas immediately after.

Jireh Loreaux (Nov 24 2025 at 03:17):

Those last ones sound like a consequence of the fact that div in core used to be different.

Snir Broshi (Dec 08 2025 at 04:48):

#27887 recently added @[to_dual], similar to @[to_additive], and there are a few recent PRs that delete existing theorems and instead autogenerate them using this attribute.
Is it perhaps possible to modify the script that finds duplicates to first transform every declaration using either of these automatic translation attributes and then find matches, to generate a list of theorems which are actually dual?

Jovan Gerbscheid (Dec 08 2025 at 13:35):

Yes, that sounds like it could be helpful for tagging more things with @[to_dual].

Jovan Gerbscheid (Jan 06 2026 at 01:19):

Sorry for taking so long to get back to this. I've now put the updated code in a PR: #33640. This version ignores binder info. It also sorts the results better, so it starts with entries in Init, then Lean, then Std, etc. I've made 3 categories: theorems (things in Prop), instances that aren't Props, and other definitions (where we also need to index the body, not just the type).

The one searching for theorems is somewhat slow, taking multiple minutes when run on all of mathlib.

The filter that ignores alias entries doesn't work for theorems in a module because the theorem body isn't imported. So instead, the program should be run in a non-module.

I've ran the code on current mathlib:

728 duplicate theorems: https://gist.github.com/JovanGerb/da914d9201bff759f84631d925817519
123 duplicate instances: https://gist.github.com/JovanGerb/0c93a1467006ed0583999e559ec05390
180 duplicate definitions (most of which are intentional): https://gist.github.com/JovanGerb/e4015fb5837065baa95e7a991c010896

Violeta Hernández (Jan 06 2026 at 06:28):

Thanks a lot! I'm scouring for easy PRs and I've already opened three of them.

Violeta Hernández (Jan 06 2026 at 06:29):

Honorary mention to docs#SemimoduleCat.hom_zsmul which is a duplicate of docs#SemimoduleCat.hom_nsmul, because a semimodule doesn't have a zsmul operation! Just because the lakefile builds doesn't mean it's "correct".

Johan Commelin (Jan 06 2026 at 08:52):

Violeta Hernández said:

Thanks a lot! I'm scouring for easy PRs and I've already opened three of them.

Could you link them here, please? One PR per message, to help the emoji-bot.

Chris Henson (Jan 06 2026 at 12:27):

If this is a tad slow to always run in CI, can it be included in maybe the weekly linting or technical debt metrics? (With a way to exempt declarations)

Jovan Gerbscheid (Jan 06 2026 at 12:30):

Yes, that is the idea, to have it run periodically (e.g. weekly), and then report the status here on Zulip. It could be it's own weekly message, although I guess it could also be integrated with the new weekly linter report.

Chris Henson (Jan 06 2026 at 12:40):

The weekly linting set would be nice for downstream reuse, though maybe it's awkward to include something that has the flavor of an environment linter.

Eric Wieser (Jan 06 2026 at 12:54):

Jovan Gerbscheid said:

The filter that ignores alias entries doesn't work for theorems in a module because the theorem body isn't imported. So instead, the program should be run in a non-module.

Does this mean we should be recording alias information in an environment extension?

Eric Wieser (Jan 06 2026 at 12:55):

Alternatively, perhaps you can detect the alias docstring for now?

Jovan Gerbscheid (Jan 06 2026 at 12:57):

My filter also includes the case where something isn't defined using the alias command, but it is defined directly from another constant.

That's because in such a case someone usually made the conscious decision of having multiple of the same constant.

Eric Wieser (Jan 06 2026 at 13:01):

I think if we want to auto-detect duplicate declarations, we need an explicit marker that indicates where duplication is intentional

Last updated: Feb 28 2026 at 14:05 UTC

Stream: mathlib4

Topic: duplicate declarations

Jovan Gerbscheid (May 18 2025 at 17:48):

Jovan Gerbscheid (May 18 2025 at 17:49):

Jovan Gerbscheid (May 18 2025 at 17:51):

Michael Rothgang (May 18 2025 at 18:58):

Yakov Pechersky (May 18 2025 at 19:12):

Jovan Gerbscheid (May 18 2025 at 19:37):

Eric Wieser (May 18 2025 at 20:16):

Eric Wieser (May 19 2025 at 01:17):

Kim Morrison (May 19 2025 at 01:29):

Kim Morrison (May 19 2025 at 01:29):

Kim Morrison (May 19 2025 at 01:29):

Cameron Zwarich (May 19 2025 at 01:41):

Yury G. Kudryashov (May 19 2025 at 03:52):

Jovan Gerbscheid (May 19 2025 at 08:56):

Jovan Gerbscheid (May 19 2025 at 09:29):

Jovan Gerbscheid (May 19 2025 at 10:55):

Eric Wieser (May 19 2025 at 10:57):

Eric Wieser (May 19 2025 at 10:57):

Jovan Gerbscheid (May 19 2025 at 11:14):

Eric Wieser (May 19 2025 at 11:16):

Jovan Gerbscheid (May 19 2025 at 11:17):

Jovan Gerbscheid (May 19 2025 at 11:19):

Kevin Buzzard (May 19 2025 at 11:44):

Kevin Buzzard (May 19 2025 at 11:46):

Kevin Buzzard (May 19 2025 at 11:47):

Damiano Testa (May 19 2025 at 11:55):

Kevin Buzzard (May 19 2025 at 11:57):

Jovan Gerbscheid (May 19 2025 at 11:57):

Kevin Buzzard (May 19 2025 at 11:59):

Edward van de Meent (May 19 2025 at 12:13):

Jovan Gerbscheid (May 19 2025 at 12:19):

Andrew Yang (May 19 2025 at 12:20):

Yaël Dillies (May 19 2025 at 12:22):

Jovan Gerbscheid (May 19 2025 at 12:22):

Jovan Gerbscheid (Jun 03 2025 at 22:42):

Jovan Gerbscheid (Jun 03 2025 at 22:52):

Ruben Van de Velde (Jun 04 2025 at 00:10):

Bhavik Mehta (Aug 21 2025 at 02:12):

Bhavik Mehta (Aug 21 2025 at 02:16):

Ruben Van de Velde (Aug 21 2025 at 06:33):

Kenny Lau (Aug 21 2025 at 07:22):

Jovan Gerbscheid (Aug 21 2025 at 07:52):

Eric Wieser (Aug 21 2025 at 08:56):

Ruben Van de Velde (Aug 21 2025 at 11:53):

Violeta Hernández (Aug 23 2025 at 19:56):

Violeta Hernández (Aug 23 2025 at 19:57):

Thomas Murrills (Aug 23 2025 at 20:07):

Jovan Gerbscheid (Oct 24 2025 at 16:03):

Sébastien Gouëzel (Oct 24 2025 at 16:05):

Bhavik Mehta (Oct 24 2025 at 17:06):

Ruben Van de Velde (Oct 24 2025 at 18:39):

Ruben Van de Velde (Oct 24 2025 at 18:45):

Bhavik Mehta (Oct 24 2025 at 18:47):

Ruben Van de Velde (Oct 24 2025 at 18:49):

Bhavik Mehta (Oct 24 2025 at 18:52):

Ruben Van de Velde (Oct 24 2025 at 18:53):

Ruben Van de Velde (Oct 24 2025 at 18:54):

Ruben Van de Velde (Oct 24 2025 at 18:57):

Ruben Van de Velde (Oct 24 2025 at 18:59):

Ruben Van de Velde (Oct 24 2025 at 19:04):

Ruben Van de Velde (Oct 24 2025 at 19:11):

Andrew Yang (Oct 24 2025 at 20:15):

Ruben Van de Velde (Oct 24 2025 at 21:21):

Jovan Gerbscheid (Oct 25 2025 at 13:53):

Aaron Liu (Oct 25 2025 at 13:57):

Aaron Liu (Oct 25 2025 at 13:57):

Jovan Gerbscheid (Oct 25 2025 at 13:58):

Aaron Liu (Oct 25 2025 at 13:59):

Jovan Gerbscheid (Oct 25 2025 at 13:59):

Jovan Gerbscheid (Oct 25 2025 at 14:00):

Aaron Liu (Oct 25 2025 at 14:01):

Aaron Liu (Oct 25 2025 at 14:01):

Aaron Liu (Oct 25 2025 at 14:02):

Jovan Gerbscheid (Oct 25 2025 at 14:02):

Jovan Gerbscheid (Oct 25 2025 at 14:03):

Aaron Liu (Oct 25 2025 at 14:09):

Aaron Liu (Oct 25 2025 at 14:09):

Jovan Gerbscheid (Oct 25 2025 at 14:34):