Zulip Chat Archive

Stream: general

Topic: jixia


Riccardo Brasca (Jul 01 2024 at 11:03):

This is a thread to discuss jixia. See the announcement.

Riccardo Brasca (Jul 01 2024 at 11:03):

@Tony Beta Lambda can this tool be used to visualize a dependency graph of a given declaration?

Tony Beta Lambda (Jul 01 2024 at 16:12):

Riccardo Brasca said:

Tony Beta Lambda can this tool be used to visualize a dependency graph of a given declaration?

It can generate all the data needed for such visualization. For the actual visualization I recommend lean-graph.

Ruben Van de Velde (Jul 01 2024 at 16:32):

Hmm, that might be an interesting use case for distinguishing theorems from lemmas

Riccardo Brasca (Jul 02 2024 at 13:51):

I am still looking for something that makes easy to find unused declaration in a project (wrt to a final goal). Maybe this can help...

Yaël Dillies (Jul 02 2024 at 16:27):

@Sky Wilshaw has written that something

Sky Wilshaw (Jul 02 2024 at 16:29):

I wrote some stuff here: https://github.com/leanprover-community/con-nf/blob/main/DependencyGraph.lean

Sky Wilshaw (Jul 02 2024 at 16:30):

I didn't intend for this to be used outside of the Con(NF) project so some tweaking will need to be done before it's usable

Riccardo Brasca (Jul 02 2024 at 16:51):

Thanks!! I am having a look

Riccardo Brasca (Jul 02 2024 at 17:09):

Can you explain (briefly) how to use it?

Sky Wilshaw (Jul 02 2024 at 17:10):

The way it currently works is that if you replace import ConNF with import <your_project> and write #unseen at the end of the file, it'll do all the calculations and tell you what's not currently used.

Sky Wilshaw (Jul 02 2024 at 17:10):

This isn't ideal, but works enough for my purposes - I have a CI step that manually appends #unseen to that file and reads off its output.

Sky Wilshaw (Jul 02 2024 at 17:11):

It's not written in there by default because computing unseen defs takes quite a long time, so I didn't want to have it run unless it really needs to.

Riccardo Brasca (Jul 02 2024 at 17:12):

I guess I have to replace ConNF with the name of my project everywhere in the script, right?

Sky Wilshaw (Jul 02 2024 at 17:12):

I think so, yes.

Riccardo Brasca (Jul 02 2024 at 17:40):

It seems it works, thanks a lot!!

Riccardo Brasca (Jul 02 2024 at 17:41):

I am not an expert but I am sure this is something can be useful for a lot of people.

Riccardo Brasca (Jul 02 2024 at 17:52):

Are you interested in improving it? For example I think it lists declarations generated by simps, that cannot be removed. Note that I am not complaining (quite the opposite!), but if you or someone else is interested we can make it an "official" mathlib script.

Sky Wilshaw (Jul 02 2024 at 17:54):

I don't think I know enough yet about Lean meta stuff to do a good job of cleaning this up and making it more generally usable - I just haven't really had the time to learn it properly. In the meantime, if anyone else with more knowledge is interested, feel free to give it a shot.

Kim Morrison (Jul 02 2024 at 22:34):

@Riccardo Brasca, @Sky Wilshaw, it would be lovely if this functionality could be added to the existing import-graph repository, which Mathlib already depends on.

Tony Beta Lambda (Jul 10 2024 at 05:45):

Riccardo Brasca said:

I am still looking for something that makes easy to find unused declaration in a project (wrt to a final goal). Maybe this can help...

Yes, it is possible with the data provided by jixia. The dependency relation is computed per "symbol", which can be linked to "declaration" by the equation symbol.name == declaration.fullname.

  1. Find your declaration's fullname.
  2. Compute its transitive dependencies with data in .sym.json files.
  3. Find all relevant declarations.

We are limiting the scope of jixia to extracting info from a lean project for interoperability with other tools, so this won't be part of jixia. But we are planning to write a set of useful scripts (most likely in python) to work with data generated by jixia, and this can be one of them.

Riccardo Brasca (Sep 01 2025 at 12:06):

Is there any progress on this? I used the script to remove unused declarations, but the following

import Lean
import FltRegular

open Lean

def nameCode (n : Name) : String :=
  if n = .anonymous then
    "anonymous"
  else
    n.toString.replace "." "_"

def nameDisplay (n : Name) : String :=
  n.components.getLast!.toString

def printDeps₁ (k : Name) (_v : Array Name) (print : String  IO Unit) : IO Unit := do
  let n := k.componentsRev[1]!
  print (nameCode k ++ " [label=\"" ++ nameDisplay k ++ "\"" ++
    " group=\"" ++ n.toString ++ "\"]" ++ ";\n")

def printDeps₂ (k : Name) (v : Array Name) (print : String  IO Unit) : IO Unit := do
  for val in v do
    if (`FltRegular).isPrefixOf val then
      print (nameCode val ++ " -> " ++ nameCode k ++ ";\n")

def group (name : Name) : Name :=
  (name.eraseSuffix? name.componentsRev.head!).get!

def groups (imports : NameMap (Array Name)) : NameMap Unit :=
  RBMap.fromList (imports.fold (fun xs k _ =>
    if (`FltRegular).isPrefixOf k then (group k, ()) :: xs else xs) []) _

/-- `#deptree` outputs a graphviz dependency graph to `depgraph.dot`. Build it with
`dot -Tpdf -Gnewrank=true -Goverlaps=false -Gsplines=ortho depgraph.dot > depgraph.pdf`. -/
elab "#deptree " : command => do
  let env  getEnv
  let imports := env.importGraph
  IO.FS.withFile "docs/depgraph.dot" IO.FS.Mode.write fun h => do
  h.write "digraph {\n".toUTF8
  h.write "compound=true;\n".toUTF8
  for (gp, _) in groups imports do
    h.write ("subgraph cluster_" ++ nameCode gp ++ " {\n").toUTF8
    for (k, v) in imports do
      if (`FltRegular).isPrefixOf k && group k = gp then do
        printDeps₁ k v (fun s => h.write s.toUTF8)
    h.write ("label = \"" ++ gp.toString ++ "\";\n").toUTF8
    h.write ("margin = 32;\n").toUTF8
    h.write ("pad = 32;\n").toUTF8
    h.write ("penwidth = 5;\n").toUTF8
    h.write ("color = cyan4;\n").toUTF8
    h.write "}\n".toUTF8
  for (k, v) in imports do
    if (`FltRegular).isPrefixOf k then do
      printDeps₂ k v (fun s => h.write s.toUTF8)
  h.write "}\n".toUTF8

/-- Extracts the names of the declarations in `env` on which `decl` depends. -/
-- source:
-- https://leanprover.zulipchat.com/#narrow/stream/287929-mathlib4/topic/Counting.20prerequisites.20of.20a.20theorem/near/425370265
def getVisited (env : Environment) (decl : Name) :=
  let (_, { visited, .. }) := Lean.CollectAxioms.collect decl |>.run env |>.run {}
  visited.erase decl

partial def allDeclsIn (module : Name) : Elab.Command.CommandElabM (Array Name) := do
  let mFile  findOLean module
  unless ( mFile.pathExists) do
    logError m!"object file '{mFile}' of module {module} does not exist"
  let (md, _)  readModuleData mFile
  let decls  md.constNames.filterM fun d =>
    return !( d.isBlackListed) && !(`injEq).isSuffixOf d && !(`sizeOf_spec).isSuffixOf d
  return decls

def allFiles (env : Environment) : List Name :=
  (env.importGraph.foldl (fun xs k _ => if (`FltRegular).isPrefixOf k then
    k :: xs else xs) []).mergeSort
    (toString · < toString ·)

def allDecls (env : Environment) : Elab.Command.CommandElabM NameSet :=
  (fun l => RBTree.ofList (l.map (fun a => a.toList)).flatten) <$>
    (List.mapM allDeclsIn (allFiles env))

/-- `#index` computes an index of the declations in the project and saves it to `index.csv`. -/
elab "#index " : command => do
  let env  getEnv
  let allDecls  allDecls env
  let result  List.mapM (fun decl => do
    let ranges  findDeclarationRanges? decl
    let mod  findModuleOf? decl
    match (ranges, mod) with
    | (some ranges, some mod) => pure (some (decl, ranges, mod))
    | _ => pure none)
    (allDecls.toList.mergeSort (toString · < toString ·))
  let result' := result.filterMap id
  IO.FS.withFile "docs/index.csv" IO.FS.Mode.write (fun h => do
    for (decl, ranges, mod) in result' do
      h.write (decl.toString ++ ", " ++ mod.toString ++ ", " ++
        ranges.range.pos.line.repr ++ ", " ++ ranges.range.pos.column.repr ++ "\n").toUTF8)

def seenIn (env : Environment) (allDecls : NameSet) (decl : Name) : NameSet :=
  (getVisited env decl).foldl
    (fun decls x => if allDecls.contains x then decls.insert x else decls) RBTree.empty

/-- `#unseen` computes a list of the declarations in the project that are
defined but not used in the current file. The list is stored in `unseen_defs.txt`.
The average runtime on my computer is 1 minute, with 16 threads. -/
elab "#unseen " : command => do
  let env  getEnv
  let allDecls  allDecls env
  let mut unseen := allDecls
  let timeStart  IO.monoMsNow
  let tasks := (fun l => Task.spawn (fun _ => seenIn env allDecls l)) <$>
    allDecls.toList.mergeSort (toString · < toString ·)
  for task in tasks do
    for v in task.get do
      unseen := unseen.erase v
  IO.FS.withFile "docs/unseen_defs.txt" IO.FS.Mode.write (fun h => do
    for v in unseen.toList.mergeSort (toString · < toString ·) do
      h.write (v.toString ++ "\n").toUTF8)
  let timeEnd  IO.monoMsNow
  logInfo m!"operation took {(timeEnd - timeStart) / 1000}s"

#unseen

is now broken and I don't really understand how to fix it.

Damiano Testa (Sep 01 2025 at 12:11):

A mindless "fix" is below. I have no idea what I was doing, though, I simply appeased the errors.

import Lean
import Mathlib
--import FltRegular

open Lean

def nameCode (n : Name) : String :=
  if n = .anonymous then
    "anonymous"
  else
    n.toString.replace "." "_"

def nameDisplay (n : Name) : String :=
  n.components.getLast!.toString

def printDeps₁ (k : Name) (_v : Array Name) (print : String  IO Unit) : IO Unit := do
  let n := k.componentsRev[1]!
  print (nameCode k ++ " [label=\"" ++ nameDisplay k ++ "\"" ++
    " group=\"" ++ n.toString ++ "\"]" ++ ";\n")

def printDeps₂ (k : Name) (v : Array Name) (print : String  IO Unit) : IO Unit := do
  for val in v do
    if (`FltRegular).isPrefixOf val then
      print (nameCode val ++ " -> " ++ nameCode k ++ ";\n")

def group (name : Name) : Name :=
  (name.eraseSuffix? name.componentsRev.head!).get!

def groups (imports : NameMap (Array Name)) : NameMap Unit :=
  imports.foldl (fun xs k _ =>
    if (`FltRegular).isPrefixOf k then xs.insert (group k) () else xs) 

/-- `#deptree` outputs a graphviz dependency graph to `depgraph.dot`. Build it with
`dot -Tpdf -Gnewrank=true -Goverlaps=false -Gsplines=ortho depgraph.dot > depgraph.pdf`. -/
elab "#deptree " : command => do
  let env  getEnv
  let imports := env.importGraph
  IO.FS.withFile "docs/depgraph.dot" IO.FS.Mode.write fun h => do
  h.write "digraph {\n".toUTF8
  h.write "compound=true;\n".toUTF8
  for (gp, _) in groups imports do
    h.write ("subgraph cluster_" ++ nameCode gp ++ " {\n").toUTF8
    for (k, v) in imports do
      if (`FltRegular).isPrefixOf k && group k = gp then do
        printDeps₁ k v (fun s => h.write s.toUTF8)
    h.write ("label = \"" ++ gp.toString ++ "\";\n").toUTF8
    h.write ("margin = 32;\n").toUTF8
    h.write ("pad = 32;\n").toUTF8
    h.write ("penwidth = 5;\n").toUTF8
    h.write ("color = cyan4;\n").toUTF8
    h.write "}\n".toUTF8
  for (k, v) in imports do
    if (`FltRegular).isPrefixOf k then do
      printDeps₂ k v (fun s => h.write s.toUTF8)
  h.write "}\n".toUTF8

/-- Extracts the names of the declarations in `env` on which `decl` depends. -/
-- source:
-- https://leanprover.zulipchat.com/#narrow/stream/287929-mathlib4/topic/Counting.20prerequisites.20of.20a.20theorem/near/425370265
def getVisited (env : Environment) (decl : Name) :=
  let (_, { visited, .. }) := Lean.CollectAxioms.collect decl |>.run env |>.run {}
  visited.erase decl

partial def allDeclsIn (module : Name) : Elab.Command.CommandElabM (Array Name) := do
  let mFile  findOLean module
  unless ( mFile.pathExists) do
    logError m!"object file '{mFile}' of module {module} does not exist"
  let (md, _)  readModuleData mFile
  let decls  md.constNames.filterM fun d =>
    return !( d.isBlackListed) && !(`injEq).isSuffixOf d && !(`sizeOf_spec).isSuffixOf d
  return decls

def allFiles (env : Environment) : List Name :=
  (env.importGraph.foldl (fun xs k _ => if (`FltRegular).isPrefixOf k then
    k :: xs else xs) []).mergeSort
    (toString · < toString ·)

def allDecls (env : Environment) : Elab.Command.CommandElabM NameSet :=
  (fun l => NameSet.ofList (l.map (fun a => a.toList)).flatten) <$>
    (List.mapM allDeclsIn (allFiles env))

/-- `#index` computes an index of the declations in the project and saves it to `index.csv`. -/
elab "#index " : command => do
  let env  getEnv
  let allDecls  allDecls env
  let result  List.mapM (fun decl => do
    let ranges  findDeclarationRanges? decl
    let mod  findModuleOf? decl
    match (ranges, mod) with
    | (some ranges, some mod) => pure (some (decl, ranges, mod))
    | _ => pure none)
    (allDecls.toList.mergeSort (toString · < toString ·))
  let result' := result.filterMap id
  IO.FS.withFile "docs/index.csv" IO.FS.Mode.write (fun h => do
    for (decl, ranges, mod) in result' do
      h.write (decl.toString ++ ", " ++ mod.toString ++ ", " ++
        ranges.range.pos.line.repr ++ ", " ++ ranges.range.pos.column.repr ++ "\n").toUTF8)

def seenIn (env : Environment) (allDecls : NameSet) (decl : Name) : NameSet :=
  (getVisited env decl).foldl
    (fun decls x => if allDecls.contains x then decls.insert x else decls) 

/-- `#unseen` computes a list of the declarations in the project that are
defined but not used in the current file. The list is stored in `unseen_defs.txt`.
The average runtime on my computer is 1 minute, with 16 threads. -/
elab "#unseen " : command => do
  let env  getEnv
  let allDecls  allDecls env
  let mut unseen := allDecls
  let timeStart  IO.monoMsNow
  let tasks := (fun l => Task.spawn (fun _ => seenIn env allDecls l)) <$>
    allDecls.toList.mergeSort (toString · < toString ·)
  for task in tasks do
    for v in task.get do
      unseen := unseen.erase v
  IO.FS.withFile "docs/unseen_defs.txt" IO.FS.Mode.write (fun h => do
    for v in unseen.toList.mergeSort (toString · < toString ·) do
      h.write (v.toString ++ "\n").toUTF8)
  let timeEnd  IO.monoMsNow
  logInfo m!"operation took {(timeEnd - timeStart) / 1000}s"

#unseen

Riccardo Brasca (Sep 01 2025 at 12:26):

It's good enough for me, thanks!


Last updated: Dec 20 2025 at 21:32 UTC