Zulip Chat Archive

Stream: general

Topic: random io output


Patrick Massot (Sep 16 2020 at 19:46):

I'm trying to return a bit to leancrawler but it seems mathlib is now too big for my old approach of using tactic.trace and redirect stderr. I can reliably lockdown my computer by trying to run it on present day mathlib. So I tried to switch to io.fs.put_str_ln as in doc-gen. But the result is randomly cut-off, even when I try to use the list splitting trick. I also see mysterious output to terminal where I would expect only file ouput. @Rob Lewis could you have a look?

Patrick Massot (Sep 16 2020 at 19:46):

import meta.expr
import data.list.sort


open tactic declaration environment

-- The next instance is there to prevent PyYAML trying to be too smart
meta def my_name_to_string : has_to_string name :=
⟨λ n, "\"" ++ to_string n ++ "\""

local attribute [instance] my_name_to_string

meta def expr.get_pi_app_fn : expr  expr
| (expr.pi _ _ _ e) := e.get_pi_app_fn
| e                 := e.get_app_fn

meta def list_items (e : expr) : list name :=
e.fold [] $ λ e _ cs,
if e.is_constant  ¬ e.const_name  cs
  then e.const_name :: cs
  else cs

meta def mnot : bool  tactic bool := λ p, return (¬ p)

meta def pos_line (p : option pos) : string :=
match p with
| some x := to_string x.line
| _      := ""
end

meta def file_name (p : option string) : string :=
match p with
| some x := x
| _      := "Unknown file"
end

section

structure declaration.modifiers :=
(Class := ff)
(Structure := ff)
(StructureField := ff)
(Inductive := ff)
(Instance := ff)
(IsRecursor := ff)
(IsConstructor := ff)

def bool.to_string_python : has_to_string bool := ⟨λ k, match k with tt := "True" | ff := "False" end
local attribute [instance] bool.to_string_python

instance : has_to_string declaration.modifiers := ⟨λ m,
  "{ class: " ++ to_string m.Class ++
  ", structure: " ++ to_string m.Structure ++
  ", structure_field: " ++ to_string m.StructureField ++
  ", is_recursor: " ++ to_string m.IsRecursor ++
  ", is_constructor: " ++ to_string m.IsConstructor ++
  ", inductive: " ++ to_string m.Inductive ++
  ", instance: " ++ to_string m.Instance ++ " }"

open  tactic declaration environment

meta def declaration.get_kind_string : declaration  string
| (thm _ _ _ _) := "lemma"
| (defn _ _ _ _ _ _) := "definition"
| (cnst _ _ _ _) := "constant"
| (ax _ _ _) := "axiom"


meta def environment.get_modifiers (env : environment) (n : name) : tactic declaration.modifiers :=
do
  c  (has_attribute `class n >> return tt) <|> return ff,
  i  (has_attribute `instance n >> return tt) <|> return ff,
  return {
    Class := c,
    Structure := env.is_structure n,
    StructureField := (env.is_projection n).is_some,
    IsConstructor := env.is_constructor n,
    IsRecursor := env.is_recursor n,
    Inductive := env.is_ginductive n,
    Instance := i }
end



meta def itersplit {α} : list α    list (list α)
| l 0 := [l]
| l 1 := let (l1, l2) := l.split in [l1, l2]
| l (k+2) := let (l1, l2) := l.split in itersplit l1 (k+1) ++ itersplit l2 (k+1)

open io (handle run_tactic) io.fs

meta def put_item_crawl (h : handle) (env : environment) (decl : declaration) : io unit :=
let name := decl.to_name,
    pos := pos_line (env.decl_pos name),
    fname := file_name (env.decl_olean name) in
do
   put_str_ln h ("- Name: " ++ to_string name),
   put_str_ln h ("  File: " ++ fname),
   put_str_ln h ("  Line: " ++ pos),
   put_str_ln h ("  Kind: " ++ decl.get_kind_string),
   mods  run_tactic $ env.get_modifiers name,
   put_str_ln h ("  Modifiers: " ++ to_string mods),

   pp_type  pp decl.type,
   put_str_ln h ("  Type: " ++ (to_string pp_type).quote),
   type_proofs  run_tactic $ (list_items decl.type).mfilter $ λ c, mk_const c >>= is_proof,
   type_others  run_tactic $ (list_items decl.type).mfilter $ λ c, mk_const c >>= is_proof >>= mnot,
   put_str_ln h ("  Type uses proofs: " ++ to_string type_proofs),
   put_str_ln h ("  Type uses others: " ++ to_string type_others),

   pp_value  pp decl.value,
   put_str_ln h ("  Value: " ++ (to_string pp_value).quote),
   value_proofs  run_tactic $ (list_items decl.value).mfilter $ λ c, mk_const c >>= is_proof,
   value_others  run_tactic $ (list_items decl.value).mfilter $ λ c, mk_const c >>= is_proof >>= mnot,
   put_str_ln h ("  Value uses proofs: " ++ to_string value_proofs),
   put_str_ln h ("  Value uses others: " ++ to_string value_others),

   put_str_ln h ("  Target class: " ++ if mods.Instance then to_string decl.type.get_pi_app_fn else ""),
   put_str_ln h ("  Parent: " ++  match env.is_projection name with
                           | some info := to_string info.cname
                           | none :=  ""
                           end),
   put_str_ln h ("  Fields: " ++ (to_string $ (env.structure_fields_full name).get_or_else []))


meta def print_all_content : io unit :=
do curr_env  run_tactic get_env,
   let decls := curr_env.fold [] list.cons,
   let local_decls := decls.filter
     (λ x, not (to_name x).is_internal),
   handle  io.mk_file_handle "mathlib.yaml" io.mode.write,
   let declss := itersplit local_decls 8,
   declss.mmap' (λ l, l.mmap' $ put_item_crawl handle curr_env)

Patrick Massot (Sep 16 2020 at 19:47):

After putting that file somewhere I import it in all.lean and add

meta def main : io unit :=  print_all_conten

Patrick Massot (Sep 16 2020 at 19:47):

and use lean --run all.lean

Patrick Massot (Sep 16 2020 at 19:48):

The produced yaml file typically stops in the middle of a line (at around 40Mb)

Rob Lewis (Sep 16 2020 at 19:52):

I don't have time to try it tonight but I can look tomorrow. At a quick glance I don't see anything suspicious. Does doc-gen do the same for you?

Patrick Massot (Sep 16 2020 at 19:53):

I never tried to run doc-gen locally. I'll see if I can find instructions

Rob Lewis (Sep 16 2020 at 19:55):

You only need to try to generate the json dump, just lean --run whatever.lean the obvious file in src.

Patrick Massot (Sep 16 2020 at 20:15):

doc-gen seems to work fine.

Rob Lewis (Sep 16 2020 at 20:16):

What's the size of the output file? Is it bigger than yours was dying on?

Patrick Massot (Sep 16 2020 at 20:17):

Yes, 57Mb for the json export vs 43Mb for my biggest random ouput

Patrick Massot (Sep 16 2020 at 20:17):

Wait, it depends

Patrick Massot (Sep 16 2020 at 20:18):

The thing that really crashed my computer was smaller, but not using io.fs

Patrick Massot (Sep 16 2020 at 20:18):

The code I posted tonight does not seem to crash, it simply randomly stops, at 40Mb or 43Mb depending on the run

Rob Lewis (Sep 16 2020 at 20:25):

Sounds like super fun debugging time then! What's the relevant difference between your code and doc-gen? They both fold over the environment and dump a bunch of text.

Patrick Massot (Sep 16 2020 at 20:26):

The main difference I can see is one of them has been written by some clueless functional programmer using cargo cult coding.

Patrick Massot (Sep 17 2020 at 18:48):

Rob, did you find some time to have a look at this issue?

Rob Lewis (Sep 17 2020 at 19:16):

I did not, it's been an extremely busy day.

Rob Lewis (Sep 17 2020 at 19:17):

Is there a relevant difference to doc-gen in the printing code?

Rob Lewis (Sep 17 2020 at 19:17):

They're doing essentially the same thing, and one of them seems to works on more data.

Patrick Massot (Sep 17 2020 at 19:18):

I have no idea.

Rob Lewis (Sep 17 2020 at 19:18):

I wouldn't really assume any sensible behavior here, but it would be sensible if this was caused by something being different, heh.

Rob Lewis (Sep 17 2020 at 19:40):

I think this needs someone who knows more about how io is implemented to debug, sorry.

Rob Lewis (Sep 17 2020 at 19:41):

There are clearly memory issues (clear from the fact that the itersplit hack is needed anywhere). And you're writing a huge amount of info to the file.

Rob Lewis (Sep 17 2020 at 19:42):

You could try changing put_item_crawl to write a single string instead of a bunch of individual writes, I don't know if that will make a difference.

Alex Peattie (Sep 17 2020 at 19:56):

Rob Lewis said:

There are clearly memory issues (clear from the fact that the itersplit hack is needed anywhere). And you're writing a huge amount of info to the file.

Yeah I had a go at running the script and it was eating through memory (it hit 20GB+ when I killed it), so I guess you might be hitting an OOM issue Patrick?

One thing you could try is opening (and then closing) a different file handle for each part of the list? E.g. writing to mathlib1.yaml, mathlib2.yaml, which you could then stitch together subsequently? And seeing if that makes any difference re: memory usage?

Patrick Massot (Sep 19 2020 at 15:13):

I returned to that and Rob's idea to write only one string per declaration seems to work. I still don't have a nice little graph for Quanta, but here is the current graph after removing logic.
image.png

Patrick Massot (Sep 19 2020 at 15:13):

Who can guess what are the pieces that stick out?

Rob Lewis (Sep 19 2020 at 15:13):

The one on the left has to be meta land, right?

Rob Lewis (Sep 19 2020 at 15:14):

Heaven or hell depending on your perspective

Patrick Massot (Sep 19 2020 at 15:14):

Almost correct! Meta-land is on the right.

Mario Carneiro (Sep 19 2020 at 15:15):

what's that one point at the top-right just left of the blob

Patrick Massot (Sep 19 2020 at 15:16):

name!

Rob Lewis (Sep 19 2020 at 15:17):

Are the colors def/lemma or something else?

Patrick Massot (Sep 19 2020 at 15:18):

Yes.

Patrick Massot (Sep 19 2020 at 15:21):

I'm sure Mario can guess what is the upper-left blob.

Rob Lewis (Sep 19 2020 at 15:22):

So if meta land is mostly blue, that must be def. What substantial blob is more disconnected from everything else than meta land, has a lot of defs, and only connects to the rest along a few strands? Is it computability?

Patrick Massot (Sep 19 2020 at 15:22):

I think you're vastly overestimating the size of computability in mathlib.

Rob Lewis (Sep 19 2020 at 15:23):

I agree, but it's hard to get a sense of scale from the picture since I can't see individual dots in most of the big blob

Rob Lewis (Sep 19 2020 at 15:24):

Is it category theory?

Patrick Massot (Sep 19 2020 at 15:24):

Hopefully this is about to change.

Mario Carneiro (Sep 19 2020 at 15:25):

that was my next guess as well. It's a little sad that it's so visibly separate

Patrick Massot (Sep 19 2020 at 15:25):

Algebraic geometry starts to use category theory if I understand correctly.

Rob Lewis (Sep 19 2020 at 15:26):

I would have expected meta land to be more distinct than category island.

Patrick Massot (Sep 19 2020 at 15:30):

Trying to remove meta gives:
image.png

Johan Commelin (Sep 19 2020 at 15:31):

What does the graph for abelian_category look like?

Patrick Massot (Sep 19 2020 at 15:35):

image.png

Patrick Massot (Sep 19 2020 at 15:35):

The right-hand blob is fintype and its friends.

Johan Commelin (Sep 19 2020 at 15:42):

That's a reasonably small graph. I was hoping that add_group and category would be distinct blobs.

Johan Commelin (Sep 19 2020 at 15:43):

It's funny that fintype is showing up (-; The informal definition only mentions binary products (-; But of course we work with arbitrary finite ones (-;

Johan Commelin (Sep 19 2020 at 15:43):

I don't know if this graph is "interesting" for Quanta

Kevin Buzzard (Sep 19 2020 at 16:38):

@Mario Carneiro yes, the definition of schemes uses categories, and if you look at the way that Ramon did it (presheaves of types, sheaves of types, presheaves of rings, sheaves of rings) and then the way we continued in the perfectoid project (presheaves of topological rings, sheaves of topological rings) you could see it was not going to scale. We want presheaves and sheaves of abelian groups, R-algebras, R-modules, \mathcal{R}-modules where \mathcal{R} is a sheaf of rings etc.

Kevin Buzzard (Sep 19 2020 at 16:39):

Scott's approach gives us presheaves and sheaves taking values in an arbitrary category, and this looks like an appealing approach to me.

Mario Carneiro (Sep 19 2020 at 16:39):

That all sounds good, and in particular it is an important source of applications that were sorely lacking back at the start of the category theory library

Mario Carneiro (Sep 19 2020 at 16:40):

I think we should focus more on making category theory applicable to other areas by direct generalization of useful theorems in other areas

Kevin Buzzard (Sep 19 2020 at 16:40):

Re: applications -- This is exactly why I'm flagging it. The acid test will be whether we can glue sheaves and schemes together, because now we're taking quotients and who knows if stuff will get hairy.

Kevin Buzzard (Sep 19 2020 at 16:41):

"Lean has schemes" -> "mathlib has schemes" took a really long time, and basically this is because I got it wrong twice and learnt a lot from my mistakes -- but we learnt that category theory had to be ready, and for a long time it wasn't.

Kevin Buzzard (Sep 19 2020 at 16:42):

I'm really hoping that Amelia will be able to use it [category theory] to start doing some serious cohomology theory like Ext and Tor in her forthcoming MSc.

Kevin Buzzard (Sep 19 2020 at 16:43):

So I'm seeing that area of the graph as hopefully changing soon, it should be interesting. It might just all grind to a halt or it might work great.

Kevin Buzzard (Sep 19 2020 at 16:51):

My student Shenyang Wu defined group cohomology but did not prove the long exact sequence of cohomology associated to a short exact sequence of abelian groups because he had to submit his thesis before he was able to learn how to use the new machinery. All the mathematical theorems were there though, as far as I could see; we just ran out of time before we got the machine going. We need the long exact sequence of cohomology associated to a short exact sequence of abelian groups/Z-modules/objects in some abelian category. I think Shenyang must have written the API for this, so perhaps this is something we can hand over to the category people.

Patrick Massot (Sep 19 2020 at 16:52):

Johan Commelin said:

That's a reasonably small graph. I was hoping that add_group and category would be distinct blobs.

They are. add_group is a the bottom-left

Reid Barton (Sep 19 2020 at 16:53):

Can you make a graph for Scheme?

Johan Commelin (Sep 19 2020 at 16:53):

Aha, now I notice that there are two blobs close together

Patrick Massot (Sep 19 2020 at 17:00):

Schemes for Reid
image.png

Patrick Massot (Sep 19 2020 at 17:00):

colored by cluster

Patrick Massot (Sep 19 2020 at 17:02):

Blue is category theory, orange is order and some topology, green seems to be more category theory, grey is commutative algebra

Heather Macbeth (Sep 19 2020 at 17:04):

Why order? Is that because of filters?

Patrick Massot (Sep 19 2020 at 17:05):

We use order and lattices everywhere in mathlib.

Patrick Massot (Sep 19 2020 at 17:05):

screenshot_190450.png

Patrick Massot (Sep 19 2020 at 17:05):

This is a close-up view

Heather Macbeth (Sep 19 2020 at 17:06):

I guess the other relevant orders here might be on topologies and on subrings.

Patrick Massot (Sep 19 2020 at 17:07):

And on open sets...

Johan Commelin (Sep 19 2020 at 17:10):

@Patrick Massot This looks really sweet! Thanks so much for doing this

Patrick Massot (Sep 19 2020 at 17:10):

We still need a more efficient way of pruning foundations.

Johan Commelin (Sep 19 2020 at 17:10):

Another idea for Quanta: The normed_ring instance on Z_p?

Heather Macbeth (Sep 19 2020 at 17:10):

Maybe a few of these could go on the website?

Heather Macbeth (Sep 19 2020 at 17:11):

They are so pretty, and they capture the imagination.

Patrick Massot (Sep 19 2020 at 17:11):

Looking at the scheme graph I still see stuff like quotient. This idea is important and indeed unifies a lot of things, but it makes it impossible to have nice clusters

Heather Macbeth (Sep 19 2020 at 17:15):

I would love to see Lie group (sometime, whenever you have time).

Patrick Massot (Sep 19 2020 at 17:30):

lie_group.png

Johan Commelin (Sep 19 2020 at 17:31):

Can someone tell the difference between a Lie group and a perfectoid space?

Patrick Massot (Sep 19 2020 at 17:32):

The weird thing in this case is that clusters don't seem to mean much. The graph is simply confusing for Gephi.

Patrick Massot (Sep 19 2020 at 18:02):

I tried a more aggressive pruning strategy and the graph looks completely different.
lie_group2.png
This is all very unscientific

Patrick Massot (Sep 19 2020 at 18:03):

But I post it anyway because it's kind of pretty

Patrick Massot (Sep 19 2020 at 18:03):

and indeed removing more foundational stuff bring more structure

Heather Macbeth (Sep 19 2020 at 21:46):

Does Gephi know that these are directed (rather than undirected) graphs? Are there clustering algorithms that take this into account?

Patrick Massot (Sep 20 2020 at 08:39):

It knows they are directed. I don't know about the other questions. I actually don't know anything about graph analysis, I'm just clicking random buttons.

Kevin Buzzard (Sep 20 2020 at 09:39):

Are you training to be an ML researcher?


Last updated: Dec 20 2023 at 11:08 UTC