Documentation

Init.Data.String.Defs

Preliminary developments for strings #

This file contains the material about strings which we can write down without the results in Init.Data.String.Decode, i.e., without knowing about the bijection between String and List Char given by UTF-8 decoding and encoding.

Note that this file, despite being called Defs, contains quite a few lemmas.

@[inline]

Decodes an array of bytes that encode a string as UTF-8 into the corresponding string.

Equations
Instances For
    @[extern lean_string_to_utf8]

    Encodes a string in UTF-8 as an array of bytes.

    Equations
    Instances For
      @[simp]
      @[extern lean_string_append]

      Appends two strings. Usually accessed via the ++ operator.

      The internal implementation will perform destructive updates if the string is not shared.

      Examples:

      • "abc".append "def" = "abcdef"
      • "abc" ++ "def" = "abcdef"
      • "" ++ "" = ""
      Equations
      Instances For
        Equations
        @[simp]
        theorem String.bytes_append {s t : String} :
        (s ++ t).bytes = s.bytes ++ t.bytes
        theorem String.bytes_inj {s t : String} :
        s.bytes = t.bytes s = t
        @[deprecated String.bytes_ofList (since := "2025-10-30")]
        @[deprecated String.exists_eq_ofList (since := "2025-10-30")]
        @[simp]
        theorem String.bytes_push {s : String} {c : Char} :
        @[deprecated String.rawEndPos (since := "2025-10-20")]
        Equations
        Instances For

          The start position of the string, as a String.Pos.Raw.

          Equations
          Instances For
            @[simp]
            @[deprecated String.byteIdx_rawEndPos (since := "2025-10-20")]
            @[simp]
            theorem String.utf8ByteSize_ofByteArray {b : ByteArray} {h : b.IsValidUTF8} :
            { bytes := b, isValidUTF8 := h }.utf8ByteSize = b.size
            @[deprecated String.singleton_eq_ofList (since := "2025-10-30")]
            @[simp]
            theorem String.append_singleton {s : String} {c : Char} :
            s ++ singleton c = s.push c
            @[simp]
            theorem String.append_left_inj {s₁ s₂ : String} (t : String) :
            s₁ ++ t = s₂ ++ t s₁ = s₂
            theorem String.append_assoc {s₁ s₂ s₃ : String} :
            s₁ ++ s₂ ++ s₃ = s₁ ++ (s₂ ++ s₃)
            @[deprecated String.rawEndPos_eq_zero_iff (since := "2025-10-20")]
            @[inline]
            def String.pushn (s : String) (c : Char) (n : Nat) :

            Adds multiple repetitions of a character to the end of a string.

            Returns s, with n repetitions of c at the end. Internally, the implementation repeatedly calls String.push, so the string is modified in-place if there is a unique reference to it.

            Examples:

            • "indeed".pushn '!' 2 = "indeed!!"
            • "indeed".pushn '!' 0 = "indeed"
            • "".pushn ' ' 4 = " "
            Equations
            Instances For
              theorem String.pushn_eq_repeat_push {s : String} {c : Char} {n : Nat} :
              s.pushn c n = Nat.repeat (fun (s : String) => s.push c) n s
              @[export lean_string_pushn]
              Equations
              Instances For
                @[inline]

                Checks whether a string is empty.

                Empty strings are equal to "" and have length and end position 0.

                Examples:

                Equations
                Instances For
                  @[export lean_string_isempty]
                  Equations
                  Instances For
                    @[inline]

                    Appends all the strings in a list of strings, in order.

                    Use String.intercalate to place a separator string between the strings in a list.

                    Examples:

                    Equations
                    Instances For

                      Appends the strings in a list of strings, placing the separator s between each pair.

                      Examples:

                      • ", ".intercalate ["red", "green", "blue"] = "red, green, blue"
                      • " and ".intercalate ["tea", "coffee"] = "tea and coffee"
                      • " | ".intercalate ["M", "", "N"] = "M | | N"
                      Equations
                      Instances For
                        @[export lean_string_intercalate]
                        Equations
                        Instances For
                          structure String.Pos.Raw.IsValid (s : String) (off : Raw) :

                          Predicate for validity of positions inside a String.

                          There are multiple equivalent definitions for validity.

                          We say that a position is valid if the string obtained by taking all of the bytes up to, but excluding, the given position, is valid UTF-8; see Pos.isValid_iff_isValidUTF8_extract_zero.

                          Similarly, a position is valid if the string obtained by taking all of the bytes starting at the given position is valid UTF-8; see Pos.isValid_iff_isValidUTF8_extract_utf8ByteSize.

                          An equivalent condition is that the position is the length of the UTF-8 encoding of some prefix of the characters of the string; see Pos.isValid_iff_exists_append and Pos.isValid_iff_exists_take_data.

                          Another equivalent condition that can be checked efficiently is that the position is either the end position or strictly smaller than the end position and the byte at the position satisfies UInt8.IsUTF8FirstByte; see Pos.isValid_iff_isUTF8FirstByte.

                          Examples:

                          • String.Pos.IsValid "abc" ⟨0⟩
                          • String.Pos.IsValid "abc" ⟨1⟩
                          • String.Pos.IsValid "abc" ⟨3⟩
                          • ¬ String.Pos.IsValid "abc" ⟨4⟩
                          • String.Pos.IsValid "𝒫(A)" ⟨0⟩
                          • ¬ String.Pos.IsValid "𝒫(A)" ⟨1⟩
                          • ¬ String.Pos.IsValid "𝒫(A)" ⟨2⟩
                          • ¬ String.Pos.IsValid "𝒫(A)" ⟨3⟩
                          • String.Pos.IsValid "𝒫(A)" ⟨4⟩
                          Instances For
                            @[deprecated String.Pos.Raw.IsValid.le_rawEndPos (since := "2025-10-20")]
                            theorem String.Pos.Raw.IsValid.le_endPos {s : String} {off : Raw} (h : IsValid s off) :
                            @[simp]
                            structure String.ValidPos (s : String) :

                            A ValidPos s is a byte offset in s together with a proof that this position is at a UTF-8 character boundary.

                            Instances For
                              theorem String.ValidPos.ext {s : String} {x y : s.ValidPos} (offset : x.offset = y.offset) :
                              x = y
                              theorem String.ValidPos.ext_iff {s : String} {x y : s.ValidPos} :
                              x = y x.offset = y.offset
                              def String.instDecidableEqValidPos.decEq {s✝ : String} (x✝ x✝¹ : s✝.ValidPos) :
                              Decidable (x✝ = x✝¹)
                              Equations
                              • One or more equations did not get rendered due to their size.
                              Instances For
                                @[inline]

                                The start position of s, as an s.ValidPos.

                                Equations
                                Instances For
                                  @[inline]

                                  The past-the-end position of s, as an s.ValidPos.

                                  Equations
                                  Instances For
                                    Equations
                                    Equations
                                    theorem String.ValidPos.lt_iff {s : String} {l r : s.ValidPos} :
                                    l < r l.offset < r.offset
                                    structure String.Slice :

                                    A region or slice of some underlying string.

                                    A slice consists of a string together with the start and end byte positions of a region of interest. Actually extracting a substring requires copying and memory allocation, while many slices of the same underlying string may exist with very little overhead. While this could be achieved by tracking the bounds by hand, the slice API is much more convenient.

                                    String.Slice bundles proofs to ensure that the start and end positions always delineate a valid string. For this reason, it should be preferred over Substring.Raw.

                                    • str : String

                                      The underlying strings.

                                    • startInclusive : self.str.ValidPos

                                      The byte position of the start of the string slice.

                                    • endExclusive : self.str.ValidPos

                                      The byte position of the end of the string slice.

                                    • startInclusive_le_endExclusive : self.startInclusive self.endExclusive

                                      The slice is not degenerate (but it may be empty).

                                    Instances For
                                      Equations
                                      • One or more equations did not get rendered due to their size.
                                      @[inline]

                                      Returns a slice that contains the entire string.

                                      Equations
                                      Instances For
                                        @[simp]

                                        The number of bytes of the UTF-8 encoding of the string slice.

                                        Equations
                                        Instances For

                                          The end position of a slice, as a Pos.Raw.

                                          Equations
                                          Instances For

                                            Criterion for validity of positions in string slices.

                                            Instances For
                                              @[inline]

                                              Accesses the indicated byte in the UTF-8 encoding of a string slice.

                                              At runtime, this function is implemented by efficient, constant-time code.

                                              Equations
                                              Instances For

                                                Accesses the indicated byte in the UTF-8 encoding of the string slice, or panics if the position is out-of-bounds.

                                                Equations
                                                • One or more equations did not get rendered due to their size.
                                                Instances For
                                                  structure String.Slice.Pos (s : Slice) :

                                                  A Slice.Pos s is a byte offset in s together with a proof that this position is at a UTF-8 character boundary.

                                                  Instances For
                                                    theorem String.Slice.Pos.ext {s : Slice} {x y : s.Pos} (offset : x.offset = y.offset) :
                                                    x = y
                                                    theorem String.Slice.Pos.ext_iff {s : Slice} {x y : s.Pos} :
                                                    x = y x.offset = y.offset
                                                    def String.Slice.instDecidableEqPos.decEq {s✝ : Slice} (x✝ x✝¹ : s✝.Pos) :
                                                    Decidable (x✝ = x✝¹)
                                                    Equations
                                                    • One or more equations did not get rendered due to their size.
                                                    Instances For
                                                      @[inline]

                                                      The start position of s, as an s.Pos.

                                                      Equations
                                                      Instances For
                                                        @[inline]

                                                        The past-the-end position of s, as an s.Pos.

                                                        Equations
                                                        Instances For
                                                          instance String.instLEPos {s : Slice} :
                                                          Equations
                                                          instance String.instLTPos {s : Slice} :
                                                          Equations
                                                          theorem String.Slice.Pos.le_iff {s : Slice} {l r : s.Pos} :
                                                          theorem String.Slice.Pos.lt_iff {s : Slice} {l r : s.Pos} :
                                                          l < r l.offset < r.offset
                                                          @[reducible, inline]

                                                          pos.IsAtEnd is just shorthand for pos = s.endValidPos that is easier to write if s is long.

                                                          Equations
                                                          Instances For
                                                            @[simp]
                                                            @[reducible, inline]
                                                            abbrev String.Slice.Pos.IsAtEnd {s : Slice} (pos : s.Pos) :

                                                            pos.IsAtEnd is just shorthand for pos = s.endPos that is easier to write if s is long.

                                                            Equations
                                                            Instances For
                                                              @[simp]
                                                              theorem String.Slice.Pos.isAtEnd_iff {s : Slice} {pos : s.Pos} :
                                                              pos.IsAtEnd pos = s.endPos
                                                              @[inline]
                                                              def String.Slice.Pos.byte {s : Slice} (pos : s.Pos) (h : pos s.endPos) :

                                                              Returns the byte at a position in a slice that is not the end position.

                                                              Equations
                                                              Instances For
                                                                @[simp]