Zulip Chat Archive

Stream: maths

Topic: partial derivatives


view this post on Zulip Kenny Lau (Jul 12 2019 at 14:42):

namespace mv_polynomial

variables {σ : Type u} [decidable_eq σ]
variables {α : Type v} [comm_semiring α] [decidable_eq α]
variables (i j : σ) (p q : mv_polynomial σ α) (r : α)

def partial_deriv : mv_polynomial σ α :=
-- p.sum $ λ v r, C r * (add_monoid.smul (v i) (X i ^ (v i - 1)) * v.prod (λ j n, if i = j then 1 else (mv_polynomial.X j)^n))
p.sum $ λ v r, finsupp.single (v.sum $ λ j n, finsupp.single j (if i = j then n-1 else n)) (add_monoid.smul (v i) r)

-- #exit
theorem partial_deriv_add : (p + q).partial_deriv i = p.partial_deriv i + q.partial_deriv i :=
finsupp.sum_add_index (λ v, by rw [add_monoid.smul_zero, finsupp.single_zero]; refl) $
λ v a1 a2, by rw [add_monoid.smul_add, finsupp.single_add]

theorem partial_deriv_zero : (0 : mv_polynomial σ α).partial_deriv i = 0 :=
finsupp.sum_zero_index

theorem partial_deriv_C : (C r).partial_deriv i = 0 :=
(finsupp.sum_single_index $ by rw [add_monoid.smul_zero, finsupp.single_zero]; refl).trans $
by rw [finsupp.zero_apply, add_monoid.zero_smul, finsupp.single_zero]; refl

theorem partial_deriv_X : (X i : mv_polynomial σ α).partial_deriv j = if i = j then 1 else 0 :=
begin
  rw [partial_deriv, X, monomial, finsupp.sum_single_index, finsupp.sum_single_index, nat.sub_self, finsupp.single_apply],
  { split_ifs with hji hij hij,
    { rw [add_monoid.one_smul, finsupp.single_zero], refl },
    { exact absurd hji.symm hij },
    { exact absurd hij.symm hji },
    { rw [add_monoid.zero_smul, finsupp.single_zero], refl } },
  { split_ifs with hji,
    { rw [nat.zero_sub, finsupp.single_zero] },
    { rw finsupp.single_zero } },
  { rw [add_monoid.smul_zero, finsupp.single_zero], refl }
end

theorem partial_deriv_one : (1 : mv_polynomial σ α).partial_deriv i = 0 :=
by rw [ C_1, partial_deriv_C]

instance : semimodule α (mv_polynomial σ α) := finsupp.to_semimodule _ _

theorem C_mul'' : C r * p = r  p :=
begin
  refine finsupp.induction p _ _,
  { exact (mul_zero _).trans (smul_zero _).symm },
  { intros v s f hfv hs ih,
    rw [mul_add, smul_add, ih],
    rw [C, monomial, finsupp.mul_def, finsupp.sum_single_index, finsupp.sum_single_index, zero_add],
    rw [finsupp.smul_single, smul_eq_mul],
    { rw [mul_zero, finsupp.single_zero] },
    { rw [finsupp.sum_single_index]; rw [zero_mul, finsupp.single_zero] } }
end

theorem partial_deriv_smul : (r  p).partial_deriv i = r  p.partial_deriv i :=
begin
  rw [partial_deriv, finsupp.sum_smul_index', partial_deriv,  C_mul'', finsupp.mul_sum], refine finset.sum_congr rfl _,
  { intros v hv, dsimp only, rw [add_monoid.smul_eq_mul, add_monoid.smul_eq_mul, mul_left_comm,  smul_eq_mul,  finsupp.smul_single, C_mul''] },
  { intros v, rw [add_monoid.smul_zero, finsupp.single_zero], refl }
end

theorem partial_deriv_mul : (p * q).partial_deriv i = p * q.partial_deriv i + q * p.partial_deriv i :=
begin
  have :  v w : σ  , w i  0  finsupp.sum (v + w) (λ (j : σ) (n : ), finsupp.single j (ite (i = j) (n - 1) n)) =
    v + finsupp.sum w (λ (j : σ) (n : ), finsupp.single j (ite (i = j) (n - 1) n)),
  { intros v w hw, ext j,
    rw [finsupp.sum_apply, finsupp.add_apply, finsupp.sum_apply, finsupp.sum, finset.sum_eq_single j, finsupp.sum, finset.sum_eq_single j],
    { rw [finsupp.single_apply, finsupp.single_apply, if_pos rfl, if_pos rfl], split_ifs with hij,
      { subst hij, rw [finsupp.add_apply, nat.add_sub_assoc], exact nat.one_le_of_lt (nat.pos_of_ne_zero hw) },
      { refl } },
    { intros k hk hkj, rw [finsupp.single_apply, if_neg hkj] },
    { intros hjw, rw [finsupp.single_apply, if_pos rfl], split_ifs; rw [finsupp.not_mem_support_iff.1 hjw] },
    { intros k hk hkj, rw [finsupp.single_apply, if_neg hkj] },
    { intros hjw, rw [finsupp.single_apply, if_pos rfl], split_ifs; rw [finsupp.not_mem_support_iff.1 hjw] } },
  refine finsupp.induction p _ (λ v r p hpv hr ihp, _),
  { erw zero_mul, rw zero_add, erw partial_deriv_zero, rw mul_zero },
  rw [add_mul, partial_deriv_add], refine eq.trans (congr_arg _ ihp) _,
  rw [add_mul, partial_deriv_add, mul_add, add_comm₄], congr' 1,
  refine finsupp.induction q _ (λ w s q hqw hs ihq, _),
  { erw mul_zero, rw zero_add, erw zero_mul, rw partial_deriv_zero },
  rw [mul_add, partial_deriv_add], refine eq.trans (congr_arg _ ihq) _,
  rw [add_mul, partial_deriv_add, mul_add, add_comm₄], congr' 1,
  rw [finsupp.single_mul_single, finsupp.single_eq_smul_one v r, finsupp.single_eq_smul_one w s, finsupp.single_eq_smul_one (v+w) (r*s)],
  rw [partial_deriv_smul, partial_deriv_smul, partial_deriv_smul,  C_mul'',  C_mul'',  C_mul'',  C_mul'',  C_mul''],
  rw [mul_comm₄,  C_mul, mul_comm₄,  C_mul, mul_comm s r,  mul_add], congr' 1,
  rw [partial_deriv, finsupp.sum_single_index], swap, { rw [add_monoid.smul_zero, finsupp.single_zero], refl },
  rw [partial_deriv, finsupp.sum_single_index], swap, { rw [add_monoid.smul_zero, finsupp.single_zero], refl },
  rw [partial_deriv, finsupp.sum_single_index], swap, { rw [add_monoid.smul_zero, finsupp.single_zero], refl },
  rw [finsupp.add_apply, finsupp.single_mul_single, finsupp.single_mul_single, one_mul, one_mul],
  by_cases hv : v i = 0,
  { rw [hv, zero_add, add_monoid.zero_smul, finsupp.single_zero], erw add_zero,
    by_cases hw : w i = 0,
    { rw [hw, add_monoid.zero_smul, finsupp.single_zero, finsupp.single_zero] },
    { rw [this v w hw] } },
  by_cases hw : w i = 0,
  { rw [hw, add_monoid.zero_smul, finsupp.single_zero, add_zero], erw zero_add, rw [add_comm v w, this w v hv] },
  rw [add_monoid.add_smul, finsupp.single_add, add_comm], congr' 1,
  { rw this v w hw }, { rw [add_comm v w, this w v hv] }
end

theorem vars_add : (p + q).vars  p.vars  q.vars :=
λ i, by unfold vars; simp only [finset.mem_union, multiset.mem_to_finset]; exact
λ hi, multiset.mem_add.1 (multiset.subset_of_le
  (le_trans (degrees_add p q) $ lattice.sup_le (multiset.le_add_right _ _) (multiset.le_add_left _ _)) hi)

theorem vars_mul : (p * q).vars  p.vars  q.vars :=
λ i, by unfold vars; simp only [finset.mem_union, multiset.mem_to_finset]; exact
λ hi, multiset.mem_add.1 (multiset.subset_of_le (degrees_mul p q) hi)

theorem vars_X_subset : vars (X i : mv_polynomial σ α)  {i} :=
λ j hj, multiset.subset_of_le (degrees_X i) $ multiset.mem_to_finset.1 hj

theorem of_not_mem_vars (h : i  p.vars) (v : σ  ) (hv : v  p.support) : v i = 0 :=
finsupp.not_mem_support_iff.1 $ λ hiv, h $
have h1 : v.to_multiset  p.degrees := finset.le_sup hv,
have h2 : _  v.to_multiset := finset.single_le_sum (λ _ _, multiset.zero_le _) hiv,
multiset.mem_to_finset.2 $ multiset.subset_of_le h1 $ multiset.subset_of_le h2 $
by rw  nat.succ_pred_eq_of_pos (nat.pos_of_ne_zero $ finsupp.mem_support_iff.1 hiv); exact
multiset.mem_cons_self _ _

theorem partial_deriv_of_not_mem_vars (h : i  p.vars) : p.partial_deriv i = 0 :=
finset.sum_eq_zero $ λ v hv, by dsimp only; rw [of_not_mem_vars i p h v hv, add_monoid.zero_smul, finsupp.single_zero]; refl

end mv_polynomial

view this post on Zulip Kenny Lau (Jul 12 2019 at 14:43):

PR-ready?

view this post on Zulip Johan Commelin (Jul 12 2019 at 15:03):

Looks pretty good to me. Maybe C_mul'' can be called C_mul_eq_smul?

view this post on Zulip Yury G. Kudryashov (Oct 04 2020 at 04:17):

I've started thinking about a proper way to introduce partial derivatives into mathlib.

view this post on Zulip Yury G. Kudryashov (Oct 04 2020 at 04:24):

It's clear how to define pderiv for a function f : (fin n → k) → E

view this post on Zulip Yury G. Kudryashov (Oct 04 2020 at 04:25):

But this definition won't work, e.g., for f : k × k → E or f : k → E, and this doesn't sound good for a mathematician.

view this post on Zulip Yury G. Kudryashov (Oct 04 2020 at 04:29):

I suggest introducing something like

class has_canonical_basis (k : Type*) [normed_field k] (E : Type*) [normed_group E] [normed_space k E] :=
(σ : Type)
[hfin : fintype σ]
(iso : (σ  k) L[k] E)

view this post on Zulip Yury G. Kudryashov (Oct 04 2020 at 04:30):

With instances for σ → k, euclidean_space, E × F (using σ = σE ⊕ σF), k (using σ = unit).

view this post on Zulip Yury G. Kudryashov (Oct 04 2020 at 04:31):

@Patrick Massot @Sebastien Gouezel What do you think?

view this post on Zulip Johan Commelin (Oct 04 2020 at 05:41):

We have the same problem in the omin repository. We introduced a has_coordinates typeclass.

view this post on Zulip Johan Commelin (Oct 04 2020 at 05:41):

@Reid Barton Do you think it is ready for PR?

view this post on Zulip Johan Commelin (Oct 04 2020 at 05:42):

(Of course it doesn't assume any normed* stuff. So we'll have to think whether it is good enough, or maybe you need some has_normed_coordinates extending it.

view this post on Zulip Yury G. Kudryashov (Oct 04 2020 at 05:45):

Actually, we can just upgrade from linear_equiv to continuous_linear_equiv using finiteness of σ.

view this post on Zulip Yury G. Kudryashov (Oct 04 2020 at 05:46):

Could you please post a link to your has_coordinates code?

view this post on Zulip Johan Commelin (Oct 04 2020 at 05:48):

https://github.com/rwbarton/lean-omin/blob/master/src/o_minimal/coordinates.lean

view this post on Zulip Johan Commelin (Oct 04 2020 at 05:50):

There is also Patrick's has_uncurry class in mathlib, which might be the right solution for your use case

view this post on Zulip Yury G. Kudryashov (Oct 04 2020 at 05:56):

No, it will uncurry f : k × k → E to k → k → E, not to (fin 2 → k) → E.

view this post on Zulip Yury G. Kudryashov (Oct 04 2020 at 05:57):

Or (unit ⊕ unit → k) → E

view this post on Zulip Yury G. Kudryashov (Oct 04 2020 at 05:59):

I would prefer to have any (σ : Type*) [fintype σ] as an index type, not necessarily fin n.

view this post on Zulip Yury G. Kudryashov (Oct 04 2020 at 06:00):

Because for E × F it's easier to deal with sum.inl k than some fin magic.

view this post on Zulip Yury G. Kudryashov (Oct 04 2020 at 06:01):

And I need coordinates to be linear. So, probably I'll add another typeclass.

view this post on Zulip Sebastien Gouezel (Oct 04 2020 at 07:31):

There is still the issue that, on EFE \oplus F, some people call partial derivative the derivative with respect to EE (so, it's not a number, it's really a matrix). But it is impossible to get a notion that covers all these variants, so I think your idea is a very nice one to cover one-dimensional partial derivatives.

view this post on Zulip Yury G. Kudryashov (Oct 04 2020 at 07:37):

We can mention that ∂f/∂x : E →L[k] G for f : E × F → G can be written as fderiv (λ x, f (x, y)).

view this post on Zulip Sebastien Gouezel (Oct 04 2020 at 07:37):

Yes, sure!

view this post on Zulip Reid Barton (Oct 04 2020 at 15:06):

In general, the problem is how to replace a given fin.dim. R-vector space with some fixed model R^ι (ι a finite type) or R^n in a conventional way. For some purposes the order of coordinates is irrelevant, and then R^ι is enough, but for other purposes the order really matters and you need R^n. For example, consider row reduction of the matrices that represent linear maps, or classifying subspaces by how they meet the standard flag inside R^n. Likewise in model theory, quantifier elimination works by eliminating one variable at a time, say, the "last" one, and for this it's again better to work with R^(n+1) than with fintypes. (Actually row reduction of matrices is basically a special case of this, I think.)

view this post on Zulip Reid Barton (Oct 04 2020 at 15:10):

So, unless we think it makes sense to have both concepts, I think the R^n version is better.

view this post on Zulip Reid Barton (Oct 04 2020 at 15:13):

Yury G. Kudryashov said:

Because for E × F it's easier to deal with sum.inl k than some fin magic.

With some version of the "reindexing" concept you can hide the details of exactly how the coordinates of (x, y) : E × F are related to those of x : E and y : F.

view this post on Zulip Reid Barton (Oct 04 2020 at 15:24):

The main advantage of a fintype version would be that you can have an instance for ι → R itself whose coordinates are given by the identity and so, in particular, the coordinates for (ι ⊕ ι') → R are related to those for ι → R and ι' → R specifically by composing with sum.inl and sum.inr. In lean-omin this has never come up because we have actually never had a use for specifically ι → R with [fintype ι]. But, you could have an instance which uses choice to pick an ordering on ι and uses it to define coordinates. In that case, there would be no specific relationship between the coordinates for (ι ⊕ ι') → R and ι → R and ι' → R. I claim this doesn't really matter because you can describe the coordinate change as a reindexing, anyways. However, probably a sensible thing to do would be to only have a global instance when ι is an instance of docs#fin_enum and then ensure that ι ⊕ ι' has a sensible fin_enum instance.

view this post on Zulip Yury G. Kudryashov (Oct 04 2020 at 15:24):

Is R^ι with a decidable_linear_order on ι any worse than fin n?

view this post on Zulip Reid Barton (Oct 04 2020 at 15:25):

Another consideration about the fintype version is that either you introduce a second universe variable, or in the ι → R instance you only handle ι in Type 0.

view this post on Zulip Reid Barton (Oct 04 2020 at 15:26):

It is worse because fin n has a bunch of API that is not conveniently available on an arbitrary finite decidable linear order, as well as somewhat better definitional behavior.

view this post on Zulip Reid Barton (Oct 04 2020 at 15:28):

(Also, conceptually the point of this whole construction is to reduce some problem/construction from general fin.dim. vector spaces to specific model spaces, so why stop at R^ι when you could go all the way to R^n?)

view this post on Zulip Reid Barton (Oct 04 2020 at 15:34):

Once you're working in this setting, the general objects are like (V : Type*) [has_coordinates R V] and the specific objects are like fin n -> R, so there's no longer any special significance to ι → R with [fintype ι].

view this post on Zulip Reid Barton (Oct 04 2020 at 15:35):

It only comes up if you have a [fintype ι] that comes from outside, so to speak. And then perhaps it would be better to have [fin_enum ι] which is basically the non-linear analogue of this setup.

view this post on Zulip Yury G. Kudryashov (Oct 04 2020 at 15:38):

OK, I'll try fin n. Thank you for the explanation.

view this post on Zulip Reid Barton (Oct 04 2020 at 15:53):

I should probably also add that in lean-omin this reindexing stuff is only ever used to prove Props, but you would probably also want to for example say what the partial derivatives of prod.fst : E \x F -> E are, so that might affect the design.

view this post on Zulip Yury G. Kudryashov (Oct 04 2020 at 15:56):

I'll just declare an instance on E × F using docs#equiv.sum_fin_sum_equiv.


Last updated: May 12 2021 at 06:14 UTC