## Stream: maths

### Topic: partial derivatives

#### Kenny Lau (Jul 12 2019 at 14:42):

namespace mv_polynomial

variables {σ : Type u} [decidable_eq σ]
variables {α : Type v} [comm_semiring α] [decidable_eq α]
variables (i j : σ) (p q : mv_polynomial σ α) (r : α)

def partial_deriv : mv_polynomial σ α :=
-- p.sum $λ v r, C r * (add_monoid.smul (v i) (X i ^ (v i - 1)) * v.prod (λ j n, if i = j then 1 else (mv_polynomial.X j)^n)) p.sum$ λ v r, finsupp.single (v.sum $λ j n, finsupp.single j (if i = j then n-1 else n)) (add_monoid.smul (v i) r) -- #exit theorem partial_deriv_add : (p + q).partial_deriv i = p.partial_deriv i + q.partial_deriv i := finsupp.sum_add_index (λ v, by rw [add_monoid.smul_zero, finsupp.single_zero]; refl)$

theorem partial_deriv_zero : (0 : mv_polynomial σ α).partial_deriv i = 0 :=
finsupp.sum_zero_index

theorem partial_deriv_C : (C r).partial_deriv i = 0 :=
(finsupp.sum_single_index $by rw [add_monoid.smul_zero, finsupp.single_zero]; refl).trans$
by rw [finsupp.zero_apply, add_monoid.zero_smul, finsupp.single_zero]; refl

theorem partial_deriv_X : (X i : mv_polynomial σ α).partial_deriv j = if i = j then 1 else 0 :=
begin
rw [partial_deriv, X, monomial, finsupp.sum_single_index, finsupp.sum_single_index, nat.sub_self, finsupp.single_apply],
{ split_ifs with hji hij hij,
{ rw [add_monoid.one_smul, finsupp.single_zero], refl },
{ exact absurd hji.symm hij },
{ exact absurd hij.symm hji },
{ rw [add_monoid.zero_smul, finsupp.single_zero], refl } },
{ split_ifs with hji,
{ rw [nat.zero_sub, finsupp.single_zero] },
{ rw finsupp.single_zero } },
{ rw [add_monoid.smul_zero, finsupp.single_zero], refl }
end

theorem partial_deriv_one : (1 : mv_polynomial σ α).partial_deriv i = 0 :=
by rw [← C_1, partial_deriv_C]

instance : semimodule α (mv_polynomial σ α) := finsupp.to_semimodule _ _

theorem C_mul'' : C r * p = r • p :=
begin
refine finsupp.induction p _ _,
{ exact (mul_zero _).trans (smul_zero _).symm },
{ intros v s f hfv hs ih,
rw [C, monomial, finsupp.mul_def, finsupp.sum_single_index, finsupp.sum_single_index, zero_add],
rw [finsupp.smul_single, smul_eq_mul],
{ rw [mul_zero, finsupp.single_zero] },
{ rw [finsupp.sum_single_index]; rw [zero_mul, finsupp.single_zero] } }
end

theorem partial_deriv_smul : (r • p).partial_deriv i = r • p.partial_deriv i :=
begin
rw [partial_deriv, finsupp.sum_smul_index', partial_deriv, ← C_mul'', finsupp.mul_sum], refine finset.sum_congr rfl _,
{ intros v hv, dsimp only, rw [add_monoid.smul_eq_mul, add_monoid.smul_eq_mul, mul_left_comm, ← smul_eq_mul, ← finsupp.smul_single, C_mul''] },
{ intros v, rw [add_monoid.smul_zero, finsupp.single_zero], refl }
end

theorem partial_deriv_mul : (p * q).partial_deriv i = p * q.partial_deriv i + q * p.partial_deriv i :=
begin
have : ∀ v w : σ →₀ ℕ, w i ≠ 0 → finsupp.sum (v + w) (λ (j : σ) (n : ℕ), finsupp.single j (ite (i = j) (n - 1) n)) =
v + finsupp.sum w (λ (j : σ) (n : ℕ), finsupp.single j (ite (i = j) (n - 1) n)),
{ intros v w hw, ext j,
rw [finsupp.sum_apply, finsupp.add_apply, finsupp.sum_apply, finsupp.sum, finset.sum_eq_single j, finsupp.sum, finset.sum_eq_single j],
{ rw [finsupp.single_apply, finsupp.single_apply, if_pos rfl, if_pos rfl], split_ifs with hij,
{ refl } },
{ intros k hk hkj, rw [finsupp.single_apply, if_neg hkj] },
{ intros hjw, rw [finsupp.single_apply, if_pos rfl], split_ifs; rw [finsupp.not_mem_support_iff.1 hjw] },
{ intros k hk hkj, rw [finsupp.single_apply, if_neg hkj] },
{ intros hjw, rw [finsupp.single_apply, if_pos rfl], split_ifs; rw [finsupp.not_mem_support_iff.1 hjw] } },
refine finsupp.induction p _ (λ v r p hpv hr ihp, _),
{ erw zero_mul, rw zero_add, erw partial_deriv_zero, rw mul_zero },
refine finsupp.induction q _ (λ w s q hqw hs ihq, _),
{ erw mul_zero, rw zero_add, erw zero_mul, rw partial_deriv_zero },
rw [finsupp.single_mul_single, finsupp.single_eq_smul_one v r, finsupp.single_eq_smul_one w s, finsupp.single_eq_smul_one (v+w) (r*s)],
rw [partial_deriv_smul, partial_deriv_smul, partial_deriv_smul, ← C_mul'', ← C_mul'', ← C_mul'', ← C_mul'', ← C_mul''],
rw [mul_comm₄, ← C_mul, mul_comm₄, ← C_mul, mul_comm s r, ← mul_add], congr' 1,
rw [partial_deriv, finsupp.sum_single_index], swap, { rw [add_monoid.smul_zero, finsupp.single_zero], refl },
rw [partial_deriv, finsupp.sum_single_index], swap, { rw [add_monoid.smul_zero, finsupp.single_zero], refl },
rw [partial_deriv, finsupp.sum_single_index], swap, { rw [add_monoid.smul_zero, finsupp.single_zero], refl },
rw [finsupp.add_apply, finsupp.single_mul_single, finsupp.single_mul_single, one_mul, one_mul],
by_cases hv : v i = 0,
by_cases hw : w i = 0,
{ rw [hw, add_monoid.zero_smul, finsupp.single_zero, finsupp.single_zero] },
{ rw [this v w hw] } },
by_cases hw : w i = 0,
{ rw this v w hw }, { rw [add_comm v w, this w v hv] }
end

theorem vars_add : (p + q).vars ⊆ p.vars ∪ q.vars :=
λ i, by unfold vars; simp only [finset.mem_union, multiset.mem_to_finset]; exact
(le_trans (degrees_add p q) $lattice.sup_le (multiset.le_add_right _ _) (multiset.le_add_left _ _)) hi) theorem vars_mul : (p * q).vars ⊆ p.vars ∪ q.vars := λ i, by unfold vars; simp only [finset.mem_union, multiset.mem_to_finset]; exact λ hi, multiset.mem_add.1 (multiset.subset_of_le (degrees_mul p q) hi) theorem vars_X_subset : vars (X i : mv_polynomial σ α) ⊆ {i} := λ j hj, multiset.subset_of_le (degrees_X i)$ multiset.mem_to_finset.1 hj

theorem of_not_mem_vars (h : i ∉ p.vars) (v : σ →₀ ℕ) (hv : v ∈ p.support) : v i = 0 :=
finsupp.not_mem_support_iff.1 $λ hiv, h$
have h1 : v.to_multiset ≤ p.degrees := finset.le_sup hv,
have h2 : _ ≤ v.to_multiset := finset.single_le_sum (λ _ _, multiset.zero_le _) hiv,
multiset.mem_to_finset.2 $multiset.subset_of_le h1$ multiset.subset_of_le h2 $by rw ← nat.succ_pred_eq_of_pos (nat.pos_of_ne_zero$ finsupp.mem_support_iff.1 hiv); exact
multiset.mem_cons_self _ _

theorem partial_deriv_of_not_mem_vars (h : i ∉ p.vars) : p.partial_deriv i = 0 :=
finset.sum_eq_zero \$ λ v hv, by dsimp only; rw [of_not_mem_vars i p h v hv, add_monoid.zero_smul, finsupp.single_zero]; refl

end mv_polynomial


#### Johan Commelin (Jul 12 2019 at 15:03):

Looks pretty good to me. Maybe C_mul'' can be called C_mul_eq_smul?

#### Yury G. Kudryashov (Oct 04 2020 at 04:17):

I've started thinking about a proper way to introduce partial derivatives into mathlib.

#### Yury G. Kudryashov (Oct 04 2020 at 04:24):

It's clear how to define pderiv for a function f : (fin n → k) → E

#### Yury G. Kudryashov (Oct 04 2020 at 04:25):

But this definition won't work, e.g., for f : k × k → E or f : k → E, and this doesn't sound good for a mathematician.

#### Yury G. Kudryashov (Oct 04 2020 at 04:29):

I suggest introducing something like

class has_canonical_basis (k : Type*) [normed_field k] (E : Type*) [normed_group E] [normed_space k E] :=
(σ : Type)
[hfin : fintype σ]
(iso : (σ → k) ≃L[k] E)


#### Yury G. Kudryashov (Oct 04 2020 at 04:30):

With instances for σ → k, euclidean_space, E × F (using σ = σE ⊕ σF), k (using σ = unit).

#### Yury G. Kudryashov (Oct 04 2020 at 04:31):

@Patrick Massot @Sebastien Gouezel What do you think?

#### Johan Commelin (Oct 04 2020 at 05:41):

We have the same problem in the omin repository. We introduced a has_coordinates typeclass.

#### Johan Commelin (Oct 04 2020 at 05:41):

@Reid Barton Do you think it is ready for PR?

#### Johan Commelin (Oct 04 2020 at 05:42):

(Of course it doesn't assume any normed* stuff. So we'll have to think whether it is good enough, or maybe you need some has_normed_coordinates extending it.

#### Yury G. Kudryashov (Oct 04 2020 at 05:45):

Actually, we can just upgrade from linear_equiv to continuous_linear_equiv using finiteness of σ.

#### Yury G. Kudryashov (Oct 04 2020 at 05:46):

Could you please post a link to your has_coordinates code?

#### Johan Commelin (Oct 04 2020 at 05:48):

https://github.com/rwbarton/lean-omin/blob/master/src/o_minimal/coordinates.lean

#### Johan Commelin (Oct 04 2020 at 05:50):

There is also Patrick's has_uncurry class in mathlib, which might be the right solution for your use case

#### Yury G. Kudryashov (Oct 04 2020 at 05:56):

No, it will uncurry f : k × k → E to k → k → E, not to (fin 2 → k) → E.

#### Yury G. Kudryashov (Oct 04 2020 at 05:57):

Or (unit ⊕ unit → k) → E

#### Yury G. Kudryashov (Oct 04 2020 at 05:59):

I would prefer to have any (σ : Type*) [fintype σ] as an index type, not necessarily fin n.

#### Yury G. Kudryashov (Oct 04 2020 at 06:00):

Because for E × F it's easier to deal with sum.inl k than some fin magic.

#### Yury G. Kudryashov (Oct 04 2020 at 06:01):

And I need coordinates to be linear. So, probably I'll add another typeclass.

#### Sebastien Gouezel (Oct 04 2020 at 07:31):

There is still the issue that, on $E \oplus F$, some people call partial derivative the derivative with respect to $E$ (so, it's not a number, it's really a matrix). But it is impossible to get a notion that covers all these variants, so I think your idea is a very nice one to cover one-dimensional partial derivatives.

#### Yury G. Kudryashov (Oct 04 2020 at 07:37):

We can mention that ∂f/∂x : E →L[k] G for f : E × F → G can be written as fderiv (λ x, f (x, y)).

Yes, sure!

#### Reid Barton (Oct 04 2020 at 15:06):

In general, the problem is how to replace a given fin.dim. R-vector space with some fixed model R^ι (ι a finite type) or R^n in a conventional way. For some purposes the order of coordinates is irrelevant, and then R^ι is enough, but for other purposes the order really matters and you need R^n. For example, consider row reduction of the matrices that represent linear maps, or classifying subspaces by how they meet the standard flag inside R^n. Likewise in model theory, quantifier elimination works by eliminating one variable at a time, say, the "last" one, and for this it's again better to work with R^(n+1) than with fintypes. (Actually row reduction of matrices is basically a special case of this, I think.)

#### Reid Barton (Oct 04 2020 at 15:10):

So, unless we think it makes sense to have both concepts, I think the R^n version is better.

#### Reid Barton (Oct 04 2020 at 15:13):

Yury G. Kudryashov said:

Because for E × F it's easier to deal with sum.inl k than some fin magic.

With some version of the "reindexing" concept you can hide the details of exactly how the coordinates of (x, y) : E × F are related to those of x : E and y : F.

#### Reid Barton (Oct 04 2020 at 15:24):

The main advantage of a fintype version would be that you can have an instance for ι → R itself whose coordinates are given by the identity and so, in particular, the coordinates for (ι ⊕ ι') → R are related to those for ι → R and ι' → R specifically by composing with sum.inl and sum.inr. In lean-omin this has never come up because we have actually never had a use for specifically ι → R with [fintype ι]. But, you could have an instance which uses choice to pick an ordering on ι and uses it to define coordinates. In that case, there would be no specific relationship between the coordinates for (ι ⊕ ι') → R and ι → R and ι' → R. I claim this doesn't really matter because you can describe the coordinate change as a reindexing, anyways. However, probably a sensible thing to do would be to only have a global instance when ι is an instance of docs#fin_enum and then ensure that ι ⊕ ι' has a sensible fin_enum instance.

#### Yury G. Kudryashov (Oct 04 2020 at 15:24):

Is R^ι with a decidable_linear_order on ι any worse than fin n?

#### Reid Barton (Oct 04 2020 at 15:25):

Another consideration about the fintype version is that either you introduce a second universe variable, or in the ι → R instance you only handle ι in Type 0.

#### Reid Barton (Oct 04 2020 at 15:26):

It is worse because fin n has a bunch of API that is not conveniently available on an arbitrary finite decidable linear order, as well as somewhat better definitional behavior.

#### Reid Barton (Oct 04 2020 at 15:28):

(Also, conceptually the point of this whole construction is to reduce some problem/construction from general fin.dim. vector spaces to specific model spaces, so why stop at R^ι when you could go all the way to R^n?)

#### Reid Barton (Oct 04 2020 at 15:34):

Once you're working in this setting, the general objects are like (V : Type*) [has_coordinates R V] and the specific objects are like fin n -> R, so there's no longer any special significance to ι → R with [fintype ι].

#### Reid Barton (Oct 04 2020 at 15:35):

It only comes up if you have a [fintype ι] that comes from outside, so to speak. And then perhaps it would be better to have [fin_enum ι] which is basically the non-linear analogue of this setup.

#### Yury G. Kudryashov (Oct 04 2020 at 15:38):

OK, I'll try fin n. Thank you for the explanation.

#### Reid Barton (Oct 04 2020 at 15:53):

I should probably also add that in lean-omin this reindexing stuff is only ever used to prove Props, but you would probably also want to for example say what the partial derivatives of prod.fst : E \x F -> E are, so that might affect the design.

#### Yury G. Kudryashov (Oct 04 2020 at 15:56):

I'll just declare an instance on E × F using docs#equiv.sum_fin_sum_equiv.

Last updated: May 12 2021 at 06:14 UTC