From 49f34716234bd02b09ca086419e6843532c240a1 Mon Sep 17 00:00:00 2001
From: Raymond Hettinger <raymond.hettinger@gmail.com>
Date: Fri, 5 Apr 2024 00:01:55 -0500
Subject: [PATCH 01/37] Add kde_rand()

---
 Lib/statistics.py | 92 ++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 91 insertions(+), 1 deletion(-)

diff --git a/Lib/statistics.py b/Lib/statistics.py
index 58fb31def8896e..089802136140db 100644
--- a/Lib/statistics.py
+++ b/Lib/statistics.py
@@ -138,12 +138,13 @@
 from itertools import count, groupby, repeat
 from bisect import bisect_left, bisect_right
 from math import hypot, sqrt, fabs, exp, erf, tau, log, fsum, sumprod
-from math import isfinite, isinf, pi, cos, sin, cosh, atan
+from math import isfinite, isinf, pi, cos, sin, tan, cosh, asin, atan
 from functools import reduce
 from operator import itemgetter
 from collections import Counter, namedtuple, defaultdict
 
 _SQRT2 = sqrt(2.0)
+_random = random
 
 # === Exceptions ===
 
@@ -1697,3 +1698,92 @@ def __getstate__(self):
 
     def __setstate__(self, state):
         self._mu, self._sigma = state
+
+
+## kde_rand() ################################################################
+
+def _newton_raphson(f_inv_est, f, f_prime, tolerance=1e-12):
+    def f_inv(y):
+        "Return x such that f(x) ≈ y within the specified tolerance."
+        x = f_inv_est(y)
+        while abs(diff := y - f(x)) > tolerance:
+            x += diff / f_prime(x)
+        return x
+    return f_inv
+
+_parabolic_invcdf = _newton_raphson(
+    f_inv_est = lambda p: ((2.0 * p) ** 0.583367470424302 - 1.0
+                           if p <= 1/2 else
+                           1.0 - (2.0 - 2.0*p) ** 0.583367470424302),
+    f = lambda t: -1/4 * t**3 + 3/4 * t + 1/2,
+    f_prime = lambda t: 3/4 * (1.0 - t * t))
+
+_quartic_invcdf = _newton_raphson(
+    f_inv_est = lambda p: ((2.0 * p) ** 0.4258865685331 - 1.0
+                           if p <= 1/2 else
+                           1.0 - (2.0 - 2.0*p) ** 0.4258865685331),
+    f = lambda t: 3/16 * t**5 - 5/8 * t**3 + 15/16 * t + 1/2,
+    f_prime = lambda t: 15/16 * (1.0 - t * t) ** 2)
+
+_triweight_invcdf = _newton_raphson(
+    f_inv_est = lambda p: ((2.0 * p) ** 0.3400218741872791 - 1.0
+                           if p <= 1/2 else
+                           1.0 - (2.0 - 2.0*p) ** 0.3400218741872791),
+    f = lambda t: 35/32 * (-1/7*t**7 + 3/5*t**5 - t**3 + t) + 1/2,
+    f_prime = lambda t: 35/32 * (1.0 - t * t) ** 3)
+
+_kernel_invcdfs = {
+    'normal': NormalDist().inv_cdf,
+    'logisitic': lambda p: log(p / (1 - p)),
+    'sigmoid': lambda p: log(tan(p * pi/2)),
+    'rectangular': lambda p: 2*p - 1,
+    'parabolic': _parabolic_invcdf,
+    'quartic': _quartic_invcdf,
+    'triweight': _triweight_invcdf,
+    'triangular': lambda p: sqrt(2*p) - 1 if p < 0.5 else 1 - sqrt(2 - 2*p),
+    'cosine': lambda p: 2*asin(2*p - 1)/pi,
+}
+_kernel_invcdfs['gauss'] = _kernel_invcdfs['normal']
+_kernel_invcdfs['uniform'] = _kernel_invcdfs['rectangular']
+_kernel_invcdfs['epanechnikov'] = _kernel_invcdfs['parabolic']
+_kernel_invcdfs['biweight'] = _kernel_invcdfs['quartic']
+
+def kde_random(data, h, kernel='normal', *, seed=None):
+    """Return a function that makes a random selection from the estimated
+    probability density function created by:  kde(data, h, kernel)
+
+    For reproducible results, set *seed* to an integer, float, str, or bytes.
+    Not thread-safe without a lock around calls.
+
+    A StatisticsError will be raised if the data sequence is empty.
+
+    """
+    n = len(data)
+    if not n:
+        raise StatisticsError('Empty data sequence')
+
+    if not isinstance(data[0], (int, float)):
+        raise TypeError('Data sequence must contain ints or floats')
+
+    if h <= 0.0:
+        raise StatisticsError(f'Bandwidth h must be positive, not {h=!r}')
+
+    if seed is None:
+        random = _random.random
+        choice = _random.choice
+    else:
+        prng = _random.Random(seed)
+        random = prng.random
+        choice = prng.choice
+
+    try:
+        kernel_invcdf = _kernel_invcdfs[kernel]
+    except KeyError:
+        raise StatisticsError(f'Unknown kernel name: {kernel!r}')
+
+    def rand():
+        return choice(data) + h * kernel_invcdf(random())
+
+    rand.__doc__ = f'Random KDE selection with {h=!r} and {kernel=!r}'
+
+    return rand

From 0fd40f3cecbf1a8bd20e0d624ba8d253d9a9550f Mon Sep 17 00:00:00 2001
From: Raymond Hettinger <raymond.hettinger@gmail.com>
Date: Fri, 5 Apr 2024 00:09:56 -0500
Subject: [PATCH 02/37] Update __all__

---
 Lib/statistics.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Lib/statistics.py b/Lib/statistics.py
index 089802136140db..8ca63b8ccc9749 100644
--- a/Lib/statistics.py
+++ b/Lib/statistics.py
@@ -113,6 +113,7 @@
     'geometric_mean',
     'harmonic_mean',
     'kde',
+    'kde_random',
     'linear_regression',
     'mean',
     'median',

From 75680e5fca6aa01f12b963fcca333725ab0a58e8 Mon Sep 17 00:00:00 2001
From: Raymond Hettinger <raymond.hettinger@gmail.com>
Date: Fri, 5 Apr 2024 21:24:33 -0500
Subject: [PATCH 03/37] Auto detect size changes

---
 Lib/statistics.py | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/Lib/statistics.py b/Lib/statistics.py
index 8ca63b8ccc9749..8b746e8fce5255 100644
--- a/Lib/statistics.py
+++ b/Lib/statistics.py
@@ -976,9 +976,11 @@ def kde(data, h, kernel='normal', *, cumulative=False):
     if support is None:
 
         def pdf(x):
+            n = len(data)
             return sum(K((x - x_i) / h) for x_i in data) / (n * h)
 
         def cdf(x):
+            n = len(data)
             return sum(I((x - x_i) / h) for x_i in data) / n
 
     else:
@@ -987,12 +989,20 @@ def cdf(x):
         bandwidth = h * support
 
         def pdf(x):
+            nonlocal n, sample
+            if len(data) != n:
+                sample = sorted(data)
+                n = len(data)
             i = bisect_left(sample, x - bandwidth)
             j = bisect_right(sample, x + bandwidth)
             supported = sample[i : j]
             return sum(K((x - x_i) / h) for x_i in supported) / (n * h)
 
         def cdf(x):
+            nonlocal n, sample
+            if len(data) != n:
+                sample = sorted(data)
+                n = len(data)
             i = bisect_left(sample, x - bandwidth)
             j = bisect_right(sample, x + bandwidth)
             supported = sample[i : j]

From 63ba396e5edbf1393edec8813740b6d3157d651e Mon Sep 17 00:00:00 2001
From: Raymond Hettinger <raymond.hettinger@gmail.com>
Date: Fri, 5 Apr 2024 21:31:55 -0500
Subject: [PATCH 04/37] .

---
 Lib/statistics.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Lib/statistics.py b/Lib/statistics.py
index 8b746e8fce5255..95e99432e268e2 100644
--- a/Lib/statistics.py
+++ b/Lib/statistics.py
@@ -1711,7 +1711,7 @@ def __setstate__(self, state):
         self._mu, self._sigma = state
 
 
-## kde_rand() ################################################################
+## kde_random() ##############################################################
 
 def _newton_raphson(f_inv_est, f, f_prime, tolerance=1e-12):
     def f_inv(y):

From c69c5042cd4b3166490279d97767d0077eb97de5 Mon Sep 17 00:00:00 2001
From: Raymond Hettinger <raymond.hettinger@gmail.com>
Date: Fri, 5 Apr 2024 21:46:34 -0500
Subject: [PATCH 05/37] Factor-out s-scurve function

---
 Lib/statistics.py | 21 +++++++++++----------
 1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/Lib/statistics.py b/Lib/statistics.py
index 95e99432e268e2..0eec821f69b0ee 100644
--- a/Lib/statistics.py
+++ b/Lib/statistics.py
@@ -1722,24 +1722,25 @@ def f_inv(y):
         return x
     return f_inv
 
+def _simple_s_curve(power):
+    "S-curve that crosses (0, -1), (1/2, 0), (1, 1)."
+    # Approximates the invcdf for a kernels with support==1.0
+    return lambda p: ((2 * p) ** power - 1
+                      if p <= 1/2 else
+                      1 - (2 - 2*p) ** power)
+
 _parabolic_invcdf = _newton_raphson(
-    f_inv_est = lambda p: ((2.0 * p) ** 0.583367470424302 - 1.0
-                           if p <= 1/2 else
-                           1.0 - (2.0 - 2.0*p) ** 0.583367470424302),
+    f_inv_est = _simple_s_curve(0.583367470424302),
     f = lambda t: -1/4 * t**3 + 3/4 * t + 1/2,
     f_prime = lambda t: 3/4 * (1.0 - t * t))
 
 _quartic_invcdf = _newton_raphson(
-    f_inv_est = lambda p: ((2.0 * p) ** 0.4258865685331 - 1.0
-                           if p <= 1/2 else
-                           1.0 - (2.0 - 2.0*p) ** 0.4258865685331),
+    f_inv_est = _simple_s_curve(0.4258865685331),
     f = lambda t: 3/16 * t**5 - 5/8 * t**3 + 15/16 * t + 1/2,
     f_prime = lambda t: 15/16 * (1.0 - t * t) ** 2)
 
 _triweight_invcdf = _newton_raphson(
-    f_inv_est = lambda p: ((2.0 * p) ** 0.3400218741872791 - 1.0
-                           if p <= 1/2 else
-                           1.0 - (2.0 - 2.0*p) ** 0.3400218741872791),
+    f_inv_est = _simple_s_curve(0.3400218741872791),
     f = lambda t: 35/32 * (-1/7*t**7 + 3/5*t**5 - t**3 + t) + 1/2,
     f_prime = lambda t: 35/32 * (1.0 - t * t) ** 3)
 
@@ -1751,7 +1752,7 @@ def f_inv(y):
     'parabolic': _parabolic_invcdf,
     'quartic': _quartic_invcdf,
     'triweight': _triweight_invcdf,
-    'triangular': lambda p: sqrt(2*p) - 1 if p < 0.5 else 1 - sqrt(2 - 2*p),
+    'triangular': lambda p: sqrt(2*p) - 1 if p < 1/2 else 1 - sqrt(2 - 2*p),
     'cosine': lambda p: 2*asin(2*p - 1)/pi,
 }
 _kernel_invcdfs['gauss'] = _kernel_invcdfs['normal']

From dcd83f2204d8cf791aec4e5d0c41860bbfe03077 Mon Sep 17 00:00:00 2001
From: Raymond Hettinger <raymond.hettinger@gmail.com>
Date: Fri, 5 Apr 2024 21:50:13 -0500
Subject: [PATCH 06/37] .

---
 Lib/statistics.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/Lib/statistics.py b/Lib/statistics.py
index 0eec821f69b0ee..8c64a294ee82a0 100644
--- a/Lib/statistics.py
+++ b/Lib/statistics.py
@@ -1713,10 +1713,10 @@ def __setstate__(self, state):
 
 ## kde_random() ##############################################################
 
-def _newton_raphson(f_inv_est, f, f_prime, tolerance=1e-12):
+def _newton_raphson(f_inv_estimate, f, f_prime, tolerance=1e-12):
     def f_inv(y):
         "Return x such that f(x) ≈ y within the specified tolerance."
-        x = f_inv_est(y)
+        x = f_inv_estimate(y)
         while abs(diff := y - f(x)) > tolerance:
             x += diff / f_prime(x)
         return x
@@ -1730,17 +1730,17 @@ def _simple_s_curve(power):
                       1 - (2 - 2*p) ** power)
 
 _parabolic_invcdf = _newton_raphson(
-    f_inv_est = _simple_s_curve(0.583367470424302),
+    f_inv_estimate = _simple_s_curve(0.583367470424302),
     f = lambda t: -1/4 * t**3 + 3/4 * t + 1/2,
     f_prime = lambda t: 3/4 * (1.0 - t * t))
 
 _quartic_invcdf = _newton_raphson(
-    f_inv_est = _simple_s_curve(0.4258865685331),
+    f_inv_estimate = _simple_s_curve(0.4258865685331),
     f = lambda t: 3/16 * t**5 - 5/8 * t**3 + 15/16 * t + 1/2,
     f_prime = lambda t: 15/16 * (1.0 - t * t) ** 2)
 
 _triweight_invcdf = _newton_raphson(
-    f_inv_est = _simple_s_curve(0.3400218741872791),
+    f_inv_estimate = _simple_s_curve(0.3400218741872791),
     f = lambda t: 35/32 * (-1/7*t**7 + 3/5*t**5 - t**3 + t) + 1/2,
     f_prime = lambda t: 35/32 * (1.0 - t * t) ** 3)
 

From 57492bfd8201eaa871c759719af2048364e40aab Mon Sep 17 00:00:00 2001
From: Raymond Hettinger <raymond.hettinger@gmail.com>
Date: Fri, 5 Apr 2024 22:20:19 -0500
Subject: [PATCH 07/37] .

---
 Lib/statistics.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Lib/statistics.py b/Lib/statistics.py
index 8c64a294ee82a0..13c9c3a6f73e46 100644
--- a/Lib/statistics.py
+++ b/Lib/statistics.py
@@ -1724,7 +1724,7 @@ def f_inv(y):
 
 def _simple_s_curve(power):
     "S-curve that crosses (0, -1), (1/2, 0), (1, 1)."
-    # Approximates the invcdf for a kernels with support==1.0
+    # Approximates the invcdf for kernels with support==1.0
     return lambda p: ((2 * p) ** power - 1
                       if p <= 1/2 else
                       1 - (2 - 2*p) ** power)
@@ -1753,7 +1753,7 @@ def _simple_s_curve(power):
     'quartic': _quartic_invcdf,
     'triweight': _triweight_invcdf,
     'triangular': lambda p: sqrt(2*p) - 1 if p < 1/2 else 1 - sqrt(2 - 2*p),
-    'cosine': lambda p: 2*asin(2*p - 1)/pi,
+    'cosine': lambda p: 2 * asin(2*p - 1) / pi,
 }
 _kernel_invcdfs['gauss'] = _kernel_invcdfs['normal']
 _kernel_invcdfs['uniform'] = _kernel_invcdfs['rectangular']

From 5b41598493a04d00b07eee2bfd952a820c29aade Mon Sep 17 00:00:00 2001
From: Raymond Hettinger <raymond.hettinger@gmail.com>
Date: Sat, 6 Apr 2024 21:03:30 -0500
Subject: [PATCH 08/37] .

---
 Lib/statistics.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Lib/statistics.py b/Lib/statistics.py
index 13c9c3a6f73e46..9e1083e54d5b2d 100644
--- a/Lib/statistics.py
+++ b/Lib/statistics.py
@@ -1722,9 +1722,9 @@ def f_inv(y):
         return x
     return f_inv
 
-def _simple_s_curve(power):
+def _simple_s_curve(power=0.5):
     "S-curve that crosses (0, -1), (1/2, 0), (1, 1)."
-    # Approximates the invcdf for kernels with support==1.0
+    # Approximates the invcdf for kernels with support: -1 <= x <= 1.
     return lambda p: ((2 * p) ** power - 1
                       if p <= 1/2 else
                       1 - (2 - 2*p) ** power)

From 89a6033824356c73252d994fbc571af708f2baaa Mon Sep 17 00:00:00 2001
From: Raymond Hettinger <raymond.hettinger@gmail.com>
Date: Sat, 6 Apr 2024 22:11:06 -0500
Subject: [PATCH 09/37] Add docs for kde_random().

---
 Doc/library/statistics.rst | 80 +++++++++++---------------------------
 1 file changed, 23 insertions(+), 57 deletions(-)

diff --git a/Doc/library/statistics.rst b/Doc/library/statistics.rst
index 197c123f8356d8..c99f16e12ada7a 100644
--- a/Doc/library/statistics.rst
+++ b/Doc/library/statistics.rst
@@ -311,6 +311,29 @@ However, for reading convenience, most of the examples show sorted sequences.
    .. versionadded:: 3.13
 
 
+.. function:: kde_random(data, h, kernel='normal', *, seed=None)
+
+   Return a function that makes a random selection from the estimated
+   probability density function created by ``kde(data, h, kernel)``.
+
+   For reproducible results, set *seed* to an integer, float, str, or bytes.
+   Not thread-safe without a lock around calls.
+
+   A :exc:`StatisticsError` will be raised if the *data* sequence is empty.
+
+   Continuing the above example for :func:`kde`, we can use
+   :func:`kde_random` to generate new random selections from an
+   estimated probability density function:
+
+      >>> data = [-2.1, -1.3, -0.4, 1.9, 5.1, 6.2]
+      >>> rand = kde_random(sample, h=1.5, seed=8675309)
+      >>> new_selections = [rand() for i in range(10)]
+      >>> [round(x, 1) for x in new_selections]
+      [0.7, 6.2, 1.2, 6.9, 7.0, 1.8, 2.5, -0.5, -1.8, 5.6]
+
+   .. versionadded:: 3.13
+
+
 .. function:: median(data)
 
    Return the median (middle value) of numeric data, using the common "mean of
@@ -1148,63 +1171,6 @@ The final prediction goes to the largest posterior. This is known as the
   'female'
 
 
-Sampling from kernel density estimation
-***************************************
-
-The :func:`kde()` function creates a continuous probability density
-function from discrete samples.  Some applications need a way to make
-random selections from that distribution.
-
-The technique is to pick a sample from a bandwidth scaled kernel
-function and recenter the result around a randomly chosen point from
-the input data.  This can be done with any kernel that has a known or
-accurately approximated inverse cumulative distribution function.
-
-.. testcode::
-
-   from random import choice, random, seed
-   from math import sqrt, log, pi, tan, asin
-   from statistics import NormalDist
-
-   kernel_invcdfs = {
-       'normal': NormalDist().inv_cdf,
-       'logistic': lambda p: log(p / (1 - p)),
-       'sigmoid': lambda p: log(tan(p * pi/2)),
-       'rectangular': lambda p: 2*p - 1,
-       'triangular': lambda p: sqrt(2*p) - 1 if p < 0.5 else 1 - sqrt(2 - 2*p),
-       'cosine': lambda p: 2*asin(2*p - 1)/pi,
-   }
-
-   def kde_random(data, h, kernel='normal'):
-       'Return a function that samples from kde() smoothed data.'
-       kernel_invcdf = kernel_invcdfs[kernel]
-       def rand():
-           return h * kernel_invcdf(random()) + choice(data)
-       return rand
-
-For example:
-
-.. doctest::
-
-   >>> discrete_samples = [-2.1, -1.3, -0.4, 1.9, 5.1, 6.2]
-   >>> rand = kde_random(discrete_samples, h=1.5)
-   >>> seed(8675309)
-   >>> selections = [rand() for i in range(10)]
-   >>> [round(x, 1) for x in selections]
-   [4.7, 7.4, 1.2, 7.8, 6.9, -1.3, 5.8, 0.2, -1.4, 5.7]
-
-.. testcode::
-    :hide:
-
-    from statistics import kde
-    from math import isclose
-
-    # Verify that cdf / invcdf will round trip
-    xarr = [i/100 for i in range(-100, 101)]
-    for kernel, invcdf in kernel_invcdfs.items():
-        cdf = kde([0.0], h=1.0, kernel=kernel, cumulative=True)
-        for x in xarr:
-            assert isclose(invcdf(cdf(x)), x, abs_tol=1E-9)
 
 ..
    # This modelines must appear within the last ten lines of the file.

From 4d4f7928b2e31a525c7db268eec16db006a17306 Mon Sep 17 00:00:00 2001
From: Raymond Hettinger <raymond.hettinger@gmail.com>
Date: Sat, 6 Apr 2024 22:18:34 -0500
Subject: [PATCH 10/37] .

---
 Doc/library/statistics.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Doc/library/statistics.rst b/Doc/library/statistics.rst
index c99f16e12ada7a..8510658c183039 100644
--- a/Doc/library/statistics.rst
+++ b/Doc/library/statistics.rst
@@ -314,7 +314,7 @@ However, for reading convenience, most of the examples show sorted sequences.
 .. function:: kde_random(data, h, kernel='normal', *, seed=None)
 
    Return a function that makes a random selection from the estimated
-   probability density function created by ``kde(data, h, kernel)``.
+   probability density function produced by ``kde(data, h, kernel)``.
 
    For reproducible results, set *seed* to an integer, float, str, or bytes.
    Not thread-safe without a lock around calls.

From 7848ccabc21ae14b6e80874a4dd3652230cd1ba3 Mon Sep 17 00:00:00 2001
From: Raymond Hettinger <raymond.hettinger@gmail.com>
Date: Sat, 6 Apr 2024 22:38:43 -0500
Subject: [PATCH 11/37] .

---
 Lib/statistics.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Lib/statistics.py b/Lib/statistics.py
index 9e1083e54d5b2d..236ca78715737f 100644
--- a/Lib/statistics.py
+++ b/Lib/statistics.py
@@ -1722,7 +1722,7 @@ def f_inv(y):
         return x
     return f_inv
 
-def _simple_s_curve(power=0.5):
+def _simple_s_curve(power):
     "S-curve that crosses (0, -1), (1/2, 0), (1, 1)."
     # Approximates the invcdf for kernels with support: -1 <= x <= 1.
     return lambda p: ((2 * p) ** power - 1

From 37a89553ae953ee4598c3dbb4feffabd8654d045 Mon Sep 17 00:00:00 2001
From: Raymond Hettinger <raymond.hettinger@gmail.com>
Date: Sun, 7 Apr 2024 13:19:07 -0500
Subject: [PATCH 12/37] Improve docstrings

---
 Doc/library/statistics.rst |  7 +++++--
 Lib/statistics.py          | 21 ++++++++++++++++-----
 2 files changed, 21 insertions(+), 7 deletions(-)

diff --git a/Doc/library/statistics.rst b/Doc/library/statistics.rst
index 8510658c183039..0dd81d5510660b 100644
--- a/Doc/library/statistics.rst
+++ b/Doc/library/statistics.rst
@@ -316,8 +316,11 @@ However, for reading convenience, most of the examples show sorted sequences.
    Return a function that makes a random selection from the estimated
    probability density function produced by ``kde(data, h, kernel)``.
 
-   For reproducible results, set *seed* to an integer, float, str, or bytes.
-   Not thread-safe without a lock around calls.
+   Providing a *seed* allows reproducible selections within a single
+   thread (or with a lock around calls).  In the future, the selection
+   method for the *parabolic*, *quartic*, and *triweight* kernels may be
+   replaced with faster algorithms that give different results.  The
+   seed may be an integer, float, str, or bytes.
 
    A :exc:`StatisticsError` will be raised if the *data* sequence is empty.
 
diff --git a/Lib/statistics.py b/Lib/statistics.py
index 236ca78715737f..a35c7826d171ce 100644
--- a/Lib/statistics.py
+++ b/Lib/statistics.py
@@ -1724,7 +1724,7 @@ def f_inv(y):
 
 def _simple_s_curve(power):
     "S-curve that crosses (0, -1), (1/2, 0), (1, 1)."
-    # Approximates the invcdf for kernels with support: -1 <= x <= 1.
+    # Approximates the inverse CDF for kernels with support: -1 < x < 1.
     return lambda p: ((2 * p) ** power - 1
                       if p <= 1/2 else
                       1 - (2 - 2*p) ** power)
@@ -1762,12 +1762,23 @@ def _simple_s_curve(power):
 
 def kde_random(data, h, kernel='normal', *, seed=None):
     """Return a function that makes a random selection from the estimated
-    probability density function created by:  kde(data, h, kernel)
+    probability density function created by kde(data, h, kernel).
 
-    For reproducible results, set *seed* to an integer, float, str, or bytes.
-    Not thread-safe without a lock around calls.
+    Providing a *seed* allows reproducible selections within a single
+    thread (or with a lock around calls).  In the future, the selection
+    method for the parabolic, quartic, and triweight kernels may be
+    replaced with faster algorithms that give different results.
+    The seed may be an integer, float, str, or bytes.
 
-    A StatisticsError will be raised if the data sequence is empty.
+    A StatisticsError will be raised if the *data* sequence is empty.
+
+    Example:
+
+    >>> data = [-2.1, -1.3, -0.4, 1.9, 5.1, 6.2]
+    >>> rand = kde_random(sample, h=1.5, seed=8675309)
+    >>> new_selections = [rand() for i in range(10)]
+    >>> [round(x, 1) for x in new_selections]
+    [0.7, 6.2, 1.2, 6.9, 7.0, 1.8, 2.5, -0.5, -1.8, 5.6]
 
     """
     n = len(data)

From a30fb9d9dd2b07fd9df64c348bf6021ed307da9d Mon Sep 17 00:00:00 2001
From: Raymond Hettinger <raymond.hettinger@gmail.com>
Date: Sun, 7 Apr 2024 13:23:58 -0500
Subject: [PATCH 13/37] The "with locks" comment was inaccurate.

---
 Doc/library/statistics.rst | 8 ++++----
 Lib/statistics.py          | 8 ++++----
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/Doc/library/statistics.rst b/Doc/library/statistics.rst
index 0dd81d5510660b..22c7b988cd8d39 100644
--- a/Doc/library/statistics.rst
+++ b/Doc/library/statistics.rst
@@ -317,10 +317,10 @@ However, for reading convenience, most of the examples show sorted sequences.
    probability density function produced by ``kde(data, h, kernel)``.
 
    Providing a *seed* allows reproducible selections within a single
-   thread (or with a lock around calls).  In the future, the selection
-   method for the *parabolic*, *quartic*, and *triweight* kernels may be
-   replaced with faster algorithms that give different results.  The
-   seed may be an integer, float, str, or bytes.
+   thread.  In the future, the selection method for the *parabolic*,
+   *quartic*, and *triweight* kernels may be replaced with faster
+   algorithms that give different results.  The seed may be an integer,
+   float, str, or bytes.
 
    A :exc:`StatisticsError` will be raised if the *data* sequence is empty.
 
diff --git a/Lib/statistics.py b/Lib/statistics.py
index a35c7826d171ce..ccbb825ddf9bf4 100644
--- a/Lib/statistics.py
+++ b/Lib/statistics.py
@@ -1765,10 +1765,10 @@ def kde_random(data, h, kernel='normal', *, seed=None):
     probability density function created by kde(data, h, kernel).
 
     Providing a *seed* allows reproducible selections within a single
-    thread (or with a lock around calls).  In the future, the selection
-    method for the parabolic, quartic, and triweight kernels may be
-    replaced with faster algorithms that give different results.
-    The seed may be an integer, float, str, or bytes.
+    thread.  In the future, the selection method for the parabolic,
+    quartic, and triweight kernels may be replaced with faster
+    algorithms that give different results.  The seed may be an
+    integer, float, str, or bytes.
 
     A StatisticsError will be raised if the *data* sequence is empty.
 

From a2a077f107721cb310ed4375023ecaed64e2cea7 Mon Sep 17 00:00:00 2001
From: Raymond Hettinger <raymond.hettinger@gmail.com>
Date: Sun, 7 Apr 2024 18:51:46 -0500
Subject: [PATCH 14/37] Add summary table entry. Fix doctests.

---
 Doc/library/statistics.rst | 3 ++-
 Lib/statistics.py          | 2 +-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/Doc/library/statistics.rst b/Doc/library/statistics.rst
index 22c7b988cd8d39..5e442caa154499 100644
--- a/Doc/library/statistics.rst
+++ b/Doc/library/statistics.rst
@@ -77,6 +77,7 @@ or sample.
 :func:`geometric_mean`   Geometric mean of data.
 :func:`harmonic_mean`    Harmonic mean of data.
 :func:`kde`              Estimate the probability density distribution of the data.
+:func:`kde_random`       Random sampling from the PDF generated by kde().
 :func:`median`           Median (middle value) of data.
 :func:`median_low`       Low median of data.
 :func:`median_high`      High median of data.
@@ -329,7 +330,7 @@ However, for reading convenience, most of the examples show sorted sequences.
    estimated probability density function:
 
       >>> data = [-2.1, -1.3, -0.4, 1.9, 5.1, 6.2]
-      >>> rand = kde_random(sample, h=1.5, seed=8675309)
+      >>> rand = kde_random(data, h=1.5, seed=8675309)
       >>> new_selections = [rand() for i in range(10)]
       >>> [round(x, 1) for x in new_selections]
       [0.7, 6.2, 1.2, 6.9, 7.0, 1.8, 2.5, -0.5, -1.8, 5.6]
diff --git a/Lib/statistics.py b/Lib/statistics.py
index ccbb825ddf9bf4..ee6ab4e958f31f 100644
--- a/Lib/statistics.py
+++ b/Lib/statistics.py
@@ -1775,7 +1775,7 @@ def kde_random(data, h, kernel='normal', *, seed=None):
     Example:
 
     >>> data = [-2.1, -1.3, -0.4, 1.9, 5.1, 6.2]
-    >>> rand = kde_random(sample, h=1.5, seed=8675309)
+    >>> rand = kde_random(data, h=1.5, seed=8675309)
     >>> new_selections = [rand() for i in range(10)]
     >>> [round(x, 1) for x in new_selections]
     [0.7, 6.2, 1.2, 6.9, 7.0, 1.8, 2.5, -0.5, -1.8, 5.6]

From a82b100daf0f68c2ab0f7640786ade3a05d3ba54 Mon Sep 17 00:00:00 2001
From: Raymond Hettinger <raymond.hettinger@gmail.com>
Date: Sun, 7 Apr 2024 19:58:19 -0500
Subject: [PATCH 15/37] Fix typo

---
 Lib/statistics.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Lib/statistics.py b/Lib/statistics.py
index ee6ab4e958f31f..75ceb18eb7cb51 100644
--- a/Lib/statistics.py
+++ b/Lib/statistics.py
@@ -1746,7 +1746,7 @@ def _simple_s_curve(power):
 
 _kernel_invcdfs = {
     'normal': NormalDist().inv_cdf,
-    'logisitic': lambda p: log(p / (1 - p)),
+    'logistic': lambda p: log(p / (1 - p)),
     'sigmoid': lambda p: log(tan(p * pi/2)),
     'rectangular': lambda p: 2*p - 1,
     'parabolic': _parabolic_invcdf,

From 1397e91d094cc9735d5f7282edc17c6f09962f3d Mon Sep 17 00:00:00 2001
From: Raymond Hettinger <raymond.hettinger@gmail.com>
Date: Thu, 11 Apr 2024 03:17:40 -0500
Subject: [PATCH 16/37] .

---
 Lib/statistics.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/Lib/statistics.py b/Lib/statistics.py
index 75ceb18eb7cb51..4e97bddeaa8710 100644
--- a/Lib/statistics.py
+++ b/Lib/statistics.py
@@ -1730,17 +1730,17 @@ def _simple_s_curve(power):
                       1 - (2 - 2*p) ** power)
 
 _parabolic_invcdf = _newton_raphson(
-    f_inv_estimate = _simple_s_curve(0.583367470424302),
+    f_inv_estimate = _simple_s_curve(0.583367470424302),  # (7/12)
     f = lambda t: -1/4 * t**3 + 3/4 * t + 1/2,
     f_prime = lambda t: 3/4 * (1.0 - t * t))
 
 _quartic_invcdf = _newton_raphson(
-    f_inv_estimate = _simple_s_curve(0.4258865685331),
+    f_inv_estimate = _simple_s_curve(0.4258865685331),   # (204/479)
     f = lambda t: 3/16 * t**5 - 5/8 * t**3 + 15/16 * t + 1/2,
     f_prime = lambda t: 15/16 * (1.0 - t * t) ** 2)
 
 _triweight_invcdf = _newton_raphson(
-    f_inv_estimate = _simple_s_curve(0.3400218741872791),
+    f_inv_estimate = _simple_s_curve(0.3400218741872791), (17/50)
     f = lambda t: 35/32 * (-1/7*t**7 + 3/5*t**5 - t**3 + t) + 1/2,
     f_prime = lambda t: 35/32 * (1.0 - t * t) ** 3)
 

From 630d942a6a54aeae76fbb2f9f3c20dbb88889962 Mon Sep 17 00:00:00 2001
From: Raymond Hettinger <raymond.hettinger@gmail.com>
Date: Sat, 13 Apr 2024 17:52:10 -0500
Subject: [PATCH 17/37] Change variable name to match the supporting reference.

---
 Lib/statistics.py | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/Lib/statistics.py b/Lib/statistics.py
index 4e97bddeaa8710..2fe7d6ac0133b7 100644
--- a/Lib/statistics.py
+++ b/Lib/statistics.py
@@ -921,13 +921,13 @@ def kde(data, h, kernel='normal', *, cumulative=False):
             sqrt2pi = sqrt(2 * pi)
             sqrt2 = sqrt(2)
             K = lambda t: exp(-1/2 * t * t) / sqrt2pi
-            I = lambda t: 1/2 * (1.0 + erf(t / sqrt2))
+            W = lambda t: 1/2 * (1.0 + erf(t / sqrt2))
             support = None
 
         case 'logistic':
             # 1.0 / (exp(t) + 2.0 + exp(-t))
             K = lambda t: 1/2 / (1.0 + cosh(t))
-            I = lambda t: 1.0 - 1.0 / (exp(t) + 1.0)
+            W = lambda t: 1.0 - 1.0 / (exp(t) + 1.0)
             support = None
 
         case 'sigmoid':
@@ -935,39 +935,39 @@ def kde(data, h, kernel='normal', *, cumulative=False):
             c1 = 1 / pi
             c2 = 2 / pi
             K = lambda t: c1 / cosh(t)
-            I = lambda t: c2 * atan(exp(t))
+            W = lambda t: c2 * atan(exp(t))
             support = None
 
         case 'rectangular' | 'uniform':
             K = lambda t: 1/2
-            I = lambda t: 1/2 * t + 1/2
+            W = lambda t: 1/2 * t + 1/2
             support = 1.0
 
         case 'triangular':
             K = lambda t: 1.0 - abs(t)
-            I = lambda t: t*t * (1/2 if t < 0.0 else -1/2) + t + 1/2
+            W = lambda t: t*t * (1/2 if t < 0.0 else -1/2) + t + 1/2
             support = 1.0
 
         case 'parabolic' | 'epanechnikov':
             K = lambda t: 3/4 * (1.0 - t * t)
-            I = lambda t: -1/4 * t**3 + 3/4 * t + 1/2
+            W = lambda t: -1/4 * t**3 + 3/4 * t + 1/2
             support = 1.0
 
         case 'quartic' | 'biweight':
             K = lambda t: 15/16 * (1.0 - t * t) ** 2
-            I = lambda t: 3/16 * t**5 - 5/8 * t**3 + 15/16 * t + 1/2
+            W = lambda t: 3/16 * t**5 - 5/8 * t**3 + 15/16 * t + 1/2
             support = 1.0
 
         case 'triweight':
             K = lambda t: 35/32 * (1.0 - t * t) ** 3
-            I = lambda t: 35/32 * (-1/7*t**7 + 3/5*t**5 - t**3 + t) + 1/2
+            W = lambda t: 35/32 * (-1/7*t**7 + 3/5*t**5 - t**3 + t) + 1/2
             support = 1.0
 
         case 'cosine':
             c1 = pi / 4
             c2 = pi / 2
             K = lambda t: c1 * cos(c2 * t)
-            I = lambda t: 1/2 * sin(c2 * t) + 1/2
+            W = lambda t: 1/2 * sin(c2 * t) + 1/2
             support = 1.0
 
         case _:
@@ -981,7 +981,7 @@ def pdf(x):
 
         def cdf(x):
             n = len(data)
-            return sum(I((x - x_i) / h) for x_i in data) / n
+            return sum(W((x - x_i) / h) for x_i in data) / n
 
     else:
 
@@ -1006,7 +1006,7 @@ def cdf(x):
             i = bisect_left(sample, x - bandwidth)
             j = bisect_right(sample, x + bandwidth)
             supported = sample[i : j]
-            return sum((I((x - x_i) / h) for x_i in supported), i) / n
+            return sum((W((x - x_i) / h) for x_i in supported), i) / n
 
     if cumulative:
         cdf.__doc__ = f'CDF estimate with {h=!r} and {kernel=!r}'

From e9e5d543fb4ed633c44d0aa2c900b0b9f793424e Mon Sep 17 00:00:00 2001
From: Raymond Hettinger <raymond.hettinger@gmail.com>
Date: Sun, 14 Apr 2024 22:06:25 -0500
Subject: [PATCH 18/37] Closed-form for _parabolic_invcdf().

---
 Lib/statistics.py | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

diff --git a/Lib/statistics.py b/Lib/statistics.py
index 2fe7d6ac0133b7..74afc080562c37 100644
--- a/Lib/statistics.py
+++ b/Lib/statistics.py
@@ -1729,18 +1729,13 @@ def _simple_s_curve(power):
                       if p <= 1/2 else
                       1 - (2 - 2*p) ** power)
 
-_parabolic_invcdf = _newton_raphson(
-    f_inv_estimate = _simple_s_curve(0.583367470424302),  # (7/12)
-    f = lambda t: -1/4 * t**3 + 3/4 * t + 1/2,
-    f_prime = lambda t: 3/4 * (1.0 - t * t))
-
 _quartic_invcdf = _newton_raphson(
     f_inv_estimate = _simple_s_curve(0.4258865685331),   # (204/479)
     f = lambda t: 3/16 * t**5 - 5/8 * t**3 + 15/16 * t + 1/2,
     f_prime = lambda t: 15/16 * (1.0 - t * t) ** 2)
 
 _triweight_invcdf = _newton_raphson(
-    f_inv_estimate = _simple_s_curve(0.3400218741872791), (17/50)
+    f_inv_estimate = _simple_s_curve(0.3400218741872791),  # (17/50)
     f = lambda t: 35/32 * (-1/7*t**7 + 3/5*t**5 - t**3 + t) + 1/2,
     f_prime = lambda t: 35/32 * (1.0 - t * t) ** 3)
 
@@ -1749,7 +1744,7 @@ def _simple_s_curve(power):
     'logistic': lambda p: log(p / (1 - p)),
     'sigmoid': lambda p: log(tan(p * pi/2)),
     'rectangular': lambda p: 2*p - 1,
-    'parabolic': _parabolic_invcdf,
+    'parabolic': lambda p: 2 * cos((acos(2*p-1) + pi) / 3),
     'quartic': _quartic_invcdf,
     'triweight': _triweight_invcdf,
     'triangular': lambda p: sqrt(2*p) - 1 if p < 1/2 else 1 - sqrt(2 - 2*p),

From 8a0fd6994acc7d03d94727d9bb6d3e9f037b77c8 Mon Sep 17 00:00:00 2001
From: Raymond Hettinger <raymond.hettinger@gmail.com>
Date: Sun, 14 Apr 2024 23:03:47 -0500
Subject: [PATCH 19/37] Flip sign in Newton-Raphson to match the common
 presentation.

---
 Lib/statistics.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Lib/statistics.py b/Lib/statistics.py
index 74afc080562c37..cc80505ace965e 100644
--- a/Lib/statistics.py
+++ b/Lib/statistics.py
@@ -1717,8 +1717,8 @@ def _newton_raphson(f_inv_estimate, f, f_prime, tolerance=1e-12):
     def f_inv(y):
         "Return x such that f(x) ≈ y within the specified tolerance."
         x = f_inv_estimate(y)
-        while abs(diff := y - f(x)) > tolerance:
-            x += diff / f_prime(x)
+        while abs(diff := f(x) - y) > tolerance:
+            x -= diff / f_prime(x)
         return x
     return f_inv
 

From 3459bf20ac203c591445b727d956ade0e84fe189 Mon Sep 17 00:00:00 2001
From: Raymond Hettinger <raymond.hettinger@gmail.com>
Date: Tue, 16 Apr 2024 23:09:20 -0500
Subject: [PATCH 20/37] Add kernel_invcdf tests

---
 Lib/statistics.py           |  2 +-
 Lib/test/test_statistics.py | 11 +++++++++++
 2 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/Lib/statistics.py b/Lib/statistics.py
index cc80505ace965e..04833aeff5cb8e 100644
--- a/Lib/statistics.py
+++ b/Lib/statistics.py
@@ -139,7 +139,7 @@
 from itertools import count, groupby, repeat
 from bisect import bisect_left, bisect_right
 from math import hypot, sqrt, fabs, exp, erf, tau, log, fsum, sumprod
-from math import isfinite, isinf, pi, cos, sin, tan, cosh, asin, atan
+from math import isfinite, isinf, pi, cos, sin, tan, cosh, asin, atan, acos
 from functools import reduce
 from operator import itemgetter
 from collections import Counter, namedtuple, defaultdict
diff --git a/Lib/test/test_statistics.py b/Lib/test/test_statistics.py
index 204787a88a9c5f..ff8d743fa58726 100644
--- a/Lib/test/test_statistics.py
+++ b/Lib/test/test_statistics.py
@@ -2426,6 +2426,17 @@ def integrate(func, low, high, steps=10_000):
         self.assertEqual(f_hat(-1.0), 1/2)
         self.assertEqual(f_hat(1.0), 1/2)
 
+    def test_kde_kernel_invcdfs(self):
+        kernel_invcdfs = statistics._kernel_invcdfs
+        kde = statistics.kde
+
+        # Verify that cdf / invcdf will round trip
+        xarr = [i/100 for i in range(-100, 101)]
+        for kernel, invcdf in kernel_invcdfs.items():
+            with self.subTest(kernel=kernel):
+                cdf = kde([0.0], h=1.0, kernel=kernel, cumulative=True)
+                for x in xarr:
+                    self.assertAlmostEqual(invcdf(cdf(x)), x, places=5)
 
 class TestQuantiles(unittest.TestCase):
 

From 1ca870ace5af8e5babe2e9436671de902c66d2be Mon Sep 17 00:00:00 2001
From: Raymond Hettinger <raymond.hettinger@gmail.com>
Date: Fri, 19 Apr 2024 01:21:38 -0500
Subject: [PATCH 21/37] Better _quartic_invcdf_estimate

---
 Lib/statistics.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/Lib/statistics.py b/Lib/statistics.py
index 04833aeff5cb8e..912148c1a209ae 100644
--- a/Lib/statistics.py
+++ b/Lib/statistics.py
@@ -1729,8 +1729,15 @@ def _simple_s_curve(power):
                       if p <= 1/2 else
                       1 - (2 - 2*p) ** power)
 
+def _quartic_invcdf_estimate(p):
+    sign, p = (1, p) if p <= 1/2 else (-1, 1 - p)
+    x = (2 * p) ** 0.4258865685331 - 1  # (204/479)
+    if p >= 0.004:
+        x += 0.026818732 * sin(7.101753784 * p + 2.732305658)
+    return x * sign
+
 _quartic_invcdf = _newton_raphson(
-    f_inv_estimate = _simple_s_curve(0.4258865685331),   # (204/479)
+    f_inv_estimate = _quartic_invcdf_estimate,
     f = lambda t: 3/16 * t**5 - 5/8 * t**3 + 15/16 * t + 1/2,
     f_prime = lambda t: 15/16 * (1.0 - t * t) ** 2)
 

From 96a5f14d2f04c4441fecb9e084228fd9f8caede1 Mon Sep 17 00:00:00 2001
From: Raymond Hettinger <raymond.hettinger@gmail.com>
Date: Fri, 19 Apr 2024 01:36:41 -0500
Subject: [PATCH 22/37] Make standalone _triweight_invcdf_estimate()

---
 Lib/statistics.py | 19 +++++++------------
 1 file changed, 7 insertions(+), 12 deletions(-)

diff --git a/Lib/statistics.py b/Lib/statistics.py
index 912148c1a209ae..2a93297db98e0a 100644
--- a/Lib/statistics.py
+++ b/Lib/statistics.py
@@ -1722,13 +1722,6 @@ def f_inv(y):
         return x
     return f_inv
 
-def _simple_s_curve(power):
-    "S-curve that crosses (0, -1), (1/2, 0), (1, 1)."
-    # Approximates the inverse CDF for kernels with support: -1 < x < 1.
-    return lambda p: ((2 * p) ** power - 1
-                      if p <= 1/2 else
-                      1 - (2 - 2*p) ** power)
-
 def _quartic_invcdf_estimate(p):
     sign, p = (1, p) if p <= 1/2 else (-1, 1 - p)
     x = (2 * p) ** 0.4258865685331 - 1  # (204/479)
@@ -1741,8 +1734,13 @@ def _quartic_invcdf_estimate(p):
     f = lambda t: 3/16 * t**5 - 5/8 * t**3 + 15/16 * t + 1/2,
     f_prime = lambda t: 15/16 * (1.0 - t * t) ** 2)
 
+def _triweight_invcdf_estimate(p):
+    sign, p = (1, p) if p <= 1/2 else (-1, 1 - p)
+    x = (2 * p) ** 0.3400218741872791 - 1  # (17/50)
+    return x * sign
+
 _triweight_invcdf = _newton_raphson(
-    f_inv_estimate = _simple_s_curve(0.3400218741872791),  # (17/50)
+    f_inv_estimate = _triweight_invcdf_estimate,
     f = lambda t: 35/32 * (-1/7*t**7 + 3/5*t**5 - t**3 + t) + 1/2,
     f_prime = lambda t: 35/32 * (1.0 - t * t) ** 3)
 
@@ -1767,10 +1765,7 @@ def kde_random(data, h, kernel='normal', *, seed=None):
     probability density function created by kde(data, h, kernel).
 
     Providing a *seed* allows reproducible selections within a single
-    thread.  In the future, the selection method for the parabolic,
-    quartic, and triweight kernels may be replaced with faster
-    algorithms that give different results.  The seed may be an
-    integer, float, str, or bytes.
+    thread.  The seed may be an integer, float, str, or bytes.
 
     A StatisticsError will be raised if the *data* sequence is empty.
 

From f05eddbda6c1174b9407a6ff790447123a2f023e Mon Sep 17 00:00:00 2001
From: Raymond Hettinger <raymond.hettinger@gmail.com>
Date: Fri, 19 Apr 2024 01:43:10 -0500
Subject: [PATCH 23/37] Floats everywhere

---
 Lib/statistics.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/Lib/statistics.py b/Lib/statistics.py
index 2a93297db98e0a..0ec57108ead6e8 100644
--- a/Lib/statistics.py
+++ b/Lib/statistics.py
@@ -1723,8 +1723,8 @@ def f_inv(y):
     return f_inv
 
 def _quartic_invcdf_estimate(p):
-    sign, p = (1, p) if p <= 1/2 else (-1, 1 - p)
-    x = (2 * p) ** 0.4258865685331 - 1  # (204/479)
+    sign, p = (1.0, p) if p <= 1/2 else (-1.0, 1.0 - p)
+    x = (2.0 * p) ** 0.4258865685331 - 1.0
     if p >= 0.004:
         x += 0.026818732 * sin(7.101753784 * p + 2.732305658)
     return x * sign
@@ -1735,8 +1735,8 @@ def _quartic_invcdf_estimate(p):
     f_prime = lambda t: 15/16 * (1.0 - t * t) ** 2)
 
 def _triweight_invcdf_estimate(p):
-    sign, p = (1, p) if p <= 1/2 else (-1, 1 - p)
-    x = (2 * p) ** 0.3400218741872791 - 1  # (17/50)
+    sign, p = (1.0, p) if p <= 1/2 else (-1.0, 1.0 - p)
+    x = (2.0 * p) ** 0.3400218741872791 - 1.0
     return x * sign
 
 _triweight_invcdf = _newton_raphson(

From 065be116969cdb8bda6f967da14b656a8be5090f Mon Sep 17 00:00:00 2001
From: Raymond Hettinger <raymond.hettinger@gmail.com>
Date: Tue, 23 Apr 2024 18:25:50 -0500
Subject: [PATCH 24/37] .

---
 Lib/statistics.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Lib/statistics.py b/Lib/statistics.py
index 0ec57108ead6e8..011d95065d03ac 100644
--- a/Lib/statistics.py
+++ b/Lib/statistics.py
@@ -1725,8 +1725,8 @@ def f_inv(y):
 def _quartic_invcdf_estimate(p):
     sign, p = (1.0, p) if p <= 1/2 else (-1.0, 1.0 - p)
     x = (2.0 * p) ** 0.4258865685331 - 1.0
-    if p >= 0.004:
-        x += 0.026818732 * sin(7.101753784 * p + 2.732305658)
+    if p >= 0.004 < 0.499:
+        x += 0.026818732 * sin(7.101753784 * p + 2.73230839482953)
     return x * sign
 
 _quartic_invcdf = _newton_raphson(

From d71ca9d396ca41f62cf2253c518cf0a868be7b48 Mon Sep 17 00:00:00 2001
From: Raymond Hettinger <raymond.hettinger@gmail.com>
Date: Mon, 29 Apr 2024 21:47:34 -0700
Subject: [PATCH 25/37] Never used the global, shared Random instance.

---
 Lib/statistics.py | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/Lib/statistics.py b/Lib/statistics.py
index 011d95065d03ac..4fd17cccf1412d 100644
--- a/Lib/statistics.py
+++ b/Lib/statistics.py
@@ -1788,13 +1788,9 @@ def kde_random(data, h, kernel='normal', *, seed=None):
     if h <= 0.0:
         raise StatisticsError(f'Bandwidth h must be positive, not {h=!r}')
 
-    if seed is None:
-        random = _random.random
-        choice = _random.choice
-    else:
-        prng = _random.Random(seed)
-        random = prng.random
-        choice = prng.choice
+    prng = _random.Random(seed)
+    random = prng.random
+    choice = prng.choice
 
     try:
         kernel_invcdf = _kernel_invcdfs[kernel]

From ee43ed76328f1a642b102a65b5c64112224dcd9d Mon Sep 17 00:00:00 2001
From: Raymond Hettinger <raymond.hettinger@gmail.com>
Date: Mon, 29 Apr 2024 21:48:36 -0700
Subject: [PATCH 26/37] .

---
 Lib/statistics.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/Lib/statistics.py b/Lib/statistics.py
index 4fd17cccf1412d..01c5c49e65b74a 100644
--- a/Lib/statistics.py
+++ b/Lib/statistics.py
@@ -1788,15 +1788,15 @@ def kde_random(data, h, kernel='normal', *, seed=None):
     if h <= 0.0:
         raise StatisticsError(f'Bandwidth h must be positive, not {h=!r}')
 
-    prng = _random.Random(seed)
-    random = prng.random
-    choice = prng.choice
-
     try:
         kernel_invcdf = _kernel_invcdfs[kernel]
     except KeyError:
         raise StatisticsError(f'Unknown kernel name: {kernel!r}')
 
+    prng = _random.Random(seed)
+    random = prng.random
+    choice = prng.choice
+
     def rand():
         return choice(data) + h * kernel_invcdf(random())
 

From f85c3037639e303de732f366bdb04510f72f7552 Mon Sep 17 00:00:00 2001
From: Raymond Hettinger <raymond.hettinger@gmail.com>
Date: Tue, 30 Apr 2024 00:27:14 -0700
Subject: [PATCH 27/37] Update whatsnew

---
 Doc/whatsnew/3.13.rst | 3 ++-
 Lib/statistics.py     | 3 +++
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/Doc/whatsnew/3.13.rst b/Doc/whatsnew/3.13.rst
index 98349a5984bb7e..b04cfd59f96236 100644
--- a/Doc/whatsnew/3.13.rst
+++ b/Doc/whatsnew/3.13.rst
@@ -722,7 +722,8 @@ statistics
 
 * Add :func:`statistics.kde` for kernel density estimation.
   This makes it possible to estimate a continuous probability density function
-  from a fixed number of discrete samples.
+  from a fixed number of discrete samples.  Also added :func:`kde_random`
+  for sampling the estimated probability density function.
   (Contributed by Raymond Hettinger in :gh:`115863`.)
 
 .. _whatsnew313-subprocess:
diff --git a/Lib/statistics.py b/Lib/statistics.py
index 01c5c49e65b74a..f3ce2d8b6b442a 100644
--- a/Lib/statistics.py
+++ b/Lib/statistics.py
@@ -1078,6 +1078,7 @@ def quantiles(data, *, n=4, method='exclusive'):
         if ld == 1:
             return data * (n - 1)
         raise StatisticsError('must have at least one data point')
+
     if method == 'inclusive':
         m = ld - 1
         result = []
@@ -1086,6 +1087,7 @@ def quantiles(data, *, n=4, method='exclusive'):
             interpolated = (data[j] * (n - delta) + data[j + 1] * delta) / n
             result.append(interpolated)
         return result
+
     if method == 'exclusive':
         m = ld + 1
         result = []
@@ -1096,6 +1098,7 @@ def quantiles(data, *, n=4, method='exclusive'):
             interpolated = (data[j - 1] * (n - delta) + data[j] * delta) / n
             result.append(interpolated)
         return result
+
     raise ValueError(f'Unknown method: {method!r}')
 
 

From 544ca8fee9feb3afd7646fdcc80e8c37cebfb1cf Mon Sep 17 00:00:00 2001
From: Raymond Hettinger <raymond.hettinger@gmail.com>
Date: Tue, 30 Apr 2024 00:36:27 -0700
Subject: [PATCH 28/37] Expand "it" variable name to "iterator"

---
 Doc/library/itertools.rst | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/Doc/library/itertools.rst b/Doc/library/itertools.rst
index 9a5cb8be37d349..df64b7cda8b3b5 100644
--- a/Doc/library/itertools.rst
+++ b/Doc/library/itertools.rst
@@ -903,9 +903,9 @@ and :term:`generators <generator>` which incur interpreter overhead.
    def sliding_window(iterable, n):
        "Collect data into overlapping fixed-length chunks or blocks."
        # sliding_window('ABCDEFG', 4) → ABCD BCDE CDEF DEFG
-       it = iter(iterable)
-       window = collections.deque(islice(it, n-1), maxlen=n)
-       for x in it:
+       iterator = iter(iterable)
+       window = collections.deque(islice(iterator, n-1), maxlen=n)
+       for x in iterator:
            window.append(x)
            yield tuple(window)
 
@@ -955,8 +955,8 @@ and :term:`generators <generator>` which incur interpreter overhead.
        seq_index = getattr(iterable, 'index', None)
        if seq_index is None:
            # Path for general iterables
-           it = islice(iterable, start, stop)
-           for i, element in enumerate(it, start):
+           iterator = islice(iterable, start, stop)
+           for i, element in enumerate(iterator, start):
                if element is value or element == value:
                    yield i
        else:

From 1249d14388c87f47e4ce0c1985c2b47cf690291b Mon Sep 17 00:00:00 2001
From: Raymond Hettinger <raymond.hettinger@gmail.com>
Date: Fri, 3 May 2024 19:17:29 -0500
Subject: [PATCH 29/37] Test the kde_random() outer function

---
 Lib/test/test_statistics.py | 43 +++++++++++++++++++++++++++++++++++++
 1 file changed, 43 insertions(+)

diff --git a/Lib/test/test_statistics.py b/Lib/test/test_statistics.py
index ff8d743fa58726..1ad917af1be376 100644
--- a/Lib/test/test_statistics.py
+++ b/Lib/test/test_statistics.py
@@ -2438,6 +2438,49 @@ def test_kde_kernel_invcdfs(self):
                 for x in xarr:
                     self.assertAlmostEqual(invcdf(cdf(x)), x, places=5)
 
+    def test_kde_random(self):
+        kde_random = statistics.kde_random
+        StatisticsError = statistics.StatisticsError
+        kernels = ['normal', 'gauss', 'logistic', 'sigmoid', 'rectangular',
+                   'uniform', 'triangular', 'parabolic', 'epanechnikov',
+                   'quartic', 'biweight', 'triweight', 'cosine']
+        sample = [-2.1, -1.3, -0.4, 1.9, 5.1, 6.2]
+
+        # Smoke test
+
+        for kernel in kernels:
+            with self.subTest(kernel=kernel):
+                rand = kde_random(sample, h=1.5, kernel=kernel)
+                selections = [rand() for i in range(10)]
+
+
+        # Check error cases
+
+        with self.assertRaises(StatisticsError):
+            kde_random([], h=1.0)                       # Empty dataset
+        with self.assertRaises(TypeError):
+            kde_random(['abc', 'def'], 1.5)             # Non-numeric data
+        with self.assertRaises(TypeError):
+            kde_random(iter(sample), 1.5)               # Data is not a sequence
+        with self.assertRaises(StatisticsError):
+            kde_random(sample, h=0.0)                   # Zero bandwidth
+        with self.assertRaises(StatisticsError):
+            kde_random(sample, h=0.0)                   # Negative bandwidth
+        with self.assertRaises(TypeError):
+            kde_random(sample, h='str')                 # Wrong bandwidth type
+        with self.assertRaises(StatisticsError):
+            kde_random(sample, h=1.0, kernel='bogus')   # Invalid kernel
+
+        # Test name and docstring of the generated function
+
+        h = 1.5
+        kernel = 'cosine'
+        prng = kde_random(sample, h, kernel)
+        self.assertEqual(prng.__name__, 'rand')
+        self.assertIn(kernel, prng.__doc__)
+        self.assertIn(repr(h), prng.__doc__)
+
+
 class TestQuantiles(unittest.TestCase):
 
     def test_specific_cases(self):

From a09d30b7322935532ba8c957db03a33115d399a2 Mon Sep 17 00:00:00 2001
From: Raymond Hettinger <raymond.hettinger@gmail.com>
Date: Fri, 3 May 2024 19:27:47 -0500
Subject: [PATCH 30/37] Add fully qualified reference

---
 Doc/whatsnew/3.13.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Doc/whatsnew/3.13.rst b/Doc/whatsnew/3.13.rst
index 05579cf3f0a590..c2f106231438e4 100644
--- a/Doc/whatsnew/3.13.rst
+++ b/Doc/whatsnew/3.13.rst
@@ -745,7 +745,7 @@ statistics
 
 * Add :func:`statistics.kde` for kernel density estimation.
   This makes it possible to estimate a continuous probability density function
-  from a fixed number of discrete samples.  Also added :func:`kde_random`
+  from a fixed number of discrete samples.  Also added :func:`statistics.kde_random`
   for sampling the estimated probability density function.
   (Contributed by Raymond Hettinger in :gh:`115863`.)
 

From 405d443dc4ce27f4787a148588ba01069345470f Mon Sep 17 00:00:00 2001
From: Raymond Hettinger <raymond.hettinger@gmail.com>
Date: Fri, 3 May 2024 22:18:37 -0500
Subject: [PATCH 31/37] Add an approximate distribution test

---
 Lib/test/test_statistics.py | 26 +++++++++++++++++++++++++-
 1 file changed, 25 insertions(+), 1 deletion(-)

diff --git a/Lib/test/test_statistics.py b/Lib/test/test_statistics.py
index 1ad917af1be376..39d4376c96e5a1 100644
--- a/Lib/test/test_statistics.py
+++ b/Lib/test/test_statistics.py
@@ -2453,7 +2453,6 @@ def test_kde_random(self):
                 rand = kde_random(sample, h=1.5, kernel=kernel)
                 selections = [rand() for i in range(10)]
 
-
         # Check error cases
 
         with self.assertRaises(StatisticsError):
@@ -2480,6 +2479,31 @@ def test_kde_random(self):
         self.assertIn(kernel, prng.__doc__)
         self.assertIn(repr(h), prng.__doc__)
 
+        # Approximate distribution test
+
+        data = [-2.1, -1.3, -0.4, 1.9, 5.1, 6.2, 7.8, 14.3, 15.1, 15.3, 15.8, 17.0]
+        n = 1_000_000
+        h = 1.75
+
+        for kernel in kernels:
+            with self.subTest(kernel=kernel):
+
+                f_hat = statistics.kde(data, h, kernel)
+                rand = kde_random(data, h, kernel, seed=8675309**2)
+                big_sample = sorted([rand() for i in range(n)])
+
+                def pdf_observed(x, dx=0.1):
+                    # P(x-dx <= X < x+dx) / (2*dx)
+                    i = bisect.bisect_left(big_sample, x - dx)
+                    j = bisect.bisect_right(big_sample, x + dx)
+                    p_nearby = (j - i) / len(big_sample)
+                    return p_nearby / (2 * dx)
+
+                for x in range(-4, 19):
+                    pd1 = f_hat(x)
+                    pd2 = pdf_observed(x)
+                    self.assertTrue(math.isclose(pd1, pd2, abs_tol=0.01))
+
 
 class TestQuantiles(unittest.TestCase):
 

From a4692bf45f1d9aa75598b2bd650127c94c674429 Mon Sep 17 00:00:00 2001
From: Raymond Hettinger <raymond.hettinger@gmail.com>
Date: Fri, 3 May 2024 22:25:55 -0500
Subject: [PATCH 32/37] Use F_hat() for a better estimate

---
 Lib/test/test_statistics.py | 23 ++++++++++++-----------
 1 file changed, 12 insertions(+), 11 deletions(-)

diff --git a/Lib/test/test_statistics.py b/Lib/test/test_statistics.py
index 39d4376c96e5a1..351a35ee08b4b7 100644
--- a/Lib/test/test_statistics.py
+++ b/Lib/test/test_statistics.py
@@ -2484,25 +2484,26 @@ def test_kde_random(self):
         data = [-2.1, -1.3, -0.4, 1.9, 5.1, 6.2, 7.8, 14.3, 15.1, 15.3, 15.8, 17.0]
         n = 1_000_000
         h = 1.75
+        dx = 0.1
+
+        def p_expected(x):
+            return F_hat(x + dx) - F_hat(x - dx)
+
+        def p_observed(x):
+            # P(x-dx <= X < x+dx) / (2*dx)
+            i = bisect.bisect_left(big_sample, x - dx)
+            j = bisect.bisect_right(big_sample, x + dx)
+            return (j - i) / len(big_sample)
 
         for kernel in kernels:
             with self.subTest(kernel=kernel):
 
-                f_hat = statistics.kde(data, h, kernel)
+                F_hat = statistics.kde(data, h, kernel, cumulative=True)
                 rand = kde_random(data, h, kernel, seed=8675309**2)
                 big_sample = sorted([rand() for i in range(n)])
 
-                def pdf_observed(x, dx=0.1):
-                    # P(x-dx <= X < x+dx) / (2*dx)
-                    i = bisect.bisect_left(big_sample, x - dx)
-                    j = bisect.bisect_right(big_sample, x + dx)
-                    p_nearby = (j - i) / len(big_sample)
-                    return p_nearby / (2 * dx)
-
                 for x in range(-4, 19):
-                    pd1 = f_hat(x)
-                    pd2 = pdf_observed(x)
-                    self.assertTrue(math.isclose(pd1, pd2, abs_tol=0.01))
+                    self.assertTrue(math.isclose(p_observed(x), p_expected(x), abs_tol=0.001))
 
 
 class TestQuantiles(unittest.TestCase):

From ec2d9f29ab1b3054333e0394da7aad8187d965c2 Mon Sep 17 00:00:00 2001
From: Raymond Hettinger <raymond.hettinger@gmail.com>
Date: Fri, 3 May 2024 22:30:01 -0500
Subject: [PATCH 33/37] Test the curve in more places

---
 Lib/test/test_statistics.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/Lib/test/test_statistics.py b/Lib/test/test_statistics.py
index 351a35ee08b4b7..fbf15bca1f5e46 100644
--- a/Lib/test/test_statistics.py
+++ b/Lib/test/test_statistics.py
@@ -2479,7 +2479,7 @@ def test_kde_random(self):
         self.assertIn(kernel, prng.__doc__)
         self.assertIn(repr(h), prng.__doc__)
 
-        # Approximate distribution test
+        # Approximate distribution test: Compare a random sample to expected distribution
 
         data = [-2.1, -1.3, -0.4, 1.9, 5.1, 6.2, 7.8, 14.3, 15.1, 15.3, 15.8, 17.0]
         n = 1_000_000
@@ -2502,7 +2502,8 @@ def p_observed(x):
                 rand = kde_random(data, h, kernel, seed=8675309**2)
                 big_sample = sorted([rand() for i in range(n)])
 
-                for x in range(-4, 19):
+                for x in range(-40, 190):
+                    x /= 10
                     self.assertTrue(math.isclose(p_observed(x), p_expected(x), abs_tol=0.001))
 
 

From c612c1f0f4de55b9921e983d85ec83f16219b965 Mon Sep 17 00:00:00 2001
From: Raymond Hettinger <raymond.hettinger@gmail.com>
Date: Fri, 3 May 2024 22:39:35 -0500
Subject: [PATCH 34/37] Refine the reproducibility note.

---
 Doc/library/statistics.rst | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/Doc/library/statistics.rst b/Doc/library/statistics.rst
index e9f6eec0eba0ba..0a4219921e2921 100644
--- a/Doc/library/statistics.rst
+++ b/Doc/library/statistics.rst
@@ -317,15 +317,13 @@ However, for reading convenience, most of the examples show sorted sequences.
    Return a function that makes a random selection from the estimated
    probability density function produced by ``kde(data, h, kernel)``.
 
-   Providing a *seed* allows reproducible selections within a single
-   thread.  In the future, the selection method for the *parabolic*,
-   *quartic*, and *triweight* kernels may be replaced with faster
-   algorithms that give different results.  The seed may be an integer,
-   float, str, or bytes.
+   Providing a *seed* allows reproducible selections. In the future, the
+   values may change slightly as more accurate kernel inverse estimates
+   are implemented.  The seed may be an integer, float, str, or bytes.
 
    A :exc:`StatisticsError` will be raised if the *data* sequence is empty.
 
-   Continuing the above example for :func:`kde`, we can use
+   Continuing the example for :func:`kde`, we can use
    :func:`kde_random` to generate new random selections from an
    estimated probability density function:
 

From 1324952149bdf11aca357ebd3f2aa48e03c05d2d Mon Sep 17 00:00:00 2001
From: Raymond Hettinger <raymond.hettinger@gmail.com>
Date: Fri, 3 May 2024 22:41:13 -0500
Subject: [PATCH 35/37] Missing CDF qualifier

---
 Doc/library/statistics.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Doc/library/statistics.rst b/Doc/library/statistics.rst
index 0a4219921e2921..d5a316e45ee3e2 100644
--- a/Doc/library/statistics.rst
+++ b/Doc/library/statistics.rst
@@ -318,7 +318,7 @@ However, for reading convenience, most of the examples show sorted sequences.
    probability density function produced by ``kde(data, h, kernel)``.
 
    Providing a *seed* allows reproducible selections. In the future, the
-   values may change slightly as more accurate kernel inverse estimates
+   values may change slightly as more accurate kernel inverse CDF estimates
    are implemented.  The seed may be an integer, float, str, or bytes.
 
    A :exc:`StatisticsError` will be raised if the *data* sequence is empty.

From 13e702f35772980e0304900d97130a5ce95f884d Mon Sep 17 00:00:00 2001
From: Raymond Hettinger <raymond.hettinger@gmail.com>
Date: Fri, 3 May 2024 22:42:48 -0500
Subject: [PATCH 36/37] Word smithing

---
 Doc/whatsnew/3.13.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Doc/whatsnew/3.13.rst b/Doc/whatsnew/3.13.rst
index c2f106231438e4..269a7cc985ad19 100644
--- a/Doc/whatsnew/3.13.rst
+++ b/Doc/whatsnew/3.13.rst
@@ -746,7 +746,7 @@ statistics
 * Add :func:`statistics.kde` for kernel density estimation.
   This makes it possible to estimate a continuous probability density function
   from a fixed number of discrete samples.  Also added :func:`statistics.kde_random`
-  for sampling the estimated probability density function.
+  for sampling from the estimated probability density function.
   (Contributed by Raymond Hettinger in :gh:`115863`.)
 
 .. _whatsnew313-subprocess:

From 15a4764887a3dad5212ca53e50548f74b98d49bd Mon Sep 17 00:00:00 2001
From: Raymond Hettinger <raymond.hettinger@gmail.com>
Date: Fri, 3 May 2024 22:44:02 -0500
Subject: [PATCH 37/37] Word smithing

---
 Lib/test/test_statistics.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Lib/test/test_statistics.py b/Lib/test/test_statistics.py
index fbf15bca1f5e46..fe6c59c30dae28 100644
--- a/Lib/test/test_statistics.py
+++ b/Lib/test/test_statistics.py
@@ -2479,7 +2479,7 @@ def test_kde_random(self):
         self.assertIn(kernel, prng.__doc__)
         self.assertIn(repr(h), prng.__doc__)
 
-        # Approximate distribution test: Compare a random sample to expected distribution
+        # Approximate distribution test: Compare a random sample to the expected distribution
 
         data = [-2.1, -1.3, -0.4, 1.9, 5.1, 6.2, 7.8, 14.3, 15.1, 15.3, 15.8, 17.0]
         n = 1_000_000