From 376df8a48e7f762522360e190c2e51b0c3a785be Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Fri, 27 Jan 2023 17:12:58 -0600 Subject: [PATCH 1/7] FMA version --- Modules/mathmodule.c | 24 +++--------------------- 1 file changed, 3 insertions(+), 21 deletions(-) diff --git a/Modules/mathmodule.c b/Modules/mathmodule.c index 2da50bbd54b57f..f7c0f3692af38f 100644 --- a/Modules/mathmodule.c +++ b/Modules/mathmodule.c @@ -2835,10 +2835,6 @@ long_add_would_overflow(long a, long b) Double and triple length extended precision floating point arithmetic based on: - A Floating-Point Technique for Extending the Available Precision - by T. J. Dekker - https://csclub.uwaterloo.ca/~pbarfuss/dekker1971.pdf - Accurate Sum and Dot Product by Takeshi Ogita, Siegfried M. Rump, and Shin’Ichi Oishi https://doi.org/10.1137/030601818 @@ -2858,26 +2854,12 @@ twosum(double a, double b) return (DoubleLength) {x, y}; } -static inline DoubleLength -dl_split(double x) { - // Rump Algorithm 3.2 Error-free splitting of a floating point number - // Dekker (5.5) and (5.6). - double t = x * 134217729.0; // Veltkamp constant = 2.0 ** 27 + 1 - double hi = t - (t - x); - double lo = x - hi; - return (DoubleLength) {hi, lo}; -} - static inline DoubleLength dl_mul(double x, double y) { - // Dekker (5.12) and mul12() - DoubleLength xx = dl_split(x); - DoubleLength yy = dl_split(y); - double p = xx.hi * yy.hi; - double q = xx.hi * yy.lo + xx.lo * yy.hi; - double z = p + q; - double zz = p - z + q + xx.lo * yy.lo; + // Rump Algorithm 3.5. Error-free transformation of a product + double z = x * y; + double zz = fma(x, y, -z); return (DoubleLength) {z, zz}; } From 3f700be54074178102fce1020e20b5cb9f7819de Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Fri, 27 Jan 2023 17:35:49 -0600 Subject: [PATCH 2/7] Neated-up --- Modules/mathmodule.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/Modules/mathmodule.c b/Modules/mathmodule.c index f7c0f3692af38f..0159fcc1e7a446 100644 --- a/Modules/mathmodule.c +++ b/Modules/mathmodule.c @@ -2844,20 +2844,20 @@ based on: typedef struct{ double hi; double lo; } DoubleLength; -static inline DoubleLength -twosum(double a, double b) +static DoubleLength +dl_sum(double a, double b) { - // Rump Algorithm 3.1 Error-free transformation of the sum + // Algorithm 3.1 Error-free transformation of the sum double x = a + b; double z = x - a; double y = (a - (x - z)) + (b - z); return (DoubleLength) {x, y}; } -static inline DoubleLength +static DoubleLength dl_mul(double x, double y) { - // Rump Algorithm 3.5. Error-free transformation of a product + // Algorithm 3.5. Error-free transformation of a product double z = x * y; double zz = fma(x, y, -z); return (DoubleLength) {z, zz}; @@ -2867,21 +2867,21 @@ typedef struct { double hi; double lo; double tiny; } TripleLength; static const TripleLength tl_zero = {0.0, 0.0, 0.0}; -static inline TripleLength +static TripleLength tl_fma(TripleLength total, double x, double y) { - // Rump Algorithm 5.10 with K=3 and using SumKVert + // Algorithm 5.10 with SumKVert for K=3 DoubleLength pr = dl_mul(x, y); - DoubleLength sm = twosum(total.hi, pr.hi); - DoubleLength r1 = twosum(total.lo, pr.lo); - DoubleLength r2 = twosum(r1.hi, sm.lo); + DoubleLength sm = dl_sum(total.hi, pr.hi); + DoubleLength r1 = dl_sum(total.lo, pr.lo); + DoubleLength r2 = dl_sum(r1.hi, sm.lo); return (TripleLength) {sm.hi, r2.hi, total.tiny + r1.lo + r2.lo}; } -static inline double +static double tl_to_d(TripleLength total) { - DoubleLength last = twosum(total.lo, total.hi); + DoubleLength last = dl_sum(total.lo, total.hi); return total.tiny + last.lo + last.hi; } From 43a7ba9afa8f64c4e8ee3f208f0790dc737397ef Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Fri, 27 Jan 2023 17:57:12 -0600 Subject: [PATCH 3/7] Compute TWO_PRODUCT() with FMA --- Lib/test/test_math.py | 86 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 86 insertions(+) diff --git a/Lib/test/test_math.py b/Lib/test/test_math.py index b8ac8f32055d33..513ee65f29decd 100644 --- a/Lib/test/test_math.py +++ b/Lib/test/test_math.py @@ -1369,6 +1369,92 @@ def run(func, *args): args, ) + @requires_IEEE_754 + @unittest.skipIf(HAVE_DOUBLE_ROUNDING, + "sumprod() accuracy not guaranteed on machines with double rounding") + @support.cpython_only # Other implementations may choose a different algorithm + @support.requires_resource('cpu') + def test_sumprod_extended_precision_accuracy(self): + sumprod = math.sumprod + import operator + from fractions import Fraction + from itertools import starmap + from collections import namedtuple + from math import log2, exp2, log10, fabs + from random import choices, uniform, shuffle + from statistics import median + from functools import partial + from pprint import pp + + DotExample = namedtuple('DotExample', ('x', 'y', 'target_sumprod', 'condition')) + + def DotExact(x, y): + vec1 = map(Fraction, x) + vec2 = map(Fraction, y) + return sum(starmap(operator.mul, zip(vec1, vec2, strict=True))) + + def Condition(x, y): + return 2.0 * DotExact(map(abs, x), map(abs, y)) / abs(DotExact(x, y)) + + def linspace(lo, hi, n): + width = (hi - lo) / (n - 1) + return [lo + width * i for i in range(n)] + + def GenDot(n, c=1e12): + """ Algorithm 6.1 (GenDot) works as follows. The condition number (5.7) of + the dot product xT y is proportional to the degree of cancellation. In + order to achieve a prescribed cancellation, we generate the first half of + the vectors x and y randomly within a large exponent range. This range is + chosen according to the anticipated condition number. The second half of x + and y is then constructed choosing xi randomly with decreasing exponent, + and calculating yi such that some cancellation occurs. Finally, we permute + the vectors x, y randomly and calculate the achieved condition number. + """ + + assert n >= 6 + n2 = n // 2 + x = [0.0] * n + y = [0.0] * n + b = log2(c) + + # First half with exponents from 0 to |_b/2_| and random ints in between + e = choices(range(int(b/2)), k=n2) + e[0] = int(b / 2) + 1 + e[-1] = 0.0 + + x[:n2] = [uniform(-1.0, 1.0) * exp2(p) for p in e] + y[:n2] = [uniform(-1.0, 1.0) * exp2(p) for p in e] + + # Second half + e = list(map(round, linspace(b/2, 0.0 , n-n2))) + for i in range(n2, n): + x[i] = uniform(-1.0, 1.0) * exp2(e[i - n2]) + y[i] = (uniform(-1.0, 1.0) * exp2(e[i - n2]) - DotExact(x, y)) / x[i] + + # Shuffle + pairs = list(zip(x, y)) + shuffle(pairs) + x, y = zip(*pairs) + + return DotExample(x, y, DotExact(x, y), Condition(x, y)) + + def RelativeError(res, ex): + x, y, target_sumprod, condition = ex + n = DotExact(list(x) + [-res], list(y) + [1]) + return fabs(n / target_sumprod) + + def Trial(dotfunc, c=10e7, n=10): + ex = GenDot(10, c) + res = dotfunc(ex.x, ex.y) + return RelativeError(res, ex) + + times = 1000 # Number of trials + n = 20 # Length of vectors + c = 1e30 # Target condition number + + relative_err = median(Trial(sumprod, c=c, n=n) for i in range(times)) + self.assertLess(relative_err, 1e-16) + def testModf(self): self.assertRaises(TypeError, math.modf) From 925235f8a212a10c3a30c468573173fa0944d6f6 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Fri, 27 Jan 2023 18:05:15 -0600 Subject: [PATCH 4/7] Remove default values from the test support functions --- Lib/test/test_math.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Lib/test/test_math.py b/Lib/test/test_math.py index 513ee65f29decd..0687f87a2a7023 100644 --- a/Lib/test/test_math.py +++ b/Lib/test/test_math.py @@ -1400,7 +1400,7 @@ def linspace(lo, hi, n): width = (hi - lo) / (n - 1) return [lo + width * i for i in range(n)] - def GenDot(n, c=1e12): + def GenDot(n, c): """ Algorithm 6.1 (GenDot) works as follows. The condition number (5.7) of the dot product xT y is proportional to the degree of cancellation. In order to achieve a prescribed cancellation, we generate the first half of @@ -1443,7 +1443,7 @@ def RelativeError(res, ex): n = DotExact(list(x) + [-res], list(y) + [1]) return fabs(n / target_sumprod) - def Trial(dotfunc, c=10e7, n=10): + def Trial(dotfunc, c, n): ex = GenDot(10, c) res = dotfunc(ex.x, ex.y) return RelativeError(res, ex) @@ -1452,7 +1452,7 @@ def Trial(dotfunc, c=10e7, n=10): n = 20 # Length of vectors c = 1e30 # Target condition number - relative_err = median(Trial(sumprod, c=c, n=n) for i in range(times)) + relative_err = median(Trial(sumprod, c, n) for i in range(times)) self.assertLess(relative_err, 1e-16) def testModf(self): From 16ea0dbc5ed28049dc92d599af9d127c6df32b1b Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Fri, 27 Jan 2023 18:10:58 -0600 Subject: [PATCH 5/7] Remove unused imports --- Lib/test/test_math.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/Lib/test/test_math.py b/Lib/test/test_math.py index 0687f87a2a7023..fcdec93484e000 100644 --- a/Lib/test/test_math.py +++ b/Lib/test/test_math.py @@ -1375,16 +1375,13 @@ def run(func, *args): @support.cpython_only # Other implementations may choose a different algorithm @support.requires_resource('cpu') def test_sumprod_extended_precision_accuracy(self): - sumprod = math.sumprod import operator from fractions import Fraction from itertools import starmap from collections import namedtuple - from math import log2, exp2, log10, fabs + from math import log2, exp2, fabs from random import choices, uniform, shuffle from statistics import median - from functools import partial - from pprint import pp DotExample = namedtuple('DotExample', ('x', 'y', 'target_sumprod', 'condition')) @@ -1452,7 +1449,7 @@ def Trial(dotfunc, c, n): n = 20 # Length of vectors c = 1e30 # Target condition number - relative_err = median(Trial(sumprod, c, n) for i in range(times)) + relative_err = median(Trial(math.sumprod, c, n) for i in range(times)) self.assertLess(relative_err, 1e-16) def testModf(self): From 66bcff34064132f9dae0dd82e503aec1ec4970dd Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Fri, 27 Jan 2023 20:32:34 -0600 Subject: [PATCH 6/7] Make tl_signature match argument order of the fma() in C --- Modules/mathmodule.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Modules/mathmodule.c b/Modules/mathmodule.c index 0159fcc1e7a446..22abeaa6e34222 100644 --- a/Modules/mathmodule.c +++ b/Modules/mathmodule.c @@ -2868,7 +2868,7 @@ typedef struct { double hi; double lo; double tiny; } TripleLength; static const TripleLength tl_zero = {0.0, 0.0, 0.0}; static TripleLength -tl_fma(TripleLength total, double x, double y) +tl_fma(double x, double y, TripleLength total) { // Algorithm 5.10 with SumKVert for K=3 DoubleLength pr = dl_mul(x, y); @@ -3048,7 +3048,7 @@ math_sumprod_impl(PyObject *module, PyObject *p, PyObject *q) } else { goto finalize_flt_path; } - TripleLength new_flt_total = tl_fma(flt_total, flt_p, flt_q); + TripleLength new_flt_total = tl_fma(flt_p, flt_q, flt_total); if (isfinite(new_flt_total.hi)) { flt_total = new_flt_total; flt_total_in_use = true; From 1e97b8a648d08e2e867aaa7440d4efb82f7c12d5 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Fri, 27 Jan 2023 20:34:38 -0600 Subject: [PATCH 7/7] Beautify comments --- Modules/mathmodule.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/Modules/mathmodule.c b/Modules/mathmodule.c index 22abeaa6e34222..481e29915464f1 100644 --- a/Modules/mathmodule.c +++ b/Modules/mathmodule.c @@ -2832,8 +2832,7 @@ long_add_would_overflow(long a, long b) } /* -Double and triple length extended precision floating point arithmetic -based on: +Double and triple length extended precision algorithms from: Accurate Sum and Dot Product by Takeshi Ogita, Siegfried M. Rump, and Shin’Ichi Oishi @@ -2847,7 +2846,7 @@ typedef struct{ double hi; double lo; } DoubleLength; static DoubleLength dl_sum(double a, double b) { - // Algorithm 3.1 Error-free transformation of the sum + /* Algorithm 3.1 Error-free transformation of the sum */ double x = a + b; double z = x - a; double y = (a - (x - z)) + (b - z); @@ -2857,7 +2856,7 @@ dl_sum(double a, double b) static DoubleLength dl_mul(double x, double y) { - // Algorithm 3.5. Error-free transformation of a product + /* Algorithm 3.5. Error-free transformation of a product */ double z = x * y; double zz = fma(x, y, -z); return (DoubleLength) {z, zz}; @@ -2870,7 +2869,7 @@ static const TripleLength tl_zero = {0.0, 0.0, 0.0}; static TripleLength tl_fma(double x, double y, TripleLength total) { - // Algorithm 5.10 with SumKVert for K=3 + /* Algorithm 5.10 with SumKVert for K=3 */ DoubleLength pr = dl_mul(x, y); DoubleLength sm = dl_sum(total.hi, pr.hi); DoubleLength r1 = dl_sum(total.lo, pr.lo);