Skip to content
Merged
Changes from 1 commit
Commits
Show all changes
40 commits
Select commit Hold shift + click to select a range
bbd2da9
Merge pull request #1 from python/master
rhettinger Mar 16, 2021
74bdf1b
Merge branch 'master' of github.com:python/cpython
rhettinger Mar 22, 2021
6c53f1a
Merge branch 'master' of github.com:python/cpython
rhettinger Mar 22, 2021
a487c4f
.
rhettinger Mar 24, 2021
eb56423
.
rhettinger Mar 25, 2021
cc7ba06
.
rhettinger Mar 26, 2021
d024dd0
.
rhettinger Apr 22, 2021
b10f912
merge
rhettinger May 5, 2021
fb6744d
merge
rhettinger May 6, 2021
7f21a1c
Merge branch 'main' of github.com:python/cpython
rhettinger Aug 15, 2021
7da42d4
Merge branch 'main' of github.com:rhettinger/cpython
rhettinger Aug 25, 2021
e31757b
Merge branch 'main' of github.com:python/cpython
rhettinger Aug 31, 2021
f058a6f
Merge branch 'main' of github.com:python/cpython
rhettinger Aug 31, 2021
1fc29bd
Merge branch 'main' of github.com:python/cpython
rhettinger Sep 4, 2021
e5c0184
Merge branch 'main' of github.com:python/cpython
rhettinger Oct 30, 2021
3c86ec1
Merge branch 'main' of github.com:python/cpython
rhettinger Nov 9, 2021
96675e4
Merge branch 'main' of github.com:rhettinger/cpython
rhettinger Nov 9, 2021
de558c6
Merge branch 'main' of github.com:python/cpython
rhettinger Nov 9, 2021
418a07f
Merge branch 'main' of github.com:python/cpython
rhettinger Nov 14, 2021
ea23a8b
Merge branch 'main' of github.com:python/cpython
rhettinger Nov 21, 2021
ba248b7
Merge branch 'main' of github.com:python/cpython
rhettinger Nov 27, 2021
9bc1df1
Merge branch 'main' of github.com:python/cpython
rhettinger Dec 1, 2021
d4466ba
Merge branch 'main' of github.com:python/cpython
rhettinger Dec 1, 2021
a89f02e
Merge branch 'main' of github.com:python/cpython
rhettinger Dec 8, 2021
aae9a5f
Merge branch 'main' of github.com:python/cpython
rhettinger Dec 10, 2021
7ba634b
Merge branch 'main' of github.com:python/cpython
rhettinger Jan 1, 2022
0b54723
Add doctest and improve readability for move_to_end() example.
rhettinger Jan 3, 2022
6ce943f
Single pass sum of squares
rhettinger Jan 4, 2022
c8e2de7
Use len() to get the count
rhettinger Jan 4, 2022
45d83da
Avoid converting iterators to lists
rhettinger Jan 4, 2022
b1a89be
Neaten-up
rhettinger Jan 4, 2022
b4d2797
Add blurb
rhettinger Jan 4, 2022
712f648
Avoid touching collections.rst
rhettinger Jan 4, 2022
dc98276
Accumulate unsquared denominators
rhettinger Jan 4, 2022
6b2e8ca
Update Lib/statistics.py
rhettinger Jan 4, 2022
ae382ff
Make mean() single pass over iterators
rhettinger Jan 4, 2022
2e03c7a
Merge branch 'statistics_fast_ss' of github.com:rhettinger/cpython in…
rhettinger Jan 4, 2022
bbe6558
Update blurb to cover mean().
rhettinger Jan 4, 2022
326bce8
Move _ss() into the private utilities section.
rhettinger Jan 4, 2022
208abcd
Use defaultdict() instead of boundmethod
rhettinger Jan 5, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Move _ss() into the private utilities section.
  • Loading branch information
rhettinger committed Jan 4, 2022
commit 326bce8fe3e8d70308a68d08593f10c84f4abf9c
77 changes: 39 additions & 38 deletions Lib/statistics.py
Original file line number Diff line number Diff line change
Expand Up @@ -202,6 +202,45 @@ def _sum(data):
return (T, total, count)


def _ss(data, c=None):
    """Return sum of square deviations of sequence data.

    The result is a tuple ``(T, total, count)`` where ``T`` is the coerced
    type of the data, ``total`` is the sum of square deviations, and
    ``count`` is the number of data points consumed.

    If ``c`` is None, the deviations are measured from the mean of the
    data.  This is done in a single pass by accumulating exact sums of
    ``x`` and ``x*x``, so ``data`` is only iterated once.  Otherwise,
    deviations are calculated from ``c`` as given.  Use the second case
    with care, as it can lead to garbage results.
    """
    # Local import keeps this block self-contained; hoist it to the module
    # imports if the file already imports from collections.
    from collections import defaultdict

    if c is not None:
        T, total, count = _sum((d := x - c) * d for x in data)
        return (T, total, count)

    count = 0
    # Numerators are accumulated per denominator so that only one Fraction
    # per distinct denominator has to be built at the end.
    sx_partials = defaultdict(int)     # denominator -> sum of n
    sxx_partials = defaultdict(int)    # denominator -> sum of n*n
    T = int
    for typ, values in groupby(data, type):
        T = _coerce(T, typ)  # or raise TypeError
        for n, d in map(_exact_ratio, values):
            count += 1
            sx_partials[d] += n
            sxx_partials[d] += n * n

    if not count:
        total = Fraction(0)
    elif None in sx_partials:
        # The sum will be a NAN or INF. We can ignore all the finite
        # partials, and just look at this special one.
        # Presumably _exact_ratio reports non-finite values as
        # (value, None) -- confirm against its definition.
        # NOTE(review): this is the sum of the special values themselves,
        # not of their squares; confirm callers are fine with its sign.
        total = sx_partials[None]
        assert not _isfinite(total)
    else:
        sx = sum(Fraction(n, d) for d, n in sx_partials.items())
        sxx = sum(Fraction(n, d*d) for d, n in sxx_partials.items())
        # Sum of square deviations about the mean:
        #     sum((x - mean)**2) == sxx - sx**2 / count
        # This formula has poor numeric properties for floats,
        # but with fractions it is exact.
        total = (count * sxx - sx * sx) / count
    return (T, total, count)


def _isfinite(x):
try:
return x.is_finite() # Likely a Decimal.
Expand Down Expand Up @@ -773,44 +812,6 @@ def quantiles(data, *, n=4, method='exclusive'):
# See http://mathworld.wolfram.com/Variance.html
# http://mathworld.wolfram.com/SampleVariance.html

def _ss(data, c=None):
    """Return sum of square deviations of sequence data.

    The result is a tuple ``(T, total, count)`` where ``T`` is the coerced
    type of the data, ``total`` is the sum of square deviations, and
    ``count`` is the number of data points consumed.

    If ``c`` is None, the deviations are measured from the mean of the
    data.  This is done in a single pass by accumulating exact sums of
    ``x`` and ``x*x``, so ``data`` is only iterated once.  Otherwise,
    deviations are calculated from ``c`` as given.  Use the second case
    with care, as it can lead to garbage results.
    """
    # Local import keeps this block self-contained; hoist it to the module
    # imports if the file already imports from collections.
    from collections import defaultdict

    if c is not None:
        T, total, count = _sum((d := x - c) * d for x in data)
        return (T, total, count)

    count = 0
    # Numerators are accumulated per denominator so that only one Fraction
    # per distinct denominator has to be built at the end.
    sx_partials = defaultdict(int)     # denominator -> sum of n
    sxx_partials = defaultdict(int)    # denominator -> sum of n*n
    T = int
    for typ, values in groupby(data, type):
        T = _coerce(T, typ)  # or raise TypeError
        for n, d in map(_exact_ratio, values):
            count += 1
            sx_partials[d] += n
            sxx_partials[d] += n * n

    if not count:
        total = Fraction(0)
    elif None in sx_partials:
        # The sum will be a NAN or INF. We can ignore all the finite
        # partials, and just look at this special one.
        # Presumably _exact_ratio reports non-finite values as
        # (value, None) -- confirm against its definition.
        # NOTE(review): this is the sum of the special values themselves,
        # not of their squares; confirm callers are fine with its sign.
        total = sx_partials[None]
        assert not _isfinite(total)
    else:
        sx = sum(Fraction(n, d) for d, n in sx_partials.items())
        sxx = sum(Fraction(n, d*d) for d, n in sxx_partials.items())
        # Sum of square deviations about the mean:
        #     sum((x - mean)**2) == sxx - sx**2 / count
        # This formula has poor numeric properties for floats,
        # but with fractions it is exact.
        total = (count * sxx - sx * sx) / count
    return (T, total, count)


def variance(data, xbar=None):
"""Return the sample variance of data.
Expand Down