From 1fa012de3afad60ed696189ecc917c4bb62bce8b Mon Sep 17 00:00:00 2001 From: Mahmoud Hashemi Date: Sat, 28 Oct 2023 23:12:54 -0700 Subject: [PATCH] first cut at adding fill parameters to pairwise and windowed, with tests and doc updates --- boltons/iterutils.py | 86 +++++++++++++++++++++++++++++------------ tests/test_iterutils.py | 26 +++++++++++++ 2 files changed, 87 insertions(+), 25 deletions(-) diff --git a/boltons/iterutils.py b/boltons/iterutils.py index eddbeff2..150b5009 100644 --- a/boltons/iterutils.py +++ b/boltons/iterutils.py @@ -64,7 +64,7 @@ try: from future_builtins import filter - from itertools import izip + from itertools import izip, izip_longest as zip_longest _IS_PY3 = False except ImportError: # Python 3 compat @@ -72,6 +72,7 @@ basestring = (str, bytes) unicode = str izip, xrange = zip, range + from itertools import zip_longest def is_iterable(obj): @@ -424,7 +425,7 @@ def chunk_ranges(input_size, chunk_size, input_offset=0, overlap_size=0, align=F return -def pairwise(src): +def pairwise(src, fill=_UNSET): """Convenience function for calling :func:`windowed` on *src*, with *size* set to 2. @@ -433,14 +434,22 @@ def pairwise(src): >>> pairwise([]) [] - The number of pairs is always one less than the number of elements - in the iterable passed in, except on empty inputs, which returns - an empty list. + Unless *fill* is set, the number of pairs is always one less than + the number of elements in the iterable passed in, except on an empty input, + which will return an empty list. + + With *fill* set, a number of pairs equal to the length of *src* is returned, + with the last item of the last pair being equal to *fill*. + + >>> list(pairwise(range(3), fill=None)) + [(0, 1), (1, 2), (2, None)] + + This way, *fill* values can be useful to signal the end of the iterable. 
""" - return windowed(src, 2) + return windowed(src, 2, fill=fill) -def pairwise_iter(src): +def pairwise_iter(src, fill=_UNSET): """Convenience function for calling :func:`windowed_iter` on *src*, with *size* set to 2. @@ -449,43 +458,70 @@ def pairwise_iter(src): >>> list(pairwise_iter([])) [] - The number of pairs is always one less than the number of elements - in the iterable passed in, or zero, when *src* is empty. + Unless *fill* is set, the number of pairs is always one less + than the number of elements in the iterable passed in, + or zero, when *src* is empty. + + With *fill* set, a number of pairs equal to the length of *src* is returned, + with the last item of the last pair being equal to *fill*. + >>> list(pairwise_iter(range(3), fill=None)) + [(0, 1), (1, 2), (2, None)] + + This way, *fill* values can be useful to signal the end of the iterable. + For infinite iterators, setting *fill* has no effect. """ - return windowed_iter(src, 2) + return windowed_iter(src, 2, fill=fill) -def windowed(src, size): - """Returns tuples with exactly length *size*. If the iterable is - too short to make a window of length *size*, no tuples are - returned. See :func:`windowed_iter` for more. +def windowed(src, size, fill=_UNSET): + """Returns tuples with exactly length *size*. If *fill* is unset + and the iterable is too short to make a window of length *size*, + no tuples are returned. See :func:`windowed_iter` for more. """ - return list(windowed_iter(src, size)) + return list(windowed_iter(src, size, fill=fill)) -def windowed_iter(src, size): +def windowed_iter(src, size, fill=_UNSET): """Returns tuples with length *size* which represent a sliding window over iterable *src*. >>> list(windowed_iter(range(7), 3)) [(0, 1, 2), (1, 2, 3), (2, 3, 4), (3, 4, 5), (4, 5, 6)] - If the iterable is too short to make a window of length *size*, - then no window tuples are returned. 
+ If *fill* is unset, and the iterable is too short to make a window + of length *size*, then no window tuples are returned. >>> list(windowed_iter(range(3), 5)) [] + + With *fill* set, the iterator always yields a number of windows + equal to the length of the *src* iterable. + + >>> windowed(range(4), 3, fill=None) + [(0, 1, 2), (1, 2, 3), (2, 3, None), (3, None, None)] + + This way, *fill* values can be useful to signal the end of the iterable. + For infinite iterators, setting *fill* has no effect. """ - # TODO: lists? (for consistency) tees = itertools.tee(src, size) - try: - for i, t in enumerate(tees): - for _ in xrange(i): + if fill is _UNSET: + try: + for i, t in enumerate(tees): + for _ in xrange(i): + next(t) + except StopIteration: + return izip([]) + return izip(*tees) + + for i, t in enumerate(tees): + for _ in xrange(i): + try: next(t) - except StopIteration: - return izip([]) - return izip(*tees) + except StopIteration: + break + return zip_longest(*tees, fillvalue=fill) + def xfrange(stop, start=None, step=1.0): diff --git a/tests/test_iterutils.py b/tests/test_iterutils.py index 2738d4b7..68a281a0 100644 --- a/tests/test_iterutils.py +++ b/tests/test_iterutils.py @@ -5,6 +5,10 @@ from boltons.dictutils import OMD from boltons.iterutils import (first, + pairwise, + pairwise_iter, + windowed, + windowed_iter, remap, research, default_enter, @@ -551,3 +555,25 @@ def test_strip(): assert strip([0,0,0,1,0,2,0,3,0,0,0],0) == [1,0,2,0,3] assert strip([]) == [] + +def test_pairwise_filled(): + assert pairwise(range(4)) == [(0, 1), (1, 2), (2, 3)] + assert pairwise(range(4), fill=None) == [(0, 1), (1, 2), (2, 3), (3, None)] + + assert pairwise([]) == [] + assert pairwise([1], fill=None) == [(1, None)] + + assert list(pairwise_iter(range(4))) == [(0, 1), (1, 2), (2, 3)] + assert list(pairwise_iter(range(4), fill=None)) == [(0, 1), (1, 2), (2, 3), (3, None)] + + +def test_windowed_filled(): + assert windowed(range(4), 3) == [(0, 1, 2), (1, 2, 3)] + assert 
windowed(range(4), 3, fill=None) == [(0, 1, 2), (1, 2, 3), (2, 3, None), (3, None, None)] + + assert windowed([], 3) == [] + assert windowed([], 3, fill=None) == [] + assert windowed([1, 2], 3, fill=None) == [(1, 2, None), (2, None, None)] + + assert list(windowed_iter(range(4), 3)) == [(0, 1, 2), (1, 2, 3)] + assert list(windowed_iter(range(4), 3, fill=None)) == [(0, 1, 2), (1, 2, 3), (2, 3, None), (3, None, None)]