first cut at adding fill parameters to pairwise and windowed, with tests and doc updates
mahmoud · Oct 29, 2023 · 1fa012d · 1fa012d
1 parent 46599bc
commit 1fa012d
Show file tree

Hide file tree

Showing 2 changed files with 87 additions and 25 deletions.
diff --git a/boltons/iterutils.py b/boltons/iterutils.py
@@ -64,14 +64,15 @@
 
 try:
     from future_builtins import filter
-    from itertools import izip
+    from itertools import izip, izip_longest as zip_longest
     _IS_PY3 = False
 except ImportError:
     # Python 3 compat
     _IS_PY3 = True
     basestring = (str, bytes)
     unicode = str
     izip, xrange = zip, range
+    from itertools import zip_longest
 
 
 def is_iterable(obj):
@@ -424,7 +425,7 @@ def chunk_ranges(input_size, chunk_size, input_offset=0, overlap_size=0, align=F
             return
 
 
-def pairwise(src):
+def pairwise(src, fill=_UNSET):
     """Convenience function for calling :func:`windowed` on *src*, with
     *size* set to 2.
 
@@ -433,14 +434,22 @@ def pairwise(src):
     >>> pairwise([])
     []
 
-    The number of pairs is always one less than the number of elements
-    in the iterable passed in, except on empty inputs, which returns
-    an empty list.
+    Unless *fill* is set, the number of pairs is always one less than 
+    the number of elements in the iterable passed in, except on an empty input, 
+    which will return an empty list.
+
+    With *fill* set, a number of pairs equal to the length of *src* is returned,
+    with the last item of the last pair being equal to *fill*.
+
+    >>> list(pairwise(range(3), fill=None))
+    [(0, 1), (1, 2), (2, None)]
+
+    This way, *fill* values can be useful to signal the end of the iterable.
     """
-    return windowed(src, 2)
+    return windowed(src, 2, fill=fill)
 
 
-def pairwise_iter(src):
+def pairwise_iter(src, fill=_UNSET):
     """Convenience function for calling :func:`windowed_iter` on *src*,
     with *size* set to 2.
 
@@ -449,43 +458,70 @@ def pairwise_iter(src):
     >>> list(pairwise_iter([]))
     []
 
-    The number of pairs is always one less than the number of elements
-    in the iterable passed in, or zero, when *src* is empty.
+    Unless *fill* is set, the number of pairs is always one less 
+    than the number of elements in the iterable passed in, 
+    or zero, when *src* is empty.
+
+    With *fill* set, a number of pairs equal to the length of *src* is returned,
+    with the last item of the last pair being equal to *fill*. 
 
+    >>> list(pairwise_iter(range(3), fill=None))
+    [(0, 1), (1, 2), (2, None)]
+
+    This way, *fill* values can be useful to signal the end of the iterable.
+    For infinite iterators, setting *fill* has no effect.
     """
-    return windowed_iter(src, 2)
+    return windowed_iter(src, 2, fill=fill)
 
 
-def windowed(src, size):
-    """Returns tuples with exactly length *size*. If the iterable is
-    too short to make a window of length *size*, no tuples are
-    returned. See :func:`windowed_iter` for more.
+def windowed(src, size, fill=_UNSET):
+    """Returns a list of *size*-length tuples. If *fill* is unset and
+    *src* is too short for one window, the list is empty; if set, trailing
+    windows are padded with *fill*. See :func:`windowed_iter` for more.
     """
-    return list(windowed_iter(src, size))
+    return list(windowed_iter(src, size, fill=fill))
 
 
-def windowed_iter(src, size):
+def windowed_iter(src, size, fill=_UNSET):
     """Returns tuples with length *size* which represent a sliding
     window over iterable *src*.
 
     >>> list(windowed_iter(range(7), 3))
     [(0, 1, 2), (1, 2, 3), (2, 3, 4), (3, 4, 5), (4, 5, 6)]
 
-    If the iterable is too short to make a window of length *size*,
-    then no window tuples are returned.
+    If *fill* is unset, and the iterable is too short to make a window
+    of length *size*, then no window tuples are returned.
 
     >>> list(windowed_iter(range(3), 5))
     []
+
+    With *fill* set, trailing windows are padded with *fill*, so one
+    window is yielded for every item in *src*.
+
+    >>> windowed(range(4), 3, fill=None)
+    [(0, 1, 2), (1, 2, 3), (2, 3, None), (3, None, None)]
+
+    This way, *fill* values can be useful to signal the end of the iterable.
+    For infinite iterators, setting *fill* has no effect.
     """
-    # TODO: lists? (for consistency)
     tees = itertools.tee(src, size)
-    try:
-        for i, t in enumerate(tees):
-            for _ in xrange(i):
+    if fill is _UNSET:  # unpadded mode: only complete windows
+        try:
+            for i, t in enumerate(tees):
+                for _ in xrange(i):  # offset the i-th tee by i items
+                    next(t)
+        except StopIteration:
+            return izip([])  # src is shorter than size: empty iterator
+        return izip(*tees)  # izip/xrange keep this lazy on Python 2 too
+
+    for i, t in enumerate(tees):
+        for _ in xrange(i):
+            try:
                 next(t)
-    except StopIteration:
-        return izip([])
-    return izip(*tees)
+            except StopIteration:
+                break  # tee exhausted; zip_longest pads it with fill
+    return zip_longest(*tees, fillvalue=fill)
+
 
 
 def xfrange(stop, start=None, step=1.0):

diff --git a/tests/test_iterutils.py b/tests/test_iterutils.py
@@ -5,6 +5,10 @@
 
 from boltons.dictutils import OMD
 from boltons.iterutils import (first,
+                               pairwise,
+                               pairwise_iter,
+                               windowed,
+                               windowed_iter,
                                remap,
                                research,
                                default_enter,
@@ -551,3 +555,25 @@ def test_strip():
     assert strip([0,0,0,1,0,2,0,3,0,0,0],0) == [1,0,2,0,3]
     assert strip([]) == []
 
+
+def test_pairwise_filled():
+    assert pairwise(range(4)) == [(0, 1), (1, 2), (2, 3)]  # no fill given
+    assert pairwise(range(4), fill=None) == [(0, 1), (1, 2), (2, 3), (3, None)]
+
+    assert pairwise([]) == []  # empty input, no fill
+    assert pairwise([1], fill=None) == [(1, None)]  # lone item pairs with fill
+
+    assert list(pairwise_iter(range(4))) == [(0, 1), (1, 2), (2, 3)]  # iterator form
+    assert list(pairwise_iter(range(4), fill=None)) == [(0, 1), (1, 2), (2, 3), (3, None)]
+
+
+def test_windowed_filled():
+    assert windowed(range(4), 3) == [(0, 1, 2), (1, 2, 3)]  # no fill given
+    assert windowed(range(4), 3, fill=None) == [(0, 1, 2), (1, 2, 3), (2, 3, None), (3, None, None)]
+
+    assert windowed([], 3) == []  # empty input, no fill
+    assert windowed([], 3, fill=None) == []  # empty input, fill given
+    assert windowed([1, 2], 3, fill=None) == [(1, 2, None), (2, None, None)]  # src shorter than size
+
+    assert list(windowed_iter(range(4), 3)) == [(0, 1, 2), (1, 2, 3)]  # iterator form
+    assert list(windowed_iter(range(4), 3, fill=None)) == [(0, 1, 2), (1, 2, 3), (2, 3, None), (3, None, None)]