Skip to content
Open
Changes from 1 commit
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
ccd0bbc
draft: impl lazy input consumption in mp.Pool.imap(_unordered)
Jul 20, 2025
002ef46
Use semaphore to synchronize threads
Jul 20, 2025
6e0bc58
Update buffersize behavior to match concurrent.futures.Executor behavior
Jul 21, 2025
62b2b6a
Release all `buffersize_lock` obj from the parent thread when terminate
Jul 21, 2025
0b6ba41
Add 2 basic `ThreadPool.imap()` tests w/ and w/o buffersize
Jul 21, 2025
aade15e
Fix accidental swap in imports
Jul 21, 2025
fb38a72
clear Pool._taskqueue_buffersize_semaphores safely
Jul 21, 2025
6ef488b
Slightly optimize Pool._taskqueue_buffersize_semaphores terminate
Jul 21, 2025
1716725
Rename `Pool.imap()` buffersize-related tests
Jul 21, 2025
9b43cd0
Fix typo in `IMapIterator.__init__()`
Jul 22, 2025
2d89341
Add tests for buffersize combinations with other kwargs
Jul 22, 2025
9ab2705
Remove if-branch in `_terminate_pool`
Jul 27, 2025
a955003
Add more edge-case tests for `imap` and `imap_unordered`
Jul 27, 2025
80efd6e
Split inf iterable test for `imap` and `imap_unordered`
Jul 27, 2025
83d6930
Add doc for `buffersize` argument of `imap` and `imap_unordered`
Jul 27, 2025
995ad8c
add *versionadded* for `imap_unordered`
Jul 28, 2025
3b6ad65
Remove ambiguity in `buffersize` description.
Jul 28, 2025
c941c16
Set *versionadded* as next in docs
Jul 28, 2025
d09e891
Add whatsnew entry
Jul 28, 2025
9c6d89d
Fix agreed comments on code formatting/minor refactoring
Jul 28, 2025
4550a01
Remove `imap` and `imap_unordered` body code duplication
Jul 28, 2025
77bde4d
Merge branch 'main' into feature/add-buffersize-to-multiprocessing
obaltian Aug 31, 2025
aec39fc
Merge branch 'main' into feature/add-buffersize-to-multiprocessing
obaltian Sep 3, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Remove imap and imap_unordered body code duplication
  • Loading branch information
Oleksandr Baltian authored and obaltian committed Aug 14, 2025
commit 4550a01bec9470991ce241a397ef5cf199aee3a1
87 changes: 31 additions & 56 deletions Lib/multiprocessing/pool.py
Original file line number Diff line number Diff line change
Expand Up @@ -417,67 +417,14 @@ def imap(self, func, iterable, chunksize=1, buffersize=None):
'''
Equivalent of `map()` -- can be MUCH slower than `Pool.map()`.
'''
self._check_running()
self._check_chunksize(chunksize)
self._check_buffersize(buffersize)

result = IMapIterator(self, buffersize)
if chunksize == 1:
self._taskqueue.put(
(
self._guarded_task_generation(result._job, func, iterable,
result._buffersize_sema),
result._set_length,
)
)
return result
else:
task_batches = Pool._get_tasks(func, iterable, chunksize)
self._taskqueue.put(
(
self._guarded_task_generation(
result._job,
mapstar,
task_batches,
result._buffersize_sema,
),
result._set_length,
)
)
return (item for chunk in result for item in chunk)
return self._imap(IMapIterator, func, iterable, chunksize, buffersize)

def imap_unordered(self, func, iterable, chunksize=1, buffersize=None):
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Isn't this method the same as _imap? Is there a reason why we duplicated them, since only IMapIterator vs IMapUnorderedIterator changes? I don't know if it's a good target for refactoring, where we would pass the wrapper:

@staticmethod
def _imap(IteratorClass, pool, func, iterable, chunksize=1, buffersize=None):
    result = IteratorClass(pool, buffersize)
    ...

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

They are almost the same, indeed. Sure, I will make implementation shared, thanks.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I actually don't know if the duplication was intended, but it appears that it is duplicated. If @gpshead thinks it's better to duplicate the code for future improvements, then we can keep it as is (because if in the future only one of the two functions has issues or a different behavior, it's better to have two separate functions).

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Another argument to reuse code is the fact that _map_async is also shared here.

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have added the shared method in a separate commit, can revert it if it's unwanted.

'''
Like `imap()` method but ordering of results is arbitrary.
'''
self._check_running()
self._check_chunksize(chunksize)
self._check_buffersize(buffersize)

result = IMapUnorderedIterator(self, buffersize)
if chunksize == 1:
self._taskqueue.put(
(
self._guarded_task_generation(result._job, func, iterable,
result._buffersize_sema),
result._set_length,
)
)
return result
else:
task_batches = Pool._get_tasks(func, iterable, chunksize)
self._taskqueue.put(
(
self._guarded_task_generation(
result._job,
mapstar,
task_batches,
result._buffersize_sema,
),
result._set_length,
)
)
return (item for chunk in result for item in chunk)
return self._imap(IMapUnorderedIterator, func, iterable, chunksize,
buffersize)

def apply_async(self, func, args=(), kwds={}, callback=None,
error_callback=None):
Expand Down Expand Up @@ -526,6 +473,34 @@ def _map_async(self, func, iterable, mapper, chunksize=None, callback=None,
)
return result

def _imap(self, iterator_cls, func, iterable, chunksize=1,
          buffersize=None):
    '''
    Shared implementation behind `imap()` and `imap_unordered()`.

    `iterator_cls` is the result iterator class to instantiate; it is
    called as `iterator_cls(self, buffersize)`.  With `chunksize == 1`
    the iterator itself is returned; otherwise the iterable is split
    into batches and a generator flattening the per-chunk results is
    returned instead.
    '''
    self._check_running()
    self._check_chunksize(chunksize)
    self._check_buffersize(buffersize)

    result = iterator_cls(self, buffersize)
    batched = chunksize != 1
    if batched:
        # Each task handed to the workers is a whole batch of inputs,
        # applied through `mapstar`.
        task_func = mapstar
        task_source = Pool._get_tasks(func, iterable, chunksize)
    else:
        task_func = func
        task_source = iterable

    task_generator = self._guarded_task_generation(
        result._job, task_func, task_source, result._buffersize_sema)
    self._taskqueue.put((task_generator, result._set_length))

    if batched:
        # Results arrive one chunk at a time; yield the items singly.
        return (item for chunk in result for item in chunk)
    return result

@staticmethod
def _check_chunksize(chunksize):
if chunksize < 1:
Expand Down
Loading