Skip to content
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
gh-133136: Limit excess memory held by QSBR
The free threading build uses QSBR to delay the freeing of dictionary
keys and list arrays when the objects are accessed by multiple threads
in order to allow concurrent reads to proceed without holding the object
lock. The requests are processed in batches to reduce execution
overhead, but for large memory blocks this can lead to excess memory
usage.

Take into account the size of the memory block when deciding when to
process QSBR requests.
  • Loading branch information
colesbury committed Jun 3, 2025
commit 7ef2e30c9d0d45472c3942ff9733f1d7dfc184ec
2 changes: 1 addition & 1 deletion Include/internal/pycore_pymem.h
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ extern wchar_t *_PyMem_DefaultRawWcsdup(const wchar_t *str);
extern int _PyMem_DebugEnabled(void);

// Enqueue a pointer to be freed possibly after some delay.
extern void _PyMem_FreeDelayed(void *ptr);
extern void _PyMem_FreeDelayed(void *ptr, size_t size);

// Enqueue an object to be freed possibly after some delay
#ifdef Py_GIL_DISABLED
Expand Down
4 changes: 4 additions & 0 deletions Include/internal/pycore_qsbr.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,10 @@ struct _qsbr_thread_state {
// Used to defer advancing write sequence a fixed number of times
int deferrals;

// Estimate for the amount of memory that is held by this thread since
// the last non-deferred advance.
size_t memory_deferred;

// Is this thread state allocated?
bool allocated;
struct _qsbr_thread_state *freelist_next;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Limit excess memory usage in the :term:`free threading` build when a
large dictionary or list is resized and accessed by multiple threads.
2 changes: 1 addition & 1 deletion Objects/codeobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -3350,7 +3350,7 @@ create_tlbc_lock_held(PyCodeObject *co, Py_ssize_t idx)
}
memcpy(new_tlbc->entries, tlbc->entries, tlbc->size * sizeof(void *));
_Py_atomic_store_ptr_release(&co->co_tlbc, new_tlbc);
_PyMem_FreeDelayed(tlbc);
_PyMem_FreeDelayed(tlbc, tlbc->size * sizeof(void *));
tlbc = new_tlbc;
}
char *bc = PyMem_Calloc(1, _PyCode_NBYTES(co));
Expand Down
4 changes: 2 additions & 2 deletions Objects/dictobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -813,7 +813,7 @@ free_keys_object(PyDictKeysObject *keys, bool use_qsbr)
{
#ifdef Py_GIL_DISABLED
if (use_qsbr) {
_PyMem_FreeDelayed(keys);
_PyMem_FreeDelayed(keys, _PyDict_KeysSize(keys));
return;
}
#endif
Expand Down Expand Up @@ -858,7 +858,7 @@ free_values(PyDictValues *values, bool use_qsbr)
assert(values->embedded == 0);
#ifdef Py_GIL_DISABLED
if (use_qsbr) {
_PyMem_FreeDelayed(values);
_PyMem_FreeDelayed(values, values_size_from_count(values->capacity));
return;
}
#endif
Expand Down
3 changes: 2 additions & 1 deletion Objects/listobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,8 @@ free_list_items(PyObject** items, bool use_qsbr)
#ifdef Py_GIL_DISABLED
_PyListArray *array = _Py_CONTAINER_OF(items, _PyListArray, ob_item);
if (use_qsbr) {
_PyMem_FreeDelayed(array);
size_t size = sizeof(_PyListArray) + array->allocated * sizeof(PyObject *);
_PyMem_FreeDelayed(array, size);
}
else {
PyMem_Free(array);
Expand Down
39 changes: 32 additions & 7 deletions Objects/obmalloc.c
Original file line number Diff line number Diff line change
Expand Up @@ -1141,8 +1141,27 @@
}
}

// Decide whether enqueueing a block of `size` bytes for delayed freeing
// should force an advance of the shared QSBR sequence number.
//
// Returns 1 (after resetting tstate->qsbr->memory_deferred to 0) when `size`
// alone exceeds QSBR_DEFERRED_LIMIT, or when the running memory_deferred
// total exceeds the limit once `size` has been added to it. Returns 0
// otherwise, leaving `size` accumulated in memory_deferred.
static int
should_advance_qsbr(_PyThreadStateImpl *tstate, size_t size)

Check warning on line 1145 in Objects/obmalloc.c

View workflow job for this annotation

GitHub Actions / Ubuntu / build and test (ubuntu-24.04)

‘should_advance_qsbr’ defined but not used [-Wunused-function]

Check warning on line 1145 in Objects/obmalloc.c

View workflow job for this annotation

GitHub Actions / Ubuntu / build and test (ubuntu-24.04-arm)

‘should_advance_qsbr’ defined but not used [-Wunused-function]

Check warning on line 1145 in Objects/obmalloc.c

View workflow job for this annotation

GitHub Actions / Cross build Linux

‘should_advance_qsbr’ defined but not used [-Wunused-function]

Check warning on line 1145 in Objects/obmalloc.c

View workflow job for this annotation

GitHub Actions / Hypothesis tests on Ubuntu

‘should_advance_qsbr’ defined but not used [-Wunused-function]

Check warning on line 1145 in Objects/obmalloc.c

View workflow job for this annotation

GitHub Actions / Address sanitizer (ubuntu-24.04)

‘should_advance_qsbr’ defined but not used [-Wunused-function]

Check warning on line 1145 in Objects/obmalloc.c

View workflow job for this annotation

GitHub Actions / Ubuntu (bolt) / build and test (ubuntu-24.04)

‘should_advance_qsbr’ defined but not used [-Wunused-function]
{
// If the deferred memory exceeds 1 MiB, we force an advance in the
// shared QSBR sequence number to limit excess memory usage.
static const size_t QSBR_DEFERRED_LIMIT = 1024 * 1024;
// A single block larger than the limit forces an advance on its own; its
// size is not added to the running total, which is simply reset.
if (size > QSBR_DEFERRED_LIMIT) {
tstate->qsbr->memory_deferred = 0;
return 1;
}

// Otherwise accumulate into this thread's estimate and force an advance
// once the accumulated total crosses the limit.
tstate->qsbr->memory_deferred += size;
if (tstate->qsbr->memory_deferred > QSBR_DEFERRED_LIMIT) {
tstate->qsbr->memory_deferred = 0;
return 1;
}
// Below the limit: the caller may use a deferred advance instead.
return 0;
}
Comment on lines +1146 to +1162
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we need the early return here?
It looks like the result will eventually be the same.

Suggested change
should_advance_qsbr(_PyThreadStateImpl *tstate, size_t size)
{
// If the deferred memory exceeds 1 MiB, we force an advance in the
// shared QSBR sequence number to limit excess memory usage.
static const size_t QSBR_DEFERRED_LIMIT = 1024 * 1024;
if (size > QSBR_DEFERRED_LIMIT) {
tstate->qsbr->memory_deferred = 0;
return 1;
}
tstate->qsbr->memory_deferred += size;
if (tstate->qsbr->memory_deferred > QSBR_DEFERRED_LIMIT) {
tstate->qsbr->memory_deferred = 0;
return 1;
}
return 0;
}
should_advance_qsbr(_PyThreadStateImpl *tstate, size_t size)
{
// If the deferred memory exceeds 1 MiB, we force an advance in the
// shared QSBR sequence number to limit excess memory usage.
static const size_t QSBR_DEFERRED_LIMIT = 1024 * 1024;
tstate->qsbr->memory_deferred += size;
if (tstate->qsbr->memory_deferred > QSBR_DEFERRED_LIMIT) {
tstate->qsbr->memory_deferred = 0;
return 1;
}
return 0;
}


static void
free_delayed(uintptr_t ptr)
free_delayed(uintptr_t ptr, size_t size)
{
#ifndef Py_GIL_DISABLED
free_work_item(ptr, NULL, NULL);
Expand Down Expand Up @@ -1200,23 +1219,29 @@
}

assert(buf != NULL && buf->wr_idx < WORK_ITEMS_PER_CHUNK);
uint64_t seq = _Py_qsbr_deferred_advance(tstate->qsbr);
uint64_t seq;
int force_advance = should_advance_qsbr(tstate, size);
if (force_advance) {
seq = _Py_qsbr_advance(tstate->qsbr->shared);
}
else {
seq = _Py_qsbr_deferred_advance(tstate->qsbr);
}
buf->array[buf->wr_idx].ptr = ptr;
buf->array[buf->wr_idx].qsbr_goal = seq;
buf->wr_idx++;

if (buf->wr_idx == WORK_ITEMS_PER_CHUNK) {
if (buf->wr_idx == WORK_ITEMS_PER_CHUNK || force_advance) {
_PyMem_ProcessDelayed((PyThreadState *)tstate);
}
#endif
}

// Enqueue `ptr` to be freed, possibly after some delay. `size` is handed to
// free_delayed() together with the pointer. A NULL `ptr` is ignored.
void
_PyMem_FreeDelayed(void *ptr)
_PyMem_FreeDelayed(void *ptr, size_t size)
{
// The pointer must have its low bit clear.
// NOTE(review): presumably because bit 0 is used as a tag when enqueueing
// objects (see the `|0x01` in the object variant) -- confirm.
assert(!((uintptr_t)ptr & 0x01));
if (ptr != NULL) {
free_delayed((uintptr_t)ptr);
free_delayed((uintptr_t)ptr, size);
}
}

Expand All @@ -1226,7 +1251,7 @@
{
assert(!((uintptr_t)ptr & 0x01));
if (ptr != NULL) {
free_delayed(((uintptr_t)ptr)|0x01);
free_delayed(((uintptr_t)ptr)|0x01, 64);
}
}
#endif
Expand Down
Loading