Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions .github/workflows/jit.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ concurrency:

env:
FORCE_COLOR: 1
LLVM_VERSION: 19
LLVM_VERSION: 21

jobs:
interpreter:
Expand Down Expand Up @@ -168,7 +168,6 @@ jobs:
fail-fast: false
matrix:
include:

- name: JIT without optimizations (Debug)
configure_flags: --enable-experimental-jit --with-pydebug
test_env: "PYTHON_UOPS_OPTIMIZE=0"
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/tail-call.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ concurrency:

env:
FORCE_COLOR: 1
LLVM_VERSION: 20
LLVM_VERSION: 21

jobs:
windows:
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Update JIT compilation to use LLVM 21 at build time.
69 changes: 57 additions & 12 deletions PCbuild/get_external.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import argparse
import os
import pathlib
import shutil
import sys
import time
import urllib.error
Expand All @@ -22,15 +23,13 @@ def retrieve_with_retries(download_location, output_path, reporthook,
)
except (urllib.error.URLError, ConnectionError) as ex:
if attempt == max_retries:
msg = f"Download from {download_location} failed."
raise OSError(msg) from ex
raise OSError(f'Download from {download_location} failed.') from ex
time.sleep(2.25**attempt)
else:
return resp


def fetch_zip(commit_hash, zip_dir, *, org='python', binary=False, verbose):
repo = f'cpython-{"bin" if binary else "source"}-deps'
repo = 'cpython-bin-deps' if binary else 'cpython-source-deps'
url = f'https://github.com/{org}/{repo}/archive/{commit_hash}.zip'
reporthook = None
if verbose:
Expand All @@ -44,6 +43,29 @@ def fetch_zip(commit_hash, zip_dir, *, org='python', binary=False, verbose):
return filename


def fetch_release(tag, tarball_dir, *, org='python', verbose=False):
    """Download the ``<tag>.tar.xz`` release asset of ``cpython-bin-deps``.

    The asset is fetched from the GitHub release named *tag* in the
    *org* organization and stored as ``tarball_dir / '<tag>.tar.xz'``.
    *tarball_dir* is created (including parents) if it does not exist.
    When *verbose* is true, ``print`` is passed as the download report
    hook.

    Returns the path of the downloaded tarball.
    """
    tarball_dir.mkdir(parents=True, exist_ok=True)
    destination = tarball_dir / f'{tag}.tar.xz'
    source_url = f'https://github.com/{org}/cpython-bin-deps/releases/download/{tag}/{tag}.tar.xz'
    progress_hook = print if verbose else None
    retrieve_with_retries(source_url, destination, progress_hook)
    return destination


def extract_tarball(externals_dir, tarball_path, tag):
    """Unpack *tarball_path* into ``externals_dir / tag`` and return that path.

    Extraction is delegated to ``shutil.unpack_archive``.

    If extraction fails, a destination directory created by this call is
    removed again. Otherwise a half-extracted tree would be left behind,
    and any caller that treats "the directory exists" as "the dependency
    is already fetched" would keep using the broken tree on the next run.
    A destination that already existed before the call is never deleted.

    Raises:
        OSError: if the archive cannot be unpacked. The underlying
            exception is chained as ``__cause__``.
    """
    output_path = externals_dir / tag
    # Remember whether the destination pre-dates this call so that the
    # failure path only cleans up what we created ourselves.
    existed_before = output_path.exists()
    try:
        shutil.unpack_archive(os.fspath(tarball_path), os.fspath(output_path))
    except Exception as ex:
        # Deliberately broad: archive backends raise a mix of exception
        # types (ReadError, EOFError, ...), all reported uniformly below.
        if not existed_before:
            shutil.rmtree(output_path, ignore_errors=True)
        raise OSError(
            f'Failed to extract {tarball_path}. The archive may be '
            f'corrupted; try deleting it and re-running.'
        ) from ex
    return output_path


def extract_zip(externals_dir, zip_path):
with zipfile.ZipFile(os.fspath(zip_path)) as zf:
zf.extractall(os.fspath(externals_dir))
Expand All @@ -55,6 +77,8 @@ def parse_args():
p.add_argument('-v', '--verbose', action='store_true')
p.add_argument('-b', '--binary', action='store_true',
help='Is the dependency in the binary repo?')
p.add_argument('-r', '--release', action='store_true',
help='Download from GitHub release assets instead of branch')
p.add_argument('-O', '--organization',
help='Organization owning the deps repos', default='python')
p.add_argument('-e', '--externals-dir', type=pathlib.Path,
Expand All @@ -67,15 +91,36 @@ def parse_args():

def main():
args = parse_args()
zip_path = fetch_zip(
args.tag,
args.externals_dir / 'zips',
org=args.organization,
binary=args.binary,
verbose=args.verbose,
)
final_name = args.externals_dir / args.tag
extracted = extract_zip(args.externals_dir, zip_path)

# Check if the dependency already exists in externals/ directory
# (either already downloaded/extracted, or checked into the git tree)
if final_name.exists():
if args.verbose:
print(f'{args.tag} already exists at {final_name}, skipping download.')
return

# Determine download method: release artifacts for large deps (like LLVM),
# otherwise zip download from GitHub branches
if args.release:
tarball_path = fetch_release(
args.tag,
args.externals_dir / 'tarballs',
org=args.organization,
verbose=args.verbose,
)
extracted = extract_tarball(args.externals_dir, tarball_path, args.tag)
else:
# Use zip download from GitHub branches
# (cpython-bin-deps if --binary, cpython-source-deps otherwise)
zip_path = fetch_zip(
args.tag,
args.externals_dir / 'zips',
org=args.organization,
binary=args.binary,
verbose=args.verbose,
)
extracted = extract_zip(args.externals_dir, zip_path)
for wait in [1, 2, 3, 5, 8, 0]:
try:
extracted.replace(final_name)
Expand Down
8 changes: 5 additions & 3 deletions PCbuild/get_externals.bat
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
@echo off
setlocal
setlocal EnableDelayedExpansion
rem Simple script to fetch source for external libraries

if NOT DEFINED PCBUILD (set PCBUILD=%~dp0)
Expand Down Expand Up @@ -82,7 +82,7 @@ if NOT "%IncludeLibffi%"=="false" set binaries=%binaries% libffi-3.4.4
if NOT "%IncludeSSL%"=="false" set binaries=%binaries% openssl-bin-3.0.19
if NOT "%IncludeTkinter%"=="false" set binaries=%binaries% tcltk-8.6.15.0
if NOT "%IncludeSSLSrc%"=="false" set binaries=%binaries% nasm-2.11.06
if NOT "%IncludeLLVM%"=="false" set binaries=%binaries% llvm-19.1.7.0
if NOT "%IncludeLLVM%"=="false" set binaries=%binaries% llvm-21.1.4.0

for %%b in (%binaries%) do (
if exist "%EXTERNALS_DIR%\%%b" (
Expand All @@ -92,7 +92,9 @@ for %%b in (%binaries%) do (
git clone --depth 1 https://github.com/%ORG%/cpython-bin-deps --branch %%b "%EXTERNALS_DIR%\%%b"
) else (
echo.Fetching %%b...
%PYTHON% -E "%PCBUILD%\get_external.py" -b -O %ORG% -e "%EXTERNALS_DIR%" %%b
set _fetch_args=--binary
echo %%b | findstr /B "llvm-" >nul && set _fetch_args=--release
%PYTHON% -E "%PCBUILD%\get_external.py" !_fetch_args! --organization %ORG% --externals-dir "%EXTERNALS_DIR%" %%b
)
)

Expand Down
90 changes: 72 additions & 18 deletions Python/jit.c
Original file line number Diff line number Diff line change
Expand Up @@ -419,15 +419,43 @@ patch_x86_64_32rx(unsigned char *location, uint64_t value)
}

void patch_aarch64_trampoline(unsigned char *location, int ordinal, jit_state *state);
void patch_x86_64_trampoline(unsigned char *location, int ordinal, jit_state *state);

#include "jit_stencils.h"

// Per-architecture layout constants for JIT memory:
//   TRAMPOLINE_SIZE - size in bytes of one trampoline slot.
//   DATA_ALIGN      - byte alignment applied to the start of the data region
//                     that follows the code and trampolines. The padding
//                     computation masks with (DATA_ALIGN - 1), so this must
//                     be a power of two.
#if defined(__aarch64__) || defined(_M_ARM64)
#define TRAMPOLINE_SIZE 16
#define DATA_ALIGN 8
#elif defined(__x86_64__) || defined(_M_X64)
// x86_64 trampolines: 14 bytes (jmp *(%rip) + 8-byte addr) + 2 bytes padding.
// Currently used on macOS where LLVM 21 GOT entries may exceed ±2GB
// PC-relative range, but enabled on all x86_64 platforms defensively.
#define TRAMPOLINE_SIZE 16
#define DATA_ALIGN 8
#else
// Other architectures: zero-sized trampoline slots and byte alignment.
#define TRAMPOLINE_SIZE 0
#define DATA_ALIGN 1
#endif

// Get the trampoline memory location for a given symbol ordinal.
//
// state->trampolines.mask is a bitmap stored as an array of uint32_t words:
// the bit for `ordinal` lives in word (ordinal / 32) at position
// (ordinal % 32), and must already be set. Slots are packed in ascending
// ordinal order, so the slot index equals the number of set bits that
// precede this ordinal's bit. Returns a pointer to the TRAMPOLINE_SIZE-byte
// slot inside state->trampolines.mem.
static unsigned char *
get_trampoline_slot(int ordinal, jit_state *state)
{
    // A negative ordinal would yield a negative shift count and array index.
    assert(ordinal >= 0);

    // Shift an unsigned 32-bit one: `1 << 31` on a signed int is undefined
    // behavior, which the original expression hit whenever ordinal % 32 == 31.
    const uint32_t symbol_mask = (uint32_t)1 << (ordinal % 32);
    const uint32_t trampoline_mask = state->trampolines.mask[ordinal / 32];
    assert(symbol_mask & trampoline_mask);

    // Count the number of set bits in the trampoline mask lower than ordinal:
    // first within this ordinal's own word, then in every preceding word.
    int index = _Py_popcount32(trampoline_mask & (symbol_mask - 1));
    for (int i = 0; i < ordinal / 32; i++) {
        index += _Py_popcount32(state->trampolines.mask[i]);
    }

    // The whole slot must fit inside the reserved trampoline region.
    assert((size_t)(index + 1) * TRAMPOLINE_SIZE <= state->trampolines.size);
    return state->trampolines.mem + index * TRAMPOLINE_SIZE;
}

// Generate and patch AArch64 trampolines. The symbols to jump to are stored
// in the jit_stencils.h in the symbols_map.
void
Expand All @@ -444,20 +472,8 @@ patch_aarch64_trampoline(unsigned char *location, int ordinal, jit_state *state)
return;
}

// Masking is done modulo 32 as the mask is stored as an array of uint32_t
const uint32_t symbol_mask = 1 << (ordinal % 32);
const uint32_t trampoline_mask = state->trampolines.mask[ordinal / 32];
assert(symbol_mask & trampoline_mask);

// Count the number of set bits in the trampoline mask lower than ordinal,
// this gives the index into the array of trampolines.
int index = _Py_popcount32(trampoline_mask & (symbol_mask - 1));
for (int i = 0; i < ordinal / 32; i++) {
index += _Py_popcount32(state->trampolines.mask[i]);
}

uint32_t *p = (uint32_t*)(state->trampolines.mem + index * TRAMPOLINE_SIZE);
assert((size_t)(index + 1) * TRAMPOLINE_SIZE <= state->trampolines.size);
// Out of range - need a trampoline
uint32_t *p = (uint32_t *)get_trampoline_slot(ordinal, state);


/* Generate the trampoline
Expand All @@ -474,6 +490,37 @@ patch_aarch64_trampoline(unsigned char *location, int ordinal, jit_state *state)
patch_aarch64_26r(location, (uintptr_t)p);
}

// Patch a 32-bit PC-relative reference at `location` so that it reaches
// symbols_map[ordinal]. If the symbol is within signed 32-bit range it is
// referenced directly; otherwise the reference is pointed at a trampoline
// that performs an absolute indirect jump to the symbol.
void
patch_x86_64_trampoline(unsigned char *location, int ordinal, jit_state *state)
{
    // Opcode and ModRM for `jmp *0(%rip)`, followed by a zero disp32, so the
    // jump target is read from the 8 bytes right after the instruction.
    static const unsigned char jmp_rip_indirect[6] = {
        0xFF, 0x25, 0x00, 0x00, 0x00, 0x00,
    };
    const int64_t rel32_limit = 1LL << 31;

    const uint64_t target = (uintptr_t)symbols_map[ordinal];
    const int64_t displacement = (int64_t)target - 4 - (int64_t)location;

    if (displacement < -rel32_limit || displacement >= rel32_limit) {
        // Too far for a rel32: emit a trampoline and reference that instead.
        unsigned char *slot = get_trampoline_slot(ordinal, state);

        /* Trampoline layout (14 bytes written):
               0: ff 25 00 00 00 00        jmp *(%rip)
               6: XX XX XX XX XX XX XX XX  (64-bit target address)

           Reference: https://wiki.osdev.org/X86-64_Instruction_Encoding#FF (JMP r/m64)
        */
        memcpy(slot, jmp_rip_indirect, sizeof(jmp_rip_indirect));
        memcpy(slot + sizeof(jmp_rip_indirect), &target, sizeof(target));

        patch_32r(location, (uintptr_t)slot - 4);
        return;
    }

    // Within signed 32-bit range: reference the symbol directly.
    patch_32r(location, target - 4);
}

static void
combine_symbol_mask(const symbol_mask src, symbol_mask dest)
{
Expand Down Expand Up @@ -515,8 +562,13 @@ _PyJIT_Compile(_PyExecutorObject *executor, const _PyUOpInstruction trace[], siz
// Round up to the nearest page:
size_t page_size = get_page_size();
assert((page_size & (page_size - 1)) == 0);
size_t padding = page_size - ((code_size + state.trampolines.size + data_size) & (page_size - 1));
size_t total_size = code_size + state.trampolines.size + data_size + padding;
size_t code_padding =
DATA_ALIGN - ((code_size + state.trampolines.size) & (DATA_ALIGN - 1));
size_t padding = page_size -
((code_size + state.trampolines.size + code_padding + data_size) &
(page_size - 1));
size_t total_size =
code_size + state.trampolines.size + code_padding + data_size + padding;
unsigned char *memory = jit_alloc(total_size);
if (memory == NULL) {
return -1;
Expand All @@ -535,7 +587,7 @@ _PyJIT_Compile(_PyExecutorObject *executor, const _PyUOpInstruction trace[], siz
// Loop again to emit the code:
unsigned char *code = memory;
state.trampolines.mem = memory + code_size;
unsigned char *data = memory + code_size + state.trampolines.size;
unsigned char *data = memory + code_size + state.trampolines.size + code_padding;
// Compile the shim, which handles converting between the native
// calling convention and the calling convention used by jitted code
// (which may be different for efficiency reasons).
Expand All @@ -557,7 +609,9 @@ _PyJIT_Compile(_PyExecutorObject *executor, const _PyUOpInstruction trace[], siz
code += group->code_size;
data += group->data_size;
assert(code == memory + code_size);
assert(data == memory + code_size + state.trampolines.size + data_size);
assert(
data ==
memory + code_size + state.trampolines.size + code_padding + data_size);
if (mark_executable(memory, total_size)) {
jit_free(memory, total_size);
return -1;
Expand Down
22 changes: 13 additions & 9 deletions Tools/jit/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,32 +9,32 @@ Python 3.11 or newer is required to build the JIT.

The JIT compiler does not require end users to install any third-party dependencies, but part of it must be *built* using LLVM[^why-llvm]. You are *not* required to build the rest of CPython using LLVM, or even the same version of LLVM (in fact, this is uncommon).

LLVM version 19 is required. Both `clang` and `llvm-readobj` need to be installed and discoverable (version suffixes, like `clang-19`, are okay). It's highly recommended that you also have `llvm-objdump` available, since this allows the build script to dump human-readable assembly for the generated code.
LLVM version 21 is the officially supported version. If needed, you can select a different version by setting the `LLVM_VERSION` environment variable during configure. Both `clang` and `llvm-readobj` need to be installed and discoverable (version suffixes, like `clang-21`, are okay). It's highly recommended that you also have `llvm-objdump` available, since this allows the build script to dump human-readable assembly for the generated code.

It's easy to install all of the required tools:

### Linux

Install LLVM 19 on Ubuntu/Debian:
Install LLVM 21 on Ubuntu/Debian:

```sh
wget https://apt.llvm.org/llvm.sh
chmod +x llvm.sh
sudo ./llvm.sh 19
sudo ./llvm.sh 21
```

Install LLVM 19 on Fedora Linux 40 or newer:
Install LLVM 21 on Fedora Linux 40 or newer:

```sh
sudo dnf install 'clang(major) = 19' 'llvm(major) = 19'
sudo dnf install 'clang(major) = 21' 'llvm(major) = 21'
```

### macOS

Install LLVM 19 with [Homebrew](https://brew.sh):
Install LLVM 21 with [Homebrew](https://brew.sh):

```sh
brew install llvm@19
brew install llvm@21
```

Homebrew won't add any of the tools to your `$PATH`. That's okay; the build script knows how to find them.
Expand All @@ -43,14 +43,18 @@ Homebrew won't add any of the tools to your `$PATH`. That's okay; the build scri

LLVM is downloaded automatically (along with other external binary dependencies) by `PCbuild\build.bat`.

Otherwise, you can install LLVM 19 [by searching for it on LLVM's GitHub releases page](https://github.com/llvm/llvm-project/releases?q=19), clicking on "Assets", downloading the appropriate Windows installer for your platform (likely the file ending with `-win64.exe`), and running it. **When installing, be sure to select the option labeled "Add LLVM to the system PATH".**
Otherwise, you can install LLVM 21 [by searching for it on LLVM's GitHub releases page](https://github.com/llvm/llvm-project/releases?q=21), clicking on "Assets", downloading the appropriate Windows installer for your platform (likely the file ending with `-win64.exe`), and running it. **When installing, be sure to select the option labeled "Add LLVM to the system PATH".**

Alternatively, you can use [chocolatey](https://chocolatey.org):

```sh
choco install llvm --version=19.1.0
choco install llvm --version=21.1.8
```

### Dev Containers

If you are working on CPython in a [Codespaces instance](https://devguide.python.org/getting-started/setup-building/#using-codespaces), there's no
need to install LLVM as the Fedora 42 base image includes LLVM 21 out of the box.

## Building

Expand Down
Loading
Loading