Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Initial implementation of landlock calls #903

Open
wants to merge 8 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ jobs:
strategy:
matrix:
python-version: [ '3.7', '3.8', '3.9', '3.10', '3.11' ]
sandbox_mode: [ 'ptrace+seccomp' ]
steps:
- uses: actions/checkout@v3
- name: Download docker image
Expand Down Expand Up @@ -118,6 +119,7 @@ jobs:
strategy:
matrix:
python-version: [ '3.11' ]
sandbox_mode: [ 'ptrace+seccomp', 'ptrace+seccomp+landlock' ]
steps:
- uses: actions/checkout@v3
- name: Download docker image
Expand All @@ -134,6 +136,7 @@ jobs:
export LANG=C.UTF-8
export PYTHONIOENCODING=utf8
export PYTHON="/code/python${{ matrix.python-version }}/bin/python${{ matrix.python-version }}"
export DMOJ_SANDBOX_MODE=${{ matrix.sandbox_mode }}

"$PYTHON" -m pip install --upgrade pip wheel
"$PYTHON" -m pip install cython coverage
Expand Down
6 changes: 6 additions & 0 deletions dmoj/citest.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,18 @@

from dmoj import judgeenv
from dmoj.contrib import load_contrib_modules
from dmoj.cptbox import has_landlock
from dmoj.executors import executors
from dmoj.testsuite import Tester
from dmoj.utils.ansi import print_ansi


def ci_test(executors_to_test, overrides, allow_fail=frozenset()):
if has_landlock():
print('Running CI with landlock and seccomp...')
else:
print('Running CI with just seccomp...')

result = {}
failed = False
failed_executors = []
Expand Down
1 change: 1 addition & 0 deletions dmoj/cptbox/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
PTBOX_ABI_X32,
PTBOX_ABI_X64,
PTBOX_ABI_X86,
has_landlock,
)
from dmoj.cptbox.handlers import ALLOW, DISALLOW
from dmoj.cptbox.isolate import FilesystemSyscallKind, IsolateTracer
Expand Down
3 changes: 3 additions & 0 deletions dmoj/cptbox/_cptbox.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,9 @@ PTBOX_SPAWN_FAIL_SECCOMP: int
PTBOX_SPAWN_FAIL_TRACEME: int
PTBOX_SPAWN_FAIL_EXECVE: int
PTBOX_SPAWN_FAIL_SETAFFINITY: int
PTBOX_SPAWN_FAIL_LANDLOCK: int

def has_landlock() -> bool: ...

AT_FDCWD: int
bsd_get_proc_cwd: Callable[[int], str]
Expand Down
42 changes: 40 additions & 2 deletions dmoj/cptbox/_cptbox.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,12 @@ from libcpp cimport bool
from posix.resource cimport rusage
from posix.types cimport pid_t

__all__ = ['Process', 'Debugger', 'bsd_get_proc_cwd', 'bsd_get_proc_fdno', 'MAX_SYSCALL_NUMBER',
__all__ = ['Process', 'Debugger', 'bsd_get_proc_cwd', 'bsd_get_proc_fdno', 'has_landlock', 'landlock_version', 'MAX_SYSCALL_NUMBER',
'AT_FDCWD', 'ALL_ABIS', 'SUPPORTED_ABIS', 'NATIVE_ABI',
'PTBOX_ABI_X86', 'PTBOX_ABI_X64', 'PTBOX_ABI_X32', 'PTBOX_ABI_ARM', 'PTBOX_ABI_ARM64',
'PTBOX_ABI_FREEBSD_X64', 'PTBOX_ABI_INVALID', 'PTBOX_ABI_COUNT',
'PTBOX_SPAWN_FAIL_NO_NEW_PRIVS', 'PTBOX_SPAWN_FAIL_SECCOMP', 'PTBOX_SPAWN_FAIL_TRACEME',
'PTBOX_SPAWN_FAIL_EXECVE', 'PTBOX_SPAWN_FAIL_SETAFFINITY']
'PTBOX_SPAWN_FAIL_EXECVE', 'PTBOX_SPAWN_FAIL_SETAFFINITY', 'PTBOX_SPAWN_FAIL_LANDLOCK']


cdef extern from 'ptbox.h' nogil:
Expand Down Expand Up @@ -121,6 +121,14 @@ cdef extern from 'helper.h' nogil:
int *seccomp_handlers
unsigned long cpu_affinity_mask

const char **landlock_read_exact_files
const char **landlock_read_exact_dirs
const char **landlock_read_recursive_dirs
const char **landlock_write_exact_files
const char **landlock_write_exact_dirs
const char **landlock_write_recursive_dirs

int get_landlock_version()
void cptbox_closefrom(int lowfd)
int cptbox_child_run(child_config *)
char *_bsd_get_proc_cwd "bsd_get_proc_cwd"(pid_t pid)
Expand All @@ -132,6 +140,7 @@ cdef extern from 'helper.h' nogil:
PTBOX_SPAWN_FAIL_TRACEME
PTBOX_SPAWN_FAIL_EXECVE
PTBOX_SPAWN_FAIL_SETAFFINITY
PTBOX_SPAWN_FAIL_LANDLOCK

int _memory_fd_create "memory_fd_create"()
int _memory_fd_seal "memory_fd_seal"(int fd)
Expand Down Expand Up @@ -222,6 +231,16 @@ def memory_fd_seal(int fd):
if result == -1:
PyErr_SetFromErrno(OSError)

def landlock_version():
cdef int version = get_landlock_version()
if version == -1:
PyErr_SetFromErrno(OSError)
return version

def has_landlock():
# ABI 2 is the minimum acceptable version for us.
return landlock_version() >= 2

cdef class Process


Expand Down Expand Up @@ -491,6 +510,12 @@ cdef class Process:
config.argv = NULL
config.envp = NULL
config.seccomp_handlers = NULL
config.landlock_read_exact_files = NULL
config.landlock_read_exact_dirs = NULL
config.landlock_read_recursive_dirs = NULL
config.landlock_write_exact_files = NULL
config.landlock_write_exact_dirs = NULL
config.landlock_write_recursive_dirs = NULL

try:
config.address_space = self._child_address
Expand Down Expand Up @@ -519,12 +544,25 @@ cdef class Process:
for i in range(MAX_SYSCALL):
config.seccomp_handlers[i] = handlers[i]

config.landlock_read_exact_files = <const char**>alloc_byte_array(self.landlock_read_exact_files)
config.landlock_read_exact_dirs = <const char**>alloc_byte_array(self.landlock_read_exact_dirs)
config.landlock_read_recursive_dirs = <const char**>alloc_byte_array(self.landlock_read_recursive_dirs)
config.landlock_write_exact_files = <const char**>alloc_byte_array(self.landlock_write_exact_files)
config.landlock_write_exact_dirs = <const char**>alloc_byte_array(self.landlock_write_exact_dirs)
config.landlock_write_recursive_dirs = <const char**>alloc_byte_array(self.landlock_write_recursive_dirs)

if self.process.spawn(pt_child, &config):
raise RuntimeError('failed to spawn child')
finally:
free(config.argv)
free(config.envp)
free(config.seccomp_handlers)
free(config.landlock_read_exact_files)
free(config.landlock_read_exact_dirs)
free(config.landlock_read_recursive_dirs)
free(config.landlock_write_exact_files)
free(config.landlock_write_exact_dirs)
free(config.landlock_write_recursive_dirs)

cpdef _monitor(self):
cdef int exitcode
Expand Down
54 changes: 41 additions & 13 deletions dmoj/cptbox/compiler_isolate.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import struct
import sys

from dmoj.cptbox._cptbox import AT_FDCWD, Debugger
from dmoj.cptbox._cptbox import AT_FDCWD, Debugger, has_landlock
from dmoj.cptbox.filesystem_policies import ExactFile, FilesystemPolicy, RecursiveDir
from dmoj.cptbox.handlers import ACCESS_EFAULT, ACCESS_EPERM, ALLOW
from dmoj.cptbox.isolate import DeniedSyscall, FilesystemSyscallKind, IsolateTracer
Expand All @@ -23,6 +23,42 @@ def __init__(self, *, tmpdir, read_fs, write_fs):
write_fs += BASE_WRITE_FILESYSTEM + [RecursiveDir(tmpdir)]
super().__init__(read_fs=read_fs, write_fs=write_fs)

if has_landlock():
self.update(
{
# Directory system calls
sys_mkdir: ALLOW,
sys_mkdirat: ALLOW,
sys_rmdir: ALLOW,
# Link/Rename
sys_link: ALLOW,
sys_linkat: ALLOW,
sys_symlink: ALLOW,
sys_rename: ALLOW,
sys_renameat: ALLOW,
# Unlink
sys_unlink: ALLOW,
sys_unlinkat: ALLOW,
}
)
else:
self.update(
{
# Directory system calls
sys_mkdir: self.handle_file_access(FilesystemSyscallKind.WRITE, file_reg=0),
sys_mkdirat: self.handle_file_access_at(FilesystemSyscallKind.WRITE, dir_reg=0, file_reg=1),
sys_rmdir: self.handle_file_access(FilesystemSyscallKind.WRITE, file_reg=0),
# Linking system calls
sys_link: self.handle_file_access(FilesystemSyscallKind.WRITE, file_reg=1),
sys_linkat: self.handle_file_access_at(FilesystemSyscallKind.WRITE, dir_reg=2, file_reg=3),
sys_unlink: self.handle_file_access(FilesystemSyscallKind.WRITE, file_reg=0),
sys_unlinkat: self.handle_file_access_at(FilesystemSyscallKind.WRITE, dir_reg=0, file_reg=1),
sys_symlink: self.handle_file_access(FilesystemSyscallKind.WRITE, file_reg=1),
sys_rename: self.handle_rename,
sys_renameat: self.handle_renameat,
}
)

self.update(
{
# Process spawning system calls
Expand All @@ -31,16 +67,6 @@ def __init__(self, *, tmpdir, read_fs, write_fs):
sys_execve: ALLOW,
sys_getcpu: ALLOW,
sys_getpgid: ALLOW,
# Directory system calls
sys_mkdir: self.handle_file_access(FilesystemSyscallKind.WRITE, file_reg=0),
sys_mkdirat: self.handle_file_access_at(FilesystemSyscallKind.WRITE, dir_reg=0, file_reg=1),
sys_rmdir: self.handle_file_access(FilesystemSyscallKind.WRITE, file_reg=0),
# Linking system calls
sys_link: self.handle_file_access(FilesystemSyscallKind.WRITE, file_reg=1),
sys_linkat: self.handle_file_access_at(FilesystemSyscallKind.WRITE, dir_reg=2, file_reg=3),
sys_unlink: self.handle_file_access(FilesystemSyscallKind.WRITE, file_reg=0),
sys_unlinkat: self.handle_file_access_at(FilesystemSyscallKind.WRITE, dir_reg=0, file_reg=1),
sys_symlink: self.handle_file_access(FilesystemSyscallKind.WRITE, file_reg=1),
# Miscellaneous other filesystem system calls
sys_chdir: self.handle_file_access(FilesystemSyscallKind.READ, file_reg=0),
sys_chmod: self.handle_file_access(FilesystemSyscallKind.WRITE, file_reg=0),
Expand All @@ -53,8 +79,6 @@ def __init__(self, *, tmpdir, read_fs, write_fs):
sys_fchmod: self.handle_fchmod,
sys_fallocate: ALLOW,
sys_ftruncate: ALLOW,
sys_rename: self.handle_rename,
sys_renameat: self.handle_renameat,
# I/O system calls
sys_readv: ALLOW,
sys_pwrite64: ALLOW,
Expand Down Expand Up @@ -103,6 +127,10 @@ def __init__(self, *, tmpdir, read_fs, write_fs):
}
)

def fullpath_from_reg_and_dirfd(self, debugger: Debugger, *, file_reg, dirfd):
rel_file = self.get_rel_file(debugger, reg=file_reg)
return self.get_full_path_unnormalized(debugger, rel_file, dirfd=dirfd)

def handle_rename(self, debugger: Debugger) -> None:
self.access_check(self._write_fs_jail_getter, self._dirfd_getter_cwd, file_reg=0)(debugger)
self.access_check(self._write_fs_jail_getter, self._dirfd_getter_cwd, file_reg=1)(debugger)
Expand Down
72 changes: 71 additions & 1 deletion dmoj/cptbox/helper.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#include "helper.h"
#include "landlock_helpers.h"
#include "ptbox.h"

#include <dirent.h>
Expand Down Expand Up @@ -90,13 +91,62 @@ int cptbox_child_run(const struct child_config *config) {
kill(getpid(), SIGSTOP);

#if !PTBOX_FREEBSD
// landlock setup
int ruleset_fd, rc;
struct landlock_ruleset_attr ruleset_attr = {
.handled_access_fs = LANDLOCK_ACCESS_FS_EXECUTE | LANDLOCK_ACCESS_FS_WRITE_FILE | LANDLOCK_ACCESS_FS_READ_FILE |
LANDLOCK_ACCESS_FS_READ_DIR | LANDLOCK_ACCESS_FS_REMOVE_DIR |
LANDLOCK_ACCESS_FS_REMOVE_FILE | LANDLOCK_ACCESS_FS_MAKE_REG |
LANDLOCK_ACCESS_FS_MAKE_SYM | LANDLOCK_ACCESS_FS_MAKE_DIR | LANDLOCK_ACCESS_FS_REFER,
};

int landlock_version = get_landlock_version();
if (landlock_version < 2) {
// ABI not old enough. Skip to seccomp
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
// ABI not old enough. Skip to seccomp
// ABI not new enough. Skip to seccomp

goto seccomp_setup;
}

ruleset_fd = landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
if (ruleset_fd < 0) {
perror("Failed to create a ruleset");
return PTBOX_SPAWN_FAIL_LANDLOCK;
}

// Note: WRITE must imply READ. This is required because even if one rule allows writing and another allows reading,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The O_TRUNC handling in Landlock scares me. Could we add some testcases with a custom grader that make sure that we don't allow truncating etc.?

// unless there is a rule that allows both, a call with O_RDWR will fail.
if (landlock_add_rules(ruleset_fd, config->landlock_read_exact_files,
LANDLOCK_ACCESS_FS_READ_FILE | LANDLOCK_ACCESS_FS_EXECUTE) ||
landlock_add_rules(ruleset_fd, config->landlock_read_exact_dirs, LANDLOCK_ACCESS_FS_READ_DIR) ||
landlock_add_rules(ruleset_fd, config->landlock_read_recursive_dirs,
LANDLOCK_ACCESS_FS_READ_FILE | LANDLOCK_ACCESS_FS_EXECUTE | LANDLOCK_ACCESS_FS_READ_DIR) ||
landlock_add_rules(ruleset_fd, config->landlock_write_exact_files,
LANDLOCK_ACCESS_FS_READ_FILE | LANDLOCK_ACCESS_FS_EXECUTE | LANDLOCK_ACCESS_FS_WRITE_FILE) ||
landlock_add_rules(ruleset_fd, config->landlock_write_exact_dirs, LANDLOCK_ACCESS_FS_READ_DIR) ||
landlock_add_rules(ruleset_fd, config->landlock_write_recursive_dirs,
LANDLOCK_ACCESS_FS_READ_FILE | LANDLOCK_ACCESS_FS_EXECUTE | LANDLOCK_ACCESS_FS_READ_DIR |
LANDLOCK_ACCESS_FS_WRITE_FILE | LANDLOCK_ACCESS_FS_REMOVE_DIR |
LANDLOCK_ACCESS_FS_REMOVE_FILE | LANDLOCK_ACCESS_FS_READ_DIR |
LANDLOCK_ACCESS_FS_MAKE_REG | LANDLOCK_ACCESS_FS_MAKE_SYM | LANDLOCK_ACCESS_FS_MAKE_DIR |
LANDLOCK_ACCESS_FS_REFER)) {
// landlock_add_rules logs errors
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could this comment have more details? I don't understand what it's referring to, is this comment unrelated to the following close?

close(ruleset_fd);
return PTBOX_SPAWN_FAIL_LANDLOCK;
}

rc = landlock_restrict_self(ruleset_fd, 0);
close(ruleset_fd);
if (rc) {
perror("Failed to enforce ruleset");
return PTBOX_SPAWN_FAIL_LANDLOCK;
}

seccomp_setup:
scmp_filter_ctx ctx = seccomp_init(SCMP_ACT_TRACE(0));
if (!ctx) {
fprintf(stderr, "Failed to initialize seccomp context!");
goto seccomp_init_fail;
}

int rc;
// By default, the native architecture is added to the filter already, so we add all the non-native ones.
// This will bloat the filter due to additional architectures, but a few extra compares in the BPF matters
// very little when syscalls are rare and other overhead is expensive.
Expand Down Expand Up @@ -175,6 +225,26 @@ int cptbox_child_run(const struct child_config *config) {
#endif
}

int get_landlock_version() {
#if !PTBOX_FREEBSD
char *sandbox_mode = getenv("DMOJ_SANDBOX_MODE");
if (sandbox_mode != nullptr && strcmp(sandbox_mode, "ptrace+seccomp") == 0) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a surprising place for this check to live, and I think the check should be stricter than this.

We should validate that the string is one of:

  • ptrace+seccomp
  • ptrace+seccomp+landlock
  • auto

and otherwise bail out. Otherwise we can't confidently make changes to the interpretation of this environment variable, as we'd have been too lax in its inputs in earlier versions.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree that this location is strange. Where should it live?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think living in this file is probably fine, and it should return an enum. Then it should be consulted in the spawn routine alongside the Landlock version we have available. My concern here is more about subtly overloading the semantics of get_landlock_version than anything else.

We should bail if someone requests:

  • seccomp or seccomp+landlock support on FreeBSD
  • ptrace-only support on Linux
  • seccomp+landlock on a Linux with too old a Landlock ABI

This sanity check could exist in judge.py alongside all the other sanity checks, but I don't feel strongly about that.

// Allow disabling of landlock.
return 0;
}

int landlock_version = landlock_create_ruleset(NULL, 0, LANDLOCK_CREATE_RULESET_VERSION);
if (landlock_version >= 0) {
return landlock_version;
}
if (errno == ENOSYS || errno == EOPNOTSUPP)
return 0;
return -1;
#else
return 0; // FreeBSD does not have landlock
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should abort() here.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How? Consumers of this function throw a python error, but what would be appropriate to do here?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Got it. I think has_landlock should be implemented here rather than in Python, so we can return false order FREEBSD.

#endif
}

// From python's _posixsubprocess
static int pos_int_from_ascii(char *name) {
int num = 0;
Expand Down
9 changes: 9 additions & 0 deletions dmoj/cptbox/helper.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#define PTBOX_SPAWN_FAIL_TRACEME 204
#define PTBOX_SPAWN_FAIL_EXECVE 205
#define PTBOX_SPAWN_FAIL_SETAFFINITY 206
#define PTBOX_SPAWN_FAIL_LANDLOCK 207

struct child_config {
unsigned long memory;
Expand All @@ -27,8 +28,16 @@ struct child_config {
int *seccomp_handlers;
// 64 cores ought to be enough for anyone.
unsigned long cpu_affinity_mask;
const char **landlock_read_exact_files;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This may end up looking a little cleaner if we have a

struct {
    const char **read_exact_files;
    ...
} landlock;

or even

const struct {
    char **read_exact_files;
    ...
} landlock;

const char **landlock_read_exact_dirs;
const char **landlock_read_recursive_dirs;
const char **landlock_write_exact_files;
const char **landlock_write_exact_dirs;
const char **landlock_write_recursive_dirs;
};

int get_landlock_version();

void cptbox_closefrom(int lowfd);
int cptbox_child_run(const struct child_config *config);

Expand Down
2 changes: 2 additions & 0 deletions dmoj/cptbox/isolate.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,8 @@ class FilesystemSyscallKind(Enum):
class IsolateTracer(dict):
def __init__(self, *, read_fs: Sequence[FilesystemAccessRule], write_fs: Sequence[FilesystemAccessRule]):
super().__init__()
self.read_fs = read_fs
self.write_fs = write_fs
self.read_fs_jail = self._compile_fs_jail(read_fs)
self.write_fs_jail = self._compile_fs_jail(write_fs)

Expand Down
Loading