Compare commits

..

2 commits

Author SHA1 Message Date
92285258b0 steam: Allow the use of sub-namespaces
...and add a seccomp_unsafe profile just for these apps.

Steam Runtime uses Bubblewrap to isolate the games on its own. There is
no way to make Bubblewrap work in our containers other than to allow
unprivileged namespaces inside them.
2023-06-07 15:44:19 -04:00
ef3c3c001f dobu-run: Re-execute entrypoint when container already exists 2023-06-07 14:47:28 -04:00
5 changed files with 817 additions and 14 deletions

View file

@ -8,6 +8,4 @@ USER user
ENV PROTON_NO_FSYNC=1
# We disable CEF sandbox because we already have podman
# and our seccomp filters do not permit another layer of namespaces
ENTRYPOINT [ "/usr/games/steam", "-no-cef-sandbox" ]
ENTRYPOINT [ "/usr/games/steam" ]

View file

@ -1,4 +1,8 @@
#!/usr/bin/env bash
INVALIDATE_CACHE_UPSTREAM_UBUNTU="steam"
DESKTOP_FILE_PATH="/usr/share/applications/steam.desktop"
# Steam Runtime requires its own namespaces
# so we have to allow them in our seccomp filter.
# Don't use it if you don't trust their sandboxing (pressure-vessel).
UNSAFE_I_KNOW_WHAT_I_AM_DOING_ALLOW_NAMESPACES=true

775
assets/seccomp_unsafe.json Normal file
View file

@ -0,0 +1,775 @@
{
"defaultAction": "SCMP_ACT_ERRNO",
"defaultErrnoRet": 1,
"archMap": [
{
"architecture": "SCMP_ARCH_X86_64",
"subArchitectures": [
"SCMP_ARCH_X86",
"SCMP_ARCH_X32"
]
},
{
"architecture": "SCMP_ARCH_AARCH64",
"subArchitectures": [
"SCMP_ARCH_ARM"
]
},
{
"architecture": "SCMP_ARCH_MIPS64",
"subArchitectures": [
"SCMP_ARCH_MIPS",
"SCMP_ARCH_MIPS64N32"
]
},
{
"architecture": "SCMP_ARCH_MIPS64N32",
"subArchitectures": [
"SCMP_ARCH_MIPS",
"SCMP_ARCH_MIPS64"
]
},
{
"architecture": "SCMP_ARCH_MIPSEL64",
"subArchitectures": [
"SCMP_ARCH_MIPSEL",
"SCMP_ARCH_MIPSEL64N32"
]
},
{
"architecture": "SCMP_ARCH_MIPSEL64N32",
"subArchitectures": [
"SCMP_ARCH_MIPSEL",
"SCMP_ARCH_MIPSEL64"
]
},
{
"architecture": "SCMP_ARCH_S390X",
"subArchitectures": [
"SCMP_ARCH_S390"
]
},
{
"architecture": "SCMP_ARCH_RISCV64",
"subArchitectures": null
}
],
"syscalls": [
{
"names": [
"accept",
"accept4",
"access",
"adjtimex",
"alarm",
"bind",
"brk",
"capget",
"capset",
"chdir",
"chmod",
"chown",
"chown32",
"clock_adjtime",
"clock_adjtime64",
"clock_getres",
"clock_getres_time64",
"clock_gettime",
"clock_gettime64",
"clock_nanosleep",
"clock_nanosleep_time64",
"close",
"close_range",
"connect",
"copy_file_range",
"creat",
"dup",
"dup2",
"dup3",
"epoll_create",
"epoll_create1",
"epoll_ctl",
"epoll_ctl_old",
"epoll_pwait",
"epoll_pwait2",
"epoll_wait",
"epoll_wait_old",
"eventfd",
"eventfd2",
"execve",
"execveat",
"exit",
"exit_group",
"faccessat",
"faccessat2",
"fadvise64",
"fadvise64_64",
"fallocate",
"fanotify_mark",
"fchdir",
"fchmod",
"fchmodat",
"fchown",
"fchown32",
"fchownat",
"fcntl",
"fcntl64",
"fdatasync",
"fgetxattr",
"flistxattr",
"flock",
"fork",
"fremovexattr",
"fsetxattr",
"fstat",
"fstat64",
"fstatat64",
"fstatfs",
"fstatfs64",
"fsync",
"ftruncate",
"ftruncate64",
"futex",
"futex_time64",
"futex_waitv",
"futimesat",
"getcpu",
"getcwd",
"getdents",
"getdents64",
"getegid",
"getegid32",
"geteuid",
"geteuid32",
"getgid",
"getgid32",
"getgroups",
"getgroups32",
"getitimer",
"getpeername",
"getpgid",
"getpgrp",
"getpid",
"getppid",
"getpriority",
"getrandom",
"getresgid",
"getresgid32",
"getresuid",
"getresuid32",
"getrlimit",
"get_robust_list",
"getrusage",
"getsid",
"getsockname",
"getsockopt",
"get_thread_area",
"gettid",
"gettimeofday",
"getuid",
"getuid32",
"getxattr",
"inotify_add_watch",
"inotify_init",
"inotify_init1",
"inotify_rm_watch",
"io_cancel",
"ioctl",
"io_destroy",
"io_getevents",
"io_pgetevents",
"io_pgetevents_time64",
"ioprio_get",
"ioprio_set",
"io_setup",
"io_submit",
"io_uring_enter",
"io_uring_register",
"io_uring_setup",
"ipc",
"kill",
"landlock_add_rule",
"landlock_create_ruleset",
"landlock_restrict_self",
"lchown",
"lchown32",
"lgetxattr",
"link",
"linkat",
"listen",
"listxattr",
"llistxattr",
"_llseek",
"lremovexattr",
"lseek",
"lsetxattr",
"lstat",
"lstat64",
"madvise",
"membarrier",
"memfd_create",
"memfd_secret",
"mincore",
"mkdir",
"mkdirat",
"mknod",
"mknodat",
"mlock",
"mlock2",
"mlockall",
"mmap",
"mmap2",
"mprotect",
"mq_getsetattr",
"mq_notify",
"mq_open",
"mq_timedreceive",
"mq_timedreceive_time64",
"mq_timedsend",
"mq_timedsend_time64",
"mq_unlink",
"mremap",
"msgctl",
"msgget",
"msgrcv",
"msgsnd",
"msync",
"munlock",
"munlockall",
"munmap",
"nanosleep",
"newfstatat",
"_newselect",
"open",
"openat",
"openat2",
"pause",
"pidfd_open",
"pidfd_send_signal",
"pipe",
"pipe2",
"pkey_alloc",
"pkey_free",
"pkey_mprotect",
"poll",
"ppoll",
"ppoll_time64",
"prctl",
"pread64",
"preadv",
"preadv2",
"prlimit64",
"process_mrelease",
"pselect6",
"pselect6_time64",
"pwrite64",
"pwritev",
"pwritev2",
"read",
"readahead",
"readlink",
"readlinkat",
"readv",
"recv",
"recvfrom",
"recvmmsg",
"recvmmsg_time64",
"recvmsg",
"remap_file_pages",
"removexattr",
"rename",
"renameat",
"renameat2",
"restart_syscall",
"rmdir",
"rseq",
"rt_sigaction",
"rt_sigpending",
"rt_sigprocmask",
"rt_sigqueueinfo",
"rt_sigreturn",
"rt_sigsuspend",
"rt_sigtimedwait",
"rt_sigtimedwait_time64",
"rt_tgsigqueueinfo",
"sched_getaffinity",
"sched_getattr",
"sched_getparam",
"sched_get_priority_max",
"sched_get_priority_min",
"sched_getscheduler",
"sched_rr_get_interval",
"sched_rr_get_interval_time64",
"sched_setaffinity",
"sched_setattr",
"sched_setparam",
"sched_setscheduler",
"sched_yield",
"seccomp",
"select",
"semctl",
"semget",
"semop",
"semtimedop",
"semtimedop_time64",
"send",
"sendfile",
"sendfile64",
"sendmmsg",
"sendmsg",
"sendto",
"setfsgid",
"setfsgid32",
"setfsuid",
"setfsuid32",
"setgid",
"setgid32",
"setgroups",
"setgroups32",
"setitimer",
"setpgid",
"setpriority",
"setregid",
"setregid32",
"setresgid",
"setresgid32",
"setresuid",
"setresuid32",
"setreuid",
"setreuid32",
"setrlimit",
"set_robust_list",
"setsid",
"setsockopt",
"set_thread_area",
"set_tid_address",
"setuid",
"setuid32",
"setxattr",
"shmat",
"shmctl",
"shmdt",
"shmget",
"shutdown",
"sigaltstack",
"signalfd",
"signalfd4",
"sigprocmask",
"sigreturn",
"socketcall",
"socketpair",
"splice",
"stat",
"stat64",
"statfs",
"statfs64",
"statx",
"symlink",
"symlinkat",
"sync",
"sync_file_range",
"syncfs",
"sysinfo",
"tee",
"tgkill",
"time",
"timer_create",
"timer_delete",
"timer_getoverrun",
"timer_gettime",
"timer_gettime64",
"timer_settime",
"timer_settime64",
"timerfd_create",
"timerfd_gettime",
"timerfd_gettime64",
"timerfd_settime",
"timerfd_settime64",
"times",
"tkill",
"truncate",
"truncate64",
"ugetrlimit",
"umask",
"uname",
"unlink",
"unlinkat",
"utime",
"utimensat",
"utimensat_time64",
"utimes",
"vfork",
"vmsplice",
"wait4",
"waitid",
"waitpid",
"write",
"writev"
],
"action": "SCMP_ACT_ALLOW"
},
{
"names": [
"process_vm_readv",
"process_vm_writev",
"ptrace"
],
"action": "SCMP_ACT_ALLOW",
"includes": {
"minKernel": "4.8"
}
},
{
"names": [
"socket"
],
"action": "SCMP_ACT_ALLOW",
"args": [
{
"index": 0,
"value": 40,
"op": "SCMP_CMP_NE"
}
]
},
{
"names": [
"personality"
],
"action": "SCMP_ACT_ALLOW",
"args": [
{
"index": 0,
"value": 0,
"op": "SCMP_CMP_EQ"
}
]
},
{
"names": [
"personality"
],
"action": "SCMP_ACT_ALLOW",
"args": [
{
"index": 0,
"value": 8,
"op": "SCMP_CMP_EQ"
}
]
},
{
"names": [
"personality"
],
"action": "SCMP_ACT_ALLOW",
"args": [
{
"index": 0,
"value": 131072,
"op": "SCMP_CMP_EQ"
}
]
},
{
"names": [
"personality"
],
"action": "SCMP_ACT_ALLOW",
"args": [
{
"index": 0,
"value": 131080,
"op": "SCMP_CMP_EQ"
}
]
},
{
"names": [
"personality"
],
"action": "SCMP_ACT_ALLOW",
"args": [
{
"index": 0,
"value": 4294967295,
"op": "SCMP_CMP_EQ"
}
]
},
{
"names": [
"sync_file_range2",
"swapcontext"
],
"action": "SCMP_ACT_ALLOW",
"includes": {
"arches": [
"ppc64le"
]
}
},
{
"names": [
"arm_fadvise64_64",
"arm_sync_file_range",
"sync_file_range2",
"breakpoint",
"cacheflush",
"set_tls"
],
"action": "SCMP_ACT_ALLOW",
"includes": {
"arches": [
"arm",
"arm64"
]
}
},
{
"names": [
"arch_prctl"
],
"action": "SCMP_ACT_ALLOW",
"includes": {
"arches": [
"amd64",
"x32"
]
}
},
{
"names": [
"modify_ldt"
],
"action": "SCMP_ACT_ALLOW",
"includes": {
"arches": [
"amd64",
"x32",
"x86"
]
}
},
{
"names": [
"s390_pci_mmio_read",
"s390_pci_mmio_write",
"s390_runtime_instr"
],
"action": "SCMP_ACT_ALLOW",
"includes": {
"arches": [
"s390",
"s390x"
]
}
},
{
"names": [
"riscv_flush_icache"
],
"action": "SCMP_ACT_ALLOW",
"includes": {
"arches": [
"riscv64"
]
}
},
{
"names": [
"open_by_handle_at"
],
"action": "SCMP_ACT_ALLOW",
"includes": {
"caps": [
"CAP_DAC_READ_SEARCH"
]
}
},
{
"names": [
"fanotify_init",
"fsconfig",
"fsmount",
"fsopen",
"fspick",
"lookup_dcookie",
"name_to_handle_at",
"open_tree",
"perf_event_open",
"quotactl",
"quotactl_fd",
"setdomainname",
"sethostname",
"setns",
"syslog"
],
"action": "SCMP_ACT_ALLOW",
"includes": {
"caps": [
"CAP_SYS_ADMIN"
]
}
},
{
"names": [
"clone",
"clone3",
"unshare",
"bpf",
"mount",
"umount",
"umount2",
"pivot_root"
],
"action": "SCMP_ACT_ALLOW",
"comment": "Syscalls required by containers inside our containers (e.g. Steam Runtime); DO NOT USE unless the app is trusted to perform sandboxing correctly"
},
{
"names": [
"reboot"
],
"action": "SCMP_ACT_ALLOW",
"includes": {
"caps": [
"CAP_SYS_BOOT"
]
}
},
{
"names": [
"chroot"
],
"action": "SCMP_ACT_ALLOW",
"includes": {
"caps": [
"CAP_SYS_CHROOT"
]
}
},
{
"names": [
"delete_module",
"init_module",
"finit_module"
],
"action": "SCMP_ACT_ALLOW",
"includes": {
"caps": [
"CAP_SYS_MODULE"
]
}
},
{
"names": [
"acct"
],
"action": "SCMP_ACT_ALLOW",
"includes": {
"caps": [
"CAP_SYS_PACCT"
]
}
},
{
"names": [
"kcmp",
"pidfd_getfd",
"process_madvise",
"process_vm_readv",
"process_vm_writev",
"ptrace"
],
"action": "SCMP_ACT_ALLOW",
"includes": {
"caps": [
"CAP_SYS_PTRACE"
]
}
},
{
"names": [
"iopl",
"ioperm"
],
"action": "SCMP_ACT_ALLOW",
"includes": {
"caps": [
"CAP_SYS_RAWIO"
]
}
},
{
"names": [
"settimeofday",
"stime",
"clock_settime",
"clock_settime64"
],
"action": "SCMP_ACT_ALLOW",
"includes": {
"caps": [
"CAP_SYS_TIME"
]
}
},
{
"names": [
"vhangup"
],
"action": "SCMP_ACT_ALLOW",
"includes": {
"caps": [
"CAP_SYS_TTY_CONFIG"
]
}
},
{
"names": [
"get_mempolicy",
"mbind",
"set_mempolicy"
],
"action": "SCMP_ACT_ALLOW",
"includes": {
"caps": [
"CAP_SYS_NICE"
]
}
},
{
"names": [
"syslog"
],
"action": "SCMP_ACT_ALLOW",
"includes": {
"caps": [
"CAP_SYSLOG"
]
}
},
{
"names": [
"bpf"
],
"action": "SCMP_ACT_ALLOW",
"includes": {
"caps": [
"CAP_BPF"
]
}
},
{
"names": [
"perf_event_open"
],
"action": "SCMP_ACT_ALLOW",
"includes": {
"caps": [
"CAP_PERFMON"
]
}
}
]
}

View file

@ -18,7 +18,21 @@ log "Home directory path: $home_path"
assert_image_exists "$image_name"
if container_exists "$container_name"; then
die "$container_name is already running; stop the application or run \`podman stop $container_name\` manually"
log "$container_name is already running; re-executing entrypoint command"
log "If this is not desired, please stop the application or run \`podman stop $container_name\` manually"
podman exec -d "$container_name" $(container_entrypoint "$container_name")
exit 0
fi
# Load app control file because some apps require run-time customization
# TODO: Maybe these things should really be container labels?
[ -f "$script_path/apps/$1/control" ] && . "$script_path/apps/$1/control"
if [ "$UNSAFE_I_KNOW_WHAT_I_AM_DOING_ALLOW_NAMESPACES" == "true" ]; then
log "Enabling sub-namespaces support inside this container"
log "This is considered UNSAFE; DO NOT USE if the app inside container does not do its own sandboxing"
log "DO NOT USE if you don't trust sandboxing done by the app inside"
update_podman_security_args "seccomp_unsafe.json"
fi
if [[ -n $PULSE_SERVER ]]; then # remove prefix

View file

@ -9,16 +9,23 @@ DOBU_TMP=/tmp/dobu
. "$script_path/config-default.sh"
[ -f "$script_path/config.sh" ] && . "$script_path/config.sh"
# Default security-related arguments ALWAYS passed to podman
# Install a seccomp filter that disallows sub-namespaces which could lead to exploits
# and use the keep-id mode of userns, such that the user 1100 is mapped to the
# current host user, and the root user inside the namespace is mapped to an unrelated
# large uid on the host.
podman_security_args=(
--security-opt
seccomp="$script_path/assets/seccomp.json"
--userns=keep-id:uid=1100,gid=1100
)
# This function exists so that we can update the seccomp profile used
update_podman_security_args() {
local seccomp_profile="$1"
[ -z "$seccomp_profile" ] && seccomp_profile="seccomp.json"
# Default security-related arguments ALWAYS passed to podman
# Install a seccomp filter that disallows sub-namespaces which could lead to exploits
# and use the keep-id mode of userns, such that the user 1100 is mapped to the
# current host user, and the root user inside the namespace is mapped to an unrelated
# large uid on the host.
podman_security_args=(
--security-opt
seccomp="$script_path/assets/$seccomp_profile"
--userns=keep-id:uid=1100,gid=1100
)
}
update_podman_security_args
assert_prerequisites() {
command -v podman >/dev/null 2>&1 || die "Podman is required"
@ -65,6 +72,11 @@ container_exists() {
podman container exists $1 > /dev/null 2>&1
}
# Note: this works with __containers__, not images
container_entrypoint() {
podman inspect $1 | jq -r '.[0].Config.Entrypoint'
}
ensure_sommelier() {
assert_image_exists dobu/deps-sommelier
container_exists dobu-deps-sommelier && return