linux: Add mseal syscall support

It was added in Linux 6.10 (8be7258aad44b5e25977a98db136f677fa6f4370)
as a way to block operations such as unmapping, moving to another location,
shrinking the size, expanding the size, or modifying a pre-existing
memory mapping.

Although the system call only works on 64-bit CPUs, the entry point was
added for all ABIs (since the kernel might eventually implement it on
additional ones and/or the ABI can execute on a 64-bit kernel).

Checked on x86_64-linux-gnu.
This commit is contained in:
Adhemerval Zanella 2024-12-06 14:37:49 -03:00
parent 4e68a5ca5d
commit 9c858712dd
40 changed files with 276 additions and 1 deletions

4
NEWS
View File

@ -9,7 +9,9 @@ Version 2.42
Major new features:
[Add new features here]
* On Linux, the mseal function has been added.  It allows sealing memory
mappings to prevent further changes during process execution, such as
changing protection permissions, unmapping, moving to another location, or
shrinking the size.
Deprecated and removed features, and other changes affecting compatibility:

View File

@ -3072,6 +3072,75 @@ process memory, no matter how it was allocated. However, portable use
of the function requires that it is only used with memory regions
returned by @code{mmap} or @code{mmap64}.
@deftypefun int mseal (void *@var{address}, size_t @var{length}, unsigned long @var{flags})
@standards{Linux, sys/mman.h}
@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}}
A successful call to the @code{mseal} function protects the memory
range @var{address} of @var{length} bytes, previously allocated with
@code{mmap} or @code{mremap}, against further metadata changes such
as:
@itemize @bullet
@item
Unmapping, moving to another location, extending or shrinking the size,
via @code{munmap} and @code{mremap}.
@item
Moving or expanding a different VMA into the current location, via
@code{mremap}.
@item
Modifying the memory range with @code{mmap} along with flag @code{MAP_FIXED}.
@item
Changing the protection flags with @code{mprotect} or @code{pkey_mprotect}.
Also, for certain destructive @code{madvise} behaviours (@code{MADV_DONTNEED},
@code{MADV_FREE}, @code{MADV_DONTNEED_LOCKED}, and @code{MADV_WIPEONFORK}),
@code{mseal} only blocks the operation if the protection key associated with
the memory denies write.
@item
Destructive behaviors on anonymous memory, such as @code{madvise} with
@code{MADV_DONTNEED}.
@end itemize
The @var{address} must lie within virtual memory allocated by @code{mmap}
or @code{mremap}, and it must be page aligned.  The end address (@var{address}
plus @var{length}) must be within an allocated virtual memory range.  There
must be no unallocated memory between the start and end of the address range.
The @var{flags} argument is currently unused.
The @code{mseal} function returns @math{0} on success and @math{-1} on
failure.
The following @code{errno} error conditions are defined for this
function:
@table @code
@item EPERM
The system blocked the operation, and the given address range is unmodified
without a partial update.  This error is also returned when @code{mseal}
is issued on a 32-bit CPU (sealing is currently supported only on
64-bit CPUs, although running 32-bit binaries on a 64-bit kernel is
supported).
@item ENOMEM
Either the @var{address} is not allocated, or the end address is not within the
allocation, or there is unallocated memory between the start and end addresses.
@item ENOSYS
The kernel does not support the @code{mseal} syscall.
@strong{NB:} Memory sealing changes the lifetime of a mapping: sealed
memory cannot be unmapped until the process terminates or replaces
the process image through the @code{execve} function.  Sealed mappings are
inherited through @code{fork}.
@end table
@end deftypefun
@subsection Memory Protection Keys
@cindex memory protection key

View File

@ -213,6 +213,8 @@ tests += \
tst-misalign-clone \
tst-mlock2 \
tst-mount \
tst-mseal \
tst-mseal-pkey \
tst-ntp_adjtime \
tst-ntp_gettime \
tst-ntp_gettimex \

View File

@ -332,6 +332,9 @@ libc {
sched_getattr;
sched_setattr;
}
GLIBC_2.42 {
mseal;
}
GLIBC_PRIVATE {
# functions used in other libraries
__syscall_rt_sigqueueinfo;

View File

@ -2750,3 +2750,4 @@ GLIBC_2.39 stdc_trailing_zeros_ull F
GLIBC_2.39 stdc_trailing_zeros_us F
GLIBC_2.41 sched_getattr F
GLIBC_2.41 sched_setattr F
GLIBC_2.42 mseal F

View File

@ -3097,6 +3097,7 @@ GLIBC_2.4 wprintf F
GLIBC_2.4 wscanf F
GLIBC_2.41 sched_getattr F
GLIBC_2.41 sched_setattr F
GLIBC_2.42 mseal F
GLIBC_2.5 __readlinkat_chk F
GLIBC_2.5 inet6_opt_append F
GLIBC_2.5 inet6_opt_find F

View File

@ -2511,3 +2511,4 @@ GLIBC_2.39 stdc_trailing_zeros_ull F
GLIBC_2.39 stdc_trailing_zeros_us F
GLIBC_2.41 sched_getattr F
GLIBC_2.41 sched_setattr F
GLIBC_2.42 mseal F

View File

@ -2803,6 +2803,7 @@ GLIBC_2.4 xprt_register F
GLIBC_2.4 xprt_unregister F
GLIBC_2.41 sched_getattr F
GLIBC_2.41 sched_setattr F
GLIBC_2.42 mseal F
GLIBC_2.5 __readlinkat_chk F
GLIBC_2.5 inet6_opt_append F
GLIBC_2.5 inet6_opt_find F

View File

@ -2800,6 +2800,7 @@ GLIBC_2.4 xprt_register F
GLIBC_2.4 xprt_unregister F
GLIBC_2.41 sched_getattr F
GLIBC_2.41 sched_setattr F
GLIBC_2.42 mseal F
GLIBC_2.5 __readlinkat_chk F
GLIBC_2.5 inet6_opt_append F
GLIBC_2.5 inet6_opt_find F

View File

@ -81,6 +81,14 @@ int pkey_free (int __key) __THROW;
range. */
int pkey_mprotect (void *__addr, size_t __len, int __prot, int __pkey) __THROW;
/* Seal the address range starting at ADDR and spanning LEN bytes against
   further modifications, such as using mremap to shrink or expand the VMA,
   changing the protection permissions with mprotect, unmapping with munmap,
   or destructive operations such as madvise with MADV_DONTNEED.  The address
   range must be a valid VMA, without any gap (unallocated memory) between
   start and end, and ADDR must be page aligned (LEN will be page aligned
   implicitly).  FLAGS is currently unused.  Returns 0 on success and -1 on
   failure.  */
int mseal (void *__addr, size_t __len, unsigned long __flags) __THROW;
__END_DECLS
#endif /* __USE_GNU */

View File

@ -2787,3 +2787,4 @@ GLIBC_2.39 stdc_trailing_zeros_ull F
GLIBC_2.39 stdc_trailing_zeros_us F
GLIBC_2.41 sched_getattr F
GLIBC_2.41 sched_setattr F
GLIBC_2.42 mseal F

View File

@ -2824,6 +2824,7 @@ GLIBC_2.4 unshare F
GLIBC_2.41 cacheflush F
GLIBC_2.41 sched_getattr F
GLIBC_2.41 sched_setattr F
GLIBC_2.42 mseal F
GLIBC_2.5 __readlinkat_chk F
GLIBC_2.5 inet6_opt_append F
GLIBC_2.5 inet6_opt_find F

View File

@ -3007,6 +3007,7 @@ GLIBC_2.4 unlinkat F
GLIBC_2.4 unshare F
GLIBC_2.41 sched_getattr F
GLIBC_2.41 sched_setattr F
GLIBC_2.42 mseal F
GLIBC_2.5 __readlinkat_chk F
GLIBC_2.5 inet6_opt_append F
GLIBC_2.5 inet6_opt_find F

View File

@ -257,4 +257,12 @@
# define __ASSUME_FCHMODAT2 0
#endif
/* Linux 6.10 introduced the mseal system call on every architecture at
   once, although sealing itself is only supported on 64-bit CPUs.
   __ASSUME_MSEAL is 1 when the minimum supported kernel is at least 6.10
   and 0 otherwise.  */
#if __LINUX_KERNEL_VERSION < 0x060A00
# define __ASSUME_MSEAL 0
#else
# define __ASSUME_MSEAL 1
#endif
#endif /* kernel-features.h */

View File

@ -2271,3 +2271,4 @@ GLIBC_2.39 stdc_trailing_zeros_ull F
GLIBC_2.39 stdc_trailing_zeros_us F
GLIBC_2.41 sched_getattr F
GLIBC_2.41 sched_setattr F
GLIBC_2.42 mseal F

View File

@ -2783,6 +2783,7 @@ GLIBC_2.4 xprt_register F
GLIBC_2.4 xprt_unregister F
GLIBC_2.41 sched_getattr F
GLIBC_2.41 sched_setattr F
GLIBC_2.42 mseal F
GLIBC_2.5 __readlinkat_chk F
GLIBC_2.5 inet6_opt_append F
GLIBC_2.5 inet6_opt_find F

View File

@ -2950,6 +2950,7 @@ GLIBC_2.4 unlinkat F
GLIBC_2.4 unshare F
GLIBC_2.41 sched_getattr F
GLIBC_2.41 sched_setattr F
GLIBC_2.42 mseal F
GLIBC_2.5 __readlinkat_chk F
GLIBC_2.5 inet6_opt_append F
GLIBC_2.5 inet6_opt_find F

View File

@ -2836,3 +2836,4 @@ GLIBC_2.39 stdc_trailing_zeros_ull F
GLIBC_2.39 stdc_trailing_zeros_us F
GLIBC_2.41 sched_getattr F
GLIBC_2.41 sched_setattr F
GLIBC_2.42 mseal F

View File

@ -2833,3 +2833,4 @@ GLIBC_2.39 stdc_trailing_zeros_ull F
GLIBC_2.39 stdc_trailing_zeros_us F
GLIBC_2.41 sched_getattr F
GLIBC_2.41 sched_setattr F
GLIBC_2.42 mseal F

View File

@ -2911,6 +2911,7 @@ GLIBC_2.4 unlinkat F
GLIBC_2.4 unshare F
GLIBC_2.41 sched_getattr F
GLIBC_2.41 sched_setattr F
GLIBC_2.42 mseal F
GLIBC_2.5 __readlinkat_chk F
GLIBC_2.5 inet6_opt_append F
GLIBC_2.5 inet6_opt_find F

View File

@ -2917,6 +2917,7 @@ GLIBC_2.4 unlinkat F
GLIBC_2.4 unshare F
GLIBC_2.41 sched_getattr F
GLIBC_2.41 sched_setattr F
GLIBC_2.42 mseal F
GLIBC_2.5 __readlinkat_chk F
GLIBC_2.5 inet6_opt_append F
GLIBC_2.5 inet6_opt_find F

View File

@ -2819,6 +2819,7 @@ GLIBC_2.4 unlinkat F
GLIBC_2.4 unshare F
GLIBC_2.41 sched_getattr F
GLIBC_2.41 sched_setattr F
GLIBC_2.42 mseal F
GLIBC_2.5 __readlinkat_chk F
GLIBC_2.5 inet6_opt_append F
GLIBC_2.5 inet6_opt_find F

View File

@ -2261,3 +2261,4 @@ GLIBC_2.40 setcontext F
GLIBC_2.40 swapcontext F
GLIBC_2.41 sched_getattr F
GLIBC_2.41 sched_setattr F
GLIBC_2.42 mseal F

View File

@ -3140,6 +3140,7 @@ GLIBC_2.4 wprintf F
GLIBC_2.4 wscanf F
GLIBC_2.41 sched_getattr F
GLIBC_2.41 sched_setattr F
GLIBC_2.42 mseal F
GLIBC_2.5 __readlinkat_chk F
GLIBC_2.5 inet6_opt_append F
GLIBC_2.5 inet6_opt_find F

View File

@ -3185,6 +3185,7 @@ GLIBC_2.4 wprintf F
GLIBC_2.4 wscanf F
GLIBC_2.41 sched_getattr F
GLIBC_2.41 sched_setattr F
GLIBC_2.42 mseal F
GLIBC_2.5 __readlinkat_chk F
GLIBC_2.5 inet6_opt_append F
GLIBC_2.5 inet6_opt_find F

View File

@ -2894,6 +2894,7 @@ GLIBC_2.4 wprintf F
GLIBC_2.4 wscanf F
GLIBC_2.41 sched_getattr F
GLIBC_2.41 sched_setattr F
GLIBC_2.42 mseal F
GLIBC_2.5 __readlinkat_chk F
GLIBC_2.5 inet6_opt_append F
GLIBC_2.5 inet6_opt_find F

View File

@ -2970,3 +2970,4 @@ GLIBC_2.39 stdc_trailing_zeros_ull F
GLIBC_2.39 stdc_trailing_zeros_us F
GLIBC_2.41 sched_getattr F
GLIBC_2.41 sched_setattr F
GLIBC_2.42 mseal F

View File

@ -2514,3 +2514,4 @@ GLIBC_2.39 stdc_trailing_zeros_us F
GLIBC_2.40 __riscv_hwprobe F
GLIBC_2.41 sched_getattr F
GLIBC_2.41 sched_setattr F
GLIBC_2.42 mseal F

View File

@ -2714,3 +2714,4 @@ GLIBC_2.39 stdc_trailing_zeros_us F
GLIBC_2.40 __riscv_hwprobe F
GLIBC_2.41 sched_getattr F
GLIBC_2.41 sched_setattr F
GLIBC_2.42 mseal F

View File

@ -3138,6 +3138,7 @@ GLIBC_2.4 wprintf F
GLIBC_2.4 wscanf F
GLIBC_2.41 sched_getattr F
GLIBC_2.41 sched_setattr F
GLIBC_2.42 mseal F
GLIBC_2.5 __readlinkat_chk F
GLIBC_2.5 inet6_opt_append F
GLIBC_2.5 inet6_opt_find F

View File

@ -2931,6 +2931,7 @@ GLIBC_2.4 wprintf F
GLIBC_2.4 wscanf F
GLIBC_2.41 sched_getattr F
GLIBC_2.41 sched_setattr F
GLIBC_2.42 mseal F
GLIBC_2.5 __readlinkat_chk F
GLIBC_2.5 inet6_opt_append F
GLIBC_2.5 inet6_opt_find F

View File

@ -2830,6 +2830,7 @@ GLIBC_2.4 unlinkat F
GLIBC_2.4 unshare F
GLIBC_2.41 sched_getattr F
GLIBC_2.41 sched_setattr F
GLIBC_2.42 mseal F
GLIBC_2.5 __readlinkat_chk F
GLIBC_2.5 inet6_opt_append F
GLIBC_2.5 inet6_opt_find F

View File

@ -2827,6 +2827,7 @@ GLIBC_2.4 unlinkat F
GLIBC_2.4 unshare F
GLIBC_2.41 sched_getattr F
GLIBC_2.41 sched_setattr F
GLIBC_2.42 mseal F
GLIBC_2.5 __readlinkat_chk F
GLIBC_2.5 inet6_opt_append F
GLIBC_2.5 inet6_opt_find F

View File

@ -3159,6 +3159,7 @@ GLIBC_2.4 wprintf F
GLIBC_2.4 wscanf F
GLIBC_2.41 sched_getattr F
GLIBC_2.41 sched_setattr F
GLIBC_2.42 mseal F
GLIBC_2.5 __readlinkat_chk F
GLIBC_2.5 inet6_opt_append F
GLIBC_2.5 inet6_opt_find F

View File

@ -2795,6 +2795,7 @@ GLIBC_2.4 unlinkat F
GLIBC_2.4 unshare F
GLIBC_2.41 sched_getattr F
GLIBC_2.41 sched_setattr F
GLIBC_2.42 mseal F
GLIBC_2.5 __readlinkat_chk F
GLIBC_2.5 inet6_opt_append F
GLIBC_2.5 inet6_opt_find F

View File

@ -39,6 +39,7 @@ mlockall - mlockall i:i mlockall
mount EXTRA mount i:sssUp __mount mount
mount_setattr EXTRA mount_setattr i:isUpU mount_setattr
move_mount EXTRA move_mount i:isisU move_mount
mseal EXTRA mseal i:bUU __mseal mseal
munlock - munlock i:aU munlock
munlockall - munlockall i: munlockall
nfsservctl EXTRA nfsservctl i:ipp __compat_nfsservctl nfsservctl@GLIBC_2.0:GLIBC_2.28

View File

@ -0,0 +1,84 @@
/* Basic tests for mseal and pkey.
Copyright (C) 2025 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
#include <errno.h>
#include <sys/mman.h>
#include <support/check.h>
#include <support/xunistd.h>
/* Check how mseal interacts with memory protection keys: a destructive
   madvise on a sealed page is refused only while the key denies write
   access, whereas protection changes and unmapping are always refused.
   Exits as unsupported when either mseal or protection keys are not
   available.  */
static int
do_test (void)
{
  /* Probe with an invalid request.  The call is expected to fail with
     EINVAL; ENOSYS or EPERM are taken to mean mseal is not usable.  */
  TEST_VERIFY_EXIT (mseal (MAP_FAILED, 0, 0) == -1);
  switch (errno)
    {
    case ENOSYS:
    case EPERM:
      FAIL_UNSUPPORTED ("kernel does not support mseal");
      break;
    default:
      break;
    }
  TEST_COMPARE (errno, EINVAL);

  /* Allocate a protection key, sorting the failure reasons into
     "unsupported" and hard errors.  */
  int key = pkey_alloc (0, 0);
  if (key < 0)
    switch (errno)
      {
      case ENOSYS:
        FAIL_UNSUPPORTED
          ("kernel does not support memory protection keys");
        break;
      case EINVAL:
        FAIL_UNSUPPORTED
          ("CPU does not support memory protection keys: %m");
        break;
      case ENOSPC:
        FAIL_UNSUPPORTED
          ("no keys available or kernel does not support memory"
           " protection keys");
        break;
      default:
        FAIL_EXIT1 ("pkey_alloc: %m");
        break;
      }

  /* Map one read-write anonymous page, tag it with the key, and seal it.  */
  long pagesize = xsysconf (_SC_PAGESIZE);
  void *page = xmmap (NULL, pagesize, PROT_READ | PROT_WRITE,
                      MAP_ANONYMOUS | MAP_PRIVATE, -1);
  TEST_COMPARE (pkey_mprotect (page, pagesize, PROT_READ | PROT_WRITE, key),
                0);
  TEST_VERIFY_EXIT (mseal (page, pagesize, 0) == 0);

  /* With all access rights granted through the key, the destructive
     madvise behaviours (MADV_DONTNEED, MADV_FREE, MADV_DONTNEED_LOCKED,
     and MADV_WIPEONFORK) are not blocked by the seal.  */
  TEST_VERIFY_EXIT (pkey_set (key, 0) == 0);
  TEST_COMPARE (madvise (page, pagesize, MADV_DONTNEED), 0);

  /* Changing the protection of the mapping is refused no matter what
     the PKRU state is.  */
  TEST_COMPARE (pkey_mprotect (page, pagesize, PROT_READ, key), -1);
  TEST_COMPARE (errno, EPERM);
  TEST_COMPARE (mprotect (page, pagesize, PROT_READ), -1);
  TEST_COMPARE (errno, EPERM);

  /* Once the key denies write access, the destructive madvise is
     refused as well, along with mprotect and munmap.  */
  TEST_VERIFY_EXIT (pkey_set (key, PKEY_DISABLE_WRITE) == 0);
  TEST_COMPARE (madvise (page, pagesize, MADV_DONTNEED), -1);
  TEST_COMPARE (errno, EPERM);
  TEST_COMPARE (mprotect (page, pagesize, PROT_READ), -1);
  TEST_COMPARE (errno, EPERM);
  TEST_COMPARE (munmap (page, pagesize), -1);
  TEST_COMPARE (errno, EPERM);

  return 0;
}
#include <support/test-driver.c>

View File

@ -0,0 +1,67 @@
/* Basic tests for mseal.
Copyright (C) 2025 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
#include <errno.h>
#include <sys/mman.h>
#include <support/check.h>
#include <support/xunistd.h>
/* Exercise the basic mseal contract: argument validation, sealing the
   same range twice, and the refusal of later attempts to change the
   sealed page.  Exits as unsupported when mseal is not available.  */
static int
do_test (void)
{
  /* Probe with an invalid request.  The call is expected to fail with
     EINVAL; ENOSYS or EPERM are taken to mean mseal is not usable.  */
  TEST_VERIFY_EXIT (mseal (MAP_FAILED, 0, 0) == -1);
  switch (errno)
    {
    case ENOSYS:
    case EPERM:
      FAIL_UNSUPPORTED ("kernel does not support mseal");
      break;
    default:
      break;
    }
  TEST_COMPARE (errno, EINVAL);

  size_t pagesize = getpagesize ();

  /* Map four read-only pages and unmap the third one, leaving a hole.  */
  void *p = xmmap (NULL, 4 * pagesize, PROT_READ,
                   MAP_ANONYMOUS | MAP_PRIVATE, -1);
  void *hole = p + 2 * pagesize;
  xmunmap (hole, pagesize);

  /* A start address that is not page aligned is rejected.  */
  TEST_VERIFY_EXIT (mseal (p + 1, pagesize, 0) == -1);
  TEST_COMPARE (errno, EINVAL);

  /* A range that runs into the hole created above is rejected.  */
  TEST_VERIFY_EXIT (mseal (p, 3 * pagesize, 0) == -1);
  TEST_COMPARE (errno, ENOMEM);

  /* Seal the first page; sealing it a second time must also succeed.  */
  TEST_VERIFY_EXIT (mseal (p, pagesize, 0) == 0);
  TEST_VERIFY_EXIT (mseal (p, pagesize, 0) == 0);

  /* Every attempt to alter the sealed page must now fail with EPERM.  */
  TEST_VERIFY_EXIT (mprotect (p, pagesize, PROT_WRITE) == -1);
  TEST_COMPARE (errno, EPERM);
  TEST_VERIFY_EXIT (munmap (p, pagesize) == -1);
  TEST_COMPARE (errno, EPERM);
  TEST_VERIFY_EXIT (mremap (p, pagesize, 2 * pagesize, 0) == MAP_FAILED);
  TEST_COMPARE (errno, EPERM);
  TEST_VERIFY_EXIT (madvise (p, pagesize, MADV_DONTNEED) == -1);
  TEST_COMPARE (errno, EPERM);

  /* Release the two remaining pages that were never sealed.  */
  void *second_page = p + pagesize;
  void *fourth_page = p + 3 * pagesize;
  xmunmap (second_page, pagesize);
  xmunmap (fourth_page, pagesize);

  return 0;
}
#include <support/test-driver.c>

View File

@ -2746,6 +2746,7 @@ GLIBC_2.4 unlinkat F
GLIBC_2.4 unshare F
GLIBC_2.41 sched_getattr F
GLIBC_2.41 sched_setattr F
GLIBC_2.42 mseal F
GLIBC_2.5 __readlinkat_chk F
GLIBC_2.5 inet6_opt_append F
GLIBC_2.5 inet6_opt_find F

View File

@ -2765,3 +2765,4 @@ GLIBC_2.39 stdc_trailing_zeros_ull F
GLIBC_2.39 stdc_trailing_zeros_us F
GLIBC_2.41 sched_getattr F
GLIBC_2.41 sched_setattr F
GLIBC_2.42 mseal F