From: Hugh Dickins <hughd@google.com>
Date: Mon, 19 Jun 2017 04:03:24 -0700
Subject: mm: larger stack guard gap, between vmas
Origin: https://git.kernel.org/linus/1be7107fbe18eed3e319a6c3e83c78254b693acb

Stack guard page is a useful feature to reduce the risk of stack smashing
into a different mapping. We have been using a single page gap, which
is sufficient to prevent having the stack adjacent to a different mapping.
But this seems to be insufficient in the light of the stack usage in
userspace. E.g. glibc uses as large as 64kB alloca() in many commonly
used functions. Others use constructs like gid_t buffer[NGROUPS_MAX],
which is 256kB, or stack strings with MAX_ARG_STRLEN.

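For illustration, a minimal userspace sketch (not part of this patch;
it assumes <sys/types.h> and <limits.h>, where Linux defines
NGROUPS_MAX as 65536 and gid_t is typically 4 bytes, so the buffer
below is 256kB):

	static void grow_stack(void)
	{
		gid_t buffer[NGROUPS_MAX];	/* 65536 * 4 bytes = 256kB */

		/* This first write can land far more than a page below
		 * the last mapped stack page, stepping clean over a
		 * single 4kB guard page into an adjacent mapping. */
		buffer[0] = 0;
	}
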
This will become especially dangerous for suid binaries with the
default unlimited stack size limit, because those applications can be
tricked into consuming a large portion of the stack, and a single glibc
call could then jump over the guard page. These attacks are not
theoretical, unfortunately.

Make those attacks less probable by increasing the stack guard gap
to 1MB (on systems with 4k pages; but make it depend on the page size,
because systems with larger base pages might cap stack allocations in
PAGE_SIZE units), which should cover larger alloca() and VLA stack
allocations. It is obviously not a full fix because the problem is
somewhat inherent, but it should reduce the attack space a lot.

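As a worked sizing example: the default gap is 256 pages, so with 4kB
pages that is 256 * 4kB = 1MB, while a system with 64kB base pages gets
256 * 64kB = 16MB from the same page-unit default.
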
One could argue that the gap size should be configurable from userspace,
but that can be done later when somebody finds that the new 1MB is wrong
for some special case applications. For now, add a kernel command line
option (stack_guard_gap) to specify the stack gap size (in page units).

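For example, booting with

	stack_guard_gap=1

shrinks the gap back to a single page, while the built-in default is
equivalent to stack_guard_gap=256 (1MB with 4kB pages).
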
Implementation-wise, first delete all the old code for stack guard page:
because although we could get away with accounting one extra page in a
stack vma, accounting a larger gap can break userspace - case in point,
a program run with "ulimit -S -v 20000" failed when the 1MB gap was
counted for RLIMIT_AS; similar problems could come with RLIMIT_MLOCK
and strict non-overcommit mode.

Instead of keeping the gap inside the stack vma, maintain the stack guard
gap as a gap between vmas: using vm_start_gap() in place of vm_start
(or vm_end_gap() in place of vm_end if VM_GROWSUP) in just those few
places which need to respect the gap - mainly arch_get_unmapped_area(),
and the vma tree's subtree_gap support for that.

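As a worked example of the new helpers (hypothetical addresses, 4kB
pages, default gap of 256 pages = 0x100000 bytes): for a VM_GROWSDOWN
stack vma spanning [0x7ffffff00000, 0x7ffffffff000), vm_start_gap()
returns 0x7ffffff00000 - 0x100000 = 0x7fffffe00000, so a hint passed to
arch_get_unmapped_area() is only honoured while addr + len stays at or
below 0x7fffffe00000; vm_end_gap() makes the symmetric adjustment above
a VM_GROWSUP stack.
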
Original-patch-by: Oleg Nesterov <oleg@redhat.com>
Original-patch-by: Michal Hocko <mhocko@suse.com>
Signed-off-by: Hugh Dickins <hughd@google.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Tested-by: Helge Deller <deller@gmx.de> # parisc
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
[bwh: Backported to 4.11: adjust context]
---
 Documentation/admin-guide/kernel-parameters.txt |   7 ++
 arch/arc/mm/mmap.c                              |   2 +-
 arch/arm/mm/mmap.c                              |   4 +-
 arch/frv/mm/elf-fdpic.c                         |   2 +-
 arch/mips/mm/mmap.c                             |   2 +-
 arch/parisc/kernel/sys_parisc.c                 |  15 ++-
 arch/powerpc/mm/hugetlbpage-radix.c             |   2 +-
 arch/powerpc/mm/mmap.c                          |   4 +-
 arch/powerpc/mm/slice.c                         |   2 +-
 arch/s390/mm/mmap.c                             |   4 +-
 arch/sh/mm/mmap.c                               |   4 +-
 arch/sparc/kernel/sys_sparc_64.c                |   4 +-
 arch/sparc/mm/hugetlbpage.c                     |   2 +-
 arch/tile/mm/hugetlbpage.c                      |   2 +-
 arch/x86/kernel/sys_x86_64.c                    |   4 +-
 arch/x86/mm/hugetlbpage.c                       |   2 +-
 arch/xtensa/kernel/syscall.c                    |   2 +-
 fs/hugetlbfs/inode.c                            |   2 +-
 fs/proc/task_mmu.c                              |   4 -
 include/linux/mm.h                              |  53 ++++-----
 mm/gup.c                                        |   5 -
 mm/memory.c                                     |  38 ------
 mm/mmap.c                                       | 149 ++++++++++++++----------
 23 files changed, 152 insertions(+), 163 deletions(-)

--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -3779,6 +3779,13 @@
 	spia_pedr=
 	spia_peddr=
 
+	stack_guard_gap=	[MM]
+			override the default stack gap protection. The value
+			is in page units and it defines how many pages prior
+			to (for stacks growing down) resp. after (for stacks
+			growing up) the main stack are reserved for no other
+			mapping. Default value is 256 pages.
+
 	stacktrace	[FTRACE]
 			Enabled the stack tracer on boot up.
 
--- a/arch/arc/mm/mmap.c
+++ b/arch/arc/mm/mmap.c
@@ -65,7 +65,7 @@ arch_get_unmapped_area(struct file *filp
 
 		vma = find_vma(mm, addr);
 		if (TASK_SIZE - len >= addr &&
-		    (!vma || addr + len <= vma->vm_start))
+		    (!vma || addr + len <= vm_start_gap(vma)))
 			return addr;
 	}
 
--- a/arch/arm/mm/mmap.c
+++ b/arch/arm/mm/mmap.c
@@ -90,7 +90,7 @@ arch_get_unmapped_area(struct file *filp
 
 		vma = find_vma(mm, addr);
 		if (TASK_SIZE - len >= addr &&
-		    (!vma || addr + len <= vma->vm_start))
+		    (!vma || addr + len <= vm_start_gap(vma)))
 			return addr;
 	}
 
@@ -141,7 +141,7 @@ arch_get_unmapped_area_topdown(struct fi
 		addr = PAGE_ALIGN(addr);
 		vma = find_vma(mm, addr);
 		if (TASK_SIZE - len >= addr &&
-		    (!vma || addr + len <= vma->vm_start))
+		    (!vma || addr + len <= vm_start_gap(vma)))
 			return addr;
 	}
 
--- a/arch/frv/mm/elf-fdpic.c
+++ b/arch/frv/mm/elf-fdpic.c
@@ -75,7 +75,7 @@ unsigned long arch_get_unmapped_area(str
 		addr = PAGE_ALIGN(addr);
 		vma = find_vma(current->mm, addr);
 		if (TASK_SIZE - len >= addr &&
-		    (!vma || addr + len <= vma->vm_start))
+		    (!vma || addr + len <= vm_start_gap(vma)))
 			goto success;
 	}
 
--- a/arch/mips/mm/mmap.c
+++ b/arch/mips/mm/mmap.c
@@ -93,7 +93,7 @@ static unsigned long arch_get_unmapped_a
 
 		vma = find_vma(mm, addr);
 		if (TASK_SIZE - len >= addr &&
-		    (!vma || addr + len <= vma->vm_start))
+		    (!vma || addr + len <= vm_start_gap(vma)))
 			return addr;
 	}
 
--- a/arch/parisc/kernel/sys_parisc.c
+++ b/arch/parisc/kernel/sys_parisc.c
@@ -90,7 +90,7 @@ unsigned long arch_get_unmapped_area(str
 		unsigned long len, unsigned long pgoff, unsigned long flags)
 {
 	struct mm_struct *mm = current->mm;
-	struct vm_area_struct *vma;
+	struct vm_area_struct *vma, *prev;
 	unsigned long task_size = TASK_SIZE;
 	int do_color_align, last_mmap;
 	struct vm_unmapped_area_info info;
@@ -117,9 +117,10 @@ unsigned long arch_get_unmapped_area(str
 		else
 			addr = PAGE_ALIGN(addr);
 
-		vma = find_vma(mm, addr);
+		vma = find_vma_prev(mm, addr, &prev);
 		if (task_size - len >= addr &&
-		    (!vma || addr + len <= vma->vm_start))
+		    (!vma || addr + len <= vm_start_gap(vma)) &&
+		    (!prev || addr >= vm_end_gap(prev)))
 			goto found_addr;
 	}
 
@@ -143,7 +144,7 @@ arch_get_unmapped_area_topdown(struct fi
 		const unsigned long len, const unsigned long pgoff,
 		const unsigned long flags)
 {
-	struct vm_area_struct *vma;
+	struct vm_area_struct *vma, *prev;
 	struct mm_struct *mm = current->mm;
 	unsigned long addr = addr0;
 	int do_color_align, last_mmap;
@@ -177,9 +178,11 @@ arch_get_unmapped_area_topdown(struct fi
 			addr = COLOR_ALIGN(addr, last_mmap, pgoff);
 		else
 			addr = PAGE_ALIGN(addr);
-		vma = find_vma(mm, addr);
+
+		vma = find_vma_prev(mm, addr, &prev);
 		if (TASK_SIZE - len >= addr &&
-		    (!vma || addr + len <= vma->vm_start))
+		    (!vma || addr + len <= vm_start_gap(vma)) &&
+		    (!prev || addr >= vm_end_gap(prev)))
 			goto found_addr;
 	}
 
--- a/arch/powerpc/mm/hugetlbpage-radix.c
+++ b/arch/powerpc/mm/hugetlbpage-radix.c
@@ -65,7 +65,7 @@ radix__hugetlb_get_unmapped_area(struct
 		addr = ALIGN(addr, huge_page_size(h));
 		vma = find_vma(mm, addr);
 		if (TASK_SIZE - len >= addr &&
-		    (!vma || addr + len <= vma->vm_start))
+		    (!vma || addr + len <= vm_start_gap(vma)))
 			return addr;
 	}
 	/*
--- a/arch/powerpc/mm/mmap.c
+++ b/arch/powerpc/mm/mmap.c
@@ -107,7 +107,7 @@ radix__arch_get_unmapped_area(struct fil
 		addr = PAGE_ALIGN(addr);
 		vma = find_vma(mm, addr);
 		if (TASK_SIZE - len >= addr && addr >= mmap_min_addr &&
-		    (!vma || addr + len <= vma->vm_start))
+		    (!vma || addr + len <= vm_start_gap(vma)))
 			return addr;
 	}
 
@@ -143,7 +143,7 @@ radix__arch_get_unmapped_area_topdown(st
 		addr = PAGE_ALIGN(addr);
 		vma = find_vma(mm, addr);
 		if (TASK_SIZE - len >= addr && addr >= mmap_min_addr &&
-		    (!vma || addr + len <= vma->vm_start))
+		    (!vma || addr + len <= vm_start_gap(vma)))
 			return addr;
 	}
 
--- a/arch/powerpc/mm/slice.c
+++ b/arch/powerpc/mm/slice.c
@@ -105,7 +105,7 @@ static int slice_area_is_free(struct mm_
 	if ((mm->task_size - len) < addr)
 		return 0;
 	vma = find_vma(mm, addr);
-	return (!vma || (addr + len) <= vma->vm_start);
+	return (!vma || (addr + len) <= vm_start_gap(vma));
 }
 
 static int slice_low_has_vma(struct mm_struct *mm, unsigned long slice)
--- a/arch/s390/mm/mmap.c
+++ b/arch/s390/mm/mmap.c
@@ -100,7 +100,7 @@ arch_get_unmapped_area(struct file *filp
 		addr = PAGE_ALIGN(addr);
 		vma = find_vma(mm, addr);
 		if (TASK_SIZE - len >= addr && addr >= mmap_min_addr &&
-		    (!vma || addr + len <= vma->vm_start))
+		    (!vma || addr + len <= vm_start_gap(vma)))
 			return addr;
 	}
 
@@ -138,7 +138,7 @@ arch_get_unmapped_area_topdown(struct fi
 		addr = PAGE_ALIGN(addr);
 		vma = find_vma(mm, addr);
 		if (TASK_SIZE - len >= addr && addr >= mmap_min_addr &&
-		    (!vma || addr + len <= vma->vm_start))
+		    (!vma || addr + len <= vm_start_gap(vma)))
 			return addr;
 	}
 
--- a/arch/sh/mm/mmap.c
+++ b/arch/sh/mm/mmap.c
@@ -64,7 +64,7 @@ unsigned long arch_get_unmapped_area(str
 
 		vma = find_vma(mm, addr);
 		if (TASK_SIZE - len >= addr &&
-		    (!vma || addr + len <= vma->vm_start))
+		    (!vma || addr + len <= vm_start_gap(vma)))
 			return addr;
 	}
 
@@ -114,7 +114,7 @@ arch_get_unmapped_area_topdown(struct fi
 
 		vma = find_vma(mm, addr);
 		if (TASK_SIZE - len >= addr &&
-		    (!vma || addr + len <= vma->vm_start))
+		    (!vma || addr + len <= vm_start_gap(vma)))
 			return addr;
 	}
 
--- a/arch/sparc/kernel/sys_sparc_64.c
+++ b/arch/sparc/kernel/sys_sparc_64.c
@@ -120,7 +120,7 @@ unsigned long arch_get_unmapped_area(str
 
 		vma = find_vma(mm, addr);
 		if (task_size - len >= addr &&
-		    (!vma || addr + len <= vma->vm_start))
+		    (!vma || addr + len <= vm_start_gap(vma)))
 			return addr;
 	}
 
@@ -183,7 +183,7 @@ arch_get_unmapped_area_topdown(struct fi
 
 		vma = find_vma(mm, addr);
 		if (task_size - len >= addr &&
-		    (!vma || addr + len <= vma->vm_start))
+		    (!vma || addr + len <= vm_start_gap(vma)))
 			return addr;
 	}
 
--- a/arch/sparc/mm/hugetlbpage.c
+++ b/arch/sparc/mm/hugetlbpage.c
@@ -120,7 +120,7 @@ hugetlb_get_unmapped_area(struct file *f
 		addr = ALIGN(addr, huge_page_size(h));
 		vma = find_vma(mm, addr);
 		if (task_size - len >= addr &&
-		    (!vma || addr + len <= vma->vm_start))
+		    (!vma || addr + len <= vm_start_gap(vma)))
 			return addr;
 	}
 	if (mm->get_unmapped_area == arch_get_unmapped_area)
--- a/arch/tile/mm/hugetlbpage.c
+++ b/arch/tile/mm/hugetlbpage.c
@@ -233,7 +233,7 @@ unsigned long hugetlb_get_unmapped_area(
 		addr = ALIGN(addr, huge_page_size(h));
 		vma = find_vma(mm, addr);
 		if (TASK_SIZE - len >= addr &&
-		    (!vma || addr + len <= vma->vm_start))
+		    (!vma || addr + len <= vm_start_gap(vma)))
 			return addr;
 	}
 	if (current->mm->get_unmapped_area == arch_get_unmapped_area)
--- a/arch/x86/kernel/sys_x86_64.c
+++ b/arch/x86/kernel/sys_x86_64.c
@@ -141,7 +141,7 @@ arch_get_unmapped_area(struct file *filp
 		addr = PAGE_ALIGN(addr);
 		vma = find_vma(mm, addr);
 		if (end - len >= addr &&
-		    (!vma || addr + len <= vma->vm_start))
+		    (!vma || addr + len <= vm_start_gap(vma)))
 			return addr;
 	}
 
@@ -184,7 +184,7 @@ arch_get_unmapped_area_topdown(struct fi
 		addr = PAGE_ALIGN(addr);
 		vma = find_vma(mm, addr);
 		if (TASK_SIZE - len >= addr &&
-		    (!vma || addr + len <= vma->vm_start))
+		    (!vma || addr + len <= vm_start_gap(vma)))
 			return addr;
 	}
 
--- a/arch/x86/mm/hugetlbpage.c
+++ b/arch/x86/mm/hugetlbpage.c
@@ -145,7 +145,7 @@ hugetlb_get_unmapped_area(struct file *f
 		addr = ALIGN(addr, huge_page_size(h));
 		vma = find_vma(mm, addr);
 		if (TASK_SIZE - len >= addr &&
-		    (!vma || addr + len <= vma->vm_start))
+		    (!vma || addr + len <= vm_start_gap(vma)))
 			return addr;
 	}
 	if (mm->get_unmapped_area == arch_get_unmapped_area)
--- a/arch/xtensa/kernel/syscall.c
+++ b/arch/xtensa/kernel/syscall.c
@@ -88,7 +88,7 @@ unsigned long arch_get_unmapped_area(str
 		/* At this point:  (!vmm || addr < vmm->vm_end). */
 		if (TASK_SIZE - len < addr)
 			return -ENOMEM;
-		if (!vmm || addr + len <= vmm->vm_start)
+		if (!vmm || addr + len <= vm_start_gap(vmm))
 			return addr;
 		addr = vmm->vm_end;
 		if (flags & MAP_SHARED)
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -200,7 +200,7 @@ hugetlb_get_unmapped_area(struct file *f
 		addr = ALIGN(addr, huge_page_size(h));
 		vma = find_vma(mm, addr);
 		if (TASK_SIZE - len >= addr &&
-		    (!vma || addr + len <= vma->vm_start))
+		    (!vma || addr + len <= vm_start_gap(vma)))
 			return addr;
 	}
 
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -303,11 +303,7 @@ show_map_vma(struct seq_file *m, struct
 
 	/* We don't show the stack guard page in /proc/maps */
 	start = vma->vm_start;
-	if (stack_guard_page_start(vma, start))
-		start += PAGE_SIZE;
 	end = vma->vm_end;
-	if (stack_guard_page_end(vma, end))
-		end -= PAGE_SIZE;
 
 	seq_setwidth(m, 25 + sizeof(void *) * 6 - 1);
 	seq_printf(m, "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu ",
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1403,12 +1403,6 @@ int clear_page_dirty_for_io(struct page
 
 int get_cmdline(struct task_struct *task, char *buffer, int buflen);
 
-/* Is the vma a continuation of the stack vma above it? */
-static inline int vma_growsdown(struct vm_area_struct *vma, unsigned long addr)
-{
-	return vma && (vma->vm_end == addr) && (vma->vm_flags & VM_GROWSDOWN);
-}
-
 static inline bool vma_is_anonymous(struct vm_area_struct *vma)
 {
 	return !vma->vm_ops;
@@ -1424,28 +1418,6 @@ bool vma_is_shmem(struct vm_area_struct
 static inline bool vma_is_shmem(struct vm_area_struct *vma) { return false; }
 #endif
 
-static inline int stack_guard_page_start(struct vm_area_struct *vma,
-					     unsigned long addr)
-{
-	return (vma->vm_flags & VM_GROWSDOWN) &&
-		(vma->vm_start == addr) &&
-		!vma_growsdown(vma->vm_prev, addr);
-}
-
-/* Is the vma a continuation of the stack vma below it? */
-static inline int vma_growsup(struct vm_area_struct *vma, unsigned long addr)
-{
-	return vma && (vma->vm_start == addr) && (vma->vm_flags & VM_GROWSUP);
-}
-
-static inline int stack_guard_page_end(struct vm_area_struct *vma,
-					   unsigned long addr)
-{
-	return (vma->vm_flags & VM_GROWSUP) &&
-		(vma->vm_end == addr) &&
-		!vma_growsup(vma->vm_next, addr);
-}
-
 int vma_is_stack_for_current(struct vm_area_struct *vma);
 
 extern unsigned long move_page_tables(struct vm_area_struct *vma,
@@ -2232,6 +2204,7 @@ void page_cache_async_readahead(struct a
 			pgoff_t offset,
 			unsigned long size);
 
+extern unsigned long stack_guard_gap;
 /* Generic expand stack which grows the stack according to GROWS{UP,DOWN} */
 extern int expand_stack(struct vm_area_struct *vma, unsigned long address);
 
@@ -2260,6 +2233,30 @@ static inline struct vm_area_struct * fi
 	return vma;
 }
 
+static inline unsigned long vm_start_gap(struct vm_area_struct *vma)
+{
+	unsigned long vm_start = vma->vm_start;
+
+	if (vma->vm_flags & VM_GROWSDOWN) {
+		vm_start -= stack_guard_gap;
+		if (vm_start > vma->vm_start)
+			vm_start = 0;
+	}
+	return vm_start;
+}
+
+static inline unsigned long vm_end_gap(struct vm_area_struct *vma)
+{
+	unsigned long vm_end = vma->vm_end;
+
+	if (vma->vm_flags & VM_GROWSUP) {
+		vm_end += stack_guard_gap;
+		if (vm_end < vma->vm_end)
+			vm_end = -PAGE_SIZE;
+	}
+	return vm_end;
+}
+
 static inline unsigned long vma_pages(struct vm_area_struct *vma)
 {
 	return (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -387,11 +387,6 @@ static int faultin_page(struct task_stru
 	/* mlock all present pages, but do not fault in new pages */
 	if ((*flags & (FOLL_POPULATE | FOLL_MLOCK)) == FOLL_MLOCK)
 		return -ENOENT;
-	/* For mm_populate(), just skip the stack guard page. */
-	if ((*flags & FOLL_POPULATE) &&
-			(stack_guard_page_start(vma, address) ||
-			 stack_guard_page_end(vma, address + PAGE_SIZE)))
-		return -ENOENT;
 	if (*flags & FOLL_WRITE)
 		fault_flags |= FAULT_FLAG_WRITE;
 	if (*flags & FOLL_REMOTE)
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2855,40 +2855,6 @@ out_release:
 }
 
 /*
- * This is like a special single-page "expand_{down|up}wards()",
- * except we must first make sure that 'address{-|+}PAGE_SIZE'
- * doesn't hit another vma.
- */
-static inline int check_stack_guard_page(struct vm_area_struct *vma, unsigned long address)
-{
-	address &= PAGE_MASK;
-	if ((vma->vm_flags & VM_GROWSDOWN) && address == vma->vm_start) {
-		struct vm_area_struct *prev = vma->vm_prev;
-
-		/*
-		 * Is there a mapping abutting this one below?
-		 *
-		 * That's only ok if it's the same stack mapping
-		 * that has gotten split..
-		 */
-		if (prev && prev->vm_end == address)
-			return prev->vm_flags & VM_GROWSDOWN ? 0 : -ENOMEM;
-
-		return expand_downwards(vma, address - PAGE_SIZE);
-	}
-	if ((vma->vm_flags & VM_GROWSUP) && address + PAGE_SIZE == vma->vm_end) {
-		struct vm_area_struct *next = vma->vm_next;
-
-		/* As VM_GROWSDOWN but s/below/above/ */
-		if (next && next->vm_start == address + PAGE_SIZE)
-			return next->vm_flags & VM_GROWSUP ? 0 : -ENOMEM;
-
-		return expand_upwards(vma, address + PAGE_SIZE);
-	}
-	return 0;
-}
-
-/*
  * We enter with non-exclusive mmap_sem (to exclude vma changes,
  * but allow concurrent faults), and pte mapped but not yet locked.
  * We return with mmap_sem still held, but pte unmapped and unlocked.
@@ -2904,10 +2870,6 @@ static int do_anonymous_page(struct vm_f
 	if (vma->vm_flags & VM_SHARED)
 		return VM_FAULT_SIGBUS;
 
-	/* Check if we need to add a guard page to the stack */
-	if (check_stack_guard_page(vma, vmf->address) < 0)
-		return VM_FAULT_SIGSEGV;
-
 	/*
 	 * Use pte_alloc() instead of pte_alloc_map().  We can't run
 	 * pte_offset_map() on pmds where a huge pmd might be created
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -183,6 +183,7 @@ SYSCALL_DEFINE1(brk, unsigned long, brk)
 	unsigned long retval;
 	unsigned long newbrk, oldbrk;
 	struct mm_struct *mm = current->mm;
+	struct vm_area_struct *next;
 	unsigned long min_brk;
 	bool populate;
 	LIST_HEAD(uf);
@@ -229,7 +230,8 @@ SYSCALL_DEFINE1(brk, unsigned long, brk)
 	}
 
 	/* Check against existing mmap mappings. */
-	if (find_vma_intersection(mm, oldbrk, newbrk+PAGE_SIZE))
+	next = find_vma(mm, oldbrk);
+	if (next && newbrk + PAGE_SIZE > vm_start_gap(next))
 		goto out;
 
 	/* Ok, looks good - let it rip. */
@@ -253,10 +255,22 @@ out:
 
 static long vma_compute_subtree_gap(struct vm_area_struct *vma)
 {
-	unsigned long max, subtree_gap;
-	max = vma->vm_start;
-	if (vma->vm_prev)
-		max -= vma->vm_prev->vm_end;
+	unsigned long max, prev_end, subtree_gap;
+
+	/*
+	 * Note: in the rare case of a VM_GROWSDOWN above a VM_GROWSUP, we
+	 * allow two stack_guard_gaps between them here, and when choosing
+	 * an unmapped area; whereas when expanding we only require one.
+	 * That's a little inconsistent, but keeps the code here simpler.
+	 */
+	max = vm_start_gap(vma);
+	if (vma->vm_prev) {
+		prev_end = vm_end_gap(vma->vm_prev);
+		if (max > prev_end)
+			max -= prev_end;
+		else
+			max = 0;
+	}
 	if (vma->vm_rb.rb_left) {
 		subtree_gap = rb_entry(vma->vm_rb.rb_left,
 				struct vm_area_struct, vm_rb)->rb_subtree_gap;
@@ -352,7 +366,7 @@ static void validate_mm(struct mm_struct
 			anon_vma_unlock_read(anon_vma);
 		}
 
-		highest_address = vma->vm_end;
+		highest_address = vm_end_gap(vma);
 		vma = vma->vm_next;
 		i++;
 	}
@@ -541,7 +555,7 @@ void __vma_link_rb(struct mm_struct *mm,
 	if (vma->vm_next)
 		vma_gap_update(vma->vm_next);
 	else
-		mm->highest_vm_end = vma->vm_end;
+		mm->highest_vm_end = vm_end_gap(vma);
 
 	/*
 	 * vma->vm_prev wasn't known when we followed the rbtree to find the
@@ -856,7 +870,7 @@ again:
 			vma_gap_update(vma);
 		if (end_changed) {
 			if (!next)
-				mm->highest_vm_end = end;
+				mm->highest_vm_end = vm_end_gap(vma);
 			else if (!adjust_next)
 				vma_gap_update(next);
 		}
@@ -941,7 +955,7 @@ again:
 			 * mm->highest_vm_end doesn't need any update
 			 * in remove_next == 1 case.
 			 */
-			VM_WARN_ON(mm->highest_vm_end != end);
+			VM_WARN_ON(mm->highest_vm_end != vm_end_gap(vma));
 		}
 	}
 	if (insert && file)
@@ -1787,7 +1801,7 @@ unsigned long unmapped_area(struct vm_un
 
 	while (true) {
 		/* Visit left subtree if it looks promising */
-		gap_end = vma->vm_start;
+		gap_end = vm_start_gap(vma);
 		if (gap_end >= low_limit && vma->vm_rb.rb_left) {
 			struct vm_area_struct *left =
 				rb_entry(vma->vm_rb.rb_left,
@@ -1798,7 +1812,7 @@ unsigned long unmapped_area(struct vm_un
 			}
 		}
 
-		gap_start = vma->vm_prev ? vma->vm_prev->vm_end : 0;
+		gap_start = vma->vm_prev ? vm_end_gap(vma->vm_prev) : 0;
 check_current:
 		/* Check if current node has a suitable gap */
 		if (gap_start > high_limit)
@@ -1825,8 +1839,8 @@ check_current:
 			vma = rb_entry(rb_parent(prev),
 				       struct vm_area_struct, vm_rb);
 			if (prev == vma->vm_rb.rb_left) {
-				gap_start = vma->vm_prev->vm_end;
-				gap_end = vma->vm_start;
+				gap_start = vm_end_gap(vma->vm_prev);
+				gap_end = vm_start_gap(vma);
 				goto check_current;
 			}
 		}
@@ -1890,7 +1904,7 @@ unsigned long unmapped_area_topdown(stru
 
 	while (true) {
 		/* Visit right subtree if it looks promising */
-		gap_start = vma->vm_prev ? vma->vm_prev->vm_end : 0;
+		gap_start = vma->vm_prev ? vm_end_gap(vma->vm_prev) : 0;
 		if (gap_start <= high_limit && vma->vm_rb.rb_right) {
 			struct vm_area_struct *right =
 				rb_entry(vma->vm_rb.rb_right,
@@ -1903,7 +1917,7 @@ unsigned long unmapped_area_topdown(stru
 
 check_current:
 		/* Check if current node has a suitable gap */
-		gap_end = vma->vm_start;
+		gap_end = vm_start_gap(vma);
 		if (gap_end < low_limit)
 			return -ENOMEM;
 		if (gap_start <= high_limit && gap_end - gap_start >= length)
@@ -1929,7 +1943,7 @@ check_current:
 				struct vm_area_struct, vm_rb);
 			if (prev == vma->vm_rb.rb_right) {
 				gap_start = vma->vm_prev ?
-					vma->vm_prev->vm_end : 0;
+					vm_end_gap(vma->vm_prev) : 0;
 				goto check_current;
 			}
 		}
@@ -1967,7 +1981,7 @@ arch_get_unmapped_area(struct file *filp
 		unsigned long len, unsigned long pgoff, unsigned long flags)
 {
 	struct mm_struct *mm = current->mm;
-	struct vm_area_struct *vma;
+	struct vm_area_struct *vma, *prev;
 	struct vm_unmapped_area_info info;
 
 	if (len > TASK_SIZE - mmap_min_addr)
@@ -1978,9 +1992,10 @@ arch_get_unmapped_area(struct file *filp
 
 	if (addr) {
 		addr = PAGE_ALIGN(addr);
-		vma = find_vma(mm, addr);
+		vma = find_vma_prev(mm, addr, &prev);
 		if (TASK_SIZE - len >= addr && addr >= mmap_min_addr &&
-		    (!vma || addr + len <= vma->vm_start))
+		    (!vma || addr + len <= vm_start_gap(vma)) &&
+		    (!prev || addr >= vm_end_gap(prev)))
 			return addr;
 	}
 
@@ -2003,7 +2018,7 @@ arch_get_unmapped_area_topdown(struct fi
 			  const unsigned long len, const unsigned long pgoff,
 			  const unsigned long flags)
 {
-	struct vm_area_struct *vma;
+	struct vm_area_struct *vma, *prev;
 	struct mm_struct *mm = current->mm;
 	unsigned long addr = addr0;
 	struct vm_unmapped_area_info info;
@@ -2018,9 +2033,10 @@ arch_get_unmapped_area_topdown(struct fi
 	/* requesting a specific address */
 	if (addr) {
 		addr = PAGE_ALIGN(addr);
-		vma = find_vma(mm, addr);
+		vma = find_vma_prev(mm, addr, &prev);
 		if (TASK_SIZE - len >= addr && addr >= mmap_min_addr &&
-		    (!vma || addr + len <= vma->vm_start))
+		    (!vma || addr + len <= vm_start_gap(vma)) &&
+		    (!prev || addr >= vm_end_gap(prev)))
 			return addr;
 	}
 
@@ -2155,21 +2171,19 @@ find_vma_prev(struct mm_struct *mm, unsi
  * update accounting. This is shared with both the
  * grow-up and grow-down cases.
  */
-static int acct_stack_growth(struct vm_area_struct *vma, unsigned long size, unsigned long grow)
+static int acct_stack_growth(struct vm_area_struct *vma,
+			     unsigned long size, unsigned long grow)
 {
 	struct mm_struct *mm = vma->vm_mm;
 	struct rlimit *rlim = current->signal->rlim;
-	unsigned long new_start, actual_size;
+	unsigned long new_start;
 
 	/* address space limit tests */
 	if (!may_expand_vm(mm, vma->vm_flags, grow))
 		return -ENOMEM;
 
 	/* Stack limit test */
-	actual_size = size;
-	if (size && (vma->vm_flags & (VM_GROWSUP | VM_GROWSDOWN)))
-		actual_size -= PAGE_SIZE;
-	if (actual_size > READ_ONCE(rlim[RLIMIT_STACK].rlim_cur))
+	if (size > READ_ONCE(rlim[RLIMIT_STACK].rlim_cur))
 		return -ENOMEM;
 
 	/* mlock limit tests */
@@ -2207,17 +2221,30 @@ static int acct_stack_growth(struct vm_a
 int expand_upwards(struct vm_area_struct *vma, unsigned long address)
 {
 	struct mm_struct *mm = vma->vm_mm;
+	struct vm_area_struct *next;
+	unsigned long gap_addr;
 	int error = 0;
 
 	if (!(vma->vm_flags & VM_GROWSUP))
 		return -EFAULT;
 
 	/* Guard against wrapping around to address 0. */
-	if (address < PAGE_ALIGN(address+4))
-		address = PAGE_ALIGN(address+4);
-	else
+	address &= PAGE_MASK;
+	address += PAGE_SIZE;
+	if (!address)
 		return -ENOMEM;
 
+	/* Enforce stack_guard_gap */
+	gap_addr = address + stack_guard_gap;
+	if (gap_addr < address)
+		return -ENOMEM;
+	next = vma->vm_next;
+	if (next && next->vm_start < gap_addr) {
+		if (!(next->vm_flags & VM_GROWSUP))
+			return -ENOMEM;
+		/* Check that both stack segments have the same anon_vma? */
+	}
+
 	/* We must make sure the anon_vma is allocated. */
 	if (unlikely(anon_vma_prepare(vma)))
 		return -ENOMEM;
@@ -2261,7 +2288,7 @@ int expand_upwards(struct vm_area_struct
 		if (vma->vm_next)
 			vma_gap_update(vma->vm_next);
 		else
-			mm->highest_vm_end = address;
+			mm->highest_vm_end = vm_end_gap(vma);
 		spin_unlock(&mm->page_table_lock);
 
 		perf_event_mmap(vma);
@@ -2282,6 +2309,8 @@ int expand_downwards(struct vm_area_stru
 				   unsigned long address)
 {
 	struct mm_struct *mm = vma->vm_mm;
+	struct vm_area_struct *prev;
+	unsigned long gap_addr;
 	int error;
 
 	address &= PAGE_MASK;
@@ -2289,6 +2318,17 @@ int expand_downwards(struct vm_area_stru
 	if (error)
 		return error;
 
+	/* Enforce stack_guard_gap */
+	gap_addr = address - stack_guard_gap;
+	if (gap_addr > address)
+		return -ENOMEM;
+	prev = vma->vm_prev;
+	if (prev && prev->vm_end > gap_addr) {
+		if (!(prev->vm_flags & VM_GROWSDOWN))
+			return -ENOMEM;
+		/* Check that both stack segments have the same anon_vma? */
+	}
+
 	/* We must make sure the anon_vma is allocated. */
 	if (unlikely(anon_vma_prepare(vma)))
 		return -ENOMEM;
@@ -2343,28 +2383,25 @@ int expand_downwards(struct vm_area_stru
 	return error;
 }
 
-/*
- * Note how expand_stack() refuses to expand the stack all the way to
- * abut the next virtual mapping, *unless* that mapping itself is also
- * a stack mapping.  We want to leave room for a guard page, after all
- * (the guard page itself is not added here, that is done by the
- * actual page faulting logic)
- *
- * This matches the behavior of the guard page logic (see mm/memory.c:
- * check_stack_guard_page()), which only allows the guard page to be
- * removed under these circumstances.
- */
+/* enforced gap between the expanding stack and other mappings. */
+unsigned long stack_guard_gap = 256UL<<PAGE_SHIFT;
+
+static int __init cmdline_parse_stack_guard_gap(char *p)
+{
+	unsigned long val;
+	char *endptr;
+
+	val = simple_strtoul(p, &endptr, 10);
+	if (!*endptr)
+		stack_guard_gap = val << PAGE_SHIFT;
+
+	return 0;
+}
+__setup("stack_guard_gap=", cmdline_parse_stack_guard_gap);
+
 #ifdef CONFIG_STACK_GROWSUP
 int expand_stack(struct vm_area_struct *vma, unsigned long address)
 {
-	struct vm_area_struct *next;
-
-	address &= PAGE_MASK;
-	next = vma->vm_next;
-	if (next && next->vm_start == address + PAGE_SIZE) {
-		if (!(next->vm_flags & VM_GROWSUP))
-			return -ENOMEM;
-	}
 	return expand_upwards(vma, address);
 }
 
@@ -2386,14 +2423,6 @@ find_extend_vma(struct mm_struct *mm, un
 #else
 int expand_stack(struct vm_area_struct *vma, unsigned long address)
 {
-	struct vm_area_struct *prev;
-
-	address &= PAGE_MASK;
-	prev = vma->vm_prev;
-	if (prev && prev->vm_end == address) {
-		if (!(prev->vm_flags & VM_GROWSDOWN))
-			return -ENOMEM;
-	}
 	return expand_downwards(vma, address);
 }
 
@@ -2491,7 +2520,7 @@ detach_vmas_to_be_unmapped(struct mm_str
 		vma->vm_prev = prev;
 		vma_gap_update(vma);
 	} else
-		mm->highest_vm_end = prev ? prev->vm_end : 0;
+		mm->highest_vm_end = prev ? vm_end_gap(prev) : 0;
 	tail_vma->vm_next = NULL;
 
 	/* Kill the cache */