专题:Linux内存管理专题

关键词:VMA、vm_area_struct、查找/插入/合并VMA、红黑树。

在32位系统上,每个用户进程可以拥有3GB大小的虚拟地址空间,通常远大于物理内存,那么这些用户进程的虚拟地址空间是如何管理的呢?

malloc()或mmap()操作都会要求在虚拟地址空间中分配内存块,但这些内存在物理上往往都是离散的。

这些进程地址空间在内核中使用struct vm_area_struct数据结构来描述,简称VMA,也被称为进程地址空间或进程线性区。

1. 数据结构

struct vm_area_struct可以说是VMA的描述符,在创建之后会插入到mm->mm_rb红黑树和mm->mmap链表中。

/*
* This struct defines a memory VMM memory area. There is one of these
* per VM-area/task. A VM area is any part of the process virtual memory
* space that has a special rule for the page-fault handlers (ie a shared
* library, the executable area etc).
*/
struct vm_area_struct {
/* The first cache line has the info for VMA tree walking. */ unsigned long vm_start; /* Our start address within vm_mm. */--------VMA在进程地址空间的起始结束地址
unsigned long vm_end; /* The first byte after our end address
within vm_mm. */ /* linked list of VM areas per task, sorted by address */
struct vm_area_struct *vm_next, *vm_prev;----------------------------------VMA链表的前后成员 struct rb_node vm_rb;------------------------------------------------------VMA作为一个节点加入到红黑树中,每个进程的mm_struct中都有一个红黑树mm->mm_rb。 /*
* Largest free memory gap in bytes to the left of this VMA.
* Either between this VMA and vma->vm_prev, or between one of the
* VMAs below us in the VMA rbtree and its ->vm_prev. This helps
* get_unmapped_area find a free area of the right size.
*/
unsigned long rb_subtree_gap; /* Second cache line starts here. */ struct mm_struct *vm_mm; /* The address space we belong to. */--------指向VMA所属进程的struct mm_struct结构。
pgprot_t vm_page_prot; /* Access permissions of this VMA. */------VMA访问权限
unsigned long vm_flags; /* Flags, see mm.h. */--------------------VMA标志位 /*
* For areas with an address space and backing store,
* linkage into the address_space->i_mmap interval tree.
*/
struct {
struct rb_node rb;
unsigned long rb_subtree_last;
} shared; /*
* A file's MAP_PRIVATE vma can be in both i_mmap tree and anon_vma
* list, after a COW of one of the file pages. A MAP_SHARED vma
* can only be in the i_mmap tree. An anonymous MAP_PRIVATE, stack
* or brk vma (with NULL file) can only be in an anon_vma list.
*/
struct list_head anon_vma_chain; /* Serialized by mmap_sem &-----------用于管理RMAP反向映射。
* page_table_lock */
struct anon_vma *anon_vma; /* Serialized by page_table_lock */------用于管理RMAP反向映射。 /* Function pointers to deal with this struct. */
const struct vm_operations_struct *vm_ops;-----------------------------VMA操作函数合集,常用于文件映射。 /* Information about our backing store: */
unsigned long vm_pgoff; /* Offset (within vm_file) in PAGE_SIZE-指定文件映射的偏移量,单位是页面。
units, *not* PAGE_CACHE_SIZE */
struct file * vm_file; /* File we map to (can be NULL). */------描述一个被映射的文件。
void * vm_private_data; /* was vm_pte (shared mem) */ #ifndef CONFIG_MMU
struct vm_region *vm_region; /* NOMMU mapping region */
#endif
#ifdef CONFIG_NUMA
struct mempolicy *vm_policy; /* NUMA policy for the VMA */
#endif
}

struct mm_struct是描述进程内存管理的核心数据结构,VMA属于进程内存区域。在mm_struct中通过mmap链表和mm_rb红黑树对vm_area_struct进行管理。

struct mm_struct {
struct vm_area_struct *mmap; /* list of VMAs */-----单链表,按起始地址递增的方式插入,所有的VMA都连接到此链表中。链表头是mm_struct->mmap。
struct rb_root mm_rb;--------------------------------------所有的VMA按照地址插入mm_struct->mm_rb红黑树中,mm_struct->mm_rb是根节点,每个进程都有一个红黑树。
...
}

2. 查找VMA

/* Look up the first VMA which satisfies  addr < vm_end,  NULL if none. */
struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr)
{
struct rb_node *rb_node;
struct vm_area_struct *vma; /* Check the cache first. */
vma =vmacache_find(mm, addr);
if (likely(vma))
return vma; rb_node = mm->mm_rb.rb_node;
vma = NULL; while (rb_node) {
struct vm_area_struct *tmp; tmp = rb_entry(rb_node, struct vm_area_struct, vm_rb); if (tmp->vm_end > addr) {
vma = tmp;
if (tmp->vm_start <= addr)
break;
rb_node = rb_node->rb_left;
} else
rb_node = rb_node->rb_right;
} if (vma)
vmacache_update(addr, vma);
return vma;
} struct vm_area_struct *vmacache_find(struct mm_struct *mm, unsigned long addr)
{
int i; if (!vmacache_valid(mm))
return NULL; count_vm_vmacache_event(VMACACHE_FIND_CALLS); for (i = ; i < VMACACHE_SIZE; i++) {
struct vm_area_struct *vma = current->vmacache[i]; if (!vma)
continue;
if (WARN_ON_ONCE(vma->vm_mm != mm))
break;
if (vma->vm_start <= addr && vma->vm_end > addr) {
count_vm_vmacache_event(VMACACHE_FIND_HITS);
return vma;
}
} return NULL;
} void vmacache_update(unsigned long addr, struct vm_area_struct *newvma)
{
if (vmacache_valid_mm(newvma->vm_mm))
current->vmacache[VMACACHE_HASH(addr)] = newvma;
}

3. 插入VMA

/*
 * insert_vm_struct - insert a VMA into the VMA structures of an mm.
 * @mm:  address space to insert into
 * @vma: the VMA to insert
 *
 * Locates the insertion point with find_vma_links() and links the VMA in
 * with vma_link().
 *
 * Returns 0 on success, or -ENOMEM when find_vma_links() fails or when the
 * VMA has VM_ACCOUNT set and security_vm_enough_memory_mm() rejects the
 * request.
 */
int insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vma)
{
	struct vm_area_struct *prev;
	struct rb_node **rb_link, *rb_parent;

	/*
	 * The vm_pgoff of a purely anonymous vma should be irrelevant
	 * until its first write fault, when page's anon_vma and index
	 * are set. But now set the vm_pgoff it will almost certainly
	 * end up with (unless mremap moves it elsewhere before that
	 * first wfault), so /proc/pid/maps tells a consistent story.
	 *
	 * By setting it to reflect the virtual start address of the
	 * vma, merges and splits can happen in a seamless way, just
	 * using the existing file pgoff checks and manipulations.
	 * Similarly in do_mmap_pgoff and in do_brk.
	 */
	if (!vma->vm_file) {
		BUG_ON(vma->anon_vma);
		vma->vm_pgoff = vma->vm_start >> PAGE_SHIFT;
	}
	if (find_vma_links(mm, vma->vm_start, vma->vm_end,
			   &prev, &rb_link, &rb_parent))
		return -ENOMEM;
	if ((vma->vm_flags & VM_ACCOUNT) &&
	     security_vm_enough_memory_mm(mm, vma_pages(vma)))
		return -ENOMEM;

	vma_link(mm, vma, prev, rb_link, rb_parent);
	/* Success; the literal 0 had been lost in the excerpt. */
	return 0;
}
/*
 * find_vma_links - find where the range [addr, end) would be linked.
 * @mm:        address space whose red-black tree is walked
 * @addr:      start of the range
 * @end:       end of the range
 * @pprev:     out: the last VMA passed on the way down whose vm_end <= addr,
 *             or NULL if there is none
 * @rb_link:   out: address of the empty child slot where the walk ended
 * @rb_parent: out: the node owning that slot (NULL for an empty tree)
 *
 * Returns 0 on success, or -ENOMEM if a VMA met during the walk overlaps
 * the range; the out parameters are not written in that case.
 */
static int find_vma_links(struct mm_struct *mm, unsigned long addr,
		unsigned long end, struct vm_area_struct **pprev,
		struct rb_node ***rb_link, struct rb_node **rb_parent)
{
	struct rb_node **__rb_link, *__rb_parent, *rb_prev;

	__rb_link = &mm->mm_rb.rb_node;
	rb_prev = __rb_parent = NULL;

	while (*__rb_link) {
		struct vm_area_struct *vma_tmp;

		__rb_parent = *__rb_link;
		vma_tmp = rb_entry(__rb_parent, struct vm_area_struct, vm_rb);

		if (vma_tmp->vm_end > addr) {
			/* Fail if an existing vma overlaps the area */
			if (vma_tmp->vm_start < end)
				return -ENOMEM;
			__rb_link = &__rb_parent->rb_left;
		} else {
			/* vma_tmp ends at or below addr: predecessor candidate. */
			rb_prev = __rb_parent;
			__rb_link = &__rb_parent->rb_right;
		}
	}

	*pprev = NULL;
	if (rb_prev)
		*pprev = rb_entry(rb_prev, struct vm_area_struct, vm_rb);
	*rb_link = __rb_link;
	*rb_parent = __rb_parent;
	/* Success; the literal 0 had been lost in the excerpt. */
	return 0;
}
static void vma_link(struct mm_struct *mm, struct vm_area_struct *vma,
struct vm_area_struct *prev, struct rb_node **rb_link,
struct rb_node *rb_parent)
{
struct address_space *mapping = NULL; if (vma->vm_file) {
mapping = vma->vm_file->f_mapping;
i_mmap_lock_write(mapping);
} __vma_link(mm, vma, prev, rb_link, rb_parent);
__vma_link_file(vma); if (mapping)
i_mmap_unlock_write(mapping); mm->map_count++;
validate_mm(mm);
}

4. 合并VMA

/*
 * Given a mapping request (addr,end,vm_flags,file,pgoff), figure out
 * whether that can be merged with its predecessor or its successor.
 * Or both (it neatly fills a hole).
 *
 * In most cases - when called for mmap, brk or mremap - [addr,end) is
 * certain not to be mapped by the time vma_merge is called; but when
 * called for mprotect, it is certain to be already mapped (either at
 * an offset within prev, or at the start of next), and the flags of
 * this area are about to be changed to vm_flags - and the no-change
 * case has already been eliminated.
 *
 * The following mprotect cases have to be considered, where AAAA is
 * the area passed down from mprotect_fixup, never extending beyond one
 * vma, PPPPPP is the prev vma specified, and NNNNNN the next vma after:
 *
 *     AAAA             AAAA                AAAA          AAAA
 *    PPPPPPNNNNNN    PPPPPPNNNNNN    PPPPPPNNNNNN    PPPPNNNNXXXX
 *    cannot merge    might become    might become    might become
 *                    PPNNNNNNNNNN    PPPPPPPPPPNN    PPPPPPPPPPPP 6 or
 *    mmap, brk or    case 4 below    case 5 below    PPPPPPPPXXXX 7 or
 *    mremap move:                                    PPPPNNNNNNNN 8
 *        AAAA
 *    PPPP    NNNN    PPPPPPPPPPPP    PPPPPPPPNNNN    PPPPNNNNNNNN
 *    might become    case 1 below    case 2 below    case 3 below
 *
 * Odd one out? Case 8, because it extends NNNN but needs flags of XXXX:
 * mprotect_fixup updates vm_flags & vm_page_prot on successful return.
 *
 * Returns the VMA that now covers the request (prev, or the VMA that
 * followed prev on entry) when a merge was performed, or NULL when
 * vm_flags contains VM_SPECIAL, when no merge is possible, or when
 * vma_adjust() fails.
 */
struct vm_area_struct *vma_merge(struct mm_struct *mm,
			struct vm_area_struct *prev, unsigned long addr,
			unsigned long end, unsigned long vm_flags,
			struct anon_vma *anon_vma, struct file *file,
			pgoff_t pgoff, struct mempolicy *policy)
{
	/* Length of the request in pages. */
	pgoff_t pglen = (end - addr) >> PAGE_SHIFT;
	struct vm_area_struct *area, *next;
	int err;

	/*
	 * We later require that vma->vm_flags == vm_flags,
	 * so this tests vma->vm_flags & VM_SPECIAL, too.
	 */
	if (vm_flags & VM_SPECIAL)
		return NULL;

	/* The VMA following prev, or the head of the list if prev is NULL. */
	if (prev)
		next = prev->vm_next;
	else
		next = mm->mmap;
	/* Keep that VMA in area; next may be advanced past it just below. */
	area = next;
	if (next && next->vm_end == end)		/* cases 6, 7, 8 */
		next = next->vm_next;

	/*
	 * Can it merge with the predecessor?
	 */
	if (prev && prev->vm_end == addr &&
			mpol_equal(vma_policy(prev), policy) &&
			can_vma_merge_after(prev, vm_flags,
						anon_vma, file, pgoff)) {
		/*
		 * OK, it can. Can we now merge in the successor as well?
		 */
		if (next && end == next->vm_start &&
				mpol_equal(policy, vma_policy(next)) &&
				can_vma_merge_before(next, vm_flags,
					anon_vma, file, pgoff+pglen) &&
				is_mergeable_anon_vma(prev->anon_vma,
						      next->anon_vma, NULL)) {
							/* cases 1, 6 */
			/* Grow prev over the request and all of next. */
			err = vma_adjust(prev, prev->vm_start,
				next->vm_end, prev->vm_pgoff, NULL);
		} else					/* cases 2, 5, 7 */
			/* Grow prev over the request only. */
			err = vma_adjust(prev, prev->vm_start,
				end, prev->vm_pgoff, NULL);
		if (err)
			return NULL;
		khugepaged_enter_vma_merge(prev, vm_flags);
		return prev;
	}

	/*
	 * Can this new request be merged in front of next?
	 */
	if (next && end == next->vm_start &&
			mpol_equal(policy, vma_policy(next)) &&
			can_vma_merge_before(next, vm_flags,
					anon_vma, file, pgoff+pglen)) {
		if (prev && addr < prev->vm_end)	/* case 4 */
			/* Request starts inside prev: make prev end at addr. */
			err = vma_adjust(prev, prev->vm_start,
				addr, prev->vm_pgoff, NULL);
		else					/* cases 3, 8 */
			/* Extend area to span from addr to the end of next. */
			err = vma_adjust(area, addr, next->vm_end,
				next->vm_pgoff - pglen, NULL);
		if (err)
			return NULL;
		khugepaged_enter_vma_merge(area, vm_flags);
		return area;
	}

	return NULL;
}

5. 红黑树例子

Linux内存管理 (7)VMA操作的更多相关文章

  1. 【原创】(十三)Linux内存管理之vma/malloc/mmap

    背景 Read the fucking source code! --By 鲁迅 A picture is worth a thousand words. --By 高尔基 说明: Kernel版本: ...

  2. linux内存管理

    一.Linux 进程在内存中的数据结构 一个可执行程序在存储(没有调入内存)时分为代码段,数据段,未初始化数据段三部分:    1) 代码段:存放CPU执行的机器指令.通常代码区是共享的,即其它执行程 ...

  3. Linux内存管理原理

    本文以32位机器为准,串讲一些内存管理的知识点. 1. 虚拟地址.物理地址.逻辑地址.线性地址 虚拟地址又叫线性地址.linux没有采用分段机制,所以逻辑地址和虚拟地址(线性地址)(在用户态,内核态逻 ...

  4. Linux内存管理原理【转】

    转自:http://www.cnblogs.com/zhaoyl/p/3695517.html 本文以32位机器为准,串讲一些内存管理的知识点. 1. 虚拟地址.物理地址.逻辑地址.线性地址 虚拟地址 ...

  5. Windows内存管理和linux内存管理

    windows内存管理 windows 内存管理方式主要分为:页式管理,段式管理,段页式管理. 页式管理的基本原理是将各进程的虚拟空间划分为若干个长度相等的页:页式管理把内存空间按照页的大小划分成片或 ...

  6. Linux内存管理专题

    Linux的内存管理涉及到的内容非常庞杂,而且与内核的方方面面耦合在一起,想要理解透彻非常困难. 在开始学习之前进行了一些准备工作<如何展开Linux Memory Management学习?& ...

  7. Linux内存管理 (25)内存sysfs节点解读

    1. General 1.1 /proc/meminfo /proc/meminfo是了解Linux系统内存使用状况主要接口,也是free等命令的数据来源. 下面是cat /proc/meminfo的 ...

  8. Linux内存管理 (8)malloc

    专题:Linux内存管理专题 关键词:malloc.brk.VMA.VM_LOCK.normal page.special page. 每章问答: malloc()函数是C函数库封装的一个核心函数,对 ...

  9. Linux内存管理 (9)mmap

    专题:Linux内存管理专题 关键词:文件映射.匿名映射.私有映射.共享映射 mmap/munmap是常用的一个系统调用,使用场景是:分配内存.读写大文件.连接动态库文件.多进程间共享内存. 更详细解 ...

随机推荐

  1. leetcode — distinct-subsequences

    import java.util.Arrays; /** * * Source : https://oj.leetcode.com/problems/distinct-subsequences/ * ...

  2. Linux基础知识第三讲,拷贝文件跟移动文件命令

    目录 Linux基础知识第三讲,拷贝文件跟移动文件命令 一丶常用命令 1.tree命令常用选项 2.cp复制文件命令 3.mv 命令的使用 Linux基础知识第三讲,拷贝文件跟移动文件命令 一丶常用命 ...

  3. Docker系列07—Dockerfile 详解

    本文收录在容器技术学习系列文章总目录 1.认识Dockerfile 1.1 镜像的生成途径 基于容器制作  dockerfile,docker build 基于容器制作镜像,已经在上篇Docker系列 ...

  4. CSS float的相关图文详解(一)

    大家好,作为一个刚入门的小前端,第一次写博客,很是鸡冻.由于涉猎较浅,有些知识可能说的不清楚,或者有什么错误,欢迎留言指正.我的第一篇博客写的关于css的浮动的.想必很多小伙伴特别是刚学的,对浮动有一 ...

  5. mysql 随机数 rand使用

    生成随机数 生成0-3的随机数 SELECT RAND() * 最大不会超过3, SELECT FLOOR(RAND() * ) 上面生成整数的值是0,1,2,3生成的随机整数是1,2,3的话,语句如 ...

  6. 【转】Android必备知识点- Android文件(File)操作

    Android 使用与其他平台上基于磁盘的文件系统类似的文件系统. 本文讲述如何使用 Android 文件系统通过 File API 读取和写入文件. File 对象适合按照从开始到结束的顺序不跳过地 ...

  7. Java几种常见的设计模式

    --------------------- 本文来自 旭日Follow_24 的CSDN 博客 ,全文地址请点击:https://blog.csdn.net/xuri24/article/detail ...

  8. 使用String. localeCompare比较字符串

    javascript提供stringA.localeCompare(stringB)方法,来判断一个字符串stringB是否排在stringA的前面. 返回值:    如果引用字符存在于比较字符之前则 ...

  9. 微信小程序异步请求问题

    微信小程序为了提高用户体验,提供的api大部分都是异步操作,除了数据缓存操作里面有一些同步操作.是提高了用户体验,但是在开发的时候, 就有点坑了,例如我要写一个公共方法,发起网络请求,去后台去一些数据 ...

  10. 混用Int与IntPtr导致GetProcAddress始终返回null

      注意NET某些类型在不同平台上的长度 NET中用句柄用得最多的是在DLLIMPORT中,混用int与intptr可能会导致某些API声明在X64平台中表现不正常,如 [DllImport(&quo ...