内存Zone中的pageset成员分析
1: struct per_cpu_pageset __percpu *pageset;
首先,分析一个函数__free_pages。这个函数是Buddy System提供的API接口函数,用于释放曾经分配的一组页(共2^order个页,具体数量由order决定)
1: void __free_pages(struct page *page, unsigned int order)
2: {
3: if (put_page_testzero(page)) {
4: if (order == 0)
5: free_hot_cold_page(page, 0);
6: else
7: __free_pages_ok(page, order);
8: }
9: }
首先,调用put_page_testzero来查看该页是否还有其他引用(struct page结构中的_count)。
即先减去当前的这次引用(减1),然后查看是否引用值已经为0。
1: /*
2: * Drop a ref, return true if the refcount fell to zero (the page has no users)
3: */
4: static inline int put_page_testzero(struct page *page)
5: {
6: VM_BUG_ON(atomic_read(&page->_count) == 0);
7: return atomic_dec_and_test(&page->_count);
8: }
其中,atomic_xxx是内核提供的原子操作实现,有兴趣的话可以进一步深入研究。
然后,如果order为0,代表只有一个内存页(2^0 = 1)需要释放,就调用free_hot_cold_page函数;否则调用__free_pages_ok一次释放2^order个页。
1: /*
2: * Free a 0-order page
3: * cold == 1 ? free a cold page : free a hot page
4: */
5: void free_hot_cold_page(struct page *page, int cold)
6: {
7: struct zone *zone = page_zone(page);
8: struct per_cpu_pages *pcp;
9: unsigned long flags;
10: int migratetype;
11: int wasMlocked = __TestClearPageMlocked(page);
12:
13: if (!free_pages_prepare(page, 0))
14: return;
15:
16: migratetype = get_pageblock_migratetype(page);
17: set_page_private(page, migratetype);
18: local_irq_save(flags);
19: if (unlikely(wasMlocked))
20: free_page_mlock(page);
21: __count_vm_event(PGFREE);
22:
23: /*
24: * We only track unmovable, reclaimable and movable on pcp lists.
25: * Free ISOLATE pages back to the allocator because they are being
26: * offlined but treat RESERVE as movable pages so we can get those
27: * areas back if necessary. Otherwise, we may have to free
28: * excessively into the page allocator
29: */
30: if (migratetype >= MIGRATE_PCPTYPES) {
31: if (unlikely(migratetype == MIGRATE_ISOLATE)) {
32: free_one_page(zone, page, 0, migratetype);
33: goto out;
34: }
35: migratetype = MIGRATE_MOVABLE;
36: }
37:
38: pcp = &this_cpu_ptr(zone->pageset)->pcp;
39: if (cold)
40: list_add_tail(&page->lru, &pcp->lists[migratetype]);
41: else
42: list_add(&page->lru, &pcp->lists[migratetype]);
43: pcp->count++;
44: if (pcp->count >= pcp->high) {
45: free_pcppages_bulk(zone, pcp->batch, pcp);
46: pcp->count -= pcp->batch;
47: }
48:
49: out:
50: local_irq_restore(flags);
51: }
page_zone是根据page找到其所在zone的函数。其实现依赖于page->flags中的若干比特位,这些比特位记录了该page属于哪个zone。
那么page->flags是从什么时候开始携带这些信息的呢?
首先,所有的page结构体都保存在pglist_data的成员node_mem_map指向的一片内存里。
1: /*
2: * Initially all pages are reserved - free ones are freed
3: * up by free_all_bootmem() once the early boot process is
4: * done. Non-atomic initialization, single-pass.
5: */
6: void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
7: unsigned long start_pfn, enum memmap_context context)
8: {
9: struct page *page;
10: unsigned long end_pfn = start_pfn + size;
11: unsigned long pfn;
12: struct zone *z;
13:
14: if (highest_memmap_pfn < end_pfn - 1)
15: highest_memmap_pfn = end_pfn - 1;
16:
17: z = &NODE_DATA(nid)->node_zones[zone];
18: for (pfn = start_pfn; pfn < end_pfn; pfn++) {
19: /*
20: * There can be holes in boot-time mem_map[]s
21: * handed to this function. They do not
22: * exist on hotplugged memory.
23: */
24: if (context == MEMMAP_EARLY) {
25: if (!early_pfn_valid(pfn))
26: continue;
27: if (!early_pfn_in_nid(pfn, nid))
28: continue;
29: }
30: page = pfn_to_page(pfn);
31: set_page_links(page, zone, nid, pfn);
32: mminit_verify_page_links(page, zone, nid, pfn);
33: init_page_count(page);
34: reset_page_mapcount(page);
35: SetPageReserved(page);
36: /*
37: * Mark the block movable so that blocks are reserved for
38: * movable at startup. This will force kernel allocations
39: * to reserve their blocks rather than leaking throughout
40: * the address space during boot when many long-lived
41: * kernel allocations are made. Later some blocks near
42: * the start are marked MIGRATE_RESERVE by
43: * setup_zone_migrate_reserve()
44: *
45: * bitmap is created for zone's valid pfn range. but memmap
46: * can be created for invalid pages (for alignment)
47: * check here not to call set_pageblock_migratetype() against
48: * pfn out of zone.
49: */
50: if ((z->zone_start_pfn <= pfn)
51: && (pfn < z->zone_start_pfn + z->spanned_pages)
52: && !(pfn & (pageblock_nr_pages - 1)))
53: set_pageblock_migratetype(page, MIGRATE_MOVABLE);
54:
55: INIT_LIST_HEAD(&page->lru);
56: #ifdef WANT_PAGE_VIRTUAL
57: /* The shift won't overflow because ZONE_NORMAL is below 4G. */
58: if (!is_highmem_idx(zone))
59: set_page_address(page, __va(pfn << PAGE_SHIFT));
60: #endif
61: }
62: }
在Buddy System初始化的过程中,会调用memmap_init_zone函数,在该函数中,会将属于该Zone的所有page结构体都遍历处理一遍,都调用一次set_page_links来建立page与zone之间的对应关系。
1: static inline void set_page_zone(struct page *page, enum zone_type zone)
2: {
3: page->flags &= ~(ZONES_MASK << ZONES_PGSHIFT);
4: page->flags |= (zone & ZONES_MASK) << ZONES_PGSHIFT;
5: }
6:
7: static inline void set_page_node(struct page *page, unsigned long node)
8: {
9: page->flags &= ~(NODES_MASK << NODES_PGSHIFT);
10: page->flags |= (node & NODES_MASK) << NODES_PGSHIFT;
11: }
12:
13: static inline void set_page_links(struct page *page, enum zone_type zone,
14: unsigned long node, unsigned long pfn)
15: {
16: set_page_zone(page, zone);
17: set_page_node(page, node);
18: #if defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP)
19: set_page_section(page, pfn_to_section_nr(pfn));
20: #endif
21: }
启动过程中的内存初始化
1: void __init setup_arch(char **cmdline_p)
2: {
3: ......
4: /* max_pfn_mapped is updated here */
5: max_low_pfn_mapped = init_memory_mapping(0, max_low_pfn<<PAGE_SHIFT);
6: max_pfn_mapped = max_low_pfn_mapped;
7: ......
8: paging_init();
9: ......
10: }
调用init_memory_mapping
1: /*
2: * Setup the direct mapping of the physical memory at PAGE_OFFSET.
3: * This runs before bootmem is initialized and gets pages directly from
4: * the physical memory. To access them they are temporarily mapped.
5: */
6: unsigned long __init_refok init_memory_mapping(unsigned long start,
7: unsigned long end)
8: {
9: ......
10:
11: for (i = 0; i < nr_range; i++)
12: ret = kernel_physical_mapping_init(mr[i].start, mr[i].end,
13: mr[i].page_size_mask);
14:
15: ......
16: }
调用kernel_physical_mapping_init
1: /*
2: * This maps the physical memory to kernel virtual address space, a total
3: * of max_low_pfn pages, by creating page tables starting from address
4: * PAGE_OFFSET:
5: */
6: unsigned long __init
7: kernel_physical_mapping_init(unsigned long start,
8: unsigned long end,
9: unsigned long page_size_mask)
10: {
11: int use_pse = page_size_mask == (1<<PG_LEVEL_2M);
12: unsigned long last_map_addr = end;
13: unsigned long start_pfn, end_pfn;
14: pgd_t *pgd_base = swapper_pg_dir;
15: int pgd_idx, pmd_idx, pte_ofs;
16: unsigned long pfn;
17: pgd_t *pgd;
18: pmd_t *pmd;
19: pte_t *pte;
20: unsigned pages_2m, pages_4k;
21: int mapping_iter;
22:
23: start_pfn = start >> PAGE_SHIFT;
24: end_pfn = end >> PAGE_SHIFT;
25:
26: /*
27: * First iteration will setup identity mapping using large/small pages
28: * based on use_pse, with other attributes same as set by
29: * the early code in head_32.S
30: *
31: * Second iteration will setup the appropriate attributes (NX, GLOBAL..)
32: * as desired for the kernel identity mapping.
33: *
34: * This two pass mechanism conforms to the TLB app note which says:
35: *
36: * "Software should not write to a paging-structure entry in a way
37: * that would change, for any linear address, both the page size
38: * and either the page frame or attributes."
39: */
40: mapping_iter = 1;
41:
42: if (!cpu_has_pse)
43: use_pse = 0;
44:
45: repeat:
46: pages_2m = pages_4k = 0;
47: pfn = start_pfn;
48: pgd_idx = pgd_index((pfn<<PAGE_SHIFT) + PAGE_OFFSET);
49: pgd = pgd_base + pgd_idx;
50: for (; pgd_idx < PTRS_PER_PGD; pgd++, pgd_idx++) {
51: pmd = one_md_table_init(pgd);
52:
53: if (pfn >= end_pfn)
54: continue;
55: #ifdef CONFIG_X86_PAE
56: pmd_idx = pmd_index((pfn<<PAGE_SHIFT) + PAGE_OFFSET);
57: pmd += pmd_idx;
58: #else
59: pmd_idx = 0;
60: #endif
61: for (; pmd_idx < PTRS_PER_PMD && pfn < end_pfn;
62: pmd++, pmd_idx++) {
63: unsigned int addr = pfn * PAGE_SIZE + PAGE_OFFSET;
64:
65: /*
66: * Map with big pages if possible, otherwise
67: * create normal page tables:
68: */
69: if (use_pse) {
70: unsigned int addr2;
71: pgprot_t prot = PAGE_KERNEL_LARGE;
72: /*
73: * first pass will use the same initial
74: * identity mapping attribute + _PAGE_PSE.
75: */
76: pgprot_t init_prot =
77: __pgprot(PTE_IDENT_ATTR |
78: _PAGE_PSE);
79:
80: addr2 = (pfn + PTRS_PER_PTE-1) * PAGE_SIZE +
81: PAGE_OFFSET + PAGE_SIZE-1;
82:
83: if (is_kernel_text(addr) ||
84: is_kernel_text(addr2))
85: prot = PAGE_KERNEL_LARGE_EXEC;
86:
87: pages_2m++;
88: if (mapping_iter == 1)
89: set_pmd(pmd, pfn_pmd(pfn, init_prot));
90: else
91: set_pmd(pmd, pfn_pmd(pfn, prot));
92:
93: pfn += PTRS_PER_PTE;
94: continue;
95: }
96: pte = one_page_table_init(pmd);
97:
98: pte_ofs = pte_index((pfn<<PAGE_SHIFT) + PAGE_OFFSET);
99: pte += pte_ofs;
100: for (; pte_ofs < PTRS_PER_PTE && pfn < end_pfn;
101: pte++, pfn++, pte_ofs++, addr += PAGE_SIZE) {
102: pgprot_t prot = PAGE_KERNEL;
103: /*
104: * first pass will use the same initial
105: * identity mapping attribute.
106: */
107: pgprot_t init_prot = __pgprot(PTE_IDENT_ATTR);
108:
109: if (is_kernel_text(addr))
110: prot = PAGE_KERNEL_EXEC;
111:
112: pages_4k++;
113: if (mapping_iter == 1) {
114: set_pte(pte, pfn_pte(pfn, init_prot));
115: last_map_addr = (pfn << PAGE_SHIFT) + PAGE_SIZE;
116: } else
117: set_pte(pte, pfn_pte(pfn, prot));
118: }
119: }
120: }
121: if (mapping_iter == 1) {
122: /*
123: * update direct mapping page count only in the first
124: * iteration.
125: */
126: update_page_count(PG_LEVEL_2M, pages_2m);
127: update_page_count(PG_LEVEL_4K, pages_4k);
128:
129: /*
130: * local global flush tlb, which will flush the previous
131: * mappings present in both small and large page TLB's.
132: */
133: __flush_tlb_all();
134:
135: /*
136: * Second iteration will set the actual desired PTE attributes.
137: */
138: mapping_iter = 2;
139: goto repeat;
140: }
141: return last_map_addr;
142: }
在这里面,将swapper_pg_dir作为pgd_t(Page Directory)的指针,对swapper_pg_dir指向的内存区域作处理,将Normal区域的映射关系建立到该页目录中。
然后,在paging_init中会调用pagetable_init:
1: static void __init pagetable_init(void)
2: {
3: pgd_t *pgd_base = swapper_pg_dir;
4:
5: permanent_kmaps_init(pgd_base);
6: }
内存Zone中的pageset成员分析的更多相关文章
- DEBUG模式下, 内存中的变量地址分析
测试函数的模板实现 /// @file my_template.h /// @brief 测试数据类型用的模板实现 #ifndef MY_TEMPLATE_H_2016_0123_1226 #defi ...
- 继承的基本概念: (1)Java不支持多继承,也就是说子类至多只能有一个父类。 (2)子类继承了其父类中不是私有的成员变量和成员方法,作为自己的成员变量和方法。 (3)子类中定义的成员变量和父类中定义的成员变量相同时,则父类中的成员变量不能被继承。 (4)子类中定义的成员方法,并且这个方法的名字返回类型,以及参数个数和类型与父类的某个成员方法完全相同,则父类的成员方法不能被继承。 分析以上程
继承的基本概念: (1)Java不支持多继承,也就是说子类至多只能有一个父类. (2)子类继承了其父类中不是私有的成员变量和成员方法,作为自己的成员变量和方法.(3)子类中定义的成员变量和父类中定义的 ...
- linux-3.2.36内核启动2-setup_arch中的内存初始化1(arm平台 分析高端内存和初始化memblock)【转】
转自:http://blog.csdn.net/tommy_wxie/article/details/17093307 上一篇微博留下了这几个函数,现在我们来分析它们 sanity_c ...
- java内存模型中工作内存并不一定会同步主内存的情况分析
其实是为了填之前的一个坑 在一个多线程的案例中出现了阻塞的情况. https://www.cnblogs.com/hetutu-5238/p/10477875.html 其中的第二个问题,即多个 ...
- 关于 self 和 super 在oc 中 的疑惑 与 分析
关于 self 和 super 在oc 中 的疑惑 与 分析 面试一定都是很注重 基础的,不管高级还是初级. 虽然基础好跟基础不好都可以写 代码,网上那么多资料. 区分高低也就是研究的深度和广度 ...
- (第三章)Java内存模型(中)
一.volatile的内存语义 1.1 volatile的特性 理解volatile特性的一个好办法是把对volatile变量的单个读/写,看成是使用同一个锁对这些单个读/写操作做了同步.下面通过具体 ...
- C++中类的内存空间大小(sizeof)分析
注意类大小与结构体大小还是有一些不同的,类里面还包含成员函数(特别是虚函数),结构体中一般只有数据成员. 首先明确各数据类型占多大的空间.例如int到底是占2字节还是4字节空间: 在TC里,int是2 ...
- C++中几个值得分析的小问题(2)
下面有3个小问题,作为C++ Beginner你一定要知道错在哪里了. 1.派生类到基类的引用或指针转换一定“完美”存在? 一般情况,你很可能会认为:派生类对象的引用或指针转换为基类对象的引用或指针是 ...
- C++中static数据成员详解
本文和大家分享的主要是c++中static数据成员的相关用法及源码示例,希望能帮助大家更好的学习C++. static(静态存储)数据成员 StaticTest.cpp : 定义控制台应用程序 ...
随机推荐
- python find()函数
实例(Python 2.0+) str1 = "this is string example....wow!!!"; str2 = "exam"; print ...
- ceph-报错日志
由于时钟不一致问题,导致ceph存储有问题 clock skew时钟偏移overalladj. 全部的:全体的:一切在内的stampedadj. 铭刻的:盖上邮戳的:顿足的 beaconvt. 照亮, ...
- CCflow与基础框架组织机构整合
SELECT No,Name,Pass,FK_Dept,SID FROM Port_Emp SELECT No,Name,ParentNo FROM Port_Dept SELECT No,Name, ...
- 微信小程序观察者模式 observers
const app = getApp(); const request = require('../../../utils/request.js'); Component({ options: { m ...
- ActiveX (ocx) 控件 在vs2010 上debug 的方法
1.在项目的属性中配置Debug,设置调试选项卡中的“命令”.“命令参数”.“工作目录”,“命令”为IE的路径,“命令参数”为自己写的htm页面路径(因htm中未配置ocx路径,所以直接把htm放在了 ...
- this与super的语法比较
this 代表当前对象 可以代表当前属性,当前方法,当前对象(整个自己). 作用:解决同名变量的同名问题,同明变量可能来源于父类,局部变量和成员变量... 语法使用:this( 实参... ); 调用 ...
- vue-cli 中多个组件共用一个mt-checklist
// html <div v-if="dataList"> <mt-popup v-model="popupVisible" position ...
- 使用python+ffmpeg批量转换格式
需求: 给定一个文件夹路径,遍历该文件夹内的所有文件以及子文件夹内的文件,当所有后缀名为wav格式的文件转换为ogg格式的文件. import os # 获取目录下的所有文件列表 import fn ...
- Oracle实现主键自增的几种方式
数据库作为一个系统的核心,数据库设计的1NF就是一个表结构必须有唯一约束也就是主键,Oracle数据库本身没有自增机制,不像MySQL直接使用关键字AUTO_INCREMENT自动加一,所以需要我们去 ...
- Python爬虫实战——反爬策略之代理IP【无忧代理】
一般情况下,我并不建议使用自己的IP来爬取网站,而是会使用代理IP. 原因很简单:爬虫一般都有很高的访问频率,当服务器监测到某个IP以过高的访问频率在进行访问,它便会认为这个IP是一只"爬虫 ...