Page Ext

基本概念

/*
 * Page Extension can be considered as an extended mem_map.
 * A page_ext page is associated with every page descriptor. The
 * page_ext helps us add more information about the page.
 * All page_ext are allocated at boot or memory hotplug event,
 * then the page_ext for pfn always exists.
 */
 
/* Header of one page_ext entry; the only member in this excerpt is a flags word. */
struct page_ext {
	unsigned long flags;	/* flag bits; presumably one bit per extension feature - confirm against users */
};

从 page_ext.c 和 page_ext.h 的注释来看,Page Extension 是用来管理物理页的 extended data 的。

  • 在 Page Extension 出现前,如果想要给物理页增加一个新的特性,我们需要重新编译内核,而 Page Extension 可以让我们通过启动参数在内核启动时决定是否开启这个新的特性(cmdline)。
    • 这个特性是通过将 extended data 存储在额外的区域,而不是struct page中来实现的(struct page是用来描述物理页的数据结构)。
    • Page Extension 所使用的空间是在系统启动时创建的,如果没有启用某个特性(如 Page Owner),则不会分配这个特性的 extended data。
  • Page Extension 提供了两个 callback 函数:
    • The need callback:决定 extended data 的内存是否会创建
      • 返回 true 的话,Page Extension 会分配对应的内存,并设置对应 page_ext_operations 的 offset
    • The init callback:用于完成初始化的工作

源码分析

数据结构

Page Extension 主要包含两个数据结构

/*
 * Descriptor through which one extension plugs into Page Extension:
 * how much extended data it wants, where that data ends up, and the
 * two callbacks that Page Extension invokes at initialization time.
 */
struct page_ext_operations {
	size_t offset;		/* written by invoke_need_callbacks(): sizeof(struct page_ext) plus the sizes of earlier accepted extensions */
	size_t size;		/* bytes of extended data requested; added to extra_mem when need() returns true */
	bool (*need)(void);	/* may be NULL; returning true asks for this extension's data to be allocated */
	void (*init)(void);	/* may be NULL; called by invoke_init_callbacks() */
};
 
/* Header of one page_ext entry; extension data is located relative to it via page_ext_operations.offset. */
struct page_ext {
	unsigned long flags;	/* flag bits; presumably one bit per extension feature - confirm against users */
};
  • page_ext_operations:Page Extension 暴露出来的接口
    • offset:在extended data中的偏移,在invoke_need_callbacks时会被初始化
    • size:数据大小
    • need:the need callback
    • init:the init callback
  • page_ext
    • flags:bitmap,对应 bit 用于表示某个 Extension 特性是否开启,由 Extension 自行控制。

Drawing-2021-09-29-16.53.45.excalidraw

全局变量

  • total_usage:Page Extension 占用的内存空间大小
  • extra_mem:所有已启用 Extension 的 extended data 大小之和(不含 struct page_ext 本身),在 invoke_need_callbacks 中累加得到
  • page_ext_ops:由 Page Extension 维护的 Extension
/*
 * Table of the extensions known to Page Extension. An entry is compiled
 * in only when its config condition holds; invoke_need_callbacks() and
 * invoke_init_callbacks() both walk this array in order.
 */
static struct page_ext_operations *page_ext_ops[] = {
#ifdef CONFIG_DEBUG_PAGEALLOC
	&debug_guardpage_ops,
#endif
#ifdef CONFIG_PAGE_OWNER
	&page_owner_ops,
#endif
	/* page_idle_ops is listed only when CONFIG_64BIT is not defined */
#if defined(CONFIG_IDLE_PAGE_TRACKING) && !defined(CONFIG_64BIT)
	&page_idle_ops,
#endif
};

Page Extension 的初始化

Page Extension 的初始化调用链:

start_kernel
    rest_init
        kernel_init
            kernel_init_freeable
                page_ext_init

page_ext_init 实现:

 
/*
 * Boot-time setup of the page_ext storage.
 *
 * Returns immediately when invoke_need_callbacks() reports that no
 * extension asked for memory. Otherwise it walks every node in the
 * N_MEMORY state one section step at a time and calls
 * init_section_page_ext() for each valid pfn that belongs to the node;
 * a non-zero return from that call ends in panic(). Afterwards it
 * registers page_ext_callback as a memory-hotplug notifier, logs
 * total_usage, and runs the extensions' init callbacks.
 */
void __init page_ext_init(void)
{
	unsigned long pfn;
	int nid;
 
	/* Nothing to allocate if no extension's need callback returned true. */
	if (!invoke_need_callbacks())
		return;
 
	for_each_node_state(nid, N_MEMORY) {
		unsigned long start_pfn, end_pfn;
 
		start_pfn = node_start_pfn(nid);
		end_pfn = node_end_pfn(nid);
		/*
		 * start_pfn and end_pfn may not be aligned to SECTION and the
		 * page->flags of out of node pages are not initialized.  So we
		 * scan [start_pfn, the biggest section's pfn < end_pfn) here.
		 */
		/* Each iteration advances pfn to the next PAGES_PER_SECTION boundary. */
		for (pfn = start_pfn; pfn < end_pfn;
			pfn = ALIGN(pfn + 1, PAGES_PER_SECTION)) {
 
			if (!pfn_valid(pfn))
				continue;
			/*
			 * Nodes's pfns can be overlapping.
			 * We know some arch can have a nodes layout such as
			 * -------------pfn-------------->
			 * N0 | N1 | N2 | N0 | N1 | N2|....
			 */
			if (pfn_to_nid(pfn) != nid)
				continue;
			/* A non-zero return is treated as an allocation failure. */
			if (init_section_page_ext(pfn, nid))
				goto oom;
			cond_resched();
		}
	}
	hotplug_memory_notifier(page_ext_callback, 0);
	pr_info("allocated %ld bytes of page_ext\n", total_usage);
	/* Runs only after the loop above has completed for every node. */
	invoke_init_callbacks();
	return;
 
oom:
	panic("Out of memory");
}

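中间的循环会以 section 为步长遍历每个内存节点的 pfn 范围,并通过 init_section_page_ext() 为每个 section 分配对应的 page_ext 内存(分配失败则 panic);在循环前会调用 invoke_need_callbacks() 判断是否需要分配内存,在循环后会调用 invoke_init_callbacks() 完成各 Extension 的初始化。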

/*
 * Ask every registered extension whether it needs extended data and lay
 * out the data of those that do.
 *
 * For each entry of page_ext_ops whose need callback exists and returns
 * true, the entry's offset is set to sizeof(struct page_ext) plus the
 * extra_mem accumulated so far, and extra_mem then grows by the entry's
 * size. Entries without a need callback are skipped.
 *
 * Returns true if at least one extension asked for memory.
 */
static bool __init invoke_need_callbacks(void)
{
	int i;
	int entries = ARRAY_SIZE(page_ext_ops);
	bool need = false;
 
	for (i = 0; i < entries; i++) {
		if (page_ext_ops[i]->need && page_ext_ops[i]->need()) {
			/* This extension's data starts after struct page_ext and all earlier accepted extensions. */
			page_ext_ops[i]->offset = sizeof(struct page_ext) +
						extra_mem;
			extra_mem += page_ext_ops[i]->size;
			need = true;
		}
	}
 
	return need;
}
 
/*
 * Call the init callback of every entry in page_ext_ops that provides
 * one. The result of the need callback is not consulted here, so an
 * init callback runs even for an extension whose need() returned false.
 */
static void __init invoke_init_callbacks(void)
{
	int i;
	int entries = ARRAY_SIZE(page_ext_ops);
 
	for (i = 0; i < entries; i++) {
		if (page_ext_ops[i]->init)
			page_ext_ops[i]->init();
	}
}
  • invoke_need_callbacks 会通过 need callback 来确定是否要返回 true,同时累加 extended data 的内存大小,并设置对应 page_ext_operations 的 offset

使用方法

以 page_owner 为例,page_owner 需要对每一个物理页都维护一个 page_owner 结构体:

/*
 * Extended data that Page Owner stores through Page Extension; its size
 * is what page_owner_ops.size requests.
 * NOTE(review): the field meanings below are inferred from names only - confirm.
 */
struct page_owner {
	unsigned short order;		/* presumably the allocation order */
	short last_migrate_reason;	/* presumably the reason of the last migration */
	gfp_t gfp_mask;			/* presumably the gfp mask used for the allocation */
	depot_stack_handle_t handle;	/* presumably a handle identifying a saved stack trace */
};

这些数据就是前面所提到的 extended data。

Page Owner 通过 page_owner_ops 来接入 Page Extension:

/*
 * Page Owner's registration with Page Extension: it requests
 * sizeof(struct page_owner) bytes of extended data and supplies its
 * need/init callbacks. offset is not initialized here; it is assigned
 * by invoke_need_callbacks() when need_page_owner() returns true.
 */
struct page_ext_operations page_owner_ops = {
	.size = sizeof(struct page_owner),
	.need = need_page_owner,
	.init = init_page_owner,
};

need_page_owner 和 init_page_owner 实现如下:

/*
 * need callback of Page Owner: returns false while page_owner_disabled
 * is set, true otherwise.
 */
static bool need_page_owner(void)
{
	if (page_owner_disabled) /* defaults to true; becomes false when page_owner=on is passed as a boot parameter */
		return false;
 
	return true;
}
/*
 * init callback of Page Owner. Does nothing while page_owner_disabled is
 * set; otherwise registers the dummy, failure and early stacks, enables
 * the page_owner_inited static branch and initializes the early
 * allocated pages.
 */
static void init_page_owner(void)
{
	/* invoke_init_callbacks() does not check need(), so the disabled case is filtered here. */
	if (page_owner_disabled)
		return;
 
	register_dummy_stack();
	register_failure_stack();
	register_early_stack();
	static_branch_enable(&page_owner_inited);
	init_early_allocated_pages(); /* finishes initializing page_owner for all physical pages */
}

init_early_allocated_pages会对所有的page_ext调用:__set_page_owner_handle(page_ext, early_handle, 0, 0);

  • order:0
  • last_migrate_reason:-1
  • gfp_mask:0
  • handle:early_handle
  • page_ext->flags对应于 Page Owner 的 bit 会置为 1。

Reference