linux内核源码阅读之facebook硬盘加速flashcache之三
/*
 * Per-cache context: the source and cache devices, the in-memory block and
 * set tables, cache geometry, cleaning limits, statistics and error counters.
 */
134struct cache_c {
135 struct dm_target *tgt; /* NOTE(review): presumably the owning device-mapper target - confirm */
136
137 struct dm_dev *disk_dev; /* Source device */
138 struct dm_dev *cache_dev; /* Cache device */
139
140#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,27)
141 struct kcopyd_client *kcp_client; /* Kcopyd client for writing back data */
142#else
143 struct dm_kcopyd_client *kcp_client; /* Kcopyd client for writing back data */
144 struct dm_io_client *io_client; /* Client memory pool*/
145#endif
146
147 spinlock_t cache_spin_lock;
148
149 struct cacheblock *cache; /* Hash table for cache blocks */
150 struct cache_set *cache_sets; /* Cache set state (struct cache_set) */
151 struct cache_md_sector_head *md_sectors_buf; /* Metadata sector tracking (struct cache_md_sector_head) */
152
153 sector_t size; /* Cache size */
154 unsigned int assoc; /* Cache associativity */
155 unsigned int block_size; /* Cache block size */
156 unsigned int block_shift; /* Cache block size in bits */
157 unsigned int block_mask; /* Cache block mask */
158 unsigned int consecutive_shift; /* Consecutive blocks size in bits */
159
160 wait_queue_head_t destroyq; /* Wait queue for I/O completion */
161 /* XXX - Updates of nr_jobs should happen inside the lock. But doing it outside
162 is OK since the filesystem is unmounted at this point */
163 atomic_t nr_jobs; /* Number of I/O jobs */
164 atomic_t fast_remove_in_prog;
165
166 int dirty_thresh_set; /* Per set dirty threshold to start cleaning */
167 int max_clean_ios_set; /* Max cleaning IOs per set */
168 int max_clean_ios_total; /* Total max cleaning IOs */
169 int clean_inprog; /* NOTE(review): presumably cleaning IOs currently in progress - confirm */
170 int sync_index;
171 int nr_dirty; /* NOTE(review): presumably count of dirty blocks in the cache - confirm */
172
173 int md_sectors; /* Numbers of metadata sectors, including header */
174
175 /* Stats */
176 unsigned long reads; /* Number of reads */
177 unsigned long writes; /* Number of writes */
178 unsigned long read_hits; /* Number of cache hits */
179 unsigned long write_hits; /* Number of write hits (includes dirty write hits) */
180 unsigned long dirty_write_hits; /* Number of "dirty" write hits */
181 unsigned long replace; /* Number of cache replacements */
182 unsigned long wr_replace;
183 unsigned long wr_invalidates; /* Number of write invalidations */
184 unsigned long rd_invalidates; /* Number of read invalidations */
185 unsigned long pending_inval; /* Invalidations due to concurrent ios on same block */
186 unsigned long cached_blocks; /* Number of cached blocks */
187#ifdef FLASHCACHE_DO_CHECKSUMS
188 unsigned long checksum_store;
189 unsigned long checksum_valid;
190 unsigned long checksum_invalid;
191#endif
192 unsigned long enqueues; /* enqueues on pending queue */
193 unsigned long cleanings;
194 unsigned long noroom; /* No room in set */
195 unsigned long md_write_dirty; /* Metadata sector writes dirtying block */
196 unsigned long md_write_clean; /* Metadata sector writes cleaning block */
197 unsigned long pid_drops;
198 unsigned long pid_adds;
199 unsigned long pid_dels;
200 unsigned long expiry;
201 unsigned long front_merge, back_merge; /* Write Merging */
202 unsigned long uncached_reads, uncached_writes;
203 unsigned long disk_reads, disk_writes;
204 unsigned long ssd_reads, ssd_writes;
205 unsigned long ssd_readfills, ssd_readfill_unplugs;
206
207 unsigned long clean_set_calls;
208 unsigned long clean_set_less_dirty;
209 unsigned long clean_set_fails;
210 unsigned long clean_set_ios;
211 unsigned long set_limit_reached;
212 unsigned long total_limit_reached;
213
214 /* Errors */
215 int disk_read_errors;
216 int disk_write_errors;
217 int ssd_read_errors;
218 int ssd_write_errors;
219 int memory_alloc_errors;
220
221#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,20)
222 struct work_struct delayed_clean;
223#else
224 struct delayed_work delayed_clean;
225#endif
226
227 /* State for doing readfills (batch writes to ssd) */
228 int readfill_in_prog;
229 struct kcached_job *readfill_queue;
230 struct work_struct readfill_wq;
231
232 unsigned long pid_expire_check;
233
234 struct flashcache_cachectl_pid *blacklist_head, *blacklist_tail;
235 struct flashcache_cachectl_pid *whitelist_head, *whitelist_tail;
236 int num_blacklist_pids, num_whitelist_pids;
237 unsigned long blacklist_expire_check, whitelist_expire_check;
238
239 struct cache_c *next_cache; /* NOTE(review): looks like a link to the next cache context in a list - confirm */
240
241 char cache_devname[DEV_PATHLEN]; /* DEV_PATHLEN-byte buffer; presumably the cache device path - confirm */
242 char disk_devname[DEV_PATHLEN]; /* DEV_PATHLEN-byte buffer; presumably the source device path - confirm */
243};
543/*
544 * dbn is the starting sector, io_size is the number of sectors.
545 */
546static int
547flashcache_lookup(struct cache_c *dmc, struct bio *bio, int *index)
548{
549 sector_t dbn = bio->bi_sector;
550#if DMC_DEBUG
551 int io_size = to_sector(bio->bi_size);
552#endif
553 unsigned long set_number = hash_block(dmc, dbn);
554 int invalid, oldest_clean = -1;
555 int start_index;
556
557 start_index = dmc->assoc * set_number;
558 DPRINTK("Cache lookup : dbn %llu(%lu), set = %d",
559 dbn, io_size, set_number);
560 find_valid_dbn(dmc, dbn, start_index, index);
561 if (*index > 0) {
562 DPRINTK("Cache lookup HIT: Block %llu(%lu): VALID index %d",
563 dbn, io_size, *index);
564 /* We found the exact range of blocks we are looking for */
565 return VALID;
566 }
567 invalid = find_invalid_dbn(dmc, start_index);
568 if (invalid == -1) {
569 /* We didn't find an invalid entry, search for oldest valid entry */
570 find_reclaim_dbn(dmc, start_index, &oldest_clean);
571 }
572 /*
573 * Cache miss :
574 * We can't choose an entry marked INPROG, but choose the oldest
575 * INVALID or the oldest VALID entry.
576 */
577 *index = start_index + dmc->assoc;
578 if (invalid != -1) {
579 DPRINTK("Cache lookup MISS (INVALID): dbn %llu(%lu), set = %d, index = %d, start_index = %d",
580 dbn, io_size, set_number, invalid, start_index);
581 *index = invalid;
582 } else if (oldest_clean != -1) {
583 DPRINTK("Cache lookup MISS (VALID): dbn %llu(%lu), set = %d, index = %d, start_index = %d",
584 dbn, io_size, set_number, oldest_clean, start_index);
585 *index = oldest_clean;
586 } else {
587 DPRINTK_LITE("Cache read lookup MISS (NOROOM): dbn %llu(%lu), set = %d",
588 dbn, io_size, set_number);
589 }
590 if (*index < (start_index + dmc->assoc))
591 return INVALID;
592 else {
593 dmc->noroom++;
594 return -1;
595 }
596}
444/*
445 * Map a block from the source device to a block in the cache device.
446 */
447static unsigned long
448hash_block(struct cache_c *dmc, sector_t dbn)
449{
450 unsigned long set_number, value;
451
452 value = (unsigned long)
453 (dbn >> (dmc->block_shift + dmc->consecutive_shift));
454 set_number = value % (dmc->size >> dmc->consecutive_shift);
455 DPRINTK("Hash: %llu(%lu)->%lu", dbn, value, set_number);
456 return set_number;
457}
schedule_delayed_work(&dmc->delayed_clean, 1*HZ);
do_delayed_clean = 1;
struct cache_set *cache_sets;
struct cache_md_sector_head *md_sectors_buf;
第一个结构cacheblock是cache块在内存中的表示,对应SSD上的是flash_cacheblock。第二个cache_set就是之前一直提到的集合。第三个cache_md_sector_head用于flash_cacheblock刷新,即把管理结构从内存cacheblock写到SSD的flash_cacheblock。下面逐一来看这三个结构体:
111/* Cache block metadata structure */
112struct cacheblock {
113 u_int16_t cache_state; /* NOTE(review): presumably state flags such as VALID/INVALID - confirm */
114 int16_t nr_queued; /* jobs in pending queue */
115 u_int16_t lru_prev, lru_next; /* LRU neighbours, stored as 16-bit indices rather than pointers */
116 sector_t dbn; /* Sector number of the cached block */
117#ifdef FLASHCACHE_DO_CHECKSUMS
118 u_int64_t checksum; /* Checksum; only present when FLASHCACHE_DO_CHECKSUMS is defined */
119#endif
120 struct pending_job *head; /* Pending jobs for this block (counted by nr_queued) */
121};
nr_queued; /* jobs in pending queue */ 等待工作个数
lru_prev, lru_next; 按LRU排序,指向前一个和后一个,注意这里是下标
dbn; /* Sector number of the cached block */ 对应磁盘的扇区
checksum; 校验
struct pending_job *head; 等待工作
/* Per-set bookkeeping: replacement/cleaning cursors, dirty counts and LRU ends. */
123struct cache_set {
124 u_int32_t set_fifo_next; /* NOTE(review): presumably next slot for FIFO replacement - confirm */
125 u_int32_t set_clean_next; /* NOTE(review): presumably next slot to consider for cleaning - confirm */
126 u_int32_t clean_inprog; /* NOTE(review): presumably cleaning IOs in progress for this set - confirm */
127 u_int32_t nr_dirty; /* NOTE(review): presumably dirty blocks in this set - confirm */
128 u_int16_t lru_head, lru_tail; /* 16-bit LRU list ends; same width as cacheblock lru_prev/lru_next */
129};
344/*
345 * We have one of these for *every* cache metadata sector, to keep track
346 * of metadata ios in progress for blocks covered in this sector. Only
347 * one metadata IO per sector can be in progress at any given point in
348 * time
349 */
350struct cache_md_sector_head {
351 u_int32_t nr_in_prog; /* NOTE(review): presumably count of metadata updates in progress for this sector - confirm */
352 struct kcached_job *pending_jobs, *md_io_inprog; /* NOTE(review): presumably jobs waiting vs. jobs covered by the in-flight metadata IO - confirm */
353};
linux内核源码阅读之facebook硬盘加速flashcache之三的更多相关文章
- linux内核源码阅读之facebook硬盘加速flashcache之八
前面我们的分析中重点关注正常的数据流程,这一小节关注如果有异常,那么流程是怎么走完的呢? 1)创建新任务时kcached_job申请不到 2)读写命中时cache块为忙 3)系统关机时处理,系统开机时 ...
- linux内核源码阅读之facebook硬盘加速flashcache之四
这一小节介绍一下flashcache读写入口和读写的基础实现. 首先,不管是模块还是程序,必须先找到入口,用户态代码会经常去先看main函数,内核看module_init,同样看IO流时候也要找到入口 ...
- linux内核源码阅读之facebook硬盘加速flashcache之二
flashcache数据结构都在flashcache.h文件中,但在看数据结构之前,需要先过一遍flashcache是什么,要完成哪些功能?如果是自己设计这样一个系统的话,大概要怎么设计. 前面讲过, ...
- linux内核源码阅读之facebook硬盘加速flashcache之六
其实到目前为止,如果对读流程已经能轻松地看懂了,那么写流程不需要太多脑细胞.我觉得再写下去没有太大的必要了,后面想想为了保持flashcache完整性,还是写出来吧.接着到写流程: 1530stati ...
- linux内核源码阅读之facebook硬盘加速flashcache之五
正常流程到flashcache_map的1623行或1625行,按顺序先看读流程: 1221static void 1222flashcache_read(struct cache_c *dmc, s ...
- linux内核源码阅读之facebook硬盘加速利器flashcache
从来没有写过源码阅读,这种感觉越来越强烈,虽然劣于文笔,但还是下定决心认真写一回. 源代码下载请参见上一篇flashcache之我见 http://blog.csdn.net/liumangxiong ...
- ubuntu下linux内核源码阅读工具和调试方法总结
http://blog.chinaunix.net/uid-20940095-id-66148.html 一 linux内核源码阅读工具 windows下当然首选source insight, 但是l ...
- Linux内核源码阅读记录一之分析存储在不同段中的函数调用过程
在写驱动的过程中,对于入口函数与出口函数我们会用一句话来修饰他们:module_init与module_exit,那为什么经过修饰后,内核就能够调用我们编写的入口函数与出口函数呢?下面就来分析内核调用 ...
- Linux内核源码分析
Linux源码下载: https://www.kernel.org/ https://git.kernel.org/ Linux内核源码阅读以及工具(转): https://blog.csdn.net ...
随机推荐
- 入Lucene的第一个坑
兴致勃勃的下载了Lucene6的Jar包,打算跑个Demo看下它神奇的魅力,结果一运行就出错了 Exception in thread "main" java.lang.Unsup ...
- [Jobdu] 题目1521:二叉树的镜像
不知道怎么回事下面的代码通过了4个测试用例,还有1个测试用例始终是Runtime Error,各位帮我看一下是哪里出了问题 镜像输出两种方法,一种是递归进行调整,另外一种就是直接在先序遍历的基础上进行 ...
- c++ primer plus 习题答案(3)
p296.3 #include<iostream> #include<cstdlib> #include<string> #include<cstring&g ...
- Apache proxy中转设置
参考http://sjsky.iteye.com/blog/1067119 打开http.conf (macOS中 Apache配置文件在/etc/apache2/中 etc是隐藏的) 确保下面 ...
- linux服务器安全小知识
使用单用户模式进入系统 Linux启动后出现boot:提示时,使用一个特殊的命令,如linuxsingle或linux 1,就能进入单用户模式(Single-User mode).这个命令非常有 ...
- Spring Boot Admin Reference Guide
1. What is Spring Boot Admin? Spring Boot Admin is a simple application to manage and monitor your S ...
- Azure 网站的新增功能:可配置的环境变量
编辑人员注释:本文章由 WindowsAzure 网站团队的项目经理Erez Benari撰写. Azure最常用的强大功能之一是 XML文档转换 (XDT),通过此功能,您可以在Windows ...
- 开源流媒体处理库live555服务器端、客户端源代码分析总结
RTSP服务器端流程: 1. RTSP连接的建立过程 RTSPServer类用于构建一个RTSP服务器,该类同时在其内部定义了一个RTSPClientSession类,用于处理单独的客户会话. 首先创 ...
- BZOJ 2761 不重复数字 (Hash)
题解:直接使用STL中的hash去重即可 #include <cstdio> #include <map> using namespace std; int ans[50010 ...
- JAVA GUI学习 - JList列表、JScrollPane滚动条组件学习
/** * 本例结合JList和JScrollPane共同使用 * @author Wfei * */ public class JListKnow extends JFrame { JList jL ...