Linux3.10.0块IO子系统流程(4)-- 为请求构造SCSI命令
首先来看scsi_prep_fn
/*
 * Default prepare hook: only REQ_TYPE_BLOCK_PC requests are set up here;
 * every other request type keeps the BLKPREP_KILL status. The resulting
 * status is always handed to scsi_prep_return() for final processing.
 */
int scsi_prep_fn(struct request_queue *q, struct request *req)
{
	struct scsi_device *sdev = q->queuedata;
	int status;

	if (req->cmd_type != REQ_TYPE_BLOCK_PC)
		status = BLKPREP_KILL;
	else
		status = scsi_setup_blk_pc_cmnd(sdev, req);

	return scsi_prep_return(q, req, status);
}
scsi_prep_fn只处理来自SCSI公共层的请求(REQ_TYPE_BLOCK_PC),其他类型的请求直接以BLKPREP_KILL结束;在scsi_setup_blk_pc_cmnd函数返回后,根据其返回值调用scsi_prep_return进行相应的处理。
/*
 * Set up a SCSI command for a REQ_TYPE_BLOCK_PC request.
 *
 * Returns the non-OK status from scsi_prep_state_check() or the non-zero
 * result of scsi_init_io() on failure, BLKPREP_DEFER when no command
 * descriptor could be obtained, and BLKPREP_OK otherwise.
 */
int scsi_setup_blk_pc_cmnd(struct scsi_device *sdev, struct request *req)
{
	struct scsi_cmnd *cmd;
	/*
	 * Preliminary check based on the request flags and the state of
	 * the SCSI device.
	 */
	int ret = scsi_prep_state_check(sdev, req);

	if (ret != BLKPREP_OK)
		return ret;

	/*
	 * Allocate a new scsi_cmnd descriptor and record it in the request's
	 * special field; if that field already points to an existing
	 * scsi_cmnd descriptor, it is used directly.
	 */
	cmd = scsi_get_cmd_from_req(sdev, req);
	if (unlikely(!cmd))
		return BLKPREP_DEFER;

	/*
	 * BLOCK_PC requests may transfer data, in which case they must have
	 * a bio attached to them. Or they might contain a SCSI command that
	 * does not transfer data, in which case they may optionally submit a
	 * request without an attached bio.
	 *
	 * When a bio is present, scsi_init_io() prepares the command's data
	 * buffer from it. When no data is transferred, the command's data
	 * buffer descriptor is cleared instead.
	 */
	if (req->bio) {
		BUG_ON(!req->nr_phys_segments);

		ret = scsi_init_io(cmd, GFP_ATOMIC);
		if (unlikely(ret))
			return ret;
	} else {
		BUG_ON(blk_rq_bytes(req));

		memset(&cmd->sdb, 0, sizeof(cmd->sdb));
		req->buffer = NULL;
	}

	cmd->cmd_len = req->cmd_len;
	if (!blk_rq_bytes(req))
		cmd->sc_data_direction = DMA_NONE;
	else if (rq_data_dir(req) == WRITE)
		cmd->sc_data_direction = DMA_TO_DEVICE;
	else
		cmd->sc_data_direction = DMA_FROM_DEVICE;

	cmd->transfersize = blk_rq_bytes(req);
	cmd->allowed = req->retries;
	return BLKPREP_OK;
}
sd_prep_fn函数从request结构中的信息构造SCSI(读或写)命令,将结果保存在request的special域,sd_prep_fn只能处理来自上层的请求(REQ_TYPE_FS),以及来自SCSI层的(REQ_TYPE_BLOCK_PC)请求。有一种请求比较特殊,即所谓的DISCARD请求。这个请求来自上层,但需要被转换成SCSI请求来处理
/**
* sd_prep_fn - build a scsi (read or write) command from
* information in the request structure.
* @SCpnt: pointer to mid-level's per scsi command structure that
* contains request and into which the scsi command is written
*
* Returns 1 if successful and 0 if error (or cannot be done now).
**/
static int sd_prep_fn(struct request_queue *q, struct request *rq)
{
struct scsi_cmnd *SCpnt;
struct scsi_device *sdp = q->queuedata;
struct gendisk *disk = rq->rq_disk;
struct scsi_disk *sdkp;
sector_t block = blk_rq_pos(rq);
sector_t threshold;
unsigned int this_count = blk_rq_sectors(rq);
int ret, host_dif;
unsigned char protect; /*
* Discard request come in as REQ_TYPE_FS but we turn them into
* block PC requests to make life easier.
*/
if (rq->cmd_flags & REQ_DISCARD) {
ret = sd_setup_discard_cmnd(sdp, rq);
goto out;
} else if (rq->cmd_flags & REQ_WRITE_SAME) {
ret = sd_setup_write_same_cmnd(sdp, rq);
goto out;
} else if (rq->cmd_flags & REQ_FLUSH) {
ret = scsi_setup_flush_cmnd(sdp, rq);
goto out;
} else if (rq->cmd_type == REQ_TYPE_BLOCK_PC) {
ret = scsi_setup_blk_pc_cmnd(sdp, rq);
goto out;
} else if (rq->cmd_type != REQ_TYPE_FS) {
ret = BLKPREP_KILL;
goto out;
}
ret = scsi_setup_fs_cmnd(sdp, rq);
if (ret != BLKPREP_OK)
goto out;
SCpnt = rq->special;
sdkp = scsi_disk(disk); /* from here on until we're complete, any goto out
* is used for a killable error condition */
ret = BLKPREP_KILL; SCSI_LOG_HLQUEUE(, scmd_printk(KERN_INFO, SCpnt,
"sd_prep_fn: block=%llu, "
"count=%d\n",
(unsigned long long)block,
this_count)); /*
* 以下几种情况直接结束命令:
* 1.SCSI不在线
* 2.请求数据超出了设备容量
* 3.磁盘介质发生了变化
*/
if (!sdp || !scsi_device_online(sdp) ||
block + blk_rq_sectors(rq) > get_capacity(disk)) {
SCSI_LOG_HLQUEUE(, scmd_printk(KERN_INFO, SCpnt,
"Finishing %u sectors\n",
blk_rq_sectors(rq)));
SCSI_LOG_HLQUEUE(, scmd_printk(KERN_INFO, SCpnt,
"Retry with 0x%p\n", SCpnt));
goto out;
} if (sdp->changed) {
/*
* quietly refuse to do anything to a changed disc until
* the changed bit has been reset
*/
/* printk("SCSI disk has been changed or is not present. Prohibiting further I/O.\n"); */
goto out;
} /*
* Some SD card readers can't handle multi-sector accesses which touch the last one or two hardware sectors. Split accesses as needed.
* 某些设备(如SD卡)不能多扇区访问最后的部分扇区,需分割访问
*/
threshold = get_capacity(disk) - SD_LAST_BUGGY_SECTORS *
(sdp->sector_size / ); if (unlikely(sdp->last_sector_bug && block + this_count > threshold)) {
if (block < threshold) {
/* Access up to the threshold but not beyond */
this_count = threshold - block;
} else {
/* Access only a single hardware sector */
this_count = sdp->sector_size / ;
}
} SCSI_LOG_HLQUEUE(, scmd_printk(KERN_INFO, SCpnt, "block=%llu\n",
(unsigned long long)block)); /*
* If we have a 1K hardware sectorsize, prevent access to single 512 byte sectors.
* In theory we could handle this - in fact the scsi cdrom driver must be able to handle this because
* we typically use 1K blocksizes, and cdroms typically have 2K hardware sectorsizes.
* Of course, things are simpler with the cdrom, since it is read-only. For performance reasons,
* the filesystems should be able to handle this and not force the scsi disk driver to use bounce buffers for this.
* 磁盘的硬件扇区长度可能不是512,而是1024/2048或4096
*/
if (sdp->sector_size == ) {
if ((block & ) || (blk_rq_sectors(rq) & )) {
scmd_printk(KERN_ERR, SCpnt,
"Bad block number requested\n");
goto out;
} else {
block = block >> ;
this_count = this_count >> ;
}
}
if (sdp->sector_size == ) {
if ((block & ) || (blk_rq_sectors(rq) & )) {
scmd_printk(KERN_ERR, SCpnt,
"Bad block number requested\n");
goto out;
} else {
block = block >> ;
this_count = this_count >> ;
}
}
if (sdp->sector_size == ) {
if ((block & ) || (blk_rq_sectors(rq) & )) {
scmd_printk(KERN_ERR, SCpnt,
"Bad block number requested\n");
goto out;
} else {
block = block >> ;
this_count = this_count >> ;
}
}
if (rq_data_dir(rq) == WRITE) {
if (!sdp->writeable) {
goto out;
}
SCpnt->cmnd[] = WRITE_6;
SCpnt->sc_data_direction = DMA_TO_DEVICE; if (blk_integrity_rq(rq))
sd_dif_prepare(rq, block, sdp->sector_size); } else if (rq_data_dir(rq) == READ) {
SCpnt->cmnd[] = READ_6;
SCpnt->sc_data_direction = DMA_FROM_DEVICE;
} else {
scmd_printk(KERN_ERR, SCpnt, "Unknown command %x\n", rq->cmd_flags);
goto out;
} SCSI_LOG_HLQUEUE(, scmd_printk(KERN_INFO, SCpnt,
"%s %d/%u 512 byte blocks.\n",
(rq_data_dir(rq) == WRITE) ?
"writing" : "reading", this_count,
blk_rq_sectors(rq))); /* Set RDPROTECT/WRPROTECT if disk is formatted with DIF */
host_dif = scsi_host_dif_capable(sdp->host, sdkp->protection_type);
if (host_dif)
protect = << ;
else
protect = ; if (host_dif == SD_DIF_TYPE2_PROTECTION) {
SCpnt->cmnd = mempool_alloc(sd_cdb_pool, GFP_ATOMIC); if (unlikely(SCpnt->cmnd == NULL)) {
ret = BLKPREP_DEFER;
goto out;
} SCpnt->cmd_len = SD_EXT_CDB_SIZE;
memset(SCpnt->cmnd, , SCpnt->cmd_len);
SCpnt->cmnd[] = VARIABLE_LENGTH_CMD;
SCpnt->cmnd[] = 0x18;
SCpnt->cmnd[] = (rq_data_dir(rq) == READ) ? READ_32 : WRITE_32;
SCpnt->cmnd[] = protect | ((rq->cmd_flags & REQ_FUA) ? 0x8 : ); /* LBA */
SCpnt->cmnd[] = sizeof(block) > ? (unsigned char) (block >> ) & 0xff : ;
SCpnt->cmnd[] = sizeof(block) > ? (unsigned char) (block >> ) & 0xff : ;
SCpnt->cmnd[] = sizeof(block) > ? (unsigned char) (block >> ) & 0xff : ;
SCpnt->cmnd[] = sizeof(block) > ? (unsigned char) (block >> ) & 0xff : ;
SCpnt->cmnd[] = (unsigned char) (block >> ) & 0xff;
SCpnt->cmnd[] = (unsigned char) (block >> ) & 0xff;
SCpnt->cmnd[] = (unsigned char) (block >> ) & 0xff;
SCpnt->cmnd[] = (unsigned char) block & 0xff; /* Expected Indirect LBA */
SCpnt->cmnd[] = (unsigned char) (block >> ) & 0xff;
SCpnt->cmnd[] = (unsigned char) (block >> ) & 0xff;
SCpnt->cmnd[] = (unsigned char) (block >> ) & 0xff;
SCpnt->cmnd[] = (unsigned char) block & 0xff; /* Transfer length */
SCpnt->cmnd[] = (unsigned char) (this_count >> ) & 0xff;
SCpnt->cmnd[] = (unsigned char) (this_count >> ) & 0xff;
SCpnt->cmnd[] = (unsigned char) (this_count >> ) & 0xff;
SCpnt->cmnd[] = (unsigned char) this_count & 0xff;
} else if (sdp->use_16_for_rw) {
SCpnt->cmnd[] += READ_16 - READ_6;
SCpnt->cmnd[] = protect | ((rq->cmd_flags & REQ_FUA) ? 0x8 : );
SCpnt->cmnd[] = sizeof(block) > ? (unsigned char) (block >> ) & 0xff : ;
SCpnt->cmnd[] = sizeof(block) > ? (unsigned char) (block >> ) & 0xff : ;
SCpnt->cmnd[] = sizeof(block) > ? (unsigned char) (block >> ) & 0xff : ;
SCpnt->cmnd[] = sizeof(block) > ? (unsigned char) (block >> ) & 0xff : ;
SCpnt->cmnd[] = (unsigned char) (block >> ) & 0xff;
SCpnt->cmnd[] = (unsigned char) (block >> ) & 0xff;
SCpnt->cmnd[] = (unsigned char) (block >> ) & 0xff;
SCpnt->cmnd[] = (unsigned char) block & 0xff;
SCpnt->cmnd[] = (unsigned char) (this_count >> ) & 0xff;
SCpnt->cmnd[] = (unsigned char) (this_count >> ) & 0xff;
SCpnt->cmnd[] = (unsigned char) (this_count >> ) & 0xff;
SCpnt->cmnd[] = (unsigned char) this_count & 0xff;
SCpnt->cmnd[] = SCpnt->cmnd[] = ;
} else if ((this_count > 0xff) || (block > 0x1fffff) ||
scsi_device_protection(SCpnt->device) ||
SCpnt->device->use_10_for_rw) {
if (this_count > 0xffff)
this_count = 0xffff; SCpnt->cmnd[] += READ_10 - READ_6;
SCpnt->cmnd[] = protect | ((rq->cmd_flags & REQ_FUA) ? 0x8 : );
SCpnt->cmnd[] = (unsigned char) (block >> ) & 0xff;
SCpnt->cmnd[] = (unsigned char) (block >> ) & 0xff;
SCpnt->cmnd[] = (unsigned char) (block >> ) & 0xff;
SCpnt->cmnd[] = (unsigned char) block & 0xff;
SCpnt->cmnd[] = SCpnt->cmnd[] = ;
SCpnt->cmnd[] = (unsigned char) (this_count >> ) & 0xff;
SCpnt->cmnd[] = (unsigned char) this_count & 0xff;
} else {
if (unlikely(rq->cmd_flags & REQ_FUA)) {
/*
* This happens only if this drive failed
* 10byte rw command with ILLEGAL_REQUEST
* during operation and thus turned off
* use_10_for_rw.
*/
scmd_printk(KERN_ERR, SCpnt,
"FUA write on READ/WRITE(6) drive\n");
goto out;
} SCpnt->cmnd[] |= (unsigned char) ((block >> ) & 0x1f);
SCpnt->cmnd[] = (unsigned char) ((block >> ) & 0xff);
SCpnt->cmnd[] = (unsigned char) block & 0xff;
SCpnt->cmnd[] = (unsigned char) this_count;
SCpnt->cmnd[] = ;
}
SCpnt->sdb.length = this_count * sdp->sector_size; /* If DIF or DIX is enabled, tell HBA how to handle request */
if (host_dif || scsi_prot_sg_count(SCpnt))
sd_prot_op(SCpnt, host_dif); /*
* We shouldn't disconnect in the middle of a sector, so with a dumb
* host adapter, it's safe to assume that we can at least transfer
* this many bytes between each connect / disconnect.
*/
SCpnt->transfersize = sdp->sector_size;
SCpnt->underflow = this_count << ;
SCpnt->allowed = SD_MAX_RETRIES; /*
* This indicates that the command is ready from our end to be
* queued.
*/
ret = BLKPREP_OK;
out:
return scsi_prep_return(q, rq, ret);
}
/*
 * Setup a REQ_TYPE_FS command.  These are simple read/write request
 * from filesystems that still need to be translated to SCSI CDBs from
 * the ULD.
 *
 * Returns the non-OK status of scsi_prep_state_check() or of the device
 * handler's prep_fn (when one is registered), BLKPREP_DEFER when no
 * command descriptor could be obtained, and otherwise the result of
 * scsi_init_io().
 */
int scsi_setup_fs_cmnd(struct scsi_device *sdev, struct request *req)
{
	struct scsi_cmnd *cmd;
	int ret = scsi_prep_state_check(sdev, req);

	if (ret != BLKPREP_OK)
		return ret;

	/* Give an attached device handler the chance to veto the request. */
	if (unlikely(sdev->scsi_dh_data && sdev->scsi_dh_data->scsi_dh
			 && sdev->scsi_dh_data->scsi_dh->prep_fn)) {
		ret = sdev->scsi_dh_data->scsi_dh->prep_fn(sdev, req);
		if (ret != BLKPREP_OK)
			return ret;
	}

	/*
	 * Filesystem requests must transfer data.
	 */
	BUG_ON(!req->nr_phys_segments);

	cmd = scsi_get_cmd_from_req(sdev, req);
	if (unlikely(!cmd))
		return BLKPREP_DEFER;

	/*
	 * Unlike requests from the SCSI common layer, a request from the
	 * upper layers carries all of its information in the bio, so the
	 * SCSI command defined by the SCSI specification has to be built
	 * from scratch. The result is stored in the cmnd field of the
	 * scsi_cmnd descriptor, so that command buffer is zeroed first.
	 */
	memset(cmd->cmnd, 0, BLK_MAX_CDB);
	return scsi_init_io(cmd, GFP_ATOMIC);
}
Linux3.10.0块IO子系统流程(4)-- 为请求构造SCSI命令的更多相关文章
- Linux3.10.0块IO子系统流程(6)-- 派发SCSI命令到低层驱动
在SCSI策略例程中最后调用scsi_dispatch_cmd将SCSI命令描述符派发给低层驱动进行处理 /** * scsi_dispatch_command - Dispatch a comman ...
- Linux3.10.0块IO子系统流程(5)-- 为SCSI命令准备聚散列表
SCSI数据缓冲区组织成聚散列表的形式.Linux内核中表示聚散列表的基本数据结构是scatterlist,虽然名字中有list,但它只对应一个内存缓冲区,聚散列表就是多个scatterlist的组合 ...
- Linux3.10.0块IO子系统流程(2)-- 构造、排序、合并请求
Linux块设备可以分为三类.分别针对顺序访问物理设备.随机访问物理设备和逻辑设备(即“栈式设备”) 类型 make_request_fn request_fn 备注 SCSI 设备等 从bio构 ...
- Linux3.10.0块IO子系统流程(3)-- SCSI策略例程
很长时间以来,Linux块设备使用了一种称为“蓄流/泄流”(plugging/unplugging)的技术来改进吞吐率.简单而言,这种工作方式类似浴盆排水系统的塞子.当IO被提交时,它被储存在一个队列 ...
- Linux3.10.0块IO子系统流程(0)-- 块IO子系统概述
前言:这个系列主要是记录自己学习Linux块IO子系统的过程,其中代码分析皆基于Linux3.10.0版本,如有描述错误或不妥之处,敬请指出! 参考书籍:存储技术原理分析--基于Linux 2.6内核 ...
- Linux3.10.0块IO子系统流程(7)-- 请求处理完成
和提交请求相反,完成请求的过程是从低层驱动开始的.请求处理完成分为两个部分:上半部和下半部.开始时,请求处理完成总是处在中断上下文,在这里的主要任务是将已完成的请求放到某个队列中,然后引发软中断让中断 ...
- Linux3.10.0块IO子系统流程(1)-- 上层提交请求
Linux通用块层提供给上层的接口函数是submit_bio.上层在构造好bio之后,调用submit_bio提交给通用块层处理. submit_bio函数如下: void submit_bi ...
- DPA 9.1.85 升级到DPA 10.0.352流程
SolarWinds DPA的升级其实是一件非常简单的事情,这里介绍一下从DPA 9.1.95升级到 DPA 10.0.352版本的流程.为什么要升级呢? DPA给用户发的邮件已经写的非常清楚了(如下 ...
- 【转】linux IO子系统和文件系统读写流程
原文地址:linux IO子系统和文件系统读写流程 我们含有分析的,是基于2.6.32及其后的内核. 我们在linux上总是要保存数据,数据要么保存在文件系统里(如ext3),要么就保存在裸设备里.我 ...
随机推荐
- python中socket模块详解
socket模块简介 网络上的两个程序通过一个双向的通信连接实现数据的交换,这个连接的一端称为一个socket.socket通常被叫做"套接字",用于描述IP地址和端口,是一个通信 ...
- 雷林鹏分享: C# 教程
C# 教程 C# 是一个简单的.现代的.通用的.面向对象的编程语言,它是由微软(Microsoft)开发的. 本教程将告诉您基础的 C# 编程,同时将向您讲解 C# 编程语言相关的各种先进理念. 现在 ...
- Linux(centos7)上安装最新版R3.4.1
说来惭愧,居然没有在Linux安装R的经验,因为一直很少用R,用也是在win平台. 下载路径:https://cran.rstudio.com/src/base/R-3/ 强烈建议不要安装最新的R,除 ...
- c++的const总结
转自:http://www.cnblogs.com/yc_sunniwell/archive/2010/07/14/1777416.html 为什么使用const?采用符号常量写出的代码更容易维护:指 ...
- [LintCode] Number of Islands(岛屿个数)
描述 给一个01矩阵,求不同的岛屿的个数. 0代表海,1代表岛,如果两个1相邻,那么这两个1属于同一个岛.我们只考虑上下左右为相邻. 样例 在矩阵: [ [1, 1, 0, 0, 0], [0, 1, ...
- 诡异的楼梯 HDU - 1180
Hogwarts正式开学以后,Harry发现在Hogwarts里,某些楼梯并不是静止不动的,相反,他们每隔一分钟就变动一次方向. 比如下面的例子里,一开始楼梯在竖直方向,一分钟以后它移动到了水平方向, ...
- 20165309 实验二 Java面向对象程序设计
2017-2018-2 20165309实验二<Java面向对象程序设计>实验报告 一.实验内容 1. 初步掌握单元测试和TDD 2. 理解并掌握面向对象三要素:封装.继承.多态 3. 初 ...
- python记录_day33 线程
##进程就像加工厂,线程是里边的流水线##进程是资源单位,线程是运行单位,每个进程至少有一个线程 即进程是资源分配的最小单位,线程是CPU调度的最小单位 一.线程的创建两种方式,和进程类似1.t = ...
- 关于react16.4——转发refs和片段Fragment
1.转发refs Ref 转发是一种自动将 ref 通过组件传递给子组件的技术. 一些组件倾向于以与常规 DOM button 和 input 类似的方式在整个应用程序中使用, 并且访问他们的 DOM ...
- webpack打包vue文件报错,但是cnpm run dev正常,最后我只想说:是我太笨,还是webpack4.4版本太坑
最近做一个项目,需要使用webpack打包 .vue 文件的单页面应用,调试都正常,使用cnpm run dev 都可以,就是webpack打包时报错.如下: ERROR in ./src/App.v ...