cls_bucket_list 函数

librados::IoCtx index_ctx;

// key   - oid (for different shards if there is any)

// value - list result for the corresponding oid (shard), it is filled by the AIO callback

map<int, string> oids;

map<int, struct rgw_cls_list_ret> list_results;

int r = open_bucket_index(bucket, index_ctx, oids, shard_id);

if (r < 0)

return r;

cls_rgw_obj_key start_key(start.name, start.instance);

r = CLSRGWIssueBucketList(index_ctx, start_key, prefix, num_entries, list_versions,

oids, list_results, cct->_conf->rgw_bucket_index_max_aio)();

if (r < 0)

return r;

open_bucket_index 获取桶的各个shard对象名, 存入oids map中; CLSRGWIssueBucketList 对各个shard执行list操作, 结果按shard存储到list_results中.

// Create a list of iterators that are used to iterate each shard

vector<map<string, struct rgw_bucket_dir_entry>::iterator> vcurrents(list_results.size());

vector<map<string, struct rgw_bucket_dir_entry>::iterator> vends(list_results.size());

vector<string> vnames(list_results.size());

map<int, struct rgw_cls_list_ret>::iterator iter = list_results.begin();

*is_truncated = false;

for (; iter != list_results.end(); ++iter) {

vcurrents.push_back(iter->second.dir.m.begin());

vends.push_back(iter->second.dir.m.end());

vnames.push_back(oids[iter->first]);

*is_truncated = (*is_truncated || iter->second.is_truncated);

}

处理list_results: list_results容器中存放的是桶各个shard的list结果. 这里为每个shard记录其条目的起始迭代器、结束迭代器以及shard对象名, 并汇总各shard的is_truncated标志.

// Create a map to track the next candidate entry from each shard, if the entry

// from a specified shard is selected/erased, the next entry from that shard will

// be inserted for next round selection

map<string, size_t> candidates;

for (size_t i = 0; i < vcurrents.size(); ++i) {

if (vcurrents[i] != vends[i]) {

candidates[vcurrents[i]->first] = i;

}

}

创建一个map(candidates), 用于跟踪各个bucket shard的下一个候选条目: key为条目名, value为shard的下标. 某个shard的条目被选中并移除后, 该shard的下一个条目会被插入, 供下一轮选择.

// Select the next one

int pos = candidates.begin()->second;

const string& name = vcurrents[pos]->first;

struct rgw_bucket_dir_entry& dirent = vcurrents[pos]->second;

// fill it in with initial values; we may correct later

RGWObjEnt e;

e.key.set(dirent.key.name, dirent.key.instance);

e.size = dirent.meta.size;

e.accounted_size = dirent.meta.accounted_size;

e.mtime = dirent.meta.mtime;

e.etag = dirent.meta.etag;

e.owner = dirent.meta.owner;

e.owner_display_name = dirent.meta.owner_display_name;

e.content_type = dirent.meta.content_type;

e.tag = dirent.tag;

e.flags = dirent.flags;

e.versioned_epoch = dirent.versioned_epoch;

获取到对应的bucket_entry的值.

bool force_check = force_check_filter && force_check_filter(dirent.key.name);

if ((!dirent.exists && !dirent.is_delete_marker()) || !dirent.pending_map.empty() || force_check) {

/* there are uncommitted ops. We need to check the current state,

* and if the tags are old we need to do cleanup as well. */

librados::IoCtx sub_ctx;

sub_ctx.dup(index_ctx);

r = check_disk_state(sub_ctx, bucket, dirent, e, updates[vnames[pos]]);

if (r < 0 && r != -ENOENT) {

return r;

}

}

判断是否需要检查磁盘上的对象状态(check_disk_state), 满足以下任一条件即进入:

  1. dirent.exists 为false, 并且该条目不是delete_marker.
  2. dirent.pending_map 不为空, 说明存在尚未提交的操作.
  3. force_check 为true, 即强制检查.

if (r >= 0) {

ldout(cct, 10) << "RGWRados::cls_bucket_list: got " << e.key.name << "[" << e.key.instance << "]" << dendl;

m[name] = std::move(e);

++count;

}

检查完成, 将bucket_index entry 复制给m表.

// Refresh the candidates map

candidates.erase(candidates.begin());

++vcurrents[pos];

if (vcurrents[pos] != vends[pos]) {

candidates[vcurrents[pos]->first] = pos;

}

刷新候选表(candidates): 移除刚处理的条目, 并把同一shard的下一个条目加入候选, 然后继续解析下一个对象.

// Suggest updates if there is any

map<string, bufferlist>::iterator miter = updates.begin();

for (; miter != updates.end(); ++miter) {

if (miter->second.length()) {

ObjectWriteOperation o;

cls_rgw_suggest_changes(o, miter->second);

// we don't care if we lose suggested updates, send them off blindly

AioCompletion *c = librados::Rados::aio_create_completion(NULL, NULL, NULL);

index_ctx.aio_operate(miter->first, c, &o);

c->release();

}

}

更新对象操作, 重点参考 cls_rgw_suggest_changes 函数

// Check if all the returned entries are consumed or not

for (size_t i = 0; i < vcurrents.size(); ++i) {

if (vcurrents[i] != vends[i])

*is_truncated = true;

}

if (!m.empty())

*last_entry = m.rbegin()->first;

最后设置is_truncated的值  设置last_entry的值

check_disk_state

函数说明: 检查磁盘上的对象的状态

rgw_obj obj;

std::string oid, instance, loc, ns;

rgw_obj_key key;

key.set(list_state.key);

oid = key.name;

if (!rgw_obj::strip_namespace_from_object(oid, ns, instance)) {

// well crap

assert(0 == "got bad object name off disk");

}

obj.init(bucket, oid);

obj.set_loc(list_state.locator);

obj.set_ns(ns);

obj.set_instance(key.instance);

get_obj_bucket_and_oid_loc(obj, bucket, oid, loc);

io_ctx.locator_set_key(loc);

RGWObjState *astate = NULL;

RGWObjectCtx rctx(this);

int r = get_obj_state(&rctx, obj, &astate, NULL);

if (r < 0)

return r;

list_state.pending_map.clear(); // we don't need this and it inflates size

if (!astate->exists) {

/* object doesn't exist right now -- hopefully because it's

* marked as !exists and got deleted */

if (list_state.exists) {

/* FIXME: what should happen now? Work out if there are any

* non-bad ways this could happen (there probably are, but annoying

* to handle!) */

}

// encode a suggested removal of that key

list_state.ver.epoch = io_ctx.get_last_version();

list_state.ver.pool = io_ctx.get_id();

cls_rgw_encode_suggestion(CEPH_RGW_REMOVE, list_state, suggested_updates);

return -ENOENT;

}

string etag;

string content_type;

ACLOwner owner;

object.size = astate->size;

object.mtime = astate->mtime;

map<string, bufferlist>::iterator iter = astate->attrset.find(RGW_ATTR_ETAG);

if (iter != astate->attrset.end()) {

etag = iter->second.c_str();

}

iter = astate->attrset.find(RGW_ATTR_CONTENT_TYPE);

if (iter != astate->attrset.end()) {

content_type = iter->second.c_str();

}

iter = astate->attrset.find(RGW_ATTR_ACL);

if (iter != astate->attrset.end()) {

r = decode_policy(iter->second, &owner);

if (r < 0) {

dout(0) << "WARNING: could not decode policy for object: " << obj << dendl;

}

}

if (astate->has_manifest) {

RGWObjManifest::obj_iterator miter;

RGWObjManifest& manifest = astate->manifest;

for (miter = manifest.obj_begin(); miter != manifest.obj_end(); ++miter) {

rgw_obj loc = miter.get_location();

if (loc.ns == RGW_OBJ_NS_MULTIPART) {

//dout(10) << "check_disk_state(): removing manifest part from index: " << loc << dendl;

r = delete_obj_index(loc);

if (r < 0) {

dout(0) << "WARNING: delete_obj_index() returned r=" << r << dendl;

}

}

}

}

object.etag = etag;

object.content_type = content_type;

object.owner = owner.get_id();

object.owner_display_name = owner.get_display_name();

// encode suggested updates

list_state.ver.pool = io_ctx.get_id();

list_state.ver.epoch = astate->epoch;

list_state.meta.size = object.size;

list_state.meta.mtime = object.mtime;

list_state.meta.category = main_category;

list_state.meta.etag = etag;

list_state.meta.content_type = content_type;

if (astate->obj_tag.length() > 0)

list_state.tag = astate->obj_tag.c_str();

list_state.meta.owner = owner.get_id().to_str();

list_state.meta.owner_display_name = owner.get_display_name();

list_state.exists = true;

cls_rgw_encode_suggestion(CEPH_RGW_UPDATE, list_state, suggested_updates);

return 0;

cls_rgw_suggest_changes

cls_rgw.cc  rgw_dir_suggest_changes

/**
 * Header record of a bucket index: aggregate statistics plus a few
 * bookkeeping values. All numeric fields start at zero.
 */
struct rgw_bucket_dir_header {
  // Usage statistics, one rgw_bucket_category_stats per category id.
  map<uint8_t, rgw_bucket_category_stats> stats;

  // Age limit for pending-operation entries; a value of 0 makes readers fall
  // back to CEPH_RGW_TAG_TIMEOUT.
  uint64_t tag_timeout = 0;

  // Index version; copied into an entry's index_ver when the entry is stored.
  uint64_t ver = 0;

  // NOTE(review): purpose not visible from this excerpt -- confirm.
  uint64_t master_ver = 0;

  // NOTE(review): purpose not visible from this excerpt -- confirm.
  string max_marker;

  // User-provided so the type keeps a non-trivial default constructor; the
  // initial values come from the member initializers above.
  rgw_bucket_dir_header() {}
};

Bucket header的结构体:

tag_timeout :  pending_map项的时间戳和当前时间相比,相差超过tag_timeout,则删除该pending_map项; 为0时使用默认值CEPH_RGW_TAG_TIMEOUT.

ver : 每次更新都会增加1

master_ver : ??  检查代码中只有获取,没有赋值的地方.

max_marker : ??

rgw_bucket_dir_header持久化为omap header

CLS_LOG(1, "rgw_dir_suggest_changes()");

bufferlist header_bl;

struct rgw_bucket_dir_header header;

bool header_changed = false;

int rc = read_bucket_header(hctx, &header);

if (rc < 0) {

CLS_LOG(1, "ERROR: rgw_dir_suggest_changes(): failed to read header\n");

return rc;

}

取出桶的header信息.

timespan tag_timeout(header.tag_timeout ? header.tag_timeout : CEPH_RGW_TAG_TIMEOUT);

计算超时时长.

while (!in_iter.end()) {

__u8 op;

rgw_bucket_dir_entry cur_change;

rgw_bucket_dir_entry cur_disk;

try {

::decode(op, in_iter);

::decode(cur_change, in_iter);

} catch (buffer::error& err) {

CLS_LOG(1, "ERROR: rgw_dir_suggest_changes(): failed to decode request\n");

return -EINVAL;

}

    //decode dir_key

bufferlist cur_disk_bl;

string cur_change_key;

encode_obj_index_key(cur_change.key, &cur_change_key);

int ret = cls_cxx_map_get_val(hctx, cur_change_key, &cur_disk_bl);

if (ret < 0 && ret != -ENOENT)

return -EINVAL;

   //获取osd中对象信息

if (cur_disk_bl.length()) {

bufferlist::iterator cur_disk_iter = cur_disk_bl.begin();

try {

::decode(cur_disk, cur_disk_iter);

} catch (buffer::error& error) {

CLS_LOG(1, "ERROR: rgw_dir_suggest_changes(): failed to decode cur_disk\n");

return -EINVAL;

}

real_time cur_time = real_clock::now();

map<string, struct rgw_bucket_pending_info>::iterator iter =

cur_disk.pending_map.begin();

while(iter != cur_disk.pending_map.end()) {

map<string, struct rgw_bucket_pending_info>::iterator cur_iter=iter++;

if (cur_time > (cur_iter->second.timestamp + tag_timeout)) {

cur_disk.pending_map.erase(cur_iter);

}

  //如果超时了, 则删除pending_map中的这一项(而不是整个pending_map), 这个可能是安全性的检查.

}

}

CLS_LOG(20, "cur_disk.pending_map.empty()=%d op=%d cur_disk.exists=%d cur_change.pending_map.size()=%d cur_change.exists=%d\n",

cur_disk.pending_map.empty(), (int)op, cur_disk.exists,

(int)cur_change.pending_map.size(), cur_change.exists);

if (cur_disk.pending_map.empty()) {

if (cur_disk.exists) {

struct rgw_bucket_category_stats& old_stats = header.stats[cur_disk.meta.category];

CLS_LOG(10, "total_entries: %" PRId64 " -> %" PRId64 "\n", old_stats.num_entries, old_stats.num_entries - 1);

old_stats.num_entries--;

old_stats.total_size -= cur_disk.meta.accounted_size;

old_stats.total_size_rounded -= get_rounded_size(cur_disk.meta.accounted_size);

header_changed = true;

}

struct rgw_bucket_category_stats& stats =

header.stats[cur_change.meta.category];

switch(op) {

case CEPH_RGW_REMOVE:

CLS_LOG(10, "CEPH_RGW_REMOVE name=%s instance=%s\n", cur_change.key.name.c_str(), cur_change.key.instance.c_str());

ret = cls_cxx_map_remove_key(hctx, cur_change_key);

if (ret < 0)

return ret;

break;

case CEPH_RGW_UPDATE:

CLS_LOG(10, "CEPH_RGW_UPDATE name=%s instance=%s total_entries: %" PRId64 " -> %" PRId64 "\n",

cur_change.key.name.c_str(), cur_change.key.instance.c_str(), stats.num_entries, stats.num_entries + 1);

    //统计更新

stats.num_entries++;

stats.total_size += cur_change.meta.accounted_size;

stats.total_size_rounded += get_rounded_size(cur_change.meta.accounted_size);

header_changed = true;

cur_change.index_ver = header.ver;

bufferlist cur_state_bl;

::encode(cur_change, cur_state_bl);

ret = cls_cxx_map_set_val(hctx, cur_change_key, &cur_state_bl);

if (ret < 0)

return ret;

break;

}

}

}

bucket list 函数解析的更多相关文章

  1. [转]javascript eval函数解析json数据时为什加上圆括号eval("("+data+")")

    javascript eval函数解析json数据时为什么 加上圆括号?为什么要 eval这里要添加 “("("+data+")");//”呢?   原因在于: ...

  2. PHP json_decode 函数解析 json 结果为 NULL 的解决方法

    在做网站 CMS 模块时,对于模块内容 content 字段,保存的是 json 格式的字符串,所以在后台进行模块内容的编辑操作 ( 取出保存的数据 ) 时,需要用到 json_decode() 函数 ...

  3. Matlab中bsxfun和unique函数解析

    一.问题来源 来自于一份LSH代码,记录下来. 二.函数解析 2.1 bsxfun bsxfun是一个matlab自版本R2007a来就提供的一个函数,作用是”applies an element-b ...

  4. socket使用TCP协议时,send、recv函数解析以及TCP连接关闭的问题

    Tcp协议本身是可靠的,并不等于应用程序用tcp发送数据就一定是可靠的.不管是否阻塞,send发送的大小,并不代表对端recv到多少的数据. 在阻塞模式下, send函数的过程是将应用程序请求发送的数 ...

  5. sigaction函数解析

    http://blog.chinaunix.net/uid-1877180-id-3011232.html sigaction函数解析  sigaction函数的功能是检查或修改与指定信号相关联的处理 ...

  6. driver_register()函数解析

    driver_register()函数解析 /** * driver_register - register driver with bus * @drv: driver to register *  ...

  7. async函数解析

    转载请注明出处:async函数解析 async函数是基于Generator函数实现的,也就是说是Generator函数的语法糖.在之前的文章有介绍过Generator函数语法和异步应用,如果对其不了解 ...

  8. tf.train.shuffle_batch函数解析

    tf.train.shuffle_batch (tensor_list, batch_size, capacity, min_after_dequeue, num_threads=1, seed=No ...

  9. oracle中next_day()、last_day()函数解析

    oracle中next_day()函数解析 Sql代码 当前系统时间的下一星期一的时间select   next_day(sysdate,1) from dual NEXT_DAY(date,char ...

随机推荐

  1. 走进python

    python史 1.python之父 Guido van Rossum 2.python的优缺点 优点:开发效率高,可跨平台,可嵌入,可扩展,优雅简洁 缺点:运行稍慢,代码不能加密,不能实现真正的多线 ...

  2. HDU 6215:Brute Force Sorting(链表+队列)

    题目链接 题意 给出一个长度为n的数组,每次操作都要删除数组里面非递增的元素,问最终的数组元素有什么. 思路 容易想到用链表模拟删除,但是不能每次都暴力枚举,这样复杂度O(N^2).想到每次删除元素的 ...

  3. 跟我学SpringCloud | 第十二篇:Spring Cloud Gateway初探

    SpringCloud系列教程 | 第十二篇:Spring Cloud Gateway初探 Springboot: 2.1.6.RELEASE SpringCloud: Greenwich.SR1 如 ...

  4. C# 中奇妙的函数–6. 五个序列聚合运算(Sum, Average, Min, Max,Aggregate)

    今天,我们将着眼于五个用于序列的聚合运算.很多时候当我们在对序列进行操作时,我们想要做基于这些序列执行某种汇总然后,计算结果. Enumerable 静态类的LINQ扩展方法可以做到这一点 .就像之前 ...

  5. 20131221-Dom练习-第二十六天(未完)

    [1] //总结,写代码,一要动脑,理解用脑 //二要练,要动手,要有用身体记忆代码的觉悟,记忆用手 //三学编程最快的方法是,直接接触代码,用脑,用手接触代码 //面向对象的编码方式,对象还是对象, ...

  6. MyBatis select标签的用法

    From<MyBatis从入门到精通> 第一步,在接口中添加方法: public interface UserMapper { SysUser selectById(Long id); } ...

  7. MyBatis从入门到精通:第一章实体类与Mapper.xml文件

    实体类: package tk.mybatis.simple.model; public class Country { public Long getId() { return id; } publ ...

  8. [笨方法学python]习题51自动化测试笔记

    习题51 本节自动化测试部分看不大懂,自己每步都打印出来,帮助理解.(代码标红部分为自己加入调试为打印变量值所用) tests/tools.py from nose.tools import * im ...

  9. Spring Boot2(十四):单文件上传/下载,文件批量上传

    文件上传和下载在项目中经常用到,这里主要学习SpringBoot完成单个文件上传/下载,批量文件上传的场景应用.结合mysql数据库.jpa数据层操作.thymeleaf页面模板. 一.准备 添加ma ...

  10. 创建 gif

    1.动态创建 <Container name="layLoading"/> if (m_pLoadingGif == NULL)  {   m_pLoadingGif ...