分析过程
首先我们从yolo的训练命令开始分析(yolo的源码是用c++写的):

./darknet detector train cfg/voc.data cfg/yolo-voc.cfg darknet19_448.conv.23

从这里我们可以看出yolo主函数main中的参数argv[]在其中对应的值分别是 argv[0] -> darknet argv[1] -> detector argv[2] -> train .....(剩下的自己看),从这里我们可以看出,yolo主函数main一定在examples/darknet.c中,让我们来看一下主函数:

int main(int argc, char **argv)
{
//test_resize("data/bad.jpg");
//test_box();
//test_convolutional_layer();
if(argc < 2){
fprintf(stderr, "usage: %s <function>\n", argv[0]);
return 0;
}
gpu_index = find_int_arg(argc, argv, "-i", 0);
if(find_arg(argc, argv, "-nogpu")) {
gpu_index = -1;
} #ifndef GPU
gpu_index = -1;
#else
if(gpu_index >= 0){
cuda_set_device(gpu_index);
}
#endif if (0 == strcmp(argv[1], "average")){
average(argc, argv);
} else if (0 == strcmp(argv[1], "yolo")){
run_yolo(argc, argv);
} else if (0 == strcmp(argv[1], "voxel")){
run_voxel(argc, argv);
} else if (0 == strcmp(argv[1], "super")){
run_super(argc, argv);
} else if (0 == strcmp(argv[1], "lsd")){
run_lsd(argc, argv);
} else if (0 == strcmp(argv[1], "detector")){
run_detector(argc, argv);
} else if (0 == strcmp(argv[1], "detect")){
float thresh = find_float_arg(argc, argv, "-thresh", .24);
char *filename = (argc > 4) ? argv[4]: 0;
char *outfile = find_char_arg(argc, argv, "-out", 0);
int fullscreen = find_arg(argc, argv, "-fullscreen");
test_detector("cfg/coco.data", argv[2], argv[3], filename, thresh, .5, outfile, fullscreen);
} else if (0 == strcmp(argv[1], "cifar")){
run_cifar(argc, argv);
} else if (0 == strcmp(argv[1], "go")){
run_go(argc, argv);
} else if (0 == strcmp(argv[1], "rnn")){
run_char_rnn(argc, argv);
} else if (0 == strcmp(argv[1], "vid")){
run_vid_rnn(argc, argv);
} else if (0 == strcmp(argv[1], "coco")){
run_coco(argc, argv);
} else if (0 == strcmp(argv[1], "classify")){
predict_classifier("cfg/imagenet1k.data", argv[2], argv[3], argv[4], 5);
} else if (0 == strcmp(argv[1], "classifier")){
run_classifier(argc, argv);
} else if (0 == strcmp(argv[1], "regressor")){
run_regressor(argc, argv);
} else if (0 == strcmp(argv[1], "segmenter")){
run_segmenter(argc, argv);
} else if (0 == strcmp(argv[1], "art")){
run_art(argc, argv);
} else if (0 == strcmp(argv[1], "tag")){
run_tag(argc, argv);
} else if (0 == strcmp(argv[1], "compare")){
run_compare(argc, argv);
} else if (0 == strcmp(argv[1], "dice")){
run_dice(argc, argv);
} else if (0 == strcmp(argv[1], "writing")){
run_writing(argc, argv);
} else if (0 == strcmp(argv[1], "3d")){
composite_3d(argv[2], argv[3], argv[4], (argc > 5) ? atof(argv[5]) : 0);
} else if (0 == strcmp(argv[1], "test")){
test_resize(argv[2]);
} else if (0 == strcmp(argv[1], "captcha")){
run_captcha(argc, argv);
} else if (0 == strcmp(argv[1], "nightmare")){
run_nightmare(argc, argv);
} else if (0 == strcmp(argv[1], "rgbgr")){
rgbgr_net(argv[2], argv[3], argv[4]);
} else if (0 == strcmp(argv[1], "reset")){
reset_normalize_net(argv[2], argv[3], argv[4]);
} else if (0 == strcmp(argv[1], "denormalize")){
denormalize_net(argv[2], argv[3], argv[4]);
} else if (0 == strcmp(argv[1], "statistics")){
statistics_net(argv[2], argv[3]);
} else if (0 == strcmp(argv[1], "normalize")){
normalize_net(argv[2], argv[3], argv[4]);
} else if (0 == strcmp(argv[1], "rescale")){
rescale_net(argv[2], argv[3], argv[4]);
} else if (0 == strcmp(argv[1], "ops")){
operations(argv[2]);
} else if (0 == strcmp(argv[1], "speed")){
speed(argv[2], (argc > 3 && argv[3]) ? atoi(argv[3]) : 0);
} else if (0 == strcmp(argv[1], "oneoff")){
oneoff(argv[2], argv[3], argv[4]);
} else if (0 == strcmp(argv[1], "oneoff2")){
oneoff2(argv[2], argv[3], argv[4], atoi(argv[5]));
} else if (0 == strcmp(argv[1], "partial")){
partial(argv[2], argv[3], argv[4], atoi(argv[5]));
} else if (0 == strcmp(argv[1], "average")){
average(argc, argv);
} else if (0 == strcmp(argv[1], "visualize")){
visualize(argv[2], (argc > 3) ? argv[3] : 0);
} else if (0 == strcmp(argv[1], "mkimg")){
mkimg(argv[2], argv[3], atoi(argv[4]), atoi(argv[5]), atoi(argv[6]), argv[7]);
} else if (0 == strcmp(argv[1], "imtest")){
test_resize(argv[2]);
} else {
fprintf(stderr, "Not an option: %s\n", argv[1]);
}
return 0;
}

很简单可以看出,主函数就是对于参数argv[1]的一个判断,根据argv[1]的内容来启动不同的程序。让我们继续跟着训练命令走argv[1] = detector时,调用的函数是run_detector,而这个函数在examples/detector.c的最后,让我们再来看看这个函数吧:

void run_detector(int argc, char **argv)
{
char *prefix = find_char_arg(argc, argv, "-prefix", 0);
float thresh = find_float_arg(argc, argv, "-thresh", .24);
float hier_thresh = find_float_arg(argc, argv, "-hier", .5);
int cam_index = find_int_arg(argc, argv, "-c", 0);
int frame_skip = find_int_arg(argc, argv, "-s", 0);
int avg = find_int_arg(argc, argv, "-avg", 3);
if(argc < 4){
fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]);
return;
}
char *gpu_list = find_char_arg(argc, argv, "-gpus", 0);
char *outfile = find_char_arg(argc, argv, "-out", 0);
int *gpus = 0;
int gpu = 0;
int ngpus = 0;
if(gpu_list){
printf("%s\n", gpu_list);
int len = strlen(gpu_list);
ngpus = 1;
int i;
for(i = 0; i < len; ++i){
if (gpu_list[i] == ',') ++ngpus;
}
gpus = calloc(ngpus, sizeof(int));
for(i = 0; i < ngpus; ++i){
gpus[i] = atoi(gpu_list);
gpu_list = strchr(gpu_list, ',')+1;
}
} else {
gpu = gpu_index;
gpus = &gpu;
ngpus = 1;
} int clear = find_arg(argc, argv, "-clear");
int fullscreen = find_arg(argc, argv, "-fullscreen");
int width = find_int_arg(argc, argv, "-w", 0);
int height = find_int_arg(argc, argv, "-h", 0);
int fps = find_int_arg(argc, argv, "-fps", 0); char *datacfg = argv[3];
char *cfg = argv[4];
char *weights = (argc > 5) ? argv[5] : 0;
char *filename = (argc > 6) ? argv[6]: 0;
if(0==strcmp(argv[2], "test")) test_detector(datacfg, cfg, weights, filename, thresh, hier_thresh, outfile, fullscreen);
else if(0==strcmp(argv[2], "train")) train_detector(datacfg, cfg, weights, gpus, ngpus, clear);
else if(0==strcmp(argv[2], "valid")) validate_detector(datacfg, cfg, weights, outfile);
else if(0==strcmp(argv[2], "valid2")) validate_detector_flip(datacfg, cfg, weights, outfile);
else if(0==strcmp(argv[2], "recall")) validate_detector_recall(cfg, weights);
else if(0==strcmp(argv[2], "demo")) {
list *options = read_data_cfg(datacfg);
int classes = option_find_int(options, "classes", 2);
char *name_list = option_find_str(options, "names", "data/names.list");
char **names = get_labels(name_list);
demo(cfg, weights, thresh, cam_index, filename, names, classes, frame_skip, prefix, avg, hier_thresh, width, height, fps, fullscreen);
}
}

在这里 run_detector的主要作用还是在根据argv[]的值执行不同的函数,其他关于gpu啊,threshold啊之类的我们都可以不用管,这里最重要的是argv[2]的值,根据其值的不同,执行不同函数,这里的test_detector,train_detector这些函数在detector.c中都有定义,并且从名字上我们就可以看出这些函数是干什么的。这里我们依旧跟随之前的训练命令,argv[2] = train,这里让我们来看一下train_detector函数(注:这里是我修改过一部分的,不是原来的代码):

void train_detector(char *datacfg, char *cfgfile, char *weightfile, int *gpus, int ngpus, int clear)
{
list *options = read_data_cfg(datacfg);
char *train_images = option_find_str(options, "train", "scripts/train.txt"); //训练集路径
char *backup_directory = option_find_str(options, "backup", "/backup/"); //备份训练结果路径 srand(time(0));
char *base = basecfg(cfgfile);
printf("%s\n", base);
float avg_loss = -1;
network *nets = calloc(ngpus, sizeof(network)); srand(time(0));
int seed = rand();
int i;
for(i = 0; i < ngpus; ++i){
srand(seed);
#ifdef GPU
cuda_set_device(gpus[i]);
#endif
nets[i] = load_network(cfgfile, weightfile, clear); //载入网络
nets[i].learning_rate *= ngpus;
}
srand(time(0));
network net = nets[0]; int imgs = net.batch * net.subdivisions * ngpus;
printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay);
data train, buffer; layer l = net.layers[net.n - 1]; int classes = l.classes;
float jitter = l.jitter; list *plist = get_paths(train_images);
//int N = plist->size;
char **paths = (char **)list_to_array(plist); load_args args = {0};
args.w = net.w;
args.h = net.h;
args.paths = paths;
args.n = imgs;
args.m = plist->size;
args.classes = classes;
args.jitter = jitter;
args.num_boxes = l.max_boxes;
args.d = &buffer;
args.type = DETECTION_DATA;
args.threads = 8; args.angle = net.angle;
args.exposure = net.exposure;
args.saturation = net.saturation;
args.hue = net.hue; pthread_t load_thread = load_data(args);
clock_t time;
int count = 0;
//while(i*imgs < N*120){
while(get_current_batch(net) < net.max_batches){
if(l.random && count++%10 == 0){
printf("Resizing\n");
int dim = (rand() % 10 + 10) * 32;
if (get_current_batch(net)+200 > net.max_batches) dim = 608;
//int dim = (rand() % 4 + 16) * 32;
printf("%d\n", dim);
args.w = dim;
args.h = dim; pthread_join(load_thread, 0);
train = buffer;
free_data(train);
load_thread = load_data(args); for(i = 0; i < ngpus; ++i){
resize_network(nets + i, dim, dim);
}
net = nets[0];
}
time=clock();
pthread_join(load_thread, 0);
train = buffer;
load_thread = load_data(args); /*
int k;
for(k = 0; k < l.max_boxes; ++k){
box b = float_to_box(train.y.vals[10] + 1 + k*5);
if(!b.x) break;
printf("loaded: %f %f %f %f\n", b.x, b.y, b.w, b.h);
}
*/
/*
int zz;
for(zz = 0; zz < train.X.cols; ++zz){
image im = float_to_image(net.w, net.h, 3, train.X.vals[zz]);
int k;
for(k = 0; k < l.max_boxes; ++k){
box b = float_to_box(train.y.vals[zz] + k*5);
printf("%f %f %f %f\n", b.x, b.y, b.w, b.h);
draw_bbox(im, b, 1, 1,0,0);
}
show_image(im, "truth11");
cvWaitKey(0);
save_image(im, "truth11");
}
*/ printf("Loaded: %lf seconds\n", sec(clock()-time)); time=clock();
float loss = 0;
#ifdef GPU
if(ngpus == 1){
loss = train_network(net, train);
} else {
loss = train_networks(nets, ngpus, train, 4);
}
#else
loss = train_network(net, train);
#endif
if (avg_loss < 0) avg_loss = loss;
avg_loss = avg_loss*.9 + loss*.1; i = get_current_batch(net);
printf("%ld: %f, %f avg, %f rate, %lf seconds, %d images\n", get_current_batch(net), loss, avg_loss, get_current_rate(net), sec(clock()-time), i*imgs);
if(i%1000==0){
#ifdef GPU
if(ngpus != 1) sync_nets(nets, ngpus, 0);
#endif
char buff[256];
sprintf(buff, "%s/%s.backup", backup_directory, base);
save_weights(net, buff);
}
if(i%10000==0 || (i < 1000 && i%100 == 0)){
#ifdef GPU
if(ngpus != 1) sync_nets(nets, ngpus, 0);
#endif
char buff[256];
sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i);
save_weights(net, buff);
}
free_data(train);
}
#ifdef GPU
if(ngpus != 1) sync_nets(nets, ngpus, 0);
#endif
char buff[256];
sprintf(buff, "%s/%s_final.weights", backup_directory, base);
save_weights(net, buff);
}

这里我们主要重视的函数是第7行的read_data_cfg,第8行的train_images,第9行的backup_directory和第25行的load_network函数:
read_data_cfg中的参数datacfg在run_detector中可以看出就是arg[3],在本例中对应的就是voc.data
train_images是用来指定所要训练的图片集的路径的。
backup_directory是用来指定训练出来的权值的路劲的。
而load_network是用来载入所要训练的网络结构和参数的,这里run_detector中可以看出load_network的参数之一cfgfile就是argv[4],在我们这个例子中也便就是yolo-voc.cfg

这里我们先看一下cfg/voc.data(注:这里是我修改过了的,不是原来的)

classes= 2
train = /home/iair339-04/darknet/scripts/train.txt
valid = /home/iair339-04/darknet/scripts/2007_test.txt
names = data/kitti.names
backup = backup

这里可以看出voc.data是用来指定类别数classes,训练集路径train,测试集路径valid和类别名称names和备份文件路径backup的(so easy)。

接下来我们来看一下yolo-voc.cfg文件(注:修改过)

[net]
# Testing
#batch=1
#subdivisions=1
# Training
batch=64
subdivisions=8
height=416
width=416
channels=3
momentum=0.9
decay=0.0005
angle=0
saturation = 1.5
exposure = 1.5
hue=.1 learning_rate=0.001
burn_in=1000
max_batches = 80200
policy=steps
steps=40000,60000
scales=.1,.1 [convolutional]
batch_normalize=1
filters=32
size=3
stride=1
pad=1
activation=leaky [maxpool]
size=2
stride=2 [convolutional]
batch_normalize=1
filters=64
size=3
stride=1
pad=1
activation=leaky [maxpool]
size=2
stride=2 [convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=leaky [convolutional]
batch_normalize=1
filters=64
size=1
stride=1
pad=1
activation=leaky [convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=leaky [maxpool]
size=2
stride=2 [convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky [convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky [convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky [maxpool]
size=2
stride=2 [convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky [convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky [convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky [convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky [convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky [maxpool]
size=2
stride=2 [convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky [convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky [convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky [convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky [convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky ####### [convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=1024
activation=leaky [convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=1024
activation=leaky [route]
layers=-9 [convolutional]
batch_normalize=1
size=1
stride=1
pad=1
filters=64
activation=leaky [reorg]
stride=2 [route]
layers=-1,-4 [convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=1024
activation=leaky [convolutional]
size=1
stride=1
pad=1
filters=35 #此处修改
activation=linear [region]
anchors = 1.3221, 1.73145, 3.19275, 4.00944, 5.05587, 8.09892, 9.47112, 4.84053, 11.2364, 10.0071
bias_match=1
classes=2 #此处修改种类
coords=4
num=5
softmax=1
jitter=.3
rescore=1 object_scale=5
noobject_scale=1
class_scale=1
coord_scale=1 absolute=1
thresh = .6
random=1

这里[net]里面是网络的超参数的设置,而之后的便是yolo v2的网络结构了。

yolov2源码分析的更多相关文章

  1. ABP源码分析一:整体项目结构及目录

    ABP是一套非常优秀的web应用程序架构,适合用来搭建集中式架构的web应用程序. 整个Abp的Infrastructure是以Abp这个package为核心模块(core)+15个模块(module ...

  2. HashMap与TreeMap源码分析

    1. 引言     在红黑树--算法导论(15)中学习了红黑树的原理.本来打算自己来试着实现一下,然而在看了JDK(1.8.0)TreeMap的源码后恍然发现原来它就是利用红黑树实现的(很惭愧学了Ja ...

  3. nginx源码分析之网络初始化

    nginx作为一个高性能的HTTP服务器,网络的处理是其核心,了解网络的初始化有助于加深对nginx网络处理的了解,本文主要通过nginx的源代码来分析其网络初始化. 从配置文件中读取初始化信息 与网 ...

  4. zookeeper源码分析之五服务端(集群leader)处理请求流程

    leader的实现类为LeaderZooKeeperServer,它间接继承自标准ZookeeperServer.它规定了请求到达leader时需要经历的路径: PrepRequestProcesso ...

  5. zookeeper源码分析之四服务端(单机)处理请求流程

    上文: zookeeper源码分析之一服务端启动过程 中,我们介绍了zookeeper服务器的启动过程,其中单机是ZookeeperServer启动,集群使用QuorumPeer启动,那么这次我们分析 ...

  6. zookeeper源码分析之三客户端发送请求流程

    znode 可以被监控,包括这个目录节点中存储的数据的修改,子节点目录的变化等,一旦变化可以通知设置监控的客户端,这个功能是zookeeper对于应用最重要的特性,通过这个特性可以实现的功能包括配置的 ...

  7. java使用websocket,并且获取HttpSession,源码分析

    转载请在页首注明作者与出处 http://www.cnblogs.com/zhuxiaojie/p/6238826.html 一:本文使用范围 此文不仅仅局限于spring boot,普通的sprin ...

  8. ABP源码分析二:ABP中配置的注册和初始化

    一般来说,ASP.NET Web应用程序的第一个执行的方法是Global.asax下定义的Start方法.执行这个方法前HttpApplication 实例必须存在,也就是说其构造函数的执行必然是完成 ...

  9. ABP源码分析三:ABP Module

    Abp是一种基于模块化设计的思想构建的.开发人员可以将自定义的功能以模块(module)的形式集成到ABP中.具体的功能都可以设计成一个单独的Module.Abp底层框架提供便捷的方法集成每个Modu ...

随机推荐

  1. 广商博客冲刺第六七天new

    第四五天沖刺傳送門 第一版的網頁已經放到 云服務器(估計快到期了) 傳送門. (不怎么會玩服務器啊..求指教..目前問題如下: 1.我的電腦mysql密碼跟服務器的密碼不一樣..上傳的時候要把代碼里面 ...

  2. Java Script正则表达式语法学习

    今天在做页面交互验证时,在HTML里面第一反应居然用了Java 处理正则表达式的语法... ---------------------------------题记 学习来源 http://www.ru ...

  3. 10-Python3从入门到实战—基础之函数

    Python从入门到实战系列--目录 函数的定义 函数是组织好的,可重复使用的,用来实现单一,或相关联功能的代码段. 函数的语法 def 函数名(参数列表): 函数体 函数代码块以 def 关键词开头 ...

  4. tensorflow在windows下的安装

    1.python 的安装 这里我选择的是Anaconda4.2,附上下载链接https://www.continuum.io/downloads 2.测试python安装是否成功 在cmd中输入pyt ...

  5. Laravel Exception处理逻辑解析

    Laravel Exception处理逻辑解析 vendor/laravel/framework/src/Illuminate/Foundation/Application.php app首先继承了c ...

  6. 兼容IE-FireFox-Chrome的背景音乐播放

    以music目录下的kn.mp3文件为例: <bgsound src="music/kn.mp3" loop="-1"/> <audio sr ...

  7. [转帖]/etc/security/limits.conf的含义

    https://www.cnblogs.com/pzk7788/p/7250723.html /etc/security/limits.conf 是 Linux 资源使用配置文件,用来限制用户对系统资 ...

  8. poj 3177 Redundant Paths(边双连通分量+缩点)

    链接:http://poj.org/problem?id=3177 题意:有n个牧场,Bessie 要从一个牧场到另一个牧场,要求至少要有2条独立的路可以走.现已有m条路,求至少要新建多少条路,使得任 ...

  9. BZOJ1030[JSOI2007]文本生成器——AC自动机+DP

    题目描述 JSOI交给队员ZYX一个任务,编制一个称之为“文本生成器”的电脑软件:该软件的使用者是一些低幼人群,他们现在使用的是GW文本生成器v6版.该软件可以随机生成一些文章―――总是生成一篇长度固 ...

  10. BZOJ4530 BJOI2014大融合(线段树合并+并查集+dfs序)

    易知所求的是两棵子树大小的乘积.先建出最后所得到的树,求出dfs序和子树大小.之后考虑如何在动态加边过程中维护子树大小.这个可以用树剖比较简单的实现,但还有一种更快更优美的做法就是线段树合并.对每个点 ...