Preemption Context Switches 和 Synchronization Context Switches
- Preemption Context Switches度量的是操作系统任务调度器把处理器上正在执行的线程换下、切换到更高优先级(higher-priority)线程的次数。
- Synchronization Context Switches度量的是因为显式调用线程同步API而发生线程切换的次数。例如给多线程共享的变量加锁后,多个线程同时去修改它,有些线程就要阻塞在lock上,直到占用锁的线程释放lock。这个度量反映的是线程间竞争的程度。
以下的实验使用VTune完成,旨在探究Preemption Context Switches的来源。
实验一:多线程无锁保护
speedup-example-no-mutex.cpp
/*
 * speedup-example-no-mutex: N worker threads, each pinned to its own core,
 * update shared counters without any locking.
 */
#ifndef _GNU_SOURCE
#define _GNU_SOURCE /* glibc: exposes CPU_ZERO/CPU_SET and pthread_setaffinity_np() */
#endif
#include <pthread.h>
#include <sched.h>
#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
#include <assert.h>

#define N 4     /* number of worker threads */
#define M 30000 /* outer iterations per worker */

int nwait = 0;          /* incremented once per outer iteration, deliberately unprotected */
volatile long long sum; /* shared accumulator; volatile so every update is a real access */
long loops = 6000;      /* inner-loop trip count */

/* Pin the calling thread to the single CPU core `core_id`. */
void set_affinity(int core_id) {
    cpu_set_t cpuset;
    CPU_ZERO(&cpuset);
    CPU_SET(core_id, &cpuset);
    /* Keep the call outside assert(): under NDEBUG the assert expression is
     * discarded, which would silently drop the pinning itself. */
    int rc = pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &cpuset);
    assert(rc == 0);
    (void)rc;
}

/*
 * Worker entry point. `arg` carries the index of the core to pin to.
 * Runs M rounds of: bump nwait, add i to sum, add i^6 to sum.
 * All shared updates are intentionally unsynchronized in this experiment.
 * Returns NULL.
 */
void *thread_func(void *arg) {
    set_affinity((int)(long)arg);
    for (int j = 0; j < M; j++) {
        nwait++;
        for (long i = 0; i < loops; i++)
            sum += i;
        for (long i = 0; i < loops; i++) {
            /* i^6 overflows a signed 64-bit integer for i above roughly 1400;
             * multiply in unsigned arithmetic so the wrap-around is defined. */
            const unsigned long long u = (unsigned long long)i;
            sum += u * u * u * u * u * u;
        }
    }
    return NULL; /* a void* function must not fall off its end */
}

/* Pins the main thread, starts N workers (worker i on core i) and joins them. */
int main(int argc, char *argv[]) {
    /* NOTE(review): core 23 is hard-coded; the affinity call is expected to
     * fail (and the assert to fire) on machines without that core. */
    set_affinity(23);
    pthread_t th[N];

    for (unsigned i = 0; i < N; ++i) {
        /* Go through long so the integer-to-pointer cast matches the pointer
         * width and mirrors the (int)(long)arg decode in thread_func. */
        int ret = pthread_create(&th[i], NULL, thread_func, (void *)(long)i);
        assert(!ret && "pthread_create() failed!");
        (void)ret;
    }

    for (unsigned i = 0; i < N; ++i)
        pthread_join(th[i], NULL);

    exit(0);
}
VTune现象:
Preemption Context Switches由两部分组成:clone和Unknown stack frame(s)。
- 后者的Preemption稳定在5:在这个程序中,共有5个线程在执行(4个子线程加主线程),VTune显示每个线程各占1,所以后者的Preemption才稳定在5上。为了验证,我们让N等于8,结果仍是每个线程各占1,Unknown stack frame(s)处的Preemption稳定在9。
- clone处的Preemption不是一个确定的数。有可能是6、7、8等。
为了验证,我们让N等于8,结果如下:
/*
 * Variant 1: only the sixth-power loop.
 * Pins the thread to the core index carried in `arg`, then runs M rounds
 * (M, loops and sum come from the enclosing program). Returns NULL.
 */
void *thread_func(void *arg) {
    set_affinity((int)(long)arg);
    for (int j = 0; j < M; j++) {
        for (long i = 0; i < loops; i++) {
            /* Unsigned multiply: i^6 overflows a signed 64-bit integer for
             * i above roughly 1400, and signed overflow is undefined. */
            const unsigned long long u = (unsigned long long)i;
            sum += u * u * u * u * u * u;
        }
    }
    return NULL; /* a void* function must not fall off its end */
}

/* Variant 2: only the plain summation loop. Returns NULL. */
void *thread_func(void *arg) {
    set_affinity((int)(long)arg);
    for (int j = 0; j < M; j++) {
        for (long i = 0; i < loops; i++)
            sum += i;
    }
    return NULL;
}

/* Variant 3: only the nwait increment, no inner loop. Returns NULL. */
void *thread_func(void *arg) {
    set_affinity((int)(long)arg);
    for (int j = 0; j < M; j++) {
        nwait++;
    }
    return NULL;
}
无clone处的Preemption Context Switches
/*
 * Worker variant: two consecutive summation loops per round.
 * Pins the thread to the core index carried in `arg`, then runs M rounds
 * (M, loops and sum come from the enclosing program). Returns NULL.
 */
void *thread_func(void *arg) {
    set_affinity((int)(long)arg);
    for (int j = 0; j < M; j++) {
        for (long i = 0; i < loops; i++)
            sum += i;
        for (long i = 0; i < loops; i++)
            sum += i;
    }
    return NULL; /* a void* function must not fall off its end */
}
/*
 * Variant: three consecutive summation loops per round.
 * Pins the thread to the core index carried in `arg`, then runs M rounds
 * (M, loops and sum come from the enclosing program). Returns NULL.
 */
void *thread_func(void *arg) {
    set_affinity((int)(long)arg);
    for (int j = 0; j < M; j++) {
        for (long i = 0; i < loops; i++)
            sum += i;
        for (long i = 0; i < loops; i++)
            sum += i;
        for (long i = 0; i < loops; i++)
            sum += i;
    }
    return NULL; /* a void* function must not fall off its end */
}

/* Variant: four consecutive summation loops per round. Returns NULL. */
void *thread_func(void *arg) {
    set_affinity((int)(long)arg);
    for (int j = 0; j < M; j++) {
        for (long i = 0; i < loops; i++)
            sum += i;
        for (long i = 0; i < loops; i++)
            sum += i;
        for (long i = 0; i < loops; i++)
            sum += i;
        for (long i = 0; i < loops; i++)
            sum += i;
    }
    return NULL;
}
/*
 * Worker variant: a single inner loop that adds i to sum four times per
 * iteration. Pins the thread to the core index carried in `arg`, then runs
 * M rounds (M, loops and sum come from the enclosing program). Returns NULL.
 */
void *thread_func(void *arg) {
    set_affinity((int)(long)arg);
    for (int j = 0; j < M; j++) {
        for (long i = 0; i < loops; i++) {
            sum += i;
            sum += i;
            sum += i;
            sum += i;
        }
    }
    return NULL; /* a void* function must not fall off its end */
}
/*
 * Worker variant: a single inner loop that adds i to sum seven times per
 * iteration. Pins the thread to the core index carried in `arg`, then runs
 * M rounds (M, loops and sum come from the enclosing program). Returns NULL.
 */
void *thread_func(void *arg) {
    set_affinity((int)(long)arg);
    for (int j = 0; j < M; j++) {
        for (long i = 0; i < loops; i++) {
            sum += i;
            sum += i;
            sum += i;
            sum += i;
            sum += i;
            sum += i;
            sum += i;
        }
    }
    return NULL; /* a void* function must not fall off its end */
}
/*
 * Variant: four separate summation loops per round.
 * Pins the thread to the core index carried in `arg`, then runs M rounds
 * (M, loops and sum come from the enclosing program). Returns NULL.
 */
void *thread_func(void *arg) {
    set_affinity((int)(long)arg);
    for (int j = 0; j < M; j++) {
        for (long i = 0; i < loops; i++)
            sum += i;
        for (long i = 0; i < loops; i++)
            sum += i;
        for (long i = 0; i < loops; i++)
            sum += i;
        for (long i = 0; i < loops; i++)
            sum += i;
    }
    return NULL; /* a void* function must not fall off its end */
}

/* Variant: one loop performing the same four additions per iteration. Returns NULL. */
void *thread_func(void *arg) {
    set_affinity((int)(long)arg);
    for (int j = 0; j < M; j++) {
        for (long i = 0; i < loops; i++) {
            sum += i;
            sum += i;
            sum += i;
            sum += i;
        }
    }
    return NULL;
}
从运行时间而来。
当然这仅仅是针对多线程间无锁的情况。下面给它加上锁,看看是否还有其他因素也会影响Preemption Context Switches。
实验二:多线程加锁
speedup-example-mutex-only.cpp
/*
 * speedup-example-mutex-only: N worker threads, each pinned to its own core;
 * the nwait increment and the first summation loop run under a shared mutex.
 */
#ifndef _GNU_SOURCE
#define _GNU_SOURCE /* glibc: exposes CPU_ZERO/CPU_SET and pthread_setaffinity_np() */
#endif
#include <pthread.h>
#include <sched.h>
#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
#include <assert.h>

#define N 4     /* number of worker threads */
#define M 30000 /* outer iterations per worker */

int nwait = 0;          /* incremented once per outer iteration, under the mutex */
volatile long long sum; /* shared accumulator; volatile so every update is a real access */
long loops = 6000;      /* inner-loop trip count */

/* Statically initialized: a mutex must be initialized before its first lock. */
pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;

/* Pin the calling thread to the single CPU core `core_id`. */
void set_affinity(int core_id) {
    cpu_set_t cpuset;
    CPU_ZERO(&cpuset);
    CPU_SET(core_id, &cpuset);
    /* Keep the call outside assert(): under NDEBUG the assert expression is
     * discarded, which would silently drop the pinning itself. */
    int rc = pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &cpuset);
    assert(rc == 0);
    (void)rc;
}

/*
 * Worker entry point. `arg` carries the index of the core to pin to.
 * Each of the M rounds has a locked part (bump nwait, add i to sum) followed
 * by an unlocked part (add i^6 to sum). Returns NULL.
 */
void *thread_func(void *arg) {
    set_affinity((int)(long)arg);
    for (int j = 0; j < M; j++) {
        pthread_mutex_lock(&mutex);
        nwait++;
        for (long i = 0; i < loops; i++)
            sum += i;
        pthread_mutex_unlock(&mutex);

        /* Outside the lock by design of the experiment, so these updates to
         * sum are not synchronized with other workers. */
        for (long i = 0; i < loops; i++) {
            /* i^6 overflows a signed 64-bit integer for i above roughly 1400;
             * multiply in unsigned arithmetic so the wrap-around is defined. */
            const unsigned long long u = (unsigned long long)i;
            sum += u * u * u * u * u * u;
        }
    }
    return NULL; /* a void* function must not fall off its end */
}

/* Pins the main thread, starts N workers (worker i on core i) and joins them. */
int main(int argc, char *argv[]) {
    /* NOTE(review): core 23 is hard-coded; the affinity call is expected to
     * fail (and the assert to fire) on machines without that core. */
    set_affinity(23);
    pthread_t th[N];

    for (unsigned i = 0; i < N; ++i) {
        /* Go through long so the integer-to-pointer cast matches the pointer
         * width and mirrors the (int)(long)arg decode in thread_func. */
        int ret = pthread_create(&th[i], NULL, thread_func, (void *)(long)i);
        assert(!ret && "pthread_create() failed!");
        (void)ret;
    }

    for (unsigned i = 0; i < N; ++i)
        pthread_join(th[i], NULL);

    exit(0);
}
接下来我们改变线程数,即令N等于8(我们期望Unknown处的Preemption近似线性增加,而clone处的增加幅度更大,即与多线程无锁的情况类似):
/*
 * Locked variants of the worker. Each pins the thread to the core index
 * carried in `arg`, runs M rounds and returns NULL (M, loops, sum, nwait and
 * mutex come from the enclosing program).
 */

/* Variant 1: nwait++ and one summation loop, all inside the lock. */
void *thread_func(void *arg) {
    set_affinity((int)(long)arg);
    for (int j = 0; j < M; j++) {
        pthread_mutex_lock(&mutex);
        nwait++;
        for (long i = 0; i < loops; i++)
            sum += i;
        pthread_mutex_unlock(&mutex);
    }
    return NULL; /* a void* function must not fall off its end */
}

/* Variant 2: only nwait++ inside the lock; sixth-power loop outside it. */
void *thread_func(void *arg) {
    set_affinity((int)(long)arg);
    for (int j = 0; j < M; j++) {
        pthread_mutex_lock(&mutex);
        nwait++;
        pthread_mutex_unlock(&mutex);
        for (long i = 0; i < loops; i++) {
            /* Unsigned multiply: i^6 overflows a signed 64-bit integer for
             * i above roughly 1400, and signed overflow is undefined. */
            const unsigned long long u = (unsigned long long)i;
            sum += u * u * u * u * u * u;
        }
    }
    return NULL;
}

/* Variant 3: only nwait++ inside the lock, no other work. */
void *thread_func(void *arg) {
    set_affinity((int)(long)arg);
    for (int j = 0; j < M; j++) {
        pthread_mutex_lock(&mutex);
        nwait++;
        pthread_mutex_unlock(&mutex);
    }
    return NULL;
}

/* Variant 4: nwait++ and two summation loops inside the lock. */
void *thread_func(void *arg) {
    set_affinity((int)(long)arg);
    for (int j = 0; j < M; j++) {
        pthread_mutex_lock(&mutex);
        nwait++;
        for (long i = 0; i < loops; i++)
            sum += i;
        for (long i = 0; i < loops; i++)
            sum += i;
        pthread_mutex_unlock(&mutex);
    }
    return NULL;
}

/* Variant 5: nwait++ and four summation loops inside the lock. */
void *thread_func(void *arg) {
    set_affinity((int)(long)arg);
    for (int j = 0; j < M; j++) {
        pthread_mutex_lock(&mutex);
        nwait++;
        for (long i = 0; i < loops; i++)
            sum += i;
        for (long i = 0; i < loops; i++)
            sum += i;
        for (long i = 0; i < loops; i++)
            sum += i;
        for (long i = 0; i < loops; i++)
            sum += i;
        pthread_mutex_unlock(&mutex);
    }
    return NULL;
}

/* Variant 6: nwait++ and one loop with four additions per iteration, inside the lock. */
void *thread_func(void *arg) {
    set_affinity((int)(long)arg);
    for (int j = 0; j < M; j++) {
        pthread_mutex_lock(&mutex);
        nwait++;
        for (long i = 0; i < loops; i++) {
            sum += i;
            sum += i;
            sum += i;
            sum += i;
        }
        pthread_mutex_unlock(&mutex);
    }
    return NULL;
}
/*
 * Locked variants with the heavy work outside the critical section. Each
 * pins the thread to the core index carried in `arg`, runs M rounds and
 * returns NULL (M, loops, sum, nwait and mutex come from the enclosing
 * program). The sixth power is computed in unsigned arithmetic because i^6
 * overflows a signed 64-bit integer for i above roughly 1400.
 */

/* Variant 1: two sixth-power loops after the unlock. */
void *thread_func(void *arg) {
    set_affinity((int)(long)arg);
    for (int j = 0; j < M; j++) {
        pthread_mutex_lock(&mutex);
        nwait++;
        pthread_mutex_unlock(&mutex);
        for (long i = 0; i < loops; i++) {
            const unsigned long long u = (unsigned long long)i;
            sum += u * u * u * u * u * u;
        }
        for (long i = 0; i < loops; i++) {
            const unsigned long long u = (unsigned long long)i;
            sum += u * u * u * u * u * u;
        }
    }
    return NULL; /* a void* function must not fall off its end */
}

/* Variant 2: four sixth-power loops after the unlock. */
void *thread_func(void *arg) {
    set_affinity((int)(long)arg);
    for (int j = 0; j < M; j++) {
        pthread_mutex_lock(&mutex);
        nwait++;
        pthread_mutex_unlock(&mutex);
        for (long i = 0; i < loops; i++) {
            const unsigned long long u = (unsigned long long)i;
            sum += u * u * u * u * u * u;
        }
        for (long i = 0; i < loops; i++) {
            const unsigned long long u = (unsigned long long)i;
            sum += u * u * u * u * u * u;
        }
        for (long i = 0; i < loops; i++) {
            const unsigned long long u = (unsigned long long)i;
            sum += u * u * u * u * u * u;
        }
        for (long i = 0; i < loops; i++) {
            const unsigned long long u = (unsigned long long)i;
            sum += u * u * u * u * u * u;
        }
    }
    return NULL;
}

/* Variant 3: one loop with four sixth-power additions per iteration. */
void *thread_func(void *arg) {
    set_affinity((int)(long)arg);
    for (int j = 0; j < M; j++) {
        pthread_mutex_lock(&mutex);
        nwait++;
        pthread_mutex_unlock(&mutex);
        for (long i = 0; i < loops; i++) {
            const unsigned long long u = (unsigned long long)i;
            sum += u * u * u * u * u * u;
            sum += u * u * u * u * u * u;
            sum += u * u * u * u * u * u;
            sum += u * u * u * u * u * u;
        }
    }
    return NULL;
}
/*
 * Unlocked worker: per round, bump nwait, add i to sum, then add i^6 to sum.
 * Pins the thread to the core index carried in `arg`, then runs M rounds
 * (M, loops, sum and nwait come from the enclosing program). Returns NULL.
 */
void *thread_func(void *arg) {
    set_affinity((int)(long)arg);
    for (int j = 0; j < M; j++) {
        nwait++;
        for (long i = 0; i < loops; i++)
            sum += i;
        for (long i = 0; i < loops; i++) {
            /* Unsigned multiply: i^6 overflows a signed 64-bit integer for
             * i above roughly 1400, and signed overflow is undefined. */
            const unsigned long long u = (unsigned long long)i;
            sum += u * u * u * u * u * u;
        }
    }
    return NULL; /* a void* function must not fall off its end */
}
和
/*
 * Unlocked worker: per round, bump nwait and run two plain summation loops.
 * Pins the thread to the core index carried in `arg`, then runs M rounds
 * (M, loops, sum and nwait come from the enclosing program). Returns NULL.
 */
void *thread_func(void *arg) {
    set_affinity((int)(long)arg);
    for (int j = 0; j < M; j++) {
        nwait++;
        for (long i = 0; i < loops; i++)
            sum += i;
        for (long i = 0; i < loops; i++)
            sum += i;
    }
    return NULL; /* a void* function must not fall off its end */
}
clone处Preemption的数目基本一致,但在加锁的情况下:
/*
 * Locked worker: nwait++ and one summation loop inside the lock, then a
 * sixth-power loop outside it. Pins the thread to the core index carried in
 * `arg`, then runs M rounds (M, loops, sum, nwait and mutex come from the
 * enclosing program). Returns NULL.
 */
void *thread_func(void *arg) {
    set_affinity((int)(long)arg);
    for (int j = 0; j < M; j++) {
        pthread_mutex_lock(&mutex);
        nwait++;
        for (long i = 0; i < loops; i++)
            sum += i;
        pthread_mutex_unlock(&mutex);
        for (long i = 0; i < loops; i++) {
            /* Unsigned multiply: i^6 overflows a signed 64-bit integer for
             * i above roughly 1400, and signed overflow is undefined. */
            const unsigned long long u = (unsigned long long)i;
            sum += u * u * u * u * u * u;
        }
    }
    return NULL; /* a void* function must not fall off its end */
}
和
/*
 * Locked worker: nwait++ and two summation loops, all inside the lock.
 * Pins the thread to the core index carried in `arg`, then runs M rounds
 * (M, loops, sum, nwait and mutex come from the enclosing program).
 * Returns NULL.
 */
void *thread_func(void *arg) {
    set_affinity((int)(long)arg);
    for (int j = 0; j < M; j++) {
        pthread_mutex_lock(&mutex);
        nwait++;
        for (long i = 0; i < loops; i++)
            sum += i;
        for (long i = 0; i < loops; i++)
            sum += i;
        pthread_mutex_unlock(&mutex);
    }
    return NULL; /* a void* function must not fall off its end */
}
clone处Preemption的数目不一样,前者要明显多于后者。但是如果我们将后者改为:
/*
 * Locked worker: nwait++ and one summation loop inside the lock, then a
 * second summation loop outside it. Pins the thread to the core index
 * carried in `arg`, then runs M rounds (M, loops, sum, nwait and mutex come
 * from the enclosing program). Returns NULL.
 */
void *thread_func(void *arg) {
    set_affinity((int)(long)arg);
    for (int j = 0; j < M; j++) {
        pthread_mutex_lock(&mutex);
        nwait++;
        for (long i = 0; i < loops; i++)
            sum += i;
        pthread_mutex_unlock(&mutex);
        for (long i = 0; i < loops; i++)
            sum += i;
    }
    return NULL; /* a void* function must not fall off its end */
}
则VTune分析有:
而解释C、D、E三者之间的差异,也许也能够用我们的“时间理论”。运行三者:
在说明原因之前,先看另一个程序:
/*
 * Locked worker: only nwait++ inside the lock, followed by two sixth-power
 * loops outside it. Pins the thread to the core index carried in `arg`, then
 * runs M rounds (M, loops, sum, nwait and mutex come from the enclosing
 * program). Returns NULL.
 */
void *thread_func(void *arg) {
    set_affinity((int)(long)arg);
    for (int j = 0; j < M; j++) {
        pthread_mutex_lock(&mutex);
        nwait++;
        pthread_mutex_unlock(&mutex);
        for (long i = 0; i < loops; i++) {
            /* Unsigned multiply: i^6 overflows a signed 64-bit integer for
             * i above roughly 1400, and signed overflow is undefined. */
            const unsigned long long u = (unsigned long long)i;
            sum += u * u * u * u * u * u;
        }
        for (long i = 0; i < loops; i++) {
            const unsigned long long u = (unsigned long long)i;
            sum += u * u * u * u * u * u;
        }
    }
    return NULL; /* a void* function must not fall off its end */
}
和D在clone处拥有基本一样的Preemption数。但二者的执行时间却大不一样。
看来无锁和加锁还是有个重要区别的。我们都知道在无锁情况下,所有子线程并行执行。VTune中有如下调度:
事实上“时间理论”也适用于加锁情况,那为什么会出现上面C、D、E的情况,以及D和F的情况?我们也从调度图入手:
版权声明:本文为博主原创文章,未经博主同意,不得转载。
Preemption Context Switches 和 Synchronization Context Switches的更多相关文章
- context:component-scan" 的前缀 "context" 未绑定。
SpElUtilTest.testSpELLiteralExpressiontestSpELLiteralExpression(cn.zr.spring.spel.SpElUtilTest)org.s ...
- Android中,Context,什么是Context?
注:本文翻译自Context, What Context?,原文链接在这里,作者是Dave Smith.ps:译者链接http://blog.csdn.net/race604/article/deta ...
- Android开发之Android Context,上下文(Activity Context, Application Context)
转载:http://blog.csdn.net/lmj623565791/article/details/40481055 1.Context概念Context,相信不管是第一天开发Android,还 ...
- System.Drawing.Design.UITypeEditor自定义控件属性GetEditStyle(ITypeDescriptorContext context),EditValue(ITypeDescriptorContext context, IServiceProvider provider, object value)
using System; using System.Collections.Generic; using System.Linq; using System.Text; using System.C ...
- Spring context:component-scan中使用context:include-filter和context:exclude-filter
Spring context:component-scan中使用context:include-filter和context:exclude-filter XML: <?xml version= ...
- Android深入理解Context(一)Context关联类和Application Context创建过程
前言 Context也就是上下文对象,是Android较为常用的类,但是对于Context,很多人都停留在会用的阶段,这个系列会带大家从源码角度来分析Context,从而更加深入的理解它. 1.Con ...
- Tomcat 的context.xml说明、Context标签讲解
Tomcat的context.xml说明.Context标签讲解 1. 在tomcat 5.5之前 --------------------------- Context体现在/conf/server ...
- 元素 "context:component-scan" 的前缀 "context" 未绑定的解决方案
在动态web项目(Dynamic Web Project)中,使用SpringMVC框架,新建Spring的配置文件springmvc.xml,添加扫描控制器 <context:componen ...
- Tomcat的context.xml说明、Context标签讲解
Tomcat的context.xml说明.Context标签讲解 1. 在tomcat 5.5之前 --------------------------- Context体现在/conf/server ...
随机推荐
- 辛星解读为什么PHP须要模板
近期有个人问我:为什么PHP须要模板呢?整个站点的编写都是我一个人完毕的,从前端到后端,都是这样,我一个人写站点是不是就不须要模板了呢?我当时还真给问住了,也没想好非常合适的回答它的方式,于是就随便说 ...
- PHP_保留两位小数而且四舍五入_保留两位小数而且不四舍五入
php保留两位小数而且四舍五入 $num = 123213.666666; echo sprintf("%.2f", $num); php保留两位小数而且不四舍五入 $num = ...
- windows phone (22) 隐藏元素
原文:windows phone (22) 隐藏元素 在wp中我们也会用到隐藏某个元素,已达到某种效果,刚刚从文章看到了,分享一下[作者:神舟龙] Visibility 此属性能非常方便的实现隐藏某个 ...
- MySQL Windows ZIP 免费安装和启动设置
MySQL Windows ZIP免安装版,设置和启动的过程事实上挺麻烦的.以下一步一步介绍使用的过程: 1.下载Windows (x86, 64-bit), ZIP Archive: 2.解压zip ...
- 经典排序算法 - 归并排序Merge sort
经典排序算法 - 归并排序Merge sort 原理,把原始数组分成若干子数组,对每个子数组进行排序, 继续把子数组与子数组合并,合并后仍然有序,直到所有合并完,形成有序的数组 举例 无序数组[6 2 ...
- linux 经常使用配置
教研室用的非常旧的fedora14,装一些软件和下载东西的时候比較蛋疼,恰巧ubuntu14.04 公布,于是安装试试,顺便记录下经常使用的配置,备忘. 1. 制作镜像,比較老的主板,写入方式选择US ...
- 瑞丽的SQL-SQL Server的表旋转(行列转换)
所谓表旋转,就是将表的行转换为列,或是将表的列转换为行,这是从SQL Server 2005開始提供的新技术.因此,如果希望使用此功能,须要将数据库的兼容级别设置为90.表旋转在某些方面也是攻克了表的 ...
- Linux高性能server规划——多进程编程
多进程编程 多进程编程包含例如以下内容: 复制进程影映像的fork系统调用和替换进程映像的exec系列系统调用. 僵尸进程以及怎样避免僵尸进程 进程间通信(Inter-Process Communic ...
- Git打补丁常见问题
Git打补丁常见问题 往往觉得得到某个功能的补丁就觉得这个功能我就已经成功拥有了,可是在最后一步的打补丁的工作也是须要相当慎重的,甚至有可能还要比你获取这个补丁花费的时间还要多.看到好多同行遇到这个问 ...
- 【Spring】Spring学习笔记-01-入门级实例
听说当前Spring框架很流行,我也准备好好学学Spring开发,并将学习的过程和大家分享,希望能对志同道合的同学有所帮助. 以下是我学习Spring的第一个样例. 1.Spring开发环境的搭建 我 ...