如题:

官方中的内容支持:

https://www.mindspore.cn/tutorial/training/zh-CN/r1.2/advanced_use/summary_record.html?highlight=sink_mode

使用summary功能时,建议将model.train方法的dataset_sink_mode参数设置为False,从而以step作为collect_freq参数的单位收集数据。当dataset_sink_modeTrue时,将以epoch作为collect_freq的单位,此时建议手动设置collect_freq参数。collect_freq参数默认值为10

从官方文档中我们可以知道:

如果model.train方法的dataset_sink_mode参数设置为False,那么就是以step为单位打印数据。

如果model.train方法的dataset_sink_mode参数设置为True,那么就是以episode为单位打印数据。

这里我们不过多解释,直接上代码:

(代码具体参看:https://www.cnblogs.com/devilmaycry812839668/p/14971668.html

当   dataset_sink_mode=False  时:

model.train(epoch_size, ds_train, callbacks=[ckpoint_cb, LossMonitor(125)], dataset_sink_mode=False)

#!/usr/bin python
# encoding:UTF-8 """" 对输入的超参数进行处理 """
import os
import argparse """ 设置运行的背景context """
from mindspore import context """ 对数据集进行预处理 """
import mindspore.dataset as ds
import mindspore.dataset.transforms.c_transforms as C
import mindspore.dataset.vision.c_transforms as CV
from mindspore.dataset.vision import Inter
from mindspore import dtype as mstype """ 构建神经网络 """
import mindspore.nn as nn
from mindspore.common.initializer import Normal """ 训练时对模型参数的保存 """
from mindspore.train.callback import ModelCheckpoint, CheckpointConfig """ 导入模型训练需要的库 """
from mindspore.nn import Accuracy
from mindspore.train.callback import LossMonitor
from mindspore import Model parser = argparse.ArgumentParser(description='MindSpore LeNet Example')
parser.add_argument('--device_target', type=str, default="GPU", choices=['Ascend', 'GPU', 'CPU']) args = parser.parse_known_args()[0] # 为mindspore设置运行背景context
context.set_context(mode=context.GRAPH_MODE, device_target=args.device_target) def create_dataset(data_path, batch_size=32, repeat_size=1,
num_parallel_workers=1):
# 定义数据集
mnist_ds = ds.MnistDataset(data_path)
resize_height, resize_width = 32, 32
rescale = 1.0 / 255.0
shift = 0.0
rescale_nml = 1 / 0.3081
shift_nml = -1 * 0.1307 / 0.3081 # 定义所需要操作的map映射
resize_op = CV.Resize((resize_height, resize_width), interpolation=Inter.LINEAR)
rescale_nml_op = CV.Rescale(rescale_nml, shift_nml)
rescale_op = CV.Rescale(rescale, shift)
hwc2chw_op = CV.HWC2CHW()
type_cast_op = C.TypeCast(mstype.int32) # 使用map映射函数,将数据操作应用到数据集
mnist_ds = mnist_ds.map(operations=type_cast_op, input_columns="label", num_parallel_workers=num_parallel_workers)
mnist_ds = mnist_ds.map(operations=resize_op, input_columns="image", num_parallel_workers=num_parallel_workers)
mnist_ds = mnist_ds.map(operations=rescale_op, input_columns="image", num_parallel_workers=num_parallel_workers)
mnist_ds = mnist_ds.map(operations=rescale_nml_op, input_columns="image", num_parallel_workers=num_parallel_workers)
mnist_ds = mnist_ds.map(operations=hwc2chw_op, input_columns="image", num_parallel_workers=num_parallel_workers) # 进行shuffle、batch、repeat操作
buffer_size = 10000
mnist_ds = mnist_ds.shuffle(buffer_size=buffer_size)
mnist_ds = mnist_ds.batch(batch_size, drop_remainder=True)
mnist_ds = mnist_ds.repeat(repeat_size) return mnist_ds class LeNet5(nn.Cell):
"""
Lenet网络结构
""" def __init__(self, num_class=10, num_channel=1):
super(LeNet5, self).__init__()
# 定义所需要的运算
self.conv1 = nn.Conv2d(num_channel, 6, 5, pad_mode='valid')
self.conv2 = nn.Conv2d(6, 16, 5, pad_mode='valid')
self.fc1 = nn.Dense(16 * 5 * 5, 120, weight_init=Normal(0.02))
self.fc2 = nn.Dense(120, 84, weight_init=Normal(0.02))
self.fc3 = nn.Dense(84, num_class, weight_init=Normal(0.02))
self.relu = nn.ReLU()
self.max_pool2d = nn.MaxPool2d(kernel_size=2, stride=2)
self.flatten = nn.Flatten() def construct(self, x):
# 使用定义好的运算构建前向网络
x = self.conv1(x)
x = self.relu(x)
x = self.max_pool2d(x)
x = self.conv2(x)
x = self.relu(x)
x = self.max_pool2d(x)
x = self.flatten(x)
x = self.fc1(x)
x = self.relu(x)
x = self.fc2(x)
x = self.relu(x)
x = self.fc3(x)
return x # 实例化网络
net = LeNet5() # 定义损失函数
net_loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean') # 定义优化器
net_opt = nn.Momentum(net.trainable_params(), learning_rate=0.01, momentum=0.9) # 设置模型保存参数
# 每125steps保存一次模型参数,最多保留15个文件
config_ck = CheckpointConfig(save_checkpoint_steps=125, keep_checkpoint_max=15)
# 应用模型保存参数
ckpoint = ModelCheckpoint(prefix="checkpoint_lenet", config=config_ck) def train_net(args, model, epoch_size, data_path, repeat_size, ckpoint_cb, sink_mode):
"""定义训练的方法"""
# 加载训练数据集
ds_train = create_dataset(os.path.join(data_path, "train"), 32, repeat_size)
model.train(epoch_size, ds_train, callbacks=[ckpoint_cb, LossMonitor(125)], dataset_sink_mode=sink_mode) def test_net(network, model, data_path):
"""定义验证的方法"""
ds_eval = create_dataset(os.path.join(data_path, "test"))
acc = model.eval(ds_eval, dataset_sink_mode=False)
print("{}".format(acc)) mnist_path = "./datasets/MNIST_Data"
train_epoch = 1
dataset_size = 1
model = Model(net, net_loss, net_opt, metrics={"Accuracy": Accuracy()})
train_net(args, model, train_epoch, mnist_path, dataset_size, ckpoint, False)
test_net(net, model, mnist_path)

输出结果:

epoch: 1 step: 125, loss is 2.2959
epoch: 1 step: 250, loss is 2.2959309
epoch: 1 step: 375, loss is 2.2982068
epoch: 1 step: 500, loss is 2.2916625
epoch: 1 step: 625, loss is 2.3001077
epoch: 1 step: 750, loss is 1.9395046
epoch: 1 step: 875, loss is 0.728865
epoch: 1 step: 1000, loss is 0.2426785
epoch: 1 step: 1125, loss is 0.45475814
epoch: 1 step: 1250, loss is 0.1676599
epoch: 1 step: 1375, loss is 0.14273866
epoch: 1 step: 1500, loss is 0.030339874
epoch: 1 step: 1625, loss is 0.19792284
epoch: 1 step: 1750, loss is 0.09066871
epoch: 1 step: 1875, loss is 0.12958783
{'Accuracy': 0.9688501602564102}

当   dataset_sink_mode=True  时:

model.train(epoch_size, ds_train, callbacks=[ckpoint_cb, LossMonitor(125)], dataset_sink_mode=True)

#!/usr/bin python
# encoding:UTF-8 """" 对输入的超参数进行处理 """
import os
import argparse """ 设置运行的背景context """
from mindspore import context """ 对数据集进行预处理 """
import mindspore.dataset as ds
import mindspore.dataset.transforms.c_transforms as C
import mindspore.dataset.vision.c_transforms as CV
from mindspore.dataset.vision import Inter
from mindspore import dtype as mstype """ 构建神经网络 """
import mindspore.nn as nn
from mindspore.common.initializer import Normal """ 训练时对模型参数的保存 """
from mindspore.train.callback import ModelCheckpoint, CheckpointConfig """ 导入模型训练需要的库 """
from mindspore.nn import Accuracy
from mindspore.train.callback import LossMonitor
from mindspore import Model parser = argparse.ArgumentParser(description='MindSpore LeNet Example')
parser.add_argument('--device_target', type=str, default="GPU", choices=['Ascend', 'GPU', 'CPU']) args = parser.parse_known_args()[0] # 为mindspore设置运行背景context
context.set_context(mode=context.GRAPH_MODE, device_target=args.device_target) def create_dataset(data_path, batch_size=32, repeat_size=1,
num_parallel_workers=1):
# 定义数据集
mnist_ds = ds.MnistDataset(data_path)
resize_height, resize_width = 32, 32
rescale = 1.0 / 255.0
shift = 0.0
rescale_nml = 1 / 0.3081
shift_nml = -1 * 0.1307 / 0.3081 # 定义所需要操作的map映射
resize_op = CV.Resize((resize_height, resize_width), interpolation=Inter.LINEAR)
rescale_nml_op = CV.Rescale(rescale_nml, shift_nml)
rescale_op = CV.Rescale(rescale, shift)
hwc2chw_op = CV.HWC2CHW()
type_cast_op = C.TypeCast(mstype.int32) # 使用map映射函数,将数据操作应用到数据集
mnist_ds = mnist_ds.map(operations=type_cast_op, input_columns="label", num_parallel_workers=num_parallel_workers)
mnist_ds = mnist_ds.map(operations=resize_op, input_columns="image", num_parallel_workers=num_parallel_workers)
mnist_ds = mnist_ds.map(operations=rescale_op, input_columns="image", num_parallel_workers=num_parallel_workers)
mnist_ds = mnist_ds.map(operations=rescale_nml_op, input_columns="image", num_parallel_workers=num_parallel_workers)
mnist_ds = mnist_ds.map(operations=hwc2chw_op, input_columns="image", num_parallel_workers=num_parallel_workers) # 进行shuffle、batch、repeat操作
buffer_size = 10000
mnist_ds = mnist_ds.shuffle(buffer_size=buffer_size)
mnist_ds = mnist_ds.batch(batch_size, drop_remainder=True)
mnist_ds = mnist_ds.repeat(repeat_size) return mnist_ds class LeNet5(nn.Cell):
"""
Lenet网络结构
""" def __init__(self, num_class=10, num_channel=1):
super(LeNet5, self).__init__()
# 定义所需要的运算
self.conv1 = nn.Conv2d(num_channel, 6, 5, pad_mode='valid')
self.conv2 = nn.Conv2d(6, 16, 5, pad_mode='valid')
self.fc1 = nn.Dense(16 * 5 * 5, 120, weight_init=Normal(0.02))
self.fc2 = nn.Dense(120, 84, weight_init=Normal(0.02))
self.fc3 = nn.Dense(84, num_class, weight_init=Normal(0.02))
self.relu = nn.ReLU()
self.max_pool2d = nn.MaxPool2d(kernel_size=2, stride=2)
self.flatten = nn.Flatten() def construct(self, x):
# 使用定义好的运算构建前向网络
x = self.conv1(x)
x = self.relu(x)
x = self.max_pool2d(x)
x = self.conv2(x)
x = self.relu(x)
x = self.max_pool2d(x)
x = self.flatten(x)
x = self.fc1(x)
x = self.relu(x)
x = self.fc2(x)
x = self.relu(x)
x = self.fc3(x)
return x # 实例化网络
net = LeNet5() # 定义损失函数
net_loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean') # 定义优化器
net_opt = nn.Momentum(net.trainable_params(), learning_rate=0.01, momentum=0.9) # 设置模型保存参数
# 每125steps保存一次模型参数,最多保留15个文件
config_ck = CheckpointConfig(save_checkpoint_steps=125, keep_checkpoint_max=15)
# 应用模型保存参数
ckpoint = ModelCheckpoint(prefix="checkpoint_lenet", config=config_ck) def train_net(args, model, epoch_size, data_path, repeat_size, ckpoint_cb, sink_mode):
"""定义训练的方法"""
# 加载训练数据集
ds_train = create_dataset(os.path.join(data_path, "train"), 32, repeat_size)
model.train(epoch_size, ds_train, callbacks=[ckpoint_cb, LossMonitor(125)], dataset_sink_mode=sink_mode) def test_net(network, model, data_path):
"""定义验证的方法"""
ds_eval = create_dataset(os.path.join(data_path, "test"))
acc = model.eval(ds_eval, dataset_sink_mode=False)
print("{}".format(acc)) mnist_path = "./datasets/MNIST_Data"
train_epoch = 1
dataset_size = 1
model = Model(net, net_loss, net_opt, metrics={"Accuracy": Accuracy()})
train_net(args, model, train_epoch, mnist_path, dataset_size, ckpoint, True)
test_net(net, model, mnist_path)

输出结果:

epoch: 1 step: 1875, loss is 0.04107348
{'Accuracy': 0.9638421474358975}

==================================================================

可以看到在mindspore中进行训练时如果设置 dataset_sink_mode=True

那么无论设置多少step打印一次结果,每个epoch中只会打印一次结果,即一个epoch中最后的那个打印结果

(比如数据集中一个epoch是100个数据,batch_size=10, 一个epoch的数据训练需要10个steps,  如果设置dataset_sink_mode=True那么只会打印第10step的结果,前9次step的结果不打印)。

=====================================================================

经过进一步发现,上面的描述都是在 运行背景设置为:

context.set_context(mode=context.GRAPH_MODE, device_target='GPU')

下才成立的。

如果设置为:

context.set_context(mode=context.PYNATIVE_MODE, device_target='GPU')

那么,无论 dataset_sink_mode 设置为False 还是True ,  都是执行 以step为单位打印数据。

代码如下:

#!/usr/bin python
# encoding:UTF-8 """" 对输入的超参数进行处理 """
import os
import argparse """ 设置运行的背景context """
from mindspore import context """ 对数据集进行预处理 """
import mindspore.dataset as ds
import mindspore.dataset.transforms.c_transforms as C
import mindspore.dataset.vision.c_transforms as CV
from mindspore.dataset.vision import Inter
from mindspore import dtype as mstype """ 构建神经网络 """
import mindspore.nn as nn
from mindspore.common.initializer import Normal """ 训练时对模型参数的保存 """
from mindspore.train.callback import ModelCheckpoint, CheckpointConfig """ 导入模型训练需要的库 """
from mindspore.nn import Accuracy
from mindspore.train.callback import LossMonitor
from mindspore import Model parser = argparse.ArgumentParser(description='MindSpore LeNet Example')
parser.add_argument('--device_target', type=str, default="GPU", choices=['Ascend', 'GPU', 'CPU']) args = parser.parse_known_args()[0] # 为mindspore设置运行背景context
context.set_context(mode=context.PYNATIVE_MODE, device_target=args.device_target) def create_dataset(data_path, batch_size=32, repeat_size=1,
num_parallel_workers=1):
# 定义数据集
mnist_ds = ds.MnistDataset(data_path)
resize_height, resize_width = 32, 32
rescale = 1.0 / 255.0
shift = 0.0
rescale_nml = 1 / 0.3081
shift_nml = -1 * 0.1307 / 0.3081 # 定义所需要操作的map映射
resize_op = CV.Resize((resize_height, resize_width), interpolation=Inter.LINEAR)
rescale_nml_op = CV.Rescale(rescale_nml, shift_nml)
rescale_op = CV.Rescale(rescale, shift)
hwc2chw_op = CV.HWC2CHW()
type_cast_op = C.TypeCast(mstype.int32) # 使用map映射函数,将数据操作应用到数据集
mnist_ds = mnist_ds.map(operations=type_cast_op, input_columns="label", num_parallel_workers=num_parallel_workers)
mnist_ds = mnist_ds.map(operations=resize_op, input_columns="image", num_parallel_workers=num_parallel_workers)
mnist_ds = mnist_ds.map(operations=rescale_op, input_columns="image", num_parallel_workers=num_parallel_workers)
mnist_ds = mnist_ds.map(operations=rescale_nml_op, input_columns="image", num_parallel_workers=num_parallel_workers)
mnist_ds = mnist_ds.map(operations=hwc2chw_op, input_columns="image", num_parallel_workers=num_parallel_workers) # 进行shuffle、batch、repeat操作
buffer_size = 10000
mnist_ds = mnist_ds.shuffle(buffer_size=buffer_size)
mnist_ds = mnist_ds.batch(batch_size, drop_remainder=True)
mnist_ds = mnist_ds.repeat(repeat_size) return mnist_ds class LeNet5(nn.Cell):
"""
Lenet网络结构
""" def __init__(self, num_class=10, num_channel=1):
super(LeNet5, self).__init__()
# 定义所需要的运算
self.conv1 = nn.Conv2d(num_channel, 6, 5, pad_mode='valid')
self.conv2 = nn.Conv2d(6, 16, 5, pad_mode='valid')
self.fc1 = nn.Dense(16 * 5 * 5, 120, weight_init=Normal(0.02))
self.fc2 = nn.Dense(120, 84, weight_init=Normal(0.02))
self.fc3 = nn.Dense(84, num_class, weight_init=Normal(0.02))
self.relu = nn.ReLU()
self.max_pool2d = nn.MaxPool2d(kernel_size=2, stride=2)
self.flatten = nn.Flatten() def construct(self, x):
# 使用定义好的运算构建前向网络
x = self.conv1(x)
x = self.relu(x)
x = self.max_pool2d(x)
x = self.conv2(x)
x = self.relu(x)
x = self.max_pool2d(x)
x = self.flatten(x)
x = self.fc1(x)
x = self.relu(x)
x = self.fc2(x)
x = self.relu(x)
x = self.fc3(x)
return x # 实例化网络
net = LeNet5() # 定义损失函数
net_loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean') # 定义优化器
net_opt = nn.Momentum(net.trainable_params(), learning_rate=0.01, momentum=0.9) # 设置模型保存参数
# 每125steps保存一次模型参数,最多保留15个文件
config_ck = CheckpointConfig(save_checkpoint_steps=125, keep_checkpoint_max=15)
# 应用模型保存参数
ckpoint = ModelCheckpoint(prefix="checkpoint_lenet", config=config_ck) def train_net(args, model, epoch_size, data_path, repeat_size, ckpoint_cb, sink_mode):
"""定义训练的方法"""
# 加载训练数据集
ds_train = create_dataset(os.path.join(data_path, "train"), 32, repeat_size)
model.train(epoch_size, ds_train, callbacks=[ckpoint_cb, LossMonitor(125)], dataset_sink_mode=sink_mode) def test_net(network, model, data_path):
"""定义验证的方法"""
ds_eval = create_dataset(os.path.join(data_path, "test"))
acc = model.eval(ds_eval, dataset_sink_mode=False)
print("{}".format(acc)) mnist_path = "./datasets/MNIST_Data"
train_epoch = 1
dataset_size = 1
model = Model(net, net_loss, net_opt, metrics={"Accuracy": Accuracy()})
train_net(args, model, train_epoch, mnist_path, dataset_size, ckpoint, True)
test_net(net, model, mnist_path)

不得不说,对于新框架 MindSpore 来说,还是坑蛮多的,稍有不注意就会出意料以外的结果。

model.train方法的dataset_sink_mode参数设置为False时以step作为单位打印数据——(只在mode=context.GRAPH_MODE下成立,在mode=context.PYNATIVE_MODE模式下不成立)的更多相关文章

  1. 解决部分在Debug模式下程序没问题但是Release模式下出现问题的方法

    编译策略介绍 关于优化级别:GCC_OPTIMIZATION_LEVEL 描述如下 None: Do not optimize.  [-O0]With this setting, the compil ...

  2. uploadify3.2.1的参数设置

    $('#file_upload').uploadify({ auto:false, //接受true 或 false两个值,当为true时选择文件后会自动上传:为false时只会把选择的文件增加进队列 ...

  3. ArcEngine中IFeatureClass.Search(filter, Recycling)方法中Recycling参数的理解

    转自 ArcEngine中IFeatureClass.Search(filter, Recycling)方法中Recycling参数的理解   ArcGIS Engine中总调用IFeatureCla ...

  4. jqGrid的autoencode参数设置为true在客户端可能引发的编码问题

    不久前使用jqGrid+MVC做过一段时间开发. 一开始,分页参数几乎都是默认值,jqGrid的分页功能很好用. 考虑到each input is evil,我们的系统对安全性又有较高要求,所以,为了 ...

  5. SSRS报表参数设置

    一.日期时间类型的参数注意事项: 关于数据类型的选择:(只有数据类型设置为日期/时间格式,在查询的时候才会显示日期控件,提示信息一般改成汉字) 指定默认值:指定开始日期为前10天,

  6. Lattice 的 Framebuffer IP核使用调试笔记之IP核生成与参数设置

    本文由远航路上ing 原创,转载请标明出处. 这节笔记记录IP核的生成以及参数设置. 先再IP库里下载安装Framebuffer 的ipcore 并安装完毕. 一.IP核的生成: 1.先点击IP核则右 ...

  7. mysql参数设置与查看

    查看默认的参数设置(包含读取到的参数文件中的设置) mysqld --verbose --help   查看默认的参数设置(不包含参数文件中的设置) mysqld --no-defaults --ve ...

  8. 【记录一个问题】opencl enqueueWriteBuffer()中,cl_bool blocking参数设置无效

    err = queue.enqueueWriteBuffer(in_buf, true, 0, bmp_size, bmp_data, NULL, &event); 以上代码中,第二个参数设置 ...

  9. Jdbc Url 设置allowMultiQueries为true和false时底层处理机制研究

    一个mysql jdbc待解之谜 关于jdbc  url参数 allowMultiQueries 如下的一个普通JDBC示例: String user ="root"; Strin ...

  10. cxf client在后台不通且chunk设置为false的时候不能在控制台输出请求日志

    场景: 服务编排框架支持编排webservice服务.call webservice的client是基于cxf做的.为了使用服务编排的开发者调试与定位问题方便,需要将webservice的请求与响应报 ...

随机推荐

  1. 铭瑄主板重启后USB3.0失效键盘鼠标无反应需要重新插拔

    铭瑄主板重启后USB3.0失效键盘鼠标无反应需要重新插拔 环境: 铭瑄B760 主板,使用鼠标键盘使用USB 3.0 HUB 连接到主板 USB 3.0 口. 重启后,键盘鼠标无反应,需要重新插拔. ...

  2. 3代 I3 3220 对比12代 G6900 测试 入手 上手 评测

    3代 I3 3220 对比12代 G6900 测试 入手 上手 评测 I3 3220 的CPU-Z ,和cinebench r23 跑分. G6900 的CPU-Z ,和cinebench r23 跑 ...

  3. C#.NET 读取PFX私钥证书并导出PEM格式私钥

    项目nuget引用 BouncyCastle. 读取证书 X509Certificate2 x509 = new X509Certificate2(lblPfxPath.Text, txtPfxPwd ...

  4. CloseableHttpClient 连接超时导致XxlJob调度阻塞,影响调度任务的执行

    CloseableHttpClient 连接超时导致XxlJob调度阻塞,影响调度任务的执行 问题原因1.分析日志发现,xxlJob后台界面没有执行时间和执行结果,在某一个时间点之后,某一个任务因为阻 ...

  5. python生成随机四位数和AttributeError: module 'random' has no attribute 'sample'

    python生成随机四位数和AttributeError: module 'random' has no attribute 'sample' ## AttributeError: module 'r ...

  6. Markdown常用语法详解

    背景知识 什么是html html是一种网页标记语言.我们平常见到的那么好看的网页就是通过html语言来编写的. html语言举例: <h1>hello world</h1> ...

  7. Linux设备模型:3、Uevent

    作者:wowo 发布于:2014-3-10 20:39 分类:统一设备模型 http://www.wowotech.net/device_model/uevent.html Uevent Uevent ...

  8. STM32 CubeMX 学习:有关说明

    背景 STM32 是我以前学过的,而很久没有整理过的.因为之前学习的时间比较早,再加上各种资料要么不成熟,要么不齐全:再加上自己一开始没有比较完善的学习经验:以至于我的学习并不扎实. 趁着 STM 的 ...

  9. Spring的三种依赖注入的方式

    1.什么是依赖注入 依赖注入(Dependency Injection,简称DI),是IOC的一种别称,用来减少对象间的依赖关系. 提起依赖注入,就少不了IOC. IOC(Inversion of C ...

  10. LaTeX 编辑协作平台 Overleaf 安装和使用教程

    在学术界和科技行业,LaTeX 已成为撰写高质量文档的标准工具.然而,传统的 LaTeX 使用体验常常伴随着以下挑战: 学习曲线陡峭 环境配置复杂 多人协作困难 实时预览不便 当然,市面上不乏很多在线 ...