简介

TFRecord是TensorFlow官方推荐使用的数据格式化存储工具。
它规范了数据的读写方式。
只要生成一次TFRecord，之后的数据读取和加工处理的效率都会得到提高。

将图片转换成TFRecord

本例将fashion-MNIST数据转换成TFRecord。需要先把fashion数据集（四个 .gz 文件）下载到当前目录下的 data/fashion/ 子目录中，参考：https://github.com/zalandoresearch/fashion-mnist/tree/master/data/fashion

import numpy as np

import tensorflow as tf

import gzip

import os

# Directory that main() passes to load_mnist() as the location of the
# gzip-compressed Fashion-MNIST label/image files.
fashion_mnist_directory = './data/fashion/'

def load_mnist(path, kind='train'):
    """Load one split of gzip-compressed MNIST-style data from `path`.

    Reads `<kind>-labels-idx1-ubyte.gz` and `<kind>-images-idx3-ubyte.gz`,
    prints the shape of each decoded array, and returns both arrays.

    Args:
        path: Directory containing the two `.gz` files.
        kind: File-name prefix selecting the split (e.g. 'train' or 't10k').

    Returns:
        A tuple `(images, labels)` of uint8 arrays: `images` is reshaped to
        `(-1, 784)` and `labels` is one-dimensional.
    """
    labels_path = os.path.join(path, '%s-labels-idx1-ubyte.gz' % kind)
    images_path = os.path.join(path, '%s-images-idx3-ubyte.gz' % kind)

    # The first 8 bytes of the label file are skipped (file header).
    with gzip.open(labels_path, 'rb') as label_file:
        label_bytes = label_file.read()
    labels = np.frombuffer(label_bytes, dtype=np.uint8, offset=8)

    # The first 16 bytes of the image file are skipped (file header); the
    # remaining bytes are viewed as rows of 784 pixels each.
    with gzip.open(images_path, 'rb') as image_file:
        image_bytes = image_file.read()
    flat_pixels = np.frombuffer(image_bytes, dtype=np.uint8, offset=16)
    images = flat_pixels.reshape(-1, 784)

    for shown_path, array in ((labels_path, labels), (images_path, images)):
        print(shown_path, "shape =", array.shape)

    return images, labels

def make_example(image, label):
    """Wrap one image and its label in a `tf.train.Example`.

    Args:
        image: Object exposing `tobytes()`; its raw bytes are stored under
            the 'image_raw' feature.
        label: Value convertible with `int()`; stored under the 'label'
            feature as a single int64.

    Returns:
        A `tf.train.Example` with the two features 'image_raw' and 'label'.
    """
    raw_pixels = tf.train.BytesList(value=[image.tobytes()])
    class_id = tf.train.Int64List(value=[int(label)])
    feature_map = {
        'image_raw': tf.train.Feature(bytes_list=raw_pixels),
        'label': tf.train.Feature(int64_list=class_id),
    }
    return tf.train.Example(features=tf.train.Features(feature=feature_map))

def write_tfrecord(images, labels, filename):
    """Serialize paired images and labels into a single TFRecord file.

    Each (image, label) pair is converted with make_example() and written
    as one record. Progress is printed on a single console line, refreshed
    every 100 records.

    Args:
        images: Iterable of images, consumed in lockstep with `labels`.
        labels: Array of labels; `labels.shape[0]` is used as the total
            record count for the progress percentage.
        filename: Path of the TFRecord file to create.
    """
    total = labels.shape[0]

    # Using the writer as a context manager guarantees the file is closed
    # (and buffered records flushed) even if serialization raises midway.
    with tf.python_io.TFRecordWriter(filename) as writer:
        for k, (image, label) in enumerate(zip(images, labels)):
            exam = make_example(image, label)
            writer.write(exam.SerializeToString())

            if k % 100 == 0:
                # '\r' plus end='' rewrites the same console line in place.
                print("\rwriting", filename,
                      "%6.2f%% complited." % (100.0 * (k + 1) / total), end='')

    # Final line terminates the in-place progress display with a newline.
    print("\rwriting", filename, "%6.2f%% complited." % (100.0))

def main():
    """Convert the Fashion-MNIST train and test splits to TFRecord files.

    Both splits are loaded from `fashion_mnist_directory` first, then each
    is written to its own `.tfrecords` file in the working directory.
    """
    splits = [load_mnist(fashion_mnist_directory, kind)
              for kind in ('train', 't10k')]
    targets = ('fashion_mnist_train.tfrecords', 'fashion_mnist_test.tfrecords')

    for (images, labels), target in zip(splits, targets):
        write_tfrecord(images, labels, target)

# Run the conversion only when this file is executed as a script.
if __name__ == '__main__':

    main()

读取TFRecord数据来训练

以下代码读取TFRecord数据用于训练，该代码改编自官方例程：https://github.com/tensorflow/tensorflow/tree/master/tensorflow/examples/how_tos/reading_data

原始代码运行时报错，已修复。

注意：在这个例子中，每执行一次 _, loss_value = sess.run([train_op, loss])，只会从输入管道读取一个 Batch，无论 [] 中是哪些操作、有多少个操作。

import argparse

import os.path

import sys

import time

import tensorflow as tf

from tensorflow.examples.tutorials.mnist import mnist

# Parsed command-line flags; assigned in the __main__ block before training.
FLAGS = None

# TFRecord file names; inputs() joins one of them with FLAGS.train_dir.
TRAIN_FILE = 'fashion_mnist_train.tfrecords'

VALIDATION_FILE = 'fashion_mnist_test.tfrecords'

def decode(serialized_example):
    """Parse one serialized Example into an `(image, label)` tensor pair.

    Args:
        serialized_example: Scalar string tensor holding a serialized
            Example with 'image_raw' (bytes) and 'label' (int64) features.

    Returns:
        A tuple `(image, label)`: `image` is the uint8 decoding of
        'image_raw' with its static shape set from `mnist.IMAGE_PIXELS`,
        and `label` is the 'label' feature cast to int32.
    """
    feature_spec = {
        'image_raw': tf.FixedLenFeature([], tf.string),
        'label': tf.FixedLenFeature([], tf.int64),
    }
    parsed = tf.parse_single_example(serialized_example, features=feature_spec)

    image = tf.decode_raw(parsed['image_raw'], tf.uint8)
    # Attach the static shape; it is not recorded in the serialized bytes.
    image.set_shape(mnist.IMAGE_PIXELS)

    label = tf.cast(parsed['label'], tf.int32)
    return image, label

def augment(image, label):
    """Data-augmentation hook; currently returns its inputs unchanged.

    Distortions could be added here later (for example after reshaping the
    image to 28x28). The pair is passed through untouched for now.
    """
    untouched = (image, label)
    return untouched

def normalize(image, label):
    """Rescale `image` from the [0, 255] range to [-0.5, 0.5] floats.

    The image is cast to float32, multiplied by 1/255 and shifted by -0.5.
    The label is returned unchanged.
    """
    as_float = tf.cast(image, tf.float32)
    scaled = as_float * (1. / 255)
    image = scaled - 0.5
    return image, label

def inputs(train, batch_size, num_epochs):
    """Build the TFRecord input pipeline and return its next-batch tensors.

    Args:
        train: If truthy, read TRAIN_FILE; otherwise read VALIDATION_FILE.
            The file is looked up inside `FLAGS.train_dir`.
        batch_size: Number of examples per batch; also sizes the shuffle
            buffer (1000 + 3 * batch_size).
        num_epochs: How many times to repeat the dataset. Any falsy value
            is replaced by None before being passed to `repeat`.

    Returns:
        The result of `iterator.get_next()` for a one-shot iterator over
        the decoded, normalized, shuffled, repeated and batched dataset.
    """
    num_epochs = num_epochs or None

    record_name = TRAIN_FILE if train else VALIDATION_FILE
    filename = os.path.join(FLAGS.train_dir, record_name)

    with tf.name_scope('input'):
        pipeline = tf.data.TFRecordDataset(filename)

        # Per-example transforms, applied in this order.
        for transform in (decode, augment, normalize):
            pipeline = pipeline.map(transform)

        pipeline = (pipeline
                    .shuffle(1000 + 3 * batch_size)
                    .repeat(num_epochs)
                    .batch(batch_size))

        iterator = pipeline.make_one_shot_iterator()

    return iterator.get_next()

def run_training():
    """Train the model from `mnist` on the TFRecord training data.

    Builds a fresh graph (input pipeline, inference, loss, training op),
    then runs the training op in a session until the input pipeline raises
    `tf.errors.OutOfRangeError`. The loss is printed every 100 steps, and
    a summary line is printed when the data is exhausted.
    """
    with tf.Graph().as_default():
        images, labels = inputs(train=True,
                                batch_size=FLAGS.batch_size,
                                num_epochs=FLAGS.num_epochs)

        logits = mnist.inference(images, FLAGS.hidden1, FLAGS.hidden2)
        loss = mnist.loss(logits, labels)
        train_op = mnist.training(loss, FLAGS.learning_rate)

        initializers = (tf.global_variables_initializer(),
                        tf.local_variables_initializer())
        init_op = tf.group(*initializers)

        with tf.Session() as sess:
            sess.run(init_op)

            step = 0
            try:
                # Loop ends when the iterator raises OutOfRangeError.
                while True:
                    tick = time.time()
                    _, loss_value = sess.run([train_op, loss])
                    elapsed = time.time() - tick

                    if not step % 100:
                        print('Step %d: loss = %.2f (%.3f sec)' % (step, loss_value, elapsed))

                    step += 1
            except tf.errors.OutOfRangeError:
                print('Done training for %d epochs, %d steps.' % (FLAGS.num_epochs, step))

def main(_):
    """Entry point handed to tf.app.run; the positional argument is unused."""

    run_training()

if __name__ == '__main__':
    # One row per command-line flag: (name, type, default, help text).
    flag_table = (
        ('--learning_rate', float, 0.01, 'Initial learning rate.'),
        ('--num_epochs', int, 2, 'Number of epochs to run trainer.'),
        ('--hidden1', int, 128, 'Number of units in hidden layer 1.'),
        ('--hidden2', int, 32, 'Number of units in hidden layer 2.'),
        ('--batch_size', int, 100, 'Batch size.'),
        ('--train_dir', str, './', 'Directory with the training data.'),
    )

    parser = argparse.ArgumentParser()
    for flag_name, flag_type, flag_default, flag_help in flag_table:
        parser.add_argument(flag_name, type=flag_type, default=flag_default, help=flag_help)

    # Arguments not recognized here are forwarded to tf.app.run.
    FLAGS, unparsed = parser.parse_known_args()
    tf.app.run(main=main, argv=[sys.argv[0]] + unparsed)

参考了：

https://blog.csdn.net/gg_18826075157/article/details/78449104
https://github.com/zalandoresearch/fashion-mnist/blob/master/utils/mnist_reader.py

TFRecord读写简介+Demo 基于Ubuntu18.04+Tensorflow1.12 无WARNING的更多相关文章

kubeadm部署1.17.3[基于Ubuntu18.04]
基于 Ubuntu18.04 使用 kubeadm 部署Kubernetes 1.17.3 高可用集群环境所有节点初始化 # cat <<EOF>> /etc/hosts ...
基于Ubuntu18.04一站式部署(python-mysql-redis-nginx)
基于Ubuntu18.04一站式部署 Python3.6.8的安装 1. 安装依赖 ~$ sudo apt install openssl* zlib* 2. 安装python3.6.8(个人建议从官 ...
ubuntu18.04系统下无外部显示问题解决
记录一下自己作死过程. 由于学习的需要,在windows10下装了ubuntu18.04系统,第一次装这个系统时,也出现了无外部显示,那时候是老师帮忙搞好的,当时没太在意,只是走马关花的看了老师操作了 ...
Kubernetes 基于 ubuntu18.04 手工部署 (k8s)
由于工作的需要, 手工部署一个 Kubernetes 环境(k8s).(以前都是云上搞定,拿来用) 习惯把这种工作记录下来,自己备查也和别人分享网上相关文章很多, 我也参考了很多,这里推荐一个链接 ...
TensorFlow从入门到理解（一）：搭建开发环境【基于Ubuntu18.04】
*注:教程及本文章皆使用Python3+语言,执行.py文件都是用终端(如果使用Python2+和IDE都会和本文描述有点不符) 一.安装,测试,卸载 TensorFlow官网介绍得很全面,很完美了, ...
腾讯云服务器ubuntu18.04部署禅道系统
踩了不少坑,记录一下. 基于ubuntu18.04 一开始按照网上的攻略下载安装包 ZenTaoPMS.9.8.3.zbox_64.tar.gz,通过FileZilla传到linux的/opt下面,解 ...
【Tool】---ubuntu18.04配置oh-my-zsh工具
作为Linux忠实用户,应该没有人不知道bash shell工具了吧,其实除了bash还有许多其他的工具,zsh就是一款很好得选择,基于zsh shell得基础之上,oh-my-zsh工具更是超级利器 ...
ubuntu18.04下搭建深度学习环境anaconda2+ cuda9.0+cudnn7.0.5+tensorflow1.7【原创】【学习笔记】
PC:ubuntu18.04.i5.七彩虹GTX1060显卡.固态硬盘.机械硬盘作者:庄泽彬(欢迎转载,请注明作者) 说明:记录在ubuntu18.04环境下搭建深度学习的环境,之前安装了cuda9 ...
tensorflow/pytorch/mxnet的pip安装，非源代码编译，基于cuda10/cudnn7.4.1/ubuntu18.04.md
os安装目前对tensorflow和cuda支持最好的是ubuntu的18.04 ,16.04这种lts,推荐使用18.04版本.非lts的版本一般不推荐. Windows倒是也能用来装深度GPU环 ...

随机推荐

phpmyadmin error:#2002 - 服务器没有响应 (或者本地 MySQL 服务器的套接字没有正确配置)
1. 将 "phpMyAdmin/libraries"文件夹下的config.default.php文件中的$cfg['Servers'][$i]['host'] = 'local ...
原来select语句在MySQL中是这样执行的！看完又涨见识了！这回我要碾压面试官！
大家好,我是冰河~~ MySQL作为互联网行业使用最多的关系型数据库之一,与其免费.开源的特性是密不可分的.然而,很多小伙伴工作了很多年,只知道使用MySQL进行CRUD操作,这也导致很多小伙伴工作多 ...
Moonraker靶机
仅供个人娱乐靶机搭建与下载 Monraker靶机ip: 192.168.181.135 kali攻击者ip : 192.168.181.128 说明:获取目标主机的root权限并读取目录中的flag ...
php 几个算法
/** * 返回当前运行文件名 * @acces private * @return string */ private function run_filename() { $tmparr = exp ...
移植TensorFlow到Windows平台
2015年11月,Google宣布开源旗下机器学习工具TensorFlow,引发业界热潮.TensorFlow原生支持*unix系和安卓平台,但并不提供对Windows平台的支持.如果想在Window ...
ERROR: database "db" is being accessed by other users
执行DROP DATABASE testdb;的时候提示: ERROR: database "testdb" is being accessed by other users DE ...
Java面向对象01——什么是面向对象
面向过程&面向对象面向过程思想(微观): 步骤清晰简单,第一步做什么,第二部做什么....... 面向过程适合处理一些较为简单的问题面向对象思想(宏观): 物以类聚,分类的思维模式,思考问 ...
Hello World！！
已经工作了一年多,现在才开始写博客.话说,种一棵树最好的时机是十年前,其次是现在,我觉得不迟.俗话说滴水穿石,我想把一些东西,都慢慢积累起来,看见自己的成长.既方便查看,更不容易忘记.可能在网上已经有 ...
10分钟了解微服务、容器和Kubernetes
什么是微服务? 什么是微服务?你应该使用微服务吗?微服务与容器和 Kubernetes 有什么关系?如果这些问题在您的日常生活中不断出现,那么这篇文章适合您. 从根本上说,微服务只是一个运行在服务器或 ...
Python语言系列-07-面向对象2
重构父类__init__方法 #!/usr/bin/env python3 # author:Alnk(李成果) # 需求:Dog类要新增一个实例属性,但是Cat类不需要 class Animal(o ...

TFRecord读写简介+Demo 基于Ubuntu18.04+Tensorflow1.12 无WARNING

简介

将图片转换成TFRecord

读取TFRecord数据来训练

TFRecord读写简介+Demo 基于Ubuntu18.04+Tensorflow1.12 无WARNING的更多相关文章

随机推荐

热门专题