【学习笔记】tensorflow图片读取

图像基本概念
图像基本操作
- 图像基本操作API
图像读取API
狗图片读取
CIFAR-10二进制数据读取
TFRecords
- TFRecords存储
- TFRecords读取方法

图像基本概念

在图像数字化表示当中，分为黑白和彩色两种。在数字化表示图片的时候，有三个因素。分别是图片的长、图片的宽、图片的颜色通道数。那么黑白图片的颜色通道数为1，它只需要一个数字就可以表示一个像素位；而彩色照片就不一样了，它有三个颜色通道，分别为RGB，通过三个数字表示一个像素位。TensorFlow支持JPG、PNG图像格式，RGB、RGBA颜色空间。图像用与图像尺寸相同(height*width*channel)的张量表示。图像所有像素存在磁盘文件，需要被加载到内存。

图像基本操作

目的：

增加图片数据的统一性
所有图片转换成指定大小
缩小图片数据量，防止增加开销

操作：

缩小图片大小

图像基本操作API

tf.image.resize_images(images, size)：调整图片大小（可以缩小，也可以放大）

images：4-D形状[batch, height, width, channels]或3-D形状的张量[height, width, channels]的图片数据
size：1-D int32张量：new_height, new_width，图像的新尺寸
返回4-D格式或者3-D格式图片

图像读取API

图像读取器：

tf.WholeFileReader：将文件的全部内容作为值输出的读取器

return：读取器实例
read(file_queue):输出将是一个文件名（key）和该文件的内容（值）

图像解码器：

tf.image.decode_jpeg(contents)：将JPEG编码的图像解码为uint8张量

return:uint8张量，3-D形状[height, width, channels]

tf.image.decode_png(contents)：将PNG编码的图像解码为uint8或uint16张量

return:张量类型，3-D形状[height, width, channels]

狗图片读取

import tensorflow as tf

import os

def readpic(filelist):
    """Build a queue-based pipeline that reads, decodes and batches JPEG images.

    Args:
        filelist: list of JPEG file paths fed into the filename queue.

    Returns:
        A tensor of shape [10, 250, 250, 3] holding one batch of resized
        images.
    """
    # Filename queue consumed by the reader below.
    file_queue = tf.train.string_input_producer(filelist)

    # WholeFileReader yields (filename, full file contents) per read.
    reader = tf.WholeFileReader()
    key, value = reader.read(file_queue)
    print(value)

    # Request 3 channels explicitly: the static shape asserted below is
    # [250, 250, 3], and without `channels` a grayscale JPEG would decode
    # to a single channel and break batching at run time.
    image = tf.image.decode_jpeg(value, channels=3)
    print(image)

    # Bring every image to the same spatial size so they can be batched.
    img_resize = tf.image.resize_images(image, [250, 250])
    print(img_resize)

    # tf.train.batch needs a fully defined static shape.
    img_resize.set_shape([250, 250, 3])

    image_batch = tf.train.batch([img_resize], batch_size=10, num_threads=1, capacity=10)
    print(image_batch)

    return image_batch

if __name__ == '__main__':
    # Every entry of the dog directory becomes one pipeline input.
    dog_dir = "./data/dogs"
    filelist = [os.path.join(dog_dir, name) for name in os.listdir(dog_dir)]

    image_batch = readpic(filelist)

    with tf.Session() as sess:
        # Coordinator used to stop and join the queue-runner threads.
        coord = tf.train.Coordinator()

        # Start the threads that fill the input queues.
        threads = tf.train.start_queue_runners(sess, coord=coord)

        try:
            # Fetch and print one batch.
            print(sess.run([image_batch]))
        finally:
            # Always stop and wait for the reader threads, even if the
            # fetch above raised.
            coord.request_stop()
            coord.join(threads)

案例流程：

构造图片文件队列
构造阅读器
读取图片数据
处理图片数据

CIFAR-10二进制数据读取

网站：https://www.cs.toronto.edu/~kriz/cifar.html

import tensorflow as tf

import os

# Command-line flag naming the directory that holds the CIFAR-10 binary files.
flags = tf.app.flags
flags.DEFINE_string("cifar_dir", "./data/cifar-10-batches-bin", "cifar目录")

# Parsed flag values, read by the code below as FLAGS.<name>.
FLAGS = flags.FLAGS

class CifarReader(object):
    """Read CIFAR-10 binary files through a TensorFlow queue pipeline."""

    def __init__(self, filelist):
        """Store the input files and the fixed record layout.

        Args:
            filelist: list of paths to the CIFAR-10 ``.bin`` files.
        """
        self.filelist = filelist

        # Layout of one record: 1 label byte followed by 32*32*3 pixel bytes.
        self.width = 32
        self.height = 32
        self.channel = 3
        self.label_bytes = 1
        self.image_bytes = self.width * self.height * self.channel
        self.bytes = self.label_bytes + self.image_bytes

    def read_and_decode(self):
        """Build the graph that reads, decodes and batches the binary records.

        Returns:
            A tuple ``(label_batch, image_batch)``: int32 labels of shape
            [20, 1] and uint8 images of shape [20, 32, 32, 3].
        """
        # Filename queue consumed by the reader.
        file_queue = tf.train.string_input_producer(self.filelist)

        # Each read returns exactly one fixed-size record.
        reader = tf.FixedLengthRecordReader(self.bytes)
        key, value = reader.read(file_queue)

        # Raw bytes -> uint8 vector of length self.bytes.
        label_image = tf.decode_raw(value, tf.uint8)
        print(label_image)

        # Split the record into its label and its pixel bytes.
        label = tf.cast(tf.slice(label_image, [0], [self.label_bytes]), tf.int32)
        image = tf.slice(label_image, [self.label_bytes], [self.image_bytes])

        # The CIFAR-10 binary format stores pixels channel-major (all red
        # values, then green, then blue), so reshape to
        # [channel, height, width] first and transpose to
        # [height, width, channel]; reshaping straight to [32, 32, 3] would
        # interleave the colour planes incorrectly.
        depth_major = tf.reshape(image, [self.channel, self.height, self.width])
        image_reshape = tf.transpose(depth_major, [1, 2, 0])
        print(label, image_reshape)

        # Group single examples into batches of 20.
        label_batch, image_batch = tf.train.batch([label, image_reshape], batch_size=20, num_threads=1, capacity=20)
        print(label_batch, image_batch)

        return label_batch, image_batch

if __name__ == '__main__':
    # Use every ``.bin`` file in the CIFAR directory as pipeline input.
    filelist = [
        os.path.join(FLAGS.cifar_dir, name)
        for name in os.listdir(FLAGS.cifar_dir)
        if name.endswith(".bin")
    ]

    cf = CifarReader(filelist)
    label_batch, image_batch = cf.read_and_decode()

    with tf.Session() as sess:
        # Coordinator used to stop and join the queue-runner threads.
        coord = tf.train.Coordinator()

        # Start the threads that fill the input queues.
        threads = tf.train.start_queue_runners(sess, coord=coord)

        try:
            # Fetch and print one batch of labels and images.
            print(sess.run([label_batch, image_batch]))
        finally:
            # Always stop and wait for the reader threads, even if the
            # fetch above raised.
            coord.request_stop()
            coord.join(threads)

TFRecords

TFRecords是Tensorflow设计的一种内置文件格式，是一种二进制文件，它能更好地利用内存，更方便复制和移动，并且可以将二进制数据和标签(训练的类别标签)数据存储在同一个文件中。

TFRecords存储

建立TFRecord存储器：tf.python_io.TFRecordWriter(path)
- path: TFRecords文件的路径
- return：文件写入器实例
- 方法：
  - write(record):向文件中写入一个字符串记录
  - close():关闭文件写入器
- 注：字符串是一个序列化的Example：Example.SerializeToString()
构造每个样本的Example协议块
- tf.train.Example(features=None)
  - 写入tfrecords文件
  - features:tf.train.Features类型的特征实例
  - return：example格式协议块
- tf.train.Features(feature=None)
  - 构建每个样本的信息键值对
  - feature:字典数据,key为要保存的名字，value为tf.train.Feature实例
  - return:Features类型
- tf.train.Feature(**options)
  - **options：例如：
    
    bytes_list=tf.train.BytesList(value=[Bytes])
    
    int64_list=tf.train.Int64List(value=[Value])

对于上例中【CIFAR-10二进制数据读取】读取到的数据进行存储：

import tensorflow as tf

import os

# Command-line flags: where the CIFAR-10 binary files live and where the
# TFRecords file is written.
flags = tf.app.flags
flags.DEFINE_string("cifar_dir", "./data/cifar-10-batches-bin", "cifar目录")
flags.DEFINE_string("cifar_tfrecords", "./temp/cifar.tfrecords", "保存的tfrecords文件路径")

# Parsed flag values, read by the code below as FLAGS.<name>.
FLAGS = flags.FLAGS

class CifarReader(object):
    """Read CIFAR-10 binary files and store examples in a TFRecords file."""

    def __init__(self, filelist):
        """Store the input files and the fixed record layout.

        Args:
            filelist: list of paths to the CIFAR-10 ``.bin`` files.
        """
        self.filelist = filelist

        # Layout of one record: 1 label byte followed by 32*32*3 pixel bytes.
        self.width = 32
        self.height = 32
        self.channel = 3
        self.label_bytes = 1
        self.image_bytes = self.width * self.height * self.channel
        self.bytes = self.label_bytes + self.image_bytes

    def read_and_decode(self):
        """Build the graph that reads, decodes and batches the binary records.

        Returns:
            A tuple ``(label_batch, image_batch)``: int32 labels of shape
            [20, 1] and uint8 images of shape [20, 32, 32, 3].
        """
        # Filename queue consumed by the reader.
        file_queue = tf.train.string_input_producer(self.filelist)

        # Each read returns exactly one fixed-size record.
        reader = tf.FixedLengthRecordReader(self.bytes)
        key, value = reader.read(file_queue)

        # Raw bytes -> uint8 vector of length self.bytes.
        label_image = tf.decode_raw(value, tf.uint8)
        print(label_image)

        # Split the record into its label and its pixel bytes.
        label = tf.cast(tf.slice(label_image, [0], [self.label_bytes]), tf.int32)
        image = tf.slice(label_image, [self.label_bytes], [self.image_bytes])

        # The CIFAR-10 binary format stores pixels channel-major (all red
        # values, then green, then blue), so reshape to
        # [channel, height, width] first and transpose to
        # [height, width, channel]; reshaping straight to [32, 32, 3] would
        # interleave the colour planes incorrectly.
        depth_major = tf.reshape(image, [self.channel, self.height, self.width])
        image_reshape = tf.transpose(depth_major, [1, 2, 0])
        print(label, image_reshape)

        # Group single examples into batches of 20.
        label_batch, image_batch = tf.train.batch([label, image_reshape], batch_size=20, num_threads=1, capacity=20)
        print(label_batch, image_batch)

        return label_batch, image_batch

    def write_to_tfrecords(self, label_batch, image_batch):
        """Serialize one batch of (label, image) pairs into the TFRecords file.

        Must be called while a default session is active and the queue
        runners are started, because the batch tensors are evaluated here.

        :param label_batch: batched label tensor
        :param image_batch: batched image tensor
        :return: None
        :raises ValueError: if no default session is registered
        """
        sess = tf.get_default_session()
        if sess is None:
            raise ValueError("write_to_tfrecords requires a default session")

        # Evaluate both tensors in ONE run. Every separate run/eval dequeues
        # a fresh batch, so evaluating labels and images independently would
        # pair labels with images from a different batch.
        labels, images = sess.run([label_batch, image_batch])

        # The context manager closes the writer even if a write fails.
        with tf.python_io.TFRecordWriter(FLAGS.cifar_tfrecords) as writer:
            for label, image in zip(labels, images):
                example = tf.train.Example(features=tf.train.Features(feature={
                    # Each label row holds a single value; store it as int64.
                    "label": tf.train.Feature(int64_list=tf.train.Int64List(value=[int(label[0])])),
                    # Raw pixel bytes of one image.
                    "image": tf.train.Feature(bytes_list=tf.train.BytesList(value=[image.tobytes()])),
                }))
                writer.write(example.SerializeToString())

if __name__ == '__main__':
    # Use every ``.bin`` file in the CIFAR directory as pipeline input.
    filelist = [
        os.path.join(FLAGS.cifar_dir, name)
        for name in os.listdir(FLAGS.cifar_dir)
        if name.endswith(".bin")
    ]

    cf = CifarReader(filelist)
    label_batch, image_batch = cf.read_and_decode()

    # The TFRecords writer does not create missing directories, so make sure
    # the output directory exists before writing.
    out_dir = os.path.dirname(FLAGS.cifar_tfrecords)
    if out_dir and not os.path.isdir(out_dir):
        os.makedirs(out_dir)

    with tf.Session() as sess:
        # Coordinator used to stop and join the queue-runner threads.
        coord = tf.train.Coordinator()

        # Start the threads that fill the input queues.
        threads = tf.train.start_queue_runners(sess, coord=coord)

        try:
            # Store examples in the TFRecords file.
            cf.write_to_tfrecords(label_batch, image_batch)

            # Fetch and print one batch of labels and images.
            print(sess.run([label_batch, image_batch]))
        finally:
            # Always stop and wait for the reader threads, even if writing
            # or fetching raised.
            coord.request_stop()
            coord.join(threads)

TFRecords读取方法

同文件阅读器流程,中间需要解析过程

解析TFRecords的example协议内存块：

tf.parse_single_example(serialized,features=None,name=None)
- 解析一个单一的Example原型
- serialized：标量字符串Tensor，一个序列化的Example
- features：dict字典数据，键为读取的名字，值为FixedLenFeature
- return:一个键值对组成的字典，键为读取的名字
tf.FixedLenFeature(shape,dtype)
- shape：输入数据的形状，一般不指定,为空列表
- dtype：输入数据类型，与存储进文件的类型要一致，类型只能是float32,int64,string

读取上例保存的tfrecords文件：

# This script is run on its own, so it needs its own TensorFlow import.
import tensorflow as tf

# Command-line flags: where the CIFAR-10 binary files live and where the
# TFRecords file is stored.
tf.app.flags.DEFINE_string("cifar_dir", "./data/cifar-10-batches-bin", "cifar目录")
tf.app.flags.DEFINE_string("cifar_tfrecords", "./temp/cifar.tfrecords", "保存的tfrecords文件路径")

# Parsed flag values, read by the code below as FLAGS.<name>.
FLAGS = tf.app.flags.FLAGS

class CifarReader(object):
    """Read CIFAR-10 examples back from a TFRecords file."""

    def __init__(self, filelist):
        """Store the input files and the fixed image layout.

        Args:
            filelist: list of CIFAR-10 binary file paths; it is only stored
                here and is not used by ``read_from_cfrecords``.
        """
        self.filelist = filelist

        # Layout of one record: 1 label byte followed by 32*32*3 pixel bytes.
        self.width = 32
        self.height = 32
        self.channel = 3
        self.label_bytes = 1
        self.image_bytes = self.width * self.height * self.channel
        self.bytes = self.label_bytes + self.image_bytes

    def read_from_cfrecords(self):
        """Build the graph that reads and batches examples from the TFRecords file.

        The file path comes from ``FLAGS.cifar_tfrecords``.

        :return: tuple ``(label_batch, image_batch)``: int32 labels of shape
            [20] and uint8 images of shape [20, 32, 32, 3]
        """
        # Filename queue holding the single TFRecords file.
        file_queue = tf.train.string_input_producer([FLAGS.cifar_tfrecords])

        # Each read returns one serialized Example.
        reader = tf.TFRecordReader()
        key, value = reader.read(file_queue)

        # Parse the serialized Example; the keys and dtypes must match what
        # was stored in the file.
        features = tf.parse_single_example(value, features={
            "label": tf.FixedLenFeature([], tf.int64),
            "image": tf.FixedLenFeature([], tf.string)
        })

        # The image was stored as raw bytes; decode it back to uint8.
        image = tf.decode_raw(features["image"], tf.uint8)

        # NOTE(review): assumes the stored bytes are already in
        # [height, width, channel] order -- confirm against the writer.
        image_reshape = tf.reshape(image, [self.height, self.width, self.channel])

        label = tf.cast(features["label"], tf.int32)
        print(label, image_reshape)

        # Group single examples into batches of 20.
        label_batch, image_batch = tf.train.batch([label, image_reshape], batch_size=20, num_threads=1, capacity=20)
        print(label_batch, image_batch)

        # Return the batched image tensor (the unbatched `image_reshape` was
        # previously returned by mistake).
        return label_batch, image_batch

if __name__ == '__main__':
    # `cf` was never created in this script. Reading the TFRecords file does
    # not use the binary file list, so an empty list is enough here.
    cf = CifarReader([])

    label_batch, image_batch = cf.read_from_cfrecords()

    with tf.Session() as sess:
        # Coordinator used to stop and join the queue-runner threads.
        coord = tf.train.Coordinator()

        # Start the threads that fill the input queues.
        threads = tf.train.start_queue_runners(sess, coord=coord)

        try:
            # Fetch and print one batch of labels and images.
            print(sess.run([label_batch, image_batch]))
        finally:
            # Always stop and wait for the reader threads, even if the
            # fetch above raised.
            coord.request_stop()
            coord.join(threads)