cs231n knn

# coding: utf-8

# In[19]:

# A `from __future__` import has to come before every other statement;
# placed after the regular imports it is a SyntaxError when this notebook
# export is run as a plain script.
from __future__ import print_function

import random

import matplotlib.pyplot as plt
import numpy as np

from cs231n.data_utils import load_CIFAR10

# Notebook magic: draw matplotlib figures inline.
get_ipython().run_line_magic('matplotlib', 'inline')

# Plot defaults used by the rest of the notebook.
plt.rcParams['figure.figsize'] = (10.0, 8.0)  # set default size of plots
plt.rcParams['image.interpolation'] = 'nearest'
plt.rcParams['image.cmap'] = 'gray'

# Load the autoreload extension and invoke %autoreload with no argument.
# NOTE(review): the course notebooks usually run `%autoreload 2`; confirm
# that the empty argument is intended.
get_ipython().run_line_magic('load_ext', 'autoreload')
get_ipython().run_line_magic('autoreload', '')

# In[20]:

# Directory handed to load_CIFAR10 (relative to the working directory).
cifar10_dir = 'cs231n/datasets/cifar-10-batches-py'

# Release arrays left over from a previous run of this cell before loading
# again. `del` on a name that does not exist raises NameError, which is the
# only failure expected here, so nothing else is swallowed.
try:
    del X_train, y_train
    del X_test, y_test
    print('Clear previously loaded data.')
except NameError:
    # First run: the arrays were never created, nothing to clear.
    pass

X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)

# Sanity check: show the shapes of the freshly loaded arrays.
print(X_train.shape,y_train.shape, X_test.shape, y_test.shape)

# In[21]:

# Keep only the leading examples of each split so the experiments below
# finish quickly.
num_training = 5000
mask = list(range(num_training))
X_train, y_train = X_train[mask], y_train[mask]

num_test = 50  # was 500; reduced to 50 test examples to speed things up
mask = list(range(num_test))
X_test, y_test = X_test[mask], y_test[mask]

print('ok')

# In[22]:

# Flatten every image into a single row vector (one row per example).

# Shapes before flattening; with the subsampling above this is expected to be
# (5000, 32, 32, 3) (50, 32, 32, 3).
print(X_train.shape, X_test.shape)

X_train = X_train.reshape(X_train.shape[0], -1)
X_test = X_test.reshape(X_test.shape[0], -1)

# Shapes after flattening; expected (5000, 3072) (50, 3072), as 32*32*3 = 3072.
print(X_train.shape, X_test.shape)

# In[109]:

class NearestNeighbor(object):
    """1-nearest-neighbour classifier with several L2 distance-matrix helpers.

    ``train`` only stores the data.  ``predict_l1`` / ``predict_l2`` give each
    test row the label of its closest training row.  The ``compute_dist_*``
    methods all return the same (num_test, num_train) matrix of Euclidean
    distances, computed with two, one or zero Python-level loops.

    All methods index their inputs as 2-D arrays with one example per row.
    NOTE(review): the arithmetic is done in the dtype of the inputs; unsigned
    integer image data would wrap around on subtraction, so callers presumably
    pass float arrays -- confirm against the data loader.
    """

    def train(self, X, y):
        """Remember the training set.

        X: 2-D array, one training example per row.
        y: 1-D array of labels; ``y[i]`` is the label of ``X[i]``.
        """
        self.Xtrain = X
        self.ytr = y

    # ------------------------------------------------------------------
    # Direct prediction
    # ------------------------------------------------------------------

    def predict_l1(self, X):
        """Predict labels using the Manhattan (L1) distance.

        X: 2-D array, one test example per row.
        Returns a 1-D array with one label per test row, in the dtype of the
        training labels.
        """
        num_test = X.shape[0]
        Ypred = np.zeros(num_test, dtype=self.ytr.dtype)
        for i in range(num_test):
            # L1 distance from test row i to every training row.
            distances = np.sum(np.abs(self.Xtrain - X[i, :]), axis=1)
            # Label of the closest training example.
            Ypred[i] = self.ytr[np.argmin(distances)]
        return Ypred

    def predict_l2(self, X):
        """Predict labels using the Euclidean (L2) distance.

        X: 2-D array, one test example per row.
        Returns a 1-D array with one label per test row, in the dtype of the
        training labels.
        """
        num_test = X.shape[0]
        Ypred = np.zeros(num_test, dtype=self.ytr.dtype)
        for i in range(num_test):
            # L2 distance from test row i to every training row.
            distances = np.sqrt(
                np.sum(np.square(self.Xtrain - X[i, :]), axis=1))
            # Label of the closest training example.
            Ypred[i] = self.ytr[np.argmin(distances)]
        return Ypred

    # ------------------------------------------------------------------
    # Euclidean (L2) distance matrices
    # ------------------------------------------------------------------

    def compute_dist_2_loop(self, Xtest):
        """Return the L2 distance matrix using a loop over both index sets.

        Xtest: 2-D array, one test example per row.
        Returns a float array of shape (num_test, num_train) whose entry
        [i, j] is the distance between test row i and training row j.
        """
        train_len = self.Xtrain.shape[0]
        test_len = Xtest.shape[0]
        dists = np.zeros((test_len, train_len))
        for i in range(test_len):
            for j in range(train_len):
                diff = self.Xtrain[j] - Xtest[i]
                # dists[i][j] and dists[i, j] address the same element.
                dists[i, j] = np.sqrt(np.sum(np.square(diff)))
        return dists

    def compute_dist_1_loop(self, Xtest):
        """Return the L2 distance matrix using a loop over test rows only.

        Xtest: 2-D array, one test example per row.
        Returns a float array of shape (num_test, num_train).
        """
        train_len = self.Xtrain.shape[0]
        test_len = Xtest.shape[0]
        dists = np.zeros((test_len, train_len))  # e.g. 50 x 5000
        for i in range(test_len):
            # Broadcasting subtracts test row i from every training row at
            # once; dists[i] and dists[i, :] address the same row.
            # np.linalg.norm(self.Xtrain - Xtest[i], axis=1) is an equivalent
            # spelling, see
            # https://blog.csdn.net/lanchunhui/article/details/51004387
            dists[i] = np.sqrt(
                np.sum(np.square(self.Xtrain - Xtest[i]), axis=1))
        return dists

    def compute_dist_no_loop(self, Xtest):
        """Return the L2 distance matrix without any Python-level loop.

        Uses the expansion ||a - b||^2 = ||a||^2 + ||b||^2 - 2 a.b, so the
        whole matrix comes from one matrix product plus broadcasting.

        Xtest: 2-D array, one test example per row.
        Returns an array of shape (num_test, num_train).
        """
        train_sq = np.sum(self.Xtrain ** 2, axis=1)   # (num_train,)
        test_sq = np.sum(Xtest ** 2, axis=1)          # (num_test,)
        cross = np.dot(Xtest, self.Xtrain.T)          # (num_test, num_train)
        # A 1-D array broadcasts as a row, so train_sq lines up with the
        # columns as is; test_sq must be reshaped explicitly into a column
        # (-1 lets numpy infer the length).  See
        # https://blog.csdn.net/qq_41671051/article/details/80096269
        # https://blog.csdn.net/hqh131360239/article/details/79061535
        sq_dists = train_sq + np.reshape(test_sq, (-1, 1)) - 2 * cross
        # Round-off can push the squared distance of (near-)identical rows
        # slightly below zero, which would turn into NaN under sqrt.
        return np.sqrt(np.maximum(sq_dists, 0))

    def _compute_distances_no_loops(self, X):
        """Alias of ``compute_dist_no_loop`` kept for existing callers.

        X: 2-D array, one test example per row.
        Returns an array of shape (num_test, num_train).
        """
        return self.compute_dist_no_loop(X)

# In[ ]:

# 2 loop、1 loop、no loop三种方式，

# 2 loop、1 loop 耗时较长，时间差不多

# no loop 耗时短。并行运算，大大提高了速度。

# In[110]:

# Fit the classifier on the (subsampled) training data.
classifier = NearestNeighbor()
classifier.train(X_train, y_train)

# Disabled experiments -- 1-NN accuracy recorded in the notebook:
#   np.mean(classifier.predict_l1(X_test) == y_test)  -> 0.22
#   np.mean(classifier.predict_l2(X_test) == y_test)  -> 0.18

# Full L2 distance matrix; its sum serves as a quick checksum for comparing
# the different distance implementations.
ans = classifier.compute_dist_no_loop(X_test)
print(ans.sum())  # recorded output: 1203599820.3775597

print('ok')

# In[85]:

#################################

# test area

import numpy as np

# 3x3 integer matrix holding 1..9 in row-major order.
a = np.arange(1, 10).reshape(3, 3)

# a[1][1] and a[1, 1] address the same element; a[1] is the second row.

# Elementwise squaring, spelled two equivalent ways.
print(np.square(a))
print(a**2)

cs231n knn的更多相关文章

CS231n 2017 学习笔记01——KNN（K-Nearest Neighbors）
本博客内容来自 Stanford University CS231N 2017 Lecture 2 - Image Classification 课程官网:http://cs231n.stanford ...
CS231n 2016 通关第二章-KNN 作业分析
KNN作业要求: 1.掌握KNN算法原理 2.实现具体K值的KNN算法 3.实现对K值的交叉验证 1.KNN原理见上一小节 2.实现KNN 过程分两步: 1.计算测试集与训练集的距离 2.通过比较la ...
CS231n 2016 通关第二章-KNN
课程内容全纪录: 1.讲解图像分类的难点 1.光照强度 2.主体变形 3.主体与背景咬合 4.主体与背景相接近 5.同类别间存在区别 2.KNN 1.最近邻算法 2.Knn 3.hyperpara ...
cs231n assignment1 KNN
title: cs231n assignment1 KNN tags: - KNN - cs231n categories: - 机器学习 date: 2019年9月16日 17:03:13 利用KN ...
【cs231n作业笔记】一：KNN分类器
安装anaconda,下载assignment作业代码作业代码数据集等2018版基于python3.6 下载提取码4put 本课程内容参考: cs231n官方笔记地址贺完结!CS231n官方笔记授 ...
【cs231n笔记】assignment1之KNN
k-Nearest Neighbor (kNN) 练习这篇博文是对cs231n课程assignment1的第一个问题KNN算法的完成,参考了一些网上的博客,不具有什么创造性,以个人学习笔记为目的发布 ...
CS231n 第一次作业KNN中本地CIFAR10数据集的载入
一.问题描述网上绝大多数作业参考都是在jupyter下运行的,数据集载入过程一般如下: from cs231n.data_utils import load_CIFAR10 #导入数据集,并打印出数 ...
CS231n——图像分类（KNN实现）
图像分类目标:已有固定的分类标签集合,然后对于输入的图像,从分类标签集合中找出一个分类标签,最后把分类标签分配给该输入图像. 图像分类流程输入:输入是包含N个图像的集合,每个图像的标签是K ...
『cs231n』作业1选讲_通过代码理解KNN&交叉验证&SVM
通过K近邻算法探究numpy向量运算提速茴香豆的“茴”字有... ... 使用三种计算图片距离的方式实现K近邻算法: 1.最为基础的双循环 2.利用numpy的broadca机制实现单循环 3.利用 ...

随机推荐

Gym - 100502G Outing (强连通缩点+树形依赖背包)
题目链接问题:有n个人,最多选k个,如果选了某个人就必须选他指定的另一个人,问最多能选多少个人. 将每个人所指定的人向他连一条单向边,则每一个点都有唯一的前驱,形成的图是个基环树森林,在同一个强连通 ...
HAWQ 官方文档创建filespace,tablespace,database,table
1.创建Filespace 创建Filespace必须是数据库超级用户( You must be a database superuser to create a filespace.)首先创建一个f ...
hadoop-pig学习笔记
A1 = LOAD '/luo/lzttxt01.txt' AS (col1:chararray,col2:int,col3:int,col4:int,col5:double,col6:double) ...
ORACLE初始化参数文件概述
ORACLE初始化参数文件概述在9i之前,参数文件只有一种,它是文本格式的,称为pfile,在9i及以后的版本中,新增了服务器参数文件,称为spfile,它是二进制格式的.这两种参数文件都是用来存储 ...
一次调用cloudera的的API和JSON想到的
JSON的null字段在我的项目中修改了YARN的资源池,到cloudera中就报错,查询还不报错,但是修改的时候,就报错,大致意思就是信息有异常,“包含了尖括号. 因为通过API+json交互,我 ...
bzoj 3625(CF 438E)The Child and Binary Tree——多项式开方
题目:https://www.lydsy.com/JudgeOnline/problem.php?id=3625 http://codeforces.com/contest/438/problem/E ...
resize2fs: Bad magic number in super-block while trying to open /dev/centos/root Couldn't find valid filesystem superblock
今天在进行lvm扩容之后,按照惯例进行 resize2fs 操作,发现报如下错误: # resize2fs /dev/centos/root resize2fs 1.42.9 (28-Dec-2013 ...
mysql的备份恢复等操作
备份数据库 shell> mysqldump -h host -u root -p dbname >dbname_backup.sql 恢复数据库 shell> mysqladmin ...
java代码异常捕获throws抛出异常
总结:Throwable 是所以异常的父类.error和Exception是继承它的类 Exception: 这类异常一般是外部错误,例如试图从文件尾后读取数据等,这并不是程序本身的错误,而是在应用环 ...
将chrome浏览器的默认背景颜色修改为浅绿色，以减缓长时间看电脑的眼睛不舒服的问题
修改chrome文件夹中的Custom.css, 此文件里面默认内容是空的. 在其中添加下面这段代码: 你也可以选择自己的喜欢的颜色, 前提是你知道你想要更改的颜色的十六进制颜色值, 例如:#CCEB ...

cs231n knn

cs231n knn的更多相关文章

随机推荐

热门专题