参考资料:
https://baike.baidu.com/item/%E7%9F%A9%E9%98%B5/18069?fr=aladdin
http://blog.csdn.net/c406495762/article/details/75172850
基础概念:
#shape[0]取行数,shape[1]取列数
import numpy as np
dataSetSize = dataSet.shape[0]
diffMat = np.tile(inX, (dataSetSize, 1)) - dataSet
tile的第二个参数为元组:元组的第一个元素表示纵向重复的次数(即结果中行/list的个数),第二个元素表示横向重复的次数(即单个list中的元素被复制的份数)
例如:
>>> a=[0,1,2]
>>> b=tile(a,9)
>>> b
array([0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2])
>>> b=tile(a,(9,2))
>>> b
array([[0, 1, 2, 0, 1, 2],
       [0, 1, 2, 0, 1, 2],
       [0, 1, 2, 0, 1, 2],
       [0, 1, 2, 0, 1, 2],
       [0, 1, 2, 0, 1, 2],
       [0, 1, 2, 0, 1, 2],
       [0, 1, 2, 0, 1, 2],
       [0, 1, 2, 0, 1, 2],
       [0, 1, 2, 0, 1, 2]])
>>> b=tile(a,(9,1))
>>> b
array([[0, 1, 2],
       [0, 1, 2],
       [0, 1, 2],
       [0, 1, 2],
       [0, 1, 2],
       [0, 1, 2],
       [0, 1, 2],
       [0, 1, 2],
       [0, 1, 2]])
>>> b=tile(a,(9,12))
>>> b
array([[0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2],
       [0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2],
       [0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2],
       [0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2],
       [0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2],
       [0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2],
       [0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2],
       [0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2],
       [0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2]])
矩阵减法
>>> a
array([[101,  20],
       [101,  20],
       [101,  20],
       [101,  20]])
>>> b
array([[  1, 101],
       [  5,  89],
       [108,   5],
       [115,   8]])
>>> a-b
array([[100, -81],
       [ 96, -69],
       [ -7,  15],
       [-14,  12]])
#二维特征相减后平方
sqDiffMat = diffMat**2
>>> c
array([[100, -81],
       [ 96, -69],
       [ -7,  15],
       [-14,  12]])
>>> c**2
array([[10000,  6561],
       [ 9216,  4761],
       [   49,   225],
       [  196,   144]])
#sum()所有元素相加,sum(0)列相加,sum(1)行相加
sqDistances = sqDiffMat.sum(axis=1)
>>> d=c**2
>>> d.sum(axis=1)
array([16561, 13977,   274,   340])
>>> d.sum(axis=0)
array([19461, 11691])
#开方,计算出距离
distances = sqDistances**0.5
#此处的e即上一步的行求和结果:e=d.sum(axis=1)
>>> f=e**0.5
>>> f
array([ 128.68954892, 118.22436297, 16.55294536, 18.43908891])
sortedDistIndices = distances.argsort()
#argsort()按距离从小到大排序,返回的是元素的索引(index)
#排序之后再定一个记录类别次数的字典
>>> f
array([ 128.68954892, 118.22436297, 16.55294536, 18.43908891])
>>> g=f.argsort()
>>> g
array([2, 3, 1, 0])
取出前k个元素的类别
>>> for i in range(3):
...     print(i)
...     labels[sortedDistIndices[i]]
...
0
'动作片'
1
'动作片'
2
'爱情片'
>>> for i in range(3):
...     voteIlabel = labels[sortedDistIndices[i]]
...     classCount.get(voteIlabel,0)
...
0
0
0
>>> for i in range(3):
...     voteIlabel = labels[sortedDistIndices[i]]
...     classCount.get(voteIlabel,0) + 1
...
1
1
1
#key=operator.itemgetter(1)根据字典的值进行排序
#key=operator.itemgetter(0)根据字典的键进行排序
#reverse=True降序排序字典
sortedClassCount = sorted(classCount.items(),key=operator.itemgetter(1),reverse=True)
print('#######',sortedClassCount)
classCount~~~~~~ {}
classCount~~~~~~ { '动作片': 2, '爱情片': 1}
####### [('动作片', 2), ('爱情片', 1)]
动作片
第一段代码总算跑通了,虽然到现在为止还不理解什么是人工智能,哈哈。