基于用户推荐算法的k-nearest neighbor算法,k-nearestneighbor,ch2study.py#
基于用户推荐算法的k-nearest neighbor算法,k-nearestneighbor,ch2study.py#
ch2study.py
# -*- coding: utf-8 -*-
"""User-based k-nearest-neighbour recommender.

Created on 2012-9-4

@author: jekey
"""
import csv
import sys
from math import sqrt


class recommender(object):
    """Recommend items to a user from the ratings of similar users.

    ``self.data`` maps ``user -> {item: rating}``.  Similarity between two
    users is computed by ``self.fn`` (currently only Pearson correlation).
    """

    def __init__(self, data=None, k=3, metric='pearson', n=6):
        """Create a recommender.

        Args:
            data: optional ``{user: {item: rating}}`` mapping to start with.
                A fresh empty dict is used when omitted.
            k: number of nearest neighbours consulted by ``recommend``.
            metric: name of the similarity measure; only ``'pearson'`` is
                implemented.
            n: maximum number of recommendations returned.

        Raises:
            ValueError: if ``metric`` is not a supported name.
        """
        self.k = k
        self.n = n
        self.metric = metric
        # None sentinel avoids sharing one default dict between instances.
        self.data = {} if data is None else data
        if self.metric == 'pearson':
            self.fn = self.pearson
        else:
            # Fail here rather than with an AttributeError on first use.
            raise ValueError('unsupported metric: %r' % (metric,))

    def loadData(self, path='', filename='Movie_Ratings.csv'):
        """Replace ``self.data`` with the ratings read from a CSV file.

        The file is located at ``path + filename`` (plain concatenation, so
        ``path`` must end with a separator when it names a directory).  The
        first non-empty row is the header: an ignored first cell followed by
        user names.  Every later row is an item name followed by one rating
        per user; empty cells mean "not rated".  Users that share a name in
        the header share one rating dict, later columns overwriting earlier
        ones.

        Raises:
            IOError/OSError: if the file cannot be opened.
            ValueError: if a non-empty rating cell is not an integer.
        """
        self.data = {}
        users = None
        full_path = path + filename
        # The csv module wants text with newline='' on Python 3 and a
        # binary handle on Python 2.
        if sys.version_info[0] >= 3:
            handle = open(full_path, 'r', encoding='utf8', newline='')
        else:
            handle = open(full_path, 'rb')
        with handle:
            for row in csv.reader(handle):
                if not row:
                    continue  # tolerate blank lines
                if users is None:
                    users = row[1:]
                    for user in users:
                        self.data.setdefault(user, {})
                    continue
                item = row[0]
                for user, cell in zip(users, row[1:]):
                    if cell != '':
                        self.data[user][item] = int(cell)

    def pearson(self, rate1, rate2):
        """Return the Pearson correlation of two users' ratings.

        Only items present in both ``rate1`` and ``rate2`` are considered.
        Returns 0 when there are no common items or when either user's
        ratings over the common items have zero variance.
        """
        sum_xy = sum_x = sum_y = sum_x2 = sum_y2 = 0.0
        n = 0
        for key in rate1:
            if key in rate2:
                n += 1
                x = rate1[key]
                y = rate2[key]
                sum_xy += x * y
                sum_x += x
                sum_y += y
                sum_x2 += x * x
                sum_y2 += y * y
        if n == 0:
            return 0.0
        # Float accumulators keep "/" a true division on Python 2 as well.
        # max() guards sqrt against a tiny negative value from rounding.
        sx = sqrt(max(sum_x2 - sum_x * sum_x / n, 0.0))
        sy = sqrt(max(sum_y2 - sum_y * sum_y / n, 0.0))
        if sx == 0 or sy == 0:
            return 0.0
        return (sum_xy - sum_x * sum_y / n) / sx / sy

    def computeNearestNeighbor(self, username):
        """Return ``[(user, similarity), ...]`` for every other user.

        The list is sorted by similarity to ``username``, most similar first.

        Raises:
            KeyError: if ``username`` is not in ``self.data``.
        """
        target = self.data[username]
        distances = [(other, self.fn(target, ratings))
                     for other, ratings in self.data.items()
                     if other != username]
        # Higher correlation means closer, hence the descending sort.
        distances.sort(key=lambda pair: pair[1], reverse=True)
        return distances

    def recommend(self, user):
        """Return up to ``self.n`` ``(item, score)`` pairs, best first.

        Each of the ``self.k`` most similar users contributes its ratings
        for items ``user`` has not rated, weighted by that neighbour's share
        of the total similarity.  Returns an empty list when there are no
        neighbours or their similarities sum to zero.

        Raises:
            KeyError: if ``user`` is not in ``self.data``.
        """
        # Slicing copes with having fewer than k other users.
        nearest = self.computeNearestNeighbor(user)[:self.k]
        userRatings = self.data[user]
        totalDistance = sum(similarity for _, similarity in nearest)
        if totalDistance == 0:
            return []  # weights would be undefined
        # NOTE: neighbours with negative correlation get negative weights,
        # exactly as in the original formula.
        recommendations = {}
        for neighbour, similarity in nearest:
            weight = similarity / totalDistance
            for item, rating in self.data[neighbour].items():
                if item not in userRatings:
                    recommendations[item] = (
                        recommendations.get(item, 0.0) + rating * weight)
        # Sort first, then truncate, so the n best items are returned.
        ranked = sorted(recommendations.items(),
                        key=lambda pair: pair[1], reverse=True)
        return ranked[:self.n]


if __name__ == '__main__':
    r = recommender()
    r.loadData('d:\\')
    print(r.recommend('Heather'))
Movie_Ratings.csv
,"Patrick C","Heather","Bryan","Patrick T","Thomas","aaron","vanessa","greg","brian","ben","Katherine","Jonathan","Zwe","Erin","Chris","Zak","Matt","Chris","Josh","Amy","Valerie","Gary","Stephen","Jessica","Jeff"
"Alien",,,2,,5,4,,,4,,,,,,2,,,4,3,,,2,5,,4
"Avatar",4,5,5,4,2,,4,3,,3,5,4,4,4,1,5,,,4,3,2,1,4,,4
"Blade Runner",5,,,,5,4,,1,5,5,,,,,5,,,3,,3,3,1,,,5
"Braveheart",4,,5,,4,4,3,4,4,,3,4,3,4,2,5,,4,,3,4,5,5,,4
"Dodgeball",5,4,3,2,4,,4,5,3,4,5,5,3,3,3,3,,3,,4,3,4,3,,3
"Forest Gump",4,5,4,3,3,,4,5,5,5,5,5,5,5,3,5,4,5,4,4,5,4,1,4,4
"Gladiator",,5,5,,4,4,4,5,5,5,3,4,3,,3,5,,3,4,,3,3,4,,4
"Jaws",,5,4,,4,,2,3,3,1,,3,2,3,2,,,5,5,3,2,4,2,3,5
"Kazaam",2,,3,5,2,,,,1,,2,2,2,1,1,1,,,,1,1,1,,1,1
"Lord of the Rings",4,4,3,2,,5,2,3,5,2,4,4,3,4,3,4,1,4,,,5,3,5,3,3
"Napolean Dynamite",3,4,1,1,5,1,4,4,4,4,2,3,3,2,1,1,3,1,,2,5,4,2,2,3
"Old School",3,,4,5,5,,3,5,4,4,,5,4,,5,5,,4,,3,3,5,,2,3
"Pootie Tang",,,1,1,,,1,,1,1,,,2,,3,,,1,,,,,,,5
"Pulp Fiction",,,4,,4,3,,4,5,4,,5,4,,5,5,,5,,4,3,3,3,4,3
"Scarface",,4,,,5,4,4,4,,4,,4,,,3,3,,2,,,4,3,,,
"Shawshank Redemption",5,,5,,,5,,5,5,5,,,4,,4,,,4,,2,5,5,,,
"Snakes on a Plane",4,1,2,,4,,,,3,2,,,1,,3,,1,3,,2,1,2,,2,1
"Spiderman",4,3,4,4,5,3,,4,4,4,5,3,4,3,2,3,3,2,4,3,5,2,3,3,4
"Star Wars",5,5,4,5,5,3,3,5,5,5,5,5,4,3,4,5,,5,5,,5,5,5,5,5
"The Dark Knight",4,5,5,4,4,,,4,4,5,,5,5,,3,5,,4,4,4,5,,4,5,5
"The Happening",1,,,,1,,,,2,,,,1,,1,1,,,,,1,,1,,
"The Matrix",3,4,3,5,4,4,4,4,5,4,4,5,4,4,4,5,,2,5,5,4,2,5,,5
"Toy Story",4,3,3,4,4,5,4,5,5,5,5,5,4,5,3,4,4,4,4,4,5,4,4,5,5
"Village",,2,3,1,1,,2,,4,3,,,4,,3,4,,1,4,,2,,2,,2
"You Got Mail",1,,1,1,1,3,,,3,,,2,2,,,2,,1,,2,2,1,2,,2
相关内容
- python中thread的setDaemon、join的用法,pythonsetdaemon,#! /usr
- python将经纬度转换为kml文件,,读取文件,将其中的经纬
- python 获得日期是星期几,python星期几,如下代码:from
- python 获得一个月有多少天,python获得,在python的date
- Python 模板引擎性能对比,python性能对比,对比目标,j
- 用PYTHON写 的linux下more命令脚本,pythonmore,依赖:linux
- python使用PIL给图片加水印,,如下代码使用PIL给图片
- python 判断文件是否是图片以及图片的类型,,在python有
- 递归简单模拟Python的range,递归pythonrange,类似于Python的
- Python通过正则表达式读取tomcat的日志并打印日期,pyt
评论关闭