基于用户推荐算法的k-nearest neighbor算法,k-nearestneighbor,ch2study.py#


ch2study.py

# -*- coding: utf-8 -*-
'''
Created on 2012-9-4

@author: jekey

User-based k-nearest-neighbour recommender that scores items with the
Pearson correlation between users' rating dictionaries.
'''
import codecs  # no longer used below; kept so the module's import surface is unchanged
import csv
import io
from math import sqrt


class recommender:
    """Recommend unrated items to a user from the ratings of similar users.

    Attributes:
        data: dict mapping user name -> {item name: rating}.
        k: number of nearest neighbours consulted by ``recommend``.
        n: maximum number of recommendations returned.
        metric: name of the similarity measure (only 'pearson' is available).
        fn: the similarity function selected by ``metric``.
    """

    def __init__(self, data=None, k=3, metric='pearson', n=6):
        """Configure the recommender.

        Args:
            data: optional ``{user: {item: rating}}`` dictionary. When omitted
                the recommender starts empty and ``loadData`` can fill it.
            k: number of neighbours to use.
            metric: similarity measure; only 'pearson' is implemented.
            n: maximum number of recommendations to return.

        Raises:
            ValueError: if ``metric`` is not a supported measure.
        """
        # A ``None`` default avoids sharing one dict between instances.
        self.data = data if data is not None else {}
        self.k = k
        self.n = n
        self.metric = metric
        if self.metric == 'pearson':
            self.fn = self.pearson
        else:
            # Fail here instead of with an AttributeError on first use.
            raise ValueError('unsupported metric: %r' % (metric,))

    def loadData(self, path=''):
        """Load ratings from ``path + 'Movie_Ratings.csv'`` into ``self.data``.

        The first non-empty row is the header: its first cell is ignored and
        the remaining cells are user names. Every later row is an item name
        followed by one integer rating per user; blank cells mean "not rated".
        Header columns sharing a name are merged into a single user, with
        later columns overwriting earlier ones.

        Args:
            path: prefix prepended verbatim to the file name (include the
                trailing separator, e.g. ``'d:\\\\'``).

        Raises:
            IOError/OSError: if the file cannot be opened.
            ValueError: if a non-blank rating cell is not an integer.
        """
        self.data = {}
        users = None
        # newline='' lets the csv module do its own line-ending handling;
        # the context manager guarantees the handle is closed.
        with io.open(path + 'Movie_Ratings.csv', 'r',
                     encoding='utf8', newline='') as handle:
            for row in csv.reader(handle):
                if not row:
                    continue  # skip blank lines
                if users is None:
                    users = row[1:]
                    for user in users:
                        self.data.setdefault(user, {})
                    continue
                item = row[0]
                # zip() stops at the shorter sequence, so ragged rows are safe.
                for user, cell in zip(users, row[1:]):
                    if cell.strip() != '':
                        self.data[user][item] = int(cell)

    def pearson(self, rate1, rate2):
        """Return the Pearson correlation of two rating dictionaries.

        Only items present in both dictionaries are compared.

        Args:
            rate1: ``{item: rating}`` of the first user.
            rate2: ``{item: rating}`` of the second user.

        Returns:
            The correlation, or 0 when the users share no items or when
            either user's shared ratings have zero variance.
        """
        sum_xy = 0
        sum_x = 0
        sum_y = 0
        sum_x2 = 0
        sum_y2 = 0
        n = 0
        for key in rate1:
            if key in rate2:
                n += 1
                x = rate1[key]
                y = rate2[key]
                sum_xy += x * y
                sum_x += x
                sum_y += y
                sum_x2 += x * x
                sum_y2 += y * y
        if n == 0:
            return 0
        # Force true division so integer ratings are not floor-divided.
        count = float(n)
        # Clamp at zero: rounding can push a zero variance slightly negative,
        # which would make sqrt() raise.
        sx = sqrt(max(0.0, sum_x2 - sum_x * sum_x / count))
        sy = sqrt(max(0.0, sum_y2 - sum_y * sum_y / count))
        if sx == 0 or sy == 0:
            return 0
        return (sum_xy - sum_x * sum_y / count) / sx / sy

    def computeNearestNeighbor(self, username):
        """Return every other user ranked by similarity to ``username``.

        Args:
            username: key of the reference user in ``self.data``.

        Returns:
            A list of ``(user, similarity)`` tuples, most similar first.

        Raises:
            KeyError: if ``username`` is not in ``self.data``.
        """
        target = self.data[username]
        distances = [(other, self.fn(target, ratings))
                     for other, ratings in self.data.items()
                     if other != username]
        # Higher similarity means closer, hence the descending sort.
        distances.sort(key=lambda pair: pair[1], reverse=True)
        return distances

    def recommend(self, user):
        """Return up to ``self.n`` items that ``user`` has not rated yet.

        Each of the (at most) ``self.k`` most similar users contributes its
        ratings, weighted by its share of the neighbours' total similarity.

        Args:
            user: key of the user in ``self.data``.

        Returns:
            A list of ``(item, score)`` tuples sorted by descending score.
            The list is empty when there are no neighbours or when their
            similarities sum to zero (no weights can be derived).

        Raises:
            KeyError: if ``user`` is not in ``self.data``.
        """
        # Slicing tolerates having fewer than k other users.
        nearest = self.computeNearestNeighbor(user)[:self.k]
        userRating = self.data[user]
        totalDistance = sum((similarity for _, similarity in nearest), 0.0)
        if totalDistance == 0:
            return []
        recommendations = {}
        for neighbor, similarity in nearest:
            weight = similarity / totalDistance
            for item, rating in self.data[neighbor].items():
                if item not in userRating:
                    # Accumulate the weighted rating across all neighbours.
                    recommendations[item] = (
                        recommendations.get(item, 0.0) + rating * weight)
        # Sort first, then truncate, so the best n items are the ones kept.
        ranked = sorted(recommendations.items(),
                        key=lambda pair: pair[1], reverse=True)
        return ranked[:self.n]


if __name__ == '__main__':
    r = recommender()
    r.loadData('d:\\')
    print(r.recommend('Heather'))

Movie_Ratings.csv

,"Patrick C","Heather","Bryan","Patrick T","Thomas","aaron","vanessa","greg","brian","ben","Katherine","Jonathan","Zwe","Erin","Chris","Zak","Matt","Chris","Josh","Amy","Valerie","Gary","Stephen","Jessica","Jeff"
"Alien",,,2,,5,4,,,4,,,,,,2,,,4,3,,,2,5,,4
"Avatar",4,5,5,4,2,,4,3,,3,5,4,4,4,1,5,,,4,3,2,1,4,,4
"Blade Runner",5,,,,5,4,,1,5,5,,,,,5,,,3,,3,3,1,,,5
"Braveheart",4,,5,,4,4,3,4,4,,3,4,3,4,2,5,,4,,3,4,5,5,,4
"Dodgeball",5,4,3,2,4,,4,5,3,4,5,5,3,3,3,3,,3,,4,3,4,3,,3
"Forest Gump",4,5,4,3,3,,4,5,5,5,5,5,5,5,3,5,4,5,4,4,5,4,1,4,4
"Gladiator",,5,5,,4,4,4,5,5,5,3,4,3,,3,5,,3,4,,3,3,4,,4
"Jaws",,5,4,,4,,2,3,3,1,,3,2,3,2,,,5,5,3,2,4,2,3,5
"Kazaam",2,,3,5,2,,,,1,,2,2,2,1,1,1,,,,1,1,1,,1,1
"Lord of the Rings",4,4,3,2,,5,2,3,5,2,4,4,3,4,3,4,1,4,,,5,3,5,3,3
"Napolean Dynamite",3,4,1,1,5,1,4,4,4,4,2,3,3,2,1,1,3,1,,2,5,4,2,2,3
"Old School",3,,4,5,5,,3,5,4,4,,5,4,,5,5,,4,,3,3,5,,2,3
"Pootie Tang",,,1,1,,,1,,1,1,,,2,,3,,,1,,,,,,,5
"Pulp Fiction",,,4,,4,3,,4,5,4,,5,4,,5,5,,5,,4,3,3,3,4,3
"Scarface",,4,,,5,4,4,4,,4,,4,,,3,3,,2,,,4,3,,,
"Shawshank Redemption",5,,5,,,5,,5,5,5,,,4,,4,,,4,,2,5,5,,,
"Snakes on a Plane",4,1,2,,4,,,,3,2,,,1,,3,,1,3,,2,1,2,,2,1
"Spiderman",4,3,4,4,5,3,,4,4,4,5,3,4,3,2,3,3,2,4,3,5,2,3,3,4
"Star Wars",5,5,4,5,5,3,3,5,5,5,5,5,4,3,4,5,,5,5,,5,5,5,5,5
"The Dark Knight",4,5,5,4,4,,,4,4,5,,5,5,,3,5,,4,4,4,5,,4,5,5
"The Happening",1,,,,1,,,,2,,,,1,,1,1,,,,,1,,1,,
"The Matrix",3,4,3,5,4,4,4,4,5,4,4,5,4,4,4,5,,2,5,5,4,2,5,,5
"Toy Story",4,3,3,4,4,5,4,5,5,5,5,5,4,5,3,4,4,4,4,4,5,4,4,5,5
"Village",,2,3,1,1,,2,,4,3,,,4,,3,4,,1,4,,2,,2,,2
"You Got Mail",1,,1,1,1,3,,,3,,,2,2,,,2,,1,,2,2,1,2,,2

评论关闭