-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathmain.py
48 lines (39 loc) · 1.67 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
#Importing the Libraries
import numpy as np
import pandas as pd
from os.path import join
import os
import DropboxAPI
# Fetch the MovieLens archive from Dropbox and work out where its data
# files end up on disk.
destPath = os.getcwd()
url ='https://www.dropbox.com/sh/euppz607r6gsen2/AAAQCu8KjT7Ii1R60W2-Bm1Ua/MovieLens%20(Movie%20Ratings)?dl=1'
zipFileName = 'MovieLens (Movie Ratings).zip'
# NOTE(review): fetchData is defined in the project's DropboxAPI module, not
# here; per the original comment it downloads AND unzips the archive into
# destPath -- confirm against that module.
DropboxAPI.fetchData(url, zipFileName, destPath)

# Names of the sub-directory and the two data files read later on.
subzipFileName ='movielens100k/ml-100k'
userDataSet = 'u.data'
userTestDataSet = 'u1.test'

# Strip everything after the last "." from each name (this removes ".zip";
# the sub-directory name contains no "." and passes through unchanged), then
# chain the pieces under the working directory.
filePath = join(
    destPath,
    zipFileName.rsplit(".", 1)[0],
    subzipFileName.rsplit(".", 1)[0],
)
# Read both tab-separated rating files into DataFrames that share the same
# four column labels (the files are read without a header row of their own).
names = ['user_id', 'item_id', 'rating', 'timestamp']
df, df_test = (
    pd.read_csv(join(filePath, data_file), sep='\t', names=names)
    for data_file in (userDataSet, userTestDataSet)
)
# Count the distinct user IDs and distinct item (movie) IDs present in df.
n_users = len(df['user_id'].unique())
n_items = len(df['item_id'].unique())
# Build dense (n_users x n_items) rating matrices for the full data and for
# the test split.
def _fill_ratings(frame, shape):
    """Return a dense float matrix holding the ratings listed in *frame*.

    Args:
        frame: DataFrame with ``user_id``, ``item_id`` and ``rating`` columns.
        shape: ``(n_users, n_items)`` tuple giving the matrix dimensions.

    Returns:
        A NumPy array of the given shape, zero wherever *frame* has no row
        for that (user, item) pair.

    Raises:
        IndexError: if an ID minus one falls outside *shape*.
    """
    matrix = np.zeros(shape)
    # Rows are applied in file order, so if a (user, item) pair appears more
    # than once the last occurrence wins.
    for row in frame.itertuples():
        # An ID of k is stored at index k - 1 on both axes.
        matrix[row.user_id - 1, row.item_id - 1] = row.rating
    return matrix


# Both matrices use the dimensions derived from df, so they line up
# cell-for-cell.
ratings = _fill_ratings(df, (n_users, n_items))
ratings_test = _fill_ratings(df_test, (n_users, n_items))
# Find the smallest number of movies rated by any single user.
# Each row of `ratings` is one user; a non-zero cell is a rated movie.
nonzero_counts = np.count_nonzero(ratings, axis=1)
print ('Number of minumum movies rated by each user : ', nonzero_counts.min())
# Report how full the ratings matrix is, as a percentage.
# NOTE: the figure is the share of NON-ZERO cells (filled / total * 100),
# printed under the label "Sparsity".
sparsity = float(np.count_nonzero(ratings)) / ratings.size * 100
print('Sparsity of ratings matrix : ', sparsity)