In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
In [4]:
# Load the movie metadata table (movieId, title, genres) and preview it.
movie = pd.read_csv('movies.csv')
movie.head()
Out[4]:
movieId title genres
0 1 Toy Story (1995) Adventure|Animation|Children|Comedy|Fantasy
1 2 Jumanji (1995) Adventure|Children|Fantasy
2 3 Grumpier Old Men (1995) Comedy|Romance
3 4 Waiting to Exhale (1995) Comedy|Drama|Romance
4 5 Father of the Bride Part II (1995) Comedy
In [7]:
# Load the raw ratings (userId, movieId, rating, timestamp) and preview them.
df = pd.read_csv('ratings.csv')
df.head()
Out[7]:
userId movieId rating timestamp
0 1 1 4.0 964982703
1 1 3 4.0 964981247
2 1 6 4.0 964982224
3 1 47 5.0 964983815
4 1 50 5.0 964982931

Merging to create a new data frame

In [10]:
# Attach title and genres to every rating (default inner join on movieId).
df = pd.merge(df, movie, on='movieId')
In [11]:
# Preview the merged ratings + movie metadata frame.
df.head()
Out[11]:
userId movieId rating timestamp title genres
0 1 1 4.0 964982703 Toy Story (1995) Adventure|Animation|Children|Comedy|Fantasy
1 5 1 4.0 847434962 Toy Story (1995) Adventure|Animation|Children|Comedy|Fantasy
2 7 1 4.5 1106635946 Toy Story (1995) Adventure|Animation|Children|Comedy|Fantasy
3 15 1 2.5 1510577970 Toy Story (1995) Adventure|Animation|Children|Comedy|Fantasy
4 17 1 4.5 1305696483 Toy Story (1995) Adventure|Animation|Children|Comedy|Fantasy
In [13]:
# The timestamp is not used by any of the recommenders below.
# Rebinding (instead of inplace=True) together with errors='ignore' keeps this
# cell safe to re-run: a second execution no longer raises KeyError because the
# column has already been removed.
df = df.drop(columns='timestamp', errors='ignore')
In [14]:
# Confirm that the timestamp column is gone.
df.head()
Out[14]:
userId movieId rating title genres
0 1 1 4.0 Toy Story (1995) Adventure|Animation|Children|Comedy|Fantasy
1 5 1 4.0 Toy Story (1995) Adventure|Animation|Children|Comedy|Fantasy
2 7 1 4.5 Toy Story (1995) Adventure|Animation|Children|Comedy|Fantasy
3 15 1 2.5 Toy Story (1995) Adventure|Animation|Children|Comedy|Fantasy
4 17 1 4.5 Toy Story (1995) Adventure|Animation|Children|Comedy|Fantasy
In [15]:
# (rows, columns) of the merged ratings frame.
df.shape
Out[15]:
(100836, 5)
In [31]:
# Number of ratings each title received, most-rated titles first.
rate_count = (
    df.groupby('title')['rating']
      .count()
      .sort_values(ascending=False)
)
rate_count
Out[31]:
title
Forrest Gump (1994)                                                               329
Shawshank Redemption, The (1994)                                                  317
Pulp Fiction (1994)                                                               307
Silence of the Lambs, The (1991)                                                  279
Matrix, The (1999)                                                                278
                                                                                 ... 
Late Night Shopping (2001)                                                          1
Late Night with Conan O'Brien: The Best of Triumph the Insult Comic Dog (2004)      1
Late Shift, The (1996)                                                              1
Latter Days (2003)                                                                  1
'71 (2014)                                                                          1
Name: rating, Length: 9719, dtype: int64

Recommending movies based on user ratings, using the correlation between movies

In [24]:
# User x title matrix of ratings; NaN marks a title the user has not rated.
movie_mat = df.pivot_table(index='userId', columns='title', values='rating')
movie_mat.head(10)
Out[24]:
title '71 (2014) 'Hellboy': The Seeds of Creation (2004) 'Round Midnight (1986) 'Salem's Lot (2004) 'Til There Was You (1997) 'Tis the Season for Love (2015) 'burbs, The (1989) 'night Mother (1986) (500) Days of Summer (2009) *batteries not included (1987) ... Zulu (2013) [REC] (2007) [REC]² (2009) [REC]³ 3 Génesis (2012) anohana: The Flower We Saw That Day - The Movie (2013) eXistenZ (1999) xXx (2002) xXx: State of the Union (2005) ¡Three Amigos! (1986) À nous la liberté (Freedom for Us) (1931)
userId
1 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN 4.0 NaN
2 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
3 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
4 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
5 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
6 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
7 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
8 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
9 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN 1.0 NaN NaN NaN
10 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN

10 rows × 9719 columns

In [25]:
# Pairwise Pearson correlation between all title columns.
# method and min_periods are spelled out at their default values: with
# min_periods=1, a pair of titles that shares only a handful of common raters
# still receives a coefficient, which explains the spurious 1.0 values below.
corr = movie_mat.corr(method='pearson', min_periods=1)

Suggesting movies similar to Matrix, The (1999)

In [48]:
# Correlation of every title with The Matrix, strongest first (NaN sorts last).
x = corr.loc[:, 'Matrix, The (1999)'].sort_values(ascending=False)
x
Out[48]:
title
The Fate of the Furious (2017)                            1.0
Play It Again, Sam (1972)                                 1.0
Seconds (1966)                                            1.0
Garage Days (2002)                                        1.0
American Pop (1981)                                       1.0
                                                         ... 
Zoom (2015)                                               NaN
Zulu (2013)                                               NaN
[REC]³ 3 Génesis (2012)                                   NaN
anohana: The Flower We Saw That Day - The Movie (2013)    NaN
À nous la liberté (Freedom for Us) (1931)                 NaN
Name: Matrix, The (1999), Length: 9719, dtype: float64

We are getting correlations of 1 because only a few users have rated both of these movies and Matrix, The (1999)

Removing NaN values and the spurious correlations of 1 (by keeping only movies with more than 100 ratings)

In [49]:
# Put each title's correlation with The Matrix next to its rating count;
# the two Series are aligned on their shared title index.
predict_df = pd.DataFrame(dict(correlation=x, count=rate_count))
In [50]:
# Preview the correlation / rating-count table.
predict_df.head()
Out[50]:
correlation count
'71 (2014) NaN 1
'Hellboy': The Seeds of Creation (2004) NaN 1
'Round Midnight (1986) NaN 2
'Salem's Lot (2004) NaN 1
'Til There Was You (1997) NaN 2
In [51]:
# Drop titles whose correlation with The Matrix could not be computed (NaN).
# Rebinding instead of inplace=True avoids mutating the frame that an earlier
# cell already displayed.
predict_df = predict_df.dropna()
In [55]:
# Keep only titles with more than 100 ratings so the correlations rest on
# enough data, then show the five most correlated titles. The first row is
# The Matrix itself, with correlation 1.
print(
    predict_df.loc[predict_df['count'] > 100]
    .sort_values(by='correlation', ascending=False)
    .head()
)
                    correlation  count
Matrix, The (1999)     1.000000    278
Die Hard (1988)        0.544466    145
Inception (2010)       0.514767    143
Braveheart (1995)      0.496045    237
Aliens (1986)          0.470865    126

Recommending movies using collaborative filtering (nearest neighbours with cosine distance)

In [56]:
# Reminder of the ratings frame that feeds the collaborative-filtering model.
df.head()
Out[56]:
userId movieId rating title genres
0 1 1 4.0 Toy Story (1995) Adventure|Animation|Children|Comedy|Fantasy
1 5 1 4.0 Toy Story (1995) Adventure|Animation|Children|Comedy|Fantasy
2 7 1 4.5 Toy Story (1995) Adventure|Animation|Children|Comedy|Fantasy
3 15 1 2.5 Toy Story (1995) Adventure|Animation|Children|Comedy|Fantasy
4 17 1 4.5 Toy Story (1995) Adventure|Animation|Children|Comedy|Fantasy
In [108]:
# Title x user matrix of ratings (note: transposed relative to the earlier
# movie_mat, whose name is reused here); unrated entries are filled with 0.
movie_mat = (
    df.pivot_table(index='title', columns='userId', values='rating')
      .fillna(0)
)
movie_mat.head()
Out[108]:
userId 1 2 3 4 5 6 7 8 9 10 ... 601 602 603 604 605 606 607 608 609 610
title
'71 (2014) 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 4.0
'Hellboy': The Seeds of Creation (2004) 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
'Round Midnight (1986) 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
'Salem's Lot (2004) 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
'Til There Was You (1997) 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0

5 rows × 610 columns

In [128]:
# Same matrix with 'title' as a regular column and a 0..n-1 positional index,
# so a title can be mapped to its row number.
movie_mat_new = movie_mat.reset_index(drop=False)
In [130]:
# Preview the matrix with titles as a column.
movie_mat_new.head()
Out[130]:
userId title 1 2 3 4 5 6 7 8 9 ... 601 602 603 604 605 606 607 608 609 610
0 '71 (2014) 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 4.0
1 'Hellboy': The Seeds of Creation (2004) 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
2 'Round Midnight (1986) 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
3 'Salem's Lot (2004) 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
4 'Til There Was You (1997) 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0

5 rows × 611 columns

In [60]:
from scipy.sparse import csr_matrix
In [61]:
# Store the mostly-zero title x user matrix in compressed sparse row form.
vec_movie_mat = csr_matrix(movie_mat.values)
In [62]:
from sklearn.neighbors import NearestNeighbors
# Brute-force nearest-neighbour index over the title rows, using cosine distance.
knn = NearestNeighbors(algorithm='brute', metric='cosine')
knn.fit(vec_movie_mat)
Out[62]:
NearestNeighbors(algorithm='brute', leaf_size=30, metric='cosine',
                 metric_params=None, n_jobs=None, n_neighbors=5, p=2,
                 radius=1.0)

Suggesting movies similar to Matrix, The (1999)

In [141]:
# Locate the row (and its positional index) that belongs to The Matrix.
movie_mat_new.loc[movie_mat_new['title'] == 'Matrix, The (1999)']
Out[141]:
userId title 1 2 3 4 5 6 7 8 9 ... 601 602 603 604 605 606 607 608 609 610
5512 Matrix, The (1999) 5.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 ... 5.0 0.0 5.0 0.0 0.0 5.0 5.0 5.0 0.0 5.0

1 rows × 611 columns

In [142]:
# Rating vector of The Matrix as a (1, n_users) array, the shape kneighbors expects.
# The row is selected by its title label rather than by the hard-coded position
# 5512, so the lookup stays correct if the ratings data (and therefore the row
# order of movie_mat) changes.
x = movie_mat.loc['Matrix, The (1999)'].values.reshape(1, -1)
x
Out[142]:
array([[5. , 0. , 0. , 1. , 0. , 0. , 0. , 0. , 0. , 0.5, 0. , 0. , 5. ,
        0. , 4. , 3.5, 5. , 4.5, 4. , 0. , 4. , 0. , 0. , 4. , 5. , 0. ,
        0. , 4. , 0. , 5. , 0. , 0. , 5. , 3. , 0. , 0. , 0. , 0. , 5. ,
        0. , 2. , 5. , 0. , 0. , 5. , 0. , 0. , 5. , 4.5, 2.5, 0. , 5. ,
        0. , 0. , 0. , 0. , 5. , 0. , 1. , 0. , 0. , 5. , 3.5, 4. , 0. ,
        5. , 4.5, 4.5, 5. , 0. , 0. , 5. , 0. , 0. , 1. , 4. , 5. , 4.5,
        4. , 4.5, 0. , 4. , 0. , 0. , 0. , 4.5, 0. , 0. , 0. , 0. , 5. ,
        0. , 0. , 0. , 5. , 4. , 5. , 0. , 0. , 0. , 0. , 0. , 4. , 0. ,
        4. , 5. , 0. , 5. , 0. , 4. , 0.5, 5. , 0. , 0. , 3. , 0. , 0. ,
        0. , 4. , 0. , 0. , 5. , 4. , 0. , 5. , 0. , 0. , 0. , 4.5, 0. ,
        3. , 4.5, 0. , 0. , 5. , 0. , 4. , 0. , 0. , 4. , 3.5, 0. , 0. ,
        4. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 4. , 0.5, 0. , 0. , 3. ,
        0. , 0. , 0. , 4. , 0. , 0. , 0. , 0. , 4. , 4. , 0. , 0. , 4.5,
        0. , 0. , 0. , 0. , 0. , 0. , 0. , 3. , 4.5, 0. , 3.5, 0. , 5. ,
        0. , 0. , 0. , 3. , 4. , 0. , 4. , 3. , 0. , 0. , 0. , 0. , 3. ,
        5. , 5. , 5. , 4. , 5. , 0. , 4. , 2.5, 4.5, 0. , 0. , 0. , 5. ,
        5. , 5. , 5. , 3. , 0. , 0. , 0. , 0. , 3. , 0. , 5. , 5. , 2. ,
        4. , 3.5, 0. , 0. , 4. , 4.5, 4.5, 0. , 0. , 5. , 4.5, 4.5, 5. ,
        0. , 0. , 4. , 0. , 5. , 0. , 4. , 0. , 0. , 4. , 0. , 5. , 3.5,
        4. , 5. , 0. , 5. , 0. , 0. , 5. , 0. , 4.5, 0. , 5. , 0. , 0. ,
        4. , 0. , 4. , 4.5, 5. , 5. , 5. , 5. , 0. , 0. , 0. , 4. , 0. ,
        4. , 5. , 0. , 0. , 0. , 4.5, 0. , 0. , 5. , 0. , 0. , 0. , 4.5,
        0. , 3. , 0. , 0. , 0. , 4. , 0. , 0. , 0. , 4.5, 0. , 4. , 0. ,
        4. , 0. , 0. , 5. , 4. , 5. , 0. , 3.5, 3. , 4. , 0. , 0. , 3. ,
        5. , 0. , 0. , 4.5, 4. , 3.5, 5. , 0. , 0. , 4. , 3. , 0. , 0. ,
        3.5, 0. , 2. , 0. , 3.5, 0. , 5. , 0. , 4. , 5. , 5. , 0. , 0. ,
        0. , 0. , 0. , 0. , 5. , 5. , 0. , 0. , 0. , 0. , 0. , 0. , 4. ,
        4.5, 0. , 4. , 0. , 4. , 0. , 0. , 2.5, 0. , 0. , 4.5, 2.5, 0. ,
        0. , 0. , 0. , 4. , 4.5, 4. , 5. , 0. , 0. , 0. , 0. , 3.5, 0. ,
        0. , 0. , 4.5, 5. , 5. , 0. , 0. , 0. , 0. , 4. , 0. , 0. , 4.5,
        5. , 0. , 5. , 0. , 0. , 0. , 0. , 0. , 5. , 5. , 0. , 0. , 0. ,
        0. , 4.5, 0. , 5. , 4.5, 0. , 0. , 0. , 3. , 0. , 5. , 4.5, 1. ,
        5. , 0. , 4. , 4.5, 0. , 0. , 0. , 2. , 3.5, 0. , 0. , 4. , 0. ,
        5. , 0. , 3.5, 0. , 5. , 0. , 0. , 0. , 5. , 5. , 0. , 5. , 0. ,
        3.5, 0. , 5. , 0. , 0. , 2. , 0. , 0. , 0. , 5. , 5. , 0. , 0. ,
        0. , 0. , 0. , 4. , 4.5, 0.5, 0.5, 0. , 5. , 4. , 4. , 0. , 0. ,
        4. , 0. , 3.5, 0. , 0. , 4.5, 4.5, 0. , 4. , 0. , 0. , 5. , 0. ,
        0. , 4.5, 0. , 0. , 0. , 3. , 0. , 4. , 4. , 0. , 0. , 5. , 5. ,
        5. , 0. , 2.5, 0. , 0. , 0. , 0. , 0. , 4.5, 0. , 0. , 0. , 0. ,
        0. , 3.5, 0. , 5. , 0. , 0. , 4. , 0. , 0. , 2.5, 0. , 0. , 5. ,
        0. , 4. , 0. , 0. , 4. , 0. , 0. , 5. , 0. , 0. , 0. , 5. , 5. ,
        4. , 4. , 0. , 4. , 0. , 0. , 4. , 0. , 5. , 5. , 0. , 0. , 0. ,
        0. , 5. , 5. , 4.5, 4. , 4.5, 3.5, 5. , 5. , 0. , 4.5, 0. , 0. ,
        4.5, 4. , 5. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 4.5, 0. , 0. ,
        5. , 0. , 4. , 0. , 0. , 0. , 5. , 5. , 3.5, 5. , 0. , 0. , 0. ,
        5. , 4. , 0. , 0. , 4. , 5. , 0. , 2.5, 5. , 0. , 4. , 0. , 0. ,
        5. , 3. , 5. , 0. , 5. , 0. , 0. , 5. , 5. , 5. , 0. , 5. ]])
In [145]:
# Ask for 6 neighbours: the closest one is the query title itself (distance ~0),
# which leaves five actual suggestions.
distance, indices = knn.kneighbors(x, n_neighbors=6)
print(distance, indices)
[[3.77475828e-15 2.86062573e-01 2.99065234e-01 3.20385030e-01
  3.36553254e-01 3.39015510e-01]] [[5512 3011 8002 7391 8001 8003]]
In [146]:
# Print each neighbour's title next to its cosine distance, closest first.
for title, dist in zip(movie_mat.index[indices.flatten()], distance.flatten()):
    print(title, dist)
 
Matrix, The (1999) 3.774758283725532e-15
Fight Club (1999) 0.2860625732802825
Star Wars: Episode V - The Empire Strikes Back (1980) 0.29906523379388084
Saving Private Ryan (1998) 0.3203850301290394
Star Wars: Episode IV - A New Hope (1977) 0.3365532537559339
Star Wars: Episode VI - Return of the Jedi (1983) 0.3390155103982786
In [ ]:
 

Recommending movies using content-based filtering (TF-IDF on movie overviews)

In [147]:
# Load the TMDB movie metadata. Note that this rebinds `df`, which previously
# held the ratings frame.
df = pd.read_csv('tmdb_movies.csv')
In [148]:
# Preview the TMDB metadata.
df.head()
Out[148]:
budget genres homepage id keywords original_language original_title overview popularity production_companies production_countries release_date revenue runtime spoken_languages status tagline title vote_average vote_count
0 237000000 [{"id": 28, "name": "Action"}, {"id": 12, "nam... http://www.avatarmovie.com/ 19995 [{"id": 1463, "name": "culture clash"}, {"id":... en Avatar In the 22nd century, a paraplegic Marine is di... 150.437577 [{"name": "Ingenious Film Partners", "id": 289... [{"iso_3166_1": "US", "name": "United States o... 2009-12-10 2787965087 162.0 [{"iso_639_1": "en", "name": "English"}, {"iso... Released Enter the World of Pandora. Avatar 7.2 11800
1 300000000 [{"id": 12, "name": "Adventure"}, {"id": 14, "... http://disney.go.com/disneypictures/pirates/ 285 [{"id": 270, "name": "ocean"}, {"id": 726, "na... en Pirates of the Caribbean: At World's End Captain Barbossa, long believed to be dead, ha... 139.082615 [{"name": "Walt Disney Pictures", "id": 2}, {"... [{"iso_3166_1": "US", "name": "United States o... 2007-05-19 961000000 169.0 [{"iso_639_1": "en", "name": "English"}] Released At the end of the world, the adventure begins. Pirates of the Caribbean: At World's End 6.9 4500
2 245000000 [{"id": 28, "name": "Action"}, {"id": 12, "nam... http://www.sonypictures.com/movies/spectre/ 206647 [{"id": 470, "name": "spy"}, {"id": 818, "name... en Spectre A cryptic message from Bond’s past sends him o... 107.376788 [{"name": "Columbia Pictures", "id": 5}, {"nam... [{"iso_3166_1": "GB", "name": "United Kingdom"... 2015-10-26 880674609 148.0 [{"iso_639_1": "fr", "name": "Fran\u00e7ais"},... Released A Plan No One Escapes Spectre 6.3 4466
3 250000000 [{"id": 28, "name": "Action"}, {"id": 80, "nam... http://www.thedarkknightrises.com/ 49026 [{"id": 849, "name": "dc comics"}, {"id": 853,... en The Dark Knight Rises Following the death of District Attorney Harve... 112.312950 [{"name": "Legendary Pictures", "id": 923}, {"... [{"iso_3166_1": "US", "name": "United States o... 2012-07-16 1084939099 165.0 [{"iso_639_1": "en", "name": "English"}] Released The Legend Ends The Dark Knight Rises 7.6 9106
4 260000000 [{"id": 28, "name": "Action"}, {"id": 12, "nam... http://movies.disney.com/john-carter 49529 [{"id": 818, "name": "based on novel"}, {"id":... en John Carter John Carter is a war-weary, former military ca... 43.926995 [{"name": "Walt Disney Pictures", "id": 2}] [{"iso_3166_1": "US", "name": "United States o... 2012-03-07 284139100 132.0 [{"iso_639_1": "en", "name": "English"}] Released Lost in our world, found in another. John Carter 6.1 2124
In [149]:
# (rows, columns) of the TMDB metadata frame.
df.shape
Out[149]:
(4803, 20)
In [151]:
# Column dtypes and non-null counts of the TMDB metadata.
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4803 entries, 0 to 4802
Data columns (total 20 columns):
budget                  4803 non-null int64
genres                  4803 non-null object
homepage                1712 non-null object
id                      4803 non-null int64
keywords                4803 non-null object
original_language       4803 non-null object
original_title          4803 non-null object
overview                4800 non-null object
popularity              4803 non-null float64
production_companies    4803 non-null object
production_countries    4803 non-null object
release_date            4802 non-null object
revenue                 4803 non-null int64
runtime                 4801 non-null float64
spoken_languages        4803 non-null object
status                  4803 non-null object
tagline                 3959 non-null object
title                   4803 non-null object
vote_average            4803 non-null float64
vote_count              4803 non-null int64
dtypes: float64(3), int64(4), object(13)
memory usage: 750.6+ KB
In [152]:
# Missing values per column; `overview`, the text used below, has a few.
df.isnull().sum()
Out[152]:
budget                     0
genres                     0
homepage                3091
id                         0
keywords                   0
original_language          0
original_title             0
overview                   3
popularity                 0
production_companies       0
production_countries       0
release_date               1
revenue                    0
runtime                    2
spoken_languages           0
status                     0
tagline                  844
title                      0
vote_average               0
vote_count                 0
dtype: int64
In [154]:
# Keep only the title and the free-text overview used for content similarity.
df1 = df.loc[:, ['original_title', 'overview']]
df1.head()
Out[154]:
original_title overview
0 Avatar In the 22nd century, a paraplegic Marine is di...
1 Pirates of the Caribbean: At World's End Captain Barbossa, long believed to be dead, ha...
2 Spectre A cryptic message from Bond’s past sends him o...
3 The Dark Knight Rises Following the death of District Attorney Harve...
4 John Carter John Carter is a war-weary, former military ca...
In [259]:
# Expose the row position as an explicit 'index' column (used later as the
# merge key). The frame is rebuilt from `df` rather than calling
# reset_index(inplace=True) on df1: the in-place form is not idempotent, because
# a second run adds a 'level_0' column and a third run raises.
df1 = df[['original_title', 'overview']].reset_index()
df1.head()
Out[259]:
index original_title overview
0 0 Avatar In the 22nd century, a paraplegic Marine is di...
1 1 Pirates of the Caribbean: At World's End Captain Barbossa, long believed to be dead, ha...
2 2 Spectre A cryptic message from Bond’s past sends him o...
3 3 The Dark Knight Rises Following the death of District Attorney Harve...
4 4 John Carter John Carter is a war-weary, former military ca...
In [157]:
from sklearn.feature_extraction.text import TfidfVectorizer
In [160]:
# TF-IDF vectorizer that ignores common English stop words.
tf = TfidfVectorizer(stop_words='english')
In [162]:
# Vectorize every overview; missing overviews are treated as empty text so
# each movie still gets a row.
tf_mat = tf.fit_transform(df1['overview'].fillna(''))
In [163]:
# (number of overviews, number of vocabulary terms).
tf_mat.shape
Out[163]:
(4803, 20978)
In [184]:
from sklearn.metrics.pairwise import cosine_similarity
In [186]:
# Pairwise cosine similarity between every pair of rows of tf_mat;
# cosine_sim[i][j] compares the overview of movie i with that of movie j.
cosine_sim = cosine_similarity(tf_mat)
In [284]:
# Find the row (and its position) of the title we want suggestions for.
df1.loc[df1['original_title'] == 'The Dark Knight Rises']
Out[284]:
index original_title overview
3 3 The Dark Knight Rises Following the death of District Attorney Harve...

Suggesting movies similar to The Dark Knight Rises

In [287]:
# Similarity of every movie to The Dark Knight Rises, as a plain list.
# cosine_sim rows follow the row order of df1, so the row position is looked up
# by title (first match) instead of hard-coding position 3. An unknown title
# raises IndexError instead of silently using the wrong row.
similar = cosine_sim[
    np.flatnonzero((df1['original_title'] == 'The Dark Knight Rises').values)[0]
].tolist()
In [288]:
# Row positions and their similarity scores as two parallel lists.
# Built directly rather than with an element-by-element append loop.
ind = list(range(len(similar)))
val = list(similar)
     
In [289]:
# Two-column frame: movie row position ('index') and its similarity ('value').
dic = dict(index=ind, value=val)
rec_df = pd.DataFrame(data=dic)
In [290]:
# Attach title and overview to each similarity score via the shared 'index' key.
rec = pd.merge(rec_df, df1, on='index')
rec.head()
Out[290]:
index value original_title overview
0 0 0.024995 Avatar In the 22nd century, a paraplegic Marine is di...
1 1 0.000000 Pirates of the Caribbean: At World's End Captain Barbossa, long believed to be dead, ha...
2 2 0.000000 Spectre A cryptic message from Bond’s past sends him o...
3 3 1.000000 The Dark Knight Rises Following the death of District Attorney Harve...
4 4 0.010433 John Carter John Carter is a war-weary, former military ca...
In [297]:
# Seven highest-scoring rows; the top one is the query movie itself (score 1.0).
a = (
    rec.sort_values(by='value', ascending=False)
       .head(7)
)
In [298]:
 # Show the titles of the top matches; the first entry is the query movie itself.
 print(a['original_title'])
3                         The Dark Knight Rises
65                              The Dark Knight
299                              Batman Forever
428                              Batman Returns
1359                                     Batman
3854    Batman: The Dark Knight Returns, Part 2
119                               Batman Begins
Name: original_title, dtype: object

image