In [43]:
import pandas as pd
In [44]:
# Raw ratings export; the path is resolved relative to the notebook's working directory.
RATINGS_CSV = "ratings-20151128.csv"
ratings = pd.read_csv(RATINGS_CSV)
In [45]:
# Peek at the first five rows to check the column layout.
ratings.head(5)
Out[45]:
user politician like dislike
0 162108 4385 1 0
1 162108 5678 0 1
2 431695 53855 0 1
3 431695 53857 1 0
4 431695 53858 0 1
In [46]:
# Number of rows with a non-null `user` value.
print("total ratings: {}".format(ratings['user'].count()))
total ratings: 421318
In [47]:
# Summing the boolean mask counts the rows where `like` equals 1.
print("total positive ratings: {}".format((ratings['like'] == 1).sum()))
total positive ratings: 130590
In [48]:
# Summing the boolean mask counts the rows where `dislike` equals 1.
print("total negative ratings: {}".format((ratings['dislike'] == 1).sum()))
total negative ratings: 290725
In [49]:
# Count inconsistent rows: those where `like` and `dislike` are both 0 or both 1.
#
# Both conditions are combined into a single boolean mask with `&`. Chaining two
# `[...]` filters would apply a mask built on the full frame to an already-filtered
# frame, which pandas only accepts by reindexing the mask (and warns about).
double_0 = ((ratings['dislike'] == 0) & (ratings['like'] == 0)).sum()  # neither flag set
double_1 = ((ratings['dislike'] == 1) & (ratings['like'] == 1)).sum()  # both flags set
print("total error ratings: " + str(double_0 + double_1))
total error ratings: 3
In [50]:
# Distinct values in `user`; dropna=False keeps a NaN (if any) in the count.
print("unique users: {}".format(ratings['user'].nunique(dropna=False)))
unique users: 47267
In [51]:
# Distinct values in `politician`; dropna=False keeps a NaN (if any) in the count.
print("rated politicians: {}".format(ratings['politician'].nunique(dropna=False)))
rated politicians: 17397