-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtfidf.py
More file actions
24 lines (18 loc) · 614 Bytes
/
tfidf.py
File metadata and controls
24 lines (18 loc) · 614 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
# This code is supporting material for the book
# Building Machine Learning Systems with Python
# by Willi Richert and Luis Pedro Coelho
# published by PACKT Publishing
#
# It is made available under the MIT License
import scipy as sp
def tfidf(t, d, D):
tf = float(d.count(t)) / sum(d.count(w) for w in set(d))
idf = sp.log(float(len(D)) / (len([doc for doc in D if t in doc])))
return tf * idf
a, abb, abc = ["a"], ["a", "b", "b"], ["a", "b", "c"]
D = [a, abb, abc]
print(tfidf("a", a, D))
print(tfidf("b", abb, D))
print(tfidf("a", abc, D))
print(tfidf("b", abc, D))
print(tfidf("c", abc, D))