function [Y w] = tfidf2( X , param)
% FUNCTION applies TF-IDF weighting to a word count matrix.
%
% [Y w] = tfidf2( X );
% [Y w] = tfidf2( X, param );
%
% INPUT :
% X     - word count vectors (one column = one document)
% param - (optional) name of the term-frequency variant:
%           'Boolean'     - term frequencies come from tf1 (default)
%           'Logarithmic' - term frequencies come from tf2
%
% OUTPUT :
% Y - TF-IDF weighted matrix, same size as X
% w - IDF weights, one per row of X (useful to process other documents)
%
% An unrecognised param raises the error 'tfidf2:unknownParam'.

% The documented one-argument call must work, so fall back to the first
% variant when param is missing or empty.
if nargin < 2 || isempty(param)
    param = 'Boolean';
end

% pick the term-frequency variant
switch param
    case 'Boolean'
        F = tf1( X );
    case 'Logarithmic'
        F = tf2( X );
    otherwise
        % Without this branch Y and w would stay unassigned and the caller
        % would only see a generic "output argument not assigned" failure.
        error('tfidf2:unknownParam', ...
              'param must be ''Boolean'' or ''Logarithmic''.');
end

% get inverse document frequencies (one weight per row of X)
w = idf( X );

% TF * IDF : replicate the weight column across all documents
Y = F .* repmat( w, 1, size(X,2) );
end
function Y = tf1( X )
% SUBFUNCTION computes relative word frequencies per document.
% Each column of X is divided by its own column sum. The caller selects
% this variant under the name 'Boolean', but the values returned are
% count fractions, not 0/1 indicators.
columnTotals = sum( X, 1 );
nRows = size( X, 1 );
denominator = repmat( columnTotals, nRows, 1 );
Y = X ./ denominator;
% 0/0 (an all-zero column) yields NaN; report those entries as zero
undefinedEntries = isnan( Y );
Y( undefinedEntries ) = 0;
end
function Y = tf2( X )
% SUBFUNCTION computes logarithmically scaled word frequencies.
%
% INPUT :
% X - word count vectors (one column = one document)
%
% OUTPUT :
% Y - log(1 + X), element-wise, same size as X
%
% The previous body divided each column by its sum, which made this
% variant indistinguishable from tf1. The log scaling is what the
% 'Logarithmic' name calls for; a zero count maps to zero because
% log(1 + 0) = 0.
Y = log( 1 + X );
% keep the "no NaN in the output" behaviour if X itself contains NaN
Y( isnan(Y) ) = 0;
end
function I = idf(X)
% SUBFUNCTION computes inverse document frequencies.
% X holds one document per column, so every row corresponds to one term.
% Returns a column vector with one weight per row of X.

% total number of documents
numDocs = size( X, 2 );

% for each term, count the documents in which it occurs at least once
occurs = ( X > 0 );
docCount = sum( occurs, 2 );

% compute idf for each term; the +1 keeps the denominator non-zero for
% terms that occur in no document (a term present in every document
% therefore gets a slightly negative weight)
smoothedCount = docCount(:) + 1;
I = log( numDocs ./ smoothedCount );
end