// lib/extramath.js
// https://github.com/gnh1201/welsonjs

// DTM(Document-term Matrix): https://en.wikipedia.org/wiki/Document-term_matrix
/**
 * Binary document-term matrix builder.
 *
 * Documents are added as whitespace-separated strings. Each distinct word
 * becomes a column (in first-seen order) and each document becomes a row
 * whose cells are 1 when the word occurs in that document and 0 otherwise.
 *
 * Note: an empty or whitespace-only string tokenizes to [""], so the empty
 * string is then recorded as a term of that document.
 *
 * @constructor
 */
function DTM() {
    // Tokenized documents: one array of words per add() call.
    this.data = [];
    // Distinct words in first-seen order; defines the column order of toArray().
    this.terms = [];

    /**
     * Tokenizes a document on runs of whitespace and registers it.
     *
     * @param {string} s - Document text.
     */
    this.add = function(s) {
        var words = s.trim().split(/\s+/);
        words.forEach(function(word) {
            if (this.terms.indexOf(word) < 0) {
                this.terms.push(word);
            }
        }, this);
        this.data.push(words);
    };

    /**
     * Builds the matrix for every document added so far.
     *
     * @returns {number[][]} One row per document, one 0/1 column per term.
     */
    this.toArray = function() {
        return this.data.map(function(words) {
            return this.terms.map(function(term) {
                return words.indexOf(term) < 0 ? 0 : 1;
            });
        }, this);
    };
}

// Cosine similarity: https://en.wikipedia.org/wiki/Cosine_similarity
/**
 * Computes the cosine similarity of two numeric vectors.
 *
 * The loop runs over the indices of `A`, so `B` should have at least as many
 * elements; missing entries in `B` propagate as NaN.
 *
 * @param {number[]} A - First vector.
 * @param {number[]} B - Second vector.
 * @returns {number} dot(A, B) / (|A| * |B|); NaN when either magnitude is 0.
 */
function cos(A, B) {
    var dotproduct = 0;
    var mA = 0;
    var mB = 0;

    // The counter must be declared locally: assigning to an undeclared `i`
    // leaks a global in sloppy mode and throws ReferenceError in strict mode.
    for (var i = 0; i < A.length; i++) {
        dotproduct += (A[i] * B[i]);
        mA += (A[i] * A[i]);
        mB += (B[i] * B[i]);
    }

    mA = Math.sqrt(mA);
    mB = Math.sqrt(mB);

    return dotproduct / (mA * mB);
}

// Cartesian product: https://en.wikipedia.org/wiki/Cartesian_product
/**
 * Computes the Cartesian product of a list of arrays.
 *
 * @param {Array[]} arr - Arrays to combine, e.g. [[1, 2], [3, 4]].
 * @returns {Array[]} Every combination taking one element from each input
 *     array, in input order, e.g. [[1, 3], [1, 4], [2, 3], [2, 4]].
 *     An empty `arr` yields [[]]; any empty input array yields [].
 */
function cartesianProduct(arr) {
    // Start from the single empty combination and extend it one input array at a time.
    var combos = [[]];

    arr.forEach(function(choices) {
        var extended = [];
        combos.forEach(function(prefix) {
            choices.forEach(function(item) {
                extended.push(prefix.concat([item]));
            });
        });
        combos = extended;
    });

    return combos;
}
|
|
|
|
|
2022-11-06 12:32:03 +00:00
|
|
|
// Public API: the document-term matrix constructor and the cosine similarity helper.
exports.DTM = DTM;
exports.cos = cos;
|
2022-11-06 12:50:22 +00:00
|
|
|
exports.measureSimilarity = function(s1, s2) {
|
|
|
|
var dtm = new DTM();
|
|
|
|
dtm.add(s1);
|
|
|
|
dtm.add(s2);
|
|
|
|
var mat = dtm.toArray();
|
|
|
|
return cos(mat[0], mat[1]);
|
|
|
|
};
|
2022-11-06 16:36:54 +00:00
|
|
|
exports.cartesianProduct = cartesianProduct;

// Module metadata.
exports.VERSIONINFO = "ExtraMath module (extramath.js) version 0.0.4";
exports.AUTHOR = "catswords@protonmail.com";

// Re-export the host-provided `global` object and its `require`.
// NOTE(review): presumably expected by the welsonjs module loader — confirm.
exports.global = global;
exports.require = global.require;
|