Example #1
_.each(options.ngrams, function(ngram){
  var keywordsForNgram;
  var tf = new Tf();
  // Optionally replace natural's default tokenizer: `true` selects a
  // built-in word-boundary regexp, while a RegExp value is reused directly
  // (rebuilt with the global flag).
  if (options.alternativeTokenizer === true) {
    natural.NGrams.setTokenizer(new natural.RegexpTokenizer({pattern: /\b[^\s]+\b/g, gaps: false}));
  }
  else if (options.alternativeTokenizer) {
    natural.NGrams.setTokenizer(new natural.RegexpTokenizer({pattern: new RegExp(options.alternativeTokenizer.source, "g"), gaps: false}));
  }
  // Join each n-gram into a single lower-cased term (optionally stemming
  // each word first) so term frequency is counted per phrase.
  var tokenized = _.map(natural.NGrams.ngrams(text, ngram), function(gram){
    if (options.stem){
      gram = _.map(gram, stem);
    }
    return gram.join(' ').toLowerCase();
  });
  tf.addDocument(tokenized);
  // 0 = no minimum frequency; the method name is spelled this way upstream.
  keywordsForNgram = tf.listMostFrequestTerms(0);
  // Keep only terms that pass the caller-supplied phrase filter.
  keywordsForNgram = _.select(keywordsForNgram, function(item){
    return usePhrase(item.term, options);
  });
  results = results.concat(keywordsForNgram);
});
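For context, a minimal self-contained sketch of the same technique: join each n-gram into one term so the term-frequency counter tallies whole phrases. The sample text is made up, and the snippet's surrounding bindings (text, options, results, stem, usePhrase) are replaced with inline values; `require('tf')` for the Tf constructor is an assumption, since the example does not show its import.

var natural = require('natural');
var Tf = require('tf');

var text = 'the quick brown fox jumps over the quick brown dog';
var tf = new Tf();

// Join each bigram into a single term so frequency is counted per phrase.
var tokenized = natural.NGrams.ngrams(text, 2).map(function (gram) {
  return gram.join(' ').toLowerCase();
});

tf.addDocument(tokenized);
// 'the quick' and 'quick brown' each occur twice, so they rank first.
console.log(tf.listMostFrequestTerms(0));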
Example #2
Trigrams.prototype.tokenize = function (str) {
  var tokens = [];
  var grams = natural.NGrams.trigrams(str);
  // Flatten each three-word gram into a single space-separated token.
  grams.forEach(function(set, i) {
    tokens[i] = "";
    set.forEach(function(el, j) {
      tokens[i] += el + ' ';
    });
    // trim() returns a new string, so reassign to drop the trailing space.
    tokens[i] = tokens[i].trim();
  });
  return tokens;
};
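A short usage sketch, assuming Trigrams is an otherwise empty constructor and natural is required in the enclosing module (both assumptions; the example shows only the prototype method). The body here uses join(' ') as an equivalent one-liner for the loop-and-trim above:

var natural = require('natural');

function Trigrams() {}  // assumed minimal constructor

Trigrams.prototype.tokenize = function (str) {
  return natural.NGrams.trigrams(str).map(function (set) {
    return set.join(' ');  // same result as the concatenate-then-trim loop
  });
};

console.log(new Trigrams().tokenize('the quick brown fox jumps'));
// -> [ 'the quick brown', 'quick brown fox', 'brown fox jumps' ]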
Example #3
_.each(_.keys(phrases), function(phrase){
  // For an n-word phrase, check its (n-1)-grams for shorter phrases that
  // are already counted.
  var ngramToTry = phrase.split(' ').length - 1;

  if (ngramToTry < 1) return;

  _.each(natural.NGrams.ngrams(phrase, ngramToTry), function(ngram){
    var subPhrase = ngram.join(' ');
    if (phrases[subPhrase]){
      // Drop the sub-phrase when it occurs mostly inside the longer phrase,
      // i.e. their frequency ratio meets the (1 - cutoff) threshold.
      if (!cutoff || (phrases[phrase] / phrases[subPhrase]) >= (1 - cutoff)){
        delete combined[subPhrase];
      }
    }
  });
});
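A minimal sketch of exercising this pruning pass, wrapped in a hypothetical prune helper (the wrapper name and sample data are assumptions; phrases maps each phrase to its frequency, combined starts as a copy of it, and cutoff is a ratio in [0, 1], as in the snippet):

var _ = require('underscore');
var natural = require('natural');

// Hypothetical wrapper around the pass above: returns the surviving phrases.
function prune(phrases, cutoff) {
  var combined = _.clone(phrases);
  _.each(_.keys(phrases), function (phrase) {
    var ngramToTry = phrase.split(' ').length - 1;
    if (ngramToTry < 1) return;
    _.each(natural.NGrams.ngrams(phrase, ngramToTry), function (ngram) {
      var subPhrase = ngram.join(' ');
      if (phrases[subPhrase] &&
          (!cutoff || (phrases[phrase] / phrases[subPhrase]) >= (1 - cutoff))) {
        delete combined[subPhrase];
      }
    });
  });
  return combined;
}

console.log(prune({ 'quick brown fox': 3, 'quick brown': 3, 'brown': 10 }, 0.5));
// 'quick brown' is subsumed by 'quick brown fox' (ratio 3/3 >= 0.5) and is
// dropped; 'brown' survives because 3/10 < 0.5.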