Exemplo n.º 1
0
		// Classify `keyword` against the moods of the messages returned by the search
		// and respond with JSON `{ result: <number> }`, where 50 is the neutral start.
		// Each message's `mood` (stringified) is used as the training label; the labels
		// '1', '2', '4' and '5' are the ones that move the score away from 50.
		Message.search(searchOptions, function(error, result){
			if (error) {
				log.error('|homepage.getHomepage| Error getting classification -> ' + keyword, widget);
				utility.errorResponseJSON(res, 'Error getting classification');
				return;
			}

			// Start with the mood as neutral (at 50)
			var moodValue = 50;
			if (result.total === 0 || !result.messages || result.messages.length === 0) {
				// Nothing to train on: report the neutral value
				return res.send(JSON.stringify({ result: moodValue }));
			}

			// Add each message's stemmed & stop-word-filtered content to the training set.
			var filter = new StopwordsFilter();
			var classifier = new natural.BayesClassifier();
			natural.PorterStemmer.attach();
			for (var i = 0; i < result.messages.length; i++) {
				var filteredContent = filter.filter(result.messages[i].content, 'string');
				var stemmedContent = filteredContent.tokenizeAndStem();
				classifier.addDocument(stemmedContent, String(result.messages[i].mood));
			}

			classifier.train();

			// Next, classify the given keyword
			var stemmedKeyword = natural.PorterStemmer.stem(keyword);
			var keywordClassifications = classifier.getClassifications(stemmedKeyword);

			// Get the sum of all classification scores for the keyword (so we can convert to a percentage)
			var scoreSum = 0;
			for (var j = 0; j < keywordClassifications.length; j++) {
				scoreSum += (keywordClassifications[j].value * 100);
			}

			// Calculate the percentage match for each mood out of 100.
			// When every score is 0 the division would yield NaN, so the
			// percentages are left at 0 and the result stays neutral.
			var moodPercentages = { '1': 0, '2': 0, '3': 0, '4': 0, '5': 0 };
			if (scoreSum > 0) {
				for (var k = 0; k < keywordClassifications.length; k++) {
					var adjustedValue = ((keywordClassifications[k].value * 100) * 100) / scoreSum;
					moodPercentages[keywordClassifications[k].label] = adjustedValue;
				}
			}

			// Apply the adjusted values to the starting point of 50, to get the point on the scale.
			moodValue -= moodPercentages['1'] / 2;
			moodValue -= moodPercentages['2'] / 4;
			moodValue += moodPercentages['4'] / 4;
			moodValue += moodPercentages['5'] / 2;

			// If the score is zero, change to 5. Just so that red shows a little
			if (moodValue <= 0) {
				moodValue = 5;
			}

			return res.send(JSON.stringify({ result: moodValue }));
		});
Exemplo n.º 2
0
        // For each token, substitute the first matching emoji shortname into `args`.
        // Lookup order: blacklist (skip), whitelist, shortname stem, alias stem, keyword stem.
        tokens.forEach(t => {
            var lowered = t.toLowerCase();

            // Check the static Emoji mappings blacklist
            if (emojiMappings.blacklist.indexOf(lowered) !== -1) {
                return;
            }

            // Escape regex metacharacters so the token is always matched literally
            var escapedToken = t.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
            // Whole-word occurrences of the token that are not directly followed by ':'
            var tokenPattern = new RegExp('\\b' + escapedToken + '\\b(?!:)', 'g');
            var substitute = replacement => {
                // A function replacer keeps any '$' in the replacement literal
                args = args.replace(tokenPattern, () => replacement);
            };

            // Check the static Emoji mappings whitelist (own keys only, so words
            // such as "constructor" do not resolve to Object.prototype members)
            var whitelist = Object.prototype.hasOwnProperty.call(emojiMappings.whitelist, lowered)
                ? emojiMappings.whitelist[lowered]
                : undefined;

            if (whitelist) {
                substitute(whitelist);
                return;
            }

            var stem = natural.PorterStemmer.stem(lowered);

            // Check to see if the word directly matches an Emoji shortname
            for (var key in emoji) {
                var stemmedShortname = natural.PorterStemmer.stem(key.toLowerCase());

                if (stemmedShortname === stem) {
                    substitute(emoji[key].shortname);
                    return;
                }
            }

            // Check to see if the word matches an Emoji alias
            for (key in emoji) {
                var _emoji = emoji[key];

                if (_emoji.category === 'flags') {
                    continue;
                }

                if (_emoji.stemmedAliases.indexOf(stem) !== -1) {
                    substitute(emoji[key].shortname);
                    return;
                }
            }

            // Check to see if the word matches an Emoji keyword
            for (key in emoji) {
                _emoji = emoji[key];

                if (_emoji.stemmedKeywords.indexOf(stem) === -1) {
                    continue;
                }

                // The keyword only counts when one part of the emoji key stems to the same root
                var matchesKeyPart = key.split('_').some(shortNameToken =>
                    natural.PorterStemmer.stem(shortNameToken.toLowerCase()) === stem);

                if (matchesKeyPart) {
                    substitute(emoji[key].shortname);
                    // Stop at the first emoji that matches, like the earlier lookups do
                    return;
                }
            }
        });
Exemplo n.º 3
0
  /**
   * Upserts every course listed in `objDep.courses` and, once each course
   * document has been read back, hands it to `storeSections`.
   *
   * Emits 'increaseCount' on the shared `emitter` when it starts and
   * 'decreaseCount' once all courses have been processed (or immediately
   * when there are none).
   *
   * @param {Object} dbSchool stored school document (its `_id` is read)
   * @param {Object} dbTerm   stored term document (its `_id` is added to the course's `terms`)
   * @param {Object} objDep   scraped department; `objDep.courses` is iterated
   * @param {Object} dbDep    stored department document (`_id` and `abbr` are read)
   */
  function storeCourses(dbSchool, dbTerm, objDep, dbDep){
    emitter.emit('increaseCount')
    var courseCount = 0
      , courseEmitter = new EventEmitter()
    for ( var c in objDep.courses ){
      if ( !objDep.courses.hasOwnProperty(c) ){
        console.log('continuing', c);
        continue;
      }
      courseCount++
      // Declared locally; it used to leak as an implicit global
      var objCourse = objDep.courses[c]
      var search = {
          name: objCourse.title
        , department: dbDep['_id']
        , number: objCourse.number
      }
      , course = new structs.Course(search)
      , upsertData = course.toObject();
      delete(upsertData._id);
      delete(upsertData.terms);

      upsertData.school = dbSchool['_id']
      upsertData.departmentAbbr = dbDep.abbr

      // Search tokens: department abbreviation, number, name and (when present) description.
      // The abbreviation comes from dbDep because `course.department` only holds the id.
      var tokenParts = [dbDep.abbr, course.number, course.name]
      if( typeof objCourse['description'] !== 'undefined' ){
        upsertData.description = objCourse['description']
        tokenParts.push(objCourse['description'])
      }
      upsertData._tokens = natural.PorterStemmer.tokenizeAndStem(tokenParts.join(' ').trim())

      // The IIFE pins this iteration's values for the asynchronous callbacks
      structs.Course.update(search, {$set:upsertData,$addToSet:{terms:dbTerm._id}}, {upsert: true}, (function(dbTerm, objCourse, search){
        return function(err, numAffected){
          if ( err ){ throw err }
          structs.Course.findOne(search, function(err, dbCourse){
            if ( err ){ throw err }
            storeSections(dbSchool, dbTerm, objCourse, dbCourse, dbDep);
            courseEmitter.emit('decreaseCount'); })
        }})(dbTerm, objCourse, search)
      )
    }
    if ( courseCount === 0 ){
        emitter.emit('decreaseCount')
    }
    // Report completion to the shared emitter once the last course has finished
    courseEmitter.on('decreaseCount', function(){
      if ( --courseCount === 0 ){
        emitter.emit('decreaseCount')
      }
    })
  }
Exemplo n.º 4
0
 _stemText(text) {
   if (this.customStemmer) {
     return this.customStemmer(text)
   } else {
     return natural.PorterStemmer.tokenizeAndStem(text)
   }
 }
Exemplo n.º 5
0
/**
 * Classifies a query string into an action label.
 *
 * The query is tokenized and stemmed with the Porter stemmer, then passed to
 * the module-level `classifier`.
 *
 * @param {string} query text to classify
 * @returns {*} whatever `classifier.classify` returns for the stemmed tokens
 */
function get_action(query){
	natural.PorterStemmer.attach();
	// Locals instead of implicit globals (which also throw in strict mode)
	var tokens = query.tokenizeAndStem();
	var action = classifier.classify(tokens);
	console.log("=----------- action is: " + action);
	return action;
}
Exemplo n.º 6
0
exports.stem = function (words) {
    var ret = [];
    for (var i = 0, len = words.length; i < len; ++i) {
        ret.push(natural.PorterStemmer.stem(words[i]));
    }
    return ret;
};
Exemplo n.º 7
0
/**
 * Searches the index for stemmed patterns.
 *
 * An array of patterns is joined with spaces first; the text is then
 * tokenized and stemmed and the lookup is delegated to
 * `KeyIndexer.prototype.search`.
 */
TextIndexer.prototype.search = function(patterns, callback) {
  var text = Array.isArray(patterns) ? patterns.join(' ') : patterns;
  var stems = stemmer.tokenizeAndStem(text);
  KeyIndexer.prototype.search.call(this, stems, callback);
};
Exemplo n.º 8
0
/**
 * Constructor: attaches the Porter stemmer helpers to String and starts
 * with zeroed counters and empty class/posting lists.
 */
function FeatureSelector() {
	natural.PorterStemmer.attach();

	// Counters
	this.index = 0;
	this.textLen = 0;
	this.docCount = 0;

	// Collections
	this.classes = [];
	this.postings = [];
}
Exemplo n.º 9
0
                    // Replace the token with this emoji's shortname when any underscore-separated
                    // part of the emoji key stems to the same root as the token. `some` stops at
                    // the first matching part (a `return` inside `forEach` never stopped the loop).
                    if (key.split('_').some(shortNameToken => natural.PorterStemmer.stem(shortNameToken.toLowerCase()) === stem)) {
                        // Escape regex metacharacters so the token is matched literally
                        args = args.replace(new RegExp('\\b' + t.replace(/[.*+?^${}()|[\]\\]/g, '\\$&') + '\\b(?!:)', 'g'), emoji[key].shortname);
                    }
Exemplo n.º 10
0
exports.create = function(req, res) {
  natural.PorterStemmer.attach();
  req.body.tags = req.body.tags.toLowerCase().tokenizeAndStem().sort()
  
  Functions.create(req.body, function(err, c2) {
    if(err) { return handleError(res, err); }
    return res.json(201, c2);
  });
};
Exemplo n.º 11
0
/**
 * Returns the indexable words of `doc[field]`.
 *
 * - field is not an own property: empty array
 * - string value: its tokenized and stemmed words
 * - any other value: the value itself wrapped in an array
 */
var _getWords = function(doc, field) {
  if (!doc.hasOwnProperty(field)) {
    return [];
  }
  var value = doc[field];
  return typeof value === 'string' ? stemmer.tokenizeAndStem(value) : [value];
};
Exemplo n.º 12
0
 /**
  * Stems `word` when `usePhrase` accepts it; otherwise returns it unchanged.
  * As a side effect, `unstemmed` records the shortest word seen for each stem
  * so the stem can be mapped back to a readable word later.
  */
 var stem = function(word){
   if (usePhrase(word, options)) {
     var root = natural.PorterStemmer.stem(word);
     var known = unstemmed.hasOwnProperty(root);
     // Keep the shortest original word per stem
     if (!known || word.length < unstemmed[root].length){
       unstemmed[root] = word;
     }
     return root;
   }
   // Unused words are not worth stemming
   return word;
 };
Exemplo n.º 13
0
 // Read every listed file and record its content, unique stemmed tokens and base name.
 for (const filePath of Array.from(fileList)) {
   const fileContent = fs.readFileSync(filePath, {encoding: 'utf-8'});
   const uniqueTokens = new Set(natural.PorterStemmer.tokenizeAndStem(fileContent));
   contentList.push({
     content: fileContent,
     tokens: Array.from(uniqueTokens),
     type: 'file',
     filename: path.basename(filePath)
   });
 }
Exemplo n.º 14
0
Arquivo: mood.js Projeto: diesire/mood
    stemmer: function(text, language) {
        var stemmer;
        if (language === undefined) {
            language = 'es';
        }

        switch (language) {
            case 'es':
                //TODO: BUG: tools.stemmer_es uses PorterStemmer
                //natural.PorterStemmerEs.attach();
                natural.PorterStemmer.attach();
                break;
            case 'en':
                natural.PorterStemmer.attach();
                break;
            default:
                natural.PorterStemmer.attach();
        }
        return text.tokenizeAndStem();
    },
Exemplo n.º 15
0
    /**
     * Splits a tag string into lower-cased, stemmed tokens, dropping every
     * word that has an entry in `useless`.
     *
     * @param {string} tagString raw tag text
     * @returns {Array} stems of the remaining words, in input order
     */
    tagSchema.statics.tokenize = function (tagString) {
        var results = [],
            words = tokenizer.tokenize(tagString) || [];

        // Index loop: for...in on an array would also visit inherited enumerable keys
        for (var i = 0; i < words.length; i++) {
            var word = words[i].toLowerCase();

            // Own-key check so words like "constructor" are not mistaken for
            // stop words via Object.prototype
            var isUseless = Object.prototype.hasOwnProperty.call(useless, word) &&
                useless[word] !== undefined;

            if (!isUseless) {
                results.push(natural.PorterStemmer.stem(word));
            }
        }

        return results;
    };
Exemplo n.º 16
0
/**
 * Counts how often each known English stem occurs in `text`.
 *
 * @param {string} text input text
 * @returns {Object} map of stem -> occurrence count; only stems found in
 *   `eng_words` are included
 */
var organizeWords = function(text){
	var words = {};
	var hasOwn = Object.prototype.hasOwnProperty;
	var tokenizer = new natural.WordTokenizer();
	natural.PorterStemmer.attach();

	var imptWords = text.toLowerCase().tokenizeAndStem();
	var textArr = tokenizer.tokenize(text);

	// add real words into words object with count of 0
	for (var j = 0; j < imptWords.length; j++) {
		if (eng_words.indexOf(imptWords[j]) > -1) {
			words[imptWords[j]] = 0;
		}
	}

	// Count each token's stem; the own-property check replaces a per-token
	// Object.keys(words).indexOf scan, which was quadratic
	for (var i = 0; i < textArr.length; i++) {
		var root = natural.PorterStemmer.stem(textArr[i]);
		if (hasOwn.call(words, root)) {
			words[root] += 1;
		}
	}
	return words;
};
Exemplo n.º 17
0
// Child-process message handler: counts the tokens of the received text and
// how many distinct stems they reduce to, then reports both to the parent.
process.on('message', function(m) {
	// Prototype-less map so stems such as "__proto__" or "constructor" count as ordinary keys
	var stems = Object.create(null);
	var tokens = tokenizer.tokenize(m) || [];

	// Index loop: for...in on an array would also visit inherited enumerable keys
	for (var i = 0; i < tokens.length; i++) {
		// a stemmer removes endings like pluralization and 'ing'
		stems[natural.PorterStemmer.stem(tokens[i])] = true;
	}

	process.send({ message: m, total: tokens.length, unique: Object.keys(stems).length });
});
Exemplo n.º 18
0
/**
 * Keyword tagger backed by a bloom filter.
 *
 * @param {Object} options
 * @param {Object} options.content map of stemmed keyword -> suggestion
 * @throws {Error} when `options.content` is missing or is not an object
 *
 * Instance methods:
 *   tag(text)   -> stems `text`, remembers every stem that is a known keyword,
 *                  and returns the stems
 *   check(text) -> suggestion for the first stem the filter reports as seen,
 *                  or false when there is none
 */
var Psychonaut = function (options) {
  // Validate first: `content` must be a real (non-null) object
  if (!options || !options.content || typeof options.content !== 'object') {
    throw new Error('You need content to match keywords from');
  }

  var natural = require('natural');
  var BloomFilter = require('bloomfilter').BloomFilter;

  natural.PorterStemmer.attach();

  var bloom = new BloomFilter(
    32 * 256, // number of bits to allocate.
    16        // number of hash functions.
  );

  var hasOwn = Object.prototype.hasOwnProperty;

  // Own keys only, so words like "constructor" never resolve to inherited members
  var lookup = function (word) {
    return hasOwn.call(options.content, word) ? options.content[word] : undefined;
  };

  var getSuggested = function (word) {
    return lookup(word) || false;
  };

  var add = function (stemmed) {
    stemmed.forEach(function (word) {
      if (lookup(word)) {
        bloom.add(word);
      }
    });
  };

  this.tag = function (text) {
    var stemmed = text.tokenizeAndStem();

    add(stemmed);

    return stemmed;
  };

  this.check = function (text) {
    var stemmed = this.tag(text);

    for (var i = 0; i < stemmed.length; i ++) {
      if (bloom.test(stemmed[i])) {
        return getSuggested(stemmed[i]);
      }
    }

    return false;
  };
};
Exemplo n.º 19
0
 .then(function(translation) {
     if (translation.translations.length > 0) {
         return translation;
     } else {
         return translator.translate('en', lang, Natural.PorterStemmer.stem(translation.phrase))
             .then(function(translation) {
                 if (translation.translations.length > 0) {
                     return translation;
                 } else {
                     if (!transcription && translation.transcription) {
                         transcription = translation.transcription;
                     }
                     return yaTranslate(transcription);
                 }
             });
     }
 })
Exemplo n.º 20
0
// Split a string into stemmed tokens
// http://en.wikipedia.org/wiki/Tokenization_(lexical_analysis)
// Returns [] for input that is empty after chomp(). Tokens listed in
// `reserved` are prefixed with 'word_'.
function tokenize(str) {
	if (chomp(str) == '') { return [] }

	natural.PorterStemmer.attach();
	var stems = str.tokenizeAndStem()
	if (stems == null) { return [] }

	// Avoid reserved words
	var reserved = ['constructor']
	for (var idx = 0; idx < stems.length; idx++) {
		if (reserved.indexOf(stems[idx]) >= 0) {
			stems[idx] = 'word_' + stems[idx]
		}
	}
	return stems
}
Exemplo n.º 21
0
    // Look the word up in WordNet and build a descriptor
    // { word: <stem>, synonyms, speach, means } that is passed to `callback`.
    // When WordNet has no results, `worddata.words` is searched for an entry
    // whose aliases contain the stem.
    wordnet.lookup(word, function(results) {
      // Locals instead of implicit globals, so lookups cannot overwrite each other's state
      var stem, d, pos, possib;

      // get the word's stem
      stem = natural.PorterStemmer.stem(word);

      // create the data structure
      d = {
        word: stem,
        synonyms: [],
        speach: null,
        means: null
      };

      pos = [];

      if (results.length) {

        // loop through each result
        _.each(results, function(result) {
          d.synonyms = _.union(d.synonyms, result.synonyms);
          pos.push(result.pos);
        });

        // get the most frequent value
        d.speach = _.chain(pos).countBy().pairs().max(_.last).head().value() || null;
      } else {

        // otherwise, try and look it up
        possib = _.find(worddata.words, function(w) {
          return _.contains(w.aliases, stem);
        });

        // substitute the correct stuff
        if (possib) {
          d.speach = possib.speach;
          // d.action = possib.action || null;
          actions.push(possib.action);
        }
      }

      // callback
      callback(null, d);
    });
Exemplo n.º 22
0
Arquivo: app.js Projeto: ecto/lunr
// GET /search/:query
// Stems the query, loads the matching stem indexes and responds with the
// referenced documents, with their counts summed per document and sorted by
// count, highest first. Responds with status 500 when the lookup fails.
app.get('/search/:query', function (req, res) {
  var q = natural.PorterStemmer.tokenizeAndStem(req.params.query);
  db.stemIndex.find({ '_id': { '$in': q } }, function (err, rawMatches) {
    if (err || !rawMatches) {
      // A failed lookup used to crash on rawMatches.forEach
      res.statusCode = 500;
      return res.send({ error: 'Search failed' });
    }

    // Prototype-less map so stems such as "constructor" are not shadowed by
    // Object.prototype members
    var rawResults = Object.create(null);
    var results = [];

    // Create an object of type stem => [ docs ]
    rawMatches.forEach(function (rawMatch) {
      if (!rawResults[rawMatch._id]) {
        rawResults[rawMatch._id] = rawMatch.e;
      }
    });

    // Aggregate all doc stem counts
    Object.keys(rawResults).forEach(function (stem) {
      rawResults[stem].forEach(function (doc) {
        var index = -1;
        for (var i = 0; i < results.length; i++) {
          if (results[i]._id == doc._id) {
            index = i;
            break;
          }
        }
        if (index === -1) {
          results.push(doc);
        } else {
          results[index].count += doc.count;
        }
      });
    });

    // Sort docs by count
    results.sort(function (a, b) {
      if (a.count < b.count) return 1;
      else if (a.count > b.count) return -1;
      return 0;
    });

    res.send(results);
  });
});
Exemplo n.º 23
0
// Normalizes every entry of `arr` into a short key:
//  - entries are lower-cased and stemmed
//  - stems of 2 characters or fewer, and stems present in `stop`, are dropped
//  - stems of at most 8 characters that contain a non-digit are kept verbatim
//  - everything else is replaced by 8 characters derived from its murmur hash
function defl8(arr)
{
  var out = [];
  for(var idx = 0; idx < arr.length; idx++)
  {
    var word = natural.PorterStemmer.stem(arr[idx].toLowerCase());
    if(word.length <= 2 || stop[word]) continue;

    var keepVerbatim = word.length <= 8 && word.split(/\D+/).length > 1;
    if(keepVerbatim)
    {
      out.push(word);
      continue;
    }

    // rest hash'd: map each of the first 8 hex nibbles to a character of `unfreq`
    var hex = mmh.murmur32HexSync(word);
    var key = "";
    for(var n = 0; n < 8; n++)
    {
      key += unfreq.charAt(parseInt(hex.charAt(n), 16));
    }
    out.push(key);
  }
  return out;
}
Exemplo n.º 24
0
// Load all articles, add the text of each description to the TF-IDF index and
// print the terms of (up to) the first five articles.
Article.find({}, '', function(error, data){
	if (error) {
		console.log("Error in query "+error);
		// No data to process; carrying on would crash on data.length
		return;
	}
	console.log("Num articles "+data.length);
	natural.PorterStemmer.attach();

	// Local cheerio handle (used to leak as an implicit global)
	var $;
	for (var i = 0; i < data.length; i++) {
		$ = cheerio.load("<body>"+data[i].description+"</body>", { normalizeWhitespace: true, xmlMode: true});
		tfidf.addDocument($('body').text());
	}

	// Preview at most five articles, fewer when the collection is smaller
	var previewCount = Math.min(5, data.length);
	for (var j = 0; j < previewCount; j++) {
		console.log("article num "+j);
		$ = cheerio.load("<body>"+data[j].description+"</body>", { normalizeWhitespace: true, xmlMode: true});
		console.log($('body').text());
		tfidf.listTerms(j /*document index*/).forEach(function(item) {
		    console.log(item.term + ': ' + item.tfidf);
		});
	}
});
Exemplo n.º 25
0
 /**
  * Upserts every department listed in `objSchool.departments` and, once each
  * department document has been read back, hands it to `storeCourses`.
  *
  * Emits 'increaseCount' on the shared `emitter` when it starts and
  * 'decreaseCount' once all departments have been processed (or immediately
  * when there are none).
  *
  * @param {Object} objSchool scraped school; `objSchool.departments` is iterated
  * @param {Object} dbSchool  stored school document (its `_id` is read)
  * @param {Object} dbTerm    stored term document, passed through to `storeCourses`
  */
 function storeDepartments(objSchool, dbSchool, dbTerm){
   emitter.emit('increaseCount')
   var depEmitter = new EventEmitter()
     , depCount = 0
   for ( var d in objSchool.departments ){
     if ( !objSchool.departments.hasOwnProperty(d) ){
       continue
     }
     // Count only departments that are actually stored; counting skipped
     // keys meant the counter could never reach zero
     depCount++
     // Declared locally; it used to leak as an implicit global
     var objDep = objSchool.departments[d]
     var search = {
         name: objDep.text
       , abbr: objDep.abbr
       , school: dbSchool['_id']
     }
     , department = new structs.Department(search)
     , upsertData = department.toObject();
     upsertData._tokens = natural.PorterStemmer.tokenizeAndStem(upsertData.abbr + " " + upsertData.name);
     delete(upsertData._id);
     // The IIFE pins this iteration's values for the asynchronous callbacks
     structs.Department.update(search, upsertData, {upsert: true}, (function(dbSchool, dbTerm, objDep, search){
       return function(err, numAffected){
         if ( err ){ throw err }
         structs.Department.findOne(search, function(err, dbDep){
           if ( err ){ throw err }
           storeCourses(dbSchool, dbTerm, objDep, dbDep);
           depEmitter.emit('decreaseCount')
         })
       }})(dbSchool, dbTerm, objDep, search)
     )
   }
   // Nothing to store: release the counter right away, as storeCourses does
   if ( depCount === 0 ){
     emitter.emit('decreaseCount')
   }
   depEmitter.on('decreaseCount', function(){
     if ( --depCount === 0 ){
       emitter.emit('decreaseCount')
     }
   })
 }
var fs = require('fs');  // for reading text files

// Sample data for these examples. Prefixing ' ' coerces the value returned by
// readFileSync to a string.
var economy = ' ' + fs.readFileSync('data/texts/economy.txt');
var politics = ' ' + fs.readFileSync('data/texts/politics.txt');
var sports = ' ' + fs.readFileSync('data/texts/sports.txt');

var natural = require('natural');
var tokenizer = new natural.WordTokenizer();

// 1. Plain tokenization
var politicsTokens = tokenizer.tokenize(politics);
console.log("\n-- Tokenized sample text in politics.txt:");
console.log(politicsTokens);

// 2. Stemming a single word
var dogStem = natural.PorterStemmer.stem("dogs");
console.log("\n-- Use Porter Stemmer on a single word:");
console.log(dogStem);

// 3. Stemming whole texts through the String helpers
natural.PorterStemmer.attach();  // add methods to string

var sportsStems = sports.tokenizeAndStem();
console.log("\n-- Use Porter Stemmer text in file sports.txt:");
console.log(sportsStems);

console.log("dog dogs Dog dogged".tokenizeAndStem());

// 4. Train a Bayes classifier with one document per topic
var classifier = new natural.BayesClassifier();

classifier.addDocument(economy, 'economy');
classifier.addDocument(politics, 'politics');
classifier.addDocument(sports, 'sports');
classifier.train();
Exemplo n.º 27
0
Arquivo: app.js Projeto: ecto/lunr
// PUT /
// Indexes a document: stores the document index (title/content plus their
// stems) and records, for every stem, how often it occurs in this document.
// Responds with the constructed document index; the stem index updates are
// started but not awaited before the response is sent.
app.put('/', function (req, res) {
  if (!req.body || !req.body._id || !req.body.title || !req.body.content) {
    res.send('You must supply all required document variables', []);
    return;
  }

  // TODO sanitize content and title
  // construct document index
  var di = {
    _id: req.body._id,
    t: req.body.title,
    c: req.body.content,
    ts: natural.PorterStemmer.tokenizeAndStem(req.body.title),
    cs: natural.PorterStemmer.tokenizeAndStem(req.body.content)
  };
  db.documentIndex.save(di);

  // Aggregate stem counts.
  // Prototype-less map: on a plain object a stem such as "constructor" is
  // already truthy and would be "incremented" into NaN.
  var c = Object.create(null);
  di.cs.concat(di.ts).forEach(function (s) {
    c[s] = (c[s] || 0) + 1;
  });

  // Update stem indexes
  Object.keys(c).forEach(function (s) {
    // Construct stem index
    var si = {
      _id: di._id,
      count: c[s]
    };

    // Search for existing stem index
    db.stemIndex.find({ _id: s }, function (err, stem) {
      if (err || !stem) {
        // The response has already been sent; log instead of crashing on stem[0]
        console.error('Failed to look up stem index for "' + s + '"', err);
        return;
      }

      // Stem index not found, create it
      if (!stem[0]) {
        db.stemIndex.save({ '_id': s, 'e': [ si ] });
        return;
      }

      // Is the document included in the stem index?
      var found = stem[0].e.some(function (entry) {
        return entry._id == di._id;
      });

      if (found) { // This will only happen on document update
        // Document found in stem index, update it
        db.stemIndex.update({ '_id': s, 'e._id': di._id }, { '$set': { 'e.$.count': c[s] } });
      } else {
        // Document not found in stem index, push it
        db.stemIndex.update({ '_id': s }, { '$push': { 'e': si } });
      }
    });
  });

  // Respond with constructed document index
  res.send(di);
});
Exemplo n.º 28
0
var chats = require('./chats').list;

var natural = require('natural');
var TfIdf = natural.TfIdf;
var seeds = new TfIdf();
natural.PorterStemmer.attach();

// Seed the TF-IDF index with the stemmed first element of every chat entry.
for (var i = 0; i < chats.length; i += 1) {
	seeds.addDocument(chats[i][0].tokenizeAndStem());
}

// Conversation state; resetChat() restores these initial values.
var lastThree = ["","",""];
var possibleChats = [];
var count = 0;
var repliesGiven = [];

/**
 * Restores the module-level conversation state to its initial values.
 */
function resetChat(){
	count = 0;
	repliesGiven = [];
	possibleChats = [];
	lastThree = ["", "", ""];
}

function analyse(query){

	query = query.text;
	
	if(query.toLowerCase() == "thanks"){
		resetChat();
		return "No problem";
Exemplo n.º 29
0
/**
 * Adds a `stem()` helper to every string: returns `stemmer.stem` applied to
 * the primitive string value.
 */
String.prototype.stem = function() {
  var value = String(this);
  return stemmer.stem(value);
};
Exemplo n.º 30
0
 // Returns the Porter stem of a single word.
 this.stemWord = function(word) {
     var stemmed = Natural.PorterStemmer.stem(word);
     return stemmed;
 };