/**
 * GET /api/1/messages/natural?q=<text>
 *
 * Replies with an object describing the query text:
 *   - token:     result of natural's WordTokenizer#tokenize
 *   - stem:      result of String#tokenizeAndStem (patched in by the Lancaster stemmer)
 *   - sentiment: "POSITIVE", "NEGATIVE" or "NEUTRAL", taken from the sign of
 *                the score returned by the `sentiment` module
 *
 * A request without a single string `q` parameter is answered with HTTP 400
 * instead of crashing on `undefined.tokenizeAndStem()`.
 */
app.get("/api/1/messages/natural", function (req, res) {
  var text = req.query.q;
  if (typeof text !== 'string') {
    res.status(400).send({error: "query parameter 'q' must be a single string"});
    return;
  }

  console.log("natural language processing with"+text);
  var natural = require('natural');
  var sentiment = require('sentiment');

  // Declared locally: the original bare assignment leaked `tokenizer` into the
  // global scope (and throws a ReferenceError under strict mode).
  var tokenizer = new natural.WordTokenizer();
  // attach() is what makes tokenizeAndStem() available on strings.
  natural.LancasterStemmer.attach();

  var nltk = {};
  nltk.token = tokenizer.tokenize(text);
  nltk.stem = text.tokenizeAndStem();

  var score = sentiment(text).score;
  if (score > 0) {
    nltk.sentiment = "POSITIVE";
  } else if (score < 0) {
    nltk.sentiment = "NEGATIVE";
  } else {
    nltk.sentiment = "NEUTRAL";
  }

  console.log(nltk);
  res.send(nltk);
});
Example #2
0
    // Replies with [[month, count], ...]: for every month (doc.key[1]) the number
    // of hashtags in that month's rows that contain `keyword` (case-insensitive
    // substring match). On a view error the error is logged and [] is sent.
    // `params`, `keyword`, `req`, `res`, `natural` and `_` come from the enclosing scope.
    politicianDB.view('analysis', 'tweets_month_date', params, function (err, body) {
        if (err) {
            console.log(err);
            res.send(200, []);
            return;
        }

        console.log("trendyTopic")
        console.log(req.query.keyword);

        // Nothing below stems text; the call is kept from the original.
        // NOTE(review): presumably other code relies on this String.prototype
        // patch — confirm before removing.
        natural.LancasterStemmer.attach();

        var hashtagRegexp = /#([a-zA-Z0-9]+)/g;
        var wordsCount = {};

        body.rows.forEach(function (doc) {
            console.log(doc);
            var months = doc.key[1];
            // match() with a /g pattern returns null when the text has no hashtag;
            // assuming `_` is underscore/lodash, _.each then simply iterates nothing.
            var hashtags = doc.value.match(hashtagRegexp);
            console.log(hashtags);

            _.each(hashtags, function (word) {
                if (word.toLowerCase().indexOf(keyword.toLowerCase()) > -1) {
                    wordsCount[months] = (wordsCount[months] || 0) + 1;
                }
            });
        });

        var tuples = [];
        for (var key in wordsCount) tuples.push([key, wordsCount[key]]);

        res.send(200, tuples);
    });
Example #3
0
  // Downloads `url`, strips the markup, stems the text and replies with the most
  // frequent stem. `url`, `stripHTML`, `natural`, `headers` and `res` come from
  // the enclosing scope.
  request(url, function (error, response, body) {
    if (error || response.statusCode != 200) {
      // The original sent nothing on this path, leaving the client request open.
      if (error) console.log(error);
      res.writeHead(502, headers);
      res.end();
      return;
    }

    var text = stripHTML(body);
    natural.LancasterStemmer.attach();
    var tokens = text.tokenizeAndStem();

    // Null-prototype map so a stem such as "constructor" cannot collide with a
    // property inherited from Object.prototype.
    var freq = Object.create(null);
    var i;

    for (i = 0; i < tokens.length; i++) {
      // Ignore junk tokens: anything containing a digit, the literals
      // "undefined"/"nbsp", and over-long strings. The original used `break`
      // here, which stopped counting at the first junk token and ignored the
      // rest of the page.
      if (/[\d]+/.test(tokens[i]) || tokens[i]==='undefined' || tokens[i]==='nbsp' || tokens[i].length > 40) continue;
      freq[tokens[i]] = (freq[tokens[i]] || 0) + 1;
    }

    // Walk the tokens in document order so that, on a tie, the stem that
    // appears first wins. Skipped tokens have no entry and never compare greater.
    var importantWord = '';
    var importantFreq = 0;
    for (i = 0; i < tokens.length; i++) {
      if (freq[tokens[i]] > importantFreq) {
        importantWord = tokens[i];
        importantFreq = freq[tokens[i]];
      }
    }

    res.writeHead(200, headers);
    res.end(importantWord);
  })
Example #4
0
// Module dependencies and shared constants.
var search = require('./search.js');
// Matches four letters followed by three digits, case-insensitively
// (presumably a course code — confirm against callers).
// NOTE(review): the `g` flag makes .test()/.exec() stateful through lastIndex.
var COURSE_REGEX = /[a-z]{4}[0-9]{3}/gi;
var natural = require('natural')
var jsonfile = require('jsonfile')
var fs = require('fs');
var WORDS = require('../WORDS.js').WORDS;
// Module-level side effect: patches String.prototype so strings gain the
// Lancaster-stemmer helpers (e.g. tokenizeAndStem()).
natural.LancasterStemmer.attach();
var NGrams = natural.NGrams;
var path = require('path');

// NOTE(review): redundant re-declaration — same pattern as the COURSE_REGEX above.
var COURSE_REGEX = /[a-z]{4}[0-9]{3}/gi;


// useful functions

/**
 * Returns a new array containing the first occurrence of each element of this
 * array, in the original order. The receiver is not modified.
 *
 * Elements are compared through their property-key form, exactly as before:
 * 1 and "1" count as the same value, and all plain objects collapse into a
 * single entry ("[object Object]").
 *
 * (The original comment credits underscore.js for the approach.)
 *
 * @returns {Array} the de-duplicated copy
 */
Array.prototype.unique = function(){
   // A null-prototype table plus a borrowed hasOwnProperty: with a plain `{}`
   // an element equal to "hasOwnProperty" overwrote the method the loop was
   // calling (TypeError on the next iteration), and "__proto__" could never be
   // recorded as seen.
   var seen = Object.create(null), result = [];
   var hasOwn = Object.prototype.hasOwnProperty;
   for(var i = 0, l = this.length; i < l; ++i){
      if(hasOwn.call(seen, this[i])) {
         continue;
      }
      result.push(this[i]);
      seen[this[i]] = 1;
   }
   return result;
};
// Membership test: true when `element` occurs in this array as decided by
// Array.prototype.indexOf (strict equality, so NaN is never reported as found).
Array.prototype.contains = function(element){
    var position = this.indexOf(element);
    return position !== -1;
};
Example #5
0
    // Replies with the @mentions found in the rows of the 'tweets_by_name' view,
    // lower-cased and ordered from most to least frequent. On a view error the
    // error is logged and [] is sent.
    // `params`, `res`, `natural` and `_` come from the enclosing scope.
    politicianRelationshipDB.view('analysis', 'tweets_by_name', params, function (err, body) {
        if (err) {
            console.log(err);
            res.send(200, []);
            return;
        }

        // Nothing below stems text; the call is kept from the original.
        // NOTE(review): presumably other code relies on this String.prototype
        // patch — confirm before removing.
        natural.LancasterStemmer.attach();

        var mentionRegexp = /@([a-zA-Z0-9]+)/g;
        var wordsMap = {};
        body.rows.forEach(function (doc) {
            // match() returns null when a row has no mention; assuming `_` is
            // underscore/lodash, _.each then iterates nothing.
            _.each(doc.value.match(mentionRegexp), function (word) {
                var mention = word.toLowerCase();
                wordsMap[mention] = (wordsMap[mention] || 0) + 1;
            });
        });

        // Serialised explicitly: concatenating the object itself only ever
        // printed "wordsMap=[object Object]".
        console.log('wordsMap=' + JSON.stringify(wordsMap));

        var tuples = [];
        for (var key in wordsMap) tuples.push([key, wordsMap[key]]);

        // Highest count first.
        tuples.sort(function (a, b) {
            return b[1] - a[1];
        });

        // The original loop broke after index 500, i.e. it kept at most 501
        // entries; that cap is preserved.
        var list = tuples.slice(0, 501).map(function (tuple) {
            return tuple[0];
        });

        console.log(list);
        res.send(200, list);
    });
Example #6
0
 // Tokenizes `text` and returns the stemmed tokens produced by the
 // Lancaster stemmer's String#tokenizeAndStem.
 function normalize(text) {
   var stemmer = natural.LancasterStemmer;
   // attach() is what makes tokenizeAndStem() available on strings.
   stemmer.attach();
   // Open question carried over from the original: URLs may need stripping first.
   var stems = text.tokenizeAndStem();
   return stems;
 }
Example #7
0
    // Builds a two-level tree of the most frequent stems in the rows of the
    // 'tweets_by_name' view and replies with
    //   {name: 'flare', children: [{name: 'groupN', children: [{name, size}, ...]}, ...]}
    // or with [] when no stem occurs more than once. On a view error the error
    // is logged and [] is sent.
    // `params`, `res`, `natural` and `_` come from the enclosing scope.
    politicianRelationshipDB.view('analysis', 'tweets_by_name', params, function (err, body) {
        if (err) {
            console.log(err);
            res.send(200, []);
            return;
        }

        var stopWords = ['i', 'and', 'you', ',', '.', '?', 'will', 'just', 'http', 'co', 'it', 'the', 'a', 'so', 'today',
        '1', '2', '3', '4','25','10','12', '6', 'in', 'at', 'rt', 'on','https','he','she','no','not','is','are','am','yes','how','when','why','what',
        'via','think','let','day','wil','if','go','ask','tell','off','amp','back','good','say','speak','read','that','which','want','from','ok','about',
        'please','need','we','now'];
        natural.LancasterStemmer.attach();

        // Null-prototype map: with a plain {} a stem such as "constructor" was
        // already "in" the map and its count became a garbage string.
        var wordsMap = Object.create(null);
        body.rows.forEach(function (doc) {
            _.each(doc.value.tokenizeAndStem(), function (word) {
                var stem = word.toLowerCase();
                if (!_.contains(stopWords, stem)) {
                    wordsMap[stem] = (wordsMap[stem] || 0) + 1;
                }
            });
        });

        var tuples = [];
        for (var key in wordsMap) tuples.push([key, wordsMap[key]]);

        // Highest count first.
        tuples.sort(function (a, b) {
            return b[1] - a[1];
        });

        var GROUP_SIZE = 10;
        // The original loop examined indices 0..200 before breaking.
        var limit = Math.min(tuples.length, 201);
        var groups = [];
        var current = [];

        for (var i = 0; i < limit; i++) {
            var count = tuples[i][1];
            // Sorted descending, so nothing after the first count <= 1 qualifies.
            if (count <= 1) {
                break;
            }
            current.push({name: tuples[i][0], size: count});
            // Close a group only when an entry was just added and it is full.
            // The original tested `n % 10 == 0` on every iteration, which
            // emitted empty groups whenever the counter sat on a multiple of 10
            // (including 0) while non-qualifying stems were being skipped.
            if (current.length === GROUP_SIZE) {
                groups.push({name: 'group' + (groups.length + 1), children: current});
                current = [];
            }
        }
        // Flush the trailing partial group; the original silently dropped it.
        if (current.length > 0) {
            groups.push({name: 'group' + (groups.length + 1), children: current});
        }

        console.log(groups);
        if (groups.length == 0) {
            res.send(200, []);
        } else {
            res.send(200, {name: 'flare', children: groups});
        }
    });
Example #8
0
    // Replies with up to six of the most frequent stems (each wrapped in a
    // one-element array, e.g. [["foo"], ["bar"]]) found in the rows of the
    // 'tweets_by_name' view; only stems occurring more than once qualify.
    // On a view error the error is logged and [] is sent.
    // `params`, `res`, `natural` and `_` come from the enclosing scope.
    politicianRelationshipDB.view('analysis', 'tweets_by_name', params, function (err, body) {
        if (err) {
            console.log(err);
            res.send(200, []);
            return;
        }

        var stopWords = ['i', 'and', 'you', ',', '.', '?', 'will', 'just', 'http', 'co', 'it', 'the', 'a', 'so', 'today',
        '1', '2', '3', '4','5','6','7','8','9','0', 'in', 'at', 'rt', 'on','amp'];
        natural.LancasterStemmer.attach();

        // Null-prototype map: with a plain {} a stem such as "constructor" was
        // already "in" the map and its count became a garbage string.
        var wordsMap = Object.create(null);
        body.rows.forEach(function (doc) {
            _.each(doc.value.tokenizeAndStem(), function (word) {
                var stem = word.toLowerCase();
                if (!_.contains(stopWords, stem)) {
                    wordsMap[stem] = (wordsMap[stem] || 0) + 1;
                }
            });
        });

        var tuples = [];
        for (var key in wordsMap) tuples.push([key, wordsMap[key]]);

        // Highest count first. The original logged both operands on every
        // comparison, flooding the log during the sort.
        tuples.sort(function (a, b) {
            return b[1] - a[1];
        });

        // The original broke out once its counter exceeded 5, i.e. after six entries.
        var MAX_WORDS = 6;
        var list = [];
        for (var i = 0; i < tuples.length && list.length < MAX_WORDS; i++) {
            var stemName = tuples[i][0];
            var count = tuples[i][1];
            // Sorted descending, so nothing after the first count <= 1 qualifies.
            if (count <= 1) {
                break;
            }
            console.log(stemName);
            console.log(count);
            list.push([stemName]);
        }

        res.send(200, list);
    });
Example #9
0
    // Replies with one {lat, lng, name, content} entry for every row of the
    // 'geolocation' view whose text has an alphanumeric word containing
    // `keyword` (case-insensitive substring match). Rows are read as
    // key = [[lat, lng], name] and value = text. On a view error the error is
    // logged and [] is sent.
    // `keyword`, `res`, `natural` and `_` come from the enclosing scope.
    politicianDB.view('analysis', 'geolocation',  function (err, body) {
        if (err) {
            console.log(err);
            res.send(200, []);
            return;
        }

        console.log("keyword=" + keyword);

        // Nothing below stems text; the call is kept from the original.
        // NOTE(review): presumably other code relies on this String.prototype
        // patch — confirm before removing.
        natural.LancasterStemmer.attach();

        var wordRegexp = /([a-zA-Z0-9]+)/g;
        var matches = [];

        body.rows.forEach(function (doc) {
            var name = doc.key[1];
            var found = false;
            console.log("name=" + name);

            // match() returns null when the text has no alphanumeric run;
            // assuming `_` is underscore/lodash, _.each then iterates nothing.
            _.each(doc.value.match(wordRegexp), function (word) {
                if (word.toLowerCase().indexOf(keyword.toLowerCase()) > -1) {
                    console.log("word=" + word);
                    found = true;
                }
            });

            if (found) {
                console.log("doc found=" + doc.value);
                matches.push({lat:doc.key[0][0], lng:doc.key[0][1], name:name, content:doc.value});
            }
        });

        res.send(200, matches);
    });
Example #10
0
   // Transform step: tokenizes and stems the chunk's text with the Lancaster
   // stemmer, serialises the stem list as JSON, hands it to `done` and also
   // emits it as a "data" event on `stream`.
   // NOTE(review): if `stream` is a Node Transform, done(null, payload) already
   // pushes the payload, so the explicit emit may deliver it twice — confirm.
   stream._transform = function (chunk, enc, done) {
	var stems = natural.LancasterStemmer.tokenizeAndStem(chunk.toString());
	var payload = JSON.stringify(stems);
	done(null, payload);
	stream.emit("data", payload);
   };