コード例 #1
0
// WordNet data for the most recently processed text: one entry per input
// token, in input order. Each entry is an array of WordInfo objects, one per
// WordNet sense returned for that token.
var full_text_datasets = [];

/**
 * Tokenises the submitted text, looks every token up in WordNet and renders
 * the 'index' view once ALL lookups have completed.
 *
 * The previous version rendered after a fixed 5 second timer and pushed the
 * result array into full_text_datasets before any lookup had answered; it also
 * leaked several implicit globals. Rendering is now driven by a completion
 * counter, and all intermediate state is local.
 *
 * @param {string} userSearchString - not read here; the text is taken from
 *        req.body.userSearchString (kept for interface compatibility).
 * @param {object} res - response object; res.render('index', ...) is called once.
 * @param {object} req - request object; req.body.userSearchString is the text.
 *
 * Side effects: replaces the file-level `wordnetDatas` and `full_text_datasets`
 * with the results of this call.
 *
 * NOTE(review): `userSearch`, `wordnetDatas` and `WordInfo` are defined outside
 * this block. `userSearch` is presumably the raw search text set by the
 * caller - confirm. The argument list passed to WordInfo is unchanged from the
 * original; verify it against the constructor's signature.
 */
function SearchingWordNet( userSearchString, res, req ) {
  var natural = require('natural');
  var wordnet = new natural.WordNet('/usr/local/lib/node_modules/WNdb/dict');

  // Commas and periods become spaces, then split on single spaces and drop the
  // empty strings produced by consecutive spaces.
  // Known limitation: abbreviations that contain periods get split apart.
  var tokens = String(req.body.userSearchString)
    .replace(/[,.]/g, ' ')
    .split(' ')
    .filter(function (token) { return token.length !== 0; });

  var perToken = new Array(tokens.length); // slot i holds the senses of tokens[i]
  var pending = tokens.length;             // lookups that have not answered yet

  // Publishes the results and renders the page; runs exactly once.
  function finish() {
    full_text_datasets = perToken;
    wordnetDatas = Array.prototype.concat.apply([], perToken);
    res.render('index',{ title: 'NTNU Bioinformatics courses',
                         wordnetDatas: wordnetDatas,
                         targetStr : 'You have searched : '+ userSearch });
  }

  // Nothing to look up: render immediately with empty data.
  if ( pending === 0 ) {
    finish();
    return;
  }

  tokens.forEach(function (token, index) {
    wordnet.lookup(token, function (results) {
      // A word can have many senses; each becomes one WordInfo object.
      perToken[index] = results.map(function (result) {
        return new WordInfo(result.wordInfo, result.synsetOffset,
                            result.pos, result.lemma,
                            result.synonyms, result.gloss );
      });

      pending--;
      if ( pending === 0 )
        finish();
    }); // lookup
  }); // forEach
} // SearchingWordNet()
コード例 #2
0
ファイル: app.js プロジェクト: zweiein/WordnetWebsite
/**
 * Looks a single search term up in WordNet and renders the 'index' view with
 * one WordInfo object per sense found.
 *
 * Fixes over the previous version:
 *  - the term is taken from the `userSearchString` parameter instead of a
 *    global that the function never received;
 *  - the shared `wordnetDatas` array is emptied before it is refilled, so
 *    senses from an earlier, longer result set no longer linger at the end.
 *
 * @param {string} userSearchString - the word to look up.
 * @param {object} res - response object; res.render('index', ...) is called
 *        from the lookup callback.
 *
 * NOTE(review): `wordnetDatas` and `WordInfo` are defined outside this block.
 */
function SearchingWordNet( userSearchString, res ) {
  var natural = require('natural');
  var wordnet = new natural.WordNet('/usr/local/lib/node_modules/WNdb/dict');
  var query = String(userSearchString);

  wordnet.lookup(query, function(results) {
      // Reset in place so other holders of the array see the fresh content.
      wordnetDatas.length = 0;

      results.forEach(function(result) {
        wordnetDatas.push(new WordInfo(result.synsetOffset,
                                       result.pos,
                                       result.lemma,
                                       result.synonyms,
                                       result.gloss));
      });

      res.render('index',{ title: 'NTNU Bioinformatics courses',
                          wordnetDatas: wordnetDatas,
                          targetStr : 'You have searched : '+ query });
  });
} // SearchingWordNet()
コード例 #3
0
ファイル: index.js プロジェクト: sourabhdesai/mysite
	/**
	 * Tree node constructor: `name` is the seed word and `children` holds one
	 * WordNetter per synonym that has not been used yet and is present in the
	 * trie. Recursion stops when `level` reaches 0; such leaf nodes get no
	 * `children` property at all.
	 *
	 * Relies on `wn`, `used` and `trie` from the enclosing scope. Children are
	 * attached from inside the lookup callback.
	 */
	var WordNetter = function(seed,level) {
		var netter = this;
		netter.name = seed;
		if (level == 0) {
			return;
		}
		netter.children = [];

		// Adds one child for `synonym` unless it was already used or is not in the trie.
		function adopt(synonym) {
			if (used.indexOf(synonym) != -1) {
				return;
			}
			if (!trie.contains(synonym)) {
				return;
			}
			used.push(synonym);
			netter.children.push( new WordNetter(synonym, level - 1) );
			console.log("!");
		}

		wn.lookup(netter.name, function (senses) {
			if (senses.length == 0) {
				return;
			}
			senses.forEach(function (sense) {
				if (sense.synonyms.length != 0) {
					sense.synonyms.forEach(adopt);
				}
			});
		});

		// Currently unused: returns the sense with the most synonyms, scanning
		// from the last sense towards the first and replacing only on a strictly
		// larger synonym count.
		function findMostSynResult(results) {
			var best = results[0];
			var idx = results.length;
			while (idx-- > 0) {
				if (results[idx].synonyms.length > best.synonyms.length) {
					best = results[idx];
				}
			}
			return best;
		}
	};
コード例 #4
0
ファイル: server.js プロジェクト: AkshatSh/Topics
// Feature not currently supported
// At the end of a response look at the newWords and generate a sentiment score
// The new score is based on the average sentiment score of each peice of text with the word
/**
 * Post-processes the file-level `newWords` map: looks each word up in WordNet
 * (logging only) and either drops the word or averages its sentiment score.
 *
 * Iterating with Object.keys(...).forEach gives every word its own `wordPos`
 * array. With the previous `for...in` + `var`, all async lookup callbacks
 * shared a single function-scoped `wordPos`.
 *
 * Reads/writes the outer `newWords` object and uses the outer `wordnet`.
 */
function trainWords(){
    Object.keys(newWords).forEach(function(key){
        var wordPos = [];

        // Only adjectives are counted. For every sense seen once at least one
        // adjective sense has appeared, the running adjective count is recorded
        // and the array logged. Nothing else consumes wordPos.
        wordnet.lookup(key, function(results){
            var adjectives = 0;
            results.forEach(function(result){
                if (result.pos == "a"){
                    adjectives++;
                }
                if (adjectives > 0){
                    wordPos.push(adjectives);
                    console.log(wordPos);
                }
            });
        });

        // Words mentioned fewer than two times are ignored
        if (newWords[key].count < 2){
            delete newWords[key];
        } else {

            // New score = average sentiment over all mentions of the word
            newWords[key].SA = (newWords[key].SA/newWords[key].count);
        }
    });
}
コード例 #5
0
ファイル: utils1.js プロジェクト: erelsgl/nlu-server
/**
 * Collects the synonyms of every WordNet sense of `word`.
 *
 * Fixes: the lookup used the hard-coded word 'offer' instead of `word`, and
 * `out.concat = out.concat(results['synonyms'])` both overwrote the array's
 * concat method and read a property that does not exist on the results array,
 * so the callback always received an empty list.
 *
 * @param {string} word - word to look up.
 * @param {function} callback - called as callback(null, synonyms) where
 *        synonyms is the concatenation of each sense's `synonyms` array
 *        (duplicates are kept).
 */
function wordnetsyn(word, callback) {
	wordnet.lookup(word, function(results) {
		var out = []
		results.forEach(function(result) {
			// a sense without a synonyms list contributes nothing
			out = out.concat(result['synonyms'] || [])
		})
		callback(null, out)
	})
}
コード例 #6
0
ファイル: context.js プロジェクト: muggle98/hahaha
/**
 * Fetches the synonyms of the first WordNet sense of `word`.
 *
 * The lookup is asynchronous, so the value returned from inside the lookup
 * callback never reached the caller; synset() itself has always returned
 * undefined and still does. The result is now delivered through an optional
 * node-style callback, and a word with no senses yields an empty list instead
 * of throwing inside the lookup callback.
 *
 * @param {string} word - word to look up.
 * @param {function} [callback] - called as callback(null, synonyms).
 */
var synset = function (word, callback) {

    wordnet.lookup(word, function (results) {
        if (typeof callback !== 'function') {
            return;
        }
        var first = results && results.length > 0 ? results[0] : null;
        callback(null, (first && first.synonyms) || []);
    });
};
コード例 #7
0
ファイル: wordnet.js プロジェクト: aeisenhaber/ss-parser
exports.define = function (word, cb) {
  wordnet.lookup(word, function (results) {
    if (!_.isEmpty(results)) {
      cb(null, results[0].def);
    } else {
      cb("no results");
    }
  });
};
コード例 #8
0
ファイル: context.js プロジェクト: muggle98/hahaha
exports.synsetCosineSimilarity = function (line1, line2) //based on words and their wordnet synsets in the sentence
{
    wordsA = line1.tokenizeAndStem(); //remove stop words, stemming
    wordsB = line2.tokenizeAndStem();

    wordsAsyn = [];
    wordsBsyn = [];

    for(var i=0;i<wordsA.length;i++)
    {
        var set = wordnet.lookup(wordsA[i], function (results) {
            console.log(results);
            return results[0].synonyms;
        });
        console.log(set);
        if (set) {
            wordsAsyn.concat(set);
        } else {
            wordsAsyn.push(wordsA[i]);
        }
       
    }

    for (var i = 0; i < wordsB.length; i++) {
        var set = wordnet.lookup(wordsB[i], function (results) {
            return results[0].synonyms;
        });
        console.log(set);
        if (set) {
            wordsBsyn.concat(synset(wordsB[i]));
        } else {
            wordsBsyn.push(wordsB[i]);
        }

    }
    console.log("line A syn");
    console.log(wordsAsyn);
    console.log("line B syn");
    console.log(wordsBsyn);
    return COSINE.textCosineSimilarity(wordsAsyn, wordsBsyn);

}
コード例 #9
0
ファイル: utils1.js プロジェクト: erelsgl/nlu-server
/**
 * Returns the distinct verb lemmas among the WordNet synonyms of `seed`, with
 * underscores replaced by spaces.
 *
 * Fix: the filter was `value['pos'] = "v"`, an assignment. It was always
 * truthy, so every part of speech was included, and it overwrote `pos` on the
 * result objects. It is now a strict comparison.
 *
 * @param {string} seed - word whose synonyms are fetched.
 * @param {function} callback - called as callback(null, lemmas).
 */
function wordnetquickfetch(seed, callback)
{
	wordnet.lookupSynonyms(seed, function(results) {
		var output = []
		_.each(results, function(value){
			if (value['pos'] === "v")
				output.push(value['lemma'].split("_").join(" "))
		})
		callback(null, _.unique(output))
	})
}
コード例 #10
0
ファイル: analyzer.js プロジェクト: cuzzo/word_count
/**
 * Strips `prefix` from the start of `word` and reports whether the remainder
 * is unknown to WordNet (no senses => "negative").
 *
 * The lookup is asynchronous, so the synchronous return value is normally
 * still `false` when the function returns. Pass `callback` to receive the real
 * answer. The debug dump of the lookup results has been removed.
 *
 * @param {string} word - word to test.
 * @param {string} prefix - inserted verbatim into a regular expression
 *        ("^" + prefix), so regex metacharacters in it are interpreted.
 * @param {function} [callback] - called as callback(null, isNegative) once
 *        the lookup has answered.
 * @returns {boolean} the flag as known at return time (see above).
 */
function is_negative(word, prefix, callback) {
  var re = new RegExp("^" + prefix);
  var stripped = word.replace(re, "");
  var negative = false;
  wordnet.lookup(stripped, function(results) {
    negative = results.length === 0;
    if (typeof callback === "function") {
      callback(null, negative);
    }
  });
  return negative;
}
コード例 #11
0
ファイル: index.js プロジェクト: 1egoman/que-nlp
  /**
   * Builds a small description of `word` from WordNet and hands it to
   * `callback(null, d)`, where d = { word, synonyms, speach, means }:
   *   word     - Porter stem of the input word
   *   synonyms - union of the synonyms of all senses
   *   speach   - most frequent part-of-speech tag among the senses, or the
   *              `speach` of a matching entry in worddata.words when WordNet
   *              has nothing; null otherwise
   *   means    - always null here
   * (`speach` is the key spelling the rest of the program uses.)
   *
   * When the fallback entry matches, its `action` is also pushed onto the
   * outer `actions` array.
   *
   * Change: stem, d, pos and possib were implicit globals shared by every
   * call; they are now local to the lookup callback.
   */
  lookupWord = function(word, callback) {

    // define the word
    wordnet.lookup(word, function(results) {

      // get the word's stem
      var stem = natural.PorterStemmer.stem(word);

      // create the data structure
      var d = {
        word: stem,
        synonyms: [],
        speach: null,
        means: null
      };

      if (results.length) {
        var pos = [];

        // merge the synonyms and remember each sense's part of speech
        _.each(results, function(result) {
          d.synonyms = _.union(d.synonyms, result.synonyms);
          pos.push(result.pos);
        });

        // get the most frequent value
        d.speach = _.chain(pos).countBy().pairs().max(_.last).head().value() || null;
      } else {

        // otherwise, try and look it up among the locally defined words
        var possib = _.find(worddata.words, function(w) {
          return _.contains(w.aliases, stem);
        });

        // substitute the correct stuff
        if (possib) {
          d.speach = possib.speach;
          actions.push(possib.action);
        }
      }

      // callback
      callback(null, d);
    });

  }
コード例 #12
0
	// Looks `word` up and appends the lemma of every synonym of its first sense
	// to emoji_syn[emoji]. The shared completion counter is advanced only after
	// that work is finished. Previously it was advanced as soon as the lookup
	// answered, so callback() could fire while getSynonyms() was still pending.
	//
	// NOTE(review): `emoji`, `emoji_syn`, `synonyms_count`, `emojiarray` and
	// `callback` come from the enclosing scope. If `emoji` is a loop variable
	// it may have moved on by the time these callbacks run - confirm.
	// NOTE(review): this assumes getSynonyms() always invokes its callback.
	wordnet.lookup(word, function(results){
		// Counts this word as processed and signals completion after the last one.
		function markDone(){
			synonyms_count++;
			if(synonyms_count === emojiarray.length){
				callback();
			}
		}

		if(results.length === 0){
			markDone();
			return;
		}

		wordnet.getSynonyms(results[0], function(synsets){
			synsets.forEach(function(synset){
				emoji_syn[emoji].push(synset.lemma);
			});
			markDone();
		});
	});
コード例 #13
0
ファイル: context.js プロジェクト: muggle98/hahaha
exports.lookupWord = function (word) {

    stemmer.attach();
    console.log('i stemmed the words.'.tokenizeAndStem());

    wordnet.lookup(word, function (results) {
        results.forEach(function (result) {
            console.log('------------------------------------');
            console.log(result.synsetOffset);
            console.log(result.pos);
            console.log(result.lemma);
            console.log(result.synonyms);
            console.log(result.pos);
            console.log(result.gloss);
        });
    });

}
コード例 #14
0
ファイル: wordnet.js プロジェクト: aeisenhaber/ss-parser
/**
 * Collects the lemmas reachable from `word` through pointers of one kind.
 *
 * @param {string} word - word to look up; an optional "~x" suffix (one word
 *        character) restricts the pointers to part of speech x and is removed
 *        before the lookup.
 * @param {string} [pointerSymbol] - pointer symbol to follow; "~" when falsy.
 * @param {function} cb - called as cb(err, lemmas) with duplicates removed and
 *        underscores replaced by spaces.
 */
var wdlookup = exports.lookup = function (word, pointerSymbol, cb) {

  var symbol = pointerSymbol || "~";
  var wantedPos = null;
  var term = word;

  var suffix = word.match(/~(\w)$/);
  if (suffix) {
    wantedPos = suffix[1];
    term = word.replace(suffix[0], "");
  }

  // Resolves one pointer to the lemma of the synset it points at.
  function lemmaOf(pointer, next) {
    wordnet.get(pointer.synsetOffset, pointer.pos, function (target) {
      next(null, target.lemma);
    });
  }

  // A pointer qualifies when its symbol matches and, if a part of speech was
  // requested, its pos matches too.
  function qualifies(pointer) {
    if (wantedPos !== null && pointer.pos !== wantedPos) {
      return false;
    }
    return pointer.pointerSymbol === symbol;
  }

  var synets = [];

  wordnet.lookup(term, function (senses) {
    senses.forEach(function (sense) {
      sense.ptrs.forEach(function (pointer) {
        if (qualifies(pointer)) {
          synets.push(pointer);
        }
      });
    });

    async.map(synets, lemmaOf, function (err, lemmas) {
      var readable = _.uniq(lemmas).map(function (lemma) {
        return lemma.replace(/_/g, " ");
      });
      cb(err, readable);
    });
  });
};
コード例 #15
0
ファイル: analyzer.js プロジェクト: cuzzo/word_count
/**
 * Gathers the distinct synonyms of `word` across all WordNet senses whose part
 * of speech equals `pos`, excluding the word itself.
 *
 * Fix: the iteratees used `return false` to skip an item. With lodash, an
 * explicit `false` from an `_.each` iteratee ends the whole iteration, so a
 * non-matching sense (or the word appearing in its own synonym list) silently
 * dropped everything after it. A plain `return` skips just the current item
 * and works the same with underscore and lodash.
 *
 * @param {string} word - word to look up.
 * @param {string} pos - part-of-speech tag the senses must have.
 * @param {function} callback - called as callback(word, pos, synonyms).
 */
function get_synonyms(word, pos, callback) {
  wordnet.lookup(word, function(results) {
    var possible_synonyms = [];
    _.each(results, function(result) {
      if (result["pos"] !== pos) {
        return;
      }
      _.each(result["synonyms"], function(synonym) {
        if (synonym === word) {
          return;
        }
        if (_.contains(possible_synonyms, synonym)) {
          return;
        }
        possible_synonyms.push(synonym);
      });
    });
    callback(word, pos, possible_synonyms);
  });
}
コード例 #16
0
ファイル: ActivityItem.js プロジェクト: monteslu/Saturn.io
		/**
		 * Takes the next word off the outer `words` queue and classifies it.
		 *  - queue empty              -> returns add_topics()
		 *  - 3 characters or fewer    -> save_junk_topic(word), then the next word
		 *  - ends in "ing"            -> classify_word(word, 0, 100, 0)
		 *  - consists of digits only  -> classify_word(word, 0, 0, 0)
		 *  - otherwise                -> counts noun / verb / other senses from
		 *                                WordNet and passes them to classify_word
		 *
		 * Fix: the "ing" test compared word.substring(0, word.length-3), i.e.
		 * everything EXCEPT the last three characters, with "ing". It only
		 * matched six-letter words starting with "ing". The suffix is tested now.
		 *
		 * NOTE(review): `word` and `words` live in the enclosing scope;
		 * classify_word presumably continues the chain - confirm.
		 */
		function lookup_next_word () {
			if (words.length == 0) {
				return add_topics();
			}
			word = words.shift();

			// Short words are junk; move straight on to the next one.
			if (word.length <= 3) {
				save_junk_topic(word);
				lookup_next_word();
				return;
			}

			var neither = 0;
			var noun = 0;
			var verb = 0;

			if (word.slice(-3) === "ing") {
				verb = 100;
				classify_word(word, noun, verb, neither);
				return;
			}

			if (word.match(/^[0-9]*$/)) {
				classify_word(word, noun, verb, neither);
				return;
			}

			// Captured so the callback classifies the word it was started for
			// even if the shared `word` variable has changed in the meantime.
			var current = word;
			wordnet.lookup(current, function(results) {
				results.forEach(function(result) {
					if (result.pos == "n") {
						noun++;
					} else if (result.pos == "v") {
						verb++;
					} else if (result.pos == "a" || result.pos == "r" || result.pos == "s") {
						neither++;
					}
				});
				classify_word(current, noun, verb, neither);
			});
		}
コード例 #17
0
ファイル: wordnet.js プロジェクト: aeisenhaber/ss-parser
exports.explore = function (word, cb) {
  var ptrs = [];

  wordnet.lookup(word, function (results) {

    for (var i = 0; i < results.length; i++) {
      ptrs.push(results[i].ptrs);
    }

    ptrs = _.uniq(_.flatten(ptrs));
    ptrs = _.map(ptrs, function (item) {
      return { pos: item.pos, sym: item.pointerSymbol };
    });

    ptrs = _.chain(ptrs)
    .groupBy("pos")
    .map(function (value, key) {
      return {
        pos: key,
        ptr: _.uniq(_.map(value, "sym"))
      };
    })
    .value();

    var itor = function (item, next) {
      var itor2 = function (ptr, next2) {
        wdlookup(word + "~" + item.pos, ptr, function (err, res) {
          // console.log(err);
          // console.log(word, item.pos, ":", ptr, res.join(", "));
          // console.log(res);
          next2();
        });
      };
      async.map(item.ptr, itor2, next);
    };
    async.each(ptrs, itor, function () {
      cb();
    });
  });
};
コード例 #18
0
    // Looks every token up in WordNet. A token is decided by its first sense
    // that is a noun ('n') or an adjective ('a'): nouns are appended to
    // nounList, adjectives are only logged. The token 'a' itself is skipped.
    // When the last lookup has answered, performSearch(nounList) is called.
    //
    // Change: the `found` flag was an implicit global shared by all callbacks.
    // The flag-guarded forEach is now Array#some, which stops at the first
    // deciding sense and needs no flag.
    //
    // `count`, `nounList` and `performSearch` come from the enclosing scope;
    // `.green` presumably comes from a colour extension on String - confirm.
    tokensFromString.forEach(function(currentWord){

        //Look up the definition for the token
        wordnet.lookup(currentWord, function(results) {

            count++;

            if(currentWord != 'a'){
                results.some(function(result) {
                    var isNoun = result.pos == 'n';
                    if(!isNoun && result.pos != 'a'){
                        return false; // keep looking at the next sense
                    }

                    console.log(currentWord.green+' : '+result.pos.green);
                    if(isNoun){
                        nounList.push(currentWord);
                    }
                    return true; // decided; stop iterating
                });
            }

            if(count == tokensFromString.length){
                console.log('Finished getting word definitions');
                performSearch(nounList);
                //Call next function
            }
        });

    });
コード例 #19
0

// Prints one tf-idf measure line; shared by both term reports below.
var printMeasure = function(i, measure) {
  console.log('document #' + i + ' is ' + measure);
};

console.log('\n-- tfidf for word "Congress" in three test documents:');
console.log('Congress:');
tfidf.tfidfs('Congress', printMeasure);

console.log('\n-- tfidf for word "taxes" in three test documents:');
console.log('taxes:');
tfidf.tfidfs('taxes', printMeasure);

// The WordNet database location is taken from the WORDNET_DATA environment variable.
var wordnet_data_path = process.env.WORDNET_DATA;
console.log("Wordnet data path: " + wordnet_data_path);
var wordnet = new natural.WordNet(wordnet_data_path);

// Maps WordNet part-of-speech tags to readable names.
var pos_map = {v: 'verb', n: 'noun', a: 'adjective', s: 'adjective', r: 'adverb'};

// Prints a header plus four detail lines for every sense of "bank".
wordnet.lookup('bank', function(senses) {
  for (var k = 0; k < senses.length; k++) {
    var sense = senses[k];
    [
      '\n-- Wordnet data for "bank":',
      ' part of speech: ' + pos_map[sense.pos],
      ' lemma: ' + sense.lemma,
      ' synonyms: ' + sense.synonyms,
      ' gloss: ' + sense.gloss
    ].forEach(function(line) {
      console.log(line);
    });
  }
});
コード例 #20
0
ファイル: wordnet_lookup.js プロジェクト: maximilienand/nlp
var natural = require('natural');
var wordnet = new natural.WordNet();

// Prints a separator and the labelled fields of one WordNet sense.
function printSense(sense) {
    var lines = [
        '------------------------------------',
        "synsetOffset:"+sense.synsetOffset,
        "pos:"+sense.pos,
        "lemma:"+sense.lemma,
        "synonyms:"+sense.synonyms,
        "gloss:"+sense.gloss
    ];
    for (var n = 0; n < lines.length; n++) {
        console.log(lines[n]);
    }
}

// Dump every sense WordNet knows for the word 'node'.
wordnet.lookup('node', function(senses) {
    senses.forEach(function(sense) {
        printSense(sense);
    });
});
コード例 #21
0
ファイル: index.js プロジェクト: sourabhdesai/mysite
exports.generateData = function() {

	var natural = require('natural');
	var wp = require('wordpos');
	var hashes = require('hashes');
	var wait = require('wait.for');
	var fs = require("fs");

	var data = fs.readFileSync("text/ofk.txt").toString();

	var tokenizer = new natural.WordTokenizer();
	var tokens    = tokenizer.tokenize(data);

	var hashtable = new hashes.HashTable();
	var WordPOS   = new wp();

	tokens.forEach(function (token) {
		token = token.toLowerCase();
		var notSW = natural.stopwords.indexOf(token) == -1;
		// token = natural.PorterStemmer.stem(token);
		if (notSW) {
			var pair = hashtable.get(token);
			if (pair) {
				hashtable.add(token,pair.value+1,true);
			} else {
				hashtable.add(token,1,true);
			}
		}
	});
	var pairs = hashtable.getKeyValuePairs();

	pairs.sort(function(pair1,pair2) {
		return pair2.value - pair1.value;
	});

	console.log(100 * pairs.length/tokens.length + "%"); // Percentage of non stopword nouns in corpus

	var wn = new natural.WordNet();
	var trie = new natural.Trie();
	trie.addStrings(tokens);
	console.log(wn.lookup('node',function(r){return r;}));
	var used = [];

	var WordNetter = function(seed,level) {
		this.name = seed;
		if(level == 0)
			return;
		this.children = [];
		var netter = this;
		wn.lookup(this.name,function (results) {
			if(results.length == 0) {
				return;
			}
			// var result = findMostSynResult(results);c
			results.forEach(function(result) {
				if(result.synonyms.length == 0) {
					return;
				}
				result.synonyms.forEach(function (synonym) {
					if (used.indexOf(synonym) == -1 && trie.contains(synonym) ) {
						used.push(synonym);
						netter.children.push( new WordNetter(synonym, level - 1) );
						console.log("!");
					}
				});
			});
		});

		function findMostSynResult(results) {
			var most = results[0];
			for (var i = results.length - 1; i >= 0; i--) {
				if(results[i].synonyms.length > most.synonyms.length)
					most = results[i];
			};
			return most;
		};
	};

	var data =
	{
		name : "root",
		children : new Array(10)
	};
	for(var i = 0; i < 10; i++) {
		data.children[i] = new WordNetter(pairs[i].key,3);
	}

	// Really hacky way of waiting for asychronous calls to end...HORRIBLE practice, I know
	setTimeout(function(){

		function removeEmpties(data) {
			if(data.children) {
				if(data.children.length == 0) {
					data.children = undefined;
				} else {
					for (var i = data.children.length - 1; i >= 0; i--) {
						removeEmpties(data.children[i]);
						console.log("&");
					};
				}
			}
		}

		removeEmpties(data);

		fs.writeFile("flare.json", JSON.stringify(data,null,'\t'), function (err) {
		    if(err) {
		        console.log(err);
		    } else {
		        console.log("flare.json was saved!");
		    }
		});
	},10000);
};
コード例 #22
0
ファイル: wordnet.js プロジェクト: aeisenhaber/ss-parser
 // async-style iterator: resolves a pointer record (synsetOffset + pos) to the
 // lemma of the synset it refers to and reports it as next(null, lemma).
 var itor = function (word1, next) {
   var offset = word1.synsetOffset;
   var partOfSpeech = word1.pos;

   function onSynset(sub) {
     next(null, sub.lemma);
   }

   wordnet.get(offset, partOfSpeech, onSynset);
 };