Example #1
documentModel.parseText = function(text, options) {
    options = options || {};
    _.defaults(options, {
        // controls how adjacent tokens are merged together:
        // with a value larger than 1, tokens will be merged;
        // otherwise the token list is kept as is
        maxTokensToMerge: 1,

        // only return merged tokens (the original tokens will be ignored)
        keepMergedOnly: false,

        // tokens that exist in this list will be removed
        // from the final result and not merged
        ignoredList: [
            'của', 'là', 'và', 'có', 'đã',
            'những', 'các', 'với',
            'cũng', 'đó', 'như', 'nhiều',
            'còn', 'mà', 'thế', 'đi', 'nhưng',
            'nhất', 'theo', 'sẽ',
            'đang', 'rất', 'hơn'
        ],
        
        // enable the extra heuristic filtering below (_removeIrrelevantTokens)?
        tryToBeSmart: 0
    });

    var tokenized = tokenizer.tokenize(text);
    var tokens = tokenized.tokens;
    text = tokenizer.normalize(text);

    // merge tokens to form new tokens
    if (options.maxTokensToMerge > 1) {
        var newTokens = documentModel._mergeTokens(
            text,
            // only pass tokens that are not in the ignored list
            _.difference(tokens, options.ignoredList),
            options.maxTokensToMerge
        );
        
        if (options.keepMergedOnly) {
            // ignore the original tokens,
            // except the special ones
            options.ignoredList = options.ignoredList.concat(_.difference(tokens, tokenized.special));
        }
        
        tokens = tokens.concat(newTokens);
    }
    
    if (options.tryToBeSmart) {
        tokens = documentModel._removeIrrelevantTokens(text, tokens, 50, options.ignoredList);
        // tokens = documentModel._removeSingleAppearTokens(tokens);
        // tokens = documentModel._removeCompoundTokens(tokens);
    }

    // filter out ignored tokens
    if (options.ignoredList.length > 0) {
        tokens = _.difference(tokens, options.ignoredList);
    }
    
    return tokens;
};
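
A minimal, self-contained sketch of the stop-word filtering used above (the sample tokens are hypothetical): _.difference keeps the values of the first array that do not appear in the second.

var _ = require('underscore');

var tokens = ['hà nội', 'là', 'thủ đô', 'của', 'việt nam'];
var ignoredList = ['của', 'là'];

console.log(_.difference(tokens, ignoredList));
// => [ 'hà nội', 'thủ đô', 'việt nam' ]
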
Example #2
module.exports = function (config, callback) {
    if (!intf) {
        return callback(false);
    }

    function addIp(ip, callback) {
        con.info('Adding local IP ' + ip + ' for forwards; if asked for password, give your local (sudo) password.');
        var ifconfig = pspawn('sudo', ['ifconfig', intf, 'add', ip]);
        ifconfig.on('exit', callback);
    }

    function addMissingIps(exitCode) {
        if (exitCode === 0 && missing.length > 0) {
            addIp(missing.shift(), addMissingIps);
        } else {
            callback(exitCode === 0);
        }
    }

    var allForwards = _.flatten(_.values(config.forwards).concat(_.values(config.localForwards)));
    var ips = [];
    allForwards.forEach(function (f) {
        var m = f.from.match(/^([0-9.]+):/);
        if (m) {
            ips.push(m[1]);
        }
    });
    ips = _.uniq(ips);

    var currentIps = _.pluck(os.networkInterfaces()[intf], 'address');
    var missing = _.difference(ips, currentIps);

    // Add any missing IPs and finally call the callback.

    addMissingIps(0);
};
Example #3
    it("testRemainingWords", function() {
        var remainingWords = [
            "qwerty",
            "a1sdfg",
            "on",
            "123",
            "12zxcvb",
            "456.789"
        ];
        var storyResults = new StoryCheckResults(
            [],
            [],
            [],
            [],
            remainingWords,
            0,
            0
        );

        var numbers = storyResults.getNumbers();
        var badWords = _.difference(remainingWords, numbers);
        console.log(numbers);
        // check badWords
        expect(badWords.length).toBe(4);
        expect(badWords[0]).toBe("qwerty");
        expect(badWords[1]).toBe("a1sdfg");
        expect(badWords[2]).toBe("on");
        expect(badWords[3]).toBe("12zxcvb");
    });
Example #4
        .exec(function (err, knownNames) {
            if (err) {
                // bail out if the name lookup itself failed
                return console.error(err);
            }

            var unknownNames = _.difference(names, knownNames);
            var unknownSrcs = _.map(unknownNames, function (name) {
                return _.find(srcs, function (src) {
                    return src.name == name;
                });
            });

            async.parallel(
                _.map(unknownSrcs, function (src) {
                    return function (callback) {
                        request(src.srcUrl, function (err, response, body) {
                            if (err) {
                                callback(err);
                                return;
                            }
                            src.code = body;
                            Package.create(src, function (err, p) {
                                callback(err);
                            });
                        });
                    };
                }),
                function (err) {
                    // surface any request/create failure instead of silently dropping it
                    if (err) {
                        console.error(err);
                    }
                }
            );
        });
Example #5
File: api.js Project: reality/dbot
                        }, function() {
                            // Queue notifies for offline ops
                            if(this.config.offlineReporting == true) {
                                if(!_.include(this.config.noMissingChans, cName)) {
                                    _.each(offlineOps, function(op) {
                                        if(!this.pending[op.id]) this.pending[op.id] = [];
                                        this.pending[op.id].push({
                                            'time': new Date().getTime(),
                                            'channel': cName,
                                            'user': user.id,
                                            'message': message
                                        });
                                        this.pNotify[op.id] = true;
                                    }, this);
                                }
                            }

                            // Send notifies to online ops
                            ops = _.difference(ops, _.keys(offlineOps));
                            message = this.internalAPI.formatNotify(type, server,
                                        user, cName, message);
                            this.internalAPI.notify(server, ops, message);
                            if(_.has(this.config.chan_redirs, cName)) {
                                dbot.say(server, this.config.chan_redirs[cName], message);
                            }
                        }.bind(this));
Example #6
SchemaArray.prototype.push = function ()
{
    // Values are passed through the setter before being allowed onto the array if arrayType is set.
    // In the case of rejection, the setter returns undefined, which is not appended to the array.
    var values;
    if (this._properties.arrayType)
    {
        values = [].map.call(arguments, function (value)
        {
            return setter.call(this._self, this._index, value, undefined, this._properties.arrayType);
        }, this);
    } else
    {
        // normalize to a real array so the difference/push below behave consistently
        values = _.toArray(arguments);
    }

    if (this._properties.unique)
    {
        values = _.difference(values, _.toArray(this));
    }

    var ret = [].push.apply(this, values);

    return ret;
};
Example #7
  callback: function (data) {
    if (data.method === 'create') {
      messagesCollection.add(data.model);
      if(messagesCollection.length > 50){
        messagesCollection.remove(messagesCollection.first(messagesCollection.length - 50));
      }
    } else if (data.method === 'update') {
      messagesCollection.remove(data.model);
    } else if (data.method === 'delete') {
      var record = _.find(messagesCollection.models, function (record) {
        return record.id === data.model.id;
      });

      if (record == null) {
        console.log("Could not find record: " + data.model.id);
        return;
      }

      var diff = _.difference(_.keys(record.attributes), _.keys(data.model));
      _.each(diff, function(key) {
        return record.unset(key);
      });

      return record.set(data.model, data.options);
    }
  }
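
The key-diffing step above can be shown in isolation (the attribute data here is made up): keys present on the local record but missing from the incoming model are the stale ones to unset.

var _ = require('underscore');

var current  = { id: 7, text: 'hello', flagged: true };
var incoming = { id: 7, text: 'hello, world' };

console.log(_.difference(_.keys(current), _.keys(incoming)));
// => [ 'flagged' ]
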
Example #8
 this.on("sessions:change:connectedParticipants", function(event, session, cop) {
     if (!event.get("open")) {
         return;
     }
     var prev = session.previous("connectedParticipants");
     var added = _.difference(cop, prev);
     var removed = _.difference(prev, cop);
     _.each(added, function(user) {
         if (user.id) {
             this.startHistory("sessions", session.id, user.id);
         }
     }.bind(this));
     _.each(removed, function(user) {
         if (user.id) {
             this.stopHistory("sessions", session.id, user.id);
         }
     }.bind(this));
 });
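
Note that _.difference compares objects by reference, so the added/removed split above works only because the same participant objects are reused between the previous and current arrays. A small sketch with hypothetical participants:

var _ = require('underscore');

var alice = { id: 'alice' }, bob = { id: 'bob' }, carol = { id: 'carol' };
var prev = [alice, bob];   // previously connected
var cop  = [bob, carol];   // currently connected

console.log(_.difference(cop, prev)); // => [ { id: 'carol' } ]  (joined)
console.log(_.difference(prev, cop)); // => [ { id: 'alice' } ]  (left)
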
Example #9
    , done: function done (err, window) {
        var globals;

        if (window) {
          globals = _.difference(Object.keys(window), regular);
          window.close();
        }

        fn(err, globals);
      }
Example #10
  server.post( '/:schema', basicValidations, function ( req, res ) {
    var condition       = {},
        schema          = req.params.schema,
        fields          = '',
        keys            = [],
        allFields       = [],
        warning         = {};

    for ( var key in schemas[schema].schema.paths ) {
      keys.push( key );
      allFields.push( key );
    }

    condition = req.body.condition !== undefined ? req.body.condition: {};

    keys = __.difference( req.body.fields, keys );
    if ( keys.length > 0 ) {
      if ( keys.length === req.body.fields.length ) {
        return res.json( errors.noKeysFound( keys ) );
      }
      warning.keysNotFound = warnings.keysNotFound( keys );
    }

    if ( req.body.fields !== undefined ) {
      if ( req.body.fields.length === 1 && req.body.fields[0] === '_id') {
        fields = allFields.join(' ');
      } else {
        if ( req.body.fields.indexOf('_id') < 0 ) {
          req.body.fields.push('-_id');
        }
        fields = req.body.fields.join(' ');
      }
    } else {
      fields = '-_id';
    }

    schemas[schema].find( condition ).select( fields ).exec(
      function ( err, docs ) {
        if ( err ) { throw err; }

        var toSend = { data:docs };
        if ( !isEmpty( warning ) ) { toSend.warnings = warning; }

        res.json( toSend );
      }
    );
  });
Example #11
documentModel._removeCompoundTokens = function(tokens)
{
    var compoundTokens = [];
    
    _.each(tokens, function(token) {
        if (token.indexOf(' ') != -1 && !_.include(compoundTokens, token)) {
            var containedTokens = [];
            
            _.each(tokens, function(token2) {
                if (token2 != token && token2.indexOf(' ') != -1 && !_.include(containedTokens, token2)) {
                    if (token.indexOf(token2) != -1) {
                        containedTokens.push(token2);
                    }
                }
            });
            
            if (containedTokens.length == 1) {
                compoundTokens.push(token);
            }
        }
    });

    return _.difference(tokens, compoundTokens);
};
Example #12
LibSynphony.prototype.selectGPCWordsWithArrayCompare = function(
    aDesiredGPCs,
    aKnownGPCs,
    restrictToKnownGPCs,
    allowUpperCase,
    aSyllableLengths,
    aSelectedGroups,
    aPartsOfSpeech
) {
    var word_already_exists,
        aSelectedWordObjects,
        aWordObjects,
        aVocabKey,
        aCriteria;
    var groups = this.chooseVocabGroups(aSelectedGroups);

    aWordObjects = [];
    aSelectedWordObjects = [];
    aVocabKey = this.constructSourceArrayNames(aDesiredGPCs, aSyllableLengths); //result: "a__1" or "a_a__1" or "wicc_a_a__1"
    aCriteria = aKnownGPCs;

    //let's concatenate all vocabulary into 1 array
    for (var g = 0, len = groups.length; g < len; g++) {
        //eg: group1, group2...
        for (var i = 0, ilen = aVocabKey.length; i < ilen; i++) {
            //eg: ["a_a__1"]
            if (groups[g][aVocabKey[i]]) {
                //make sure it exists
                aWordObjects = aWordObjects.concat(groups[g][aVocabKey[i]]);
            }
        }
    }

    //this is the place to branch into checking for taught graphemes vs
    //selecting all words that have the current grapheme.
    if (!restrictToKnownGPCs) {
        //select all words that have the current_gpc
        aSelectedWordObjects = _.uniq(aWordObjects);
    } else {
        //we start restricting the word list using criteria that include
        //known graphemes, part of speech, etc.

        //add uppercase gpcs to aCriteria***
        if (allowUpperCase) {
            //if true then we add uppercase
            for (var k = 0, klen = aKnownGPCs.length; k < klen; k++) {
                var temp = [];
                for (
                    var j = 0, jlen = theOneLanguageDataInstance.GPCS.length;
                    j < jlen;
                    j++
                ) {
                    if (
                        theOneLanguageDataInstance.GPCS[j]["GPC"] ===
                        aKnownGPCs[k]
                    ) {
                        if (
                            theOneLanguageDataInstance.GPCS[j]["GPCuc"] !== ""
                        ) {
                            temp.push(
                                theOneLanguageDataInstance.GPCS[j]["GPCuc"]
                            );
                        }
                    }
                }
                aCriteria = aCriteria.concat(temp);
            }
        }

        //lets add symbols that can be matched at any time
        //these can come from the AlwaysMatch, SyllableBreak, StressSymbol, or MorphemeBreak fields
        //anything else must exist in the aKnownGPCs in order to be accepted
        if (alwaysMatch.length > 0) {
            aCriteria = aCriteria.concat(alwaysMatch);
        } else {
            if (
                typeof theOneLanguageDataInstance["AlwaysMatch"] !==
                    "undefined" &&
                theOneLanguageDataInstance["AlwaysMatch"] !== ""
            ) {
                alwaysMatch = alwaysMatch.concat(
                    theOneLanguageDataInstance["AlwaysMatch"]
                );
            }
            if (
                typeof theOneLanguageDataInstance["SyllableBreak"] !==
                    "undefined" &&
                theOneLanguageDataInstance["SyllableBreak"] !== ""
            ) {
                alwaysMatch.push(theOneLanguageDataInstance["SyllableBreak"]);
            }
            if (
                typeof theOneLanguageDataInstance["StressSymbol"] !==
                    "undefined" &&
                theOneLanguageDataInstance["StressSymbol"] !== ""
            ) {
                alwaysMatch.push(theOneLanguageDataInstance["StressSymbol"]);
            }
            if (
                typeof theOneLanguageDataInstance["MorphemeBreak"] !==
                    "undefined" &&
                theOneLanguageDataInstance["MorphemeBreak"] !== ""
            ) {
                alwaysMatch.push(theOneLanguageDataInstance["MorphemeBreak"]);
            }
            aCriteria = aCriteria.concat(alwaysMatch);
        }

        //start checking words
        for (var w = 0, wlen = aWordObjects.length; w < wlen; w++) {
            var keep = true;
            //first we check for allowable gpcs
            var gpcform = aWordObjects[w]["GPCForm"];
            var test_word = _.difference(gpcform, aCriteria);
            if (test_word.length > 0) {
                keep = false;
            }

            //then we check for part of speech constraint
            var ps_check = false;
            if (aPartsOfSpeech.length > 0) {
                if (aWordObjects[w]["PartOfSpeech"]) {
                    for (
                        var p = 0, plen = aPartsOfSpeech.length;
                        p < plen;
                        p++
                    ) {
                        if (
                            aWordObjects[w]["PartOfSpeech"] ===
                            aPartsOfSpeech[p]
                        ) {
                            ps_check = true;
                        }
                    }
                }
                if (ps_check === false) keep = false;
            }

            //if keep is still true, then this word object
            //has passed all checks and is suitable for use
            if (keep === true) {
                word_already_exists = false;
                for (
                    var m = 0, mlen = aSelectedWordObjects.length;
                    m < mlen;
                    m++
                ) {
                    //check to see that we don't add more than one instance of the word to our list
                    if (aSelectedWordObjects[m] === aWordObjects[w]) {
                        word_already_exists = true;
                    }
                }
                if (word_already_exists === false) {
                    aSelectedWordObjects.push(aWordObjects[w]);
                }
            }
        } //end of wordObject loop
    }

    return aSelectedWordObjects;
};
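
The decodability check above uses _.difference as a subset test: a word is kept only when every element of its GPCForm appears in aCriteria, i.e. when the difference is empty. A small sketch with made-up grapheme data:

var _ = require('underscore');

var gpcform = ['sh', 'i', 'p'];                 // graphemes of the word "ship"
var criteria = ['s', 'h', 'sh', 'i', 'p', 't']; // known GPCs plus always-match symbols

console.log(_.difference(gpcform, criteria).length === 0);
// => true: every grapheme is known, so the word is decodable
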
Example #13
LibSynphony.prototype.checkStory = function(
    aFocusWordList,
    aWordCumulativeList,
    aGPCsKnown,
    storyHTML,
    sightWords
) {
    var letters;
    var story_vocab;

    if (aGPCsKnown.length > 0) {
        letters = this.fullGPC2Regular(aGPCsKnown).join("|");
        // break the text into words
        story_vocab = this.getWordsFromHtmlString(storyHTML, letters);
    } else {
        letters = "";
        // break the text into words
        story_vocab = this.getWordsFromHtmlString(storyHTML);
    }
    // Just in case users want ? for glottal or similar nonsense with other special
    // RE characters.  See https://issues.bloomlibrary.org/youtrack/issue/BL-7075.
    // use global regexes so every occurrence is escaped, not just the first
    if (letters.includes("\\")) letters = letters.replace(/\\/g, "\\\\");
    if (letters.includes("?")) letters = letters.replace(/\?/g, "\\?");
    if (letters.includes("+")) letters = letters.replace(/\+/g, "\\+");
    if (letters.includes("*")) letters = letters.replace(/\*/g, "\\*");

    // get unique word list
    var story_vocab_compacted = _.uniq(story_vocab);

    // count total words in the story
    var total_words = _.filter(story_vocab, function(word) {
        return isNaN(word) === true;
    }).length;

    // if aGPCsKnown is empty, return now
    // BL-2359: Need to allow stages based on word lists rather than known graphemes
    //if (aGPCsKnown.length === 0)
    //    return new StoryCheckResults([], [], [], [], [], 0, total_words);

    // first we do diffs on aFocusWordList and aWordCumulativeList with story_vocab words
    var story_focus_words = _.intersection(
        aFocusWordList,
        story_vocab_compacted
    );
    var story_cumulative_words = _.intersection(
        _.pluck(aWordCumulativeList, "Name"),
        story_vocab
    );
    this.array_sort_length(story_focus_words);

    /* TODO: has to handle utf8 */

    // FIRST PASS: we handle words which are currently in focus
    var focus_words = _.intersection(story_focus_words, story_vocab_compacted);
    var remaining_words = _.difference(story_vocab_compacted, focus_words);
    this.array_sort_length(focus_words);

    // SECOND PASS: we handle words which are part of the cumulative word bank
    // aWordCumulativeList is an object that contains the following fields:
    // GPCForm,GPCS,GPCcount,Name,Reverse,SyllShape,Syllables
    var cumulative_words = _.intersection(
        story_cumulative_words,
        remaining_words
    );
    remaining_words = _.difference(remaining_words, cumulative_words);
    this.array_sort_length(cumulative_words);

    // THIRD PASS: we handle words which have not been matched yet to check if they are
    // decodable at this point. This can match words which are longer than the syllable
    // selectors specify but contain all the gpcs. We do this using a regular expression
    // with the array of knownGPCs. This is not the most accurate method; we should
    // first segment the word with all gpcs, then test with known gpcs. This also checks
    // for the possibility that the word is not yet in our database.
    // This only works for simple gpc notation, not complex.
    // Why not for full gpc? Once you have covered the regular spelling patterns (in English)
    // you will match all the other words, so everything gets tagged as 'possible'. Not useful!!
    var possible_words = [];
    if (
        typeof theOneLanguageDataInstance === "undefined" ||
        theOneLanguageDataInstance["UseFullGPCNotation"] === false
    ) {
        // allow punctuation characters in the words
        // BL-1216 Use negative look-ahead to keep letters from being counted as punctuation
        // even if Unicode says something is a punctuation character when the user
        // has specified it as a letter (like single quote).
        var re = new XRegExp(
            "^((" +
                letters +
                ")+((?![" +
                letters +
                "])[\\p{P}]*(" +
                letters +
                ")*)*)$",
            "gi"
        );
        possible_words = _.filter(remaining_words, function(word) {
            return word.match(re);
        });

        // BL-1217: exclude words with unknown graphemes, specifically 'aa' when only 'a' is known
        if (typeof theOneLanguageDataInstance !== "undefined") {
            // get the unknown letters
            var unknownGPCs = _.difference(
                _.pluck(theOneLanguageDataInstance["GPCS"], "GPC"),
                letters.split("|")
            ); // .join('|');
            if (Array.isArray(unknownGPCs) && unknownGPCs.length > 0) {
                // remove from the list of unknownGPCs characters used to build multi-graphs in the list aGPCsKnown
                unknownGPCs = _.filter(unknownGPCs, function(gpc) {
                    return letters.indexOf(gpc) === -1;
                });

                if (unknownGPCs.length > 0) {
                    re = new XRegExp("(" + unknownGPCs.join("|") + ")+", "gi");
                    possible_words = _.filter(possible_words, function(word) {
                        return !word.match(re);
                    });
                }
            }
        }

        remaining_words = _.difference(remaining_words, possible_words);
        this.array_sort_length(possible_words);
    }

    // FOURTH PASS: we handle sight words
    // NOTE: Handle sight words after checking for decodability, because a sight word may become decodable.
    var sight_words = [];
    if (sightWords.length > 0) {
        sight_words = _.intersection(sightWords.split(" "), remaining_words);
        remaining_words = _.difference(remaining_words, sight_words);
        this.array_sort_length(sight_words);
    }

    // FIFTH PASS: we handle everything else that's left over

    var readable =
        focus_words.length + cumulative_words.length + possible_words.length;
    return new StoryCheckResults(
        focus_words,
        cumulative_words,
        possible_words,
        sight_words,
        remaining_words,
        readable,
        total_words
    );
};
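
Each pass above follows the same pattern: _.intersection picks out the words that belong to the current category, and _.difference carries the remainder into the next pass. In miniature, with hypothetical word lists:

var _ = require('underscore');

var remaining = ['cat', 'sat', 'mat', 'ship', 'the'];
var focus = ['cat', 'mat', 'dog'];

var focus_words = _.intersection(focus, remaining); // => [ 'cat', 'mat' ]
remaining = _.difference(remaining, focus_words);   // => [ 'sat', 'ship', 'the' ]
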
Example #14
documentModel._removeIrrelevantTokens = function(text, tokens, thresholdPercentage, ignoredTokens) {
    var irrelevantTokens = [];
    var processedTokens = [];
    
    _.each(tokens, function(token) {
        if (!_.include(processedTokens, token) /* && !_.include(ignoredTokens, token) */) {
            processedTokens.push(token);
            
            var tokensThatContainIt = [];
            var tokensThatContainItManyTime = [];
            var tokenCount = 0, offset, offset_tmp;
            var requiredCount = 0;

            // find all tokens that contain our token
            // this may catch some silly tokens
            _.each(tokens, function(token2) {
                if (token2 != token) {
                    offset_tmp = token2.indexOf(token);
                    if (offset_tmp != -1
                        && (offset_tmp == 0 || token2[offset_tmp - 1] == ' ')
                        && (offset_tmp == token2.length - token.length || token2[offset_tmp + token.length] == ' ')) {
                        tokensThatContainIt.push(token2);
                    }
                }
            });
            
            offset = 0;
            while (true) {
                offset_tmp = text.indexOf(token, offset);
                if (offset_tmp != -1) {
                    // found the token
                    offset = offset_tmp + 1;
                    tokenCount++;
                } else {
                    break; // while (true)
                }
            }

            if (tokensThatContainIt.length > 1) {
                // test all tokensThatContainIt to find the ones that occur at least
                // thresholdPercentage percent as often as the token itself;
                // only do this if we found more than one containing token
                requiredCount = tokenCount * thresholdPercentage / 100;

                _.each(tokensThatContainIt, function(token2) {
                    if (!_.include(tokensThatContainItManyTime, token2)) {
                        var token2Count = 0;
                        
                        offset = 0;
                        while (true) {
                            offset_tmp = text.indexOf(token2, offset);
                            if (offset_tmp != -1
                                && (offset_tmp == 0 || text[offset_tmp - 1] == ' ')
                                && (offset_tmp == text.length - token2.length || text[offset_tmp + token2.length] == ' ')) {
                                // found the token2
                                offset = offset_tmp + 1;
                                token2Count++;
                            } else {
                                break; // while (true)
                            }
                        }
                        
                        if (token2Count > 1 && token2Count >= requiredCount) {
                            tokensThatContainItManyTime.push(token2);
                        }
                    }
                });
            }
            
            if (tokensThatContainItManyTime.length > 0) {
                // there are other tokens that contain it and
                // appear many times, so it's likely that this token
                // is irrelevant...
                irrelevantTokens.push(token);
                
                // we also consider tokens that contain this token to be irrelevant,
                // with the exception of those that contain one of the
                // tokens-that-contain-it-many-times. Make sense?
                var alsoIrrelevantTokens = [];
                _.each(tokensThatContainIt, function(token2) {
                    var inContainsManyTime = false;
                    
                    _.each(tokensThatContainItManyTime, function(token3) {
                        if (token2 == token3) {
                            inContainsManyTime = true;
                        } else {
                            offset_tmp = token2.indexOf(token3);
                            if (offset_tmp != -1
                                && (offset_tmp == 0 || token2[offset_tmp - 1] == ' ')
                                && (offset_tmp == token2.length - token3.length || token2[offset_tmp + token3.length] == ' ')) {
                                inContainsManyTime = true;
                            }
                        }
                    });
                    
                    if (!inContainsManyTime) {
                        alsoIrrelevantTokens.push(token2);
                    }
                });

                irrelevantTokens = irrelevantTokens.concat(alsoIrrelevantTokens);
            }
        }
    });

    return _.difference(tokens, irrelevantTokens);
};
Example #15
 _.without = function(array) {
   return _.difference(array, slice.call(arguments, 1));
 };
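
A quick usage example for the wrapper above: _.without takes the values to exclude as individual arguments and delegates to _.difference.

var _ = require('underscore');

console.log(_.without([1, 2, 1, 0, 3, 1, 4], 0, 1));
// => [ 2, 3, 4 ]
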
Example #16
    _.each(dataset, function(sentence, key, list) {
        seen_vocabulary = _.uniq(seen_vocabulary.concat(tokenizedataset([sentence])));
        unseen_word = _.difference(total_vocabulary, seen_vocabulary);
        unseen_word_ratio = unseen_word.length / total_vocabulary.length;
        // appendFileSync is synchronous and takes no callback; errors are thrown, so catch and log them here
        try {
            fs.appendFileSync(dir + "unseen_curves", seen_vocabulary.length + "\t" + unseen_word_ratio + "\n", "utf8");
        } catch (err) {
            console.log("error " + err);
        }
    });