Example #1
function trainParallelBatches(options) {
    checkThreadSupport();

    var numThreads = options && options.numThreads;
    var batchSize = options && options.batchSize;

    if (isNaN(numThreads)) {
        numThreads = os.cpus().length;
    }

    if (isNaN(batchSize)) {
        batchSize = 2500;
    }

    var totalDocs = this.docs.length;
    var threadPool = Threads.createPool(numThreads);
    var docFeatures = {};
    var finished = 0;
    var self = this;

    var abort = false;
    var onError = function(err) {
        if (!err || abort) return;
        abort = true;
        threadPool.destroy(true);
        self.events.emit('doneTrainingError', err);
    };

    // Init pool; send the features array and the parsing function
    var str = JSON.stringify(this.features);
    threadPool.all.eval('var FEATURES = ' + str + ';', onError);
    threadPool.all.eval(docsToFeatures, onError);

    // Convert docs to observation objects
    var obsDocs = [];
    for (var i = this.lastAdded; i < totalDocs; i++) {
        var observation = this.docs[i].text;
        if (typeof observation === 'string')
            observation = this.stemmer.tokenizeAndStem(observation);
        obsDocs.push({
            index: i,
            observation: observation
        });
    }

    // Split the docs in batches
    var obsBatches = [];
    var i = 0;
    while (true) {
        var batch = obsDocs.slice(i * batchSize, (i+1) * batchSize);
        if (!batch || !batch.length) break;
        obsBatches.push(batch);
        i++;
    }
    obsDocs = null;
    self.events.emit('startedTraining', {
        docs: totalDocs,
        batches: obsBatches.length
    });

    // Called when a batch completes processing
    var onFeaturesResult = function(docs) {
        self.events.emit('processedBatch', {
            size: docs.length,
            docs: totalDocs,
            batches: obsBatches.length,
            index: finished
        });

        for (var j = 0; j < docs.length; j++) {
            docFeatures[docs[j].index] = docs[j].features;
        }
    };

    // Called when all batches finish processing
    var onFinished = function() {
        threadPool.destroy(true);
        abort = true;

        for (var j = self.lastAdded; j < totalDocs; j++) {
            self.classifier.addExample(docFeatures[j], self.docs[j].label);
            self.events.emit('trainedWithDocument', {
                index: j,
                total: totalDocs,
                doc: self.docs[j]
            });
            self.lastAdded++;
        }

        self.events.emit('doneTraining', true);
        self.classifier.train();
    };

    // Called to send the next batch to be processed
    var batchIndex = 0;
    var sendNext = function() {
        if (abort) return;
        if (batchIndex >= obsBatches.length) {
            return;
        }

        sendBatch(JSON.stringify(obsBatches[batchIndex]));
        batchIndex++;
    };

    // Called to send a batch of docs to the threads
    var sendBatch = function(batchJson) {
        if (abort) return;
        threadPool.any.eval('docsToFeatures(' + batchJson + ');', function(err, docs) {
            if (err) {
                return onError(err);
            }

            finished++;

            if (docs) {
                docs = JSON.parse(docs);
                setTimeout(onFeaturesResult.bind(null, docs));
            }

            if (finished >= obsBatches.length) {
                setTimeout(onFinished);
            }

            setTimeout(sendNext);
        });
    };

    // Start processing
    for (var i = 0; i < numThreads; i++) {
        sendNext();
    }
}
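Both examples eval a docsToFeatures function into the worker threads, but its definition is not part of this listing. The sketch below is only a hypothetical reconstruction, inferred from how the results are consumed above: the workers already hold FEATURES, they receive a batch of { index, observation } objects, and they must return a JSON string of { index, features } objects.

// Hypothetical sketch only -- the real docsToFeatures is defined elsewhere in the project.
// Runs inside a worker thread where FEATURES has already been eval'd in.
function docsToFeatures(docs) {
    var result = [];
    for (var i = 0; i < docs.length; i++) {
        var observation = docs[i].observation;
        var features = [];
        // Mark each known feature as present (1) or absent (0) in the tokenized observation.
        for (var j = 0; j < FEATURES.length; j++) {
            features.push(observation.indexOf(FEATURES[j]) >= 0 ? 1 : 0);
        }
        result.push({ index: docs[i].index, features: features });
    }
    return JSON.stringify(result);
}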
Example #2
function trainParallel(numThreads, callback) {
    checkThreadSupport();

    if (!callback) {
        callback = numThreads;
        numThreads = undefined;
    }

    if (isNaN(numThreads)) {
        numThreads = os.cpus().length;
    }

    var totalDocs = this.docs.length;
    var threadPool = Threads.createPool(numThreads);
    var docFeatures = {};
    var finished = 0;
    var self = this;

    // Init pool; send the features array and the parsing function
    threadPool.all.eval('var FEATURES = ' + JSON.stringify(this.features));
    threadPool.all.eval(docsToFeatures);

    // Convert docs to observation objects
    var obsDocs = [];
    for (var i = this.lastAdded; i < totalDocs; i++) {
        var observation = this.docs[i].text;
        if (typeof observation === 'string')
            observation = this.stemmer.tokenizeAndStem(observation);
        obsDocs.push({
            index: i,
            observation: observation
        });
    }

    // Called when a batch completes processing
    var onFeaturesResult = function(docs) {
        setTimeout(function() {
            self.events.emit('processedBatch', {
                size: docs.length,
                docs: totalDocs,
                batches: numThreads,
                index: finished
            });
        });

        for (var j = 0; j < docs.length; j++) {
            docFeatures[docs[j].index] = docs[j].features;
        }
    };

    // Called when all batches finish processing
    var onFinished = function(err) {
        if (err) {
            threadPool.destroy();
            return callback(err);
        }

        for (var j = self.lastAdded; j < totalDocs; j++) {
            self.classifier.addExample(docFeatures[j], self.docs[j].label);
            self.events.emit('trainedWithDocument', {
                index: j,
                total: totalDocs,
                doc: self.docs[j]
            });
            self.lastAdded++;
        }

        self.events.emit('doneTraining', true);
        self.classifier.train();

        threadPool.destroy();
        callback(null);
    };

    // Split the docs and start processing
    var batchSize = Math.ceil(obsDocs.length / numThreads);
    var lastError;

    for (var i = 0; i < numThreads; i++) {
        var batchDocs = obsDocs.slice(i * batchSize, (i+1) * batchSize);
        var batchJson = JSON.stringify(batchDocs);

        threadPool.any.eval('docsToFeatures(' + batchJson + ')', function(err, docs) {
            lastError = err || lastError;
            finished++;

            if (docs) {
                docs = JSON.parse(docs);
                onFeaturesResult(docs);
            }

            if (finished >= numThreads) {
                onFinished(lastError);
            }
        });
    }
}
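Neither listing shows how these methods are invoked. Below is a minimal usage sketch, assuming they are attached to a classifier object that exposes the events EventEmitter the code emits on; the name classifier and its construction are assumptions, not part of the listing.

// Hypothetical usage sketch -- `classifier` and its construction are assumed here.
classifier.events.on('processedBatch', function (info) {
    console.log('processed batch ' + info.index + ' of ' + info.batches);
});

classifier.events.on('doneTraining', function () {
    console.log('event: training finished');
});

// trainParallel defaults numThreads to os.cpus().length when only a callback is given.
classifier.trainParallel(function (err) {
    if (err) return console.error('training failed:', err);
    console.log('callback: classifier trained');
});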
Example #3

/*

If you don't want a thread to have the privilege of writing to stdout, simply delete its global puts symbol

*/

var Threads= require('webworker-threads');

// Per-thread eval callback: logs the error raised because puts is gone, then disposes of the thread.
function cb (err, msg) {
  if (err) {
    process.stdout.write("["+ this.id+ "] -> "+ err+ '\n');
  }
  this.destroy();
}

function ƒ () {
  puts("["+ thread.id+ "] -> puts('Hello!')\n");
}

var i= Math.abs(parseInt(process.argv[2], 10)) || 1;
console.log('Using '+ i+ ' threads');

// Delete puts from each worker's global object before running ƒ: the puts() call throws and cb receives the error.
Threads.createPool(i).all.eval('delete (global= this).puts; ('+ ƒ+ ')();', cb);

Example #4

var T= require('webworker-threads');

var i= process.argv[2] || 1;
console.log("Creating a pool of "+ i+ " threads");

var pool= T.createPool(i);

// Install ƒ's echo handler in every worker.
pool.all.eval('('+ ƒ+ ')()');

pool.on('myEvent', function myEventHandler (data) {
  console.log("Received myEvent with data -> "+ data);
  if (data === "QUIT") {
    setTimeout(function () { pool.destroy() }, 1e3);
  }
});

// Runs inside each worker: echo every 'myEvent' straight back to the pool.
function ƒ () {
  thread.on('myEvent', function cb (data) {
    thread.emit('myEvent', data);
  });
}


console.log("pool.any.emit('myEvent', 'POOL.ANY')");
pool.any.emit('myEvent', "POOL.ANY");

console.log("pool.all.emit('myEvent', 'POOL.ALL')");
pool.all.emit('myEvent', "POOL.ALL");
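Nothing in this excerpt ever sends the "QUIT" string that the pool handler above checks for. A caller could exercise that shutdown path as sketched below; this line is an illustration based on the echo behavior of ƒ, not part of the original listing.

// The workers echo whatever data they receive on 'myEvent', so sending "QUIT"
// to any one of them makes the pool handler above destroy the pool.
console.log("pool.any.emit('myEvent', 'QUIT')");
pool.any.emit('myEvent', "QUIT");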

Example #5

var t= require('webworker-threads');

// Main-thread handler: each 'a' reply counts one completed ping-pong and sends another 'b'.
function eventHandler (data) {
  ctr++;
  this.emit('b', 0);
}

// Runs inside each worker: the inner eventHandler shadows the outer one and answers every 'b' with an 'a'.
function boot () {
  thread.on('b', eventHandler);
  
  function eventHandler (data) {
    thread.emit('a', 0);
  }
}

var ctr= 0;
var i= +process.argv[2] || 1;
console.log('Using '+ i+ ' threads');

var pool= t.createPool(i);
// Load boot into every worker, run it, listen for 'a' replies on the main thread, and seed the loop with three 'b' events.
pool.all.eval(boot).all.eval('boot()').on('a', eventHandler).all.emit('b', 0).all.emit('b', 0).all.emit('b', 0);


var s= Date.now();
function display () {
  var e= Date.now()- s;
  var ppps= (ctr*1e3/e).toFixed(1);
  console.log("ping-pongs: "+ ctr+ ", ping-pongs-per-second: "+ ppps);
}

setInterval(display, 1e3);