Example #1
var AWS = require('aws-sdk');
var Dyno = require('dyno');
var stream = require('stream');
var zlib = require('zlib');

module.exports = function(config, done) {
    var primary = Dyno(config);
    var s3 = new AWS.S3();

    var log = config.log || console.log;
    var scanOpts = config.hasOwnProperty('segment') && config.segments ?
        { Segment: config.segment, TotalSegments: config.segments } : undefined;

    if (!config.backup || !config.backup.bucket || !config.backup.prefix || !config.backup.jobid)
        return done(new Error('Must provide a bucket, prefix and jobid for backups'));

    var index = !isNaN(parseInt(config.segment, 10)) ? config.segment.toString() : '0';
    var key = [config.backup.prefix, config.backup.jobid, index].join('/');
    var count = 0;
    var size = 0;

    var stringify = new stream.Transform({ objectMode: true });
    stringify._transform = function(record, enc, callback) {
        var line = Dyno.serialize(record);

        setImmediate(function() {
            stringify.push(line + '\n');
            count++;
            callback();
        });
    };

    var data = primary.scanStream(scanOpts)
        .on('error', next)
      .pipe(stringify)
        .on('error', next)
      .pipe(zlib.createGzip());

    log('[segment %s] Starting backup job %s of %s', index, config.backup.jobid, config.region + '/' + config.table);

    s3.upload({
        Bucket: config.backup.bucket,
        Key: key,
        Body: data
    }, function(err) {
        if (err) return next(err);
        log('[segment %s] Uploaded dynamo backup to s3://%s/%s', index, config.backup.bucket, key);
        log('[segment %s] Wrote %s items to backup', index, count);
        next();
    }).on('httpUploadProgress', function(progress) {
        log('[segment %s] Uploaded %s bytes', index, progress.loaded);
        size = progress.total;
    });

    function next(err) {
        if (err) return done(err);
        done(null, { size: size, count: count });
    }
};
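A hedged usage sketch for the module above; the module path, table, region, and backup destination are all hypothetical:

var backup = require('./backup'); // hypothetical path to the module above

backup({
    table: 'my-table',
    region: 'us-east-1',
    segment: 0,   // optional: which parallel-scan segment this job covers
    segments: 4,  // optional: total number of parallel-scan segments
    backup: {
        bucket: 'my-backup-bucket',
        prefix: 'dynamo-backups',
        jobid: 'job-2016-12-01'
    }
}, function(err, results) {
    if (err) throw err;
    console.log('Backed up %s items (%s bytes)', results.count, results.size);
});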
Example #2
var AWS = require('aws-sdk');
var Dyno = require('dyno');
var geobuf = require('geobuf');
var queue = require('queue-async');
var stream = require('stream');
var _ = require('underscore');
var Metadata = require('./lib/metadata');

// Assumed default; the real value lives in the cardboard source
var MAX_GEOMETRY_SIZE = 1024 * 10;

/**
 * Cardboard client generator
 * @param {object} config - a configuration object
 * @param {string} config.table - the name of a DynamoDB table to connect to
 * @param {string} config.region - the AWS region containing the DynamoDB table
 * @param {string} config.bucket - the name of an S3 bucket to use
 * @param {string} config.prefix - the name of a folder within the indicated S3 bucket
 * @param {dyno} [config.dyno] - a pre-configured [dyno client](https://github.com/mapbox/dyno) for connecting to DynamoDB
 * @param {s3} [config.s3] - a pre-configured [S3 client](http://docs.aws.amazon.com/AWSJavaScriptSDK/latest/AWS/S3.html)
 * @returns {cardboard} a cardboard client
 * @example
 * var cardboard = require('cardboard')({
 *   table: 'my-cardboard-table',
 *   region: 'us-east-1',
 *   bucket: 'my-cardboard-bucket',
 *   prefix: 'my-cardboard-prefix'
 * });
 * @example
 * var cardboard = require('cardboard')({
 *   dyno: require('dyno')(dynoConfig),
 *   bucket: 'my-cardboard-bucket',
 *   prefix: 'my-cardboard-prefix'
 * });
 */
function Cardboard(config) {
    config = config || {};
    config.MAX_GEOMETRY_SIZE = config.MAX_GEOMETRY_SIZE || MAX_GEOMETRY_SIZE;

    if (!config.dyno && !config.table) throw new Error('No table set');
    if (!config.dyno && !config.region) throw new Error('No region set');
    if (!config.bucket) throw new Error('No bucket set');
    if (!config.prefix) throw new Error('No s3 prefix set');

    // Allow caller to pass in aws-sdk clients
    if (!config.s3) config.s3 = new AWS.S3(config);
    if (!config.dyno) config.dyno = Dyno(config);

    var utils = require('./lib/utils')(config);

    /**
     * A client configured to interact with a backend cardboard database
     */
    var cardboard = {};
    cardboard.batch = require('./lib/batch')(config);

    /**
     * Insert or update a single GeoJSON feature
     * @param {object} feature - a GeoJSON feature
     * @param {string} dataset - the name of the dataset that this feature belongs to
     * @param {function} callback - the callback function to handle the response
     * @example
     * // Create a point, allowing Cardboard to assign it an id.
     * var feature = {
     *   type: 'Feature',
     *   properties: {},
     *   geometry: {
     *     type: 'Point',
     *     coordinates: [0, 0]
     *   }
     * };
     *
     * cardboard.put(feature, 'my-dataset', function(err, result) {
     *   if (err) throw err;
     *   !!result.id; // true: an id has been assigned
     * });
     * @example
     * // Create a point, using a custom id.
     * var feature = {
     *   id: 'my-custom-id',
     *   type: 'Feature',
     *   properties: {},
     *   geometry: {
     *     type: 'Point',
     *     coordinates: [0, 0]
     *   }
     * };
     *
     * cardboard.put(feature, 'my-dataset', function(err, result) {
     *   if (err) throw err;
     *   result.id === feature.id; // true: the custom id was preserved
     * });
     * @example
     * // Create a point, then move it.
     * var feature = {
     *   type: 'Feature',
     *   properties: {},
     *   geometry: {
     *     type: 'Point',
     *     coordinates: [0, 0]
     *   }
     * };
     *
     * cardboard.put(feature, 'my-dataset', function(err, result) {
     *   if (err) throw err;
     *   result.geometry.coordinates = [1, 1];
     *
     *   cardboard.put(result, 'my-dataset', function(err, final) {
     *     if (err) throw err;
     *     final.geometry.coordinates[0] === 1; // true: the feature was moved
     *   });
     * });
     */
    cardboard.put = function(feature, dataset, callback) {
        var encoded;
        try { encoded = utils.toDatabaseRecord(feature, dataset); }
        catch (err) { return callback(err); }

        var q = queue(1);
        if (encoded[1]) q.defer(config.s3.putObject.bind(config.s3), encoded[1]);
        q.defer(config.dyno.putItem, encoded[0]);
        q.await(function(err) {
            var result = geobuf.geobufToFeature(encoded[0].val || encoded[1].Body);
            result.id = utils.idFromRecord(encoded[0]);
            callback(err, result);
        });
    };

    /**
     * Remove a single GeoJSON feature
     * @param {string} primary - the id for a feature
     * @param {string} dataset - the name of the dataset that this feature belongs to
     * @param {function} callback - the callback function to handle the response
     * @example
     * // Create a point, then delete it
     * var feature = {
     *   id: 'my-custom-id',
     *   type: 'Feature',
     *   properties: {},
     *   geometry: {
     *     type: 'Point',
     *     coordinates: [0, 0]
     *   }
     * };
     *
     * cardboard.put(feature, 'my-dataset', function(err, result) {
     *   if (err) throw err;
     *
     *   cardboard.del(result.id, 'my-dataset', function(err, result) {
     *     if (err) throw err;
     *     !!result; // true: the feature was removed
     *   });
     * });
     * @example
     * // Attempt to delete a feature that does not exist
     * cardboard.del('non-existent-feature', 'my-dataset', function(err, result) {
     *   err.message === 'Feature does not exist'; // true
     *   !!result; // false: nothing was removed
     * });
     */
    cardboard.del = function(primary, dataset, callback) {
        var key = { dataset: dataset, id: 'id!' + primary };

        config.dyno.deleteItem(key, { expected: { id: 'NOT_NULL' } }, function(err) {
            if (err && err.code === 'ConditionalCheckFailedException') return callback(new Error('Feature does not exist'));
            if (err) return callback(err);
            callback();
        });
    };

    /**
     * Retrieve a single GeoJSON feature
     * @param {string} primary - the id for a feature
     * @param {string} dataset - the name of the dataset that this feature belongs to
     * @param {function} callback - the callback function to handle the response
     * @example
     * // Create a point, then retrieve it.
     * var feature = {
     *   type: 'Feature',
     *   properties: {},
     *   geometry: {
     *     type: 'Point',
     *     coordinates: [0, 0]
     *   }
     * };
     *
     * cardboard.put(feature, 'my-dataset', function(err, result) {
     *   if (err) throw err;
     *
     *   cardboard.get(result.id, 'my-dataset', function(err, final) {
     *     if (err) throw err;
     *     JSON.stringify(final) === JSON.stringify(result); // true: the same feature came back
     *   });
     * });
     * @example
     * // Attempt to retrieve a feature that does not exist
     * cardboard.get('non-existent-feature', 'my-dataset', function(err, result) {
     *   err.message === 'Feature non-existent-feature does not exist'; // true
     *   !!result; // false: nothing was retrieved
     * });
     */
    cardboard.get = function(primary, dataset, callback) {
        var key = { dataset: dataset, id: 'id!' + primary };

        config.dyno.getItem(key, function(err, item) {
            if (err) return callback(err);
            if (!item) return callback(new Error('Feature ' + primary + ' does not exist'));
            utils.resolveFeatures([item], function(err, features) {
                if (err) return callback(err);
                callback(null, features.features[0]);
            });
        });
    };

    /**
     * Create a DynamoDB table with Cardboard's schema
     * @param {string} [tableName] - the name of the table to create; defaults to the table name defined in the client configuration
     * @param {function} callback - the callback function to handle the response
     * @example
     * // Create the cardboard table specified by the client config
     * cardboard.createTable(function(err) {
     *   if (err) throw err;
     * });
     * @example
     * // Create another cardboard table
     * cardboard.createTable('new-cardboard-table', function(err) {
     *   if (err) throw err;
     * });
     */
    cardboard.createTable = function(tableName, callback) {
        if (typeof tableName === 'function') {
            callback = tableName;
            tableName = null;
        }

        var table = require('./lib/table.json');
        table.TableName = tableName || config.table;
        config.dyno.createTable(table, callback);
    };

    /**
     * List the ids available in a dataset
     * @private
     * @param {string} dataset - the name of the dataset
     * @param {function} callback - the callback function to handle the response
     */
    function listIds(dataset, callback) {
        var query = { dataset: { EQ: dataset }, id: {BEGINS_WITH: 'id!'} };
        var opts = { attributes: ['id'], pages: 0 };

        config.dyno.query(query, opts, function(err, items) {
            if (err) return callback(err);
            callback(err, items.map(utils.idFromRecord));
        });
    }

    /**
     * Remove an entire dataset
     * @param {string} dataset - the name of the dataset
     * @param {function} callback - the callback function to handle the response
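     * @example
     * // Remove a dataset and its cached metadata record
     * cardboard.delDataset('my-dataset', function(err) {
     *   if (err) throw err;
     * });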
     */
    cardboard.delDataset = function(dataset, callback) {
        listIds(dataset, function(err, res) {
            if (err) return callback(err);

            var keys = res.map(function(id) {
                return { dataset: dataset, id: 'id!' + id };
            });

            keys.push({ dataset: dataset, id: 'metadata!' + dataset });

            config.dyno.deleteItems(keys, function(err) {
                callback(err);
            });
        });
    };

    /**
     * List the GeoJSON features that belong to a particular dataset
     * @param {string} dataset - the name of the dataset
     * @param {object} [pageOptions] - pagination options
     * @param {string} [pageOptions.start] - start reading features past the provided id
     * @param {number} [pageOptions.maxFeatures] - maximum number of features to return
     * @param {function} [callback] - the callback function to handle the response
     * @returns {object} a readable stream
     * @example
     * // List all the features in a dataset
     * cardboard.list('my-dataset', function(err, collection) {
     *   if (err) throw err;
     *   collection.type === 'FeatureCollection'; // true
     * });
     * @example
     * // Stream all the features in a dataset
     * cardboard.list('my-dataset')
     *   .on('data', function(feature) {
     *     console.log('Got feature: %j', feature);
     *   })
     *   .on('end', function() {
     *     console.log('All done!');
     *   });
     * @example
     * // List one page with a max of 10 features from a dataset
     * cardboard.list('my-dataset', { maxFeatures: 10 }, function(err, collection) {
     *   if (err) throw err;
     *   collection.type === 'FeatureCollection'; // true
     *   collection.features.length <= 10; // true
     * });
     * @example
     * // Paginate through all the features in a dataset
     * (function list(start) {
     *   cardboard.list('my-dataset', {
     *     maxFeatures: 10,
     *     start: start
     *   }, function(err, collection) {
     *     if (err) throw err;
     *     if (!collection.features.length) return console.log('All done!');
     *     list(collection.features.slice(-1)[0].id);
     *   });
     * })();
     */
    cardboard.list = function(dataset, pageOptions, callback) {
        var opts = {};

        if (typeof pageOptions === 'function') {
            callback = pageOptions;
            opts.pages = 0;
            pageOptions = {};
        }

        pageOptions = pageOptions || {};
        if (pageOptions.start) opts.start = {
            dataset: dataset,
            id: 'id!' + pageOptions.start
        };
        if (pageOptions.maxFeatures) opts.limit = pageOptions.maxFeatures;

        var query = { dataset: { EQ: dataset }, id: { BEGINS_WITH: 'id!' } };

        if (!callback) {
            var resolver = new stream.Transform({ objectMode: true, highWaterMark: 50 });

            resolver.items = [];

            resolver._resolve = function(callback) {
                utils.resolveFeatures(resolver.items, function(err, collection) {
                    if (err) return callback(err);

                    resolver.items = [];

                    collection.features.forEach(function(feature) {
                        resolver.push(feature);
                    });

                    callback();
                });
            };

            resolver._transform = function(item, enc, callback) {
                resolver.items.push(item);
                if (resolver.items.length < 25) return callback();

                resolver._resolve(callback);
            };

            resolver._flush = function(callback) {
                if (!resolver.items.length) return callback();

                resolver._resolve(callback);
            };

            return config.dyno.query(query)
                .on('error', function(err) { resolver.emit('error', err); })
              .pipe(resolver);
        }

        config.dyno.query(query, opts, function(err, items) {
            if (err) return callback(err);
            utils.resolveFeatures(items, function(err, features) {
                if (err) return callback(err);
                callback(null, features);
            });
        });
    };

    /**
     * List datasets available in this database
     * @param {function} callback - the callback function to handle the response
     * @example
     * cardboard.listDatasets(function(err, datasets) {
     *   if (err) throw err;
     *   Array.isArray(datasets); // true
     *   console.log(datasets[0]); // 'my-dataset'
     * });
     */
    cardboard.listDatasets = function(callback) {
        var opts = { attributes: ['dataset'], pages:0 };

        config.dyno.scan(opts, function(err, items) {
            if (err) return callback(err);

            var datasets = _.uniq(items.map(function(item) {
                return item.dataset;
            }));

            callback(err, datasets);
        });
    };

    /**
     * Get cached metadata about a dataset
     * @param {string} dataset - the name of the dataset
     * @param {function} callback - the callback function to handle the response
     * @example
     * cardboard.getDatasetInfo('my-dataset', function(err, metadata) {
     *   if (err) throw err;
     *   console.log(Object.keys(metadata));
     *   // [
     *   //   'dataset',
     *   //   'id',
     *   //   'west',
     *   //   'south',
     *   //   'east',
     *   //   'north',
     *   //   'count',
     *   //   'size',
     *   //   'updated'
     *   // ]
     * });
     */
    cardboard.getDatasetInfo = function(dataset, callback) {
        Metadata(config.dyno, dataset).getInfo(callback);
    };

    /**
     * Calculate metadata about a dataset
     * @param {string} dataset - the name of the dataset
     * @param {function} callback - the callback function to handle the response
     * @example
     * cardboard.calculateDatasetInfo('my-dataset', function(err, metadata) {
     *   if (err) throw err;
     *   console.log(Object.keys(metadata));
     *   // [
     *   //   'dataset',
     *   //   'id',
     *   //   'west',
     *   //   'south',
     *   //   'east',
     *   //   'north',
     *   //   'count',
     *   //   'size',
     *   //   'updated'
     *   // ]
     * });
     */
    cardboard.calculateDatasetInfo = function(dataset, callback) {
        Metadata(config.dyno, dataset).calculateInfo(callback);
    };

    /**
     * A module for incremental metadata adjustments
     * @name cardboard.metadata
     */
    var metadata = {};

    /**
     * Pre-flight function to request information about the size and extent of a feature
     * @static
     * @memberof cardboard.metadata
     * @param {string} dataset - the name of the dataset
     * @param {object} feature - a GeoJSON feature being added to the dataset
     * @returns {object} an object describing the feature's size and extent
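     * @example
     * // Hypothetical pre-flight check; `feature` is a GeoJSON feature as in
     * // the cardboard.put examples above
     * var info = cardboard.metadata.featureInfo('my-dataset', feature);
     * // `info` describes the feature's size and extent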
     */
    metadata.featureInfo = function(dataset, feature) {
        return Metadata(config.dyno, dataset).getFeatureInfo(feature);
    };

    /**
     * Incrementally update a dataset's metadata with a new feature. This operation **will** create a metadata record if one does not exist.
     * @static
     * @memberof cardboard.metadata
     * @param {string} dataset - the name of the dataset
     * @param {object} feature - a GeoJSON feature (or backend record) being added to the dataset
     * @param {function} callback - a function to handle the response
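     * @example
     * // Sketch: record a new feature in the dataset's metadata; `feature` is
     * // a GeoJSON feature as in the cardboard.put examples above
     * cardboard.metadata.addFeature('my-dataset', feature, function(err) {
     *   if (err) throw err;
     * });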
     */
    metadata.addFeature = function(dataset, feature, callback) {
        Metadata(config.dyno, dataset).addFeature(feature, callback);
    };

    /**
     *
     * Update a dataset's metadata with a change to a single feature. This operation **will not** create a metadata record if one does not exist.
     * @static
     * @memberof cardboard.metadata
     * @param {string} dataset - the name of the dataset
     * @param {object} from - a GeoJSON feature (or backend record) representing the state of the feature *before* the update
     * @param {object} to - a GeoJSON feature (or backend record) representing the state of the feature *after* the update
     * @param {function} callback - a function to handle the response
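     * @example
     * // Sketch: update metadata for a changed feature; `before` and `after`
     * // are GeoJSON features as in the cardboard.put examples above
     * cardboard.metadata.updateFeature('my-dataset', before, after, function(err) {
     *   if (err) throw err;
     * });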
     */
    metadata.updateFeature = function(dataset, from, to, callback) {
        Metadata(config.dyno, dataset).updateFeature(from, to, callback);
    };

    /**
     * Given a GeoJSON feature to remove, perform all required metadata updates. This operation **will not** create a metadata record if one does not exist. This operation **will not** shrink metadata bounds.
     * @static
     * @memberof cardboard.metadata
     * @param {string} dataset - the name of the dataset
     * @param {object} feature - a GeoJSON feature (or backend record) to remove from the dataset
     * @param {function} callback - a function to handle the response
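     * @example
     * // Sketch: roll a feature's removal into the dataset's metadata; note
     * // that this call never shrinks the recorded bounds
     * cardboard.metadata.deleteFeature('my-dataset', feature, function(err) {
     *   if (err) throw err;
     * });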
     */
    metadata.deleteFeature = function(dataset, feature, callback) {
        Metadata(config.dyno, dataset).deleteFeature(feature, callback);
    };

    cardboard.metadata = metadata;

    /**
     * Find GeoJSON features that intersect a bounding box
     * @param {number[]} bbox - the bounding box as `[west, south, east, north]`
     * @param {string} dataset - the name of the dataset
     * @param {object} [options] - pagination options. If omitted, the query
     *   returns the first page, limited to 100 features
     * @param {number} [options.maxFeatures] - maximum number of features to return
     * @param {string} [options.start] - exclusive start key (a feature id) to use for loading the next page
     * @param {function} callback - the callback function to handle the response
     * @example
     * var bbox = [-120, 30, -115, 32]; // west, south, east, north
     * cardboard.bboxQuery(bbox, 'my-dataset', function(err, collection) {
     *   if (err) throw err;
     *   collection.type === 'FeatureCollection'; // true
     * });
     */
    cardboard.bboxQuery = function(bbox, dataset, options, callback) {
        if (typeof options === 'function') {
            callback = options;
            options = {};
        }

        if (!options.maxFeatures) options.maxFeatures = 100;

        // List all features with a filter query for the bounds.
        // This isn't meant to be fast, but it is meant to page by feature id.

        var query = {
            dataset: { EQ: dataset },
            id: {BEGINS_WITH: 'id!'}
        };

        var queryOptions = {
            pages: 1,
            limit: options.maxFeatures,
            filter: {
                west: { LE: bbox[2] },
                east: { GE: bbox[0] },
                north: { GE: bbox[1] },
                south: { LE: bbox[3] }
            }
        };

        if (options.start) {
            queryOptions.start =  {
                dataset: dataset,
                id: 'id!'+options.start
            };
        }

        var maxPages = 10;
        var page = 0;
        var combinedFeatures = [];

        function getPageOfBbox() {
            config.dyno.query(query, queryOptions, function(err, items, meta) {
                if (err) return callback(err);
                utils.resolveFeatures(items, function(err, data) {
                    if (err) return callback(err);
                    combinedFeatures = combinedFeatures.concat(data.features);
                    if (combinedFeatures.length >= options.maxFeatures || page >= maxPages || !meta[0].last) {
                        data.features = combinedFeatures.slice(0, options.maxFeatures);
                        return callback(err, data);
                    }
                    page += 1;
                    queryOptions.start = meta[0].last;
                    getPageOfBbox();
                });
            });
        }
        getPageOfBbox();
    };

    return cardboard;
}
Example #3
var assert = require('assert');
var stream = require('stream');
var queue = require('queue-async');
var _ = require('underscore');
var Dyno = require('dyno');

module.exports = function(config, done) {
    var primary = Dyno(config.primary);
    var replica = Dyno(config.replica);
    primary.tableName = config.primary.table;
    replica.tableName = config.replica.table;
    primary.name = 'primary';
    replica.name = 'replica';

    var log = config.log || console.log;
    var scanOpts = config.hasOwnProperty('segment') && config.segments ?
        { Segment: config.segment, TotalSegments: config.segments } : undefined;

    var discrepancies = 0;
    var itemsScanned = 0;
    var itemsCompared = 0;
    var start = Date.now();

    function report() {
        var elapsed = (Date.now() - start) / 1000;
        var scanRate = Math.min(itemsScanned, (itemsScanned / elapsed).toFixed(2));
        var compareRate = Math.min(itemsCompared, (itemsCompared / elapsed).toFixed(2));
        log('[progress] Scan rate: %s items @ %s items/s | Compare rate: %s items/s', itemsScanned, scanRate, compareRate);
    }

    var reporter = setInterval(report, 60000);
    reporter.unref();

    function Aggregate() {
        var aggregation = new stream.Transform({ objectMode: true });
        aggregation.records = [];

        aggregation._transform = function(record, enc, callback) {
            aggregation.records.push(record);

            if (aggregation.records.length < 25) return callback();

            aggregation.push(aggregation.records);
            aggregation.records = [];
            callback();
        };

        aggregation._flush = function(callback) {
            if (aggregation.records.length) aggregation.push(aggregation.records);
            callback();
        };

        return aggregation;
    }

    function Compare(readFrom, compareTo, keySchema, deleteMissing) {
        var noItem = deleteMissing ? 'extraneous' : 'missing';
        var comparison = new stream.Transform({ objectMode: true });
        comparison.discrepancies = 0;

        comparison._transform = function(records, enc, callback) {
            var params = { RequestItems: {} };
            params.RequestItems[readFrom.tableName] = { Keys: [] };
            itemsScanned += records.length;

            var recordKeys = records.reduce(function(recordKeys, record) {
                var key = keySchema.reduce(function(key, attribute) {
                    key[attribute] = record[attribute];
                    return key;
                }, {});
                params.RequestItems[readFrom.tableName].Keys.push(key);
                recordKeys.push(key);
                return recordKeys;
            }, []);

            var indexedRecords = records.reduce(function(indexedRecords, record, i) {
                indexedRecords[JSON.stringify(recordKeys[i])] = record;
                return indexedRecords;
            }, {});

            if (config.backfill) {
                Object.keys(indexedRecords).forEach(function(key) {
                    var record = indexedRecords[key];
                    log('[backfill] %s', key);
                    comparison.discrepancies++;
                    itemsCompared++;
                    comparison.push({ put: record });
                });

                return callback();
            }

            var items = [];
            (function read(params) {
                readFrom.batchGetItem(params, function(err, data) {
                    if (err) return callback(err);

                    items = items.concat(data.Responses[readFrom.tableName]);

                    if (Object.keys(data.UnprocessedKeys).length)
                        return read({ RequestItems: data.UnprocessedKeys });

                    gotAll();
                });
            })(params);

            function gotAll() {
                var itemKeys = items.reduce(function(itemKeys, item) {
                    itemKeys.push(keySchema.reduce(function(key, attribute) {
                        key[attribute] = item[attribute];
                        return key;
                    }, {}));
                    return itemKeys;
                }, []);

                var indexedItems = items.reduce(function(indexedItems, item, i) {
                    indexedItems[JSON.stringify(itemKeys[i])] = item;
                    return indexedItems;
                }, {});

                var q = queue();

                // Find missing records -- scan gave us a key but the batch read did not find a match
                recordKeys.forEach(function(key) {
                    var item = indexedItems[JSON.stringify(key)];

                    if (!item) {
                        q.defer(function(next) {
                            compareTo.getItem({ Key: key, ConsistentRead: true }, function(err, data) {
                                itemsCompared++;
                                if (err) return next(err);
                                var record = data.Item;

                                if (record) {
                                    comparison.discrepancies++;
                                    log('[%s] %j', noItem, key);
                                    if (!config.repair) return next();
                                    if (deleteMissing) comparison.push({ remove: key });
                                    else comparison.push({ put: record });
                                }

                                next();
                            });
                        });
                    }
                });

                // Find differing records -- iterate through each item that we did find in the batch read
                _(indexedItems).each(function(item, key) {
                    itemsCompared++;
                    var record = indexedRecords[key];
                    var recordString = Dyno.serialize(record);
                    var itemString = Dyno.serialize(item);

                    try { assert.deepEqual(JSON.parse(recordString), JSON.parse(itemString)); }
                    catch (notEqual) {
                        q.defer(function(next) {
                            compareTo.getItem({ Key: JSON.parse(key), ConsistentRead: true }, function(err, data) {
                                if (err) return next(err);
                                var record = data.Item;

                                var recordString = Dyno.serialize(record);

                                try { assert.deepEqual(JSON.parse(recordString), JSON.parse(itemString)); }
                                catch (notEqual) {
                                    comparison.discrepancies++;
                                    log('[different] %s', key);
                                    if (!config.repair) return next();
                                    comparison.push({ put: record });
                                }

                                next();
                            });
                        });
                    }
                });

                q.awaitAll(function(err) { callback(err); });
            }
        };

        return comparison;
    }

    function Write() {
        var writer = new stream.Writable({ objectMode: true, highWaterMark: 40 });
        writer.params = { RequestItems: {} };
        writer.params.RequestItems[replica.tableName] = [];
        writer.pending = false;

        writer._write = function(item, enc, callback) {
            if (!item.put && !item.remove) {
                return callback(new Error('Invalid item sent to writer: ' + JSON.stringify(item)));
            }

            var buffer = writer.params.RequestItems[replica.tableName];
            buffer.push(item.put ? { PutRequest: { Item: item.put } } : { DeleteRequest: { Key: item.remove } });
            if (buffer.length < 25) return callback();

            (function write(params) {
                writer.pending = true;
                replica.batchWriteItem(params, function(err, data) {
                    writer.pending = false;
                    if (err) return callback(err);

                    if (data.UnprocessedItems && Object.keys(data.UnprocessedItems).length)
                        return write({ RequestItems: data.UnprocessedItems });

                    writer.params.RequestItems[replica.tableName] = [];
                    callback();
                });
            })(writer.params);
        };

        var streamEnd = writer.end.bind(writer);
        writer.end = function() {
            if (writer.pending) return setImmediate(writer.end);

            if (!writer.params.RequestItems[replica.tableName].length)
                return streamEnd();

            (function write(params) {
                replica.batchWriteItem(params, function(err, data) {
                    if (err) return writer.emit('error', err);

                    if (data.UnprocessedItems && Object.keys(data.UnprocessedItems).length)
                        return write({ RequestItems: data.UnprocessedItems });

                    streamEnd();
                });
            })(writer.params);
        };

        return writer;
    }

    primary.describeTable(function(err, description) {
        if (err) return done(err);
        var keySchema = _(description.Table.KeySchema).pluck('AttributeName');
        scanPrimary(keySchema);
    });

    function scanPrimary(keySchema) {
        var aggregate = Aggregate();
        var compare = Compare(replica, primary, keySchema, false);
        var write = Write();

        log('Scanning primary table and comparing to replica');

        primary.scanStream(scanOpts)
            .on('error', finish)
          .pipe(aggregate)
            .on('error', finish)
          .pipe(compare)
            .on('error', finish)
          .pipe(write)
            .on('error', finish)
            .on('finish', function() {
                discrepancies += compare.discrepancies;
                log('[discrepancies] %s', compare.discrepancies);
                if (!config.backfill) return scanReplica(keySchema);
                finish();
            });
    }

    function scanReplica(keySchema) {
        var aggregate = Aggregate();
        var compare = Compare(primary, replica, keySchema, true);
        var write = Write();

        log('Scanning replica table and comparing to primary');

        replica.scanStream(scanOpts)
            .on('error', finish)
          .pipe(aggregate)
            .on('error', finish)
          .pipe(compare)
            .on('error', finish)
          .pipe(write)
            .on('error', finish)
            .on('finish', function() {
                discrepancies += compare.discrepancies;
                log('[discrepancies] %s', compare.discrepancies);
                finish();
            });
    }

    function finish(err) {
        clearInterval(reporter);
        report();
        done(err, discrepancies);
    }
};
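A hedged usage sketch for the comparison module above; the module path and table configurations are hypothetical:

var diff = require('./diff-tables'); // hypothetical path to the module above

diff({
    primary: { table: 'primary-table', region: 'us-east-1' },
    replica: { table: 'replica-table', region: 'eu-west-1' },
    repair: true, // push fixes to the replica for any discrepancies found
    segment: 0,   // optional parallel-scan settings, as in the backup example
    segments: 4
}, function(err, discrepancies) {
    if (err) throw err;
    console.log('Finished with %s discrepancies', discrepancies);
});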
Example #4
var aws = require('aws-sdk');
var stream = require('stream');
var Dyno = require('dyno');
var CSVTransform = require('./transform-stream');
var zlib = require('zlib');

var dateFormat = require('dateformat');
var ts = dateFormat(new Date(), 'mmddyyyy-HHMMss');

var dyno = Dyno({
  table: 'OBDTable_mmmYYYY',
  region: 'ap-southeast-1',
  //endpoint: 'http://localhost:4567'
});

//exports.handler = function (event, context) {
function backupTable(context) {
//function backupTable(tablename, callback) {
  var tablename = 'OBDTable_mmmYYYY';

  //var data_stream = dyno.scanStream();

  var params = {
    //TableName: 'Table',
    //IndexName: 'Index',
    KeyConditionExpression: 'obd_dev_id = :hkey and #ts BETWEEN :rkey_l AND :rkey_h',
    // '#ts' needs an alias because the attribute name (assumed to be 'ts')
    // collides with a DynamoDB reserved word
    ExpressionAttributeNames: { '#ts': 'ts' },
    ExpressionAttributeValues: {
      ':hkey': '213EP2016000570',
      ':rkey_l': 1480565971000,
      ':rkey_h': 1480566618000
    }
  };

  // The source snippet is truncated here. A minimal continuation, assuming
  // CSVTransform() is a factory for the transform stream required above,
  // would stream the query results through CSV conversion and gzip:
  dyno.queryStream(params)
    .pipe(CSVTransform())
    .pipe(zlib.createGzip());
}
Example #5
var AWS = require('aws-sdk');
var Dyno = require('dyno');
var geobuf = require('geobuf');
var queue = require('queue-async');
var _ = require('underscore');

module.exports = function(config) {
    if (!config.bucket) throw new Error('No bucket set');
    if (!config.prefix) throw new Error('No s3 prefix set');
    if (!config.s3) config.s3 = new AWS.S3(config);
    if (!config.dyno) config.dyno = Dyno(config);

    var utils = require('./utils')(config);

    /**
     * A module for batch requests
     * @name cardboard.batch
     */
    var batch = {};

    /**
     * Insert or update a set of GeoJSON features
     * @static
     * @memberof cardboard.batch
     * @param {object} collection - a GeoJSON FeatureCollection containing features to insert and/or update
     * @param {string} dataset - the name of the dataset that these features belong to
     * @param {function} callback - the callback function to handle the response
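     * @example
     * // Insert two points in one call (accessed via the cardboard client)
     * var collection = {
     *   type: 'FeatureCollection',
     *   features: [
     *     { type: 'Feature', properties: {}, geometry: { type: 'Point', coordinates: [0, 0] } },
     *     { type: 'Feature', properties: {}, geometry: { type: 'Point', coordinates: [1, 1] } }
     *   ]
     * };
     *
     * cardboard.batch.put(collection, 'my-dataset', function(err, putFeatures) {
     *   if (err) throw err;
     *   putFeatures.type === 'FeatureCollection'; // true
     * });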
     */
    batch.put = function(collection, dataset, callback) {
        var records = [];
        var geobufs = [];

        var encoded;
        var q = queue(150);

        for (var i = 0; i < collection.features.length; i++) {
            try { encoded = utils.toDatabaseRecord(collection.features[i], dataset); }
            catch (err) { return callback(err); }

            records.push(encoded[0]);
            geobufs.push(encoded[0].val || encoded[1].Body);
            if (encoded[1]) q.defer(config.s3.putObject.bind(config.s3), encoded[1]);
        }

        q.awaitAll(function(err) {
            if (err) return callback(err);
            config.dyno.putItems(records, function(err) {
                if (err && err.unprocessed) {
                    var table = Object.keys(err.unprocessed)[0];
                    var unprocessed = err.unprocessed[table].map(function(item) {
                        var id = utils.idFromRecord(item.PutRequest.Item);
                        var i = _.findIndex(records, function(record) {
                            return utils.idFromRecord(record) === id;
                        });

                        return geobuf.geobufToFeature(geobufs[i]);
                    });

                    err.unprocessed = { type: 'FeatureCollection', features: unprocessed };
                }

                if (err) return callback(err);

                var features = geobufs.map(geobuf.geobufToFeature.bind(geobuf));
                callback(null, { type: 'FeatureCollection', features: features });
            });
        });
    };

    /**
     * Remove a set of features
     * @static
     * @memberof cardboard.batch
     * @param {string[]} ids - an array of feature ids to remove
     * @param {string} dataset - the name of the dataset that these features belong to
     * @param {function} callback - the callback function to handle the response
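     * @example
     * // Remove two features by id (accessed via the cardboard client)
     * cardboard.batch.remove(['feature-1', 'feature-2'], 'my-dataset', function(err) {
     *   if (err) throw err;
     * });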
     */
    batch.remove = function(ids, dataset, callback) {
        var keys = ids.map(function(id) {
            return { dataset: dataset, id: 'id!' + id };
        });

        config.dyno.deleteItems(keys, function(err) {
            if (err && err.unprocessed) {
                var table = Object.keys(err.unprocessed)[0];
                err.unprocessed = err.unprocessed[table].map(function(item) {
                    return utils.idFromRecord(item.DeleteRequest.Key);
                });
            }

            callback(err);
        });
    };

    return batch;
};
Example #6
var path = require('path');
var events = path.resolve(__dirname, 'fixtures', 'events');
var replicate = require('..').replicate;
var backup = require('..').backup;
var _ = require('underscore');
var crypto = require('crypto');
var AWS = require('aws-sdk');
var s3 = new AWS.S3();
var queue = require('queue-async');
var Dyno = require('dyno');
var test = require('tape'); // assumed test harness, per the test() call below
var streambot = require('streambot');

// `replica` is a test-table helper assumed to be set up in an elided part of
// this test file; its definition is not shown in the snippet.
replica.start();

var dyno = Dyno({
    table: replica.tableName,
    region: 'mock',
    accessKeyId: 'mock',
    secretAccessKey: 'mock',
    endpoint: 'http://localhost:4567'
});

process.env.ReplicaTable = replica.tableName;
process.env.ReplicaRegion = 'mock';
process.env.ReplicaEndpoint = 'http://localhost:4567';
process.env.AWS_ACCESS_KEY_ID = 'mock';
process.env.AWS_SECRET_ACCESS_KEY = 'mock';
process.env.BackupBucket = 'mapbox';

test('[agent] use http agent for replication tests', function(assert) {
    streambot.agent = require('http').globalAgent;
    assert.end();
});
Example #7
const Dyno = require('dyno')
const R = require('ramda')
const {
  convertItemsToPutRequests,
  readAllFrom
} = require('./utils')

const eventsTable = process.env.AWS_DYNAMODB_EVENTS_TABLE

const eventsDyno = Dyno({
  table: eventsTable,
  accessKeyId: process.env.AWS_STORIES_USER_ACCESS_KEY,
  secretAccessKey: process.env.AWS_STORIES_USER_SECRET,
  region: process.env.AWS_STORIES_REGION
})

//const writeStream = eventsDyno.putStream()

function writeEvents(events) {
  return new Promise((resolve, reject) => {
    eventsDyno.batchWriteAll({
      RequestItems: {
        [eventsTable]: convertItemsToPutRequests(events)
      }
    }, 10 /* retries */)
    .sendAll((error, data) => {
      if (error) {
        reject(error)
      }
      else {
        const { UnprocessedItems } = data
        // Assumed completion: the source snippet is truncated at this point.
        if (UnprocessedItems && Object.keys(UnprocessedItems).length) {
          reject(new Error('Items were left unprocessed after retries'))
        }
        else {
          resolve(data)
        }
      }
    })
  })
}
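// Hedged usage sketch: the event objects below are hypothetical; items must
// be in whatever plain-object shape convertItemsToPutRequests expects.
writeEvents([
  { id: 'event-1', type: 'StoryCreated' },
  { id: 'event-2', type: 'StoryPublished' }
])
  .then(() => console.log('events written'))
  .catch(error => console.error(error))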
// An unrelated snippet follows: a script that fetches a DynamoDB record and
// its S3 backup copy for side-by-side inspection. Its opening lines (requires
// for Dyno, AWS, and crypto, plus the parsing of `s3url`, `table`, and
// `region`) were lost in the source. The key is assumed to arrive as a
// serialized string, e.g. from a CLI argument.
var key = process.argv[2];
try {
    var obj = Dyno.deserialize(key);
    key = obj;
}
catch (err) { key = JSON.parse(key); }


s3url.Key = [
    s3url.Key,
    table,
    crypto.createHash('md5')
        .update(Dyno.serialize(key))
        .digest('hex')
].join('/');

var dyno = Dyno({
    region: region,
    table: table
});

dyno.getItem({ Key: key }, function(err, data) {
    if (err) throw err;
    var dynamoRecord = data.Item;
    
    s3.getObject(s3url, function(err, data) {
        if (err && err.statusCode !== 404) throw err;
        var s3data = err ? undefined : Dyno.deserialize(data.Body.toString());

        console.log('DynamoDB record');
        console.log('--------------');
        console.log(dynamoRecord);
        console.log('');