Esempio n. 1
0
/**
 * Final stage of the job: closes the worklog, writes every collected record
 * as one JSON document per line into an output file whose name is built from
 * conf.tweets.crawlerOutFile and the current date, and finally prints how
 * many records were ignored.
 *
 * @param {Object} results - result set handed over by the runtime; this code
 *     relies on `results.sorted.each(function (key, data))` and
 *     `results.get(key)`.
 */
finish(function (results) {

    /**
     * Returns the current local date as year, month and day concatenated
     * into a single string (formatting is delegated to orzo.sprintf).
     */
    function dateStamp() {
        var now = new Date();
        // getMonth() is zero-based, hence the + 1
        return orzo.sprintf('%02d%02d%02d', now.getFullYear(), now.getMonth() + 1, now.getDate());
    }

    /**
     * Serializes a single record to the JSON line stored in the output file.
     * Only id, time, account and text are kept; time is stored as a number.
     */
    function toJsonLine(dataItem) {
        return JSON.stringify({
            id: dataItem.id,
            // explicit radix so the value is always read as a decimal number
            time: parseInt(dataItem.time, 10),
            account: dataItem.account,
            text: dataItem.text
        });
    }

    worklog.close();

    // doWith is defined outside this block; presumably it passes the writer to
    // the first callback and reports failures through the second one.
    doWith(orzo.fileWriter(orzo.sprintf(conf.tweets.crawlerOutFile, dateStamp())),
        function (fw) {
            results.sorted.each(function (key, data) {
                // '__ignored__' carries the ignored-records counter, not records
                if (key === '__ignored__') {
                    return;
                }
                data.forEach(function (dataItem) {
                    fw.writeln(toJsonLine(dataItem));
                });
            });
        },
        function (err) {
            orzo.printf('error: %s\n', err);
        }
    );

    // results.get() may yield nothing when no record was ignored (its exact
    // contract is not visible here), so fall back to 0 instead of failing on
    // the index access or printing 'undefined'.
    // NOTE(review): only the first value is reported; this assumes an earlier
    // stage has already aggregated the '__ignored__' values - confirm.
    var ignored = results.get('__ignored__');
    var ignoredCount = ignored ? ignored[0] : undefined;
    if (ignoredCount === undefined || ignoredCount === null) {
        ignoredCount = 0;
    }
    orzo.printf('ignored %s records\n', ignoredCount);
});
Esempio n. 2
0
        // Visit every 'div.tweet' element of the page and build one record per
        // element. Records whose time is not older than the latest worklog
        // timestamp are emitted under their time; all others only add 1 to the
        // '__ignored__' key.
        orzo.html.query(page, 'div.tweet', function (tweetBlock) {
            // time starts as '-' and stays so when no timestamp element is found
            var out = {id: null, time: '-', account: url, text: null};

            // applyOnFirst is defined outside this block; presumably it calls
            // the callback with the first element matching the selector.
            applyOnFirst(tweetBlock, 'span.js-short-timestamp', function (item) {
                out.time = item.dataset().time;
            });
            applyOnFirst(tweetBlock, 'p.tweet-text', function (item) {
                out.text = item.text();
            });
            out.id = tweetBlock.dataset()['item-id'];

            // Explicit radix so the time is always read as a decimal number.
            // The '-' placeholder parses to NaN, which fails the comparison and
            // therefore sends the record to the ignored branch.
            if (worklog.getLatestTimestamp() <= parseInt(out.time, 10)) {
                emit(out.time, out);

            } else {
                emit('__ignored__', 1);
            }
        });