/**
 * Create a bundler object that will use configured proxy settings to fetch a given URL and
 * bundle the contents. The fetched document is always passed through the readability-mode
 * library and its images are then inlined via b.replaceImages.
 *
 * Relies on names defined outside this function: `b` (bundler library), `bs_log` (logger),
 * `makeReadable` (readability-mode library entry point) and `_t` (translation helper).
 *
 * @param {string} url - The URL to fetch
 * @param {object} config - The configuration options for the bundle server. Reads
 *   `strictSSL`, `useProxy`, `proxyAddress` and `userAgent`.
 * @param {bool} reqFromReader - Whether the request comes from the RSS reader or not.
 *   NOTE(review): this flag is not consulted by the body; readability mode is applied
 *   unconditionally. Kept for interface compatibility.
 * @param {string} [enforcedDirection] - The direction ('ltr' or 'rtl') being enforced after
 *   readability is applied. Any other value (or omitting it) means no direction is enforced.
 * @return {Bundler} A bundler object that will fetch the requested resource
 */
function makeBundler(url, config, reqFromReader, enforcedDirection) {
  // Backward-compatible default: only 'ltr' and 'rtl' are honoured.
  if (enforcedDirection !== 'ltr' && enforcedDirection !== 'rtl') {
    enforcedDirection = "";
  }

  bs_log('Making bundler for ' + url);

  var bundler = new b.Bundler(url);

  // Propagate the configured certificate-validation policy to every outgoing request.
  function applyStrictSSL(options, callback) {
    options.strictSSL = config.strictSSL;
    callback(null, options);
  }
  bundler.on('originalRequest', applyStrictSSL);
  bundler.on('resourceRequest', applyStrictSSL);

  if (config.useProxy) {
    bundler.on('originalRequest', b.proxyTo(config.proxyAddress));
    bundler.on('resourceRequest', b.proxyTo(config.proxyAddress));
  }

  if (typeof config.userAgent !== 'undefined' && config.userAgent.length > 0) {
    bundler.on('originalRequest', b.spoofHeaders({'User-Agent': config.userAgent}));
    bundler.on('resourceRequest', b.spoofHeaders({'User-Agent': config.userAgent}));
  }

  bs_log('Making a readability-mode page.');
  bundler.on('originalReceived', function (requestFn, originalDoc, url, callback) {
    var diff = {};
    // Wrap the whole call to the readability-mode library in a try block
    try {
      makeReadable(originalDoc, {charset: 'utf-8'}, function (err, article, meta) {
        if (err) {
          // Replace the document with a (translated) error message. The callback MUST be
          // invoked here as well, otherwise the bundling pipeline never continues.
          var message = _t.__('Error: %s', err.message);
          bs_log(message);
          diff[originalDoc] = message;
          callback(null, diff);
          return;
        }

        // We only enforce direction in readability mode.
        var directionTag = "";
        if (enforcedDirection === "ltr") {
          directionTag = ' dir="ltr"';
        } else if (enforcedDirection === "rtl") {
          bs_log("Bundle is RTL");
          directionTag = ' dir="rtl"';
        }

        var content;
        try {
          content = '<html' + directionTag + '><head><meta content="text/html; charset=UTF-8" http-equiv="Content-Type"/></head>';
          // Only add a <body> wrapper when the extracted article does not already start with one.
          if (article.content.slice(0, 6) !== '<body>') {
            content += '<body>' + article.content + '</body></html>';
          } else {
            content += article.content + '</html>';
          }
          article.close();
        } catch (buildErr) {
          // If makeReadable calls back asynchronously, the outer try/catch cannot see
          // exceptions raised here (e.g. article.content not being a string), so report
          // them through the callback instead of letting them escape.
          callback(buildErr, null);
          return;
        }

        diff[originalDoc] = content;
        callback(null, diff);
      });
    } catch (ex) {
      // Now, in case the library encounters some exception, we can just pass the exception on as an error like usual.
      callback(ex, null);
    }
  });

  bundler.on('originalReceived', function (requestFn, originalDoc, url, callback) {
    try {
      b.replaceImages(requestFn, originalDoc, url, callback);
    } catch (ex) {
      callback(ex, null);
    }
  });

  return bundler;
}
/**
 * Handle one incoming HTTP request to the bundle server.
 *
 * Reads two query-string parameters from the request URL:
 *   - `ping`: when present (truthy), replies 200 "OK" immediately without bundling.
 *   - `url`:  the address of the page to fetch and bundle.
 *
 * For a bundle request a Bundler is configured with the resource replacers, proxy,
 * header-spoofing, redirect and diff-filter handlers below, and the resulting bundle
 * (or an error page via renderErrorPage) is written to the response.
 *
 * Relies on names defined outside this function: `qs`, `urllib`, `bundler`, `utils`,
 * `config`, `remaps`, `printOptions` and `renderErrorPage`.
 *
 * @param {object} req - The incoming HTTP request (its `url` is parsed for the query string)
 * @param {object} res - The HTTP response to write to
 */
function handleRequests(req, res) {
  // Parse the query string once; both parameters come from the same request URL.
  var query = qs.parse(urllib.parse(req.url).query);
  var url = query.url;
  var ping = query.ping;

  console.log('Got request for ' + url);

  // Health check: answer before constructing a bundler, since a ping normally carries
  // no `url` and none of the bundling machinery is needed to reply.
  if (ping) {
    res.writeHead(200, {
      'Content-Type': 'text/html; charset=utf-8'
    });
    res.write("OK");
    res.end();
    return;
  }

  var bundleMaker = new bundler.Bundler(url);
  var isSameHost = utils.sameHostPredicate(url);

  // Only bundle resources belonging to the same host. Note: this does not stop them from being fetched.
  bundleMaker.on('originalReceived', bundler.predicated(isSameHost, bundler.replaceImages));
  bundleMaker.on('originalReceived', bundler.predicated(isSameHost, bundler.replaceJSFiles));
  bundleMaker.on('originalReceived', bundler.predicated(isSameHost, bundler.replaceCSSFiles));
  bundleMaker.on('originalReceived', bundler.predicated(isSameHost, bundler.replaceURLCalls));

  if (config.useProxy) {
    bundleMaker.on('originalRequest', bundler.proxyTo(config.proxyAddress));
    bundleMaker.on('resourceRequest', bundler.proxyTo(config.proxyAddress));
  }

  // Clone some headers from the incoming request to go into the original request.
  bundleMaker.on('originalRequest', bundler.spoofHeaders(utils.extractHeaders(req, config.cloneHeaders)));

  // Set the Host header to the hostname of the requested site.
  // This handler is attached before the config.spoofHeaders handlers below so that, if
  // a Host header is provided in the config, it will overwrite this one.
  bundleMaker.on('originalRequest', utils.spoofHostAsDestination(url));
  bundleMaker.on('resourceRequest', utils.spoofHostAsDestination(url));

  bundleMaker.on('originalRequest', utils.reverseProxy(remaps));
  bundleMaker.on('resourceRequest', utils.reverseProxy(remaps));

  // Spoof certain headers on every request.
  bundleMaker.on('originalRequest', bundler.spoofHeaders(config.spoofHeaders));
  bundleMaker.on('resourceRequest', bundler.spoofHeaders(config.spoofHeaders));

  bundleMaker.on('originalRequest', bundler.followRedirects(
    config.followFirstRedirect, config.followAllRedirects, config.redirectLimit));

  bundleMaker.on('resourceReceived', bundler.bundleCSSRecursively);

  bundleMaker.on('originalRequest', printOptions);
  bundleMaker.on('resourceRequest', printOptions);

  // This is a patch over an issue where a site's own URL would come back through bundler
  // and it (the source in the diff) would be lost (made an empty string).
  // This was leading to an infinite loop of prepending the diff to the beginning of the
  // bundle in memory, causing an out-of-memory error.
  // TODO - Ideally this patch should be replaced with a complete fix.
  bundleMaker.on('diffsReceived', bundler.filterDiffs(function (source, diff) {
    return source.length > 0;
  }));

  bundleMaker.bundle(function (err, bundle) {
    if (err) {
      console.log('Failed to create bundle for ' + req.url);
      console.log('Error: ' + err.message);
      console.trace();
      renderErrorPage(req, res, err);
      return;
    }
    res.writeHead(200, {'Content-Type': 'text/html; charset=utf-8'});
    res.write(bundle);
    res.end();
  });
}