Ejemplo n.º 1
0
/**
 * Hands an asset URL to the downloader unless it was already handled.
 *
 * Assets are tracked in `this.visitedAssets`, keyed by the URL with its
 * first "#" character removed. When the key is already present nothing is
 * downloaded and `this.next()` is called instead.
 *
 * @param {string} url - URL of the asset to download.
 */
Crawler.prototype.downloadAsset = function (url) {
  var assetKey = url.replace("#", "");
  var alreadyVisited = this.visitedAssets[assetKey];

  if (!alreadyVisited) {
    this.visitedAssets[assetKey] = true;

    // The target path is derived from the URL as received; only afterwards
    // is the URL itself converted with Path.castToTopLevelURL.
    var targetPath = Path.resolve(Path.getDirFromUrl(url));
    var topLevelUrl = Path.castToTopLevelURL(url);

    this.downloader.download({ url: topLevelUrl, path: targetPath });
    return;
  }

  this.next();
};
Ejemplo n.º 2
0
/**
 * Website Crawler
 *
 * Initialises the crawler state from `options`, queues every entry of
 * `options.list` and then starts processing by calling `this.next()`.
 *
 * Defaults are applied with `||`, so any falsy option value falls back to
 * its default.
 *
 * @param {Object} options - Crawler settings.
 * @param {string} [options.mode="crawl"] - Stored as `this.mode`.
 * @param {number} [options.pageLoadTimeout=5000] - Stored as
 *   `this.pageLoadTimeout` (presumably milliseconds; confirm where it is used).
 * @param {Function} [options.onSuccess] - Stored as `this.onSuccess`;
 *   defaults to a no-op.
 * @param {Function} [options.onFailure] - Stored as `this.onFailure`;
 *   defaults to a no-op.
 * @param {Array} [options.list=[]] - Initial entries; each one is passed
 *   through `Path.castToTopLevelURL` and then `this.pushToQueue`.
 */
function Crawler(options) {
  this.mode = options.mode || "crawl";
  this.pageLoadTimeout = options.pageLoadTimeout || 5000;
  this.onSuccess = options.onSuccess || function(){};
  this.onFailure = options.onFailure || function(){};
  // Read from the global configuration object, not from `options`.
  this.downloadAssets = global.Cfg.downloadAssets || false;
  this.visitedURLs = {};
  this.visitedAssets = {};
  this.urlQueue = [];
  this.assetQueue = [];
  this.downloader = this.generateDownloader(options);
  // The default is written back onto the caller's options object.
  options.list = options.list || [];
  var list = options.list;
  // Iterate own keys only: `for...in` would also visit enumerable
  // properties inherited from the prototype chain (e.g. additions to
  // Array.prototype) and queue them as if they were URLs.
  var self = this;
  Object.keys(list).forEach(function (key) {
    self.pushToQueue(Path.castToTopLevelURL(list[key]));
  });
  this.next();
}
Ejemplo n.º 3
0
 // Queue every non-empty URL that is not yet recorded in self.visitedURLs.
 urls.forEach(function (candidate) {
   if (!candidate || self.visitedURLs[candidate]) {
     return;
   }
   self.pushToQueue(Path.castToTopLevelURL(candidate));
 });