Ejemplo n.º 1
0
 // Start listener: clears the `paused` flag and prints a start
 // message containing the process id and the current date.
 // Expects to be invoked with a `this` that exposes `log.info`.
 function onStart() {
   var pid, when;

   paused = false;

   // decorate the values before handing them to the logger
   pid = ansi(process.pid).blue;
   when = ansi(new Date()).yellow;

   this.log.info('[%s] started on %s', pid, when);
 }
Ejemplo n.º 2
0
  /**
   *  Start signal handler.
   *
   *  When the `paused` flag is set the crawler is started again and
   *  stdin is paused; otherwise a warning is logged and nothing else
   *  happens. The `paused` flag itself is not modified here.
   *
   *  @param nm Label of whatever requested the start; only used in
   *  the log output.
   */
  function start(nm) {
    var pid = ansi(process.pid).blue
      , origin = ansi(nm).underline;

    // nothing to resume unless we are currently paused
    if(paused !== true) {
      this.log.warn('[%s] cannot start from %s, not paused', pid, origin);
      return;
    }

    this.log.info(
      '[%s] started by %s at %s', pid, origin, ansi(new Date()).yellow);
    crawler.start();

    // pausing stdin lets the process exit once work is done
    process.stdin.pause();

    // NOTE: resetting the paused flag is left to the `crawlstart` listener
  }
Ejemplo n.º 3
0
  /**
   *  Stop signal handler.
   *
   *  When the `paused` flag is exactly `false` the crawler is stopped
   *  and stdin is resumed; otherwise a warning is logged. In both
   *  cases the `paused` flag is set to `true` before returning.
   *
   *  @param nm Label of whatever requested the stop; only used in
   *  the log output.
   */
  function stop(nm) {
    var running = (paused === false)
      , pid = ansi(process.pid).blue
      , origin = ansi(nm).underline;

    if(!running) {
      this.log.warn('[%s] cannot stop from %s, already paused', pid, origin);
    }else{
      this.log.info(
        '[%s] stopped by %s at %s', pid, origin, ansi(new Date()).yellow);
      crawler.stop();

      // keep reading stdin, without it the process might exit
      process.stdin.resume();
    }

    paused = true;
  }
Ejemplo n.º 4
0
  return function onFetchComplete(item, buf) {
    var contentType = item.stateData.contentType
      , file = tempfile('.html')
      , output = file + '.json'
      , cmd
      , opts
      , result
      , pass = false
      , map = {}
      , args = []
      , format = 'json'
      , json = this.json;

    function cleanFile() {
      fs.unlinkSync(file);
      files.pop();
    }

    function cleanOutput() {
      fs.unlinkSync(output);
      files.pop();
    }

    this.log.debug('test content type %s with pattern %s',
      contentType, contentPattern);

    if(!contentPattern.test(contentType)) {
      return this.log.warn(
        'invalid content type %s from %s (skipped)',
        ansi(contentType).underline,
        ansi(item.url).underline);
    }

    files.push(output, file);

    /* istanbul ignore else: tough to mock no buffer error */
    if(buf && buf.length) {
      if(this.errorsOnly) {
        args.push('--errors-only'); 
      }

      if(this.format) {
        format = this.format; 
      }

      args.push('--format', format);

      // support concat of remaining args, ie: -- -Xss512M
      if(req.result.skip && req.result.skip.length) {
        args = args.concat(req.result.skip); 
      }

      this.log.debug('write file: %s', file);
      fs.writeFileSync(file, buf);

      // track files so we can map
      // .html file paths to the remote URL
      map['file:' + file] = {
        url: item.url,
        output: output,
        item: item
      }

      // setup command options
      args = util.format(' %s ', args.join(' '));
      cmd = util.format(
        'java -jar %s%s %s', info.jar, args, file);
      opts = {env: process.env, stdio: [0, 1, fs.openSync(output, 'w')]};
      this.log.debug('%s', cmd);

      try {
        execSync(cmd, opts);
        cleanFile();
      }catch(e) {
        pass = e;
        cleanFile();
        this.log.error('validation failed on %s', item.url); 
      }

      // read in validation output and parse
      try {
        result = '' + fs.readFileSync(output); 
        cleanOutput();
      }catch(e) {
        /* istanbul ignore next: not going to mock io error */
        cleanOutput();
        /* istanbul ignore next: not going to mock io error */
        this.log.error('failed to read output file %s', output); 
      }

      if(!userFormat && result && format === 'json') {
        try {
          result = JSON.parse(result); 
        }catch(e) {
        
          // TODO: dump unparsed result data to a log file
          // TODO: something likely went very wrong

          /* istanbul ignore next: not going to mock parse error */
          this.log.error('failed to parse output file %s', output); 
        }

        // handle validation result
        /* istanbul ignore else: tough to mock bad output from validator */
        if(result && result.messages) {
          if(!result.messages.length) {
            //pass = true;
            this.log.info('validation passed %s', ansi(item.url).green); 
          }else{
            if(json) {
              printJson(req, item, result);
            }else{
              printer.call(this, result, map);
            }

            if(abort) {
              this.raise(this.errors.EVALIDATE_ABORT); 
              /* istanbul ignore next: always in test env */
              if(env.test) {
                return crawler.stop();
              }
            }
          }
        }
      // can't handle this format
      }else{
        if(json) {
          printJson(req, item, result);
        }else{
          req.stdout.write(item.url + '\n'); 
          req.stdout.write(result); 
          req.stdout.write('\n'); 
        }
        if((pass instanceof Error) && abort) {
          this.raise(this.errors.EVALIDATE_ABORT); 
          /* istanbul ignore next: always in test env */
          if(env.test) {
            return crawler.stop();
          }
        }
      }
    }else{
      this.log.warn('no buffer available for %s', item.url);
    }
  }