/**
 *  Print the start message.
 *
 *  Clears the `paused` flag (declared outside this function) and logs the
 *  process id together with the current date via `this.log.info`.
 *
 *  NOTE(review): presumably registered as the `crawlstart` listener that
 *  `start()` relies on to reset the flag -- confirm at the registration site.
 */
function onStart() {
  // the crawl is running again, so a later stop request must be honoured
  paused = false;
  this.log.info(
    '[%s] started on %s',
    ansi(process.pid).blue,
    ansi(new Date()).yellow);
}
/**
 *  Handle the start signal.
 *
 *  When the `paused` flag is set the crawler is started again and stdin is
 *  paused; otherwise a warning is logged and nothing else happens.
 *
 *  This function never modifies `paused` itself.
 *
 *  @param nm Name of whatever requested the start; only used in log output.
 */
function start(nm) {
  if(paused === true) {
    this.log.info(
      '[%s] started by %s at %s',
      ansi(process.pid).blue,
      ansi(nm).underline,
      ansi(new Date()).yellow);
    crawler.start();
    // allow the process to exit
    process.stdin.pause();
  }else{
    this.log.warn(
      '[%s] cannot start from %s, not paused',
      ansi(process.pid).blue,
      ansi(nm).underline);
  }
  // NOTE: the paused flag will be set in the `crawlstart` listener
}
/**
 *  Handle the stop signal.
 *
 *  When the `paused` flag is not set the crawler is stopped and stdin is
 *  resumed; otherwise a warning is logged. In both cases `paused` is `true`
 *  when the function returns.
 *
 *  @param nm Name of whatever requested the stop; only used in log output.
 */
function stop(nm) {
  if(paused === false) {
    this.log.info(
      '[%s] stopped by %s at %s',
      ansi(process.pid).blue,
      ansi(nm).underline,
      ansi(new Date()).yellow);
    crawler.stop();
    // otherwise the process might exit
    process.stdin.resume();
  }else{
    this.log.warn(
      '[%s] cannot stop from %s, already paused',
      ansi(process.pid).blue,
      ansi(nm).underline);
  }
  paused = true;
}
return function onFetchComplete(item, buf) { var contentType = item.stateData.contentType , file = tempfile('.html') , output = file + '.json' , cmd , opts , result , pass = false , map = {} , args = [] , format = 'json' , json = this.json; function cleanFile() { fs.unlinkSync(file); files.pop(); } function cleanOutput() { fs.unlinkSync(output); files.pop(); } this.log.debug('test content type %s with pattern %s', contentType, contentPattern); if(!contentPattern.test(contentType)) { return this.log.warn( 'invalid content type %s from %s (skipped)', ansi(contentType).underline, ansi(item.url).underline); } files.push(output, file); /* istanbul ignore else: tough to mock no buffer error */ if(buf && buf.length) { if(this.errorsOnly) { args.push('--errors-only'); } if(this.format) { format = this.format; } args.push('--format', format); // support concat of remaining args, ie: -- -Xss512M if(req.result.skip && req.result.skip.length) { args = args.concat(req.result.skip); } this.log.debug('write file: %s', file); fs.writeFileSync(file, buf); // track files so we can map // .html file paths to the remote URL map['file:' + file] = { url: item.url, output: output, item: item } // setup command options args = util.format(' %s ', args.join(' ')); cmd = util.format( 'java -jar %s%s %s', info.jar, args, file); opts = {env: process.env, stdio: [0, 1, fs.openSync(output, 'w')]}; this.log.debug('%s', cmd); try { execSync(cmd, opts); cleanFile(); }catch(e) { pass = e; cleanFile(); this.log.error('validation failed on %s', item.url); } // read in validation output and parse try { result = '' + fs.readFileSync(output); cleanOutput(); }catch(e) { /* istanbul ignore next: not going to mock io error */ cleanOutput(); /* istanbul ignore next: not going to mock io error */ this.log.error('failed to read output file %s', output); } if(!userFormat && result && format === 'json') { try { result = JSON.parse(result); }catch(e) { // TODO: dump unparsed result data to a log file // TODO: 
something likely went very wrong /* istanbul ignore next: not going to mock parse error */ this.log.error('failed to parse output file %s', output); } // handle validation result /* istanbul ignore else: tough to mock bad output from validator */ if(result && result.messages) { if(!result.messages.length) { //pass = true; this.log.info('validation passed %s', ansi(item.url).green); }else{ if(json) { printJson(req, item, result); }else{ printer.call(this, result, map); } if(abort) { this.raise(this.errors.EVALIDATE_ABORT); /* istanbul ignore next: always in test env */ if(env.test) { return crawler.stop(); } } } } // can't handle this format }else{ if(json) { printJson(req, item, result); }else{ req.stdout.write(item.url + '\n'); req.stdout.write(result); req.stdout.write('\n'); } if((pass instanceof Error) && abort) { this.raise(this.errors.EVALIDATE_ABORT); /* istanbul ignore next: always in test env */ if(env.test) { return crawler.stop(); } } } }else{ this.log.warn('no buffer available for %s', item.url); } }