Пример #1
0
/**
 * Scrapes the detail page of the next pending station and records it.
 *
 * Pops one entry from `stations_url`, fetches its detail page, extracts the
 * station name, department, river and Lambert II X/Y coordinates from the
 * page text, converts the coordinates with proj4 and pushes the result onto
 * `stations`. It then calls itself for the next entry. Once `stations_url`
 * is empty it hands over to `makegeoJSON()`.
 *
 * A station whose page cannot be fetched, or whose text does not match the
 * expected layout, is reported on stderr and skipped so the rest still run.
 */
function scrapStationDetail(){
	console.log('station detail')
	if(!stations_url.length){
		makegeoJSON();
		return;
	}
	var station_url = stations_url.pop();
	scrap('http://www.vigicrues.gouv.fr/'+station_url.url+'&ong=3', function(err,$){
		if(err){
			console.error('station detail request failed for '+station_url.url, err);
			scrapStationDetail();
			return;
		}
		var txt =  $('.contenu_cadre div').text();
		// The whitespace in this pattern mirrors the page layout exactly; do not reformat.
		var reg = /\r\n\t  Station : \r\n\t  (.*)\r\n\t  Département : \r\n\t  ([0-9]{2})\r\n\t   \r\n\r\n\t  Cours d\'eau : \r\n\t  (.*)\r\n\t   \r\n\r\n\t  Coordonnées Lambert II : \r\n\t  X=([0-9]+) m, Y=([0-9]+)/m
		var matches = txt.match(reg);
		if(!matches){
			// match() returns null when the layout differs
			console.error('station detail not recognised for '+station_url.url);
			scrapStationDetail();
			return;
		}
		var lambert2 ='+proj=lcc +lat_1=46.8 +lat_0=46.8 +lon_0=0 +k_0=0.99987742 +x_0=600000 +y_0=2200000 +a=6378249.2 +b=6356515 +towgs84=-168,-60,320,0,0,0,0 +pm=paris +units=m +no_defs ';
		var xy = [parseInt(matches[4],10),parseInt(matches[5],10)];
		console.log(xy)
		// inverse() output is read as [lng, lat]
		var latlng = proj4(lambert2).inverse(xy);
		var station ={
			bassin:station_url.bassin,
			url:'http://www.vigicrues.gouv.fr/'+station_url.url,
			station:matches[1],
			department:matches[2],
			river:matches[3],
			X:matches[4],
			Y:matches[5],
			lat:latlng[1],
			lng:latlng[0]
		}
		console.log(station)
		stations.push(station);
		scrapStationDetail();
	});
}
Пример #2
0
/**
 * Fetches one message page and renders the YouTube links found in its body.
 *
 * @param {Object} msg - descriptor with `url` (page to POST to) and `date`
 *   (passed through to `renderTitle`).
 * @param {Function} callback - called as `callback(err)` when the request
 *   fails, otherwise `callback(null, text)`. `text` is the rendered title
 *   plus one rendered link per distinct YouTube URL, joined by newlines, or
 *   an empty string when the body has no such link.
 */
function getMessage(msg, callback)
{
	var url = msg.url;
	var date = msg.date;

	scrap({url: url, method: 'POST', jar:cookieJar, encoding:'iso-8859-1'}, function(err,$,code,html,resp)
	{
		if(err)
		{
			// Without a parsed page there is nothing to render; let the caller decide.
			return callback(err);
		}

		var output = [];
		var asunto = "";
		$('.legendFicha').each(function(index)
		{
			// The subject is the text of the node following the "Asunto:" legend.
			if(this.text().match(/Asunto:/))
				asunto = this.next(false).text().trim();
		})

		// html() may yield null when #cuerpoMensaje is missing; treat that as an empty body.
		var body = $('#cuerpoMensaje').html() || "";
		var matches = body.match(/(https?:\/\/www.youtube.com\/watch\?v=[^\s|\^"<]+)/g);

		if( matches )
		{
			// make them unique, keeping first-seen order
			matches = matches.filter(function(item, pos) {
				return matches.indexOf(item) == pos;
			});

			output.push( renderTitle(asunto, date) );
			output.push( "<div>" );
			matches.forEach(function(m)
			{
				output.push( renderLink(m) );
			})
			output.push( "</div>" );
		}
		callback(null, output.join('\n'));
	});
}
		// Load the ESLint configuration, then scrape the ESLint rules page and
		// record, per category, every listed rule with the value configured for it.
		// The result is reported through loaded_callback(err, templateContext).
		fs.readFile(configPath, 'utf8', function (err, contents) {
			if (err) { return loaded_callback(err); }

			var config;

			// The encoding belongs to readFile; JSON.parse's second argument is a reviver.
			try { config = JSON.parse(contents); }
			catch (loadErr) { return loaded_callback(loadErr); }

			if (!config || !config.rules) { return loaded_callback(new Error("Invalid configuration file")); }

			// Grab the eslint rules page
			scrap(eslintRulesPage, function (scrapErr, $) {
				if (scrapErr) { return loaded_callback(scrapErr); }

				// Scrap rule categories: each <h2> is a category, the following <p> its summary
				async.each($('h2'), function (item, headerProcessed_callback) {
					item = $(item);

					var headerText = item.text();
					var summary = item.next('p');
					var category = {
						name: headerText,
						description: summary.text(),
						rules: []
					};
					templateContext.categories.push(category);

					// Scrap rules for this category: list items read "<rule name> - <description>"
					async.eachSeries(summary.next().find('li'), function (rule, ruleProcessed_callback) {
						rule = $(rule);

						var ruleParts = rule.text().trim().split(' - ');
						var ruleName = ruleParts[0];
						var configRule = config.rules[ruleName];

						category.rules.push({
							// Only an array config carries its severity in slot 0. A plain
							// string severity also has a length and must not be indexed.
							used: Array.isArray(configRule) && configRule.length ? configRule[0] : configRule,
							value: configRule,
							name: ruleName,
							link: eslintRulesPage + ruleName + ".html",
							description: ruleParts[1]
						});

						ruleProcessed_callback(null);
					}, headerProcessed_callback);
				}, function (eachErr) {
					if (eachErr) { return loaded_callback(eachErr); }

					loaded_callback(null, templateContext);
				});
			});
		});
Пример #4
0
/**
 * Collects the basin links of the vigicrues "niv2" page into `bassins`
 * (one `{url, text}` entry per link) and then starts `scrapStations()`.
 *
 * If the page cannot be fetched the error is reported on stderr and the
 * chain is not started, since there is nothing to iterate over.
 */
function scrapBassins(){
	scrap('http://www.vigicrues.gouv.fr/niv2.php', function(err, $) {
		if(err){
			console.error('bassin list request failed', err);
			return;
		}
		$('.contenu_cadre a').each(function(pos,item){
			// An anchor without child nodes has no label; keep the link anyway.
			var label = item.children[0];
			bassins.push({
				url:item.attribs.href,
				text:label ? label.data : undefined
			});
		});
		scrapStations();
	});
}
Пример #5
0
	// Build one `minder` record per listing row and push it onto `minders`.
	// Each record gets further fields once the row's detail page has been
	// fetched, and is printed as JSON at that point.
	$( '[id$=_member]>div:first-child' ).each( function() {
		i++;
		var row = $( this );
		var link = row.find( 'a' );
		var href = link.attr( 'href' );
		var minder = {};
		minder.name = row.find( 'h2' ).first().text();
		minder.membership = row.find( 'h4' ).text();
		minder.url = 'http://www.ichild.co.uk' + href;

		// The postcode area is read from the profile path, e.g.
		// /directory/childcare/registered_childminders/manchester/M21/clairelmackie/
		minder.postcode = 'Unknown';
		var postcodeMatch = href ? href.match( /([a-z]+[0-9]+)/i ) : null;
		if ( postcodeMatch )
			minder.postcode = postcodeMatch[1];

		// text() of an empty selection is '', so a missing "miles" heading simply does not match
		minder.distance = 'Unknown';
		var distanceMatch = row.parent().find( 'div.grid_3 h4:contains("miles")' ).text().match( /(\d+\.\d+)/i );
		if ( distanceMatch )
			minder.distance = distanceMatch[1];

		minder.vacancy = "No";
		if ( row.parent().find( 'div.grid_3 .vacancy' ).length )
			minder.vacancy = "Yes";

		scrap(minder.url, function(err, $) {
			if ( err ) {
				console.log( err.code + " - " + minder.url );
				return;
			}

			// A selection object is always truthy, so test its length to see
			// whether the ticked checkbox is actually present.
			$( '#view_listing h3:contains("Childcare Offered")' ).siblings( 'ul' ).find( 'li' ).each( function() {
				var entry = $( this );
				minder[ entry.find( 'h4' ).text() ] = entry.find( '.checkbox_ticked' ).length ? 'Yes' : 'No';
			} );
			$( '#view_listing .details ul li' ).each( function() {
				var entry = $( this );
				minder[ entry.find( 'h6' ).text() ] = entry.find( '.checkbox_ticked' ).length ? 'Yes' : 'No';
			} );
			minder.fees_statement = $( '.home_section_title:contains("&pound;")' ).text().replace( '\n', '' ).replace( '\r', '' ).replace( /\s+/g, ' ' ).trim();

			// Stays 0 unless the first matching span holds a number
			minder.current_baby_vacancies = 0;
			var babySpan = $( 'span[style*="#FF809F"]' ).first();
			var babyMatch = babySpan.length ? babySpan.text().match( /(\d+)/i ) : null;
			if ( babyMatch )
				minder.current_baby_vacancies = babyMatch[1];

			console.log( JSON.stringify( minder ) + ',' );
		});
		minders.push( minder );
	} );
Пример #6
0
/**
 * Scrapes the station links of the next pending basin.
 *
 * Pops one entry from `bassins`, fetches its page and pushes one
 * `{url, bassin}` entry per circular map area onto `stations_url`, then
 * calls itself for the next basin. Once `bassins` is empty it hands over to
 * `scrapStationDetail()`.
 *
 * A basin whose page cannot be fetched is reported on stderr and skipped.
 */
function scrapStations(){
	console.log('station ')

	if(!bassins.length){
		scrapStationDetail();
		return;
	}
	var bassin = bassins.pop();

	scrap('http://www.vigicrues.gouv.fr/'+bassin.url, function(err,$){
		if(err){
			console.error('station list request failed for '+bassin.url, err);
			scrapStations();
			return;
		}
		$('area[shape=circle]').each(function(pos,area){
			stations_url.push({
				url:area.attribs.href,
				bassin:bassin.text
			})
		});
		scrapStations();
	});
}
Пример #7
0
/**
 * Requests one page of the message list and extracts a link per message.
 *
 * Sets the `X-AjaxPro-Method` entry on the shared `headers` object before
 * posting to `messagesUrl`.
 *
 * @param {*} pagina - page value inserted into the <pagina> element of the payload.
 * @param {Function} callback - called as `callback(err)` when the request
 *   fails, otherwise `callback(null, links)`, where each link is
 *   `{url, date}`: `url` is `oneMessageUrl` plus the target taken from the
 *   cell's onclick handler, `date` is the cell's text.
 */
function getPageMessages(pagina, callback)
{
	headers['X-AjaxPro-Method'] = 'listaMensajes';
	var payload = '{"sControls":"<root><carpeta>1</carpeta><filtro></filtro><noLeidos>false</noLeidos><pagina>' + pagina + '</pagina><orden>fecha</orden><sentido>DESC</sentido></root>"}';

	scrap({url: messagesUrl, preParse:preParseValue, method: 'POST', jar:cookieJar, headers:headers, body:payload }, function(err,$,code,html,resp)
	{
		if(err)
		{
			return callback(err);
		}

		var links = [];
		$("tr td[onclick]").each(function(index)
		{
			// Only every third clickable cell (index 2, 5, 8, ...) is used.
			if(index % 3 !== 2)
			{
				return;
			}

			var cell = $(this);
			var target = cell.attr('onclick').trim().match(/location.href='(.+)'/);
			if(!target)
			{
				// Handler does not navigate anywhere we can follow; skip the cell.
				return;
			}

			links.push({ url: oneMessageUrl + target[1], date: cell.text() });
		});
		callback(null,links);
	});
}
Пример #8
0
  /**
   * Visits one Wikipedia page, emits it as a graph node and recurses into
   * the links found in its paragraphs.
   *
   * Two call shapes are supported:
   *   iterator(term, done)                    - seed: search for `term`, depth 0
   *   iterator(parentNode, href, depth, done) - follow `href` from `parentNode`
   *
   * Emits 'new node' with the page heading and, for followed links,
   * 'new edge' with `{from, to}`. At most `options.relation` links per page
   * are followed, and pages deeper than `options.depth` are not visited.
   * `done` is always called without arguments; failures are only logged.
   */
  var iterator = function(term, href, depth, done) {
    var url;
    var parentNode;
    var newEdge = false;

    // Plain concatenation: String.replace would interpret `$` sequences
    // inside an href as replacement patterns.
    if (arguments.length === 2) {
      depth = 0;
      done = href;
      url = 'http://' + options.language + '.wikipedia.org/w/index.php?search=' +
        encodeURIComponent(term);
    } else {
      newEdge = true;
      parentNode = term;
      url = 'http://' + options.language + '.wikipedia.org' + href;
    }

    // Nothing is emitted for a page beyond the depth limit, so do not
    // spend a request on it.
    if (depth > options.depth) {
      return done();
    }

    scrap(url, function(err, $) {
      if (err) {
        logger.error('Error on requesting', {internal: err});
        return done();
      }

      var newNode;

      try {
        newNode = $('#firstHeading span').text();
      } catch(scrapErr) {
        logger.error('Error on scrapping', {internal: scrapErr});
        return done();
      }

      emitter.emit('new node', newNode);

      if (newEdge) {
        emitter.emit('new edge', {
          from: parentNode,
          to: newNode
        });
      }

      var limit = parseInt(options.relation, 10);
      var related = [];

      $('#mw-content-text p a').each(function(i, link) {
        // Returning false stops the iteration once enough links are collected.
        if (related.length === limit) return false;

        var anchor = $(link);
        var target = anchor.attr('href');

        // Skip bracketed reference markers such as "[1]" and anchors with no href.
        if (target && anchor.text().charAt(0) !== '[') {
          related.push(target);
        }
      });

      async.each(
        related,
        function(relatedNodeHref, callback) {
          iterator(newNode, relatedNodeHref, depth + 1, callback);
        },
        done
      );
    });
  };
Пример #9
0
// Entry point: load the login form, post the credentials, list message
// pages 1 and 2, keep the messages dated on or after 1 Sep 2015 and print
// the rendered output of each between the page header and footer.
// Failures are reported on stderr so they do not mix with the page output.
scrap({url:loginUrl, jar:cookieJar}, function(err,$)
{
	if(err)
	{
		console.error('Could not load the login page', err);
		return;
	}

	var form = {};

	// Send every hidden input of the login page back with the credentials.
	$('[type=hidden]').each(function(i,el)
	{
		var key = $(el).attr('name');
		var val = $(el).attr('value');

		form[key] = val;
	});

	form.txtUserName = secret.name;
	form.txtUserPass = secret.pwd;
	form.__EVENTTARGET = 'cmdLogin';
	form.__EVENTARGUMENT = undefined;

	request.post({url:loginUrl, jar:cookieJar, form: form, headers:headers, followAllRedirects:true}, function(err,response,body)
	{
		if(err)
		{
			console.error('Login request failed', err);
			return;
		}

		renderPageHeader();

		var paginas = [1,2];
		var cutoff = new Date(2015,8,1); // 01/Sep/2015 (month argument is zero-based)

		async.map(paginas, getPageMessages, function(err,messages)
		{
			if(err)
			{
				console.error('Could not list the messages', err);
				// The header is already out; close the page.
				renderPageFooter();
				return;
			}

			// flatten array and filter
			messages = [].concat.apply([],messages).filter(function(m)
			{
				// Dates are read as "dd/mm/yyyy hh:mm"; drop entries without both parts.
				var components = m.date.split(' ');
				if(components.length < 2)
				{
					return false;
				}
				var c1 = components[0].split('/');
				var c2 = components[1].split(':');
				var date = new Date( c1[2], c1[1]-1, c1[0], c2[0], c2[1])
				return date >= cutoff
			});

			async.map(messages, getMessage, function(err,outputs)
			{
				if(err)
				{
					console.error('Could not load a message', err);
					renderPageFooter();
					return;
				}

				outputs.forEach(function(output)
				{
					if(output.length)
						console.log(output);
				});

				renderPageFooter();
			})
		});
	});
});
Пример #10
0
/**
 * Bot command "!hn top | !hn random": scrapes the Hacker News front page
 * and reports one story (title and URL, thread link, points and comments)
 * through `bot.message`, then calls `end()`.
 *
 * @param {Object} bot - object whose `message(text)` sends a reply.
 * @param {*} data - unused here.
 * @param {*} nick - unused here.
 * @param {Array} args - must hold exactly one value, 'top' or 'random'.
 * @param {Function} end - called after a story has been reported.
 * @returns {boolean|undefined} false when the arguments are invalid,
 *   otherwise undefined (the result arrives asynchronously).
 */
var hn = function(bot, data, nick, args, end) {

  var param = args[0];

  if (args.length > 1 || (param !== 'top' && param !== 'random')) {
    bot.message('Comando inválido. Exemplos: !hn top | !hn random');
    return false;
  }

  var hackerNewsURL = 'https://news.ycombinator.com/';

  scrap(hackerNewsURL, function(err, $) {
    if (err) {
      bot.message('Erro ao requisitar Hacker News');
      return false;
    }

    // Stories are kept in page order. Keying them by points would drop
    // stories that share a score and let a NaN score pose as the top one.
    var news = [];
    var currentItem = {};

    $('tr').each(function() {
      var row = $(this);

      // Scrape the story title and URL
      var titleLink = row.find('td.title a');
      if (titleLink.length === 1) {
        currentItem.title = titleLink.text();
        currentItem.url = titleLink.attr('href');
      }

      var subtextCell = row.find('td.subtext');
      if (subtextCell.length === 1) {

        // Scrape the story's discussion thread
        var threadURI = row.find('td.subtext a').next().attr('href');
        currentItem.thread = hackerNewsURL + threadURI;

        // Scrape the points and comment counts
        var subtext = subtextCell.text().split(' ');
        currentItem.points = parseInt(subtext[0], 10);

        var rawComments = subtext[subtext.length - 2];
        var commentCount = parseInt(rawComments, 10);
        currentItem.comments = (isNaN(rawComments) || isNaN(commentCount)) ? 0 : commentCount;

        // The subtext row closes a story: store it right away so the
        // last story on the page is not lost.
        news.push(currentItem);
        currentItem = {};
      }
    });

    if (!news.length) {
      bot.message('Nenhuma notícia encontrada no Hacker News');
      return false;
    }

    var printItem = function(item) {
      bot.message(item.title + ' (' + item.url + ')');
      bot.message('HN: ' + item.thread);
      bot.message('Pontos: ' + item.points + ' | Comentários: ' + item.comments);
      end();
    };

    if (param === 'top') {
      // Highest numeric score wins; stories without a score are ignored
      // unless no story has one.
      var topItem = null;
      news.forEach(function(item) {
        if (!isNaN(item.points) && (topItem === null || item.points > topItem.points)) {
          topItem = item;
        }
      });
      printItem(topItem || news[0]);
    } else {
      printItem(news[Math.floor(Math.random() * news.length)]);
    }
  });

};