Example #1
0
 return new Promise((resolve, reject) => {
   xray(this.url, {
   metatags: xray('meta', [{
     name: '@name',
     description: '@content'
   }]),
   meta: 'meta'
   })((err, data) => {
     if (err) reject(err);
     let filtered = data.metatags.filter(d => d.name === 'keywords');
     filtered.length > 0 ? resolve(filtered[0].description.split(',')) : [];
   })
 })
Example #2
0
  return new Promise(function (resolve, reject) {
  xray(`http://finance.yahoo.com/q/op?s=${ticker}+Options`, {
      allExpiration: xray('.Start-0 option',
        [{
          value: '@value'
        }])
    })
  ((err, obj) => {
    // console.log(obj);
    resolve(obj);
    // return obj.allExpiration.map((res) => tickerScrape(ticker, res.value));
  })
})
/**
 * Looks up `arg.word` on the dictionary site and hands the result to `callbackFn`.
 *
 * @param {{word: string, from: *, to: *}} arg - word to look up plus the
 *   `from`/`to` values that are copied onto the result unchanged.
 * @param {function(Object)} callbackFn - called once with the scraped object
 *   (its `translations` come from '#fastMeanings a') enriched with `word`,
 *   `from` and `to`. When the scrape produced no object, `translations` is [].
 */
var _translate = function(arg, callbackFn) {

        console.log("translate called");
        console.dir(arg);

        xray(SLOVICKA_CZ_EN_URL + '?q=' + arg.word)
            .select({
                $root: '#fastTrans',
                translations: ['#fastMeanings a']
            })
            .run(function(err, object) {
                console.info('run xray.. error: ', err, "object: ", object);
                console.dir(err);
                console.dir(object);

                // A failed scrape yields no object; fall back to an empty
                // result so the enrichment below cannot throw and the caller
                // is always called back.
                var translation = object || { translations: [] };
                translation.word = arg.word;
                translation.from = arg.from;
                translation.to = arg.to;

                callbackFn(translation);
            });

};
Example #4
0
/**
 * Scrapes the futhead player page for `playerRating` and chains a write of
 * the result to 'results.json'.
 *
 * @param {string|number} playerRating - value inserted into the player URL.
 */
var futPlayer = function scrapePlayers(playerRating){
    var x = Xray();
    var url = 'http://www.futhead.com/16/players/' + playerRating + '/test';

    x(url, 'body', [{
        player: 'h1 a',
        position: '.playercard-position',
        rating: '.playercard-rating',
        nation: '.playercard-nation img@src',
        club: '.playercard-club img@src',
        playerImage: '.playercard-picture img@src',
        workrates: '.playercard-workrates',
        stats: x('.card-large', {
            pace: '.playercard-attr1',
            shooting: '.playercard-attr2',
            passing: '.playercard-attr3',
            dribbling: '.playercard-attr4',
            defending: '.playercard-attr5',
            physical: '.playercard-attr6'
        })
    }])(function(err) {
        // Only announce success when the scrape actually succeeded.
        if (err) {
            console.error("failed to scrape player with rank " + playerRating, err);
            return;
        }
        console.log("json file created for player with rank " + playerRating);
    }).write('results.json');

};
Example #5
0
	return new Promise(function(resolve, reject) {
		x('http://skateboarding.transworld.net/tag/wednesday-wallpaper', '.article-hover-deets', ['.view-more a@href'])
		(function(err, data) {
			if(err) return reject(err);
			resolve(data);
		});
	});
Example #6
0
File: index.js Project: apis-is/car
exports.is = function(carPlate, cb) {
  xray('http://www.samgongustofa.is/umferd/okutaeki/okutaekjaskra/uppfletting?vq=' + carPlate)
    .select([{
      $root: '.vehicleinfo ul',
      type: 'li:nth-child(1) span',
      subType: 'li:nth-child(1) span',
      color: 'li:nth-child(1) span',
      registryNumber: 'li:nth-child(2) span',
      number: 'li:nth-child(3) span',
      factoryNumber: 'li:nth-child(4) span',
      registeredAt: 'li:nth-child(5) span',
      pollution: 'li:nth-child(6) span',
      weight: 'li:nth-child(7) span',
      status: 'li:nth-child(8) span',
      nextCheck: 'li:nth-child(9) span'
    }])
    .run(function(err, array) {
      var cleaned = _.map(array, function(car) {
        car.type = car.type.substring(0,car.type.indexOf('-')-1);
        car.subType = car.subType.substring(car.subType.indexOf('-')+2,car.subType.indexOf('(')-1);
        car.color = car.color.substring(car.color.indexOf('(')+1,car.color.indexOf(')'));
       return car;
      });
      return cb(err, cleaned);
    });
};
Example #7
0
/**
 * Scrapes the futhead club page for `club` and chains a write of the result
 * to '<club>.json'.
 *
 * @param {string|number} club - value inserted into the club URL and the output file name.
 */
var futClubs = function scrapeClubs(club){
    var x = Xray();
    var url = 'http://www.futhead.com/16/clubs/' + club + '/';

    x(url, '.player-page-listing', [{
        players: x('a', {
            player: '.playercard-name',
            position: '.playercard-position',
            rating: '.playercard-rating',
            nation: '.playercard-nation img@src',
            club: '.playercard-club img@src',
            playerImage: '.playercard-picture img@src',
            workrates: '.playercard-workrates',
            stats: x('.card-small', {
                pace: '.playercard-attr1',
                shooting: '.playercard-attr2',
                passing: '.playercard-attr3',
                dribbling: '.playercard-attr4',
                defending: '.playercard-attr5',
                physical: '.playercard-attr6'
            })
        })
    }])(function(err) {
        // Only announce success when the scrape actually succeeded.
        if (err) {
            console.error("failed to scrape club '" + club + "'", err);
            return;
        }
        console.log("json file '" + club + "' created");
    }).write(club + '.json');

};
Example #8
0
// GET /commits: scrapes the project's GitHub commit list and responds with an
// array of { title, author, date } objects. Scrape failures are forwarded to
// the next error handler.
app.get('/commits', function(req, res, next) {

  var scraper = new Xray();
  scraper('https://github.com/onedr0p/manage-this-node/commits/master',
    '.table-list-cell',
    [{
      title: '.commit-title',
      author: 'a.commit-author',
      date: 'time'
    }]
  )(function(err, result){
    // Previously the error was ignored and an empty list was sent as success.
    if (err) {
      return next(err);
    }

    var commitLog = _.map(result, function(n) {
      // Drop every character outside the allowed set before trimming again.
      var commitTitle = n.title.trim();
      commitTitle = commitTitle.replace(/[^A-Za-z 0-9 \.,\?""!@#\$%\^&\*\(\)-_=\+;:<>\/\\\|\}\{\[\]`~]+/g, '');

      return {
        title: commitTitle.trim(),
        author: n.author.trim(),
        date: n.date.trim()
      };
    });

    // send json to front-end
    res.send(commitLog);
  });
});
Example #9
0
 // For every state link that is not a known federation: remember the state
 // and write the '.continent' links of its page to a per-state JSON file.
 statesLinks.forEach(link => {
   const state = lastPath(link);
   if (federations.indexOf(state) !== -1) {
     return;
   }

   states.push(state);
   const scrape = xRay(link, '.continent', ['a@href']);
   scrape.write(`happycow/states/${state}.json`);
 });
Example #10
0
 .map(function(catObj) {
     // scrape promotion details page
     return [catObj, xray(catObj.details_link, {
         promo_details_title: '#merchant-detail h5',
         promo_details_description: '#merchant-detail p',
         promo_image: '#banner img@src'
     }).stream()];
 })
Example #11
0
.map(function(catObj){
    return [catObj, xray(catObj.href, 'ul.list2 li', [{
        title: 'span.promo-title',
        merchant: 'span.merchant-name',
        merchant_logo: 'img@src',
        valid_until: 'span.valid-until',
        details_link: 'a@href',
    }]).stream()];
}) // returns stream of [{title: 'title'}, {...}]
Example #12
0
exports.get = function(url,cb){
    xr(url)
        .select([{content:'.section-content',pics:'img[src]'}]).run(function(err,data){
        //console.log(data);
        var htmlstring = "";

        data.map(function(ele,index){
            //if(ele.content){
            
            
            //INSERT FUNCTION TO SEND ALL OF THIS CONTENT TO CLOUD MONGO TO BE FURTHER EVALUATED
            //******************************************************************************
            
            //SEND (USERNAME IN FUTURE),URL,TEXT,PICTURES - MAYBE WE SHOULD NUMBER THE ENTRIES IN ORDER TO BETTER KEEP THEM LINED UP?  IN 
            //THAT CASE WE WOULD NEED TO INCLUDE THE INDEX WITH THE SUBMISSION TO THE DB
            
            /*
            db.collection('magni').update({date:todaysDate},{$set:{content:ele.content,pics:ele.pics}},{upsert:true},function(err,res){
                //LOG THE RESULT
                //RETURN IF FINISHED
                
                if(index == data.length - 1){
                    return cb(null,'done')
                }
            })
            
            */
            
            //******************************************************************************
            
            
            console.log(ele.content)
                if(ele.content == undefined){
                    htmlstring += ''
                }
                else{
                    htmlstring += '<div><p>'+ele.content+'</p></div><br>'
                }
                if(index == data.length - 1){
                    //console.log(htmlstring)
                    block(htmlstring)
                }   
            //}
        })


        function block(htmlstring){
            //console.log(htmlstring)
            blockspring.runParsed("html-to-pdf",{"html":htmlstring},function(res){
            return cb(null,res.params.my_pdf)
        })   
        }


    })
}
Example #13
0
	return new Promise(function(resolve, reject) {
		x(url, 'body', [{
			title: '.main h1',
			dl: '.article-body a[href$=".jpg"]@href'
		}])
		(function(err, data) {
			if(err) return reject(err);
			resolve(data);
		});
	});
Example #14
0
/**
 * Scrapes the title and first link of a page.
 *
 * @param {string} url - page to scrape.
 * @param {function(*, Object=)} callback - receives `(null, obj)` on success;
 *   on an error or an empty result it is called with the error value only.
 */
var genericScrape = function(url, callback) {
  var selectors = {
    title: 'title',
    links: 'a@href'
  };
  var scrape = x(url, 'html', selectors);

  scrape(function (err, obj) {
    var usable = !err && Boolean(obj);
    if (usable) {
      callback(null, obj);
      return;
    }
    callback(err);
  });
}
Example #15
0
    run : function (queryUrl) {

        var self = this;

        xray(queryUrl)
            .select([{
                link: '.r a[href]'
            }])
            .paginate('#nav td:last-child a[href]')
            .limit(2)
            .run(function(error, results) {
                
                if (error) {
                    debug("Couldn't get page because of error: %s", error);
                    return;
                }

                if(results.length < 1) {
                    debug("No results from google");
                    return;
                }

                for(i in results) {

                    // Extract real target url
                    var url = results[i].link.replace("/url?q=", "").split("&")[0];
                    
                    /*
                     * Url filters
                     */
                    
                    if(self.config.filters && self.config.filters.url) {
                        
                        var filter;
                        for(var i in self.config.filters.url) {
                            
                            filter = require('./filters/url/' + self.config.filters.url[i]);
                            
                            if(!filter.validUrl(url)) {
                                continue;
                            }
                        }
                    }
                    
                    self.totalResults++;

                    self.crawl(url);
                }
            });
    },
      _init: function(res, username) {
        // Libraries
        const Xray = require('x-ray');
        this._moment = require('moment');
        this._xray = Xray();

        // Arguments
        this._res = res;
        this._username = username;

        this._scrapeTheFoodProviderWebsite();

        return this;
      },
Example #17
0
    // Wrap the callback-style scrape in a promise that resolves with
    // { manifestTarget } and rejects with the scrape error.
    return new Promise(function (resolve, reject) {
      var xray = Xray();
      // Read the href attribute of the page's <link rel=manifest> element.
      xray(appUrl, 'link[rel=manifest]@href')(function(err, manifestTarget) {
        // Log both values before deciding how to settle.
        debug(err, manifestTarget);

        if (err) {
          return reject(err);
        }

        return resolve({
          manifestTarget: manifestTarget
        });
      });
    // The continuation below is cut off in this view.
    }).then(function (result) {
Example #18
0
exports.start = function() {
  xray(urlHost)
    .select([{
      $root: '.m-results-business',
      name: '.m-results-business--name a',
      address: '.m-results-business--address',
      url: '.m-results-business--online a',
      desc: '.m-results-business--services',
      services: '.m-services',
      openTime: '.m-opening-hours',
      coords: '.m-results-business--map-link[onclick]',
      tel: '.m-bip-otras-direcciones--telefonos p',
      img: '.media-container-img[src]',
      page: '.m-results-pagination li.last > a[href]'
    }])
    .paginate('.m-results-pagination li.last > a[href]')
    .limit(191)

    .run(function (err, json) {

      if (err) throw err;

      json.forEach(function (vet) {
        var error = false;
        if (vet.name) {
          vet.name = vet.name.replace(/(\r\n|\n|\r|\t)/gm, '').trim();
        }
        if (vet.address) {
          vet.address = vet.address.split(',')[0].trim() + ', ' + vet.address.split(',')[1].trim();
        }
        if (vet.desc) {
          vet.desc = vet.desc.replace(/(\r\n|\n|\r|\t)/gm, '').trim();
        }
        if (vet.services) {
          vet.services = vet.services.replace(/(\r\n|\n|\r|\t)/gm, '').trim();
        }
        if (vet.openTime) {
          vet.openTime = vet.openTime.replace(/(\r\n|\n|\r|\t)/gm, ' ');
        }
        if (vet.coords) {
          var lat = vet.coords.split('|')[2].split('&')[0].split(',')[0];
          var long = vet.coords.split('|')[2].split('&')[0].split(',')[1];
          vet.coords = [lat, long];
        }
        grabarVet(vet);

      });
    });
};
Example #19
0
/**
 * Scrapes every match of the configured selector from the configured URL and
 * passes the converted results to `callback`.
 *
 * @param {function(*, *=)} callback - receives the scrape error, or `null`
 *   and the value returned by `settings.convert`.
 */
internals.Odds.prototype.get = function (callback) {

    const scraper = new Xray();
    const { url, selector } = this.settings;

    const onScraped = (err, results) => {

        if (!err) {
            return callback(null, this.settings.convert(results));
        }

        return callback(err);
    };

    scraper(url, [selector])(onScraped);
};
/**
 * Scrapes the tayara.tn listing for `req.params.name`, writes the items to
 * ./output.json and then ends the response.
 *
 * The response status is set to 500 when the scrape or the write fails.
 *
 * @param {{params: {name: string}}} req - request carrying the search name.
 * @param {{statusCode: number, end: function()}} res - response to finish.
 */
export function  search(req, res)
{
  const xray = new Xray();
  // The name comes from the request, so escape it before building the URL.
  const url = 'http://www.tayara.tn/tunisie/' + encodeURIComponent(req.params.name);

  xray(url, '.item',
    [{
      Titre: '.item-img img@alt',
      Prix: '.price',
      Image: '.item-img img@src',
      alt: '.item-img img@alt'
    }]
  )(function(err, results){
    if (err) {
      console.error(err);
      res.statusCode = 500;
      res.end();
      return;
    }

    // fs.writeFile requires a callback; end the response only once the
    // file has been written (or the write has failed).
    fs.writeFile("./output.json", JSON.stringify(results, null, '\t'), function(writeErr){
      if (writeErr) {
        console.error(writeErr);
        res.statusCode = 500;
      }
      res.end();
    });
  })

}
Example #21
0
/**
 * Scrapes a restaurant page and writes the result to
 * happycow/restaurants/<restaurant>.json.
 *
 * @param {string} restaurant - name used for the output file.
 * @param {string} restaurantLink - page to scrape.
 */
const scrap = (restaurant, restaurantLink) => {
  const fields = {
    title: '.title h1@title',
    tags: ['.feature-row ul.tags li.label@title'],
    typeOfFood: 'div#typeOfFood',
    description: 'span[itemprop=description]',
    priceRange: 'span[itemprop=priceRange]',
    streetAddress: 'span[itemprop=streetAddress]',
    postalCode: 'span[itemprop=postalCode]',
    phone: 'span[itemprop=telephone] a@href',
    venueHours: '#venueHours p',
    listingFeatures: ['#listingFeatures ul li'],
    fb: '.add-list a.fb@href',
    gmaps: '.map-holder img@src'
  };

  const page = xRay(restaurantLink, 'div@itemscope', fields);
  page.write(`happycow/restaurants/${restaurant}.json`);
}
Example #22
0
  /**
   * Scrapes the basic details of one restaurant page.
   *
   * @param {{url: string}} param0 - page to scrape.
   * @returns {Promise<Object>} the scraped record, or
   *   `{ error, source: 'handleGet' }` when scraping fails (never rejects).
   */
  function handleGet({ url }) {
    const x = xray();

    const details = new Promise((resolve, reject) => {
      const fields = {
        name: '.title-area h1',
        map: '.bb-show-map@href',
        rating: ['.rating li img@alt'],
        address: '.address-t-record',
        telephone: '.info-t-record strong',
        email: '.bb-contact span:nth-child(2)',
        webpage: '.bb-contact a@href',
        // price: x('.highlighted-box-right p:not(".download")',
        //           [{
        //             priceLabel: '.left',
        //             priceAmount: '.right',
        //           }]),
        // schedule: ['.extra-info.schedule p'],
        // details: x('#bb-tab-1 .data',
        //             [{
        //               detailLabel: '.first',
        //               detailText: '.second',
        //             }]),
        // specialties: x('#bb-tab-2 .data',
        //                 [{
        //                   specialtyLabel: '.first',
        //                   specialtyText: '.second',
        //                 }]),
        // services: ['.fullservices .icon img@alt'],
        // valoration: x('.tr-valorations .valoration-case',
        //               [{
        //                 valorationLabel: '.first p',
        //                 valorationText: '.valoration img@alt',
        //               }]),
        // tags: ['.tourist-record li'],
      };

      x(url, '.restaurant', fields)((err, arr) => {
        if (!err) {
          resolve(arr);
          return;
        }
        reject(err);
      });
    });

    // Failures are turned into a tagged value instead of a rejection.
    return details.catch(error => ({ error, source: 'handleGet' }));
  }
Example #23
0
/**
 * Scrapes result links from a search page (following up to 5 pages) and
 * reduces each link to the target found between 'q=' and '&sa'.
 *
 * @param {string} url - search result page to start from.
 * @param {function(*, Array=)} callback - receives `(null, items)` on
 *   success; on an error or an empty result it is called with the error only.
 */
var scrapeUrl = function(url, callback) {
  // Returns the part of `link` after 'q=' up to the following '&sa'.
  // Links without 'q=' are returned unchanged, and a missing '&sa' means
  // "until the end" (indexOf's -1 used to produce a garbage substring).
  function unwrapTarget(link) {
    if (typeof link !== 'string') {
      return link;
    }
    var start = link.indexOf('q=');
    if (start === -1) {
      return link;
    }
    var end = link.indexOf('&sa', start);
    return link.substring(start + 2, end === -1 ? link.length : end);
  }

  x(url, '.g', [{
    link: 'a@href',
  }]).paginate('#pnnext@href').limit(5)
  (function(err, obj) {
    if(err || !obj) {
      console.log('An exception occured.')
      callback(err);
      return;
    }
    obj.forEach((item) => {
      item.link = unwrapTarget(item.link)
    })

    callback(null, obj)
  })
}
Example #24
0
      return new Promise(function (resolve, reject) {

        xray(`http://finance.yahoo.com/q/op?s=${ticker}+Options&date=${expiration}`, {
          stockLast: xray('.time_rtq_ticker span'),
          allExpiration: xray('.Start-0 option',
              [{
              unixEpoch: '@value',
              readableValue: ''
            }]),
          currentExpiration: [expiration],
          calls: xray('#optionsCallsTable .quote-table-overflow tr', 
            [{
              strikeValue: xray('td:nth-child(1) a  | trim'),
              // last: xray('td:nth-child(3) | trim'),
              // bid: xray('td:nth-child(4) | trim'),
              // ask: xray('td:nth-child(5) | trim'),
              // change: xray('td:nth-child(6) | trim'),
              // percentChange: xray('td:nth-child(7) | trim'),
              // volume: xray('td:nth-child(8) | trim'),
              // openInterest: xray('td:nth-child(9) | trim'),
              // impliedVolatility: xray('td:nth-child(10) | trim')
            }]
          ),
          puts: xray('#optionsPutsTable .quote-table-overflow tr', 
            [{
              strikeValue: xray('td:nth-child(1) a | trim'),
              // last: xray('td:nth-child(3) | trim'),
              // bid: xray('td:nth-child(4) | trim'),
              // ask: xray('td:nth-child(5) | trim'),
              // change: xray('td:nth-child(6) | trim'),
              // percentChange: xray('td:nth-child(7) | trim'),
              // volume: xray('td:nth-child(8) | trim'),
              // openInterest: xray('td:nth-child(9) | trim'),
              // impliedVolatility: xray('td:nth-child(10) | trim')
            }]
          ),
        })
        // .write('results.json');
        // .stream()
        ((err, data) => {
          console.log('exp', expiration); // this works
          if (err) reject(err);
          resolve(data);
      })
    })
Example #25
0
        // Queue worker: scrapes the newest links of a subreddit (paginated up
        // to config.reddit.maxPages) and collects the YouTube video ids found
        // in them into `videoIds`.
        const q = async.queue((subreddit, callback) => {
            const scrape = xray(`http://www.reddit.com/r/${subreddit}/new/`)
                .select(["a.title[href]"])
                .throws(false)
                .paginate(".nextprev a:last-child[href]")
                .limit(config.reddit.maxPages);

            scrape.run((err, links) => {
                if (err) {
                    console.error(err);
                    return callback(err);
                }

                // Keep the captured id of every link that matches.
                const ids = _.reduce(links, (found, link) => {
                    const hit = link.match(/youtube\.com(?:.*)v=([\w-]*?)(?:&|\/|$)/i);
                    if (hit !== null) {
                        found.push(hit[1]);
                    }
                    return found;
                }, []);
                videoIds.push(...ids);

                callback();
            });
        }, config.reddit.concurrency);
Example #26
0
/**
 * Reads the pagination links of a listing page and builds the URL of every
 * result page, using the number in the last link as the page count.
 *
 * @param {{url: string}} param0 - listing page to inspect.
 * @returns {Promise<string[]|Object>} the page URLs, or
 *   `{ error, source: 'handleGet' }` when scraping fails (never rejects).
 */
function handleGet({ url }) {
  const x = xray();

  const pageUrls = new Promise((resolve, reject) => {
    x(url, '.pagination-first-line', ['a'])((err, arr) => {
      if (err) {
        reject(err);
        return;
      }

      const lastLabel = arr[arr.length - 1];
      const pages = parseInt(lastLabel, 10);
      const urls = [];
      let page = 1;
      while (page <= pages) {
        urls.push(`http://www.viamichelin.es/web/Restaurantes/Restaurantes-Espana?page=${page}`);
        page += 1;
      }
      resolve(urls);
    });
  });

  // Failures are turned into a tagged value instead of a rejection.
  return pageUrls.catch(error => ({ error, source: 'handleGet' }));
}
Example #27
0
  /**
   * Scrapes every '.poi-item' entry of a listing, following the link after
   * the current page for pagination.
   *
   * @param {{url: string}} param0 - first listing page.
   * @returns {Promise<Array|Object>} the scraped entries, or
   *   `{ error, source: 'handleGet' }` when scraping fails (never rejects).
   */
  function handleGet({ url }) {
    const x = xray();

    const listing = new Promise((resolve, reject) => {
      const itemFields = {
        name: '.poi-item-name a',
        url: '.poi-item-name a@href',
        stars: ['.poi-item-stars .star@class'],
        bib: '.poi-item-stars .bib-gourmand@class',
        priceMin: '.poi-item-price em:first-child',
        priceMax: '.poi-item-price em:last-child',
        address: '.poi-item-address',
      };
      const scrape = x(url, '.poi-item', [itemFields])
        .paginate('.pagination-current-page + a@href');

      scrape((err, arr) => {
        if (!err) {
          resolve(arr);
          return;
        }
        reject(err);
      });
    });

    // Failures are turned into a tagged value instead of a rejection.
    return listing.catch(error => ({ error, source: 'handleGet' }));
  }
Example #28
0
  return new Promise((resolve, reject) => {
    const xray = new Xray();

    xray(url, 'tr',
      [{
        class: '@class',
        tap: '.draft_tap',
        brewery: '.draft_brewery',
        beer: '.draft_name',
        pint: '.draft_price',
        growler: '.draft_growler',
        origin: '.draft_origin',
        abv: '.draft_abv',
      }]
    )((err, results) => {
      if (err) reject(err);

      const json = results && results
        .slice(0, results.length - 2)
        .map(result => {
          const returnObj = Object.assign({}, result);
          returnObj.classes = result.class.trim().split(' ');
          returnObj.abv = isNaN(result.abv) ? 0 : result.abv;

          const slashIndex = result.brewery.indexOf('/');
          if (slashIndex !== -1) {
            returnObj.brewery =
              `${result.brewery.slice(0, slashIndex)} / ${result.brewery.slice(slashIndex + 1)}`;
          }

          return returnObj;
        });

      resolve(JSON.stringify(json, null, '\t'));
    });
  });
Example #29
0
    static get(dateString, callback) {

        let dateComponents = dateString.split('-');
        if (dateComponents.length !== 3) {
            dateComponents = dateString.split('/');
            if (dateComponents.length !== 3) {
                throw `Invalid date format. Use 'YYYY-MM-DD'`;
            }
        }
        const year = dateComponents[0];
        const month = dateComponents[1];
        const day = dateComponents[2];

        const url = `https://www.mlb.com/probable-pitchers/${year}-${month}-${day}`;
        const scope = 'body';
        const selector = {
            pitchers: ['div.probable-pitchers__pitcher-name a@href'],
            names: ['div.probable-pitchers__pitcher-name a'],
            throws: ['div.probable-pitchers__pitcher-details span.probable-pitchers__pitcher-pitch-hand'],
            teams: ['div.probable-pitchers__team-names span.probable-pitchers__team-name--away, div.probable-pitchers__team-names span.probable-pitchers__team-name--home'],
            games: ['div.probable-pitchers__matchup@data-gamePk'],
            startTimes: ['div.probable-pitchers__game-details div.probable-pitchers__game-date-time time@dateitme'],
            easternTimes: ['div.pitcher@eastern_time'],
            timezones: ['div.pitcher@local_time_zone']
        };

        const x = Xray();
        x(url, scope, selector)((err, result) => {

            if (err) {
                return callback(err);
            }
            const matchups = Probables.convertResult(result);
            return callback(null, matchups);
        });
    }
Example #30
0
var Xray = require('x-ray');
var xray = Xray();
var request = require('superagent')
var util = require('util')
var Rx = require('rxjs')
var _ = require('lodash')
var Firebase = require('firebase')
var githubRef = new Firebase('https://biznobo-sandbox.firebaseio.com/github/users')
var Nightmare = require('nightmare');
var nightmare = Nightmare({ show: true })
var $ = require('jquery')
// nightmare
//   .goto('http://google.com')
//   // .type('input[title="Search"]', 'github nightmare')
//   // .click('#uh-search-button')
//   // .wait('#main')
//   .evaluate(function () {
//     console.log('hihihih');
//     console.log(document.querySelector('#main .searchCenterMiddle li a').href);
//     return document.querySelector('#main .searchCenterMiddle li a').href
//   })
//   // .end()
//   .then(function (result) {
//     console.log(result)
//   })
//
// nightmare.end()
//
var google = nightmare
  .goto('https://www.crunchbase.com/search')
  .wait('#founded_after')