return new Promise((resolve, reject) => { xray(this.url, { metatags: xray('meta', [{ name: '@name', description: '@content' }]), meta: 'meta' })((err, data) => { if (err) reject(err); let filtered = data.metatags.filter(d => d.name === 'keywords'); filtered.length > 0 ? resolve(filtered[0].description.split(',')) : []; }) })
return new Promise(function (resolve, reject) { xray(`http://finance.yahoo.com/q/op?s=${ticker}+Options`, { allExpiration: xray('.Start-0 option', [{ value: '@value' }]) }) ((err, obj) => { // console.log(obj); resolve(obj); // return obj.allExpiration.map((res) => tickerScrape(ticker, res.value)); }) })
/**
 * Look up `arg.word` on the page at SLOVICKA_CZ_EN_URL and hand the result to
 * `callbackFn`.
 *
 * @param {{word: string, from: *, to: *}} arg - word to look up plus the
 *   `from`/`to` values, which are copied onto the result unchanged.
 * @param {function(Object)} callbackFn - receives the scraped object
 *   (`translations` comes from `#fastMeanings a` inside `#fastTrans`)
 *   enriched with `word`, `from` and `to`. After a failed scrape it receives
 *   the same shape with an empty `translations` list.
 */
var _translate = function(arg, callbackFn) {
  console.log("translate called");
  console.dir(arg);
  xray(SLOVICKA_CZ_EN_URL + '?q=' + arg.word)
    .select({
      $root: '#fastTrans',
      translations: ['#fastMeanings a']
    })
    .run(function(err, object) {
      console.info('run xray.. error: ', err, "object: ", object);
      console.dir(err);
      console.dir(object);
      // A failed scrape yields no object; fall back to an empty result so the
      // enrichment below cannot throw and the caller is always called back.
      var translation = object || { translations: [] };
      translation.word = arg.word;
      translation.from = arg.from;
      translation.to = arg.to;
      callbackFn(translation);
    });
};
/**
 * Scrape the futhead.com FIFA 16 player page for the given rating and write
 * the scraped player cards to `results.json`.
 *
 * @param {number|string} playerRating - rating segment inserted into the URL;
 *   also used in the log messages.
 */
var futPlayer = function scrapePlayers(playerRating){
  var x = Xray();
  x('http://www.futhead.com/16/players/' + playerRating + '/test', 'body', [{
    player: 'h1 a',
    position: '.playercard-position',
    rating: '.playercard-rating',
    nation: '.playercard-nation img@src',
    club: '.playercard-club img@src',
    playerImage: '.playercard-picture img@src',
    workrates: '.playercard-workrates',
    stats: x('.card-large', {
      pace: '.playercard-attr1',
      shooting: '.playercard-attr2',
      passing: '.playercard-attr3',
      dribbling: '.playercard-attr4',
      defending: '.playercard-attr5',
      physical: '.playercard-attr6'
    })
  }])(function(err) {
    if (err) {
      // Do not announce a file when the scrape itself failed.
      console.error("could not scrape players with rank " + playerRating, err);
      return;
    }
    console.log("json file created for player with rank " + playerRating);
  }).write('results.json');
};
return new Promise(function(resolve, reject) { x('http://skateboarding.transworld.net/tag/wednesday-wallpaper', '.article-hover-deets', ['.view-more a@href']) (function(err, data) { if(err) return reject(err); resolve(data); }); });
exports.is = function(carPlate, cb) { xray('http://www.samgongustofa.is/umferd/okutaeki/okutaekjaskra/uppfletting?vq=' + carPlate) .select([{ $root: '.vehicleinfo ul', type: 'li:nth-child(1) span', subType: 'li:nth-child(1) span', color: 'li:nth-child(1) span', registryNumber: 'li:nth-child(2) span', number: 'li:nth-child(3) span', factoryNumber: 'li:nth-child(4) span', registeredAt: 'li:nth-child(5) span', pollution: 'li:nth-child(6) span', weight: 'li:nth-child(7) span', status: 'li:nth-child(8) span', nextCheck: 'li:nth-child(9) span' }]) .run(function(err, array) { var cleaned = _.map(array, function(car) { car.type = car.type.substring(0,car.type.indexOf('-')-1); car.subType = car.subType.substring(car.subType.indexOf('-')+2,car.subType.indexOf('(')-1); car.color = car.color.substring(car.color.indexOf('(')+1,car.color.indexOf(')')); return car; }); return cb(err, cleaned); }); };
/**
 * Scrape the futhead.com FIFA 16 club page for `club` and write the player
 * cards found there to `<club>.json`.
 *
 * @param {string} club - club segment inserted into the URL; also used as the
 *   output file name.
 */
var futClubs = function scrapeClubs(club){
  var x = Xray();
  x('http://www.futhead.com/16/clubs/' + club + '/', '.player-page-listing', [{
    players: x('a', {
      player: '.playercard-name',
      position: '.playercard-position',
      rating: '.playercard-rating',
      nation: '.playercard-nation img@src',
      club: '.playercard-club img@src',
      playerImage: '.playercard-picture img@src',
      workrates: '.playercard-workrates',
      stats: x('.card-small', {
        pace: '.playercard-attr1',
        shooting: '.playercard-attr2',
        passing: '.playercard-attr3',
        dribbling: '.playercard-attr4',
        defending: '.playercard-attr5',
        physical: '.playercard-attr6'
      })
    })
  }])(function(err) {
    if (err) {
      // Do not announce a file when the scrape itself failed.
      console.error("could not scrape club '" + club + "'", err);
      return;
    }
    console.log("json file '" + club + "' created");
  }).write(club + '.json');
};
// GET /commits
// Scrapes the commit list of onedr0p/manage-this-node (master) from GitHub
// and responds with an array of `{ title, author, date }` objects.
// Scrape failures are forwarded to the next error handler via `next(err)`.
app.get('/commits', function(req, res, next) {
  var scraper = new Xray();
  scraper('https://github.com/onedr0p/manage-this-node/commits/master', '.table-list-cell', [{
    title: '.commit-title',
    author: 'a.commit-author',
    date: 'time'
  }])(function(err, result){
    if (err) {
      return next(err);
    }
    var commitLog = [];
    _.forEach(result, function(n) {
      // A row may lack one of the fields; treat it as empty text so one odd
      // row cannot throw and leave the request without a response.
      var commitTitle = (n.title || '').trim();
      // Drop every run of characters outside the whitelisted set.
      commitTitle = commitTitle.replace(/[^A-Za-z 0-9 \.,\?""!@#\$%\^&\*\(\)-_=\+;:<>\/\\\|\}\{\[\]`~]+/g, '');
      commitLog.push({
        title: commitTitle.trim(),
        author: (n.author || '').trim(),
        date: (n.date || '').trim()
      });
    });
    // send json to front-end
    res.send(commitLog);
  });
});
// For every state link whose last path segment is not listed in
// `federations`: remember the state and dump the hrefs of the links inside
// `.continent` on that page to happycow/states/<state>.json.
statesLinks.forEach(stateLink => {
  const state = lastPath(stateLink);
  const isFederation = federations.indexOf(state) !== -1;
  if (isFederation) {
    return;
  }
  states.push(state);
  const scrapeLinks = xRay(stateLink, '.continent', ['a@href']);
  scrapeLinks.write(`happycow/states/${state}.json`);
});
.map(function(catObj) { // scrape promotion details page return [catObj, xray(catObj.details_link, { promo_details_title: '#merchant-detail h5', promo_details_description: '#merchant-detail p', promo_image: '#banner img@src' }).stream()]; })
.map(function(catObj){ return [catObj, xray(catObj.href, 'ul.list2 li', [{ title: 'span.promo-title', merchant: 'span.merchant-name', merchant_logo: 'img@src', valid_until: 'span.valid-until', details_link: 'a@href', }]).stream()]; }) // returns stream of [{title: 'title'}, {...}]
exports.get = function(url,cb){ xr(url) .select([{content:'.section-content',pics:'img[src]'}]).run(function(err,data){ //console.log(data); var htmlstring = ""; data.map(function(ele,index){ //if(ele.content){ //INSERT FUNCTION TO SEND ALL OF THIS CONTENT TO CLOUD MONGO TO BE FURTHER EVALUATED //****************************************************************************** //SEND (USERNAME IN FUTURE),URL,TEXT,PICTURES - MAYBE WE SHOULD NUMBER THE ENTRIES IN ORDER TO BETTER KEEP THEM LINED UP? IN //THAT CASE WE WOULD NEED TO INCLUDE THE INDEX WITH THE SUBMISSION TO THE DB /* db.collection('magni').update({date:todaysDate},{$set:{content:ele.content,pics:ele.pics}},{upsert:true},function(err,res){ //LOG THE RESULT //RETURN IF FINISHED if(index == data.length - 1){ return cb(null,'done') } }) */ //****************************************************************************** console.log(ele.content) if(ele.content == undefined){ htmlstring += '' } else{ htmlstring += '<div><p>'+ele.content+'</p></div><br>' } if(index == data.length - 1){ //console.log(htmlstring) block(htmlstring) } //} }) function block(htmlstring){ //console.log(htmlstring) blockspring.runParsed("html-to-pdf",{"html":htmlstring},function(res){ return cb(null,res.params.my_pdf) }) } }) }
return new Promise(function(resolve, reject) { x(url, 'body', [{ title: '.main h1', dl: '.article-body a[href$=".jpg"]@href' }]) (function(err, data) { if(err) return reject(err); resolve(data); }); });
/**
 * Scrape `url` for its `title` and `links` fields (selectors `title` and
 * `a@href`, scoped to `html`).
 *
 * @param {string} url - page to scrape.
 * @param {function(?Error, Object=)} callback - node-style callback. An empty
 *   result is reported as an error so callers never receive neither an error
 *   nor data.
 */
var genericScrape = function(url, callback) {
  x(url, 'html', {
    title: 'title',
    links: 'a@href'
  })((err, obj) => {
    if (err || !obj) {
      callback(err || new Error('No data scraped from ' + url));
      return;
    }
    callback(null, obj);
  });
};
run : function (queryUrl) { var self = this; xray(queryUrl) .select([{ link: '.r a[href]' }]) .paginate('#nav td:last-child a[href]') .limit(2) .run(function(error, results) { if (error) { debug("Couldn't get page because of error: %s", error); return; } if(results.length < 1) { debug("No results from google"); return; } for(i in results) { // Extract real target url var url = results[i].link.replace("/url?q=", "").split("&")[0]; /* * Url filters */ if(self.config.filters && self.config.filters.url) { var filter; for(var i in self.config.filters.url) { filter = require('./filters/url/' + self.config.filters.url[i]); if(!filter.validUrl(url)) { continue; } } } self.totalResults++; self.crawl(url); } }); },
_init: function(res, username) { // Libraries const Xray = require('x-ray'); this._moment = require('moment'); this._xray = Xray(); // Arguments this._res = res; this._username = username; this._scrapeTheFoodProviderWebsite(); return this; },
return new Promise(function (resolve, reject) { var xray = Xray(); xray(appUrl, 'link[rel=manifest]@href')(function(err, manifestTarget) { debug(err, manifestTarget); if (err) { return reject(err); } return resolve({ manifestTarget: manifestTarget }); }); }).then(function (result) {
exports.start = function() { xray(urlHost) .select([{ $root: '.m-results-business', name: '.m-results-business--name a', address: '.m-results-business--address', url: '.m-results-business--online a', desc: '.m-results-business--services', services: '.m-services', openTime: '.m-opening-hours', coords: '.m-results-business--map-link[onclick]', tel: '.m-bip-otras-direcciones--telefonos p', img: '.media-container-img[src]', page: '.m-results-pagination li.last > a[href]' }]) .paginate('.m-results-pagination li.last > a[href]') .limit(191) .run(function (err, json) { if (err) throw err; json.forEach(function (vet) { var error = false; if (vet.name) { vet.name = vet.name.replace(/(\r\n|\n|\r|\t)/gm, '').trim(); } if (vet.address) { vet.address = vet.address.split(',')[0].trim() + ', ' + vet.address.split(',')[1].trim(); } if (vet.desc) { vet.desc = vet.desc.replace(/(\r\n|\n|\r|\t)/gm, '').trim(); } if (vet.services) { vet.services = vet.services.replace(/(\r\n|\n|\r|\t)/gm, '').trim(); } if (vet.openTime) { vet.openTime = vet.openTime.replace(/(\r\n|\n|\r|\t)/gm, ' '); } if (vet.coords) { var lat = vet.coords.split('|')[2].split('&')[0].split(',')[0]; var long = vet.coords.split('|')[2].split('&')[0].split(',')[1]; vet.coords = [lat, long]; } grabarVet(vet); }); }); };
/**
 * Scrape `settings.url` for all matches of `settings.selector`, run them
 * through `settings.convert` and pass the outcome to `callback`.
 *
 * @param {function(?Error, *=)} callback - node-style callback; receives the
 *   scraper error, or `null` and the converted results.
 */
internals.Odds.prototype.get = function (callback) {
  const scraper = new Xray();
  const target = this.settings.url;
  const matchAll = [this.settings.selector];

  const scrape = scraper(target, matchAll);
  scrape((err, results) => {
    if (!err) {
      return callback(null, this.settings.convert(results));
    }
    return callback(err);
  });
};
/**
 * Search tayara.tn for `req.params.name`, store the scraped items in
 * `./output.json` and end the response once the file is written.
 * Ends the response with status 500 when scraping or writing fails.
 *
 * @param {Object} req - request; `req.params.name` is the search path segment.
 * @param {Object} res - response; only `statusCode` and `end()` are used.
 */
export function search(req, res) {
  var xray = new Xray();
  // The name comes from the request, so encode it before building the URL.
  var url = 'http://www.tayara.tn/tunisie/' + encodeURIComponent(req.params.name);

  function fail(error) {
    console.error(error);
    res.statusCode = 500;
    res.end();
  }

  xray(url, '.item', [{
    Titre: '.item-img img@alt',
    Prix: '.price',
    Image: '.item-img img@src',
    alt: '.item-img img@alt'
  }])(function(err, results){
    if (err) {
      return fail(err);
    }
    // fs.writeFile needs a completion callback; answer only after the write
    // has finished so write errors are not lost.
    fs.writeFile("./output.json", JSON.stringify(results, null, '\t'), function(writeErr) {
      if (writeErr) {
        return fail(writeErr);
      }
      res.end();
    });
  });
}
// Scrape the fields below from the page at `restaurantLink` and write them
// to happycow/restaurants/<restaurant>.json.
const scrap = (restaurant, restaurantLink) => {
  const fields = {
    title: '.title h1@title',
    tags: ['.feature-row ul.tags li.label@title'],
    typeOfFood: 'div#typeOfFood',
    description: 'span[itemprop=description]',
    priceRange: 'span[itemprop=priceRange]',
    streetAddress: 'span[itemprop=streetAddress]',
    postalCode: 'span[itemprop=postalCode]',
    phone: 'span[itemprop=telephone] a@href',
    venueHours: '#venueHours p',
    listingFeatures: ['#listingFeatures ul li'],
    fb: '.add-list a.fb@href',
    gmaps: '.map-holder img@src'
  };
  const scrapePage = xRay(restaurantLink, 'div@itemscope', fields);
  scrapePage.write(`happycow/restaurants/${restaurant}.json`);
};
/**
 * Scrape the `.restaurant` block of the page at `url`.
 *
 * @param {{url: string}} param0 - object carrying the page url.
 * @returns {Promise<Object>} the scraped fields, or
 *   `{ error, source: 'handleGet' }` when scraping fails (never rejects).
 */
function handleGet({ url }) {
  const x = xray();
  const scrapeRestaurant = (resolve, reject) => {
    const fields = {
      name: '.title-area h1',
      map: '.bb-show-map@href',
      rating: ['.rating li img@alt'],
      address: '.address-t-record',
      telephone: '.info-t-record strong',
      email: '.bb-contact span:nth-child(2)',
      webpage: '.bb-contact a@href',
      // Selectors that are currently disabled:
      // price: x('.highlighted-box-right p:not(".download")',
      //   [{
      //     priceLabel: '.left',
      //     priceAmount: '.right',
      //   }]),
      // schedule: ['.extra-info.schedule p'],
      // details: x('#bb-tab-1 .data',
      //   [{
      //     detailLabel: '.first',
      //     detailText: '.second',
      //   }]),
      // specialties: x('#bb-tab-2 .data',
      //   [{
      //     specialtyLabel: '.first',
      //     specialtyText: '.second',
      //   }]),
      // services: ['.fullservices .icon img@alt'],
      // valoration: x('.tr-valorations .valoration-case',
      //   [{
      //     valorationLabel: '.first p',
      //     valorationText: '.valoration img@alt',
      //   }]),
      // tags: ['.tourist-record li'],
    };
    x(url, '.restaurant', fields)((err, arr) => {
      if (err) {
        reject(err);
        return;
      }
      resolve(arr);
    });
  };
  return new Promise(scrapeRestaurant)
    .catch(error => ({ error, source: 'handleGet' }));
}
/**
 * Scrape the result links of a search page (following `#pnnext`, limited to
 * 5) and reduce each link to the target found between `q=` and `&sa`.
 *
 * @param {string} url - search result page to start from.
 * @param {function(?Error, Array<{link: string}>=)} callback - node-style
 *   callback. An empty result is reported as an error.
 */
var scrapeUrl = function(url, callback) {
  // Return the part of `link` between "q=" and "&sa". Links without "q=" are
  // returned unchanged; without "&sa" everything after "q=" is kept.
  function extractTarget(link) {
    if (typeof link !== 'string') {
      return link;
    }
    var start = link.indexOf('q=');
    if (start === -1) {
      return link;
    }
    var end = link.indexOf('&sa', start + 2);
    return end === -1 ? link.substring(start + 2) : link.substring(start + 2, end);
  }

  x(url, '.g', [{
    link: 'a@href',
  }]).paginate('#pnnext@href').limit(5)(function(err, obj) {
    if (err || !obj) {
      console.log('An exception occured.');
      callback(err || new Error('No results scraped from ' + url));
      return;
    }
    obj.forEach((item) => {
      item.link = extractTarget(item.link);
    });
    callback(null, obj);
  });
};
return new Promise(function (resolve, reject) { xray(`http://finance.yahoo.com/q/op?s=${ticker}+Options&date=${expiration}`, { stockLast: xray('.time_rtq_ticker span'), allExpiration: xray('.Start-0 option', [{ unixEpoch: '@value', readableValue: '' }]), currentExpiration: [expiration], calls: xray('#optionsCallsTable .quote-table-overflow tr', [{ strikeValue: xray('td:nth-child(1) a | trim'), // last: xray('td:nth-child(3) | trim'), // bid: xray('td:nth-child(4) | trim'), // ask: xray('td:nth-child(5) | trim'), // change: xray('td:nth-child(6) | trim'), // percentChange: xray('td:nth-child(7) | trim'), // volume: xray('td:nth-child(8) | trim'), // openInterest: xray('td:nth-child(9) | trim'), // impliedVolatility: xray('td:nth-child(10) | trim') }] ), puts: xray('#optionsPutsTable .quote-table-overflow tr', [{ strikeValue: xray('td:nth-child(1) a | trim'), // last: xray('td:nth-child(3) | trim'), // bid: xray('td:nth-child(4) | trim'), // ask: xray('td:nth-child(5) | trim'), // change: xray('td:nth-child(6) | trim'), // percentChange: xray('td:nth-child(7) | trim'), // volume: xray('td:nth-child(8) | trim'), // openInterest: xray('td:nth-child(9) | trim'), // impliedVolatility: xray('td:nth-child(10) | trim') }] ), }) // .write('results.json'); // .stream() ((err, data) => { console.log('exp', expiration); // this works if (err) reject(err); resolve(data); }) })
// Work queue (concurrency `config.reddit.concurrency`): each task scrapes the
// "new" listing of one subreddit, up to `config.reddit.maxPages` pages, and
// appends the id of every YouTube link found to `videoIds`.
const q = async.queue((subreddit, callback) => {
  const isYoutubeLink = link =>
    link.search(/youtube\.com(?:.*)v=([\w-]*?)(?:&|\/|$)/i) !== -1;
  const toVideoId = link =>
    link.match(/youtube\.com(?:.*)v=([\w-]*?)(?:&|\/|$)/i)[1];

  const onScraped = (err, links) => {
    if (err) {
      console.error(err);
      return callback(err);
    }
    const youtubeLinks = _.filter(links, isYoutubeLink);
    const ids = _.map(youtubeLinks, toVideoId);
    videoIds.push(...ids);
    callback();
  };

  xray(`http://www.reddit.com/r/${subreddit}/new/`)
    .select(["a.title[href]"])
    .throws(false)
    .paginate(".nextprev a:last-child[href]")
    .limit(config.reddit.maxPages)
    .run(onScraped);
}, config.reddit.concurrency);
/**
 * Read the link texts inside `.pagination-first-line` on `url`, parse the last
 * one as the page count and build one listing URL per page (1..count).
 *
 * @param {{url: string}} param0 - object carrying the page url.
 * @returns {Promise<string[]|{error: *, source: string}>} the page URLs, or
 *   `{ error, source: 'handleGet' }` when scraping fails (never rejects).
 */
function handleGet({ url }) {
  const scraper = xray();
  const pending = new Promise((resolve, reject) => {
    scraper(url, '.pagination-first-line', ['a'])((err, labels) => {
      if (err) {
        reject(err);
        return;
      }
      const lastPage = parseInt(labels[labels.length - 1], 10);
      const pageUrls = [];
      let page = 1;
      while (page <= lastPage) {
        pageUrls.push(`http://www.viamichelin.es/web/Restaurantes/Restaurantes-Espana?page=${page}`);
        page += 1;
      }
      resolve(pageUrls);
    });
  });
  return pending.catch(error => ({ error, source: 'handleGet' }));
}
/**
 * Scrape every `.poi-item` entry starting at `url`, following the link next
 * to the current pagination page.
 *
 * @param {{url: string}} param0 - object carrying the page url.
 * @returns {Promise<Object[]|{error: *, source: string}>} the scraped entries,
 *   or `{ error, source: 'handleGet' }` when scraping fails (never rejects).
 */
function handleGet({ url }) {
  const scraper = xray();
  const collect = (resolve, reject) => {
    const poiFields = {
      name: '.poi-item-name a',
      url: '.poi-item-name a@href',
      stars: ['.poi-item-stars .star@class'],
      bib: '.poi-item-stars .bib-gourmand@class',
      priceMin: '.poi-item-price em:first-child',
      priceMax: '.poi-item-price em:last-child',
      address: '.poi-item-address',
    };
    const pages = scraper(url, '.poi-item', [poiFields])
      .paginate('.pagination-current-page + a@href');
    pages((err, items) => {
      if (err) {
        reject(err);
        return;
      }
      resolve(items);
    });
  };
  return new Promise(collect).catch(error => ({ error, source: 'handleGet' }));
}
return new Promise((resolve, reject) => { const xray = new Xray(); xray(url, 'tr', [{ class: '@class', tap: '.draft_tap', brewery: '.draft_brewery', beer: '.draft_name', pint: '.draft_price', growler: '.draft_growler', origin: '.draft_origin', abv: '.draft_abv', }] )((err, results) => { if (err) reject(err); const json = results && results .slice(0, results.length - 2) .map(result => { const returnObj = Object.assign({}, result); returnObj.classes = result.class.trim().split(' '); returnObj.abv = isNaN(result.abv) ? 0 : result.abv; const slashIndex = result.brewery.indexOf('/'); if (slashIndex !== -1) { returnObj.brewery = `${result.brewery.slice(0, slashIndex)} / ${result.brewery.slice(slashIndex + 1)}`; } return returnObj; }); resolve(JSON.stringify(json, null, '\t')); }); });
static get(dateString, callback) { let dateComponents = dateString.split('-'); if (dateComponents.length !== 3) { dateComponents = dateString.split('/'); if (dateComponents.length !== 3) { throw `Invalid date format. Use 'YYYY-MM-DD'`; } } const year = dateComponents[0]; const month = dateComponents[1]; const day = dateComponents[2]; const url = `https://www.mlb.com/probable-pitchers/${year}-${month}-${day}`; const scope = 'body'; const selector = { pitchers: ['div.probable-pitchers__pitcher-name a@href'], names: ['div.probable-pitchers__pitcher-name a'], throws: ['div.probable-pitchers__pitcher-details span.probable-pitchers__pitcher-pitch-hand'], teams: ['div.probable-pitchers__team-names span.probable-pitchers__team-name--away, div.probable-pitchers__team-names span.probable-pitchers__team-name--home'], games: ['div.probable-pitchers__matchup@data-gamePk'], startTimes: ['div.probable-pitchers__game-details div.probable-pitchers__game-date-time time@dateitme'], easternTimes: ['div.pitcher@eastern_time'], timezones: ['div.pitcher@local_time_zone'] }; const x = Xray(); x(url, scope, selector)((err, result) => { if (err) { return callback(err); } const matchups = Probables.convertResult(result); return callback(null, matchups); }); }
var Xray = require('x-ray'); var xray = Xray(); var request = require('superagent') var util = require('util') var Rx = require('rxjs') var _ = require('lodash') var Firebase = require('firebase') var githubRef = new Firebase('https://biznobo-sandbox.firebaseio.com/github/users') var Nightmare = require('nightmare'); var nightmare = Nightmare({ show: true }) var $ = require('jquery') // nightmare // .goto('http://google.com') // // .type('input[title="Search"]', 'github nightmare') // // .click('#uh-search-button') // // .wait('#main') // .evaluate(function () { // console.log('hihihih'); // console.log(document.querySelector('#main .searchCenterMiddle li a').href); // return document.querySelector('#main .searchCenterMiddle li a').href // }) // // .end() // .then(function (result) { // console.log(result) // }) // // nightmare.end() // var google = nightmare .goto('https://www.crunchbase.com/search') .wait('#founded_after')