/**
 * Parse the feed at `url`, store it as a new `Feed` document and hand the
 * saved document together with the parsed articles to `callback`.
 *
 * A parse error is only logged; `callback` is not invoked in that case.
 * A save error is thrown from inside the save callback.
 *
 * @param {String} url - address of the feed to add
 * @param {Function} callback - called as callback(savedFeed, articles) after a successful save
 */
FeedReader.addFeed = function(url, callback){
  feedparser.parseUrl(url, function(error, meta, articles){
    if(error){
      console.error(error);
      return;
    }

    var Feed = mongoose.model('Feed');

    // `!= null` covers both null and undefined, so the requested url is used
    // whenever the parsed meta does not carry an xmlUrl of its own.
    var xmlUrl = meta.xmlUrl != null ? meta.xmlUrl : url;

    var feed = new Feed({
      _id: new mongoose.Types.ObjectId,
      name: meta.title,
      link: meta.link,
      xmlUrl: xmlUrl
    });

    console.log('Add feed. %s - %s - %s', meta.title, meta.link, meta.xmlUrl);

    feed.save(function(err, f){
      if(err) throw err;
      console.log('Success to save feed.');
      callback(f, articles);
    });
  });
};
exports.findByGoogleRSS = function(artist, done) { var results = []; parser.parseUrl('http://news.google.com/news/feeds?q=' + encodeURIComponent(artist)) .on('article', function(article) { var excerpt; Apricot.parse(article.description, function(err, doc) { if (err) { process.exit(); } var $ = jquery.create(doc.window); excerpt = $('div.lh font:nth-child(5)')[0].innerHTML; }); results.push({ url: article.link, title: article.title, excerpt: excerpt }); }) .on('end', function(err) { done(err, results); }) .on('error', function(err) { done(err); }); }
/**
 * Fetch this provider's feed for a show and offer every article to
 * `this.parse` until it returns a falsy value.
 *
 * @param {String} title - show title; passed through `this.fixTitle` and
 *   substituted for `%show%` in `this.url`
 * @param {String} needed - episode identifier such as "S01E02"
 * @param {Function} callback - forwarded untouched to `this.parse`
 * @throws {Error} when `title` or `needed` is missing
 */
Provider.prototype.getTorrent = function getTorrent(title, needed, callback){
  var provider = this;
  var parser = new FeedParser();

  if(!needed || !title){
    throw new Error('No arguments specified!');
  }

  // Ugly hax: drop the first "S", split on "E", then convert both parts
  // from string to number.
  var wanted = needed.replace('S', '').split('E');
  wanted[0] = parseInt(wanted[0], 10);
  wanted[1] = parseInt(wanted[1], 10);

  if(provider.withZero){
    // Providers flagged withZero get single-digit values zero-padded.
    [0, 1].forEach(function(position){
      if(wanted[position] < 10){
        wanted[position] = '0' + wanted[position];
      }
    });
  }

  var feedUrl = provider.url.replace('%show%', provider.fixTitle(title));

  parser.parseUrl(feedUrl, function(error, meta, articles){
    if(error){
      console.log(error);
      return;
    }
    if(articles.length == 0){
      console.log('No torrents found. This might be an invalid show or episode, or the provider might be down.');
      return;
    }
    // Abuse of every (http://stackoverflow.com/questions/6260756/how-to-stop-javascript-foreach):
    // iteration stops as soon as parse() returns a falsy value.
    articles.every(function(article){
      return provider.parse(article, wanted, callback);
    });
  });
};
// Parse one conseiller's feed with its own parser instance. The closure keeps
// the conseiller bound for the callback, and the parser reference is dropped
// once the parser emits 'end'.
(function(entry){
  var feedParser = new FeedParser();

  feedParser.on('end', function() {
    feedParser = null;
  });

  feedParser.parseUrl(entry.feed, function(error, meta, articles) {
    feedCallback(error, meta, articles, entry);
  });
})(conseiller);
/**
 * POST /api/feed
 *
 * Parses the feed at `req.body.url` and saves one `Feed` document per
 * article (title + permalink). Exactly one response is sent:
 *   - 400 with the error on the first parse or save failure,
 *   - 200 with `{ saved: <count> }` once the feed has ended and every save
 *     has reported back.
 */
app.post('/api/feed', function (req, res) {
  var pending = 0;       // saves still in flight
  var saved = 0;         // saves that succeeded
  var ended = false;     // the parser has emitted 'end'
  var responded = false; // a response has been sent

  function respond(body, status) {
    if (responded) { return; }
    responded = true;
    res.send(body, status);
  }

  function maybeFinish() {
    if (ended && pending === 0) {
      respond({ saved: saved }, 200);
    }
  }

  feedparser.parseUrl(req.body.url)
    .on('article', function (article) {
      console.log(article);

      // Local to this article: a shared variable would be overwritten by the
      // next article before this save reports back.
      var newFeed = new Feed({
        title: article.title,
        permalink: article.link
      });

      pending++;
      newFeed.save(function (err) {
        pending--;
        if (err) {
          respond(err, 400);
          return;
        }
        saved++;
        console.log("Feed saved");
        maybeFinish();
      });
    })
    .on('end', function () {
      ended = true;
      maybeFinish();
    })
    .on('error', function (err) {
      respond(err, 400);
    });
});
/**
 * Fetch and parse the "rssAll" feed of a single job.
 *
 * The feed address is built from `this.url`, so the function has to be
 * invoked with a receiver that carries the base url.
 *
 * @param {String} name: Jenkins job name
 * @param {Function} cb: standard cb(err, result) callback; result is the
 *   list of parsed articles
 */
function parseFeed(name, cb) {
  var jobPath = '/job/' + name + '/rssAll';
  var feedUrl = this.url + jobPath;

  feedparser.parseUrl(feedUrl, function onParsed(err, meta, articles) {
    // The feed meta is not part of the result contract.
    cb(err, articles);
  });
}
/**
 * Parse the feed at `uri` and pass its articles, serialised as a JSON
 * string, to `cb`.
 *
 * On a parse error the error is logged and `cb` is not called.
 *
 * @param {String} uri - feed address
 * @param {Function} cb - called as cb(jsonString)
 */
function r2j (uri,cb){
  var feedParser = new FeedParser();

  feedParser.parseUrl(uri, function onParsed(err, meta, articles){
    if(err){
      return console.error(err);
    }
    var json = JSON.stringify(articles);
    cb(json);
  });
}
// Load the stored feed for `feedId` and parse its xmlUrl.
// `callback` is invoked as callback(err, meta, articles, feed). Lookup
// failures, an unknown id and a feed without an xmlUrl are all reported
// through `err` rather than being thrown from inside the async callback or
// silently dropped.
Feed.findById(new mongoose.Types.ObjectId(feedId), function(err, feed){
  if(err){
    return callback(err);
  }
  if(!feed){
    return callback(new Error('Feed not found: ' + feedId));
  }
  // `== null` also rejects an undefined xmlUrl.
  if(feed.xmlUrl == null){
    return callback(new Error('Feed has no xmlUrl: ' + feedId), undefined, undefined, feed);
  }

  feedparser.parseUrl(feed.xmlUrl, function(err, meta, articles){
    callback(err, meta, articles, feed);
  });
});
//get feeds every 30 minutes
//setInterval(getFeeds, 3000);
//setTimeout(getFeeds, 1000);

/**
 * Start parsing every configured entertainment feed and pass each article
 * that has a title to `calculateDocumentTermMatrix`.
 */
function getFeeds() {
  var sources = [
    //'http://feeds.washingtonpost.com/rss/entertainment',
    'http://rss.cnn.com/rss/edition_entertainment.rss',
    'http://feeds.mercurynews.com/mngi/rss/CustomRssServlet/568/200223.xml'
  ];

  function onArticle(article) {
    // Articles without a title are skipped.
    if (!article || !article.title) {
      return;
    }
    calculateDocumentTermMatrix(article);
  }

  console.log('----------------------feeds-----------------');

  sources.forEach(function (source) {
    feedparser.parseUrl(source).on('article', onArticle);
  });

  // feedparser.parseUrl('http://feeds.mercurynews.com/mngi/rss/CustomRssServlet/568/200223.xml').on('end', callback1);
  // function callback1() {
  //   console.log('~~~~~persist inverted INdex ~~~~');
  //   setTimeout(persistInvertedIndex, 20000);
  // }
}
/**
 * Fetch a feed, store it and add every article it currently contains.
 *
 * @param {String} feedUrl - address of the feed to load
 * @param {String} title - preferred title; when it is empty after trimming,
 *   the title from the feed meta is used instead
 * @param {String} color - 6-character alphanumeric colour; anything that
 *   fails validation is replaced by '007180'
 * @param {Object} user - not used by this method
 * @param {Function} callback - called as callback(err) when the feed cannot
 *   be parsed, otherwise callback(null, feed)
 */
FeedService.prototype.add = function(feedUrl, title, color, user, callback) {
  var _this = this;

  debugLog('Trying to get the feed');

  // Load articles
  parser.parseUrl(feedUrl, function(err, meta, articles) {
    // TODO: Error handling in a better manner
    if (err) {
      // Invalid feed URL
      callback(err);
      return;
    }

    // Create feed. Whichever title wins goes through the XSS filter, so a
    // caller-supplied title is treated like the feed-supplied one.
    title = sanitize(title).trim();
    if (title.length === 0) {
      // Get the title from the feed
      title = meta.title;
    }
    title = sanitize(title).xss();

    // Validate color
    try {
      check(color).len(6).isAlphanumeric();
    } catch (e) {
      console.log('wrong color');
      color = '007180';
    }

    var feed = {
      title: title,
      link: meta.link,
      color: color,
      feed_url: feedUrl,
      xml_url: meta.xmlUrl,
      // Guarded so a missing image object does not throw.
      image_path: meta.image ? meta.image.url : undefined,
      subscribers: []
    };

    _this.db.feeds.save(feed);

    articles.forEach(function (article) {
      // Add one article
      debugLog('Saving article');
      _this.addArticle(feed, {
        title: sanitize(article.title).xss(),
        text: sanitize(article.description).xss(),
        summary: sanitize(sanitize(stripHtml(article.summary)).xss()).entityDecode(),
        link: article.link,
        published_at: article.pubdate,
        updated_at: article.date,
        image: article.image ? article.image.url : undefined
      });
    });

    // Done
    callback(null, feed);
  });
};
function(next){ console.log('Fetching %s.', source); // URL regular expression from: http://blog.mattheworiordan.com/post/13174566389/url-regular-expression-for-links-with-or-without-the if (source.match(/((([A-Za-z]{3,9}:(?:\/\/)?)(?:[-;:&=\+\$,\w]+@)?[A-Za-z0-9.-]+|(?:www.|[-;:&=\+\$,\w]+@)[A-Za-z0-9.-]+)((?:\/[\+~%\/.\w-_]*)?\??(?:[-\+=&;%@.\w_]*)#?(?:[.\!\/\\w]*))?)/)){ feedparser.parseUrl(source, next); } else { feedparser.parseFile(source, next); } },
/**
 * Parse the feed at `url` and add every article that is not yet stored.
 *
 * New items are inserted at the front of the feed's own list in `feedDoc`
 * and pushed to every destination that `getObjects` finds for this url in
 * the "$map" document. Articles are processed oldest-first (the parsed list
 * is reversed in place).
 *
 * @param {String} url - feed address, also used as the key inside feedDoc
 */
function parseFeed(url){
  var destinations = getObjects(feedDoc.at("$map").get(), 'url', url);

  // Shallow copy, so each destination gets its own object and its own status.
  function copyItem(item){
    var copy = {};
    Object.keys(item).forEach(function(key){
      copy[key] = item[key];
    });
    return copy;
  }

  feedparser.parseUrl(url, function (error, meta, articles){
    if (error) {
      console.error(error);
      return;
    }

    var feedItems = feedDoc.at(url).get();
    if (typeof feedItems == 'undefined') {
      console.log('Doc should be created before..');
      feedDoc.at(url).set([]);
      // Keep the local view in step with the list that was just created so
      // the existence check below receives an array.
      feedItems = [];
    }

    articles.reverse().forEach(function (article){
      var isExist = feedExist(feedItems, article.link);
      if (isExist == false){
        var feedItem = {
          'text':article.title,
          'author':article.author,
          'status': 2,
          'url':article.link,
          'timestamp': moment(article.pubDate).format('H:mm, D.M.YYYY')
        };

        feedDoc.at(url).insert(0, feedItem, function (error, data) {
          if (error) {
            console.log("Error at adding feed. " + error);
          } else {
            console.log("Add feed item: "+meta.title +" - " + feedItem.text);
          }
        });

        if ((destinations)&&(destinations.length != 0)){
          destinations.forEach(function(dest){
            // An empty destination status means "use the default of 2".
            var destItem = copyItem(feedItem);
            destItem.status = (dest.status == "") ? 2 : dest.status;
            getDoc(dest.doc).at(dest.topic).push(destItem, function (error, data) {
              if (error) {
                console.log("Error at adding feed. " + error);
              }
            });
          });
        }
      } else {
        //TODO update feed if necessary
      }
    });
  });
}
function init () { var fp = require('feedparser'), Promise = require('rsvp').Promise, promise = new Promise(), articles = [], index = 0; function add (article) { // Make an Article object, add it to an array var obj = {}; obj.index = index; obj.title = article['title']; obj.content = article['description']; obj.link = article['link']; obj.description = article['summary']; obj.content = obj.content.replace(/\<p[^>]*\>\<object[^>]*\>[^<]*\<\/object\>\<\/p\>/g, ''); obj.content = obj.content.replace(/\<script[^>]*\>[^<]*\<\/script\>/g, ''); obj.content = obj.content.replace(/\<iframe[^>]*\>[^<]*\<\/iframe\>/g, ''); // This is to remove <script> tags from the HTML (ie. perez.videoplayer) // See http://stackoverflow.com/questions/6659351/removing-all-script-tags-from-html-with-js-regular-expression // obj.content = obj.content.replace(/<script\b[^<]*(?:(?!<\/script>)<[^<]*)*<\/script>/gi, ''); // This shoudl remove <iframe> tags from the HTML // obj.content = obj.content.replace(/\<iframe[^>]*\>[^\<]\<\/iframe\>/, ''); articles.push(obj); index++; if( articles.length === 10 ){ promise.resolve(articles); } } fp.parseUrl('http://i.perezhilton.com/?feed=atom').on('article', add); fp.parseUrl('http://perezhilton.com/cocoperez/?feed=atom').on('article', add); fp.parseUrl('http://perezhilton.com/perezitos/?feed=atom').on('article', add); return promise; }
// Once the Facebook connection is up: remember the access token, then load
// albums/events and the RSS news feed into `data`. A failed load is logged
// and leaves the previously stored values untouched.
FB.__connect( function( FB ) {
  accessToken = FB.getAccessToken();

  Data.fetchAll( FB, function( err, results ) {
    if ( err || !results ) {
      console.error( 'Failed to fetch Facebook data:', err );
      return;
    }
    data.albums = results.albums;
    data.events = results.events;
  } );

  feedparser.parseUrl( config.RSS_FEED, function( err, meta, articles ) {
    if ( err ) {
      console.error( 'Failed to load RSS feed:', err );
      return;
    }
    data.news = articles;
  } );
} );
/**
 * Fetch the Gizmodo full feed and log its meta line followed by one line
 * (date, title, link) per article. Parse errors are logged to stderr.
 */
FeedReader.read = function(){
  // The address needs an explicit scheme to be a valid request URL.
  var feedUrl = 'http://feeds.gawker.com/gizmodo/full';

  feedparser.parseUrl(feedUrl, function(error, meta, articles){
    if(error){
      console.error(error);
      return;
    }

    console.log('%s - %s - %s', meta.title, meta.link, meta.xmlUrl);
    articles.forEach(function(article){
      console.log("%s - %s(%s)", article.date, article.title, article.link);
    });
  });
};
exports.index = function(req, res){ if (cache.get('github') == null) { var parser = require('feedparser') , tw = '' , git = ''; //it's very very bad parser.parseUrl('https://github.com/pomeo.atom', function (err, meta, articles) { for (var i=0; i<2; i++){ git += articles[i].description.replace(new RegExp('(href\=\")[^http]', 'g'), 'href="https:\/\/github.com\/'); var re = new RegExp('([0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z)|([^><]+?(?=</time>))','g'); var t,str = ''; git = git.replace(re,function(b){ if (!b.match('([0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z)')){ var currentHours = t.getHours(); var currentMinutes = t.getMinutes(); var currentSeconds = t.getSeconds(); var currentDays = t.getDate(); currentHours = (currentHours < 10 ? '0' : '') + currentHours; currentMinutes = (currentMinutes < 10 ? '0' : '') + currentMinutes; currentSeconds = (currentSeconds < 10 ? '0' : '') + currentSeconds; currentDays = (currentDays < 10 ? '0' : '') + currentDays; var myDays = ['Sun','Mon','Tue','Wed','Thu','Fri','Sat','Sun']; var myMonths = ['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec']; str = myDays[t.getDate()] + ' ' + myMonths[t.getMonth()] + ' ' + currentDays + ' ' + currentHours + ':' + currentMinutes + ':' + currentSeconds + ' +0000 ' + t.getFullYear(); return relative_time(str); } else { t = new Date(b); return b; } }); } T.get('statuses/user_timeline', { screen_name: 'pomeo', count: 2 }, function (err, reply) { tw = twitterCall(reply); cache.put('twitter', tw, 1440000); cache.put('github', git, 1440000); res.locals.d = tw; res.locals.g = git; res.render('index', { title: 'Sergey Ovechkin' }); }); }); } else { var tw = cache.get('twitter'); var git = cache.get('github'); res.locals.d = tw; res.locals.g = git; res.render('index', { title: 'Full-Stack Web Developer' }); } };
/**
 * Fetch an "rssAll" feed and return its articles.
 *
 * Feed selection, in order of precedence:
 *   - `opts.jobName`: the feed of that job (base taken from `this._jobUrl`)
 *   - `opts.viewName`: the feed of that view
 *   - neither: the feed at the root of `this.url`
 *
 * @param {Object} [opts] - optional `jobName` / `viewName`
 * @param {Function} cb - standard cb(err, articles) callback
 */
Jenkins.prototype.feed = function (opts, cb) {
  var options = opts || {};
  var base;

  if (options.jobName) {
    base = this._jobUrl(options.jobName);
  } else if (options.viewName) {
    base = this.url + '/view/' + options.viewName;
  } else {
    base = this.url;
  }

  feedparser.parseUrl(base + '/rssAll', function onParsed(err, meta, articles) {
    cb(err, articles);
  });
};
/**
 * Retrieve only the meta information of the feed at `url`.
 *
 * `cb` is invoked exactly once, with whichever happens first:
 *   - cb({ err: "Couldn't retrieve feed." }) for a non-200 response,
 *   - cb(error) when the parser emits 'error',
 *   - cb(null, meta) when the parser emits 'meta'.
 *
 * @param {String} url - feed address
 * @param {Function} cb - cb(err, meta)
 */
var getFeedMeta = function (url, cb) {
  var req = {
    uri: url
  };
  var settled = false;

  // Forwards the first outcome only; later events are ignored.
  function settle() {
    if (settled) { return; }
    settled = true;
    cb.apply(null, arguments);
  }

  feedparser.parseUrl(req)
    .on('response', function (response) {
      if (response.statusCode !== 200) {
        settle({ 'err' : "Couldn't retrieve feed." });
      }
    })
    .on('meta', function (meta) {
      settle(null, meta);
    })
    .on('error', function (error) {
      settle(error);
    });
};
/**
 * Parse and process a feed
 *
 * Every article whose date is not in the future is wrapped in an `RssItem`
 * and appended to the shared `items` array.
 *
 * @param feedUrl - the url to parse
 * @param callback - the callback which is called to indicate async lib that
 *   the parsing of this feed is done; it is called exactly once and receives
 *   the error when the parser reports one
 */
function parseAndProcessFeed(feedUrl, callback) {
  var now = new Date();
  var finished = false;

  function finish(err) {
    if (finished) { return; }
    finished = true;
    if (err) {
      callback(err);
    } else {
      callback();
    }
  }

  feedparser.parseUrl(feedUrl)
    // when parsing is finished, iterate over articles to store them in an
    // array of all articles of all streams
    .on('complete', function onComplete(meta, articles) {
      for (var i = 0; i < articles.length; i++) {
        var article = articles[i];
        // some people put a future date as the pubDate of their articles to
        // stay on top of aggregated feeds; those are skipped
        if (now > Date.parse(article.date)) {
          items.push(new RssItem(article.title, article.summary, article.link, article.author, article.date));
        }
      }
      // tell async that this parse and process is finished
      finish();
    })
    .on('error', function onError(err) {
      finish(err);
    });
}
/**
 * Look up `show` on eztv.ptain.info and start downloading the wanted
 * episode.
 *
 * Season and episode numbers are compared numerically, so "S01E02" matches
 * a feed title containing "S1E2" as well as "S01E02". When the feed cannot
 * be read, is empty, or does not contain the episode, the reason is logged
 * and sent through `res.send`.
 *
 * @param {String} show - show name, appended to the feed query as-is
 * @param {String} needed - episode identifier such as "S01E02"
 * @param {Object} res - object with a `send` method; also handed to `download`
 * @throws {Error} when `show` or `needed` is missing
 */
function downloadIfNotExists(show, needed, res){
  if(!needed || !show){
    throw new Error('No arguments specified!');
  }

  var parser = new FeedParser();

  // Ugly hax: "S01E02" -> ["01", "02"] -> [1, 2]
  var parts = needed.replace('S', '').split('E');
  var wantedSeason = parseInt(parts[0], 10);
  var wantedEpisode = parseInt(parts[1], 10);

  // Zero-padded form, used in messages only.
  var label = 'S' + (wantedSeason < 10 ? '0' + wantedSeason : wantedSeason) +
              'E' + (wantedEpisode < 10 ? '0' + wantedEpisode : wantedEpisode);

  function report(message){
    console.log(message);
    res.send(message);
  }

  // Returns the link of the first article whose title names the wanted
  // season and episode, or null when there is none.
  function findLink(articles){
    for(var i = 0; i < articles.length; i++){
      var match = articles[i].title.match(/S(\d*)E(\d*)/);
      if(match &&
         parseInt(match[1], 10) === wantedSeason &&
         parseInt(match[2], 10) === wantedEpisode){
        return articles[i].link;
      }
    }
    return null;
  }

  parser.parseUrl('http://eztv.ptain.info/cgi-bin/eztv.pl?name=' + show, function findTorrent(error, meta, articles){
    if(error){
      console.log(error);
      res.send('Could not fetch torrents for show ' + show);
      return;
    }
    if(articles.length == 0){
      report('No torrents found. This might be an invalid show or episode, or eztv.ptain.info might be down.');
      return;
    }

    var link = findLink(articles);
    if(link){
      console.log('Starting download...');
      download(link, res);
    }else{
      report('Could not find ' + label + ' for show ' + show);
    }
  });
}
async.map(urls, function(url, cb) { if (!url) { cb([]); return; } try { feedparser.parseUrl(url, function(err, meta, articles) { if (err) { console.log('error by aggregator[1] "' + aggregatorName + '" in parsing "' + url + '":', err); cb(null, []); return; } cb(null, articles); }); } catch (err) { console.log('error by aggregator[2] "' + aggregatorName + '" in parsing "' + url + '":', err); cb(null, []); } },
/**
 * Parse the feed at `url` and convert every article into an object built
 * with the `util.obj*` helpers.
 *
 * Each object gets an id derived from the article guid, the article title,
 * a `createdAt` timestamp (article date, or the current time when the date
 * is missing or unparseable), the description, an optional geo location,
 * the tag 'RSSItem' and the article link as 'rssItemURL'.
 *
 * @param {String} url - feed address
 * @param {Function} perArticle - called as perArticle(object, rawArticle)
 */
var RSSFeed = function(url, perArticle) {

  function onArticle(a) {
    var createdAt = a['date'] ? new Date(a['date']).getTime() : NaN;
    if (isNaN(createdAt)) {
      createdAt = Date.now();
    }

    var x = util.objNew( util.MD5(a['guid']), a['title'] );
    x.createdAt = createdAt;

    util.objAddDescription(x, a['description']);

    if (a['georss:point']) {
      util.objAddGeoLocation(x, a['georss:point'][0], a['georss:point'][1] );
    }
    // Both coordinates are required; latitude alone is ignored.
    if (a['geo:lat'] && a['geo:long']) {
      util.objAddGeoLocation(x, parseFloat(a['geo:lat']['#']), parseFloat(a['geo:long']['#']) );
    }

    util.objAddTag(x, 'RSSItem');
    util.objAddValue(x, 'rssItemURL', a['link']);

    perArticle(x, a);
  }

  try {
    feedparser.parseUrl(url)
      .on('article', onArticle)
      // Failures reported through the emitter are not caught by the
      // surrounding try/catch, so they are logged here.
      .on('error', function(e) {
        console.error(e);
      });
  }
  catch (e) {
    console.error(e);
  }
}
/**
 * Fetch `feed.url`, optionally as a conditional request, and report whether
 * the feed has new content.
 *
 * `cb` is invoked exactly once:
 *   - cb(null, true, articles, meta) after a 200 response has been parsed,
 *   - cb(null, false, []) for a 304 response,
 *   - cb({ err, statusCode }) for any other status,
 *   - cb(error) when the parser emits 'error'.
 *
 * On a 200 response `feed.lastETag` is updated from the response headers,
 * and `feed.lastModifiedDate` is updated when a Last-Modified header is
 * present.
 *
 * @param {Object} feed - feed record with a `url` property
 * @param {Number|String|Date} lastUpdate - time of the previous update; when
 *   it is a valid date it is sent as If-Modified-Since
 * @param {Function} cb - see above
 */
var updateFeed = function (feed, lastUpdate, cb) {
  var req = {
    uri: feed.url
  };

  if (lastUpdate) {
    var since = new Date(lastUpdate);
    if (!isNaN(since.getTime())) {
      req.headers = req.headers || {};
      // Header values are strings; toUTCString() yields the HTTP date form.
      req.headers['If-Modified-Since'] = since.toUTCString();
    }
  }

  /*if ("lastModifiedDate" in feed) {
    req.headers = req.headers || {};
    req.headers['If-Modified-Since'] = feed.lastModifiedDate;
  }
  if ("lastETag" in feed) {
    req.headers = req.headers || {};
    req.headers['If-None-Match'] = feed.lastETag;
  }*/

  var update = false;
  var settled = false;

  // Forwards the first outcome only, so a non-200 response followed by
  // 'complete' does not call back twice.
  function settle() {
    if (settled) { return; }
    settled = true;
    cb.apply(null, arguments);
  }

  feedparser.parseUrl(req)
    .on('response', function (response) {
      if (response.statusCode === 200) {
        var lastModified = response.headers['last-modified'];
        if (lastModified) {
          feed.lastModifiedDate = (new Date(lastModified)).getTime();
        }
        feed.lastETag = response.headers['etag'];
        update = true;
        return;
      }

      update = false;
      if (response.statusCode === 304) {
        settle(null, update, []);
      } else {
        settle({ 'err' : "Couldn't retrieve feed.", 'statusCode': response.statusCode});
      }
    })
    .on('complete', function (meta, articles) {
      settle(null, update, articles, meta);
    })
    .on('error', function (error) {
      settle(error);
    });
};
// Worker for 'feed' jobs: poll the feed at job.data.url, broadcast to the
// feed's subscribers when a previous update marker exists, store the newest
// article's pubDate as the new marker and queue the next poll.
//
// `done` is called exactly once per job. The next poll is queued on every
// path that reaches the parser callback, including failures, so a transient
// error does not stop the polling cycle.
jobs.process('feed', function(job, done){
  var url = job.data.url;
  var finished = false;

  function finish(err){
    if(finished){ return; }
    finished = true;
    if(err){
      done(err);
    }else{
      done();
    }
  }

  function reschedule(){
    jobs.create('feed', job.data).delay(minute).save();
  }

  try {//this could fly if i validate the url via middleware/client
    feedparser.parseUrl(url, function(error, meta, articles){
      if(error){
        reschedule();
        return finish(error);
      }
      // An empty list is treated like a missing one; there is no newest
      // article to read a pubDate from.
      if(!articles || articles.length === 0){
        reschedule();
        return finish('No Articles O.o!');
      }

      client.get(url, function(error, last_updated){
        if(error){
          reschedule();
          return finish(error);
        }

        var latest = articles[0].pubDate;

        if(!last_updated){
          //Set the latest
          client.set(url, latest, function(error, data){
            reschedule();
            finish(error);
          });
          return;
        }

        // 0..-1 reads the whole subscriber list.
        client.lrange(url+'_subscribers', 0, -1, function(error, subscribers){
          if(subscribers){
            broadcast_feed(articles, last_updated, subscribers);
          }else if(error){
            // The job has already been completed at this point; log only.
            console.error(error);
          }
        });

        reschedule();
        client.set(url, latest);
        finish();
      });
    });
  }catch(err){
    finish(err);
  }
});
/**
 * Read a source's feed and run every article through the diffbot / share
 * count / MongoDB pipeline.
 *
 * @param {String} source - forwarded to shared.makediffbotAPIcall
 * @param {String} category - forwarded to shared.makediffbotAPIcall
 * @param {String} url - feed address
 * @param {String} parser - 'feed' (feedparser) or 'rss' (rssparser); any
 *   other value is logged and ignored
 * @param {String} [urlField] - article property holding the article URL;
 *   defaults to "guid" for 'feed' and "url" for 'rss'
 */
function getData(source, category, url, parser, urlField) {
  console.log("in data section")

  // Shared pipeline: diffbot -> share count -> save.
  function processArticle(article, field) {
    shared.makediffbotAPIcall(article[field], category, source, function(object) {
      shared.getNumberofShares(article[field], function(shares) {
        object.shares = shares;
        console.log(object);
        shared.saveObjectToMongoDB(object, object.category, db);
      });
    });
  }

  if (parser == 'feed') {
    var feedField = urlField || "guid";
    feedparser.parseUrl(url)
      .on('article', function(article) {
        console.log("sending to diffbot")
        processArticle(article, feedField);
      })
      .on('error', function(err) {
        console.error(err);
      });
  }
  else if (parser == 'rss') {
    var rssField = urlField || "url";
    rssparser.parseURL(url, function(err, out) {
      if (err || !out || !out.items) {
        console.error('Could not read rss feed "' + url + '":', err);
        return;
      }
      out.items.forEach(function(article) {
        processArticle(article, rssField);
      });
    });
  }
  else {
    console.error('Unknown parser type "' + parser + '" for "' + url + '"');
  }
}
function() { feedparser.parseUrl('http://api.twitter.com/1/statuses/user_timeline.rss?screen_name=nsteinmetz').on('article', displayTitle) },
/**
 * Asynchronously load the RSS feed for `item` and attach the result to it.
 *
 * The properties added start with an _ to denote that they shouldn't become
 * data attributes or get stored back to MongoDB:
 *   - item._entries: array of { title, body, date, link }
 *   - item._failed: true when the feed could not be fetched
 *
 * Results, including failures, are cached under the key returned by
 * `self.getKey(item)`. Requests that arrive while a fetch for the same key
 * is in flight are queued and answered from that fetch.
 *
 * @param {Object} item - widget item with `feed` and `limit` properties
 * @param {Function} callback - called without arguments once item is populated
 */
self.loadFeed = function(item, callback) {
  item._entries = [];

  var now = Date.now();

  // Take all properties into account, not just the feed, so the cache
  // doesn't prevent us from seeing a change in the limit property right away
  var key = self.getKey(item);

  // If we already have it, deliver it
  if (self.currentInCache(key, now)) {
    item._entries = cache[key].data;
    item._failed = cache[key].failed;
    return callback();
  }

  // If we're already waiting for it, join the queue
  if (pending[key]) {
    pending[key].push({
      item: item,
      callback: function() {
        return callback();
      }
    });
    return;
  }

  // Start a pending queue for this request
  pending[key] = [];

  // Results of this fetch. They are kept in locals because `item` may be the
  // same object for a joining request, whose `item._entries = []` reset above
  // would otherwise wipe what was fetched.
  var entries = [];
  var failed;
  var finished = false;

  feedparser.parseUrl(item.feed).on('complete', function(meta, articles) {
    // map is native in node
    entries = articles.slice(0, item.limit).map(function(article) {
      return {
        title: article.title,
        body: article.description,
        date: article.pubDate,
        link: article.link
      };
    });
    // Cache for fast access later
    cache[key] = { when: now, data: entries };
    return done();
  }).on('error', function(error) {
    // Cache failures too, don't go crazy trying to get
    // to a feed that's down
    failed = true;
    cache[key] = { when: now, failed: true };
    return done();
  });

  function done() {
    // Only the first of 'complete' / 'error' is acted upon.
    if (finished) {
      return;
    }
    finished = true;

    item._entries = entries;
    item._failed = failed;

    // Notify everyone else who was waiting for this
    // fetch to finish
    var waiting = pending[key];
    delete pending[key];
    _.each(waiting, function(i) {
      i.item._entries = entries;
      i.item._failed = failed;
      return i.callback();
    });
    return callback();
  }
};
function() { feedparser.parseUrl('http://archives.steinmetz.fr/tutoriels/feeds/all.atom.xml').on('article', displayTitle) },
// Schedule one more parse of params.url, with `inspect` as the parser
// callback, after interval.max.
// NOTE(review): interval.max is passed to setTimeout unscaled, i.e. it is
// interpreted as milliseconds; confirm that this is the intended unit.
setTimeout(function reparse() {
  parser.parseUrl(params.url, inspect);
}, interval.max)
// Schedule one more parse of params.url, with `inspect` as the parser
// callback, after interval.current * 1000 milliseconds.
setTimeout(function reparse() {
  parser.parseUrl(params.url, inspect);
}, interval.current * 1000)