Esempio n. 1
0
/**
 * Parse the feed at `url`, store it as a `Feed` document and pass the saved
 * document together with the parsed articles to `callback`.
 *
 * A parse failure is only logged; `callback` is not invoked in that case.
 * A save failure is rethrown from the save callback, as before.
 *
 * @param {String} url - address of the feed to add
 * @param {Function} callback - called as callback(savedFeed, articles) after a successful save
 */
FeedReader.addFeed = function(url, callback){
    feedparser.parseUrl(url,
        function(error, meta, articles){
            if(error) console.error(error);
            else{
                var Feed = mongoose.model('Feed');
                // `!= null` covers both null and undefined, so the requested url is
                // used whenever the feed does not report its own address.
                var xmlUrl = meta.xmlUrl != null ? meta.xmlUrl : url;
                var feed = new Feed({
                    _id: new mongoose.Types.ObjectId,
                    name: meta.title,
                    link: meta.link,
                    xmlUrl: xmlUrl
                });

                console.log('Add feed. %s - %s - %s', meta.title, meta.link, meta.xmlUrl);

                feed.save(function(err, f){
                    if(err) throw err;
                    else{
                        console.log('Success to save feed.');
                        callback(f, articles);
                    }
                });
            }
        });
};
Esempio n. 2
0
exports.findByGoogleRSS = function(artist, done) {
  var results = [];

  parser.parseUrl('http://news.google.com/news/feeds?q=' + encodeURIComponent(artist))

  .on('article', function(article) {
    var excerpt;

    Apricot.parse(article.description, function(err, doc) {
      if (err) { process.exit(); }

      var $ = jquery.create(doc.window);

      excerpt = $('div.lh font:nth-child(5)')[0].innerHTML;
    });

    results.push({
      url: article.link,
      title: article.title,
      excerpt: excerpt
    });
  })

  .on('end', function(err) {
     done(err, results);
  })

  .on('error', function(err) {
    done(err);
  });
}
Esempio n. 3
0
/**
 * Search the provider feed of a show for one episode.
 *
 * `needed` is an episode code such as "S1E2"; it is split into
 * [season, episode] and, when `this.withZero` is set, single-digit values are
 * left-padded with a zero. Every article of the feed is handed to
 * `this.parse(article, needed, callback)` until that call returns a falsy value.
 * Feed errors and empty feeds are only logged.
 *
 * @param {String} title - show title, passed through this.fixTitle into the feed url
 * @param {String} needed - episode code in S<season>E<episode> form
 * @param {Function} callback - forwarded untouched to this.parse
 * @throws {Error} when title or needed is missing
 */
Provider.prototype.getTorrent = function getTorrent(title, needed, callback){
	var parser = new FeedParser(), self = this;
	if(!needed || !title){
		throw new Error('No arguments specified!');
	}
	// Ugly hax
	needed = needed.replace('S', '').split('E');
	// Convert from string to number
	needed[0] = parseInt(needed[0], 10);
	needed[1] = parseInt(needed[1], 10);
	if(self.withZero){
		if(needed[0] < 10){
			needed[0] = '0' + needed[0];
		}
		if(needed[1] < 10){
			needed[1] = '0' + needed[1];
		}
	}
	parser.parseUrl(self.url.replace('%show%', self.fixTitle(title)), function(error, meta, articles){
		if(error){
			console.log(error);
		}else if(articles.length === 0){
			console.log('No torrents found. This might be an invalid show or episode, or the provider might be down.');
		}else{
			// Abuse of every (http://stackoverflow.com/questions/6260756/how-to-stop-javascript-foreach)
			articles.every(function(article){
				return self.parse(article, needed, callback);
			});
		}
	});

};
Esempio n. 4
0
 // Parse the feed of one `conseiller` with its own parser instance. The IIFE
 // pins the current value so the asynchronous callback reports the right one.
 (function(current){
   var feedParser = new FeedParser();
   var releaseParser = function(n) { feedParser = null; };
   var forwardResult = function(error, meta, articles) {
     feedCallback(error, meta, articles, current);
   };

   // Drop the parser reference once parsing has ended.
   feedParser.on('end', releaseParser);
   feedParser.parseUrl(current.feed, forwardResult);
 })(conseiller);
Esempio n. 5
0
// POST /api/feed
// Parses the feed at req.body.url and stores one Feed document per article
// (title + permalink). The request is answered exactly once: with status 400
// and the error on the first parse or save failure, otherwise with status 200
// and the list of saved documents once the feed has ended and every save has
// completed.
app.post('/api/feed', function (req, res) {
    var saved = [];
    var pendingSaves = 0;
    var parsed = false;     // the feed stream has finished
    var responded = false;  // a response has already been sent

    function respond(body, status) {
        if (responded) { return; }
        responded = true;
        res.send(body, status);
    }

    function respondWhenDone() {
        if (parsed && pendingSaves === 0) {
            respond(saved, 200);
        }
    }

    feedparser.parseUrl(req.body.url)
    .on('article', function(article){
        console.log(article);

        // Local to this article; the original leaked `newFeed` as a global.
        var newFeed = new Feed({
            title: article.title,
            permalink: article.link
        });

        pendingSaves++;
        newFeed.save (function (err) {
            pendingSaves--;
            if (err) {
                respond(err, 400);
            } else {
                console.log("Feed saved");
                saved.push(newFeed);
                respondWhenDone();
            }
        });
    })
    .on('error', function (err) {
        respond(err, 400);
    })
    .on('end', function () {
        parsed = true;
        respondWhenDone();
    });
});
Esempio n. 6
0
/**
 * Fetch the complete RSS feed of a Jenkins job and pass on its articles.
 * The feed address is built from `this.url`, so the function has to be
 * invoked with a receiver that carries the Jenkins base url.
 *
 * @param {String} name: Jenkins job name
 * @param {Function} cb: standard cb(err, result) callback; result is the parsed article list
 */
function parseFeed(name, cb) {
  var jobFeedUrl = this.url + '/job/' + name;
  jobFeedUrl += '/rssAll';

  // The feed metadata is not needed by callers, only the articles.
  var onParsed = function (err, meta, articles) {
    cb(err, articles);
  };

  feedparser.parseUrl(jobFeedUrl, onParsed);
}
Esempio n. 7
0
/**
 * Parse the feed at `uri` and hand its articles to `cb` as one JSON string.
 * A parse failure is only written to the error log; `cb` is not called then.
 *
 * @param {String} uri - feed address
 * @param {Function} cb - called as cb(jsonString) on success
 */
function r2j (uri,cb){
	var feed = new FeedParser();
	feed.parseUrl(uri, function(err, meta, articles){
		if(!err){
			cb(JSON.stringify(articles));
			return;
		}
		console.error(err);
	});
}
Esempio n. 8
0
 // Load the stored feed `feedId` and parse its XML source. Every outcome is
 // reported through callback(err, meta, articles, feed): lookup errors, an
 // unknown id and a feed without xmlUrl arrive as `err` instead of being
 // thrown from (or silently dropped in) the asynchronous lookup callback.
 Feed.findById(new mongoose.Types.ObjectId(feedId), function(err, feed){
     if(err) return callback(err);
     // The lookup yields no document when the id is unknown.
     if(!feed) return callback(new Error('Feed not found: ' + feedId));
     if(feed.xmlUrl == null) return callback(new Error('Feed has no xmlUrl: ' + feedId), null, null, feed);

     feedparser.parseUrl(feed.xmlUrl, function(err, meta, articles){
         callback(err, meta, articles, feed);
     });
 });
	//get feeds periodically (30 minutes would be 30 * 60 * 1000 ms; the disabled timers below use much shorter delays)
	//setInterval(getFeeds, 3000);
	//setTimeout(getFeeds, 1000);
	/**
	 * Start parsing the configured entertainment feeds and pass every article
	 * that carries a title to calculateDocumentTermMatrix.
	 */
	function getFeeds() {
		var feedUrls = [
			// 'http://feeds.washingtonpost.com/rss/entertainment' is currently disabled
			'http://rss.cnn.com/rss/edition_entertainment.rss',
			'http://feeds.mercurynews.com/mngi/rss/CustomRssServlet/568/200223.xml'
		];

		// Articles without a title are skipped.
		var indexArticle = function (article) {
			if (!article || !article.title) {
				return;
			}
			calculateDocumentTermMatrix(article);
		};

		console.log('----------------------feeds-----------------');
		feedUrls.forEach(function (feedUrl) {
			feedparser.parseUrl(feedUrl).on('article', indexArticle);
		});
	}
Esempio n. 10
0
/**
 * Subscribe to a new feed: parse `feedUrl`, store a feed record through
 * this.db.feeds.save and pass every parsed article to this.addArticle.
 *
 * The title falls back to the title reported by the feed when the given one
 * is empty after trimming; a color that fails validation (6 alphanumeric
 * characters) is replaced by '007180'.
 *
 * @param {String} feedUrl - address of the feed
 * @param {String} title - user-chosen title, may be empty
 * @param {String} color - color code for the feed
 * @param {Object} user - not used by this method
 * @param {Function} callback - called as callback(err) or callback(null, feed)
 */
FeedService.prototype.add = function(feedUrl, title, color, user, callback) {
	var _this = this;
	debugLog('Trying to get the feed');

	// Load articles
	parser.parseUrl(feedUrl, function(err, meta, articles) {
		// TODO: Error handling in a better manner
		if (err) {
			// Invalid feed URL
			callback(err);
			return;
		}
		// Create feed
		title = sanitize(title).trim();
		if (title.length === 0) {
			// Get the title from the feed
			title = sanitize(meta.title).xss();
		}
		// Validate color
		try {
			check(color).len(6).isAlphanumeric();
		} catch (e) {
			console.log('wrong color');
			color = '007180';
		}

		var feed = {
			title: title,
			link: meta.link,
			color: color,
			feed_url: feedUrl,
			xml_url: meta.xmlUrl,
			// Guarded: a missing image must not abort the whole subscription.
			image_path: meta.image ? meta.image.url : undefined,
			subscribers: []
		};
		_this.db.feeds.save(feed);

		articles.forEach(function (article) {
			// Add one article
			debugLog('Saving article');
			_this.addArticle(feed, {
				title: sanitize(article.title).xss(),
				text: sanitize(article.description).xss(),
				summary: sanitize(sanitize(stripHtml(article.summary)).xss()).entityDecode(),
				link: article.link,
				published_at: article.pubdate,
				updated_at: article.date,
				image: article.image ? article.image.url : undefined
			});
		});

		// Done
		callback(null, feed);
	});
};
Esempio n. 11
0
      function(next){
        console.log('Fetching %s.', source);

        // URL regular expression from: http://blog.mattheworiordan.com/post/13174566389/url-regular-expression-for-links-with-or-without-the
        if (source.match(/((([A-Za-z]{3,9}:(?:\/\/)?)(?:[-;:&=\+\$,\w]+@)?[A-Za-z0-9.-]+|(?:www.|[-;:&=\+\$,\w]+@)[A-Za-z0-9.-]+)((?:\/[\+~%\/.\w-_]*)?\??(?:[-\+=&;%@.\w_]*)#?(?:[.\!\/\\w]*))?)/)){
          feedparser.parseUrl(source, next);
        } else {
          feedparser.parseFile(source, next);
        }
      },
Esempio n. 12
0
/**
 * Parse the feed at `url` and add every article that is not yet known to the
 * shared document `feedDoc.at(url)`; new items are also pushed to every
 * destination mapped to this url in feedDoc's "$map".
 *
 * Articles are processed in reverse feed order and each new item is inserted
 * at index 0 of the feed document. Parse errors are only logged.
 *
 * @param {String} url - feed address, also the key of the feed document
 */
function parseFeed(url){
    var destinations = getObjects(feedDoc.at("$map").get(), 'url', url);
    feedparser.parseUrl(url, function (error, meta, articles){
        if (error) {
            console.error(error);
            return;
        }

        var feedItems = feedDoc.at(url).get();

        if (typeof feedItems == 'undefined') {
            console.log('Doc should be created before..');
            feedDoc.at(url).set([]);
            // Keep the local snapshot in step with the document that was just
            // created; otherwise the duplicate check below receives undefined.
            feedItems = [];
        }

        articles.reverse().forEach(function (article){
            var isExist = feedExist(feedItems, article.link);
            if (isExist == false){
                var feedItem = {
                    'text':article.title,
                    'author':article.author,
                    'status': 2,
                    'url':article.link,
                    'timestamp': moment(article.pubDate).format('H:mm, D.M.YYYY')
                };

                feedDoc.at(url).insert(0, feedItem, function (error, data) {
                    if (error) {
                        console.log("Error at adding feed. " + error);
                    } else {
                        console.log("Add feed item: "+meta.title +" - " + feedItem.text);
                    }
                });

                if ((destinations)&&(destinations.length != 0)){
                    destinations.forEach(function(dest){
                        // An empty destination status falls back to 2.
                        feedItem.status = (dest.status == "") ? 2 : dest.status;
                        getDoc(dest.doc).at(dest.topic).push(feedItem, function (error, data) {
                            if (error) {
                                console.log("Error at adding feed. " + error);
                            }
                        });
                    });
                }
            }
            else {
                //TODO update feed if necessary
            }
        });
    });
}
Esempio n. 13
0
function init () {
  var fp       = require('feedparser'),
      Promise  = require('rsvp').Promise,
      promise  = new Promise(),
      articles = [],
      index    = 0;

  function add (article) {
    // Make an Article object, add it to an array
    var obj     = {};
    obj.index   = index;
    obj.title   = article['title'];
    obj.content = article['description'];
    obj.link    = article['link'];
    obj.description = article['summary'];

    obj.content = obj.content.replace(/\<p[^>]*\>\<object[^>]*\>[^<]*\<\/object\>\<\/p\>/g, '');
    obj.content = obj.content.replace(/\<script[^>]*\>[^<]*\<\/script\>/g, '');
    obj.content = obj.content.replace(/\<iframe[^>]*\>[^<]*\<\/iframe\>/g, '');

    // This is to remove <script> tags from the HTML (ie. perez.videoplayer)
    // See http://stackoverflow.com/questions/6659351/removing-all-script-tags-from-html-with-js-regular-expression
    // obj.content = obj.content.replace(/<script\b[^<]*(?:(?!<\/script>)<[^<]*)*<\/script>/gi, '');

    // This shoudl remove <iframe> tags from the HTML
    // obj.content = obj.content.replace(/\<iframe[^>]*\>[^\<]\<\/iframe\>/, '');
    
    articles.push(obj);

    index++;

    if( articles.length === 10 ){
      promise.resolve(articles);
    }
  }

  fp.parseUrl('http://i.perezhilton.com/?feed=atom').on('article', add);
  fp.parseUrl('http://perezhilton.com/cocoperez/?feed=atom').on('article', add);
  fp.parseUrl('http://perezhilton.com/perezitos/?feed=atom').on('article', add);

  return promise;
}
Esempio n. 14
0
    // Once the Facebook connection is up, remember the access token and
    // refresh the shared `data` object: albums and events from Data.fetchAll,
    // news from the configured RSS feed. A failed fetch is logged and leaves
    // the previously stored values untouched.
    FB.__connect( function( FB ) {
        accessToken = FB.getAccessToken();

        Data.fetchAll( FB, function( err, results ) {
            if ( err || !results ) {
                console.error( 'Could not fetch albums and events:', err );
                return;
            }
            data.albums = results.albums;
            data.events = results.events;
        } );

        feedparser.parseUrl( config.RSS_FEED, function( err, meta, articles ) {
            if ( err ) {
                console.error( 'Could not fetch news feed:', err );
                return;
            }
            data.news = articles;
        } );
    } );
Esempio n. 15
0
/**
 * Parse the Gizmodo full feed and print its metadata followed by one line
 * (date, title, link) per article. Parse errors are written to the error log.
 */
FeedReader.read = function(){
    // The address needs an explicit scheme to be a valid request url.
    feedparser.parseUrl('http://feeds.gawker.com/gizmodo/full',
        function(error, meta, articles){
            if(error) console.error(error);
            else{
                console.log('%s - %s - %s', meta.title, meta.link, meta.xmlUrl);
                articles.forEach(function(article){
                    console.log("%s - %s(%s)", article.date, article.title, article.link);
                });
            }
        });
};
Esempio n. 16
0
exports.index = function(req, res){
  if (cache.get('github') == null) {
    var parser = require('feedparser')
    , tw = ''
    , git = '';

    //it's very very bad
    parser.parseUrl('https://github.com/pomeo.atom', function (err, meta, articles) {
      for (var i=0; i<2; i++){
        git += articles[i].description.replace(new RegExp('(href\=\")[^http]', 'g'), 'href="https:\/\/github.com\/');
        var re = new RegExp('([0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z)|([^><]+?(?=</time>))','g');

        var t,str = '';

        git = git.replace(re,function(b){
          if (!b.match('([0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z)')){
            var currentHours = t.getHours();
            var currentMinutes = t.getMinutes();
            var currentSeconds = t.getSeconds();
            var currentDays = t.getDate();
            currentHours = (currentHours < 10 ? '0' : '') + currentHours;
            currentMinutes = (currentMinutes < 10 ? '0' : '') + currentMinutes;
            currentSeconds = (currentSeconds < 10 ? '0' : '') + currentSeconds;
            currentDays = (currentDays < 10 ? '0' : '') + currentDays;
            var myDays = ['Sun','Mon','Tue','Wed','Thu','Fri','Sat','Sun'];
            var myMonths = ['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec'];
            str = myDays[t.getDate()] + ' ' + myMonths[t.getMonth()] + ' ' + currentDays + ' ' + currentHours + ':' + currentMinutes + ':' + currentSeconds + ' +0000 ' + t.getFullYear();
            return relative_time(str);
          } else {
            t = new Date(b);
            return b;
          }
        });
      }
      T.get('statuses/user_timeline', { screen_name: 'pomeo', count: 2 },  function (err, reply) {
        tw = twitterCall(reply);
        cache.put('twitter', tw, 1440000);
        cache.put('github', git, 1440000);
        res.locals.d = tw;
        res.locals.g = git;
        res.render('index', { title: 'Sergey Ovechkin' });
      });
    });
  } else {
    var tw = cache.get('twitter');
    var git = cache.get('github');
    res.locals.d = tw;
    res.locals.g = git;
    res.render('index', { title: 'Full-Stack Web Developer' });
  }
};
Esempio n. 17
0
/**
 * Retrieve the complete RSS feed of a job, a view or the whole Jenkins
 * instance, whichever `opts` selects (jobName takes precedence over viewName).
 *
 * @param {Object} opts - optional; may carry jobName or viewName
 * @param {Function} cb - standard cb(err, articles) callback
 */
Jenkins.prototype.feed = function (opts, cb) {
  var settings = opts || {};
  var base;

  if (settings.jobName) {
    base = this._jobUrl(settings.jobName);
  } else if (settings.viewName) {
    base = this.url + '/view/' + settings.viewName;
  } else {
    base = this.url;
  }

  // Callers only get the articles; the feed metadata is dropped.
  var onParsed = function (err, meta, articles) {
    cb(err, articles);
  };
  feedparser.parseUrl(base + '/rssAll', onParsed);
};
Esempio n. 18
0
/**
 * Fetch only the metadata of the feed at `url`.
 *
 * `cb` is invoked exactly once: with an error object when the server does not
 * answer with status 200 or the parser reports an error, otherwise with
 * (null, meta) as soon as the metadata has been parsed.
 *
 * @param {String} url - feed address
 * @param {Function} cb - called as cb(err) or cb(null, meta)
 */
var getFeedMeta = function (url, cb) {
    var req = {
        uri: url
    };
    var answered = false;

    // A non-200 response may still be followed by other events, so guard the callback.
    function answer(err, meta) {
        if (answered) { return; }
        answered = true;
        if (err) { cb(err); } else { cb(null, meta); }
    }

    feedparser.parseUrl(req)
        .on('response', function (response) {
            if (response.statusCode !== 200) {
                answer({ 'err' : "Couldn't retrieve feed." });
            }
        })
        .on('meta', function (meta) {
            answer(null, meta);
        })
        .on('error', function (error) {
            answer({ 'err' : "Couldn't retrieve feed.", 'cause': error });
        });
};
Esempio n. 19
0
/**
 * Parse and process a feed: every article whose date is not in the future is
 * wrapped in an RssItem and appended to the shared `items` list.
 *
 * `callback` is invoked exactly once, also when the feed cannot be parsed, so
 * one broken feed does not stall the surrounding async run.
 *
 * @param feedUrl - the url to parse
 * @param callback - the callback which is called to indicate async lib that the parsing of this feed is done
 */
function parseAndProcessFeed(feedUrl, callback) {
	var now = new Date();
	var finished = false;

	function finish() {
		if (finished) { return; }
		finished = true;
		// tell async that this parse and process is finished
		callback();
	}

	// when parsing is finished, iterate over articles to store them in an array of all articles of all streams
	feedparser.parseUrl(feedUrl).on('complete', function onComplete(meta, articles) {
		for (var i = 0; i < articles.length; i++) {
			var article = articles[i];
			// some people put a future date as the pubDate of their articles to stay on top of aggregated feeds
			if (now > Date.parse(article.date)) {
				items.push(new RssItem(article.title, article.summary, article.link, article.author, article.date));
			}
		}
		finish();
	}).on('error', function onError(error) {
		console.error('Could not parse feed "' + feedUrl + '":', error);
	}).on('end', function onEnd() {
		// 'end' also fires when parsing was aborted, e.g. after an error.
		finish();
	});
}
Esempio n. 20
0
/**
 * Look for one episode of `show` in the eztv feed and start its download.
 *
 * `needed` is an episode code such as "S1E2". Season and episode are compared
 * numerically, so "S01E02" and "S1E2" in an article title both match. When no
 * article matches, or the matching article has no link, a "Could not find"
 * message is logged and sent through `res`.
 * Feed errors and empty feeds are only logged, as before.
 *
 * @param {String} show - show name, appended to the feed query
 * @param {String} needed - episode code in S<season>E<episode> form
 * @param {Object} res - response object, passed to download() or used for the not-found message
 * @throws {Error} when show or needed is missing
 */
function downloadIfNotExists(show, needed, res){
	var parser = new FeedParser();
	if(!needed || !show){
		throw new Error('No arguments specified!');
	}
	// Ugly hax
	needed = needed.replace('S', '').split('E');
	var wantedSeason = parseInt(needed[0], 10);
	var wantedEpisode = parseInt(needed[1], 10);
	// Zero-padded copies are only used for the S01E02-style message below.
	needed[0] = wantedSeason < 10 ? '0' + wantedSeason : wantedSeason;
	needed[1] = wantedEpisode < 10 ? '0' + wantedEpisode : wantedEpisode;

	function callback(link, res){
		if(link){
			console.log('Starting download...');
			download(link, res);
		}else{
			console.log('Could not find S' + needed.join('E') + ' for show ' + show);
			res.send('Could not find S' + needed.join('E') + ' for show ' + show);
		}
	}

	parser.parseUrl('http://eztv.ptain.info/cgi-bin/eztv.pl?name=' + show, function findTorrent(error, meta, articles){
		if(error){
			console.log(error);
			return;
		}
		if(articles.length === 0){
			console.log('No torrents found. This might be an invalid show or episode, or eztv.ptain.info might be down.');
			return;
		}
		var link = null;
		// Stop at the first article whose title names the wanted episode.
		articles.some(function (article){
			var title = article.title.match(/S(\d*)E(\d*)/);
			if(title && parseInt(title[1], 10) === wantedSeason && parseInt(title[2], 10) === wantedEpisode){
				link = article.link;
				return true;
			}
			return false;
		});
		// Called in both cases so a missing episode is reported too.
		callback(link, res);
	});
}
Esempio n. 21
0
 async.map(urls, function(url, cb) {
   if (!url) {
     cb([]);
     return;
   }
   try {
     feedparser.parseUrl(url, function(err, meta, articles) {
       if (err) {
         console.log('error by aggregator[1] "' + aggregatorName + '" in parsing "' + url + '":', err);
         cb(null, []);
         return;
       }
       cb(null, articles);
     });
   } catch (err) {
     console.log('error by aggregator[2] "' + aggregatorName + '" in parsing "' + url + '":', err);
     cb(null, []);
   }
 },
Esempio n. 22
0
/**
 * Stream the feed at `url` and convert every article into an object built
 * with the `util` helpers (id from the MD5 of the guid, title, description,
 * creation time, optional geolocation, 'RSSItem' tag and the article link).
 *
 * @param {String} url - feed address
 * @param {Function} [perArticle] - called as perArticle(object, article) for
 *        every article; defaults to a function that returns its first argument
 */
var RSSFeed = function(url, perArticle) {
	// The original tested and assigned the Node global `process` here, which
	// left a missing perArticle undefined.
	if (!perArticle)
		perArticle = function(x) { return x; };

	function onArticle(a) {
		// Creation time: the article date when present, otherwise now.
		var w;
		if (a['date'])
			w = new Date(a['date']).getTime();
		else
			w = Date.now();

		var x = util.objNew( util.MD5(a['guid']), a['title'] );
		x.createdAt = w;

		util.objAddDescription(x, a['description']);

		if (a['georss:point']) {
			util.objAddGeoLocation(x, a['georss:point'][0], a['georss:point'][1] );
		}
		if (a['geo:lat'] && a['geo:long']) {
			util.objAddGeoLocation(x, parseFloat(a['geo:lat']['#']), parseFloat(a['geo:long']['#']) );
		}
		util.objAddTag(x, 'RSSItem');
		util.objAddValue(x, 'rssItemURL', a['link']);
		perArticle(x, a);
	}

	try {
		// Without an 'error' listener a failing feed raises an unhandled
		// 'error' event, which try/catch cannot intercept.
		feedparser.parseUrl(url)
			.on('article', onArticle)
			.on('error', function (e) { console.error(e); });
	}
	catch (e) {
		console.error(e);
	}

};
Esempio n. 23
0
/**
 * Fetch a feed, optionally as a conditional request.
 *
 * When `lastUpdate` is given, an If-Modified-Since header is sent in
 * HTTP-date format. On a 200 response the feed's lastModifiedDate and
 * lastETag are refreshed from the response headers.
 * `cb` is invoked exactly once:
 *   cb(null, true, articles, meta) after a 200 response has been parsed,
 *   cb(null, false, [])            on 304 Not Modified,
 *   cb({ err, statusCode })        on any other status,
 *   cb({ err, cause })             when the parser reports an error first.
 *
 * @param {Object} feed - feed record; `url` is read, lastModifiedDate/lastETag are written
 * @param {Number|String|Date} lastUpdate - time of the previous update, falsy for an unconditional fetch
 * @param {Function} cb - see above
 */
var updateFeed = function (feed, lastUpdate, cb) {
    var req = {
        uri: feed.url
    };

    if (lastUpdate) {
        req.headers = req.headers || {};
        // Header values must be strings in HTTP-date format, not Date objects.
        req.headers['If-Modified-Since'] = new Date(lastUpdate).toUTCString();
    }

    /*if ("lastModifiedDate" in feed) {
        req.headers = req.headers || {};
        req.headers['If-Modified-Since'] = feed.lastModifiedDate;
    }

    if ("lastETag" in feed) {
        req.headers = req.headers || {};
        req.headers['If-None-Match'] = feed.lastETag;
    }*/

    var update = false;
    var answered = false;

    // Several events can follow one another; only the first outcome is reported.
    function answer() {
        if (answered) { return; }
        answered = true;
        cb.apply(null, arguments);
    }

    feedparser.parseUrl(req)
        .on('response', function (response) {
            if (response.statusCode === 200) {
                feed.lastModifiedDate = (new Date(response.headers['last-modified'])).getTime();
                feed.lastETag = response.headers['etag'];
                update = true;
            } else {
                update = false;

                if (response.statusCode === 304) {
                    answer(null, update, []);
                } else {
                    answer({ 'err' : "Couldn't retrieve feed.", 'statusCode': response.statusCode});
                }
            }
        })
        .on ('complete', function (meta, articles) {
            answer(null, update, articles, meta);
        })
        .on ('error', function (error) {
            answer({ 'err' : "Couldn't retrieve feed.", 'cause': error });
        });
};
Esempio n. 24
0
// Worker for 'feed' jobs: poll the feed at job.data.url, broadcast its
// articles to the subscribers of that url and queue the next poll.
//
// The publication date of the newest article is stored under the feed url.
// Every path that gets past the initial parseUrl call queues the next poll
// once, and `done` is called exactly once per job.
jobs.process('feed', function(job, done){
  var url = job.data.url;
  var settled = false;

  // Queue the next poll of this feed.
  function reschedule(){
    jobs.create('feed', job.data).delay(minute).save();
  }

  // Guarded so no path can complete the job twice.
  function finish(result){
    if(settled){ return; }
    settled = true;
    done(result);
  }

  try {//this could fly if i validate the url via middleware/client
    feedparser.parseUrl(url, function(error, meta, articles){
      if(error){
        reschedule();
        return finish(error);
      }

      client.get(url, function(error, last_updated){
        if(error){
          reschedule();
          return finish(error);
        }

        if(!articles || articles.length === 0){
          reschedule();
          return finish('No Articles O.o!');
        }

        var latest = articles[0].pubDate;

        if(!last_updated){ //Set the latest
          client.set(url, latest, function(error, data){
            reschedule();
            finish(error);
          });
          return;
        }

        // NOTE(review): the range 0..1 covers only the first two list
        // entries; confirm whether all subscribers (0..-1) were intended.
        client.lrange(url+'_subscribers', 0, 1, function(error, subscribers){
          if(subscribers){
            broadcast_feed(articles, last_updated, subscribers);
          }else if(error){
            // The job has already been completed at this point, so only log.
            console.error(error);
          }
        });
        reschedule();
        client.set(url, latest);
        finish();
      });
    });
  }catch(err){
    finish(err);
  }
});
Esempio n. 25
0
/**
 * Read the articles of one source and run each article url through the
 * shared pipeline: diffbot API call, share count lookup, save to MongoDB.
 *
 * @param source - source identifier, forwarded to the diffbot call
 * @param category - category, forwarded to the diffbot call
 * @param url - address of the feed
 * @param parser - 'feed' (streaming feedparser) or 'rss' (rssparser)
 * @param urlField - article property holding the article url; defaults to
 *        "guid" for 'feed' and "url" for 'rss'
 */
function getData(source, category, url, parser, urlField)
{
	console.log("in data section")

	// Shared by both parser types: enrich one article url and store the result.
	function enrichAndSave(articleUrl)
	{
		shared.makediffbotAPIcall(articleUrl, category, source, function(object) {
			shared.getNumberofShares(articleUrl, function(shares) {
					object.shares = shares;
					console.log(object);
					shared.saveObjectToMongoDB(object, object.category, db);
			});
		})
	}

	if (parser == 'feed')
	{
		if (!urlField)
			urlField = "guid";
		feedparser.parseUrl(url).on('article', function(article) {
			console.log("sending to diffbot")
			enrichAndSave(article[urlField]);
		}).on('error', function(err) {
			console.error("could not parse feed " + url, err);
		});
	}
	else if (parser == 'rss')
	{
		if (!urlField)
			urlField = "url";
		rssparser.parseURL(url, function(err, out) {
			// A failed fetch yields no items to iterate over.
			if (err || !out || !out.items)
			{
				console.error("could not parse feed " + url, err);
				return;
			}
			out.items.forEach(function(article) {
				enrichAndSave(article[urlField]);
			});
		});
	}
	else
	{
		// Unknown parser types were silently ignored before; make them visible.
		console.error("unknown parser type: " + parser);
	}
}
Esempio n. 26
0
		function() { feedparser.parseUrl('http://api.twitter.com/1/statuses/user_timeline.rss?screen_name=nsteinmetz').on('article', displayTitle)  },
Esempio n. 27
0
  /**
   * Load the entries of the RSS feed behind `item` into item._entries.
   *
   * Results, including failures, are cached per key (self.getKey(item)).
   * While a fetch for a key is in flight, further requests for that key are
   * queued and served from its result. Every callback is invoked exactly
   * once, even when the parser reports both an error and a completion.
   *
   * @param {Object} item - widget item; `feed` and `limit` are read,
   *        `_entries` and `_failed` are written
   * @param {Function} callback - called without arguments when the item has been filled in
   */
  self.loadFeed = function(item, callback) {

    // Asynchronously load the actual RSS feed
    // The properties you add should start with an _ to denote that
    // they shouldn't become data attributes or get stored back to MongoDB
    item._entries = [];

    var now = Date.now();
    // Take all properties into account, not just the feed, so the cache
    // doesn't prevent us from seeing a change in the limit property right away

    var key = self.getKey(item);

    // If we already have it, deliver it
    if (self.currentInCache(key, now)) {
      item._entries = cache[key].data;
      item._failed = cache[key].failed;
      return callback();
    }

    // If we're already waiting for it, join the queue
    if (pending[key]) {
      pending[key].push({
        item: item,
        callback: function() {
          return callback();
        }
      });
      return;
    }

    // Start a pending queue for this request
    pending[key] = [];

    // Set once the waiting callbacks have been notified
    var finished = false;

    feedparser.parseUrl(item.feed).on('complete', function(meta, articles) {
      articles = articles.slice(0, item.limit);

      // map is native in node
      item._entries = articles.map(function(article) {
        return {
          title: article.title,
          body: article.description,
          date: article.pubDate,
          link: article.link
        };
      });

      // Cache for fast access later. A completed parse always wins over an
      // earlier failure so the next request sees the data.
      cache[key] = { when: now, data: item._entries };
      return done();
    }).on('error', function(error) {
      // An error reported after the result was delivered must not clobber it
      if (finished) {
        return;
      }
      // Cache failures too, don't go crazy trying to get
      // to a feed that's down
      item._failed = true;
      cache[key] = { when: now, failed: true };
      return done();
    });
    function done() {
      if (finished) {
        return;
      }
      finished = true;
      // Notify everyone else who was waiting for this
      // fetch to finish
      _.each(pending[key], function(i) {
        i.item._entries = item._entries;
        i.item._failed = item._failed;
        return i.callback();
      });
      delete pending[key];
      return callback();
    }
  };
Esempio n. 28
0
		function() { feedparser.parseUrl('http://archives.steinmetz.fr/tutoriels/feeds/all.atom.xml').on('article', displayTitle) },
Esempio n. 29
0
			// Parse params.url again, with `inspect` as the callback, after a delay of interval.max.
			// NOTE(review): the other re-poll timer passes interval.current*1000; confirm
			// whether interval.max is already in milliseconds or needs the same factor.
			setTimeout(function() {
				parser.parseUrl(params.url, inspect)
			}, interval.max)
Esempio n. 30
0
		// Parse params.url again, with `inspect` as the callback, once the current
		// interval has elapsed (interval.current is scaled by 1000 for setTimeout,
		// so it is presumably given in seconds).
		setTimeout(function() {
			parser.parseUrl(params.url, inspect)
		}, interval.current*1000)