Exemplo n.º 1
0
 /**
  * Flush the post currently being assembled, if there is one.
  *
  * When `post` has at least one own enumerable key, its title is passed
  * through `stripHtml`, the post is emitted on `self` as a 'post' event and
  * `post` is replaced by a fresh empty object. An empty `post` is a no-op.
  */
 var onclose = function () {
   // Nothing has been collected, so there is nothing to emit.
   if (Object.keys(post).length === 0) return

   post.title = stripHtml(post.title)
   self.emit('post', post)
   post = {}
 }
Exemplo n.º 2
0
 /**
  * Close-tag handler: finalises the current `post` when an item element ends.
  *
  * Tags other than `item` (compared case-insensitively) are ignored. For an
  * item, the guid falls back to the link, `content:encoded` (when present)
  * replaces the description, and an item without a title is reported through
  * `console.error` and dropped. Otherwise the title is passed through
  * `stripHtml`, the post is emitted on `self`, and the parser's handlers are
  * reset so that the next opening `item` tag calls `itemlistener`.
  *
  * @param {String} name name of the tag being closed
  */
 var onclosetag = function (name) {
   if (name.toLowerCase() !== 'item') return

   if (!post.guid) post.guid = post.link

   // If we have an encoded description it's what we really want
   var encoded = post['content:encoded']
   if (encoded) {
     post.description = encoded
     delete post['content:encoded']
   }

   if (!post.title) {
     console.error('Feed item does not have title: '+post.link)
     return
   }

   post.title = stripHtml(post.title)
   self.emit('post', post)

   // Stop collecting attributes; only watch for the start of the next item.
   parser.onattribute = null
   parser.onopentag = function (node) {
     if (node.name.toLowerCase() === 'item') itemlistener()
   }
 }
Exemplo n.º 3
0
/**
 * Build an item object from a parsed item/entry node.
 *
 * Every key of `node` that does not start with '#' is copied onto the result:
 * keys containing ':' are copied under their own name, all others under
 * `type + ':' + key`. Unless `options` is given with a falsy `normalize`,
 * a set of normalized properties (title, description, summary, date, pubdate,
 * link, guid, author, comments, origlink, image, source, categories,
 * enclosures, ...) is derived from the known elements as well.
 *
 * Side effect: when a `source` element yields a url and `this.meta.xmlurl` is
 * not set, `this.meta.xmlurl`/`xmlUrl` are set and, if `this.xmlbase` is an
 * empty array, `this.xmlbase` and `this.stack[0]` are updated.
 *
 * @param {Object} node      parsed element; child elements are keyed by name
 * @param {String} type      feed flavour; 'rss' and 'atom' are distinguished
 * @param {Object} [options] `normalize: false` disables the derived properties
 * @returns {Object} the item; `{}` when `node` or `type` is missing
 */
FeedParser.prototype.handleItem = function handleItem (node, type, options){
  if (!type || !node) return {};

  var item = {}
    , normalize = !options || (options && options.normalize)
    ;

  // Wrap a lone element so single and repeated elements share one code path.
  function asList (value) {
    return Array.isArray(value) ? value : [value];
  }

  // Add an enclosure, treating entries with the same url + type as duplicates.
  // A duplicate is either replaced in place or ignored, per `replaceExisting`.
  function addEnclosure (enclosure, replaceExisting) {
    var index = indexOfObject(item.enclosures, enclosure, ['url', 'type']);
    if (index === -1) {
      item.enclosures.push(enclosure);
    } else if (replaceExisting) {
      item.enclosures.splice(index, 1, enclosure);
    }
  }

  // Apply one <link> element to the item being built.
  function applyLink (link) {
    var attrs = link['@'];
    if (attrs['href']) { // Atom
      if (utils.get(attrs, 'rel')) {
        if (attrs['rel'] == 'canonical') item.origlink = attrs['href'];
        if (attrs['rel'] == 'alternate') item.link = attrs['href'];
        if (attrs['rel'] == 'self' && !item.link) item.link = attrs['href'];
        if (attrs['rel'] == 'replies') item.comments = attrs['href'];
        if (attrs['rel'] == 'enclosure') {
          // Enclosures declared through links never replace an existing entry
          addEnclosure({
            url: attrs['href'],
            type: utils.get(attrs, 'type'),
            length: utils.get(attrs, 'length')
          }, false);
        }
      } else {
        item.link = attrs['href'];
      }
    } else if (Object.keys(attrs).length === 0) { // RSS
      if (!item.link) item.link = utils.get(link);
    }
  }

  if (normalize) {
    ['title','description','summary','date','pubdate','pubDate','link','guid','author','comments', 'origlink'].forEach(function (property){
      item[property] = null;
    });
    item.image = {};
    item.source = {};
    item.categories = [];
    item.enclosures = [];
  }

  Object.keys(node).forEach(function(name){
    var el = node[name];
    if (normalize) {
      switch(name){
      case('title'):
        item.title = utils.get(el);
        break;
      case('description'):
      case('summary'):
        item.summary = utils.get(el);
        if (!item.description) item.description = utils.get(el);
        break;
      case('content'):
      case('content:encoded'):
        // Full content always wins over a description/summary
        item.description = utils.get(el);
        break;
      case('pubdate'):
      case('published'):
      case('issued'):
      case('modified'):
      case('updated'):
      case('dc:date'):
        var date = utils.get(el) ? new Date(utils.get(el)) : null;
        if (!date) break;
        // Publication-type elements always set pubdate; any other date
        // element only fills it while it is still unset (same idea for date).
        if (item.pubdate === null || name == 'pubdate' || name == 'published' || name == 'issued')
          item.pubdate = item.pubDate = date;
        if (item.date === null || name == 'modified' || name == 'updated')
          item.date = date;
        break;
      case('link'):
        asList(el).forEach(applyLink);
        if (!item.guid) item.guid = item.link;
        break;
      case('guid'):
      case('id'):
        item.guid = utils.get(el);
        // http://cyber.law.harvard.edu/rss/rss.html#ltguidgtSubelementOfLtitemgt
        // If the guid element has an attribute named "isPermaLink" with a value
        // of true, the reader may assume that it is a permalink to the item,
        // that is, a url that can be opened in a Web browser, that points to
        // the full item described by the <item> element.
        // isPermaLink is optional, its default value is true. If its value is
        // false, the guid may not be assumed to be a url, or a url to anything
        // in particular.
        // A guid without an attribute map is treated as having no attributes.
        var guidAttrs = utils.get(el, '@') || {};
        if (item.guid && type == 'rss' && name == 'guid' && guidAttrs.ispermalink !== 'false') {
          item.permalink = item.guid;
        }
        break;
      case('author'):
        var author = {};
        if (utils.get(el)) { // RSS
          author = addressparser(utils.get(el))[0];
          if (author) {
            el['name'] = author.name;
            el['email'] = author.address;
            item.author = author.name || author.address;
          }
          // addressparser failed
          else {
            item.author = utils.get(el);
          }
        } else {
          item.author = utils.get(el.name) || utils.get(el.email) || utils.get(el.uri);
        }
        break;
      case('dc:creator'):
        item.author = utils.get(el);
        break;
      case('comments'):
        item.comments = utils.get(el);
        break;
      case('source'):
        if ('rss' == type) {
          item.source['title'] = utils.get(el);
          item.source['url'] = utils.get(el['@'], 'url');
        } else if ('atom' == type) {
          if (el.title && utils.get(el.title))
            item.source['title'] = utils.get(el.title);
          if (el.link && utils.get(el.link['@'], 'href'))
            item.source['url'] = utils.get(el.link['@'], 'href');
        }
        // Use the source url as the feed url when none is known yet
        if (item.source['url'] && !this.meta.xmlurl) {
          this.meta.xmlurl = this.meta.xmlUrl = item.source['url'];
          if (this.xmlbase && this.xmlbase.length === 0) {
            this.xmlbase.unshift({ '#name': 'xml', '#': item.source['url']});
            this.stack[0] = utils.reresolve(this.stack[0], item.source['url']);
          }
        }
        break;
      case('enclosure'):
        asList(el).forEach(function (enc){
          // A later <enclosure> with the same url + type replaces the earlier one
          addEnclosure({
            url: utils.get(enc['@'], 'url'),
            type: utils.get(enc['@'], 'type'),
            length: utils.get(enc['@'], 'length')
          }, true);
        });
        break;
      case('media:content'):
        asList(el).forEach(function (enc){
          addEnclosure({
            url: utils.get(enc['@'], 'url'),
            type: utils.get(enc['@'], 'type') || utils.get(enc['@'], 'medium'),
            length: utils.get(enc['@'], 'filesize')
          }, false);
        });
        break;
      case('enc:enclosure'): // Can't find this in use for an example to debug. Only example found does not comply with the spec -- can't code THAT!
        break;
      case('category'):
      case('dc:subject'):
      case('itunes:category'):
      case('media:category'):
        /* We handle all the kinds of categories within the switch loop because item.categories
         * is an array, unlike the other properties, and therefore can handle multiple values
         */
        // Shared across the elements of one list: an itunes:category without a
        // text attribute reuses the text of the previous one.
        var _category = '';
        asList(el).forEach(function (category){
          if ('category' == name && 'atom' == type) {
            if (category['@'] && utils.get(category['@'], 'term')) item.categories.push(utils.get(category['@'], 'term'));
          } else if ('category' == name && utils.get(category) && 'rss' == type) {
            item.categories.push(utils.get(category).trim());
          } else if ('dc:subject' == name && utils.get(category)) {
            var subjects = utils.get(category).split(' ').map(function (cat){ return cat.trim(); });
            if (subjects.length) item.categories = item.categories.concat(subjects);
          } else if ('itunes:category' == name) {
            if (category['@'] && utils.get(category['@'], 'text')) _category = utils.get(category['@'], 'text');
            if (category[name]) {
              // Nested categories are recorded as "parent/child"
              asList(category[name]).forEach(function (subcategory){
                if (subcategory['@'] && utils.get(subcategory['@'], 'text')) item.categories.push(_category + '/' + utils.get(subcategory['@'], 'text'));
              });
            } else {
              item.categories.push(_category);
            }
          } else if ('media:category' == name) {
            item.categories.push(utils.get(category));
          }
        });
        break;
      case('feedburner:origlink'):
      case('pheedo:origlink'):
        if (!item.origlink) {
          item.origlink = utils.get(el);
        }
        break;
      } // switch end
    }
    // Fill with all native other namespaced properties
    if (name.indexOf('#') !== 0) {
      if (~name.indexOf(':')) item[name] = el;
      else item[type + ':' + name] = el;
    }
  }, this); // forEach end

  if (normalize) {
    // Fallbacks for properties the known elements did not provide
    if (!item.description) {
      if (node['itunes:summary']) item.description = utils.get(node['itunes:summary']);
    }
    if (!item.author) {
      if (node['itunes:author']) item.author = utils.get(node['itunes:author']);
      else if (node['itunes:owner'] && node['itunes:owner']['itunes:name']) item.author = utils.get(node['itunes:owner']['itunes:name']);
      else if (node['dc:publisher']) item.author = utils.get(node['dc:publisher']);
    }
    if (!item.image.url) {
      if (node['itunes:image']) item.image.url = utils.get(node['itunes:image']['@'], 'href');
      else if (node['media:thumbnail']) {
        if (Array.isArray(node['media:thumbnail'])) {
          item.image.url = utils.get(node['media:thumbnail'][0]['@'], 'url');
        } else {
          item.image.url = utils.get(node['media:thumbnail']['@'], 'url');
        }
      }
      else if (node['media:content'] && node['media:content']['media:thumbnail']) item.image.url = utils.get(node['media:content']['media:thumbnail']['@'], 'url');
      else if (node['media:group'] && node['media:group']['media:thumbnail']) item.image.url = utils.get(node['media:group']['media:thumbnail']['@'], 'url');
      else if (node['media:group'] && node['media:group']['media:content'] && node['media:group']['media:content']['media:thumbnail']) item.image.url = utils.get(node['media:group']['media:content']['media:thumbnail']['@'], 'url');
    }
    if (item.categories.length) {
      item.categories = utils.unique(item.categories);
    }
    if (!item.link) {
      // An http(s) guid is good enough to serve as the link
      if (item.guid && /^https?:/.test(item.guid)) {
        item.link = item.guid;
      }
    }
    item.title = item.title && resanitize.stripHtml(item.title);
  }

  return item;
};
Exemplo n.º 4
0
/**
 * Build the feed-level metadata object from a parsed channel/feed node.
 *
 * Every key of `node` that does not start with '#' is copied onto the result:
 * keys containing ':' are copied under their own name, all others under
 * `type + ':' + key`. Unless `options` is given with a falsy `normalize`,
 * normalized properties (title, description, date, pubdate, link, xmlurl,
 * author, language, favicon, copyright, generator, cloud, image, categories)
 * are derived from the known elements as well.
 *
 * Side effect: while `this.xmlbase` is an empty array, the first feed url
 * (rel="self") or link found is pushed onto `this.xmlbase` and `this.stack[0]`
 * is re-resolved against it. `node` itself is not modified, except that a
 * successfully parsed managingeditor/webmaster element gains `name`/`email`.
 *
 * @param {Object} node      parsed element; child elements are keyed by name
 * @param {String} type      feed flavour; 'rss' and 'atom' are distinguished
 * @param {Object} [options] `normalize: false` disables the derived properties
 * @returns {Object} the metadata; `{}` when `node` or `type` is missing
 */
FeedParser.prototype.handleMeta = function handleMeta (node, type, options) {
  if (!type || !node) return {};

  var meta = {}
    , normalize = !options || (options && options.normalize)
    ;

  // Wrap a lone element so single and repeated elements share one code path.
  function asList (value) {
    return Array.isArray(value) ? value : [value];
  }

  if (normalize) {
    ['title','description','date', 'pubdate', 'pubDate','link', 'xmlurl', 'xmlUrl','author','language','favicon','copyright','generator'].forEach(function (property){
      meta[property] = null;
    });
    meta.cloud = {};
    meta.image = {};
    meta.categories = [];
  }

  Object.keys(node).forEach(function(name){
    var el = node[name];

    if (normalize) {
      switch(name){
      case('title'):
        meta.title = utils.get(el);
        break;
      case('description'):
      case('subtitle'):
        meta.description = utils.get(el);
        break;
      case('pubdate'):
      case('lastbuilddate'):
      case('published'):
      case('modified'):
      case('updated'):
      case('dc:date'):
        var date = utils.get(el) ? new Date(utils.get(el)) : null;
        if (!date) break;
        // Publication-type elements always set pubdate; any other date
        // element only fills it while it is still unset (same idea for date).
        if (meta.pubdate === null || name == 'pubdate' || name == 'published')
          meta.pubdate = meta.pubDate = date;
        if (meta.date === null || name == 'lastbuilddate' || name == 'modified' || name == 'updated')
          meta.date = date;
        break;
      case('link'):
      case('atom:link'):
      case('atom10:link'):
        asList(el).forEach(function (link){
          var attrs = link['@'];
          if (attrs['href']) { // Atom
            if (utils.get(attrs, 'rel')) {
              if (attrs['rel'] == 'alternate') meta.link = attrs['href'];
              else if (attrs['rel'] == 'self') {
                meta.xmlurl = meta.xmlUrl = attrs['href'];
                if (this.xmlbase && this.xmlbase.length === 0) {
                  this.xmlbase.unshift({ '#name': 'xml', '#': meta.xmlurl});
                  this.stack[0] = utils.reresolve(this.stack[0], meta.xmlurl);
                }
              }
              else if (attrs['rel'] == 'hub' && !(meta.cloud.href || meta.cloud.domain)) {
                meta.cloud.type = 'hub';
                meta.cloud.href = attrs['href'];
              }
            } else {
              meta.link = attrs['href'];
            }
          } else if (Object.keys(attrs).length === 0) { // RSS
            if (!meta.link) meta.link = utils.get(link);
          }
          // Fall back to the feed's link as base url when none is known yet
          if (meta.link && this.xmlbase && this.xmlbase.length === 0) {
            this.xmlbase.unshift({ '#name': 'xml', '#': meta.link});
            this.stack[0] = utils.reresolve(this.stack[0], meta.link);
          }
        }, this);
        break;
      case('managingeditor'):
      case('webmaster'):
      case('author'):
        if (name == 'author') {
          meta.author = utils.get(el.name) || utils.get(el.email) || utils.get(el.uri);
        }
        else if (utils.get(el)) {
          var rawAuthor = utils.get(el)
            , author = addressparser(rawAuthor)[0]
            ;
          if (author) {
            el['name'] = author.name;
            el['email'] = author.address;
          }
          // managingeditor always wins; webmaster only fills an unset author
          if (meta.author === null || name == 'managingeditor') {
            // addressparser may find no address at all: use the raw text then
            meta.author = (author && (author.name || author.address)) || rawAuthor;
          }
        }
        break;
      case('cloud'):
        // I can't believe someone actually would put two cloud elements in their channel
        // but it happened
        // Nevertheless, there can be only one
        meta.cloud = {}; // This will ensure that rssCloud "wins" here,
                         // If pubsubhubbub is also declared, it's still available
                         // in the link elements
        var cloudAttrs = (Array.isArray(el) ? el[0] : el)['@'];
        Object.keys(cloudAttrs).forEach(function (attr) {
          if (utils.has(cloudAttrs, attr)) {
            meta.cloud[attr] = cloudAttrs[attr];
          }
        });
        meta.cloud.type = 'rsscloud';
        break;
      case('language'):
        meta.language = utils.get(el);
        break;
      case('image'):
      case('logo'):
        if (el.url)
          meta.image.url = utils.get(el.url);
        if (el.title)
          meta.image.title = utils.get(el.title);
        if (!meta.image.url && utils.get(el))
          meta.image.url = utils.get(el);
        break;
      case('icon'):
        meta.favicon = utils.get(el);
        break;
      case('copyright'):
      case('rights'):
      case('dc:rights'):
        meta.copyright = utils.get(el);
        break;
      case('generator'):
        // Assemble "<text> v<version> (<uri>)" from whichever parts exist,
        // without ever concatenating onto a missing text value.
        var generator = utils.get(el);
        if (utils.get(el['@'], 'version'))
          generator = (generator ? generator + ' ' : '') + 'v' + el['@'].version;
        if (utils.get(el['@'], 'uri'))
          generator = generator ? generator + ' (' + el['@'].uri + ')' : el['@'].uri;
        meta.generator = generator;
        break;
      case('category'):
      case('dc:subject'):
      case('itunes:category'):
      case('media:category'):
        /* We handle all the kinds of categories within the switch loop because meta.categories
         * is an array, unlike the other properties, and therefore can handle multiple values
         */
        // Shared across the elements of one list: an itunes:category without a
        // text attribute reuses the text of the previous one.
        var _category = '';
        asList(el).forEach(function (category){
          if ('category' == name && 'atom' == type) {
            if (category['@'] && utils.get(category['@'], 'term')) meta.categories.push(utils.get(category['@'], 'term'));
          } else if ('category' == name && utils.get(category) && 'rss' == type) {
            meta.categories.push(utils.get(category).trim());
          } else if ('dc:subject' == name && utils.get(category)) {
            var subjects = utils.get(category).split(' ').map(function (cat){ return cat.trim(); });
            if (subjects.length) meta.categories = meta.categories.concat(subjects);
          } else if ('itunes:category' == name) {
            if (category['@'] && utils.get(category['@'], 'text')) _category = utils.get(category['@'], 'text');
            if (category[name]) {
              // Nested categories are recorded as "parent/child"
              asList(category[name]).forEach(function (subcategory){
                if (subcategory['@'] && utils.get(subcategory['@'], 'text')) meta.categories.push(_category + '/' + utils.get(subcategory['@'], 'text'));
              });
            } else {
              meta.categories.push(_category);
            }
          } else if ('media:category' == name) {
            meta.categories.push(utils.get(category));
          }
        });
        break;
      } // switch end
    }
    // Fill with all native other namespaced properties
    if (name.indexOf('#') !== 0) {
      if (~name.indexOf(':')) meta[name] = el;
      else meta[type + ':' + name] = el;
    }
  }, this); // forEach end

  if (normalize) {
    // Fallbacks for properties the known elements did not provide
    if (!meta.description) {
      if (node['itunes:summary']) meta.description = utils.get(node['itunes:summary']);
      else if (node['tagline']) meta.description = utils.get(node['tagline']);
    }
    if (!meta.author) {
      if (node['itunes:author']) meta.author = utils.get(node['itunes:author']);
      else if (node['itunes:owner'] && node['itunes:owner']['itunes:name']) meta.author = utils.get(node['itunes:owner']['itunes:name']);
      else if (node['dc:creator']) meta.author = utils.get(node['dc:creator']);
      else if (node['dc:publisher']) meta.author = utils.get(node['dc:publisher']);
    }
    if (!meta.language) {
      if (node['@'] && node['@']['xml:lang']) meta.language = utils.get(node['@'], 'xml:lang');
      else if (node['dc:language']) meta.language = utils.get(node['dc:language']);
    }
    if (!meta.image.url) {
      if (node['itunes:image']) meta.image.url = utils.get(node['itunes:image']['@'], 'href');
      else if (node['media:thumbnail']) {
        // Use the first thumbnail; read it without rewriting the caller's node
        var thumbnail = Array.isArray(node['media:thumbnail']) ? node['media:thumbnail'][0] : node['media:thumbnail'];
        if (thumbnail) meta.image.url = utils.get(thumbnail['@'], 'url');
      }
    }
    if (!meta.copyright) {
      if (node['media:copyright']) meta.copyright = utils.get(node['media:copyright']);
      else if (node['dc:rights']) meta.copyright = utils.get(node['dc:rights']);
      else if (node['creativecommons:license']) meta.copyright = utils.get(node['creativecommons:license']);
      else if (node['cc:license']) {
        if (Array.isArray(node['cc:license']) && node['cc:license'][0]['@'] && node['cc:license'][0]['@']['rdf:resource']) {
          meta.copyright = utils.get(node['cc:license'][0]['@'], 'rdf:resource');
        } else if (node['cc:license']['@'] && node['cc:license']['@']['rdf:resource']) {
          meta.copyright = utils.get(node['cc:license']['@'], 'rdf:resource');
        }
      }
    }
    if (!meta.generator) {
      if (node['admin:generatoragent']) {
        if (Array.isArray(node['admin:generatoragent']) && node['admin:generatoragent'][0]['@'] && node['admin:generatoragent'][0]['@']['rdf:resource']) {
          meta.generator = utils.get(node['admin:generatoragent'][0]['@'], 'rdf:resource');
        } else if (node['admin:generatoragent']['@'] && node['admin:generatoragent']['@']['rdf:resource']) {
          meta.generator = utils.get(node['admin:generatoragent']['@'], 'rdf:resource');
        }
      }
    }
    if (meta.categories.length) {
      meta.categories = utils.unique(meta.categories);
    }
    if (!meta.link) {
      // An http(s) atom:id is good enough to serve as the link
      if (meta['atom:id'] && utils.get(meta['atom:id']) && /^https?:/.test(utils.get(meta['atom:id']))) {
        meta.link = utils.get(meta['atom:id']);
      }
    }
    if (!meta.xmlurl && this.options.feedurl) {
      meta.xmlurl = meta.xmlUrl = this.options.feedurl;
    }
    meta.title = meta.title && resanitize.stripHtml(meta.title);
    meta.description = meta.description && resanitize.stripHtml(meta.description);
  }

  return meta;
};
Exemplo n.º 5
0
FeedParser.prototype.handleItem = function handleItem (node, type, options){
  if (!type || !node) return {};

  var item = {}
    , normalize = !options || (options && options.normalize)
    ;

  if (normalize) {
    ['title','description','summary','date','pubdate','pubDate','link','guid','author','comments', 'origlink'].forEach(function (property){
      item[property] = null;
    });
    item.image = {};
    item.source = {};
    item.categories = [];
    item.enclosures = [];
  }

  Object.keys(node).forEach(function(name){
    var el = node[name]
      , enclosure = {};
    if (normalize) {
      switch(name){
      case('title'):
        item.title = utils.get(el);
        break;
      case('description'):
      case('summary'):
        item.summary = utils.get(el);
        if (!item.description) item.description = utils.get(el);
        break;
      case('content'):
      case('content:encoded'):
        item.description = utils.get(el);
        break;
      case('pubdate'):
      case('published'):
      case('issued'):
      case('modified'):
      case('updated'):
      case('dc:date'):
        var date = utils.get(el) ? new Date(el['#']) : null;
        if (!date) break;
        if (item.pubdate === null || name == 'pubdate' || name == 'published' || name == 'issued')
          item.pubdate = item.pubDate = date;
        if (item.date === null || name == 'modified' || name == 'updated')
          item.date = date;
        break;
      case('link'):
        if (Array.isArray(el)) {
          el.forEach(function (link){
            if (link['@']['href']) { // Atom
              if (utils.get(link['@'], 'rel')) {
                if (link['@']['rel'] == 'canonical') item.origlink = link['@']['href'];
                if (link['@']['rel'] == 'alternate') item.link = link['@']['href'];
                if (link['@']['rel'] == 'replies') item.comments = link['@']['href'];
                if (link['@']['rel'] == 'enclosure') {
                  enclosure.url = link['@']['href'];
                  enclosure.type = utils.get(link['@'], 'type');
                  enclosure.length = utils.get(link['@'], 'length');
                  item.enclosures.push(enclosure);
                }
              } else {
                item.link = link['@']['href'];
              }
            } else if (Object.keys(link['@']).length === 0) { // RSS
              if (!item.link) item.link = utils.get(link);
            }
          });
        } else {
          if (el['@']['href']) { // Atom
            if (utils.get(el['@'], 'rel')) {
              if (el['@']['rel'] == 'canonical') item.origlink = el['@']['href'];
              if (el['@']['rel'] == 'alternate') item.link = el['@']['href'];
              if (el['@']['rel'] == 'replies') item.comments = el['@']['href'];
              if (el['@']['rel'] == 'enclosure') {
                enclosure.url = el['@']['href'];
                enclosure.type = utils.get(el['@'], 'type');
                enclosure.length = utils.get(el['@'], 'length');
                item.enclosures.push(enclosure);
              }
            } else {
              item.link = el['@']['href'];
            }
          } else if (Object.keys(el['@']).length === 0) { // RSS
            if (!item.link) item.link = utils.get(el);
          }
        }
        if (!item.guid) item.guid = item.link;
        break;
      case('guid'):
      case('id'):
        item.guid = utils.get(el);
        break;
      case('author'):
        var author = {};
        if (utils.get(el)) { // RSS
          author = addressparser(utils.get(el))[0];
          el['name'] = author.name;
          el['email'] = author.address;
          item.author = author.name || author.address;
        } else {
          item.author = utils.get(el.name) || utils.get(el.email) || utils.get(el.uri);
        }
        break;
      case('dc:creator'):
        item.author = utils.get(el);
        break;
      case('comments'):
        item.comments = utils.get(el);
        break;
      case('source'):
        if ('rss' == type) {
          item.source['title'] = utils.get(el);
          item.source['url'] = utils.get(el['@'], 'url');
        } else if ('atom' == type) {
          if (el.title && utils.get(el.title))
            item.source['title'] = utils.get(el.title);
          if (el.link && utils.get(el.link['@'], 'href'))
          item.source['url'] = utils.get(el.link['@'], 'href');
        }
        break;
      case('enclosure'):
        if (Array.isArray(el)) {
          el.forEach(function (enc){
            enclosure.url = utils.get(enc['@'], 'url');
            enclosure.type = utils.get(enc['@'], 'type');
            enclosure.length = utils.get(enc['@'], 'length');
            if (indexOfObject(item.enclosures, enclosure, ['url', 'type']) === -1) {
              item.enclosures.splice(indexOfObject(item.enclosures, enclosure, ['url', 'type']), 1, enclosure);
            } else {
              item.enclosures.push(enclosure);
            }
          });
        } else {
          enclosure.url = utils.get(el['@'], 'url');
          enclosure.type = utils.get(el['@'], 'type');
          enclosure.length = utils.get(el['@'], 'length');
          if (indexOfObject(item.enclosures, enclosure, ['url', 'type']) === -1) {
            item.enclosures.splice(indexOfObject(item.enclosures, enclosure, ['url', 'type']), 1, enclosure);
          } else {
            item.enclosures.push(enclosure);
          }
        }
        break;
      case('media:content'):
        if (Array.isArray(el)) {
          el.forEach(function (enc){
            enclosure.url = utils.get(enc['@'], 'url');
            enclosure.type = utils.get(enc['@'], 'type') || utils.get(enc['@'], 'medium');
            enclosure.length = utils.get(enc['@'], 'filesize');
            if (indexOfObject(item.enclosures, enclosure, ['url', 'type']) === -1) {
              item.enclosures.push(enclosure);
            }
          });
        } else {
          enclosure.url = utils.get(el['@'], 'url');
          enclosure.type = utils.get(el['@'], 'type') || utils.get(el['@'], 'medium');
          enclosure.length = utils.get(el['@'], 'filesize');
          if (indexOfObject(item.enclosures, enclosure, ['url', 'type']) === -1) {
            item.enclosures.push(enclosure);
          }
        }
        break;
      case('enc:enclosure'): // Can't find this in use for an example to debug. Only example found does not comply with the spec -- can't code THAT!
        break;
      case('category'):
      case('dc:subject'):
      case('itunes:category'):
      case('media:category'):
        /* We handle all the kinds of categories within the switch loop because item.categories
         * is an array, unlike the other properties, and therefore can handle multiple values
         */
        var _category = ''
          , _categories = []
          ;
        if (Array.isArray(el)) {
          el.forEach(function (category){
            if ('category' == name && 'atom' == type) {
              if (category['@'] && utils.get(category['@'], 'term')) item.categories.push(utils.get(category['@'], 'term'));
            } else if ('category' == name && utils.get(category) && 'rss' == type) {
              _categories = utils.get(category).split(',').map(function (cat){ return cat.trim(); });
              if (_categories.length) item.categories = item.categories.concat(_categories);
            } else if ('dc:subject' == name && utils.get(category)) {
              _categories = utils.get(category).split(' ').map(function (cat){ return cat.trim(); });
              if (_categories.length) item.categories = item.categories.concat(_categories);
            } else if ('itunes:category' == name) {
              if (category['@'] && utils.get(category['@'], 'text')) _category = utils.get(category['@'], 'text');
              if (category[name]) {
                if (Array.isArray(category[name])) {
                  category[name].forEach(function (subcategory){
                    if (subcategory['@'] && utils.get(subcategory['@'], 'text')) item.categories.push(_category + '/' + utils.get(subcategory['@'], 'text'));
                  });
                } else {
                  if (category[name]['@'] && utils.get(category[name]['@'], 'text'))
                    item.categories.push(_category + '/' + utils.get(category[name]['@'], 'text'));
                }
              } else {
                item.categories.push(_category);
              }
            } else if ('media:category' == name) {
              item.categories.push(utils.get(category));
            }
          });
        } else {
          if ('category' == name && 'atom' == type) {
            if (utils.get(el['@'], 'term')) item.categories.push(utils.get(el['@'], 'term'));
          } else if ('category' == name && utils.get(el) && 'rss' == type) {
            _categories = utils.get(el).split(',').map(function (cat){ return cat.trim(); });
            if (_categories.length) item.categories = item.categories.concat(_categories);
          } else if ('dc:subject' == name && utils.get(el)) {
            _categories = utils.get(el).split(' ').map(function (cat){ return cat.trim(); });
            if (_categories.length) item.categories = item.categories.concat(_categories);
          } else if ('itunes:category' == name) {
            if (el['@'] && utils.get(el['@'], 'text')) _category = utils.get(el['@'], 'text');
            if (el[name]) {
              if (Array.isArray(el[name])) {
                el[name].forEach(function (subcategory){
                  if (subcategory['@'] && utils.get(subcategory['@'], 'text')) item.categories.push(_category + '/' + utils.get(subcategory['@'], 'text'));
                });
              } else {
                if (el[name]['@'] && utils.get(el[name]['@'], 'text'))
                  item.categories.push(_category + '/' + utils.get(el[name]['@'], 'text'));
              }
            } else {
              item.categories.push(_category);
            }
          } else if ('media:category' == name) {
            item.categories.push(utils.get(el));
          }
        }
        break;
      case('feedburner:origlink'):
      case('pheedo:origlink'):
        if (!item.origlink) {
          item.origlink = utils.get(el);
        }
        break;
      } // switch end
    }
    // Fill with all native other namespaced properties
    if (name.indexOf('#') !== 0) {
      if (~name.indexOf(':')) item[name] = el;
      else item[type + ':' + name] = el;
    }
  }); // forEach end

  if (normalize) {
    if (!item.description) {
      if (node['itunes:summary']) item.description = utils.get(node['itunes:summary']);
    }
    if (!item.author) {
      if (node['itunes:author']) item.author = utils.get(node['itunes:author']);
      else if (node['itunes:owner'] && node['itunes:owner']['itunes:name']) item.author = utils.get(node['itunes:owner']['itunes:name']);
      else if (node['dc:publisher']) item.author = utils.get(node['dc:publisher']);
    }
    if (!item.image.url) {
      if (node['itunes:image']) item.image.url = utils.get(node['itunes:image']['@'], 'href');
      else if (node['media:thumbnail']) {
        if (Array.isArray(node['media:thumbnail'])) {
          item.image.url = utils.get(node['media:thumbnail'][0]['@'], 'url');
        } else {
          item.image.url = utils.get(node['media:thumbnail']['@'], 'url');
        }
      }
      else if (node['media:content'] && node['media:content']['media:thumbnail']) item.image.url = utils.get(node['media:content']['media:thumbnail']['@'], 'url');
      else if (node['media:group'] && node['media:group']['media:thumbnail']) item.image.url = utils.get(node['media:group']['media:thumbnail']['@'], 'url');
      else if (node['media:group'] && node['media:group']['media:content'] && node['media:group']['media:content']['media:thumbnail']) item.image.url = utils.get(node['media:group']['media:content']['media:thumbnail']['@'], 'url');
    }
    if (item.categories.length) {
      item.categories = utils.unique(item.categories);
    }
    item.title = item.title && resanitize.stripHtml(item.title);
  }
  return item;
};