示例#1
0
文件: api.js 项目: Whoaa512/OkCupid
	/**
	 * Extracts profile fields from an HTML string.
	 *
	 * On a successful parse the profile carries `username` (text of
	 * `span#basic_info_sn`) and `lastContacted` (text of the first
	 * `div#contacted p`, or null when there is none). On a parse error the
	 * error is reported through sys.debug and the profile stays empty.
	 *
	 * @param {string} html - Markup to parse.
	 * @param {function(Object)} callback - Receives the profile object in both cases.
	 */
	var parseProfile = function(html, callback)
	{
		var result = {};

		var onDomReady = function(parseError, tree) {
			if (parseError) {
				sys.debug("Error: " + parseError);
			} else {
				result.username = select(tree, 'span#basic_info_sn')[0].children[0].data;

				var contacted = select(tree, 'div#contacted p');
				result.lastContacted = contacted.length > 0
					? contacted[0].children[0].data
					: null;
			}
			// The callback fires on both paths, mirroring the handler's single exit.
			callback(result);
		};

		var domHandler = new htmlparser.DefaultHandler(onDomReady);
		new htmlparser.Parser(domHandler).parseComplete(html);
	};
    // Logs every state change; once the response is complete (readyState 4),
    // parses it and builds ten activity records from DOM positions 5..14,
    // then runs ArticleExist on each of them.
    xhr.onreadystatechange = function() {
      sys.puts("State: " + this.readyState);

      if (this.readyState != 4) {
        return;
      }

      var responseHtml = this.responseText;
      var htmlparser = require("htmlparser");
      var domBuilder = new htmlparser.DefaultHandler(function (parseError, dom) {
        if (parseError)
          console.log("error")
      });
      new htmlparser.Parser(domBuilder).parseComplete(responseHtml);

      var activities = [];
      for (var slot = 5; slot < 15; slot++) {
        var entry = {
          title: getActivityTitle(domBuilder.dom, slot),
          author: getActivityAuthor(domBuilder.dom, slot),
          datePub: '',
          link: getActivityLink(domBuilder.dom, slot),
          source: 'Jira'
        };
        // The id is derived from the link and author gathered above.
        entry.id = entry.link + entry.author;
        activities.push(entry);
      }

      for (var k = 0; k < activities.length; k++) {
        ArticleExist(activities, k);
      }
    };
示例#3
0
/**
 * Parses a page and extracts the price found in its third <b> element.
 *
 * @param {string} html - Page markup.
 * @param {function(?Error, {price: number, currency: string}=)} callback -
 *   Invoked exactly once: with an error (parse failure, missing DOM, missing
 *   or malformed price), or with (null, {price, currency: "GBP"}).
 */
function parsePage(html, callback) {
	statsd.increment('pages_parsed');
	var handler = new htmlparser.DefaultHandler(function (error, dom) {
		if (error) {
			return dumpHtml(html, error, callback);
		}
		if (!dom) {
			return dumpHtml(html, new Error("No DOM object!"), callback);
		}
		var rawPrice;
		try {
			var bolds = htmlparser.DomUtils.getElementsByTagName("b", dom);
			rawPrice = bolds[2].children[0].data;
		} catch (e) {
			return dumpHtml(html, e, callback);
		}
		if (rawPrice[0] != '£') {
			return dumpHtml(html, new Error("Expected currency symbol to be £; got " + rawPrice[0]), callback);
		}
		var price = parseFloat(rawPrice.slice(1));
		if (isNaN(price)) {
			var errMsg = "Failed to convert price string to float; result is NaN";
			// Log the original string: the converted value is always NaN here.
			log.error({price: rawPrice}, errMsg);
			// Return so the success callback below is not fired as well.
			return callback(new Error(errMsg));
		}
		callback(null, {
			price: price,
			currency: "GBP"
		});
	});
	var parser = new htmlparser.Parser(handler);
	parser.parseComplete(html);
}
示例#4
0
/**
 * Parses `page` and passes the child nodes of its <body> element to `callback`.
 *
 * Relies on parseComplete() having populated handler.dom by the time it
 * returns, as the original code did.
 *
 * @param {string} page - HTML source.
 * @param {function(?Error, Array=)} callback - Called exactly once: with the
 *   parse error, with (null, bodyChildren), or with an Error when the
 *   document has no <html>/<body> pair.
 */
function _get_body(page, callback) {
    var parseError = null;
    var handler = new np.DefaultHandler(function(error, dom) {
        // Only remember the failure; it is reported once, after parsing,
        // so the DOM walk below cannot trigger a second callback.
        if (error) {
            parseError = error;
        }
    }, {
        ignoreWhitespace: true
    });
    var parser = new np.Parser(handler);
    parser.parseComplete(page);

    if (parseError) {
        return callback(parseError);
    }

    var roots = handler.dom || [];
    for (var i = 0; i < roots.length; i++) {
        var node = roots[i];
        if (node.name != 'html') {
            continue;
        }
        console.log('found html');
        // Nodes without children simply have nothing to scan.
        var kids = node.children || [];
        for (var j = 0; j < kids.length; j++) {
            if (kids[j].name == 'body') {
                console.log('found body');
                return callback(null, kids[j].children);
            }
        }
    }

    // Without this the caller would wait forever on a body-less document.
    return callback(new Error('No <body> element found'));
}
示例#5
0
	// Fetches the song search page for `query` and passes the songs found in
	// `ul.canciones` to songsCallback(final, event).
	// NOTE(review): when the request fails or the status is not 200,
	// songsCallback is never invoked.
	// NOTE(review): `query` is concatenated into the URL without encoding here;
	// confirm the caller encodes it.
	request({url:'http://ultrastar-es.org/es/canciones?busqueda='+query,  jar: j}, function (error, response, body) {
	  if (!error && response.statusCode == 200) {
	     // NOTE(review): `err` is not checked before `dom` is used.
	     var handler = new htmlparser.DefaultHandler(function(err, dom) {
	         var canciones = select(dom, 'ul.canciones')[0];
	         var final = [];
	         for(var c in canciones.children){
		         var song = canciones.children[c];
		         // NOTE(review): `var` is function-scoped and a bare `var x;` does not
		         // reset the value, so these keep what the previous iteration left.
		         // A song with no "torrent" link therefore reuses the previous
		         // song's `download` (or stays undefined for the first song).
		         var image;
		         var author;
		         var title;
		         var download;
		         var img = select(song, 'img')[0];
		         var a = select(song, 'a');
		         // Only children that contain an <img> are treated as songs.
		         if(img!=undefined){
			         image = img.attribs.src;
			         // Artist and title are read from the 4th and 5th links of the entry.
			         author = a[3].children[0].raw
			         title = a[4].children[0].raw
			         // Keeps the last link whose href contains "torrent".
			         for(var x=0;x<a.length;x++){
				         if( a[x].attribs!=undefined && a[x].attribs.href.indexOf("torrent")!=-1){
					         download = a[x].attribs.href;
				         }
			         }
			         console.log(download);
					 final.push({"title":title, "artist":author, "remote":true, "image":image, "download":"http://ultrastar-es.org"+download});
		         }
	         }
	        songsCallback(final, event);

	          
	  	});
	  	 var parser = new htmlparser.Parser(handler);
	     parser.parseComplete(body);
	  }
	})
示例#6
0
/**
 * Parses `body`, passes the resulting DOM and `pageNum` to iterate(), then
 * signals completion with callback(null, null).
 *
 * @param {string} body - HTML source.
 * @param {*} pageNum - Forwarded unchanged to iterate().
 * @param {function(null, null)} callback - Completion callback.
 */
var parseHtml = function (body, pageNum, callback) {
    var domBuilder = new htmlparser.DefaultHandler();
    new htmlparser.Parser(domBuilder).parseComplete(body);

    iterate(domBuilder.dom, pageNum);
    callback(null, null);
};
示例#7
0
// Parses a new <h1> fragment, inserts it into the "before" fixture via
// mkSrc.insertAppend, writes the result out, and lets testUtil compare the
// written HTML against the "after" fixture.
test('insert an element before the target element in the tree', function (t) {
  t.plan(1);

  var sourcePath = "./test/data/insertAppend/before.html";
  var expectedPath = "./test/data/insertAppend/after.html";
  var outputPath = "/tmp/insertAppend.html";
  var anchorId = "xxxxx5";
  var fragmentHtml = "<h1 makomi-id=\"yyyyy1\">I'm a new element</h1>";

  // Runs once the fragment itself has been parsed into a DOM.
  function onFragmentParsed(error, fragmentDom) {
    testUtil.compareToExpectedOutput(t, expectedPath, function (deliver) {
      mkSrc.parseFile(sourcePath, function (er, pageDom) {
        console.log("parsed file");
        mkSrc.insertAppend(pageDom, anchorId, fragmentDom, function (mergedDom) {
          console.log("insert append");
          mkSrc.writeHtml(outputPath, mergedDom, function (html) {
            deliver(html);
          });
        });
      });
    });
  }

  var fragmentHandler = new htmlparser.DefaultHandler(onFragmentParsed);
  new htmlparser.Parser(fragmentHandler).parseComplete(fragmentHtml);
});
示例#8
0
/**
 * Parses `data` and hands a selector function `$` to `callback`.
 *
 * With `this.options.jsdom` set, the document is built through jsdom and
 * jquery; otherwise it is parsed with htmlparser and `$` delegates to
 * `this.$(selector, context || dom)`.
 *
 * @param {string} data - HTML source.
 * @param {function(?Error, Function=, string=, Object=, Object=)} callback -
 *   Called with (err) on an htmlparser failure, otherwise with
 *   (null, $, data, headers, response).
 * @param {Object} [response] - Optional response; its `headers` are forwarded
 *   ({} when absent).
 */
Job.prototype.parseHtml = function (data, callback, response) {
    // Declared locally: the previous bare assignment leaked `headers` into the
    // global scope (and throws a ReferenceError under strict mode).
    var headers = response && response.headers ? response.headers : {};
    var $;
    if (this.options.jsdom) {
        var features = {
                FetchExternalResources: false,
                ProcessExternalResources: false,
                QuerySelector: false
        };
        var window = require('jsdom').jsdom(data, null, {features:features}).createWindow(),
            jquery = require('jquery'),
            default_$ = jquery.create(window);
        $ = function (selector, context) {
            return context ? jquery.create(context) : default_$(selector);
        };
        callback(null, $, data, headers, response);
    } else {
        var self = this, handler, parser, htmlparser = require('htmlparser');
        handler = new htmlparser.DefaultHandler(function (err, dom) {
            if (err) {
                callback(err);
            } else {
                $ = function (selector, context) {
                    //Allow the user to specify a custom context (thanks to github.com/jimbishopp)
                    return self.$(selector, context || dom);
                };
                callback(null, $, data, headers, response);
            }
        }, {verbose: true, ignoreWhitespace: true});
        parser = new htmlparser.Parser(handler);
        parser.parseComplete(data);
    }
};
示例#9
0
/**
 * Parses `htmlCode` and returns its top-level nodes, initialised and blessed.
 *
 * Each node receives `_pos` (its zero-based position) and is passed to
 * _initDomNode with a null parent and the id "R1", "R2", ... The node list is
 * then passed to _resBless and flagged with `_findOutside = true`. On a parse
 * error an empty (but still blessed and flagged) list is returned.
 *
 * @param {string} htmlCode - HTML source.
 * @returns {Array} The top-level nodes.
 */
function _resBuild(htmlCode) {

	var
		error,
		els,
		// Declared here: the previous bare assignment created a global `parser`
		// (and throws a ReferenceError under strict mode).
		parser,
		pHandler = new htmlparser.DefaultHandler(function(err,doc){
			if ( err )
				error = err;
		});

	parser = new htmlparser.Parser(pHandler);
	parser.parseComplete(htmlCode);
	els = error ? [] : pHandler.dom;

	// Initialize and bless them
	var id = 0;
	els.forEach(function(node){
		node._pos = id;
		_initDomNode(node,null,"R"+(++id));
	});
	_resBless(els);

	// If there will be a find() on the returned object, it will also include searching on the main nodes
	els._findOutside = true;

	return els;
}
示例#10
0
文件: index.js 项目: villadora/ex-fx
    // Downloads the bitcoincharts markets page and fills `symbols` and
    // `symbols2market` from the rows of `table#markets`, then calls
    // `callback` (when set) with no arguments.
    request("http://bitcoincharts.com/markets/", function(err, resp, body) {
        // A failed request has no body to parse. Previously this error was
        // shadowed by the handler's own `err` and silently dropped.
        // NOTE(review): as with a parse error below, `callback` is not
        // invoked on failure; confirm callers do not wait on it.
        if (err) {
            console.error("Error: " + err);
            return;
        }

        var handler = new htmlparser.DefaultHandler(function(parseErr, dom) {
            if (parseErr) {
                console.error("Error: " + parseErr);
            } else {
                // soupselect happening here...
                var table = select(dom, 'table#markets'),
                    trs = select(table[0], 'tbody tr');
                // Starts at 1: the first matched row is skipped.
                for (var i = 1; i < trs.length; ++i) {
                    try {
                        var tr = trs[i];
                        var symbol = tr.attribs.id,
                            market = tr.attribs.market,
                            stat = readrow(tr.children);
                        stat.symbol = symbol;
                        symbols[symbol] = stat;
                        symbols2market[symbol] = market;
                    } catch (e) {
                        // A malformed row is reported and skipped.
                        console.error(e);
                    }
                }

                if (callback)
                    callback();
            }
        });

        var parser = new htmlparser.Parser(handler);
        parser.parseComplete(body);
    });
示例#11
0
 // Looks up `word` on urbandictionary.com, takes the text of the first
 // `.meaning` element (or a fallback sentence), substitutes a fixed list of
 // words, and sends the result as both the speech text and the card content.
 rest.get('http://www.urbandictionary.com/define.php?term='+word).on('complete', function(body) {
     var handler = new htmlparser.DefaultHandler(function(err, dom) {
         if (err) {
             console.log("Error: " + err);
         } else {
             var meaning = select(dom, '.meaning');
             if (typeof meaning[0] !== 'undefined') { //Note Lock is not defined, for example
                 meaning = meaning[0].children[0].data;
             } else meaning = "The definition for " + word + " is not defined";
             meaning = meaning.replace(/&#39;/g, "'"); //replace character code for quote with '
             // NOTE(review): `originalmeaning` is not declared in this block;
             // presumably it lives in an enclosing scope - confirm.
             originalmeaning = meaning;
             // The asterisks are escaped: an unescaped `**` is an invalid regular
             // expression (a SyntaxError that stops the whole file from loading).
             // NOTE(review): these two patterns look masked in this copy of the
             // source; as written they match the literal masked spelling only.
             meaning = meaning.replace(/f\*\*k/gi, "sexual intercourse");
             meaning = meaning.replace(/c\*\*k/gi, "heart");
             meaning = meaning.replace(/penis/gi, "heart");
             meaning = meaning.replace(/ball/gi, "brain");
             meaning = meaning.replace(/breast/gi, "eye");
             meaning = meaning.replace(/lesbian/gi, "woman");
             console.log(meaning);
             //if meaning !== originalmeaning we may not want to read at all.
             // Both outputs use the fully substituted text. The speech text used
             // to read a misspelled `mearning` variable (an implicit global) that
             // only carried the first substitution.
             echoResponse.response.outputSpeech.text = meaning;
             echoResponse.response.card.content = meaning;
             sendResponse();
         }
     });
     var parser = new htmlparser.Parser(handler);
     parser.parseComplete(body);
 });
    // Builds with webpack, checks that both emitted assets carry an integrity
    // hash, then verifies that the generated index.html has exactly one
    // <script> and one <link> tag carrying the matching attributes.
    webpack(webpackConfig, function webpackCallback(err, result) {
      if (err) {
        return callback(err);
      }

      var assets = result.compilation.assets;
      var jsIntegrity = assets['bundle.js'].integrity;
      expect(jsIntegrity).toMatch(/^sha/);
      var cssIntegrity = assets['styles.css'].integrity;
      expect(cssIntegrity).toMatch(/^sha/);

      // Asserts a single `tagName` element with crossorigin and the given integrity.
      function expectSingleTag(dom, tagName, integrity) {
        var found = select(dom, tagName);
        expect(found.length).toEqual(1);
        expect(found[0].attribs.crossorigin).toEqual('anonymous');
        expect(found[0].attribs.integrity).toEqual(integrity);
      }

      var handler = new htmlparser.DefaultHandler(function htmlparserCallback(error, dom) {
        if (error) {
          callback(error);
          return;
        }
        expectSingleTag(dom, 'script', jsIntegrity);
        expectSingleTag(dom, 'link', cssIntegrity);
        callback();
      });
      var parser = new htmlparser.Parser(handler);
      var indexHtml = fs.readFileSync(path.join(tmpDir.name, 'index.html'), 'utf-8');
      parser.parseComplete(indexHtml);
      tmpDir.removeCallback();
    });
示例#13
0
/**
 * Runs `source` through this._pre_process() and parses the result, reporting
 * the outcome to `cb` via an htmlparser DefaultHandler.
 *
 * @param {string} source - Raw source text.
 * @param {function(?Error, Array=)} cb - Handler callback (error, dom).
 */
blueprints.prototype._parse_file = function(source, cb) {
   var domHandler = new htmlparser.DefaultHandler(cb, { verbose: false });
   var domParser = new htmlparser.Parser(domHandler);
   var prepared = this._pre_process(source);

   domParser.parseComplete(prepared);
};
示例#14
0
        // Grabs the page's HTML, collects a {name, slug} record for every
        // `.hero-list__item a` link, closes the page and hands the list to getHeroes.
        page.evaluate(function () { return document.documentElement.innerHTML; }, function (result) {

          var onDom = function(err, dom) {
              if (err) {
                  console.log("Error: " + err);
                  return;
              }

              var heroesArr = select(dom, '.hero-list__item a').map(function(anchor) {
                return {name:anchor.children[3].children[0].raw, slug:anchor.attribs['href']};
              });

              page.close();
              console.log('got all hero names and slugs');
              getHeroes(ph,heroesArr,scrapedAllHeroes);
          };

          var domHandler = new htmlparser.DefaultHandler(onDom);
          new htmlparser.Parser(domHandler).parseComplete(result);

        });
示例#15
0
  // Grunt task: copies the source index file to the UT index file, replacing
  // the `<!-- UT-INCLUDE -->` marker with <script> tags for the configured UT
  // files and switching constants.js to constants-ut.js.
  grunt.registerTask('build-ut', 'Builds a UI testing version of your application', function() {
    var buildConfig = grunt.config.get('build');
    var appRoot = __dirname + '/' + buildConfig.path;
    var outputPath = appRoot + '/' + buildConfig.utFileName;
    var inputPath = appRoot + '/' + buildConfig.sourceFileName;
    var htmlparser = require("htmlparser");
    var markup = fs.readFileSync(inputPath, 'ascii');

    // The parse only surfaces markup errors; the resulting DOM is not used.
    var domHandler = new htmlparser.DefaultHandler(function (error, dom) {
      if (error) {
        console.log(error.red);
      }
    });
    new htmlparser.Parser(domHandler).parseComplete(markup);

    var marker = xRegExp.exec(markup, xRegExp('<!-- UT-INCLUDE -->', 's'));

    if (marker) {
      var scriptTags = '';
      buildConfig.utJavascriptFiles.forEach(function(filePath) {
        scriptTags += '<script type="text/javascript" src="' + filePath + '"></script>\n';
      });

      markup = markup.replace(marker[0], scriptTags);
    }
    markup = markup.replace('app/components/core/constants.js', 'app/components/core/constants-ut.js');

    console.log(('writing out ' + outputPath + ' file').green);
    fs.writeFileSync(outputPath, markup, 'ascii');
  });
示例#16
0
/**
 * Returns the concatenated text of every text node in `html`, passed through
 * exports.html_decode. A parse error yields the decoded empty string.
 *
 * @param {string} html - HTML source.
 * @returns {*} Whatever exports.html_decode returns for the collected text.
 */
function get_text_from_html(html){
    var collected = "";

    // Depth-first walk: descends into tags, appends text nodes, ignores the rest.
    function collect(nodes){
        for(var key in nodes){
            var node = nodes[key];
            if(typeof node != 'object'){
                continue;
            }
            if(node['type'] == 'tag'){
                collect(node['children']);
            }else if(node['type'] == 'text'){
                collected += node['data'];
            }
        }
    }

    var domHandler = new htmlparser.DefaultHandler(function(err, dom) {
        if (err) {
            collected = "";
            return;
        }
        collect(dom);
    }, { verbose: false });

    new htmlparser.Parser(domHandler).parseComplete(html);
    return exports.html_decode(collected);
}
示例#17
0
  // Downloads `url` and reports the feeds it advertises through
  // done(error, feeds): first the atom/rss <link> tags found under <head>;
  // if there are none, the document itself when its root is <feed> (with the
  // Atom namespace) or <rss>.
  request.get(url, {gzip: true}, function (error, response, body) {
    // A failed request has no body to parse; report it the same way a parse
    // failure is reported. Previously this error was shadowed and dropped.
    if (error) {
      return done(error, null);
    }

    var feeds = [];
    var handler = new htmlparser.DefaultHandler(function (parseError, dom) {
      if (parseError) {
        done(parseError, null);
        return;
      }

      // Document title, or '' when the page has none. Computed once so the
      // per-link fallback below cannot throw on a title-less page.
      var titleNode = select(dom, "title")[0];
      var title = '';
      if(titleNode && titleNode.children && titleNode.children[0])
        title = titleNode.children[0].raw;

      select(dom, "head link").forEach(function(link) {
        // A <link> without attributes may carry no `attribs` object at all.
        var attribs = link.attribs || {};
        if ((attribs.type === "application/atom+xml" || attribs.type === "application/rss+xml") && attribs.href) {
          var feedUrl = urlparser.parse(attribs.href);
          var feed = {rel: "alternate", type: attribs.type};
          if(feedUrl.hostname) {
            feed.href = attribs.href;
          }
          else {
            // Relative href: resolve it against the page URL.
            feed.href = urlparser.format(urlparser.resolve(url, attribs.href));
          }
          feed.title = attribs.title ? attribs.title : title;
          feeds.push(feed)
        }
      });

      if(feeds.length === 0) {
        var atom = select(dom, "feed");
        var xmlns = atom.length > 0 && atom[0].attribs ? atom[0].attribs.xmlns : undefined;
        if(typeof xmlns === 'string' && xmlns.toLowerCase() === 'http://www.w3.org/2005/Atom'.toLowerCase()) {
          feeds.push({
            rel: "self",
            type: "application/atom+xml",
            href: url,
            title: title
          });
        }
      }
      if(feeds.length === 0) {
        var rss = select(dom, "rss")[0];
        if(rss) {
          feeds.push({
            rel: "self",
            type: "application/rss+xml",
            href: url,
            title: title
          });
        }
      }
      done(null, feeds);
    });
    var parser = new htmlparser.Parser(handler);
    parser.parseComplete(body);
  });
示例#18
0
/**
 * Builds a Template for the given file and parses its content, feeding the
 * parse result to the callback produced by getHandler(template).
 *
 * @param {Object} options - Options forwarded to htmlparser.DefaultHandler.
 * @param {string} filename
 * @param {string} filepath
 * @param {string} content
 * @returns {Template} The template created for this file.
 */
function compile(options, filename, filepath, content) {
  var template = new Template(filename, filepath, content);
  var onParsed = getHandler(template);
  var domHandler = new htmlparser.DefaultHandler(onParsed, options);
  var domParser = new htmlparser.Parser(domHandler, { includeLocation: true });

  domParser.parseComplete(template.content);
  return template;
};
示例#19
0
文件: body.js 项目: dvv/F
	/**
	 * Parses `data` with whitespace ignored and non-verbose output, reporting
	 * the result to `next` through an HTMLParser DefaultHandler.
	 *
	 * @param {string} data - HTML source.
	 * @param {function(?Error, Array=)} next - Handler callback (error, dom).
	 */
	function parseHTML(data, next) {
		var handlerOptions = { ignoreWhitespace: true, verbose: false };
		var domHandler = new HTMLParser.DefaultHandler(next, handlerOptions);
		new HTMLParser.Parser(domHandler).parseComplete(data);
	}
    // Registers a case labelled with the htmlparser version; the case parses
    // every entry of `testPages` with a fresh handler and parser each time.
    // NOTE(review): the receiver of .add() is outside this view - presumably a
    // benchmark suite; confirm.
    .add('htmlparser v' + htmlparserVersion + ' (https://github.com/tautologistics/node-htmlparser/)', function () {
        for (var i = 0; i < testPages.length; i++) {
            // No callback is given to the handler: the parse result is discarded.
            var handler = new htmlparser.DefaultHandler(),
                parser = new htmlparser.Parser(handler);

            parser.parseComplete(testPages[i]);
        }
    })
示例#21
0
/**
 * Parses RSS text and prints the title and link of its first item.
 *
 * @param {string} rss - RSS document text.
 * @returns {*} The result of next(error) when the feed has no items,
 *   otherwise undefined. `next` comes from the enclosing scope.
 */
function parseRSSFeed(rss) {
    const handler = new htmlparser.RssHandler();
    const parser = new htmlparser.Parser(handler);
    parser.parseComplete(rss);
    if (!handler.dom.items.length) {
        // Message typo fixed ("itmes" -> "items").
        return next(new Error('No RSS items found'));
    }
    const item = handler.dom.items.shift();
    console.log(`${item.title}\n${item.link}`);
}
示例#22
0
/**
 * Parses `html` and passes `cb` a lookup function that runs a soupselect
 * selector against the parsed DOM.
 * NOTE(review): the handler's `err` is not inspected before `dom` is used.
 *
 * @param {string} html - HTML source.
 * @param {function(function(string): Array)} cb - Receives the lookup function.
 */
function parseHtml(html, cb){
	var htmlparser = require('htmlparser');
	var select = require('soupselect').select;

	var domHandler = new htmlparser.DefaultHandler(function(err, dom) {
		var query = function(selector) {
			return select(dom, selector);
		};
		cb(query);
	});

	new htmlparser.Parser(domHandler).parseComplete(html);
}
示例#23
0
文件: serial.js 项目: minhajksm/code
/**
 * Parses RSS text and prints the title and the link of its first item.
 *
 * @param {string} rss - RSS document text.
 * @returns {*} The result of next(error) when the feed has no items,
 *   otherwise undefined. `next` comes from the enclosing scope.
 */
function parseRSSFeed (rss) {
  var feedHandler = new htmlparser.RssHandler();
  new htmlparser.Parser(feedHandler).parseComplete(rss);

  var entries = feedHandler.dom.items;
  if (entries.length) {
    var first = entries.shift();
    console.log(first.title);
    console.log(first.link);
    return;
  }
  return next(new Error('No RSS items found'));
}
示例#24
0
	parseHTML: function(data){
		try{
			var handler = new htmlparser.DefaultHandler();
			var parser = new htmlparser.Parser(handler);
			parser.parseComplete(data);

			return handler.dom;
		}catch(e){
			return null;
		}
	},
示例#25
0
/**
 * Parses `html` and reports to `handler`: (err, null) on failure, otherwise
 * (null, zcsel.initDom(doc)).
 *
 * @param {string} html - HTML source.
 * @param {function(?Error, *)} handler - Result callback.
 * @returns {*} Whatever parser.parseComplete() returns.
 */
function parse(html,handler){
	var onParsed = function(err,doc){
		return err ? handler(err,null) : handler(null,zcsel.initDom(doc));
	};
	var domParser = new htmlparser.Parser(new htmlparser.DefaultHandler(onParsed));
	return domParser.parseComplete(html);
}
示例#26
0
文件: index.js 项目: boerhani/seven
/**
 * Parses an HTML string and returns its DOM as pretty-printed JSON
 * (2-space indent). For a non-string argument an Error is logged and
 * undefined is returned.
 *
 * @param {string} data - HTML source.
 * @returns {string|undefined}
 */
seven.prototype.dom = function(data) {
	if(typeof data !="string"){ // check arguments
		console.error(new Error('Check arguments! [data]'));
		return;
	}

	debug('Dom tags');
	var domBuilder = new htmlparser.DefaultHandler();
	new htmlparser.Parser(domBuilder).parseComplete(data);
	return JSON.stringify(domBuilder.dom, null, 2);
};
示例#27
0
文件: htmlDom.js 项目: Djiit/openwhyd
exports.parseHtmlDom = function(html, cb){
	var parser = new htmlparser.Parser(new htmlparser.DefaultHandler(function(error, dom) {
		if (error) {
			console.error(error);
			cb({error: error});
		}
		else
			cb(exports.parseDom(dom));
	}));
	parser.parseComplete(html);
}
        // When the response has ended, parses the accumulated `data`, extracts
        // the proxy list nodes and reports them through callback(null, nodes).
        res.on('end', function() {
            var parser,
                handler = new htmlparser.DefaultHandler(function (error, dom) {
                    // Propagate parse failures instead of walking an unusable DOM.
                    if (error) {
                        return callback(error);
                    }

                    var proxies = find(matchProxyList, dom),
                        nodes = parseNodes(proxies);

                    callback(null, nodes);
                }, { verbose: false, ignoreWhitespace: true });

            parser = new htmlparser.Parser(handler);
            parser.parseComplete(data);
        });
示例#29
0
 function(rss) {
   var handler = new htmlparser.RssHandler();
   var parser = new htmlparser.Parser(handler);
   parser.parseComplete(rss);
   if (handler.dom.items.length) {
     var item = handler.dom.items.shift();
     console.log(item.title);
     console.log(item.link);
   } else {
     next('No RSS items found.');
   } 
 }
示例#30
0
/**
 * Parses RSS text and prints the first item's title and URL.
 *
 * @param {string} rss - RSS document text.
 * @returns {*} The result of next(error) when the feed has no items (after
 *   logging the problem), otherwise undefined. `next` comes from the
 *   enclosing scope.
 */
function parseRSSFeed(rss) {
  const feedHandler = new htmlparser.RssHandler();
  new htmlparser.Parser(feedHandler).parseComplete(rss);

  const entries = feedHandler.dom.items;
  if (entries.length) {
    const first = entries.shift();
    console.log(`RSS Feed Title: ${first.title}`);
    console.log(`RSS Feed URL  : ${first.link}`);
    return;
  }

  console.log(`Error parsing RSS feed: no elements found`);
  return next(new Error("No RSS items found"));
}