/**
 * Parses a profile page and extracts the user name and "last contacted" text.
 *
 * @param {string} html - Raw HTML of the profile page.
 * @param {function(Object)} callback - Called exactly once with the profile
 *   object. On a parse error the object is empty; a field whose element is
 *   not present in the page is set to null.
 */
var parseProfile = function(html, callback) {
    var profile = {};

    // Text of the first child of the first matched node, or null when the
    // selector matched nothing (or the node has no children).
    var firstChildData = function(nodes) {
        var node = nodes && nodes[0];
        var child = node && node.children && node.children[0];
        return child ? child.data : null;
    };

    var handler = new htmlparser.DefaultHandler(function(err, dom) {
        if (err) {
            sys.debug("Error: " + err);
        } else {
            profile.username = firstChildData(select(dom, 'span#basic_info_sn'));
            profile.lastContacted = firstChildData(select(dom, 'div#contacted p'));
        }
        callback(profile);
    });
    var parser = new htmlparser.Parser(handler);
    parser.parseComplete(html);
};
// XHR state handler: logs every state change and, once readyState reaches 4,
// parses the response HTML, builds ten activity records (rows 5..14 as
// understood by the getActivity* helpers) and passes each one to ArticleExist.
xhr.onreadystatechange = function() {
    sys.puts("State: " + this.readyState);
    if (this.readyState != 4) {
        return;
    }

    var rawHtml = this.responseText;
    var htmlparser = require("htmlparser");
    var domHandler = new htmlparser.DefaultHandler(function (error, dom) {
        if (error) {
            console.log("error");
        }
    });
    new htmlparser.Parser(domHandler).parseComplete(rawHtml);

    var FIRST_ROW = 5;
    var ROW_COUNT = 10;
    var TabOfActivities = [];
    for (var offset = 0; offset < ROW_COUNT; offset++) {
        var row = FIRST_ROW + offset;
        var entry = {
            title: getActivityTitle(domHandler.dom, row),
            author: getActivityAuthor(domHandler.dom, row),
            datePub: '',
            link: getActivityLink(domHandler.dom, row),
            source: 'Jira'
        };
        // The id is derived from the link and author just collected.
        entry.id = entry.link + entry.author;
        TabOfActivities[offset] = entry;
    }

    // Length is re-read on every pass, as ArticleExist receives the array itself.
    for (var idx = 0; idx < TabOfActivities.length; idx++) {
        ArticleExist(TabOfActivities, idx);
    }
};
/**
 * Parses a page and extracts a GBP price from the third <b> element.
 *
 * @param {string} html - Raw HTML of the page.
 * @param {function(?Error, {price: number, currency: string}=)} callback -
 *   Called exactly once: with an Error when the price text cannot be turned
 *   into a number, otherwise with (null, result). Structural failures
 *   (parse error, missing DOM, missing element, wrong currency symbol) are
 *   delegated to dumpHtml together with the callback.
 */
function parsePage(html, callback) {
    statsd.increment('pages_parsed');

    var handler = new htmlparser.DefaultHandler(function (error, dom) {
        if (error) {
            return dumpHtml(html, error, callback);
        }
        if (!dom) {
            return dumpHtml(html, new Error("No DOM object!"), callback);
        }

        var priceText;
        try {
            var bolds = htmlparser.DomUtils.getElementsByTagName("b", dom);
            priceText = bolds[2].children[0].data;
        } catch (e) {
            return dumpHtml(html, e, callback);
        }

        // Indexing a non-string below would throw outside the try block.
        if (typeof priceText !== 'string') {
            return dumpHtml(html, new Error("Price element has no text"), callback);
        }
        if (priceText[0] != '£') {
            return dumpHtml(html, new Error("Expected currency symbol to be £; got " + priceText[0]), callback);
        }

        var price = parseFloat(priceText.slice(1));
        if (isNaN(price)) {
            var errMsg = "Failed to convert price string to float; result is NaN";
            // Log the offending text; the parsed value is just NaN.
            log.error({price: priceText}, errMsg);
            // Return here so the success callback below is not also invoked.
            return callback(new Error(errMsg));
        }

        callback(null, { price: price, currency: "GBP" });
    });

    var parser = new htmlparser.Parser(handler);
    parser.parseComplete(html);
}
/**
 * Parses a page and hands the children of its <body> element to the callback.
 *
 * @param {string} page - Raw HTML.
 * @param {function(?Error, Array=)} callback - Called exactly once: with the
 *   parse error, with an Error when no html > body element exists, or with
 *   (null, bodyChildren).
 */
function _get_body(page, callback) {
    var parseError = null;
    var handler = new np.DefaultHandler(function(error) {
        if (error) {
            parseError = error;
        }
    }, { ignoreWhitespace: true });
    var parser = new np.Parser(handler);
    parser.parseComplete(page);

    // Report the parse failure once instead of also walking the DOM afterwards.
    if (parseError) {
        return callback(parseError);
    }

    var roots = handler.dom || [];
    for (var i = 0; i < roots.length; i++) {
        var node = roots[i];
        if (node.name != 'html') {
            continue;
        }
        console.log('found html');
        var children = node.children || [];
        for (var j = 0; j < children.length; j++) {
            if (children[j].name == 'body') {
                console.log('found body');
                return callback(null, children[j].children);
            }
        }
    }

    // Without this the caller would wait forever on documents lacking a body.
    return callback(new Error('No <body> element found'));
}
// Searches ultrastar-es.org for `query`, scrapes the result list and passes
// an array of song descriptors to songsCallback(final, event). An empty array
// is reported when the request fails, the page cannot be parsed, or the
// result list is missing, so the caller is always notified.
// NOTE(review): `query` is concatenated into the URL unescaped — confirm the
// caller encodes it.
request({url:'http://ultrastar-es.org/es/canciones?busqueda='+query, jar: j}, function (error, response, body) {
    if (error || !response || response.statusCode != 200) {
        return songsCallback([], event);
    }

    // Raw text of a node's first child, or undefined when it is missing.
    var firstChildRaw = function (node) {
        var child = node && node.children && node.children[0];
        return child ? child.raw : undefined;
    };

    var handler = new htmlparser.DefaultHandler(function(err, dom) {
        var final = [];
        var canciones = err ? undefined : select(dom, 'ul.canciones')[0];
        var songs = (canciones && canciones.children) || [];

        for (var c = 0; c < songs.length; c++) {
            var song = songs[c];
            var img = select(song, 'img')[0];
            if (img == undefined) {
                // Entries without an image are skipped.
                continue;
            }

            var a = select(song, 'a');
            // Reset per song: a function-scoped `var` would otherwise keep the
            // previous song's link when this one has no torrent anchor.
            var download = null;
            for (var x = 0; x < a.length; x++) {
                var href = a[x].attribs && a[x].attribs.href;
                if (typeof href === 'string' && href.indexOf("torrent") != -1) {
                    download = href;
                }
            }
            console.log(download);

            final.push({
                "title": firstChildRaw(a[4]),
                "artist": firstChildRaw(a[3]),
                "remote": true,
                "image": img.attribs.src,
                "download": download === null ? null : "http://ultrastar-es.org" + download
            });
        }
        songsCallback(final, event);
    });
    var parser = new htmlparser.Parser(handler);
    parser.parseComplete(body);
})
/**
 * Parses `body`, feeds the resulting DOM and the page number to iterate(),
 * then signals completion.
 *
 * @param {string} body - Raw HTML.
 * @param {*} pageNum - Passed through unchanged to iterate().
 * @param {function(null, null)} callback - Always invoked as callback(null, null).
 */
var parseHtml = function (body, pageNum, callback) {
    // No completion callback is given to the handler; its `dom` property is
    // read directly once parseComplete has returned.
    var domHandler = new htmlparser.DefaultHandler();
    new htmlparser.Parser(domHandler).parseComplete(body);

    iterate(domHandler.dom, pageNum);
    callback(null, null);
};
// Parses a new <h1> fragment, inserts it via mkSrc.insertAppend relative to
// the element whose makomi-id is `targetId`, writes the result and compares
// it with the expected fixture. Parse and file errors are thrown so a broken
// fixture fails the test instead of being silently ignored.
test('insert an element before the target element in the tree', function (t) {
    t.plan(1);

    var inFile = "./test/data/insertAppend/before.html";
    var expectedFile = "./test/data/insertAppend/after.html";
    var outFile = "/tmp/insertAppend.html";
    var targetId = "xxxxx5";
    var newHtml = "<h1 makomi-id=\"yyyyy1\">I'm a new element</h1>";

    var handler = new htmlparser.DefaultHandler(function (error, insertDom) {
        if (error) {
            throw error;
        }
        testUtil.compareToExpectedOutput(t, expectedFile, function (cb) {
            mkSrc.parseFile(inFile, function (er, dom) {
                if (er) {
                    throw er;
                }
                console.log("parsed file");
                mkSrc.insertAppend(dom, targetId, insertDom, function (newDom) {
                    console.log("insert append");
                    mkSrc.writeHtml(outFile, newDom, function (html) {
                        cb(html);
                    });
                });
            });
        });
    });
    var parser = new htmlparser.Parser(handler);
    parser.parseComplete(newHtml);
});
Job.prototype.parseHtml = function (data, callback, response) { headers = response && response.headers ? response.headers : {}; if (this.options.jsdom) { var features = { FetchExternalResources: false, ProcessExternalResources: false, QuerySelector: false }; var $, window = require('jsdom').jsdom(data, null, {features:features}).createWindow(), jquery = require('jquery'), default_$ = jquery.create(window); $ = function (selector, context) { return context ? jquery.create(context) : default_$(selector); }; callback(null, $, data, headers, response); } else { var self = this, handler, parser, $, htmlparser = require('htmlparser'); handler = new htmlparser.DefaultHandler(function (err, dom) { if (err) { callback(err); } else { $ = function (selector, context) { //Allow the user to specify a custom context (thanks to github.com/jimbishopp) return self.$(selector, context || dom); }; callback(null, $, data, headers, response); } }, {verbose: true, ignoreWhitespace: true}); parser = new htmlparser.Parser(handler); parser.parseComplete(data); } };
/**
 * Parses HTML into a list of top-level nodes, initialises each one and
 * blesses the list.
 *
 * @param {string} htmlCode - Raw HTML.
 * @returns {Array} The top-level nodes (an empty array when parsing reported
 *   an error), after _initDomNode/_resBless, flagged with _findOutside = true.
 */
function _resBuild(htmlCode) {
    var error = null;
    var pHandler = new htmlparser.DefaultHandler(function(err, doc) {
        if (err) {
            error = err;
        }
    });
    // Declared locally: the bare assignment used before created a global.
    var parser = new htmlparser.Parser(pHandler);
    parser.parseComplete(htmlCode);

    var els = error ? [] : pHandler.dom;

    // Initialize and bless them: _pos is the zero-based position, the id
    // passed to _initDomNode is "R" + the one-based position.
    var id = 0;
    els.forEach(function(node) {
        node._pos = id;
        _initDomNode(node, null, "R" + (++id));
    });
    _resBless(els);

    // If there will be a find() on the returned object, it will also include searching on the main nodes
    els._findOutside = true;

    return els;
}
// Fetches the bitcoincharts market table and fills the `symbols` and
// `symbols2market` maps from its rows, then invokes `callback` (if set).
// Request errors, parse errors and a missing table are logged and abort the
// update without calling `callback`, matching the parse-error path.
request("http://bitcoincharts.com/markets/", function(err, resp, body) {
    if (err) {
        // Previously shadowed by the handler's own `err` and never checked.
        console.error("Error: " + err);
        return;
    }

    var handler = new htmlparser.DefaultHandler(function(parseErr, dom) {
        if (parseErr) {
            console.error("Error: " + parseErr);
            return;
        }

        // soupselect happening here...
        var table = select(dom, 'table#markets');
        if (table.length === 0) {
            console.error("Error: table#markets not found");
            return;
        }

        var trs = select(table[0], 'tbody tr');
        // Starts at 1 — presumably the first row is a header; confirm against the page.
        for (var i = 1; i < trs.length; ++i) {
            try {
                var tr = trs[i];
                var symbol = tr.attribs.id,
                    market = tr.attribs.market,
                    stat = readrow(tr.children);
                stat.symbol = symbol;
                symbols[symbol] = stat;
                symbols2market[symbol] = market;
            } catch (e) {
                // A malformed row is logged and skipped; remaining rows still load.
                console.error(e);
            }
        }
        if (callback) callback();
    });
    var parser = new htmlparser.Parser(handler);
    parser.parseComplete(body);
});
// Looks up `word` on Urban Dictionary, takes the text of the first
// `.meaning` element, replaces a fixed list of words, and sends the filtered
// text as both the spoken response and the card content.
rest.get('http://www.urbandictionary.com/define.php?term='+word).on('complete', function(body) {
    // Applied in order. The two censored entries also match the
    // asterisk-masked spelling. NOTE(review): the original patterns were not
    // valid regular expressions, so the intended words are inferred — confirm.
    var replacements = [
        [/&#0*39;|&apos;/g, "'"], //replace character code for quote with '
        [/f(?:uc|\*\*)k/gi, "sexual intercourse"],
        [/c(?:oc|\*\*)k/gi, "heart"],
        [/penis/gi, "heart"],
        [/ball/gi, "brain"],
        [/breast/gi, "eye"],
        [/lesbian/gi, "woman"]
    ];

    var handler = new htmlparser.DefaultHandler(function(err, dom) {
        if (err) {
            console.log("Error: " + err);
            return;
        }

        var matches = select(dom, '.meaning');
        var firstChild = matches[0] && matches[0].children && matches[0].children[0];
        var meaning;
        if (firstChild && typeof firstChild.data === 'string') {
            meaning = firstChild.data;
        } else {
            //Note Lock is not defined, for example
            meaning = "The definition for " + word + " is not defined";
        }

        replacements.forEach(function(pair) {
            meaning = meaning.replace(pair[0], pair[1]);
        });
        console.log(meaning);

        // TODO: if filtering changed the text we may not want to read it at all.
        // Speech and card now carry the same fully filtered text; a misspelled
        // variable previously sent speech with only the first filter applied.
        echoResponse.response.outputSpeech.text = meaning;
        echoResponse.response.card.content = meaning;
        sendResponse();
    });
    var parser = new htmlparser.Parser(handler);
    parser.parseComplete(body);
});
// Runs the webpack build, checks that both emitted assets carry an integrity
// value, then checks that the generated index.html has exactly one <script>
// and one <link>, each with crossorigin="anonymous" and the matching
// integrity attribute. The temporary directory is removed after parsing.
webpack(webpackConfig, function webpackCallback(err, result) {
    if (err) {
        return callback(err);
    }

    var assets = result.compilation.assets;
    var jsIntegrity = assets['bundle.js'].integrity;
    expect(jsIntegrity).toMatch(/^sha/);
    var cssIntegrity = assets['styles.css'].integrity;
    expect(cssIntegrity).toMatch(/^sha/);

    // Asserts a single tag with the anonymous crossorigin mode and the given hash.
    var expectSingleSriTag = function (nodes, integrity) {
        expect(nodes.length).toEqual(1);
        expect(nodes[0].attribs.crossorigin).toEqual('anonymous');
        expect(nodes[0].attribs.integrity).toEqual(integrity);
    };

    var handler = new htmlparser.DefaultHandler(function htmlparserCallback(error, dom) {
        if (error) {
            callback(error);
            return;
        }
        expectSingleSriTag(select(dom, 'script'), jsIntegrity);
        expectSingleSriTag(select(dom, 'link'), cssIntegrity);
        callback();
    });

    var indexHtml = fs.readFileSync(path.join(tmpDir.name, 'index.html'), 'utf-8');
    new htmlparser.Parser(handler).parseComplete(indexHtml);
    tmpDir.removeCallback();
});
/**
 * Pre-processes `source` with this._pre_process and parses the result.
 *
 * @param {string} source - Raw template source.
 * @param {function(?Error, Array)} cb - Given directly to the DOM handler as
 *   its completion callback.
 */
blueprints.prototype._parse_file = function(source, cb) {
    var domHandler = new htmlparser.DefaultHandler(cb, { verbose: false });
    var parser = new htmlparser.Parser(domHandler);
    var prepared = this._pre_process(source);

    parser.parseComplete(prepared);
};
// Grabs the rendered document's inner HTML from the page, extracts every
// hero link (name + slug), closes the page and hands the list to getHeroes.
page.evaluate(function () {
    return document.documentElement.innerHTML;
}, function (result) {
    // now we have the whole body, parse it and select the nodes we want...
    var onDom = function (err, dom) {
        if (err) {
            console.log("Error: " + err);
            return;
        }

        var heroesArr = select(dom, '.hero-list__item a').map(function (anchor) {
            return {
                // The name is read from the first child of the anchor's fourth child.
                name: anchor.children[3].children[0].raw,
                slug: anchor.attribs.href
            };
        });

        page.close();
        console.log('got all hero names and slugs');
        getHeroes(ph, heroesArr, scrapedAllHeroes);
    };

    var domHandler = new htmlparser.DefaultHandler(onDom);
    new htmlparser.Parser(domHandler).parseComplete(result);
});
// Grunt task: produces the UI-testing variant of the application's index
// file. The `<!-- UT-INCLUDE -->` marker is replaced with <script> tags for
// every configured UT javascript file and the constants script is swapped
// for its -ut counterpart. Paths come from the `build` config entry.
grunt.registerTask('build-ut', 'Builds a UI testing version of your application', function() {
    var buildConfig = grunt.config.get('build');
    var applicationAbsolutePath = __dirname + '/' + buildConfig.path;
    var buildIndexFilePath = applicationAbsolutePath + '/' + buildConfig.utFileName;
    var sourceIndexFilePath = applicationAbsolutePath + '/' + buildConfig.sourceFileName;

    var htmlparser = require("htmlparser");
    // utf8 instead of ascii: ascii decoding/encoding mangles any non-ASCII
    // character in the page; pure-ASCII files are read and written identically.
    var rawHtml = fs.readFileSync(sourceIndexFilePath, 'utf8');

    // The parse result is not used; parsing only surfaces handler errors.
    var handler = new htmlparser.DefaultHandler(function (error, dom) {
        if (error) {
            // `.red` is a string colour helper; an Error object has no such property.
            console.log(String(error).red);
        }
    });
    var parser = new htmlparser.Parser(handler);
    parser.parseComplete(rawHtml);

    var regexp = xRegExp('<!-- UT-INCLUDE -->', 's');
    var match = xRegExp.exec(rawHtml, regexp);
    if (match) {
        var utReplace = '';
        buildConfig.utJavascriptFiles.forEach(function(filePath) {
            utReplace += '<script type="text/javascript" src="' + filePath + '"></script>\n';
        });
        // Function replacement so `$` sequences in file paths are inserted literally.
        rawHtml = rawHtml.replace(match[0], function() {
            return utReplace;
        });
    }

    rawHtml = rawHtml.replace('app/components/core/constants.js', 'app/components/core/constants-ut.js');

    console.log(('writing out ' + buildIndexFilePath + ' file').green);
    fs.writeFileSync(buildIndexFilePath, rawHtml, 'utf8');
});
/**
 * Collects the text content of an HTML string.
 *
 * Text nodes are concatenated in document order; only nodes of type 'tag'
 * are descended into, every other node type is skipped. The collected text
 * is passed through exports.html_decode before being returned.
 *
 * @param {string} html - Raw HTML.
 * @returns {*} Result of exports.html_decode on the collected text ("" is
 *   decoded when the parser reports an error).
 */
function get_text_from_html(html){
    var text = "";

    // Depth-first walk appending each text node's data to `text`.
    function collect(nodes) {
        for (var key in nodes) {
            var node = nodes[key];
            if (typeof node != 'object') {
                continue;
            }
            if (node['type'] == 'tag') {
                collect(node['children']);
                continue;
            }
            if (node['type'] == 'text') {
                text += node['data'];
            }
        }
    }

    var onDom = function(err, dom) {
        if (err) {
            text = "";
            return;
        }
        collect(dom);
    };

    var domHandler = new htmlparser.DefaultHandler(onDom, { verbose: false });
    new htmlparser.Parser(domHandler).parseComplete(html);

    return exports.html_decode(text);
}
// Discovers the feeds advertised by the document at `url`:
//   1. every <head><link> of type atom+xml / rss+xml with an href;
//   2. otherwise, the document itself when it is an Atom <feed>;
//   3. otherwise, the document itself when it contains an <rss> element.
// Result is delivered through done(err, feeds).
request.get(url, {gzip: true}, function (error, response, body) {
    if (error) {
        // Previously shadowed by the handler's own `error` and never reported.
        return done(error, null);
    }

    var handler = new htmlparser.DefaultHandler(function (parseError, dom) {
        if (parseError) {
            return done(parseError, null);
        }

        // Resolved once and guarded: a page without a <title> used to throw
        // while building a link entry that had no title attribute.
        var titleNode = select(dom, "title")[0];
        var title = '';
        if (titleNode && titleNode.children && titleNode.children[0]) {
            title = titleNode.children[0].raw;
        }

        var feeds = [];
        select(dom, "head link").forEach(function (link) {
            var attribs = link.attribs || {};
            var isFeedType = attribs.type === "application/atom+xml" ||
                             attribs.type === "application/rss+xml";
            if (!isFeedType || !attribs.href) {
                return;
            }

            var feed = {rel: "alternate", type: attribs.type};
            if (urlparser.parse(attribs.href).hostname) {
                feed.href = attribs.href;
            } else {
                // Relative href: resolve against the requested URL.
                feed.href = urlparser.format(urlparser.resolve(url, attribs.href));
            }
            feed.title = attribs.title ? attribs.title : title;
            feeds.push(feed);
        });

        if (feeds.length === 0) {
            var atom = select(dom, "feed");
            var xmlns = atom.length > 0 && atom[0].attribs && atom[0].attribs.xmlns;
            if (typeof xmlns === 'string' &&
                xmlns.toLowerCase() === 'http://www.w3.org/2005/Atom'.toLowerCase()) {
                feeds.push({ rel: "self", type: "application/atom+xml", href: url, title: title });
            }
        }

        if (feeds.length === 0) {
            var rss = select(dom, "rss")[0];
            if (rss) {
                feeds.push({ rel: "self", type: "application/rss+xml", href: url, title: title });
            }
        }

        done(null, feeds);
    });
    var parser = new htmlparser.Parser(handler);
    parser.parseComplete(body);
});
/**
 * Builds a Template for the given file and runs its content through the
 * parser, using the handler callback produced by getHandler(template).
 *
 * @param {Object} options - Passed to the DOM handler as its options.
 * @param {string} filename
 * @param {string} filepath
 * @param {string} content
 * @returns {Template} The template created for this file.
 */
function compile(options, filename, filepath, content) {
    var template = new Template(filename, filepath, content);

    var onDom = getHandler(template);
    var domHandler = new htmlparser.DefaultHandler(onDom, options);
    var parser = new htmlparser.Parser(domHandler, { includeLocation: true });

    // Parses template.content rather than the raw `content` argument.
    parser.parseComplete(template.content);

    return template;
}
/**
 * Parses `data`; `next` is installed as the DOM handler's completion callback.
 *
 * @param {string} data - Raw HTML.
 * @param {function(?Error, Array)} next - Handler completion callback.
 */
function parseHTML(data, next) {
    var handlerOptions = { ignoreWhitespace: true, verbose: false };
    var domHandler = new HTMLParser.DefaultHandler(next, handlerOptions);

    new HTMLParser.Parser(domHandler).parseComplete(data);
}
.add('htmlparser v' + htmlparserVersion + ' (https://github.com/tautologistics/node-htmlparser/)', function () { for (var i = 0; i < testPages.length; i++) { var handler = new htmlparser.DefaultHandler(), parser = new htmlparser.Parser(handler); parser.parseComplete(testPages[i]); } })
/**
 * Parses RSS data and prints the title and link of the first item.
 *
 * @param {string} rss - Raw feed XML.
 * @returns {*} The result of next(err) when the feed has no items;
 *   otherwise undefined.
 */
function parseRSSFeed(rss) {
  const handler = new htmlparser.RssHandler();
  const parser = new htmlparser.Parser(handler);
  parser.parseComplete(rss);

  // `items` may be missing altogether when the handler found no feed in the
  // input, so guard the property as well as its length.
  const items = handler.dom && handler.dom.items;
  if (!items || !items.length) {
    return next(new Error('No RSS items found'));
  }

  const item = items.shift();
  console.log(`${item.title}\n${item.link}`);
}
/**
 * Parses `html` and gives `cb` a query function bound to the parsed DOM.
 *
 * @param {string} html - Raw HTML.
 * @param {function(function(string): Array)} cb - Receives a function that
 *   runs a soupselect selector against the parsed DOM. The parser's error
 *   argument is not forwarded.
 */
function parseHtml(html, cb){
    var htmlparser = require('htmlparser');
    var select = require('soupselect').select;

    var onDom = function(err, dom) {
        var query = function(selector) {
            return select(dom, selector);
        };
        cb(query);
    };

    var domHandler = new htmlparser.DefaultHandler(onDom);
    var parser = new htmlparser.Parser(domHandler);
    parser.parseComplete(html);
}
/**
 * Parses RSS data and prints the title and link of the first item.
 *
 * @param {string} rss - Raw feed XML.
 * @returns {*} The result of next(err) when the feed has no items;
 *   otherwise undefined.
 */
function parseRSSFeed (rss) {
  var handler = new htmlparser.RssHandler();
  var parser = new htmlparser.Parser(handler);
  parser.parseComplete(rss);

  // `items` may be missing altogether when the handler found no feed in the
  // input, so guard the property as well as its length.
  var items = handler.dom && handler.dom.items;
  if (!items || !items.length) {
    return next(new Error('No RSS items found'));
  }

  var item = items.shift();
  console.log(item.title);
  console.log(item.link);
}
parseHTML: function(data){ try{ var handler = new htmlparser.DefaultHandler(); var parser = new htmlparser.Parser(handler); parser.parseComplete(data); return handler.dom; }catch(e){ return null; } },
/**
 * Parses `html` and reports the result through an error-first callback.
 *
 * @param {string} html - Raw HTML.
 * @param {function(?Error, *)} handler - Receives (err, null) on failure or
 *   (null, zcsel.initDom(doc)) on success.
 * @returns {*} Whatever the parser's parseComplete returns.
 */
function parse(html,handler){
    var onDocument = function(err, doc){
        return err ? handler(err, null) : handler(null, zcsel.initDom(doc));
    };

    var domHandler = new htmlparser.DefaultHandler(onDocument);
    var parser = new htmlparser.Parser(domHandler);
    return parser.parseComplete(html);
}
/**
 * Parses an HTML string and returns the resulting DOM as pretty-printed JSON.
 *
 * @param {string} data - Raw HTML.
 * @returns {string|undefined} JSON (2-space indent) of the parsed DOM, or
 *   undefined after logging an error when `data` is not a string.
 */
seven.prototype.dom = function(data) {
    // check arguments
    if (typeof data != "string") {
        console.error(new Error('Check arguments! [data]'));
        return;
    }

    debug('Dom tags');
    var domHandler = new htmlparser.DefaultHandler();
    new htmlparser.Parser(domHandler).parseComplete(data);

    return JSON.stringify(domHandler.dom, null, 2);
};
exports.parseHtmlDom = function(html, cb){ var parser = new htmlparser.Parser(new htmlparser.DefaultHandler(function(error, dom) { if (error) { console.error(error); cb({error: error}); } else cb(exports.parseDom(dom)); })); parser.parseComplete(html); }
// Once the response has ended, parse the accumulated `data`, locate the
// proxy list in the DOM and report the parsed nodes via callback(err, nodes).
res.on('end', function() {
    var handler = new htmlparser.DefaultHandler(function (error, dom) {
        if (error) {
            // Forward the parse failure instead of searching an unusable DOM.
            return callback(error);
        }
        var proxies = find(matchProxyList, dom);
        var nodes = parseNodes(proxies);
        callback(null, nodes);
    }, { verbose: false, ignoreWhitespace: true });

    var parser = new htmlparser.Parser(handler);
    parser.parseComplete(data);
});
function(rss) { var handler = new htmlparser.RssHandler(); var parser = new htmlparser.Parser(handler); parser.parseComplete(rss); if (handler.dom.items.length) { var item = handler.dom.items.shift(); console.log(item.title); console.log(item.link); } else { next('No RSS items found.'); } }
/**
 * Parses RSS data and prints the title and URL of the first item.
 *
 * @param {string} rss - Raw feed XML.
 * @returns {*} The result of next(err) when the feed has no items;
 *   otherwise undefined.
 */
function parseRSSFeed(rss) {
  const handler = new htmlparser.RssHandler();
  const parser = new htmlparser.Parser(handler);
  parser.parseComplete(rss);

  // `items` may be missing altogether when the handler found no feed in the
  // input, so guard the property as well as its length.
  const items = handler.dom && handler.dom.items;
  if (!items || !items.length) {
    console.log(`Error parsing RSS feed: no elements found`);
    return next(new Error("No RSS items found"));
  }

  const item = items.shift();
  console.log(`RSS Feed Title: ${item.title}`);
  console.log(`RSS Feed URL : ${item.link}`);
}