/**
 * Reads `url`, extracts its body nodes, filters them and reports the
 * resulting sections.
 *
 * Pipeline: fu.read_url -> _get_body -> remove_tags(['header', 'script'])
 * -> remove_content(/id="toc"/) -> flatten_nodes -> node_sections.
 *
 * @param {string} url - Location handed to `fu.read_url`.
 * @param {function(*, *=)} callback - Called as `callback(err)` when reading
 *   or body extraction fails, otherwise as `callback(null, sections)`.
 */
module.exports.chunk_url = function(url, callback) {
  fu.read_url(url, function(err, txt) {
    if (err) {
      // Without this guard an undefined `txt` would be parsed as if valid.
      callback(err);
      return;
    }
    _get_body(txt, function(err, nodes) {
      if (err) {
        callback(err);
        return;
      }
      var kept = remove_tags(nodes, ['header', 'script']);
      kept = remove_content(kept, /id="toc"/);
      var flat_nodes = flatten_nodes(kept);
      var sections = node_sections(flat_nodes);
      callback(null, sections);
    });
  });
};
// Store the page, read the text behind its url, find the links in that text
// and pass them to self._update_links. Any failure along the way is reported
// through `callback(err)` and stops the chain.
this.put(page, function(err, page) {
  if (err) {
    // `page` may be missing on failure; bail out before touching page.url.
    callback(err);
    return;
  }
  file_utils.read_url(page.url, function(err, txt) {
    if (err) {
      callback(err);
      return;
    }
    page.txt = txt;
    console.log(__filename + '::_reindex_links:: finding links in txt');
    file_utils.links_in_txt(txt, function(err, links) {
      if (err) {
        console.log(__filename + ':: error in _reindex_links::links_in_txt');
        console.log(err);
        callback(err);
        return;
      }
      self._update_links(self, page, links, callback);
    });
  });
});
_reindex_irc_lines: function(page, callback) { var self = this; var lines_being_put = 0; file_utils.read_url(page.url, function(err, txt) { file_utils.lines_in_txt(txt, function(err, new_lines) { var lines_model = require('models/lines'); lines_model.model(function(err, model) { model.delete({ url: page.url }, function() { new_lines.forEach(function(line, i) { if (! (i % 1000)) { console.log(__filename + ':: indexing line ' + i + ' of ' + new_lines.length + ' of page ' + page.url); } line.url = page.url; ++lines_being_put; model.put(line, function() {--lines_being_put; }); }); var interval = setInterval(function() { console.log(page.url + ': remaining lines: ' + lines_being_put); if (lines_being_put < 1) { console.log('ending parse of ' + page.url); clearInterval(interval); self.put(page, function(err, page) { page.indexed = new Date(); self.put(page, callback); }); } }, 2000); }); }); }); }); },