.map(({data, path}) => { const html = fs.readFileSync(`./public/${path}/index.html`).toString() const title = html .match(/<title[\s\S]*?>([\s\S]*?)<\/title>/)[1] .replace(/ \| Numenta.org$/, '') // @TODO refactor string const markup = html .match(/<main[\s\S]*?>([\s\S]*?)<\/main>/)[1] .replace(/<!--.*?-->/g, ' ') .replace(/\n+/g, ' ') const content = htmlToText(markup) .replace(/\\n/g, ' ') .replace(/[^\x00-\x7F]/g, ' ') .replace(/\s+/g, ' ') const details = Object .keys(data) .filter((key) => ( typeof data[key] === 'string' && data[key].length && dataSkip.indexOf(key) !== -1 )) .map((key) => data[key]) const text = [title, content, details.join(' ')].join(' ') return {path, text, title} })
return this.executeRequest(options).then(function(b) { return h2p(b); });