/*
 * Warn about every phrase found by `search` whose matched text differs
 * from the replacement configured for it in `patterns`.
 */
search(
  tree,
  phrases,
  function (match, position, parent, phrase) {
    var rule = patterns[phrase];
    var replacement = rule.replace;
    var remark = rule.note;
    var found = nlcstToString(match);
    var quotedFound = quotation(found, '“', '”');
    var quotedReplacement = quotation(replacement, '“', '”');
    var warning;

    if (!replacement.length) {
      /* No replacement configured: advise against the phrase. */
      warning = quotedFound + ' is not Shopify style. Avoid using it.';
    } else if (found !== replacement) {
      warning = quotedFound + ' is not Shopify style. Use ' +
        quotedReplacement + ' instead.';
    } else {
      /* The text already equals the replacement: nothing to report. */
      return transformer;
    }

    if (remark) {
      warning += ' (' + remark + ')';
    }

    /* `warning` is always a non-empty string at this point. */
    warning = file.warn(warning, {
      'start': match[0].position.start,
      'end': match[match.length - 1].position.end
    });

    warning.ruleId = phrase;
    warning.source = 'retext-shopify';
  },
  {'allowApostrophes': false, 'allowDashes': true}
);
/* Merge a slash (`C_SLASH`) and, when present, the word following it into
 * the preceding word, but only when the words on both sides are shorter
 * than three characters.
 * Returns `index` when a merge happened, so iteration continues with the
 * node now at that position; returns nothing otherwise. */
function mergeInnerWordSlash(child, index, parent) {
  var siblings = parent.children;
  var before = siblings[index - 1];
  var after = siblings[index + 1];
  var additions;
  var removeCount;
  var lastNode;
  var afterValue;

  if (
    !before ||
    before.type !== 'WordNode' ||
    (child.type !== 'SymbolNode' && child.type !== 'PunctuationNode') ||
    toString(child) !== C_SLASH
  ) {
    return;
  }

  /* The slash itself is always moved; a trailing word is optional. */
  additions = [child];
  removeCount = 1;
  lastNode = child;

  if (after && after.type === 'WordNode') {
    afterValue = toString(after);
    additions = additions.concat(after.children);
    removeCount = 2;
    lastNode = after;
  }

  if (toString(before).length >= 3 || (afterValue && afterValue.length >= 3)) {
    return;
  }

  /* Move the collected tokens into the preceding word. */
  before.children = before.children.concat(additions);
  siblings.splice(index, removeCount);

  /* Update position. */
  if (before.position && lastNode.position) {
    before.position.end = lastNode.position.end;
  }

  return index;
}
/**
 * Join a sentence with the sentence after it when it ends in a word
 * matching `EXPRESSION_ABBREVIATION_PREFIX` followed by a full stop.
 *
 * @param {NLCSTNode} child - Node.
 * @param {number} index - Position of `child` in `parent`.
 * @param {NLCSTParagraphNode} parent - Parent of `child`.
 * @return {undefined|number} - `index - 1` after a merge, so that `child`
 *   is visited again; otherwise nothing.
 */
function mergePrefixExceptions(child, index, parent) {
  var nodes = child.children;
  var candidate;
  var following;

  /* Nothing to do for empty sentences or for the last sentence. */
  if (!nodes || !nodes.length || index === parent.children.length - 1) {
    return;
  }

  candidate = nodes[nodes.length - 1];

  if (!candidate || nlcstToString(candidate) !== '.') {
    return;
  }

  candidate = nodes[nodes.length - 2];

  if (
    !candidate ||
    candidate.type !== 'WordNode' ||
    !EXPRESSION_ABBREVIATION_PREFIX.test(nlcstToString(candidate).toLowerCase())
  ) {
    return;
  }

  following = parent.children[index + 1];

  child.children = nodes.concat(following.children);
  parent.children.splice(index + 1, 1);

  /*
   * Update position.
   */
  if (following.position && child.position) {
    child.position.end = following.position.end;
  }

  return index - 1;
}
/* When a sentence opens with a symbol or punctuation mark matching
 * `AFFIX_SYMBOL`, move that mark to the end of the previous sentence.
 * Returns `index - 1` after a move, so the previous node is visited
 * again; returns nothing otherwise. */
function mergeAffixSymbol(child, index, parent) {
  var nodes = child.children;
  var head;
  var follower;
  var previous;

  if (!nodes || nodes.length === 0 || index === 0) {
    return;
  }

  head = nodes[0];
  follower = nodes[1];
  previous = parent.children[index - 1];

  if (
    (head.type !== 'SymbolNode' && head.type !== 'PunctuationNode') ||
    !AFFIX_SYMBOL.test(toString(head))
  ) {
    return;
  }

  /* Detach the mark from this sentence and attach it to the previous. */
  previous.children.push(nodes.shift());

  /* Update position. */
  if (head.position && previous.position) {
    previous.position.end = head.position.end;
  }

  /* The sentence now starts at what used to be its second node. */
  if (follower && follower.position && child.position) {
    child.position.start = follower.position.start;
  }

  return index - 1;
}
/**
 * Handle matches for an `and` pattern: a warning is emitted once a match
 * has been seen for every category listed on the pattern.
 *
 * For example, when `master` and `slave` occur in a context together,
 * they trigger a warning.
 *
 * @param {Array.<Object>} matches - List of matches
 *   matching `pattern` in a context.
 * @param {Object} pattern - And-pattern object.
 * @param {VFile} file - Virtual file.
 */
function and(matches, pattern, file) {
  /* Work on a copy: categories are crossed off as they are found. */
  var remaining = pattern.categories.concat();
  var note = pattern.note;
  var id = pattern.id;
  var total = matches.length;
  var phrases = [];
  var suggestions = [];
  var first;
  var cursor;
  var match;
  var siblings;
  var at;

  for (cursor = 0; cursor < total; cursor++) {
    match = matches[cursor];
    siblings = match.parent.children;
    at = remaining.indexOf(match.type);

    /* Category unknown, or already satisfied by an earlier match. */
    if (at === -1) {
      continue;
    }

    remaining.splice(at, 1);
    phrases.push(toString(siblings.slice(match.start, match.end + 1)));
    suggestions.push(byValue(pattern.considerate, match.type));

    if (!first) {
      first = match.nodes[0];
    }

    if (remaining.length === 0) {
      warn(file, id, phrases, suggestions, first, note, ' / ');
    }
  }
}
/*
 * Warn for every matched phrase, suggesting the replacements listed for
 * it in `patterns` and/or its removal.
 */
search(tree, phrases, function (match, position, parent, phrase) {
  var rule = patterns[phrase];
  var alternatives = rule.replace;
  var quoted = quotation(nlcstToString(match), '“', '”');
  var text;
  var warning;

  if (rule.omit && !alternatives.length) {
    text = 'Remove ' + quoted;
  } else {
    text = 'Replace ' + quoted + ' with ' +
      quotation(alternatives, '“', '”').join(', ');

    if (rule.omit) {
      text += ', or remove it';
    }
  }

  /* The warning spans from the first to the last matched node. */
  warning = file.warn(text, {
    'start': match[0].position.start,
    'end': match[match.length - 1].position.end
  });

  warning.ruleId = phrase;
  warning.source = 'retext-simplify';
});
/**
 * Normalize `value`: lower-case it and, depending on `options`, rewrite
 * or strip apostrophes and dashes using the module-level patterns.
 *
 * @param {string} value - Value to normalize; anything that is not a
 *   string is first passed through `toString`.
 * @param {Object?} options - Control stripping
 *   apostrophes and dashes (`allowApostrophes`, `allowDashes`).
 * @return {string} - Normalized `value`.
 */
function normalize(value, options) {
  var text = typeof value === 'string' ? value : toString(value);
  var lowered = text.toLowerCase();
  var settings = options || {};
  var keepApostrophes = settings.allowApostrophes || false;
  var keepDashes = settings.allowDashes || false;

  if (keepApostrophes) {
    /* With both allowed, only lower-casing is applied. */
    return keepDashes ?
      lowered :
      lowered.replace(APOSTROPHE, QUOTE).replace(DASH, EMPTY);
  }

  if (keepDashes) {
    return lowered.replace(APOSTROPHE, EMPTY).replace(QUOTE, EMPTY);
  }

  return lowered.replace(ALL, EMPTY);
}
/*
 * Warn for matched phrases according to their entry in `rules`:
 * forbidden phrases must be removed; other phrases are reported unless
 * the rule is `cased` and the text already equals the first replacement.
 */
search(tree, rules, function (match, position, parent, phrase) {
  var rule = rules[phrase];
  var alternatives = rule.replace;
  var found = nlcstToString(match);
  var quoted = quotation(found, '“', '”');
  var text;
  var warning;

  if (rule.forbid === true) {
    text = 'Remove ' + quoted;
  } else if (!rule.cased || found !== alternatives[0]) {
    text = 'Replace ' + quoted + ' with ' +
      quotation(alternatives, '“', '”').join(', ');

    if (rule.omit) {
      text += ', or remove it';
    }
  }

  /* Nothing to report: a cased rule whose text is already correct. */
  if (!text) {
    return;
  }

  warning = file.warn(text, {
    'start': match[0].position.start,
    'end': match[match.length - 1].position.end
  });

  warning.ruleId = phrase;
  warning.source = 'mapbox';
});
/**
 * Stringify an NLCST node, or a list of NLCST nodes.
 *
 * A node with a string `value` yields that value. Otherwise the values
 * of its children (or, for a list, of its items) are serialized
 * recursively and concatenated in order.
 *
 * @param {NLCSTNode|Array.<NLCSTNode>} nlcst - Node, or list of nodes.
 * @return {string} - Serialized text; an empty string when there is
 *   neither a string `value` nor any children.
 */
function nlcstToString(nlcst) {
  var children;
  var values;
  var index;

  if (typeof nlcst.value === 'string') {
    return nlcst.value;
  }

  /* A list of nodes is serialized as if it were a parent's children. */
  children = Array.isArray(nlcst) ? nlcst : nlcst.children;

  /* Nodes with neither a value nor children serialize to nothing,
   * instead of throwing on `children.length`. */
  if (!children || !children.length) {
    return '';
  }

  values = [];

  for (index = 0; index < children.length; index++) {
    values.push(nlcstToString(children[index]));
  }

  return values.join('');
}
/**
 * Tag every word that is a direct child of `node`: the words' text is
 * handed to `tagger.tag` in one call, and each result is applied to the
 * corresponding word through `patch`.
 *
 * @param {NLCSTParentNode} node - Parent
 */
function one(node) {
  var words = node.children.filter(function (child) {
    return child.type === 'WordNode';
  });
  var values = words.map(function (word) {
    return nlcstToString(word);
  });
  var tags = tagger.tag(values);
  var total = tags.length;
  var cursor;

  /* The second item of each tag entry is what gets patched onto the
   * word (presumably the part-of-speech tag; confirm against `tagger`). */
  for (cursor = 0; cursor < total; cursor++) {
    patch(words[cursor], tags[cursor][1]);
  }
}
/* Set `position` on every node in `nodes`, and recursively on their
 * descendants. `offset` is where this run of content starts; each node
 * spans as many characters as its serialized text is long, and the next
 * node starts where the previous one ended. Offsets are converted with
 * `config.location.toPosition`. Returns `nodes`. */
function patch(config, nodes, offset) {
  var toPosition = config.location.toPosition;
  var total = nodes.length;
  var cursor = offset;
  var index;
  var node;
  var finish;

  for (index = 0; index < total; index++) {
    node = nodes[index];

    /* Descendants start at the same offset as their parent. */
    if (node.children) {
      patch(config, node.children, cursor);
    }

    finish = cursor + toString(node).length;

    node.position = {
      start: toPosition(cursor),
      end: toPosition(finish)
    };

    cursor = finish;
  }

  return nodes;
}
// When a sentence ends in a word matching `abbreviationPrefix` followed by a
// full stop, fold the full stop into that word and, if another sentence
// follows, join that sentence onto this one.
// Returns `index - 1` after joining sentences, so this node is visited again.
function mergePrefixExceptions(child, index, parent) {
  var nodes = child.children
  var stop
  var word
  var following

  if (!nodes || nodes.length <= 1) {
    return
  }

  stop = nodes[nodes.length - 1]

  if (!stop || toString(stop) !== '.') {
    return
  }

  word = nodes[nodes.length - 2]

  if (
    !word ||
    word.type !== 'WordNode' ||
    !abbreviationPrefix.test(toString(word).toLowerCase())
  ) {
    return
  }

  // The full stop becomes part of the abbreviation.
  word.children.push(stop)
  nodes.pop()

  // Update position.
  if (stop.position && word.position) {
    word.position.end = stop.position.end
  }

  following = parent.children[index + 1]

  // Last sentence: the full stop was merged, but there is nothing to join.
  if (!following) {
    return
  }

  child.children = nodes.concat(following.children)
  parent.children.splice(index + 1, 1)

  // Update position.
  if (following.position && child.position) {
    child.position.end = following.position.end
  }

  return index - 1
}
/**
 * Attach a trailing dash (`-`) to the word before it, unless the dash
 * is directly followed by another word.
 *
 * @param {NLCSTNode} child - Node.
 * @param {number} index - Position of `child` in `parent`.
 * @param {NLCSTSentenceNode} parent - Parent of `child`.
 * @return {undefined|number} - `index` after a merge (the node that
 *   followed `child` now lives there); otherwise nothing.
 */
function mergeFinalWordSymbol(child, index, parent) {
  var siblings;
  var before;
  var after;

  if (
    index === 0 ||
    (child.type !== 'SymbolNode' && child.type !== 'PunctuationNode') ||
    nlcstToString(child) !== '-'
  ) {
    return;
  }

  siblings = parent.children;
  before = siblings[index - 1];
  after = siblings[index + 1];

  /* Only dashes that end a word, and do not join two words, qualify. */
  if ((after && after.type === 'WordNode') || !before || before.type !== 'WordNode') {
    return;
  }

  /* Move `child` out of the parent and onto the previous word. */
  siblings.splice(index, 1);
  before.children.push(child);

  /*
   * Update position.
   */
  if (before.position && child.position) {
    before.position.end = child.position.end;
  }

  return index;
}
/**
 * Join a sentence onto the previous sentence when its first word
 * matches `EXPRESSION_LOWER_INITIAL`. Scanning stops without merging as
 * soon as a symbol or punctuation mark is met before any word.
 *
 * @param {NLCSTNode} child - Node.
 * @param {number} index - Position of `child` in `parent`.
 * @param {NLCSTParagraphNode} parent - Parent of `child`.
 * @return {undefined|number} - `index` after a merge; otherwise nothing.
 */
function mergeInitialLowerCaseLetterSentences(child, index, parent) {
  var nodes = child.children;
  var cursor;
  var node;
  var siblings;
  var previous;

  if (!nodes || !nodes.length || index === 0) {
    return;
  }

  for (cursor = 0; nodes[cursor]; cursor++) {
    node = nodes[cursor];

    if (node.type === 'SymbolNode' || node.type === 'PunctuationNode') {
      return;
    }

    /* Skip anything (such as white space) that precedes the first word. */
    if (node.type !== 'WordNode') {
      continue;
    }

    if (!EXPRESSION_LOWER_INITIAL.test(nlcstToString(node))) {
      return;
    }

    siblings = parent.children;
    previous = siblings[index - 1];

    previous.children = previous.children.concat(nodes);
    siblings.splice(index, 1);

    /*
     * Update position.
     */
    if (previous.position && child.position) {
      previous.position.end = child.position.end;
    }

    /* The node that followed `child` now sits at `index`. */
    return index;
  }
}
/**
 * Store the result of `algorithm` for the word's text as
 * `data.phonetics`. When the node's data already has a `stem` field,
 * `algorithm` is also applied to it and stored as
 * `data.stemmedPhonetics`. A data object is created when absent.
 *
 * @param {NLCSTWordNode} node - Node.
 */
function patch(node) {
  var info = node.data || {};
  var text = nlcstToString(node);

  info.phonetics = algorithm(text);

  if ('stem' in info) {
    info.stemmedPhonetics = algorithm(info.stem);
  }

  /* (Re)attach, in case the data object was created here. */
  node.data = info;
}
/**
 * Join a sentence onto the previous sentence when the first symbol or
 * punctuation mark in it, found before any word, is a comma or a
 * semicolon.
 *
 * @param {NLCSTNode} child - Node.
 * @param {number} index - Position of `child` in `parent`.
 * @param {NLCSTParagraphNode} parent - Parent of `child`.
 * @return {undefined|number} - `index` after a merge; otherwise nothing.
 */
function mergeAffixExceptions(child, index, parent) {
  var nodes = child.children;
  var cursor;
  var node;
  var mark;
  var previous;

  if (!nodes || !nodes.length || index === 0) {
    return;
  }

  for (cursor = 0; nodes[cursor]; cursor++) {
    node = nodes[cursor];

    /* A word before any mark: this is a regular sentence. */
    if (node.type === 'WordNode') {
      return;
    }

    if (node.type !== 'SymbolNode' && node.type !== 'PunctuationNode') {
      continue;
    }

    mark = nlcstToString(node);

    if (mark !== ',' && mark !== ';') {
      return;
    }

    previous = parent.children[index - 1];
    previous.children = previous.children.concat(nodes);

    /*
     * Update position.
     */
    if (previous.position && child.position) {
      previous.position.end = child.position.end;
    }

    parent.children.splice(index, 1);

    /* The node that followed `child` now sits at `index`. */
    return index;
  }
}
/*
 * Warn about every matched phrase, labelling it as a cliche. The
 * warning spans from the first to the last matched node.
 */
search(tree, phrases, function (match, position, parent, phrase) {
  var quoted = quotation(nlcstToString(match), '“', '”');
  var first = match[0];
  var last = match[match.length - 1];
  var warning = file.warn('Warning: ' + quoted + ' is a cliche', {
    'start': first.position.start,
    'end': last.position.end
  });

  warning.cliche = phrase;
  warning.source = 'retext-cliche';
});
/* When a sentence ends in a word followed by a full stop, and the word
 * matches `ABBREVIATION` (tested on its lower-cased text) or
 * `ABBREVIATION_SENSITIVE` (tested as written), fold the full stop into
 * the word and, if another sentence follows, join it onto this one.
 * Returns `index - 1` after joining sentences, so this node is visited
 * again; returns nothing otherwise. */
function mergeEnglishPrefixExceptions(sentence, index, paragraph) {
  var nodes = sentence.children;
  var stop = nodes[nodes.length - 1];
  var word = nodes[nodes.length - 2];
  var text;
  var following;

  if (!stop || toString(stop) !== '.' || !word || word.type !== 'WordNode') {
    return;
  }

  text = toString(word);

  if (!ABBREVIATION.test(lower(text)) && !ABBREVIATION_SENSITIVE.test(text)) {
    return;
  }

  /* The full stop becomes part of the abbreviation. */
  word.children.push(stop);
  nodes.pop();

  if (stop.position && word.position) {
    word.position.end = stop.position.end;
  }

  following = paragraph.children[index + 1];

  /* Last sentence in the paragraph: nothing to join. */
  if (!following) {
    return;
  }

  sentence.children = nodes.concat(following.children);
  paragraph.children.splice(index + 1, 1);

  /* Update position. */
  if (following.position && sentence.position) {
    sentence.position.end = following.position.end;
  }

  return index - 1;
}
/**
 * Convert `node` (NLCST) into a DOM node, recursing into its children.
 *
 * @param {NLCSTNode} node - Node to convert.
 * @param {Object.<string, string?>} names - Map of NLCST
 *   node types mapping to DOM tag names.
 * @return {HTMLElement} - Converted `node`.
 */
function toDOMNode(node, names) {
  var tagName = names[node.type];
  var kids = node.children;
  var total = kids && kids.length;
  var attributes = node.attributes;
  var hasValue = 'value' in node;
  var $node;
  var key;
  var setting;
  var cursor;

  /*
   * A text node is created only for nodes with a value and without a
   * mapped tag name; everything else becomes an element (a `span`
   * when no tag name is mapped).
   */
  $node = !tagName && hasValue ?
    document.createTextNode('') :
    document.createElement(tagName || 'span');

  /*
   * Text nodes have no `setAttribute`, so attributes are skipped there.
   */
  if (attributes && 'setAttribute' in $node) {
    for (key in attributes) {
      setting = attributes[key];

      /* Skip `null` and `undefined`. */
      if (setting != null) {
        $node.setAttribute(key, setting);
      }
    }
  }

  /*
   * Nodes with a value get it as text content; parents get each
   * converted child appended.
   */
  if (hasValue) {
    $node.textContent = nlcstToString(node);
  } else if ('children' in node) {
    for (cursor = 0; cursor < total; cursor++) {
      $node.appendChild(toDOMNode(kids[cursor], names));
    }
  }

  return $node;
}
/**
 * Check whether or not a `node` is important.
 *
 * A node is important when its `data.partOfSpeech` tag starts with `N`,
 * or when the tag is exactly `JJ` and `isUpperCase` accepts the first
 * character of the node's text.
 *
 * @param {Node} node - Node to check.
 * @return {boolean} - Whether `node` is important. Always a real
 *   boolean, also for missing nodes, data, or tags.
 */
function isImportant(node) {
  var tag = node && node.data && node.data.partOfSpeech;

  /* Missing node, data, or tag: not important (and not `undefined`). */
  if (!tag) {
    return false;
  }

  if (tag.indexOf('N') === 0) {
    return true;
  }

  return tag === 'JJ' &&
    Boolean(isUpperCase(nlcstToString(node).charAt(0)));
}
/**
 * Handle matches for a `simple` pattern: every match results in a
 * warning, no further conditions apply.
 *
 * @param {Array.<Object>} matches - List of matches
 *   matching `pattern` in a context.
 * @param {Object} pattern - Simple-pattern object.
 * @param {VFile} file - Virtual file.
 */
function simple(matches, pattern, file) {
  var note = pattern.note;
  var id = pattern.id;
  var total = matches.length;
  var cursor;
  var nodes;

  for (cursor = 0; cursor < total; cursor++) {
    nodes = matches[cursor].nodes;

    /* The first matched node is passed along with the matched text. */
    warn(file, id, toString(nodes), pattern.considerate, nodes[0], note);
  }
}
.forEach(function(name) { var doc = fs.readFileSync(path.join(root, name)) var json = JSON.parse(doc) var fn = 'tokenize' + json.type.slice(0, json.type.indexOf('Node')) var nlcst if (fn === 'tokenizeRoot') { fn = 'parse' } nlcst = dutch[fn](toString(json)) nlcst = JSON.stringify(nlcst, 0, 2) + '\n' fs.writeFileSync('test/fixture/' + name, nlcst) })
/**
 * Detect if a value is used to negate something: the lower-cased text
 * is `not`, `neither`, or `nor`, or contains `n't` (with a straight or
 * a curly apostrophe).
 *
 * @param {Node} node - Node to check.
 * @return {boolean}
 */
function isNegation(node) {
  var word = nlcstToString(node).toLowerCase();

  return (
    word === 'not' ||
    word === 'neither' ||
    word === 'nor' ||
    /n['’]t/.test(word)
  );
}
return function (node) { var value; var polarity; if ('value' in node || node.type === 'WordNode') { value = nlcstToString(node); if (config && has.call(config, value)) { polarity = config[value]; } else if (has.call(polarities, value)) { polarity = polarities[value]; } if (polarity) { patch(node, polarity); } } };
return function (node) { var children = []; var tokens = node.children; var type = node.type; var length = tokens.length; var index = -1; var lastIndex = length - 1; var start = 0; var first; var last; var parent; while (++index < length) { if ( index === lastIndex || ( tokens[index].type === childType && expression.test(nlcstToString(tokens[index])) ) ) { first = tokens[start]; last = tokens[index]; parent = { 'type': type, 'children': tokens.slice(start, index + 1) }; if (first.position && last.position) { parent.position = { 'start': first.position.start, 'end': last.position.end }; } children.push(parent); start = index + 1; } } return children; };
/* Merge a sentence into its previous sentence, when the very first
 * node of the sentence is a word whose text matches `DIGIT`.
 * Returns `index` after a merge, so iteration continues with the node
 * now at that position; returns nothing otherwise. */
function mergeInitialDigitSentences(child, index, parent) {
  var nodes = child.children;
  var siblings = parent.children;
  var previous = siblings[index - 1];
  var first = nodes[0];

  if (
    !previous ||
    !first ||
    first.type !== 'WordNode' ||
    !DIGIT.test(toString(first))
  ) {
    return;
  }

  previous.children = previous.children.concat(nodes);
  siblings.splice(index, 1);

  /* Update position. */
  if (previous.position && child.position) {
    previous.position.end = child.position.end;
  }

  return index;
}
// Split the children of `node` into consecutive groups. A group ends at a
// child of type `childType` whose text matches `expression`, or at the last
// child. Each group becomes a new node of the same type as `node`.
function tokenizer(node) {
  var tokens = node.children
  var kind = node.type
  var final = tokens.length - 1
  var groups = []
  var from = 0
  var at
  var token
  var head
  var group

  for (at = 0; at <= final; at++) {
    token = tokens[at]

    // Keep collecting unless this is a delimiter or the last token.
    if (
      at !== final &&
      (token.type !== childType || !expression.test(toString(token)))
    ) {
      continue
    }

    head = tokens[from]
    group = {type: kind, children: tokens.slice(from, at + 1)}

    if (head.position && token.position) {
      group.position = {
        start: head.position.start,
        end: token.position.end
      }
    }

    groups.push(group)
    from = at + 1
  }

  return groups
}
/* Merge words that are joined by marks matching `INNER_WORD_SYMBOL`
 * into the word preceding `child`.
 * Returns `index` after a merge, so iteration continues with the node
 * now at that position; returns nothing otherwise. */
function mergeInnerWordSymbol(child, index, parent) {
  var siblings;
  var previous;
  var cursor;
  var sibling;
  var collected;
  var pending;
  var finalToken;

  if (
    index === 0 ||
    (child.type !== 'SymbolNode' && child.type !== 'PunctuationNode')
  ) {
    return;
  }

  siblings = parent.children;
  previous = siblings[index - 1];

  if (!previous || previous.type !== 'WordNode') {
    return;
  }

  collected = [];
  pending = [];

  /* Walk forward from `child`:
   * - a word commits the pending marks plus its own children;
   * - an inner-word mark is held back until a word follows it;
   * - anything else ends the walk. */
  for (cursor = index; siblings[cursor]; cursor++) {
    sibling = siblings[cursor];

    if (sibling.type === 'WordNode') {
      collected = collected.concat(pending, sibling.children);
      pending = [];
      continue;
    }

    if (
      (sibling.type === 'SymbolNode' || sibling.type === 'PunctuationNode') &&
      INNER_WORD_SYMBOL.test(toString(sibling))
    ) {
      pending.push(sibling);
      continue;
    }

    break;
  }

  /* No word followed the marks: leave everything in place. */
  if (collected.length === 0) {
    return;
  }

  /* Marks that were never followed by a word stay where they are. */
  cursor -= pending.length;

  /* Remove the merged marks and words from the parent. */
  siblings.splice(index, cursor - index);

  /* Add all found tokens to the preceding word. */
  previous.children = previous.children.concat(collected);

  finalToken = collected[collected.length - 1];

  /* Update position. */
  if (previous.position && finalToken.position) {
    previous.position.end = finalToken.position.end;
  }

  return index;
}
/*
 * Give every word in `cst` a fresh data object whose `stem` is the
 * word's own text. Any data already on the node is replaced.
 */
visit(cst, 'WordNode', function (word) {
  var text = nlcstToString(word);

  word.data = {'stem': text};
});
/* Merge the full stop closing an initialism into the preceding word.
 * The preceding word qualifies when it has an odd number (more than
 * one) of children that, read from the end, alternate between values of
 * at most one character and full stops, and when those one-character
 * values are not all matched by `NUMERICAL`.
 * Returns `index` after a merge, so iteration continues with the node
 * now at that position; returns nothing otherwise. */
function mergeInitialisms(child, index, parent) {
  var siblings;
  var previous;
  var parts;
  var total;
  var cursor;
  var text;
  var onlyDigits;

  if (index === 0 || toString(child) !== '.') {
    return;
  }

  siblings = parent.children;
  previous = siblings[index - 1];
  parts = previous.children;
  total = parts && parts.length;

  if (previous.type !== 'WordNode' || total === 1 || total % 2 === 0) {
    return;
  }

  onlyDigits = true;

  /* Inspect the word's children from last to first. */
  for (cursor = total - 1; parts[cursor]; cursor--) {
    text = toString(parts[cursor]);

    if (cursor % 2 === 0) {
      /* Initialisms consist of one character values. */
      if (text.length > 1) {
        return;
      }

      if (!NUMERICAL.test(text)) {
        onlyDigits = false;
      }
    } else if (text !== '.') {
      /* A separator other than a full stop: stop scanning when it is
       * further from the end, give up when it is the last separator. */
      if (cursor < total - 2) {
        break;
      }

      return;
    }
  }

  if (onlyDigits) {
    return;
  }

  /* Move `child` from the parent to the end of the preceding word. */
  siblings.splice(index, 1);
  parts.push(child);

  /* Update position. */
  if (previous.position && child.position) {
    previous.position.end = child.position.end;
  }

  return index;
}