Ejemplo n.º 1
0
  // Walk every field of `doc`: optionally copy the raw value into `docToStore`,
  // turn indexable values into a term-frequency vector, and register that
  // vector for composite and/or fielded search according to the field options.
  _forEach(doc, function (field, fieldName) {
    // Per-field options: the batchOptions.fieldOptions entry whose `fieldName`
    // matches (or a fresh object), with gaps filled from defaultFieldOptions.
    // NOTE(review): presumably lodash `defaults`, which writes into its first
    // argument, so a matched entry of batchOptions.fieldOptions is modified in
    // place here and by the `stopwords` assignment below - confirm intended.
    var fieldOptions = _defaults(_find(batchOptions.fieldOptions, ['fieldName', fieldName]) || {}, batchOptions.defaultFieldOptions)

    if (fieldName === 'id') {
      fieldOptions.stopwords = '' // because you can't run stopwords on the id field
    } else {
      fieldOptions.stopwords = batchOptions.stopwords
    }

    // Store the field BEFORE mutating or skipping it, so the stored document
    // keeps the original value (including null / non-indexable values).
    if (fieldOptions.store) docToStore[fieldName] = field

    // Filter out values that must not be indexed.
    if (Array.isArray(field)) {
      // make filter fields searchable
      field = field.join(' ')
    } else if (field === null) {
      // Skip null values. The early return matters: without it the field
      // would still be vectorised below as the literal text "null".
      delete doc[fieldName]
      indexerOptions.log.debug(doc.id + ': ' + fieldName + ' field is null, SKIPPING')
      return
    } else if (!(_isString(field) || _isNumber(field))) {
      // Only strings and numbers are searchable. Return so that objects,
      // booleans, undefined, etc. are not indexed via their string coercion.
      delete doc[fieldName]
      indexerOptions.log.debug(doc.id + ': ' + fieldName +
        ' field not string or array, SKIPPING')
      return
    }

    // NOTE(review): fieldOptions.stopwords is '' for the id field, which is
    // falsy, so the `||` below falls back to batchOptions.stopwords and the id
    // override above has no effect - confirm whether that is intended.
    var vecOps = {
      separator: fieldOptions.separator || batchOptions.separator,
      stopwords: fieldOptions.stopwords || batchOptions.stopwords,
      nGramLength: fieldOptions.nGramLength || batchOptions.nGramLength
    }
    var v = tv.getVector(field + '', vecOps)
    var freq = tf.getTermFrequency(v, {
      scheme: 'doubleLogNormalization0.5',
      weight: fieldOptions.weight
    })
    freq.push([ [ wildChar ], 0 ]) // can do wildcard search on this field
    if (fieldOptions.searchable) {
      freqsForComposite.push(freq)
    }
    if (fieldOptions.fieldedSearch) {
      freq.forEach(function (item) {
        // item[0] is the token's n-gram parts, item[1] its frequency weight.
        var token = item[0].join(indexerOptions.nGramSeparator)
        getKeys(batchOptions, docIndexEntries, doc, token, item, fieldName)
      })
    }
  })
Ejemplo n.º 2
0
        // Walk every field of `doc`, build its term-frequency vector and, for
        // fielded-search fields, queue TF/RI index entries in `docIndexEntries`
        // (one pair per filter value plus one unfiltered pair per token).
        _.forEach(doc, function (field, fieldName) {
          // Explicit predicate instead of the `_.find(coll, 'fieldName', value)`
          // shorthand: that three-argument form only means "property equals
          // value" in lodash 3; a predicate function behaves the same in 3 and 4.
          var matchedOptions = _.find(batchOptions.fieldOptions, function (entry) {
            return entry != null && entry.fieldName === fieldName;
          });
          var fieldOptions = _.defaults(matchedOptions || {}, batchOptions.defaultFieldOptions);

          // because you can't run stopwords on the id field
          fieldOptions.stopwords = (fieldName === 'id') ? '' : batchOptions.stopwords;

          if (_.isArray(field)) field = field.join(' '); // make filter fields searchable
          var v = tv.getVector(field + '', {
            separator: batchOptions.separator,
            stopwords: fieldOptions.stopwords,
            nGramLength: fieldOptions.nGramLength
          });
          v.push(['*', 1]); // can do wildcard search on this field

          var freq = tf.getTermFrequency(v, {
            scheme: 'doubleLogNormalization0.5',
            weight: fieldOptions.weight
          });
          if (fieldOptions.searchable) {
            freqsForComposite.push(freq);
          }
          if (fieldOptions.fieldedSearch) {
            freq.forEach(function (item) {
              // Queues the TF entry (doc id) and the RI entry ([weight, doc id])
              // for one token; filter/filterKey are both '' for the unfiltered key.
              var queueEntries = function (filter, filterKey) {
                docIndexEntries.push({
                  type: 'put',
                  key: 'TF○' + fieldName + '○' + item[0] + '○' + filter + '○' + filterKey,
                  value: [doc.id]
                });
                docIndexEntries.push({
                  type: 'put',
                  key: 'RI○' + fieldName + '○' + item[0] + '○' + filter + '○' + filterKey,
                  value: [[item[1], doc.id]]
                });
              };
              batchOptions.filters.forEach(function (filter) {
                _.forEach(doc[filter], function (filterKey) {
                  queueEntries(filter, filterKey);
                });
              });
              queueEntries('', '');
            });
          }
        });
Ejemplo n.º 3
0
  // For each field of `doc`: resolve its options, optionally keep the raw
  // value in `docToStore`, then (unless skipField says otherwise) vectorise it
  // and register the result for composite and/or fielded search.
  _forEach(doc, function (field, fieldName) {
    var matchedOptions = _find(batchOptions.fieldOptions, ['fieldName', fieldName]) || {}
    var fieldOptions = _defaults(matchedOptions, batchOptions.defaultFieldOptions)

    // because you can't run stopwords on the id field
    fieldOptions.stopwords = (fieldName === 'id') ? '' : batchOptions.stopwords

    // The raw value is stored first, before any skipping or joining happens.
    if (fieldOptions.store) {
      docToStore[fieldName] = field
    }

    // Once stored (if configured), indexing is abandoned for fields that
    // skipField rejects (per the original note: null, or neither string nor
    // number).
    if (skipField(doc, fieldName, indexerOptions)) return

    // make filter fields searchable
    var text = Array.isArray(field) ? field.join(' ') : field

    // NOTE(review): for the id field fieldOptions.stopwords is '' (falsy), so
    // the `||` below resolves to batchOptions.stopwords - confirm intended.
    var termVector = tv.getVector(text + '', {
      separator: fieldOptions.separator || batchOptions.separator,
      stopwords: fieldOptions.stopwords || batchOptions.stopwords,
      nGramLength: fieldOptions.nGramLength || batchOptions.nGramLength
    })
    var termFrequencies = tf.getTermFrequency(termVector, {
      scheme: 'doubleLogNormalization0.5',
      weight: fieldOptions.weight
    })
    termFrequencies.push([ [ wildChar ], 0 ]) // can do wildcard search on this field

    if (fieldOptions.searchable) freqsForComposite.push(termFrequencies)

    if (!fieldOptions.fieldedSearch) return

    termFrequencies.forEach(function (entry) {
      var joinedToken = entry[0].join(indexerOptions.nGramSeparator)
      getKeys(batchOptions, docIndexEntries, doc, joinedToken, entry, fieldName)
    })
  })