Exemplo n.º 1
0
          .map((container) => {
            // build the base record off the idents
            var insert = mmsUtils.extractIds(container)

            insert._id = insert.mmsUuid

            // gather all the infos

            var xml = mmsUtils.returnXmlNode(container.full_xml)
            insert.rightsAgents = mmsUtils.extractMmsHashRightsAgents(container)

            if (xml) {
              insert.agents = mmsUtils.extractAgents(xml)
              insert.subjects = mmsUtils.extractSubjects(xml)
              insert.divisions = mmsUtils.extractDivision(xml)
              insert.notes = mmsUtils.extractNotes(xml)
              insert.titles = mmsUtils.extractTitles(xml)
              insert.languages = mmsUtils.extractLanguage(xml)
              insert.dates = mmsUtils.extractDates(xml)
              insert.abstracts = mmsUtils.extractAbstracts(xml)
              insert.typeOfResource = mmsUtils.extractTypeOfResource(xml)
              insert.genres = mmsUtils.extractGenres(xml)
              insert.physicalDescriptions = mmsUtils.extractPhysicalDescription(xml)
              insert.originInfos = mmsUtils.extractOriginInfo(xml)

              // get some hieararchy about it
              var h = mmsUtils.extractCollectionAndContainer(xml)

              insert.collectionUuid = h.collection
              insert.containerUuid = h.container
              insert.parents = h.parents
            } else {
              db.logError('MMS container ingest - no/invalid XML for this container ', insert._id)
              return ''
            }

            if (!insert.collectionUuid) {
              db.logError('MMS container ingest - no collection uuid found for this container ', insert._id)
              console.log(container.full_xml)
              return ''
            }

            var percent = Math.floor(++totalInserted / totalLines * 100)
            if (percent > previousPercent) {
              previousPercent = percent
              process.stdout.cursorTo(20)
              process.stdout.write(clc.black.bgMagentaBright('Container: ' + percent + '%'))
            }

            return insert
          })
Exemplo n.º 2
0
          .map((collection) => {
            // build the base record off the idents
            var insert = mmsUtils.extractIds(collection)
            insert._id = insert.mmsUuid

            // gather all the infos
            var xml = mmsUtils.returnXmlNode(collection.full_xml)
            insert.rightsAgents = mmsUtils.extractMmsHashRightsAgents(collection)
            if (xml) {
              insert.agents = mmsUtils.extractAgents(xml)
              insert.subjects = mmsUtils.extractSubjects(xml)
              insert.divisions = mmsUtils.extractDivision(xml)
              insert.notes = mmsUtils.extractNotes(xml)
              insert.titles = mmsUtils.extractTitles(xml)
              insert.languages = mmsUtils.extractLanguage(xml)
              insert.typeOfResource = mmsUtils.extractTypeOfResource(xml)
              insert.dates = mmsUtils.extractDates(xml)
              insert.abstracts = mmsUtils.extractAbstracts(xml)
              insert.genres = mmsUtils.extractGenres(xml)
              insert.physicalDescriptions = mmsUtils.extractPhysicalDescription(xml)
              insert.originInfos = mmsUtils.extractOriginInfo(xml)
            } else {
              db.logError('MMS collection ingest - no/invalid XML for this collection ', insert._id)
            }

            var percent = Math.floor(++totalInserted / totalLines * 100)
            if (percent > previousPercent) {
              previousPercent = percent
              process.stdout.cursorTo(0)
              process.stdout.write(clc.black.bgYellowBright('MMS Collection: ' + percent + '%'))
            }

            return insert
          })
Exemplo n.º 3
0
 agents.update({viaf: agent.viaf[0]}, { $set: agent }, {upsert: true}, (updateErr, result) => {
   // Every outcome ends by calling the optional callback with (null, null)
   const finish = () => {
     if (cb) cb(null, null)
   }

   if (!updateErr) {
     finish()
     return
   }

   const reason = updateErr.toString()

   // The controlled name is already taken: try to merge with the agent that holds it
   if (reason.search('nameControlled_1 dup key') > -1) {
     exports.mergeAgentOnNameConflict(agent, (mergeErr, useAgent) => {
       if (mergeErr) console.log(mergeErr)

       if (!useAgent) {
         finish()
         return
       }

       // The merge produced an agent: replace the stored document matched by controlled name
       agents.update({nameControlled: useAgent.nameControlled}, useAgent, (replaceErr, result) => {
         if (replaceErr) console.log(replaceErr)
         finish()
       })
     })
     return
   }

   // A duplicate viaf key is ignored silently; any other failure is logged
   if (reason.search('viaf_1 dup key') === -1) {
     db.logError('Agent Serialization - Catalog - Cannot update/insert record:', JSON.stringify({'agent': agent, 'error': updateErr}))
   }
   finish()
 })
Exemplo n.º 4
0
 agents.update({ nameControlled: agent.nameControlled }, { $set: agent }, {upsert: true}, function (err, result) {
   // A duplicate controlled-name key is not logged; every other error is
   const isNameDuplicate = Boolean(err) && err.toString().search('nameControlled_1 dup key') !== -1

   if (err && !isNameDuplicate) {
     db.logError('Agent Serialization - Catalog - Cannot update/insert record:', JSON.stringify({'agent': agent, 'error': err}))
   }

   // The optional callback always receives (null, null), whether or not the update failed
   if (cb) cb(null, null)
 })
Exemplo n.º 5
0
exports.returnXmlNode = function (xml) {
  try {
    var xmlDoc = libxmljs.parseXml(xml)
    return xmlDoc
  } catch (err) {
    db.logError('MMS - returnXmlNode ' + err, xml)
    return false
  }
}
 setInterval(() => {
   // Sample the `worked` flag and clear it for the next 120000 ms window.
   // NOTE(review): presumably the worker sets `worked` to true elsewhere whenever it makes progress - confirm
   const stalled = !worked
   worked = false
   if (!stalled) return

   // The flag was still unset from the previous tick: log, ask for a restart
   // of the current work range via process.send, and exit with code 1
   db.logError(`${cluster.worker.id} has not worked in the last few min:`, JSON.stringify(workedLastOn))
   console.log(`Going to restart worker: ${cluster.worker.id}`)
   process.send({ restart: { start: workStart, end: workEnd } })
   process.exit(1)
 }, 120000)
Exemplo n.º 7
0
exports.extractIds = function (record) {
  var idThesaurus = lexicon.maps.mmsIdentifers

  var idents = {}

  // get the mss system identifers

  if (record['id']) {
    idents['mmsDb'] = record['id'] + ''
  }
  if (record['type']) {
    idents['mmsType'] = record['type']
  }
  if (record['uuid']) {
    idents['mmsUuid'] = record['uuid']
  }

  if (record['solr_doc_hash']) {
    if (record['solr_doc_hash']['identifier_local_image_id']) {
      if (typeof record['solr_doc_hash']['identifier_local_image_id'] === 'string') {
        idents['captureIds'] = [record['solr_doc_hash']['identifier_local_image_id']]
      } else {
        idents['captureIds'] = []
        for (var x in record['solr_doc_hash']['identifier_local_image_id']) {
          if (record['solr_doc_hash']['identifier_local_image_id'][x] !== null) {
            idents['captureIds'].push(record['solr_doc_hash']['identifier_local_image_id'][x])
          }
        }
      }
    }

    if (record['solr_doc_hash']['collection_uuid']) {
      if (record['solr_doc_hash']['collection_uuid'].length === 36) {
        idents['collectionUuid'] = record['solr_doc_hash']['collection_uuid']
      }
    }

    if (record['solr_doc_hash']['container_uuid']) {
      if (record['solr_doc_hash']['container_uuid'].length === 36) {
        idents['containerUuid'] = record['solr_doc_hash']['container_uuid']
      }
    }
  }

  // the solr doc hash title is sometimes messed up with encoding errors
  // so pull it out below from the xml
  // if (record['solr_doc_hash']['title']){
  //  if (record['solr_doc_hash']['title'].length>0){
  //    idents['title'] = ""
  //    for (var x in record['solr_doc_hash']['title']){
  //      idents['title']+= record['solr_doc_hash']['title'][x] + ' '

  //    }
  //    idents['title']=idents['title'].trim()
  //    idents['titleLast'] = record['solr_doc_hash']['title'][record['solr_doc_hash']['title'].length-1].trim()
  //  }

  // }

  if (record['desc_xml']) {
    // record['desc_xml'] = '<mods xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="http://uri.nypl.org/schema/nypl_mods" version="3.4" xsi:schemaLocation="http://uri.nypl.org/schema/nypl_mods http://uri.nypl.org/schema/nypl_mods"> <titleInfo ID="titleInfo_0" usage="primary" supplied="no" lang="eng"><nonSort>The </nonSort><title>Newtonian system of philosophy, adapted to the capacities of young gentlemen and ladies ... being the substance of six lectures read to the Lilliputian Society, by Tom Telescope, A. M., and collected and methodized for the benefit of the youth of these Kingdoms, by their old friend Mr. Newbery ...</title></titleInfo><name ID="name_0" type="personal" authority="naf" valueURI="" authorityRecordId=""><namePart>Newbery, John (1713-1767)</namePart><affiliation/><role><roleTerm valueURI="http://id.loc.gov/vocabulary/relators/aut" authority="marcrelator" type="code">aut</roleTerm><roleTerm valueURI="http://id.loc.gov/vocabulary/relators/aut" authority="marcrelator" type="text">Author</roleTerm></role></name><name ID="name_1" type="personal" authority="naf" valueURI="" authorityRecordId=""><namePart>Goldsmith, Oliver (1730?-1774)</namePart><affiliation/><role><roleTerm valueURI="http://id.loc.gov/vocabulary/relators/aut" authority="marcrelator" type="code">aut</roleTerm><roleTerm valueURI="http://id.loc.gov/vocabulary/relators/aut" authority="marcrelator" type="text">Author</roleTerm></role></name><name ID="name_2" type="personal" authority="naf" valueURI="" authorityRecordId=""><namePart>Telescope, Tom</namePart><affiliation/><role><roleTerm valueURI="http://id.loc.gov/vocabulary/relators/aut" authority="marcrelator" type="code">aut</roleTerm><roleTerm valueURI="http://id.loc.gov/vocabulary/relators/aut" authority="marcrelator" type="text">Author</roleTerm></role></name><originInfo ID="originInfo_0"><dateIssued encoding="w3cdtf" keyDate="yes">1761</dateIssued><place><placeTerm>London</placeTerm></place></originInfo><note ID="note_0" 
type="content">"To the young gentlemen and ladies of Great Britain and Ireland, this philosophy of tops and balls is ... inscribed, by ... J. Newbery": 3d prelim. p.</note><note ID="note_1" type="content">For variations see: Babson Institute Library Newton Collection, 115//</note><note ID="note_2" type="content">Imperfect: p. 111-112 mutilated, affecting 2 words of text.</note><note ID="note_3" type="content">Publisher\'s advertisements: p. 126-140.</note><note ID="note_4" type="content">Sometimes attributed to Oliver Goldsmith.</note><identifier ID="identifier_0" type="local_hades" displayLabel="Hades struc ID (legacy)">618679</identifier><identifier ID="identifier_1" type="local_other" displayLabel="RLIN/OCLC">NYPG784271303-B</identifier><identifier ID="identifier_2" type="local_catnyp" displayLabel="CATNYP ID (legacy)">b1493851</identifier><identifier ID="identifier_3" type="local_bnumber" displayLabel="NYPL catalog ID (B-number)">b10483503</identifier><location ID="location_0"><physicalLocation authority="marcorg" type="repository">nn</physicalLocation><physicalLocation type="division">Berg Collection</physicalLocation><shelfLocator>Berg Coll. 77-645</shelfLocator></location></mods>'

    try {
      var xmlDoc = libxmljs.parseXml(record['desc_xml'])
    } catch (err) {
      db.logError('extract mms identifiers - invalid MODS record', {error: err, data: record})
      return idents
    }

    idents['dates'] = []

    var children = xmlDoc.root().childNodes()
    for (var aChild in children) {
      var n = children[aChild]

      if (n.name() === 'identifier') {
        var type = false
        var value = n.text()

        var attrs = n.attrs()

        for (var aAttr in attrs) {
          var a = attrs[aAttr]

          if (a.name() === 'type') type = a.value()

          // mms stores their identifiers differntly when it is a local_other, it uses the display label, overwrite the type if has a display label
          if (a.name() === 'displayLabel' || a.name() === 'display_label') type = a.value()
        }

        if (idThesaurus[type]) {
          // blah
          value = value.replace('archives_components_', '')

          if (type.search('catalog ID') > -1 || type.search('local_bnumber') > -1 || type.search('local_b') > -1) {
            value = utils.normalizeBnumber(value)
          }

          // this might happen, just to keep track of it
          if (idents[idThesaurus[type]]) {
            // unless we are about to overwrite a bnumbe with obviously not bnumber
            if ((type.search('catalog ID') > -1 || type.search('local_bnumber') > -1 || type.search('local_b') > -1) && value && value.search('b') !== -1) {
              db.logError('extract mms identifiers - overwriting identifiers', type + '| was : ' + idents[idThesaurus[type]] + ' is now: ' + value)
            }
          }

          idents[idThesaurus[type]] = value
        } else {
          db.logError('extract mms identifiers - unknown Identifier', n.toString())
        }
      }

      if (n.name() === 'location') {
        var locations = n.childNodes()

        for (var aLoc in locations) {
          if (locations[aLoc].name() === 'shelfLocator' || locations[aLoc].name() === 'shelfocator') idents['callNumber'] = locations[aLoc].text()
        }
      }

      if (n.name() === 'titleInfo') {
        idents['title'] = n.text().trim()
      }

      if (n.name() === 'originInfo') {
        idents['originInfo'] = n.text().trim()

        // also get the specifc types of dates

        for (var aGrandChild in n.childNodes()) {
          var nn = n.childNodes()[aGrandChild]
          if (nn.name() === 'dateIssued') {
            idents['dateIssued'] = nn.text().trim()
            idents['dates'].push(nn.text().trim())
          }
          if (nn.name() === 'dateCreated') {
            idents['dateCreated'] = nn.text().trim()
            idents['dates'].push(nn.text().trim())
          }
        }
      }
    }
  }

  // normalize the bnumber
  if (idents['bNumber']) {
    idents['bNumber'] = utils.normalizeBnumber(idents['bNumber'])
  }

  // use the date if the title is not present otherwise set it to false
  if (!idents['title']) {
    if (idents['originInfo']) idents['title'] = idents['originInfo']
  }

  if (!idents['title']) {
    idents['title'] = false
  }

  idents['sourceSystem'] = 'mms'

  return idents
}