async.map(uturi, function(myturi, cb){ var repo = githubUrlToObject(myturi.id) || bitbucketUrlToObject(myturi.id); if (repo) { var myid = repo.repo; var uid = myid, i = 1; while (uid in reservedIds) { uid = myid + '-' + i++; } reservedIds[uid] = true; var r = { '@id': myid, '@type': myturi.type || 'Code', }; if (!this.packager.isClassOrSubClassOf(r['@type'], 'Code')) { return cb(new Error('URL of code repositories must be of @type Code (or a subclass of Code)')); } r.codeRepository = repo.https_url; this.log('HEAD', repo.tarball_url); //see https://developer.github.com/v3/#user-agent-required request.head({url:repo.tarball_url, followAllRedirects:true, headers: {'User-Agent': 'dcat'}}, function(err, resp){ if (err) return cb(err); this.log(resp.statusCode, repo.tarball_url); if (resp.statusCode >= 400) { return cb(this._error('could not HEAD ' + repo.tarball_url), resp.statusCode); } r.encoding = { '@type': 'MediaObject', contentUrl: repo.tarball_url, encodingFormat: resp.headers['content-type'] }; if ('content-length' in resp.headers) { r.encoding.contentSize = parseInt(resp.headers['content-length'], 10); } if ('last-modified' in resp.headers) { r.encoding.dateModified = (new Date(resp.headers['last-modified'])).toISOString(); } return cb(null, r); }.bind(this)); } else { this.log('HEAD', myturi.id); request.head({url: myturi.id, followAllRedirects:true}, function(err, resp){ if (err) return cb(err); this.log(resp.statusCode, myturi.id); if (resp.statusCode >= 400) { return cb(this._error('could not HEAD ' + myturi.id), resp.statusCode); } var prefix = (opts.namespace)? 
(opts.namespace + '/') : ''; var mymime = resp.headers['content-type'] , mypath = url.parse(myturi.id).pathname , myid = prefix + (path.basename(mypath, path.extname(mypath)).trim().replace(/ /g, '-').toLowerCase() || 'p'); var uid = myid, i = 1; while (uid in reservedIds) { uid = myid + '-' + i++; } reservedIds[uid] = true; var r = { '@id': uid, '@type': myturi.type || Dcat.type(mymime) || 'CreativeWork' }; var contentSize; if ('content-length' in resp.headers) { contentSize = parseInt(resp.headers['content-length'], 10); } if (this.packager.isClassOrSubClassOf(r['@type'], 'SoftwareApplication')) { r.downloadUrl = myturi.id; r.fileFormat = resp.headers['content-type']; if ('last-modified' in resp.headers) { r.dateModified = (new Date(resp.headers['last-modified'])).toISOString(); } if (!('content-encoding' in resp.headers) && (contentSize !== undefined)) { r.fileSize = contentSize; } } else { var encoding = { contentUrl: myturi.id, encodingFormat: mymime }; if ('last-modified' in resp.headers) { encoding.dateModified = (new Date(resp.headers['last-modified'])).toISOString(); } if ('content-encoding' in resp.headers) { encoding.encoding = { '@type': 'MediaObject', encodingFormat: resp.headers['content-encoding'] }; if ( contentSize !== undefined ) { encoding.encoding.contentSize = contentSize; } } else if (contentSize !== undefined) { encoding.contentSize = contentSize; } if (this.packager.isClassOrSubClassOf(r['@type'], 'Dataset')) { r.distribution = _.extend({'@type': 'DataDownload'}, encoding); } else if (this.packager.isClassOrSubClassOf(r['@type'], 'Code')) { r.encoding = _.extend({'@type': 'MediaObject'}, encoding); //try to get programming language for MIME var inferedType = Dcat.type(mymine); if (inferedType === 'Code') { var m2 = mymime.split('/')[1]; r.programmingLanguage = { name: m2.split('-')[1] || m2 }; } } else { r.encoding = _.extend({'@type': 'MediaObject'}, encoding); } } cb(null, r); }.bind(this)); } }.bind(this), function(err, resources){
/**
 * Tell whether an entry designates a URL (as opposed to e.g. a glob).
 *
 * @param {string|Object} tGlobsOrTurl - either the candidate string itself or
 *   an object whose `id` property holds the candidate.
 * @returns {*} the first truthy result among isUrl, githubUrlToObject and
 *   bitbucketUrlToObject (tried in that order) applied to the candidate, or the
 *   result of bitbucketUrlToObject when none is truthy.
 */
function _isUrl(tGlobsOrTurl) {
  var candidate = tGlobsOrTurl;
  if (typeof candidate !== 'string') {
    candidate = tGlobsOrTurl.id;
  }

  // Later recognisers are only consulted when the earlier ones are falsy.
  var matched = isUrl(candidate);
  if (!matched) {
    matched = githubUrlToObject(candidate);
  }
  if (!matched) {
    matched = bitbucketUrlToObject(candidate);
  }
  return matched;
}