/**
 * GET /pcap/:command
 *
 * Fetches a capture from the configured pcap service with `curl`, decodes it
 * with `tshark -T pdml`, and streams the decoded packets back to the client as
 * `{"objects": [packet, ...]}`. Every packet additionally carries a `hexdump`
 * property holding its raw bytes (hex encoded), cut out of the bytes that curl
 * delivered.
 *
 * Responds 403 when the requester lacks the `pcap` permission and 500 when a
 * child process cannot be run before any output was produced.
 */
app.get('/pcap/:command', function(req, res) {
  if (!req.user || !req.user.permissions.pcap) {
    res.send(403, 'Forbidden!');
    return;
  }

  // Raw chunks received from curl; readRawBytes() consumes from this queue.
  var transit = [];

  // The command comes from the request, so it is encoded to keep it a single
  // path segment of the upstream URL.
  var pcapUrl = config.pcap.url + '/' + encodeURIComponent(req.param('command'));
  pcapUrl += '?' + querystring.stringify(req.query);

  var curl = spawn('curl', ['-s', pcapUrl]);
  var tshark = spawn('tshark', ['-i', '-', '-T', 'pdml']);
  var xml = new XmlStream(tshark.stdout);
  xml.collect('proto');
  xml.collect('field');

  curl.stdout.pipe(tshark.stdin);
  curl.stdout.on('data', function (data) {
    transit.push(data);
  });

  var npcaps = 0;
  var started = false;  // the JSON preamble has been written
  var finished = false; // the response has been ended

  // Writes the header and the opening of the JSON document exactly once.
  function startBody() {
    if (started) {
      return;
    }
    started = true;
    res.set('Content-Type', 'application/json');
    // The key is quoted so that the body is valid JSON.
    res.write('{"objects": [\n');
  }

  // Detaches the pipe and terminates both child processes.
  function stopChildren() {
    curl.stdout.unpipe(tshark.stdin);
    curl.kill('SIGKILL');
    tshark.kill('SIGKILL');
  }

  // Closes the JSON document; also covers captures that contain no packet.
  function finish() {
    if (finished) {
      return;
    }
    finished = true;
    startBody();
    res.end(']}');
    stopChildren();
  }

  // A child process that cannot be spawned emits 'error'; without a listener
  // that event would be thrown and take the whole server down.
  function fail(err) {
    if (finished) {
      return;
    }
    console.error(err);
    if (started) {
      finish();
      return;
    }
    finished = true;
    res.send(500, 'Capture failed');
    stopChildren();
  }

  curl.on('error', fail);
  tshark.on('error', fail);

  xml.on('end', finish);

  xml.on('endElement: packet', function(packet) {
    if (finished) {
      return;
    }
    var psize = parseInt(packet.proto[0].$.size, 10);
    if (npcaps === 0) {
      startBody();
      // skip global header
      readRawBytes(24, transit);
    } else {
      res.write(',\n');
    }
    // skip packet header
    readRawBytes(16, transit);
    packet.hexdump = readRawBytes(psize, transit).toString('hex');
    res.write(JSON.stringify(packet));
    npcaps++;
  });
});
/**
 * Parses an in-memory XML job feed and hands every <post> element to
 * `processFunction`.
 *
 * @param {*} input - XML payload, emitted as a single 'data' event on an
 *   internal Stream that feeds the parser.
 * @param {function(Object, function(*))} processFunction - called with each
 *   post (after removeElementTags) and a completion callback that receives the
 *   job id.
 * @param {function(number, Array)} afterParsing - called once with the number
 *   of processed jobs and their ids, as soon as parsing has ended and every
 *   job that was read has been processed. It is not called when no post was
 *   read.
 */
JobPostImport.importXML = function(input, processFunction, afterParsing){
    console.log('---XML---');

    var parsingEnded = 0;
    var readCount = 0;
    var processedCount = 0;
    var collectedIds = [];

    var source = new Stream();
    var parser = new XmlStream(source);
    parser.collect('element');

    // True when at least one job was read and none is still being processed.
    function everythingProcessed() {
        return readCount > 0 && readCount == processedCount;
    }

    function onJobProcessed(jobId) {
        processedCount += 1;
        collectedIds.push(jobId);
        console.log( 'ended?:' + parsingEnded);
        if (!parsingEnded || !everythingProcessed()) {
            return;
        }
        console.log( "AFTER PARSING 1");
        afterParsing(processedCount, collectedIds);
    }

    parser.on('endElement: post', function(post) {
        var cleaned = removeElementTags(post);
        readCount += 1;
        processFunction(cleaned, onJobProcessed);
    });

    parser.on('end', function() {
        parsingEnded = 1;
        console.log( 'ended!' );
        if (!everythingProcessed()) {
            return;
        }
        console.log( "AFTER PARSING 2");
        afterParsing(processedCount, collectedIds);
    });

    source.emit('data', input);
    source.emit('end');
};
/**
 * Streams the XML file at `filePath` and reports its elements through
 * optional callbacks.
 *
 * @param {string} filePath - path of the file to read.
 * @param {Object} callbacks - handlers, each forwarded through callback_if:
 *   `node(node)` for every <node>, `way(way)` for every <way>, and
 *   `end(false)` once the stream is over.
 */
osm.parse = function (filePath, callbacks) {
  var parser = new XmlStream(fs.createReadStream(filePath));

  // Repeated <nd>/<tag> children must be collected into arrays; otherwise
  // xml-stream overwrites sub-elements of the same type.
  ['nd', 'tag'].forEach(function (childName) {
    parser.collect(childName);
  });

  parser.on('endElement: node', function (element) {
    callback_if(callbacks.node, element);
  });

  parser.on('endElement: way', function (element) {
    callback_if(callbacks.way, element);
  });

  parser.on('end', function () {
    callback_if(callbacks.end, false);
  });
};
return new Promise(function(resolve, reject){ let stream = fs.createReadStream(dataDir+"Tags.xml"), xml = new XmlStream(stream), count = 0; xml.collect('row'); xml.on('endElement: row', function(item) { console.log(item["$"].TagName); count++; }); xml.on("end", function(){ console.log("Done Tags!"); stream.close(); resolve(count); }); });
// When `stream` signals 'readable', parse it as UTF-8 XML and store the
// children of the <comments> element in the outer `comments` variable as
// { props, text } records. The outer `end` callback is invoked when the
// element has been read, when the parser ends, and when the parser errors;
// a failure while setting up the parser is passed to `end` as its argument.
// NOTE(review): the listener is attached with `on`, so a parser is created
// for every 'readable' event - confirm that `end` detaches this listener.
stream.on('readable', function onReadable() {
  // Maps one parsed child to its attributes and text content.
  function toEntry(child) {
    return { props: child.$, text: child.$text };
  }

  try {
    var parser = new XmlStream(stream, 'utf8');
    parser.preserve('comments', false);
    parser.collect('comment');
    parser.on('error', end);
    parser.on('end', end);
    parser.on('endElement: comments', function (element) {
      comments = element.$children.map(toEntry);
      end();
    });
  } catch (failure) {
    end(failure);
  }
});
/**
 * Handles one page of a CareerBuilder search response.
 *
 * Stores the value of <TotalPages> in the outer `numCBPages` and every
 * <JobSearchResult> in the outer `careerBuilderResults`, indexed from
 * (page - 1) * 25 + 1, then reports the accumulated results when <Results>
 * closes.
 *
 * @param {*} err - error from the request that produced `data`, if any.
 * @param {*} data - readable source handed to XmlStream.
 * @param {number} page - 1-based page number of this response.
 * @param {function(*, Object)} callbackErr - node-style callback; receives
 *   (err, null) on request or parser failure and (null, careerBuilderResults)
 *   on success. It is not invoked when both `err` and `data` are empty.
 */
var callbackCareerBuilder = function(err, data, page, callbackErr) {
  if (err) {
    console.log("callbackCareerBuilder error!");
    callbackErr(err, null);
    return;
  }
  if (!data) {
    return;
  }

  console.log("Processing career builder page " + page);
  var xml = new XmlStream(data, 'utf8');

  // Without a listener a parser 'error' event would be thrown instead of
  // reaching the caller.
  xml.on('error', function(parseErr) {
    console.log("callbackCareerBuilder error!");
    callbackErr(parseErr, null);
  });

  //Get total CB pages
  xml.preserve('TotalPages', true);
  xml.on('endElement: TotalPages', function(item) {
    numCBPages = parseInt(item['$text'], 10);
    console.log("Total number of CB Pages: " + numCBPages);
  });

  xml.preserve('JobSearchResult', true);
  xml.collect('subitem');
  var i = (page - 1) * 25 + 1;
  xml.on('endElement: JobSearchResult', function(item) {
    console.log("Adding career builder job " + i);
    careerBuilderResults[i] = item;
    i++;
  });

  xml.on('endElement: Results', function() {
    callbackErr(null, careerBuilderResults);
  });
};
/**
 * Handles one batch of an Indeed search response.
 *
 * Stores the value of <totalresults> in the outer `numIndeedResults` and
 * every <result> in the outer `indeedResults`, indexed from startIndex + 1,
 * then reports the accumulated results when <results> closes.
 *
 * @param {*} err - error from the request that produced `data`, if any.
 * @param {*} data - readable source handed to XmlStream.
 * @param {number} startIndex - offset of the first result in this batch.
 * @param {function(*, Object)} callbackErr - node-style callback; receives
 *   (err, null) on request or parser failure and (null, indeedResults) on
 *   success. It is not invoked when both `err` and `data` are empty.
 */
var callbackIndeed = function(err, data, startIndex, callbackErr) {
  if (err) {
    console.log("callbackIndeed error!");
    callbackErr(err, null);
    return;
  }
  if (!data) {
    return;
  }

  console.log("Processing indeed start index " + startIndex);
  var xml = new XmlStream(data, 'utf8');

  // Without a listener a parser 'error' event would be thrown instead of
  // reaching the caller.
  xml.on('error', function(parseErr) {
    console.log("callbackIndeed error!");
    callbackErr(parseErr, null);
  });

  //Get total indeed results
  xml.preserve('totalresults', true);
  xml.on('endElement: totalresults', function(item) {
    numIndeedResults = parseInt(item['$text'], 10);
    console.log("Total number of Indeed results: " + numIndeedResults);
  });

  xml.preserve('result', true);
  xml.collect('subitem');
  var i = startIndex + 1;
  xml.on('endElement: result', function(item) {
    console.log("Adding indeed job " + i);
    indeedResults[i] = item;
    i++;
  });

  xml.on('endElement: results', function(item) {
    callbackErr(null, indeedResults);
  });
};
return new Promise(function(resolve, reject){ let stream = fs.createReadStream(dataDir+"Badges.xml"), xml = new XmlStream(stream), count = 0; xml.collect('row'); xml.on('endElement: row', function(item) { if (item["$"].UserId === "8152"){ console.log(item["$"]); } count++; if ((count % 10000) === 0){ console.log("Processed "+count); } }); xml.on("end", function(){ console.log("Done Tags!"); stream.close(); resolve(count); }); });
// Once the database connection is open: declare the oeuvre (work) and product
// models, then import every <item> of sources/Romans.gcs as one BookO (the
// work) plus one BookP (the product that references it). When an item has a
// cover picture, the file is copied to public/productsImg prefixed with the
// product id. The mongoose connection is closed when the parser has ended and
// no item is still being saved.
db.once('open', function callback () {
  // Base schema shared by every oeuvre (work) document.
  function OeuvreBaseSchema(){
    Schema.apply(this, arguments);
    this.add({
      name : {type :String,required : true},
      description : String
    });
  }
  util.inherits(OeuvreBaseSchema,Schema);

  var OeuvreSchema = new OeuvreBaseSchema();
  var BookOSchema = new OeuvreBaseSchema({
    serie : String,
    numOrderSerie : Number,
    isOriginal : Boolean,
    _originalId : mongoose.Schema.Types.ObjectId,
    language : String,
    authors : [String],
    translator : String
  });
  var Oeuvre = mongoose.model('oeuvres',OeuvreSchema);
  var BookO = Oeuvre.discriminator('bookOeuvre',BookOSchema);

  // Base schema shared by every product document.
  function ProductBaseSchema(){
    Schema.apply(this, arguments);
    this.add({
      name : String,
      cover: String,
      web : String,
      _oeuvreId : mongoose.Schema.Types.ObjectId
    });
  }
  util.inherits(ProductBaseSchema,Schema);

  var ProductSchema = new ProductBaseSchema();
  var BookPSchema = new ProductBaseSchema({
    serie : String,
    numOrderSerie : Number,
    isbn13 : String,
    isbn10 : String,
    publisher : String,
    pages : String,
    format : String
  });
  var Product = mongoose.model('products',ProductSchema);
  var BookP = Product.discriminator('bookProduct',BookPSchema);

  // Create a file stream and pass it to XmlStream
  var stream = fs.createReadStream(path.join(__dirname, '/sources/Romans.gcs'));
  var xml = new XmlStream(stream);

  var endParse = false; // the parser has emitted 'end'
  var nbParse = 0;      // items whose saves have not finished yet

  // Closes the connection when nothing is left to parse or to save. Called
  // from both the 'end' event and every finished item, because either one can
  // happen last (and a file without items only ever emits 'end').
  function closeIfDone() {
    if (endParse && nbParse === 0) {
      mongoose.connection.close();
    }
  }

  // Marks one item as finished, whether its saves succeeded or failed, so a
  // single failure no longer keeps the connection open forever.
  function itemDone(err) {
    nbParse--;
    console.log(nbParse);
    if (err) {
      console.error(err);
    }
    closeIfDone();
  }

  xml.collect('line');
  xml.on('endElement: item', function(item) {
    nbParse++;

    var vAuthors = [];
    var tAuthors = item.authors;
    if (tAuthors && tAuthors.line) {
      tAuthors.line.forEach(function(unAuteur){
        vAuthors.push(unAuteur.col);
      });
    }

    // The cover attribute only contributes its file name; the picture itself
    // is read from sources/Romans_pictures. Names without a '.' are ignored.
    var fileCoverSource = null;
    var fullPathCover = null;
    var cover = item.$.cover;
    if (cover) {
      fileCoverSource = path.basename(path.join(__dirname, '/sources', cover.substring(1)));
      if (fileCoverSource.indexOf('.') !== -1) {
        fullPathCover = path.join(__dirname, '/sources/Romans_pictures', fileCoverSource);
      }
    }

    var bookO = new BookO({
      name: item.$.title,
      serie : item.$.serie,
      numOrderSerie : 0,
      isOriginal : false,
      _originalId : null,
      language : 'Français',
      authors : vAuthors,
      translator : item.$.translator,
      description : item.description
    });

    bookO.save(function(err, savedOeuvre){
      if (err) {
        return itemDone(err);
      }

      var bookP = new BookP({
        name : item.$.title,
        cover: null,
        serie : item.$.serie,
        numOrderSerie : 0,
        isbn10 : item.$.isbn,
        web : item.$.web,
        _oeuvreId : savedOeuvre._id,
        publisher : item.$.publisher,
        pages : item.$.pages
      });

      // First save: obtains the product id that prefixes the cover file name.
      bookP.save(function(err, savedProduct){
        if (err) {
          return itemDone(err);
        }

        if (fullPathCover) {
          var fileNameCover = savedProduct._id + fileCoverSource;
          var destFile = path.join(__dirname, '/public/productsImg/', fileNameCover);
          try {
            // Copy the picture to its public location, renamed with the id.
            fs.writeFileSync(destFile, fs.readFileSync(fullPathCover));
          } catch (copyErr) {
            return itemDone(copyErr);
          }
          savedProduct.cover = '/productsImg/' + fileNameCover;
        }

        // Second save: persists the cover path (when there is one).
        savedProduct.save(function(err){
          itemDone(err);
        });
      });
    });
  });

  xml.on('end', function(){
    endParse = true;
    closeIfDone();
  });
});
var fs = require('fs') ,path = require('path') ,XmlStream = require('xml-stream') ,wtfWiki = require('wtf_wikipedia'); var stream = fs.createReadStream(path.join(__dirname, 'test.xml')); var xml = new XmlStream(stream); var start = new Date(); xml.preserve('verse', true); xml.collect('line'); xml.on('endElement: verse', function(verse) { console.log(verse); }); xml.on('endElement: root', function() { var end = new Date() - start; console.log("Execution time %dms", end); });
xml =new XmlStream(stream), paths = new Array(), allPaths = new Array(), svgData, svgObject = {g:{},data:{}}, guid, totalLength = 0, resolution = 3, transform = new Array(), processing = "void setup() {\r\nsize(400, 400);\r\nnoLoop();\r\n}\r\n \r\n void position(x,y,l) {\r\n if(l == 0) {\r\nstroke(#FFCC00);\r\n} else {\r\nstroke(#00CCFF);\r\n}\r\n ellipse(x,y,1,1)}\r\n void draw() {\r\n background(255);\r\nnoFill()\;\r\n", gcode = ""; xml.collect('path'); xml.on('endElement: path', function(item) { if(item.$.transform){ var regExp = /\(([^)]+)\)/; var transform = regExp.exec(item.$.transform)[1].split(' ') svgObject.g[guid].transform[0] = parseFloat(svgObject.g[guid].transform[0]) + parseFloat(transform[0]); svgObject.g[guid].transform[1] = parseFloat(svgObject.g[guid].transform[1]) + parseFloat(transform[1]); } var pathData = item.$.d var pathSegmentPattern = /[a-z][^a-z]*/ig; // split on every letter var pathSegments = pathData.match(pathSegmentPattern); // add to an array with all the paths (l,L,h,H,v,V,s,S,c,C,M) paths.push(pathSegments);
(function () { "use strict"; var fs = require('fs'), path = require('path'), XmlStream = require('xml-stream'), stream = fs.createReadStream(path.join(__dirname, '../xml/tol.xml')), xml = new XmlStream(stream), lookup = {}, // hash object to easily lookup parsed NODEs by ID root; // will be the normalized root node for the parsed tree // make sure to collect all children nodes we care about, otherwise xml-stream // will reduce them all to one entry. xml.collect('NODE'); // children of the current NODE xml.collect('OTHERNAME'); //altername names for this current NODE // this callback will fire when a NODE has completely finished parsing. // The nice thing about this is that we are assured that each NODE containing // NODES children won't reach this callback until all of those children have // already been parsed, normalized, and thrown into the lookup, allowing us to easily // reconstruct the normalized tree. xml.on('endElement: NODE', function (node) { var $attrs = node['$'], normalized = { id: +$attrs.ID }; if (node.NAME) { normalized.name = node.NAME; } else { normalized.internal = 1; } // to reduce the size of the payload, do not include integer fields on the // normalized JSON object whose value is "0", or empty string or "null". // Any consumer can normalize appropriately or // keep the convention, for example, that `!normalized.confidence` means this node has // a confidence level of "confident". I know this sounds wierd, but we're trying to // send 215 megabytes of XML data to a browser, so everything to make this output small, yet // still somewhat readable is a-okay. 
if (node.DESCRIPTION) { normalized.description = node.DESCRIPTION; } if (+$attrs.HASPAGE) { normalized.hasPage = +$attrs.HASPAGE; } if (+$attrs.PHYLESIS) { normalized.phylesis = +$attrs.PHYLESIS; } if (+$attrs.INCOMPLETESUBGROUPS) { normalized.incompleteSubgroups = +$attrs.incompleteSubgroups; } if (+$attrs.ANCESTORWITHPAGE) { normalized.ancestorPage = +$attrs.ANCESTORWITHPAGE; } if (+$attrs.EXTINCT) { normalized.extinct = +$attrs.EXTINCT; } if (+$attrs.CONFIDENCE) { normalized.confidence = +$attrs.confidence; } if (+$attrs.LEAF) { normalized.leaf = +$attrs.leaf; } if (+$attrs.ITALICIZENAME) { normalized.italicizeName = +$attrs.ITALICIZENAME; } if (+$attrs.SHOWAUTHORITY) { normalized.showAuthority = +$attrs.SHOWAUTHORITY; } if (+$attrs.SHOWAUTHORITYCONTAINING) { normalized.showAuthorityContaining = +$attrs.SHOWAUTHORITYCONTAINING; } if (+$attrs.IS_NEW_COMBINATION) { normalized.isNewCombination = +$attrs.IS_NEW_COMBINATION; if ($attrs.COMBINATION_DATE !== 'null') { normalized.combinationDate = $attrs.COMBINATION_DATE; } } if (node.OTHERNAMES && node.OTHERNAMES.OTHERNAME.length) { normalized.otherNames = node.OTHERNAMES.OTHERNAME.map(function (name) { var $attrs = name['$'], obj = { name: name.NAME, sequence: +$attrs.SEQUENCE }; if (+$attrs.ISIMPORTANT) { obj.isImportant = +$attrs.ISIMPORTANT; } if (+$attrs.ISPREFERRED) { obj.isPreferred = +$attrs.ISPREFERRED; } if (+$attrs.ITALICIZENAME) { obj.italicizeName = +$attrs.ITALICIZENAME; } if ($attrs.DATE !== 'null') { obj.date = $attrs.DATE; } return obj; }); } if (+$attrs.CHILDCOUNT > 0) { normalized.nodes = node.NODES.NODE.map(function (child) { return lookup[child['$'].ID]; }); } lookup[normalized.id] = normalized; root = normalized; }); xml.on('end', function () { console.log(JSON.stringify(root)); }); xml.on('error', function (message) { console.log(message); }); }());
} else { opt.feature = opt.feature.split(','); opt.feature = _.map(opt.feature, function (el) {return el.toUpperCase();}); } var done = false; var locus_count = 0; var gene_ct = 0; var unique_ct = 0; var repo = {}; var outstream = fs.createWriteStream(opt.outfile, {'flags': 'w'}); var instream = fs.createReadStream(opt.infile); var xml = new xmlstream(instream); xml.collect('INSDFeature'); xml.collect('INSDQualifier'); var bytes_processed = 0; xml.on('data', function (data) { bytes_processed += data.length; }); xml.on('endElement: INSDSeq', function(seq) { locus_count++; util.print('Locus: '+locus_count+', Genes: '+gene_ct+', Unique pairs: '+unique_ct+', Bytes: '+(bytes_processed/1000).toFixed(0)+' KB'+"\u001B[0G"); var sp_name = seq.INSDSeq_organism; var features = seq["INSDSeq_feature-table"].INSDFeature; _.each(features, function (feature) {
return new Promise(function(resolve, reject) { stream.on('error', function(err) { reject(err); return; }); var xml = new XmlStream(stream); var statusmap = { "publish": "published", "draft": "draft" }; var exportDate = null; var users = []; var posts = []; var tags = []; var posts_tags = []; var author2user = {}; var termname2tag = {}; var errorCheck = false xml.on('endElement: pubDate', function(pd) { if (errorCheck) { console.error("Publish Date " + JSON.stringify(pd).slice(0, 50)); } if (exportDate !== null) return; exportDate = new Date(pd.$text); }); xml.on('endElement: wp:category', function(category) { if (errorCheck) { console.error("Category " + JSON.stringify(category).slice(0, 50)); } var tag = { "id": parseInt(category['wp:term_id'], 10), "slug": category['wp:category_nicename'], "name": category['wp:cat_name'], "description": category['wp:category_description'] }; tags.push(tag); termname2tag[tag.slug] = tag.id; }); xml.on('endElement: wp:tag', function(category) { if (errorCheck) { console.error("Tag " + JSON.stringify(category).slice(0, 50)); } var tag = { "id": parseInt(category['wp:term_id'], 10), "slug": category['wp:tag_slug'], "name": category['wp:tag_name'], "description": "" }; if (tag.slug in termname2tag) return; tags.push(tag); termname2tag[tag.slug] = tag.id; }); xml.on('endElement: wp:wp_author', function(author) { if (errorCheck) { console.error("Author " + JSON.stringify(author).slice(0, 50)); } var user = { 'name': author['wp:author_display_name'], 'slug': author['wp:author_login'], 'email': author['wp:author_email'] }; users.push(user); author2user[user.slug] = 1; /* TODO: users.length when Ghost supports importing users */ }); var slugs = {}; xml.collect('category'); xml.preserve('content:encoded', true); xml.on('endElement: item', function(item) { if (errorCheck) { console.error("Item " + JSON.stringify(item).slice(0, 50)); } if (item['wp:post_type'] != "post" && item['wp:post_type'] != "page") return; var date; if 
(item['wp:post_date_gmt'] !== "0000-00-00 00:00:00") { date = item['wp:post_date_gmt'].match(/(\d{4})-(\d+)-(\d+) (\d+):(\d+):(\d+)/); } else { date = item['wp:post_date'].match(/(\d{4})-(\d+)-(\d+) (\d+):(\d+):(\d+)/); } date = date.map(function(e) { return parseInt(e, 10); }); var d = new Date(Date.UTC(date[1], date[2]-1, date[3], date[4], date[5], date[6], 0)); var pubDate = d; if (item['pubDate'].match("-0001") === null) { pubDate = new Date(item['pubDate']); } var post = { "id": parseInt(item['wp:post_id'], 10), "title": item.title, "slug": item['wp:post_name'], "markdown": treat(item['content:encoded']), "html": treatHTML(item['content:encoded']), "image": null, "featured": item['wp:is_sticky'] === "1", "page": item['wp:post_type'] == "page" ? 1 : 0, "status": item['wp:status'] in statusmap ? statusmap[item['wp:status']] : "draft", "language": "en_US", "meta_title": null, "meta_description": null, "author_id": author2user[item['dc:creator']], "created_at": d.getTime(), "created_by": 1, "updated_at": d.getTime(), "updated_by": 1, "published_at": pubDate.getTime(), "published_by": 1 }; if (!post.title) { post.title = 'Untitled post'; } if (!post.slug) { post.slug = slugify(post.title); } // This can happen because WP allows posts to share slugs... 
if (post.slug in slugs) { var slug = slugify(post.title); if (slug === "" || slug in slugs) { var n = 2; post.slug = post.slug.replace(/-\d*$/, ''); while (post.slug + "-" + n in slugs) { n++; } slug = post.slug + "-" + n; } //console.error("!> slug '" + post.slug + "' was repeated; the post '" + post.title + "' now has slug '" + slug + "'"); post.slug = slug; } slugs[post.slug] = post; if (typeof item.category !== "undefined") { for (var i = 0; i < item.category.length; i++) { if (!item.category[i].$) continue; posts_tags.push({ "tag_id": termname2tag[item.category[i].$.nicename], "post_id": post.id }); } } posts.push(post); // console.error("made it"); }); xml.on('end', function() { console.error("made it again"); var ghost = { "meta":{ "exported_on": exportDate.getTime() * 1000, "version":"000" }, "data":{ "posts": posts, "tags": tags, "posts_tags": posts_tags //"users": users } }; resolve(ghost); }); });