/**
 * Parses the console output of an ffmpeg metadata run into a plain object.
 *
 * The text is split into "Input #N, ..., from" sections. For every section
 * the file-level "Metadata:" area (if any) is handed to
 * parseMediaMetadataInfoString and merged with the result of
 * parseMediaMetadataOutputFile(fileOutput), and every "Stream #a:b:" area is
 * handed to parseMediaStreamInfoString.
 *
 * NOTE: `streams` is re-initialised for each input section and `info` is
 * overwritten whenever a section has a "Metadata:" area, so with several
 * inputs the result reflects the last one processed.
 *
 * NOTE(review): utils.sliceStringByRegexMatches is defined outside this
 * block; it is assumed to return the part of the string between the two
 * match positions, with null meaning "no bound on that side" - confirm.
 *
 * @param {string} metadata - Raw console output of the metadata command.
 * @param {*} fileOutput - Contents of the metadata output file; passed
 *     through unchanged to parseMediaMetadataOutputFile.
 * @returns {Object} Object with an optional `info` property and a `streams`
 *     array. An empty object is returned when no input section is found.
 */
function parseMetadataOutput(metadata, fileOutput) {
    var returnedMetadata = {};
    var metadataString = metadata;

    var inputRegex = /input \#(\d+),\s?([\w\s\,]+),\s?from/gi;
    var inputMatch = inputRegex.exec(metadataString);

    while (inputMatch) {
        // Look one match ahead so the current input section can be cut off
        // where the next one starts (or at the end of the text).
        var nextInputMatch = inputRegex.exec(metadataString);
        var inputString = utils.sliceStringByRegexMatches(metadataString, inputMatch, nextInputMatch);
        console.log(inputString);

        var metadataInfoRegex = /metadata:/i;
        var streamRegex = /stream\s*\#(\d+)\:(\d+)(\((\w+)\))?\:/ig;

        // metadataInfoRegex is not global, so this is always the FIRST
        // "Metadata:" occurrence in the input section. It is computed once
        // and reused for every stream below.
        var metadataInfoMatch = metadataInfoRegex.exec(inputString);
        if (metadataInfoMatch) {
            // This is the metadata info for the whole media file. It is
            // bordered either by a stream definition or by nothing.
            // Both the console string and the file output are parsed: the
            // title and description can contain spaces, so the raw string
            // alone is not reliable, but the file output lacks the encoding
            // and creation time.
            var metadataInfoString = utils.sliceStringByRegexMatches(inputString, metadataInfoMatch);
            var streamBorderMatch = streamRegex.exec(metadataInfoString);
            // The global regex is re-used for the stream loop, so reset it.
            streamRegex.lastIndex = 0;
            metadataInfoString = utils.sliceStringByRegexMatches(metadataInfoString, null, streamBorderMatch);
            console.log(metadataInfoString);

            returnedMetadata['info'] = parseMediaMetadataInfoString(metadataInfoString);
            utils.extend(true, returnedMetadata['info'], parseMediaMetadataOutputFile(fileOutput));
        }

        returnedMetadata['streams'] = [];

        var streamMatch = streamRegex.exec(inputString);
        while (streamMatch) {
            var nextStreamMatch = streamRegex.exec(inputString);

            // A stream area ends at whichever comes first AFTER the stream
            // header: the metadata info area or the next stream definition.
            // If neither follows, it runs to the end of the input section.
            // The metadata match only counts as a border when it really lies
            // behind the current stream; otherwise the slice would be asked
            // to end before it starts.
            var streamBorder = nextStreamMatch;
            var metadataFollowsStream =
                metadataInfoMatch && metadataInfoMatch.index > streamMatch.index;
            if (metadataFollowsStream &&
                    (!nextStreamMatch || metadataInfoMatch.index < nextStreamMatch.index)) {
                streamBorder = metadataInfoMatch;
            }

            var streamInfoString = utils.sliceStringByRegexMatches(
                inputString, streamMatch, streamBorder
            );

            returnedMetadata['streams'].push(
                parseMediaStreamInfoString(streamInfoString));

            streamMatch = nextStreamMatch;
        }

        inputMatch = nextInputMatch;
    }

    return returnedMetadata;
}
/**
 * Parses the description of a single stream ("Stream #a:b(tag): ...") from
 * ffmpeg console output.
 *
 * Anything from an "Output #N" marker onwards is dropped first. The stream
 * header supplies `index` (the number after the colon) and `name` (the
 * parenthesised tag after it, e.g. `und` in `#0:0(und)`, or undefined).
 * The rest is matched against a video, audio and data pattern, in that order
 * of precedence; the first that matches decides `type` and the type-specific
 * fields. If none matches, only `index` and `name` are set.
 *
 * The patterns rely on ffmpeg printing the fields in a fixed order, so they
 * need re-checking whenever the ffmpeg build changes.
 *
 * NOTE(review): utils.sliceStringByRegexMatches is defined outside this
 * block; it is assumed to return the part of the string between the two
 * match positions, with null/omitted meaning "no bound" - confirm.
 *
 * @param {string} streamInfoString - Text starting at (or containing) one
 *     stream definition.
 * @returns {Object} Parsed stream description. Numeric fields whose source
 *     text is missing come back as NaN (width, height, fps); missing string
 *     fields are undefined.
 * @throws {Error} If no stream header can be found in the string.
 */
function parseMediaStreamInfoString(streamInfoString) {
    var returnObject = {};

    var outputMatch = /output\s*\#\d+/i.exec(streamInfoString);
    streamInfoString = utils.sliceStringByRegexMatches(streamInfoString, null, outputMatch);
    console.log(streamInfoString);

    // Each stream also has some metadata attached. Apart from the rotation
    // of video streams it is not parsed, as nothing else is needed so far.
    var headerMatch = streamInfoString.match(/stream\s*\#(\d+)\:(\d+)(\((\w+)\))?\:/i);
    if (!headerMatch) {
        throw new Error("parseMediaStreamInfoString: Invalid stream info string! " + streamInfoString);
    }

    returnObject['index'] = parseInt(headerMatch[2], 10);
    returnObject['name'] = headerMatch[4];

    // Parsing the line as CSV is not an option: some fields (e.g. the pixel
    // format) are arbitrary strings with nothing to recognise them by, so
    // the patterns below depend on the field order staying the same.
    var videoMatch = streamInfoString.match(
        /video\:\s*(([^\s^\,]+)\s*(\(([a-z0-9\s^\)]+?)\))?)(\s*\(([a-z0-9]+)\s*\/\s*([^\)]+)\))?,\s*([^\(]+?)(\((.+?)\))?,(\s*((\d+)x(\d+))\s*(\[(.+?)\])?)?(,\s*(\d+\s*kb\/s|N\/A))?(,\s*([a-z]+\s*\d+\:\d+\s*[a-z]+\s*\d+\:\d+))?\s*(,\s*([\.0-9]+)\s*fps)?/i
    );
    var audioMatch = streamInfoString.match(
        /audio\:\s*(([^\s^\,]+)(\s*\(([a-z0-9]+?)\))?)(\s*\(([a-z0-9]+)\s*\/\s*([^\)]+)\))?(,\s*((\d+)\s*([a-z]+)))?(,\s*([^\s^,]+))?(\s*,\s*([^\s^,]+)\s*(\(([^)]+)\))?)?(,\s*((\d+)\s*([a-z]+\s*\/\s*[a-z]+?))\s*(\((.+?)\))?)?/i
    );
    var dataMatch = streamInfoString.match(
        /data\:\s*([^\s]+)\s*(\((.+)\))?\s*\(([^\s]+)\s*\/\s*([^\s]+)\)/i
    );

    // Everything from the stream's "Metadata:" marker to the end of the text.
    var metadataMatch = streamInfoString.match(/metadata\s*\:/i);
    var metadataString = "";
    if (metadataMatch) {
        metadataString = utils.sliceStringByRegexMatches(streamInfoString, metadataMatch);
    }

    if (videoMatch) {
        returnObject['type'] = 'video';
        returnObject['metadata'] = {};
        returnObject['codec'] = videoMatch[2];
        returnObject['codec_option'] = videoMatch[4];
        returnObject['codec_name'] = videoMatch[6];
        returnObject['pixel_format'] = videoMatch[8];
        returnObject['pixel_format_option'] = videoMatch[10];
        returnObject['width'] = parseInt(videoMatch[13], 10);
        returnObject['height'] = parseInt(videoMatch[14], 10);
        returnObject['bitrate'] = videoMatch[18];
        // Prefer the bracketed text after the resolution, else the
        // free-standing "<name> a:b <name> c:d" field.
        returnObject['ratios'] = videoMatch[16] || videoMatch[20];
        returnObject['fps'] = parseFloat(videoMatch[22]);

        if (metadataString) {
            var rotationMatch = metadataString.match(/rotate\s*\:\s*(\d+)/i);
            if (rotationMatch) {
                returnObject['metadata']['rotation'] = parseInt(rotationMatch[1], 10);
            }
        }
    } else if (audioMatch) {
        returnObject['type'] = 'audio';
        returnObject['metadata'] = {};
        returnObject['codec'] = audioMatch[2];
        returnObject['codec_option'] = audioMatch[4];
        returnObject['codec_name'] = audioMatch[6];
        returnObject['sample_rate'] = audioMatch[9];
        returnObject['channels'] = audioMatch[13];
        returnObject['decoder'] = audioMatch[15];
        returnObject['bitrate'] = audioMatch[19];
        returnObject['bitrate_option'] = audioMatch[23];
    } else if (dataMatch) {
        returnObject['type'] = 'data';
        returnObject['metadata'] = {};
        returnObject['codec'] = dataMatch[1];
        returnObject['codec_option'] = dataMatch[3];
        returnObject['codec_name'] = dataMatch[4];
    }

    return returnObject;
}