/**
 * Joins two projections of the "people" table on their "name" columns using
 * an explicit column-equality expression.
 *
 * @param {*} file - forwarded unchanged to buildPeopleTable.
 * @param {*} joinType - forwarded unchanged as the third argument of join().
 * @returns {string} toString() of the first row (head) of the joined result.
 */
var dataframeJoinColumnExprTest = function(file, joinType) {
    // Called only for its effect; presumably this registers the "people"
    // table queried below -- confirm against buildPeopleTable.
    buildPeopleTable(file);

    var left = sqlContext.sql("SELECT name, age FROM people");
    var right = sqlContext.sql("SELECT name, DOB FROM people");
    var sameName = left.col("name").equalTo(right.col("name"));

    return left.join(right, sameName, joinType).head().toString();
};
/**
 * Registers a UDF named "udfTest" that turns six string columns
 * (day, month, year, hour, minute, second) into a SqlTimestamp, runs it over
 * a one-row temp table called "mytable", and returns the collected rows.
 *
 * @returns {string} JSON.stringify of the collected query result.
 */
var udf6Test = function() {
    var SqlTimestamp = require(EclairJS_Globals.NAMESPACE + '/sql/SqlTimestamp');

    // Every column of the test table is a nullable string.
    var columnNames = ["day", "month", "year", "hour", "minute", "second"];
    var fields = columnNames.map(function(columnName) {
        return DataTypes.createStructField(columnName, DataTypes.StringType, true);
    });
    var schema = DataTypes.createStructType(fields);

    var df = sqlContext.createDataFrame([["26", "6", "1999", "9", "12", "30"]], schema);
    df.registerTempTable("mytable");

    // SqlTimestamp is handed to register() in the trailing array and also
    // declared as the UDF's seventh parameter -- presumably so the class is
    // available where the UDF actually executes; confirm against the
    // EclairJS UDF registration docs.
    sqlContext.udf().register("udfTest", function(day, month, year, hour, minute, second, SqlTimestamp) {
        // JavaScript Date months are zero-based, hence month-1.
        return new SqlTimestamp(new Date(year, month-1, day, hour, minute, second));
    }, DataTypes.TimestampType, [SqlTimestamp]);

    var query = "SELECT *, udfTest(mytable.day, mytable.month, mytable.year, mytable.hour, mytable.minute, mytable.second) as timestamp FROM mytable";
    return JSON.stringify(sqlContext.sql(query).collect());
};
/**
 * Writes the people DataFrame to a parquet file, reads it back, registers the
 * result as the temp table "parquetFile" and queries it for the names of
 * people aged 20 to 29 inclusive.
 *
 * @param {*} file - forwarded unchanged to buildPeopleTable.
 * @returns {string} JSON.stringify of up to 10 rows of the query result.
 */
var dataFrameParquetTest = function(file) {
    // Single source of truth for the location that is written and re-read.
    var parquetPath = "/tmp/people.parquet";

    var peopleDataFrame = buildPeopleTable(file);
    peopleDataFrame.write().mode('overwrite').parquet(parquetPath);

    var parquetFileDF = sqlContext.read().parquet(parquetPath);
    parquetFileDF.registerTempTable("parquetFile");

    // Declared with var: the previous bare assignment created an implicit
    // global (and throws a ReferenceError in strict mode).
    var twenties = sqlContext.sql("SELECT name FROM parquetFile WHERE age >= 20 AND age <= 29");
    return JSON.stringify(twenties.take(10));
};
/**
 * Registers a ten-argument UDF named "udfTest" that concatenates its
 * arguments with "+", applies it to columns col1..col10 of "mytable" and
 * returns the collected rows.
 *
 * @returns {string} JSON.stringify of the collected query result.
 */
var udf10Test = function() {
    // Presumably creates and registers "mytable" -- confirm against
    // createStringTableDF.
    createStringTableDF(sqlContext);

    sqlContext.udf().register("udfTest", function(col1, col2, col3, col4, col5, col6, col7, col8, col9, col10) {
        return col1 + col2 + col3 + col4 + col5 +
            col6 + col7 + col8 + col9 + col10;
    }, DataTypes.StringType);

    // Build "mytable.col1, mytable.col2, ..., mytable.col10".
    var udfArgs = [];
    for (var i = 1; i <= 10; i++) {
        udfArgs.push("mytable.col" + i);
    }
    var query = "SELECT *, udfTest(" + udfArgs.join(", ") + ") as transformedByUDF FROM mytable";

    return JSON.stringify(sqlContext.sql(query).collect());
};
/**
 * Selects the "name" column from the "people" table and labels each value.
 *
 * @param {*} file - forwarded unchanged to buildPeopleTable.
 * @returns {string} toString() of up to 10 entries of the form "Name: <name>".
 */
var programmaticallySpecifyingSchema = function(file) {
    // Called only for its effect; presumably this registers the "people"
    // table queried below -- confirm against buildPeopleTable.
    buildPeopleTable(file);

    var nameRows = sqlContext.sql("SELECT name FROM people");

    // The query result is converted to an RDD and mapped row by row; the
    // single selected column is read by ordinal (0).
    var labelled = nameRows.toRDD().map(function(row) {
        return "Name: " + row.getString(0);
    });

    return labelled.take(10).toString();
};
/**
 * Aggregates the "people" table with max(age) and sum(expense).
 *
 * @param {*} file - forwarded unchanged to buildPeopleTable.
 * @returns {string} JSON.stringify of the first row of the aggregated result.
 */
var dataframeAggTest = function(file) {
    // Called only for its effect; presumably this registers the "people"
    // table queried below -- confirm against buildPeopleTable.
    buildPeopleTable(file);

    var selected = sqlContext.sql("SELECT name, age, expense FROM people");

    // Column name -> aggregate function name, as passed to agg().
    var aggregations = { age: "max", expense: "sum" };

    var firstRow = selected.agg(aggregations).take(10)[0];
    return JSON.stringify(firstRow);
};
/**
 * Registers a one-argument UDF named "stringLengthTest" and applies it to the
 * single string column of a three-row temp table called "mytable".
 *
 * @returns {string} JSON.stringify of the collected query result.
 */
var udf1Test = function() {
    var schema = DataTypes.createStructType([
        DataTypes.createStructField("test", DataTypes.StringType, true)
    ]);
    var rows = [["test 1"], ["string 2"], ["string 3"]];
    sqlContext.createDataFrame(rows, schema).registerTempTable("mytable");

    // length is invoked as a method, so the argument is presumably a Java
    // string rather than a JavaScript string -- confirm.
    sqlContext.udf().register("stringLengthTest", function(str) {
        return str.length();
    }, DataTypes.IntegerType);

    var query = "SELECT *, stringLengthTest(mytable.test) as transformedByUDF FROM mytable";
    return JSON.stringify(sqlContext.sql(query).collect());
};
/**
 * Registers a three-argument UDF named "udfTest" that adds the length of a
 * string column to an integer column and a double column, and applies it to
 * a three-row temp table called "mytable".
 *
 * @returns {string} JSON.stringify of the collected query result.
 */
var udf3Test = function() {
    var schema = DataTypes.createStructType([
        DataTypes.createStructField("test", DataTypes.StringType, true),
        DataTypes.createStructField("item2", DataTypes.IntegerType, true),
        DataTypes.createStructField("floatNum", DataTypes.DoubleType, true)
    ]);
    var rows = [
        ["test 1", 1, 3.0],
        ["string 2", 2, 1.1],
        ["string 3", 3, 2.2]
    ];
    sqlContext.createDataFrame(rows, schema).registerTempTable("mytable");

    // The UDF's declared return type is FloatType even though the floatNum
    // column itself is DoubleType.
    sqlContext.udf().register("udfTest", function(str, num, floatNum) {
        return str.length() + num + floatNum;
    }, DataTypes.FloatType);

    var query = "SELECT *, udfTest(mytable.test, mytable.item2, mytable.floatNum) as transformedByUDF FROM mytable";
    return JSON.stringify(sqlContext.sql(query).collect());
};
/**
 * Registers a five-argument UDF named "udfTest" that joins its arguments
 * into one space-separated string, and applies it to a one-row temp table
 * called "mytable" with string, integer, double, float and timestamp columns.
 *
 * @returns {string} JSON.stringify of the collected query result.
 */
var udf5Test = function() {
    var schema = DataTypes.createStructType([
        DataTypes.createStructField("test", DataTypes.StringType, true),
        DataTypes.createStructField("item2", DataTypes.IntegerType, true),
        DataTypes.createStructField("floatNum", DataTypes.DoubleType, true),
        DataTypes.createStructField("floatNum2", DataTypes.FloatType, true),
        DataTypes.createStructField("dob", DataTypes.TimestampType, true)
    ]);

    // NOTE(review): SqlTimestamp is not defined inside this function; it is
    // expected to be in scope from elsewhere in the file -- confirm.
    var rows = [
        ["test 1", 1, 3.0, 2.2, new SqlTimestamp("1996-03-07 00:00:00")]
    ];
    sqlContext.createDataFrame(rows, schema).registerTempTable("mytable");

    sqlContext.udf().register("udfTest", function(str, num, floatNum, floatNum2, ts) {
        return str + " " + num + " " + floatNum + " " + floatNum2 + " " + ts;
    }, DataTypes.StringType);

    var query = "SELECT *, udfTest(mytable.test, mytable.item2, mytable.floatNum, mytable.floatNum2, mytable.dob) as transformedByUDF FROM mytable";
    return JSON.stringify(sqlContext.sql(query).collect());
};
/**
 * Registers a 22-argument UDF named "udfTest" that concatenates its
 * arguments with "+", applies it to columns col1..col22 of "mytable" and
 * returns the collected rows.
 *
 * @returns {string} JSON.stringify of the collected query result.
 */
var udf22Test = function() {
    // Presumably creates and registers "mytable" -- confirm against
    // createStringTableDF.
    createStringTableDF(sqlContext);

    sqlContext.udf().register("udfTest", function(
        col1, col2, col3, col4, col5, col6, col7, col8, col9, col10, col11,
        col12, col13, col14, col15, col16, col17, col18, col19, col20, col21, col22
    ) {
        return col1 + col2 + col3 + col4 + col5 + col6 + col7 + col8 +
            col9 + col10 + col11 + col12 + col13 + col14 + col15 + col16 +
            col17 + col18 + col19 + col20 + col21 + col22;
    }, DataTypes.StringType);

    // The fragments are joined with no separator, so the query text is
    // exactly what the literals spell out (including the absence of a space
    // between "mytable.col14," and "mytable.col15").
    var query = [
        "SELECT *, udfTest(",
        "mytable.col1, mytable.col2, mytable.col3, mytable.col4, mytable.col5, mytable.col6, ",
        "mytable.col7, mytable.col8, mytable.col9, mytable.col10, mytable.col11, mytable.col12, ",
        "mytable.col13, mytable.col14,",
        "mytable.col15, mytable.col16, mytable.col17, mytable.col18, mytable.col19, mytable.col20, ",
        "mytable.col21, mytable.col22",
        ") as transformedByUDF FROM mytable"
    ].join("");

    return JSON.stringify(sqlContext.sql(query).collect());
};