Пример #1
0
/**
 * Joins two projections of the "people" table on their "name" columns using
 * a column expression, and returns the first joined row rendered as a string.
 *
 * @param {*} file - forwarded unchanged to buildPeopleTable (defined outside this block).
 * @param {*} joinType - forwarded unchanged as the third argument of DataFrame.join.
 * @returns {string} result of head().toString() on the joined DataFrame.
 */
var dataframeJoinColumnExprTest = function(file, joinType) {
	// Invoked only for its effect; the queries below read a table named
	// "people", which this helper presumably registers -- TODO confirm.
	buildPeopleTable(file);

	var ages = sqlContext.sql("SELECT name, age FROM people");
	var birthDates = sqlContext.sql("SELECT name, DOB FROM people");

	// Join condition: ages.name equal to birthDates.name.
	var sameName = ages.col("name").equalTo(birthDates.col("name"));

	return ages.join(birthDates, sameName, joinType).head().toString();
};
var udf6Test = function() {
    var SqlTimestamp = require(EclairJS_Globals.NAMESPACE + '/sql/SqlTimestamp');
    var fields = [];
    fields.push(DataTypes.createStructField("day", DataTypes.StringType, true));
    fields.push(DataTypes.createStructField("month", DataTypes.StringType, true));
    fields.push(DataTypes.createStructField("year", DataTypes.StringType, true));
    fields.push(DataTypes.createStructField("hour", DataTypes.StringType, true));
    fields.push(DataTypes.createStructField("minute", DataTypes.StringType, true));
    fields.push(DataTypes.createStructField("second", DataTypes.StringType, true));
    var schema = DataTypes.createStructType(fields);
    var df = sqlContext.createDataFrame([["26", "6", "1999", "9", "12", "30"]], schema);
    df.registerTempTable("mytable");

    //df.show();
    sqlContext.udf().register("udfTest", function(day, month, year, hour, minute, second, SqlTimestamp) {
        //var SqlTimestamp = require("eclairjs/sql/SqlTimestamp");
        //print(day+";"+ month+";"+year+";"+hour+";"+minute+";"+second);
        var ts = new SqlTimestamp(new Date(year, month-1, day, hour, minute, second));
        return ts;
    }, DataTypes.TimestampType, [SqlTimestamp]);

    var result = sqlContext.sql("SELECT *, udfTest(mytable.day, mytable.month, mytable.year, mytable.hour, mytable.minute, mytable.second) as timestamp FROM mytable").collect();

    return JSON.stringify(result);
}
Пример #3
0
/**
 * Writes the people DataFrame to a Parquet file, reads it back, registers it
 * as the temp table "parquetFile", and returns up to ten names of people aged
 * 20 through 29 as JSON.
 *
 * @param {*} file - forwarded unchanged to buildPeopleTable (defined outside this block).
 * @returns {string} JSON.stringify of the first ten rows of the age-filtered query.
 */
var dataFrameParquetTest = function(file) {
	var parquetPath = "/tmp/people.parquet";

	var peopleDataFrame = buildPeopleTable(file);
	var parquetWriter = peopleDataFrame.write();
	// 'overwrite' lets the test be re-run when the file already exists.
	parquetWriter.mode('overwrite').parquet(parquetPath);

	var parquetFileDF = sqlContext.read().parquet(parquetPath);
	parquetFileDF.registerTempTable("parquetFile");

	// Declared locally: the previous code assigned to an undeclared name, which
	// leaked a global in sloppy mode and throws a ReferenceError in strict mode.
	var twenties = sqlContext.sql("SELECT name FROM parquetFile WHERE age >= 20 AND age <= 29");
	return JSON.stringify(twenties.take(10));
};
/**
 * Registers a ten-argument UDF named "udfTest" that adds its arguments with
 * `+`, applies it to col1..col10 of "mytable", and returns the collected rows
 * as JSON.
 *
 * @returns {string} JSON.stringify of the collected query result.
 */
var udf10Test = function() {
    // Defined outside this block; presumably registers the "mytable" temp
    // table queried below -- TODO confirm.
    createStringTableDF(sqlContext);

    var concatTen = function(col1, col2, col3, col4, col5, col6, col7, col8, col9, col10) {
        return col1 + col2 + col3 + col4 + col5 + col6 + col7 + col8 + col9 + col10;
    };
    sqlContext.udf().register("udfTest", concatTen, DataTypes.StringType);

    var query = [
        "SELECT *, ",
        "udfTest(mytable.col1, mytable.col2, mytable.col3, mytable.col4, mytable.col5, mytable.col6, mytable.col7, mytable.col8,  mytable.col9,  mytable.col10) ",
        "as transformedByUDF FROM mytable"
    ].join("");

    return JSON.stringify(sqlContext.sql(query).collect());
};
Пример #5
0
/**
 * Selects the "name" column from the "people" table, prefixes each value with
 * "Name: ", and returns the first ten results joined by Array#toString.
 *
 * @param {*} file - forwarded unchanged to buildPeopleTable (defined outside this block).
 * @returns {string} result of take(10).toString() on the mapped RDD.
 */
var programmaticallySpecifyingSchema = function(file) {
	// Invoked only for its effect; the query below reads a table named
	// "people", which this helper presumably registers -- TODO confirm.
	buildPeopleTable(file);

	// Column 0 is "name", the only column the query selects.
	var labelRow = function(row) {
		return "Name: " + row.getString(0);
	};

	var nameRows = sqlContext.sql("SELECT name FROM people");
	var labelled = nameRows.toRDD().map(labelRow);

	return labelled.take(10).toString();
};
Пример #6
0
/**
 * Aggregates the "people" table (max of "age", sum of "expense") and returns
 * the first row of the aggregate as JSON.
 *
 * @param {*} file - forwarded unchanged to buildPeopleTable (defined outside this block).
 * @returns {string} JSON.stringify of the first row returned by take(10).
 */
var dataframeAggTest = function(file) {
	// Invoked only for its effect; the query below reads a table named
	// "people", which this helper presumably registers -- TODO confirm.
	buildPeopleTable(file);

	var projection = sqlContext.sql("SELECT name, age, expense FROM people");

	// Column name -> aggregate function name, as passed to agg().
	var aggregations = {
		age: "max",
		expense: "sum"
	};

	var firstRows = projection.agg(aggregations).take(10);
	return JSON.stringify(firstRows[0]);
};
/**
 * Registers a one-argument UDF named "stringLengthTest" that returns
 * str.length(), applies it to the "test" column of a three-row temp table
 * "mytable", and returns the collected rows as JSON.
 *
 * @returns {string} JSON.stringify of the collected query result.
 */
var udf1Test = function() {
    var schema = DataTypes.createStructType([
        DataTypes.createStructField("test", DataTypes.StringType, true)
    ]);
    var rows = [["test 1"], ["string 2"], ["string 3"]];
    sqlContext.createDataFrame(rows, schema).registerTempTable("mytable");

    // length is called as a method, so the argument must expose length() --
    // presumably a Java string handed over by the engine; TODO confirm.
    var lengthOf = function(str) {
        return str.length();
    };
    sqlContext.udf().register("stringLengthTest", lengthOf, DataTypes.IntegerType);

    var query = "SELECT *, stringLengthTest(mytable.test) as transformedByUDF FROM mytable";
    return JSON.stringify(sqlContext.sql(query).collect());
};
/**
 * Registers a three-argument UDF named "udfTest" that returns
 * str.length() + num + floatNum, applies it to the columns of a three-row
 * temp table "mytable", and returns the collected rows as JSON.
 *
 * @returns {string} JSON.stringify of the collected query result.
 */
var udf3Test = function() {
    var schema = DataTypes.createStructType([
        DataTypes.createStructField("test", DataTypes.StringType, true),
        DataTypes.createStructField("item2", DataTypes.IntegerType, true),
        DataTypes.createStructField("floatNum", DataTypes.DoubleType, true)
    ]);
    var rows = [
        ["test 1", 1, 3.0],
        ["string 2", 2, 1.1],
        ["string 3", 3, 2.2]
    ];
    sqlContext.createDataFrame(rows, schema).registerTempTable("mytable");

    // NOTE(review): the UDF is registered as FloatType although the floatNum
    // column is declared DoubleType -- confirm this is intended.
    var sumWithLength = function(str, num, floatNum) {
        return str.length() + num + floatNum;
    };
    sqlContext.udf().register("udfTest", sumWithLength, DataTypes.FloatType);

    var query = "SELECT *, udfTest(mytable.test, mytable.item2, mytable.floatNum) as transformedByUDF FROM mytable";
    return JSON.stringify(sqlContext.sql(query).collect());
};
/**
 * Registers a five-argument UDF named "udfTest" that joins its arguments with
 * single spaces, applies it to the columns of a one-row temp table "mytable",
 * and returns the collected rows as JSON.
 *
 * SqlTimestamp is not declared in this block; it must already be in scope
 * where this function is defined.
 *
 * @returns {string} JSON.stringify of the collected query result.
 */
var udf5Test = function() {
    var schema = DataTypes.createStructType([
        DataTypes.createStructField("test", DataTypes.StringType, true),
        DataTypes.createStructField("item2", DataTypes.IntegerType, true),
        DataTypes.createStructField("floatNum", DataTypes.DoubleType, true),
        DataTypes.createStructField("floatNum2", DataTypes.FloatType, true),
        DataTypes.createStructField("dob", DataTypes.TimestampType, true)
    ]);

    // One row, with one value per schema field in declaration order.
    var onlyRow = ["test 1", 1, 3.0, 2.2, new SqlTimestamp("1996-03-07 00:00:00")];
    sqlContext.createDataFrame([onlyRow], schema).registerTempTable("mytable");

    var describeRow = function(str, num, floatNum, floatNum2, ts) {
        return str +" "+ num +" "+ floatNum +" "+ floatNum2 +" "+ ts;
    };
    sqlContext.udf().register("udfTest", describeRow, DataTypes.StringType);

    var query = "SELECT *, udfTest(mytable.test, mytable.item2, mytable.floatNum, mytable.floatNum2, mytable.dob) as transformedByUDF FROM mytable";
    return JSON.stringify(sqlContext.sql(query).collect());
};
/**
 * Registers a 22-argument UDF named "udfTest" that adds its arguments with
 * `+`, applies it to col1..col22 of "mytable", and returns the collected rows
 * as JSON.
 *
 * @returns {string} JSON.stringify of the collected query result.
 */
var udf22Test = function() {
    // Defined outside this block; presumably registers the "mytable" temp
    // table queried below -- TODO confirm.
    createStringTableDF(sqlContext);

    var concatAll = function(
        col1, col2, col3, col4, col5, col6, col7, col8, col9, col10, col11,
        col12, col13, col14, col15, col16, col17, col18, col19, col20, col21, col22
    ) {
        return col1 + col2 + col3 + col4 + col5 + col6 + col7 + col8 + col9 + col10 + col11 +
            col12 + col13 + col14 + col15 + col16 + col17 + col18 + col19 + col20 + col21 + col22;
    };
    sqlContext.udf().register("udfTest", concatAll, DataTypes.StringType);

    // Pieces are joined with no separator, so spacing inside each literal is significant.
    var query = [
        "SELECT *, ",
        "udfTest(",
        "mytable.col1, mytable.col2, mytable.col3, mytable.col4, mytable.col5, mytable.col6, mytable.col7, ",
        "mytable.col8,  mytable.col9,  mytable.col10, mytable.col11,  mytable.col12, mytable.col13, mytable.col14,",
        "mytable.col15, mytable.col16, mytable.col17, mytable.col18, mytable.col19, mytable.col20, mytable.col21, mytable.col22",
        ") ",
        "as transformedByUDF FROM mytable"
    ].join("");

    return JSON.stringify(sqlContext.sql(query).collect());
};