diff --git a/data-modeling/src/main/resources/dbToMiddleLevel.xlsx b/data-modeling/src/main/resources/dbToMiddleLevel.xlsx new file mode 100644 index 0000000..2693b01 Binary files /dev/null and b/data-modeling/src/main/resources/dbToMiddleLevel.xlsx differ diff --git a/data-modeling/src/main/scala/com/github/sharpdata/sharpetl/modeling/cli/Command.scala b/data-modeling/src/main/scala/com/github/sharpdata/sharpetl/modeling/cli/Command.scala index 91ba0cf..7f6733b 100644 --- a/data-modeling/src/main/scala/com/github/sharpdata/sharpetl/modeling/cli/Command.scala +++ b/data-modeling/src/main/scala/com/github/sharpdata/sharpetl/modeling/cli/Command.scala @@ -4,6 +4,8 @@ import com.github.sharpdata.sharpetl.modeling.excel.parser.DwdTableParser import com.github.sharpdata.sharpetl.core.cli.{BatchJobCommand, CommonCommand} import com.github.sharpdata.sharpetl.core.util.IOUtil.getFullPath import com.github.sharpdata.sharpetl.core.util.{ETLLogger, IOUtil} +import com.github.sharpdata.sharpetl.modeling.formatConversion.createSqlParser.createTableDDLList +import java.io.{BufferedWriter, File, FileWriter} import com.github.sharpdata.sharpetl.modeling.sql.gen.DwdWorkflowGen.genWorkflow import picocli.CommandLine @@ -93,3 +95,46 @@ class GenerateDwdStepCommand extends BatchJobCommand { }) } } + +@CommandLine.Command(name = "generate-ods-ddl") +class GenerateSqlAutomateGenerateFiles extends Runnable { + @CommandLine.Option( + names = Array("-f", "--file"), + description = Array("Excel file path"), + required = true + ) + var filePath: String = _ + + @CommandLine.Option( + names = Array("-h", "--help"), + usageHelp = true, + description = Array("Sample parameters: -f=/path/to/config.xlsx") + ) + var helpRequested = false + + @CommandLine.Option( + names = Array("--output"), + required = true, + description = Array("Write to sql file path") + ) + var output: String = _ + + + override def run(): Unit = { + val createSql = createTableDDLList(filePath) + createSql.foreach(it => { + val workflowName = s"create_${it._1}" + 
writeFile(workflowName, it._2) + }) + } + + + def writeFile(filename: String, sqlContent: String): Unit = { + val path = getFullPath(output) + val file = new File(s"$path/$filename.sql") + ETLLogger.info(s"Write sql file to $file") + val sqlWriter = new BufferedWriter(new FileWriter(file)) + sqlWriter.write(sqlContent) + sqlWriter.close() + } +} diff --git a/data-modeling/src/main/scala/com/github/sharpdata/sharpetl/modeling/excel/model/OdsTable.scala b/data-modeling/src/main/scala/com/github/sharpdata/sharpetl/modeling/excel/model/OdsTable.scala index 397d6b9..1436422 100644 --- a/data-modeling/src/main/scala/com/github/sharpdata/sharpetl/modeling/excel/model/OdsTable.scala +++ b/data-modeling/src/main/scala/com/github/sharpdata/sharpetl/modeling/excel/model/OdsTable.scala @@ -28,6 +28,7 @@ object OdsModelingSheetHeader { val COLUMN_TYPE = "column_type" val INCREMENTAL_COLUMN = "incremental_column" val PRIMARY_COLUMN = "is_PK" + val IS_NULLABLE = "is_nullable" val TARGET_COLUMN = "target_column" @@ -59,6 +60,7 @@ object OdsTable { targetTable: String, sourceColumn: String, targetColumn: String, + sourceType: String, extraColumnExpression: String, incrementalColumn: Boolean, primaryKeyColumn: Boolean) diff --git a/data-modeling/src/main/scala/com/github/sharpdata/sharpetl/modeling/excel/parser/OdsTableParser.scala b/data-modeling/src/main/scala/com/github/sharpdata/sharpetl/modeling/excel/parser/OdsTableParser.scala index 69a79c3..7d88145 100644 --- a/data-modeling/src/main/scala/com/github/sharpdata/sharpetl/modeling/excel/parser/OdsTableParser.scala +++ b/data-modeling/src/main/scala/com/github/sharpdata/sharpetl/modeling/excel/parser/OdsTableParser.scala @@ -85,6 +85,7 @@ object OdsTableParser { targetTable = getStringCellOrNull(TARGET_TABLE, row), sourceColumn = getStringCellOrNull(SOURCE_COLUMN, row), targetColumn = getStringCellOrNull(TARGET_COLUMN, row), + sourceType = getStringCellOrNull(COLUMN_TYPE,row), extraColumnExpression = 
getStringCellOrNull(EXTRA_COLUMN_EXPRESSION, row), incrementalColumn = getBoolCell(INCREMENTAL_COLUMN, row), primaryKeyColumn = getBoolCell(PRIMARY_COLUMN, row) diff --git a/data-modeling/src/main/scala/com/github/sharpdata/sharpetl/modeling/formatConversion/model/DbType.scala b/data-modeling/src/main/scala/com/github/sharpdata/sharpetl/modeling/formatConversion/model/DbType.scala new file mode 100644 index 0000000..5034690 --- /dev/null +++ b/data-modeling/src/main/scala/com/github/sharpdata/sharpetl/modeling/formatConversion/model/DbType.scala @@ -0,0 +1,21 @@ +package com.github.sharpdata.sharpetl.modeling.formatConversion.model + +object DbType { + val ORACLE = "oracle" + val SQL_SERVER = "sql_server" + val MSSQL = "mssql" + val HIVE = "hive" + val CLICKHOUSE = "clickhouse" + val POSTGRES = "postgres" + val REDSHIFT = "redshift" + val MYSQL = "mysql" +} + +object OdsTablePartial { + final case class ModelingColumn(source: String, target: String) +} + +object TableConfigSheetHeader { + val SOURCE = "source" + val TARGET = "target" +} diff --git a/data-modeling/src/main/scala/com/github/sharpdata/sharpetl/modeling/formatConversion/parser/createSqlParser.scala b/data-modeling/src/main/scala/com/github/sharpdata/sharpetl/modeling/formatConversion/parser/createSqlParser.scala new file mode 100644 index 0000000..47abca4 --- /dev/null +++ b/data-modeling/src/main/scala/com/github/sharpdata/sharpetl/modeling/formatConversion/parser/createSqlParser.scala @@ -0,0 +1,107 @@ +package com.github.sharpdata.sharpetl.modeling.formatConversion + +import com.github.sharpdata.sharpetl.core.util.ExcelUtil.{getBoolCell, getStringCellOrNull, readHeaders, readSheet} +import com.github.sharpdata.sharpetl.modeling.excel.model.OdsModelingSheetHeader.{COLUMN_TYPE, EXTRA_COLUMN_EXPRESSION, INCREMENTAL_COLUMN, + ODS_MODELING_SHEET_NAME, PRIMARY_COLUMN, SOURCE_COLUMN, TARGET_COLUMN} +import com.github.sharpdata.sharpetl.modeling.excel.model.OdsTable.OdsModelingColumn +import 
com.github.sharpdata.sharpetl.modeling.excel.model.OdsTableConfigSheetHeader +import com.github.sharpdata.sharpetl.modeling.excel.model.OdsTableConfigSheetHeader.{ODS_TABLE_CONFIG_SHEET_NAME, SOURCE_TABLE, TARGET_TABLE, TARGET_TYPE} +import com.github.sharpdata.sharpetl.modeling.formatConversion.model.TableConfigSheetHeader +import org.apache.poi.ss.usermodel.Row + +object createSqlParser { + val typeConversionFilePath: String = this + .getClass + .getClassLoader + .getResource("dbToMiddleLevel.xlsx") + .getPath + + val typeTestPath: String = "~/Desktop/dbToMiddleLevel.xlsx" + + def readDbFieldType(filePath: String): (Map[String, String], Map[String, String]) = { + val tableConfigSheet = readSheet(filePath, ODS_TABLE_CONFIG_SHEET_NAME) + implicit val headersOds: Map[String, Int] = readHeaders(tableConfigSheet.head) + val (sourceDbType, targetDbType) = tableConfigSheet + .tail + .map(rowExtractDdType(headersOds)).head + (readTableConfig(sourceDbType), readTableConfig(targetDbType)) + } + + def createTableDDLList(filePath: String): List[(String, String)] = { + val sourceFieldTypeList = getSourceFieldType(filePath) + sourceFieldTypeList.map(it => { + val targetTableName = it._1._2 + val odsModelingColumnSeq = it._2 + (targetTableName, createTableDDL(targetTableName, odsModelingColumnSeq, filePath)) + }).toList + } + + + def createTableDDL(targetTableName:String, odsModelingColumnSeq: Seq[OdsModelingColumn],filePath:String):String= { + val targetColumns = odsModelingColumnSeq.map(_.targetColumn) + val fromSource = odsModelingColumnSeq.map(_.sourceType) + val (sourceToMiddle, middleToTarget) = readDbFieldType(filePath) + val sourceTypeAndLength = fromSource.map(it => { + val sourceTypeList = it.split('(') + val typeLength: String = it.replace(sourceTypeList(0), "") + (sourceTypeList(0), typeLength) + }) + sqlCreate(targetTableName,sourceTypeAndLength, sourceToMiddle, middleToTarget,targetColumns) + } + + def sqlCreate(targetTableName:String ,sourceTypeAndLength: 
Seq[(String, String)], sourceToMiddle: Map[String, String], + middleToTarget: Map[String, String],targetColumns:Seq[String]):String = { + val fromSourceType = sourceTypeAndLength.map(_._1) + val fromSourceLength = sourceTypeAndLength.map(_._2) + val targetColWithType = fromSourceType + .map(sourceToMiddle.get(_).head) + .map(middleToTarget.get(_).head) + .zip(fromSourceLength) + .map(it => s"""${it._1}${it._2}""") + .zip(targetColumns) + val columns = targetColWithType.map(it => s"${it._2} ${it._1}").mkString(",\n") + s"""create table $targetTableName( + |$columns + |)""".stripMargin + } + + def getSourceFieldType(filePath: String): Map[(String,String),Seq[OdsModelingColumn]] = { + val modelingSheet = readSheet(filePath, ODS_MODELING_SHEET_NAME) + implicit val headers: Map[String, Int] = readHeaders(modelingSheet.head) + modelingSheet + .tail + .map(row => OdsModelingColumn( + sourceTable = getStringCellOrNull(SOURCE_TABLE, row), + targetTable = getStringCellOrNull(TARGET_TABLE, row), + sourceColumn = getStringCellOrNull(SOURCE_COLUMN, row), + targetColumn = getStringCellOrNull(TARGET_COLUMN, row), + sourceType = getStringCellOrNull(COLUMN_TYPE, row), + extraColumnExpression = getStringCellOrNull(EXTRA_COLUMN_EXPRESSION, row), + incrementalColumn = getBoolCell(INCREMENTAL_COLUMN, row), + primaryKeyColumn = getBoolCell(PRIMARY_COLUMN, row))) + .groupBy(it=>(it.sourceTable,it.targetTable)) + } + + def readTableConfig(dbName: String): Map[String, String] = { + val modelingSheet = readSheet(typeConversionFilePath, dbName) + implicit val headers: Map[String, Int] = readHeaders(modelingSheet.head) + modelingSheet + .tail + .map(rowToColumn).toMap + } + + private def rowToColumn(implicit headers: Map[String, Int]): Row => (String, String) = { + row => + ( + getStringCellOrNull(TableConfigSheetHeader.SOURCE, row), + getStringCellOrNull(TableConfigSheetHeader.TARGET, row) + ) + } + + private def rowExtractDdType(implicit headers: Map[String, Int]): Row => (String, String) = { + row 
=> + (s"""${getStringCellOrNull(OdsTableConfigSheetHeader.SOURCE_TYPE, row)}ToMiddle""", + s"""MiddleTo${getStringCellOrNull(OdsTableConfigSheetHeader.TARGET_TYPE, row)}""" + ) + } +} diff --git a/data-modeling/src/test/resources/copyOfOds.xlsx b/data-modeling/src/test/resources/copyOfOds.xlsx new file mode 100644 index 0000000..770e9f9 Binary files /dev/null and b/data-modeling/src/test/resources/copyOfOds.xlsx differ diff --git a/data-modeling/src/test/scala/com/github/sharpdata/sharpetl/modeling/excel/parser/tableParserSpec.scala b/data-modeling/src/test/scala/com/github/sharpdata/sharpetl/modeling/excel/parser/tableParserSpec.scala new file mode 100644 index 0000000..ae43506 --- /dev/null +++ b/data-modeling/src/test/scala/com/github/sharpdata/sharpetl/modeling/excel/parser/tableParserSpec.scala @@ -0,0 +1,19 @@ +package com.github.sharpdata.sharpetl.modeling.excel.parser + +import com.github.sharpdata.sharpetl.modeling.formatConversion.createSqlParser.{createTableDDL, createTableDDLList} +import org.scalatest.flatspec.AnyFlatSpec +import org.scalatest.matchers.should + +class tableParserSpec extends AnyFlatSpec with should.Matchers { + + it should "create change" in { + val filePath = this + .getClass + .getClassLoader + .getResource("copyOfOds.xlsx") + .getPath + createTableDDLList(filePath).head._2 should be ("create table t_order(\norder_sn varchar(128),\nproduct_code varchar(128),\nproduct_name varchar(128),\nproduct_version varchar(128),\nproduct_status varchar(128),\nuser_code varchar(128),\nuser_name varchar(128),\nuser_age int,\nuser_address varchar(128),\nproduct_count int,\nprice decimal(10, 4),\ndiscount decimal(10, 4),\norder_status varchar(128),\norder_create_time timestamp,\norder_update_time timestamp\n)") + createTableDDLList(filePath)(1)._2 should be ("create table t_user(\nuser_code varchar(128),\nuser_name varchar(128),\nuser_age int,\nuser_address varchar(128),\ncreate_time timestamp,\nupdate_time timestamp\n)") + 
createTableDDLList(filePath)(2)._2 should be ("create table t_product(\nproduct_code varchar(128),\nproduct_name varchar(128),\nproduct_version varchar(128),\nproduct_status varchar(128),\ncreate_time timestamp,\nupdate_time timestamp\n)") + } +} diff --git a/spark/src/main/scala/com/github/sharpdata/sharpetl/spark/cli/Command.scala b/spark/src/main/scala/com/github/sharpdata/sharpetl/spark/cli/Command.scala index a0d0b37..3bb163b 100644 --- a/spark/src/main/scala/com/github/sharpdata/sharpetl/spark/cli/Command.scala +++ b/spark/src/main/scala/com/github/sharpdata/sharpetl/spark/cli/Command.scala @@ -1,6 +1,6 @@ package com.github.sharpdata.sharpetl.spark.cli -import com.github.sharpdata.sharpetl.modeling.cli.{GenerateDwdStepCommand, GenerateSqlFiles} +import com.github.sharpdata.sharpetl.modeling.cli.{GenerateDwdStepCommand, GenerateSqlAutomateGenerateFiles, GenerateSqlFiles} import com.github.sharpdata.sharpetl.spark.utils.JavaVersionChecker import com.github.sharpdata.sharpetl.core.api.WfEvalResult.throwFirstException import com.github.sharpdata.sharpetl.core.api.{LogDrivenInterpreter, WfEvalResult} @@ -93,7 +93,9 @@ class BatchSparkJobCommand extends BatchJobCommand { classOf[BatchSparkJobCommand], classOf[GenerateSqlFiles], classOf[EncryptionCommand], - classOf[GenerateDwdStepCommand] + classOf[GenerateDwdStepCommand], + classOf[GenerateSqlAutomateGenerateFiles] + ) ) class Command extends Runnable {