/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.carbondata.processing.newflow;

import java.io.File;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;

import org.apache.carbondata.common.CarbonIterator;
import org.apache.carbondata.common.logging.LogService;
import org.apache.carbondata.common.logging.LogServiceFactory;
import org.apache.carbondata.core.constants.CarbonCommonConstants;
import org.apache.carbondata.core.metadata.AbsoluteTableIdentifier;
import org.apache.carbondata.core.metadata.CarbonMetadata;
import org.apache.carbondata.core.metadata.schema.table.CarbonTable;
import org.apache.carbondata.core.metadata.schema.table.column.CarbonColumn;
import org.apache.carbondata.core.metadata.schema.table.column.CarbonDimension;
import org.apache.carbondata.core.metadata.schema.table.column.CarbonMeasure;
import org.apache.carbondata.core.util.CarbonProperties;
import org.apache.carbondata.processing.model.CarbonLoadModel;
import org.apache.carbondata.processing.newflow.constants.DataLoadProcessorConstants;
import org.apache.carbondata.processing.newflow.sort.SortScopeOptions;
import org.apache.carbondata.processing.newflow.steps.CarbonRowDataWriterProcessorStepImpl;
import org.apache.carbondata.processing.newflow.steps.DataConverterProcessorStepImpl;
import org.apache.carbondata.processing.newflow.steps.DataConverterProcessorWithBucketingStepImpl;
import org.apache.carbondata.processing.newflow.steps.DataWriterBatchProcessorStepImpl;
import org.apache.carbondata.processing.newflow.steps.DataWriterProcessorStepImpl;
import org.apache.carbondata.processing.newflow.steps.InputProcessorStepImpl;
import org.apache.carbondata.processing.newflow.steps.SortProcessorStepImpl;
import org.apache.carbondata.processing.util.CarbonDataProcessorUtil;

/**
 * Builds the pipeline of steps for loading data into carbon.
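 * <p>
 * Depending on the table configuration, {@link #build} assembles one of four
 * pipelines: no-sort, batch-sort, bucketing, or the default full-sort pipeline.
 * Minimal usage sketch (assumes the caller has already prepared the load model,
 * temp store location and input iterators):
 * <pre>
 *   DataLoadProcessBuilder builder = new DataLoadProcessBuilder();
 *   AbstractDataLoadProcessorStep pipeline =
 *       builder.build(loadModel, storeLocation, inputIterators);
 * </pre>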
 */
public final class DataLoadProcessBuilder {

  private static final LogService LOGGER =
      LogServiceFactory.getLogService(DataLoadProcessBuilder.class.getName());

  public AbstractDataLoadProcessorStep build(CarbonLoadModel loadModel, String storeLocation,
      CarbonIterator[] inputIterators) throws Exception {
    CarbonDataLoadConfiguration configuration = createConfiguration(loadModel, storeLocation);
    SortScopeOptions.SortScope sortScope = CarbonDataProcessorUtil.getSortScope(configuration);
    if (!configuration.isSortTable() || sortScope.equals(SortScopeOptions.SortScope.NO_SORT)) {
      return buildInternalForNoSort(inputIterators, configuration);
    } else if (configuration.getBucketingInfo() != null) {
      return buildInternalForBucketing(inputIterators, configuration);
    } else if (sortScope.equals(SortScopeOptions.SortScope.BATCH_SORT)) {
      return buildInternalForBatchSort(inputIterators, configuration);
    } else {
      return buildInternal(inputIterators, configuration);
    }
  }

  private AbstractDataLoadProcessorStep buildInternal(CarbonIterator[] inputIterators,
      CarbonDataLoadConfiguration configuration) {
    // 1. Reads the data from the input iterators and parses it.
    AbstractDataLoadProcessorStep inputProcessorStep =
        new InputProcessorStepImpl(configuration, inputIterators);
    // 2. Converts the data to dictionary, non-dictionary or complex objects, depending on
    // the data types and configuration.
    AbstractDataLoadProcessorStep converterProcessorStep =
        new DataConverterProcessorStepImpl(configuration, inputProcessorStep);
    // 3. Sorts the data by the sort columns.
    AbstractDataLoadProcessorStep sortProcessorStep =
        new SortProcessorStepImpl(configuration, converterProcessorStep);
    // 4. Writes the sorted data in carbondata format.
    return new DataWriterProcessorStepImpl(configuration, sortProcessorStep);
  }

  private AbstractDataLoadProcessorStep buildInternalForNoSort(CarbonIterator[] inputIterators,
      CarbonDataLoadConfiguration configuration) {
    // 1. Reads the data from the input iterators and parses it.
    AbstractDataLoadProcessorStep inputProcessorStep =
        new InputProcessorStepImpl(configuration, inputIterators);
    // 2. Converts the data to dictionary, non-dictionary or complex objects, depending on
    // the data types and configuration.
    AbstractDataLoadProcessorStep converterProcessorStep =
        new DataConverterProcessorStepImpl(configuration, inputProcessorStep);
    // 3. Writes the converted data in carbondata format, without sorting.
    AbstractDataLoadProcessorStep writerProcessorStep =
        new CarbonRowDataWriterProcessorStepImpl(configuration, converterProcessorStep);
    return writerProcessorStep;
  }

  private AbstractDataLoadProcessorStep buildInternalForBatchSort(CarbonIterator[] inputIterators,
      CarbonDataLoadConfiguration configuration) {
    // 1. Reads the data from the input iterators and parses it.
    AbstractDataLoadProcessorStep inputProcessorStep =
        new InputProcessorStepImpl(configuration, inputIterators);
    // 2. Converts the data to dictionary, non-dictionary or complex objects, depending on
    // the data types and configuration.
    AbstractDataLoadProcessorStep converterProcessorStep =
        new DataConverterProcessorStepImpl(configuration, inputProcessorStep);
    // 3. Sorts the data batch by batch on the sort columns.
    AbstractDataLoadProcessorStep sortProcessorStep =
        new SortProcessorStepImpl(configuration, converterProcessorStep);
    // 4. Writes the sorted data in carbondata format.
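    // Note: unlike the default pipeline, a batch-aware writer is used here so
    // that each sorted batch can be written out independently.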
    return new DataWriterBatchProcessorStepImpl(configuration, sortProcessorStep);
  }

  private AbstractDataLoadProcessorStep buildInternalForBucketing(CarbonIterator[] inputIterators,
      CarbonDataLoadConfiguration configuration) throws Exception {
    // 1. Reads the data from the input iterators and parses it.
    AbstractDataLoadProcessorStep inputProcessorStep =
        new InputProcessorStepImpl(configuration, inputIterators);
    // 2. Converts the data to dictionary, non-dictionary or complex objects, depending on
    // the data types and configuration; this converter variant also handles bucketing.
    AbstractDataLoadProcessorStep converterProcessorStep =
        new DataConverterProcessorWithBucketingStepImpl(configuration, inputProcessorStep);
    // 3. Sorts the data by the sort columns.
    AbstractDataLoadProcessorStep sortProcessorStep =
        new SortProcessorStepImpl(configuration, converterProcessorStep);
    // 4. Writes the sorted data in carbondata format.
    return new DataWriterProcessorStepImpl(configuration, sortProcessorStep);
  }

  private CarbonDataLoadConfiguration createConfiguration(CarbonLoadModel loadModel,
      String storeLocation) throws Exception {
    if (!new File(storeLocation).mkdirs()) {
      LOGGER.error("Error while creating the temp store path: " + storeLocation);
    }
    CarbonDataLoadConfiguration configuration = new CarbonDataLoadConfiguration();
    String databaseName = loadModel.getDatabaseName();
    String tableName = loadModel.getTableName();
    String tempLocationKey = CarbonDataProcessorUtil
        .getTempStoreLocationKey(databaseName, tableName, loadModel.getTaskNo(), false);
    CarbonProperties.getInstance().addProperty(tempLocationKey, storeLocation);
    CarbonProperties.getInstance()
        .addProperty(CarbonCommonConstants.STORE_LOCATION_HDFS, loadModel.getStorePath());

    CarbonTable carbonTable = loadModel.getCarbonDataLoadSchema().getCarbonTable();
    AbsoluteTableIdentifier identifier = carbonTable.getAbsoluteTableIdentifier();
    configuration.setTableIdentifier(identifier);
    configuration.setSchemaUpdatedTimeStamp(carbonTable.getTableLastUpdatedTime());
    configuration.setHeader(loadModel.getCsvHeaderColumns());
    configuration.setPartitionId(loadModel.getPartitionId());
    configuration.setSegmentId(loadModel.getSegmentId());
    configuration.setTaskNo(loadModel.getTaskNo());
    configuration.setDataLoadProperty(DataLoadProcessorConstants.COMPLEX_DELIMITERS,
        new String[] { loadModel.getComplexDelimiterLevel1(),
            loadModel.getComplexDelimiterLevel2() });
    configuration.setDataLoadProperty(DataLoadProcessorConstants.SERIALIZATION_NULL_FORMAT,
        loadModel.getSerializationNullFormat().split(",")[1]);
    configuration.setDataLoadProperty(DataLoadProcessorConstants.FACT_TIME_STAMP,
        loadModel.getFactTimeStamp());
    configuration.setDataLoadProperty(DataLoadProcessorConstants.BAD_RECORDS_LOGGER_ENABLE,
        loadModel.getBadRecordsLoggerEnable().split(",")[1]);
    configuration.setDataLoadProperty(DataLoadProcessorConstants.BAD_RECORDS_LOGGER_ACTION,
        loadModel.getBadRecordsAction().split(",")[1]);
    configuration.setDataLoadProperty(DataLoadProcessorConstants.IS_EMPTY_DATA_BAD_RECORD,
        loadModel.getIsEmptyDataBadRecord().split(",")[1]);
    configuration.setDataLoadProperty(DataLoadProcessorConstants.FACT_FILE_PATH,
        loadModel.getFactFilePath());
    configuration
        .setDataLoadProperty(CarbonCommonConstants.LOAD_SORT_SCOPE, loadModel.getSortScope());
    configuration.setDataLoadProperty(CarbonCommonConstants.LOAD_BATCH_SORT_SIZE_INMB,
        loadModel.getBatchSortSizeInMb());
    CarbonMetadata.getInstance().addCarbonTable(carbonTable);
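
    // Assemble the per-column DataField list consumed by the conversion steps.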
    List<CarbonDimension> dimensions =
        carbonTable.getDimensionByTableName(carbonTable.getFactTableName());
    List<CarbonMeasure> measures =
        carbonTable.getMeasureByTableName(carbonTable.getFactTableName());
    Map<String, String> dateFormatMap =
        CarbonDataProcessorUtil.getDateFormatMap(loadModel.getDateFormat());
    List<DataField> dataFields = new ArrayList<>();
    List<DataField> complexDataFields = new ArrayList<>();

    // First add dictionary and non-dictionary dimensions because these are part of the MDK key,
    // and then add complex data types and measures.
    for (CarbonColumn column : dimensions) {
      DataField dataField = new DataField(column);
      dataField.setDateFormat(dateFormatMap.get(column.getColName()));
      if (column.isComplex()) {
        complexDataFields.add(dataField);
      } else {
        dataFields.add(dataField);
      }
    }
    dataFields.addAll(complexDataFields);
    for (CarbonColumn column : measures) {
      // The dummy measure is added only when no measure is present, so it need not be loaded.
      if (!(column.getColName().equals("default_dummy_measure"))) {
        dataFields.add(new DataField(column));
      }
    }
    configuration.setDataFields(dataFields.toArray(new DataField[dataFields.size()]));
    configuration.setBucketingInfo(carbonTable.getBucketingInfo(carbonTable.getFactTableName()));
    // Configuration for one-pass load: dictionary server info.
    configuration.setUseOnePass(loadModel.getUseOnePass());
    configuration.setDictionaryServerHost(loadModel.getDictionaryServerHost());
    configuration.setDictionaryServerPort(loadModel.getDictionaryServerPort());
    configuration.setPreFetch(loadModel.isPreFetch());
    configuration.setNumberOfSortColumns(carbonTable.getNumberOfSortColumns());
    configuration.setNumberOfNoDictSortColumns(carbonTable.getNumberOfNoDictSortColumns());

    return configuration;
  }
}