{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE DuplicateRecordFields #-}
{-# LANGUAGE NamedFieldPuns #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE StrictData #-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# OPTIONS_GHC -fno-warn-unused-imports #-}
{-# OPTIONS_GHC -fno-warn-unused-matches #-}

-- Derived from AWS service descriptions, licensed under Apache 2.0.

-- |
-- Module      : Amazonka.MachineLearning.Types.RedshiftDataSpec
-- Copyright   : (c) 2013-2021 Brendan Hay
-- License     : Mozilla Public License, v. 2.0.
-- Maintainer  : Brendan Hay <brendan.g.hay+amazonka@gmail.com>
-- Stability   : auto-generated
-- Portability : non-portable (GHC extensions)
module Amazonka.MachineLearning.Types.RedshiftDataSpec where

import qualified Amazonka.Core as Core
import qualified Amazonka.Lens as Lens
import Amazonka.MachineLearning.Types.RedshiftDatabase
import Amazonka.MachineLearning.Types.RedshiftDatabaseCredentials
import qualified Amazonka.Prelude as Prelude

-- | Describes the data specification of an Amazon Redshift @DataSource@.
--
-- /See:/ 'newRedshiftDataSpec' smart constructor.
data RedshiftDataSpec = RedshiftDataSpec'
  { -- | Describes the schema location for an Amazon Redshift @DataSource@.
    dataSchemaUri :: Prelude.Maybe Prelude.Text,
    -- | A JSON string that represents the schema for an Amazon Redshift
    -- @DataSource@. The @DataSchema@ defines the structure of the observation
    -- data in the data file(s) referenced in the @DataSource@.
    --
    -- A @DataSchema@ is not required if you specify a @DataSchemaUri@.
    --
    -- Define your @DataSchema@ as a series of key-value pairs. @attributes@
    -- and @excludedVariableNames@ have an array of key-value pairs for their
    -- value. Use the following format to define your @DataSchema@.
    --
    -- { \"version\": \"1.0\",
    --
    -- \"recordAnnotationFieldName\": \"F1\",
    --
    -- \"recordWeightFieldName\": \"F2\",
    --
    -- \"targetFieldName\": \"F3\",
    --
    -- \"dataFormat\": \"CSV\",
    --
    -- \"dataFileContainsHeader\": true,
    --
    -- \"attributes\": [
    --
    -- { \"fieldName\": \"F1\", \"fieldType\": \"TEXT\" }, { \"fieldName\":
    -- \"F2\", \"fieldType\": \"NUMERIC\" }, { \"fieldName\": \"F3\",
    -- \"fieldType\": \"CATEGORICAL\" }, { \"fieldName\": \"F4\",
    -- \"fieldType\": \"NUMERIC\" }, { \"fieldName\": \"F5\", \"fieldType\":
    -- \"CATEGORICAL\" }, { \"fieldName\": \"F6\", \"fieldType\": \"TEXT\" }, {
    -- \"fieldName\": \"F7\", \"fieldType\": \"WEIGHTED_INT_SEQUENCE\" }, {
    -- \"fieldName\": \"F8\", \"fieldType\": \"WEIGHTED_STRING_SEQUENCE\" } ],
    --
    -- \"excludedVariableNames\": [ \"F6\" ] }
    dataSchema :: Prelude.Maybe Prelude.Text,
    -- | A JSON string that represents the splitting and rearrangement processing
    -- to be applied to a @DataSource@. If the @DataRearrangement@ parameter is
    -- not provided, all of the input data is used to create the @Datasource@.
    --
    -- There are multiple parameters that control what data is used to create a
    -- datasource:
    --
    -- -   __@percentBegin@__
    --
    --     Use @percentBegin@ to indicate the beginning of the range of the
    --     data used to create the Datasource. If you do not include
    --     @percentBegin@ and @percentEnd@, Amazon ML includes all of the data
    --     when creating the datasource.
    --
    -- -   __@percentEnd@__
    --
    --     Use @percentEnd@ to indicate the end of the range of the data used
    --     to create the Datasource. If you do not include @percentBegin@ and
    --     @percentEnd@, Amazon ML includes all of the data when creating the
    --     datasource.
    --
    -- -   __@complement@__
    --
    --     The @complement@ parameter instructs Amazon ML to use the data that
    --     is not included in the range of @percentBegin@ to @percentEnd@ to
    --     create a datasource. The @complement@ parameter is useful if you
    --     need to create complementary datasources for training and
    --     evaluation. To create a complementary datasource, use the same
    --     values for @percentBegin@ and @percentEnd@, along with the
    --     @complement@ parameter.
    --
    --     For example, the following two datasources do not share any data,
    --     and can be used to train and evaluate a model. The first datasource
    --     has 25 percent of the data, and the second one has 75 percent of the
    --     data.
    --
    --     Datasource for evaluation:
    --     @{\"splitting\":{\"percentBegin\":0, \"percentEnd\":25}}@
    --
    --     Datasource for training:
    --     @{\"splitting\":{\"percentBegin\":0, \"percentEnd\":25, \"complement\":\"true\"}}@
    --
    -- -   __@strategy@__
    --
    --     To change how Amazon ML splits the data for a datasource, use the
    --     @strategy@ parameter.
    --
    --     The default value for the @strategy@ parameter is @sequential@,
    --     meaning that Amazon ML takes all of the data records between the
    --     @percentBegin@ and @percentEnd@ parameters for the datasource, in
    --     the order that the records appear in the input data.
    --
    --     The following two @DataRearrangement@ lines are examples of
    --     sequentially ordered training and evaluation datasources:
    --
    --     Datasource for evaluation:
    --     @{\"splitting\":{\"percentBegin\":70, \"percentEnd\":100, \"strategy\":\"sequential\"}}@
    --
    --     Datasource for training:
    --     @{\"splitting\":{\"percentBegin\":70, \"percentEnd\":100, \"strategy\":\"sequential\", \"complement\":\"true\"}}@
    --
    --     To randomly split the input data into the proportions indicated by
    --     the percentBegin and percentEnd parameters, set the @strategy@
    --     parameter to @random@ and provide a string that is used as the seed
    --     value for the random data splitting (for example, you can use the S3
    --     path to your data as the random seed string). If you choose the
    --     random split strategy, Amazon ML assigns each row of data a
    --     pseudo-random number between 0 and 100, and then selects the rows
    --     that have an assigned number between @percentBegin@ and
    --     @percentEnd@. Pseudo-random numbers are assigned using both the
    --     input seed string value and the byte offset as a seed, so changing
    --     the data results in a different split. Any existing ordering is
    --     preserved. The random splitting strategy ensures that variables in
    --     the training and evaluation data are distributed similarly. It is
    --     useful in the cases where the input data may have an implicit sort
    --     order, which would otherwise result in training and evaluation
    --     datasources containing non-similar data records.
    --
    --     The following two @DataRearrangement@ lines are examples of
    --     non-sequentially ordered training and evaluation datasources:
    --
    --     Datasource for evaluation:
    --     @{\"splitting\":{\"percentBegin\":70, \"percentEnd\":100, \"strategy\":\"random\", \"randomSeed\"=\"s3:\/\/my_s3_path\/bucket\/file.csv\"}}@
    --
    --     Datasource for training:
    --     @{\"splitting\":{\"percentBegin\":70, \"percentEnd\":100, \"strategy\":\"random\", \"randomSeed\"=\"s3:\/\/my_s3_path\/bucket\/file.csv\", \"complement\":\"true\"}}@
    dataRearrangement :: Prelude.Maybe Prelude.Text,
    -- | Describes the @DatabaseName@ and @ClusterIdentifier@ for an Amazon
    -- Redshift @DataSource@.
    databaseInformation :: RedshiftDatabase,
    -- | Describes the SQL Query to execute on an Amazon Redshift database for an
    -- Amazon Redshift @DataSource@.
    selectSqlQuery :: Prelude.Text,
    -- | Describes AWS Identity and Access Management (IAM) credentials that are
    -- used to connect to the Amazon Redshift database.
    databaseCredentials :: RedshiftDatabaseCredentials,
    -- | Describes an Amazon S3 location to store the result set of the
    -- @SelectSqlQuery@ query.
    s3StagingLocation :: Prelude.Text
  }
  deriving (Prelude.Eq, Prelude.Read, Prelude.Show, Prelude.Generic)

-- |
-- Create a value of 'RedshiftDataSpec' with all optional fields omitted.
--
-- Use <https://hackage.haskell.org/package/generic-lens generic-lens> or <https://hackage.haskell.org/package/optics optics> to modify other optional fields.
--
-- The following record fields are available, with the corresponding lenses provided
-- for backwards compatibility:
--
-- 'dataSchemaUri', 'redshiftDataSpec_dataSchemaUri' - Describes the schema location for an Amazon Redshift @DataSource@.
--
-- 'dataSchema', 'redshiftDataSpec_dataSchema' - A JSON string that represents the schema for an Amazon Redshift
-- @DataSource@. The @DataSchema@ defines the structure of the observation
-- data in the data file(s) referenced in the @DataSource@.
--
-- A @DataSchema@ is not required if you specify a @DataSchemaUri@.
--
-- Define your @DataSchema@ as a series of key-value pairs. @attributes@
-- and @excludedVariableNames@ have an array of key-value pairs for their
-- value. Use the following format to define your @DataSchema@.
--
-- { \"version\": \"1.0\",
--
-- \"recordAnnotationFieldName\": \"F1\",
--
-- \"recordWeightFieldName\": \"F2\",
--
-- \"targetFieldName\": \"F3\",
--
-- \"dataFormat\": \"CSV\",
--
-- \"dataFileContainsHeader\": true,
--
-- \"attributes\": [
--
-- { \"fieldName\": \"F1\", \"fieldType\": \"TEXT\" }, { \"fieldName\":
-- \"F2\", \"fieldType\": \"NUMERIC\" }, { \"fieldName\": \"F3\",
-- \"fieldType\": \"CATEGORICAL\" }, { \"fieldName\": \"F4\",
-- \"fieldType\": \"NUMERIC\" }, { \"fieldName\": \"F5\", \"fieldType\":
-- \"CATEGORICAL\" }, { \"fieldName\": \"F6\", \"fieldType\": \"TEXT\" }, {
-- \"fieldName\": \"F7\", \"fieldType\": \"WEIGHTED_INT_SEQUENCE\" }, {
-- \"fieldName\": \"F8\", \"fieldType\": \"WEIGHTED_STRING_SEQUENCE\" } ],
--
-- \"excludedVariableNames\": [ \"F6\" ] }
--
-- 'dataRearrangement', 'redshiftDataSpec_dataRearrangement' - A JSON string that represents the splitting and rearrangement processing
-- to be applied to a @DataSource@. If the @DataRearrangement@ parameter is
-- not provided, all of the input data is used to create the @Datasource@.
--
-- There are multiple parameters that control what data is used to create a
-- datasource:
--
-- -   __@percentBegin@__
--
--     Use @percentBegin@ to indicate the beginning of the range of the
--     data used to create the Datasource. If you do not include
--     @percentBegin@ and @percentEnd@, Amazon ML includes all of the data
--     when creating the datasource.
--
-- -   __@percentEnd@__
--
--     Use @percentEnd@ to indicate the end of the range of the data used
--     to create the Datasource. If you do not include @percentBegin@ and
--     @percentEnd@, Amazon ML includes all of the data when creating the
--     datasource.
--
-- -   __@complement@__
--
--     The @complement@ parameter instructs Amazon ML to use the data that
--     is not included in the range of @percentBegin@ to @percentEnd@ to
--     create a datasource. The @complement@ parameter is useful if you
--     need to create complementary datasources for training and
--     evaluation. To create a complementary datasource, use the same
--     values for @percentBegin@ and @percentEnd@, along with the
--     @complement@ parameter.
--
--     For example, the following two datasources do not share any data,
--     and can be used to train and evaluate a model. The first datasource
--     has 25 percent of the data, and the second one has 75 percent of the
--     data.
--
--     Datasource for evaluation:
--     @{\"splitting\":{\"percentBegin\":0, \"percentEnd\":25}}@
--
--     Datasource for training:
--     @{\"splitting\":{\"percentBegin\":0, \"percentEnd\":25, \"complement\":\"true\"}}@
--
-- -   __@strategy@__
--
--     To change how Amazon ML splits the data for a datasource, use the
--     @strategy@ parameter.
--
--     The default value for the @strategy@ parameter is @sequential@,
--     meaning that Amazon ML takes all of the data records between the
--     @percentBegin@ and @percentEnd@ parameters for the datasource, in
--     the order that the records appear in the input data.
--
--     The following two @DataRearrangement@ lines are examples of
--     sequentially ordered training and evaluation datasources:
--
--     Datasource for evaluation:
--     @{\"splitting\":{\"percentBegin\":70, \"percentEnd\":100, \"strategy\":\"sequential\"}}@
--
--     Datasource for training:
--     @{\"splitting\":{\"percentBegin\":70, \"percentEnd\":100, \"strategy\":\"sequential\", \"complement\":\"true\"}}@
--
--     To randomly split the input data into the proportions indicated by
--     the percentBegin and percentEnd parameters, set the @strategy@
--     parameter to @random@ and provide a string that is used as the seed
--     value for the random data splitting (for example, you can use the S3
--     path to your data as the random seed string). If you choose the
--     random split strategy, Amazon ML assigns each row of data a
--     pseudo-random number between 0 and 100, and then selects the rows
--     that have an assigned number between @percentBegin@ and
--     @percentEnd@. Pseudo-random numbers are assigned using both the
--     input seed string value and the byte offset as a seed, so changing
--     the data results in a different split. Any existing ordering is
--     preserved. The random splitting strategy ensures that variables in
--     the training and evaluation data are distributed similarly. It is
--     useful in the cases where the input data may have an implicit sort
--     order, which would otherwise result in training and evaluation
--     datasources containing non-similar data records.
--
--     The following two @DataRearrangement@ lines are examples of
--     non-sequentially ordered training and evaluation datasources:
--
--     Datasource for evaluation:
--     @{\"splitting\":{\"percentBegin\":70, \"percentEnd\":100, \"strategy\":\"random\", \"randomSeed\"=\"s3:\/\/my_s3_path\/bucket\/file.csv\"}}@
--
--     Datasource for training:
--     @{\"splitting\":{\"percentBegin\":70, \"percentEnd\":100, \"strategy\":\"random\", \"randomSeed\"=\"s3:\/\/my_s3_path\/bucket\/file.csv\", \"complement\":\"true\"}}@
--
-- 'databaseInformation', 'redshiftDataSpec_databaseInformation' - Describes the @DatabaseName@ and @ClusterIdentifier@ for an Amazon
-- Redshift @DataSource@.
--
-- 'selectSqlQuery', 'redshiftDataSpec_selectSqlQuery' - Describes the SQL Query to execute on an Amazon Redshift database for an
-- Amazon Redshift @DataSource@.
--
-- 'databaseCredentials', 'redshiftDataSpec_databaseCredentials' - Describes AWS Identity and Access Management (IAM) credentials that are
-- used to connect to the Amazon Redshift database.
--
-- 's3StagingLocation', 'redshiftDataSpec_s3StagingLocation' - Describes an Amazon S3 location to store the result set of the
-- @SelectSqlQuery@ query.
newRedshiftDataSpec ::
  -- | 'databaseInformation'
  RedshiftDatabase ->
  -- | 'selectSqlQuery'
  Prelude.Text ->
  -- | 'databaseCredentials'
  RedshiftDatabaseCredentials ->
  -- | 's3StagingLocation'
  Prelude.Text ->
  RedshiftDataSpec
newRedshiftDataSpec
  pDatabaseInformation_
  pSelectSqlQuery_
  pDatabaseCredentials_
  pS3StagingLocation_ =
    RedshiftDataSpec'
      { -- The three optional fields start out unset.
        dataSchemaUri = Prelude.Nothing,
        dataSchema = Prelude.Nothing,
        dataRearrangement = Prelude.Nothing,
        -- The four required fields are taken from the arguments, in order.
        databaseInformation = pDatabaseInformation_,
        selectSqlQuery = pSelectSqlQuery_,
        databaseCredentials = pDatabaseCredentials_,
        s3StagingLocation = pS3StagingLocation_
      }

-- | Describes the schema location for an Amazon Redshift @DataSource@.
redshiftDataSpec_dataSchemaUri :: Lens.Lens' RedshiftDataSpec (Prelude.Maybe Prelude.Text)
redshiftDataSpec_dataSchemaUri =
  Lens.lens
    (\RedshiftDataSpec' {dataSchemaUri} -> dataSchemaUri)
    (\s@RedshiftDataSpec' {} a -> s {dataSchemaUri = a} :: RedshiftDataSpec)

-- | A JSON string that represents the schema for an Amazon Redshift
-- @DataSource@. The @DataSchema@ defines the structure of the observation
-- data in the data file(s) referenced in the @DataSource@.
--
-- A @DataSchema@ is not required if you specify a @DataSchemaUri@.
--
-- Define your @DataSchema@ as a series of key-value pairs. @attributes@
-- and @excludedVariableNames@ have an array of key-value pairs for their
-- value. Use the following format to define your @DataSchema@.
--
-- { \"version\": \"1.0\",
--
-- \"recordAnnotationFieldName\": \"F1\",
--
-- \"recordWeightFieldName\": \"F2\",
--
-- \"targetFieldName\": \"F3\",
--
-- \"dataFormat\": \"CSV\",
--
-- \"dataFileContainsHeader\": true,
--
-- \"attributes\": [
--
-- { \"fieldName\": \"F1\", \"fieldType\": \"TEXT\" }, { \"fieldName\":
-- \"F2\", \"fieldType\": \"NUMERIC\" }, { \"fieldName\": \"F3\",
-- \"fieldType\": \"CATEGORICAL\" }, { \"fieldName\": \"F4\",
-- \"fieldType\": \"NUMERIC\" }, { \"fieldName\": \"F5\", \"fieldType\":
-- \"CATEGORICAL\" }, { \"fieldName\": \"F6\", \"fieldType\": \"TEXT\" }, {
-- \"fieldName\": \"F7\", \"fieldType\": \"WEIGHTED_INT_SEQUENCE\" }, {
-- \"fieldName\": \"F8\", \"fieldType\": \"WEIGHTED_STRING_SEQUENCE\" } ],
--
-- \"excludedVariableNames\": [ \"F6\" ] }
redshiftDataSpec_dataSchema :: Lens.Lens' RedshiftDataSpec (Prelude.Maybe Prelude.Text)
redshiftDataSpec_dataSchema =
  Lens.lens
    (\RedshiftDataSpec' {dataSchema} -> dataSchema)
    (\s@RedshiftDataSpec' {} a -> s {dataSchema = a} :: RedshiftDataSpec)

-- | A JSON string that represents the splitting and rearrangement processing
-- to be applied to a @DataSource@. If the @DataRearrangement@ parameter is
-- not provided, all of the input data is used to create the @Datasource@.
--
-- There are multiple parameters that control what data is used to create a
-- datasource:
--
-- -   __@percentBegin@__
--
--     Use @percentBegin@ to indicate the beginning of the range of the
--     data used to create the Datasource. If you do not include
--     @percentBegin@ and @percentEnd@, Amazon ML includes all of the data
--     when creating the datasource.
--
-- -   __@percentEnd@__
--
--     Use @percentEnd@ to indicate the end of the range of the data used
--     to create the Datasource. If you do not include @percentBegin@ and
--     @percentEnd@, Amazon ML includes all of the data when creating the
--     datasource.
--
-- -   __@complement@__
--
--     The @complement@ parameter instructs Amazon ML to use the data that
--     is not included in the range of @percentBegin@ to @percentEnd@ to
--     create a datasource. The @complement@ parameter is useful if you
--     need to create complementary datasources for training and
--     evaluation. To create a complementary datasource, use the same
--     values for @percentBegin@ and @percentEnd@, along with the
--     @complement@ parameter.
--
--     For example, the following two datasources do not share any data,
--     and can be used to train and evaluate a model.
--     The first datasource has 25 percent of the data, and the second one
--     has 75 percent of the data.
--
--     Datasource for evaluation:
--     @{\"splitting\":{\"percentBegin\":0, \"percentEnd\":25}}@
--
--     Datasource for training:
--     @{\"splitting\":{\"percentBegin\":0, \"percentEnd\":25, \"complement\":\"true\"}}@
--
-- -   __@strategy@__
--
--     To change how Amazon ML splits the data for a datasource, use the
--     @strategy@ parameter.
--
--     The default value for the @strategy@ parameter is @sequential@,
--     meaning that Amazon ML takes all of the data records between the
--     @percentBegin@ and @percentEnd@ parameters for the datasource, in
--     the order that the records appear in the input data.
--
--     The following two @DataRearrangement@ lines are examples of
--     sequentially ordered training and evaluation datasources:
--
--     Datasource for evaluation:
--     @{\"splitting\":{\"percentBegin\":70, \"percentEnd\":100, \"strategy\":\"sequential\"}}@
--
--     Datasource for training:
--     @{\"splitting\":{\"percentBegin\":70, \"percentEnd\":100, \"strategy\":\"sequential\", \"complement\":\"true\"}}@
--
--     To randomly split the input data into the proportions indicated by
--     the percentBegin and percentEnd parameters, set the @strategy@
--     parameter to @random@ and provide a string that is used as the seed
--     value for the random data splitting (for example, you can use the S3
--     path to your data as the random seed string). If you choose the
--     random split strategy, Amazon ML assigns each row of data a
--     pseudo-random number between 0 and 100, and then selects the rows
--     that have an assigned number between @percentBegin@ and
--     @percentEnd@. Pseudo-random numbers are assigned using both the
--     input seed string value and the byte offset as a seed, so changing
--     the data results in a different split. Any existing ordering is
--     preserved. The random splitting strategy ensures that variables in
--     the training and evaluation data are distributed similarly.
--     It is useful in the cases where the input data may have an implicit
--     sort order, which would otherwise result in training and evaluation
--     datasources containing non-similar data records.
--
--     The following two @DataRearrangement@ lines are examples of
--     non-sequentially ordered training and evaluation datasources:
--
--     Datasource for evaluation:
--     @{\"splitting\":{\"percentBegin\":70, \"percentEnd\":100, \"strategy\":\"random\", \"randomSeed\"=\"s3:\/\/my_s3_path\/bucket\/file.csv\"}}@
--
--     Datasource for training:
--     @{\"splitting\":{\"percentBegin\":70, \"percentEnd\":100, \"strategy\":\"random\", \"randomSeed\"=\"s3:\/\/my_s3_path\/bucket\/file.csv\", \"complement\":\"true\"}}@
redshiftDataSpec_dataRearrangement :: Lens.Lens' RedshiftDataSpec (Prelude.Maybe Prelude.Text)
redshiftDataSpec_dataRearrangement =
  Lens.lens
    (\RedshiftDataSpec' {dataRearrangement} -> dataRearrangement)
    (\s@RedshiftDataSpec' {} a -> s {dataRearrangement = a} :: RedshiftDataSpec)

-- | Describes the @DatabaseName@ and @ClusterIdentifier@ for an Amazon
-- Redshift @DataSource@.
redshiftDataSpec_databaseInformation :: Lens.Lens' RedshiftDataSpec RedshiftDatabase
redshiftDataSpec_databaseInformation =
  Lens.lens
    (\RedshiftDataSpec' {databaseInformation} -> databaseInformation)
    (\s@RedshiftDataSpec' {} a -> s {databaseInformation = a} :: RedshiftDataSpec)

-- | Describes the SQL Query to execute on an Amazon Redshift database for an
-- Amazon Redshift @DataSource@.
redshiftDataSpec_selectSqlQuery :: Lens.Lens' RedshiftDataSpec Prelude.Text
redshiftDataSpec_selectSqlQuery =
  Lens.lens
    (\RedshiftDataSpec' {selectSqlQuery} -> selectSqlQuery)
    (\s@RedshiftDataSpec' {} a -> s {selectSqlQuery = a} :: RedshiftDataSpec)

-- | Describes AWS Identity and Access Management (IAM) credentials that are
-- used to connect to the Amazon Redshift database.
redshiftDataSpec_databaseCredentials :: Lens.Lens' RedshiftDataSpec RedshiftDatabaseCredentials
redshiftDataSpec_databaseCredentials =
  Lens.lens
    (\RedshiftDataSpec' {databaseCredentials} -> databaseCredentials)
    (\s@RedshiftDataSpec' {} a -> s {databaseCredentials = a} :: RedshiftDataSpec)

-- | Describes an Amazon S3 location to store the result set of the
-- @SelectSqlQuery@ query.
redshiftDataSpec_s3StagingLocation :: Lens.Lens' RedshiftDataSpec Prelude.Text
redshiftDataSpec_s3StagingLocation =
  Lens.lens
    (\RedshiftDataSpec' {s3StagingLocation} -> s3StagingLocation)
    (\s@RedshiftDataSpec' {} a -> s {s3StagingLocation = a} :: RedshiftDataSpec)

-- No methods are defined here; the class's default implementations are used.
instance Prelude.Hashable RedshiftDataSpec

-- No methods are defined here; the class's default implementations are used.
instance Prelude.NFData RedshiftDataSpec

-- | Serialises the spec as a JSON object. The three optional fields are
-- emitted only when they hold a value; the four required fields are always
-- emitted.
instance Core.ToJSON RedshiftDataSpec where
  toJSON RedshiftDataSpec' {..} =
    Core.object
      ( Prelude.catMaybes
          [ -- Optional fields: mapping over the 'Prelude.Maybe' yields no
            -- pair when the field is unset, and 'Prelude.catMaybes' drops it.
            ("DataSchemaUri" Core..=) Prelude.<$> dataSchemaUri,
            ("DataSchema" Core..=) Prelude.<$> dataSchema,
            ("DataRearrangement" Core..=)
              Prelude.<$> dataRearrangement,
            -- Required fields: always present in the output object.
            Prelude.Just
              ("DatabaseInformation" Core..= databaseInformation),
            Prelude.Just
              ("SelectSqlQuery" Core..= selectSqlQuery),
            Prelude.Just
              ("DatabaseCredentials" Core..= databaseCredentials),
            Prelude.Just
              ("S3StagingLocation" Core..= s3StagingLocation)
          ]
      )