{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE DuplicateRecordFields #-}
{-# LANGUAGE NamedFieldPuns #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE StrictData #-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# OPTIONS_GHC -fno-warn-unused-imports #-}
{-# OPTIONS_GHC -fno-warn-unused-matches #-}

-- Derived from AWS service descriptions, licensed under Apache 2.0.

-- |
-- Module      : Amazonka.MachineLearning.Types.RDSDataSpec
-- Copyright   : (c) 2013-2021 Brendan Hay
-- License     : Mozilla Public License, v. 2.0.
-- Maintainer  : Brendan Hay <brendan.g.hay+amazonka@gmail.com>
-- Stability   : auto-generated
-- Portability : non-portable (GHC extensions)
module Amazonka.MachineLearning.Types.RDSDataSpec where

import qualified Amazonka.Core as Core
import qualified Amazonka.Lens as Lens
import Amazonka.MachineLearning.Types.RDSDatabase
import Amazonka.MachineLearning.Types.RDSDatabaseCredentials
import qualified Amazonka.Prelude as Prelude

-- | The data specification of an Amazon Relational Database Service (Amazon
-- RDS) @DataSource@.
--
-- /See:/ 'newRDSDataSpec' smart constructor.
data RDSDataSpec = RDSDataSpec'
  { -- | The Amazon S3 location of the @DataSchema@.
    dataSchemaUri :: Prelude.Maybe Prelude.Text,
    -- | A JSON string that represents the schema for an Amazon RDS
    -- @DataSource@. The @DataSchema@ defines the structure of the observation
    -- data in the data file(s) referenced in the @DataSource@.
    --
    -- A @DataSchema@ is not required if you specify a @DataSchemaUri@.
    --
    -- Define your @DataSchema@ as a series of key-value pairs. @attributes@
    -- and @excludedVariableNames@ have an array of key-value pairs for their
    -- value. Use the following format to define your @DataSchema@.
    --
    -- { \"version\": \"1.0\",
    --
    -- \"recordAnnotationFieldName\": \"F1\",
    --
    -- \"recordWeightFieldName\": \"F2\",
    --
    -- \"targetFieldName\": \"F3\",
    --
    -- \"dataFormat\": \"CSV\",
    --
    -- \"dataFileContainsHeader\": true,
    --
    -- \"attributes\": [
    --
    -- { \"fieldName\": \"F1\", \"fieldType\": \"TEXT\" }, { \"fieldName\":
    -- \"F2\", \"fieldType\": \"NUMERIC\" }, { \"fieldName\": \"F3\",
    -- \"fieldType\": \"CATEGORICAL\" }, { \"fieldName\": \"F4\",
    -- \"fieldType\": \"NUMERIC\" }, { \"fieldName\": \"F5\", \"fieldType\":
    -- \"CATEGORICAL\" }, { \"fieldName\": \"F6\", \"fieldType\": \"TEXT\" }, {
    -- \"fieldName\": \"F7\", \"fieldType\": \"WEIGHTED_INT_SEQUENCE\" }, {
    -- \"fieldName\": \"F8\", \"fieldType\": \"WEIGHTED_STRING_SEQUENCE\" } ],
    --
    -- \"excludedVariableNames\": [ \"F6\" ] }
    dataSchema :: Prelude.Maybe Prelude.Text,
    -- | A JSON string that represents the splitting and rearrangement
    -- processing to be applied to a @DataSource@. If the @DataRearrangement@
    -- parameter is not provided, all of the input data is used to create the
    -- @Datasource@.
    --
    -- There are multiple parameters that control what data is used to create
    -- a datasource:
    --
    -- -   __@percentBegin@__
    --
    --     Use @percentBegin@ to indicate the beginning of the range of the
    --     data used to create the Datasource. If you do not include
    --     @percentBegin@ and @percentEnd@, Amazon ML includes all of the data
    --     when creating the datasource.
    --
    -- -   __@percentEnd@__
    --
    --     Use @percentEnd@ to indicate the end of the range of the data used
    --     to create the Datasource. If you do not include @percentBegin@ and
    --     @percentEnd@, Amazon ML includes all of the data when creating the
    --     datasource.
    --
    -- -   __@complement@__
    --
    --     The @complement@ parameter instructs Amazon ML to use the data that
    --     is not included in the range of @percentBegin@ to @percentEnd@ to
    --     create a datasource. The @complement@ parameter is useful if you
    --     need to create complementary datasources for training and
    --     evaluation. To create a complementary datasource, use the same
    --     values for @percentBegin@ and @percentEnd@, along with the
    --     @complement@ parameter.
    --
    --     For example, the following two datasources do not share any data,
    --     and can be used to train and evaluate a model. The first datasource
    --     has 25 percent of the data, and the second one has 75 percent of
    --     the data.
    --
    --     Datasource for evaluation:
    --     @{\"splitting\":{\"percentBegin\":0, \"percentEnd\":25}}@
    --
    --     Datasource for training:
    --     @{\"splitting\":{\"percentBegin\":0, \"percentEnd\":25, \"complement\":\"true\"}}@
    --
    -- -   __@strategy@__
    --
    --     To change how Amazon ML splits the data for a datasource, use the
    --     @strategy@ parameter.
    --
    --     The default value for the @strategy@ parameter is @sequential@,
    --     meaning that Amazon ML takes all of the data records between the
    --     @percentBegin@ and @percentEnd@ parameters for the datasource, in
    --     the order that the records appear in the input data.
    --
    --     The following two @DataRearrangement@ lines are examples of
    --     sequentially ordered training and evaluation datasources:
    --
    --     Datasource for evaluation:
    --     @{\"splitting\":{\"percentBegin\":70, \"percentEnd\":100, \"strategy\":\"sequential\"}}@
    --
    --     Datasource for training:
    --     @{\"splitting\":{\"percentBegin\":70, \"percentEnd\":100, \"strategy\":\"sequential\", \"complement\":\"true\"}}@
    --
    --     To randomly split the input data into the proportions indicated by
    --     the @percentBegin@ and @percentEnd@ parameters, set the @strategy@
    --     parameter to @random@ and provide a string that is used as the seed
    --     value for the random data splitting (for example, you can use the
    --     S3 path to your data as the random seed string). If you choose the
    --     random split strategy, Amazon ML assigns each row of data a
    --     pseudo-random number between 0 and 100, and then selects the rows
    --     that have an assigned number between @percentBegin@ and
    --     @percentEnd@. Pseudo-random numbers are assigned using both the
    --     input seed string value and the byte offset as a seed, so changing
    --     the data results in a different split. Any existing ordering is
    --     preserved. The random splitting strategy ensures that variables in
    --     the training and evaluation data are distributed similarly. It is
    --     useful in the cases where the input data may have an implicit sort
    --     order, which would otherwise result in training and evaluation
    --     datasources containing non-similar data records.
    --
    --     The following two @DataRearrangement@ lines are examples of
    --     non-sequentially ordered training and evaluation datasources:
    --
    --     Datasource for evaluation:
    --     @{\"splitting\":{\"percentBegin\":70, \"percentEnd\":100, \"strategy\":\"random\", \"randomSeed\"=\"s3:\/\/my_s3_path\/bucket\/file.csv\"}}@
    --
    --     Datasource for training:
    --     @{\"splitting\":{\"percentBegin\":70, \"percentEnd\":100, \"strategy\":\"random\", \"randomSeed\"=\"s3:\/\/my_s3_path\/bucket\/file.csv\", \"complement\":\"true\"}}@
    dataRearrangement :: Prelude.Maybe Prelude.Text,
    -- | Describes the @DatabaseName@ and @InstanceIdentifier@ of an Amazon RDS
    -- database.
    databaseInformation :: RDSDatabase,
    -- | The query that is used to retrieve the observation data for the
    -- @DataSource@.
    selectSqlQuery :: Prelude.Text,
    -- | The AWS Identity and Access Management (IAM) credentials that are used
    -- to connect to the Amazon RDS database.
    databaseCredentials :: RDSDatabaseCredentials,
    -- | The Amazon S3 location for staging Amazon RDS data. The data retrieved
    -- from Amazon RDS using @SelectSqlQuery@ is stored in this location.
    s3StagingLocation :: Prelude.Text,
    -- | The role (DataPipelineDefaultResourceRole) assumed by an Amazon
    -- Elastic Compute Cloud (Amazon EC2) instance to carry out the copy
    -- operation from Amazon RDS to Amazon S3. For more information, see
    -- <https://docs.aws.amazon.com/datapipeline/latest/DeveloperGuide/dp-iam-roles.html Role templates>
    -- for data pipelines.
    resourceRole :: Prelude.Text,
    -- | The role (DataPipelineDefaultRole) assumed by the AWS Data Pipeline
    -- service to monitor the progress of the copy task from Amazon RDS to
    -- Amazon S3. For more information, see
    -- <https://docs.aws.amazon.com/datapipeline/latest/DeveloperGuide/dp-iam-roles.html Role templates>
    -- for data pipelines.
    serviceRole :: Prelude.Text,
    -- | The subnet ID to be used to access a VPC-based RDS DB instance. This
    -- attribute is used by Data Pipeline to carry out the copy task from
    -- Amazon RDS to Amazon S3.
    subnetId :: Prelude.Text,
    -- | The security group IDs to be used to access a VPC-based RDS DB
    -- instance. Ensure that there are appropriate ingress rules set up to
    -- allow access to the RDS DB instance. This attribute is used by Data
    -- Pipeline to carry out the copy operation from Amazon RDS to Amazon S3.
    securityGroupIds :: [Prelude.Text]
  }
  deriving (Prelude.Eq, Prelude.Read, Prelude.Show, Prelude.Generic)
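-- A hedged sketch of a complete @DataSchema@ value as it could be supplied
-- in the 'dataSchema' field: the field is a raw JSON 'Prelude.Text', so the
-- schema documented above is written out as one escaped string literal (the
-- field names here are illustrative placeholders, not fixed by the API):
--
-- > exampleSchema :: Prelude.Text
-- > exampleSchema =
-- >   "{\"version\":\"1.0\",\"targetFieldName\":\"F3\",\"dataFormat\":\"CSV\",\
-- >   \\"dataFileContainsHeader\":true,\
-- >   \\"attributes\":[{\"fieldName\":\"F1\",\"fieldType\":\"TEXT\"},\
-- >   \{\"fieldName\":\"F2\",\"fieldType\":\"NUMERIC\"},\
-- >   \{\"fieldName\":\"F3\",\"fieldType\":\"CATEGORICAL\"}],\
-- >   \\"excludedVariableNames\":[]}"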
-- |
-- Create a value of 'RDSDataSpec' with all optional fields omitted.
--
-- Use <https://hackage.haskell.org/package/generic-lens generic-lens> or <https://hackage.haskell.org/package/optics optics> to modify other optional fields.
--
-- The following record fields are available, with the corresponding lenses provided
-- for backwards compatibility:
--
-- 'dataSchemaUri', 'rDSDataSpec_dataSchemaUri' - The Amazon S3 location of the @DataSchema@.
--
-- 'dataSchema', 'rDSDataSpec_dataSchema' - A JSON string that represents the schema for an Amazon RDS
-- @DataSource@. The @DataSchema@ defines the structure of the observation
-- data in the data file(s) referenced in the @DataSource@.
--
-- A @DataSchema@ is not required if you specify a @DataSchemaUri@.
--
-- Define your @DataSchema@ as a series of key-value pairs. @attributes@
-- and @excludedVariableNames@ have an array of key-value pairs for their
-- value. Use the following format to define your @DataSchema@.
--
-- { \"version\": \"1.0\",
--
-- \"recordAnnotationFieldName\": \"F1\",
--
-- \"recordWeightFieldName\": \"F2\",
--
-- \"targetFieldName\": \"F3\",
--
-- \"dataFormat\": \"CSV\",
--
-- \"dataFileContainsHeader\": true,
--
-- \"attributes\": [
--
-- { \"fieldName\": \"F1\", \"fieldType\": \"TEXT\" }, { \"fieldName\":
-- \"F2\", \"fieldType\": \"NUMERIC\" }, { \"fieldName\": \"F3\",
-- \"fieldType\": \"CATEGORICAL\" }, { \"fieldName\": \"F4\",
-- \"fieldType\": \"NUMERIC\" }, { \"fieldName\": \"F5\", \"fieldType\":
-- \"CATEGORICAL\" }, { \"fieldName\": \"F6\", \"fieldType\": \"TEXT\" }, {
-- \"fieldName\": \"F7\", \"fieldType\": \"WEIGHTED_INT_SEQUENCE\" }, {
-- \"fieldName\": \"F8\", \"fieldType\": \"WEIGHTED_STRING_SEQUENCE\" } ],
--
-- \"excludedVariableNames\": [ \"F6\" ] }
--
-- 'dataRearrangement', 'rDSDataSpec_dataRearrangement' - A JSON string that represents the splitting and rearrangement
-- processing to be applied to a @DataSource@. If the @DataRearrangement@
-- parameter is not provided, all of the input data is used to create the
-- @Datasource@.
--
-- There are multiple parameters that control what data is used to create
-- a datasource:
--
-- -   __@percentBegin@__
--
--     Use @percentBegin@ to indicate the beginning of the range of the
--     data used to create the Datasource. If you do not include
--     @percentBegin@ and @percentEnd@, Amazon ML includes all of the data
--     when creating the datasource.
--
-- -   __@percentEnd@__
--
--     Use @percentEnd@ to indicate the end of the range of the data used
--     to create the Datasource. If you do not include @percentBegin@ and
--     @percentEnd@, Amazon ML includes all of the data when creating the
--     datasource.
--
-- -   __@complement@__
--
--     The @complement@ parameter instructs Amazon ML to use the data that
--     is not included in the range of @percentBegin@ to @percentEnd@ to
--     create a datasource. The @complement@ parameter is useful if you
--     need to create complementary datasources for training and
--     evaluation. To create a complementary datasource, use the same
--     values for @percentBegin@ and @percentEnd@, along with the
--     @complement@ parameter.
--
--     For example, the following two datasources do not share any data,
--     and can be used to train and evaluate a model. The first datasource
--     has 25 percent of the data, and the second one has 75 percent of
--     the data.
--
--     Datasource for evaluation:
--     @{\"splitting\":{\"percentBegin\":0, \"percentEnd\":25}}@
--
--     Datasource for training:
--     @{\"splitting\":{\"percentBegin\":0, \"percentEnd\":25, \"complement\":\"true\"}}@
--
-- -   __@strategy@__
--
--     To change how Amazon ML splits the data for a datasource, use the
--     @strategy@ parameter.
--
--     The default value for the @strategy@ parameter is @sequential@,
--     meaning that Amazon ML takes all of the data records between the
--     @percentBegin@ and @percentEnd@ parameters for the datasource, in
--     the order that the records appear in the input data.
--
--     The following two @DataRearrangement@ lines are examples of
--     sequentially ordered training and evaluation datasources:
--
--     Datasource for evaluation:
--     @{\"splitting\":{\"percentBegin\":70, \"percentEnd\":100, \"strategy\":\"sequential\"}}@
--
--     Datasource for training:
--     @{\"splitting\":{\"percentBegin\":70, \"percentEnd\":100, \"strategy\":\"sequential\", \"complement\":\"true\"}}@
--
--     To randomly split the input data into the proportions indicated by
--     the @percentBegin@ and @percentEnd@ parameters, set the @strategy@
--     parameter to @random@ and provide a string that is used as the seed
--     value for the random data splitting (for example, you can use the
--     S3 path to your data as the random seed string). If you choose the
--     random split strategy, Amazon ML assigns each row of data a
--     pseudo-random number between 0 and 100, and then selects the rows
--     that have an assigned number between @percentBegin@ and
--     @percentEnd@. Pseudo-random numbers are assigned using both the
--     input seed string value and the byte offset as a seed, so changing
--     the data results in a different split. Any existing ordering is
--     preserved. The random splitting strategy ensures that variables in
--     the training and evaluation data are distributed similarly. It is
--     useful in the cases where the input data may have an implicit sort
--     order, which would otherwise result in training and evaluation
--     datasources containing non-similar data records.
--
--     The following two @DataRearrangement@ lines are examples of
--     non-sequentially ordered training and evaluation datasources:
--
--     Datasource for evaluation:
--     @{\"splitting\":{\"percentBegin\":70, \"percentEnd\":100, \"strategy\":\"random\", \"randomSeed\"=\"s3:\/\/my_s3_path\/bucket\/file.csv\"}}@
--
--     Datasource for training:
--     @{\"splitting\":{\"percentBegin\":70, \"percentEnd\":100, \"strategy\":\"random\", \"randomSeed\"=\"s3:\/\/my_s3_path\/bucket\/file.csv\", \"complement\":\"true\"}}@
--
-- 'databaseInformation', 'rDSDataSpec_databaseInformation' - Describes the @DatabaseName@ and @InstanceIdentifier@ of an Amazon RDS
-- database.
--
-- 'selectSqlQuery', 'rDSDataSpec_selectSqlQuery' - The query that is used to retrieve the observation data for the
-- @DataSource@.
--
-- 'databaseCredentials', 'rDSDataSpec_databaseCredentials' - The AWS Identity and Access Management (IAM) credentials that are used
-- to connect to the Amazon RDS database.
--
-- 's3StagingLocation', 'rDSDataSpec_s3StagingLocation' - The Amazon S3 location for staging Amazon RDS data. The data retrieved
-- from Amazon RDS using @SelectSqlQuery@ is stored in this location.
--
-- 'resourceRole', 'rDSDataSpec_resourceRole' - The role (DataPipelineDefaultResourceRole) assumed by an Amazon
-- Elastic Compute Cloud (Amazon EC2) instance to carry out the copy
-- operation from Amazon RDS to Amazon S3. For more information, see
-- <https://docs.aws.amazon.com/datapipeline/latest/DeveloperGuide/dp-iam-roles.html Role templates>
-- for data pipelines.
--
-- 'serviceRole', 'rDSDataSpec_serviceRole' - The role (DataPipelineDefaultRole) assumed by the AWS Data Pipeline
-- service to monitor the progress of the copy task from Amazon RDS to
-- Amazon S3. For more information, see
-- <https://docs.aws.amazon.com/datapipeline/latest/DeveloperGuide/dp-iam-roles.html Role templates>
-- for data pipelines.
--
-- 'subnetId', 'rDSDataSpec_subnetId' - The subnet ID to be used to access a VPC-based RDS DB instance. This
-- attribute is used by Data Pipeline to carry out the copy task from
-- Amazon RDS to Amazon S3.
--
-- 'securityGroupIds', 'rDSDataSpec_securityGroupIds' - The security group IDs to be used to access a VPC-based RDS DB
-- instance. Ensure that there are appropriate ingress rules set up to
-- allow access to the RDS DB instance. This attribute is used by Data
-- Pipeline to carry out the copy operation from Amazon RDS to Amazon S3.
newRDSDataSpec ::
  -- | 'databaseInformation'
  RDSDatabase ->
  -- | 'selectSqlQuery'
  Prelude.Text ->
  -- | 'databaseCredentials'
  RDSDatabaseCredentials ->
  -- | 's3StagingLocation'
  Prelude.Text ->
  -- | 'resourceRole'
  Prelude.Text ->
  -- | 'serviceRole'
  Prelude.Text ->
  -- | 'subnetId'
  Prelude.Text ->
  RDSDataSpec
newRDSDataSpec
  pDatabaseInformation_
  pSelectSqlQuery_
  pDatabaseCredentials_
  pS3StagingLocation_
  pResourceRole_
  pServiceRole_
  pSubnetId_ =
    RDSDataSpec'
      { dataSchemaUri = Prelude.Nothing,
        dataSchema = Prelude.Nothing,
        dataRearrangement = Prelude.Nothing,
        databaseInformation = pDatabaseInformation_,
        selectSqlQuery = pSelectSqlQuery_,
        databaseCredentials = pDatabaseCredentials_,
        s3StagingLocation = pS3StagingLocation_,
        resourceRole = pResourceRole_,
        serviceRole = pServiceRole_,
        subnetId = pSubnetId_,
        securityGroupIds = Prelude.mempty
      }
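-- A minimal construction sketch (with OverloadedStrings). 'newRDSDataSpec'
-- takes the required fields in the order listed above; the sibling smart
-- constructors 'newRDSDatabase' and 'newRDSDatabaseCredentials' are assumed
-- from this package with their arguments in generated field order, and every
-- identifier, role, and location below is a placeholder:
--
-- > exampleSpec :: RDSDataSpec
-- > exampleSpec =
-- >   (newRDSDataSpec
-- >      (newRDSDatabase "my-rds-instance" "mydb")
-- >      "SELECT * FROM observations"
-- >      (newRDSDatabaseCredentials "my-username" "my-password")
-- >      "s3://my-bucket/staging/"
-- >      "DataPipelineDefaultResourceRole"
-- >      "DataPipelineDefaultRole"
-- >      "subnet-0123456789abcdef0")
-- >     { securityGroupIds = ["sg-0123456789abcdef0"] }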
-- | The Amazon S3 location of the @DataSchema@.
rDSDataSpec_dataSchemaUri :: Lens.Lens' RDSDataSpec (Prelude.Maybe Prelude.Text)
rDSDataSpec_dataSchemaUri = Lens.lens (\RDSDataSpec' {dataSchemaUri} -> dataSchemaUri) (\s@RDSDataSpec' {} a -> s {dataSchemaUri = a} :: RDSDataSpec)
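-- A small usage sketch for the generated lenses (assuming the usual lens
-- combinators such as @(^.)@, @(&)@, and @(?~)@ are in scope, e.g. from
-- Control.Lens or the re-exports in "Amazonka.Lens"; 'exampleSpec' is the
-- hypothetical value sketched above):
--
-- > exampleSpec ^. rDSDataSpec_dataSchemaUri
-- > -- ==> Nothing
-- > exampleSpec & rDSDataSpec_dataSchemaUri ?~ "s3://my-bucket/schema.json"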
-- | A JSON string that represents the schema for an Amazon RDS
-- @DataSource@. The @DataSchema@ defines the structure of the observation
-- data in the data file(s) referenced in the @DataSource@.
--
-- A @DataSchema@ is not required if you specify a @DataSchemaUri@.
--
-- Define your @DataSchema@ as a series of key-value pairs. @attributes@
-- and @excludedVariableNames@ have an array of key-value pairs for their
-- value. Use the following format to define your @DataSchema@.
--
-- { \"version\": \"1.0\",
--
-- \"recordAnnotationFieldName\": \"F1\",
--
-- \"recordWeightFieldName\": \"F2\",
--
-- \"targetFieldName\": \"F3\",
--
-- \"dataFormat\": \"CSV\",
--
-- \"dataFileContainsHeader\": true,
--
-- \"attributes\": [
--
-- { \"fieldName\": \"F1\", \"fieldType\": \"TEXT\" }, { \"fieldName\":
-- \"F2\", \"fieldType\": \"NUMERIC\" }, { \"fieldName\": \"F3\",
-- \"fieldType\": \"CATEGORICAL\" }, { \"fieldName\": \"F4\",
-- \"fieldType\": \"NUMERIC\" }, { \"fieldName\": \"F5\", \"fieldType\":
-- \"CATEGORICAL\" }, { \"fieldName\": \"F6\", \"fieldType\": \"TEXT\" }, {
-- \"fieldName\": \"F7\", \"fieldType\": \"WEIGHTED_INT_SEQUENCE\" }, {
-- \"fieldName\": \"F8\", \"fieldType\": \"WEIGHTED_STRING_SEQUENCE\" } ],
--
-- \"excludedVariableNames\": [ \"F6\" ] }
rDSDataSpec_dataSchema :: Lens.Lens' RDSDataSpec (Prelude.Maybe Prelude.Text)
rDSDataSpec_dataSchema = Lens.lens (\RDSDataSpec' {dataSchema} -> dataSchema) (\s@RDSDataSpec' {} a -> s {dataSchema = a} :: RDSDataSpec)

-- | A JSON string that represents the splitting and rearrangement
-- processing to be applied to a @DataSource@. If the @DataRearrangement@
-- parameter is not provided, all of the input data is used to create the
-- @Datasource@.
--
-- There are multiple parameters that control what data is used to create
-- a datasource:
--
-- -   __@percentBegin@__
--
--     Use @percentBegin@ to indicate the beginning of the range of the
--     data used to create the Datasource. If you do not include
--     @percentBegin@ and @percentEnd@, Amazon ML includes all of the data
--     when creating the datasource.
--
-- -   __@percentEnd@__
--
--     Use @percentEnd@ to indicate the end of the range of the data used
--     to create the Datasource. If you do not include @percentBegin@ and
--     @percentEnd@, Amazon ML includes all of the data when creating the
--     datasource.
--
-- -   __@complement@__
--
--     The @complement@ parameter instructs Amazon ML to use the data that
--     is not included in the range of @percentBegin@ to @percentEnd@ to
--     create a datasource. The @complement@ parameter is useful if you
--     need to create complementary datasources for training and
--     evaluation. To create a complementary datasource, use the same
--     values for @percentBegin@ and @percentEnd@, along with the
--     @complement@ parameter.
--
--     For example, the following two datasources do not share any data,
--     and can be used to train and evaluate a model. The first datasource
--     has 25 percent of the data, and the second one has 75 percent of
--     the data.
--
--     Datasource for evaluation:
--     @{\"splitting\":{\"percentBegin\":0, \"percentEnd\":25}}@
--
--     Datasource for training:
--     @{\"splitting\":{\"percentBegin\":0, \"percentEnd\":25, \"complement\":\"true\"}}@
--
-- -   __@strategy@__
--
--     To change how Amazon ML splits the data for a datasource, use the
--     @strategy@ parameter.
--
--     The default value for the @strategy@ parameter is @sequential@,
--     meaning that Amazon ML takes all of the data records between the
--     @percentBegin@ and @percentEnd@ parameters for the datasource, in
--     the order that the records appear in the input data.
--
--     The following two @DataRearrangement@ lines are examples of
--     sequentially ordered training and evaluation datasources:
--
--     Datasource for evaluation:
--     @{\"splitting\":{\"percentBegin\":70, \"percentEnd\":100, \"strategy\":\"sequential\"}}@
--
--     Datasource for training:
--     @{\"splitting\":{\"percentBegin\":70, \"percentEnd\":100, \"strategy\":\"sequential\", \"complement\":\"true\"}}@
--
--     To randomly split the input data into the proportions indicated by
--     the @percentBegin@ and @percentEnd@ parameters, set the @strategy@
--     parameter to @random@ and provide a string that is used as the seed
--     value for the random data splitting (for example, you can use the
--     S3 path to your data as the random seed string). If you choose the
--     random split strategy, Amazon ML assigns each row of data a
--     pseudo-random number between 0 and 100, and then selects the rows
--     that have an assigned number between @percentBegin@ and
--     @percentEnd@. Pseudo-random numbers are assigned using both the
--     input seed string value and the byte offset as a seed, so changing
--     the data results in a different split. Any existing ordering is
--     preserved. The random splitting strategy ensures that variables in
--     the training and evaluation data are distributed similarly. It is
--     useful in the cases where the input data may have an implicit sort
--     order, which would otherwise result in training and evaluation
--     datasources containing non-similar data records.
--
--     The following two @DataRearrangement@ lines are examples of
--     non-sequentially ordered training and evaluation datasources:
--
--     Datasource for evaluation:
--     @{\"splitting\":{\"percentBegin\":70, \"percentEnd\":100, \"strategy\":\"random\", \"randomSeed\"=\"s3:\/\/my_s3_path\/bucket\/file.csv\"}}@
--
--     Datasource for training:
--     @{\"splitting\":{\"percentBegin\":70, \"percentEnd\":100, \"strategy\":\"random\", \"randomSeed\"=\"s3:\/\/my_s3_path\/bucket\/file.csv\", \"complement\":\"true\"}}@
rDSDataSpec_dataRearrangement :: Lens.Lens' RDSDataSpec (Prelude.Maybe Prelude.Text)
rDSDataSpec_dataRearrangement = Lens.lens (\RDSDataSpec' {dataRearrangement} -> dataRearrangement) (\s@RDSDataSpec' {} a -> s {dataRearrangement = a} :: RDSDataSpec)
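-- A hedged sketch of attaching a @DataRearrangement@ to a spec. Because the
-- field is a raw JSON 'Prelude.Text', a plain record update suffices; the
-- value reproduces the random-strategy example from the documentation above
-- (the documentation's @\"randomSeed\"=...@ is written with @:@ here so the
-- string stays valid JSON):
--
-- > withRandomSplit :: RDSDataSpec -> RDSDataSpec
-- > withRandomSplit spec =
-- >   spec
-- >     { dataRearrangement =
-- >         Prelude.Just
-- >           "{\"splitting\":{\"percentBegin\":70,\"percentEnd\":100,\
-- >           \\"strategy\":\"random\",\
-- >           \\"randomSeed\":\"s3://my_s3_path/bucket/file.csv\"}}"
-- >     }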
-- | Describes the @DatabaseName@ and @InstanceIdentifier@ of an Amazon RDS
-- database.
rDSDataSpec_databaseInformation :: Lens.Lens' RDSDataSpec RDSDatabase
rDSDataSpec_databaseInformation = Lens.lens (\RDSDataSpec' {databaseInformation} -> databaseInformation) (\s@RDSDataSpec' {} a -> s {databaseInformation = a} :: RDSDataSpec)

-- | The query that is used to retrieve the observation data for the
-- @DataSource@.
rDSDataSpec_selectSqlQuery :: Lens.Lens' RDSDataSpec Prelude.Text
rDSDataSpec_selectSqlQuery = Lens.lens (\RDSDataSpec' {selectSqlQuery} -> selectSqlQuery) (\s@RDSDataSpec' {} a -> s {selectSqlQuery = a} :: RDSDataSpec)

-- | The AWS Identity and Access Management (IAM) credentials that are used
-- to connect to the Amazon RDS database.
rDSDataSpec_databaseCredentials :: Lens.Lens' RDSDataSpec RDSDatabaseCredentials
rDSDataSpec_databaseCredentials = Lens.lens (\RDSDataSpec' {databaseCredentials} -> databaseCredentials) (\s@RDSDataSpec' {} a -> s {databaseCredentials = a} :: RDSDataSpec)

-- | The Amazon S3 location for staging Amazon RDS data. The data retrieved
-- from Amazon RDS using @SelectSqlQuery@ is stored in this location.
rDSDataSpec_s3StagingLocation :: Lens.Lens' RDSDataSpec Prelude.Text
rDSDataSpec_s3StagingLocation = Lens.lens (\RDSDataSpec' {s3StagingLocation} -> s3StagingLocation) (\s@RDSDataSpec' {} a -> s {s3StagingLocation = a} :: RDSDataSpec)

-- | The role (DataPipelineDefaultResourceRole) assumed by an Amazon Elastic
-- Compute Cloud (Amazon EC2) instance to carry out the copy operation from
-- Amazon RDS to Amazon S3. For more information, see
-- <https://docs.aws.amazon.com/datapipeline/latest/DeveloperGuide/dp-iam-roles.html Role templates>
-- for data pipelines.
rDSDataSpec_resourceRole :: Lens.Lens' RDSDataSpec Prelude.Text
rDSDataSpec_resourceRole = Lens.lens (\RDSDataSpec' {resourceRole} -> resourceRole) (\s@RDSDataSpec' {} a -> s {resourceRole = a} :: RDSDataSpec)

-- | The role (DataPipelineDefaultRole) assumed by the AWS Data Pipeline
-- service to monitor the progress of the copy task from Amazon RDS to
-- Amazon S3. For more information, see
-- <https://docs.aws.amazon.com/datapipeline/latest/DeveloperGuide/dp-iam-roles.html Role templates>
-- for data pipelines.
rDSDataSpec_serviceRole :: Lens.Lens' RDSDataSpec Prelude.Text
rDSDataSpec_serviceRole = Lens.lens (\RDSDataSpec' {serviceRole} -> serviceRole) (\s@RDSDataSpec' {} a -> s {serviceRole = a} :: RDSDataSpec)

-- | The subnet ID to be used to access a VPC-based RDS DB instance. This
-- attribute is used by Data Pipeline to carry out the copy task from Amazon
-- RDS to Amazon S3.
rDSDataSpec_subnetId :: Lens.Lens' RDSDataSpec Prelude.Text
rDSDataSpec_subnetId = Lens.lens (\RDSDataSpec' {subnetId} -> subnetId) (\s@RDSDataSpec' {} a -> s {subnetId = a} :: RDSDataSpec)
-- | The security group IDs to be used to access a VPC-based RDS DB
-- instance. Ensure that there are appropriate ingress rules set up to allow
-- access to the RDS DB instance. This attribute is used by Data Pipeline to
-- carry out the copy operation from Amazon RDS to Amazon S3.
rDSDataSpec_securityGroupIds :: Lens.Lens' RDSDataSpec [Prelude.Text]
rDSDataSpec_securityGroupIds = Lens.lens (\RDSDataSpec' {securityGroupIds} -> securityGroupIds) (\s@RDSDataSpec' {} a -> s {securityGroupIds = a} :: RDSDataSpec) Prelude.. Lens.coerced

instance Prelude.Hashable RDSDataSpec

instance Prelude.NFData RDSDataSpec
instance Core.ToJSON RDSDataSpec where
  toJSON RDSDataSpec' {..} =
    Core.object
      ( Prelude.catMaybes
          [ ("DataSchemaUri" Core..=) Prelude.<$> dataSchemaUri,
            ("DataSchema" Core..=) Prelude.<$> dataSchema,
            ("DataRearrangement" Core..=) Prelude.<$> dataRearrangement,
            Prelude.Just
              ("DatabaseInformation" Core..= databaseInformation),
            Prelude.Just ("SelectSqlQuery" Core..= selectSqlQuery),
            Prelude.Just
              ("DatabaseCredentials" Core..= databaseCredentials),
            Prelude.Just ("S3StagingLocation" Core..= s3StagingLocation),
            Prelude.Just ("ResourceRole" Core..= resourceRole),
            Prelude.Just ("ServiceRole" Core..= serviceRole),
            Prelude.Just ("SubnetId" Core..= subnetId),
            Prelude.Just ("SecurityGroupIds" Core..= securityGroupIds)
          ]
      )
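-- The 'Core.ToJSON' instance above drops the three optional fields when
-- they are 'Prelude.Nothing' (via 'Prelude.catMaybes'), so a freshly
-- constructed spec serialises to an object containing only the required
-- keys. A sketch, assuming 'Core.ToJSON' re-exports aeson's class so that
-- 'Data.Aeson.encode' applies directly ('exampleSpec' is the hypothetical
-- value sketched earlier):
--
-- > Data.Aeson.encode exampleSpec
-- > -- ==> {"DatabaseInformation":{...},"SelectSqlQuery":"SELECT * FROM observations",...}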