{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE DuplicateRecordFields #-}
{-# LANGUAGE NamedFieldPuns #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE StrictData #-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# OPTIONS_GHC -fno-warn-unused-imports #-}
{-# OPTIONS_GHC -fno-warn-unused-matches #-}

-- Derived from AWS service descriptions, licensed under Apache 2.0.

-- |
-- Module      : Amazonka.MachineLearning.Types.RDSDataSpec
-- Copyright   : (c) 2013-2021 Brendan Hay
-- License     : Mozilla Public License, v. 2.0.
-- Maintainer  : Brendan Hay <brendan.g.hay+amazonka@gmail.com>
-- Stability   : auto-generated
-- Portability : non-portable (GHC extensions)
module Amazonka.MachineLearning.Types.RDSDataSpec where

import qualified Amazonka.Core as Core
import qualified Amazonka.Lens as Lens
import Amazonka.MachineLearning.Types.RDSDatabase
import Amazonka.MachineLearning.Types.RDSDatabaseCredentials
import qualified Amazonka.Prelude as Prelude

-- | The data specification of an Amazon Relational Database Service (Amazon
-- RDS) @DataSource@.
--
-- /See:/ 'newRDSDataSpec' smart constructor.
data RDSDataSpec = RDSDataSpec'
  { -- | The Amazon S3 location of the @DataSchema@.
    dataSchemaUri :: Prelude.Maybe Prelude.Text,
    -- | A JSON string that represents the schema for an Amazon RDS
    -- @DataSource@. The @DataSchema@ defines the structure of the observation
    -- data in the data file(s) referenced in the @DataSource@.
    --
    -- A @DataSchema@ is not required if you specify a @DataSchemaUri@.
    --
    -- Define your @DataSchema@ as a series of key-value pairs. @attributes@
    -- and @excludedVariableNames@ have an array of key-value pairs for their
    -- value. Use the following format to define your @DataSchema@.
    --
    -- { \"version\": \"1.0\",
    --
    -- \"recordAnnotationFieldName\": \"F1\",
    --
    -- \"recordWeightFieldName\": \"F2\",
    --
    -- \"targetFieldName\": \"F3\",
    --
    -- \"dataFormat\": \"CSV\",
    --
    -- \"dataFileContainsHeader\": true,
    --
    -- \"attributes\": [
    --
    -- { \"fieldName\": \"F1\", \"fieldType\": \"TEXT\" }, { \"fieldName\":
    -- \"F2\", \"fieldType\": \"NUMERIC\" }, { \"fieldName\": \"F3\",
    -- \"fieldType\": \"CATEGORICAL\" }, { \"fieldName\": \"F4\",
    -- \"fieldType\": \"NUMERIC\" }, { \"fieldName\": \"F5\", \"fieldType\":
    -- \"CATEGORICAL\" }, { \"fieldName\": \"F6\", \"fieldType\": \"TEXT\" }, {
    -- \"fieldName\": \"F7\", \"fieldType\": \"WEIGHTED_INT_SEQUENCE\" }, {
    -- \"fieldName\": \"F8\", \"fieldType\": \"WEIGHTED_STRING_SEQUENCE\" } ],
    --
    -- \"excludedVariableNames\": [ \"F6\" ] }
    dataSchema :: Prelude.Maybe Prelude.Text,
    -- | A JSON string that represents the splitting and rearrangement
    -- processing to be applied to a @DataSource@. If the @DataRearrangement@
    -- parameter is not provided, all of the input data is used to create the
    -- @Datasource@.
    --
    -- There are multiple parameters that control what data is used to create
    -- a datasource:
    --
    -- -   __@percentBegin@__
    --
    --     Use @percentBegin@ to indicate the beginning of the range of the
    --     data used to create the Datasource. If you do not include
    --     @percentBegin@ and @percentEnd@, Amazon ML includes all of the data
    --     when creating the datasource.
    --
    -- -   __@percentEnd@__
    --
    --     Use @percentEnd@ to indicate the end of the range of the data used
    --     to create the Datasource. If you do not include @percentBegin@ and
    --     @percentEnd@, Amazon ML includes all of the data when creating the
    --     datasource.
    --
    -- -   __@complement@__
    --
    --     The @complement@ parameter instructs Amazon ML to use the data that
    --     is not included in the range of @percentBegin@ to @percentEnd@ to
    --     create a datasource. The @complement@ parameter is useful if you
    --     need to create complementary datasources for training and
    --     evaluation. To create a complementary datasource, use the same
    --     values for @percentBegin@ and @percentEnd@, along with the
    --     @complement@ parameter.
    --
    --     For example, the following two datasources do not share any data,
    --     and can be used to train and evaluate a model. The first datasource
    --     has 25 percent of the data, and the second one has 75 percent of
    --     the data.
    --
    --     Datasource for evaluation:
    --     @{\"splitting\":{\"percentBegin\":0, \"percentEnd\":25}}@
    --
    --     Datasource for training:
    --     @{\"splitting\":{\"percentBegin\":0, \"percentEnd\":25, \"complement\":\"true\"}}@
    --
    -- -   __@strategy@__
    --
    --     To change how Amazon ML splits the data for a datasource, use the
    --     @strategy@ parameter.
    --
    --     The default value for the @strategy@ parameter is @sequential@,
    --     meaning that Amazon ML takes all of the data records between the
    --     @percentBegin@ and @percentEnd@ parameters for the datasource, in
    --     the order that the records appear in the input data.
    --
    --     The following two @DataRearrangement@ lines are examples of
    --     sequentially ordered training and evaluation datasources:
    --
    --     Datasource for evaluation:
    --     @{\"splitting\":{\"percentBegin\":70, \"percentEnd\":100, \"strategy\":\"sequential\"}}@
    --
    --     Datasource for training:
    --     @{\"splitting\":{\"percentBegin\":70, \"percentEnd\":100, \"strategy\":\"sequential\", \"complement\":\"true\"}}@
    --
    --     To randomly split the input data into the proportions indicated by
    --     the @percentBegin@ and @percentEnd@ parameters, set the @strategy@
    --     parameter to @random@ and provide a string that is used as the seed
    --     value for the random data splitting (for example, you can use the
    --     S3 path to your data as the random seed string). If you choose the
    --     random split strategy, Amazon ML assigns each row of data a
    --     pseudo-random number between 0 and 100, and then selects the rows
    --     that have an assigned number between @percentBegin@ and
    --     @percentEnd@. Pseudo-random numbers are assigned using both the
    --     input seed string value and the byte offset as a seed, so changing
    --     the data results in a different split. Any existing ordering is
    --     preserved. The random splitting strategy ensures that variables in
    --     the training and evaluation data are distributed similarly. It is
    --     useful in the cases where the input data may have an implicit sort
    --     order, which would otherwise result in training and evaluation
    --     datasources containing non-similar data records.
    --
    --     The following two @DataRearrangement@ lines are examples of
    --     non-sequentially ordered training and evaluation datasources:
    --
    --     Datasource for evaluation:
    --     @{\"splitting\":{\"percentBegin\":70, \"percentEnd\":100, \"strategy\":\"random\", \"randomSeed\"=\"s3:\/\/my_s3_path\/bucket\/file.csv\"}}@
    --
    --     Datasource for training:
    --     @{\"splitting\":{\"percentBegin\":70, \"percentEnd\":100, \"strategy\":\"random\", \"randomSeed\"=\"s3:\/\/my_s3_path\/bucket\/file.csv\", \"complement\":\"true\"}}@
    dataRearrangement :: Prelude.Maybe Prelude.Text,
    -- | Describes the @DatabaseName@ and @InstanceIdentifier@ of an Amazon RDS
    -- database.
    databaseInformation :: RDSDatabase,
    -- | The query that is used to retrieve the observation data for the
    -- @DataSource@.
    selectSqlQuery :: Prelude.Text,
    -- | The AWS Identity and Access Management (IAM) credentials that are used
    -- to connect to the Amazon RDS database.
    databaseCredentials :: RDSDatabaseCredentials,
    -- | The Amazon S3 location for staging Amazon RDS data. The data retrieved
    -- from Amazon RDS using @SelectSqlQuery@ is stored in this location.
    s3StagingLocation :: Prelude.Text,
    -- | The role (DataPipelineDefaultResourceRole) assumed by an Amazon
    -- Elastic Compute Cloud (Amazon EC2) instance to carry out the copy
    -- operation from Amazon RDS to Amazon S3. For more information, see
    -- <https://docs.aws.amazon.com/datapipeline/latest/DeveloperGuide/dp-iam-roles.html Role templates>
    -- for data pipelines.
    resourceRole :: Prelude.Text,
    -- | The role (DataPipelineDefaultRole) assumed by the AWS Data Pipeline
    -- service to monitor the progress of the copy task from Amazon RDS to
    -- Amazon S3. For more information, see
    -- <https://docs.aws.amazon.com/datapipeline/latest/DeveloperGuide/dp-iam-roles.html Role templates>
    -- for data pipelines.
    serviceRole :: Prelude.Text,
    -- | The subnet ID to be used to access a VPC-based RDS DB instance. This
    -- attribute is used by Data Pipeline to carry out the copy task from
    -- Amazon RDS to Amazon S3.
    subnetId :: Prelude.Text,
    -- | The security group IDs to be used to access a VPC-based RDS DB
    -- instance. Ensure that there are appropriate ingress rules set up to
    -- allow access to the RDS DB instance. This attribute is used by Data
    -- Pipeline to carry out the copy operation from Amazon RDS to Amazon S3.
    securityGroupIds :: [Prelude.Text]
  }
  deriving (Prelude.Eq, Prelude.Read, Prelude.Show, Prelude.Generic)
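-- A hedged sketch of a complete @DataSchema@ value as it could be supplied
-- in the 'dataSchema' field: the field is a raw JSON 'Prelude.Text', so the
-- schema documented above is written out as one escaped string literal (the
-- field names here are illustrative placeholders, not fixed by the API):
--
-- > exampleSchema :: Prelude.Text
-- > exampleSchema =
-- >   "{\"version\":\"1.0\",\"targetFieldName\":\"F3\",\"dataFormat\":\"CSV\",\
-- >   \\"dataFileContainsHeader\":true,\
-- >   \\"attributes\":[{\"fieldName\":\"F1\",\"fieldType\":\"TEXT\"},\
-- >   \{\"fieldName\":\"F2\",\"fieldType\":\"NUMERIC\"},\
-- >   \{\"fieldName\":\"F3\",\"fieldType\":\"CATEGORICAL\"}],\
-- >   \\"excludedVariableNames\":[]}"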
-- |
-- Create a value of 'RDSDataSpec' with all optional fields omitted.
--
-- Use <https://hackage.haskell.org/package/generic-lens generic-lens> or <https://hackage.haskell.org/package/optics optics> to modify other optional fields.
--
-- The following record fields are available, with the corresponding lenses provided
-- for backwards compatibility:
--
-- 'dataSchemaUri', 'rDSDataSpec_dataSchemaUri' - The Amazon S3 location of the @DataSchema@.
--
-- 'dataSchema', 'rDSDataSpec_dataSchema' - A JSON string that represents the schema for an Amazon RDS
-- @DataSource@. The @DataSchema@ defines the structure of the observation
-- data in the data file(s) referenced in the @DataSource@.
--
-- A @DataSchema@ is not required if you specify a @DataSchemaUri@.
--
-- Define your @DataSchema@ as a series of key-value pairs. @attributes@
-- and @excludedVariableNames@ have an array of key-value pairs for their
-- value. Use the following format to define your @DataSchema@.
--
-- { \"version\": \"1.0\",
--
-- \"recordAnnotationFieldName\": \"F1\",
--
-- \"recordWeightFieldName\": \"F2\",
--
-- \"targetFieldName\": \"F3\",
--
-- \"dataFormat\": \"CSV\",
--
-- \"dataFileContainsHeader\": true,
--
-- \"attributes\": [
--
-- { \"fieldName\": \"F1\", \"fieldType\": \"TEXT\" }, { \"fieldName\":
-- \"F2\", \"fieldType\": \"NUMERIC\" }, { \"fieldName\": \"F3\",
-- \"fieldType\": \"CATEGORICAL\" }, { \"fieldName\": \"F4\",
-- \"fieldType\": \"NUMERIC\" }, { \"fieldName\": \"F5\", \"fieldType\":
-- \"CATEGORICAL\" }, { \"fieldName\": \"F6\", \"fieldType\": \"TEXT\" }, {
-- \"fieldName\": \"F7\", \"fieldType\": \"WEIGHTED_INT_SEQUENCE\" }, {
-- \"fieldName\": \"F8\", \"fieldType\": \"WEIGHTED_STRING_SEQUENCE\" } ],
--
-- \"excludedVariableNames\": [ \"F6\" ] }
--
-- 'dataRearrangement', 'rDSDataSpec_dataRearrangement' - A JSON string that represents the splitting and rearrangement
-- processing to be applied to a @DataSource@. If the @DataRearrangement@
-- parameter is not provided, all of the input data is used to create the
-- @Datasource@.
--
-- There are multiple parameters that control what data is used to create
-- a datasource:
--
-- -   __@percentBegin@__
--
--     Use @percentBegin@ to indicate the beginning of the range of the
--     data used to create the Datasource. If you do not include
--     @percentBegin@ and @percentEnd@, Amazon ML includes all of the data
--     when creating the datasource.
--
-- -   __@percentEnd@__
--
--     Use @percentEnd@ to indicate the end of the range of the data used
--     to create the Datasource. If you do not include @percentBegin@ and
--     @percentEnd@, Amazon ML includes all of the data when creating the
--     datasource.
--
-- -   __@complement@__
--
--     The @complement@ parameter instructs Amazon ML to use the data that
--     is not included in the range of @percentBegin@ to @percentEnd@ to
--     create a datasource. The @complement@ parameter is useful if you
--     need to create complementary datasources for training and
--     evaluation. To create a complementary datasource, use the same
--     values for @percentBegin@ and @percentEnd@, along with the
--     @complement@ parameter.
--
--     For example, the following two datasources do not share any data,
--     and can be used to train and evaluate a model. The first datasource
--     has 25 percent of the data, and the second one has 75 percent of
--     the data.
--
--     Datasource for evaluation:
--     @{\"splitting\":{\"percentBegin\":0, \"percentEnd\":25}}@
--
--     Datasource for training:
--     @{\"splitting\":{\"percentBegin\":0, \"percentEnd\":25, \"complement\":\"true\"}}@
--
-- -   __@strategy@__
--
--     To change how Amazon ML splits the data for a datasource, use the
--     @strategy@ parameter.
--
--     The default value for the @strategy@ parameter is @sequential@,
--     meaning that Amazon ML takes all of the data records between the
--     @percentBegin@ and @percentEnd@ parameters for the datasource, in
--     the order that the records appear in the input data.
--
--     The following two @DataRearrangement@ lines are examples of
--     sequentially ordered training and evaluation datasources:
--
--     Datasource for evaluation:
--     @{\"splitting\":{\"percentBegin\":70, \"percentEnd\":100, \"strategy\":\"sequential\"}}@
--
--     Datasource for training:
--     @{\"splitting\":{\"percentBegin\":70, \"percentEnd\":100, \"strategy\":\"sequential\", \"complement\":\"true\"}}@
--
--     To randomly split the input data into the proportions indicated by
--     the @percentBegin@ and @percentEnd@ parameters, set the @strategy@
--     parameter to @random@ and provide a string that is used as the seed
--     value for the random data splitting (for example, you can use the
--     S3 path to your data as the random seed string). If you choose the
--     random split strategy, Amazon ML assigns each row of data a
--     pseudo-random number between 0 and 100, and then selects the rows
--     that have an assigned number between @percentBegin@ and
--     @percentEnd@. Pseudo-random numbers are assigned using both the
--     input seed string value and the byte offset as a seed, so changing
--     the data results in a different split. Any existing ordering is
--     preserved. The random splitting strategy ensures that variables in
--     the training and evaluation data are distributed similarly. It is
--     useful in the cases where the input data may have an implicit sort
--     order, which would otherwise result in training and evaluation
--     datasources containing non-similar data records.
--
--     The following two @DataRearrangement@ lines are examples of
--     non-sequentially ordered training and evaluation datasources:
--
--     Datasource for evaluation:
--     @{\"splitting\":{\"percentBegin\":70, \"percentEnd\":100, \"strategy\":\"random\", \"randomSeed\"=\"s3:\/\/my_s3_path\/bucket\/file.csv\"}}@
--
--     Datasource for training:
--     @{\"splitting\":{\"percentBegin\":70, \"percentEnd\":100, \"strategy\":\"random\", \"randomSeed\"=\"s3:\/\/my_s3_path\/bucket\/file.csv\", \"complement\":\"true\"}}@
--
-- 'databaseInformation', 'rDSDataSpec_databaseInformation' - Describes the @DatabaseName@ and @InstanceIdentifier@ of an Amazon RDS
-- database.
--
-- 'selectSqlQuery', 'rDSDataSpec_selectSqlQuery' - The query that is used to retrieve the observation data for the
-- @DataSource@.
--
-- 'databaseCredentials', 'rDSDataSpec_databaseCredentials' - The AWS Identity and Access Management (IAM) credentials that are used
-- to connect to the Amazon RDS database.
--
-- 's3StagingLocation', 'rDSDataSpec_s3StagingLocation' - The Amazon S3 location for staging Amazon RDS data. The data retrieved
-- from Amazon RDS using @SelectSqlQuery@ is stored in this location.
--
-- 'resourceRole', 'rDSDataSpec_resourceRole' - The role (DataPipelineDefaultResourceRole) assumed by an Amazon
-- Elastic Compute Cloud (Amazon EC2) instance to carry out the copy
-- operation from Amazon RDS to Amazon S3. For more information, see
-- <https://docs.aws.amazon.com/datapipeline/latest/DeveloperGuide/dp-iam-roles.html Role templates>
-- for data pipelines.
--
-- 'serviceRole', 'rDSDataSpec_serviceRole' - The role (DataPipelineDefaultRole) assumed by the AWS Data Pipeline
-- service to monitor the progress of the copy task from Amazon RDS to
-- Amazon S3. For more information, see
-- <https://docs.aws.amazon.com/datapipeline/latest/DeveloperGuide/dp-iam-roles.html Role templates>
-- for data pipelines.
--
-- 'subnetId', 'rDSDataSpec_subnetId' - The subnet ID to be used to access a VPC-based RDS DB instance. This
-- attribute is used by Data Pipeline to carry out the copy task from
-- Amazon RDS to Amazon S3.
--
-- 'securityGroupIds', 'rDSDataSpec_securityGroupIds' - The security group IDs to be used to access a VPC-based RDS DB
-- instance. Ensure that there are appropriate ingress rules set up to
-- allow access to the RDS DB instance. This attribute is used by Data
-- Pipeline to carry out the copy operation from Amazon RDS to Amazon S3.
newRDSDataSpec ::
  -- | 'databaseInformation'
  RDSDatabase ->
  -- | 'selectSqlQuery'
  Prelude.Text ->
  -- | 'databaseCredentials'
  RDSDatabaseCredentials ->
  -- | 's3StagingLocation'
  Prelude.Text ->
  -- | 'resourceRole'
  Prelude.Text ->
  -- | 'serviceRole'
  Prelude.Text ->
  -- | 'subnetId'
  Prelude.Text ->
  RDSDataSpec
newRDSDataSpec
  pDatabaseInformation_
  pSelectSqlQuery_
  pDatabaseCredentials_
  pS3StagingLocation_
  pResourceRole_
  pServiceRole_
  pSubnetId_ =
    RDSDataSpec'
      { dataSchemaUri = Prelude.Nothing,
        dataSchema = Prelude.Nothing,
        dataRearrangement = Prelude.Nothing,
        databaseInformation = pDatabaseInformation_,
        selectSqlQuery = pSelectSqlQuery_,
        databaseCredentials = pDatabaseCredentials_,
        s3StagingLocation = pS3StagingLocation_,
        resourceRole = pResourceRole_,
        serviceRole = pServiceRole_,
        subnetId = pSubnetId_,
        securityGroupIds = Prelude.mempty
      }
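-- A minimal construction sketch (with OverloadedStrings). 'newRDSDataSpec'
-- takes the required fields in the order listed above; the sibling smart
-- constructors 'newRDSDatabase' and 'newRDSDatabaseCredentials' are assumed
-- from this package with their arguments in generated field order, and every
-- identifier, role, and location below is a placeholder:
--
-- > exampleSpec :: RDSDataSpec
-- > exampleSpec =
-- >   (newRDSDataSpec
-- >      (newRDSDatabase "my-rds-instance" "mydb")
-- >      "SELECT * FROM observations"
-- >      (newRDSDatabaseCredentials "my-username" "my-password")
-- >      "s3://my-bucket/staging/"
-- >      "DataPipelineDefaultResourceRole"
-- >      "DataPipelineDefaultRole"
-- >      "subnet-0123456789abcdef0")
-- >     { securityGroupIds = ["sg-0123456789abcdef0"] }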
-- | The Amazon S3 location of the @DataSchema@.
rDSDataSpec_dataSchemaUri :: Lens.Lens' RDSDataSpec (Prelude.Maybe Prelude.Text)
rDSDataSpec_dataSchemaUri = Lens.lens (\RDSDataSpec' {dataSchemaUri} -> dataSchemaUri) (\s@RDSDataSpec' {} a -> s {dataSchemaUri = a} :: RDSDataSpec)
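-- A small usage sketch for the generated lenses (assuming the usual lens
-- combinators such as @(^.)@, @(&)@, and @(?~)@ are in scope, e.g. from
-- Control.Lens or the re-exports in "Amazonka.Lens"; 'exampleSpec' is the
-- hypothetical value sketched above):
--
-- > exampleSpec ^. rDSDataSpec_dataSchemaUri
-- > -- ==> Nothing
-- > exampleSpec & rDSDataSpec_dataSchemaUri ?~ "s3://my-bucket/schema.json"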
-- | A JSON string that represents the schema for an Amazon RDS
-- @DataSource@. The @DataSchema@ defines the structure of the observation
-- data in the data file(s) referenced in the @DataSource@.
--
-- A @DataSchema@ is not required if you specify a @DataSchemaUri@.
--
-- Define your @DataSchema@ as a series of key-value pairs. @attributes@
-- and @excludedVariableNames@ have an array of key-value pairs for their
-- value. Use the following format to define your @DataSchema@.
--
-- { \"version\": \"1.0\",
--
-- \"recordAnnotationFieldName\": \"F1\",
--
-- \"recordWeightFieldName\": \"F2\",
--
-- \"targetFieldName\": \"F3\",
--
-- \"dataFormat\": \"CSV\",
--
-- \"dataFileContainsHeader\": true,
--
-- \"attributes\": [
--
-- { \"fieldName\": \"F1\", \"fieldType\": \"TEXT\" }, { \"fieldName\":
-- \"F2\", \"fieldType\": \"NUMERIC\" }, { \"fieldName\": \"F3\",
-- \"fieldType\": \"CATEGORICAL\" }, { \"fieldName\": \"F4\",
-- \"fieldType\": \"NUMERIC\" }, { \"fieldName\": \"F5\", \"fieldType\":
-- \"CATEGORICAL\" }, { \"fieldName\": \"F6\", \"fieldType\": \"TEXT\" }, {
-- \"fieldName\": \"F7\", \"fieldType\": \"WEIGHTED_INT_SEQUENCE\" }, {
-- \"fieldName\": \"F8\", \"fieldType\": \"WEIGHTED_STRING_SEQUENCE\" } ],
--
-- \"excludedVariableNames\": [ \"F6\" ] }
rDSDataSpec_dataSchema :: Lens.Lens' RDSDataSpec (Prelude.Maybe Prelude.Text)
rDSDataSpec_dataSchema = Lens.lens (\RDSDataSpec' {dataSchema} -> dataSchema) (\s@RDSDataSpec' {} a -> s {dataSchema = a} :: RDSDataSpec)

-- | A JSON string that represents the splitting and rearrangement
-- processing to be applied to a @DataSource@. If the @DataRearrangement@
-- parameter is not provided, all of the input data is used to create the
-- @Datasource@.
--
-- There are multiple parameters that control what data is used to create
-- a datasource:
--
-- -   __@percentBegin@__
--
--     Use @percentBegin@ to indicate the beginning of the range of the
--     data used to create the Datasource. If you do not include
--     @percentBegin@ and @percentEnd@, Amazon ML includes all of the data
--     when creating the datasource.
--
-- -   __@percentEnd@__
--
--     Use @percentEnd@ to indicate the end of the range of the data used
--     to create the Datasource. If you do not include @percentBegin@ and
--     @percentEnd@, Amazon ML includes all of the data when creating the
--     datasource.
--
-- -   __@complement@__
--
--     The @complement@ parameter instructs Amazon ML to use the data that
--     is not included in the range of @percentBegin@ to @percentEnd@ to
--     create a datasource. The @complement@ parameter is useful if you
--     need to create complementary datasources for training and
--     evaluation. To create a complementary datasource, use the same
--     values for @percentBegin@ and @percentEnd@, along with the
--     @complement@ parameter.
--
--     For example, the following two datasources do not share any data,
--     and can be used to train and evaluate a model. The first datasource
--     has 25 percent of the data, and the second one has 75 percent of
--     the data.
--
--     Datasource for evaluation:
--     @{\"splitting\":{\"percentBegin\":0, \"percentEnd\":25}}@
--
--     Datasource for training:
--     @{\"splitting\":{\"percentBegin\":0, \"percentEnd\":25, \"complement\":\"true\"}}@
--
-- -   __@strategy@__
--
--     To change how Amazon ML splits the data for a datasource, use the
--     @strategy@ parameter.
--
--     The default value for the @strategy@ parameter is @sequential@,
--     meaning that Amazon ML takes all of the data records between the
--     @percentBegin@ and @percentEnd@ parameters for the datasource, in
--     the order that the records appear in the input data.
--
--     The following two @DataRearrangement@ lines are examples of
--     sequentially ordered training and evaluation datasources:
--
--     Datasource for evaluation:
--     @{\"splitting\":{\"percentBegin\":70, \"percentEnd\":100, \"strategy\":\"sequential\"}}@
--
--     Datasource for training:
--     @{\"splitting\":{\"percentBegin\":70, \"percentEnd\":100, \"strategy\":\"sequential\", \"complement\":\"true\"}}@
--
--     To randomly split the input data into the proportions indicated by
--     the @percentBegin@ and @percentEnd@ parameters, set the @strategy@
--     parameter to @random@ and provide a string that is used as the seed
--     value for the random data splitting (for example, you can use the
--     S3 path to your data as the random seed string). If you choose the
--     random split strategy, Amazon ML assigns each row of data a
--     pseudo-random number between 0 and 100, and then selects the rows
--     that have an assigned number between @percentBegin@ and
--     @percentEnd@. Pseudo-random numbers are assigned using both the
--     input seed string value and the byte offset as a seed, so changing
--     the data results in a different split. Any existing ordering is
--     preserved. The random splitting strategy ensures that variables in
--     the training and evaluation data are distributed similarly. It is
--     useful in the cases where the input data may have an implicit sort
--     order, which would otherwise result in training and evaluation
--     datasources containing non-similar data records.
--
--     The following two @DataRearrangement@ lines are examples of
--     non-sequentially ordered training and evaluation datasources:
--
--     Datasource for evaluation:
--     @{\"splitting\":{\"percentBegin\":70, \"percentEnd\":100, \"strategy\":\"random\", \"randomSeed\"=\"s3:\/\/my_s3_path\/bucket\/file.csv\"}}@
--
--     Datasource for training:
--     @{\"splitting\":{\"percentBegin\":70, \"percentEnd\":100, \"strategy\":\"random\", \"randomSeed\"=\"s3:\/\/my_s3_path\/bucket\/file.csv\", \"complement\":\"true\"}}@
rDSDataSpec_dataRearrangement :: Lens.Lens' RDSDataSpec (Prelude.Maybe Prelude.Text)
rDSDataSpec_dataRearrangement = Lens.lens (\RDSDataSpec' {dataRearrangement} -> dataRearrangement) (\s@RDSDataSpec' {} a -> s {dataRearrangement = a} :: RDSDataSpec)
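-- A hedged sketch of attaching a @DataRearrangement@ to a spec. Because the
-- field is a raw JSON 'Prelude.Text', a plain record update suffices; the
-- value reproduces the random-strategy example from the documentation above
-- (the documentation's @\"randomSeed\"=...@ is written with @:@ here so the
-- string stays valid JSON):
--
-- > withRandomSplit :: RDSDataSpec -> RDSDataSpec
-- > withRandomSplit spec =
-- >   spec
-- >     { dataRearrangement =
-- >         Prelude.Just
-- >           "{\"splitting\":{\"percentBegin\":70,\"percentEnd\":100,\
-- >           \\"strategy\":\"random\",\
-- >           \\"randomSeed\":\"s3://my_s3_path/bucket/file.csv\"}}"
-- >     }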
-- | Describes the @DatabaseName@ and @InstanceIdentifier@ of an Amazon RDS
-- database.
rDSDataSpec_databaseInformation :: Lens.Lens' RDSDataSpec RDSDatabase
rDSDataSpec_databaseInformation = Lens.lens (\RDSDataSpec' {databaseInformation} -> databaseInformation) (\s@RDSDataSpec' {} a -> s {databaseInformation = a} :: RDSDataSpec)

-- | The query that is used to retrieve the observation data for the
-- @DataSource@.
rDSDataSpec_selectSqlQuery :: Lens.Lens' RDSDataSpec Prelude.Text
rDSDataSpec_selectSqlQuery = Lens.lens (\RDSDataSpec' {selectSqlQuery} -> selectSqlQuery) (\s@RDSDataSpec' {} a -> s {selectSqlQuery = a} :: RDSDataSpec)

-- | The AWS Identity and Access Management (IAM) credentials that are used
-- to connect to the Amazon RDS database.
rDSDataSpec_databaseCredentials :: Lens.Lens' RDSDataSpec RDSDatabaseCredentials
rDSDataSpec_databaseCredentials = Lens.lens (\RDSDataSpec' {databaseCredentials} -> databaseCredentials) (\s@RDSDataSpec' {} a -> s {databaseCredentials = a} :: RDSDataSpec)

-- | The Amazon S3 location for staging Amazon RDS data. The data retrieved
-- from Amazon RDS using @SelectSqlQuery@ is stored in this location.
rDSDataSpec_s3StagingLocation :: Lens.Lens' RDSDataSpec Prelude.Text
rDSDataSpec_s3StagingLocation = Lens.lens (\RDSDataSpec' {s3StagingLocation} -> s3StagingLocation) (\s@RDSDataSpec' {} a -> s {s3StagingLocation = a} :: RDSDataSpec)

-- | The role (DataPipelineDefaultResourceRole) assumed by an Amazon Elastic
-- Compute Cloud (Amazon EC2) instance to carry out the copy operation from
-- Amazon RDS to Amazon S3. For more information, see
-- <https://docs.aws.amazon.com/datapipeline/latest/DeveloperGuide/dp-iam-roles.html Role templates>
-- for data pipelines.
rDSDataSpec_resourceRole :: Lens.Lens' RDSDataSpec Prelude.Text
rDSDataSpec_resourceRole = Lens.lens (\RDSDataSpec' {resourceRole} -> resourceRole) (\s@RDSDataSpec' {} a -> s {resourceRole = a} :: RDSDataSpec)

-- | The role (DataPipelineDefaultRole) assumed by the AWS Data Pipeline
-- service to monitor the progress of the copy task from Amazon RDS to
-- Amazon S3. For more information, see
-- <https://docs.aws.amazon.com/datapipeline/latest/DeveloperGuide/dp-iam-roles.html Role templates>
-- for data pipelines.
rDSDataSpec_serviceRole :: Lens.Lens' RDSDataSpec Prelude.Text
rDSDataSpec_serviceRole = Lens.lens (\RDSDataSpec' {serviceRole} -> serviceRole) (\s@RDSDataSpec' {} a -> s {serviceRole = a} :: RDSDataSpec)

-- | The subnet ID to be used to access a VPC-based RDS DB instance. This
-- attribute is used by Data Pipeline to carry out the copy task from Amazon
-- RDS to Amazon S3.
rDSDataSpec_subnetId :: Lens.Lens' RDSDataSpec Prelude.Text
rDSDataSpec_subnetId = Lens.lens (\RDSDataSpec' {subnetId} -> subnetId) (\s@RDSDataSpec' {} a -> s {subnetId = a} :: RDSDataSpec)
-- | The security group IDs to be used to access a VPC-based RDS DB
-- instance. Ensure that there are appropriate ingress rules set up to allow
-- access to the RDS DB instance. This attribute is used by Data Pipeline to
-- carry out the copy operation from Amazon RDS to Amazon S3.
rDSDataSpec_securityGroupIds :: Lens.Lens' RDSDataSpec [Prelude.Text]
rDSDataSpec_securityGroupIds = Lens.lens (\RDSDataSpec' {securityGroupIds} -> securityGroupIds) (\s@RDSDataSpec' {} a -> s {securityGroupIds = a} :: RDSDataSpec) Prelude.. Lens.coerced

instance Prelude.Hashable RDSDataSpec

instance Prelude.NFData RDSDataSpec
instance Core.ToJSON RDSDataSpec where
  toJSON RDSDataSpec' {..} =
    Core.object
      ( Prelude.catMaybes
          [ ("DataSchemaUri" Core..=) Prelude.<$> dataSchemaUri,
            ("DataSchema" Core..=) Prelude.<$> dataSchema,
            ("DataRearrangement" Core..=) Prelude.<$> dataRearrangement,
            Prelude.Just
              ("DatabaseInformation" Core..= databaseInformation),
            Prelude.Just ("SelectSqlQuery" Core..= selectSqlQuery),
            Prelude.Just
              ("DatabaseCredentials" Core..= databaseCredentials),
            Prelude.Just ("S3StagingLocation" Core..= s3StagingLocation),
            Prelude.Just ("ResourceRole" Core..= resourceRole),
            Prelude.Just ("ServiceRole" Core..= serviceRole),
            Prelude.Just ("SubnetId" Core..= subnetId),
            Prelude.Just ("SecurityGroupIds" Core..= securityGroupIds)
          ]
      )
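-- The 'Core.ToJSON' instance above drops the three optional fields when
-- they are 'Prelude.Nothing' (via 'Prelude.catMaybes'), so a freshly
-- constructed spec serialises to an object containing only the required
-- keys. A sketch, assuming 'Core.ToJSON' re-exports aeson's class so that
-- 'Data.Aeson.encode' applies directly ('exampleSpec' is the hypothetical
-- value sketched earlier):
--
-- > Data.Aeson.encode exampleSpec
-- > -- ==> {"DatabaseInformation":{...},"SelectSqlQuery":"SELECT * FROM observations",...}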