{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE DuplicateRecordFields #-}
{-# LANGUAGE NamedFieldPuns #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE StrictData #-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# OPTIONS_GHC -fno-warn-unused-imports #-}
{-# OPTIONS_GHC -fno-warn-unused-matches #-}

-- Derived from AWS service descriptions, licensed under Apache 2.0.

-- |
-- Module      : Amazonka.MachineLearning.Types.RedshiftDataSpec
-- Copyright   : (c) 2013-2021 Brendan Hay
-- License     : Mozilla Public License, v. 2.0.
-- Maintainer  : Brendan Hay <brendan.g.hay+amazonka@gmail.com>
-- Stability   : auto-generated
-- Portability : non-portable (GHC extensions)
module Amazonka.MachineLearning.Types.RedshiftDataSpec where

import qualified Amazonka.Core as Core
import qualified Amazonka.Lens as Lens
import Amazonka.MachineLearning.Types.RedshiftDatabase
import Amazonka.MachineLearning.Types.RedshiftDatabaseCredentials
import qualified Amazonka.Prelude as Prelude

-- | Describes the data specification of an Amazon Redshift @DataSource@.
--
-- /See:/ 'newRedshiftDataSpec' smart constructor.
data RedshiftDataSpec = RedshiftDataSpec'
  { -- | Describes the schema location for an Amazon Redshift @DataSource@.
    dataSchemaUri :: Prelude.Maybe Prelude.Text,
    -- | A JSON string that represents the schema for an Amazon Redshift
    -- @DataSource@. The @DataSchema@ defines the structure of the observation
    -- data in the data file(s) referenced in the @DataSource@.
    --
    -- A @DataSchema@ is not required if you specify a @DataSchemaUri@.
    --
    -- Define your @DataSchema@ as a series of key-value pairs. @attributes@
    -- and @excludedVariableNames@ have an array of key-value pairs for their
    -- value. Use the following format to define your @DataSchema@.
    --
    -- { \"version\": \"1.0\",
    --
    -- \"recordAnnotationFieldName\": \"F1\",
    --
    -- \"recordWeightFieldName\": \"F2\",
    --
    -- \"targetFieldName\": \"F3\",
    --
    -- \"dataFormat\": \"CSV\",
    --
    -- \"dataFileContainsHeader\": true,
    --
    -- \"attributes\": [
    --
    -- { \"fieldName\": \"F1\", \"fieldType\": \"TEXT\" }, { \"fieldName\":
    -- \"F2\", \"fieldType\": \"NUMERIC\" }, { \"fieldName\": \"F3\",
    -- \"fieldType\": \"CATEGORICAL\" }, { \"fieldName\": \"F4\",
    -- \"fieldType\": \"NUMERIC\" }, { \"fieldName\": \"F5\", \"fieldType\":
    -- \"CATEGORICAL\" }, { \"fieldName\": \"F6\", \"fieldType\": \"TEXT\" }, {
    -- \"fieldName\": \"F7\", \"fieldType\": \"WEIGHTED_INT_SEQUENCE\" }, {
    -- \"fieldName\": \"F8\", \"fieldType\": \"WEIGHTED_STRING_SEQUENCE\" } ],
    --
    -- \"excludedVariableNames\": [ \"F6\" ] }
    dataSchema :: Prelude.Maybe Prelude.Text,
    -- | A JSON string that represents the splitting and rearrangement processing
    -- to be applied to a @DataSource@. If the @DataRearrangement@ parameter is
    -- not provided, all of the input data is used to create the @Datasource@.
    --
    -- There are multiple parameters that control what data is used to create a
    -- datasource:
    --
    -- -   __@percentBegin@__
    --
    --     Use @percentBegin@ to indicate the beginning of the range of the
    --     data used to create the Datasource. If you do not include
    --     @percentBegin@ and @percentEnd@, Amazon ML includes all of the data
    --     when creating the datasource.
    --
    -- -   __@percentEnd@__
    --
    --     Use @percentEnd@ to indicate the end of the range of the data used
    --     to create the Datasource. If you do not include @percentBegin@ and
    --     @percentEnd@, Amazon ML includes all of the data when creating the
    --     datasource.
    --
    -- -   __@complement@__
    --
    --     The @complement@ parameter instructs Amazon ML to use the data that
    --     is not included in the range of @percentBegin@ to @percentEnd@ to
    --     create a datasource. The @complement@ parameter is useful if you
    --     need to create complementary datasources for training and
    --     evaluation. To create a complementary datasource, use the same
    --     values for @percentBegin@ and @percentEnd@, along with the
    --     @complement@ parameter.
    --
    --     For example, the following two datasources do not share any data,
    --     and can be used to train and evaluate a model. The first datasource
    --     has 25 percent of the data, and the second one has 75 percent of the
    --     data.
    --
    --     Datasource for evaluation:
    --     @{\"splitting\":{\"percentBegin\":0, \"percentEnd\":25}}@
    --
    --     Datasource for training:
    --     @{\"splitting\":{\"percentBegin\":0, \"percentEnd\":25, \"complement\":\"true\"}}@
    --
    -- -   __@strategy@__
    --
    --     To change how Amazon ML splits the data for a datasource, use the
    --     @strategy@ parameter.
    --
    --     The default value for the @strategy@ parameter is @sequential@,
    --     meaning that Amazon ML takes all of the data records between the
    --     @percentBegin@ and @percentEnd@ parameters for the datasource, in
    --     the order that the records appear in the input data.
    --
    --     The following two @DataRearrangement@ lines are examples of
    --     sequentially ordered training and evaluation datasources:
    --
    --     Datasource for evaluation:
    --     @{\"splitting\":{\"percentBegin\":70, \"percentEnd\":100, \"strategy\":\"sequential\"}}@
    --
    --     Datasource for training:
    --     @{\"splitting\":{\"percentBegin\":70, \"percentEnd\":100, \"strategy\":\"sequential\", \"complement\":\"true\"}}@
    --
    --     To randomly split the input data into the proportions indicated by
    --     the percentBegin and percentEnd parameters, set the @strategy@
    --     parameter to @random@ and provide a string that is used as the seed
    --     value for the random data splitting (for example, you can use the S3
    --     path to your data as the random seed string). If you choose the
    --     random split strategy, Amazon ML assigns each row of data a
    --     pseudo-random number between 0 and 100, and then selects the rows
    --     that have an assigned number between @percentBegin@ and
    --     @percentEnd@. Pseudo-random numbers are assigned using both the
    --     input seed string value and the byte offset as a seed, so changing
    --     the data results in a different split. Any existing ordering is
    --     preserved. The random splitting strategy ensures that variables in
    --     the training and evaluation data are distributed similarly. It is
    --     useful in the cases where the input data may have an implicit sort
    --     order, which would otherwise result in training and evaluation
    --     datasources containing non-similar data records.
    --
    --     The following two @DataRearrangement@ lines are examples of
    --     non-sequentially ordered training and evaluation datasources:
    --
    --     Datasource for evaluation:
    --     @{\"splitting\":{\"percentBegin\":70, \"percentEnd\":100, \"strategy\":\"random\", \"randomSeed\"=\"s3:\/\/my_s3_path\/bucket\/file.csv\"}}@
    --
    --     Datasource for training:
    --     @{\"splitting\":{\"percentBegin\":70, \"percentEnd\":100, \"strategy\":\"random\", \"randomSeed\"=\"s3:\/\/my_s3_path\/bucket\/file.csv\", \"complement\":\"true\"}}@
    dataRearrangement :: Prelude.Maybe Prelude.Text,
    -- | Describes the @DatabaseName@ and @ClusterIdentifier@ for an Amazon
    -- Redshift @DataSource@.
    databaseInformation :: RedshiftDatabase,
    -- | Describes the SQL Query to execute on an Amazon Redshift database for an
    -- Amazon Redshift @DataSource@.
    selectSqlQuery :: Prelude.Text,
    -- | Describes AWS Identity and Access Management (IAM) credentials that are
    -- used to connect to the Amazon Redshift database.
    databaseCredentials :: RedshiftDatabaseCredentials,
    -- | Describes an Amazon S3 location to store the result set of the
    -- @SelectSqlQuery@ query.
    s3StagingLocation :: Prelude.Text
  }
  deriving (Prelude.Eq, Prelude.Read, Prelude.Show, Prelude.Generic)

-- |
-- Create a value of 'RedshiftDataSpec' with all optional fields omitted.
--
-- Use <https://hackage.haskell.org/package/generic-lens generic-lens> or <https://hackage.haskell.org/package/optics optics> to modify other optional fields.
--
-- The following record fields are available, with the corresponding lenses provided
-- for backwards compatibility:
--
-- 'dataSchemaUri', 'redshiftDataSpec_dataSchemaUri' - Describes the schema location for an Amazon Redshift @DataSource@.
--
-- 'dataSchema', 'redshiftDataSpec_dataSchema' - A JSON string that represents the schema for an Amazon Redshift
-- @DataSource@. The @DataSchema@ defines the structure of the observation
-- data in the data file(s) referenced in the @DataSource@.
--
-- A @DataSchema@ is not required if you specify a @DataSchemaUri@.
--
-- Define your @DataSchema@ as a series of key-value pairs. @attributes@
-- and @excludedVariableNames@ have an array of key-value pairs for their
-- value. Use the following format to define your @DataSchema@.
--
-- { \"version\": \"1.0\",
--
-- \"recordAnnotationFieldName\": \"F1\",
--
-- \"recordWeightFieldName\": \"F2\",
--
-- \"targetFieldName\": \"F3\",
--
-- \"dataFormat\": \"CSV\",
--
-- \"dataFileContainsHeader\": true,
--
-- \"attributes\": [
--
-- { \"fieldName\": \"F1\", \"fieldType\": \"TEXT\" }, { \"fieldName\":
-- \"F2\", \"fieldType\": \"NUMERIC\" }, { \"fieldName\": \"F3\",
-- \"fieldType\": \"CATEGORICAL\" }, { \"fieldName\": \"F4\",
-- \"fieldType\": \"NUMERIC\" }, { \"fieldName\": \"F5\", \"fieldType\":
-- \"CATEGORICAL\" }, { \"fieldName\": \"F6\", \"fieldType\": \"TEXT\" }, {
-- \"fieldName\": \"F7\", \"fieldType\": \"WEIGHTED_INT_SEQUENCE\" }, {
-- \"fieldName\": \"F8\", \"fieldType\": \"WEIGHTED_STRING_SEQUENCE\" } ],
--
-- \"excludedVariableNames\": [ \"F6\" ] }
--
-- 'dataRearrangement', 'redshiftDataSpec_dataRearrangement' - A JSON string that represents the splitting and rearrangement processing
-- to be applied to a @DataSource@. If the @DataRearrangement@ parameter is
-- not provided, all of the input data is used to create the @Datasource@.
--
-- There are multiple parameters that control what data is used to create a
-- datasource:
--
-- -   __@percentBegin@__
--
--     Use @percentBegin@ to indicate the beginning of the range of the
--     data used to create the Datasource. If you do not include
--     @percentBegin@ and @percentEnd@, Amazon ML includes all of the data
--     when creating the datasource.
--
-- -   __@percentEnd@__
--
--     Use @percentEnd@ to indicate the end of the range of the data used
--     to create the Datasource. If you do not include @percentBegin@ and
--     @percentEnd@, Amazon ML includes all of the data when creating the
--     datasource.
--
-- -   __@complement@__
--
--     The @complement@ parameter instructs Amazon ML to use the data that
--     is not included in the range of @percentBegin@ to @percentEnd@ to
--     create a datasource. The @complement@ parameter is useful if you
--     need to create complementary datasources for training and
--     evaluation. To create a complementary datasource, use the same
--     values for @percentBegin@ and @percentEnd@, along with the
--     @complement@ parameter.
--
--     For example, the following two datasources do not share any data,
--     and can be used to train and evaluate a model. The first datasource
--     has 25 percent of the data, and the second one has 75 percent of the
--     data.
--
--     Datasource for evaluation:
--     @{\"splitting\":{\"percentBegin\":0, \"percentEnd\":25}}@
--
--     Datasource for training:
--     @{\"splitting\":{\"percentBegin\":0, \"percentEnd\":25, \"complement\":\"true\"}}@
--
-- -   __@strategy@__
--
--     To change how Amazon ML splits the data for a datasource, use the
--     @strategy@ parameter.
--
--     The default value for the @strategy@ parameter is @sequential@,
--     meaning that Amazon ML takes all of the data records between the
--     @percentBegin@ and @percentEnd@ parameters for the datasource, in
--     the order that the records appear in the input data.
--
--     The following two @DataRearrangement@ lines are examples of
--     sequentially ordered training and evaluation datasources:
--
--     Datasource for evaluation:
--     @{\"splitting\":{\"percentBegin\":70, \"percentEnd\":100, \"strategy\":\"sequential\"}}@
--
--     Datasource for training:
--     @{\"splitting\":{\"percentBegin\":70, \"percentEnd\":100, \"strategy\":\"sequential\", \"complement\":\"true\"}}@
--
--     To randomly split the input data into the proportions indicated by
--     the percentBegin and percentEnd parameters, set the @strategy@
--     parameter to @random@ and provide a string that is used as the seed
--     value for the random data splitting (for example, you can use the S3
--     path to your data as the random seed string). If you choose the
--     random split strategy, Amazon ML assigns each row of data a
--     pseudo-random number between 0 and 100, and then selects the rows
--     that have an assigned number between @percentBegin@ and
--     @percentEnd@. Pseudo-random numbers are assigned using both the
--     input seed string value and the byte offset as a seed, so changing
--     the data results in a different split. Any existing ordering is
--     preserved. The random splitting strategy ensures that variables in
--     the training and evaluation data are distributed similarly. It is
--     useful in the cases where the input data may have an implicit sort
--     order, which would otherwise result in training and evaluation
--     datasources containing non-similar data records.
--
--     The following two @DataRearrangement@ lines are examples of
--     non-sequentially ordered training and evaluation datasources:
--
--     Datasource for evaluation:
--     @{\"splitting\":{\"percentBegin\":70, \"percentEnd\":100, \"strategy\":\"random\", \"randomSeed\"=\"s3:\/\/my_s3_path\/bucket\/file.csv\"}}@
--
--     Datasource for training:
--     @{\"splitting\":{\"percentBegin\":70, \"percentEnd\":100, \"strategy\":\"random\", \"randomSeed\"=\"s3:\/\/my_s3_path\/bucket\/file.csv\", \"complement\":\"true\"}}@
--
-- 'databaseInformation', 'redshiftDataSpec_databaseInformation' - Describes the @DatabaseName@ and @ClusterIdentifier@ for an Amazon
-- Redshift @DataSource@.
--
-- 'selectSqlQuery', 'redshiftDataSpec_selectSqlQuery' - Describes the SQL Query to execute on an Amazon Redshift database for an
-- Amazon Redshift @DataSource@.
--
-- 'databaseCredentials', 'redshiftDataSpec_databaseCredentials' - Describes AWS Identity and Access Management (IAM) credentials that are
-- used to connect to the Amazon Redshift database.
--
-- 's3StagingLocation', 'redshiftDataSpec_s3StagingLocation' - Describes an Amazon S3 location to store the result set of the
-- @SelectSqlQuery@ query.
newRedshiftDataSpec ::
  -- | 'databaseInformation'
  RedshiftDatabase ->
  -- | 'selectSqlQuery'
  Prelude.Text ->
  -- | 'databaseCredentials'
  RedshiftDatabaseCredentials ->
  -- | 's3StagingLocation'
  Prelude.Text ->
  RedshiftDataSpec
newRedshiftDataSpec
  pDatabaseInformation_
  pSelectSqlQuery_
  pDatabaseCredentials_
  pS3StagingLocation_ =
    RedshiftDataSpec'
      { dataSchemaUri = Prelude.Nothing,
        dataSchema = Prelude.Nothing,
        dataRearrangement = Prelude.Nothing,
        databaseInformation = pDatabaseInformation_,
        selectSqlQuery = pSelectSqlQuery_,
        databaseCredentials = pDatabaseCredentials_,
        s3StagingLocation = pS3StagingLocation_
      }
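
-- The following is an illustrative sketch, not part of the generated API:
-- it builds a 'RedshiftDataSpec' from the required fields with
-- 'newRedshiftDataSpec' and then fills in the optional 'dataSchemaUri'
-- via a plain record update. 'exampleRedshiftDataSpec' and every literal
-- in it (the query and the S3 URIs) are hypothetical placeholder values.
-- A record update is used so the sketch relies only on what this module
-- already imports; the generated lenses below (or generic-lens\/optics, as
-- noted in the constructor docs) work just as well.
exampleRedshiftDataSpec ::
  RedshiftDatabase ->
  RedshiftDatabaseCredentials ->
  RedshiftDataSpec
exampleRedshiftDataSpec database credentials =
  ( newRedshiftDataSpec
      database
      "SELECT customer_id, amount FROM orders"
      credentials
      "s3://my-bucket/amazon-ml/staging/"
  )
    { dataSchemaUri =
        Prelude.Just "s3://my-bucket/amazon-ml/schema.json"
    }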

-- | Describes the schema location for an Amazon Redshift @DataSource@.
redshiftDataSpec_dataSchemaUri :: Lens.Lens' RedshiftDataSpec (Prelude.Maybe Prelude.Text)
redshiftDataSpec_dataSchemaUri = Lens.lens (\RedshiftDataSpec' {dataSchemaUri} -> dataSchemaUri) (\s@RedshiftDataSpec' {} a -> s {dataSchemaUri = a} :: RedshiftDataSpec)

-- | A JSON string that represents the schema for an Amazon Redshift
-- @DataSource@. The @DataSchema@ defines the structure of the observation
-- data in the data file(s) referenced in the @DataSource@.
--
-- A @DataSchema@ is not required if you specify a @DataSchemaUri@.
--
-- Define your @DataSchema@ as a series of key-value pairs. @attributes@
-- and @excludedVariableNames@ have an array of key-value pairs for their
-- value. Use the following format to define your @DataSchema@.
--
-- { \"version\": \"1.0\",
--
-- \"recordAnnotationFieldName\": \"F1\",
--
-- \"recordWeightFieldName\": \"F2\",
--
-- \"targetFieldName\": \"F3\",
--
-- \"dataFormat\": \"CSV\",
--
-- \"dataFileContainsHeader\": true,
--
-- \"attributes\": [
--
-- { \"fieldName\": \"F1\", \"fieldType\": \"TEXT\" }, { \"fieldName\":
-- \"F2\", \"fieldType\": \"NUMERIC\" }, { \"fieldName\": \"F3\",
-- \"fieldType\": \"CATEGORICAL\" }, { \"fieldName\": \"F4\",
-- \"fieldType\": \"NUMERIC\" }, { \"fieldName\": \"F5\", \"fieldType\":
-- \"CATEGORICAL\" }, { \"fieldName\": \"F6\", \"fieldType\": \"TEXT\" }, {
-- \"fieldName\": \"F7\", \"fieldType\": \"WEIGHTED_INT_SEQUENCE\" }, {
-- \"fieldName\": \"F8\", \"fieldType\": \"WEIGHTED_STRING_SEQUENCE\" } ],
--
-- \"excludedVariableNames\": [ \"F6\" ] }
redshiftDataSpec_dataSchema :: Lens.Lens' RedshiftDataSpec (Prelude.Maybe Prelude.Text)
redshiftDataSpec_dataSchema = Lens.lens (\RedshiftDataSpec' {dataSchema} -> dataSchema) (\s@RedshiftDataSpec' {} a -> s {dataSchema = a} :: RedshiftDataSpec)
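
-- An illustrative sketch, not part of the generated API: supplying an
-- inline 'dataSchema' JSON string (abridged from the format documented
-- above) instead of a 'dataSchemaUri'. 'withInlineSchema' is a
-- hypothetical helper name.
withInlineSchema :: RedshiftDataSpec -> RedshiftDataSpec
withInlineSchema spec =
  spec
    { dataSchema =
        Prelude.Just
          "{\"version\": \"1.0\", \"targetFieldName\": \"F3\", \"dataFormat\": \"CSV\", \"dataFileContainsHeader\": true, \"attributes\": [{\"fieldName\": \"F1\", \"fieldType\": \"TEXT\"}, {\"fieldName\": \"F3\", \"fieldType\": \"CATEGORICAL\"}]}"
    }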

-- | A JSON string that represents the splitting and rearrangement processing
-- to be applied to a @DataSource@. If the @DataRearrangement@ parameter is
-- not provided, all of the input data is used to create the @Datasource@.
--
-- There are multiple parameters that control what data is used to create a
-- datasource:
--
-- -   __@percentBegin@__
--
--     Use @percentBegin@ to indicate the beginning of the range of the
--     data used to create the Datasource. If you do not include
--     @percentBegin@ and @percentEnd@, Amazon ML includes all of the data
--     when creating the datasource.
--
-- -   __@percentEnd@__
--
--     Use @percentEnd@ to indicate the end of the range of the data used
--     to create the Datasource. If you do not include @percentBegin@ and
--     @percentEnd@, Amazon ML includes all of the data when creating the
--     datasource.
--
-- -   __@complement@__
--
--     The @complement@ parameter instructs Amazon ML to use the data that
--     is not included in the range of @percentBegin@ to @percentEnd@ to
--     create a datasource. The @complement@ parameter is useful if you
--     need to create complementary datasources for training and
--     evaluation. To create a complementary datasource, use the same
--     values for @percentBegin@ and @percentEnd@, along with the
--     @complement@ parameter.
--
--     For example, the following two datasources do not share any data,
--     and can be used to train and evaluate a model. The first datasource
--     has 25 percent of the data, and the second one has 75 percent of the
--     data.
--
--     Datasource for evaluation:
--     @{\"splitting\":{\"percentBegin\":0, \"percentEnd\":25}}@
--
--     Datasource for training:
--     @{\"splitting\":{\"percentBegin\":0, \"percentEnd\":25, \"complement\":\"true\"}}@
--
-- -   __@strategy@__
--
--     To change how Amazon ML splits the data for a datasource, use the
--     @strategy@ parameter.
--
--     The default value for the @strategy@ parameter is @sequential@,
--     meaning that Amazon ML takes all of the data records between the
--     @percentBegin@ and @percentEnd@ parameters for the datasource, in
--     the order that the records appear in the input data.
--
--     The following two @DataRearrangement@ lines are examples of
--     sequentially ordered training and evaluation datasources:
--
--     Datasource for evaluation:
--     @{\"splitting\":{\"percentBegin\":70, \"percentEnd\":100, \"strategy\":\"sequential\"}}@
--
--     Datasource for training:
--     @{\"splitting\":{\"percentBegin\":70, \"percentEnd\":100, \"strategy\":\"sequential\", \"complement\":\"true\"}}@
--
--     To randomly split the input data into the proportions indicated by
--     the percentBegin and percentEnd parameters, set the @strategy@
--     parameter to @random@ and provide a string that is used as the seed
--     value for the random data splitting (for example, you can use the S3
--     path to your data as the random seed string). If you choose the
--     random split strategy, Amazon ML assigns each row of data a
--     pseudo-random number between 0 and 100, and then selects the rows
--     that have an assigned number between @percentBegin@ and
--     @percentEnd@. Pseudo-random numbers are assigned using both the
--     input seed string value and the byte offset as a seed, so changing
--     the data results in a different split. Any existing ordering is
--     preserved. The random splitting strategy ensures that variables in
--     the training and evaluation data are distributed similarly. It is
--     useful in the cases where the input data may have an implicit sort
--     order, which would otherwise result in training and evaluation
--     datasources containing non-similar data records.
--
--     The following two @DataRearrangement@ lines are examples of
--     non-sequentially ordered training and evaluation datasources:
--
--     Datasource for evaluation:
--     @{\"splitting\":{\"percentBegin\":70, \"percentEnd\":100, \"strategy\":\"random\", \"randomSeed\"=\"s3:\/\/my_s3_path\/bucket\/file.csv\"}}@
--
--     Datasource for training:
--     @{\"splitting\":{\"percentBegin\":70, \"percentEnd\":100, \"strategy\":\"random\", \"randomSeed\"=\"s3:\/\/my_s3_path\/bucket\/file.csv\", \"complement\":\"true\"}}@
redshiftDataSpec_dataRearrangement :: Lens.Lens' RedshiftDataSpec (Prelude.Maybe Prelude.Text)
redshiftDataSpec_dataRearrangement = Lens.lens (\RedshiftDataSpec' {dataRearrangement} -> dataRearrangement) (\s@RedshiftDataSpec' {} a -> s {dataRearrangement = a} :: RedshiftDataSpec)
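
-- An illustrative sketch, not part of the generated API: setting a
-- sequential evaluation split (the 70-100 percent range of the input, as
-- documented above) via a plain record update. 'withEvaluationSplit' is a
-- hypothetical helper name.
withEvaluationSplit :: RedshiftDataSpec -> RedshiftDataSpec
withEvaluationSplit spec =
  spec
    { dataRearrangement =
        Prelude.Just
          "{\"splitting\":{\"percentBegin\":70, \"percentEnd\":100, \"strategy\":\"sequential\"}}"
    }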

-- | Describes the @DatabaseName@ and @ClusterIdentifier@ for an Amazon
-- Redshift @DataSource@.
redshiftDataSpec_databaseInformation :: Lens.Lens' RedshiftDataSpec RedshiftDatabase
redshiftDataSpec_databaseInformation = Lens.lens (\RedshiftDataSpec' {databaseInformation} -> databaseInformation) (\s@RedshiftDataSpec' {} a -> s {databaseInformation = a} :: RedshiftDataSpec)

-- | Describes the SQL Query to execute on an Amazon Redshift database for an
-- Amazon Redshift @DataSource@.
redshiftDataSpec_selectSqlQuery :: Lens.Lens' RedshiftDataSpec Prelude.Text
redshiftDataSpec_selectSqlQuery = Lens.lens (\RedshiftDataSpec' {selectSqlQuery} -> selectSqlQuery) (\s@RedshiftDataSpec' {} a -> s {selectSqlQuery = a} :: RedshiftDataSpec)

-- | Describes AWS Identity and Access Management (IAM) credentials that are
-- used to connect to the Amazon Redshift database.
redshiftDataSpec_databaseCredentials :: Lens.Lens' RedshiftDataSpec RedshiftDatabaseCredentials
redshiftDataSpec_databaseCredentials = Lens.lens (\RedshiftDataSpec' {databaseCredentials} -> databaseCredentials) (\s@RedshiftDataSpec' {} a -> s {databaseCredentials = a} :: RedshiftDataSpec)

-- | Describes an Amazon S3 location to store the result set of the
-- @SelectSqlQuery@ query.
redshiftDataSpec_s3StagingLocation :: Lens.Lens' RedshiftDataSpec Prelude.Text
redshiftDataSpec_s3StagingLocation = Lens.lens (\RedshiftDataSpec' {s3StagingLocation} -> s3StagingLocation) (\s@RedshiftDataSpec' {} a -> s {s3StagingLocation = a} :: RedshiftDataSpec)

instance Prelude.Hashable RedshiftDataSpec

instance Prelude.NFData RedshiftDataSpec

instance Core.ToJSON RedshiftDataSpec where
  toJSON RedshiftDataSpec' {..} =
    Core.object
      ( Prelude.catMaybes
          [ ("DataSchemaUri" Core..=) Prelude.<$> dataSchemaUri,
            ("DataSchema" Core..=) Prelude.<$> dataSchema,
            ("DataRearrangement" Core..=)
              Prelude.<$> dataRearrangement,
            Prelude.Just
              ("DatabaseInformation" Core..= databaseInformation),
            Prelude.Just
              ("SelectSqlQuery" Core..= selectSqlQuery),
            Prelude.Just
              ("DatabaseCredentials" Core..= databaseCredentials),
            Prelude.Just
              ("S3StagingLocation" Core..= s3StagingLocation)
          ]
      )
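
-- A note on the wire shape (illustrative, derived from the instance above):
-- optional fields that are 'Prelude.Nothing' are dropped by
-- 'Prelude.catMaybes', so a spec built by 'newRedshiftDataSpec' with no
-- optional fields set serialises to only the four required keys, e.g.
--
-- > { "DatabaseInformation": { ... },
-- >   "SelectSqlQuery": "...",
-- >   "DatabaseCredentials": { ... },
-- >   "S3StagingLocation": "s3://..." }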