{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE DuplicateRecordFields #-}
{-# LANGUAGE NamedFieldPuns #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE StrictData #-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# OPTIONS_GHC -fno-warn-unused-imports #-}
{-# OPTIONS_GHC -fno-warn-unused-matches #-}

-- Derived from AWS service descriptions, licensed under Apache 2.0.

-- |
-- Module      : Amazonka.MachineLearning.Types.S3DataSpec
-- Copyright   : (c) 2013-2021 Brendan Hay
-- License     : Mozilla Public License, v. 2.0.
-- Maintainer  : Brendan Hay <brendan.g.hay+amazonka@gmail.com>
-- Stability   : auto-generated
-- Portability : non-portable (GHC extensions)
module Amazonka.MachineLearning.Types.S3DataSpec where

import qualified Amazonka.Core as Core
import qualified Amazonka.Lens as Lens
import qualified Amazonka.Prelude as Prelude

-- | Describes the data specification of a @DataSource@.
--
-- /See:/ 'newS3DataSpec' smart constructor.
data S3DataSpec = S3DataSpec'
  { -- | A JSON string that represents the schema for an Amazon S3 @DataSource@.
    -- The @DataSchema@ defines the structure of the observation data in the
    -- data file(s) referenced in the @DataSource@.
    --
    -- You must provide either the @DataSchema@ or the @DataSchemaLocationS3@.
    --
    -- Define your @DataSchema@ as a series of key-value pairs. @attributes@
    -- and @excludedVariableNames@ have an array of key-value pairs for their
    -- value. Use the following format to define your @DataSchema@.
    --
    -- { \"version\": \"1.0\",
    --
    -- \"recordAnnotationFieldName\": \"F1\",
    --
    -- \"recordWeightFieldName\": \"F2\",
    --
    -- \"targetFieldName\": \"F3\",
    --
    -- \"dataFormat\": \"CSV\",
    --
    -- \"dataFileContainsHeader\": true,
    --
    -- \"attributes\": [
    --
    -- { \"fieldName\": \"F1\", \"fieldType\": \"TEXT\" }, { \"fieldName\":
    -- \"F2\", \"fieldType\": \"NUMERIC\" }, { \"fieldName\": \"F3\",
    -- \"fieldType\": \"CATEGORICAL\" }, { \"fieldName\": \"F4\",
    -- \"fieldType\": \"NUMERIC\" }, { \"fieldName\": \"F5\", \"fieldType\":
    -- \"CATEGORICAL\" }, { \"fieldName\": \"F6\", \"fieldType\": \"TEXT\" }, {
    -- \"fieldName\": \"F7\", \"fieldType\": \"WEIGHTED_INT_SEQUENCE\" }, {
    -- \"fieldName\": \"F8\", \"fieldType\": \"WEIGHTED_STRING_SEQUENCE\" } ],
    --
    -- \"excludedVariableNames\": [ \"F6\" ] }
    dataSchema :: Prelude.Maybe Prelude.Text,
    -- | Describes the schema location in Amazon S3. You must provide either the
    -- @DataSchema@ or the @DataSchemaLocationS3@.
    dataSchemaLocationS3 :: Prelude.Maybe Prelude.Text,
    -- | A JSON string that represents the splitting and rearrangement processing
    -- to be applied to a @DataSource@. If the @DataRearrangement@ parameter is
    -- not provided, all of the input data is used to create the @Datasource@.
    --
    -- There are multiple parameters that control what data is used to create a
    -- datasource:
    --
    -- -   __@percentBegin@__
    --
    --     Use @percentBegin@ to indicate the beginning of the range of the
    --     data used to create the Datasource. If you do not include
    --     @percentBegin@ and @percentEnd@, Amazon ML includes all of the data
    --     when creating the datasource.
    --
    -- -   __@percentEnd@__
    --
    --     Use @percentEnd@ to indicate the end of the range of the data used
    --     to create the Datasource. If you do not include @percentBegin@ and
    --     @percentEnd@, Amazon ML includes all of the data when creating the
    --     datasource.
    --
    -- -   __@complement@__
    --
    --     The @complement@ parameter instructs Amazon ML to use the data that
    --     is not included in the range of @percentBegin@ to @percentEnd@ to
    --     create a datasource. The @complement@ parameter is useful if you
    --     need to create complementary datasources for training and
    --     evaluation. To create a complementary datasource, use the same
    --     values for @percentBegin@ and @percentEnd@, along with the
    --     @complement@ parameter.
    --
    --     For example, the following two datasources do not share any data,
    --     and can be used to train and evaluate a model. The first datasource
    --     has 25 percent of the data, and the second one has 75 percent of the
    --     data.
    --
    --     Datasource for evaluation:
    --     @{\"splitting\":{\"percentBegin\":0, \"percentEnd\":25}}@
    --
    --     Datasource for training:
    --     @{\"splitting\":{\"percentBegin\":0, \"percentEnd\":25, \"complement\":\"true\"}}@
    --
    -- -   __@strategy@__
    --
    --     To change how Amazon ML splits the data for a datasource, use the
    --     @strategy@ parameter.
    --
    --     The default value for the @strategy@ parameter is @sequential@,
    --     meaning that Amazon ML takes all of the data records between the
    --     @percentBegin@ and @percentEnd@ parameters for the datasource, in
    --     the order that the records appear in the input data.
    --
    --     The following two @DataRearrangement@ lines are examples of
    --     sequentially ordered training and evaluation datasources:
    --
    --     Datasource for evaluation:
    --     @{\"splitting\":{\"percentBegin\":70, \"percentEnd\":100, \"strategy\":\"sequential\"}}@
    --
    --     Datasource for training:
    --     @{\"splitting\":{\"percentBegin\":70, \"percentEnd\":100, \"strategy\":\"sequential\", \"complement\":\"true\"}}@
    --
    --     To randomly split the input data into the proportions indicated by
    --     the percentBegin and percentEnd parameters, set the @strategy@
    --     parameter to @random@ and provide a string that is used as the seed
    --     value for the random data splitting (for example, you can use the S3
    --     path to your data as the random seed string). If you choose the
    --     random split strategy, Amazon ML assigns each row of data a
    --     pseudo-random number between 0 and 100, and then selects the rows
    --     that have an assigned number between @percentBegin@ and
    --     @percentEnd@. Pseudo-random numbers are assigned using both the
    --     input seed string value and the byte offset as a seed, so changing
    --     the data results in a different split. Any existing ordering is
    --     preserved. The random splitting strategy ensures that variables in
    --     the training and evaluation data are distributed similarly. It is
    --     useful in the cases where the input data may have an implicit sort
    --     order, which would otherwise result in training and evaluation
    --     datasources containing non-similar data records.
    --
    --     The following two @DataRearrangement@ lines are examples of
    --     non-sequentially ordered training and evaluation datasources:
    --
    --     Datasource for evaluation:
    --     @{\"splitting\":{\"percentBegin\":70, \"percentEnd\":100, \"strategy\":\"random\", \"randomSeed\"=\"s3:\/\/my_s3_path\/bucket\/file.csv\"}}@
    --
    --     Datasource for training:
    --     @{\"splitting\":{\"percentBegin\":70, \"percentEnd\":100, \"strategy\":\"random\", \"randomSeed\"=\"s3:\/\/my_s3_path\/bucket\/file.csv\", \"complement\":\"true\"}}@
    dataRearrangement :: Prelude.Maybe Prelude.Text,
    -- | The location of the data file(s) used by a @DataSource@. The URI
    -- specifies a data file or an Amazon Simple Storage Service (Amazon S3)
    -- directory or bucket containing data files.
    dataLocationS3 :: Prelude.Text
  }
  -- Stock-derived instances; 'Prelude.Generic' is derived via the
  -- DeriveGeneric extension enabled at the top of this module.
  deriving (Prelude.Eq, Prelude.Read, Prelude.Show, Prelude.Generic)

-- |
-- Create a value of 'S3DataSpec' with all optional fields omitted.
--
-- Use <https://hackage.haskell.org/package/generic-lens generic-lens> or <https://hackage.haskell.org/package/optics optics> to modify other optional fields.
--
-- The following record fields are available, with the corresponding lenses provided
-- for backwards compatibility:
--
-- 'dataSchema', 's3DataSpec_dataSchema' - A JSON string that represents the schema for an Amazon S3 @DataSource@.
-- The @DataSchema@ defines the structure of the observation data in the
-- data file(s) referenced in the @DataSource@.
--
-- You must provide either the @DataSchema@ or the @DataSchemaLocationS3@.
--
-- Define your @DataSchema@ as a series of key-value pairs. @attributes@
-- and @excludedVariableNames@ have an array of key-value pairs for their
-- value. Use the following format to define your @DataSchema@.
--
-- { \"version\": \"1.0\",
--
-- \"recordAnnotationFieldName\": \"F1\",
--
-- \"recordWeightFieldName\": \"F2\",
--
-- \"targetFieldName\": \"F3\",
--
-- \"dataFormat\": \"CSV\",
--
-- \"dataFileContainsHeader\": true,
--
-- \"attributes\": [
--
-- { \"fieldName\": \"F1\", \"fieldType\": \"TEXT\" }, { \"fieldName\":
-- \"F2\", \"fieldType\": \"NUMERIC\" }, { \"fieldName\": \"F3\",
-- \"fieldType\": \"CATEGORICAL\" }, { \"fieldName\": \"F4\",
-- \"fieldType\": \"NUMERIC\" }, { \"fieldName\": \"F5\", \"fieldType\":
-- \"CATEGORICAL\" }, { \"fieldName\": \"F6\", \"fieldType\": \"TEXT\" }, {
-- \"fieldName\": \"F7\", \"fieldType\": \"WEIGHTED_INT_SEQUENCE\" }, {
-- \"fieldName\": \"F8\", \"fieldType\": \"WEIGHTED_STRING_SEQUENCE\" } ],
--
-- \"excludedVariableNames\": [ \"F6\" ] }
--
-- 'dataSchemaLocationS3', 's3DataSpec_dataSchemaLocationS3' - Describes the schema location in Amazon S3. You must provide either the
-- @DataSchema@ or the @DataSchemaLocationS3@.
--
-- 'dataRearrangement', 's3DataSpec_dataRearrangement' - A JSON string that represents the splitting and rearrangement processing
-- to be applied to a @DataSource@. If the @DataRearrangement@ parameter is
-- not provided, all of the input data is used to create the @Datasource@.
--
-- There are multiple parameters that control what data is used to create a
-- datasource:
--
-- -   __@percentBegin@__
--
--     Use @percentBegin@ to indicate the beginning of the range of the
--     data used to create the Datasource. If you do not include
--     @percentBegin@ and @percentEnd@, Amazon ML includes all of the data
--     when creating the datasource.
--
-- -   __@percentEnd@__
--
--     Use @percentEnd@ to indicate the end of the range of the data used
--     to create the Datasource. If you do not include @percentBegin@ and
--     @percentEnd@, Amazon ML includes all of the data when creating the
--     datasource.
--
-- -   __@complement@__
--
--     The @complement@ parameter instructs Amazon ML to use the data that
--     is not included in the range of @percentBegin@ to @percentEnd@ to
--     create a datasource. The @complement@ parameter is useful if you
--     need to create complementary datasources for training and
--     evaluation. To create a complementary datasource, use the same
--     values for @percentBegin@ and @percentEnd@, along with the
--     @complement@ parameter.
--
--     For example, the following two datasources do not share any data,
--     and can be used to train and evaluate a model. The first datasource
--     has 25 percent of the data, and the second one has 75 percent of the
--     data.
--
--     Datasource for evaluation:
--     @{\"splitting\":{\"percentBegin\":0, \"percentEnd\":25}}@
--
--     Datasource for training:
--     @{\"splitting\":{\"percentBegin\":0, \"percentEnd\":25, \"complement\":\"true\"}}@
--
-- -   __@strategy@__
--
--     To change how Amazon ML splits the data for a datasource, use the
--     @strategy@ parameter.
--
--     The default value for the @strategy@ parameter is @sequential@,
--     meaning that Amazon ML takes all of the data records between the
--     @percentBegin@ and @percentEnd@ parameters for the datasource, in
--     the order that the records appear in the input data.
--
--     The following two @DataRearrangement@ lines are examples of
--     sequentially ordered training and evaluation datasources:
--
--     Datasource for evaluation:
--     @{\"splitting\":{\"percentBegin\":70, \"percentEnd\":100, \"strategy\":\"sequential\"}}@
--
--     Datasource for training:
--     @{\"splitting\":{\"percentBegin\":70, \"percentEnd\":100, \"strategy\":\"sequential\", \"complement\":\"true\"}}@
--
--     To randomly split the input data into the proportions indicated by
--     the percentBegin and percentEnd parameters, set the @strategy@
--     parameter to @random@ and provide a string that is used as the seed
--     value for the random data splitting (for example, you can use the S3
--     path to your data as the random seed string). If you choose the
--     random split strategy, Amazon ML assigns each row of data a
--     pseudo-random number between 0 and 100, and then selects the rows
--     that have an assigned number between @percentBegin@ and
--     @percentEnd@. Pseudo-random numbers are assigned using both the
--     input seed string value and the byte offset as a seed, so changing
--     the data results in a different split. Any existing ordering is
--     preserved. The random splitting strategy ensures that variables in
--     the training and evaluation data are distributed similarly. It is
--     useful in the cases where the input data may have an implicit sort
--     order, which would otherwise result in training and evaluation
--     datasources containing non-similar data records.
--
--     The following two @DataRearrangement@ lines are examples of
--     non-sequentially ordered training and evaluation datasources:
--
--     Datasource for evaluation:
--     @{\"splitting\":{\"percentBegin\":70, \"percentEnd\":100, \"strategy\":\"random\", \"randomSeed\"=\"s3:\/\/my_s3_path\/bucket\/file.csv\"}}@
--
--     Datasource for training:
--     @{\"splitting\":{\"percentBegin\":70, \"percentEnd\":100, \"strategy\":\"random\", \"randomSeed\"=\"s3:\/\/my_s3_path\/bucket\/file.csv\", \"complement\":\"true\"}}@
--
-- 'dataLocationS3', 's3DataSpec_dataLocationS3' - The location of the data file(s) used by a @DataSource@. The URI
-- specifies a data file or an Amazon Simple Storage Service (Amazon S3)
-- directory or bucket containing data files.
newS3DataSpec ::
  -- | 'dataLocationS3'
  Prelude.Text ->
  S3DataSpec
newS3DataSpec pDataLocationS3_ =
  S3DataSpec'
    { -- The three optional fields start out unset.
      dataSchema = Prelude.Nothing,
      dataSchemaLocationS3 = Prelude.Nothing,
      dataRearrangement = Prelude.Nothing,
      -- The only required field is taken from the caller.
      dataLocationS3 = pDataLocationS3_
    }

-- | A JSON string that represents the schema for an Amazon S3 @DataSource@.
-- The @DataSchema@ defines the structure of the observation data in the
-- data file(s) referenced in the @DataSource@.
--
-- You must provide either the @DataSchema@ or the @DataSchemaLocationS3@.
--
-- Define your @DataSchema@ as a series of key-value pairs. @attributes@
-- and @excludedVariableNames@ have an array of key-value pairs for their
-- value. Use the following format to define your @DataSchema@.
--
-- { \"version\": \"1.0\",
--
-- \"recordAnnotationFieldName\": \"F1\",
--
-- \"recordWeightFieldName\": \"F2\",
--
-- \"targetFieldName\": \"F3\",
--
-- \"dataFormat\": \"CSV\",
--
-- \"dataFileContainsHeader\": true,
--
-- \"attributes\": [
--
-- { \"fieldName\": \"F1\", \"fieldType\": \"TEXT\" }, { \"fieldName\":
-- \"F2\", \"fieldType\": \"NUMERIC\" }, { \"fieldName\": \"F3\",
-- \"fieldType\": \"CATEGORICAL\" }, { \"fieldName\": \"F4\",
-- \"fieldType\": \"NUMERIC\" }, { \"fieldName\": \"F5\", \"fieldType\":
-- \"CATEGORICAL\" }, { \"fieldName\": \"F6\", \"fieldType\": \"TEXT\" }, {
-- \"fieldName\": \"F7\", \"fieldType\": \"WEIGHTED_INT_SEQUENCE\" }, {
-- \"fieldName\": \"F8\", \"fieldType\": \"WEIGHTED_STRING_SEQUENCE\" } ],
--
-- \"excludedVariableNames\": [ \"F6\" ] }
s3DataSpec_dataSchema :: Lens.Lens' S3DataSpec (Prelude.Maybe Prelude.Text)
s3DataSpec_dataSchema =
  Lens.lens
    -- Getter: bind the field with a NamedFieldPuns pattern and return it.
    (\S3DataSpec' {dataSchema} -> dataSchema)
    -- Setter: the trailing annotation pins the record type of the update
    -- (DuplicateRecordFields is enabled in this module).
    (\s@S3DataSpec' {} a -> s {dataSchema = a} :: S3DataSpec)

-- | Describes the schema location in Amazon S3. You must provide either the
-- @DataSchema@ or the @DataSchemaLocationS3@.
s3DataSpec_dataSchemaLocationS3 :: Lens.Lens' S3DataSpec (Prelude.Maybe Prelude.Text)
s3DataSpec_dataSchemaLocationS3 =
  Lens.lens
    -- Getter: bind the field with a NamedFieldPuns pattern and return it.
    (\S3DataSpec' {dataSchemaLocationS3} -> dataSchemaLocationS3)
    -- Setter: the trailing annotation pins the record type of the update
    -- (DuplicateRecordFields is enabled in this module).
    (\s@S3DataSpec' {} a -> s {dataSchemaLocationS3 = a} :: S3DataSpec)

-- | A JSON string that represents the splitting and rearrangement processing
-- to be applied to a @DataSource@. If the @DataRearrangement@ parameter is
-- not provided, all of the input data is used to create the @Datasource@.
--
-- There are multiple parameters that control what data is used to create a
-- datasource:
--
-- -   __@percentBegin@__
--
--     Use @percentBegin@ to indicate the beginning of the range of the
--     data used to create the Datasource. If you do not include
--     @percentBegin@ and @percentEnd@, Amazon ML includes all of the data
--     when creating the datasource.
--
-- -   __@percentEnd@__
--
--     Use @percentEnd@ to indicate the end of the range of the data used
--     to create the Datasource. If you do not include @percentBegin@ and
--     @percentEnd@, Amazon ML includes all of the data when creating the
--     datasource.
--
-- -   __@complement@__
--
--     The @complement@ parameter instructs Amazon ML to use the data that
--     is not included in the range of @percentBegin@ to @percentEnd@ to
--     create a datasource. The @complement@ parameter is useful if you
--     need to create complementary datasources for training and
--     evaluation. To create a complementary datasource, use the same
--     values for @percentBegin@ and @percentEnd@, along with the
--     @complement@ parameter.
--
--     For example, the following two datasources do not share any data,
--     and can be used to train and evaluate a model. The first datasource
--     has 25 percent of the data, and the second one has 75 percent of the
--     data.
--
--     Datasource for evaluation:
--     @{\"splitting\":{\"percentBegin\":0, \"percentEnd\":25}}@
--
--     Datasource for training:
--     @{\"splitting\":{\"percentBegin\":0, \"percentEnd\":25, \"complement\":\"true\"}}@
--
-- -   __@strategy@__
--
--     To change how Amazon ML splits the data for a datasource, use the
--     @strategy@ parameter.
--
--     The default value for the @strategy@ parameter is @sequential@,
--     meaning that Amazon ML takes all of the data records between the
--     @percentBegin@ and @percentEnd@ parameters for the datasource, in
--     the order that the records appear in the input data.
--
--     The following two @DataRearrangement@ lines are examples of
--     sequentially ordered training and evaluation datasources:
--
--     Datasource for evaluation:
--     @{\"splitting\":{\"percentBegin\":70, \"percentEnd\":100, \"strategy\":\"sequential\"}}@
--
--     Datasource for training:
--     @{\"splitting\":{\"percentBegin\":70, \"percentEnd\":100, \"strategy\":\"sequential\", \"complement\":\"true\"}}@
--
--     To randomly split the input data into the proportions indicated by
--     the percentBegin and percentEnd parameters, set the @strategy@
--     parameter to @random@ and provide a string that is used as the seed
--     value for the random data splitting (for example, you can use the S3
--     path to your data as the random seed string). If you choose the
--     random split strategy, Amazon ML assigns each row of data a
--     pseudo-random number between 0 and 100, and then selects the rows
--     that have an assigned number between @percentBegin@ and
--     @percentEnd@. Pseudo-random numbers are assigned using both the
--     input seed string value and the byte offset as a seed, so changing
--     the data results in a different split. Any existing ordering is
--     preserved. The random splitting strategy ensures that variables in
--     the training and evaluation data are distributed similarly. It is
--     useful in the cases where the input data may have an implicit sort
--     order, which would otherwise result in training and evaluation
--     datasources containing non-similar data records.
--
--     The following two @DataRearrangement@ lines are examples of
--     non-sequentially ordered training and evaluation datasources:
--
--     Datasource for evaluation:
--     @{\"splitting\":{\"percentBegin\":70, \"percentEnd\":100, \"strategy\":\"random\", \"randomSeed\"=\"s3:\/\/my_s3_path\/bucket\/file.csv\"}}@
--
--     Datasource for training:
--     @{\"splitting\":{\"percentBegin\":70, \"percentEnd\":100, \"strategy\":\"random\", \"randomSeed\"=\"s3:\/\/my_s3_path\/bucket\/file.csv\", \"complement\":\"true\"}}@
s3DataSpec_dataRearrangement :: Lens.Lens' S3DataSpec (Prelude.Maybe Prelude.Text)
s3DataSpec_dataRearrangement =
  Lens.lens
    -- Getter: bind the field with a NamedFieldPuns pattern and return it.
    (\S3DataSpec' {dataRearrangement} -> dataRearrangement)
    -- Setter: the trailing annotation pins the record type of the update
    -- (DuplicateRecordFields is enabled in this module).
    (\s@S3DataSpec' {} a -> s {dataRearrangement = a} :: S3DataSpec)

-- | The location of the data file(s) used by a @DataSource@. The URI
-- specifies a data file or an Amazon Simple Storage Service (Amazon S3)
-- directory or bucket containing data files.
s3DataSpec_dataLocationS3 :: Lens.Lens' S3DataSpec Prelude.Text
s3DataSpec_dataLocationS3 =
  Lens.lens
    -- Getter: bind the field with a NamedFieldPuns pattern and return it.
    (\S3DataSpec' {dataLocationS3} -> dataLocationS3)
    -- Setter: the trailing annotation pins the record type of the update
    -- (DuplicateRecordFields is enabled in this module).
    (\s@S3DataSpec' {} a -> s {dataLocationS3 = a} :: S3DataSpec)

-- No methods are given, so the class's default implementations are used.
-- NOTE(review): presumably these defaults are Generic-based and rely on the
-- derived 'Prelude.Generic' instance of 'S3DataSpec' -- confirm against
-- "Amazonka.Prelude".
instance Prelude.Hashable S3DataSpec

-- No methods are given, so the class's default implementations are used.
-- NOTE(review): presumably these defaults are Generic-based and rely on the
-- derived 'Prelude.Generic' instance of 'S3DataSpec' -- confirm against
-- "Amazonka.Prelude".
instance Prelude.NFData S3DataSpec

-- Serialises an 'S3DataSpec' as a JSON object. Optional fields that are
-- 'Prelude.Nothing' are dropped by 'Prelude.catMaybes', so their keys are
-- omitted from the object; @DataLocationS3@ is always emitted.
instance Core.ToJSON S3DataSpec where
  toJSON S3DataSpec' {..} =
    Core.object
      ( Prelude.catMaybes
          [ ("DataSchema" Core..=) Prelude.<$> dataSchema,
            ("DataSchemaLocationS3" Core..=)
              Prelude.<$> dataSchemaLocationS3,
            ("DataRearrangement" Core..=)
              Prelude.<$> dataRearrangement,
            Prelude.Just
              ("DataLocationS3" Core..= dataLocationS3)
          ]
      )