{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE DuplicateRecordFields #-}
{-# LANGUAGE NamedFieldPuns #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE StrictData #-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# OPTIONS_GHC -fno-warn-unused-imports #-}
{-# OPTIONS_GHC -fno-warn-unused-matches #-}

-- Derived from AWS service descriptions, licensed under Apache 2.0.

-- |
-- Module      : Amazonka.SageMaker.Types.S3DataSource
-- Copyright   : (c) 2013-2021 Brendan Hay
-- License     : Mozilla Public License, v. 2.0.
-- Maintainer  : Brendan Hay <brendan.g.hay+amazonka@gmail.com>
-- Stability   : auto-generated
-- Portability : non-portable (GHC extensions)
module Amazonka.SageMaker.Types.S3DataSource where

import qualified Amazonka.Core as Core
import qualified Amazonka.Lens as Lens
import qualified Amazonka.Prelude as Prelude
import Amazonka.SageMaker.Types.S3DataDistribution
import Amazonka.SageMaker.Types.S3DataType

-- | Describes the S3 data source.
--
-- /See:/ 'newS3DataSource' smart constructor.
data S3DataSource = S3DataSource'
  { -- | If you want Amazon SageMaker to replicate the entire dataset on each ML
    -- compute instance that is launched for model training, specify
    -- @FullyReplicated@.
    --
    -- If you want Amazon SageMaker to replicate a subset of data on each ML
    -- compute instance that is launched for model training, specify
    -- @ShardedByS3Key@. If there are /n/ ML compute instances launched for a
    -- training job, each instance gets approximately 1\//n/ of the number of
    -- S3 objects. In this case, model training on each machine uses only the
    -- subset of training data.
    --
    -- Don\'t choose more ML compute instances for training than available S3
    -- objects. If you do, some nodes won\'t get any data and you will pay for
    -- nodes that aren\'t getting any training data. This applies in both File
    -- and Pipe modes. Keep this in mind when developing algorithms.
    --
    -- In distributed training, where you use multiple ML compute EC2
    -- instances, you might choose @ShardedByS3Key@. If the algorithm requires
    -- copying training data to the ML storage volume (when @TrainingInputMode@
    -- is set to @File@), this copies 1\//n/ of the number of objects.
    s3DataDistributionType :: Prelude.Maybe S3DataDistribution,
    -- | A list of one or more attribute names to use that are found in a
    -- specified augmented manifest file.
    attributeNames :: Prelude.Maybe [Prelude.Text],
    -- | If you choose @S3Prefix@, @S3Uri@ identifies a key name prefix. Amazon
    -- SageMaker uses all objects that match the specified key name prefix for
    -- model training.
    --
    -- If you choose @ManifestFile@, @S3Uri@ identifies an object that is a
    -- manifest file containing a list of object keys that you want Amazon
    -- SageMaker to use for model training.
    --
    -- If you choose @AugmentedManifestFile@, S3Uri identifies an object that
    -- is an augmented manifest file in JSON lines format. This file contains
    -- the data you want to use for model training. @AugmentedManifestFile@ can
    -- only be used if the Channel\'s input mode is @Pipe@.
    s3DataType :: S3DataType,
    -- | Depending on the value specified for the @S3DataType@, identifies either
    -- a key name prefix or a manifest. For example:
    --
    -- -   A key name prefix might look like this:
    --     @s3:\/\/bucketname\/exampleprefix@
    --
    -- -   A manifest might look like this:
    --     @s3:\/\/bucketname\/example.manifest@
    --
    --     A manifest is an S3 object which is a JSON file consisting of an
    --     array of elements. The first element is a prefix which is followed
    --     by one or more suffixes. SageMaker appends the suffix elements to
    --     the prefix to get a full set of @S3Uri@. Note that the prefix must
    --     be a valid non-empty @S3Uri@ that precludes users from specifying a
    --     manifest whose individual @S3Uri@ is sourced from different S3
    --     buckets.
    --
    --     The following code example shows a valid manifest format:
    --
    --     @[ {\"prefix\": \"s3:\/\/customer_bucket\/some\/prefix\/\"},@
    --
    --     @ \"relative\/path\/to\/custdata-1\",@
    --
    --     @ \"relative\/path\/custdata-2\",@
    --
    --     @ ...@
    --
    --     @ \"relative\/path\/custdata-N\"@
    --
    --     @]@
    --
    --     This JSON is equivalent to the following @S3Uri@ list:
    --
    --     @s3:\/\/customer_bucket\/some\/prefix\/relative\/path\/to\/custdata-1@
    --
    --     @s3:\/\/customer_bucket\/some\/prefix\/relative\/path\/custdata-2@
    --
    --     @...@
    --
    --     @s3:\/\/customer_bucket\/some\/prefix\/relative\/path\/custdata-N@
    --
    --     The complete set of @S3Uri@ in this manifest is the input data for
    --     the channel for this data source. The object that each @S3Uri@
    --     points to must be readable by the IAM role that Amazon SageMaker
    --     uses to perform tasks on your behalf.
    s3Uri :: Prelude.Text
  }
  deriving (Prelude.Eq, Prelude.Read, Prelude.Show, Prelude.Generic)

-- |
-- Create a value of 'S3DataSource' with all optional fields omitted.
--
-- Use <https://hackage.haskell.org/package/generic-lens generic-lens> or <https://hackage.haskell.org/package/optics optics> to modify other optional fields.
--
-- The following record fields are available, with the corresponding lenses provided
-- for backwards compatibility:
--
-- 's3DataDistributionType', 's3DataSource_s3DataDistributionType' - If you want Amazon SageMaker to replicate the entire dataset on each ML
-- compute instance that is launched for model training, specify
-- @FullyReplicated@.
--
-- If you want Amazon SageMaker to replicate a subset of data on each ML
-- compute instance that is launched for model training, specify
-- @ShardedByS3Key@. If there are /n/ ML compute instances launched for a
-- training job, each instance gets approximately 1\//n/ of the number of
-- S3 objects. In this case, model training on each machine uses only the
-- subset of training data.
--
-- Don\'t choose more ML compute instances for training than available S3
-- objects. If you do, some nodes won\'t get any data and you will pay for
-- nodes that aren\'t getting any training data. This applies in both File
-- and Pipe modes. Keep this in mind when developing algorithms.
--
-- In distributed training, where you use multiple ML compute EC2
-- instances, you might choose @ShardedByS3Key@. If the algorithm requires
-- copying training data to the ML storage volume (when @TrainingInputMode@
-- is set to @File@), this copies 1\//n/ of the number of objects.
--
-- 'attributeNames', 's3DataSource_attributeNames' - A list of one or more attribute names to use that are found in a
-- specified augmented manifest file.
--
-- 's3DataType', 's3DataSource_s3DataType' - If you choose @S3Prefix@, @S3Uri@ identifies a key name prefix. Amazon
-- SageMaker uses all objects that match the specified key name prefix for
-- model training.
--
-- If you choose @ManifestFile@, @S3Uri@ identifies an object that is a
-- manifest file containing a list of object keys that you want Amazon
-- SageMaker to use for model training.
--
-- If you choose @AugmentedManifestFile@, S3Uri identifies an object that
-- is an augmented manifest file in JSON lines format. This file contains
-- the data you want to use for model training. @AugmentedManifestFile@ can
-- only be used if the Channel\'s input mode is @Pipe@.
--
-- 's3Uri', 's3DataSource_s3Uri' - Depending on the value specified for the @S3DataType@, identifies either
-- a key name prefix or a manifest. For example:
--
-- -   A key name prefix might look like this:
--     @s3:\/\/bucketname\/exampleprefix@
--
-- -   A manifest might look like this:
--     @s3:\/\/bucketname\/example.manifest@
--
--     A manifest is an S3 object which is a JSON file consisting of an
--     array of elements. The first element is a prefix which is followed
--     by one or more suffixes. SageMaker appends the suffix elements to
--     the prefix to get a full set of @S3Uri@. Note that the prefix must
--     be a valid non-empty @S3Uri@ that precludes users from specifying a
--     manifest whose individual @S3Uri@ is sourced from different S3
--     buckets.
--
--     The following code example shows a valid manifest format:
--
--     @[ {\"prefix\": \"s3:\/\/customer_bucket\/some\/prefix\/\"},@
--
--     @ \"relative\/path\/to\/custdata-1\",@
--
--     @ \"relative\/path\/custdata-2\",@
--
--     @ ...@
--
--     @ \"relative\/path\/custdata-N\"@
--
--     @]@
--
--     This JSON is equivalent to the following @S3Uri@ list:
--
--     @s3:\/\/customer_bucket\/some\/prefix\/relative\/path\/to\/custdata-1@
--
--     @s3:\/\/customer_bucket\/some\/prefix\/relative\/path\/custdata-2@
--
--     @...@
--
--     @s3:\/\/customer_bucket\/some\/prefix\/relative\/path\/custdata-N@
--
--     The complete set of @S3Uri@ in this manifest is the input data for
--     the channel for this data source. The object that each @S3Uri@
--     points to must be readable by the IAM role that Amazon SageMaker
--     uses to perform tasks on your behalf.
newS3DataSource ::
  -- | 's3DataType'
  S3DataType ->
  -- | 's3Uri'
  Prelude.Text ->
  S3DataSource
newS3DataSource pS3DataType_ pS3Uri_ =
  S3DataSource'
    { -- Both optional fields start out unset.
      s3DataDistributionType = Prelude.Nothing,
      attributeNames = Prelude.Nothing,
      s3DataType = pS3DataType_,
      s3Uri = pS3Uri_
    }

-- | If you want Amazon SageMaker to replicate the entire dataset on each ML
-- compute instance that is launched for model training, specify
-- @FullyReplicated@.
--
-- If you want Amazon SageMaker to replicate a subset of data on each ML
-- compute instance that is launched for model training, specify
-- @ShardedByS3Key@. If there are /n/ ML compute instances launched for a
-- training job, each instance gets approximately 1\//n/ of the number of
-- S3 objects. In this case, model training on each machine uses only the
-- subset of training data.
--
-- Don\'t choose more ML compute instances for training than available S3
-- objects. If you do, some nodes won\'t get any data and you will pay for
-- nodes that aren\'t getting any training data. This applies in both File
-- and Pipe modes. Keep this in mind when developing algorithms.
--
-- In distributed training, where you use multiple ML compute EC2
-- instances, you might choose @ShardedByS3Key@. If the algorithm requires
-- copying training data to the ML storage volume (when @TrainingInputMode@
-- is set to @File@), this copies 1\//n/ of the number of objects.
s3DataSource_s3DataDistributionType :: Lens.Lens' S3DataSource (Prelude.Maybe S3DataDistribution)
s3DataSource_s3DataDistributionType =
  Lens.lens
    (\S3DataSource' {s3DataDistributionType} -> s3DataDistributionType)
    (\s@S3DataSource' {} a -> s {s3DataDistributionType = a} :: S3DataSource)

-- | A list of one or more attribute names to use that are found in a
-- specified augmented manifest file.
s3DataSource_attributeNames :: Lens.Lens' S3DataSource (Prelude.Maybe [Prelude.Text])
s3DataSource_attributeNames =
  Lens.lens
    (\S3DataSource' {attributeNames} -> attributeNames)
    (\s@S3DataSource' {} a -> s {attributeNames = a} :: S3DataSource)
    -- The coercion maps @[Text]@ to @[Text]@, so it leaves the value as is.
    Prelude.. Lens.mapping Lens.coerced

-- | If you choose @S3Prefix@, @S3Uri@ identifies a key name prefix. Amazon
-- SageMaker uses all objects that match the specified key name prefix for
-- model training.
--
-- If you choose @ManifestFile@, @S3Uri@ identifies an object that is a
-- manifest file containing a list of object keys that you want Amazon
-- SageMaker to use for model training.
--
-- If you choose @AugmentedManifestFile@, S3Uri identifies an object that
-- is an augmented manifest file in JSON lines format. This file contains
-- the data you want to use for model training. @AugmentedManifestFile@ can
-- only be used if the Channel\'s input mode is @Pipe@.
s3DataSource_s3DataType :: Lens.Lens' S3DataSource S3DataType
s3DataSource_s3DataType =
  Lens.lens
    (\S3DataSource' {s3DataType} -> s3DataType)
    (\s@S3DataSource' {} a -> s {s3DataType = a} :: S3DataSource)

-- | Depending on the value specified for the @S3DataType@, identifies either
-- a key name prefix or a manifest. For example:
--
-- -   A key name prefix might look like this:
--     @s3:\/\/bucketname\/exampleprefix@
--
-- -   A manifest might look like this:
--     @s3:\/\/bucketname\/example.manifest@
--
--     A manifest is an S3 object which is a JSON file consisting of an
--     array of elements. The first element is a prefix which is followed
--     by one or more suffixes. SageMaker appends the suffix elements to
--     the prefix to get a full set of @S3Uri@. Note that the prefix must
--     be a valid non-empty @S3Uri@ that precludes users from specifying a
--     manifest whose individual @S3Uri@ is sourced from different S3
--     buckets.
--
--     The following code example shows a valid manifest format:
--
--     @[ {\"prefix\": \"s3:\/\/customer_bucket\/some\/prefix\/\"},@
--
--     @ \"relative\/path\/to\/custdata-1\",@
--
--     @ \"relative\/path\/custdata-2\",@
--
--     @ ...@
--
--     @ \"relative\/path\/custdata-N\"@
--
--     @]@
--
--     This JSON is equivalent to the following @S3Uri@ list:
--
--     @s3:\/\/customer_bucket\/some\/prefix\/relative\/path\/to\/custdata-1@
--
--     @s3:\/\/customer_bucket\/some\/prefix\/relative\/path\/custdata-2@
--
--     @...@
--
--     @s3:\/\/customer_bucket\/some\/prefix\/relative\/path\/custdata-N@
--
--     The complete set of @S3Uri@ in this manifest is the input data for
--     the channel for this data source. The object that each @S3Uri@
--     points to must be readable by the IAM role that Amazon SageMaker
--     uses to perform tasks on your behalf.
s3DataSource_s3Uri :: Lens.Lens' S3DataSource Prelude.Text
s3DataSource_s3Uri =
  Lens.lens
    (\S3DataSource' {s3Uri} -> s3Uri)
    (\s@S3DataSource' {} a -> s {s3Uri = a} :: S3DataSource)

-- Decodes from a JSON object. @S3DataType@ and @S3Uri@ are required keys;
-- @S3DataDistributionType@ and @AttributeNames@ are optional.
instance Core.FromJSON S3DataSource where
  parseJSON =
    Core.withObject
      "S3DataSource"
      ( \x ->
          S3DataSource'
            Prelude.<$> (x Core..:? "S3DataDistributionType")
            -- A missing key falls back to 'Prelude.mempty' at type
            -- @Maybe [Text]@.
            Prelude.<*> (x Core..:? "AttributeNames" Core..!= Prelude.mempty)
            Prelude.<*> (x Core..: "S3DataType")
            Prelude.<*> (x Core..: "S3Uri")
      )

instance Prelude.Hashable S3DataSource

instance Prelude.NFData S3DataSource

-- Encodes to a JSON object. Optional fields that are 'Prelude.Nothing' are
-- dropped by 'Prelude.catMaybes' rather than emitted as @null@.
instance Core.ToJSON S3DataSource where
  toJSON S3DataSource' {..} =
    Core.object
      ( Prelude.catMaybes
          [ ("S3DataDistributionType" Core..=)
              Prelude.<$> s3DataDistributionType,
            ("AttributeNames" Core..=)
              Prelude.<$> attributeNames,
            Prelude.Just ("S3DataType" Core..= s3DataType),
            Prelude.Just ("S3Uri" Core..= s3Uri)
          ]
      )