License	BSD-style
Maintainer	Vincent Hanquez <vincent@snarc.org>
Stability	experimental
Portability	portable
Safe Haskell	None
Language	Haskell2010

Basement.String

Contents

Binary conversion
Legacy utility

Description

A String type backed by a UTF8 encoded byte array and all the necessary functions to manipulate the string.

You can think of String as a specialization of a byte array that has elements of type Char.

The String data must contain UTF8 valid data.

Synopsis

newtype String = String (UArray Word8)
newtype MutableString st = MutableString (MUArray Word8 st)
create :: PrimMonad prim => CountOf Word8 -> (MutableString (PrimState prim) -> prim (Offset Word8)) -> prim String
replicate :: CountOf Char -> Char -> String
length :: String -> CountOf Char
data Encoding
- = ASCII7
- | UTF8
- | UTF16
- | UTF32
- | ISO_8859_1
fromBytes :: Encoding -> UArray Word8 -> (String, Maybe ValidationFailure, UArray Word8)
fromChunkBytes :: [UArray Word8] -> [String]
fromBytesUnsafe :: UArray Word8 -> String
fromBytesLenient :: UArray Word8 -> (String, UArray Word8)
toBytes :: Encoding -> String -> UArray Word8
mutableValidate :: PrimMonad prim => MUArray Word8 (PrimState prim) -> Offset Word8 -> CountOf Word8 -> prim (Offset Word8, Maybe ValidationFailure)
copy :: String -> String
data ValidationFailure
- = InvalidHeader
- | InvalidContinuation
- | MissingByte
- | BuildingFailure
index :: String -> Offset Char -> Maybe Char
null :: String -> Bool
drop :: CountOf Char -> String -> String
take :: CountOf Char -> String -> String
splitAt :: CountOf Char -> String -> (String, String)
revDrop :: CountOf Char -> String -> String
revTake :: CountOf Char -> String -> String
revSplitAt :: CountOf Char -> String -> (String, String)
splitOn :: (Char -> Bool) -> String -> [String]
sub :: String -> Offset8 -> Offset8 -> String
elem :: Char -> String -> Bool
indices :: String -> String -> [Offset8]
intersperse :: Char -> String -> String
span :: (Char -> Bool) -> String -> (String, String)
spanEnd :: (Char -> Bool) -> String -> (String, String)
break :: (Char -> Bool) -> String -> (String, String)
breakEnd :: (Char -> Bool) -> String -> (String, String)
breakElem :: Char -> String -> (String, String)
breakLine :: String -> Either Bool (String, String)
dropWhile :: (Char -> Bool) -> String -> String
singleton :: Char -> String
charMap :: (Char -> Char) -> String -> String
snoc :: String -> Char -> String
cons :: Char -> String -> String
unsnoc :: String -> Maybe (String, Char)
uncons :: String -> Maybe (Char, String)
find :: (Char -> Bool) -> String -> Maybe Char
findIndex :: (Char -> Bool) -> String -> Maybe (Offset Char)
sortBy :: (Char -> Char -> Ordering) -> String -> String
filter :: (Char -> Bool) -> String -> String
reverse :: String -> String
replace :: String -> String -> String -> String
builderAppend :: PrimMonad state => Char -> Builder String MutableString Word8 state err ()
builderBuild :: PrimMonad m => Int -> Builder String MutableString Word8 m err () -> m (Either err String)
builderBuild_ :: PrimMonad m => Int -> Builder String MutableString Word8 m () () -> m String
readInteger :: String -> Maybe Integer
readIntegral :: (HasNegation i, IntegralUpsize Word8 i, Additive i, Multiplicative i, IsIntegral i) => String -> Maybe i
readNatural :: String -> Maybe Natural
readDouble :: String -> Maybe Double
readRational :: String -> Maybe Rational
readFloatingExact :: String -> ReadFloatingCallback a -> Maybe a
upper :: String -> String
lower :: String -> String
caseFold :: String -> String
isPrefixOf :: String -> String -> Bool
isSuffixOf :: String -> String -> Bool
isInfixOf :: String -> String -> Bool
stripPrefix :: String -> String -> Maybe String
stripSuffix :: String -> String -> Maybe String
all :: (Char -> Bool) -> String -> Bool
any :: (Char -> Bool) -> String -> Bool
lines :: String -> [String]
words :: String -> [String]
toBase64 :: String -> String
toBase64URL :: Bool -> String -> String
toBase64OpenBSD :: String -> String

Documentation

newtype String #

Opaque packed array of characters in the UTF8 encoding

Constructors

String (UArray Word8)

Instances

Instances details

IsList String #
Instance details Defined in Basement.UTF8.Base Associated Types type Item String # Methods fromList :: [Item String] -> String # fromListN :: Int -> [Item String] -> String # toList :: String -> [Item String] #
Eq String #
Instance details Defined in Basement.UTF8.Base Methods (==) :: String -> String -> Bool # (/=) :: String -> String -> Bool #
Data String #
Instance details Defined in Basement.UTF8.Base Methods gfoldl :: (forall d b. Data d => c (d -> b) -> d -> c b) -> (forall g. g -> c g) -> String -> c String # gunfold :: (forall b r. Data b => c (b -> r) -> c r) -> (forall r. r -> c r) -> Constr -> c String # toConstr :: String -> Constr # dataTypeOf :: String -> DataType # dataCast1 :: Typeable t => (forall d. Data d => c (t d)) -> Maybe (c String) # dataCast2 :: Typeable t => (forall d e. (Data d, Data e) => c (t d e)) -> Maybe (c String) # gmapT :: (forall b. Data b => b -> b) -> String -> String # gmapQl :: (r -> r' -> r) -> r -> (forall d. Data d => d -> r') -> String -> r # gmapQr :: forall r r'. (r' -> r -> r) -> r -> (forall d. Data d => d -> r') -> String -> r # gmapQ :: (forall d. Data d => d -> u) -> String -> [u] # gmapQi :: Int -> (forall d. Data d => d -> u) -> String -> u # gmapM :: Monad m => (forall d. Data d => d -> m d) -> String -> m String # gmapMp :: MonadPlus m => (forall d. Data d => d -> m d) -> String -> m String # gmapMo :: MonadPlus m => (forall d. Data d => d -> m d) -> String -> m String #
Ord String #
Instance details Defined in Basement.UTF8.Base Methods compare :: String -> String -> Ordering # (<) :: String -> String -> Bool # (<=) :: String -> String -> Bool # (>) :: String -> String -> Bool # (>=) :: String -> String -> Bool # max :: String -> String -> String # min :: String -> String -> String #
Show String #
Instance details Defined in Basement.UTF8.Base Methods showsPrec :: Int -> String -> ShowS # show :: String -> String0 # showList :: [String] -> ShowS #
IsString String #
Instance details Defined in Basement.UTF8.Base Methods fromString :: String0 -> String #
Semigroup String #
Instance details Defined in Basement.UTF8.Base Methods (<>) :: String -> String -> String # sconcat :: NonEmpty String -> String # stimes :: Integral b => b -> String -> String #
Monoid String #
Instance details Defined in Basement.UTF8.Base Methods mempty :: String # mappend :: String -> String -> String # mconcat :: [String] -> String #
NormalForm String #
Instance details Defined in Basement.UTF8.Base Methods toNormalForm :: String -> () #
From AsciiString String #
Instance details Defined in Basement.From Methods from :: AsciiString -> String #
From String (UArray Word8) #
Instance details Defined in Basement.From Methods from :: String -> UArray Word8 #
TryFrom (UArray Word8) String #
Instance details Defined in Basement.From Methods tryFrom :: UArray Word8 -> Maybe String #
type Item String #
Instance details Defined in Basement.UTF8.Base type Item String = Char

newtype MutableString st #

Mutable String Buffer.

Use as an *append* buffer, as the UTF8 variable-length encoding doesn't really allow changing previously written characters without potentially shifting bytes.

Constructors

MutableString (MUArray Word8 st)

create :: PrimMonad prim => CountOf Word8 -> (MutableString (PrimState prim) -> prim (Offset Word8)) -> prim String #

Unsafely create a string of up to sz bytes.

The callback f needs to return the number of bytes filled in the underlying bytes buffer. No check is made on the callback return value, and if it's not contained within the bounds, bad things will happen.

replicate :: CountOf Char -> Char -> String #

Replicate a character c n times to create a string of length n

length :: String -> CountOf Char #

Length of a String using CountOf

This size is available in O(n).

Binary conversion

data Encoding #

Various String encodings that can be used to convert to and from bytes

Constructors

ASCII7
UTF8
UTF16
UTF32
ISO_8859_1

Instances

Instances details

Bounded Encoding #
Instance details Defined in Basement.String Methods minBound :: Encoding # maxBound :: Encoding #
Enum Encoding #
Instance details Defined in Basement.String Methods succ :: Encoding -> Encoding # pred :: Encoding -> Encoding # toEnum :: Int -> Encoding # fromEnum :: Encoding -> Int # enumFrom :: Encoding -> [Encoding] # enumFromThen :: Encoding -> Encoding -> [Encoding] # enumFromTo :: Encoding -> Encoding -> [Encoding] # enumFromThenTo :: Encoding -> Encoding -> Encoding -> [Encoding] #
Eq Encoding #
Instance details Defined in Basement.String Methods (==) :: Encoding -> Encoding -> Bool # (/=) :: Encoding -> Encoding -> Bool #
Data Encoding #
Instance details Defined in Basement.String Methods gfoldl :: (forall d b. Data d => c (d -> b) -> d -> c b) -> (forall g. g -> c g) -> Encoding -> c Encoding # gunfold :: (forall b r. Data b => c (b -> r) -> c r) -> (forall r. r -> c r) -> Constr -> c Encoding # toConstr :: Encoding -> Constr # dataTypeOf :: Encoding -> DataType # dataCast1 :: Typeable t => (forall d. Data d => c (t d)) -> Maybe (c Encoding) # dataCast2 :: Typeable t => (forall d e. (Data d, Data e) => c (t d e)) -> Maybe (c Encoding) # gmapT :: (forall b. Data b => b -> b) -> Encoding -> Encoding # gmapQl :: (r -> r' -> r) -> r -> (forall d. Data d => d -> r') -> Encoding -> r # gmapQr :: forall r r'. (r' -> r -> r) -> r -> (forall d. Data d => d -> r') -> Encoding -> r # gmapQ :: (forall d. Data d => d -> u) -> Encoding -> [u] # gmapQi :: Int -> (forall d. Data d => d -> u) -> Encoding -> u # gmapM :: Monad m => (forall d. Data d => d -> m d) -> Encoding -> m Encoding # gmapMp :: MonadPlus m => (forall d. Data d => d -> m d) -> Encoding -> m Encoding # gmapMo :: MonadPlus m => (forall d. Data d => d -> m d) -> Encoding -> m Encoding #
Ord Encoding #
Instance details Defined in Basement.String Methods compare :: Encoding -> Encoding -> Ordering # (<) :: Encoding -> Encoding -> Bool # (<=) :: Encoding -> Encoding -> Bool # (>) :: Encoding -> Encoding -> Bool # (>=) :: Encoding -> Encoding -> Bool # max :: Encoding -> Encoding -> Encoding # min :: Encoding -> Encoding -> Encoding #
Show Encoding #
Instance details Defined in Basement.String Methods showsPrec :: Int -> Encoding -> ShowS # show :: Encoding -> String # showList :: [Encoding] -> ShowS #

fromBytes :: Encoding -> UArray Word8 -> (String, Maybe ValidationFailure, UArray Word8) #

Convert a ByteArray to a string assuming a specific encoding.

It returns a 3-tuple of:

The string that has been successfully converted without any error
An optional validation error
The remaining buffer that hasn't been processed (either as a result of an error, or because the encoded sequence is not fully available)

Considering a stream of data that is fetched chunk by chunk, it's valid to assume that some sequence might fall on a chunk boundary. When converting chunks, if the error is Nothing and the remaining buffer is not empty, then this buffer needs to be prepended to the next chunk.

fromChunkBytes :: [UArray Word8] -> [String] #

Decode a stream of binary chunks containing UTF8-encoded data into a list of valid Strings

A chunk does not necessarily contain a valid string, as a UTF8 sequence could be split over 2 chunks.

fromBytesUnsafe :: UArray Word8 -> String #

Convert a Byte Array representing UTF8 data directly to a string without checking for UTF8 validity

If the input contains invalid sequences, it will trigger runtime async errors when processing data.

When in doubt, use fromBytes

fromBytesLenient :: UArray Word8 -> (String, UArray Word8) #

Convert a UTF8 array of bytes to a String.

If there's any error in the stream, it will automatically insert replacement bytes to replace invalid sequences.

In the case of a sequence that falls in the middle of 2 chunks, the remaining buffer is supposed to be prepended to the next chunk before resuming the parsing.

toBytes :: Encoding -> String -> UArray Word8 #

Convert a String to a bytearray in a specific encoding

if the encoding is UTF8, the underlying buffer is returned without extra allocation or any processing

In any other encoding, some allocation and processing are done to convert.

mutableValidate :: PrimMonad prim => MUArray Word8 (PrimState prim) -> Offset Word8 -> CountOf Word8 -> prim (Offset Word8, Maybe ValidationFailure) #

Similar to validate but works on a MutableByteArray

copy :: String -> String #

Copy the String

The slice of memory is copied to a new slice, making the new string independent from the original string.

data ValidationFailure #

Possible failure related to validating bytes of UTF8 sequences.

Constructors

InvalidHeader
InvalidContinuation
MissingByte
BuildingFailure

Instances

Instances details

Eq ValidationFailure #
Instance details Defined in Basement.UTF8.Types Methods (==) :: ValidationFailure -> ValidationFailure -> Bool # (/=) :: ValidationFailure -> ValidationFailure -> Bool #
Show ValidationFailure #
Instance details Defined in Basement.UTF8.Types Methods showsPrec :: Int -> ValidationFailure -> ShowS # show :: ValidationFailure -> String # showList :: [ValidationFailure] -> ShowS #
Exception ValidationFailure #
Instance details Defined in Basement.UTF8.Types Methods toException :: ValidationFailure -> SomeException # fromException :: SomeException -> Maybe ValidationFailure # displayException :: ValidationFailure -> String #

index :: String -> Offset Char -> Maybe Char #

Return the nth character in a String

Compared to an array, the string needs to be scanned from the beginning since the UTF8 encoding is variable-length.

null :: String -> Bool #

Check if a String is null

drop :: CountOf Char -> String -> String #

Create a string with the remaining Chars after dropping n Chars from the beginning

take :: CountOf Char -> String -> String #

Create a string composed of a number n of Chars (Unicode code points).

If the input s contains fewer characters than required, then the input string is returned.

splitAt :: CountOf Char -> String -> (String, String) #

Split a string at the Offset specified (in Char) returning both the leading part and the remaining part.

revDrop :: CountOf Char -> String -> String #

Similar to drop but from the end

revTake :: CountOf Char -> String -> String #

Similar to take but from the end

revSplitAt :: CountOf Char -> String -> (String, String) #

Similar to splitAt but from the end

splitOn :: (Char -> Bool) -> String -> [String] #

Split on the input string using the predicate as separator

e.g.

splitOn (== ',') ","          == ["",""]
splitOn (== ',') ",abc,"      == ["","abc",""]
splitOn (== ':') "abc"        == ["abc"]
splitOn (== ':') "abc::def"   == ["abc","","def"]
splitOn (== ':') "::abc::def" == ["","","abc","","def"]

sub :: String -> Offset8 -> Offset8 -> String #

Internal call to make a substring given offset in bytes.

This is unsafe considering that one can create a substring starting and/or ending on the middle of a UTF8 sequence.

elem :: Char -> String -> Bool #

Return whether the string contains a specific character or not

indices :: String -> String -> [Offset8] #

Finds where the insertion points are when we search for a needle within a haystack.

intersperse :: Char -> String -> String #

Intersperse the character sep between each character in the string

intersperse ' ' "Hello Foundation"

"H e l l o   F o u n d a t i o n"

span :: (Char -> Bool) -> String -> (String, String) #

Apply a predicate to the string to return the longest prefix that satisfies the predicate and the remainder

spanEnd :: (Char -> Bool) -> String -> (String, String) #

Apply a predicate to the string to return the longest suffix that satisfies the predicate and the remainder

break :: (Char -> Bool) -> String -> (String, String) #

Break a string into 2 strings at the location where the predicate returns True

breakEnd :: (Char -> Bool) -> String -> (String, String) #

breakElem :: Char -> String -> (String, String) #

Break a string into 2 strings at the first occurrence of the character

breakLine :: String -> Either Bool (String, String) #

Same as break but cut on a line feed with an optional carriage return.

This is the same operation as 'breakElem LF' dropping the last character of the string if it's a CR.

Also, for efficiency reasons (streaming), it returns whether the last character was a CR character.

dropWhile :: (Char -> Bool) -> String -> String #

Drop characters from the beginning while the predicate is true

singleton :: Char -> String #

Create a single element String

charMap :: (Char -> Char) -> String -> String #

Monomorphically map the characters in a string and return the transformed one

snoc :: String -> Char -> String #

Append a Char to the end of the String and return this new String

cons :: Char -> String -> String #

Prepend a Char to the beginning of the String and return this new String

unsnoc :: String -> Maybe (String, Char) #

Extract the String stripped of the last character and the last character if not empty

If empty, Nothing is returned

uncons :: String -> Maybe (Char, String) #

Extract the first character of a string, and the String stripped of the first character.

If empty, Nothing is returned

find :: (Char -> Bool) -> String -> Maybe Char #

Look for a predicate in the String and return the matched character, if any.

findIndex :: (Char -> Bool) -> String -> Maybe (Offset Char) #

Return the index, in units of Char, of the first occurrence for which the predicate returns True

If not found, Nothing is returned

sortBy :: (Char -> Char -> Ordering) -> String -> String #

Sort the characters in a String using a specific sort function

TODO: optimise not going through a list

filter :: (Char -> Bool) -> String -> String #

Filter characters of a string using the predicate

reverse :: String -> String #

Reverse a string

replace :: String -> String -> String -> String #

Replace all the occurrences of needle with replacement in the haystack string.

builderAppend :: PrimMonad state => Char -> Builder String MutableString Word8 state err () #

Append a character to a String builder

builderBuild :: PrimMonad m => Int -> Builder String MutableString Word8 m err () -> m (Either err String) #

Create a new String builder using chunks of sizeChunksI

builderBuild_ :: PrimMonad m => Int -> Builder String MutableString Word8 m () () -> m String #

readInteger :: String -> Maybe Integer #

readIntegral :: (HasNegation i, IntegralUpsize Word8 i, Additive i, Multiplicative i, IsIntegral i) => String -> Maybe i #

Read an Integer from a String

Consume an optional minus sign and many digits until end of string.

readNatural :: String -> Maybe Natural #

Read a Natural from a String

Consume many digits until end of string.

readDouble :: String -> Maybe Double #

Try to read a Double

readRational :: String -> Maybe Rational #

Try to read a floating number as a Rational

Note that for safety reasons, only exponents between -10000 and 10000 are allowed, as otherwise DoS/OOM is very likely. If you don't want this behavior, switching to a scientific type (not provided yet) that represents the exponent separately is the advised solution.

readFloatingExact :: String -> ReadFloatingCallback a -> Maybe a #

Read a floating-like number of the form:

-: numbers [ . numbers ] [ ( e | E ) [ - ] number ]

Call a function with:

A boolean representing if the number is negative
The digits part represented as a single natural number (123.456 is represented as 123456)
The number of digits in the fractional part (e.g. 123.456 => 3)
The exponent if any

The code is structured as a simple state machine that:

Optionally Consume a - sign
Consume number for the integral part
Optionally
Consume .
Consume remaining digits if not already end of string
Optionally Consume an e or E followed by an optional - and a number

upper :: String -> String #

Convert a String to the upper-case equivalent.

lower :: String -> String #

Convert a String to the lower-case equivalent.

caseFold :: String -> String #

Convert a String to the unicode case fold equivalent.

Case folding is mostly used for caseless comparison of strings.

isPrefixOf :: String -> String -> Bool #

Check whether the first string is a prefix of the second string.

isSuffixOf :: String -> String -> Bool #

Check whether the first string is a suffix of the second string.

isInfixOf :: String -> String -> Bool #

Check whether the first string is contained within the second string.

TODO: implemented the naive way and thus terribly inefficient, reimplement properly

stripPrefix :: String -> String -> Maybe String #

Try to strip a prefix from the start of a String.

If the string does not start with the prefix, then Nothing is returned, otherwise the stripped string is returned

stripSuffix :: String -> String -> Maybe String #

Try to strip a suffix from the end of a String.

If the string does not end with the suffix, then Nothing is returned, otherwise the stripped string is returned

all :: (Char -> Bool) -> String -> Bool #

any :: (Char -> Bool) -> String -> Bool #

Legacy utility

lines :: String -> [String] #

Split lines in a string using newline as separation.

Note that carriage returns preceding a newline are also stripped for maximum compatibility between Windows and Unix systems.

words :: String -> [String] #

Split words in a string using spaces as separation

words "Hello Foundation"

Hello, Foundation

toBase64 :: String -> String #

Transform string src to base64 binary representation.

toBase64URL :: Bool -> String -> String #

Transform string src to URL-safe base64 binary representation. The result will be either padded or unpadded, depending on the boolean padded argument.

toBase64OpenBSD :: String -> String #

Transform string src to OpenBSD base64 binary representation.