License | BSD-style |
---|---|
Maintainer | Vincent Hanquez <vincent@snarc.org> |
Stability | experimental |
Portability | portable |
Safe Haskell | None |
Language | Haskell2010 |
A String type backed by a UTF8 encoded byte array and all the necessary functions to manipulate the string.
You can think of String as a specialization of a byte array that have element of type Char.
The String data must contain UTF8 valid data.
Synopsis
- newtype String = String (UArray Word8)
- newtype MutableString st = MutableString (MUArray Word8 st)
- create :: PrimMonad prim => CountOf Word8 -> (MutableString (PrimState prim) -> prim (Offset Word8)) -> prim String
- replicate :: CountOf Char -> Char -> String
- length :: String -> CountOf Char
- data Encoding
- = ASCII7
- | UTF8
- | UTF16
- | UTF32
- | ISO_8859_1
- fromBytes :: Encoding -> UArray Word8 -> (String, Maybe ValidationFailure, UArray Word8)
- fromChunkBytes :: [UArray Word8] -> [String]
- fromBytesUnsafe :: UArray Word8 -> String
- fromBytesLenient :: UArray Word8 -> (String, UArray Word8)
- toBytes :: Encoding -> String -> UArray Word8
- mutableValidate :: PrimMonad prim => MUArray Word8 (PrimState prim) -> Offset Word8 -> CountOf Word8 -> prim (Offset Word8, Maybe ValidationFailure)
- copy :: String -> String
- data ValidationFailure
- index :: String -> Offset Char -> Maybe Char
- null :: String -> Bool
- drop :: CountOf Char -> String -> String
- take :: CountOf Char -> String -> String
- splitAt :: CountOf Char -> String -> (String, String)
- revDrop :: CountOf Char -> String -> String
- revTake :: CountOf Char -> String -> String
- revSplitAt :: CountOf Char -> String -> (String, String)
- splitOn :: (Char -> Bool) -> String -> [String]
- sub :: String -> Offset8 -> Offset8 -> String
- elem :: Char -> String -> Bool
- indices :: String -> String -> [Offset8]
- intersperse :: Char -> String -> String
- span :: (Char -> Bool) -> String -> (String, String)
- spanEnd :: (Char -> Bool) -> String -> (String, String)
- break :: (Char -> Bool) -> String -> (String, String)
- breakEnd :: (Char -> Bool) -> String -> (String, String)
- breakElem :: Char -> String -> (String, String)
- breakLine :: String -> Either Bool (String, String)
- dropWhile :: (Char -> Bool) -> String -> String
- singleton :: Char -> String
- charMap :: (Char -> Char) -> String -> String
- snoc :: String -> Char -> String
- cons :: Char -> String -> String
- unsnoc :: String -> Maybe (String, Char)
- uncons :: String -> Maybe (Char, String)
- find :: (Char -> Bool) -> String -> Maybe Char
- findIndex :: (Char -> Bool) -> String -> Maybe (Offset Char)
- sortBy :: (Char -> Char -> Ordering) -> String -> String
- filter :: (Char -> Bool) -> String -> String
- reverse :: String -> String
- replace :: String -> String -> String -> String
- builderAppend :: PrimMonad state => Char -> Builder String MutableString Word8 state err ()
- builderBuild :: PrimMonad m => Int -> Builder String MutableString Word8 m err () -> m (Either err String)
- builderBuild_ :: PrimMonad m => Int -> Builder String MutableString Word8 m () () -> m String
- readInteger :: String -> Maybe Integer
- readIntegral :: (HasNegation i, IntegralUpsize Word8 i, Additive i, Multiplicative i, IsIntegral i) => String -> Maybe i
- readNatural :: String -> Maybe Natural
- readDouble :: String -> Maybe Double
- readRational :: String -> Maybe Rational
- readFloatingExact :: String -> ReadFloatingCallback a -> Maybe a
- upper :: String -> String
- lower :: String -> String
- caseFold :: String -> String
- isPrefixOf :: String -> String -> Bool
- isSuffixOf :: String -> String -> Bool
- isInfixOf :: String -> String -> Bool
- stripPrefix :: String -> String -> Maybe String
- stripSuffix :: String -> String -> Maybe String
- all :: (Char -> Bool) -> String -> Bool
- any :: (Char -> Bool) -> String -> Bool
- lines :: String -> [String]
- words :: String -> [String]
- toBase64 :: String -> String
- toBase64URL :: Bool -> String -> String
- toBase64OpenBSD :: String -> String
Documentation
Opaque packed array of characters in the UTF8 encoding
Instances
IsList String # | |
Eq String # | |
Data String # | |
Defined in Basement.UTF8.Base gfoldl :: (forall d b. Data d => c (d -> b) -> d -> c b) -> (forall g. g -> c g) -> String -> c String # gunfold :: (forall b r. Data b => c (b -> r) -> c r) -> (forall r. r -> c r) -> Constr -> c String # toConstr :: String -> Constr # dataTypeOf :: String -> DataType # dataCast1 :: Typeable t => (forall d. Data d => c (t d)) -> Maybe (c String) # dataCast2 :: Typeable t => (forall d e. (Data d, Data e) => c (t d e)) -> Maybe (c String) # gmapT :: (forall b. Data b => b -> b) -> String -> String # gmapQl :: (r -> r' -> r) -> r -> (forall d. Data d => d -> r') -> String -> r # gmapQr :: forall r r'. (r' -> r -> r) -> r -> (forall d. Data d => d -> r') -> String -> r # gmapQ :: (forall d. Data d => d -> u) -> String -> [u] # gmapQi :: Int -> (forall d. Data d => d -> u) -> String -> u # gmapM :: Monad m => (forall d. Data d => d -> m d) -> String -> m String # gmapMp :: MonadPlus m => (forall d. Data d => d -> m d) -> String -> m String # gmapMo :: MonadPlus m => (forall d. Data d => d -> m d) -> String -> m String # | |
Ord String # | |
Show String # | |
IsString String # | |
Defined in Basement.UTF8.Base fromString :: String0 -> String # | |
Semigroup String # | |
Monoid String # | |
NormalForm String # | |
Defined in Basement.UTF8.Base toNormalForm :: String -> () # | |
From AsciiString String # | |
Defined in Basement.From from :: AsciiString -> String # | |
From String (UArray Word8) # | |
TryFrom (UArray Word8) String # | |
type Item String # | |
Defined in Basement.UTF8.Base |
newtype MutableString st #
Mutable String Buffer.
Use as an *append* buffer, as UTF8 variable encoding doesn't really allow to change previously written character without potentially shifting bytes.
MutableString (MUArray Word8 st) |
create :: PrimMonad prim => CountOf Word8 -> (MutableString (PrimState prim) -> prim (Offset Word8)) -> prim String #
Unsafely create a string of up to sz
bytes.
The callback f
needs to return the number of bytes filled in the underlaying
bytes buffer. No check is made on the callback return values, and if it's not
contained without the bounds, bad things will happen.
replicate :: CountOf Char -> Char -> String #
Replicate a character c
n
times to create a string of length n
Binary conversion
Various String Encoding that can be use to convert to and from bytes
Instances
Bounded Encoding # | |
Enum Encoding # | |
Eq Encoding # | |
Data Encoding # | |
Defined in Basement.String gfoldl :: (forall d b. Data d => c (d -> b) -> d -> c b) -> (forall g. g -> c g) -> Encoding -> c Encoding # gunfold :: (forall b r. Data b => c (b -> r) -> c r) -> (forall r. r -> c r) -> Constr -> c Encoding # toConstr :: Encoding -> Constr # dataTypeOf :: Encoding -> DataType # dataCast1 :: Typeable t => (forall d. Data d => c (t d)) -> Maybe (c Encoding) # dataCast2 :: Typeable t => (forall d e. (Data d, Data e) => c (t d e)) -> Maybe (c Encoding) # gmapT :: (forall b. Data b => b -> b) -> Encoding -> Encoding # gmapQl :: (r -> r' -> r) -> r -> (forall d. Data d => d -> r') -> Encoding -> r # gmapQr :: forall r r'. (r' -> r -> r) -> r -> (forall d. Data d => d -> r') -> Encoding -> r # gmapQ :: (forall d. Data d => d -> u) -> Encoding -> [u] # gmapQi :: Int -> (forall d. Data d => d -> u) -> Encoding -> u # gmapM :: Monad m => (forall d. Data d => d -> m d) -> Encoding -> m Encoding # gmapMp :: MonadPlus m => (forall d. Data d => d -> m d) -> Encoding -> m Encoding # gmapMo :: MonadPlus m => (forall d. Data d => d -> m d) -> Encoding -> m Encoding # | |
Ord Encoding # | |
Defined in Basement.String | |
Show Encoding # | |
fromBytes :: Encoding -> UArray Word8 -> (String, Maybe ValidationFailure, UArray Word8) #
Convert a ByteArray to a string assuming a specific encoding.
It returns a 3-tuple of:
- The string that has been succesfully converted without any error
- An optional validation error
- The remaining buffer that hasn't been processed (either as a result of an error, or because the encoded sequence is not fully available)
Considering a stream of data that is fetched chunk by chunk, it's valid to assume that some sequence might fall in a chunk boundary. When converting chunks, if the error is Nothing and the remaining buffer is not empty, then this buffer need to be prepended to the next chunk
fromChunkBytes :: [UArray Word8] -> [String] #
Decode a stream of binary chunks containing UTF8 encoding in a list of valid String
Chunk not necessarily contains a valid string, as a UTF8 sequence could be split over 2 chunks.
fromBytesUnsafe :: UArray Word8 -> String #
Convert a Byte Array representing UTF8 data directly to a string without checking for UTF8 validity
If the input contains invalid sequences, it will trigger runtime async errors when processing data.
In doubt, use fromBytes
fromBytesLenient :: UArray Word8 -> (String, UArray Word8) #
Convert a UTF8 array of bytes to a String.
If there's any error in the stream, it will automatically insert replacement bytes to replace invalid sequences.
In the case of sequence that fall in the middle of 2 chunks, the remaining buffer is supposed to be preprended to the next chunk, and resume the parsing.
toBytes :: Encoding -> String -> UArray Word8 #
Convert a String to a bytearray in a specific encoding
if the encoding is UTF8, the underlying buffer is returned without extra allocation or any processing
In any other encoding, some allocation and processing are done to convert.
mutableValidate :: PrimMonad prim => MUArray Word8 (PrimState prim) -> Offset Word8 -> CountOf Word8 -> prim (Offset Word8, Maybe ValidationFailure) #
Similar to validate
but works on a MutableByteArray
Copy the String
The slice of memory is copied to a new slice, making the new string independent from the original string..
data ValidationFailure #
Possible failure related to validating bytes of UTF8 sequences.
Instances
Eq ValidationFailure # | |
Defined in Basement.UTF8.Types (==) :: ValidationFailure -> ValidationFailure -> Bool # (/=) :: ValidationFailure -> ValidationFailure -> Bool # | |
Show ValidationFailure # | |
Defined in Basement.UTF8.Types showsPrec :: Int -> ValidationFailure -> ShowS # show :: ValidationFailure -> String # showList :: [ValidationFailure] -> ShowS # | |
Exception ValidationFailure # | |
Defined in Basement.UTF8.Types |
index :: String -> Offset Char -> Maybe Char #
Return the nth character in a String
Compared to an array, the string need to be scanned from the beginning since the UTF8 encoding is variable.
drop :: CountOf Char -> String -> String #
Create a string with the remaining Chars after dropping @n Chars from the beginning
take :: CountOf Char -> String -> String #
Create a string composed of a number @n of Chars (Unicode code points).
if the input @s contains less characters than required, then the input string is returned.
splitAt :: CountOf Char -> String -> (String, String) #
Split a string at the Offset specified (in Char) returning both the leading part and the remaining part.
splitOn :: (Char -> Bool) -> String -> [String] #
Split on the input string using the predicate as separator
e.g.
splitOn (== ',') "," == ["",""] splitOn (== ',') ",abc," == ["","abc",""] splitOn (== ':') "abc" == ["abc"] splitOn (== ':') "abc::def" == ["abc","","def"] splitOn (== ':') "::abc::def" == ["","","abc","","def"]
sub :: String -> Offset8 -> Offset8 -> String #
Internal call to make a substring given offset in bytes.
This is unsafe considering that one can create a substring starting and/or ending on the middle of a UTF8 sequence.
indices :: String -> String -> [Offset8] #
Finds where are the insertion points when we search for a needle
within an haystack
.
intersperse :: Char -> String -> String #
Intersperse the character sep
between each character in the string
intersperse ' ' "Hello Foundation"
"H e l l o F o u n d a t i o n"
span :: (Char -> Bool) -> String -> (String, String) #
Apply a predicate
to the string to return the longest prefix that satisfy the predicate and
the remaining
spanEnd :: (Char -> Bool) -> String -> (String, String) #
Apply a predicate
to the string to return the longest suffix that satisfy the predicate and
the remaining
break :: (Char -> Bool) -> String -> (String, String) #
Break a string into 2 strings at the location where the predicate return True
breakElem :: Char -> String -> (String, String) #
Break a string into 2 strings at the first occurence of the character
breakLine :: String -> Either Bool (String, String) #
Same as break but cut on a line feed with an optional carriage return.
This is the same operation as 'breakElem LF' dropping the last character of the string if it's a CR.
Also for efficiency reason (streaming), it returns if the last character was a CR character.
dropWhile :: (Char -> Bool) -> String -> String #
Drop character from the beginning while the predicate is true
charMap :: (Char -> Char) -> String -> String #
Monomorphically map the character in a string and return the transformed one
cons :: Char -> String -> String #
Prepend a Char to the beginning of the String and return this new String
unsnoc :: String -> Maybe (String, Char) #
Extract the String stripped of the last character and the last character if not empty
If empty, Nothing is returned
uncons :: String -> Maybe (Char, String) #
Extract the First character of a string, and the String stripped of the first character.
If empty, Nothing is returned
find :: (Char -> Bool) -> String -> Maybe Char #
Look for a predicate in the String and return the matched character, if any.
findIndex :: (Char -> Bool) -> String -> Maybe (Offset Char) #
Return the index in unit of Char of the first occurence of the predicate returning True
If not found, Nothing is returned
sortBy :: (Char -> Char -> Ordering) -> String -> String #
Sort the character in a String using a specific sort function
TODO: optimise not going through a list
replace :: String -> String -> String -> String #
Replace all the occurrencies of needle
with replacement
in
the haystack
string.
builderAppend :: PrimMonad state => Char -> Builder String MutableString Word8 state err () #
Append a character to a String builder
builderBuild :: PrimMonad m => Int -> Builder String MutableString Word8 m err () -> m (Either err String) #
Create a new String builder using chunks of sizeChunksI
builderBuild_ :: PrimMonad m => Int -> Builder String MutableString Word8 m () () -> m String #
readInteger :: String -> Maybe Integer #
readIntegral :: (HasNegation i, IntegralUpsize Word8 i, Additive i, Multiplicative i, IsIntegral i) => String -> Maybe i #
Read an Integer from a String
Consume an optional minus sign and many digits until end of string.
readNatural :: String -> Maybe Natural #
Read a Natural from a String
Consume many digits until end of string.
readDouble :: String -> Maybe Double #
Try to read a Double
readRational :: String -> Maybe Rational #
Try to read a floating number as a Rational
Note that for safety reason, only exponent between -10000 and 10000 is allowed as otherwise DoS/OOM is very likely. if you don't want this behavior, switching to a scientific type (not provided yet) that represent the exponent separately is the advised solution.
readFloatingExact :: String -> ReadFloatingCallback a -> Maybe a #
Read an Floating like number of the form:
Call a function with:
- A boolean representing if the number is negative
- The digits part represented as a single natural number (123.456 is represented as 123456)
- The number of digits in the fractional part (e.g. 123.456 => 3)
- The exponent if any
The code is structured as a simple state machine that:
caseFold :: String -> String #
Convert a String
to the unicode case fold equivalent.
Case folding is mostly used for caseless comparison of strings.
isPrefixOf :: String -> String -> Bool #
Check whether the first string is a prefix of the second string.
isSuffixOf :: String -> String -> Bool #
Check whether the first string is a suffix of the second string.
isInfixOf :: String -> String -> Bool #
Check whether the first string is contains within the second string.
TODO: implemented the naive way and thus terribly inefficient, reimplement properly
stripPrefix :: String -> String -> Maybe String #
Try to strip a prefix from the start of a String.
If the prefix is not starting the string, then Nothing is returned, otherwise the striped string is returned
stripSuffix :: String -> String -> Maybe String #
Try to strip a suffix from the end of a String.
If the suffix is not ending the string, then Nothing is returned, otherwise the striped string is returned
Legacy utility
Split lines in a string using newline as separation.
Note that carriage return preceding a newline are also strip for maximum compatibility between Windows and Unix system.
toBase64URL :: Bool -> String -> String #
Transform string src
to URL-safe base64 binary representation.
The result will be either padded or unpadded, depending on the boolean
padded
argument.
toBase64OpenBSD :: String -> String #
Transform string src
to OpenBSD base64 binary representation.