From 04c8fa8017a04da05cd8e1d104cfcd0a9aba17f5 Mon Sep 17 00:00:00 2001 From: vnugent Date: Fri, 22 Sep 2023 12:29:32 -0400 Subject: Structure refactor & unused feature pruning, more http2 prep --- lib/Utils/src/BitField.cs | 14 +++- lib/Utils/src/Extensions/StringExtensions.cs | 117 +++++++++++++++++++------- lib/Utils/src/IO/VnTextReaderExtensions.cs | 120 +++++++++++++++------------ lib/Utils/src/VnEncoding.cs | 73 ++++++++++++++-- lib/Utils/tests/VnEncodingTests.cs | 58 ++++++++++++- 5 files changed, 288 insertions(+), 94 deletions(-) (limited to 'lib/Utils') diff --git a/lib/Utils/src/BitField.cs b/lib/Utils/src/BitField.cs index bc001df..8bdac4f 100644 --- a/lib/Utils/src/BitField.cs +++ b/lib/Utils/src/BitField.cs @@ -1,5 +1,5 @@ /* -* Copyright (c) 2022 Vaughn Nugent +* Copyright (c) 2023 Vaughn Nugent * * Library: VNLib * Package: VNLib.Utils @@ -22,7 +22,6 @@ * along with VNLib.Utils. If not, see http://www.gnu.org/licenses/. */ -using System; using System.Runtime.CompilerServices; namespace VNLib.Utils @@ -33,22 +32,26 @@ namespace VNLib.Utils public class BitField { private ulong Field; + /// /// The readonly value of the /// public ulong Value => Field; + /// /// Creates a new initialized to the specified value /// /// Initial value [MethodImpl(MethodImplOptions.AggressiveInlining)] public BitField(ulong initial) => Field = initial; + /// /// Creates a new initialized to the specified value /// /// Initial value [MethodImpl(MethodImplOptions.AggressiveInlining)] public BitField(long initial) => Field = unchecked((ulong)initial); + /// /// Determines if the specified flag is set /// @@ -56,6 +59,7 @@ namespace VNLib.Utils /// True if the flag(s) is currently set, false if flag is not set [MethodImpl(MethodImplOptions.AggressiveInlining)] public bool IsSet(ulong mask) => (Field & mask) != 0; + /// /// Determines if the specified flag is set /// @@ -63,6 +67,7 @@ namespace VNLib.Utils /// True if the flag(s) is currently set, false if flag is not set [MethodImpl(MethodImplOptions.AggressiveInlining)] public bool IsSet(long mask) => (Field & unchecked((ulong)mask)) != 0; + /// /// Determines if the specified flag is set /// @@ -70,6 +75,7 @@ namespace VNLib.Utils /// True if the flag(s) is currently set, false if flag is not set [MethodImpl(MethodImplOptions.AggressiveInlining)] public void Set(ulong mask) => Field |= mask; + /// /// Determines if the specified flag is set /// @@ -77,6 +83,7 @@ namespace VNLib.Utils /// True if the flag(s) is currently set, false if flag is not set [MethodImpl(MethodImplOptions.AggressiveInlining)] public void Set(long mask) => Field |= unchecked((ulong)mask); + /// /// Sets or clears a flag(s) indentified by a mask based on the value /// @@ -94,18 +101,21 @@ namespace VNLib.Utils Clear(mask); } } + /// /// Clears the flag identified by the specified mask /// /// The mask used to clear the given flag [MethodImpl(MethodImplOptions.AggressiveInlining)] public void Clear(ulong mask) => Field &= ~mask; + /// /// Clears the flag identified by the specified mask /// /// The mask used to clear the given flag [MethodImpl(MethodImplOptions.AggressiveInlining)] public void Clear(long mask) => Field &= ~unchecked((ulong)mask); + /// /// Clears all flags by setting the property value to 0 /// diff --git a/lib/Utils/src/Extensions/StringExtensions.cs b/lib/Utils/src/Extensions/StringExtensions.cs index 09d6517..f211b73 100644 --- a/lib/Utils/src/Extensions/StringExtensions.cs +++ b/lib/Utils/src/Extensions/StringExtensions.cs @@ -1,5 +1,5 @@ /* -* Copyright (c) 2022 Vaughn Nugent +* Copyright (c) 2023 Vaughn Nugent * * Library: VNLib * Package: VNLib.Utils @@ -32,6 +32,10 @@ using VNLib.Utils.Memory; namespace VNLib.Utils.Extensions { + /// + /// Delegate for a stateless span action + /// + /// The line of data to process public delegate void StatelessSpanAction(ReadOnlySpan line); /// @@ -51,6 +55,7 @@ namespace VNLib.Utils.Extensions { Split(value, splitter.AsSpan(), output, options); } + /// /// Split a string based on split value and insert into the specified list /// @@ -66,6 +71,7 @@ namespace VNLib.Utils.Extensions //Call the split function on the span Split(value, cs, output, options); } + /// /// Split a string based on split value and insert into the specified list /// @@ -79,6 +85,7 @@ namespace VNLib.Utils.Extensions { Split(value.AsSpan(), splitter, output, options); } + /// /// Split a string based on split value and insert into the specified list /// @@ -88,13 +95,14 @@ namespace VNLib.Utils.Extensions /// String split options /// [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static void Split(this in ReadOnlySpan value, char splitter, T output, StringSplitOptions options) where T : ICollection + public static void Split(this ReadOnlySpan value, char splitter, T output, StringSplitOptions options) where T : ICollection { //Create span from char pointer ReadOnlySpan cs = MemoryMarshal.CreateReadOnlySpan(ref splitter, 1); //Call the split function on the span - Split(in value, cs, output, options); + Split(value, cs, output, options); } + /// /// Split a based on split value and insert into the specified list /// @@ -104,13 +112,15 @@ namespace VNLib.Utils.Extensions /// String split options /// [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static void Split(this in ReadOnlySpan value, ReadOnlySpan splitter, T output, StringSplitOptions options) where T : ICollection + public static void Split(this ReadOnlySpan value, ReadOnlySpan splitter, T output, StringSplitOptions options) where T : ICollection { //Create a local function that adds the split strings to the list static void SplitFound(ReadOnlySpan split, T output) => output.Add(split.ToString()); + //Invoke the split function with the local callback method - Split(in value, splitter, options, SplitFound, output); + Split(value, splitter, options, SplitFound, output); } + /// /// Split a based on split value and pass it to the split delegate handler /// @@ -120,11 +130,13 @@ namespace VNLib.Utils.Extensions /// The action to invoke when a split segment has been found /// The state to pass to the callback handler /// - public static void Split(this in ReadOnlySpan value, ReadOnlySpan splitter, StringSplitOptions options, ReadOnlySpanAction splitCb, T state) + public static void Split(this ReadOnlySpan value, ReadOnlySpan splitter, StringSplitOptions options, ReadOnlySpanAction splitCb, T state) { _ = splitCb ?? throw new ArgumentNullException(nameof(splitCb)); + //Get span over string ForwardOnlyReader reader = new(value); + //No string options if (options == 0) { @@ -132,41 +144,49 @@ namespace VNLib.Utils.Extensions { //Find index of the splitter int start = reader.Window.IndexOf(splitter); + //guard if (start == -1) { break; } + //Trim and add it regardless of length splitCb(reader.Window[..start], state); + //shift window reader.Advance(start + splitter.Length); } while (true); + //Trim remaining and add it regardless of length splitCb(reader.Window, state); } //Trim but do not remove empties - else if ((options & StringSplitOptions.RemoveEmptyEntries) == 0) + else if ((options & StringSplitOptions.TrimEntries) == StringSplitOptions.TrimEntries) { do { //Find index of the splitter int start = reader.Window.IndexOf(splitter); + //guard if (start == -1) { break; } + //Trim and add it regardless of length splitCb(reader.Window[..start].Trim(), state); + //shift window reader.Advance(start + splitter.Length); } while (true); + //Trim remaining and add it regardless of length splitCb(reader.Window.Trim(), state); } //Remove empty entires but do not trim them - else if ((options & StringSplitOptions.TrimEntries) == 0) + else if ((options & StringSplitOptions.RemoveEmptyEntries) == StringSplitOptions.RemoveEmptyEntries) { //Get data before splitter and trim it ReadOnlySpan data; @@ -186,9 +206,11 @@ namespace VNLib.Utils.Extensions { splitCb(data, state); } - //shift window - reader.Advance(start + splitter.Length); + + reader.Advance(start + splitter.Length); + } while (true); + //Add if not empty if (reader.WindowSize > 0) { @@ -204,23 +226,29 @@ namespace VNLib.Utils.Extensions { //Find index of the splitter int start = reader.Window.IndexOf(splitter); + //guard if (start == -1) { break; } + //Get data before splitter and trim it data = reader.Window[..start].Trim(); + //If its not empty, then add it to the list if (!data.IsEmpty) { splitCb(data, state); } - //shift window + reader.Advance(start + splitter.Length); + } while (true); + //Trim remaining data = reader.Window.Trim(); + //Add if not empty if (!data.IsEmpty) { @@ -238,13 +266,14 @@ namespace VNLib.Utils.Extensions /// The action to invoke when a split segment has been found /// [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static void Split(this in ReadOnlySpan value, char splitter, StringSplitOptions options, ReadOnlySpanAction splitCb, T state) + public static void Split(this ReadOnlySpan value, char splitter, StringSplitOptions options, ReadOnlySpanAction splitCb, T state) { //Alloc a span for char ReadOnlySpan cs = MemoryMarshal.CreateReadOnlySpan(ref splitter, 1); //Call the split function on the span - Split(in value, cs, options, splitCb, state); + Split(value, cs, options, splitCb, state); } + /// /// Split a based on split value and pass it to the split delegate handler /// @@ -254,13 +283,14 @@ namespace VNLib.Utils.Extensions /// The action to invoke when a split segment has been found /// [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static void Split(this in ReadOnlySpan value, ReadOnlySpan splitter, StringSplitOptions options, StatelessSpanAction splitCb) + public static void Split(this ReadOnlySpan value, ReadOnlySpan splitter, StringSplitOptions options, StatelessSpanAction splitCb) { //Create a SpanSplitDelegate with the non-typed delegate as the state argument static void ssplitcb(ReadOnlySpan param, StatelessSpanAction callback) => callback(param); //Call split with the new callback delegate - Split(in value, splitter, options, ssplitcb, splitCb); + Split(value, splitter, options, ssplitcb, splitCb); } + /// /// Split a based on split value and pass it to the split delegate handler /// @@ -270,12 +300,12 @@ namespace VNLib.Utils.Extensions /// The action to invoke when a split segment has been found /// [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static void Split(this in ReadOnlySpan value, char splitter, StringSplitOptions options, StatelessSpanAction splitCb) + public static void Split(this ReadOnlySpan value, char splitter, StringSplitOptions options, StatelessSpanAction splitCb) { //Create a SpanSplitDelegate with the non-typed delegate as the state argument static void ssplitcb(ReadOnlySpan param, StatelessSpanAction callback) => callback(param); //Call split with the new callback delegate - Split(in value, splitter, options, ssplitcb, splitCb); + Split(value, splitter, options, ssplitcb, splitCb); } /// @@ -285,11 +315,12 @@ namespace VNLib.Utils.Extensions /// Sequence to search for within the current sequence /// the index of the end of the sequenc [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static int EndOf(this in ReadOnlySpan data, ReadOnlySpan search) + public static int EndOf(this ReadOnlySpan data, ReadOnlySpan search) { int index = data.IndexOf(search); return index > -1 ? index + search.Length : -1; } + /// /// Gets the index of the end of the found character /// @@ -297,15 +328,18 @@ namespace VNLib.Utils.Extensions /// Character to search for within the current sequence /// the index of the end of the sequence [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static int EndOf(this in ReadOnlySpan data, char search) + public static int EndOf(this ReadOnlySpan data, char search) { int index = data.IndexOf(search); return index > -1 ? index + 1 : -1; } + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static int IndexOf(this in Memory data, byte search) => data.Span.IndexOf(search); + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static int IndexOf(this in Memory data, ReadOnlySpan search) => data.Span.IndexOf(search); + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static int IndexOf(this in Memory data, ReadOnlyMemory search) => IndexOf(data, search.Span); @@ -317,13 +351,14 @@ namespace VNLib.Utils.Extensions /// The delimiting character /// The segment of data before the search character, or the entire segment if not found [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static ReadOnlySpan SliceBeforeParam(this in ReadOnlySpan data, char search) + public static ReadOnlySpan SliceBeforeParam(this ReadOnlySpan data, char search) { //Find the index of the specified data int index = data.IndexOf(search); //Return the slice of data before the index, or an empty span if it was not found return index > -1 ? data[..index] : data; } + /// /// Slices the current span from the begining of the segment to the first occurrance of the specified character sequence. /// If the character sequence is not found, the entire segment is returned @@ -332,13 +367,14 @@ namespace VNLib.Utils.Extensions /// The delimiting character sequence /// The segment of data before the search character, or the entire if the seach sequence is not found [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static ReadOnlySpan SliceBeforeParam(this in ReadOnlySpan data, ReadOnlySpan search) + public static ReadOnlySpan SliceBeforeParam(this ReadOnlySpan data, ReadOnlySpan search) { //Find the index of the specified data int index = data.IndexOf(search); //Return the slice of data before the index, or an empty span if it was not found return index > -1 ? data[..index] : data; } + /// /// Gets the remaining segment of data after the specified search character or /// if the search character is not found within the current segment @@ -347,13 +383,15 @@ namespace VNLib.Utils.Extensions /// The character to search for within the segment /// The segment of data after the search character or if not found [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static ReadOnlySpan SliceAfterParam(this in ReadOnlySpan data, char search) + public static ReadOnlySpan SliceAfterParam(this ReadOnlySpan data, char search) { //Find the index of the specified data - int index = EndOf(in data, search); + int index = EndOf(data, search); + //Return the slice of data after the index, or an empty span if it was not found return index > -1 ? data[index..] : ReadOnlySpan.Empty; } + /// /// Gets the remaining segment of data after the specified search sequence or /// if the search sequence is not found within the current segment @@ -362,42 +400,53 @@ namespace VNLib.Utils.Extensions /// The sequence to search for within the segment /// The segment of data after the search sequence or if not found [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static ReadOnlySpan SliceAfterParam(this in ReadOnlySpan data, ReadOnlySpan search) + public static ReadOnlySpan SliceAfterParam(this ReadOnlySpan data, ReadOnlySpan search) { //Find the index of the specified data int index = EndOf(data, search); + //Return the slice of data after the index, or an empty span if it was not found return index > -1 ? data[index..] : ReadOnlySpan.Empty; } + /// /// Trims any leading or trailing '\r'|'\n'|' '(whitespace) characters from the segment /// /// The trimmed segment [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static ReadOnlySpan TrimCRLF(this in ReadOnlySpan data) + public static ReadOnlySpan TrimCRLF(this ReadOnlySpan data) { int start = 0, end = data.Length; + //trim leading \r\n chars while(start < end) { char t = data[start]; + //If character \r or \n slice it off - if (t != '\r' && t != '\n' && t != ' ') { + if (t != '\r' && t != '\n' && t != ' ') + { break; } + //Shift start++; } + //remove trailing crlf characters while (end > start) { char t = data[end - 1]; + //If character \r or \n slice it off - if (t != '\r' && t != '\n' && t != ' ') { + if (t != '\r' && t != '\n' && t != ' ') + { break; } + end--; } + return data[start..end]; } @@ -408,7 +457,7 @@ namespace VNLib.Utils.Extensions /// The sequence to search for /// The sequence to write in the place of the search parameter /// - public static int Replace(this ref Span buffer, ReadOnlySpan search, ReadOnlySpan replace) + public static int Replace(this Span buffer, ReadOnlySpan search, ReadOnlySpan replace) { ForwardOnlyWriter writer = new (buffer); writer.Replace(search, replace); @@ -425,20 +474,25 @@ namespace VNLib.Utils.Extensions public static void Replace(this ref ForwardOnlyWriter writer, ReadOnlySpan search, ReadOnlySpan replace) { Span buffer = writer.AsSpan(); + //If the search and replacment parameters are the same length if (search.Length == replace.Length) { buffer.ReplaceInPlace(search, replace); return; } + //Search and replace are not the same length int searchLen = search.Length, start = buffer.IndexOf(search); + if(start == -1) { return; } + //Replacment might be empty writer.Reset(); + do { //Append the data before the split character @@ -449,10 +503,13 @@ namespace VNLib.Utils.Extensions buffer = buffer[(start + searchLen)..]; //search for next index start = buffer.IndexOf(search); + } while (start > -1); + //Write remaining data writer.Append(replace); } + /// /// Replaces very ocurrance of character sequence within a buffer with another sequence of the same length /// @@ -466,13 +523,17 @@ namespace VNLib.Utils.Extensions { throw new ArgumentException("Search parameter and replacment parameter must be the same length"); } + int start = buffer.IndexOf(search); + while(start > -1) { //Shift the buffer to the begining of the search parameter buffer = buffer[start..]; + //Overwite the search parameter replace.CopyTo(buffer); + //Search for next index of the search character start = buffer.IndexOf(search); } diff --git a/lib/Utils/src/IO/VnTextReaderExtensions.cs b/lib/Utils/src/IO/VnTextReaderExtensions.cs index 119461b..9ca5ae5 100644 --- a/lib/Utils/src/IO/VnTextReaderExtensions.cs +++ b/lib/Utils/src/IO/VnTextReaderExtensions.cs @@ -1,5 +1,5 @@ /* -* Copyright (c) 2022 Vaughn Nugent +* Copyright (c) 2023 Vaughn Nugent * * Library: VNLib * Package: VNLib.Utils @@ -52,8 +52,9 @@ namespace VNLib.Utils.IO /// Allows reading lines of data from the stream without allocations public static ERRNO ReadLine(this ref T reader, Span charBuffer) where T:struct, IVnTextReader { - return readLine(ref reader, charBuffer); + return ReadLineInternal(ref reader, charBuffer); } + /// /// Attempts to read a line from the stream and store it in the specified buffer /// @@ -65,7 +66,7 @@ namespace VNLib.Utils.IO /// Allows reading lines of data from the stream without allocations public static ERRNO ReadLine(this T reader, Span charBuffer) where T : class, IVnTextReader { - return readLine(ref reader, charBuffer); + return ReadLineInternal(ref reader, charBuffer); } /// @@ -80,6 +81,7 @@ namespace VNLib.Utils.IO { return reader.ReadRemaining(buffer.AsSpan(offset, count)); } + /// /// Fill a buffer with reamining buffered data /// @@ -103,8 +105,9 @@ namespace VNLib.Utils.IO /// You should use the property to know how much remaining data is buffered public static int ReadRemaining(this ref T reader, Span buffer) where T : struct, IVnTextReader { - return readRemaining(ref reader, buffer); + return ReadRemainingInternal(ref reader, buffer); } + /// /// Fill a buffer with reamining buffered data, up to /// the size of the supplied buffer @@ -115,10 +118,10 @@ namespace VNLib.Utils.IO /// You should use the property to know how much remaining data is buffered public static int ReadRemaining(this T reader, Span buffer) where T : class, IVnTextReader { - return readRemaining(ref reader, buffer); + return ReadRemainingInternal(ref reader, buffer); } - private static ERRNO readLine(ref T reader, Span chars) where T: IVnTextReader + private static ERRNO ReadLineInternal(ref T reader, Span chars) where T: IVnTextReader { /* * I am aware of a potential bug, the line decoding process @@ -129,68 +132,32 @@ namespace VNLib.Utils.IO * I dont expect this to be an issue unless there is a bug within the specified * encoder implementation */ - ReadOnlySpan LineTermination = reader.LineTermination.Span; + + int result = 0; + //If buffered data is available, check for line termination - if (reader.Available > 0) + if (reader.Available > 0 && TryReadLine(ref reader, chars, ref result)) { - //Get current buffer window - ReadOnlySpan bytes = reader.BufferedDataWindow; - //search for line termination in current buffer - int term = bytes.IndexOf(LineTermination); - //Termination found in buffer window - if (term > -1) - { - //Capture the line from the begining of the window to the termination - ReadOnlySpan line = bytes[..term]; - //Get the number ot chars - int charCount = reader.Encoding.GetCharCount(line); - //See if the buffer is large enough - if (bytes.Length < charCount) - { - return E_BUFFER_TOO_SMALL; - } - //Use the decoder to convert the data - _ = reader.Encoding.GetChars(line, chars); - //Shift the window to the end of the line (excluding the termination, regardless of the conversion result) - reader.Advance(term + LineTermination.Length); - //Return the number of characters - return charCount; - } - //Termination not found but there may be more data waiting + return result; } + //Compact the buffer window and make sure it was compacted so there is room to fill the buffer - if (reader.CompactBufferWindow()) + if (reader.CompactBufferWindow() > 0) { //There is room, so buffer more data reader.FillBuffer(); + //Check again to see if more data is buffered if (reader.Available <= 0) { //No data avialable return 0; } - //Get current buffer window - ReadOnlySpan bytes = reader.BufferedDataWindow; - //search for line termination in current buffer - int term = bytes.IndexOf(LineTermination); - //Termination found in buffer window - if (term > -1) + + //Try to read the line again after refill + if (TryReadLine(ref reader, chars, ref result)) { - //Capture the line from the begining of the window to the termination - ReadOnlySpan line = bytes[..term]; - //Get the number ot chars - int charCount = reader.Encoding.GetCharCount(line); - //See if the buffer is large enough - if (bytes.Length < charCount) - { - return E_BUFFER_TOO_SMALL; - } - //Use the decoder to convert the data - _ = reader.Encoding.GetChars(line, chars); - //Shift the window to the end of the line (excluding the termination, regardless of the conversion result) - reader.Advance(term + LineTermination.Length); - //Return the number of characters - return charCount; + return result; } } @@ -201,20 +168,63 @@ namespace VNLib.Utils.IO throw new OutOfMemoryException("The line was not found within the current buffer, cannot continue"); #pragma warning restore CA2201 // Do not raise reserved exception types } + + private static bool TryReadLine(ref T reader, Span chars, ref int result) where T: IVnTextReader + { + ReadOnlySpan LineTermination = reader.LineTermination.Span; + + //Get current buffer window + ReadOnlySpan bytes = reader.BufferedDataWindow; + + //search for line termination in current buffer + int term = bytes.IndexOf(LineTermination); + + //Termination found in buffer window + if (term > -1) + { + //Capture the line from the begining of the window to the termination + ReadOnlySpan line = bytes[..term]; + + //Get the number ot chars + result = reader.Encoding.GetCharCount(line); + + //See if the buffer is large enough + if (bytes.Length < result) + { + result = E_BUFFER_TOO_SMALL; + return true; + } + + //Use the decoder to convert the data + _ = reader.Encoding.GetChars(line, chars); + + //Shift the window to the end of the line (excluding the termination, regardless of the conversion result) + reader.Advance(term + LineTermination.Length); + + //Return the number of characters + return true; + } + + return false; + } - private static int readRemaining(ref T reader, Span buffer) where T: IVnTextReader + private static int ReadRemainingInternal(ref T reader, Span buffer) where T: IVnTextReader { //guard for empty buffer if (buffer.Length == 0 || reader.Available == 0) { return 0; } + //get the remaining bytes in the reader Span remaining = reader.BufferedDataWindow; + //Calculate the number of bytes to copy int canCopy = Math.Min(remaining.Length, buffer.Length); + //Copy remaining bytes to buffer remaining[..canCopy].CopyTo(buffer); + //Shift the window by the number of bytes copied reader.Advance(canCopy); return canCopy; diff --git a/lib/Utils/src/VnEncoding.cs b/lib/Utils/src/VnEncoding.cs index 9a50a50..b8f18bd 100644 --- a/lib/Utils/src/VnEncoding.cs +++ b/lib/Utils/src/VnEncoding.cs @@ -45,6 +45,7 @@ namespace VNLib.Utils /// public static class VnEncoding { + /// /// Encodes a with the specified to a that must be disposed by the user /// @@ -90,6 +91,7 @@ namespace VNLib.Utils //Return default if null return data == null || data.Length == 0 ? ValueTask.FromResult(default) : JsonSerializer.DeserializeAsync(data, options, cancellationToken); } + /// /// Attempts to deserialze a json object from a stream of UTF8 data /// @@ -105,6 +107,7 @@ namespace VNLib.Utils //Return default if null return data == null || data.Length == 0 ? ValueTask.FromResult(default) : JsonSerializer.DeserializeAsync(data, type, options, cancellationToken); } + /// /// Attempts to serialize the object to json and write the encoded data to the stream /// @@ -538,6 +541,8 @@ namespace VNLib.Utils #region percent encoding + private const int MAX_STACKALLOC = 1024; + private static readonly ReadOnlyMemory HexToUtf8Pos = new byte[16] { 0x30, //0 @@ -572,9 +577,10 @@ namespace VNLib.Utils * For every illegal character, the percent encoding adds 3 bytes of * entropy. So a single byte will be replaced by 3, so adding * 2 bytes for every illegal character plus the length of the - * intial buffer, we get the size of the buffer needed to + * intial buffer, we get the exact size of the buffer needed to * percent encode. */ + int count = 0, len = utf8Bytes.Length; fixed (byte* utfBase = &MemoryMarshal.GetReference(utf8Bytes)) { @@ -658,29 +664,29 @@ namespace VNLib.Utils { int outPos = 0, len = utf8Encoded.Length; ReadOnlySpan lookupTable = HexToUtf8Pos.Span; - + for (int i = 0; i < len; i++) { byte value = utf8Encoded[i]; //Begining of percent encoding character - if(value == 0x25) + if (value == 0x25) { //Calculate the base16 multiplier from the upper half of the int multiplier = lookupTable.IndexOf(utf8Encoded[i + 1]); - + //get the base16 lower half to add int lower = lookupTable.IndexOf(utf8Encoded[i + 2]); - + //Check format - if(multiplier < 0 || lower < 0) + if (multiplier < 0 || lower < 0) { throw new FormatException($"Encoded buffer contains invalid hexadecimal characters following the % character at position {i}"); } - + //Calculate the new value, shift multiplier to the upper 4 bits, then mask + or the lower 4 bits value = (byte)(((byte)(multiplier << 4)) | ((byte)lower & 0x0f)); - + //Advance the encoded index by the two consumed chars i += 2; } @@ -690,6 +696,56 @@ namespace VNLib.Utils return outPos; } + /// + /// Encodes the utf8 encoded character buffer to its percent/hex encoded utf8 + /// character representation and returns the encoded string + /// + /// The bytes to encode + /// A collection of allowed characters that will not be encoded + /// The percent encoded string + /// + public static string PercentEncode(ReadOnlySpan utf8Bytes, ReadOnlySpan allowedChars = default) + { + /* + * I cannot avoid the allocation of a binary buffer without doing some sketchy + * byte -> char cast on the string.create method. Which would also require object + * allocation for state data, and since spans are used, we cannot cross that + * callback boundry anyway. + */ + + int bufferSize = PercentEncodeCalcBufferSize(utf8Bytes, allowedChars); + + //use stackalloc if the buffer is small enough + if (bufferSize <= MAX_STACKALLOC) + { + //stack alloc output buffer + Span output = stackalloc byte[bufferSize]; + + ERRNO encoded = PercentEncode(utf8Bytes, output, allowedChars); + + if(encoded <= 0) + { + throw new FormatException("Failed to percent encode the input data"); + } + + return Encoding.UTF8.GetString(output); + } + else + { + //Alloc heap buffer + using UnsafeMemoryHandle handle = MemoryUtil.UnsafeAllocNearestPage(bufferSize); + + ERRNO encoded = PercentEncode(utf8Bytes, handle.Span, allowedChars); + + if (encoded <= 0) + { + throw new FormatException("Failed to percent encode the input data"); + } + + return Encoding.UTF8.GetString(handle.AsSpan(0, encoded)); + } + } + #endregion #region Base64 @@ -767,6 +823,7 @@ namespace VNLib.Utils } } } + /// /// Converts a base64url encoded utf8 encoded binary buffer to /// its base64 encoded version diff --git a/lib/Utils/tests/VnEncodingTests.cs b/lib/Utils/tests/VnEncodingTests.cs index f1ef5f4..f2b5e85 100644 --- a/lib/Utils/tests/VnEncodingTests.cs +++ b/lib/Utils/tests/VnEncodingTests.cs @@ -30,6 +30,7 @@ using System.Buffers.Text; using System.Security.Cryptography; using Microsoft.VisualStudio.TestTools.UnitTesting; +using System.Diagnostics; namespace VNLib.Utils.Tests { @@ -92,6 +93,61 @@ namespace VNLib.Utils.Tests } - + [TestMethod()] + public void PercentEncodeTest() + { + const string urlEnoded = "https%3A%2F%2Fwww.google.com%2Fsearch%3Fq%3Dtest%26oq%3Dtest%26aqs%3Dchrome..69i57j0l7.1001j0j7%26sourceid%3Dchrome%26ie%3DUTF-8"; + const string urlDecoded = "https://www.google.com/search?q=test&oq=test&aqs=chrome..69i57j0l7.1001j0j7&sourceid=chrome&ie=UTF-8"; + + //We need to allow the '.' character to be encoded + ReadOnlySpan allowedChars = Encoding.UTF8.GetBytes("."); + + + /* + * Test that the url encoded string is the same as the percent encoded string + */ + + ReadOnlySpan utf8Encoded = Encoding.UTF8.GetBytes(urlDecoded); + + string percentEncoded = VnEncoding.PercentEncode(utf8Encoded, allowedChars); + + Assert.IsTrue(percentEncoded.Equals(urlEnoded, StringComparison.Ordinal)); + + /* + * Test decoding the percent encoded string + */ + + ReadOnlySpan percentEncodedUtf8 = Encoding.UTF8.GetBytes(urlEnoded); + + byte[] outBuffer = new byte[percentEncodedUtf8.Length]; + + ERRNO decoded = VnEncoding.PercentDecode(percentEncodedUtf8, outBuffer); + + //Make sure result is valid + Debug.Assert(decoded > 0); + + string decodedString = Encoding.UTF8.GetString(outBuffer, 0, decoded); + + Assert.IsTrue(decodedString.Equals(urlDecoded, StringComparison.Ordinal)); + } + + [TestMethod()] + public void Base32BasicEncodeDecodeTest() + { + const string base32Encoded = "JBSWY3DPEBLW64TMMQQQ===="; + const string base32Decoded = "Hello World!"; + byte[] rawBytes = Encoding.UTF8.GetBytes(base32Decoded); + + //Recover bytes from base32 encoded string + byte[]? fromString = VnEncoding.FromBase32String(base32Encoded); + Assert.IsNotNull(fromString); + + //Test that the decoded bytes are the same as the raw bytes + Assert.IsTrue(rawBytes.SequenceEqual(fromString)); + + //Test that the encoded string is the same as the base32 encoded string + string toString = VnEncoding.ToBase32String(rawBytes, true); + Assert.IsTrue(toString.Equals(base32Encoded, StringComparison.Ordinal)); + } } } \ No newline at end of file -- cgit