aboutsummaryrefslogtreecommitdiff
path: root/lib/Utils
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Utils')
-rw-r--r--lib/Utils/src/BitField.cs14
-rw-r--r--lib/Utils/src/Extensions/StringExtensions.cs117
-rw-r--r--lib/Utils/src/IO/VnTextReaderExtensions.cs120
-rw-r--r--lib/Utils/src/VnEncoding.cs73
-rw-r--r--lib/Utils/tests/VnEncodingTests.cs58
5 files changed, 288 insertions, 94 deletions
diff --git a/lib/Utils/src/BitField.cs b/lib/Utils/src/BitField.cs
index bc001df..8bdac4f 100644
--- a/lib/Utils/src/BitField.cs
+++ b/lib/Utils/src/BitField.cs
@@ -1,5 +1,5 @@
/*
-* Copyright (c) 2022 Vaughn Nugent
+* Copyright (c) 2023 Vaughn Nugent
*
* Library: VNLib
* Package: VNLib.Utils
@@ -22,7 +22,6 @@
* along with VNLib.Utils. If not, see http://www.gnu.org/licenses/.
*/
-using System;
using System.Runtime.CompilerServices;
namespace VNLib.Utils
@@ -33,22 +32,26 @@ namespace VNLib.Utils
public class BitField
{
private ulong Field;
+
/// <summary>
/// The readonly value of the <see cref="BitField"/>
/// </summary>
public ulong Value => Field;
+
/// <summary>
/// Creates a new <see cref="BitField"/> initialized to the specified value
/// </summary>
/// <param name="initial">Initial value</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public BitField(ulong initial) => Field = initial;
+
/// <summary>
/// Creates a new <see cref="BitField"/> initialized to the specified value
/// </summary>
/// <param name="initial">Initial value</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public BitField(long initial) => Field = unchecked((ulong)initial);
+
/// <summary>
/// Determines if the specified flag is set
/// </summary>
@@ -56,6 +59,7 @@ namespace VNLib.Utils
/// <returns>True if the flag(s) is currently set, false if flag is not set</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public bool IsSet(ulong mask) => (Field & mask) != 0;
+
/// <summary>
/// Determines if the specified flag is set
/// </summary>
@@ -63,6 +67,7 @@ namespace VNLib.Utils
/// <returns>True if the flag(s) is currently set, false if flag is not set</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public bool IsSet(long mask) => (Field & unchecked((ulong)mask)) != 0;
+
/// <summary>
/// Sets the flag(s) identified by the specified mask
/// </summary>
@@ -70,6 +75,7 @@ namespace VNLib.Utils
/// <returns>True if the flag(s) is currently set, false if flag is not set</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public void Set(ulong mask) => Field |= mask;
+
/// <summary>
/// Sets the flag(s) identified by the specified mask
/// </summary>
@@ -77,6 +83,7 @@ namespace VNLib.Utils
/// <returns>True if the flag(s) is currently set, false if flag is not set</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public void Set(long mask) => Field |= unchecked((ulong)mask);
+
/// <summary>
/// Sets or clears a flag(s) indentified by a mask based on the value
/// </summary>
@@ -94,18 +101,21 @@ namespace VNLib.Utils
Clear(mask);
}
}
+
/// <summary>
/// Clears the flag identified by the specified mask
/// </summary>
/// <param name="mask">The mask used to clear the given flag</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public void Clear(ulong mask) => Field &= ~mask;
+
/// <summary>
/// Clears the flag identified by the specified mask
/// </summary>
/// <param name="mask">The mask used to clear the given flag</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public void Clear(long mask) => Field &= ~unchecked((ulong)mask);
+
/// <summary>
/// Clears all flags by setting the <see cref="Field"/> property value to 0
/// </summary>
diff --git a/lib/Utils/src/Extensions/StringExtensions.cs b/lib/Utils/src/Extensions/StringExtensions.cs
index 09d6517..f211b73 100644
--- a/lib/Utils/src/Extensions/StringExtensions.cs
+++ b/lib/Utils/src/Extensions/StringExtensions.cs
@@ -1,5 +1,5 @@
/*
-* Copyright (c) 2022 Vaughn Nugent
+* Copyright (c) 2023 Vaughn Nugent
*
* Library: VNLib
* Package: VNLib.Utils
@@ -32,6 +32,10 @@ using VNLib.Utils.Memory;
namespace VNLib.Utils.Extensions
{
+ /// <summary>
+ /// Delegate for a stateless span action
+ /// </summary>
+ /// <param name="line">The line of data to process</param>
public delegate void StatelessSpanAction(ReadOnlySpan<char> line);
/// <summary>
@@ -51,6 +55,7 @@ namespace VNLib.Utils.Extensions
{
Split(value, splitter.AsSpan(), output, options);
}
+
/// <summary>
/// Split a string based on split value and insert into the specified list
/// </summary>
@@ -66,6 +71,7 @@ namespace VNLib.Utils.Extensions
//Call the split function on the span
Split(value, cs, output, options);
}
+
/// <summary>
/// Split a string based on split value and insert into the specified list
/// </summary>
@@ -79,6 +85,7 @@ namespace VNLib.Utils.Extensions
{
Split(value.AsSpan(), splitter, output, options);
}
+
/// <summary>
/// Split a string based on split value and insert into the specified list
/// </summary>
@@ -88,13 +95,14 @@ namespace VNLib.Utils.Extensions
/// <param name="options">String split options</param>
/// <exception cref="ArgumentNullException"></exception>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static void Split<T>(this in ReadOnlySpan<char> value, char splitter, T output, StringSplitOptions options) where T : ICollection<string>
+ public static void Split<T>(this ReadOnlySpan<char> value, char splitter, T output, StringSplitOptions options) where T : ICollection<string>
{
//Create span from char pointer
ReadOnlySpan<char> cs = MemoryMarshal.CreateReadOnlySpan(ref splitter, 1);
//Call the split function on the span
- Split(in value, cs, output, options);
+ Split(value, cs, output, options);
}
+
/// <summary>
/// Split a <see cref="ReadOnlySpan{T}"/> based on split value and insert into the specified list
/// </summary>
@@ -104,13 +112,15 @@ namespace VNLib.Utils.Extensions
/// <param name="options">String split options</param>
/// <exception cref="ArgumentNullException"></exception>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static void Split<T>(this in ReadOnlySpan<char> value, ReadOnlySpan<char> splitter, T output, StringSplitOptions options) where T : ICollection<string>
+ public static void Split<T>(this ReadOnlySpan<char> value, ReadOnlySpan<char> splitter, T output, StringSplitOptions options) where T : ICollection<string>
{
//Create a local function that adds the split strings to the list
static void SplitFound(ReadOnlySpan<char> split, T output) => output.Add(split.ToString());
+
//Invoke the split function with the local callback method
- Split(in value, splitter, options, SplitFound, output);
+ Split(value, splitter, options, SplitFound, output);
}
+
/// <summary>
/// Split a <see cref="ReadOnlySpan{T}"/> based on split value and pass it to the split delegate handler
/// </summary>
@@ -120,11 +130,13 @@ namespace VNLib.Utils.Extensions
/// <param name="splitCb">The action to invoke when a split segment has been found</param>
/// <param name="state">The state to pass to the callback handler</param>
/// <exception cref="ArgumentNullException"></exception>
- public static void Split<T>(this in ReadOnlySpan<char> value, ReadOnlySpan<char> splitter, StringSplitOptions options, ReadOnlySpanAction<char, T> splitCb, T state)
+ public static void Split<T>(this ReadOnlySpan<char> value, ReadOnlySpan<char> splitter, StringSplitOptions options, ReadOnlySpanAction<char, T> splitCb, T state)
{
_ = splitCb ?? throw new ArgumentNullException(nameof(splitCb));
+
//Get span over string
ForwardOnlyReader<char> reader = new(value);
+
//No string options
if (options == 0)
{
@@ -132,41 +144,49 @@ namespace VNLib.Utils.Extensions
{
//Find index of the splitter
int start = reader.Window.IndexOf(splitter);
+
//guard
if (start == -1)
{
break;
}
+
//Trim and add it regardless of length
splitCb(reader.Window[..start], state);
+
//shift window
reader.Advance(start + splitter.Length);
} while (true);
+
//Trim remaining and add it regardless of length
splitCb(reader.Window, state);
}
//Trim but do not remove empties
- else if ((options & StringSplitOptions.RemoveEmptyEntries) == 0)
+ else if ((options & StringSplitOptions.TrimEntries) == StringSplitOptions.TrimEntries)
{
do
{
//Find index of the splitter
int start = reader.Window.IndexOf(splitter);
+
//guard
if (start == -1)
{
break;
}
+
//Trim and add it regardless of length
splitCb(reader.Window[..start].Trim(), state);
+
//shift window
reader.Advance(start + splitter.Length);
} while (true);
+
//Trim remaining and add it regardless of length
splitCb(reader.Window.Trim(), state);
}
//Remove empty entires but do not trim them
- else if ((options & StringSplitOptions.TrimEntries) == 0)
+ else if ((options & StringSplitOptions.RemoveEmptyEntries) == StringSplitOptions.RemoveEmptyEntries)
{
//Get data before splitter and trim it
ReadOnlySpan<char> data;
@@ -186,9 +206,11 @@ namespace VNLib.Utils.Extensions
{
splitCb(data, state);
}
- //shift window
- reader.Advance(start + splitter.Length);
+
+ reader.Advance(start + splitter.Length);
+
} while (true);
+
//Add if not empty
if (reader.WindowSize > 0)
{
@@ -204,23 +226,29 @@ namespace VNLib.Utils.Extensions
{
//Find index of the splitter
int start = reader.Window.IndexOf(splitter);
+
//guard
if (start == -1)
{
break;
}
+
//Get data before splitter and trim it
data = reader.Window[..start].Trim();
+
//If its not empty, then add it to the list
if (!data.IsEmpty)
{
splitCb(data, state);
}
- //shift window
+
reader.Advance(start + splitter.Length);
+
} while (true);
+
//Trim remaining
data = reader.Window.Trim();
+
//Add if not empty
if (!data.IsEmpty)
{
@@ -238,13 +266,14 @@ namespace VNLib.Utils.Extensions
/// <param name="splitCb">The action to invoke when a split segment has been found</param>
/// <param name="state"></param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static void Split<T>(this in ReadOnlySpan<char> value, char splitter, StringSplitOptions options, ReadOnlySpanAction<char, T> splitCb, T state)
+ public static void Split<T>(this ReadOnlySpan<char> value, char splitter, StringSplitOptions options, ReadOnlySpanAction<char, T> splitCb, T state)
{
//Alloc a span for char
ReadOnlySpan<char> cs = MemoryMarshal.CreateReadOnlySpan(ref splitter, 1);
//Call the split function on the span
- Split(in value, cs, options, splitCb, state);
+ Split(value, cs, options, splitCb, state);
}
+
/// <summary>
/// Split a <see cref="ReadOnlySpan{T}"/> based on split value and pass it to the split delegate handler
/// </summary>
@@ -254,13 +283,14 @@ namespace VNLib.Utils.Extensions
/// <param name="splitCb">The action to invoke when a split segment has been found</param>
/// <exception cref="ArgumentNullException"></exception>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static void Split(this in ReadOnlySpan<char> value, ReadOnlySpan<char> splitter, StringSplitOptions options, StatelessSpanAction splitCb)
+ public static void Split(this ReadOnlySpan<char> value, ReadOnlySpan<char> splitter, StringSplitOptions options, StatelessSpanAction splitCb)
{
//Create a SpanSplitDelegate with the non-typed delegate as the state argument
static void ssplitcb(ReadOnlySpan<char> param, StatelessSpanAction callback) => callback(param);
//Call split with the new callback delegate
- Split(in value, splitter, options, ssplitcb, splitCb);
+ Split(value, splitter, options, ssplitcb, splitCb);
}
+
/// <summary>
/// Split a <see cref="ReadOnlySpan{T}"/> based on split value and pass it to the split delegate handler
/// </summary>
@@ -270,12 +300,12 @@ namespace VNLib.Utils.Extensions
/// <param name="splitCb">The action to invoke when a split segment has been found</param>
/// <exception cref="ArgumentNullException"></exception>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static void Split(this in ReadOnlySpan<char> value, char splitter, StringSplitOptions options, StatelessSpanAction splitCb)
+ public static void Split(this ReadOnlySpan<char> value, char splitter, StringSplitOptions options, StatelessSpanAction splitCb)
{
//Create a SpanSplitDelegate with the non-typed delegate as the state argument
static void ssplitcb(ReadOnlySpan<char> param, StatelessSpanAction callback) => callback(param);
//Call split with the new callback delegate
- Split(in value, splitter, options, ssplitcb, splitCb);
+ Split(value, splitter, options, ssplitcb, splitCb);
}
/// <summary>
@@ -285,11 +315,12 @@ namespace VNLib.Utils.Extensions
/// <param name="search">Sequence to search for within the current sequence</param>
/// <returns>the index of the end of the sequenc</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static int EndOf(this in ReadOnlySpan<char> data, ReadOnlySpan<char> search)
+ public static int EndOf(this ReadOnlySpan<char> data, ReadOnlySpan<char> search)
{
int index = data.IndexOf(search);
return index > -1 ? index + search.Length : -1;
}
+
/// <summary>
/// Gets the index of the end of the found character
/// </summary>
@@ -297,15 +328,18 @@ namespace VNLib.Utils.Extensions
/// <param name="search">Character to search for within the current sequence</param>
/// <returns>the index of the end of the sequence</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static int EndOf(this in ReadOnlySpan<char> data, char search)
+ public static int EndOf(this ReadOnlySpan<char> data, char search)
{
int index = data.IndexOf(search);
return index > -1 ? index + 1 : -1;
}
+
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static int IndexOf(this in Memory<byte> data, byte search) => data.Span.IndexOf(search);
+
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static int IndexOf(this in Memory<byte> data, ReadOnlySpan<byte> search) => data.Span.IndexOf(search);
+
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static int IndexOf(this in Memory<byte> data, ReadOnlyMemory<byte> search) => IndexOf(data, search.Span);
@@ -317,13 +351,14 @@ namespace VNLib.Utils.Extensions
/// <param name="search">The delimiting character</param>
/// <returns>The segment of data before the search character, or the entire segment if not found</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static ReadOnlySpan<char> SliceBeforeParam(this in ReadOnlySpan<char> data, char search)
+ public static ReadOnlySpan<char> SliceBeforeParam(this ReadOnlySpan<char> data, char search)
{
//Find the index of the specified data
int index = data.IndexOf(search);
//Return the slice of data before the index, or the entire span if it was not found
return index > -1 ? data[..index] : data;
}
+
/// <summary>
/// Slices the current span from the begining of the segment to the first occurrance of the specified character sequence.
/// If the character sequence is not found, the entire segment is returned
@@ -332,13 +367,14 @@ namespace VNLib.Utils.Extensions
/// <param name="search">The delimiting character sequence</param>
/// <returns>The segment of data before the search character, or the entire <paramref name="data"/> if the seach sequence is not found</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static ReadOnlySpan<char> SliceBeforeParam(this in ReadOnlySpan<char> data, ReadOnlySpan<char> search)
+ public static ReadOnlySpan<char> SliceBeforeParam(this ReadOnlySpan<char> data, ReadOnlySpan<char> search)
{
//Find the index of the specified data
int index = data.IndexOf(search);
//Return the slice of data before the index, or the entire span if it was not found
return index > -1 ? data[..index] : data;
}
+
/// <summary>
/// Gets the remaining segment of data after the specified search character or <see cref="ReadOnlySpan{T}.Empty"/>
/// if the search character is not found within the current segment
@@ -347,13 +383,15 @@ namespace VNLib.Utils.Extensions
/// <param name="search">The character to search for within the segment</param>
/// <returns>The segment of data after the search character or <see cref="ReadOnlySpan{T}.Empty"/> if not found</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static ReadOnlySpan<char> SliceAfterParam(this in ReadOnlySpan<char> data, char search)
+ public static ReadOnlySpan<char> SliceAfterParam(this ReadOnlySpan<char> data, char search)
{
//Find the index of the specified data
- int index = EndOf(in data, search);
+ int index = EndOf(data, search);
+
//Return the slice of data after the index, or an empty span if it was not found
return index > -1 ? data[index..] : ReadOnlySpan<char>.Empty;
}
+
/// <summary>
/// Gets the remaining segment of data after the specified search sequence or <see cref="ReadOnlySpan{T}.Empty"/>
/// if the search sequence is not found within the current segment
@@ -362,42 +400,53 @@ namespace VNLib.Utils.Extensions
/// <param name="search">The sequence to search for within the segment</param>
/// <returns>The segment of data after the search sequence or <see cref="ReadOnlySpan{T}.Empty"/> if not found</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static ReadOnlySpan<char> SliceAfterParam(this in ReadOnlySpan<char> data, ReadOnlySpan<char> search)
+ public static ReadOnlySpan<char> SliceAfterParam(this ReadOnlySpan<char> data, ReadOnlySpan<char> search)
{
//Find the index of the specified data
int index = EndOf(data, search);
+
//Return the slice of data after the index, or an empty span if it was not found
return index > -1 ? data[index..] : ReadOnlySpan<char>.Empty;
}
+
/// <summary>
/// Trims any leading or trailing <c>'\r'|'\n'|' '</c>(whitespace) characters from the segment
/// </summary>
/// <returns>The trimmed segment</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static ReadOnlySpan<char> TrimCRLF(this in ReadOnlySpan<char> data)
+ public static ReadOnlySpan<char> TrimCRLF(this ReadOnlySpan<char> data)
{
int start = 0, end = data.Length;
+
//trim leading \r\n chars
while(start < end)
{
char t = data[start];
+
//If character \r or \n slice it off
- if (t != '\r' && t != '\n' && t != ' ') {
+ if (t != '\r' && t != '\n' && t != ' ')
+ {
break;
}
+
//Shift
start++;
}
+
//remove trailing crlf characters
while (end > start)
{
char t = data[end - 1];
+
//If character \r or \n slice it off
- if (t != '\r' && t != '\n' && t != ' ') {
+ if (t != '\r' && t != '\n' && t != ' ')
+ {
break;
}
+
end--;
}
+
return data[start..end];
}
@@ -408,7 +457,7 @@ namespace VNLib.Utils.Extensions
/// <param name="search">The sequence to search for</param>
/// <param name="replace">The sequence to write in the place of the search parameter</param>
/// <exception cref="OutOfMemoryException"></exception>
- public static int Replace(this ref Span<char> buffer, ReadOnlySpan<char> search, ReadOnlySpan<char> replace)
+ public static int Replace(this Span<char> buffer, ReadOnlySpan<char> search, ReadOnlySpan<char> replace)
{
ForwardOnlyWriter<char> writer = new (buffer);
writer.Replace(search, replace);
@@ -425,20 +474,25 @@ namespace VNLib.Utils.Extensions
public static void Replace(this ref ForwardOnlyWriter<char> writer, ReadOnlySpan<char> search, ReadOnlySpan<char> replace)
{
Span<char> buffer = writer.AsSpan();
+
//If the search and replacment parameters are the same length
if (search.Length == replace.Length)
{
buffer.ReplaceInPlace(search, replace);
return;
}
+
//Search and replace are not the same length
int searchLen = search.Length, start = buffer.IndexOf(search);
+
if(start == -1)
{
return;
}
+
//Replacment might be empty
writer.Reset();
+
do
{
//Append the data before the split character
@@ -449,10 +503,13 @@ namespace VNLib.Utils.Extensions
buffer = buffer[(start + searchLen)..];
//search for next index
start = buffer.IndexOf(search);
+
} while (start > -1);
+
//Write remaining data
writer.Append(replace);
}
+
/// <summary>
/// Replaces every occurrence of a character sequence within a buffer with another sequence of the same length
/// </summary>
@@ -466,13 +523,17 @@ namespace VNLib.Utils.Extensions
{
throw new ArgumentException("Search parameter and replacment parameter must be the same length");
}
+
int start = buffer.IndexOf(search);
+
while(start > -1)
{
//Shift the buffer to the begining of the search parameter
buffer = buffer[start..];
+
//Overwite the search parameter
replace.CopyTo(buffer);
+
//Search for next index of the search character
start = buffer.IndexOf(search);
}
diff --git a/lib/Utils/src/IO/VnTextReaderExtensions.cs b/lib/Utils/src/IO/VnTextReaderExtensions.cs
index 119461b..9ca5ae5 100644
--- a/lib/Utils/src/IO/VnTextReaderExtensions.cs
+++ b/lib/Utils/src/IO/VnTextReaderExtensions.cs
@@ -1,5 +1,5 @@
/*
-* Copyright (c) 2022 Vaughn Nugent
+* Copyright (c) 2023 Vaughn Nugent
*
* Library: VNLib
* Package: VNLib.Utils
@@ -52,8 +52,9 @@ namespace VNLib.Utils.IO
/// <remarks>Allows reading lines of data from the stream without allocations</remarks>
public static ERRNO ReadLine<T>(this ref T reader, Span<char> charBuffer) where T:struct, IVnTextReader
{
- return readLine(ref reader, charBuffer);
+ return ReadLineInternal(ref reader, charBuffer);
}
+
/// <summary>
/// Attempts to read a line from the stream and store it in the specified buffer
/// </summary>
@@ -65,7 +66,7 @@ namespace VNLib.Utils.IO
/// <remarks>Allows reading lines of data from the stream without allocations</remarks>
public static ERRNO ReadLine<T>(this T reader, Span<char> charBuffer) where T : class, IVnTextReader
{
- return readLine(ref reader, charBuffer);
+ return ReadLineInternal(ref reader, charBuffer);
}
/// <summary>
@@ -80,6 +81,7 @@ namespace VNLib.Utils.IO
{
return reader.ReadRemaining(buffer.AsSpan(offset, count));
}
+
/// <summary>
/// Fill a buffer with reamining buffered data
/// </summary>
@@ -103,8 +105,9 @@ namespace VNLib.Utils.IO
/// <remarks>You should use the <see cref="IVnTextReader.Available"/> property to know how much remaining data is buffered</remarks>
public static int ReadRemaining<T>(this ref T reader, Span<byte> buffer) where T : struct, IVnTextReader
{
- return readRemaining(ref reader, buffer);
+ return ReadRemainingInternal(ref reader, buffer);
}
+
/// <summary>
/// Fill a buffer with reamining buffered data, up to
/// the size of the supplied buffer
@@ -115,10 +118,10 @@ namespace VNLib.Utils.IO
/// <remarks>You should use the <see cref="IVnTextReader.Available"/> property to know how much remaining data is buffered</remarks>
public static int ReadRemaining<T>(this T reader, Span<byte> buffer) where T : class, IVnTextReader
{
- return readRemaining(ref reader, buffer);
+ return ReadRemainingInternal(ref reader, buffer);
}
- private static ERRNO readLine<T>(ref T reader, Span<char> chars) where T: IVnTextReader
+ private static ERRNO ReadLineInternal<T>(ref T reader, Span<char> chars) where T: IVnTextReader
{
/*
* I am aware of a potential bug, the line decoding process
@@ -129,68 +132,32 @@ namespace VNLib.Utils.IO
* I dont expect this to be an issue unless there is a bug within the specified
* encoder implementation
*/
- ReadOnlySpan<byte> LineTermination = reader.LineTermination.Span;
+
+ int result = 0;
+
//If buffered data is available, check for line termination
- if (reader.Available > 0)
+ if (reader.Available > 0 && TryReadLine(ref reader, chars, ref result))
{
- //Get current buffer window
- ReadOnlySpan<byte> bytes = reader.BufferedDataWindow;
- //search for line termination in current buffer
- int term = bytes.IndexOf(LineTermination);
- //Termination found in buffer window
- if (term > -1)
- {
- //Capture the line from the begining of the window to the termination
- ReadOnlySpan<byte> line = bytes[..term];
- //Get the number ot chars
- int charCount = reader.Encoding.GetCharCount(line);
- //See if the buffer is large enough
- if (bytes.Length < charCount)
- {
- return E_BUFFER_TOO_SMALL;
- }
- //Use the decoder to convert the data
- _ = reader.Encoding.GetChars(line, chars);
- //Shift the window to the end of the line (excluding the termination, regardless of the conversion result)
- reader.Advance(term + LineTermination.Length);
- //Return the number of characters
- return charCount;
- }
- //Termination not found but there may be more data waiting
+ return result;
}
+
//Compact the buffer window and make sure it was compacted so there is room to fill the buffer
- if (reader.CompactBufferWindow())
+ if (reader.CompactBufferWindow() > 0)
{
//There is room, so buffer more data
reader.FillBuffer();
+
//Check again to see if more data is buffered
if (reader.Available <= 0)
{
//No data avialable
return 0;
}
- //Get current buffer window
- ReadOnlySpan<byte> bytes = reader.BufferedDataWindow;
- //search for line termination in current buffer
- int term = bytes.IndexOf(LineTermination);
- //Termination found in buffer window
- if (term > -1)
+
+ //Try to read the line again after refill
+ if (TryReadLine(ref reader, chars, ref result))
{
- //Capture the line from the begining of the window to the termination
- ReadOnlySpan<byte> line = bytes[..term];
- //Get the number ot chars
- int charCount = reader.Encoding.GetCharCount(line);
- //See if the buffer is large enough
- if (bytes.Length < charCount)
- {
- return E_BUFFER_TOO_SMALL;
- }
- //Use the decoder to convert the data
- _ = reader.Encoding.GetChars(line, chars);
- //Shift the window to the end of the line (excluding the termination, regardless of the conversion result)
- reader.Advance(term + LineTermination.Length);
- //Return the number of characters
- return charCount;
+ return result;
}
}
@@ -201,20 +168,63 @@ namespace VNLib.Utils.IO
throw new OutOfMemoryException("The line was not found within the current buffer, cannot continue");
#pragma warning restore CA2201 // Do not raise reserved exception types
}
+
+ /// <summary>
+ /// Attempts to decode a single line from the data currently buffered by the reader
+ /// </summary>
+ /// <param name="reader">The reader whose buffered data window is searched for a line termination</param>
+ /// <param name="chars">The buffer that receives the decoded characters of the line</param>
+ /// <param name="result">
+ /// When the method returns true, the number of characters decoded, or E_BUFFER_TOO_SMALL
+ /// if <paramref name="chars"/> cannot hold the decoded line. Left untouched otherwise
+ /// </param>
+ /// <returns>
+ /// True if a line termination was found in the buffered window (the outcome is stored in
+ /// <paramref name="result"/>), false if no termination is buffered yet
+ /// </returns>
+ private static bool TryReadLine<T>(ref T reader, Span<char> chars, ref int result) where T: IVnTextReader
+ {
+     ReadOnlySpan<byte> LineTermination = reader.LineTermination.Span;
+
+     //Get current buffer window
+     ReadOnlySpan<byte> bytes = reader.BufferedDataWindow;
+
+     //search for line termination in current buffer
+     int term = bytes.IndexOf(LineTermination);
+
+     //Termination not found in the buffer window, caller may buffer more data
+     if (term < 0)
+     {
+         return false;
+     }
+
+     //Capture the line from the beginning of the window to the termination
+     ReadOnlySpan<byte> line = bytes[..term];
+
+     //Get the number of chars the line decodes to
+     int charCount = reader.Encoding.GetCharCount(line);
+
+     /*
+      * The OUTPUT buffer must be large enough for the decoded line. Comparing
+      * against the input window length would let an undersized output buffer
+      * reach GetChars, which throws instead of reporting the error code.
+      * The window is not advanced so the caller can retry with a larger buffer.
+      */
+     if (chars.Length < charCount)
+     {
+         result = E_BUFFER_TOO_SMALL;
+         return true;
+     }
+
+     //Use the decoder to convert the data
+     _ = reader.Encoding.GetChars(line, chars);
+
+     //Shift the window to the end of the line (excluding the termination, regardless of the conversion result)
+     reader.Advance(term + LineTermination.Length);
+
+     //Report the number of characters in the line
+     result = charCount;
+     return true;
+ }
- private static int readRemaining<T>(ref T reader, Span<byte> buffer) where T: IVnTextReader
+ private static int ReadRemainingInternal<T>(ref T reader, Span<byte> buffer) where T: IVnTextReader
{
//guard for empty buffer
if (buffer.Length == 0 || reader.Available == 0)
{
return 0;
}
+
//get the remaining bytes in the reader
Span<byte> remaining = reader.BufferedDataWindow;
+
//Calculate the number of bytes to copy
int canCopy = Math.Min(remaining.Length, buffer.Length);
+
//Copy remaining bytes to buffer
remaining[..canCopy].CopyTo(buffer);
+
//Shift the window by the number of bytes copied
reader.Advance(canCopy);
return canCopy;
diff --git a/lib/Utils/src/VnEncoding.cs b/lib/Utils/src/VnEncoding.cs
index 9a50a50..b8f18bd 100644
--- a/lib/Utils/src/VnEncoding.cs
+++ b/lib/Utils/src/VnEncoding.cs
@@ -45,6 +45,7 @@ namespace VNLib.Utils
/// </summary>
public static class VnEncoding
{
+
/// <summary>
/// Encodes a <see cref="ReadOnlySpan{T}"/> with the specified <see cref="Encoding"/> to a <see cref="VnMemoryStream"/> that must be disposed by the user
/// </summary>
@@ -90,6 +91,7 @@ namespace VNLib.Utils
//Return default if null
return data == null || data.Length == 0 ? ValueTask.FromResult<T?>(default) : JsonSerializer.DeserializeAsync<T>(data, options, cancellationToken);
}
+
/// <summary>
/// Attempts to deserialze a json object from a stream of UTF8 data
/// </summary>
@@ -105,6 +107,7 @@ namespace VNLib.Utils
//Return default if null
return data == null || data.Length == 0 ? ValueTask.FromResult<object?>(default) : JsonSerializer.DeserializeAsync(data, type, options, cancellationToken);
}
+
/// <summary>
/// Attempts to serialize the object to json and write the encoded data to the stream
/// </summary>
@@ -538,6 +541,8 @@ namespace VNLib.Utils
#region percent encoding
+ private const int MAX_STACKALLOC = 1024;
+
private static readonly ReadOnlyMemory<byte> HexToUtf8Pos = new byte[16]
{
0x30, //0
@@ -572,9 +577,10 @@ namespace VNLib.Utils
* For every illegal character, the percent encoding adds 3 bytes of
* entropy. So a single byte will be replaced by 3, so adding
* 2 bytes for every illegal character plus the length of the
- * intial buffer, we get the size of the buffer needed to
+ * intial buffer, we get the exact size of the buffer needed to
* percent encode.
*/
+
int count = 0, len = utf8Bytes.Length;
fixed (byte* utfBase = &MemoryMarshal.GetReference(utf8Bytes))
{
@@ -658,29 +664,29 @@ namespace VNLib.Utils
{
int outPos = 0, len = utf8Encoded.Length;
ReadOnlySpan<byte> lookupTable = HexToUtf8Pos.Span;
-
+
for (int i = 0; i < len; i++)
{
byte value = utf8Encoded[i];
//Begining of percent encoding character
- if(value == 0x25)
+ if (value == 0x25)
{
//Calculate the base16 multiplier from the upper half of the
int multiplier = lookupTable.IndexOf(utf8Encoded[i + 1]);
-
+
//get the base16 lower half to add
int lower = lookupTable.IndexOf(utf8Encoded[i + 2]);
-
+
//Check format
- if(multiplier < 0 || lower < 0)
+ if (multiplier < 0 || lower < 0)
{
throw new FormatException($"Encoded buffer contains invalid hexadecimal characters following the % character at position {i}");
}
-
+
//Calculate the new value, shift multiplier to the upper 4 bits, then mask + or the lower 4 bits
value = (byte)(((byte)(multiplier << 4)) | ((byte)lower & 0x0f));
-
+
//Advance the encoded index by the two consumed chars
i += 2;
}
@@ -690,6 +696,56 @@ namespace VNLib.Utils
return outPos;
}
+ /// <summary>
+ /// Encodes the utf8 encoded character buffer to its percent/hex encoded utf8
+ /// character representation and returns the encoded string
+ /// </summary>
+ /// <param name="utf8Bytes">The bytes to encode</param>
+ /// <param name="allowedChars">A collection of allowed characters that will not be encoded</param>
+ /// <returns>The percent encoded string, or an empty string if <paramref name="utf8Bytes"/> is empty</returns>
+ /// <exception cref="FormatException"></exception>
+ public static string PercentEncode(ReadOnlySpan<byte> utf8Bytes, ReadOnlySpan<byte> allowedChars = default)
+ {
+     /*
+      * I cannot avoid the allocation of a binary buffer without doing some sketchy
+      * byte -> char cast on the string.create method. Which would also require object
+      * allocation for state data, and since spans are used, we cannot cross that
+      * callback boundary anyway.
+      */
+
+     //Nothing to encode. Guarding here keeps a zero-length result from being
+     //reported as a failure by the (encoded <= 0) checks below
+     if (utf8Bytes.IsEmpty)
+     {
+         return string.Empty;
+     }
+
+     int bufferSize = PercentEncodeCalcBufferSize(utf8Bytes, allowedChars);
+
+     //use stackalloc if the buffer is small enough
+     if (bufferSize <= MAX_STACKALLOC)
+     {
+         //stack alloc output buffer
+         Span<byte> output = stackalloc byte[bufferSize];
+
+         ERRNO encoded = PercentEncode(utf8Bytes, output, allowedChars);
+
+         if (encoded <= 0)
+         {
+             throw new FormatException("Failed to percent encode the input data");
+         }
+
+         //Only decode the bytes reported by the encoder, same as the heap path
+         return Encoding.UTF8.GetString(output[..(int)encoded]);
+     }
+     else
+     {
+         //Alloc heap buffer, may be larger than requested so the result must be sliced
+         using UnsafeMemoryHandle<byte> handle = MemoryUtil.UnsafeAllocNearestPage(bufferSize);
+
+         ERRNO encoded = PercentEncode(utf8Bytes, handle.Span, allowedChars);
+
+         if (encoded <= 0)
+         {
+             throw new FormatException("Failed to percent encode the input data");
+         }
+
+         return Encoding.UTF8.GetString(handle.AsSpan(0, encoded));
+     }
+ }
+
#endregion
#region Base64
@@ -767,6 +823,7 @@ namespace VNLib.Utils
}
}
}
+
/// <summary>
/// Converts a base64url encoded utf8 encoded binary buffer to
/// its base64 encoded version
diff --git a/lib/Utils/tests/VnEncodingTests.cs b/lib/Utils/tests/VnEncodingTests.cs
index f1ef5f4..f2b5e85 100644
--- a/lib/Utils/tests/VnEncodingTests.cs
+++ b/lib/Utils/tests/VnEncodingTests.cs
@@ -30,6 +30,7 @@ using System.Buffers.Text;
using System.Security.Cryptography;
using Microsoft.VisualStudio.TestTools.UnitTesting;
+using System.Diagnostics;
namespace VNLib.Utils.Tests
{
@@ -92,6 +93,61 @@ namespace VNLib.Utils.Tests
}
-
+ /// <summary>
+ /// Round-trips a known url through <see cref="VnEncoding.PercentEncode(ReadOnlySpan{byte}, ReadOnlySpan{byte})"/>
+ /// and <c>VnEncoding.PercentDecode</c> and compares both directions against fixed expected strings
+ /// </summary>
+ [TestMethod()]
+ public void PercentEncodeTest()
+ {
+     const string urlEncoded = "https%3A%2F%2Fwww.google.com%2Fsearch%3Fq%3Dtest%26oq%3Dtest%26aqs%3Dchrome..69i57j0l7.1001j0j7%26sourceid%3Dchrome%26ie%3DUTF-8";
+     const string urlDecoded = "https://www.google.com/search?q=test&oq=test&aqs=chrome..69i57j0l7.1001j0j7&sourceid=chrome&ie=UTF-8";
+
+     //The '.' character is explicitly allowed, so it must pass through WITHOUT being encoded
+     ReadOnlySpan<byte> allowedChars = Encoding.UTF8.GetBytes(".");
+
+     /*
+      * Test that the percent encoded output matches the expected url encoded string
+      */
+
+     ReadOnlySpan<byte> utf8Encoded = Encoding.UTF8.GetBytes(urlDecoded);
+
+     string percentEncoded = VnEncoding.PercentEncode(utf8Encoded, allowedChars);
+
+     Assert.AreEqual(urlEncoded, percentEncoded);
+
+     /*
+      * Test decoding the percent encoded string
+      */
+
+     ReadOnlySpan<byte> percentEncodedUtf8 = Encoding.UTF8.GetBytes(urlEncoded);
+
+     //Decoded output is never longer than the encoded input
+     byte[] outBuffer = new byte[percentEncodedUtf8.Length];
+
+     ERRNO decoded = VnEncoding.PercentDecode(percentEncodedUtf8, outBuffer);
+
+     //Make sure result is valid. Use the test framework assert so the check
+     //also runs in release builds, where Debug.Assert is compiled out
+     Assert.IsTrue(decoded > 0, "PercentDecode did not report any decoded bytes");
+
+     string decodedString = Encoding.UTF8.GetString(outBuffer, 0, decoded);
+
+     Assert.AreEqual(urlDecoded, decodedString);
+ }
+
+ /// <summary>
+ /// Checks a fixed plain-text/base32 pair in both directions: decoding the base32 text
+ /// must yield the utf8 bytes of the plain text, and encoding those bytes must
+ /// reproduce the base32 text
+ /// </summary>
+ [TestMethod()]
+ public void Base32BasicEncodeDecodeTest()
+ {
+     const string base32Encoded = "JBSWY3DPEBLW64TMMQQQ====";
+     const string base32Decoded = "Hello World!";
+
+     //Reference bytes both directions are compared against
+     byte[] expectedBytes = Encoding.UTF8.GetBytes(base32Decoded);
+
+     //Decode direction: base32 text -> bytes
+     byte[]? decodedBytes = VnEncoding.FromBase32String(base32Encoded);
+     Assert.IsNotNull(decodedBytes);
+
+     bool bytesMatch = expectedBytes.SequenceEqual(decodedBytes);
+     Assert.IsTrue(bytesMatch);
+
+     //Encode direction: bytes -> base32 text, second argument passed as true like the expected padded text requires
+     string reEncoded = VnEncoding.ToBase32String(expectedBytes, true);
+
+     bool textMatches = reEncoded.Equals(base32Encoded, StringComparison.Ordinal);
+     Assert.IsTrue(textMatches);
+ }
}
} \ No newline at end of file