From 748cdbf4880d830fd794e92856e8c35a46e4f884 Mon Sep 17 00:00:00 2001 From: vnugent Date: Mon, 11 Mar 2024 21:21:18 -0400 Subject: feat(app): #1 update libs & add curl support --- back-end/src/Endpoints/SiteLookupEndpoint.cs | 156 ++++++++++ back-end/src/PlatformFeatures/Curl/CurlResult.cs | 19 ++ back-end/src/PlatformFeatures/Curl/ICurlApp.cs | 34 +++ .../src/PlatformFeatures/Curl/SystemCurlApp.cs | 338 +++++++++++++++++++++ .../PlatformFeatures/Curl/WebsiteLookupResult.cs | 23 ++ back-end/src/PlatformFeatures/ISystemApp.cs | 31 ++ back-end/src/SimpleBookmark.csproj | 10 +- back-end/src/SimpleBookmark.json | 13 + back-end/src/SimpleBookmarkEntry.cs | 3 +- 9 files changed, 621 insertions(+), 6 deletions(-) create mode 100644 back-end/src/Endpoints/SiteLookupEndpoint.cs create mode 100644 back-end/src/PlatformFeatures/Curl/CurlResult.cs create mode 100644 back-end/src/PlatformFeatures/Curl/ICurlApp.cs create mode 100644 back-end/src/PlatformFeatures/Curl/SystemCurlApp.cs create mode 100644 back-end/src/PlatformFeatures/Curl/WebsiteLookupResult.cs create mode 100644 back-end/src/PlatformFeatures/ISystemApp.cs (limited to 'back-end') diff --git a/back-end/src/Endpoints/SiteLookupEndpoint.cs b/back-end/src/Endpoints/SiteLookupEndpoint.cs new file mode 100644 index 0000000..effe6aa --- /dev/null +++ b/back-end/src/Endpoints/SiteLookupEndpoint.cs @@ -0,0 +1,156 @@ +// Copyright (C) 2024 Vaughn Nugent +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as +// published by the Free Software Foundation, either version 3 of the +// License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. 
+// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . + +using System; +using System.Net; +using System.Text; +using System.Linq; +using System.Threading.Tasks; +using System.Collections.Generic; + +using VNLib.Utils; +using VNLib.Utils.Memory; +using VNLib.Plugins; +using VNLib.Plugins.Essentials; +using VNLib.Plugins.Essentials.Endpoints; +using VNLib.Plugins.Essentials.Extensions; +using VNLib.Plugins.Extensions.Loading; +using VNLib.Plugins.Extensions.Validation; + +using SimpleBookmark.PlatformFeatures.Curl; + +namespace SimpleBookmark.Endpoints +{ + [ConfigurationName("curl")] + internal sealed class SiteLookupEndpoint : ProtectedWebEndpoint + { + const string DefaultCurlExecName = "curl"; + const int MaxTimeoutValue = 30000; + + private readonly SystemCurlApp _curl; + private readonly IAsyncLazy _isSupported; + + public SiteLookupEndpoint(PluginBase plugin, IConfigScope config) + { + string path = config.GetRequiredProperty("path", p => p.GetString()!); + InitPathAndLog(path, plugin.Log); + + string exePath = config.GetValueOrDefault("exe_path", p => p.GetString(), DefaultCurlExecName); + bool httspOnly = config.GetValueOrDefault("https_only", p => p.GetBoolean(), false); + + //Optional extra arguments + string[] extrArgs = config.GetValueOrDefault( + "extra_args", + p => p.EnumerateArray().Select(s => s.GetString()!).ToArray(), + Array.Empty() + ); + + _curl = new SystemCurlApp(exePath, httspOnly, extrArgs); + + //Immediately check if curl is supported + _isSupported = _curl.TestIsAvailable(plugin.UnloadToken).AsLazy(); + } + + protected override async ValueTask GetAsync(HttpEntity entity) + { + WebMessage webm = new(); + + bool isEnabled = await _isSupported; + + //Allow site to cache if curl is supported on the platform + if (entity.QueryArgs.ContainsKey("support")) + { + webm.Success = isEnabled; + return VirtualOk(entity, webm); + } + + //Assert supported value as curl is required for 
a normal url lookup + if(webm.Assert(isEnabled, "Curl is not supported on the current platform")) + { + return VirtualClose(entity, webm, HttpStatusCode.NotImplemented); + } + + string? url = entity.QueryArgs.GetValueOrDefault("url"); + + if(webm.Assert(!string.IsNullOrWhiteSpace(url), "No url provided")) + { + return VirtualClose(entity, webm, HttpStatusCode.BadRequest); + } + + if(webm.Assert(UrlFromBase64Url(url!, out Uri? uri), "Invalid url provided")) + { + return VirtualClose(entity, webm, HttpStatusCode.UnprocessableEntity); + } + + int? timeoutMs = null; + + //Allow clients to specify a timeout for the request + string? timeoutMsS = entity.QueryArgs.GetValueOrDefault("timeout"); + if (timeoutMsS is not null && int.TryParse(timeoutMsS, out int _timeoutMs)) + { + //Minimum timeout must be greater than 1 second because curl is timed in seconds + timeoutMs = Math.Clamp(_timeoutMs, 1000, MaxTimeoutValue); + } + + try + { + //Exec curl on the url + CurlResult result = await _curl.ExecLookupAsync(uri!, timeoutMs, entity.EventCancellation); + + if(webm.Assert(result.IsError == false, result.ErrorMessage!)) + { + return VirtualClose(entity, webm, HttpStatusCode.InternalServerError); + } + + webm.Success = true; + webm.Result = result.Result; //Set curl lookup result as the response + + return VirtualOk(entity, webm); + } + catch (TimeoutException) + { + webm.Result = "Request timed out"; + return VirtualClose(entity, webm, HttpStatusCode.InternalServerError); + } + catch (OperationCanceledException) + { + webm.Result = "Request timed out"; + return VirtualClose(entity, webm, HttpStatusCode.InternalServerError); + } + } + + /* + * Reads in a base64url encoded string which is the user's search url and + * attempts to parse it into a uri. If the url is invalid, the function returns false + */ + private static bool UrlFromBase64Url(string base64Url, out Uri? 
uri) + { + uri = null; + + //Alloc output buffer for decoded data + using UnsafeMemoryHandle output = MemoryUtil.UnsafeAllocNearestPage(base64Url.Length, true); + + ERRNO decoded = VnEncoding.Base64UrlDecode(base64Url, output.Span, Encoding.UTF8); + if(decoded < 1) + { + return false; + } + + //Recover the url string from its binary representation and try to parse it into a uri + string urlstring = Encoding.UTF8.GetString(output.Span[..(int)decoded]); + return Uri.TryCreate(urlstring, UriKind.Absolute, out uri); + } + } +} diff --git a/back-end/src/PlatformFeatures/Curl/CurlResult.cs b/back-end/src/PlatformFeatures/Curl/CurlResult.cs new file mode 100644 index 0000000..7d70e0e --- /dev/null +++ b/back-end/src/PlatformFeatures/Curl/CurlResult.cs @@ -0,0 +1,19 @@ +// Copyright (C) 2024 Vaughn Nugent +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as +// published by the Free Software Foundation, either version 3 of the +// License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . + +namespace SimpleBookmark.PlatformFeatures.Curl +{ + internal sealed record class CurlResult(WebsiteLookupResult? Result, bool IsError, string? 
ErrorMessage); +} diff --git a/back-end/src/PlatformFeatures/Curl/ICurlApp.cs b/back-end/src/PlatformFeatures/Curl/ICurlApp.cs new file mode 100644 index 0000000..ec952e0 --- /dev/null +++ b/back-end/src/PlatformFeatures/Curl/ICurlApp.cs @@ -0,0 +1,34 @@ +// Copyright (C) 2024 Vaughn Nugent +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as +// published by the Free Software Foundation, either version 3 of the +// License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . + + + +using System; +using System.Threading; +using System.Threading.Tasks; + +namespace SimpleBookmark.PlatformFeatures.Curl +{ + internal interface ICurlApp + { + /// + /// Executes a lookup on the given website and returns the title and description + /// + /// The website url to search against + /// A token to cancel the operation + /// The result of the website lookup + Task ExecLookupAsync(Uri website, int? timeoutMs, CancellationToken cancellation); + } +} diff --git a/back-end/src/PlatformFeatures/Curl/SystemCurlApp.cs b/back-end/src/PlatformFeatures/Curl/SystemCurlApp.cs new file mode 100644 index 0000000..0949136 --- /dev/null +++ b/back-end/src/PlatformFeatures/Curl/SystemCurlApp.cs @@ -0,0 +1,338 @@ +// Copyright (C) 2024 Vaughn Nugent +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as +// published by the Free Software Foundation, either version 3 of the +// License, or (at your option) any later version. 
+// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . + +using System; +using System.IO; +using System.Text; +using System.Threading; +using System.Diagnostics; +using System.ComponentModel; +using System.Threading.Tasks; +using System.Collections.Generic; + +using VNLib.Utils.Memory; +using VNLib.Utils.Extensions; + +namespace SimpleBookmark.PlatformFeatures.Curl +{ + sealed class SystemCurlApp(string exePath, bool httpsOnly, string[] additionalArgs) : ISystemApp, ICurlApp + { + const int DefaultTimeoutMs = 5000; + + /// + public async Task TestIsAvailable(CancellationToken cancellation) + { + try + { + //Test if the curl application is available on the local system, may be at path + using Process? process = Exec(["--version"]); + + if (process is null) + { + return false; + } + + //Wait for the process to exit + await process.WaitForExitAsync(cancellation); + + //If an ok status code, then we know the curl application is available + return process.ExitCode == 0; + } + //App not found + catch (Win32Exception) + { + return false; + } + } + + private Process? Exec(string[] arguments) + { + ProcessStartInfo startInfo = new() + { + FileName = exePath, + RedirectStandardOutput = true, + RedirectStandardError = true, + UseShellExecute = false, + CreateNoWindow = true, + StandardOutputEncoding = Encoding.UTF8, + StandardErrorEncoding = Encoding.UTF8 + }; + + //Add arguments + arguments.ForEach(startInfo.ArgumentList.Add); + + return Process.Start(startInfo); + } + + private void ValidateUrl(Uri? 
website) + { + ArgumentNullException.ThrowIfNull(website); + + if (!website.IsAbsoluteUri) + { + throw new ArgumentException("The website url must be an absolute uri", nameof(website)); + } + + if (httpsOnly && website.Scheme != Uri.UriSchemeHttps) + { + throw new ArgumentException("The website url must be an https url only!", nameof(website)); + } + else if (website.Scheme != Uri.UriSchemeHttp && website.Scheme != Uri.UriSchemeHttps) + { + //Http or https only + throw new ArgumentException("The website url must be an http or https url", nameof(website)); + } + } + + /// + public async Task ExecLookupAsync(Uri website, int? timeoutMs, CancellationToken cancellation) + { + //Validate the url + ValidateUrl(website); + + string timeoutArg = timeoutMs.HasValue ? $"{timeoutMs.Value / 1000}" : $"{DefaultTimeoutMs / 1000}"; + + string[] args = [ + "--max-time", timeoutArg, //Set the max time for the request + "-S", //NOTE(review): -S is curl's --show-error, silent mode is -s; confirm which was intended + "-H", "Accept: text/html,application/html", //Html is required + ..additionalArgs, //Additional global arguments + website.AbsoluteUri + ]; + + //Execute the curl command + + using Process? 
process = Exec(args); + + if (process is null) + { + return new CurlResult(null, true, "Curl is not enabled on this platform, lookup failed"); + } + + //Parse the html data + Task documentHeadTask = HtmlTokenReader.ReadHeadTokenAsync(process.StandardOutput, cancellation); + + //Respect the user's timeout: stop waiting (WaitAsync throws TimeoutException) if it is exceeded + if (timeoutMs.HasValue) + { + await documentHeadTask.WaitAsync(TimeSpan.FromMilliseconds(timeoutMs.Value)); + + await Task.WhenAll( + DiscardStreamAsync(process.StandardOutput, cancellation), + DiscardStreamAsync(process.StandardError, cancellation) + ).WaitAsync(TimeSpan.FromMilliseconds(timeoutMs.Value)); + } + else + { + await documentHeadTask; + + await Task.WhenAll( + DiscardStreamAsync(process.StandardOutput, cancellation), + DiscardStreamAsync(process.StandardError, cancellation) + ); + } + + await process.WaitForExitAsync(cancellation); + + if (process.ExitCode != 0) + { + return new CurlResult(null, true, "Curl exited with a non-zero status code"); + } + + string? 
documentHead = await documentHeadTask; + + if (documentHead is null) + { + return new CurlResult(null, true, "Failed to parse html data"); + } + + //Get the lookup result from the document head segment + WebsiteLookupResult result = HtmlTokenReader.ParseHtmlData(documentHead); + + return new CurlResult(result, false, null); + } + + /// + /// Safely discards the entire stream of data from the reader without + /// allocating a large string buffer + /// + /// The reader to discard + /// A token to cancel the operation + /// A task that represents the discard operation + private static async Task DiscardStreamAsync(TextReader reader, CancellationToken cancellation) + { + using ArrayPoolBuffer discarBuffer = new(8192); + + while (await reader.ReadBlockAsync(discarBuffer.AsMemory(), cancellation) > 0) + { } + } + + private static class HtmlTokenReader + { + /// + /// Gets the document title from the head of the html document + /// + /// The head string containing the title to parse + /// The title string if found + public static string? GetDocTitleFromHead(string head) + { + ReadOnlySpan headChars = head.AsSpan(); + + ReadOnlySpan title = headChars.SliceAfterParam(""); + title = title.SliceBeforeParam(""); + + return title.ToString(); + } + + /// + /// Attempts to get the document summary from the head of the html document + /// in the meta description tag + /// + /// The head string to parse + /// The document description if found + public static string? GetDocumentSummary(string head) + { + ReadOnlySpan headChars = head.AsSpan(); + + ReadOnlySpan desc = headChars.SliceAfterParam(""); + desc = desc.SliceBeforeParam("\">"); + + return desc.ToString(); + } + + /// + /// Attempts to get the document keywords from the head of the html document + /// by parsing the meta keywords tag + /// + /// The document head + /// An array of document keywords found from the head section + public static string[]? 
GetDocumentKeywords(string head) + { + ReadOnlySpan headChars = head.AsSpan(); + + ReadOnlySpan kwStart = headChars.SliceAfterParam(" kwSpan = kwStart.SliceBeforeParam("\">"); + + List keywords = []; + + //Split the keywords at comma, and remove any empty entries/whitespace + kwSpan.Split(',', keywords, StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries); + + return keywords.ToArray(); + } + + public static WebsiteLookupResult ParseHtmlData(string documentHead) + { + //Parse head segments for title, description, and keywords + return new WebsiteLookupResult( + title: GetDocTitleFromHead(documentHead), + description: GetDocumentSummary(documentHead), + keywords: GetDocumentKeywords(documentHead) + ); + } + + + + public static async Task ReadHeadTokenAsync(TextReader reader, CancellationToken cancellation) + { + //String buffer to store parsed head data + StringBuilder stringBuilder = new(1024); + + //Temp copy buffer + using ArrayPoolBuffer buffer = new(4096); + + bool isStart = true, isEnd = false; + + //scan for document head + do + { + int read = await reader.ReadBlockAsync(buffer.AsMemory(), cancellation); + + if (read == 0) + { + //Read should never return 0, if it does, then there is no head to read + return null; + } + + if (isStart) + { + Memory headSpan = HeadStart(buffer.AsMemory()); + + //No head was found, continue buffering + if (headSpan.IsEmpty) + { + continue; + } + + /* + * Try to find the end of the head, if it is found, then we can break + */ + isEnd = HeadEnd(ref headSpan); + + //Valid head data to buffer + stringBuilder.Append(headSpan); + + isStart = false; + } + else + { + //Head start was already found, just need to buffer until it ends + Memory end = buffer.AsMemory(); + + isEnd = HeadEnd(ref end); + + stringBuilder.Append(end); + + if (isEnd) + { + break; + } + } + + } while (!isEnd); + + return stringBuilder.ToString(); + } + + static Memory HeadStart(Memory start) + { + //find start of head + int headStartIndex = 
start.Span.IndexOf(""); + + if (headStartIndex == -1) + { + return default; + } + + return start[headStartIndex..]; + } + + static bool HeadEnd(ref Memory end) + { + //find end of head + int headEndIndex = end.Span.IndexOf(""); + + if (headEndIndex == -1) + { + return false; + } + + end = end[..headEndIndex]; + return true; + } + } + } +} diff --git a/back-end/src/PlatformFeatures/Curl/WebsiteLookupResult.cs b/back-end/src/PlatformFeatures/Curl/WebsiteLookupResult.cs new file mode 100644 index 0000000..e9d9bc0 --- /dev/null +++ b/back-end/src/PlatformFeatures/Curl/WebsiteLookupResult.cs @@ -0,0 +1,23 @@ +// Copyright (C) 2024 Vaughn Nugent +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as +// published by the Free Software Foundation, either version 3 of the +// License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . + +namespace SimpleBookmark.PlatformFeatures.Curl +{ +#pragma warning disable IDE1006 // Naming Styles (JSON serialization) + + internal sealed record class WebsiteLookupResult(string? title, string? description, string[]? 
keywords); + +#pragma warning restore IDE1006 // Naming Styles +} diff --git a/back-end/src/PlatformFeatures/ISystemApp.cs b/back-end/src/PlatformFeatures/ISystemApp.cs new file mode 100644 index 0000000..11d15f1 --- /dev/null +++ b/back-end/src/PlatformFeatures/ISystemApp.cs @@ -0,0 +1,31 @@ +// Copyright (C) 2024 Vaughn Nugent +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as +// published by the Free Software Foundation, either version 3 of the +// License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . + + +using System.Threading.Tasks; +using System.Threading; + +namespace SimpleBookmark.PlatformFeatures +{ + internal interface ISystemApp + { + /// + /// Gets a value indicating if the curl application is available + /// on the local system. 
+ /// + /// True if the curl exe is available on the local system, false otherwise + Task TestIsAvailable(CancellationToken cancellation); + } +} diff --git a/back-end/src/SimpleBookmark.csproj b/back-end/src/SimpleBookmark.csproj index 609144b..1eeaaba 100644 --- a/back-end/src/SimpleBookmark.csproj +++ b/back-end/src/SimpleBookmark.csproj @@ -34,11 +34,11 @@ - - - - - + + + + + diff --git a/back-end/src/SimpleBookmark.json b/back-end/src/SimpleBookmark.json index 27ebff8..116587d 100644 --- a/back-end/src/SimpleBookmark.json +++ b/back-end/src/SimpleBookmark.json @@ -14,6 +14,19 @@ } }, + //System website lookup endpoint (aka curl) + "curl": { + "path": "/lookup", + "exe_path": "curl", //Path to the curl executable + "extra_args": [ + "--globoff", //Disables unsafe url globbing + "--no-keepalive", //Disables keepalive, unneeded for a single lookup request + "--max-filesize", "100K", //Max file size 100K + "--max-redirs", "5", //Max redirects 5 + "--location", //Follow redirects + ] + }, + "registration": { "path": "/register", //Path for the registration endpoint "token_lifetime_mins": 360, //Token lifetime in minutes diff --git a/back-end/src/SimpleBookmarkEntry.cs b/back-end/src/SimpleBookmarkEntry.cs index a1c9590..13b94a5 100644 --- a/back-end/src/SimpleBookmarkEntry.cs +++ b/back-end/src/SimpleBookmarkEntry.cs @@ -50,9 +50,10 @@ namespace SimpleBookmark //route the bm endpoint this.Route(); this.Route(); + this.Route(); //Ensure database is created after a delay - this.ObserveWork(() => this.EnsureDbCreatedAsync(this), 1000); + this.ObserveWork(() => this.EnsureDbCreatedAsync(this), 1500); Log.Information("Plugin Loaded"); PrintHelloMessage(); -- cgit