// Provenance: IrcTokens/StatefulDecoder.cs as of commit
// be91164499b263fc4c716c6d44c69f440f8ab634 (Ben Harris, Wed, 22 Apr 2020 10:30:26 -0400),
// "fix some stateful tests; also fixes some warnings about culture-specific string comparisons".
// That commit switched the internal buffer from string to byte[] so that line splitting
// happens before decoding.
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;

namespace IrcTokens
{
    /// <summary>
    /// Accumulates raw bytes across calls and turns every complete,
    /// newline-terminated line into a <see cref="Line"/>. Bytes after the last
    /// newline are kept until a later push completes them.
    /// </summary>
    public class StatefulDecoder
    {
        private const byte LineFeed = (byte) '\n';
        private const byte CarriageReturn = (byte) '\r';

        private byte[] _buffer;
        private Encoding _encoding;
        private Encoding _fallback;

        /// <summary>
        /// Primary encoding used to decode lines and to encode string input.
        /// Defaults to UTF-8 when unset (or set to <c>null</c>).
        /// </summary>
        public Encoding Encoding
        {
            get => _encoding ?? Encoding.UTF8;
            set => _encoding = value;
        }

        /// <summary>
        /// Encoding used for a line whose bytes are not valid in <see cref="Encoding"/>.
        /// Defaults to ISO-8859-1 when unset (or set to <c>null</c>).
        /// </summary>
        public Encoding Fallback
        {
            get => _fallback ?? Encoding.GetEncoding("iso-8859-1");
            set => _fallback = value;
        }

        /// <summary>
        /// The buffered bytes that do not yet form a complete line, decoded with
        /// <see cref="Encoding"/>.
        /// </summary>
        public string Pending => Encoding.GetString(_buffer);

        /// <summary>Creates a decoder with an empty buffer.</summary>
        public StatefulDecoder()
        {
            Clear();
        }

        /// <summary>Discards any buffered, not-yet-terminated data.</summary>
        public void Clear()
        {
            _buffer = Array.Empty<byte>();
        }

        /// <summary>
        /// Encodes <paramref name="data"/> with <see cref="Encoding"/> and pushes the bytes.
        /// </summary>
        /// <param name="data">Text to append to the buffer.</param>
        /// <returns>
        /// The lines completed by this push (possibly none), or <c>null</c> when
        /// <paramref name="data"/> is <c>null</c> or empty.
        /// </returns>
        public List<Line> Push(string data)
        {
            // Guard here: Encoding.GetBytes(null) would throw instead of yielding
            // the null result the byte[] overload gives for "no data".
            if (string.IsNullOrEmpty(data))
                return null;

            return Push(Encoding.GetBytes(data));
        }

        /// <summary>
        /// Appends <paramref name="data"/> to the buffer and returns every line that is
        /// now terminated by <c>'\n'</c>. Carriage returns at either end of a line are
        /// stripped before decoding.
        /// </summary>
        /// <param name="data">Raw bytes to append to the buffer.</param>
        /// <returns>
        /// The lines completed by this push (possibly none), or <c>null</c> when
        /// <paramref name="data"/> is <c>null</c> or empty.
        /// </returns>
        public List<Line> Push(byte[] data)
        {
            if (data == null || data.Length == 0)
                return null;

            var combined = new byte[_buffer.Length + data.Length];
            Buffer.BlockCopy(_buffer, 0, combined, 0, _buffer.Length);
            Buffer.BlockCopy(data, 0, combined, _buffer.Length, data.Length);

            var lastNewLine = Array.LastIndexOf(combined, LineFeed);
            if (lastNewLine < 0)
            {
                // No complete line yet; keep everything for the next push.
                _buffer = combined;
                return new List<Line>();
            }

            // Keep only the unterminated tail so that lines already returned are not
            // returned again by the next push. Done before decoding so the buffer is
            // consistent even if decoding or Line construction throws.
            var remainder = new byte[combined.Length - lastNewLine - 1];
            Buffer.BlockCopy(combined, lastNewLine + 1, remainder, 0, remainder.Length);
            _buffer = remainder;

            var strict = CreateStrictEncoding();
            var decodedLines = new List<string>();

            // Split on '\n' at the byte level, before decoding.
            var start = 0;
            while (start <= lastNewLine)
            {
                var end = Array.IndexOf(combined, LineFeed, start);
                decodedLines.Add(DecodeLine(combined, start, end - start, strict));
                start = end + 1;
            }

            return decodedLines
                .Select(l => new Line(l))
                .ToList();
        }

        /// <summary>
        /// Returns a copy of <see cref="Encoding"/> that throws on invalid bytes.
        /// The stock encodings substitute a replacement character instead of throwing,
        /// which would leave <see cref="Fallback"/> unused.
        /// </summary>
        private Encoding CreateStrictEncoding()
        {
            var primary = Encoding;
            var strict = (Encoding) primary.Clone();
            strict.DecoderFallback = DecoderFallback.ExceptionFallback;
            return strict;
        }

        /// <summary>
        /// Strips <c>'\r'</c> from both ends of the given byte range, then decodes it
        /// with <paramref name="strict"/>, falling back to <see cref="Fallback"/> when
        /// the bytes are invalid for the primary encoding.
        /// </summary>
        private string DecodeLine(byte[] bytes, int offset, int count, Encoding strict)
        {
            // Bounds are checked on every step so empty and all-'\r' lines are safe.
            while (count > 0 && bytes[offset] == CarriageReturn)
            {
                offset++;
                count--;
            }

            while (count > 0 && bytes[offset + count - 1] == CarriageReturn)
            {
                count--;
            }

            try
            {
                return strict.GetString(bytes, offset, count);
            }
            catch (DecoderFallbackException)
            {
                return Fallback.GetString(bytes, offset, count);
            }
        }
    }
}