about summary refs log tree commit diff
path: root/IrcTokens
diff options
context:
space:
mode:
authorBen Harris <ben@tilde.team>2020-04-19 20:52:41 -0400
committerBen Harris <ben@tilde.team>2020-04-19 23:14:41 -0400
commit616abc70303990fbf8096fc6ada5fac100a6c66a (patch)
tree08df1ec5e896be6a18e6a39630ffc2fec8118297 /IrcTokens
init
Diffstat (limited to 'IrcTokens')
-rw-r--r--IrcTokens/Hostmask.cs42
-rw-r--r--IrcTokens/IrcTokens.csproj23
-rw-r--r--IrcTokens/Line.cs139
-rw-r--r--IrcTokens/Protocol.cs74
-rw-r--r--IrcTokens/Tests/Data/JoinModel.cs31
-rw-r--r--IrcTokens/Tests/Data/SplitModel.cs15
-rw-r--r--IrcTokens/Tests/Data/msg-join.yaml221
-rw-r--r--IrcTokens/Tests/Data/msg-split.yaml343
-rw-r--r--IrcTokens/Tests/FormatTests.cs150
-rw-r--r--IrcTokens/Tests/HostmaskTests.cs64
-rw-r--r--IrcTokens/Tests/ParserTests.cs55
-rw-r--r--IrcTokens/Tests/TokenizationTests.cs118
12 files changed, 1275 insertions, 0 deletions
diff --git a/IrcTokens/Hostmask.cs b/IrcTokens/Hostmask.cs
new file mode 100644
index 0000000..05470ef
--- /dev/null
+++ b/IrcTokens/Hostmask.cs
@@ -0,0 +1,42 @@
+namespace IrcTokens
+{
+    /// <summary>
+    /// Represents the three parts of a hostmask (<c>nick!user@host</c>). Parse with the constructor.
+    /// </summary>
+    public class Hostmask
+    {
+        /// <summary>Text before the first '!' (or before the first '@' when there is no '!').</summary>
+        public string NickName { get; set; }
+
+        /// <summary>Text between the first '!' and the first '@'; null when the source has no '!'.</summary>
+        public string UserName { get; set; }
+
+        /// <summary>Text after the first '@'; null when the source has no '@'.</summary>
+        public string HostName { get; set; }
+
+        /// <summary>Returns the unparsed source string this hostmask was built from (null for a null source).</summary>
+        public override string ToString() => _source;
+
+        private readonly string _source;
+
+        /// <summary>
+        /// Splits <paramref name="source"/> into nickname, username and hostname.
+        /// </summary>
+        /// <param name="source">hostmask text such as <c>nick!user@host</c>; a null source leaves every part null</param>
+        public Hostmask(string source)
+        {
+            if (source == null) return;
+
+            _source = source;
+
+            // Split on the FIRST '@' only: a plain Split('@') would silently discard
+            // everything after a second '@' in the host part.
+            var hostSplit = source.Split('@', 2);
+            NickName = hostSplit[0];
+            if (hostSplit.Length > 1)
+            {
+                HostName = hostSplit[1];
+            }
+
+            // Same reasoning for '!': keep the remainder intact in the username.
+            var userSplit = NickName.Split('!', 2);
+            if (userSplit.Length > 1)
+            {
+                NickName = userSplit[0];
+                UserName = userSplit[1];
+            }
+        }
+    }
+}
diff --git a/IrcTokens/IrcTokens.csproj b/IrcTokens/IrcTokens.csproj
new file mode 100644
index 0000000..b888b6c
--- /dev/null
+++ b/IrcTokens/IrcTokens.csproj
@@ -0,0 +1,23 @@
+<!-- Build definition for IrcTokens; the library sources and their MSTest tests are built from this one project. -->
+<Project Sdk="Microsoft.NET.Sdk">
+
+  <PropertyGroup>
+    <TargetFramework>netcoreapp3.1</TargetFramework>
+  </PropertyGroup>
+
+  <!-- MSTest runner/framework packages plus YamlDotNet, which the tests use to read the YAML test vectors. -->
+  <ItemGroup>
+    <PackageReference Include="Microsoft.NET.Test.Sdk" Version="16.5.0" />
+    <PackageReference Include="MSTest.TestAdapter" Version="2.1.1" />
+    <PackageReference Include="MSTest.TestFramework" Version="2.1.1" />
+    <PackageReference Include="YamlDotNet" Version="8.1.0" />
+  </ItemGroup>
+
+  <!-- Copy the YAML test vectors next to the built assembly so tests can open them by relative path. -->
+  <ItemGroup>
+    <None Update="Tests\Data\msg-join.yaml">
+      <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
+    </None>
+    <None Update="Tests\Data\msg-split.yaml">
+      <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
+    </None>
+  </ItemGroup>
+
+</Project>
diff --git a/IrcTokens/Line.cs b/IrcTokens/Line.cs
new file mode 100644
index 0000000..7592376
--- /dev/null
+++ b/IrcTokens/Line.cs
@@ -0,0 +1,139 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+
+namespace IrcTokens
+{
+    /// <summary>
+    /// Tools to represent, parse, and format IRC lines
+    /// </summary>
+    public class Line
+    {
+        /// <summary>Message tags; null when a parsed line carried no <c>@tags</c> section.</summary>
+        public Dictionary<string, string> Tags { get; set; }
+
+        /// <summary>Message source without its leading colon; null when absent.</summary>
+        public string Source { get; set; }
+
+        /// <summary>Command (verb); upper-cased when produced by parsing.</summary>
+        public string Command { get; set; }
+
+        /// <summary>Parameters, with the trailing parameter (if any) as the last element.</summary>
+        public List<string> Params { get; set; }
+
+        private Hostmask _hostmask;
+        private string _rawLine;
+
+        /// <summary>
+        /// Debug representation listing tags and params. Null <see cref="Tags"/> or
+        /// <see cref="Params"/> are rendered as empty instead of throwing.
+        /// </summary>
+        public override string ToString()
+        {
+            var tags = Tags == null
+                ? string.Empty
+                : string.Join(";", Tags.Select(kvp => $"{kvp.Key}={kvp.Value}"));
+            var parameters = Params == null
+                ? string.Empty
+                : string.Join(",", Params);
+
+            return $"Line(tags={tags}, params={parameters})";
+        }
+
+        /// <summary>
+        /// <see cref="Source"/> parsed as a <see cref="IrcTokens.Hostmask"/>. Created on first access and cached.
+        /// </summary>
+        public Hostmask Hostmask =>
+            _hostmask ??= new Hostmask(Source);
+
+        /// <summary>
+        /// Build an empty <see cref="Line"/> to be filled in through its properties.
+        /// </summary>
+        public Line() { }
+
+        /// <summary>
+        /// Build new <see cref="Line"/> object parsed from <paramref name="line"/>. Analogous to irctokens.tokenise()
+        /// </summary>
+        /// <param name="line">irc line to parse</param>
+        /// <exception cref="ArgumentNullException"><paramref name="line"/> is null</exception>
+        public Line(string line)
+        {
+            if (line == null) throw new ArgumentNullException(nameof(line));
+
+            _rawLine = line;
+
+            if (line.StartsWith('@'))
+            {
+                Tags = new Dictionary<string, string>();
+
+                // Everything up to the first space is the tag section; the rest is the message proper.
+                var tagSplit = line.Split(' ', 2);
+                line = tagSplit.Length > 1 ? tagSplit[1] : string.Empty;
+
+                foreach (var part in tagSplit[0].Substring(1).Split(';'))
+                {
+                    // Split on the first '=' only so a value that itself contains '=' is kept whole.
+                    var keyValue = part.Split('=', 2);
+                    Tags[keyValue[0]] = keyValue.Length > 1
+                        ? Protocol.UnescapeTag(keyValue[1])
+                        : string.Empty;
+                }
+            }
+
+            // The trailing parameter starts after the first " :" and runs to the end of the line.
+            string trailing = null;
+            var trailingStart = line.IndexOf(" :", StringComparison.Ordinal);
+            if (trailingStart >= 0)
+            {
+                trailing = line.Substring(trailingStart + 2);
+                line = line.Substring(0, trailingStart);
+            }
+
+            Params = line.Contains(' ')
+                ? line.Split(' ').Where(p => !string.IsNullOrWhiteSpace(p)).ToList()
+                : new List<string> {line};
+
+            // Params can be empty here (a line made only of spaces), so check before indexing.
+            if (Params.Count > 0 && Params[0].StartsWith(':'))
+            {
+                Source = Params[0].Substring(1);
+                Params.RemoveAt(0);
+            }
+
+            if (Params.Count > 0)
+            {
+                // Invariant casing: protocol verbs must not depend on the current culture.
+                Command = Params[0].ToUpperInvariant();
+                Params.RemoveAt(0);
+            }
+
+            if (trailing != null)
+            {
+                Params.Add(trailing);
+            }
+        }
+
+        /// <summary>
+        /// Format a <see cref="Line"/> as a standards-compliant IRC line. Does not modify this instance.
+        /// </summary>
+        /// <returns>formatted irc line</returns>
+        /// <exception cref="ArgumentException">a non-last parameter contains a space or starts with a colon</exception>
+        public string Format()
+        {
+            var outs = new List<string>();
+
+            if (Tags != null && Tags.Any())
+            {
+                // Only null/empty values are written as a bare key; a whitespace-only value is
+                // real data and must be escaped and emitted.
+                var tags = Tags
+                    .Select(kvp => string.IsNullOrEmpty(kvp.Value) ? kvp.Key : $"{kvp.Key}={Protocol.EscapeTag(kvp.Value)}");
+
+                outs.Add($"@{string.Join(";", tags)}");
+            }
+
+            if (Source != null)
+            {
+                outs.Add($":{Source}");
+            }
+
+            outs.Add(Command);
+
+            if (Params != null && Params.Any())
+            {
+                // Walk by index rather than removing the last element, so Params is left untouched.
+                var lastIndex = Params.Count - 1;
+
+                for (var i = 0; i < lastIndex; ++i)
+                {
+                    var p = Params[i];
+                    if (p.Contains(' '))
+                        throw new ArgumentException("non-last parameters cannot have spaces");
+                    if (p.StartsWith(':'))
+                        throw new ArgumentException("non-last parameters cannot start with colon");
+                    outs.Add(p);
+                }
+
+                // The last parameter needs the ':' prefix whenever it could not be read back unambiguously.
+                var last = Params[lastIndex];
+                if (string.IsNullOrWhiteSpace(last) || last.Contains(' ') || last.StartsWith(':'))
+                    last = $":{last}";
+                outs.Add(last);
+            }
+
+            return string.Join(" ", outs);
+        }
+    }
+}
diff --git a/IrcTokens/Protocol.cs b/IrcTokens/Protocol.cs
new file mode 100644
index 0000000..b2af6fa
--- /dev/null
+++ b/IrcTokens/Protocol.cs
@@ -0,0 +1,74 @@
+using System;
+using System.Globalization;
+using System.Linq;
+using System.Text;
+
+namespace IrcTokens
+{
+    /// <summary>
+    /// Escaping and unescaping of ircv3 message tag values.
+    /// </summary>
+    public class Protocol
+    {
+        // TagUnescaped[i] is the raw text that TagEscaped[i] stands for; the two arrays are parallel.
+        private static readonly string[] TagUnescaped = new []
+        {
+            "\\", " ", ";", "\r", "\n"
+        };
+
+        private static readonly string[] TagEscaped = new []
+        {
+            "\\\\", "\\s", "\\:", "\\r", "\\n"
+        };
+
+        /// <summary>
+        /// Unescape ircv3 tag
+        /// </summary>
+        /// <remarks>
+        /// The value is read one char at a time. A backslash followed by a character that is not
+        /// part of a known escape is dropped (the character is kept); a backslash at the very end
+        /// of the value is dropped.
+        /// </remarks>
+        /// <param name="val">escaped string</param>
+        /// <returns>unescaped string</returns>
+        /// <exception cref="ArgumentNullException"><paramref name="val"/> is null</exception>
+        public static string UnescapeTag(string val)
+        {
+            if (val == null) throw new ArgumentNullException(nameof(val));
+
+            var unescaped = new StringBuilder(val.Length);
+
+            for (var i = 0; i < val.Length; ++i)
+            {
+                var current = val[i];
+                if (current != '\\')
+                {
+                    unescaped.Append(current);
+                    continue;
+                }
+
+                // Step onto the escaped character; a lone trailing backslash has none and is dropped.
+                if (++i >= val.Length) break;
+
+                var index = Array.IndexOf(TagEscaped, val.Substring(i - 1, 2));
+                if (index >= 0)
+                    unescaped.Append(TagUnescaped[index]);
+                else
+                    unescaped.Append(val[i]);
+            }
+
+            return unescaped.ToString();
+        }
+
+        /// <summary>
+        /// Escape strings for use in ircv3 tags
+        /// </summary>
+        /// <param name="val">string to escape</param>
+        /// <returns>escaped string</returns>
+        /// <exception cref="ArgumentNullException"><paramref name="val"/> is null</exception>
+        public static string EscapeTag(string val)
+        {
+            if (val == null) throw new ArgumentNullException(nameof(val));
+
+            // Order matters: the backslash entry comes first in the table, so the backslashes
+            // introduced by the later replacements are not escaped a second time.
+            for (var i = 0; i < TagUnescaped.Length; ++i)
+            {
+                val = val.Replace(TagUnescaped[i], TagEscaped[i]);
+            }
+
+            return val;
+        }
+    }
+}
diff --git a/IrcTokens/Tests/Data/JoinModel.cs b/IrcTokens/Tests/Data/JoinModel.cs
new file mode 100644
index 0000000..b016fc3
--- /dev/null
+++ b/IrcTokens/Tests/Data/JoinModel.cs
@@ -0,0 +1,31 @@
+using System.Collections.Generic;
+using YamlDotNet.Serialization;
+
+namespace IrcTokens.Tests.Data
+{
+    /// <summary>
+    /// Shape of the msg-join.yaml test vectors: a list of tests, each pairing message
+    /// atoms with the formatted lines that are accepted for them.
+    /// </summary>
+    public class JoinModel
+    {
+        /// <summary>The parts of one IRC message as listed in the test data.</summary>
+        public class Atoms
+        {
+            /// <summary>Tags dictionary of the message.</summary>
+            public Dictionary<string, string> Tags { get; set; }
+
+            /// <summary>Source string of the message.</summary>
+            public string Source { get; set; }
+
+            /// <summary>Verb string of the message.</summary>
+            public string Verb { get; set; }
+
+            /// <summary>Parameters of the message, in order.</summary>
+            public List<string> Params { get; set; }
+        }
+
+        /// <summary>One test vector.</summary>
+        public class Test
+        {
+            /// <summary>Description of the test; read from the yaml key "desc".</summary>
+            [YamlMember(Alias = "desc")]
+            public string Description { get; set; }
+
+            /// <summary>Message parts to be joined.</summary>
+            public Atoms Atoms { get; set; }
+
+            /// <summary>Formatted lines that count as a match for <see cref="Atoms"/>.</summary>
+            public List<string> Matches { get; set; }
+        }
+
+        /// <summary>All test vectors found in the file.</summary>
+        public List<Test> Tests { get; set; }
+    }
+}
diff --git a/IrcTokens/Tests/Data/SplitModel.cs b/IrcTokens/Tests/Data/SplitModel.cs
new file mode 100644
index 0000000..65177a3
--- /dev/null
+++ b/IrcTokens/Tests/Data/SplitModel.cs
@@ -0,0 +1,15 @@
+using System.Collections.Generic;
+
+namespace IrcTokens.Tests.Data
+{
+    /// <summary>
+    /// Shape of the msg-split.yaml test vectors: a list of tests, each pairing a raw
+    /// input line with the atoms it is expected to split into.
+    /// </summary>
+    public class SplitModel
+    {
+        /// <summary>One test vector.</summary>
+        public class Test
+        {
+            /// <summary>Raw line to be parsed.</summary>
+            public string Input { get; set; }
+
+            /// <summary>Expected message parts for <see cref="Input"/>.</summary>
+            public JoinModel.Atoms Atoms { get; set; }
+        }
+
+        /// <summary>All test vectors found in the file.</summary>
+        public List<Test> Tests { get; set; }
+    }
+}
diff --git a/IrcTokens/Tests/Data/msg-join.yaml b/IrcTokens/Tests/Data/msg-join.yaml
new file mode 100644
index 0000000..d1d7429
--- /dev/null
+++ b/IrcTokens/Tests/Data/msg-join.yaml
@@ -0,0 +1,221 @@
+# IRC parser tests
+# joining atoms into sendable messages
+
+# Written in 2015 by Daniel Oaks <daniel@danieloaks.net>
+#
+# To the extent possible under law, the author(s) have dedicated all copyright
+# and related and neighboring rights to this software to the public domain
+# worldwide. This software is distributed without any warranty.
+#
+# You should have received a copy of the CC0 Public Domain Dedication along
+# with this software. If not, see
+# <http://creativecommons.org/publicdomain/zero/1.0/>.
+
+# some of the tests here originate from grawity's test vectors, which is WTFPL v2 licensed
+#   https://github.com/grawity/code/tree/master/lib/tests
+# some of the tests here originate from Mozilla's test vectors, which is public domain
+#   https://dxr.mozilla.org/comm-central/source/chat/protocols/irc/test/test_ircMessage.js
+# some of the tests here originate from SaberUK's test vectors, which he's indicated I am free to include here
+#   https://github.com/SaberUK/ircparser/tree/master/test
+
+tests:
+  # the desc string holds a description of the test, if it exists
+
+  # the atoms dict has the keys:
+  #   * tags: tags dict
+  #       tags with no value are an empty string
+  #   * source: source string, without single leading colon
+  #   * verb: verb string
+  #   * params: params split up as a list
+  # if the params key does not exist, assume it is empty
+  # if any other keys do not exist, assume they are null
+  # a key that is null does not exist or is not specified with the
+  #   given input string
+
+  # matches is a list of messages that match
+
+  # simple tests
+  - desc: Simple test with verb and params.
+    atoms:
+      verb: "foo"
+      params:
+        - "bar"
+        - "baz"
+        - "asdf"
+    matches:
+      - "foo bar baz asdf"
+      - "foo bar baz :asdf"
+
+  # with no regular params
+  - desc: Simple test with source and no params.
+    atoms:
+      source: "src"
+      verb: "AWAY"
+    matches:
+      - ":src AWAY"
+
+  - desc: Simple test with source and empty trailing param.
+    atoms:
+      source: "src"
+      verb: "AWAY"
+      params:
+        - ""
+    matches:
+      - ":src AWAY :"
+
+  # with source
+  - desc: Simple test with source.
+    atoms:
+      source: "coolguy"
+      verb: "foo"
+      params:
+        - "bar"
+        - "baz"
+        - "asdf"
+    matches:
+      - ":coolguy foo bar baz asdf"
+      - ":coolguy foo bar baz :asdf"
+
+  # with trailing param
+  - desc: Simple test with trailing param.
+    atoms:
+      verb: "foo"
+      params:
+        - "bar"
+        - "baz"
+        - "asdf quux"
+    matches:
+      - "foo bar baz :asdf quux"
+
+  - desc: Simple test with empty trailing param.
+    atoms:
+      verb: "foo"
+      params:
+        - "bar"
+        - "baz"
+        - ""
+    matches:
+      - "foo bar baz :"
+
+  - desc: Simple test with trailing param containing colon.
+    atoms:
+      verb: "foo"
+      params:
+        - "bar"
+        - "baz"
+        - ":asdf"
+    matches:
+      - "foo bar baz ::asdf"
+
+  # with source and trailing param
+  - desc: Test with source and trailing param.
+    atoms:
+      source: "coolguy"
+      verb: "foo"
+      params:
+        - "bar"
+        - "baz"
+        - "asdf quux"
+    matches:
+      - ":coolguy foo bar baz :asdf quux"
+
+  - desc: Test with trailing containing beginning+end whitespace.
+    atoms:
+      source: "coolguy"
+      verb: "foo"
+      params:
+        - "bar"
+        - "baz"
+        - "  asdf quux "
+    matches:
+      - ":coolguy foo bar baz :  asdf quux "
+
+  - desc: Test with trailing containing what looks like another trailing param.
+    atoms:
+      source: "coolguy"
+      verb: "PRIVMSG"
+      params:
+        - "bar"
+        - "lol :) "
+    matches:
+      - ":coolguy PRIVMSG bar :lol :) "
+
+  - desc: Simple test with source and empty trailing.
+    atoms:
+      source: "coolguy"
+      verb: "foo"
+      params:
+        - "bar"
+        - "baz"
+        - ""
+    matches:
+      - ":coolguy foo bar baz :"
+
+  - desc: Trailing contains only spaces.
+    atoms:
+      source: "coolguy"
+      verb: "foo"
+      params:
+        - "bar"
+        - "baz"
+        - "  "
+    matches:
+      - ":coolguy foo bar baz :  "
+
+  - desc: Param containing tab (tab is not considered SPACE for message splitting).
+    atoms:
+      source: "coolguy"
+      verb: "foo"
+      params:
+        - "b\tar"
+        - "baz"
+    matches:
+      - ":coolguy foo b\tar baz"
+      - ":coolguy foo b\tar :baz"
+
+  # with tags
+  - desc: Tag with no value and space-filled trailing.
+    atoms:
+      tags:
+        "asd": ""
+      source: "coolguy"
+      verb: "foo"
+      params:
+        - "bar"
+        - "baz"
+        - "  "
+    matches:
+      - "@asd :coolguy foo bar baz :  "
+
+  - desc: Tags with escaped values.
+    atoms:
+      verb: "foo"
+      tags:
+        "a": "b\\and\nk"
+        "d": "gh;764"
+    matches:
+      - "@a=b\\\\and\\nk;d=gh\\:764 foo"
+      - "@d=gh\\:764;a=b\\\\and\\nk foo"
+
+  - desc: Tags with escaped values and params.
+    atoms:
+      verb: "foo"
+      tags:
+        "a": "b\\and\nk"
+        "d": "gh;764"
+      params:
+        - "par1"
+        - "par2"
+    matches:
+      - "@a=b\\\\and\\nk;d=gh\\:764 foo par1 par2"
+      - "@a=b\\\\and\\nk;d=gh\\:764 foo par1 :par2"
+      - "@d=gh\\:764;a=b\\\\and\\nk foo par1 par2"
+      - "@d=gh\\:764;a=b\\\\and\\nk foo par1 :par2"
+
+  - desc: Tag with long, strange values (including LF and newline).
+    atoms:
+      tags:
+        foo: "\\\\;\\s \r\n"
+      verb: "COMMAND"
+    matches:
+      - "@foo=\\\\\\\\\\:\\\\s\\s\\r\\n COMMAND"
diff --git a/IrcTokens/Tests/Data/msg-split.yaml b/IrcTokens/Tests/Data/msg-split.yaml
new file mode 100644
index 0000000..fa3f4aa
--- /dev/null
+++ b/IrcTokens/Tests/Data/msg-split.yaml
@@ -0,0 +1,343 @@
+# IRC parser tests
+# splitting messages into usable atoms
+
+# Written in 2015 by Daniel Oaks <daniel@danieloaks.net>
+#
+# To the extent possible under law, the author(s) have dedicated all copyright
+# and related and neighboring rights to this software to the public domain
+# worldwide. This software is distributed without any warranty.
+#
+# You should have received a copy of the CC0 Public Domain Dedication along
+# with this software. If not, see
+# <http://creativecommons.org/publicdomain/zero/1.0/>.
+
+# some of the tests here originate from grawity's test vectors, which is WTFPL v2 licensed
+#   https://github.com/grawity/code/tree/master/lib/tests
+# some of the tests here originate from Mozilla's test vectors, which is public domain
+#   https://dxr.mozilla.org/comm-central/source/chat/protocols/irc/test/test_ircMessage.js
+# some of the tests here originate from SaberUK's test vectors, which he's indicated I am free to include here
+#   https://github.com/SaberUK/ircparser/tree/master/test
+
+# we follow RFC1459 with regards to multiple ascii spaces splitting atoms:
+#   The prefix, command, and all parameters are
+#   separated by one (or more) ASCII space character(s) (0x20).
+# because doing it as RFC2812 says (strictly as a single ascii space) isn't sane
+
+tests:
+  # input is the string coming directly from the server to parse
+
+  # the atoms dict has the keys:
+  #   * tags: tags dict
+  #       tags with no value are an empty string
+  #   * source: source string, without single leading colon
+  #   * verb: verb string
+  #   * params: params split up as a list
+  # if the params key does not exist, assume it is empty
+  # if any other keys do not exist, assume they are null
+  # a key that is null does not exist or is not specified with the
+  #   given input string
+
+  # simple
+  - input: "foo bar baz asdf"
+    atoms:
+      verb: "foo"
+      params:
+        - "bar"
+        - "baz"
+        - "asdf"
+
+  # with source
+  - input: ":coolguy foo bar baz asdf"
+    atoms:
+      source: "coolguy"
+      verb: "foo"
+      params:
+        - "bar"
+        - "baz"
+        - "asdf"
+
+  # with trailing param
+  - input: "foo bar baz :asdf quux"
+    atoms:
+      verb: "foo"
+      params:
+        - "bar"
+        - "baz"
+        - "asdf quux"
+
+  - input: "foo bar baz :"
+    atoms:
+      verb: "foo"
+      params:
+        - "bar"
+        - "baz"
+        - ""
+
+  - input: "foo bar baz ::asdf"
+    atoms:
+      verb: "foo"
+      params:
+        - "bar"
+        - "baz"
+        - ":asdf"
+
+  # with source and trailing param
+  - input: ":coolguy foo bar baz :asdf quux"
+    atoms:
+      source: "coolguy"
+      verb: "foo"
+      params:
+        - "bar"
+        - "baz"
+        - "asdf quux"
+
+  - input: ":coolguy foo bar baz :  asdf quux "
+    atoms:
+      source: "coolguy"
+      verb: "foo"
+      params:
+        - "bar"
+        - "baz"
+        - "  asdf quux "
+
+  - input: ":coolguy PRIVMSG bar :lol :) "
+    atoms:
+      source: "coolguy"
+      verb: "PRIVMSG"
+      params:
+        - "bar"
+        - "lol :) "
+
+  - input: ":coolguy foo bar baz :"
+    atoms:
+      source: "coolguy"
+      verb: "foo"
+      params:
+        - "bar"
+        - "baz"
+        - ""
+
+  - input: ":coolguy foo bar baz :  "
+    atoms:
+      source: "coolguy"
+      verb: "foo"
+      params:
+        - "bar"
+        - "baz"
+        - "  "
+
+  # with tags
+  - input: "@a=b;c=32;k;rt=ql7 foo"
+    atoms:
+      verb: "foo"
+      tags:
+        "a": "b"
+        "c": "32"
+        "k": ""
+        "rt": "ql7"
+
+  # with escaped tags
+  - input: "@a=b\\\\and\\nk;c=72\\s45;d=gh\\:764 foo"
+    atoms:
+      verb: "foo"
+      tags:
+        "a": "b\\and\nk"
+        "c": "72 45"
+        "d": "gh;764"
+
+  # with tags and source
+  - input: "@c;h=;a=b :quux ab cd"
+    atoms:
+      tags:
+        "c": ""
+        "h": ""
+        "a": "b"
+      source: "quux"
+      verb: "ab"
+      params:
+        - "cd"
+
+  # different forms of last param
+  - input: ":src JOIN #chan"
+    atoms:
+      source: "src"
+      verb: "JOIN"
+      params:
+        - "#chan"
+
+  - input: ":src JOIN :#chan"
+    atoms:
+      source: "src"
+      verb: "JOIN"
+      params:
+        - "#chan"
+
+  # with and without last param
+  - input: ":src AWAY"
+    atoms:
+      source: "src"
+      verb: "AWAY"
+
+  - input: ":src AWAY "
+    atoms:
+      source: "src"
+      verb: "AWAY"
+
+  # tab is not considered <SPACE>
+  - input: ":cool\tguy foo bar baz"
+    atoms:
+      source: "cool\tguy"
+      verb: "foo"
+      params:
+        - "bar"
+        - "baz"
+
+  # with weird control codes in the source
+  - input: ":coolguy!ag@net\x035w\x03ork.admin PRIVMSG foo :bar baz"
+    atoms:
+      source: "coolguy!ag@net\x035w\x03ork.admin"
+      verb: "PRIVMSG"
+      params:
+        - "foo"
+        - "bar baz"
+
+  - input: ":coolguy!~ag@n\x02et\x0305w\x0fork.admin PRIVMSG foo :bar baz"
+    atoms:
+      source: "coolguy!~ag@n\x02et\x0305w\x0fork.admin"
+      verb: "PRIVMSG"
+      params:
+        - "foo"
+        - "bar baz"
+
+  - input: "@tag1=value1;tag2;vendor1/tag3=value2;vendor2/tag4= :irc.example.com COMMAND param1 param2 :param3 param3"
+    atoms:
+      tags:
+        tag1: "value1"
+        tag2: ""
+        vendor1/tag3: "value2"
+        vendor2/tag4: ""
+      source: "irc.example.com"
+      verb: "COMMAND"
+      params:
+        - "param1"
+        - "param2"
+        - "param3 param3"
+
+  - input: ":irc.example.com COMMAND param1 param2 :param3 param3"
+    atoms:
+      source: "irc.example.com"
+      verb: "COMMAND"
+      params:
+        - "param1"
+        - "param2"
+        - "param3 param3"
+
+  - input: "@tag1=value1;tag2;vendor1/tag3=value2;vendor2/tag4 COMMAND param1 param2 :param3 param3"
+    atoms:
+      tags:
+        tag1: "value1"
+        tag2: ""
+        vendor1/tag3: "value2"
+        vendor2/tag4: ""
+      verb: "COMMAND"
+      params:
+        - "param1"
+        - "param2"
+        - "param3 param3"
+
+  - input: "COMMAND"
+    atoms:
+      verb: "COMMAND"
+
+  # yaml encoding + slashes is fun
+  - input: "@foo=\\\\\\\\\\:\\\\s\\s\\r\\n COMMAND"
+    atoms:
+      tags:
+        foo: "\\\\;\\s \r\n"
+      verb: "COMMAND"
+
+  # broken messages from unreal
+  - input: ":gravel.mozilla.org 432  #momo :Erroneous Nickname: Illegal characters"
+    atoms:
+      source: "gravel.mozilla.org"
+      verb: "432"
+      params:
+        - "#momo"
+        - "Erroneous Nickname: Illegal characters"
+
+  - input: ":gravel.mozilla.org MODE #tckk +n "
+    atoms:
+      source: "gravel.mozilla.org"
+      verb: "MODE"
+      params:
+        - "#tckk"
+        - "+n"
+
+  - input: ":services.esper.net MODE #foo-bar +o foobar  "
+    atoms:
+      source: "services.esper.net"
+      verb: "MODE"
+      params:
+        - "#foo-bar"
+        - "+o"
+        - "foobar"
+
+  # tag values should be parsed char-at-a-time to prevent wayward replacements.
+  - input: "@tag1=value\\\\ntest COMMAND"
+    atoms:
+      tags:
+        tag1: "value\\ntest"
+      verb: "COMMAND"
+
+  # If a tag value has a slash followed by a character which doesn't need
+  # to be escaped, the slash should be dropped.
+  - input: "@tag1=value\\1 COMMAND"
+    atoms:
+      tags:
+        tag1: "value1"
+      verb: "COMMAND"
+
+  # A slash at the end of a tag value should be dropped
+  - input: "@tag1=value1\\ COMMAND"
+    atoms:
+      tags:
+        tag1: "value1"
+      verb: "COMMAND"
+
+  # Duplicate tags: Parsers SHOULD disregard all but the final occurrence
+  - input: "@tag1=1;tag2=3;tag3=4;tag1=5 COMMAND"
+    atoms:
+      tags:
+        tag1: "5"
+        tag2: "3"
+        tag3: "4"
+      verb: "COMMAND"
+
+  # vendored tags can have the same name as a non-vendored tag
+  - input: "@tag1=1;tag2=3;tag3=4;tag1=5;vendor/tag2=8 COMMAND"
+    atoms:
+      tags:
+        tag1: "5"
+        tag2: "3"
+        tag3: "4"
+        vendor/tag2: "8"
+      verb: "COMMAND"
+
+  # Some parsers handle /MODE in a special way, make sure they do it right
+  - input: ":SomeOp MODE #channel :+i"
+    atoms:
+      source: "SomeOp"
+      verb: "MODE"
+      params:
+      - "#channel"
+      - "+i"
+
+  - input: ":SomeOp MODE #channel +oo SomeUser :AnotherUser"
+    atoms:
+      source: "SomeOp"
+      verb: "MODE"
+      params:
+      - "#channel"
+      - "+oo"
+      - "SomeUser"
+      - "AnotherUser"
diff --git a/IrcTokens/Tests/FormatTests.cs b/IrcTokens/Tests/FormatTests.cs
new file mode 100644
index 0000000..a804c1d
--- /dev/null
+++ b/IrcTokens/Tests/FormatTests.cs
@@ -0,0 +1,150 @@
+using Microsoft.VisualStudio.TestTools.UnitTesting;
+using System;
+using System.Collections.Generic;
+
+namespace IrcTokens.Tests
+{
+    /// <summary>
+    /// Checks the text produced by <see cref="Line.Format"/> for hand-built lines.
+    /// </summary>
+    [TestClass]
+    public class FormatTests
+    {
+        // Builds a Line from the given parts (anything omitted stays null) and returns its formatted text.
+        private static string FormatLine(
+            string command,
+            List<string> parameters = null,
+            Dictionary<string, string> tags = null,
+            string source = null)
+        {
+            var line = new Line
+            {
+                Command = command,
+                Params = parameters,
+                Tags = tags,
+                Source = source
+            };
+
+            return line.Format();
+        }
+
+        // The two-parameter list most tests share; a fresh list per call.
+        private static List<string> ChannelHello() => new List<string> {"#channel", "hello"};
+
+        /// <summary>Every character that needs escaping in a tag value is escaped.</summary>
+        [TestMethod]
+        public void TestTags()
+        {
+            var tags = new Dictionary<string, string> {{"id", "\\" + " " + ";" + "\r\n"}};
+
+            var formatted = FormatLine("PRIVMSG", ChannelHello(), tags: tags);
+
+            Assert.AreEqual("@id=\\\\\\s\\:\\r\\n PRIVMSG #channel hello", formatted);
+        }
+
+        /// <summary>No tags dictionary at all produces no tag section.</summary>
+        [TestMethod]
+        public void TestMissingTag()
+        {
+            var formatted = FormatLine("PRIVMSG", ChannelHello());
+
+            Assert.AreEqual("PRIVMSG #channel hello", formatted);
+        }
+
+        /// <summary>A tag whose value is null is written as a bare key.</summary>
+        [TestMethod]
+        public void TestNullTag()
+        {
+            var tags = new Dictionary<string, string> {{"a", null}};
+
+            var formatted = FormatLine("PRIVMSG", ChannelHello(), tags: tags);
+
+            Assert.AreEqual("@a PRIVMSG #channel hello", formatted);
+        }
+
+        /// <summary>A tag whose value is empty is written as a bare key.</summary>
+        [TestMethod]
+        public void TestEmptyTag()
+        {
+            var tags = new Dictionary<string, string> {{"a", ""}};
+
+            var formatted = FormatLine("PRIVMSG", ChannelHello(), tags: tags);
+
+            Assert.AreEqual("@a PRIVMSG #channel hello", formatted);
+        }
+
+        /// <summary>The source is written first, prefixed with a colon.</summary>
+        [TestMethod]
+        public void TestSource()
+        {
+            var formatted = FormatLine("PRIVMSG", ChannelHello(), source: "nick!user@host");
+
+            Assert.AreEqual(":nick!user@host PRIVMSG #channel hello", formatted);
+        }
+
+        /// <summary>A lower-case command is written unchanged.</summary>
+        [TestMethod]
+        public void TestCommandLowercase()
+        {
+            Assert.AreEqual("privmsg", FormatLine("privmsg"));
+        }
+
+        /// <summary>An upper-case command is written unchanged.</summary>
+        [TestMethod]
+        public void TestCommandUppercase()
+        {
+            Assert.AreEqual("PRIVMSG", FormatLine("PRIVMSG"));
+        }
+
+        /// <summary>A last parameter containing a space gets the colon prefix.</summary>
+        [TestMethod]
+        public void TestTrailingSpace()
+        {
+            var formatted = FormatLine("PRIVMSG", new List<string> {"#channel", "hello world"});
+
+            Assert.AreEqual("PRIVMSG #channel :hello world", formatted);
+        }
+
+        /// <summary>A last parameter without spaces is written without the colon prefix.</summary>
+        [TestMethod]
+        public void TestTrailingNoSpace()
+        {
+            var formatted = FormatLine("PRIVMSG", new List<string> {"#channel", "helloworld"});
+
+            Assert.AreEqual("PRIVMSG #channel helloworld", formatted);
+        }
+
+        /// <summary>A last parameter that itself starts with a colon gets a second colon.</summary>
+        [TestMethod]
+        public void TestTrailingDoubleColon()
+        {
+            var formatted = FormatLine("PRIVMSG", new List<string> {"#channel", ":helloworld"});
+
+            Assert.AreEqual("PRIVMSG #channel ::helloworld", formatted);
+        }
+
+        /// <summary>A space inside a non-last parameter is rejected.</summary>
+        [TestMethod]
+        public void TestInvalidNonLastSpace()
+        {
+            var parameters = new List<string> {"user", "0 *", "real name"};
+
+            Assert.ThrowsException<ArgumentException>(() => { FormatLine("USER", parameters); });
+        }
+
+        /// <summary>A leading colon on a non-last parameter is rejected.</summary>
+        [TestMethod]
+        public void TestInvalidNonLastColon()
+        {
+            var parameters = new List<string> {":#channel", "hello"};
+
+            Assert.ThrowsException<ArgumentException>(() => { FormatLine("PRIVMSG", parameters); });
+        }
+    }
+}
diff --git a/IrcTokens/Tests/HostmaskTests.cs b/IrcTokens/Tests/HostmaskTests.cs
new file mode 100644
index 0000000..78b8a54
--- /dev/null
+++ b/IrcTokens/Tests/HostmaskTests.cs
@@ -0,0 +1,64 @@
+using Microsoft.VisualStudio.TestTools.UnitTesting;
+
+namespace IrcTokens.Tests
+{
+    /// <summary>
+    /// Checks how <see cref="Hostmask"/> splits a source into nickname, username and hostname.
+    /// </summary>
+    [TestClass]
+    public class HostmaskTests
+    {
+        // Asserts all three parts at once; pass null for a part that is expected to be absent.
+        private static void AssertParts(Hostmask actual, string nick, string user, string host)
+        {
+            Assert.AreEqual(nick, actual.NickName);
+            Assert.AreEqual(user, actual.UserName);
+            Assert.AreEqual(host, actual.HostName);
+        }
+
+        /// <summary>All three parts present.</summary>
+        [TestMethod]
+        public void TestHostmask()
+        {
+            AssertParts(new Hostmask("nick!user@host"), "nick", "user", "host");
+        }
+
+        /// <summary>No '@': hostname stays null.</summary>
+        [TestMethod]
+        public void TestNoHostName()
+        {
+            AssertParts(new Hostmask("nick!user"), "nick", "user", null);
+        }
+
+        /// <summary>No '!': username stays null.</summary>
+        [TestMethod]
+        public void TestNoUserName()
+        {
+            AssertParts(new Hostmask("nick@host"), "nick", null, "host");
+        }
+
+        /// <summary>Neither separator: only the nickname is set.</summary>
+        [TestMethod]
+        public void TestOnlyNickName()
+        {
+            AssertParts(new Hostmask("nick"), "nick", null, null);
+        }
+
+        /// <summary>The hostmask exposed by a parsed line matches one parsed directly from the same source.</summary>
+        [TestMethod]
+        public void TestHostmaskFromLine()
+        {
+            var parsed = new Line(":nick!user@host PRIVMSG #channel hello");
+            var expected = new Hostmask("nick!user@host");
+
+            Assert.AreEqual(expected.ToString(), parsed.Hostmask.ToString());
+            AssertParts(parsed.Hostmask, "nick", "user", "host");
+        }
+
+        /// <summary>A line without a source yields a hostmask whose parts are all null.</summary>
+        [TestMethod]
+        public void TestEmptyHostmaskFromLine()
+        {
+            var parsed = new Line("PRIVMSG #channel hello");
+
+            AssertParts(parsed.Hostmask, null, null, null);
+        }
+    }
+}
diff --git a/IrcTokens/Tests/ParserTests.cs b/IrcTokens/Tests/ParserTests.cs
new file mode 100644
index 0000000..502b6d6
--- /dev/null
+++ b/IrcTokens/Tests/ParserTests.cs
@@ -0,0 +1,55 @@
+using System.Collections.Generic;
+using System.IO;
+using IrcTokens.Tests.Data;
+using Microsoft.VisualStudio.TestTools.UnitTesting;
+using YamlDotNet.Serialization;
+using YamlDotNet.Serialization.NamingConventions;
+
+namespace IrcTokens.Tests
+{
+    /// <summary>
+    /// Data-driven tests that run <see cref="Line"/> against the YAML fixtures
+    /// Tests/Data/msg-split.yaml and Tests/Data/msg-join.yaml.
+    /// </summary>
+    [TestClass]
+    public class ParserTests
+    {
+        /// <summary>
+        /// Reads the YAML file at <paramref name="path"/> and deserializes it into
+        /// <typeparamref name="T"/> using the camelCase naming convention.
+        /// </summary>
+        /// <typeparam name="T">Model type to deserialize the document into.</typeparam>
+        /// <param name="path">Path of the YAML file to read.</param>
+        /// <returns>The deserialized model.</returns>
+        private static T LoadYaml<T>(string path)
+        {
+            var deserializer = new DeserializerBuilder()
+                .WithNamingConvention(CamelCaseNamingConvention.Instance)
+                .Build();
+
+            return deserializer.Deserialize<T>(File.ReadAllText(path));
+        }
+
+        /// <summary>
+        /// For every case in msg-split.yaml, builds a <see cref="Line"/> from the raw input
+        /// and compares its command, source, tags and params with the expected atoms.
+        /// </summary>
+        [TestMethod]
+        public void TestSplit()
+        {
+            foreach (var test in LoadYaml<SplitModel>("Tests/Data/msg-split.yaml").Tests)
+            {
+                var tokens = new Line(test.Input);
+                var atoms = test.Atoms;
+
+                // Upper-case with the invariant culture so the expected verb does not depend
+                // on the machine's current culture (e.g. the Turkish dotted capital I).
+                Assert.AreEqual(atoms.Verb.ToUpperInvariant(), tokens.Command, $"command failed on: '{test.Input}'");
+                Assert.AreEqual(atoms.Source, tokens.Source, $"source failed on: '{test.Input}'");
+                CollectionAssert.AreEqual(atoms.Tags, tokens.Tags, $"tags failed on: '{test.Input}'");
+
+                // A fixture without params is compared against an empty list.
+                CollectionAssert.AreEqual(atoms.Params ?? new List<string>(), tokens.Params, $"params failed on: '{test.Input}'");
+            }
+        }
+
+        /// <summary>
+        /// For every case in msg-join.yaml, builds a <see cref="Line"/> from the atoms,
+        /// formats it, and checks that the result is one of the accepted matches.
+        /// </summary>
+        [TestMethod]
+        public void TestJoin()
+        {
+            foreach (var test in LoadYaml<JoinModel>("Tests/Data/msg-join.yaml").Tests)
+            {
+                var atoms = test.Atoms;
+                var line = new Line
+                {
+                    Command = atoms.Verb,
+                    Params = atoms.Params,
+                    Source = atoms.Source,
+                    Tags = atoms.Tags
+                }.Format();
+
+                // Include the formatted output so a failing case can be diagnosed from the message alone.
+                Assert.IsTrue(test.Matches.Contains(line), $"{test.Description} (formatted: '{line}')");
+            }
+        }
+    }
+}
diff --git a/IrcTokens/Tests/TokenizationTests.cs b/IrcTokens/Tests/TokenizationTests.cs
new file mode 100644
index 0000000..6d8a69d
--- /dev/null
+++ b/IrcTokens/Tests/TokenizationTests.cs
@@ -0,0 +1,118 @@
+using System.Collections.Generic;
+using Microsoft.VisualStudio.TestTools.UnitTesting;
+
+namespace IrcTokens.Tests
+{
+    /// <summary>
+    /// Unit tests for how <see cref="Line"/> splits a raw message into tags, source,
+    /// command and params.
+    /// </summary>
+    [TestClass]
+    public class TokenizationTests
+    {
+        /// <summary>
+        /// A message without a tag section has null tags.
+        /// </summary>
+        [TestMethod]
+        public void TestTagsMissing()
+        {
+            const string raw = "PRIVMSG #channel";
+
+            var parsed = new Line(raw);
+
+            Assert.IsNull(parsed.Tags);
+        }
+
+        /// <summary>
+        /// A tag written as "key=" with nothing after the equals sign has an empty value.
+        /// </summary>
+        [TestMethod]
+        public void TestTagsMissingValue()
+        {
+            const string raw = "@id= PRIVMSG #channel";
+
+            var parsed = new Line(raw);
+
+            Assert.AreEqual(string.Empty, parsed.Tags["id"]);
+        }
+
+        /// <summary>
+        /// A tag written as a bare key with no equals sign has an empty value.
+        /// </summary>
+        [TestMethod]
+        public void TestTagsMissingEqual()
+        {
+            const string raw = "@id PRIVMSG #channel";
+
+            var parsed = new Line(raw);
+
+            Assert.AreEqual(string.Empty, parsed.Tags["id"]);
+        }
+
+        /// <summary>
+        /// The escapes \\, \:, \r, \n and \s in a tag value are decoded to a backslash,
+        /// a semicolon, a carriage return, a line feed and a space.
+        /// </summary>
+        [TestMethod]
+        public void TestTagsUnescape()
+        {
+            const string raw = @"@id=1\\\:\r\n\s2 PRIVMSG #channel";
+
+            var parsed = new Line(raw);
+
+            Assert.AreEqual("1\\;\r\n 2", parsed.Tags["id"]);
+        }
+
+        /// <summary>
+        /// An escaped backslash directly followed by 's' stays a backslash and an 's';
+        /// it is not decoded as an escaped space.
+        /// </summary>
+        [TestMethod]
+        public void TestTagsOverlap()
+        {
+            const string raw = @"@id=1\\\s\\s PRIVMSG #channel";
+
+            var parsed = new Line(raw);
+
+            Assert.AreEqual("1\\ \\s", parsed.Tags["id"]);
+        }
+
+        /// <summary>
+        /// A lone backslash at the very end of a tag value is dropped.
+        /// </summary>
+        [TestMethod]
+        public void TestTagsLoneEndSlash()
+        {
+            const string raw = "@id=1\\ PRIVMSG #channel";
+
+            var parsed = new Line(raw);
+
+            Assert.AreEqual("1", parsed.Tags["id"]);
+        }
+
+        /// <summary>
+        /// The source is read from a leading ":prefix" when no tags are present.
+        /// </summary>
+        [TestMethod]
+        public void TestSourceWithoutTags()
+        {
+            const string raw = ":nick!user@host PRIVMSG #channel";
+
+            var parsed = new Line(raw);
+
+            Assert.AreEqual("nick!user@host", parsed.Source);
+        }
+
+        /// <summary>
+        /// The source is read from the ":prefix" that follows a tag section.
+        /// </summary>
+        [TestMethod]
+        public void TestSourceWithTags()
+        {
+            const string raw = "@id=123 :nick!user@host PRIVMSG #channel";
+
+            var parsed = new Line(raw);
+
+            Assert.AreEqual("nick!user@host", parsed.Source);
+        }
+
+        /// <summary>
+        /// A message with neither tags nor a ":prefix" has a null source.
+        /// </summary>
+        [TestMethod]
+        public void TestSourceMissingWithoutTags()
+        {
+            const string raw = "PRIVMSG #channel";
+
+            var parsed = new Line(raw);
+
+            Assert.IsNull(parsed.Source);
+        }
+
+        /// <summary>
+        /// A message with tags but no ":prefix" has a null source.
+        /// </summary>
+        [TestMethod]
+        public void TestSourceMissingWithTags()
+        {
+            const string raw = "@id=123 PRIVMSG #channel";
+
+            var parsed = new Line(raw);
+
+            Assert.IsNull(parsed.Source);
+        }
+
+        /// <summary>
+        /// A lower-case command is reported in upper case.
+        /// </summary>
+        [TestMethod]
+        public void TestCommand()
+        {
+            const string raw = "privmsg #channel";
+
+            var parsed = new Line(raw);
+
+            Assert.AreEqual("PRIVMSG", parsed.Command);
+        }
+
+        /// <summary>
+        /// A ":trailing" parameter keeps its spaces and follows the regular params.
+        /// </summary>
+        [TestMethod]
+        public void TestParamsTrailing()
+        {
+            const string raw = "PRIVMSG #channel :hello world";
+
+            var parsed = new Line(raw);
+            var expected = new List<string> {"#channel", "hello world"};
+
+            CollectionAssert.AreEqual(expected, parsed.Params);
+        }
+
+        /// <summary>
+        /// A ":trailing" parameter on its own is the only param.
+        /// </summary>
+        [TestMethod]
+        public void TestParamsOnlyTrailing()
+        {
+            const string raw = "PRIVMSG :hello world";
+
+            var parsed = new Line(raw);
+            var expected = new List<string> {"hello world"};
+
+            CollectionAssert.AreEqual(expected, parsed.Params);
+        }
+
+        /// <summary>
+        /// A bare command yields that command and an empty (not null) param list.
+        /// </summary>
+        [TestMethod]
+        public void TestParamsMissing()
+        {
+            const string raw = "PRIVMSG";
+
+            var parsed = new Line(raw);
+
+            Assert.AreEqual("PRIVMSG", parsed.Command);
+            var expected = new List<string>();
+            CollectionAssert.AreEqual(expected, parsed.Params);
+        }
+
+        /// <summary>
+        /// A message carrying tags, source, command and params exposes all four parts.
+        /// </summary>
+        [TestMethod]
+        public void TestAllTokens()
+        {
+            const string raw = "@id=123 :nick!user@host PRIVMSG #channel :hello world";
+
+            var parsed = new Line(raw);
+
+            var expectedTags = new Dictionary<string, string> {{"id", "123"}};
+            CollectionAssert.AreEqual(expectedTags, parsed.Tags);
+            Assert.AreEqual("nick!user@host", parsed.Source);
+            Assert.AreEqual("PRIVMSG", parsed.Command);
+            var expectedParams = new List<string> {"#channel", "hello world"};
+            CollectionAssert.AreEqual(expectedParams, parsed.Params);
+        }
+    }
+}