about summary refs log tree commit diff
path: root/IRCTokens/Tests/Data/msg-split.yaml
diff options
context:
space:
mode:
Diffstat (limited to 'IRCTokens/Tests/Data/msg-split.yaml')
-rw-r--r--IRCTokens/Tests/Data/msg-split.yaml343
1 files changed, 343 insertions, 0 deletions
diff --git a/IRCTokens/Tests/Data/msg-split.yaml b/IRCTokens/Tests/Data/msg-split.yaml
new file mode 100644
index 0000000..fa3f4aa
--- /dev/null
+++ b/IRCTokens/Tests/Data/msg-split.yaml
@@ -0,0 +1,343 @@
+# IRC parser tests
+# splitting messages into usable atoms
+
+# Written in 2015 by Daniel Oaks <daniel@danieloaks.net>
+#
+# To the extent possible under law, the author(s) have dedicated all copyright
+# and related and neighboring rights to this software to the public domain
+# worldwide. This software is distributed without any warranty.
+#
+# You should have received a copy of the CC0 Public Domain Dedication along
+# with this software. If not, see
+# <http://creativecommons.org/publicdomain/zero/1.0/>.
+
+# some of the tests here originate from grawity's test vectors, which is WTFPL v2 licensed
+#   https://github.com/grawity/code/tree/master/lib/tests
+# some of the tests here originate from Mozilla's test vectors, which is public domain
+#   https://dxr.mozilla.org/comm-central/source/chat/protocols/irc/test/test_ircMessage.js
+# some of the tests here originate from SaberUK's test vectors, which he's indicated I am free to include here
+#   https://github.com/SaberUK/ircparser/tree/master/test
+
+# we follow RFC1459 with regards to multiple ascii spaces splitting atoms:
+#   The prefix, command, and all parameters are
+#   separated by one (or more) ASCII space character(s) (0x20).
+# because doing it as RFC2812 says (strictly as a single ascii space) isn't sane
+
+tests:
+  # input is the string coming directly from the server to parse
+
+  # the atoms dict has the keys:
+  #   * tags: tags dict
+  #       tags with no value are an empty string
+  #   * source: source string, without single leading colon
+  #   * verb: verb string
+  #   * params: params split up as a list
+  # if the params key does not exist, assume it is empty
+  # if any other keys do no exist, assume they are null
+  # a key that is null does not exist or is not specified with the
+  #   given input string
+
+  # simple
+  - input: "foo bar baz asdf"
+    atoms:
+      verb: "foo"
+      params:
+        - "bar"
+        - "baz"
+        - "asdf"
+
+  # with source
+  - input: ":coolguy foo bar baz asdf"
+    atoms:
+      source: "coolguy"
+      verb: "foo"
+      params:
+        - "bar"
+        - "baz"
+        - "asdf"
+
+  # with trailing param
+  - input: "foo bar baz :asdf quux"
+    atoms:
+      verb: "foo"
+      params:
+        - "bar"
+        - "baz"
+        - "asdf quux"
+
+  - input: "foo bar baz :"
+    atoms:
+      verb: "foo"
+      params:
+        - "bar"
+        - "baz"
+        - ""
+
+  - input: "foo bar baz ::asdf"
+    atoms:
+      verb: "foo"
+      params:
+        - "bar"
+        - "baz"
+        - ":asdf"
+
+  # with source and trailing param
+  - input: ":coolguy foo bar baz :asdf quux"
+    atoms:
+      source: "coolguy"
+      verb: "foo"
+      params:
+        - "bar"
+        - "baz"
+        - "asdf quux"
+
+  - input: ":coolguy foo bar baz :  asdf quux "
+    atoms:
+      source: "coolguy"
+      verb: "foo"
+      params:
+        - "bar"
+        - "baz"
+        - "  asdf quux "
+
+  - input: ":coolguy PRIVMSG bar :lol :) "
+    atoms:
+      source: "coolguy"
+      verb: "PRIVMSG"
+      params:
+        - "bar"
+        - "lol :) "
+
+  - input: ":coolguy foo bar baz :"
+    atoms:
+      source: "coolguy"
+      verb: "foo"
+      params:
+        - "bar"
+        - "baz"
+        - ""
+
+  - input: ":coolguy foo bar baz :  "
+    atoms:
+      source: "coolguy"
+      verb: "foo"
+      params:
+        - "bar"
+        - "baz"
+        - "  "
+
+  # with tags
+  - input: "@a=b;c=32;k;rt=ql7 foo"
+    atoms:
+      verb: "foo"
+      tags:
+        "a": "b"
+        "c": "32"
+        "k": ""
+        "rt": "ql7"
+
+  # with escaped tags
+  - input: "@a=b\\\\and\\nk;c=72\\s45;d=gh\\:764 foo"
+    atoms:
+      verb: "foo"
+      tags:
+        "a": "b\\and\nk"
+        "c": "72 45"
+        "d": "gh;764"
+
+  # with tags and source
+  - input: "@c;h=;a=b :quux ab cd"
+    atoms:
+      tags:
+        "c": ""
+        "h": ""
+        "a": "b"
+      source: "quux"
+      verb: "ab"
+      params:
+        - "cd"
+
+  # different forms of last param
+  - input: ":src JOIN #chan"
+    atoms:
+      source: "src"
+      verb: "JOIN"
+      params:
+        - "#chan"
+
+  - input: ":src JOIN :#chan"
+    atoms:
+      source: "src"
+      verb: "JOIN"
+      params:
+        - "#chan"
+
+  # with and without last param
+  - input: ":src AWAY"
+    atoms:
+      source: "src"
+      verb: "AWAY"
+
+  - input: ":src AWAY "
+    atoms:
+      source: "src"
+      verb: "AWAY"
+
+  # tab is not considered <SPACE>
+  - input: ":cool\tguy foo bar baz"
+    atoms:
+      source: "cool\tguy"
+      verb: "foo"
+      params:
+        - "bar"
+        - "baz"
+
+  # with weird control codes in the source
+  - input: ":coolguy!ag@net\x035w\x03ork.admin PRIVMSG foo :bar baz"
+    atoms:
+      source: "coolguy!ag@net\x035w\x03ork.admin"
+      verb: "PRIVMSG"
+      params:
+        - "foo"
+        - "bar baz"
+
+  - input: ":coolguy!~ag@n\x02et\x0305w\x0fork.admin PRIVMSG foo :bar baz"
+    atoms:
+      source: "coolguy!~ag@n\x02et\x0305w\x0fork.admin"
+      verb: "PRIVMSG"
+      params:
+        - "foo"
+        - "bar baz"
+
+  - input: "@tag1=value1;tag2;vendor1/tag3=value2;vendor2/tag4= :irc.example.com COMMAND param1 param2 :param3 param3"
+    atoms:
+      tags:
+        tag1: "value1"
+        tag2: ""
+        vendor1/tag3: "value2"
+        vendor2/tag4: ""
+      source: "irc.example.com"
+      verb: "COMMAND"
+      params:
+        - "param1"
+        - "param2"
+        - "param3 param3"
+
+  - input: ":irc.example.com COMMAND param1 param2 :param3 param3"
+    atoms:
+      source: "irc.example.com"
+      verb: "COMMAND"
+      params:
+        - "param1"
+        - "param2"
+        - "param3 param3"
+
+  - input: "@tag1=value1;tag2;vendor1/tag3=value2;vendor2/tag4 COMMAND param1 param2 :param3 param3"
+    atoms:
+      tags:
+        tag1: "value1"
+        tag2: ""
+        vendor1/tag3: "value2"
+        vendor2/tag4: ""
+      verb: "COMMAND"
+      params:
+        - "param1"
+        - "param2"
+        - "param3 param3"
+
+  - input: "COMMAND"
+    atoms:
+      verb: "COMMAND"
+
+  # yaml encoding + slashes is fun
+  - input: "@foo=\\\\\\\\\\:\\\\s\\s\\r\\n COMMAND"
+    atoms:
+      tags:
+        foo: "\\\\;\\s \r\n"
+      verb: "COMMAND"
+
+  # broken messages from unreal
+  - input: ":gravel.mozilla.org 432  #momo :Erroneous Nickname: Illegal characters"
+    atoms:
+      source: "gravel.mozilla.org"
+      verb: "432"
+      params:
+        - "#momo"
+        - "Erroneous Nickname: Illegal characters"
+
+  - input: ":gravel.mozilla.org MODE #tckk +n "
+    atoms:
+      source: "gravel.mozilla.org"
+      verb: "MODE"
+      params:
+        - "#tckk"
+        - "+n"
+
+  - input: ":services.esper.net MODE #foo-bar +o foobar  "
+    atoms:
+      source: "services.esper.net"
+      verb: "MODE"
+      params:
+        - "#foo-bar"
+        - "+o"
+        - "foobar"
+
+  # tag values should be parsed char-at-a-time to prevent wayward replacements.
+  - input: "@tag1=value\\\\ntest COMMAND"
+    atoms:
+      tags:
+        tag1: "value\\ntest"
+      verb: "COMMAND"
+
+  # If a tag value has a slash followed by a character which doesn't need
+  # to be escaped, the slash should be dropped.
+  - input: "@tag1=value\\1 COMMAND"
+    atoms:
+      tags:
+        tag1: "value1"
+      verb: "COMMAND"
+
+  # A slash at the end of a tag value should be dropped
+  - input: "@tag1=value1\\ COMMAND"
+    atoms:
+      tags:
+        tag1: "value1"
+      verb: "COMMAND"
+
+  # Duplicate tags: Parsers SHOULD disregard all but the final occurence 
+  - input: "@tag1=1;tag2=3;tag3=4;tag1=5 COMMAND"
+    atoms:
+      tags:
+        tag1: "5"
+        tag2: "3"
+        tag3: "4"
+      verb: "COMMAND"
+
+  # vendored tags can have the same name as a non-vendored tag
+  - input: "@tag1=1;tag2=3;tag3=4;tag1=5;vendor/tag2=8 COMMAND"
+    atoms:
+      tags:
+        tag1: "5"
+        tag2: "3"
+        tag3: "4"
+        vendor/tag2: "8"
+      verb: "COMMAND"
+
+  # Some parsers handle /MODE in a special way, make sure they do it right
+  - input: ":SomeOp MODE #channel :+i"
+    atoms:
+      source: "SomeOp"
+      verb: "MODE"
+      params:
+      - "#channel"
+      - "+i"
+
+  - input: ":SomeOp MODE #channel +oo SomeUser :AnotherUser"
+    atoms:
+      source: "SomeOp"
+      verb: "MODE"
+      params:
+      - "#channel"
+      - "+oo"
+      - "SomeUser"
+      - "AnotherUser"