From 21f1e95fb8e935134a969bc3d729964d8d2aadfa Mon Sep 17 00:00:00 2001 From: Ben Harris Date: Thu, 14 May 2020 23:06:10 -0400 Subject: rename Irc to IRC --- IRCTokens/Tests/Data/msg-split.yaml | 343 ++++++++++++++++++++++++++++++++++++ 1 file changed, 343 insertions(+) create mode 100644 IRCTokens/Tests/Data/msg-split.yaml (limited to 'IRCTokens/Tests/Data/msg-split.yaml') diff --git a/IRCTokens/Tests/Data/msg-split.yaml b/IRCTokens/Tests/Data/msg-split.yaml new file mode 100644 index 0000000..fa3f4aa --- /dev/null +++ b/IRCTokens/Tests/Data/msg-split.yaml @@ -0,0 +1,343 @@ +# IRC parser tests +# splitting messages into usable atoms + +# Written in 2015 by Daniel Oaks +# +# To the extent possible under law, the author(s) have dedicated all copyright +# and related and neighboring rights to this software to the public domain +# worldwide. This software is distributed without any warranty. +# +# You should have received a copy of the CC0 Public Domain Dedication along +# with this software. If not, see +# . + +# some of the tests here originate from grawity's test vectors, which is WTFPL v2 licensed +# https://github.com/grawity/code/tree/master/lib/tests +# some of the tests here originate from Mozilla's test vectors, which is public domain +# https://dxr.mozilla.org/comm-central/source/chat/protocols/irc/test/test_ircMessage.js +# some of the tests here originate from SaberUK's test vectors, which he's indicated I am free to include here +# https://github.com/SaberUK/ircparser/tree/master/test + +# we follow RFC1459 with regards to multiple ascii spaces splitting atoms: +# The prefix, command, and all parameters are +# separated by one (or more) ASCII space character(s) (0x20). +# because doing it as RFC2812 says (strictly as a single ascii space) isn't sane + +tests: + # input is the string coming directly from the server to parse + + # the atoms dict has the keys: + # * tags: tags dict + # tags with no value are an empty string + # * source: source string, without single leading colon + # * verb: verb string + # * params: params split up as a list + # if the params key does not exist, assume it is empty + # if any other keys do no exist, assume they are null + # a key that is null does not exist or is not specified with the + # given input string + + # simple + - input: "foo bar baz asdf" + atoms: + verb: "foo" + params: + - "bar" + - "baz" + - "asdf" + + # with source + - input: ":coolguy foo bar baz asdf" + atoms: + source: "coolguy" + verb: "foo" + params: + - "bar" + - "baz" + - "asdf" + + # with trailing param + - input: "foo bar baz :asdf quux" + atoms: + verb: "foo" + params: + - "bar" + - "baz" + - "asdf quux" + + - input: "foo bar baz :" + atoms: + verb: "foo" + params: + - "bar" + - "baz" + - "" + + - input: "foo bar baz ::asdf" + atoms: + verb: "foo" + params: + - "bar" + - "baz" + - ":asdf" + + # with source and trailing param + - input: ":coolguy foo bar baz :asdf quux" + atoms: + source: "coolguy" + verb: "foo" + params: + - "bar" + - "baz" + - "asdf quux" + + - input: ":coolguy foo bar baz : asdf quux " + atoms: + source: "coolguy" + verb: "foo" + params: + - "bar" + - "baz" + - " asdf quux " + + - input: ":coolguy PRIVMSG bar :lol :) " + atoms: + source: "coolguy" + verb: "PRIVMSG" + params: + - "bar" + - "lol :) " + + - input: ":coolguy foo bar baz :" + atoms: + source: "coolguy" + verb: "foo" + params: + - "bar" + - "baz" + - "" + + - input: ":coolguy foo bar baz : " + atoms: + source: "coolguy" + verb: "foo" + params: + - "bar" + - "baz" + - " " + + # with tags + - input: "@a=b;c=32;k;rt=ql7 foo" + atoms: + verb: "foo" + tags: + "a": "b" + "c": "32" + "k": "" + "rt": "ql7" + + # with escaped tags + - input: "@a=b\\\\and\\nk;c=72\\s45;d=gh\\:764 foo" + atoms: + verb: "foo" + tags: + "a": "b\\and\nk" + "c": "72 45" + "d": "gh;764" + + # with tags and source + - input: "@c;h=;a=b :quux ab cd" + atoms: + tags: + "c": "" + "h": "" + "a": "b" + source: "quux" + verb: "ab" + params: + - "cd" + + # different forms of last param + - input: ":src JOIN #chan" + atoms: + source: "src" + verb: "JOIN" + params: + - "#chan" + + - input: ":src JOIN :#chan" + atoms: + source: "src" + verb: "JOIN" + params: + - "#chan" + + # with and without last param + - input: ":src AWAY" + atoms: + source: "src" + verb: "AWAY" + + - input: ":src AWAY " + atoms: + source: "src" + verb: "AWAY" + + # tab is not considered + - input: ":cool\tguy foo bar baz" + atoms: + source: "cool\tguy" + verb: "foo" + params: + - "bar" + - "baz" + + # with weird control codes in the source + - input: ":coolguy!ag@net\x035w\x03ork.admin PRIVMSG foo :bar baz" + atoms: + source: "coolguy!ag@net\x035w\x03ork.admin" + verb: "PRIVMSG" + params: + - "foo" + - "bar baz" + + - input: ":coolguy!~ag@n\x02et\x0305w\x0fork.admin PRIVMSG foo :bar baz" + atoms: + source: "coolguy!~ag@n\x02et\x0305w\x0fork.admin" + verb: "PRIVMSG" + params: + - "foo" + - "bar baz" + + - input: "@tag1=value1;tag2;vendor1/tag3=value2;vendor2/tag4= :irc.example.com COMMAND param1 param2 :param3 param3" + atoms: + tags: + tag1: "value1" + tag2: "" + vendor1/tag3: "value2" + vendor2/tag4: "" + source: "irc.example.com" + verb: "COMMAND" + params: + - "param1" + - "param2" + - "param3 param3" + + - input: ":irc.example.com COMMAND param1 param2 :param3 param3" + atoms: + source: "irc.example.com" + verb: "COMMAND" + params: + - "param1" + - "param2" + - "param3 param3" + + - input: "@tag1=value1;tag2;vendor1/tag3=value2;vendor2/tag4 COMMAND param1 param2 :param3 param3" + atoms: + tags: + tag1: "value1" + tag2: "" + vendor1/tag3: "value2" + vendor2/tag4: "" + verb: "COMMAND" + params: + - "param1" + - "param2" + - "param3 param3" + + - input: "COMMAND" + atoms: + verb: "COMMAND" + + # yaml encoding + slashes is fun + - input: "@foo=\\\\\\\\\\:\\\\s\\s\\r\\n COMMAND" + atoms: + tags: + foo: "\\\\;\\s \r\n" + verb: "COMMAND" + + # broken messages from unreal + - input: ":gravel.mozilla.org 432 #momo :Erroneous Nickname: Illegal characters" + atoms: + source: "gravel.mozilla.org" + verb: "432" + params: + - "#momo" + - "Erroneous Nickname: Illegal characters" + + - input: ":gravel.mozilla.org MODE #tckk +n " + atoms: + source: "gravel.mozilla.org" + verb: "MODE" + params: + - "#tckk" + - "+n" + + - input: ":services.esper.net MODE #foo-bar +o foobar " + atoms: + source: "services.esper.net" + verb: "MODE" + params: + - "#foo-bar" + - "+o" + - "foobar" + + # tag values should be parsed char-at-a-time to prevent wayward replacements. + - input: "@tag1=value\\\\ntest COMMAND" + atoms: + tags: + tag1: "value\\ntest" + verb: "COMMAND" + + # If a tag value has a slash followed by a character which doesn't need + # to be escaped, the slash should be dropped. + - input: "@tag1=value\\1 COMMAND" + atoms: + tags: + tag1: "value1" + verb: "COMMAND" + + # A slash at the end of a tag value should be dropped + - input: "@tag1=value1\\ COMMAND" + atoms: + tags: + tag1: "value1" + verb: "COMMAND" + + # Duplicate tags: Parsers SHOULD disregard all but the final occurence + - input: "@tag1=1;tag2=3;tag3=4;tag1=5 COMMAND" + atoms: + tags: + tag1: "5" + tag2: "3" + tag3: "4" + verb: "COMMAND" + + # vendored tags can have the same name as a non-vendored tag + - input: "@tag1=1;tag2=3;tag3=4;tag1=5;vendor/tag2=8 COMMAND" + atoms: + tags: + tag1: "5" + tag2: "3" + tag3: "4" + vendor/tag2: "8" + verb: "COMMAND" + + # Some parsers handle /MODE in a special way, make sure they do it right + - input: ":SomeOp MODE #channel :+i" + atoms: + source: "SomeOp" + verb: "MODE" + params: + - "#channel" + - "+i" + + - input: ":SomeOp MODE #channel +oo SomeUser :AnotherUser" + atoms: + source: "SomeOp" + verb: "MODE" + params: + - "#channel" + - "+oo" + - "SomeUser" + - "AnotherUser" -- cgit 1.4.1