Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
80 changes: 78 additions & 2 deletions lib/net/imap/command_data.rb
Original file line number Diff line number Diff line change
Expand Up @@ -154,9 +154,85 @@ def validate
end
end

# Represents IMAP +text+ data, which may contain any 7-bit ASCII character,
# except for +NULL+, +CR+, or +LF+. +text+ is extended to allow any
# multibyte +UTF-8+ character when either +UTF8=ACCEPT+ or +IMAP4rev2+ has
# been enabled, or when the server supports only +IMAP4rev2+ and not earlier
# IMAP revisions, or when the server advertises +UTF8=ONLY+.
#
# NOTE: The current implementation does not validate whether the connection
# currently supports UTF-8. Future versions may change.
#
# The string's bytes must be valid ASCII or valid UTF-8. The string's
# reported encoding is ignored, but the string is _not_ transcoded.
class RawText < CommandData # :nodoc:
  # Coerces +data+ to a frozen (deduplicated) String labelled as either ASCII
  # or UTF-8, stores it, and then validates it.  The bytes are never
  # transcoded: a string reporting any other encoding is only relabelled,
  # as ASCII when it is 7-bit clean and as UTF-8 otherwise.
  #
  # Raises DataFormatError when #validate rejects the string.
  def initialize(data:)
    str = String(data.to_str)
    data =
      case str.encoding
      when Encoding::ASCII, Encoding::UTF_8
        -str
      else
        label = str.ascii_only? ? "ASCII" : "UTF-8"
        -(str.dup.force_encoding(label))
      end
    super # implicit-argument super forwards the reassigned +data+
    validate
  end

  # Raises DataFormatError when the string contains a NULL byte, is not
  # valid in its (ASCII or UTF-8) encoding, or contains CR or LF.
  # Returns +nil+ otherwise.
  def validate
    raise DataFormatError, "NULL byte must be binary literal encoded" if data.include?("\0")
    raise DataFormatError, "invalid UTF-8 must be literal encoded" unless data.valid_encoding?
    return unless data.match?(/[\r\n]/)

    raise DataFormatError, "CR and LF bytes must be literal encoded"
  end

  # True when every byte of the text is 7-bit ASCII.
  def ascii_only?
    data.ascii_only?
  end

  # Writes the text to +imap+ through its +put_string+ method, which is
  # invoked with +__send__+.  +tag+ is accepted but not used.
  def send_data(imap, tag)
    imap.__send__(:put_string, data)
  end
end

# Raw command data supplied as a single string.  The string is split into an
# ordered list of parts: RawText for plain text, and Literal or Literal8 for
# every embedded literal (a <tt>{N}</tt>, <tt>{N+}</tt>, <tt>~{N}</tt> or
# <tt>~{N+}</tt> header followed by CRLF and +N+ bytes).  Each part is
# validated by its own class when it is constructed.
class RawData < CommandData # :nodoc:
  # Splits +data+ into parts, stores the parts, and validates them.
  #
  # Raises DataFormatError when a part is invalid, when a literal header
  # announces more bytes than remain, or when the data ends with a literal
  # header.
  def initialize(data:)
    data = split_parts(data)
    super # implicit-argument super forwards the reassigned +data+
    validate
  end

  # Sends every part, in order, by delegating to the part's own #send_data.
  def send_data(imap, tag)
    data.each { |part| part.send_data(imap, tag) }
  end

  # Raises DataFormatError when the final part is text that ends with a
  # literal header (with no CRLF and literal bytes after it).
  def validate
    return unless data.last in RawText(data: text)

    # The number pattern mirrors #split_parts, so a trailing zero-length
    # header ("{0}") is rejected just like "{123}".  No /n flag here: +text+
    # is ASCII or UTF-8, and a binary-flagged regexp matched against
    # non-ASCII UTF-8 makes Ruby emit a "historical binary regexp" warning.
    if text.rindex(/~?\{(?:0|[1-9]\d*)\+?\}\z/)
      raise DataFormatError, "RawData cannot end with literal continuation"
    end
  end

  private

  # Returns the array of RawText, Literal and Literal8 parts found in +data+.
  def split_parts(data)
    rest  = data.b # dups and ensures BINARY encoding
    parts = []
    while (header = rest.match(/(~)?\{(0|[1-9]\d*)(\+)?\}\r\n/n))
      text = header.pre_match
      rest = header.post_match
      bytesize = NumValidator.coerce_number64(header[2])
      parts << RawText[text] unless text.empty?
      parts << extract_literal(rest, bytesize:,
                               binary:   !!header[1],  # leading "~"
                               non_sync: !!header[3])  # trailing "+"
      rest.bytesplice(0, bytesize, "") # drop the bytes the literal consumed
    end
    parts << RawText[rest] unless rest.empty?
    parts
  end

  # Builds a Literal8 (when +binary+) or a Literal from the first +bytesize+
  # bytes of +data+.  Raises DataFormatError when fewer bytes are available.
  def extract_literal(data, binary:, bytesize:, non_sync:)
    if data.bytesize < bytesize
      raise DataFormatError, "Too few bytes in string for literal, " \
        "expected: %s, remaining: %s" % [bytesize, data.bytesize]
    end
    literal = data.byteslice(0, bytesize)
    (binary ? Literal8 : Literal).new(data: literal, non_sync:)
  end
end

Expand Down
205 changes: 205 additions & 0 deletions test/net/imap/test_command_data.rb
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ class CommandDataTest < Net::IMAP::TestCase
Flag = Net::IMAP::Flag
Literal = Net::IMAP::Literal
Literal8 = Net::IMAP::Literal8
RawText = Net::IMAP::RawText
RawData = Net::IMAP::RawData

Output = Data.define(:name, :args, :kwargs)
TAG = Module.new.freeze
Expand Down Expand Up @@ -162,4 +164,207 @@ class StringFormatterTest < Net::IMAP::TestCase
end
end

# Tests for RawText: which strings it accepts and sends unchanged, and which
# strings it rejects with DataFormatError.
class RawTextTest < CommandDataTest
  test "basic ASCII string" do
    imap.send_data RawText.new('foo "bar" (baz)')
    assert_equal [Output.put_string('foo "bar" (baz)')], imap.output
  end

  test "allows IMAP atom-special symbols" do
    imap.send_data RawText.new('foo "bar" (baz)')
    imap.send_data RawText.new("(){}[]%*\"\\")
    imap.send_data RawText.new("(((((((((((((((( unbalanced ]]]]]]]]]]]]]")
    assert_equal [
      Output.put_string('foo "bar" (baz)'),
      Output.put_string("(){}[]%*\"\\"),
      Output.put_string("(((((((((((((((( unbalanced ]]]]]]]]]]]]]"),
    ], imap.output
  end

  test "ASCII compatible string with another encodings" do
    imap.send_data RawText.new("foo bar".encode("cp1252"))
    assert_equal [
      Output.put_string("foo bar"),
    ], imap.output
  end

  test "allows ASCII control chars" do
    text = RawText.new("beep\b beep\b escape!\e delete this:\x1f")
    imap.send_data text
    assert_equal [
      Output.put_string("beep\b beep\b escape!\e delete this:\x1f"),
    ], imap.output
  end

  # Each entry is [input text, pattern the error message must match].
  data(
    "NULL" => ["with \0 NULL", /NULL\b.+\bbyte/i],
    "CR"   => ["with \r CR",   /CR\b.+\bbyte/i],
    "LF"   => ["with \n LF",   /LF\b.+\bbyte/i],
  )
  test "invalid ASCII byte" do |(text, error_message)|
    try_multiple_encodings(error_message, text)
  end

  # See Table 3-7, Well-Formed UTF-8 Byte Sequences, in The Unicode Standard:
  # https://www.unicode.org/versions/Unicode17.0.0/core-spec/chapter-3/#G27506
  data(
    "incomplete 2 byte sequence" => "\xc3".b,
    "invalid 2 byte sequence"    => "\xc3\x7f".b,
    # A 3-byte lead (E1) followed by only one of its two continuation bytes.
    # This used to repeat the bytes of the "invalid 3 byte sequence" case.
    "incomplete 3 byte sequence" => "\xe1\x80".b,
    "invalid 3 byte sequence"    => "\xe0\x80\x80".b,
    "incomplete 4 byte sequence" => "\xf1\x80\x80".b,
    "invalid 4 byte sequence"    => "\xf0\x80\x80\x80".b,
    "first byte too high"        => "\xff\xaa\xaa\xaa".b,
    "UTF-16 surrogate pair"      => "\xFE\xFF\xD8\x3D\xDC\xA3\xFE\x0F".b,
    "windows-1252"               => "åêïõü".encode("windows-1252"),
  )
  test "invalid UTF-8" do |text|
    try_multiple_encodings(/invalid UTF-8/i, text)
  end

  # Yields the same bytes labelled with several different encodings.  The
  # bytes themselves are never transcoded.
  def with_multiple_encodings(data)
    yield data.b # BINARY
    yield data.dup.force_encoding("ASCII")
    yield data.dup.force_encoding("UTF-8")
    yield data.dup.force_encoding("cp1252")
  end

  # Asserts that RawText rejects +data+ with a DataFormatError matching
  # +error_message+, whichever encoding the string reports.
  def try_multiple_encodings(error_message, data)
    with_multiple_encodings(data) do |encoded|
      assert_raise_with_message(DataFormatError, error_message) do
        RawText[encoded]
      end
    end
  end
end

# Tests for RawData: how a raw string is split into RawText and literal
# parts, how those parts are sent, and which inputs are rejected.
class RawDataTest < CommandDataTest
  test "simple raw text" do
    raw = RawData.new('foo "bar" baz')
    assert_equal [RawText['foo "bar" baz']], raw.data
    imap.send_data raw
    assert_equal [Output.put_string('foo "bar" baz')], imap.output
  end

  test "a single literal" do
    raw = RawData.new("{7}\r\nfoo bar")
    assert_equal [Literal["foo bar", false]], raw.data
    imap.send_data raw, tag: "t1"
    assert_equal [
      Output.send_literal("foo bar", "t1", non_sync: false),
    ], imap.output
  end

  test "literals embedded between text" do
    raw = RawData.new("foo bar {3}\r\nbaz {4+}\r\nquux etc")
    assert_equal [
      RawText["foo bar "],
      Literal["baz", false],
      RawText[" "],
      Literal["quux", true], # non-synchronizing
      RawText[" etc"],
    ], raw.data
    imap.send_data raw, tag: "t2"
    assert_equal [
      Output.put_string("foo bar "),
      Output.send_literal("baz", "t2", non_sync: false),
      Output.put_string(" "),
      Output.send_literal("quux", "t2", non_sync: true),
      Output.put_string(" etc"),
    ], imap.output
  end

  test "empty literals" do
    raw = RawData.new("{0}\r\n{0+}\r\n~{0}\r\n~{0+}\r\n")
    assert_equal [
      Literal["", false],
      Literal["", true],
      Literal8["", false],
      Literal8["", true],
    ], raw.data
    imap.send_data raw, tag: "t2.2"
    assert_equal [
      Output.send_literal("", "t2.2", non_sync: false),
      Output.send_literal("", "t2.2", non_sync: true),
      Output.send_binary_literal("", "t2.2", non_sync: false),
      Output.send_binary_literal("", "t2.2", non_sync: true),
    ], imap.output
  end

  # NOTE(review): despite its name, this input is a single literal followed
  # by raw text (the mirror image of "raw text followed by literal" below).
  test "raw text embedded between literals" do
    raw = RawData.new("{3}\r\nfoo bar")
    assert_equal [
      Literal["foo", false],
      RawText[" bar"]
    ], raw.data
    imap.send_data raw, tag: "t3"
    assert_equal [
      Output.send_literal("foo", "t3", non_sync: false),
      Output.put_string(" bar"),
    ], imap.output
  end

  test "raw text followed by literal" do
    raw = RawData.new("foo {3}\r\nbar")
    assert_equal [
      RawText["foo "],
      Literal["bar", false],
    ], raw.data
    imap.send_data raw, tag: "t4"
    assert_equal [
      Output.put_string("foo "),
      Output.send_literal("bar", "t4", non_sync: false),
    ], imap.output
    imap.clear
  end

  test "binary literal with regular literal" do
    # The binary literal takes exactly 7 bytes ("\0bar\r\nb"), so the NULL,
    # CR and LF bytes inside it are literal payload rather than raw text.
    raw = RawData.new("foo ~{7}\r\n\0bar\r\nbaz {4}\r\nquux")
    assert_equal [
      RawText["foo "],
      Literal8["\0bar\r\nb", false],
      RawText["az "],
      Literal["quux", false],
    ], raw.data
    imap.send_data raw, tag: "t5"
    assert_equal [
      Output.put_string("foo "),
      Output.send_binary_literal("\0bar\r\nb", "t5", non_sync: false),
      Output.put_string("az "),
      Output.send_literal("quux", "t5", non_sync: false),
    ], imap.output
  end

  data(
    "CR"   => "with \r byte",
    "LF"   => "with \n byte",
    "NULL" => "with \0 byte",
    "CRLF" => "with \r\n bytes",
  )
  test "invalid bytes in raw text" do |data|
    assert_raise_with_message(DataFormatError, /must be.* literal encoded/i) do
      RawData.new(data:)
    end
  end

  # No block parameter here: this test has no data set attached, and a
  # leftover parameter would give the test method a nonzero arity with
  # nothing to pass to it.
  test "invalid literal" do
    assert_raise_with_message(DataFormatError, /too few bytes/i) do
      RawData.new(data: "invalid literal {123}\r\ntoo small")
    end

    assert_raise_with_message(DataFormatError, /NULL byte.*in.*literal/i) do
      RawData.new(data: "invalid literal {10}\r\ncontains \0 null")
    end
  end

  test "invalid literal ending ('{123}')" do
    assert_raise(DataFormatError) do RawData.new(data: "literal {123}") end
    assert_raise(DataFormatError) do RawData.new(data: "literal+ {123+}") end
    assert_raise(DataFormatError) do RawData.new(data: "~literal ~{123}") end
    assert_raise(DataFormatError) do RawData.new(data: "~literal+ ~{123+}") end
    # A literal header that is not at the very end, and is not followed by
    # CRLF, is kept as ordinary text.
    raw = RawData.new(data: " {123} ")
    assert_equal [RawText[" {123} "]], raw.data
  end
end

end
29 changes: 29 additions & 0 deletions test/net/imap/test_imap.rb
Original file line number Diff line number Diff line change
Expand Up @@ -650,6 +650,35 @@ def test_send_symbol_as_flag
end
end

# Sends RawData arguments through +send_command+ against the fake server:
# valid data must arrive at the server unchanged, and invalid data must
# raise DataFormatError without any command reaching the server.
def test_raw_data
  raw_data = Net::IMAP::RawData

  # RawData must pass basic validation before sending command
  rejected_inputs = [
    "with \0 NULL",
    "with \r CR",
    "with \n LF",
    "with \r\n CRLF",
    "{1234}\r\nliteral is too small",
    "{1}\r\n\0 literal contains NULL",
  ]

  with_fake_server do |server, imap|
    server.on "TEST", &:done_ok

    # Plain text is passed through as-is.
    imap.__send__(:send_command, "TEST", raw_data.new("foo bar"))
    assert_equal "foo bar", server.commands.pop.args

    # Two arguments, each consisting of a single literal.
    text_literal   = raw_data.new("{3}\r\nfoo")
    binary_literal = raw_data.new("~{4}\r\n\0bar")
    imap.__send__(:send_command, "TEST", text_literal, binary_literal)
    assert_equal "{3}\r\nfoo ~{4}\r\n\0bar", server.commands.pop.args

    rejected_inputs.each do |data|
      assert_raise(Net::IMAP::DataFormatError) do
        imap.__send__(:send_command, "TEST", raw_data[data:])
      end
      assert_empty server.commands
    end
  end
end

test("send PartialRange args") do
with_fake_server do |server, imap|
server.on "TEST", &:done_ok
Expand Down