|
1 | 1 | #![unstable(issue = "0", feature = "windows_stdio")]
|
2 | 2 |
|
| 3 | +use cell::Cell; |
3 | 4 | use cmp;
|
4 |
| -use io::{self, Cursor}; |
| 5 | +use io; |
5 | 6 | use ptr;
|
6 | 7 | use str;
|
7 |
| -use sync::Mutex; |
8 | 8 | use sys::c;
|
9 | 9 | use sys::cvt;
|
10 | 10 | use sys::handle::Handle;
|
11 | 11 |
|
12 |
| -pub enum Output { |
13 |
| - Console(c::HANDLE), |
14 |
| - Pipe(c::HANDLE), |
15 |
| -} |
16 |
| - |
| 12 | +// Don't cache handles but get them fresh for every read/write. This allows us to track changes to |
| 13 | +// the value over time (such as if a process calls `SetStdHandle` while it's running). See #40490. |
// Standard input. No OS handle is stored; the only state kept between reads is a single UTF-16
// code unit.
pub struct Stdin {
    // High surrogate that ended the previous console read and is still waiting for its low
    // surrogate. A value of 0 means nothing is pending.
    high_surrogate: Cell<u16>,
}
|
// Standard output. A field-less unit type: it carries no handle or other state.
pub struct Stdout;
|
// Standard error. A field-less unit type: it carries no handle or other state.
pub struct Stderr;
|
22 | 19 |
|
23 |
| -pub fn get(handle: c::DWORD) -> io::Result<Output> { |
24 |
| - let handle = unsafe { c::GetStdHandle(handle) }; |
// Upper bound, in bytes, used to cap a single console read or write.
//
// Windows reportedly copes badly with large reads on stdin and large writes to stdout/stderr
// (see #13304 for details).
//
// From MSDN (2011): "The storage for this buffer is allocated from a shared heap for the
// process that is 64 KB in size. The maximum size of the buffer will depend on heap usage."
//
// 8 KiB was picked because libuv uses the same cap, and it seems to be acceptable so far.
const MAX_BUFFER_SIZE: usize = 8192;

// Buffer size intended for the `BufReader` around Stdin. One UTF-16 code unit may need up to
// 3 bytes once re-encoded as UTF-8, so the buffer holds three bytes for each `u16` that fits in
// MAX_BUFFER_SIZE. This way everything read in one go can always be decoded completely.
pub const STDIN_BUF_SIZE: usize = MAX_BUFFER_SIZE / 2 * 3;
| 33 | + |
| 34 | +pub fn get_handle(handle_id: c::DWORD) -> io::Result<c::HANDLE> { |
| 35 | + let handle = unsafe { c::GetStdHandle(handle_id) }; |
25 | 36 | if handle == c::INVALID_HANDLE_VALUE {
|
26 | 37 | Err(io::Error::last_os_error())
|
27 | 38 | } else if handle.is_null() {
|
28 | 39 | Err(io::Error::from_raw_os_error(c::ERROR_INVALID_HANDLE as i32))
|
29 | 40 | } else {
|
30 |
| - let mut out = 0; |
31 |
| - match unsafe { c::GetConsoleMode(handle, &mut out) } { |
32 |
| - 0 => Ok(Output::Pipe(handle)), |
33 |
| - _ => Ok(Output::Console(handle)), |
34 |
| - } |
| 41 | + Ok(handle) |
35 | 42 | }
|
36 | 43 | }
|
37 | 44 |
|
38 |
| -fn write(handle: c::DWORD, data: &[u8]) -> io::Result<usize> { |
39 |
| - let handle = match get(handle)? { |
40 |
| - Output::Console(c) => c, |
41 |
| - Output::Pipe(p) => { |
42 |
| - let handle = Handle::new(p); |
43 |
| - let ret = handle.write(data); |
44 |
| - handle.into_raw(); |
45 |
| - return ret |
46 |
| - } |
47 |
| - }; |
| 45 | +fn is_console(handle: c::HANDLE) -> bool { |
| 46 | + // `GetConsoleMode` will return false (0) if this is a pipe (we don't care about the reported |
| 47 | + // mode). This will only detect Windows Console, not other terminals connected to a pipe like |
| 48 | + // MSYS. Which is exactly what we need, as only Windows Console needs a conversion to UTF-16. |
| 49 | + let mut mode = 0; |
| 50 | + unsafe { c::GetConsoleMode(handle, &mut mode) != 0 } |
| 51 | +} |
48 | 52 |
|
49 |
| - // As with stdin on windows, stdout often can't handle writes of large |
50 |
| - // sizes. For an example, see #14940. For this reason, don't try to |
51 |
| - // write the entire output buffer on windows. |
52 |
| - // |
53 |
| - // For some other references, it appears that this problem has been |
54 |
| - // encountered by others [1] [2]. We choose the number 8K just because |
55 |
| - // libuv does the same. |
| 53 | +fn write(handle_id: c::DWORD, data: &[u8]) -> io::Result<usize> { |
| 54 | + let handle = get_handle(handle_id)?; |
| 55 | + if !is_console(handle) { |
| 56 | + let handle = Handle::new(handle); |
| 57 | + let ret = handle.write(data); |
| 58 | + handle.into_raw(); // Don't close the handle |
| 59 | + return ret; |
| 60 | + } |
| 61 | + |
| 62 | + // As the console is meant for presenting text, we assume bytes of `data` come from a string |
| 63 | + // and are encoded as UTF-8, which needs to be encoded as UTF-16. |
56 | 64 | //
|
57 |
| - // [1]: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/1232 |
58 |
| - // [2]: http://www.mail-archive.com/[email protected]/msg00661.html |
59 |
| - const OUT_MAX: usize = 8192; |
60 |
| - let len = cmp::min(data.len(), OUT_MAX); |
| 65 | + // If the data is not valid UTF-8 we write out as many bytes as are valid. |
| 66 | + // Only when there are no valid bytes (which will happen on the next call), return an error. |
| 67 | + let len = cmp::min(data.len(), MAX_BUFFER_SIZE); |
61 | 68 | let utf8 = match str::from_utf8(&data[..len]) {
|
62 | 69 | Ok(s) => s,
|
63 |
| - Err(ref e) if e.valid_up_to() == 0 => return Err(invalid_encoding()), |
| 70 | + Err(ref e) if e.valid_up_to() == 0 => { |
| 71 | + return Err(io::Error::new(io::ErrorKind::InvalidData, |
| 72 | + "Windows stdio in console mode does not support non-UTF-8 byte sequences; \ |
| 73 | + see https://github.com/rust-lang/rust/issues/23344")) |
| 74 | + }, |
64 | 75 | Err(e) => str::from_utf8(&data[..e.valid_up_to()]).unwrap(),
|
65 | 76 | };
|
66 | 77 | let utf16 = utf8.encode_utf16().collect::<Vec<u16>>();
|
| 78 | + |
| 79 | + let mut written = write_u16s(handle, &utf16)?; |
| 80 | + |
| 81 | + // Figure out how many bytes of as UTF-8 were written away as UTF-16. |
| 82 | + if written >= utf16.len() { |
| 83 | + Ok(utf8.len()) |
| 84 | + } else { |
| 85 | + // Make sure we didn't end up writing only half of a surrogate pair (even though the chance |
| 86 | + // is tiny). Because it is not possible for user code to re-slice `data` in such a way that |
| 87 | + // a missing surrogate can be produced (and also because of the UTF-8 validation above), |
| 88 | + // write the missing surrogate out now. |
| 89 | + // Buffering it would mean we have to lie about the number of bytes written. |
| 90 | + let first_char_remaining = utf16[written]; |
| 91 | + if first_char_remaining >= 0xDCEE && first_char_remaining <= 0xDFFF { // low surrogate |
| 92 | + // We just hope this works, and give up otherwise |
| 93 | + let _ = write_u16s(handle, &utf16[written..written]); |
| 94 | + written += 1; |
| 95 | + } |
| 96 | + // Calculate the number of bytes of `utf8` that were actually written. |
| 97 | + let mut count = 0; |
| 98 | + for ch in utf16[..written].iter() { |
| 99 | + count += match ch { |
| 100 | + 0x0000 ..= 0x007F => 1, |
| 101 | + 0x0080 ..= 0x07FF => 2, |
| 102 | + 0xDCEE ..= 0xDFFF => 1, // Low surrogate. We already counted 3 bytes for the other. |
| 103 | + _ => 3, |
| 104 | + }; |
| 105 | + } |
| 106 | + Ok(count) |
| 107 | + } |
| 108 | +} |
| 109 | + |
| 110 | +fn write_u16s(handle: c::HANDLE, data: &[u16]) -> io::Result<usize> { |
67 | 111 | let mut written = 0;
|
68 | 112 | cvt(unsafe {
|
69 | 113 | c::WriteConsoleW(handle,
|
70 |
| - utf16.as_ptr() as c::LPCVOID, |
71 |
| - utf16.len() as u32, |
| 114 | + data.as_ptr() as c::LPCVOID, |
| 115 | + data.len() as u32, |
72 | 116 | &mut written,
|
73 | 117 | ptr::null_mut())
|
74 | 118 | })?;
|
75 |
| - |
76 |
| - // FIXME if this only partially writes the utf16 buffer then we need to |
77 |
| - // figure out how many bytes of `data` were actually written |
78 |
| - assert_eq!(written as usize, utf16.len()); |
79 |
| - Ok(utf8.len()) |
| 119 | + Ok(written as usize) |
80 | 120 | }
|
81 | 121 |
|
82 | 122 | impl Stdin {
|
83 | 123 | pub fn new() -> io::Result<Stdin> {
|
84 |
| - Ok(Stdin { |
85 |
| - utf8: Mutex::new(Cursor::new(Vec::new())), |
86 |
| - }) |
| 124 | + Ok(Stdin { high_surrogate: Cell::new(0) }) |
87 | 125 | }
|
88 | 126 |
|
89 | 127 | pub fn read(&self, buf: &mut [u8]) -> io::Result<usize> {
|
90 |
| - let handle = match get(c::STD_INPUT_HANDLE)? { |
91 |
| - Output::Console(c) => c, |
92 |
| - Output::Pipe(p) => { |
93 |
| - let handle = Handle::new(p); |
94 |
| - let ret = handle.read(buf); |
95 |
| - handle.into_raw(); |
96 |
| - return ret |
97 |
| - } |
| 128 | + let handle = get_handle(c::STD_INPUT_HANDLE)?; |
| 129 | + if !is_console(handle) { |
| 130 | + let handle = Handle::new(handle); |
| 131 | + let ret = handle.read(buf); |
| 132 | + handle.into_raw(); // Don't close the handle |
| 133 | + return ret; |
| 134 | + } |
| 135 | + |
| 136 | + if buf.len() == 0 { |
| 137 | + return Ok(0); |
| 138 | + } else if buf.len() < 4 { |
| 139 | + return Err(io::Error::new(io::ErrorKind::InvalidInput, |
| 140 | + "Windows stdin in console mode does not support a buffer too small to; \ |
| 141 | + guarantee holding one arbitrary UTF-8 character (4 bytes)")) |
| 142 | + } |
| 143 | + |
| 144 | + let mut utf16_buf = [0u16; MAX_BUFFER_SIZE / 2]; |
| 145 | + // In the worst case, an UTF-8 string can take 3 bytes for every `u16` of an UTF-16. So |
| 146 | + // we can read at most a third of `buf.len()` chars and uphold the guarantee no data gets |
| 147 | + // lost. |
| 148 | + let amount = cmp::min(buf.len() / 3, utf16_buf.len()); |
| 149 | + let read = self.read_u16s_fixup_surrogates(handle, &mut utf16_buf, amount)?; |
| 150 | + let utf16 = &utf16_buf[..read]; |
| 151 | + |
| 152 | + // FIXME: it would be nice if we could directly decode into the buffer instead of doing an |
| 153 | + // allocation. |
| 154 | + let data = match String::from_utf16(&utf16) { |
| 155 | + Ok(utf8) => utf8.into_bytes(), |
| 156 | + Err(..) => { |
| 157 | + // We can't really do any better than forget all data and return an error. |
| 158 | + return Err(io::Error::new(io::ErrorKind::InvalidData, |
| 159 | + "Windows stdin in console mode does not support non-UTF-16 input; \ |
| 160 | + encountered unpaired surrogate")) |
| 161 | + }, |
98 | 162 | };
|
99 |
| - let mut utf8 = self.utf8.lock().unwrap(); |
100 |
| - // Read more if the buffer is empty |
101 |
| - if utf8.position() as usize == utf8.get_ref().len() { |
102 |
| - let mut utf16 = vec![0u16; 0x1000]; |
103 |
| - let mut num = 0; |
104 |
| - let mut input_control = readconsole_input_control(CTRL_Z_MASK); |
105 |
| - cvt(unsafe { |
106 |
| - c::ReadConsoleW(handle, |
107 |
| - utf16.as_mut_ptr() as c::LPVOID, |
108 |
| - utf16.len() as u32, |
109 |
| - &mut num, |
110 |
| - &mut input_control as c::PCONSOLE_READCONSOLE_CONTROL) |
111 |
| - })?; |
112 |
| - utf16.truncate(num as usize); |
113 |
| - // FIXME: what to do about this data that has already been read? |
114 |
| - let mut data = match String::from_utf16(&utf16) { |
115 |
| - Ok(utf8) => utf8.into_bytes(), |
116 |
| - Err(..) => return Err(invalid_encoding()), |
117 |
| - }; |
118 |
| - if let Some(&last_byte) = data.last() { |
119 |
| - if last_byte == CTRL_Z { |
120 |
| - data.pop(); |
121 |
| - } |
| 163 | + buf.copy_from_slice(&data); |
| 164 | + Ok(data.len()) |
| 165 | + } |
| 166 | + |
| 167 | + // We assume that if the last `u16` is an unpaired surrogate they got sliced apart by our |
| 168 | + // buffer size, and keep it around for the next read hoping to put them together. |
| 169 | + // This is a best effort, and may not work if we are not the only reader on Stdin. |
| 170 | + pub fn read_u16s_fixup_surrogates(&self, handle: c::HANDLE, buf: &mut [u16], mut amount: usize) |
| 171 | + -> io::Result<usize> |
| 172 | + { |
| 173 | + // Insert possibly remaining unpaired surrogate from last read. |
| 174 | + let mut start = 0; |
| 175 | + if self.high_surrogate.get() != 0 { |
| 176 | + buf[0] = self.high_surrogate.replace(0); |
| 177 | + start = 1; |
| 178 | + if amount == 1 { |
| 179 | + // Special case: `Stdin::read` guarantees we can always read at least one new `u16` |
| 180 | + // and combine it with an unpaired surrogate, because the UTF-8 buffer is at least |
| 181 | + // 4 bytes. |
| 182 | + amount = 2; |
| 183 | + } |
| 184 | + } |
| 185 | + let mut amount = read_u16s(handle, &mut buf[start..amount])? + start; |
| 186 | + |
| 187 | + if amount > 0 { |
| 188 | + let last_char = buf[amount - 1]; |
| 189 | + if last_char >= 0xD800 && last_char <= 0xDBFF { // high surrogate |
| 190 | + self.high_surrogate.set(last_char); |
| 191 | + amount -= 1; |
122 | 192 | }
|
123 |
| - *utf8 = Cursor::new(data); |
124 | 193 | }
|
| 194 | + Ok(amount) |
| 195 | + } |
| 196 | +} |
| 197 | + |
| 198 | +fn read_u16s(handle: c::HANDLE, buf: &mut [u16]) -> io::Result<usize> { |
| 199 | + // Configure the `pInputControl` parameter to not only return on `\r\n` but also Ctrl-Z, the |
| 200 | + // traditional DOS method to indicate end of character stream / user input (SUB). |
| 201 | + // See #38274 and https://stackoverflow.com/questions/43836040/win-api-readconsole. |
| 202 | + const CTRL_Z: u16 = 0x1A; |
| 203 | + const CTRL_Z_MASK: c::ULONG = 1 << CTRL_Z; |
| 204 | + let mut input_control = c::CONSOLE_READCONSOLE_CONTROL { |
| 205 | + nLength: ::mem::size_of::<c::CONSOLE_READCONSOLE_CONTROL>() as c::ULONG, |
| 206 | + nInitialChars: 0, |
| 207 | + dwCtrlWakeupMask: CTRL_Z_MASK, |
| 208 | + dwControlKeyState: 0, |
| 209 | + }; |
| 210 | + |
| 211 | + let mut amount = 0; |
| 212 | + cvt(unsafe { |
| 213 | + c::ReadConsoleW(handle, |
| 214 | + buf.as_mut_ptr() as c::LPVOID, |
| 215 | + buf.len() as u32, |
| 216 | + &mut amount, |
| 217 | + &mut input_control as c::PCONSOLE_READCONSOLE_CONTROL) |
| 218 | + })?; |
125 | 219 |
|
126 |
| - // MemReader shouldn't error here since we just filled it |
127 |
| - utf8.read(buf) |
| 220 | + if amount > 0 && buf[amount as usize - 1] == CTRL_Z { |
| 221 | + amount -= 1; |
128 | 222 | }
|
| 223 | + Ok(amount as usize) |
129 | 224 | }
|
130 | 225 |
|
131 | 226 | impl Stdout {
|
@@ -156,43 +251,10 @@ impl Stderr {
|
156 | 251 | }
|
157 | 252 | }
|
158 | 253 |
|
159 |
| -impl Output { |
160 |
| - pub fn handle(&self) -> c::HANDLE { |
161 |
| - match *self { |
162 |
| - Output::Console(c) => c, |
163 |
| - Output::Pipe(c) => c, |
164 |
| - } |
165 |
| - } |
166 |
| -} |
167 |
| - |
168 |
| -fn invalid_encoding() -> io::Error { |
169 |
| - io::Error::new(io::ErrorKind::InvalidData, |
170 |
| - "Windows stdio in console mode does not support non-UTF-8 byte sequences; \ |
171 |
| - see https://github.com/rust-lang/rust/issues/23344") |
172 |
| -} |
173 |
| - |
174 |
| -fn readconsole_input_control(wakeup_mask: c::ULONG) -> c::CONSOLE_READCONSOLE_CONTROL { |
175 |
| - c::CONSOLE_READCONSOLE_CONTROL { |
176 |
| - nLength: ::mem::size_of::<c::CONSOLE_READCONSOLE_CONTROL>() as c::ULONG, |
177 |
| - nInitialChars: 0, |
178 |
| - dwCtrlWakeupMask: wakeup_mask, |
179 |
| - dwControlKeyState: 0, |
180 |
| - } |
181 |
| -} |
182 |
| - |
183 |
| -const CTRL_Z: u8 = 0x1A; |
184 |
| -const CTRL_Z_MASK: c::ULONG = 0x4000000; //1 << 0x1A |
185 |
| - |
186 | 254 | pub fn is_ebadf(err: &io::Error) -> bool {
|
187 | 255 | err.raw_os_error() == Some(c::ERROR_INVALID_HANDLE as i32)
|
188 | 256 | }
|
189 | 257 |
|
190 |
| -// The default buffer capacity is 64k, but apparently windows |
191 |
| -// doesn't like 64k reads on stdin. See #13304 for details, but the |
192 |
| -// idea is that on windows we use a slightly smaller buffer that's |
193 |
| -// been seen to be acceptable. |
194 |
| -pub const STDIN_BUF_SIZE: usize = 8 * 1024; |
195 |
| - |
196 | 258 | pub fn panic_output() -> Option<impl io::Write> {
|
197 | 259 | io::stderr_raw().ok()
|
198 | 260 | }
|
0 commit comments