Skip to content

Commit 2b88ac7

Browse files
authored
Code cleanup (flutter#96)
1 parent 38756a0 commit 2b88ac7

File tree

2 files changed

+33
-44
lines changed

2 files changed

+33
-44
lines changed

lib/src/char_encodings.dart

Lines changed: 0 additions & 40 deletions
This file was deleted.

lib/src/html_input_stream.dart

Lines changed: 33 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
import 'dart:collection';
2+
import 'dart:convert' show ascii, utf8;
23

34
import 'package:source_span/source_span.dart';
45

5-
import 'char_encodings.dart';
66
import 'constants.dart';
77
import 'encoding_parser.dart';
88
import 'utils.dart';
@@ -66,7 +66,7 @@ class HtmlInputStream {
6666
this.sourceUrl])
6767
: charEncodingName = codecName(encoding) {
6868
if (source is String) {
69-
_rawChars = toCodepoints(source);
69+
_rawChars = source.runes.toList();
7070
charEncodingName = 'utf-8';
7171
charEncodingCertain = true;
7272
} else if (source is List<int>) {
@@ -92,7 +92,7 @@ class HtmlInputStream {
9292
_chars = <int>[];
9393

9494
if (_rawChars == null) {
95-
_rawChars = decodeBytes(charEncodingName, _rawBytes);
95+
_rawChars = _decodeBytes(charEncodingName, _rawBytes);
9696
}
9797

9898
bool skipNewline = false;
@@ -177,7 +177,7 @@ class HtmlInputStream {
177177
/// encoding otherwise return null.
178178
String detectBOM() {
179179
// Try detecting the BOM using bytes from the string
180-
if (hasUtf8Bom(_rawBytes)) {
180+
if (_hasUtf8Bom(_rawBytes)) {
181181
return 'utf-8';
182182
}
183183
return null;
@@ -292,3 +292,32 @@ String codecName(String encoding) {
292292
var canonicalName = encoding.replaceAll(asciiPunctuation, '').toLowerCase();
293293
return encodings[canonicalName];
294294
}
295+
296+
/// Returns true if the [bytes] starts with a UTF-8 byte order mark.
297+
/// Since UTF-8 doesn't have byte order, it's somewhat of a misnomer, but it is
298+
/// used in HTML to detect the UTF-8 encoding.
299+
bool _hasUtf8Bom(List<int> bytes, [int offset = 0, int length]) {
300+
int end = length != null ? offset + length : bytes.length;
301+
return (offset + 3) <= end &&
302+
bytes[offset] == 0xEF &&
303+
bytes[offset + 1] == 0xBB &&
304+
bytes[offset + 2] == 0xBF;
305+
}
306+
307+
/// Decodes the [bytes] with the provided [encoding] and returns an iterable for
308+
/// the codepoints. Only the 'ascii' and 'utf-8' encodings are supported; any
309+
/// other [encoding] throws an [ArgumentError].
310+
Iterable<int> _decodeBytes(String encoding, List<int> bytes) {
311+
switch (encoding) {
312+
case 'ascii':
313+
return ascii.decode(bytes).runes;
314+
315+
case 'utf-8':
316+
// NOTE: To match the behavior of the other decode functions, we eat the
317+
// UTF-8 BOM here. This is the default behavior of `utf8.decode`.
318+
return utf8.decode(bytes).runes;
319+
320+
default:
321+
throw ArgumentError('Encoding $encoding not supported');
322+
}
323+
}

0 commit comments

Comments
 (0)