diff --git a/enc_test.go b/enc_test.go index fbccf9e..de1acb6 100644 --- a/enc_test.go +++ b/enc_test.go @@ -12,39 +12,116 @@ import ( func TestEncode(t *testing.T) { paths := []string{ + // metadata test cases. "meta/testdata/input-SCPAP.flac", "meta/testdata/input-SCVA.flac", "meta/testdata/input-SCVPAP.flac", "meta/testdata/input-VA.flac", + "meta/testdata/input-SCVAUP.flac", // empty metadata block (of type 0x7e) + "meta/testdata/input-SVAUP.flac", // empty metadata block (of type 0x7e) "meta/testdata/silence.flac", - // TODO: fix: support for prediction method 3 not yet implemented - //"testdata/19875.flac", - // TODO: fix: support for prediction method 3 not yet implemented - //"testdata/44127.flac", - // TODO: fix: support for prediction method 3 not yet implemented - //"testdata/59996.flac", - // TODO: fix: support for prediction method 3 not yet implemented - //"testdata/80574.flac", - // TODO: fix: support for prediction method 3 not yet implemented - //"testdata/172960.flac", - // TODO: fix: support for prediction method 2 not yet implemented - //"testdata/189983.flac", - // TODO: fix: support for prediction method 3 not yet implemented - //"testdata/191885.flac", - // TODO: fix: support for prediction method 2 not yet implemented - //"testdata/212768.flac", - // TODO: fix: support for prediction method 2 not yet implemented - //"testdata/220014.flac", - // TODO: fix: support for prediction method 2 not yet implemented - //"testdata/243749.flac", - // TODO: fix: support for prediction method 3 not yet implemented - //"testdata/256529.flac", - // TODO: fix: support for prediction method 3 not yet implemented - //"testdata/257344.flac", - // TODO: fix: support for prediction method 2 not yet implemented - //"testdata/8297-275156-0011.flac", - // TODO: fix: support for prediction method 2 not yet implemented - //"testdata/love.flac", + // flac test cases. 
+ "testdata/19875.flac", // prediction method 3 (FIR) + "testdata/44127.flac", // prediction method 3 (FIR) + "testdata/59996.flac", + "testdata/80574.flac", // prediction method 3 (FIR) + "testdata/172960.flac", + "testdata/189983.flac", + "testdata/191885.flac", + "testdata/212768.flac", + "testdata/220014.flac", // prediction method 2 (Fixed) + "testdata/243749.flac", // prediction method 2 (Fixed) + "testdata/256529.flac", + "testdata/257344.flac", // prediction method 3 (FIR) + "testdata/8297-275156-0011.flac", // prediction method 3 (FIR) + "testdata/love.flac", // wasted bits + // IETF test cases. + "testdata/flac-test-files/subset/01 - blocksize 4096.flac", + "testdata/flac-test-files/subset/02 - blocksize 4608.flac", + "testdata/flac-test-files/subset/03 - blocksize 16.flac", + "testdata/flac-test-files/subset/04 - blocksize 192.flac", + "testdata/flac-test-files/subset/05 - blocksize 254.flac", + "testdata/flac-test-files/subset/06 - blocksize 512.flac", + "testdata/flac-test-files/subset/07 - blocksize 725.flac", + "testdata/flac-test-files/subset/08 - blocksize 1000.flac", + "testdata/flac-test-files/subset/09 - blocksize 1937.flac", + "testdata/flac-test-files/subset/10 - blocksize 2304.flac", + "testdata/flac-test-files/subset/11 - partition order 8.flac", + "testdata/flac-test-files/subset/12 - qlp precision 15 bit.flac", + "testdata/flac-test-files/subset/13 - qlp precision 2 bit.flac", + "testdata/flac-test-files/subset/14 - wasted bits.flac", + "testdata/flac-test-files/subset/15 - only verbatim subframes.flac", + "testdata/flac-test-files/subset/16 - partition order 8 containing escaped partitions.flac", + "testdata/flac-test-files/subset/17 - all fixed orders.flac", + "testdata/flac-test-files/subset/18 - precision search.flac", + "testdata/flac-test-files/subset/19 - samplerate 35467Hz.flac", + "testdata/flac-test-files/subset/20 - samplerate 39kHz.flac", + "testdata/flac-test-files/subset/21 - samplerate 22050Hz.flac", + 
"testdata/flac-test-files/subset/22 - 12 bit per sample.flac", + "testdata/flac-test-files/subset/23 - 8 bit per sample.flac", + "testdata/flac-test-files/subset/24 - variable blocksize file created with flake revision 264.flac", + "testdata/flac-test-files/subset/25 - variable blocksize file created with flake revision 264, modified to create smaller blocks.flac", + // NOTE: the only diff is that "26 - ...flac" uses `block_size: 0b111 + // (end of header (16 bit))` to encode the block size at the end of the + // header, whereas mewkiz/flac encodes it directly `block_size: 4096 + // (0b1100)`. Notably, the computed md5 hash of the decoded audio samples + // is identical (MD5: 3b2939b39ae7369b80451c77865e60c1). Thus, ignore the + // test case. + //"testdata/flac-test-files/subset/26 - variable blocksize file created with CUETools.Flake 2.1.6.flac", + // NOTE: the only diff is that "27 - ...flac" uses `block_size: 0b111 + // (end of header (16 bit))` to encode the block size at the end of the + // header, whereas mewkiz/flac encodes it directly `block_size: 4608 + // (0b101)`. Notably, the computed md5 hash of the decoded audio samples + // is identical (MD5: 9fb66177d2f735d4b1f501a5af1320a3). Thus, ignore the + // test case. 
+ //"testdata/flac-test-files/subset/27 - old format variable blocksize file created with Flake 0.11.flac", + "testdata/flac-test-files/subset/28 - high resolution audio, default settings.flac", + "testdata/flac-test-files/subset/29 - high resolution audio, blocksize 16384.flac", + "testdata/flac-test-files/subset/30 - high resolution audio, blocksize 13456.flac", + "testdata/flac-test-files/subset/31 - high resolution audio, using only 32nd order predictors.flac", + "testdata/flac-test-files/subset/32 - high resolution audio, partition order 8 containing escaped partitions.flac", + "testdata/flac-test-files/subset/33 - samplerate 192kHz.flac", + // NOTE: the only diff is that "34 - ...flac" uses `0b1100 (end of header + // (8 bit*1000))` to encode the sample rate at the end of the header, + // whereas mewkiz/flac encodes it directly `192000 (0b11)`. Notably, the + // computed md5 hash of the decoded audio samples is identical + // (MD5: 942f56e503437dfd4c269c331774b2e3). Thus, ignore the test case. + //"testdata/flac-test-files/subset/34 - samplerate 192kHz, using only 32nd order predictors.flac", + "testdata/flac-test-files/subset/35 - samplerate 134560Hz.flac", + "testdata/flac-test-files/subset/36 - samplerate 384kHz.flac", + "testdata/flac-test-files/subset/37 - 20 bit per sample.flac", + "testdata/flac-test-files/subset/38 - 3 channels (3.0).flac", + "testdata/flac-test-files/subset/39 - 4 channels (4.0).flac", + "testdata/flac-test-files/subset/40 - 5 channels (5.0).flac", + "testdata/flac-test-files/subset/41 - 6 channels (5.1).flac", + "testdata/flac-test-files/subset/42 - 7 channels (6.1).flac", + "testdata/flac-test-files/subset/43 - 8 channels (7.1).flac", + // NOTE: the only diff is that "44 - ...flac" uses `0b1100 (end of header + // (8 bit*1000))` to encode the sample rate at the end of the header, + // whereas mewkiz/flac encodes it directly `192000 (0b11)`. 
Notably, the + // computed md5 hash of the decoded audio samples is identical + // (MD5: cdf531d4d4b95233986bc499518a89db). Thus, ignore the test case. + //"testdata/flac-test-files/subset/44 - 8-channel surround, 192kHz, 24 bit, using only 32nd order predictors.flac", + "testdata/flac-test-files/subset/45 - no total number of samples set.flac", + "testdata/flac-test-files/subset/46 - no min-max framesize set.flac", + "testdata/flac-test-files/subset/47 - only STREAMINFO.flac", + "testdata/flac-test-files/subset/48 - Extremely large SEEKTABLE.flac", + "testdata/flac-test-files/subset/49 - Extremely large PADDING.flac", + "testdata/flac-test-files/subset/50 - Extremely large PICTURE.flac", + "testdata/flac-test-files/subset/51 - Extremely large VORBISCOMMENT.flac", + "testdata/flac-test-files/subset/52 - Extremely large APPLICATION.flac", + "testdata/flac-test-files/subset/53 - CUESHEET with very many indexes.flac", + "testdata/flac-test-files/subset/54 - 1000x repeating VORBISCOMMENT.flac", + "testdata/flac-test-files/subset/55 - file 48-53 combined.flac", + "testdata/flac-test-files/subset/56 - JPG PICTURE.flac", + "testdata/flac-test-files/subset/57 - PNG PICTURE.flac", + "testdata/flac-test-files/subset/58 - GIF PICTURE.flac", + "testdata/flac-test-files/subset/59 - AVIF PICTURE.flac", + "testdata/flac-test-files/subset/60 - mono audio.flac", + "testdata/flac-test-files/subset/61 - predictor overflow check, 16-bit.flac", + "testdata/flac-test-files/subset/62 - predictor overflow check, 20-bit.flac", + "testdata/flac-test-files/subset/63 - predictor overflow check, 24-bit.flac", + "testdata/flac-test-files/subset/64 - rice partitions with escape code zero.flac", } for _, path := range paths { t.Run(path, func(t *testing.T) { diff --git a/encode_frame.go b/encode_frame.go index c3961ee..209ac4a 100644 --- a/encode_frame.go +++ b/encode_frame.go @@ -24,9 +24,6 @@ func (enc *Encoder) WriteFrame(f *frame.Frame) error { return errutil.Newf("subframe and channel count 
mismatch; expected %d, got %d", nchannels, len(f.Subframes)) } nsamplesPerChannel := f.Subframes[0].NSamples - if !(16 <= nsamplesPerChannel && nsamplesPerChannel <= 65535) { - return errutil.Newf("invalid number of samples per channel; expected >= 16 && <= 65535, got %d", nsamplesPerChannel) - } for i, subframe := range f.Subframes { if nsamplesPerChannel != len(subframe.Samples) { return errutil.Newf("invalid number of samples in channel %d; expected %d, got %d", i, nsamplesPerChannel, len(subframe.Samples)) @@ -64,10 +61,30 @@ func (enc *Encoder) WriteFrame(f *frame.Frame) error { return errutil.Err(err) } + // Inter-channel decorrelation of subframe samples. + f.Decorrelate() + defer f.Correlate() // NOTE: revert decorrelation of audio samples after encoding is done (to make encode non-destructive). + // Encode subframes. bw := bitio.NewWriter(hw) - for _, subframe := range f.Subframes { - if err := encodeSubframe(bw, f.Header, subframe); err != nil { + for channel, subframe := range f.Subframes { + // The side channel requires an extra bit per sample when using + // inter-channel decorrelation. + bps := uint(f.BitsPerSample) + switch f.Channels { + case frame.ChannelsSideRight: + // channel 0 is the side channel. + if channel == 0 { + bps++ + } + case frame.ChannelsLeftSide, frame.ChannelsMidSide: + // channel 1 is the side channel. 
+			if channel == 1 {
+				bps++
+			}
+		}
+
+		if err := encodeSubframe(bw, f.Header, subframe, bps); err != nil {
 			return errutil.Err(err)
 		}
 	}
diff --git a/encode_meta.go b/encode_meta.go
index 2749f06..2a7edfe 100644
--- a/encode_meta.go
+++ b/encode_meta.go
@@ -18,6 +18,9 @@ func encodeBlock(bw *bitio.Writer, block *meta.Block, last bool) error {
 	if block.Type == meta.TypePadding {
 		return encodePadding(bw, block.Length, last)
 	}
+	if block.Length == 0 {
+		return encodeEmptyBlock(bw, block.Type, last)
+	}
 	switch body := block.Body.(type) {
 	case *meta.StreamInfo:
 		return encodeStreamInfo(bw, body, last)
@@ -38,6 +41,23 @@ func encodeBlock(bw *bitio.Writer, block *meta.Block, last bool) error {
 
 // --- [ Metadata block header ] -----------------------------------------------
 
+// encodeEmptyBlock encodes the metadata block header of an empty metadata
+// block with the specified type, writing to bw.
+func encodeEmptyBlock(bw *bitio.Writer, typ meta.Type, last bool) error {
+	// Store metadata block header.
+	hdr := &meta.Header{
+		IsLast: last,
+		Type:   typ,
+		Length: 0,
+	}
+	if err := encodeBlockHeader(bw, hdr); err != nil {
+		return errutil.Err(err)
+	}
+	return nil
+}
+
+// --- [ Metadata block header ] -----------------------------------------------
+
 // encodeBlockHeader encodes the metadata block header, writing to bw.
 func encodeBlockHeader(bw *bitio.Writer, hdr *meta.Header) error {
 	// 1 bit: IsLast.
diff --git a/encode_subframe.go b/encode_subframe.go
index 77a33aa..30f0c20 100644
--- a/encode_subframe.go
+++ b/encode_subframe.go
@@ -1,6 +1,8 @@
 package flac
 
 import (
+	"fmt"
+
 	"github.com/icza/bitio"
 	"github.com/mewkiz/flac/frame"
 	iobits "github.com/mewkiz/flac/internal/bits"
@@ -10,32 +12,50 @@ import (
 // --- [ Subframe ] ------------------------------------------------------------
 
 // encodeSubframe encodes the given subframe, writing to bw.
-func encodeSubframe(bw *bitio.Writer, hdr frame.Header, subframe *frame.Subframe) error {
+func encodeSubframe(bw *bitio.Writer, hdr frame.Header, subframe *frame.Subframe, bps uint) error {
 	// Encode subframe header.
 	if err := encodeSubframeHeader(bw, subframe.SubHeader); err != nil {
 		return errutil.Err(err)
 	}
 
+	// Adjust bps of subframe for wasted bits-per-sample.
+	if subframe.Wasted > bps {
+		return errutil.Newf("invalid number of wasted bits-per-sample; expected <= %d, got %d", bps, subframe.Wasted)
+	}
+	bps -= subframe.Wasted
+
+	// Right shift to account for wasted bits-per-sample.
+	if subframe.Wasted > 0 {
+		for i, sample := range subframe.Samples {
+			subframe.Samples[i] = sample >> subframe.Wasted
+		}
+		// Restore the original samples once the subframe has been encoded,
+		// to make encode non-destructive.
+		defer func() {
+			for i, sample := range subframe.Samples {
+				subframe.Samples[i] = sample << subframe.Wasted
+			}
+		}()
+	}
+
 	// Encode audio samples.
 	switch subframe.Pred {
 	case frame.PredConstant:
-		if err := encodeConstantSamples(bw, hdr.BitsPerSample, subframe.Samples); err != nil {
+		if err := encodeConstantSamples(bw, hdr, subframe, bps); err != nil {
 			return errutil.Err(err)
 		}
 	case frame.PredVerbatim:
-		if err := encodeVerbatimSamples(bw, hdr, subframe.Samples); err != nil {
+		if err := encodeVerbatimSamples(bw, hdr, subframe, bps); err != nil {
+			return errutil.Err(err)
+		}
+	case frame.PredFixed:
+		if err := encodeFixedSamples(bw, hdr, subframe, bps); err != nil {
+			return errutil.Err(err)
+		}
+	case frame.PredFIR:
+		if err := encodeFIRSamples(bw, hdr, subframe, bps); err != nil {
 			return errutil.Err(err)
 		}
-	// TODO: implement support for LPC encoding of audio samples.
-	//case frame.PredFixed:
-	//	if err := encodeFixedSamples(bw, hdr, subframe.Samples, subframe.Order); err != nil {
-	//		return errutil.Err(err)
-	//	}
-	// TODO: implement support for LPC encoding of audio samples.
-	//case frame.PredFIR:
-	//	if err := encodeFIRSamples(bw, hdr, subframe.Samples, subframe.Order); err != nil {
-	//		return errutil.Err(err)
-	//	}
 	default:
 		return errutil.Newf("support for prediction method %v not yet implemented", subframe.Pred)
 	}
@@ -45,7 +65,7 @@ func encodeSubframe(bw *bitio.Writer, hdr frame.Header, subframe *frame.Subframe
 // --- [ Subframe header ] -----------------------------------------------------
 
 // encodeSubframeHeader encodes the given subframe header, writing to bw.
-func encodeSubframeHeader(bw *bitio.Writer, hdr frame.SubHeader) error {
+func encodeSubframeHeader(bw *bitio.Writer, subHdr frame.SubHeader) error {
 	// Zero bit padding, to prevent sync-fooling string of 1s.
 	if err := bw.WriteBits(0x0, 1); err != nil {
 		return errutil.Err(err)
@@ -60,7 +80,7 @@ func encodeSubframeHeader(bw *bitio.Writer, hdr frame.SubHeader) error {
 	// 01xxxx : reserved
 	// 1xxxxx : SUBFRAME_LPC, xxxxx=order-1
 	var bits uint64
-	switch hdr.Pred {
+	switch subHdr.Pred {
 	case frame.PredConstant:
 		// 000000 : SUBFRAME_CONSTANT
 		bits = 0x00
@@ -69,10 +89,10 @@ func encodeSubframeHeader(bw *bitio.Writer, hdr frame.SubHeader) error {
 		bits = 0x01
 	case frame.PredFixed:
 		// 001xxx : if(xxx <= 4) SUBFRAME_FIXED, xxx=order ; else reserved
-		bits = 0x08 | uint64(hdr.Order)
+		bits = 0x08 | uint64(subHdr.Order)
 	case frame.PredFIR:
 		// 1xxxxx : SUBFRAME_LPC, xxxxx=order-1
-		bits = 0x20 | uint64(hdr.Order-1)
+		bits = 0x20 | uint64(subHdr.Order-1)
 	}
 	if err := bw.WriteBits(bits, 6); err != nil {
 		return errutil.Err(err)
@@ -82,12 +102,12 @@ func encodeSubframeHeader(bw *bitio.Writer, hdr frame.SubHeader) error {
 	//
 	// 0 : no wasted bits-per-sample in source subblock, k=0
 	// 1 : k wasted bits-per-sample in source subblock, k-1 follows, unary coded; e.g. k=3 => 001 follows, k=7 => 0000001 follows.
-	hasWastedBits := hdr.Wasted > 0
+	hasWastedBits := subHdr.Wasted > 0
 	if err := bw.WriteBool(hasWastedBits); err != nil {
 		return errutil.Err(err)
 	}
 	if hasWastedBits {
-		if err := iobits.WriteUnary(bw, uint64(hdr.Wasted)); err != nil {
+		if err := iobits.WriteUnary(bw, uint64(subHdr.Wasted-1)); err != nil {
 			return errutil.Err(err)
 		}
 	}
@@ -97,7 +108,8 @@ func encodeSubframeHeader(bw *bitio.Writer, hdr frame.SubHeader) error {
 // --- [ Constant samples ] ----------------------------------------------------
 
 // encodeConstantSamples stores the given constant sample, writing to bw.
-func encodeConstantSamples(bw *bitio.Writer, bps byte, samples []int32) error {
+func encodeConstantSamples(bw *bitio.Writer, hdr frame.Header, subframe *frame.Subframe, bps uint) error {
+	samples := subframe.Samples
 	sample := samples[0]
 	for _, s := range samples[1:] {
 		if sample != s {
@@ -105,7 +117,7 @@ func encodeConstantSamples(bw *bitio.Writer, bps byte, samples []int32) error {
 		}
 	}
 	// Unencoded constant value of the subblock, n = frame's bits-per-sample.
-	if err := bw.WriteBits(uint64(sample), bps); err != nil {
+	if err := bw.WriteBits(uint64(sample), uint8(bps)); err != nil {
 		return errutil.Err(err)
 	}
 	return nil
@@ -115,15 +127,273 @@ func encodeConstantSamples(bw *bitio.Writer, bps byte, samples []int32) error {
 
 // encodeVerbatimSamples stores the given samples verbatim (uncompressed),
 // writing to bw.
-func encodeVerbatimSamples(bw *bitio.Writer, hdr frame.Header, samples []int32) error {
+func encodeVerbatimSamples(bw *bitio.Writer, hdr frame.Header, subframe *frame.Subframe, bps uint) error {
 	// Unencoded subblock; n = frame's bits-per-sample, i = frame's blocksize.
+	samples := subframe.Samples
 	if int(hdr.BlockSize) != len(samples) {
 		return errutil.Newf("block size and sample count mismatch; expected %d, got %d", hdr.BlockSize, len(samples))
 	}
 	for _, sample := range samples {
-		if err := bw.WriteBits(uint64(sample), hdr.BitsPerSample); err != nil {
+		if err := bw.WriteBits(uint64(sample), uint8(bps)); err != nil {
 			return errutil.Err(err)
 		}
 	}
 	return nil
 }
+
+// --- [ Fixed samples ] -------------------------------------------------------
+
+// encodeFixedSamples stores the given samples using linear prediction coding
+// with a fixed set of predefined polynomial coefficients, writing to bw.
+//
+// The first subframe.Order samples are stored unencoded as warm-up samples
+// using bps bits each; the remaining samples are stored as residuals.
+func encodeFixedSamples(bw *bitio.Writer, hdr frame.Header, subframe *frame.Subframe, bps uint) error {
+	// Validate the prediction order before indexing the fixed coefficient
+	// table and the warm-up samples.
+	samples := subframe.Samples
+	if subframe.Order < 0 || subframe.Order >= len(frame.FixedCoeffs) {
+		return fmt.Errorf("encodeFixedSamples: invalid fixed prediction order; expected >= 0 && < %d, got %d", len(frame.FixedCoeffs), subframe.Order)
+	}
+	if subframe.Order > len(samples) {
+		return fmt.Errorf("encodeFixedSamples: prediction order (%d) exceeds number of samples (%d)", subframe.Order, len(samples))
+	}
+
+	// Encode unencoded warm-up samples.
+	for i := 0; i < subframe.Order; i++ {
+		sample := samples[i]
+		if err := bw.WriteBits(uint64(sample), uint8(bps)); err != nil {
+			return errutil.Err(err)
+		}
+	}
+
+	// Compute residuals (signal errors of the prediction) between audio
+	// samples and LPC predicted audio samples.
+	const shift = 0
+	residuals, err := getLPCResiduals(subframe, frame.FixedCoeffs[subframe.Order], shift)
+	if err != nil {
+		return errutil.Err(err)
+	}
+
+	// Encode subframe residuals.
+	if err := encodeResiduals(bw, subframe, residuals); err != nil {
+		return errutil.Err(err)
+	}
+	return nil
+}
+
+// --- [ FIR samples ] -------------------------------------------------------
+
+// encodeFIRSamples stores the given samples using linear prediction coding
+// with a custom set of predefined polynomial coefficients, writing to bw.
+func encodeFIRSamples(bw *bitio.Writer, hdr frame.Header, subframe *frame.Subframe, bps uint) error {
+	// Validate that the subframe holds enough samples for the warm-up.
+	samples := subframe.Samples
+	if subframe.Order > len(samples) {
+		return fmt.Errorf("encodeFIRSamples: prediction order (%d) exceeds number of samples (%d)", subframe.Order, len(samples))
+	}
+
+	// Encode unencoded warm-up samples.
+	for i := 0; i < subframe.Order; i++ {
+		sample := samples[i]
+		if err := bw.WriteBits(uint64(sample), uint8(bps)); err != nil {
+			return errutil.Err(err)
+		}
+	}
+
+	// 4 bits: (coefficients' precision in bits) - 1.
+	if err := bw.WriteBits(uint64(subframe.CoeffPrec-1), 4); err != nil {
+		return errutil.Err(err)
+	}
+
+	// 5 bits: predictor coefficient shift needed in bits.
+	if err := bw.WriteBits(uint64(subframe.CoeffShift), 5); err != nil {
+		return errutil.Err(err)
+	}
+
+	// Encode coefficients.
+	for _, coeff := range subframe.Coeffs {
+		// (prec) bits: Predictor coefficient.
+		if err := bw.WriteBits(uint64(coeff), uint8(subframe.CoeffPrec)); err != nil {
+			return errutil.Err(err)
+		}
+	}
+
+	// Compute residuals (signal errors of the prediction) between audio
+	// samples and LPC predicted audio samples.
+	residuals, err := getLPCResiduals(subframe, subframe.Coeffs, subframe.CoeffShift)
+	if err != nil {
+		return errutil.Err(err)
+	}
+
+	// Encode subframe residuals.
+	if err := encodeResiduals(bw, subframe, residuals); err != nil {
+		return errutil.Err(err)
+	}
+	return nil
+}
+
+// encodeResiduals encodes the residuals (prediction method error signals) of the
+// subframe.
+//
+// ref: https://www.xiph.org/flac/format.html#residual
+func encodeResiduals(bw *bitio.Writer, subframe *frame.Subframe, residuals []int32) error {
+	// 2 bits: Residual coding method.
+	if err := bw.WriteBits(uint64(subframe.ResidualCodingMethod), 2); err != nil {
+		return errutil.Err(err)
+	}
+	// The 2 bits are used to specify the residual coding method as follows:
+	// 00: Rice coding with a 4-bit Rice parameter.
+	// 01: Rice coding with a 5-bit Rice parameter.
+	// 10: reserved.
+	// 11: reserved.
+	switch subframe.ResidualCodingMethod {
+	case frame.ResidualCodingMethodRice1:
+		return encodeRicePart(bw, subframe, 4, residuals)
+	case frame.ResidualCodingMethodRice2:
+		return encodeRicePart(bw, subframe, 5, residuals)
+	default:
+		return fmt.Errorf("encodeResiduals: reserved residual coding method bit pattern (%02b)", uint8(subframe.ResidualCodingMethod))
+	}
+}
+
+// encodeRicePart encodes a Rice partition of residuals from the subframe, using
+// a Rice parameter of the specified size in bits.
+//
+// ref: https://www.xiph.org/flac/format.html#partitioned_rice
+// ref: https://www.xiph.org/flac/format.html#partitioned_rice2
+func encodeRicePart(bw *bitio.Writer, subframe *frame.Subframe, paramSize uint, residuals []int32) error {
+	// Validate the Rice subframe before writing anything to bw.
+	riceSubframe := subframe.RiceSubframe
+	if riceSubframe == nil {
+		return fmt.Errorf("encodeRicePart: missing Rice subframe")
+	}
+	partOrder := riceSubframe.PartOrder
+	if partOrder < 0 || partOrder > 15 {
+		return fmt.Errorf("encodeRicePart: invalid partition order; expected >= 0 && <= 15, got %d", partOrder)
+	}
+	nparts := 1 << partOrder
+	if len(riceSubframe.Partitions) != nparts {
+		return fmt.Errorf("encodeRicePart: partition count mismatch; expected %d, got %d", nparts, len(riceSubframe.Partitions))
+	}
+
+	// 4 bits: Partition order.
+	if err := bw.WriteBits(uint64(partOrder), 4); err != nil {
+		return errutil.Err(err)
+	}
+
+	// Encode Rice partitions; in total 2^partOrder partitions.
+	//
+	// ref: https://www.xiph.org/flac/format.html#rice_partition
+	// ref: https://www.xiph.org/flac/format.html#rice2_partition
+	curResidualIndex := 0
+	for i := range riceSubframe.Partitions {
+		partition := &riceSubframe.Partitions[i]
+		// (4 or 5) bits: Rice parameter.
+		param := partition.Param
+		if err := bw.WriteBits(uint64(param), uint8(paramSize)); err != nil {
+			return errutil.Err(err)
+		}
+
+		// Determine the number of Rice encoded samples in the partition.
+		var nsamples int
+		if partOrder == 0 {
+			nsamples = subframe.NSamples - subframe.Order
+		} else if i != 0 {
+			nsamples = subframe.NSamples / nparts
+		} else {
+			nsamples = subframe.NSamples/nparts - subframe.Order
+		}
+		if nsamples < 0 || nsamples > len(residuals)-curResidualIndex {
+			return fmt.Errorf("encodeRicePart: invalid number of residuals in partition %d; expected >= 0 && <= %d, got %d", i, len(residuals)-curResidualIndex, nsamples)
+		}
+
+		if paramSize == 4 && param == 0xF || paramSize == 5 && param == 0x1F {
+			// 1111 or 11111: Escape code, meaning the partition is in unencoded
+			// binary form using n bits per sample; n follows as a 5-bit number.
+			if err := bw.WriteBits(uint64(partition.EscapedBitsPerSample), 5); err != nil {
+				return errutil.Err(err)
+			}
+			for j := 0; j < nsamples; j++ {
+				// ref: https://datatracker.ietf.org/doc/draft-ietf-cellar-flac/
+				//
+				// From section 9.2.7.1. Escaped partition:
+				//
+				// The residual samples themselves are stored signed two's
+				// complement. For example, when a partition is escaped and each
+				// residual sample is stored with 3 bits, the number -1 is
+				// represented as 0b111.
+				residual := residuals[curResidualIndex]
+				curResidualIndex++
+				if err := bw.WriteBits(uint64(residual), uint8(partition.EscapedBitsPerSample)); err != nil {
+					return errutil.Err(err)
+				}
+			}
+			continue
+		}
+
+		// Encode the Rice residuals of the partition.
+		for j := 0; j < nsamples; j++ {
+			residual := residuals[curResidualIndex]
+			curResidualIndex++
+			if err := encodeRiceResidual(bw, param, residual); err != nil {
+				return errutil.Err(err)
+			}
+		}
+	}
+
+	return nil
+}
+
+// encodeRiceResidual encodes a Rice residual (error signal).
+func encodeRiceResidual(bw *bitio.Writer, k uint, residual int32) error {
+	// ZigZag encode.
+	folded := iobits.EncodeZigZag(residual)
+
+	// unfold into low- and high.
+	lowMask := ^uint32(0) >> (32 - k) // lower k bits.
+	highMask := ^uint32(0) << k       // upper bits.
+	high := (folded & highMask) >> k
+	low := folded & lowMask
+
+	// Write unary encoded most significant bits.
+	if err := iobits.WriteUnary(bw, uint64(high)); err != nil {
+		return errutil.Err(err)
+	}
+
+	// Write binary encoded least significant bits.
+	if err := bw.WriteBits(uint64(low), uint8(k)); err != nil {
+		return errutil.Err(err)
+	}
+	return nil
+}
+
+// getLPCResiduals returns the residuals (signal errors of the prediction)
+// between the given audio samples and the LPC predicted audio samples, using
+// the coefficients of a given polynomial, and a couple (order of polynomial;
+// i.e. len(coeffs)) of unencoded warm-up samples.
+func getLPCResiduals(subframe *frame.Subframe, coeffs []int32, shift int32) ([]int32, error) {
+	if len(coeffs) != subframe.Order {
+		return nil, fmt.Errorf("getLPCResiduals: prediction order (%d) differs from number of coefficients (%d)", subframe.Order, len(coeffs))
+	}
+	if shift < 0 {
+		return nil, fmt.Errorf("getLPCResiduals: invalid negative shift")
+	}
+	if subframe.NSamples != len(subframe.Samples) {
+		return nil, fmt.Errorf("getLPCResiduals: subframe sample count mismatch; expected %d, got %d", subframe.NSamples, len(subframe.Samples))
+	}
+	// Pre-allocate room for one residual per non-warm-up sample.
+	var residuals []int32
+	if n := subframe.NSamples - subframe.Order; n > 0 {
+		residuals = make([]int32, 0, n)
+	}
+	for i := subframe.Order; i < subframe.NSamples; i++ {
+		var sample int64
+		for j, c := range coeffs {
+			sample += int64(c) * int64(subframe.Samples[i-j-1])
+		}
+		residual := subframe.Samples[i] - int32(sample>>uint(shift))
+		residuals = append(residuals, residual)
+	}
+	return residuals, nil
+}
diff --git a/frame/frame.go b/frame/frame.go
index e75dc1d..877569a 100644
--- a/frame/frame.go
+++ b/frame/frame.go
@@ -130,7 +130,7 @@ func (frame *Frame) Parse() error {
 	}
 
 	// Inter-channel correlation of subframe samples.
-	frame.correlate()
+	frame.Correlate()
 
 	// 2 bytes: CRC-16 checksum.
 	var want uint16
@@ -571,14 +571,14 @@ func (channels Channels) Count() int {
 	return nChannels[channels]
 }
 
-// correlate reverts any inter-channel decorrelation between the samples of the
+// Correlate reverts any inter-channel decorrelation between the samples of the
 // subframes.
 //
 // An encoder decorrelates audio samples as follows:
 //
 //	mid = (left + right)/2
 //	side = left - right
-func (frame *Frame) correlate() {
+func (frame *Frame) Correlate() {
 	switch frame.Channels {
 	case ChannelsLeftSide:
 		// 2 channels: left, side; using inter-channel decorrelation.
@@ -592,9 +592,9 @@ func (frame *Frame) correlate() {
 		// 2 channels: side, right; using inter-channel decorrelation.
side := frame.Subframes[0].Samples right := frame.Subframes[1].Samples - // left = right + side for i := range side { - side[i] += right[i] + // left = right + side + side[i] = right[i] + side[i] } case ChannelsMidSide: // 2 channels: mid, side; using inter-channel decorrelation. @@ -618,6 +618,57 @@ func (frame *Frame) correlate() { } } +// Decorrelate performs inter-channel decorrelation between the samples of the +// subframes. +// +// An encoder decorrelates audio samples as follows: +// +// mid = (left + right)/2 +// side = left - right +func (frame *Frame) Decorrelate() { + switch frame.Channels { + case ChannelsLeftSide: + // 2 channels: left, side; using inter-channel decorrelation. + left := frame.Subframes[0].Samples // already left; no change after inter-channel decorrelation. + right := frame.Subframes[1].Samples // set to side after inter-channel decorrelation. + for i := range left { + l := left[i] + r := right[i] + // inter-channel decorrelation: + // side = left - right + side := l - r + right[i] = side + } + case ChannelsSideRight: + // 2 channels: side, right; using inter-channel decorrelation. + left := frame.Subframes[0].Samples // set to side after inter-channel decorrelation. + right := frame.Subframes[1].Samples // already right; no change after inter-channel decorrelation. + for i := range left { + l := left[i] + r := right[i] + // inter-channel decorrelation: + // side = left - right + side := l - r + left[i] = side + } + case ChannelsMidSide: + // 2 channels: mid, side; using inter-channel decorrelation. + left := frame.Subframes[0].Samples // set to mid after inter-channel decorrelation. + right := frame.Subframes[1].Samples // set to side after inter-channel decorrelation. + for i := range left { + // inter-channel decorrelation: + // mid = (left + right)/2 + // side = left - right + l := left[i] + r := right[i] + mid := int32((int64(l) + int64(r)) >> 1) // NOTE: using `(left + right) >> 1`, not the same as `(left + right) / 2`. 
+ side := l - r + left[i] = mid + right[i] = side + } + } +} + // SampleNumber returns the first sample number contained within the frame. func (frame *Frame) SampleNumber() uint64 { if frame.HasFixedBlockSize { diff --git a/frame/subframe.go b/frame/subframe.go index eae06fb..b906609 100644 --- a/frame/subframe.go +++ b/frame/subframe.go @@ -50,7 +50,7 @@ func (frame *Frame) parseSubframe(br *bits.Reader, bps uint) (subframe *Subframe err = subframe.decodeFIR(br, bps) } - // Left shift to accout for wasted bits-per-sample. + // Left shift to account for wasted bits-per-sample. for i, sample := range subframe.Samples { subframe.Samples[i] = sample << subframe.Wasted } @@ -68,6 +68,37 @@ type SubHeader struct { Order int // Wasted bits-per-sample. Wasted uint + // Residual coding method used by fixed and FIR linear prediction decoding. + ResidualCodingMethod ResidualCodingMethod + // Coefficients' precision in bits used by FIR linear prediction decoding. + CoeffPrec uint + // Predictor coefficient shift needed in bits used by FIR linear prediction + // decoding. + CoeffShift int32 + // Predictor coefficients used by FIR linear prediction decoding. + Coeffs []int32 + // Rice-coding subframe fields used by residual coding methods rice1 and + // rice2; nil if unused. + RiceSubframe *RiceSubframe +} + +// RiceSubframe holds rice-coding subframe fields used by residual coding +// methods rice1 and rice2. +type RiceSubframe struct { + // Partition order used by fixed and FIR linear prediction decoding + // (for residual coding methods, rice1 and rice2). + PartOrder int // TODO: remove PartOrder and infer from int(math.Log2(float64(len(Partitions))))? + // Rice partitions. + Partitions []RicePartition +} + +// RicePartition is a partition containing a subset of the residuals of a +// subframe. +type RicePartition struct { + // Rice parameter. + Param uint + // Residual sample size in bits-per-sample used by escaped partitions. 
+ EscapedBitsPerSample uint } // parseHeader reads and parses the header of a subframe. @@ -233,7 +264,7 @@ func (subframe *Subframe) decodeVerbatim(br *bits.Reader, bps uint) error { return nil } -// fixedCoeffs maps from prediction order to the LPC coefficients used in fixed +// FixedCoeffs maps from prediction order to the LPC coefficients used in fixed // encoding. // // x_0[n] = 0 @@ -241,7 +272,7 @@ func (subframe *Subframe) decodeVerbatim(br *bits.Reader, bps uint) error { // x_2[n] = 2*x[n-1] - x[n-2] // x_3[n] = 3*x[n-1] - 3*x[n-2] + x[n-3] // x_4[n] = 4*x[n-1] - 6*x[n-2] + 4*x[n-3] - x[n-4] -var fixedCoeffs = [...][]int32{ +var FixedCoeffs = [...][]int32{ // ref: Section 2.2 of http://www.hpl.hp.com/techreports/1999/HPL-1999-144.pdf 1: {1}, 2: {2, -1}, @@ -267,15 +298,15 @@ func (subframe *Subframe) decodeFixed(br *bits.Reader, bps uint) error { } // Decode subframe residuals. - err := subframe.decodeResidual(br) - if err != nil { + if err := subframe.decodeResiduals(br); err != nil { return err } // Predict the audio samples of the subframe using a polynomial with // predefined coefficients of a given order. Correct signal errors using the // decoded residuals. - return subframe.decodeLPC(fixedCoeffs[subframe.Order], 0) + const shift = 0 + return subframe.decodeLPC(FixedCoeffs[subframe.Order], shift) } // decodeFIR decodes the linear prediction coded samples of the subframe, using @@ -303,6 +334,7 @@ func (subframe *Subframe) decodeFIR(br *bits.Reader, bps uint) error { return errors.New("frame.Subframe.decodeFIR: invalid coefficient precision bit pattern (1111)") } prec := uint(x) + 1 + subframe.CoeffPrec = prec // 5 bits: predictor coefficient shift needed in bits. x, err = br.Read(5) @@ -310,6 +342,7 @@ func (subframe *Subframe) decodeFIR(br *bits.Reader, bps uint) error { return unexpected(err) } shift := signExtend(x, 5) + subframe.CoeffShift = shift // Parse coefficients. 
coeffs := make([]int32, subframe.Order) @@ -321,9 +354,10 @@ func (subframe *Subframe) decodeFIR(br *bits.Reader, bps uint) error { } coeffs[i] = signExtend(x, prec) } + subframe.Coeffs = coeffs // Decode subframe residuals. - if err = subframe.decodeResidual(br); err != nil { + if err := subframe.decodeResiduals(br); err != nil { return err } @@ -333,28 +367,41 @@ func (subframe *Subframe) decodeFIR(br *bits.Reader, bps uint) error { return subframe.decodeLPC(coeffs, shift) } -// decodeResidual decodes the encoded residuals (prediction method error +// ResidualCodingMethod specifies a residual coding method. +type ResidualCodingMethod uint8 + +// Residual coding methods. +const ( + // Rice coding with a 4-bit Rice parameter (rice1). + ResidualCodingMethodRice1 ResidualCodingMethod = 0 + // Rice coding with a 5-bit Rice parameter (rice2). + ResidualCodingMethodRice2 ResidualCodingMethod = 1 +) + +// decodeResiduals decodes the encoded residuals (prediction method error // signals) of the subframe. // // ref: https://www.xiph.org/flac/format.html#residual -func (subframe *Subframe) decodeResidual(br *bits.Reader) error { +func (subframe *Subframe) decodeResiduals(br *bits.Reader) error { // 2 bits: Residual coding method. x, err := br.Read(2) if err != nil { return unexpected(err) } + residualCodingMethod := ResidualCodingMethod(x) + subframe.ResidualCodingMethod = residualCodingMethod // The 2 bits are used to specify the residual coding method as follows: // 00: Rice coding with a 4-bit Rice parameter. // 01: Rice coding with a 5-bit Rice parameter. // 10: reserved. // 11: reserved. 
- switch x { + switch residualCodingMethod { case 0x0: return subframe.decodeRicePart(br, 4) case 0x1: return subframe.decodeRicePart(br, 5) default: - return fmt.Errorf("frame.Subframe.decodeResidual: reserved residual coding method bit pattern (%02b)", x) + return fmt.Errorf("frame.Subframe.decodeResiduals: reserved residual coding method bit pattern (%02b)", uint8(residualCodingMethod)) } } @@ -369,20 +416,28 @@ func (subframe *Subframe) decodeRicePart(br *bits.Reader, paramSize uint) error if err != nil { return unexpected(err) } - partOrder := x + partOrder := int(x) + riceSubframe := &RiceSubframe{ + PartOrder: partOrder, + } + subframe.RiceSubframe = riceSubframe // Parse Rice partitions; in total 2^partOrder partitions. // // ref: https://www.xiph.org/flac/format.html#rice_partition // ref: https://www.xiph.org/flac/format.html#rice2_partition nparts := 1 << partOrder + partitions := make([]RicePartition, nparts) + riceSubframe.Partitions = partitions for i := 0; i < nparts; i++ { + partition := &partitions[i] // (4 or 5) bits: Rice parameter. x, err = br.Read(paramSize) if err != nil { return unexpected(err) } param := uint(x) + partition.Param = param // Determine the number of Rice encoded samples in the partition. var nsamples int @@ -402,6 +457,7 @@ func (subframe *Subframe) decodeRicePart(br *bits.Reader, paramSize uint) error return unexpected(err) } n := uint(x) + partition.EscapedBitsPerSample = n for j := 0; j < nsamples; j++ { sample, err := br.Read(n) if err != nil { @@ -422,35 +478,36 @@ func (subframe *Subframe) decodeRicePart(br *bits.Reader, paramSize uint) error // Decode the Rice encoded residuals of the partition. for j := 0; j < nsamples; j++ { - if err = subframe.decodeRiceResidual(br, param); err != nil { + residual, err := subframe.decodeRiceResidual(br, param) + if err != nil { return err } + subframe.Samples = append(subframe.Samples, residual) } } return nil } -// decodeRiceResidual decodes a Rice encoded residual (error signal). 
-func (subframe *Subframe) decodeRiceResidual(br *bits.Reader, k uint) error { +// decodeRiceResidual decodes and returns a Rice encoded residual (error +// signal). +func (subframe *Subframe) decodeRiceResidual(br *bits.Reader, k uint) (int32, error) { // Read unary encoded most significant bits. high, err := br.ReadUnary() if err != nil { - return unexpected(err) + return 0, unexpected(err) } // Read binary encoded least significant bits. low, err := br.Read(k) if err != nil { - return unexpected(err) + return 0, unexpected(err) } folded := uint32(high< 000001 // 6 => 0000001 func WriteUnary(bw *bitio.Writer, x uint64) error { - bits := uint64(1) - n := byte(1) - for ; x > 0; x-- { - n++ + for ; x > 8; x -= 8 { + if err := bw.WriteByte(0x0); err != nil { + return err + } } + + bits := uint64(1) + n := byte(x + 1) if err := bw.WriteBits(bits, n); err != nil { return err } diff --git a/internal/bits/unary_test.go b/internal/bits/unary_test.go new file mode 100644 index 0000000..4db524a --- /dev/null +++ b/internal/bits/unary_test.go @@ -0,0 +1,36 @@ +package bits_test + +import ( + "bytes" + "testing" + + "github.com/icza/bitio" + "github.com/mewkiz/flac/internal/bits" +) + +func TestUnary(t *testing.T) { + buf := &bytes.Buffer{} + bw := bitio.NewWriter(buf) + + for want := uint64(0); want < 1000; want++ { + // Write unary + if err := bits.WriteUnary(bw, want); err != nil { + t.Fatalf("unable to write unary; %v", err) + } + // Flush buffer + if err := bw.Close(); err != nil { + t.Fatalf("unable to close (flush) the bit buffer; %v", err) + } + + // Read written unary + r := bits.NewReader(buf) + got, err := r.ReadUnary() + if err != nil { + t.Fatalf("unable to read unary; %v", err) + } + + if want != got { + t.Fatalf("mismatch between written and read unary value; expected: %d, got: %d", want, got) + } + } +} diff --git a/internal/bits/zigzag.go b/internal/bits/zigzag.go index 16e474f..3d6ac40 100644 --- a/internal/bits/zigzag.go +++ b/internal/bits/zigzag.go @@ 
-1,6 +1,6 @@ package bits -// ZigZag decodes a ZigZag encoded integer and returns it. +// DecodeZigZag decodes a ZigZag encoded integer and returns it. // // Examples of ZigZag encoded values on the left and decoded values on the // right: @@ -14,6 +14,28 @@ package bits // 6 => 3 // // ref: https://developers.google.com/protocol-buffers/docs/encoding -func ZigZag(x uint32) int32 { +func DecodeZigZag(x uint32) int32 { return int32(x>>1) ^ -int32(x&1) } + +// EncodeZigZag encodes a given integer to ZigZag-encoding. +// +// Examples of integer input on the left and corresponding ZigZag encoded values +// on the right: +// +// 0 => 0 +// -1 => 1 +// 1 => 2 +// -2 => 3 +// 2 => 4 +// -3 => 5 +// 3 => 6 +// +// ref: https://developers.google.com/protocol-buffers/docs/encoding +func EncodeZigZag(x int32) uint32 { + if x < 0 { + x = -x + return uint32(x)<<1 - 1 + } + return uint32(x) << 1 +} diff --git a/internal/bits/zigzag_test.go b/internal/bits/zigzag_test.go new file mode 100644 index 0000000..2a8fdc5 --- /dev/null +++ b/internal/bits/zigzag_test.go @@ -0,0 +1,49 @@ +package bits + +import ( + "testing" +) + +func TestDecodeZigZag(t *testing.T) { + golden := []struct { + x uint32 + want int32 + }{ + {x: 0, want: 0}, + {x: 1, want: -1}, + {x: 2, want: 1}, + {x: 3, want: -2}, + {x: 4, want: 2}, + {x: 5, want: -3}, + {x: 6, want: 3}, + } + for _, g := range golden { + got := DecodeZigZag(g.x) + if g.want != got { + t.Errorf("result mismatch of DecodeZigZag(x=%d); expected %d, got %d", g.x, g.want, got) + continue + } + } +} + +func TestEncodeZigZag(t *testing.T) { + golden := []struct { + x int32 + want uint32 + }{ + {x: 0, want: 0}, + {x: -1, want: 1}, + {x: 1, want: 2}, + {x: -2, want: 3}, + {x: 2, want: 4}, + {x: -3, want: 5}, + {x: 3, want: 6}, + } + for _, g := range golden { + got := EncodeZigZag(g.x) + if g.want != got { + t.Errorf("result mismatch of EncodeZigZag(x=%d); expected %d, got %d", g.x, g.want, got) + continue + } + } +}