@@ -26,5 +26,6 @@
          init_data_reader/2,
          init_offset_reader/2,
+         read_header/1,
          read_chunk/1,
          read_chunk_parsed/1,
          committed_offset/1,
@@ -131,6 +132,17 @@
 -type record() :: {offset(), iodata()}.
 -type offset_spec() :: osiris:offset_spec().
 -type retention_spec() :: osiris:retention_spec().
+-type header_map() :: #{chunk_id => offset(),
+                        epoch => epoch(),
+                        type => chunk_type(),
+                        crc => integer(),
+                        num_records => non_neg_integer(),
+                        num_entries => non_neg_integer(),
+                        timestamp => osiris:milliseconds(),
+                        data_size => non_neg_integer(),
+                        trailer_size => non_neg_integer(),
+                        header_data => binary()}.
+
 
 %% holds static or rarely changing fields
 -record(cfg, {directory :: file:filename(),
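For orientation, here is what a populated `header_map()` might look like as returned by the new `read_header/1`. This is a sketch only: every value below is invented for illustration and not taken from the change itself.

```erlang
%% Illustrative only: all values are invented for the example.
ExampleHeader = #{chunk_id     => 4096,          %% offset of the chunk's first record
                  epoch        => 3,             %% writer epoch for the chunk
                  type         => 0,             %% chunk_type()
                  crc          => 1696793468,    %% CRC32 over the data portion
                  num_records  => 100,
                  num_entries  => 10,            %% an entry may batch several records
                  timestamp    => 1589893465000, %% osiris:milliseconds()
                  data_size    => 65536,         %% bytes of record data after the header
                  trailer_size => 64,
                  header_data  => <<0:(44 * 8)>>}. %% raw header bytes (size illustrative)
```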
@@ -158,7 +170,8 @@
                %% the current offset tracking state
                tracking = #{} :: #{tracking_id() => offset()},
                writers = #{} :: #{osiris:writer_id() =>
-                                  {osiris:milliseconds(), non_neg_integer()}}
+                                  {offset(), osiris:milliseconds(),
+                                   non_neg_integer()}}
               }).
 
 -record(?MODULE, {cfg :: #cfg{},
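The per-writer value grows from `{Timestamp, Seq}` to `{ChunkId, Timestamp, Seq}`, so anything pattern-matching on this map must now match three elements. A minimal sketch, assuming (as the fields suggest) the map backs writer deduplication keyed on the last sequence number; the helper name is hypothetical:

```erlang
%% Hypothetical helper: accept a write only if its sequence number
%% advances past the last one recorded for that writer id.
is_duplicate(WriterId, Seq, Writers) ->
    case maps:find(WriterId, Writers) of
        {ok, {_ChunkId, _Ts, LastSeq}} when Seq =< LastSeq ->
            true;
        _ ->
            false
    end.
```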
@@ -354,7 +367,7 @@ write_wrt_snapshot(Writers,
                    #?MODULE{cfg = #cfg{},
                             mode = #write{} = W0} = State0) ->
     WData = maps:fold(
-              fun (W, {T, S}, Acc) ->
+              fun (W, {_O, T, S}, Acc) ->
                       [<<(byte_size(W)):8/unsigned,
                          W/binary,
                          T:64/unsigned,
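The fold above serializes one snapshot entry per writer; under the new three-tuple the chunk id is dropped on write and re-derived during recovery. A sketch of a single entry's layout, mirroring the pattern that `parse_writers_snapshot` (further down) expects:

```erlang
%% Sketch: one writer snapshot entry - size-prefixed writer id, then
%% timestamp and sequence number as 64-bit unsigned integers.
writer_snapshot_entry(WriterId, Ts, Seq) when is_binary(WriterId) ->
    <<(byte_size(WriterId)):8/unsigned,
      WriterId/binary,
      Ts:64/unsigned,
      Seq:64/unsigned>>.
```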
@@ -764,10 +777,35 @@ tracking(#?MODULE{mode = #write{tracking = Tracking}}) ->
     Tracking.
 
 -spec writers(state()) ->
-    #{osiris:writer_id() => {osiris:milliseconds(), non_neg_integer()}}.
+    #{osiris:writer_id() => {offset(), osiris:milliseconds(), non_neg_integer()}}.
 writers(#?MODULE{mode = #write{writers = Writers}}) ->
     Writers.
 
+-spec read_header(state()) ->
+    {ok, header_map(), state()} |
+    {end_of_stream, state()} |
+    {error, {invalid_chunk_header, term()}}.
+read_header(#?MODULE{cfg = #cfg{},
+                     mode = #read{} = Read,
+                     fd = Fd} = State) ->
+    %% reads the next chunk header, if there is one, and skips past the
+    %% data and trailer portions so the next read starts at the following
+    %% chunk
+    case read_header0(State) of
+        {ok, #{num_records := NumRecords,
+               data_size := DataSize,
+               trailer_size := TrailerSize} = Header} ->
+            %% skip data portion
+            {ok, _} = file:position(Fd, {cur, DataSize + TrailerSize}),
+            {ok, Header,
+             State#?MODULE{mode = incr_next_offset(NumRecords, Read)}};
+        {end_of_stream, _} = EOF ->
+            EOF;
+        {error, _} = Err ->
+            Err
+    end.
+
+
 -spec read_chunk(state()) ->
     {ok, {chunk_type(),
           offset(),
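The new `read_header/1` makes it possible to walk a stream chunk by chunk without paying for the chunk bodies. A minimal sketch, assuming the module is `osiris_log` and the state comes from one of the reader init functions:

```erlang
%% Sketch: count the records in a stream by walking headers only;
%% read_header/1 skips each chunk's data and trailer internally.
count_records(State0, Acc) ->
    case osiris_log:read_header(State0) of
        {ok, #{num_records := NumRecords}, State} ->
            count_records(State, Acc + NumRecords);
        {end_of_stream, _State} ->
            {ok, Acc};
        {error, _} = Err ->
            Err
    end.
```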
@@ -778,66 +816,30 @@ writers(#?MODULE{mode = #write{writers = Writers}}) ->
          }, state()} |
     {end_of_stream, state()} |
     {error, {invalid_chunk_header, term()}}.
-read_chunk(#?MODULE{cfg = #cfg{directory = Dir},
+read_chunk(#?MODULE{cfg = #cfg{},
                     mode = #read{last_offset = _Last,
                                  next_offset = Offs} = Read,
-                    current_file = CurFile,
                     fd = Fd} = State) ->
     %% reads the next chunk of entries, parsed
     %% NB: this may return records before the requested index,
     %% that is fine - the reading process can do the appropriate filtering
-    case can_read_next_offset(Read) of
-        true ->
-            case file:read(Fd, ?HEADER_SIZE_B) of
-                {ok, <<?MAGIC:4/unsigned,
-                       ?VERSION:4/unsigned,
-                       ChType:8/unsigned,
-                       _NumEntries:16/unsigned,
-                       NumRecords:32/unsigned,
-                       _Timestamp:64/signed,
-                       Epoch:64/unsigned,
-                       Offs:64/unsigned,
-                       Crc:32/integer,
-                       DataSize:32/unsigned,
-                       TrailerSize:32/unsigned>> = HeaderData} ->
-                    {ok, BlobData} = file:read(Fd, DataSize),
-                    %% position after trailer
-                    %% TODO: should we return trailer as well?
-                    {ok, TrailerData} = file:read(Fd, TrailerSize),
-                    validate_crc(Offs, Crc, BlobData),
-                    %% tracking data
-                    {ok, {ChType, Offs, Epoch, HeaderData, BlobData, TrailerData},
-                     State#?MODULE{mode = incr_next_offset(NumRecords, Read)}};
-                {ok, _} ->
-                    %% set the position back for the next read
-                    {ok, _} = file:position(Fd, {cur, -?HEADER_SIZE_B}),
-                    {end_of_stream, State};
-                eof ->
-                    %% open next segment file and start there if it exists
-                    SegFile = make_file_name(Offs, "segment"),
-                    case SegFile == CurFile of
-                        true ->
-                            %% the new filename is the same as the old one
-                            %% this should only really happen for an empty
-                            %% log but would cause an infinite loop if it does
-                            {end_of_stream, State};
-                        false ->
-                            case file:open(filename:join(Dir, SegFile),
-                                           [raw, binary, read]) of
-                                {ok, Fd2} ->
-                                    ok = file:close(Fd),
-                                    {ok, _} = file:position(Fd2, ?LOG_HEADER_SIZE),
-                                    read_chunk(State#?MODULE{current_file = SegFile,
-                                                             fd = Fd2});
-                                {error, enoent} ->
-                                    {end_of_stream, State}
-                            end
-                    end;
-                Invalid ->
-                    {error, {invalid_chunk_header, Invalid}}
-            end;
-        false ->
-            {end_of_stream, State}
+    case read_header0(State) of
+        {ok, #{type := ChType,
+               chunk_id := ChId,
+               epoch := Epoch,
+               crc := Crc,
+               num_records := NumRecords,
+               header_data := HeaderData,
+               data_size := DataSize,
+               trailer_size := TrailerSize}} ->
+            {ok, BlobData} = file:read(Fd, DataSize),
+            %% position after trailer
+            {ok, TrailerData} = file:read(Fd, TrailerSize),
+            validate_crc(Offs, Crc, BlobData),
+            {ok, {ChType, ChId, Epoch, HeaderData, BlobData, TrailerData},
+             State#?MODULE{mode = incr_next_offset(NumRecords, Read)}};
+        Other ->
+            Other
     end.
 
 -spec read_chunk_parsed(state()) ->
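`read_chunk/1` now takes the chunk id from the parsed header (`ChId`) rather than the reader's expected `next_offset`. Since the raw header, blob and trailer binaries are returned untouched, a consumer can relay a whole chunk without parsing its records; a sketch with a hypothetical helper over `gen_tcp`:

```erlang
%% Hypothetical relay helper: ship a raw chunk downstream unparsed,
%% preserving the on-disk framing of header, data and trailer.
relay_chunk(Socket, State0) ->
    case osiris_log:read_chunk(State0) of
        {ok, {_ChType, _ChId, _Epoch, HeaderData, BlobData, TrailerData}, State} ->
            ok = gen_tcp:send(Socket, [HeaderData, BlobData, TrailerData]),
            {ok, State};
        Other ->
            Other
    end.
```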
@@ -1332,7 +1334,7 @@ write_chunk(Chunk, NewWriters, Timestamp, Epoch, NumRecords,
     counters:add(CntRef, ?C_CHUNKS, 1),
     Writers = maps:fold(
                 fun (K, V, Acc) ->
-                        maps:put(K, {Timestamp, V}, Acc)
+                        maps:put(K, {Next, Timestamp, V}, Acc)
                 end, Writers0, NewWriters),
     case file:position(Fd, cur) of
         {ok, After} when After >= MaxSize ->
@@ -1562,7 +1564,7 @@ recover_tracking(Fd, Trk, Wrt) ->
               _NumRecords:32/unsigned,
               Timestamp:64/signed,
               _Epoch:64/unsigned,
-              _Next:64/unsigned,
+              ChunkId:64/unsigned,
               _Crc:32/integer,
               Size:32/unsigned,
               TSize:32/unsigned>>} ->
@@ -1582,11 +1584,11 @@ recover_tracking(Fd, Trk, Wrt) ->
         ?CHNK_WRT_SNAPSHOT ->
             {ok, <<0:1, S:31, Data:S/binary>>} = file:read(Fd, Size),
             {ok, _} = file:read(Fd, TSize),
-            recover_tracking(Fd, Trk, parse_writers_snapshot(Data, #{}));
+            recover_tracking(Fd, Trk, parse_writers_snapshot(Data, ChunkId, #{}));
         ?CHNK_USER ->
             {ok, _} = file:position(Fd, {cur, Size}),
             {ok, TData} = file:read(Fd, TSize),
-            recover_tracking(Fd, Trk, parse_writers(TData, Timestamp, Wrt))
+            recover_tracking(Fd, Trk, parse_writers(TData, ChunkId, Timestamp, Wrt))
     end;
 eof ->
     file:close(Fd),
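After recovery, each writer entry now records the chunk id in which that writer's last sequence number was committed. Purely illustrative shape of the recovered map (ids and values invented):

```erlang
%% Illustrative only: writers map after recover_tracking, with the
%% chunk id in the first position of each tuple.
Writers = #{<<"writer-1">> => {4096, 1589893465000, 17},
            <<"writer-2">> => {4608, 1589893466000, 3}}.
```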
@@ -1601,41 +1603,104 @@ parse_tracking(<<Size:8/unsigned,
                  Rem/binary>>, Acc) ->
     parse_tracking(Rem, Acc#{Id => Offs}).
 
-parse_writers(<<>>, _, Acc) ->
+parse_writers(<<>>, _, _, Acc) ->
     Acc;
 parse_writers(<<Size:8/unsigned,
                 Id:Size/binary,
                 Seq:64/unsigned,
-                Rem/binary>>, Ts, Acc) ->
-    parse_writers(Rem, Ts, Acc#{Id => {Ts, Seq}}).
+                Rem/binary>>, ChunkId, Ts, Acc) ->
+    parse_writers(Rem, ChunkId, Ts, Acc#{Id => {ChunkId, Ts, Seq}}).
 
-parse_writers_snapshot(<<>>, Acc) ->
+parse_writers_snapshot(<<>>, _ChId, Acc) ->
     Acc;
 parse_writers_snapshot(<<Size:8/unsigned,
                          Id:Size/binary,
                          Ts:64/unsigned,
                          Seq:64/unsigned,
-                         Rem/binary>>, Acc) ->
-    parse_writers_snapshot(Rem, Acc#{Id => {Ts, Seq}}).
+                         Rem/binary>>, ChunkId, Acc) ->
+    parse_writers_snapshot(Rem, ChunkId, Acc#{Id => {ChunkId, Ts, Seq}}).
 
 trim_writers(Max, Writers)
   when map_size(Writers) =< Max ->
     Writers;
 trim_writers(Max, Writers) ->
     %% remove oldest
     {ToRemove, _} = maps:fold(
-                      fun (K, {Ts, _}, {_, PrevTs} = Prev) ->
+                      fun (K, {_ChId, Ts, _}, {_, PrevTs} = Prev) ->
                               case Ts < PrevTs of
                                   true ->
                                       {K, Ts};
                                   false ->
                                       Prev
                               end;
-                          (K, {Ts, _}, undefined) ->
+                          (K, {_ChId, Ts, _}, undefined) ->
                               {K, Ts}
                       end, undefined, Writers),
     trim_writers(Max, maps:remove(ToRemove, Writers)).
 
+read_header0(#?MODULE{cfg = #cfg{directory = Dir},
+                      mode = #read{next_offset = NextChId} = Read,
+                      current_file = CurFile,
+                      fd = Fd} = State) ->
+    %% reads the next header if permitted
+    case can_read_next_offset(Read) of
+        true ->
+            case file:read(Fd, ?HEADER_SIZE_B) of
+                {ok, <<?MAGIC:4/unsigned,
+                       ?VERSION:4/unsigned,
+                       ChType:8/unsigned,
+                       NumEntries:16/unsigned,
+                       NumRecords:32/unsigned,
+                       Timestamp:64/signed,
+                       Epoch:64/unsigned,
+                       NextChId:64/unsigned,
+                       Crc:32/integer,
+                       DataSize:32/unsigned,
+                       TrailerSize:32/unsigned>> = HeaderData} ->
+                    {ok, #{chunk_id => NextChId,
+                           epoch => Epoch,
+                           type => ChType,
+                           crc => Crc,
+                           num_records => NumRecords,
+                           num_entries => NumEntries,
+                           timestamp => Timestamp,
+                           data_size => DataSize,
+                           trailer_size => TrailerSize,
+                           header_data => HeaderData}};
+                {ok, _} ->
+                    %% set the position back for the next read
+                    %% TODO: should it be an exception if the next chunk is not
+                    %% the expected next chunk id??
+                    {ok, _} = file:position(Fd, {cur, -?HEADER_SIZE_B}),
+                    {end_of_stream, State};
+                eof ->
+                    %% open next segment file and start there if it exists
+                    SegFile = make_file_name(NextChId, "segment"),
+                    case SegFile == CurFile of
+                        true ->
+                            %% the new filename is the same as the old one
+                            %% this should only really happen for an empty
+                            %% log but would cause an infinite loop if it does
+                            {end_of_stream, State};
+                        false ->
+                            case file:open(filename:join(Dir, SegFile),
+                                           [raw, binary, read]) of
+                                {ok, Fd2} ->
+                                    ok = file:close(Fd),
+                                    {ok, _} = file:position(Fd2, ?LOG_HEADER_SIZE),
+                                    read_chunk(State#?MODULE{current_file = SegFile,
+                                                             fd = Fd2});
+                                {error, enoent} ->
+                                    {end_of_stream, State}
+                            end
+                    end;
+                Invalid ->
+                    {error, {invalid_chunk_header, Invalid}}
+            end;
+        false ->
+            {end_of_stream, State}
+    end.
+
 -ifdef(TEST).
 -include_lib("eunit/include/eunit.hrl").
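The binary match in `read_header0/1` fixes the on-disk chunk header layout. For reference, a self-contained sketch of parsing such a header outside the module; the `?MAGIC` and `?VERSION` values here are assumptions for illustration only:

```erlang
%% Standalone sketch of the chunk header layout matched in read_header0/1.
%% Expects exactly one header's worth of bytes; magic/version assumed.
-define(MAGIC, 5).
-define(VERSION, 0).

parse_chunk_header(<<?MAGIC:4/unsigned, ?VERSION:4/unsigned,
                     ChType:8/unsigned, NumEntries:16/unsigned,
                     NumRecords:32/unsigned, Timestamp:64/signed,
                     Epoch:64/unsigned, ChunkId:64/unsigned,
                     Crc:32/integer, DataSize:32/unsigned,
                     TrailerSize:32/unsigned>>) ->
    {ok, #{type => ChType, num_entries => NumEntries,
           num_records => NumRecords, timestamp => Timestamp,
           epoch => Epoch, chunk_id => ChunkId, crc => Crc,
           data_size => DataSize, trailer_size => TrailerSize}};
parse_chunk_header(_) ->
    {error, invalid_chunk_header}.
```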