@@ -43,28 +43,23 @@ pub struct FilePartition {
4343}
4444
4545#[ derive( Debug , Clone ) ]
46- pub struct ParquetSourceDesc {
47- partition_files : Vec < PartitionedFile > ,
48- schema : SchemaRef ,
46+ pub struct SourceDescriptor {
47+ pub partition_files : Vec < PartitionedFile > ,
48+ pub schema : SchemaRef ,
4949}
5050
51- pub trait DataSource2 < R > : Send + Sync
52- where
53- R : ChunkReader + ' static ,
54- {
51+ pub trait DataSource2 < R : ChunkReader + ' static > : Send + Sync {
5552 fn list_partitions ( & self , max_concurrency : usize ) -> Result < Arc < FilePartition > > ;
5653
5754 fn schema ( & self ) -> Result < Arc < Schema > > ;
5855
5956 fn get_read_for_file ( & self , partitioned_file : PartitionedFile ) -> Result < R > ;
6057
6158 fn statistics ( & self ) -> & Statistics ;
59+ }
6260
63- fn new_chunk_reader ( file_path : & str ) -> Result < R > ;
64-
65- fn get_all_files ( root_path : & str ) -> Result < Vec < String > > ;
66-
67- fn get_source_desc ( root_path : & str ) -> Result < ParquetSourceDesc > {
61+ pub trait SourceDescBuilder < R : ChunkReader + ' static > {
62+ fn get_source_desc ( root_path : & str ) -> Result < SourceDescriptor > {
6863 let filenames = Self :: get_all_files ( root_path) ?;
6964 if filenames. is_empty ( ) {
7065 return Err ( DataFusionError :: Plan ( format ! (
@@ -90,22 +85,31 @@ where
9085 // schema merging and this is a limitation.
9186 // See https://issues.apache.org/jira/browse/ARROW-11017
9287 return Err ( DataFusionError :: Plan ( format ! (
93- "The Parquet file {} have different schema from the first file and DataFusion does \
88+ "The file {} have different schema from the first file and DataFusion does \
9489 not yet support schema merging",
9590 file_path
9691 ) ) ) ;
9792 }
9893 Ok ( pf)
9994 } ) . collect :: < Result < Vec < PartitionedFile > > > ( ) ;
10095
101- Ok ( ParquetSourceDesc {
96+ Ok ( SourceDescriptor {
10297 partition_files : partitioned_files?,
10398 schema : Arc :: new ( schemas. pop ( ) . unwrap ( ) ) ,
10499 } )
105100 }
106101
107- fn get_file_meta ( file_path : & str ) -> Result < PartitionedFile > {
108- let chunk_reader = Self :: new_chunk_reader ( file_path) ?;
102+ fn get_all_files ( root_path : & str ) -> Result < Vec < String > > ;
103+
104+ fn get_file_meta ( file_path : & str ) -> Result < PartitionedFile > ;
105+
106+ fn reader_for_file_meta ( file_path : & str ) -> Result < R > ;
107+ }
108+
109+ pub trait ParquetSourceDescBuilder : SourceDescBuilder {
110+
111+ fn get_file_meta ( file_path : & str ) {
112+ let chunk_reader = Self :: reader_for_file_meta ( file_path) ?;
109113 let file_reader = Arc :: new ( SerializedFileReader :: new ( chunk_reader) ?) ;
110114 let arrow_reader = ParquetFileArrowReader :: new ( file_reader) ;
111115 let file_path = file_path. to_string ( ) ;
0 commit comments