Skip to content

Commit 7785643

Browse files
committed
all but parquet and constellation working on stable
1 parent 001b75d commit 7785643

File tree

25 files changed

+867
-1049
lines changed

25 files changed

+867
-1049
lines changed

Cargo.toml

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -30,11 +30,10 @@ parquet = ["amadeus-parquet", "amadeus-derive/parquet"]
3030
postgres = ["amadeus-postgres", "amadeus-derive/postgres"]
3131
csv = ["amadeus-serde", "amadeus-derive/serde"]
3232
json = ["amadeus-serde", "amadeus-derive/serde"]
33-
doc = ["amadeus-core/doc"]
3433
nightly = ["amadeus-core/nightly"]
3534

3635
[package.metadata.docs.rs]
37-
features = ["doc", "constellation", "aws", "commoncrawl", "parquet", "postgres", "csv", "json"]
36+
features = ["nightly", "constellation", "aws", "commoncrawl", "parquet", "postgres", "csv", "json"]
3837

3938
[dependencies]
4039
amadeus-core = { version = "=0.3.1", path = "amadeus-core" }
@@ -53,7 +52,7 @@ futures = "0.3"
5352
num_cpus = "1.13"
5453
pin-project = "0.4"
5554
serde = { version = "1.0", features = ["derive"] }
56-
serde_closure = { version = "0.2", default-features = false }
55+
serde_closure = "0.3"
5756
serde_traitobject = { version = "0.2", optional = true }
5857
tokio = { version = "0.2", features = ["rt-threaded", "rt-util", "blocking"] }
5958

@@ -67,6 +66,9 @@ tokio = { version = "0.2", features = ["macros", "time"] }
6766
[patch.crates-io]
6867
vec-utils = { version = "*", git = "https://github.com/alecmocatta/vec-utils", branch = "stable" }
6968
streaming_algorithms = { version = "*", git = "https://github.com/alecmocatta/streaming_algorithms", branch = "stable" }
69+
serde_closure = { version = "*", git = "https://github.com/alecmocatta/serde_closure", branch = "nameable" }
70+
# serde_closure = { version = "*", path = "../serde_closure" }
71+
# serde_traitobject = { version = "*", path = "../serde_traitobject" }
7072

7173
[[example]]
7274
name = "cloudfront_logs"

amadeus-aws/Cargo.toml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,9 @@ edition = "2018"
1818
azure-devops = { project = "alecmocatta/amadeus", pipeline = "tests", build = "26" }
1919
maintenance = { status = "actively-developed" }
2020

21+
[features]
22+
nightly = []
23+
2124
[dependencies]
2225
amadeus-core = { version = "=0.3.1", path = "../amadeus-core" }
2326
amadeus-types = { version = "=0.3.1", path = "../amadeus-types" }
@@ -31,7 +34,7 @@ once_cell = "1.0"
3134
rusoto_core = "0.44"
3235
rusoto_credential = "0.44"
3336
rusoto_s3 = "0.44"
34-
serde_closure = { version = "0.2", default-features = false }
37+
serde_closure = "0.3"
3538
serde = { version = "1.0", features = ["derive"] }
3639
tokio = "0.2"
3740
url = { version = "2.1", features = ["serde"] }

amadeus-aws/src/cloudfront.rs

Lines changed: 69 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,11 @@
22

33
use async_compression::futures::bufread::GzipDecoder;
44
use chrono::{NaiveDate, NaiveDateTime, NaiveTime, TimeZone, Utc};
5-
use futures::{future, io::BufReader, AsyncBufReadExt, FutureExt, StreamExt, TryStreamExt};
5+
use futures::{future, io::BufReader, AsyncBufReadExt, FutureExt, Stream, StreamExt, TryStreamExt};
66
use http::{Method, StatusCode};
77
use rusoto_s3::{GetObjectRequest, Object, S3Client, S3};
88
use serde::{Deserialize, Serialize};
9-
use serde_closure::FnMut;
9+
use serde_closure::FnMutNamed;
1010
use std::{
1111
convert::identity, io::{self}, time::Duration
1212
};
@@ -52,20 +52,76 @@ impl Cloudfront {
5252
})
5353
}
5454
}
55+
56+
#[cfg(not(feature = "nightly"))]
57+
type Output = std::pin::Pin<Box<dyn Stream<Item = Result<CloudfrontRow, AwsError>> + Send>>;
58+
#[cfg(feature = "nightly")]
59+
type Output = impl Stream<Item = Result<CloudfrontRow, AwsError>> + Send;
60+
61+
FnMutNamed! {
62+
pub type Closure<> = |self, credentials: AwsCredentials, region: AwsRegion, bucket: String|key=> String| -> Output where {
63+
let (credentials, region, bucket) =
64+
(self.credentials.clone(), self.region.clone(), self.bucket.clone());
65+
#[allow(clippy::let_and_return)]
66+
let ret = async move {
67+
let client = S3Client::new_with(
68+
Ref(once_cell::sync::Lazy::force(&RUSOTO_DISPATCHER)),
69+
credentials,
70+
region,
71+
);
72+
let rows = retry(|| {
73+
client.get_object(GetObjectRequest {
74+
bucket: bucket.clone(),
75+
key: key.clone(),
76+
..GetObjectRequest::default()
77+
})
78+
})
79+
.await
80+
.map_err(AwsError::from)
81+
.map(|res| {
82+
let body = BufReader::new(TryStreamExt::into_async_read(res.body.unwrap()));
83+
let mut body = GzipDecoder::new(body); // Content-Encoding isn't set, so decode manually
84+
body.multiple_members(true);
85+
BufReader::new(body)
86+
.lines()
87+
.filter(|x: &Result<String, io::Error>| {
88+
future::ready(if let Ok(x) = x {
89+
x.chars().find(|x| !x.is_whitespace()) != Some('#')
90+
} else {
91+
true
92+
})
93+
})
94+
.then(|x: Result<String, io::Error>| async {
95+
if let Ok(x) = x {
96+
Ok(CloudfrontRow::from_line(&x))
97+
} else {
98+
Err(AwsError::from(x.err().unwrap()))
99+
}
100+
})
101+
});
102+
ResultExpandIter::new(rows)
103+
}
104+
.flatten_stream()
105+
.map(|x: Result<Result<CloudfrontRow, _>, _>| x.and_then(identity));
106+
#[cfg(not(feature = "nightly"))]
107+
let ret = ret.boxed();
108+
ret
109+
}
110+
}
111+
55112
impl Source for Cloudfront {
56113
type Item = CloudfrontRow;
57114
type Error = AwsError;
58115

59-
#[cfg(not(doc))]
60-
type ParStream =
61-
impl amadeus_core::par_stream::ParallelStream<Item = Result<Self::Item, Self::Error>>;
62-
#[cfg(doc)]
63-
type ParStream =
64-
DistParStream<amadeus_core::util::ImplDistributedStream<Result<Self::Item, Self::Error>>>;
65-
#[cfg(not(doc))]
116+
type ParStream = DistParStream<Self::DistStream>;
117+
#[cfg(not(feature = "nightly"))]
118+
#[allow(clippy::type_complexity)]
119+
type DistStream = amadeus_core::par_stream::FlatMap<
120+
amadeus_core::into_par_stream::IterDistStream<std::vec::IntoIter<String>>,
121+
Closure,
122+
>;
123+
#[cfg(feature = "nightly")]
66124
type DistStream = impl DistributedStream<Item = Result<Self::Item, Self::Error>>;
67-
#[cfg(doc)]
68-
type DistStream = amadeus_core::util::ImplDistributedStream<Result<Self::Item, Self::Error>>;
69125

70126
fn par_stream(self) -> Self::ParStream {
71127
DistParStream::new(self.dist_stream())
@@ -78,57 +134,9 @@ impl Source for Cloudfront {
78134
objects,
79135
credentials,
80136
} = self;
81-
let ret = objects
137+
objects
82138
.into_dist_stream()
83-
.flat_map(FnMut!(move |key: String| {
84-
let (credentials, region, bucket) =
85-
(credentials.clone(), region.clone(), bucket.clone());
86-
async move {
87-
let client = S3Client::new_with(
88-
Ref(once_cell::sync::Lazy::force(&RUSOTO_DISPATCHER)),
89-
credentials,
90-
region,
91-
);
92-
let rows = retry(|| {
93-
client.get_object(GetObjectRequest {
94-
bucket: bucket.clone(),
95-
key: key.clone(),
96-
..GetObjectRequest::default()
97-
})
98-
})
99-
.await
100-
.map_err(AwsError::from)
101-
.map(|res| {
102-
let body = BufReader::new(TryStreamExt::into_async_read(res.body.unwrap()));
103-
let mut body = GzipDecoder::new(body); // Content-Encoding isn't set, so decode manually
104-
body.multiple_members(true);
105-
BufReader::new(body)
106-
.lines()
107-
.filter(|x: &Result<String, io::Error>| {
108-
future::ready(if let Ok(x) = x {
109-
x.chars().find(|x| !x.is_whitespace()) != Some('#')
110-
} else {
111-
true
112-
})
113-
})
114-
.then(|x: Result<String, io::Error>| async {
115-
if let Ok(x) = x {
116-
Ok(CloudfrontRow::from_line(&x))
117-
} else {
118-
Err(AwsError::from(x.err().unwrap()))
119-
}
120-
})
121-
});
122-
ResultExpandIter::new(rows)
123-
}
124-
.flatten_stream()
125-
}))
126-
.map(FnMut!(
127-
|x: Result<Result<CloudfrontRow, _>, _>| x.and_then(self::identity)
128-
));
129-
#[cfg(doc)]
130-
let ret = amadeus_core::util::ImplDistributedStream::new(ret);
131-
ret
139+
.flat_map(Closure::new(credentials, region, bucket))
132140
}
133141
}
134142

amadeus-aws/src/lib.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
//! This is a support crate of [Amadeus](https://github.com/constellation-rs/amadeus) and is not intended to be used directly. These types are re-exposed in [`amadeus::source`](https://docs.rs/amadeus/0.3/amadeus/source/index.html).
88
99
#![doc(html_root_url = "https://docs.rs/amadeus-aws/0.3.1")]
10-
#![feature(type_alias_impl_trait)]
10+
#![cfg_attr(feature = "nightly", feature(type_alias_impl_trait))]
1111
#![warn(
1212
// missing_copy_implementations,
1313
// missing_debug_implementations,

amadeus-commoncrawl/Cargo.toml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,9 @@ edition = "2018"
1818
azure-devops = { project = "alecmocatta/amadeus", pipeline = "tests", build = "26" }
1919
maintenance = { status = "actively-developed" }
2020

21+
[features]
22+
nightly = []
23+
2124
[dependencies]
2225
amadeus-core = { version = "=0.3.1", path = "../amadeus-core" }
2326
amadeus-types = { version = "=0.3.1", path = "../amadeus-types" }
@@ -28,7 +31,7 @@ pin-project = "0.4"
2831
reqwest = "0.10"
2932
reqwest_resume = "0.3"
3033
serde = { version = "1.0", features = ["derive"] }
31-
serde_closure = { version = "0.2", default-features = false }
34+
serde_closure = "0.3"
3235
url = { version = "2.1", features = ["serde"] }
3336

3437
# dependency of reqwest/native-tls; ensure it's vendored to simplify cross-compilation

amadeus-commoncrawl/src/lib.rs

Lines changed: 37 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
//! This is a support crate of [Amadeus](https://github.com/constellation-rs/amadeus) and is not intended to be used directly. These types are re-exposed in [`amadeus::source`](https://docs.rs/amadeus/0.3/amadeus/source/index.html).
88
99
#![doc(html_root_url = "https://docs.rs/amadeus-commoncrawl/0.3.1")]
10-
#![feature(type_alias_impl_trait)]
10+
#![cfg_attr(feature = "nightly", feature(type_alias_impl_trait))]
1111
#![warn(
1212
// missing_copy_implementations,
1313
// missing_debug_implementations,
@@ -30,9 +30,9 @@ mod commoncrawl;
3030
mod parser;
3131

3232
use async_compression::futures::bufread::GzipDecoder; // TODO: use stream or https://github.com/alexcrichton/flate2-rs/pull/214
33-
use futures::{io::BufReader, AsyncBufReadExt, FutureExt, StreamExt, TryStreamExt};
33+
use futures::{io::BufReader, AsyncBufReadExt, FutureExt, Stream, StreamExt, TryStreamExt};
3434
use reqwest_resume::ClientExt;
35-
use serde_closure::FnMut;
35+
use serde_closure::FnMutNamed;
3636
use std::{io, time};
3737

3838
use amadeus_core::{
@@ -80,30 +80,15 @@ impl CommonCrawl {
8080
}
8181
}
8282

83-
impl Source for CommonCrawl {
84-
type Item = Webpage<'static>;
85-
type Error = io::Error;
83+
#[cfg(not(feature = "nightly"))]
84+
type Output = std::pin::Pin<Box<dyn Stream<Item = Result<Webpage<'static>, io::Error>> + Send>>;
85+
#[cfg(feature = "nightly")]
86+
type Output = impl Stream<Item = Result<Webpage<'static>, io::Error>> + Send;
8687

87-
#[cfg(not(doc))]
88-
type ParStream =
89-
impl amadeus_core::par_stream::ParallelStream<Item = Result<Self::Item, Self::Error>>;
90-
#[cfg(doc)]
91-
type ParStream =
92-
DistParStream<amadeus_core::util::ImplDistributedStream<Result<Self::Item, Self::Error>>>;
93-
#[cfg(not(doc))]
94-
type DistStream = impl DistributedStream<Item = Result<Self::Item, Self::Error>>;
95-
#[cfg(doc)]
96-
type DistStream = amadeus_core::util::ImplDistributedStream<Result<Self::Item, Self::Error>>;
97-
98-
fn par_stream(self) -> Self::ParStream {
99-
DistParStream::new(self.dist_stream())
100-
}
101-
#[allow(clippy::let_and_return)]
102-
fn dist_stream(self) -> Self::DistStream {
103-
let ret = self
104-
.urls
105-
.into_dist_stream()
106-
.flat_map(FnMut!(|url: String| async move {
88+
FnMutNamed! {
89+
pub type Closure<> = |self|url=> String| -> Output where {
90+
#[allow(clippy::let_and_return)]
91+
let ret = async move {
10792
let body = reqwest_resume::get(url.parse().unwrap()).await.unwrap();
10893
let body = body
10994
.bytes_stream()
@@ -113,9 +98,32 @@ impl Source for CommonCrawl {
11398
body.multiple_members(true);
11499
WarcParser::new(body)
115100
}
116-
.flatten_stream()));
117-
#[cfg(doc)]
118-
let ret = amadeus_core::util::ImplDistributedStream::new(ret);
101+
.flatten_stream();
102+
#[cfg(not(feature = "nightly"))]
103+
let ret = ret.boxed();
119104
ret
120105
}
121106
}
107+
108+
impl Source for CommonCrawl {
109+
type Item = Webpage<'static>;
110+
type Error = io::Error;
111+
112+
type ParStream = DistParStream<Self::DistStream>;
113+
#[cfg(not(feature = "nightly"))]
114+
#[allow(clippy::type_complexity)]
115+
type DistStream = amadeus_core::par_stream::FlatMap<
116+
amadeus_core::into_par_stream::IterDistStream<std::vec::IntoIter<String>>,
117+
Closure,
118+
>;
119+
#[cfg(feature = "nightly")]
120+
type DistStream = impl DistributedStream<Item = Result<Self::Item, Self::Error>>;
121+
122+
fn par_stream(self) -> Self::ParStream {
123+
DistParStream::new(self.dist_stream())
124+
}
125+
#[allow(clippy::let_and_return)]
126+
fn dist_stream(self) -> Self::DistStream {
127+
self.urls.into_dist_stream().flat_map(Closure::new())
128+
}
129+
}

amadeus-core/Cargo.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ azure-devops = { project = "alecmocatta/amadeus", pipeline = "tests", build = "2
1919
maintenance = { status = "actively-developed" }
2020

2121
[features]
22-
doc = ["serde_closure/nightly"] # this cleans up Fn*() in docs
22+
# uses Fn*() sugar (better docs), and SIMD for streaming_algorithms
2323
nightly = ["streaming_algorithms/nightly"]
2424

2525
[dependencies]
@@ -34,7 +34,7 @@ pin-project = "0.4"
3434
rand = "0.7"
3535
replace_with = "0.1"
3636
serde = { version = "1.0", features = ["derive"] }
37-
serde_closure = { version = "0.2", default-features = false }
37+
serde_closure = "0.3"
3838
streaming_algorithms = "0.2"
3939
sum = { version = "0.1", features = ["futures", "serde"] }
4040
walkdir = "2.2"

amadeus-core/src/file.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -209,7 +209,7 @@ pub trait Partition: Clone + fmt::Debug + ProcessSend + 'static {
209209
}
210210
#[allow(clippy::len_without_is_empty)]
211211
#[async_trait]
212-
pub trait Page {
212+
pub trait Page: Send {
213213
type Error: Error + Clone + PartialEq + Into<io::Error> + ProcessSend + 'static;
214214

215215
fn len(&self) -> u64;
@@ -228,7 +228,7 @@ pub trait Page {
228228
#[async_trait]
229229
impl<T: ?Sized> Page for &T
230230
where
231-
T: Page,
231+
T: Page + Sync,
232232
{
233233
type Error = T::Error;
234234

@@ -248,7 +248,7 @@ where
248248
#[async_trait]
249249
impl<T: ?Sized> Page for Arc<T>
250250
where
251-
T: Page,
251+
T: Page + Sync,
252252
{
253253
type Error = T::Error;
254254

amadeus-core/src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
//! This is a support crate of [Amadeus](https://github.com/constellation-rs/amadeus) and is not intended to be used directly. All functionality is re-exposed in [`amadeus`](https://docs.rs/amadeus/0.3/amadeus/).
88
99
#![doc(html_root_url = "https://docs.rs/amadeus-core/0.3.1")]
10+
#![cfg_attr(feature = "nightly", feature(unboxed_closures))]
1011
#![recursion_limit = "25600"]
1112
#![warn(
1213
// missing_copy_implementations,

0 commit comments

Comments
 (0)