Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
763 changes: 597 additions & 166 deletions Cargo.lock

Large diffs are not rendered by default.

44 changes: 29 additions & 15 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,35 +16,40 @@ exclude = ['/dist/', 'node_modules/', '/test/', '/test-assets/',]
crate-type = ["cdylib", "rlib"]

[features]
default = ["test-server"]
test-server = ["futures", "hyper", "tokio"]
default = ["build", "test-server"]
build = ["srtparse", "scraper", "frontmatter", "markdown", "atty", "mime", "reqwest", "indicatif"]
test-server = ["build", "futures", "hyper", "tokio"]

[dependencies]
toml = "0.5"
serde = { version = "1.0", features = ["derive"] }
bincode = "1.2.1"
rmp-serde = "0.14.3"
bincode = "1.2.1" # V2 index serialization
rmp-serde = "0.14.3" # V3 index serialization
serde_json = "1.0"
console_error_panic_hook = "0.1.6"
num-format = "0.4.0"
rust-stemmers = "1.2.0"
srtparse = "0.2.0"
htmlescape = "0.3.1"
scraper = "0.12.0"
frontmatter = "0.4.0"
markdown = "0.3.0"
once_cell = "1"
atty = "0.2"
once_cell = "1.5.2"
indicatif = {version = "0.15.0", optional = true}
# colored = {version = "2.0.0", optional = true}
srtparse = {version = "0.2.0", optional = true}
scraper = {version = "0.12.0", optional = true}
frontmatter = {version = "0.4.0", optional = true}
markdown = {version = "0.3.0", optional = true}
atty = {version = "0.2", optional = true}
mime = {version = "0.3.16", optional = true}
reqwest = { version = "0.11", features = ["blocking", "json"], optional = true} # for loading document contents from URLs
futures = {version = "0.3", optional = true}
hyper = {version = "0.13", optional = true}
tokio = { version = "0.2", features = ["full"], optional = true }
hyper = {version = "0.14", features = ["server"], optional = true}
tokio = { version = "1.2", features = ["full"], optional = true }
wasm-bindgen = { version = "0.2", features = ["serde-serialize"] }

[package.metadata.wasm-pack.profile.release]

# The wasm-opt binary bundled with wasm-pack is out of date, so we have to
# manually enable the "mutable globals" feature so that wasm-opt doesn't
# reject the wasm blob that gets generated.
# See: https://github.com/rustwasm/wasm-pack/issues/886#issuecomment-667669802
[package.metadata.wasm-pack.profile.release]
wasm-opt = ["-Oz", "--enable-mutable-globals"]

[dev-dependencies]
Expand All @@ -55,4 +60,13 @@ name = "basic"
harness = false

[profile.release]
lto = true

# This config option gives LLVM more opportunities to inline and prune functions,
# which makes the WASM binary smaller. Adding this was inspired by
# https://rustwasm.github.io/book/reference/code-size.html
#
# @TODO 2021-02-24: Starting with commit 7800aedfbb6b0e97a075cf6924077db2ddff7939,
# keeping this option in the full release build configuration means that
# `cargo build --release` mysteriously fails on the benchmark machine, so
# I commented it out.
# lto = true
4 changes: 3 additions & 1 deletion scripts/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -19,4 +19,6 @@ if [ ! -f "./test/federalist/federalist-1.txt" ]; then
./scripts/download_test_corpora.sh
fi

yarn build:test-index:federalist
./target/release/stork --build test/federalist-config/federalist.toml
mkdir -p dist
cp test/*.st dist
2 changes: 1 addition & 1 deletion src/bin/stork/display_timings.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ macro_rules! display_timings {
($( $t: expr),*) => {
vec![
$(
TimingStatistic {
$crate::display_timings::TimingStatistic {
duration: $t.0,
description: $t.1.to_string()
},
Expand Down
32 changes: 18 additions & 14 deletions src/bin/stork/main.rs
Original file line number Diff line number Diff line change
@@ -1,24 +1,19 @@
extern crate stork_search as stork;

mod argparse;
use argparse::Argparse;
use std::env;
use std::fs::File;
use std::io::{BufReader, Read};
use std::time::Instant;

mod argparse;
mod display_timings;
mod test_server;
use atty::Stream;
use test_server::serve;

mod display_timings;
use display_timings::*;
use num_format::{Locale, ToFormattedString};

use std::env;
use std::io::{BufReader, Read};
use std::time::Instant;
use std::{fs::File, io};
use stork::config::Config;
use stork::LatestVersion::structs::Index;

use num_format::{Locale, ToFormattedString};

pub type ExitCode = i32;
pub const EXIT_SUCCESS: ExitCode = 0;
pub const EXIT_FAILURE: ExitCode = 1;
Expand Down Expand Up @@ -51,15 +46,24 @@ USAGE:
}

fn main() {
let mut a = Argparse::new();
let mut a = argparse::Argparse::new();
a.register_range("build", build_handler, 0..2);
a.register("test", test_handler, 1);
a.register("search", search_handler, 2);
a.register_help(&help_text());
std::process::exit(a.exec(env::args().collect()));
}

/// Fallback `build_index` used when the crate is compiled without the
/// `build` feature.
///
/// Index building is unavailable in this configuration, so the config path
/// argument is ignored. The function prints an explanation to stdout and
/// then panics; it never actually returns a `(Config, Index)` pair.
///
/// # Panics
///
/// Always panics after printing the explanatory message.
#[cfg(not(feature = "build"))]
pub fn build_index(_config: Option<&String>) -> (Config, Index) {
    // The message must describe the capability that is actually missing
    // (index building), not the test server.
    println!("Stork was not compiled with support for building indexes. Rebuild the crate with default features to enable building indexes.\nIf you don't expect to see this, file a bug: https://jil.im/storkbug\n");
    panic!()
}

#[cfg(feature = "build")]
pub fn build_index(optional_config_path: Option<&String>) -> (Config, Index) {
use atty::Stream;
use std::io;
// Potential refactor: this method could return a result instead of
// std::process::exiting when there's a failure.

Expand Down Expand Up @@ -131,7 +135,7 @@ fn build_handler(args: &[String]) {

fn test_handler(args: &[String]) {
let (_, index) = build_index(args.get(2));
let _r = serve(index);
let _r = test_server::serve(index);
}

fn search_handler(args: &[String]) {
Expand Down
5 changes: 2 additions & 3 deletions src/bin/stork/test_server/mod.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
extern crate stork_search as stork;

use stork::LatestVersion::structs::Index;

#[cfg(not(feature = "test-server"))]
Expand All @@ -11,11 +10,11 @@ pub fn serve(_index: Index) -> Result<(), Box<dyn std::error::Error>> {
#[cfg(feature = "test-server")]
pub fn serve(index: Index) -> Result<(), Box<dyn std::error::Error>> {
use hyper::service::{make_service_fn, service_fn};
use hyper::{Body, Request, Response, Server, StatusCode};
use hyper::{server::Server, Body, Request, Response, StatusCode};
use std::convert::Infallible;
use tokio::runtime::Runtime;

let mut rt = Runtime::new()?;
let rt = Runtime::new()?;
let mut index_binary: Vec<u8> = Vec::new();
index.write_to_buffer(&mut index_binary);

Expand Down
148 changes: 65 additions & 83 deletions src/config/file.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ use super::{FrontmatterConfig, StemmingConfig};
use core::fmt;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::path::Path;

type Fields = HashMap<String, String>;

Expand Down Expand Up @@ -30,30 +29,6 @@ pub struct File {
pub fields: Fields,
}

impl File {
    /// Determines which filetype should be used for this file.
    ///
    /// A filetype explicitly set in `self.filetype` always takes precedence.
    /// Otherwise, if the source is a `DataSource::FilePath`, the filetype is
    /// guessed from the path's extension, compared case-insensitively
    /// (ASCII only).
    ///
    /// Returns `None` when no filetype was specified and either the source
    /// is not a file path, the path has no extension, the extension is not
    /// valid UTF-8, or the extension is not one of the recognized ones.
    pub fn computed_filetype(&self) -> Option<Filetype> {
        if let Some(user_specified_filetype) = self.filetype.clone() {
            return Some(user_specified_filetype);
        }

        // Only file paths carry an extension that a filetype can be
        // inferred from.
        let path_string = match &self.source {
            DataSource::FilePath(path_string) => path_string,
            _ => return None,
        };

        let extension = Path::new(path_string).extension()?.to_str()?;
        match extension.to_ascii_lowercase().as_str() {
            "html" | "htm" => Some(Filetype::HTML),
            "srt" => Some(Filetype::SRTSubtitle),
            "txt" => Some(Filetype::PlainText),
            "md" | "mdown" | "markdown" => Some(Filetype::Markdown),
            _ => None,
        }
    }
}
impl fmt::Display for File {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(
Expand Down Expand Up @@ -96,89 +71,96 @@ pub enum Filetype {

#[cfg(test)]
mod tests {
use super::*;
// use super::*;
#[test]
#[ignore = "computed_filetype is no more"]
fn compute_from_explicit_filetype() {
assert_eq!(
File {
filetype: Some(Filetype::PlainText),
..Default::default()
}
.computed_filetype()
.unwrap(),
Filetype::PlainText
)
// assert_eq!(
// File {
// filetype: Some(Filetype::PlainText),
// ..Default::default()
// }
// .computed_filetype()
// .unwrap(),
// Filetype::PlainText
// )
}

#[test]
#[ignore = "computed_filetype is no more"]
fn compute_from_implicit_filetype_plaintext() {
assert_eq!(
File {
source: DataSource::FilePath("blah.txt".to_string()),
..Default::default()
}
.computed_filetype()
.unwrap(),
Filetype::PlainText
)
// assert_eq!(
// File {
// source: DataSource::FilePath("blah.txt".to_string()),
// ..Default::default()
// }
// .computed_filetype()
// .unwrap(),
// Filetype::PlainText
// )
}

#[test]
#[ignore = "computed_filetype is no more"]
fn compute_from_implicit_filetype_html() {
assert_eq!(
File {
source: DataSource::FilePath("blah.html".to_string()),
..Default::default()
}
.computed_filetype()
.unwrap(),
Filetype::HTML
)
// assert_eq!(
// File {
// source: DataSource::FilePath("blah.html".to_string()),
// ..Default::default()
// }
// .computed_filetype()
// .unwrap(),
// Filetype::HTML
// )
}

#[test]
#[ignore = "computed_filetype is no more"]
fn compute_from_implicit_filetype_srt() {
assert_eq!(
File {
source: DataSource::FilePath("blah.srt".to_string()),
..Default::default()
}
.computed_filetype()
.unwrap(),
Filetype::SRTSubtitle
)
// assert_eq!(
// File {
// source: DataSource::FilePath("blah.srt".to_string()),
// ..Default::default()
// }
// .computed_filetype()
// .unwrap(),
// Filetype::SRTSubtitle
// )
}

#[test]
#[ignore = "computed_filetype is no more"]
fn compute_from_implicit_filetype_html_allcaps() {
assert_eq!(
File {
source: DataSource::FilePath("MYFILE.HTM".to_string()),
..Default::default()
}
.computed_filetype()
.unwrap(),
Filetype::HTML
)
// assert_eq!(
// File {
// source: DataSource::FilePath("MYFILE.HTM".to_string()),
// ..Default::default()
// }
// .computed_filetype()
// .unwrap(),
// Filetype::HTML
// )
}

#[test]
#[ignore = "computed_filetype is no more"]
fn compute_from_implicit_filetype_error() {
assert!(File {
source: DataSource::FilePath("myfile.derp".to_string()),
..Default::default()
}
.computed_filetype()
.is_none())
// assert!(File {
// source: DataSource::FilePath("myfile.derp".to_string()),
// ..Default::default()
// }
// .computed_filetype()
// .is_none())
}

#[test]
#[ignore = "computed_filetype is no more"]
fn compute_from_no_filetype_error() {
assert!(File {
source: DataSource::Contents("A long time ago...".to_string()),
..Default::default()
}
.computed_filetype()
.is_none())
// assert!(File {
// source: DataSource::Contents("A long time ago...".to_string()),
// ..Default::default()
// }
// .computed_filetype()
// .is_none())
}
}
Loading