Skip to content

Allow overriding module name for uv build backend #11884

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
Mar 7, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 3 additions & 0 deletions crates/uv-build-backend/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ use thiserror::Error;
use tracing::debug;
use uv_fs::Simplified;
use uv_globfilter::PortableGlobError;
use uv_pypi_types::IdentifierParseError;

#[derive(Debug, Error)]
pub enum Error {
Expand All @@ -24,6 +25,8 @@ pub enum Error {
Toml(#[from] toml::de::Error),
#[error("Invalid pyproject.toml")]
Validation(#[from] ValidationError),
#[error(transparent)]
Identifier(#[from] IdentifierParseError),
#[error("Unsupported glob expression in: `{field}`")]
PortableGlob {
field: String,
Expand Down
16 changes: 13 additions & 3 deletions crates/uv-build-backend/src/metadata.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ use uv_pep440::{Version, VersionSpecifiers};
use uv_pep508::{
ExtraOperator, MarkerExpression, MarkerTree, MarkerValueExtra, Requirement, VersionOrUrl,
};
use uv_pypi_types::{Metadata23, VerbatimParsedUrl};
use uv_pypi_types::{Identifier, Metadata23, VerbatimParsedUrl};

use crate::serde_verbatim::SerdeVerbatim;
use crate::Error;
Expand Down Expand Up @@ -803,7 +803,7 @@ pub(crate) struct ToolUv {
/// When building the source distribution, the following files and directories are included:
/// * `pyproject.toml`
/// * The module under `tool.uv.build-backend.module-root`, by default
/// `src/<project_name_with_underscores>/**`.
/// `src/<module-name or project_name_with_underscores>/**`.
/// * `project.license-files` and `project.readme`.
/// * All directories under `tool.uv.build-backend.data`.
/// * All patterns from `tool.uv.build-backend.source-include`.
Expand All @@ -812,7 +812,7 @@ pub(crate) struct ToolUv {
///
/// When building the wheel, the following files and directories are included:
/// * The module under `tool.uv.build-backend.module-root`, by default
/// `src/<project_name_with_underscores>/**`.
/// `src/<module-name or project_name_with_underscores>/**`.
/// * `project.license-files` and `project.readme`, as part of the project metadata.
/// * Each directory under `tool.uv.build-backend.data`, as data directories.
///
Expand Down Expand Up @@ -846,6 +846,15 @@ pub(crate) struct BuildBackendSettings {
/// using the flat layout over the src layout.
pub(crate) module_root: PathBuf,

/// The name of the module directory inside `module-root`.
///
/// The default module name is the package name with dots and dashes replaced by underscores.
///
/// Note that using this option runs the risk of creating two packages with different names but
/// the same module names. Installing such packages together leads to unspecified behavior,
/// often with corrupted files or directory trees.
pub(crate) module_name: Option<Identifier>,

/// Glob expressions which files and directories to additionally include in the source
/// distribution.
///
Expand Down Expand Up @@ -877,6 +886,7 @@ impl Default for BuildBackendSettings {
fn default() -> Self {
Self {
module_root: PathBuf::from("src"),
module_name: None,
source_include: Vec::new(),
default_excludes: true,
source_exclude: Vec::new(),
Expand Down
14 changes: 13 additions & 1 deletion crates/uv-build-backend/src/source_dist.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,13 @@ use globset::{Glob, GlobSet};
use std::io;
use std::io::{BufReader, Cursor};
use std::path::{Path, PathBuf};
use std::str::FromStr;
use tar::{EntryType, Header};
use tracing::{debug, trace};
use uv_distribution_filename::{SourceDistExtension, SourceDistFilename};
use uv_fs::Simplified;
use uv_globfilter::{parse_portable_glob, GlobDirFilter};
use uv_pypi_types::Identifier;
use uv_warnings::warn_user_once;
use walkdir::WalkDir;

Expand Down Expand Up @@ -65,11 +67,21 @@ fn source_dist_matcher(
let mut includes: Vec<String> = settings.source_include;
// pyproject.toml is always included.
includes.push(globset::escape("pyproject.toml"));

let module_name = if let Some(module_name) = settings.module_name {
module_name
} else {
// Should never error, the rules for package names (in dist-info formatting) are stricter
// than those for identifiers
Identifier::from_str(pyproject_toml.name().as_dist_info_name().as_ref())?
};
debug!("Module name: `{:?}`", module_name);

// The wheel must not include any files included by the source distribution (at least until we
// have files generated in the source dist -> wheel build step).
let import_path = &settings
.module_root
.join(pyproject_toml.name().as_dist_info_name().as_ref())
.join(module_name.as_ref())
.portable_display()
.to_string();
includes.push(format!("{}/**", globset::escape(import_path)));
Expand Down
21 changes: 16 additions & 5 deletions crates/uv-build-backend/src/wheel.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
use std::io::{BufReader, Read, Write};
use std::path::{Path, PathBuf};
use std::{io, mem};

use fs_err::File;
use globset::{GlobSet, GlobSetBuilder};
use itertools::Itertools;
use sha2::{Digest, Sha256};
use std::io::{BufReader, Read, Write};
use std::path::{Path, PathBuf};
use std::str::FromStr;
use std::{io, mem};
use tracing::{debug, trace};
use walkdir::WalkDir;
use zip::{CompressionMethod, ZipWriter};
Expand All @@ -14,6 +14,7 @@ use uv_distribution_filename::WheelFilename;
use uv_fs::Simplified;
use uv_globfilter::{parse_portable_glob, GlobDirFilter};
use uv_platform_tags::{AbiTag, LanguageTag, PlatformTag};
use uv_pypi_types::Identifier;
use uv_warnings::warn_user_once;

use crate::metadata::{BuildBackendSettings, DEFAULT_EXCLUDES};
Expand Down Expand Up @@ -128,7 +129,17 @@ fn write_wheel(
return Err(Error::AbsoluteModuleRoot(settings.module_root.clone()));
}
let strip_root = source_tree.join(settings.module_root);
let module_root = strip_root.join(pyproject_toml.name().as_dist_info_name().as_ref());

let module_name = if let Some(module_name) = settings.module_name {
module_name
} else {
// Should never error, the rules for package names (in dist-info formatting) are stricter
// than those for identifiers
Identifier::from_str(pyproject_toml.name().as_dist_info_name().as_ref())?
};
debug!("Module name: `{:?}`", module_name);

let module_root = strip_root.join(module_name.as_ref());
if !module_root.join("__init__.py").is_file() {
return Err(Error::MissingModule(module_root));
}
Expand Down
3 changes: 2 additions & 1 deletion crates/uv-pypi-types/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ mailparse = { workspace = true }
regex = { workspace = true }
rkyv = { workspace = true }
schemars = { workspace = true, optional = true }
serde = { workspace = true, optional = true }
serde = { workspace = true }
serde-untagged = { workspace = true }
thiserror = { workspace = true }
toml = { workspace = true }
Expand All @@ -42,6 +42,7 @@ url = { workspace = true }

[dev-dependencies]
anyhow = { workspace = true }
insta = { version = "1.40.0" }

[features]
schemars = ["dep:schemars", "uv-normalize/schemars"]
160 changes: 160 additions & 0 deletions crates/uv-pypi-types/src/identifier.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,160 @@
use std::fmt::Display;
use std::str::FromStr;
use thiserror::Error;

/// Simplified Python identifier.
///
/// We don't match Python's identifier rules
/// (<https://docs.python.org/3.13/reference/lexical_analysis.html#identifiers>) exactly
/// (we just use Rust's `is_alphabetic`) and we don't convert to NFKC, but it's good enough
/// for our validation purposes.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What does wrong if we say an identifier is valid but Python's rules says it isn't?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Python's and Rust's documentation reference different parts of the unicode standard for what's an allowed letter, but I strongly suspect that no one will actually into a case where they disagree.

#[derive(Debug, Clone, Hash, PartialEq, Eq, PartialOrd, Ord)]
pub struct Identifier(Box<str>);

#[derive(Debug, Clone, Error)]
pub enum IdentifierParseError {
#[error("An identifier must not be empty")]
Empty,
#[error(
"Invalid first character `{first}` for identifier `{identifier}`, expected an underscore or an alphabetic character"
)]
InvalidFirstChar { first: char, identifier: Box<str> },
#[error(
"Invalid character `{invalid_char}` at position {pos} for identifier `{identifier}`, \
expected an underscore or an alphanumeric character"
)]
InvalidChar {
pos: usize,
invalid_char: char,
identifier: Box<str>,
},
}

impl Identifier {
pub fn new(identifier: impl Into<Box<str>>) -> Result<Self, IdentifierParseError> {
let identifier = identifier.into();
let mut chars = identifier.chars().enumerate();
let (_, first_char) = chars.next().ok_or(IdentifierParseError::Empty)?;
if first_char != '_' && !first_char.is_alphabetic() {
return Err(IdentifierParseError::InvalidFirstChar {
first: first_char,
identifier,
});
}

for (pos, current_char) in chars {
if current_char != '_' && !current_char.is_alphanumeric() {
return Err(IdentifierParseError::InvalidChar {
// Make the position 1-indexed
pos: pos + 1,
invalid_char: current_char,
identifier,
});
}
}

Ok(Self(identifier))
}
}

impl FromStr for Identifier {
type Err = IdentifierParseError;

fn from_str(identifier: &str) -> Result<Self, Self::Err> {
Self::new(identifier.to_string())
}
}

impl Display for Identifier {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "{}", self.0)
}
}

impl AsRef<str> for Identifier {
fn as_ref(&self) -> &str {
&self.0
}
}

impl<'de> serde::de::Deserialize<'de> for Identifier {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: serde::de::Deserializer<'de>,
{
let s = String::deserialize(deserializer)?;
Identifier::from_str(&s).map_err(serde::de::Error::custom)
}
}

#[cfg(test)]
mod tests {
use super::*;
use insta::assert_snapshot;

#[test]
fn valid() {
let valid_ids = vec![
"abc",
"_abc",
"a_bc",
"a123",
"snake_case",
"camelCase",
"PascalCase",
// A single character is valid
"_",
"a",
// Unicode
"α",
"férrîs",
"안녕하세요",
];

for valid_id in valid_ids {
assert!(Identifier::from_str(valid_id).is_ok(), "{}", valid_id);
}
}

#[test]
fn empty() {
assert_snapshot!(Identifier::from_str("").unwrap_err(), @"An identifier must not be empty");
}

#[test]
fn invalid_first_char() {
assert_snapshot!(
Identifier::from_str("1foo").unwrap_err(),
@"Invalid first character `1` for identifier `1foo`, expected an underscore or an alphabetic character"
);
assert_snapshot!(
Identifier::from_str("$foo").unwrap_err(),
@"Invalid first character `$` for identifier `$foo`, expected an underscore or an alphabetic character"
);
assert_snapshot!(
Identifier::from_str(".foo").unwrap_err(),
@"Invalid first character `.` for identifier `.foo`, expected an underscore or an alphabetic character"
);
}

#[test]
fn invalid_char() {
// A dot in module names equals a path separator, which is a separate problem.
assert_snapshot!(
Identifier::from_str("foo.bar").unwrap_err(),
@"Invalid character `.` at position 4 for identifier `foo.bar`, expected an underscore or an alphanumeric character"
);
assert_snapshot!(
Identifier::from_str("foo-bar").unwrap_err(),
@"Invalid character `-` at position 4 for identifier `foo-bar`, expected an underscore or an alphanumeric character"
);
assert_snapshot!(
Identifier::from_str("foo_bar$").unwrap_err(),
@"Invalid character `$` at position 8 for identifier `foo_bar$`, expected an underscore or an alphanumeric character"
);
assert_snapshot!(
Identifier::from_str("foo🦀bar").unwrap_err(),
@"Invalid character `🦀` at position 4 for identifier `foo🦀bar`, expected an underscore or an alphanumeric character"
);
}
}
2 changes: 2 additions & 0 deletions crates/uv-pypi-types/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ pub use base_url::*;
pub use conflicts::*;
pub use dependency_groups::*;
pub use direct_url::*;
pub use identifier::*;
pub use lenient_requirement::*;
pub use marker_environment::*;
pub use metadata::*;
Expand All @@ -15,6 +16,7 @@ mod base_url;
mod conflicts;
mod dependency_groups;
mod direct_url;
mod identifier;
mod lenient_requirement;
mod marker_environment;
mod metadata;
Expand Down
6 changes: 3 additions & 3 deletions crates/uv-workspace/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ uv-normalize = { workspace = true }
uv-options-metadata = { workspace = true }
uv-pep440 = { workspace = true }
uv-pep508 = { workspace = true }
uv-pypi-types = { workspace = true, features = ["serde"] }
uv-pypi-types = { workspace = true }
uv-static = { workspace = true }
uv-warnings = { workspace = true }

Expand All @@ -38,8 +38,8 @@ schemars = { workspace = true, optional = true }
serde = { workspace = true, features = ["derive"] }
thiserror = { workspace = true }
tokio = { workspace = true }
toml = { workspace = true }
toml_edit = { workspace = true }
toml = { workspace = true }
toml_edit = { workspace = true }
tracing = { workspace = true }
url = { workspace = true }

Expand Down
Loading
Loading