astral-sh · konstin · Mar 7, 2025 · Mar 1, 2025 · Mar 3, 2025 · Mar 6, 2025
diff --git a/Cargo.lock b/Cargo.lock
diff --git a/crates/uv-build-backend/src/lib.rs b/crates/uv-build-backend/src/lib.rs
@@ -15,6 +15,7 @@ use thiserror::Error;
 use tracing::debug;
 use uv_fs::Simplified;
 use uv_globfilter::PortableGlobError;
+use uv_pypi_types::IdentifierParseError;
 
 #[derive(Debug, Error)]
 pub enum Error {
@@ -24,6 +25,8 @@ pub enum Error {
     Toml(#[from] toml::de::Error),
     #[error("Invalid pyproject.toml")]
     Validation(#[from] ValidationError),
+    #[error(transparent)]
+    Identifier(#[from] IdentifierParseError),
     #[error("Unsupported glob expression in: `{field}`")]
     PortableGlob {
         field: String,

diff --git a/crates/uv-build-backend/src/metadata.rs b/crates/uv-build-backend/src/metadata.rs
@@ -17,7 +17,7 @@ use uv_pep440::{Version, VersionSpecifiers};
 use uv_pep508::{
     ExtraOperator, MarkerExpression, MarkerTree, MarkerValueExtra, Requirement, VersionOrUrl,
 };
-use uv_pypi_types::{Metadata23, VerbatimParsedUrl};
+use uv_pypi_types::{Identifier, Metadata23, VerbatimParsedUrl};
 
 use crate::serde_verbatim::SerdeVerbatim;
 use crate::Error;
@@ -803,7 +803,7 @@ pub(crate) struct ToolUv {
 /// When building the source distribution, the following files and directories are included:
 /// * `pyproject.toml`
 /// * The module under `tool.uv.build-backend.module-root`, by default
-///   `src/<project_name_with_underscores>/**`.
+///   `src/<module-name or project_name_with_underscores>/**`.
 /// * `project.license-files` and `project.readme`.
 /// * All directories under `tool.uv.build-backend.data`.
 /// * All patterns from `tool.uv.build-backend.source-include`.
@@ -812,7 +812,7 @@ pub(crate) struct ToolUv {
 ///
 /// When building the wheel, the following files and directories are included:
 /// * The module under `tool.uv.build-backend.module-root`, by default
-///   `src/<project_name_with_underscores>/**`.
+///   `src/<module-name or project_name_with_underscores>/**`.
 /// * `project.license-files` and `project.readme`, as part of the project metadata.
 /// * Each directory under `tool.uv.build-backend.data`, as data directories.
 ///
@@ -846,6 +846,15 @@ pub(crate) struct BuildBackendSettings {
     /// using the flat layout over the src layout.
     pub(crate) module_root: PathBuf,
 
+    /// The name of the module directory inside `module-root`.
+    ///
+    /// The default module name is the package name with dots and dashes replaced by underscores.
+    ///
+    /// Note that using this option runs the risk of creating two packages with different names but
+    /// the same module names. Installing such packages together leads to unspecified behavior,
+    /// often with corrupted files or directory trees.
+    pub(crate) module_name: Option<Identifier>,
+
     /// Glob expressions which files and directories to additionally include in the source
     /// distribution.
     ///
@@ -877,6 +886,7 @@ impl Default for BuildBackendSettings {
     fn default() -> Self {
         Self {
             module_root: PathBuf::from("src"),
+            module_name: None,
             source_include: Vec::new(),
             default_excludes: true,
             source_exclude: Vec::new(),

diff --git a/crates/uv-build-backend/src/source_dist.rs b/crates/uv-build-backend/src/source_dist.rs
@@ -8,11 +8,13 @@ use globset::{Glob, GlobSet};
 use std::io;
 use std::io::{BufReader, Cursor};
 use std::path::{Path, PathBuf};
+use std::str::FromStr;
 use tar::{EntryType, Header};
 use tracing::{debug, trace};
 use uv_distribution_filename::{SourceDistExtension, SourceDistFilename};
 use uv_fs::Simplified;
 use uv_globfilter::{parse_portable_glob, GlobDirFilter};
+use uv_pypi_types::Identifier;
 use uv_warnings::warn_user_once;
 use walkdir::WalkDir;
 
@@ -65,11 +67,21 @@ fn source_dist_matcher(
     let mut includes: Vec<String> = settings.source_include;
     // pyproject.toml is always included.
     includes.push(globset::escape("pyproject.toml"));
+
+    let module_name = if let Some(module_name) = settings.module_name {
+        module_name
+    } else {
+        // Should never error, the rules for package names (in dist-info formatting) are stricter
+        // than those for identifiers
+        Identifier::from_str(pyproject_toml.name().as_dist_info_name().as_ref())?
+    };
+    debug!("Module name: `{:?}`", module_name);
+
     // The wheel must not include any files included by the source distribution (at least until we
     // have files generated in the source dist -> wheel build step).
     let import_path = &settings
         .module_root
-        .join(pyproject_toml.name().as_dist_info_name().as_ref())
+        .join(module_name.as_ref())
         .portable_display()
         .to_string();
     includes.push(format!("{}/**", globset::escape(import_path)));

diff --git a/crates/uv-build-backend/src/wheel.rs b/crates/uv-build-backend/src/wheel.rs
@@ -1,11 +1,11 @@
-use std::io::{BufReader, Read, Write};
-use std::path::{Path, PathBuf};
-use std::{io, mem};
-
 use fs_err::File;
 use globset::{GlobSet, GlobSetBuilder};
 use itertools::Itertools;
 use sha2::{Digest, Sha256};
+use std::io::{BufReader, Read, Write};
+use std::path::{Path, PathBuf};
+use std::str::FromStr;
+use std::{io, mem};
 use tracing::{debug, trace};
 use walkdir::WalkDir;
 use zip::{CompressionMethod, ZipWriter};
@@ -14,6 +14,7 @@ use uv_distribution_filename::WheelFilename;
 use uv_fs::Simplified;
 use uv_globfilter::{parse_portable_glob, GlobDirFilter};
 use uv_platform_tags::{AbiTag, LanguageTag, PlatformTag};
+use uv_pypi_types::Identifier;
 use uv_warnings::warn_user_once;
 
 use crate::metadata::{BuildBackendSettings, DEFAULT_EXCLUDES};
@@ -128,7 +129,17 @@ fn write_wheel(
         return Err(Error::AbsoluteModuleRoot(settings.module_root.clone()));
     }
     let strip_root = source_tree.join(settings.module_root);
-    let module_root = strip_root.join(pyproject_toml.name().as_dist_info_name().as_ref());
+
+    let module_name = if let Some(module_name) = settings.module_name {
+        module_name
+    } else {
+        // Should never error, the rules for package names (in dist-info formatting) are stricter
+        // than those for identifiers
+        Identifier::from_str(pyproject_toml.name().as_dist_info_name().as_ref())?
+    };
+    debug!("Module name: `{:?}`", module_name);
+
+    let module_root = strip_root.join(module_name.as_ref());
     if !module_root.join("__init__.py").is_file() {
         return Err(Error::MissingModule(module_root));
     }

diff --git a/crates/uv-pypi-types/Cargo.toml b/crates/uv-pypi-types/Cargo.toml
@@ -32,7 +32,7 @@ mailparse = { workspace = true }
 regex = { workspace = true }
 rkyv = { workspace = true }
 schemars = { workspace = true, optional = true }
-serde = { workspace = true, optional = true }
+serde = { workspace = true }
 serde-untagged = { workspace = true }
 thiserror = { workspace = true }
 toml = { workspace = true }
@@ -42,6 +42,7 @@ url = { workspace = true }
 
 [dev-dependencies]
 anyhow = { workspace = true }
+insta = { version = "1.40.0" }
 
 [features]
 schemars = ["dep:schemars", "uv-normalize/schemars"]
diff --git a/crates/uv-pypi-types/src/identifier.rs b/crates/uv-pypi-types/src/identifier.rs
@@ -0,0 +1,160 @@
+use std::fmt::Display;
+use std::str::FromStr;
+use thiserror::Error;
+
+/// Simplified Python identifier.
+///
+/// We don't match Python's identifier rules
+/// (<https://docs.python.org/3.13/reference/lexical_analysis.html#identifiers>) exactly
+/// (we just use Rust's `is_alphabetic`) and we don't convert to NFKC, but it's good enough
+/// for our validation purposes.
+#[derive(Debug, Clone, Hash, PartialEq, Eq, PartialOrd, Ord)]
+pub struct Identifier(Box<str>);
+
+#[derive(Debug, Clone, Error)]
+pub enum IdentifierParseError {
+    #[error("An identifier must not be empty")]
+    Empty,
+    #[error(
+        "Invalid first character `{first}` for identifier `{identifier}`, expected an underscore or an alphabetic character"
+    )]
+    InvalidFirstChar { first: char, identifier: Box<str> },
+    #[error(
+        "Invalid character `{invalid_char}` at position {pos} for identifier `{identifier}`, \
+        expected an underscore or an alphanumeric character"
+    )]
+    InvalidChar {
+        pos: usize,
+        invalid_char: char,
+        identifier: Box<str>,
+    },
+}
+
+impl Identifier {
+    pub fn new(identifier: impl Into<Box<str>>) -> Result<Self, IdentifierParseError> {
+        let identifier = identifier.into();
+        let mut chars = identifier.chars().enumerate();
+        let (_, first_char) = chars.next().ok_or(IdentifierParseError::Empty)?;
+        if first_char != '_' && !first_char.is_alphabetic() {
+            return Err(IdentifierParseError::InvalidFirstChar {
+                first: first_char,
+                identifier,
+            });
+        }
+
+        for (pos, current_char) in chars {
+            if current_char != '_' && !current_char.is_alphanumeric() {
+                return Err(IdentifierParseError::InvalidChar {
+                    // Make the position 1-indexed
+                    pos: pos + 1,
+                    invalid_char: current_char,
+                    identifier,
+                });
+            }
+        }
+
+        Ok(Self(identifier))
+    }
+}
+
+impl FromStr for Identifier {
+    type Err = IdentifierParseError;
+
+    fn from_str(identifier: &str) -> Result<Self, Self::Err> {
+        Self::new(identifier.to_string())
+    }
+}
+
+impl Display for Identifier {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "{}", self.0)
+    }
+}
+
+impl AsRef<str> for Identifier {
+    fn as_ref(&self) -> &str {
+        &self.0
+    }
+}
+
+impl<'de> serde::de::Deserialize<'de> for Identifier {
+    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
+    where
+        D: serde::de::Deserializer<'de>,
+    {
+        let s = String::deserialize(deserializer)?;
+        Identifier::from_str(&s).map_err(serde::de::Error::custom)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use insta::assert_snapshot;
+
+    #[test]
+    fn valid() {
+        let valid_ids = vec![
+            "abc",
+            "_abc",
+            "a_bc",
+            "a123",
+            "snake_case",
+            "camelCase",
+            "PascalCase",
+            // A single character is valid
+            "_",
+            "a",
+            // Unicode
+            "α",
+            "férrîs",
+            "안녕하세요",
+        ];
+
+        for valid_id in valid_ids {
+            assert!(Identifier::from_str(valid_id).is_ok(), "{}", valid_id);
+        }
+    }
+
+    #[test]
+    fn empty() {
+        assert_snapshot!(Identifier::from_str("").unwrap_err(), @"An identifier must not be empty");
+    }
+
+    #[test]
+    fn invalid_first_char() {
+        assert_snapshot!(
+            Identifier::from_str("1foo").unwrap_err(),
+            @"Invalid first character `1` for identifier `1foo`, expected an underscore or an alphabetic character"
+        );
+        assert_snapshot!(
+            Identifier::from_str("$foo").unwrap_err(),
+            @"Invalid first character `$` for identifier `$foo`, expected an underscore or an alphabetic character"
+        );
+        assert_snapshot!(
+            Identifier::from_str(".foo").unwrap_err(),
+            @"Invalid first character `.` for identifier `.foo`, expected an underscore or an alphabetic character"
+        );
+    }
+
+    #[test]
+    fn invalid_char() {
+        // A dot in module names equals a path separator, which is a separate problem.
+        assert_snapshot!(
+            Identifier::from_str("foo.bar").unwrap_err(),
+            @"Invalid character `.` at position 4 for identifier `foo.bar`, expected an underscore or an alphanumeric character"
+        );
+        assert_snapshot!(
+            Identifier::from_str("foo-bar").unwrap_err(),
+            @"Invalid character `-` at position 4 for identifier `foo-bar`, expected an underscore or an alphanumeric character"
+        );
+        assert_snapshot!(
+            Identifier::from_str("foo_bar$").unwrap_err(),
+            @"Invalid character `$` at position 8 for identifier `foo_bar$`, expected an underscore or an alphanumeric character"
+        );
+        assert_snapshot!(
+            Identifier::from_str("foo🦀bar").unwrap_err(),
+            @"Invalid character `🦀` at position 4 for identifier `foo🦀bar`, expected an underscore or an alphanumeric character"
+        );
+    }
+}
diff --git a/crates/uv-pypi-types/src/lib.rs b/crates/uv-pypi-types/src/lib.rs
@@ -2,6 +2,7 @@ pub use base_url::*;
 pub use conflicts::*;
 pub use dependency_groups::*;
 pub use direct_url::*;
+pub use identifier::*;
 pub use lenient_requirement::*;
 pub use marker_environment::*;
 pub use metadata::*;
@@ -15,6 +16,7 @@ mod base_url;
 mod conflicts;
 mod dependency_groups;
 mod direct_url;
+mod identifier;
 mod lenient_requirement;
 mod marker_environment;
 mod metadata;

diff --git a/crates/uv-workspace/Cargo.toml b/crates/uv-workspace/Cargo.toml
@@ -25,7 +25,7 @@ uv-normalize = { workspace = true }
 uv-options-metadata = { workspace = true }
 uv-pep440 = { workspace = true }
 uv-pep508 = { workspace = true }
-uv-pypi-types = { workspace = true, features = ["serde"] }
+uv-pypi-types = { workspace = true }
 uv-static = { workspace = true }
 uv-warnings = { workspace = true }
 
@@ -38,8 +38,8 @@ schemars = { workspace = true, optional = true }
 serde = { workspace = true, features = ["derive"] }
 thiserror = { workspace = true }
 tokio = { workspace = true }
-toml =  { workspace = true }
-toml_edit =  { workspace = true }
+toml = { workspace = true }
+toml_edit = { workspace = true }
 tracing = { workspace = true }
 url = { workspace = true }