From 6319c02d424e282cba44204aa3313767b1b2784f Mon Sep 17 00:00:00 2001 From: sudoBash418 Date: Sun, 1 Sep 2024 23:43:38 -0600 Subject: [PATCH] Add support for symbolicating APK/ZIP-embedded libraries on Android By default, modern Android build tools will store native libraries uncompressed, and the loader will map them directly from the APK (instead of the package manager extracting them on installation). This commit adds support for symbolicating these embedded libraries. To avoid parsing ZIP structures, the offset of the library within the archive is determined via /proc/self/maps. ref: https://android.googlesource.com/platform/bionic/+/main/android-changes-for-ndk-developers.md#opening-shared-libraries-directly-from-an-apk --- src/symbolize/gimli.rs | 47 ++++++++++--- src/symbolize/gimli/elf.rs | 41 +++++++++++ src/symbolize/gimli/libs_dl_iterate_phdr.rs | 68 +++++++++++++------ src/symbolize/gimli/mmap_fake.rs | 6 +- src/symbolize/gimli/mmap_unix.rs | 4 +- src/symbolize/gimli/mmap_windows.rs | 10 ++- .../gimli/parse_running_mmaps_unix.rs | 11 ++- 7 files changed, 149 insertions(+), 38 deletions(-) diff --git a/src/symbolize/gimli.rs b/src/symbolize/gimli.rs index dacd490f8..b9f6f3d6c 100644 --- a/src/symbolize/gimli.rs +++ b/src/symbolize/gimli.rs @@ -186,7 +186,7 @@ impl<'data> Context<'data> { fn mmap(path: &Path) -> Option { let file = File::open(path).ok()?; let len = file.metadata().ok()?.len().try_into().ok()?; - unsafe { Mmap::map(&file, len) } + unsafe { Mmap::map(&file, len, 0) } } cfg_if::cfg_if! { @@ -268,6 +268,21 @@ struct Cache { struct Library { name: OsString, + #[cfg(target_os = "android")] + /// On Android, the dynamic linker [can map libraries directly from a + /// ZIP archive][ndk-linker-changes] (typically an `.apk`). + /// + /// The linker requires that these libraries are stored uncompressed + /// and page-aligned. 
+ /// + /// These "embedded" libraries have filepaths of the form + /// `/path/to/my.apk!/lib/mylib.so` (where `/path/to/my.apk` is the archive + /// and `lib/mylib.so` is the name of the library within the archive). + /// + /// This mechanism is present on Android since API level 23. + /// + /// [ndk-linker-changes]: https://android.googlesource.com/platform/bionic/+/main/android-changes-for-ndk-developers.md#opening-shared-libraries-directly-from-an-apk + zip_offset: Option, #[cfg(target_os = "aix")] /// On AIX, the library mmapped can be a member of a big-archive file. /// For example, with a big-archive named libfoo.a containing libbar.so, @@ -294,17 +309,31 @@ struct LibrarySegment { len: usize, } -#[cfg(target_os = "aix")] fn create_mapping(lib: &Library) -> Option { - let name = &lib.name; - let member_name = &lib.member_name; - Mapping::new(name.as_ref(), member_name) + cfg_if::cfg_if! { + if #[cfg(target_os = "aix")] { + Mapping::new(lib.name.as_ref(), &lib.member_name) + } else if #[cfg(target_os = "android")] { + Mapping::new_android(lib.name.as_ref(), lib.zip_offset) + } else { + Mapping::new(lib.name.as_ref()) + } + } } -#[cfg(not(target_os = "aix"))] -fn create_mapping(lib: &Library) -> Option { - let name = &lib.name; - Mapping::new(name.as_ref()) +/// Try to extract the archive path from an "embedded" library path +/// (e.g. `/path/to/my.apk` from `/path/to/my.apk!/mylib.so`). +/// +/// Returns `None` if the path does not contain a `!/` separator. 
+#[cfg(target_os = "android")]
+fn extract_zip_path_android(path: &mystd::ffi::OsStr) -> Option<&mystd::ffi::OsStr> {
+    use mystd::os::unix::ffi::OsStrExt;
+    // the archive path is everything in front of the first `!/` separator
+    let bytes = path.as_bytes();
+    bytes
+        .windows(2)
+        .position(|pair| pair == b"!/")
+        .map(|sep| mystd::ffi::OsStr::from_bytes(&bytes[..sep]))
 }
 
 // unsafe because this is required to be externally synchronized
diff --git a/src/symbolize/gimli/elf.rs b/src/symbolize/gimli/elf.rs index 5771f93f6..82c53df0a 100644 --- a/src/symbolize/gimli/elf.rs +++ b/src/symbolize/gimli/elf.rs @@ -45,6 +45,47 @@ impl Mapping { }) } + /// On Android, shared objects can be loaded directly from a ZIP archive + /// (see: [`super::Library::zip_offset`]). + /// + /// If `zip_offset` is not None, we interpret the `path` as an + /// "embedded" library path, and the value of `zip_offset` tells us where + /// in the ZIP archive the library data starts. + /// + /// We expect `zip_offset` to be page-aligned because the dynamic linker + /// requires this. Otherwise, loading the embedded library will fail. + /// + /// If we fail to load an embedded library for any reason, we fallback to + /// interpreting the path as a literal file on disk (same as calling [`Self::new`]).
+    #[cfg(target_os = "android")]
+    pub fn new_android(path: &Path, zip_offset: Option<u64>) -> Option<Mapping> {
+        fn map_embedded_library(path: &Path, zip_offset: u64) -> Option<Mapping> {
+            // get path of ZIP archive (delimited by `!/`)
+            let zip_path = Path::new(super::extract_zip_path_android(path.as_os_str())?);
+
+            let file = fs::File::open(zip_path).ok()?;
+            let len = file.metadata().ok()?.len();
+
+            // NOTE: we map the remainder of the entire archive instead of just the library so we don't have to determine its length
+            // NOTE: `checked_sub` bails out (instead of underflowing) if `zip_offset` lies past the end of the archive
+            let map_len = usize::try_from(len.checked_sub(zip_offset)?).ok()?;
+            // NOTE: mmap will fail if `zip_offset` is not page-aligned
+            let map = unsafe { super::mmap::Mmap::map(&file, map_len, zip_offset) }?;
+
+            Mapping::mk(map, |map, stash| {
+                Context::new(stash, Object::parse(&map)?, None, None)
+            })
+        }
+
+        // if ZIP offset is given, try mapping as a ZIP-embedded library
+        // otherwise, fall back to mapping as a literal filepath
+        if let Some(zip_offset) = zip_offset {
+            map_embedded_library(path, zip_offset).or_else(|| Self::new(path))
+        } else {
+            Self::new(path)
+        }
+    }
+
     /// Load debuginfo from an external debug file.
fn new_debug(original_path: &Path, path: PathBuf, crc: Option) -> Option { let map = super::mmap(&path)?; diff --git a/src/symbolize/gimli/libs_dl_iterate_phdr.rs b/src/symbolize/gimli/libs_dl_iterate_phdr.rs index 359868e5b..6bee79dd0 100644 --- a/src/symbolize/gimli/libs_dl_iterate_phdr.rs +++ b/src/symbolize/gimli/libs_dl_iterate_phdr.rs @@ -6,43 +6,54 @@ use super::mystd::borrow::ToOwned; use super::mystd::env; use super::mystd::ffi::{CStr, OsStr}; use super::mystd::os::unix::prelude::*; -use super::{Library, LibrarySegment, OsString, Vec}; +use super::{parse_running_mmaps, Library, LibrarySegment, OsString, Vec}; use core::slice; +struct CallbackData { + libs: Vec, + maps: Option>, +} pub(super) fn native_libraries() -> Vec { - let mut ret = Vec::new(); + let mut cb_data = CallbackData { + libs: Vec::new(), + #[cfg(not(target_os = "hurd"))] + maps: parse_running_mmaps::parse_maps().ok(), + #[cfg(target_os = "hurd")] + maps: None, + }; unsafe { - libc::dl_iterate_phdr(Some(callback), core::ptr::addr_of_mut!(ret).cast()); + libc::dl_iterate_phdr(Some(callback), core::ptr::addr_of_mut!(cb_data).cast()); } - ret + cb_data.libs } -fn infer_current_exe(base_addr: usize) -> OsString { - cfg_if::cfg_if! { - if #[cfg(not(target_os = "hurd"))] { - if let Ok(entries) = super::parse_running_mmaps::parse_maps() { - let opt_path = entries - .iter() - .find(|e| e.ip_matches(base_addr) && e.pathname().len() > 0) - .map(|e| e.pathname()) - .cloned(); - if let Some(path) = opt_path { - return path; - } - } +fn infer_current_exe( + maps: &Option>, + base_addr: usize, +) -> OsString { + #[cfg(not(target_os = "hurd"))] + if let Some(entries) = maps { + let opt_path = entries + .iter() + .find(|e| e.ip_matches(base_addr) && e.pathname().len() > 0) + .map(|e| e.pathname()) + .cloned(); + if let Some(path) = opt_path { + return path; } } + env::current_exe().map(|e| e.into()).unwrap_or_default() } /// # Safety /// `info` must be a valid pointer. 
-/// `vec` must be a valid pointer to `Vec` +/// `data` must be a valid pointer to `CallbackData`. #[forbid(unsafe_op_in_unsafe_fn)] unsafe extern "C" fn callback( info: *mut libc::dl_phdr_info, _size: libc::size_t, - vec: *mut libc::c_void, + data: *mut libc::c_void, ) -> libc::c_int { // SAFETY: We are guaranteed these fields: let dlpi_addr = unsafe { (*info).dlpi_addr }; @@ -50,7 +61,7 @@ unsafe extern "C" fn callback( let dlpi_phdr = unsafe { (*info).dlpi_phdr }; let dlpi_phnum = unsafe { (*info).dlpi_phnum }; // SAFETY: We assured this. - let libs = unsafe { &mut *vec.cast::>() }; + let CallbackData { libs, maps } = unsafe { &mut *data.cast::() }; // most implementations give us the main program first let is_main = libs.is_empty(); // we may be statically linked, which means we are main and mostly one big blob of code @@ -63,7 +74,7 @@ unsafe extern "C" fn callback( // don't try to look up our name from /proc/self/maps, it'll get silly env::current_exe().unwrap_or_default().into_os_string() } else if is_main && no_given_name { - infer_current_exe(dlpi_addr as usize) + infer_current_exe(&maps, dlpi_addr as usize) } else { // this fallback works even if we are main, because some platforms give the name anyways if dlpi_name.is_null() { @@ -73,6 +84,19 @@ unsafe extern "C" fn callback( OsStr::from_bytes(unsafe { CStr::from_ptr(dlpi_name) }.to_bytes()).to_owned() } }; + #[cfg(target_os = "android")] + let zip_offset: Option = { + // only check for ZIP-embedded file if we have data from /proc/self/maps + maps.as_ref().and_then(|maps| { + // check if file is embedded within a ZIP archive by searching for `!/` + super::extract_zip_path_android(&name).and_then(|_| { + // find MapsEntry matching library's base address and get its file offset + maps.iter() + .find(|m| m.ip_matches(dlpi_addr as usize)) + .map(|m| m.offset()) + }) + }) + }; let headers = if dlpi_phdr.is_null() || dlpi_phnum == 0 { &[] } else { @@ -81,6 +105,8 @@ unsafe extern "C" fn callback( }; 
libs.push(Library { name, + #[cfg(target_os = "android")] + zip_offset, segments: headers .iter() .map(|header| LibrarySegment {
diff --git a/src/symbolize/gimli/mmap_fake.rs b/src/symbolize/gimli/mmap_fake.rs
index ce5096415..71697fc30 100644
--- a/src/symbolize/gimli/mmap_fake.rs
+++ b/src/symbolize/gimli/mmap_fake.rs
@@ -1,4 +1,5 @@
-use super::{mystd::io::Read, File};
+use super::mystd::io::{Read, Seek, SeekFrom};
+use super::File;
 use alloc::vec::Vec;
 use core::ops::Deref;
 
@@ -7,10 +8,11 @@ pub struct Mmap {
 }
 
 impl Mmap {
-    pub unsafe fn map(mut file: &File, len: usize) -> Option<Mmap> {
+    pub unsafe fn map(mut file: &File, len: usize, offset: u64) -> Option<Mmap> {
         let mut mmap = Mmap {
             vec: Vec::with_capacity(len),
         };
+        file.seek(SeekFrom::Start(offset)).ok()?; // a failed seek must not be silently ignored
         file.read_to_end(&mut mmap.vec).ok()?;
         Some(mmap)
     }
diff --git a/src/symbolize/gimli/mmap_unix.rs b/src/symbolize/gimli/mmap_unix.rs index 261ffc1d8..0895ee5d1 100644 --- a/src/symbolize/gimli/mmap_unix.rs +++ b/src/symbolize/gimli/mmap_unix.rs @@ -15,14 +15,14 @@ pub struct Mmap { } impl Mmap { - pub unsafe fn map(file: &File, len: usize) -> Option { + pub unsafe fn map(file: &File, len: usize, offset: u64) -> Option { let ptr = mmap64( ptr::null_mut(), len, libc::PROT_READ, libc::MAP_PRIVATE, file.as_raw_fd(), - 0, + offset.try_into().ok()?, ); if ptr == libc::MAP_FAILED { return None; diff --git a/src/symbolize/gimli/mmap_windows.rs b/src/symbolize/gimli/mmap_windows.rs index 787eccf91..1c8bc83c1 100644 --- a/src/symbolize/gimli/mmap_windows.rs +++ b/src/symbolize/gimli/mmap_windows.rs @@ -16,7 +16,7 @@ pub struct Mmap { } impl Mmap { - pub unsafe fn map(file: &File, len: usize) -> Option { + pub unsafe fn map(file: &File, len: usize, offset: u64) -> Option { let file = file.try_clone().ok()?; let mapping = CreateFileMappingA( file.as_raw_handle(), @@ -29,7 +29,13 @@ impl Mmap { if mapping.is_null() { return None; } - let ptr = MapViewOfFile(mapping, FILE_MAP_READ, 0, 0, len); + let ptr = MapViewOfFile( + mapping,
FILE_MAP_READ, + (offset >> 32) as u32, + offset as u32, + len, + ); CloseHandle(mapping); if ptr.Value.is_null() { return None; diff --git a/src/symbolize/gimli/parse_running_mmaps_unix.rs b/src/symbolize/gimli/parse_running_mmaps_unix.rs index 5d4b34675..534492c59 100644 --- a/src/symbolize/gimli/parse_running_mmaps_unix.rs +++ b/src/symbolize/gimli/parse_running_mmaps_unix.rs @@ -20,7 +20,7 @@ pub(super) struct MapsEntry { /// p = private (copy on write) perms: [char; 4], /// Offset into the file (or "whatever"). - offset: usize, + offset: u64, /// device (major, minor) dev: (usize, usize), /// inode on the device. 0 indicates that no inode is associated with the memory region (e.g. uninitalized data aka BSS). @@ -76,6 +76,11 @@ impl MapsEntry { pub(super) fn ip_matches(&self, ip: usize) -> bool { self.address.0 <= ip && ip < self.address.1 } + + #[cfg(target_os = "android")] + pub(super) fn offset(&self) -> u64 { + self.offset + } } impl FromStr for MapsEntry { @@ -118,6 +123,8 @@ impl FromStr for MapsEntry { let pathname_str = s.trim_start(); let hex = |s| usize::from_str_radix(s, 16).map_err(|_| "Couldn't parse hex number"); + let hex64 = |s| u64::from_str_radix(s, 16).map_err(|_| "Couldn't parse hex number"); + let address = if let Some((start, limit)) = range_str.split_once('-') { (hex(start)?, hex(limit)?) } else { @@ -132,7 +139,7 @@ impl FromStr for MapsEntry { } perms }; - let offset = hex(offset_str)?; + let offset = hex64(offset_str)?; let dev = if let Some((major, minor)) = dev_str.split_once(':') { (hex(major)?, hex(minor)?) } else {