543 lines
17 KiB
Rust
543 lines
17 KiB
Rust
//! This crate provides types for identifiers of object files, such as executables, dynamic
//! libraries or debug companion files. The concept originates in Google Breakpad and defines two
//! types:
//!
//!  - [`CodeId`]: Identifies the file containing source code, i.e. the actual library or
//!    executable. The identifier is platform dependent and implementation defined. Thus, there is
//!    no canonical representation.
//!  - [`DebugId`]: Identifies a debug information file, which may or may not use information from
//!    the Code ID. The contents are also implementation defined, but as opposed to `CodeId`, the
//!    structure is streamlined across platforms. It is also guaranteed to be 32 bytes in size.
//!
//! [`CodeId`]: struct.CodeId.html
//! [`DebugId`]: struct.DebugId.html
|
|
|
|
#![warn(missing_docs)]
|
|
|
|
use std::error;
|
|
use std::fmt;
|
|
use std::fmt::Write;
|
|
use std::str;
|
|
|
|
use uuid::{Bytes, Uuid};
|
|
|
|
/// Error returned when input cannot be interpreted as a [`DebugId`](struct.DebugId.html).
///
/// The error carries no payload; its `Display` output is a fixed message.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct ParseDebugIdError;

impl fmt::Display for ParseDebugIdError {
    /// Writes the fixed, human-readable description of this error.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str("invalid debug identifier")
    }
}

// No source and no extra context: the default `Error` methods are sufficient.
impl error::Error for ParseDebugIdError {}
|
|
|
|
/// Switches that select which textual forms `DebugId::parse_str` accepts.
#[derive(Debug, Clone, Copy)]
struct ParseOptions {
    /// Accept input that has a `-` directly after the first eight characters.
    allow_hyphens: bool,
    /// Reject identifiers that are not followed by an appendix.
    require_appendix: bool,
    /// Ignore everything after the first eight appendix characters instead of parsing it.
    allow_tail: bool,
}
|
|
|
|
/// Unique identifier for debug information files and their debug information.
|
|
///
|
|
/// This type is analogous to [`CodeId`], except that it identifies a debug file instead of the
|
|
/// actual library or executable. One some platforms, a `DebugId` is an alias for a `CodeId` but the
|
|
/// exact rules around this are complex. On Windows, the identifiers are completely different and
|
|
/// refer to separate files.
|
|
///
|
|
/// The string representation must be between 33 and 40 characters long and consist of:
|
|
///
|
|
/// 1. 36 character hyphenated hex representation of the UUID field
|
|
/// 2. 1-16 character lowercase hex representation of the u32 appendix
|
|
///
|
|
/// The debug identifier is compatible to Google Breakpad. Use [`DebugId::breakpad`] to get a
|
|
/// breakpad string representation of this debug identifier.
|
|
///
|
|
/// There is one exception to this: for the old PDB 2.0 format the debug identifier consists
|
|
/// of only a 32-bit integer + age resulting in a string representation of between 9 and 16
|
|
/// hex characters.
|
|
///
|
|
/// # Example
|
|
///
|
|
/// ```
|
|
/// # extern crate debugid;
|
|
/// use std::str::FromStr;
|
|
/// use debugid::DebugId;
|
|
///
|
|
/// # fn foo() -> Result<(), ::debugid::ParseDebugIdError> {
|
|
/// let id = DebugId::from_str("dfb8e43a-f242-3d73-a453-aeb6a777ef75-a")?;
|
|
/// assert_eq!("dfb8e43a-f242-3d73-a453-aeb6a777ef75-a".to_string(), id.to_string());
|
|
/// # Ok(())
|
|
/// # }
|
|
///
|
|
/// # fn main() { foo().unwrap() }
|
|
/// ```
|
|
///
|
|
/// # In-memory representation
|
|
///
|
|
/// The in-memory representation takes up 32 bytes and can be directly written to storage
|
|
/// and mapped back into an object reference.
|
|
///
|
|
/// ```
|
|
/// use std::str::FromStr;
|
|
/// use debugid::DebugId;
|
|
///
|
|
/// let debug_id = DebugId::from_str("dfb8e43a-f242-3d73-a453-aeb6a777ef75-a").unwrap();
|
|
///
|
|
/// let slice = &[debug_id];
|
|
/// let ptr = slice.as_ptr() as *const u8;
|
|
/// let len = std::mem::size_of_val(slice);
|
|
/// let buf: &[u8] = unsafe { std::slice::from_raw_parts(ptr, len) };
|
|
///
|
|
/// let mut new_buf: Vec<u8> = Vec::new();
|
|
/// std::io::copy(&mut std::io::Cursor::new(buf), &mut new_buf).unwrap();
|
|
///
|
|
/// let ptr = new_buf.as_ptr() as *const DebugId;
|
|
/// let new_debug_id = unsafe { &*ptr };
|
|
///
|
|
/// assert_eq!(*new_debug_id, debug_id);
|
|
/// ```
|
|
///
|
|
/// As long the bytes were written using the same major version of this crate you will be
|
|
/// able to read it again like this.
|
|
///
|
|
/// [`CodeId`]: struct.CodeId.html
|
|
/// [`DebugId::breakpad`]: struct.DebugId.html#method.breakpad
|
|
// This needs to be backwards compatible also in its exact in-memory byte-layout since this
|
|
// struct is directly mapped from disk in e.g. Symbolic SymCache formats. The first version
|
|
// of this struct was defined as:
|
|
//
|
|
// ```rust
|
|
// struct DebugId {
|
|
// uuid: Uuid,
|
|
// appendix: u32,
|
|
// _padding: [u8; 12],
|
|
// }
|
|
// ```
|
|
//
|
|
// For this reason the current `typ` byte represents the type of `DebugId` stored in the
|
|
// `Bytes`:
|
|
//
|
|
// - `0u8`: The `bytes` field contains a UUID.
|
|
// - `1u8`: The first 4 bytes of the `bytes` field contain a big-endian u32, the remaining
|
|
// bytes are 0.
|
|
#[repr(C, packed)]
|
|
#[derive(Default, Eq, PartialEq, Ord, PartialOrd, Hash, Clone, Copy)]
|
|
pub struct DebugId {
|
|
bytes: Bytes,
|
|
appendix: u32,
|
|
_padding: [u8; 11],
|
|
typ: u8,
|
|
}
|
|
|
|
impl DebugId {
|
|
/// Constructs an empty debug identifier, containing only zeros.
|
|
pub fn nil() -> Self {
|
|
Self::default()
|
|
}
|
|
|
|
/// Constructs a `DebugId` from its `uuid`.
|
|
pub fn from_uuid(uuid: Uuid) -> Self {
|
|
Self::from_parts(uuid, 0)
|
|
}
|
|
|
|
/// Constructs a `DebugId` from its `uuid` and `appendix` parts.
|
|
pub fn from_parts(uuid: Uuid, appendix: u32) -> Self {
|
|
DebugId {
|
|
bytes: *uuid.as_bytes(),
|
|
appendix,
|
|
typ: 0,
|
|
_padding: [0; 11],
|
|
}
|
|
}
|
|
|
|
/// Constructs a `DebugId` from a Microsoft little-endian GUID and age.
|
|
pub fn from_guid_age(guid: &[u8], age: u32) -> Result<Self, ParseDebugIdError> {
|
|
if guid.len() != 16 {
|
|
return Err(ParseDebugIdError);
|
|
}
|
|
|
|
let uuid = Uuid::from_bytes([
|
|
guid[3], guid[2], guid[1], guid[0], guid[5], guid[4], guid[7], guid[6], guid[8],
|
|
guid[9], guid[10], guid[11], guid[12], guid[13], guid[14], guid[15],
|
|
]);
|
|
|
|
Ok(DebugId::from_parts(uuid, age))
|
|
}
|
|
|
|
/// Constructs a `DebugId` from a PDB 2.0 timestamp and age.
|
|
pub fn from_pdb20(timestamp: u32, age: u32) -> Self {
|
|
// The big-endian byte-order here has to match the one used to read this number in
|
|
// the DebugId::timestamp method.
|
|
DebugId {
|
|
bytes: [
|
|
(timestamp >> 24) as u8,
|
|
(timestamp >> 16) as u8,
|
|
(timestamp >> 8) as u8,
|
|
timestamp as u8,
|
|
0u8,
|
|
0u8,
|
|
0u8,
|
|
0u8,
|
|
0u8,
|
|
0u8,
|
|
0u8,
|
|
0u8,
|
|
0u8,
|
|
0u8,
|
|
0u8,
|
|
0u8,
|
|
],
|
|
appendix: age,
|
|
_padding: [0u8; 11],
|
|
typ: 1u8,
|
|
}
|
|
}
|
|
|
|
/// Parses a breakpad identifier from a string.
|
|
pub fn from_breakpad(string: &str) -> Result<Self, ParseDebugIdError> {
|
|
let options = ParseOptions {
|
|
allow_hyphens: false,
|
|
require_appendix: true,
|
|
allow_tail: false,
|
|
};
|
|
Self::parse_str(string, options).ok_or(ParseDebugIdError)
|
|
}
|
|
|
|
/// Returns the UUID part of the code module's debug_identifier.
|
|
///
|
|
/// If this is a debug identifier for the PDB 2.0 format an invalid UUID is returned
|
|
/// where only the first 4 bytes are filled in and the remainder of the bytes are 0.
|
|
/// This means the UUID has variant [`uuid::Variant::NCS`] and an unknown version,
|
|
/// [`Uuid::get_version`] will return `None`, which is not a valid UUID.
|
|
///
|
|
/// This may seem odd however does seem reasonable:
|
|
///
|
|
/// - Every [`DebugId`] can be represented as [`Uuid`] and will still mostly look
|
|
/// reasonable e.g. in comparisons etc.
|
|
/// - The PDB 2.0 format is very old and very unlikely to appear practically.
|
|
pub fn uuid(&self) -> Uuid {
|
|
Uuid::from_bytes(self.bytes)
|
|
}
|
|
|
|
/// Returns the appendix part of the code module's debug identifier.
|
|
///
|
|
/// On Windows, this is an incrementing counter to identify the build.
|
|
/// On all other platforms, this value will always be zero.
|
|
pub fn appendix(&self) -> u32 {
|
|
self.appendix
|
|
}
|
|
|
|
/// Returns whether this identifier is nil, i.e. it consists only of zeros.
|
|
pub fn is_nil(&self) -> bool {
|
|
self.bytes == [0u8; 16] && self.appendix == 0
|
|
}
|
|
|
|
/// Returns whether this identifier is from the PDB 2.0 format.
|
|
pub fn is_pdb20(&self) -> bool {
|
|
self.typ == 1
|
|
}
|
|
|
|
/// Returns a wrapper which when formatted via `fmt::Display` will format a
|
|
/// a breakpad identifier.
|
|
pub fn breakpad(&self) -> BreakpadFormat<'_> {
|
|
BreakpadFormat { inner: self }
|
|
}
|
|
|
|
fn parse_str(string: &str, options: ParseOptions) -> Option<Self> {
|
|
let is_hyphenated = string.get(8..9) == Some("-");
|
|
if is_hyphenated && !options.allow_hyphens || !string.is_ascii() {
|
|
return None;
|
|
}
|
|
|
|
// Can the PDB 2.0 format match? This can never be true for a valid UUID.
|
|
let min_len = if is_hyphenated { 10 } else { 9 };
|
|
let max_len = if is_hyphenated { 17 } else { 16 };
|
|
if min_len <= string.len() && string.len() <= max_len {
|
|
let timestamp_str = string.get(..8)?;
|
|
let timestamp = u32::from_str_radix(timestamp_str, 16).ok()?;
|
|
let appendix_str = match is_hyphenated {
|
|
true => string.get(9..)?,
|
|
false => string.get(8..)?,
|
|
};
|
|
let appendix = u32::from_str_radix(appendix_str, 16).ok()?;
|
|
return Some(Self::from_pdb20(timestamp, appendix));
|
|
}
|
|
|
|
let uuid_len = if is_hyphenated { 36 } else { 32 };
|
|
let uuid = string.get(..uuid_len)?.parse().ok()?;
|
|
if !options.require_appendix && string.len() == uuid_len {
|
|
return Some(Self::from_parts(uuid, 0));
|
|
}
|
|
|
|
let mut appendix_str = &string[uuid_len..];
|
|
if is_hyphenated ^ appendix_str.starts_with('-') {
|
|
return None; // Require a hyphen if and only if we're hyphenated.
|
|
} else if is_hyphenated {
|
|
appendix_str = &appendix_str[1..]; // Skip the hyphen for parsing.
|
|
}
|
|
|
|
if options.allow_tail && appendix_str.len() > 8 {
|
|
appendix_str = &appendix_str[..8];
|
|
}
|
|
|
|
// Parse the appendix, which fails on empty strings.
|
|
let appendix = u32::from_str_radix(appendix_str, 16).ok()?;
|
|
Some(Self::from_parts(uuid, appendix))
|
|
}
|
|
|
|
/// Returns the PDB 2.0 timestamp.
|
|
///
|
|
/// Only valid if you know this is a PDB 2.0 debug identifier.
|
|
fn timestamp(&self) -> u32 {
|
|
u32::from_be_bytes([self.bytes[0], self.bytes[1], self.bytes[2], self.bytes[3]])
|
|
}
|
|
}
|
|
|
|
impl fmt::Debug for DebugId {
|
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
|
let uuid = self.uuid();
|
|
f.debug_struct("DebugId")
|
|
.field("uuid", &uuid.hyphenated().to_string())
|
|
.field("appendix", &self.appendix())
|
|
.finish()
|
|
}
|
|
}
|
|
|
|
impl fmt::Display for DebugId {
|
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
|
match self.is_pdb20() {
|
|
true => {
|
|
let timestamp = self.timestamp();
|
|
write!(f, "{:08X}", timestamp)?;
|
|
}
|
|
false => {
|
|
let uuid = self.uuid();
|
|
uuid.fmt(f)?;
|
|
}
|
|
}
|
|
if self.appendix > 0 {
|
|
write!(f, "-{:x}", { self.appendix })?;
|
|
}
|
|
Ok(())
|
|
}
|
|
}
|
|
|
|
impl str::FromStr for DebugId {
|
|
type Err = ParseDebugIdError;
|
|
|
|
fn from_str(string: &str) -> Result<Self, ParseDebugIdError> {
|
|
let options = ParseOptions {
|
|
allow_hyphens: true,
|
|
require_appendix: false,
|
|
allow_tail: true,
|
|
};
|
|
Self::parse_str(string, options).ok_or(ParseDebugIdError)
|
|
}
|
|
}
|
|
|
|
impl From<Uuid> for DebugId {
    /// Converts a UUID into a debug identifier; equivalent to [`DebugId::from_uuid`].
    fn from(uuid: Uuid) -> Self {
        Self::from_uuid(uuid)
    }
}
|
|
|
|
impl From<(Uuid, u32)> for DebugId {
    /// Converts a `(uuid, appendix)` pair into a debug identifier; equivalent to
    /// [`DebugId::from_parts`].
    fn from((uuid, appendix): (Uuid, u32)) -> Self {
        Self::from_parts(uuid, appendix)
    }
}
|
|
|
|
/// Wrapper around [`DebugId`] for Breakpad formatting.
|
|
///
|
|
/// **Example:**
|
|
///
|
|
/// ```
|
|
/// # extern crate debugid;
|
|
/// use std::str::FromStr;
|
|
/// use debugid::DebugId;
|
|
///
|
|
/// # fn foo() -> Result<(), debugid::ParseDebugIdError> {
|
|
/// let id = DebugId::from_breakpad("DFB8E43AF2423D73A453AEB6A777EF75a")?;
|
|
/// assert_eq!("DFB8E43AF2423D73A453AEB6A777EF75a".to_string(), id.breakpad().to_string());
|
|
/// # Ok(())
|
|
/// # }
|
|
///
|
|
/// # fn main() { foo().unwrap() }
|
|
/// ```
|
|
///
|
|
/// [`DebugId`]: struct.DebugId.html
|
|
#[derive(Debug)]
|
|
pub struct BreakpadFormat<'a> {
|
|
inner: &'a DebugId,
|
|
}
|
|
|
|
impl<'a> fmt::Display for BreakpadFormat<'a> {
|
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
|
match self.inner.is_pdb20() {
|
|
true => {
|
|
let timestamp = self.inner.timestamp();
|
|
write!(f, "{:08X}{:x}", timestamp, self.inner.appendix())
|
|
}
|
|
false => {
|
|
let uuid = self.inner.uuid();
|
|
write!(f, "{:X}{:x}", uuid.simple(), self.inner.appendix())
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Error type for parsing a [`CodeId`](struct.CodeId.html).
///
/// The error carries no payload; its `Display` output is a fixed message.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct ParseCodeIdError;

impl fmt::Display for ParseCodeIdError {
    /// Writes the fixed, human-readable description of this error.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str("invalid code identifier")
    }
}

// No source and no extra context: the default `Error` methods are sufficient.
impl error::Error for ParseCodeIdError {}
|
|
|
|
/// Unique platform-dependent identifier of code files.
///
/// This identifier assumes a string representation that depends on the platform and compiler used.
/// The representation only retains hex characters and canonically stores lower case.
///
/// There are the following known formats:
///
///  - **MachO UUID**: The unique identifier of a Mach binary, specified in the `LC_UUID` load
///    command header.
///  - **GNU Build ID**: Contents of the `.gnu.build-id` note or section contents formatted as
///    lowercase hex string.
///  - **PE Timestamp**: Timestamp and size of image values from a Windows PE header. The size of
///    image value is truncated, so the length of the `CodeId` might not be a multiple of 2.
#[derive(Clone, Default, Eq, Hash, Ord, PartialEq, PartialOrd)]
pub struct CodeId {
    /// Normalized identifier: ASCII hex digits only, lower case.
    inner: String,
}

impl CodeId {
    /// Constructs an empty code identifier.
    pub fn nil() -> Self {
        Self::default()
    }

    /// Constructs a `CodeId` from its string representation.
    ///
    /// Every character that is not an ASCII hex digit is dropped and the remaining characters
    /// are converted to lower case.
    pub fn new(mut string: String) -> Self {
        string.retain(|c| c.is_ascii_hexdigit());
        string.make_ascii_lowercase();
        CodeId { inner: string }
    }

    /// Constructs a `CodeId` from a binary slice.
    ///
    /// Each byte is rendered as two lowercase hex digits.
    pub fn from_binary(slice: &[u8]) -> Self {
        let mut inner = String::with_capacity(slice.len() * 2);

        for byte in slice {
            write!(&mut inner, "{:02x}", byte).expect("writing to a String cannot fail");
        }

        // The buffer holds nothing but lowercase hex digits, so it is already in the normalized
        // form `new` would produce.
        CodeId { inner }
    }

    /// Returns whether this identifier is nil, i.e. it is empty.
    pub fn is_nil(&self) -> bool {
        self.inner.is_empty()
    }

    /// Returns the string representation of this code identifier.
    pub fn as_str(&self) -> &str {
        self.inner.as_str()
    }
}
|
|
|
|
impl fmt::Display for CodeId {
|
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
|
f.write_str(&self.inner)
|
|
}
|
|
}
|
|
|
|
impl fmt::Debug for CodeId {
|
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
|
write!(f, "CodeId({})", self)
|
|
}
|
|
}
|
|
|
|
impl From<String> for CodeId {
|
|
fn from(string: String) -> Self {
|
|
Self::new(string)
|
|
}
|
|
}
|
|
|
|
impl From<&'_ str> for CodeId {
|
|
fn from(string: &str) -> Self {
|
|
Self::new(string.into())
|
|
}
|
|
}
|
|
|
|
impl AsRef<str> for CodeId {
|
|
fn as_ref(&self) -> &str {
|
|
self.as_str()
|
|
}
|
|
}
|
|
|
|
impl str::FromStr for CodeId {
|
|
type Err = ParseCodeIdError;
|
|
|
|
fn from_str(string: &str) -> Result<Self, ParseCodeIdError> {
|
|
Ok(Self::new(string.into()))
|
|
}
|
|
}
|
|
|
|
/// Serde integration: both identifier types are serialized as, and deserialized from, their
/// string representation.
#[cfg(feature = "serde")]
mod serde_support {
    use serde::de::{self, Deserialize, Deserializer, Unexpected, Visitor};
    use serde::ser::{Serialize, Serializer};

    use super::*;

    impl Serialize for CodeId {
        /// Serializes the identifier as its string representation.
        fn serialize<S: Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
            serializer.serialize_str(self.as_str())
        }
    }

    impl<'de> Deserialize<'de> for CodeId {
        /// Deserializes a string and normalizes it through `CodeId::new`.
        fn deserialize<D: Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
            let string = String::deserialize(deserializer)?;
            Ok(CodeId::new(string))
        }
    }

    impl<'de> Deserialize<'de> for DebugId {
        /// Deserializes a string and parses it with the `FromStr` implementation of `DebugId`.
        fn deserialize<D: Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
            /// Visitor that turns a string into a `DebugId`.
            struct V;

            impl<'de> Visitor<'de> for V {
                type Value = DebugId;

                fn expecting(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
                    formatter.write_str("DebugId")
                }

                fn visit_str<E: de::Error>(self, value: &str) -> Result<DebugId, E> {
                    // The parse error carries no detail, so report the offending input instead.
                    value
                        .parse()
                        .map_err(|_| de::Error::invalid_value(Unexpected::Str(value), &self))
                }
            }

            deserializer.deserialize_str(V)
        }
    }

    impl Serialize for DebugId {
        /// Serializes the identifier as its `Display` representation.
        fn serialize<S: Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
            // `collect_str` lets the serializer format the value directly instead of going
            // through a temporary `String`.
            serializer.collect_str(self)
        }
    }
}
|