1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152
//! NAR wire format, without I/O details, since those differ between
//! the synchronous and asynchronous implementations.
//!
//! The wire format is an S-expression format, encoded onto the wire
//! using simple encoding rules.
//!
//! # Encoding
//!
//! Lengths are represented as 64-bit unsigned integers in little-endian
//! format. Byte strings, including file contents and the syntactic strings
//! that are part of the grammar, are prefixed by their 64-bit length and
//! padded to 8-byte (64-bit) alignment with zero bytes. The zero-length
//! string is therefore encoded as eight zero bytes representing its length.
//!
//! # Grammar
//!
//! The NAR grammar is as follows:
//! ```plain
//! archive ::= "nix-archive-1" node
//!
//! node ::= "(" "type" "symlink" "target" string ")"
//!      ||= "(" "type" "regular" ("executable" "")? "contents" string ")"
//!      ||= "(" "type" "directory" entry* ")"
//!
//! entry ::= "entry" "(" "name" string "node" node ")"
//! ```
//!
//! We rewrite it to pull together the purely syntactic elements into
//! unified tokens, producing an equivalent grammar that can be parsed
//! and serialized more elegantly:
//! ```plain
//! archive ::= TOK_NAR node
//! node ::= TOK_SYM string             TOK_PAR
//!      ||= (TOK_REG | TOK_EXE) string TOK_PAR
//!      ||= TOK_DIR entry*             TOK_PAR
//!
//! entry ::= TOK_ENT string TOK_NOD node TOK_PAR
//!
//! TOK_NAR ::= "nix-archive-1" "(" "type"
//! TOK_SYM ::= "symlink" "target"
//! TOK_REG ::= "regular" "contents"
//! TOK_EXE ::= "regular" "executable" "" "contents"
//! TOK_DIR ::= "directory"
//! TOK_ENT ::= "entry" "(" "name"
//! TOK_NOD ::= "node" "(" "type"
//! TOK_PAR ::= ")"
//! ```
//!
//! # Restrictions
//!
//! NOTE: These restrictions are not (and cannot be) enforced by this module,
//! but must be enforced by its consumers, [super::reader] and [super::writer].
//!
//! Directory entry names cannot have the reserved names `.` and `..`, nor contain
//! forward slashes. They must appear in strictly ascending lexicographic order
//! within a directory, and can be at most [MAX_NAME_LEN] bytes in length.
//!
//! Symlink targets can be at most [MAX_TARGET_LEN] bytes in length.
//!
//! Neither directory entry names nor symlink targets may be empty or contain
//! null bytes.
// These values are the standard Linux length limits.
// NOTE(review): 255 matches Linux `NAME_MAX` and 4095 matches `PATH_MAX - 1`
// (room for the trailing NUL) — confirm that this is the intended origin.
// Neither limit is checked in this module; the module docs assign that job to
// the reader and writer.
/// Maximum length of a directory entry name, in bytes
pub const MAX_NAME_LEN: usize = 255;
/// Maximum length of a symlink target, in bytes
pub const MAX_TARGET_LEN: usize = 4095;
/// Reference encoder used by the tests: serialises every string in `xs` the
/// way the wire format prescribes and concatenates the results.
///
/// Each string becomes its byte length as a little-endian `u64`, followed by
/// the raw bytes, followed by as many zero bytes as are needed to reach the
/// next multiple of eight (none when the length is already aligned).
#[cfg(test)]
fn token(xs: &[&str]) -> Vec<u8> {
    xs.iter().fold(Vec::new(), |mut wire, part| {
        let payload = part.as_bytes();
        wire.extend_from_slice(&(payload.len() as u64).to_le_bytes());
        wire.extend_from_slice(payload);

        // Zero-fill up to the next 8-byte boundary.
        let padding = (8 - payload.len() % 8) % 8;
        wire.resize(wire.len() + padding, 0);
        wire
    })
}
// Pre-encoded grammar tokens. Each constant is the concatenation of the wire
// encodings of one or more fixed strings: the 64-bit little-endian length,
// the bytes themselves, then zero padding up to the next 8-byte boundary.
// The `tokens` test checks every constant against that encoding.
/// `"nix-archive-1" "(" "type"`: the archive header together with the opening of the root node.
pub const TOK_NAR: [u8; 56] = *b"\x0d\0\0\0\0\0\0\0nix-archive-1\0\0\0\x01\0\0\0\0\0\0\0(\0\0\0\0\0\0\0\x04\0\0\0\0\0\0\0type\0\0\0\0";
/// `"symlink" "target"`: symlink node; the target string follows.
pub const TOK_SYM: [u8; 32] = *b"\x07\0\0\0\0\0\0\0symlink\0\x06\0\0\0\0\0\0\0target\0\0";
/// `"regular" "contents"`: regular file node without the executable marker; the contents string follows.
pub const TOK_REG: [u8; 32] = *b"\x07\0\0\0\0\0\0\0regular\0\x08\0\0\0\0\0\0\0contents";
/// `"regular" "executable" "" "contents"`: regular file node with the executable marker; the contents string follows.
pub const TOK_EXE: [u8; 64] = *b"\x07\0\0\0\0\0\0\0regular\0\x0a\0\0\0\0\0\0\0executable\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x08\0\0\0\0\0\0\0contents";
/// `"directory"`: directory node; zero or more entries follow.
pub const TOK_DIR: [u8; 24] = *b"\x09\0\0\0\0\0\0\0directory\0\0\0\0\0\0\0";
/// `"entry" "(" "name"`: start of a directory entry; the entry name string follows.
pub const TOK_ENT: [u8; 48] = *b"\x05\0\0\0\0\0\0\0entry\0\0\0\x01\0\0\0\0\0\0\0(\0\0\0\0\0\0\0\x04\0\0\0\0\0\0\0name\0\0\0\0";
/// `"node" "(" "type"`: separates a directory entry's name from its node.
pub const TOK_NOD: [u8; 48] = *b"\x04\0\0\0\0\0\0\0node\0\0\0\0\x01\0\0\0\0\0\0\0(\0\0\0\0\0\0\0\x04\0\0\0\0\0\0\0type\0\0\0\0";
/// `")"`: closes a node or a directory entry.
pub const TOK_PAR: [u8; 16] = *b"\x01\0\0\0\0\0\0\0)\0\0\0\0\0\0\0";
// An empty string (eight zero bytes) immediately followed by `")"`.
// Only built with the `async` feature, where it serves as the pattern of `PadPar`.
#[cfg(feature = "async")]
const TOK_PAD_PAR: [u8; 24] = *b"\0\0\0\0\0\0\0\0\x01\0\0\0\0\0\0\0)\0\0\0\0\0\0\0";
/// Type-level marker for the `TOK_PAD_PAR` byte pattern.
///
/// The enum has no variants, so no value of it can ever be constructed; it is
/// only meaningful as a type.
#[cfg(feature = "async")]
#[derive(Debug)]
pub(crate) enum PadPar {}

/// Associates [`PadPar`] with the 24-byte `TOK_PAD_PAR` pattern.
#[cfg(all(feature = "async", feature = "wire"))]
impl crate::wire::reader::Tag for PadPar {
    /// Fixed-size byte array with the same length as the pattern (24 bytes).
    type Buf = [u8; 24];

    /// The exact bytes of `TOK_PAD_PAR`.
    const PATTERN: &'static [u8] = &TOK_PAD_PAR;

    /// Returns a zero-filled buffer.
    fn make_buf() -> [u8; 24] {
        [0u8; 24]
    }
}
/// Verifies that every hand-written token constant is byte-for-byte what the
/// reference encoder `token` produces for the strings the constant stands for.
#[test]
fn tokens() {
    // (constant, the grammar strings it is supposed to encode)
    let table: &[(&[u8], &[&str])] = &[
        (&TOK_NAR, &["nix-archive-1", "(", "type"]),
        (&TOK_SYM, &["symlink", "target"]),
        (&TOK_REG, &["regular", "contents"]),
        (&TOK_EXE, &["regular", "executable", "", "contents"]),
        (&TOK_DIR, &["directory"]),
        (&TOK_ENT, &["entry", "(", "name"]),
        (&TOK_NOD, &["node", "(", "type"]),
        (&TOK_PAR, &[")"]),
        #[cfg(feature = "async")]
        (&TOK_PAD_PAR, &["", ")"]),
    ];

    table
        .iter()
        .for_each(|&(expected, parts)| assert_eq!(expected, token(parts)));
}
pub use tag::Tag;
mod tag;
// NOTE(review): the bracketed number after each enum name looks like the byte
// offset at which the listed tokens differ from one another. At offset 16 the
// `Node` tokens hold 0x06 / 0x08 / 0x0a / `y`; at offset 0 the `Entry` tokens
// hold 0x01 / 0x05. Its exact meaning is defined by `tag::make!` — confirm there.
tag::make! {
/// These are the node tokens, succeeding [TOK_NAR] or [TOK_NOD],
/// and preceding the next variable-length element.
pub enum Node[16] {
// Symbolic link; the target string follows.
Sym = TOK_SYM,
// Regular file without the executable marker; the contents string follows.
Reg = TOK_REG,
// Regular file with the executable marker; the contents string follows.
Exe = TOK_EXE,
// Directory; entries follow, ended by TOK_PAR.
Dir = TOK_DIR,
}
/// Directory entry or terminator
pub enum Entry[0] {
/// End of directory
None = TOK_PAR,
/// Directory entry.
/// Followed by a name string, [TOK_NOD], and a [Node].
Some = TOK_ENT,
}
}