nix_compat/derivation/
parser.rs

1//! This module constructs a [Derivation] by parsing its [ATerm][]
2//! serialization.
3//!
4//! [ATerm]: http://program-transformation.org/Tools/ATermFormat.html
5
6use nom::bytes::complete::tag;
7use nom::character::complete::char as nomchar;
8use nom::combinator::{all_consuming, map_res};
9use nom::multi::{separated_list0, separated_list1};
10use nom::sequence::{delimited, preceded, separated_pair, terminated};
11use nom::Parser;
12use std::collections::{btree_map, BTreeMap, BTreeSet};
13use thiserror;
14
15use crate::derivation::parse_error::{into_nomerror, ErrorKind, NomError, NomResult};
16use crate::derivation::{write, CAHash, Derivation, Output};
17use crate::store_path::{self, StorePath};
18use crate::{aterm, nixhash};
19
/// Errors produced while turning ATerm bytes into a [Derivation].
///
/// `I` is the input type carried by the wrapped [NomError]; [parse] returns
/// the borrowing `Error<&[u8]>` flavour.
#[derive(Debug, thiserror::Error)]
pub enum Error<I> {
    /// The parser rejected the input (a nom error or failure).
    #[error("parsing error: {0}")]
    Parser(#[from] NomError<I>),
    /// The parser reported that the input is incomplete.
    #[error("premature EOF")]
    Incomplete,
    /// The input parsed, but the resulting derivation failed validation.
    #[error("validation error: {0}")]
    Validation(super::DerivationError),
}
29
30/// Convenience conversion of borring Error to an owned counterpart.
31impl From<Error<&[u8]>> for Error<Vec<u8>> {
32    fn from(value: Error<&[u8]>) -> Self {
33        match value {
34            Error::Parser(nom_error) => Error::Parser(NomError {
35                input: nom_error.input.to_vec(),
36                code: nom_error.code,
37            }),
38            Error::Incomplete => Error::Incomplete,
39            Error::Validation(e) => Error::Validation(e),
40        }
41    }
42}
43
44pub(crate) fn parse(i: &[u8]) -> Result<Derivation, Error<&[u8]>> {
45    match all_consuming(parse_derivation).parse(i) {
46        Ok((rest, derivation)) => {
47            // this shouldn't happen, as all_consuming shouldn't return.
48            debug_assert!(rest.is_empty());
49
50            // invoke validate
51            derivation.validate(true).map_err(Error::Validation)?;
52
53            Ok(derivation)
54        }
55        Err(nom::Err::Incomplete(_)) => Err(Error::Incomplete),
56        Err(nom::Err::Error(e) | nom::Err::Failure(e)) => Err(e.into()),
57    }
58}
59
60/// Consume a string containing the algo, and optionally a `r:`
61/// prefix, and a digest (bytes), return a [CAHash::Nar] or [CAHash::Flat].
62fn from_algo_and_mode_and_digest<B: AsRef<[u8]>>(
63    algo_and_mode: &str,
64    digest: B,
65) -> crate::nixhash::NixHashResult<CAHash> {
66    Ok(match algo_and_mode.strip_prefix("r:") {
67        Some(algo) => nixhash::CAHash::Nar(nixhash::from_algo_and_digest(
68            algo.try_into()?,
69            digest.as_ref(),
70        )?),
71        None => nixhash::CAHash::Flat(nixhash::from_algo_and_digest(
72            algo_and_mode.try_into()?,
73            digest.as_ref(),
74        )?),
75    })
76}
77
78/// Parse one output in ATerm. This is 4 string fields inside parans:
79/// output name, output path, algo (and mode), digest.
80/// Returns the output name and [Output] struct.
81fn parse_output(i: &[u8]) -> NomResult<&[u8], (String, Output)> {
82    delimited(
83        nomchar('('),
84        map_res(
85            |i| {
86                (
87                    terminated(aterm::parse_string_field, nomchar(',')),
88                    terminated(aterm::parse_string_field, nomchar(',')),
89                    terminated(aterm::parse_string_field, nomchar(',')),
90                    aterm::parse_bytes_field,
91                )
92                    .parse(i)
93                    .map_err(into_nomerror)
94            },
95            |(output_name, output_path, algo_and_mode, encoded_digest)| {
96                // convert these 4 fields into an [Output].
97                let ca_hash_res = {
98                    if algo_and_mode.is_empty() && encoded_digest.is_empty() {
99                        None
100                    } else {
101                        match data_encoding::HEXLOWER.decode(&encoded_digest) {
102                            Ok(digest) => {
103                                Some(from_algo_and_mode_and_digest(&algo_and_mode, digest))
104                            }
105                            Err(e) => Some(Err(nixhash::Error::InvalidBase64Encoding(e))),
106                        }
107                    }
108                }
109                .transpose();
110
111                match ca_hash_res {
112                    Ok(hash_with_mode) => Ok((
113                        output_name,
114                        Output {
115                            // TODO: Check if allowing empty paths here actually makes sense
116                            //       or we should make this code stricter.
117                            path: if output_path.is_empty() {
118                                None
119                            } else {
120                                Some(string_to_store_path(i, &output_path)?)
121                            },
122                            ca_hash: hash_with_mode,
123                        },
124                    )),
125                    Err(e) => Err(nom::Err::Failure(NomError {
126                        input: i,
127                        code: ErrorKind::NixHashError(e),
128                    })),
129                }
130            },
131        ),
132        nomchar(')'),
133    )
134    .parse(i)
135}
136
137/// Parse multiple outputs in ATerm. This is a list of things acccepted by
138/// parse_output, and takes care of turning the (String, Output) returned from
139/// it to a BTreeMap.
140/// We don't use parse_kv here, as it's dealing with 2-tuples, and these are
141/// 4-tuples.
142fn parse_outputs(i: &[u8]) -> NomResult<&[u8], BTreeMap<String, Output>> {
143    let res = delimited(
144        nomchar('['),
145        separated_list1(tag(","), parse_output),
146        nomchar(']'),
147    )
148    .parse(i);
149
150    match res {
151        Ok((rst, outputs_lst)) => {
152            let mut outputs = BTreeMap::default();
153            for (output_name, output) in outputs_lst.into_iter() {
154                if outputs.contains_key(&output_name) {
155                    return Err(nom::Err::Failure(NomError {
156                        input: i,
157                        code: ErrorKind::DuplicateMapKey(output_name.to_string()),
158                    }));
159                }
160                outputs.insert(output_name, output);
161            }
162            Ok((rst, outputs))
163        }
164        // pass regular parse errors along
165        Err(e) => Err(e),
166    }
167}
168
169fn parse_input_derivations(
170    i: &[u8],
171) -> NomResult<&[u8], BTreeMap<StorePath<String>, BTreeSet<String>>> {
172    let (i, input_derivations_list) = parse_kv(aterm::parse_string_list)(i)?;
173
174    // This is a HashMap of drv paths to a list of output names.
175    let mut input_derivations: BTreeMap<StorePath<String>, BTreeSet<_>> = BTreeMap::new();
176
177    for (input_derivation, output_names) in input_derivations_list {
178        let mut new_output_names = BTreeSet::new();
179        for output_name in output_names.into_iter() {
180            if new_output_names.contains(&output_name) {
181                return Err(nom::Err::Failure(NomError {
182                    input: i,
183                    code: ErrorKind::DuplicateInputDerivationOutputName(
184                        input_derivation.to_string(),
185                        output_name.to_string(),
186                    ),
187                }));
188            }
189            new_output_names.insert(output_name);
190        }
191
192        let input_derivation = string_to_store_path(i, input_derivation.as_str())?;
193
194        input_derivations.insert(input_derivation, new_output_names);
195    }
196
197    Ok((i, input_derivations))
198}
199
200fn parse_input_sources(i: &[u8]) -> NomResult<&[u8], BTreeSet<StorePath<String>>> {
201    let (i, input_sources_lst) = aterm::parse_string_list(i).map_err(into_nomerror)?;
202
203    let mut input_sources: BTreeSet<_> = BTreeSet::new();
204    for input_source in input_sources_lst.into_iter() {
205        let input_source = string_to_store_path(i, input_source.as_str())?;
206        if input_sources.contains(&input_source) {
207            return Err(nom::Err::Failure(NomError {
208                input: i,
209                code: ErrorKind::DuplicateInputSource(input_source.to_owned()),
210            }));
211        } else {
212            input_sources.insert(input_source);
213        }
214    }
215
216    Ok((i, input_sources))
217}
218
219fn string_to_store_path<'a, 'i, S>(
220    i: &'i [u8],
221    path_str: &'a str,
222) -> Result<StorePath<S>, nom::Err<NomError<&'i [u8]>>>
223where
224    S: std::clone::Clone + AsRef<str> + std::convert::From<&'a str>,
225{
226    let path =
227        StorePath::from_absolute_path(path_str.as_bytes()).map_err(|e: store_path::Error| {
228            nom::Err::Failure(NomError {
229                input: i,
230                code: e.into(),
231            })
232        })?;
233
234    #[cfg(debug_assertions)]
235    assert_eq!(path_str, path.to_absolute_path());
236
237    Ok(path)
238}
239
/// Parses a derivation in its ATerm serialization.
///
/// Expects the `write::DERIVATION_PREFIX` tag followed by a parenthesised,
/// comma-separated sequence of: outputs, input derivations, input sources,
/// system, builder, arguments and environment.
///
/// This function neither validates the resulting [Derivation] nor requires
/// the input to be fully consumed; the remaining input is returned alongside
/// the derivation.
pub fn parse_derivation(i: &[u8]) -> NomResult<&[u8], Derivation> {
    use nom::Parser;
    preceded(
        tag(write::DERIVATION_PREFIX),
        delimited(
            // inside parens
            nomchar('('),
            // The tuple parser requires all errors to be of the same type, so
            // the parsers returning a generic nom error are wrapped in closures
            // converting it via `into_nomerror`.
            (
                // parse outputs
                terminated(parse_outputs, nomchar(',')),
                // parse input derivations
                terminated(parse_input_derivations, nomchar(',')),
                // parse input sources
                terminated(parse_input_sources, nomchar(',')),
                // parse system
                |i| {
                    terminated(aterm::parse_string_field, nomchar(','))
                        .parse(i)
                        .map_err(into_nomerror)
                },
                // parse builder
                |i| {
                    terminated(aterm::parse_string_field, nomchar(','))
                        .parse(i)
                        .map_err(into_nomerror)
                },
                // parse arguments
                |i| {
                    terminated(aterm::parse_string_list, nomchar(','))
                        .parse(i)
                        .map_err(into_nomerror)
                },
                // parse environment (last field, so no trailing comma)
                parse_kv(aterm::parse_bytes_field),
            ),
            nomchar(')'),
        )
        .map(
            // The tuple is in serialization order; move its parts into the
            // corresponding fields of the struct.
            |(
                outputs,
                input_derivations,
                input_sources,
                system,
                builder,
                arguments,
                environment,
            )| {
                Derivation {
                    arguments,
                    builder,
                    environment,
                    input_derivations,
                    input_sources,
                    outputs,
                    system,
                }
            },
        ),
    )
    .parse(i)
}
303
304/// Parse a list of key/value pairs into a BTreeMap.
305/// The parser for the values can be passed in.
306/// In terms of ATerm, this is just a 2-tuple,
307/// but we have the additional restriction that the first element needs to be
308/// unique across all tuples.
309pub(crate) fn parse_kv<'a, V, VF>(
310    vf: VF,
311) -> impl FnMut(&'a [u8]) -> NomResult<&'a [u8], BTreeMap<String, V>> + 'static
312where
313    VF: FnMut(&'a [u8]) -> nom::IResult<&'a [u8], V, nom::error::Error<&'a [u8]>> + Clone + 'static,
314{
315    move |i|
316    // inside brackets
317    delimited(
318        nomchar('['),
319        |ii| {
320            let res = separated_list0(
321                nomchar(','),
322                // inside parens
323                delimited(
324                    nomchar('('),
325                    separated_pair(
326                        aterm::parse_string_field,
327                        nomchar(','),
328                        vf.clone(),
329                    ),
330                    nomchar(')'),
331                ),
332            ).parse(ii).map_err(into_nomerror);
333
334            match res {
335                Ok((rest, pairs)) => {
336                    let mut kvs: BTreeMap<String, V> = BTreeMap::new();
337                    for (k, v) in pairs.into_iter() {
338                        // collect the 2-tuple to a BTreeMap,
339                        // and fail if the key was already seen before.
340                        match kvs.entry(k) {
341                            btree_map::Entry::Vacant(e) => { e.insert(v); },
342                            btree_map::Entry::Occupied(e) => {
343                                return Err(nom::Err::Failure(NomError {
344                                    input: i,
345                                    code: ErrorKind::DuplicateMapKey(e.key().clone()),
346                                }));
347                            }
348                        }
349                    }
350                    Ok((rest, kvs))
351                }
352                Err(e) => Err(e),
353            }
354        },
355        nomchar(']'),
356    ).parse(i)
357}
358
/// Unit tests for the individual ATerm sub-parsers and the CA hash helper.
#[cfg(test)]
mod tests {
    use crate::store_path::StorePathRef;
    use std::collections::{BTreeMap, BTreeSet};
    use std::sync::LazyLock;

    use crate::{
        derivation::{
            parse_error::ErrorKind, parser::from_algo_and_mode_and_digest, CAHash, NixHash, Output,
        },
        store_path::StorePath,
    };
    use bstr::{BString, ByteSlice};
    use hex_literal::hex;
    use rstest::rstest;

    /// Raw digest bytes used by the hash-related tests.
    const DIGEST_SHA256: [u8; 32] =
        hex!("a5ce9c155ed09397614646c9717fc7cd94b1023d7b76b618d409e4fefd6e9d39");

    /// [DIGEST_SHA256] wrapped as a [NixHash].
    static NIXHASH_SHA256: NixHash = NixHash::Sha256(DIGEST_SHA256);
    /// Expected result of parsing the two-output ("lib" and "out") fixture.
    static EXP_MULTI_OUTPUTS: LazyLock<BTreeMap<String, Output>> = LazyLock::new(|| {
        let mut b = BTreeMap::new();
        b.insert(
            "lib".to_string(),
            Output {
                path: Some(
                    StorePath::from_bytes(b"2vixb94v0hy2xc6p7mbnxxcyc095yyia-has-multi-out-lib")
                        .unwrap(),
                ),
                ca_hash: None,
            },
        );
        b.insert(
            "out".to_string(),
            Output {
                path: Some(
                    StorePath::from_bytes(
                        b"55lwldka5nyxa08wnvlizyqw02ihy8ic-has-multi-out".as_bytes(),
                    )
                    .unwrap(),
                ),
                ca_hash: None,
            },
        );
        b
    });

    /// Expected result of parsing `[("a","1"),("b","2")]` as key/value pairs.
    static EXP_AB_MAP: LazyLock<BTreeMap<String, BString>> = LazyLock::new(|| {
        let mut b = BTreeMap::new();
        b.insert("a".to_string(), b"1".into());
        b.insert("b".to_string(), b"2".into());
        b
    });

    /// Expected result of parsing [EXP_INPUT_DERIVATIONS_SIMPLE_ATERM].
    static EXP_INPUT_DERIVATIONS_SIMPLE: LazyLock<BTreeMap<StorePath<String>, BTreeSet<String>>> =
        LazyLock::new(|| {
            let mut b = BTreeMap::new();
            b.insert(
                StorePath::from_bytes(b"8bjm87p310sb7r2r0sg4xrynlvg86j8k-hello-2.12.1.tar.gz.drv")
                    .unwrap(),
                {
                    let mut output_names = BTreeSet::new();
                    output_names.insert("out".to_string());
                    output_names
                },
            );
            b.insert(
                StorePath::from_bytes(b"p3jc8aw45dza6h52v81j7lk69khckmcj-bash-5.2-p15.drv")
                    .unwrap(),
                {
                    let mut output_names = BTreeSet::new();
                    output_names.insert("out".to_string());
                    output_names.insert("lib".to_string());
                    output_names
                },
            );
            b
        });

    /// ATerm serialization of two input derivations with their output names.
    static EXP_INPUT_DERIVATIONS_SIMPLE_ATERM: LazyLock<String> = LazyLock::new(|| {
        format!(
            "[(\"{0}\",[\"out\"]),(\"{1}\",[\"out\",\"lib\"])]",
            "/nix/store/8bjm87p310sb7r2r0sg4xrynlvg86j8k-hello-2.12.1.tar.gz.drv",
            "/nix/store/p3jc8aw45dza6h52v81j7lk69khckmcj-bash-5.2-p15.drv"
        )
    });

    /// Absolute paths expected from the "simple" input sources case.
    static EXP_INPUT_SOURCES_SIMPLE: LazyLock<BTreeSet<String>> = LazyLock::new(|| {
        let mut b = BTreeSet::new();
        b.insert("/nix/store/55lwldka5nyxa08wnvlizyqw02ihy8ic-has-multi-out".to_string());
        b.insert("/nix/store/2vixb94v0hy2xc6p7mbnxxcyc095yyia-has-multi-out-lib".to_string());
        b
    });

    /// Ensure parsing KVs works
    #[rstest]
    #[case::empty(b"[]", &BTreeMap::new(), b"")]
    #[case::simple(b"[(\"a\",\"1\"),(\"b\",\"2\")]", &EXP_AB_MAP, b"")]
    fn parse_kv(
        #[case] input: &'static [u8],
        #[case] expected: &BTreeMap<String, BString>,
        #[case] exp_rest: &[u8],
    ) {
        let (rest, parsed) =
            super::parse_kv(crate::aterm::parse_bytes_field)(input).expect("must parse");
        assert_eq!(exp_rest, rest, "expected remainder");
        assert_eq!(*expected, parsed);
    }

    /// Ensures the kv parser complains about duplicate map keys
    #[test]
    fn parse_kv_fail_dup_keys() {
        let input: &'static [u8] = b"[(\"a\",\"1\"),(\"a\",\"2\")]";
        let e = super::parse_kv(crate::aterm::parse_bytes_field)(input).expect_err("must fail");

        match e {
            nom::Err::Failure(e) => {
                assert_eq!(ErrorKind::DuplicateMapKey("a".to_string()), e.code);
            }
            _ => panic!("unexpected error"),
        }
    }

    /// Ensure parsing input derivations works.
    #[rstest]
    #[case::empty(b"[]", &BTreeMap::new())]
    #[case::simple(EXP_INPUT_DERIVATIONS_SIMPLE_ATERM.as_bytes(), &EXP_INPUT_DERIVATIONS_SIMPLE)]
    fn parse_input_derivations(
        #[case] input: &'static [u8],
        #[case] expected: &BTreeMap<StorePath<String>, BTreeSet<String>>,
    ) {
        let (rest, parsed) = super::parse_input_derivations(input).expect("must parse");

        assert_eq!(expected, &parsed, "parsed mismatch");
        assert!(rest.is_empty(), "rest must be empty");
    }

    /// Ensures the input derivation parser complains about duplicate output names
    #[test]
    fn parse_input_derivations_fail_dup_output_names() {
        let input_str = format!(
            "[(\"{0}\",[\"out\"]),(\"{1}\",[\"out\",\"out\"])]",
            "/nix/store/8bjm87p310sb7r2r0sg4xrynlvg86j8k-hello-2.12.1.tar.gz.drv",
            "/nix/store/p3jc8aw45dza6h52v81j7lk69khckmcj-bash-5.2-p15.drv"
        );
        let e = super::parse_input_derivations(input_str.as_bytes()).expect_err("must fail");

        match e {
            nom::Err::Failure(e) => {
                assert_eq!(
                    ErrorKind::DuplicateInputDerivationOutputName(
                        "/nix/store/p3jc8aw45dza6h52v81j7lk69khckmcj-bash-5.2-p15.drv".to_string(),
                        "out".to_string()
                    ),
                    e.code
                );
            }
            _ => panic!("unexpected error"),
        }
    }

    /// Ensure parsing input sources works
    #[rstest]
    #[case::empty(b"[]", &BTreeSet::new())]
    #[case::simple(b"[\"/nix/store/55lwldka5nyxa08wnvlizyqw02ihy8ic-has-multi-out\",\"/nix/store/2vixb94v0hy2xc6p7mbnxxcyc095yyia-has-multi-out-lib\"]", &EXP_INPUT_SOURCES_SIMPLE)]
    fn parse_input_sources(#[case] input: &'static [u8], #[case] expected: &BTreeSet<String>) {
        let (rest, parsed) = super::parse_input_sources(input).expect("must parse");

        // Compare by absolute path strings rather than by StorePath values.
        assert_eq!(
            expected,
            &parsed
                .iter()
                .map(StorePath::to_absolute_path)
                .collect::<BTreeSet<_>>(),
            "parsed mismatch"
        );
        assert!(rest.is_empty(), "rest must be empty");
    }

    /// Ensures the input sources parser complains about duplicate input sources
    #[test]
    fn parse_input_sources_fail_dup_keys() {
        let input: &'static [u8] = b"[\"/nix/store/55lwldka5nyxa08wnvlizyqw02ihy8ic-foo\",\"/nix/store/55lwldka5nyxa08wnvlizyqw02ihy8ic-foo\"]";
        let e = super::parse_input_sources(input).expect_err("must fail");

        match e {
            nom::Err::Failure(e) => {
                assert_eq!(
                    ErrorKind::DuplicateInputSource(
                        StorePathRef::from_absolute_path(
                            "/nix/store/55lwldka5nyxa08wnvlizyqw02ihy8ic-foo".as_bytes()
                        )
                        .unwrap()
                        .to_owned()
                    ),
                    e.code
                );
            }
            _ => panic!("unexpected error"),
        }
    }

    /// Ensure parsing a single output works, both without a CA hash and for
    /// a fixed-output (`r:sha256`) output.
    #[rstest]
    #[case::simple(
        br#"("out","/nix/store/5vyvcwah9l9kf07d52rcgdk70g2f4y13-foo","","")"#,
        ("out".to_string(), Output {
            path: Some(
                StorePathRef::from_absolute_path("/nix/store/5vyvcwah9l9kf07d52rcgdk70g2f4y13-foo".as_bytes()).unwrap().to_owned()),
            ca_hash: None
        })
    )]
    #[case::fod(
        br#"("out","/nix/store/4q0pg5zpfmznxscq3avycvf9xdvx50n3-bar","r:sha256","08813cbee9903c62be4c5027726a418a300da4500b2d369d3af9286f4815ceba")"#,
        ("out".to_string(), Output {
            path: Some(
                StorePathRef::from_absolute_path(
                "/nix/store/4q0pg5zpfmznxscq3avycvf9xdvx50n3-bar".as_bytes()).unwrap().to_owned()),
            ca_hash: Some(from_algo_and_mode_and_digest("r:sha256",
                   data_encoding::HEXLOWER.decode(b"08813cbee9903c62be4c5027726a418a300da4500b2d369d3af9286f4815ceba").unwrap()            ).unwrap()),
        })
     )]
    fn parse_output(#[case] input: &[u8], #[case] expected: (String, Output)) {
        let (rest, parsed) = super::parse_output(input).expect("must parse");
        assert!(rest.is_empty());
        assert_eq!(expected, parsed);
    }

    /// Ensure parsing a list of outputs works
    #[rstest]
    #[case::multi_out(
        br#"[("lib","/nix/store/2vixb94v0hy2xc6p7mbnxxcyc095yyia-has-multi-out-lib","",""),("out","/nix/store/55lwldka5nyxa08wnvlizyqw02ihy8ic-has-multi-out","","")]"#,
        &EXP_MULTI_OUTPUTS
    )]
    fn parse_outputs(#[case] input: &[u8], #[case] expected: &BTreeMap<String, Output>) {
        let (rest, parsed) = super::parse_outputs(input).expect("must parse");
        assert!(rest.is_empty());
        assert_eq!(*expected, parsed);
    }

    /// Ensure the `r:` prefix selects [CAHash::Nar] and its absence
    /// selects [CAHash::Flat].
    #[rstest]
    #[case::sha256_flat("sha256", &DIGEST_SHA256, CAHash::Flat(NIXHASH_SHA256.clone()))]
    #[case::sha256_recursive("r:sha256", &DIGEST_SHA256, CAHash::Nar(NIXHASH_SHA256.clone()))]
    fn test_from_algo_and_mode_and_digest(
        #[case] algo_and_mode: &str,
        #[case] digest: &[u8],
        #[case] expected: CAHash,
    ) {
        assert_eq!(
            expected,
            from_algo_and_mode_and_digest(algo_and_mode, digest).unwrap()
        );
    }

    /// Ensure an empty digest and a misspelled algo are both rejected
    #[test]
    fn from_algo_and_mode_and_digest_failure() {
        assert!(from_algo_and_mode_and_digest("r:sha256", []).is_err());
        assert!(from_algo_and_mode_and_digest("ha256", DIGEST_SHA256).is_err());
    }
}