tvix_eval/
chunk.rs

1use crate::opcode::{CodeIdx, ConstantIdx, Op, OpArg};
2use crate::value::Value;
3use crate::{CoercionKind, SourceCode};
4use integer_encoding::VarInt;
5use std::io::Write;
6
7/// Represents a source location from which one or more operations
8/// were compiled.
9///
10/// The span itself is an index into a [codemap::CodeMap], and the
11/// structure tracks the number of operations that were yielded from
12/// the same span.
13///
14/// At error reporting time, it becomes possible to either just fetch
15/// the textual representation of that span from the codemap, or to
16/// even re-parse the AST using rnix to create more semantically
17/// interesting errors.
18#[derive(Clone, Debug, PartialEq)]
19struct SourceSpan {
20    /// Span into the [codemap::CodeMap].
21    span: codemap::Span,
22
23    /// Index of the first operation covered by this span.
24    start: usize,
25}
26
27/// A chunk is a representation of a sequence of bytecode
28/// instructions, associated constants and additional metadata as
29/// emitted by the compiler.
30#[derive(Debug, Default)]
31pub struct Chunk {
32    pub code: Vec<u8>,
33    pub constants: Vec<Value>,
34    spans: Vec<SourceSpan>,
35
36    /// Index of the last operation (i.e. not data) written to the code vector.
37    /// Some operations (e.g. jump patching) need to know this.
38    last_op: usize,
39}
40
41impl Chunk {
42    pub fn push_op(&mut self, op: Op, span: codemap::Span) -> usize {
43        self.last_op = self.code.len();
44        self.code.push(op as u8);
45        self.push_span(span, self.last_op);
46        self.last_op
47    }
48
49    pub fn push_uvarint(&mut self, data: u64) {
50        let start_len = self.code.len();
51        self.code
52            .resize(start_len + VarInt::required_space(data), 0);
53        VarInt::encode_var(data, &mut self.code[start_len..]);
54    }
55
56    pub fn read_uvarint(&self, idx: usize) -> (u64, usize) {
57        debug_assert!(
58            idx < self.code.len(),
59            "invalid bytecode (missing varint operand)",
60        );
61
62        VarInt::decode_var(&self.code[idx..]).expect("tvix bug: invalid varint encoding")
63    }
64
65    pub fn push_u16(&mut self, data: u16) {
66        self.code.extend_from_slice(&data.to_le_bytes())
67    }
68
69    /// Patches the argument to the jump operand of the jump at the given index
70    /// to point to the *next* instruction that will be emitted.
71    pub fn patch_jump(&mut self, idx: usize) {
72        let offset = (self.code.len() - idx - /* arg idx = */ 1 - /* jump arg size = */ 2) as u16;
73        self.code[idx + 1..idx + 3].copy_from_slice(&offset.to_le_bytes())
74    }
75
76    pub fn read_u16(&self, idx: usize) -> u16 {
77        if idx + 2 > self.code.len() {
78            panic!("Tvix bug: invalid bytecode (expected u16 operand not found)")
79        }
80
81        let byte_array: &[u8; 2] = &self.code[idx..idx + 2]
82            .try_into()
83            .expect("fixed-size slice can not fail to convert to array");
84
85        u16::from_le_bytes(*byte_array)
86    }
87
88    /// Get the first span of a chunk, no questions asked.
89    pub fn first_span(&self) -> codemap::Span {
90        self.spans[0].span
91    }
92
93    /// Return the last op in the chunk together with its index, if any.
94    pub fn last_op(&self) -> Option<(Op, usize)> {
95        if self.code.is_empty() {
96            return None;
97        }
98
99        Some((self.code[self.last_op].into(), self.last_op))
100    }
101
102    pub fn push_constant(&mut self, data: Value) -> ConstantIdx {
103        let idx = self.constants.len();
104        self.constants.push(data);
105        ConstantIdx(idx)
106    }
107
108    /// Return a reference to the constant at the given [`ConstantIdx`]
109    pub fn get_constant(&self, constant: ConstantIdx) -> Option<&Value> {
110        self.constants.get(constant.0)
111    }
112
113    fn push_span(&mut self, span: codemap::Span, start: usize) {
114        match self.spans.last_mut() {
115            // We do not need to insert the same span again, as this
116            // instruction was compiled from the same span as the last
117            // one.
118            Some(last) if last.span == span => {}
119
120            // In all other cases, this is a new source span.
121            _ => self.spans.push(SourceSpan { span, start }),
122        }
123    }
124
125    /// Retrieve the [codemap::Span] from which the instruction at
126    /// `offset` was compiled.
127    pub fn get_span(&self, offset: CodeIdx) -> codemap::Span {
128        let position = self
129            .spans
130            .binary_search_by(|span| span.start.cmp(&offset.0));
131
132        let span = match position {
133            Ok(index) => &self.spans[index],
134            Err(index) => {
135                if index == 0 {
136                    &self.spans[0]
137                } else {
138                    &self.spans[index - 1]
139                }
140            }
141        };
142
143        span.span
144    }
145
146    /// Write the disassembler representation of the operation at
147    /// `idx` to the specified writer, and return how many bytes in the code to
148    /// skip for the next op.
149    pub fn disassemble_op<W: Write>(
150        &self,
151        writer: &mut W,
152        source: &SourceCode,
153        width: usize,
154        idx: CodeIdx,
155    ) -> Result<usize, std::io::Error> {
156        write!(writer, "{:#width$x}\t ", idx.0, width = width)?;
157
158        // Print continuation character if the previous operation was at
159        // the same line, otherwise print the line.
160        let line = source.get_line(self.get_span(idx));
161        if idx.0 > 0 && source.get_line(self.get_span(idx - 1)) == line {
162            write!(writer, "   |\t")?;
163        } else {
164            write!(writer, "{line:4}\t")?;
165        }
166
167        let _fmt_constant = |idx: ConstantIdx| match &self.constants[idx.0] {
168            Value::Thunk(t) => t.debug_repr(),
169            Value::Closure(c) => format!("closure({:p})", c.lambda),
170            Value::Blueprint(b) => format!("blueprint({b:p})"),
171            val => format!("{val}"),
172        };
173
174        let op: Op = self.code[idx.0].into();
175
176        match op.arg_type() {
177            OpArg::None => {
178                writeln!(writer, "Op{op:?}")?;
179                Ok(1)
180            }
181
182            OpArg::Fixed => {
183                let arg = self.read_u16(idx.0 + 1);
184                writeln!(writer, "Op{op:?}({arg})")?;
185                Ok(3)
186            }
187
188            OpArg::Uvarint => {
189                let (arg, size) = self.read_uvarint(idx.0 + 1);
190                writeln!(writer, "Op{op:?}({arg})")?;
191                Ok(1 + size)
192            }
193
194            _ => match op {
195                Op::CoerceToString => {
196                    let kind: CoercionKind = self.code[idx.0 + 1].into();
197                    writeln!(writer, "Op{op:?}({kind:?})")?;
198                    Ok(2)
199                }
200
201                Op::Closure | Op::ThunkClosure | Op::ThunkSuspended => {
202                    let mut cidx = idx.0 + 1;
203
204                    let (bp_idx, size) = self.read_uvarint(cidx);
205                    cidx += size;
206
207                    let (packed_count, size) = self.read_uvarint(cidx);
208                    cidx += size;
209
210                    let captures_with = packed_count & 0b1 == 1;
211                    let count = packed_count >> 1;
212
213                    write!(writer, "Op{op:?}(BP @ {bp_idx}, ")?;
214                    if captures_with {
215                        write!(writer, "captures with, ")?;
216                    }
217                    writeln!(writer, "{count} upvalues)")?;
218
219                    for _ in 0..count {
220                        let (_, size) = self.read_uvarint(cidx);
221                        cidx += size;
222                    }
223
224                    Ok(cidx - idx.0)
225                }
226                _ => panic!("Tvix bug: don't know how to format argument for Op{op:?}"),
227            },
228        }
229    }
230}
231
232#[cfg(test)]
233mod tests {
234    use super::*;
235    use crate::test_utils::dummy_span;
236
237    // Note: These tests are about the functionality of the `Chunk` type, the
238    // opcodes used below do *not* represent valid, executable Tvix code (and
239    // don't need to).
240
241    #[test]
242    fn push_op() {
243        let mut chunk = Chunk::default();
244        let idx = chunk.push_op(Op::Add, dummy_span());
245        assert_eq!(*chunk.code.last().unwrap(), Op::Add as u8);
246        assert_eq!(chunk.code[idx], Op::Add as u8);
247    }
248
249    #[test]
250    fn push_op_with_arg() {
251        let mut chunk = Chunk::default();
252        let mut idx = chunk.push_op(Op::Constant, dummy_span());
253        chunk.push_uvarint(42);
254
255        assert_eq!(chunk.code[idx], Op::Constant as u8);
256
257        idx += 1;
258        let (arg, size) = chunk.read_uvarint(idx);
259        assert_eq!(idx + size, chunk.code.len());
260        assert_eq!(arg, 42);
261    }
262
263    #[test]
264    fn push_jump() {
265        let mut chunk = Chunk::default();
266
267        chunk.push_op(Op::Constant, dummy_span());
268        chunk.push_uvarint(0);
269
270        let idx = chunk.push_op(Op::Jump, dummy_span());
271        chunk.push_u16(0);
272
273        chunk.push_op(Op::Constant, dummy_span());
274        chunk.push_uvarint(1);
275
276        chunk.patch_jump(idx);
277        chunk.push_op(Op::Return, dummy_span());
278
279        #[rustfmt::skip]
280        let expected: Vec<u8> = vec![
281            Op::Constant as u8, 0,
282            Op::Jump as u8, 2, 0,
283            Op::Constant as u8, 1,
284            Op::Return as u8,
285        ];
286
287        assert_eq!(chunk.code, expected);
288    }
289}