1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
use crate::opcode::{CodeIdx, ConstantIdx, Op, OpArg};
use crate::value::Value;
use crate::{CoercionKind, SourceCode};
use std::io::Write;

/// Maximum size of a u64 encoded in the vu128 varint encoding.
const U64_VARINT_SIZE: usize = 9;

/// Represents a source location from which one or more operations
/// were compiled.
///
/// The span itself is an index into a [codemap::CodeMap], and the
/// structure tracks the number of operations that were yielded from
/// the same span.
///
/// At error reporting time, it becomes possible to either just fetch
/// the textual representation of that span from the codemap, or to
/// even re-parse the AST using rnix to create more semantically
/// interesting errors.
#[derive(Clone, Debug, PartialEq)]
struct SourceSpan {
    /// Span into the [codemap::CodeMap].
    span: codemap::Span,

    /// Index of the first operation covered by this span.
    start: usize,
}

/// A chunk is a representation of a sequence of bytecode
/// instructions, associated constants and additional metadata as
/// emitted by the compiler.
#[derive(Debug, Default)]
pub struct Chunk {
    pub code: Vec<u8>,
    pub constants: Vec<Value>,
    spans: Vec<SourceSpan>,

    /// Index of the last operation (i.e. not data) written to the code vector.
    /// Some operations (e.g. jump patching) need to know this.
    last_op: usize,
}

impl Chunk {
    pub fn push_op(&mut self, op: Op, span: codemap::Span) -> usize {
        self.last_op = self.code.len();
        self.code.push(op as u8);
        self.push_span(span, self.last_op);
        self.last_op
    }

    pub fn push_uvarint(&mut self, data: u64) {
        let mut encoded = [0u8; U64_VARINT_SIZE];
        let bytes_written = vu128::encode_u64(&mut encoded, data);
        self.code.extend_from_slice(&encoded[..bytes_written]);
    }

    pub fn read_uvarint(&self, idx: usize) -> (u64, usize) {
        debug_assert!(
            idx < self.code.len(),
            "invalid bytecode (missing varint operand)",
        );

        if self.code.len() - idx >= U64_VARINT_SIZE {
            vu128::decode_u64(
                &self.code[idx..idx + U64_VARINT_SIZE]
                    .try_into()
                    .expect("size statically checked"),
            )
        } else {
            let mut tmp = [0u8; U64_VARINT_SIZE];
            tmp[..self.code.len() - idx].copy_from_slice(&self.code[idx..]);
            vu128::decode_u64(&tmp)
        }
    }

    pub fn push_u16(&mut self, data: u16) {
        self.code.extend_from_slice(&data.to_le_bytes())
    }

    /// Patches the argument to the jump operand of the jump at the given index
    /// to point to the *next* instruction that will be emitted.
    pub fn patch_jump(&mut self, idx: usize) {
        let offset = (self.code.len() - idx - /* arg idx = */ 1 - /* jump arg size = */ 2) as u16;
        self.code[idx + 1..idx + 3].copy_from_slice(&offset.to_le_bytes())
    }

    pub fn read_u16(&self, idx: usize) -> u16 {
        if idx + 2 > self.code.len() {
            panic!("Tvix bug: invalid bytecode (expected u16 operand not found)")
        }

        let byte_array: &[u8; 2] = &self.code[idx..idx + 2]
            .try_into()
            .expect("fixed-size slice can not fail to convert to array");

        u16::from_le_bytes(*byte_array)
    }

    /// Get the first span of a chunk, no questions asked.
    pub fn first_span(&self) -> codemap::Span {
        self.spans[0].span
    }

    /// Return the last op in the chunk together with its index, if any.
    pub fn last_op(&self) -> Option<(Op, usize)> {
        if self.code.is_empty() {
            return None;
        }

        Some((self.code[self.last_op].into(), self.last_op))
    }

    pub fn push_constant(&mut self, data: Value) -> ConstantIdx {
        let idx = self.constants.len();
        self.constants.push(data);
        ConstantIdx(idx)
    }

    /// Return a reference to the constant at the given [`ConstantIdx`]
    pub fn get_constant(&self, constant: ConstantIdx) -> Option<&Value> {
        self.constants.get(constant.0)
    }

    fn push_span(&mut self, span: codemap::Span, start: usize) {
        match self.spans.last_mut() {
            // We do not need to insert the same span again, as this
            // instruction was compiled from the same span as the last
            // one.
            Some(last) if last.span == span => {}

            // In all other cases, this is a new source span.
            _ => self.spans.push(SourceSpan { span, start }),
        }
    }

    /// Retrieve the [codemap::Span] from which the instruction at
    /// `offset` was compiled.
    pub fn get_span(&self, offset: CodeIdx) -> codemap::Span {
        let position = self
            .spans
            .binary_search_by(|span| span.start.cmp(&offset.0));

        let span = match position {
            Ok(index) => &self.spans[index],
            Err(index) => {
                if index == 0 {
                    &self.spans[0]
                } else {
                    &self.spans[index - 1]
                }
            }
        };

        span.span
    }

    /// Write the disassembler representation of the operation at
    /// `idx` to the specified writer, and return how many bytes in the code to
    /// skip for the next op.
    pub fn disassemble_op<W: Write>(
        &self,
        writer: &mut W,
        source: &SourceCode,
        width: usize,
        idx: CodeIdx,
    ) -> Result<usize, std::io::Error> {
        write!(writer, "{:#width$x}\t ", idx.0, width = width)?;

        // Print continuation character if the previous operation was at
        // the same line, otherwise print the line.
        let line = source.get_line(self.get_span(idx));
        if idx.0 > 0 && source.get_line(self.get_span(idx - 1)) == line {
            write!(writer, "   |\t")?;
        } else {
            write!(writer, "{:4}\t", line)?;
        }

        let _fmt_constant = |idx: ConstantIdx| match &self.constants[idx.0] {
            Value::Thunk(t) => t.debug_repr(),
            Value::Closure(c) => format!("closure({:p})", c.lambda),
            Value::Blueprint(b) => format!("blueprint({:p})", b),
            val => format!("{}", val),
        };

        let op: Op = self.code[idx.0].into();

        match op.arg_type() {
            OpArg::None => {
                writeln!(writer, "Op{:?}", op)?;
                Ok(1)
            }

            OpArg::Fixed => {
                let arg = self.read_u16(idx.0 + 1);
                writeln!(writer, "Op{:?}({})", op, arg)?;
                Ok(3)
            }

            OpArg::Uvarint => {
                let (arg, size) = self.read_uvarint(idx.0 + 1);
                writeln!(writer, "Op{:?}({})", op, arg)?;
                Ok(1 + size)
            }

            _ => match op {
                Op::CoerceToString => {
                    let kind: CoercionKind = self.code[idx.0 + 1].into();
                    writeln!(writer, "Op{:?}({:?})", op, kind)?;
                    Ok(2)
                }

                Op::Closure | Op::ThunkClosure | Op::ThunkSuspended => {
                    let mut cidx = idx.0 + 1;

                    let (bp_idx, size) = self.read_uvarint(cidx);
                    cidx += size;

                    let (packed_count, size) = self.read_uvarint(cidx);
                    cidx += size;

                    let captures_with = packed_count & 0b1 == 1;
                    let count = packed_count >> 1;

                    write!(writer, "Op{:?}(BP @ {}, ", op, bp_idx)?;
                    if captures_with {
                        write!(writer, "captures with, ")?;
                    }
                    writeln!(writer, "{} upvalues)", count)?;

                    for _ in 0..count {
                        let (_, size) = self.read_uvarint(cidx);
                        cidx += size;
                    }

                    Ok(cidx - idx.0)
                }
                _ => panic!("Tvix bug: don't know how to format argument for Op{:?}", op),
            },
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::test_utils::dummy_span;

    // Note: These tests are about the functionality of the `Chunk` type, the
    // opcodes used below do *not* represent valid, executable Tvix code (and
    // don't need to).

    #[test]
    fn push_op() {
        let mut chunk = Chunk::default();
        let idx = chunk.push_op(Op::Add, dummy_span());
        assert_eq!(*chunk.code.last().unwrap(), Op::Add as u8);
        assert_eq!(chunk.code[idx], Op::Add as u8);
    }

    #[test]
    fn push_op_with_arg() {
        let mut chunk = Chunk::default();
        let mut idx = chunk.push_op(Op::Constant, dummy_span());
        chunk.push_uvarint(42);

        assert_eq!(chunk.code[idx], Op::Constant as u8);

        idx += 1;
        let (arg, size) = chunk.read_uvarint(idx);
        assert_eq!(idx + size, chunk.code.len());
        assert_eq!(arg, 42);
    }

    #[test]
    fn push_jump() {
        let mut chunk = Chunk::default();

        chunk.push_op(Op::Constant, dummy_span());
        chunk.push_uvarint(0);

        let idx = chunk.push_op(Op::Jump, dummy_span());
        chunk.push_u16(0);

        chunk.push_op(Op::Constant, dummy_span());
        chunk.push_uvarint(1);

        chunk.patch_jump(idx);
        chunk.push_op(Op::Return, dummy_span());

        #[rustfmt::skip]
        let expected: Vec<u8> = vec![
            Op::Constant as u8, 0,
            Op::Jump as u8, 2, 0,
            Op::Constant as u8, 1,
            Op::Return as u8,
        ];

        assert_eq!(chunk.code, expected);
    }
}