diff --git a/crates/uplc/src/binder/debruijn.rs b/crates/uplc/src/binder/debruijn.rs index 57dd70f..cededeb 100644 --- a/crates/uplc/src/binder/debruijn.rs +++ b/crates/uplc/src/binder/debruijn.rs @@ -50,7 +50,7 @@ impl<'a> Binder<'a> for DeBruijn { } } -impl<'a> Eval<'a> for DeBruijn { +impl Eval<'_> for DeBruijn { fn index(&self) -> usize { self.0 } diff --git a/crates/uplc/src/constant.rs b/crates/uplc/src/constant.rs index ca1e796..834cd65 100644 --- a/crates/uplc/src/constant.rs +++ b/crates/uplc/src/constant.rs @@ -37,7 +37,7 @@ impl<'a> Constant<'a> { arena.alloc(Constant::Integer(i)) } - pub fn integer_from(arena: &'a Bump, i: i128) -> &'a Constant { + pub fn integer_from(arena: &'a Bump, i: i128) -> &'a Constant<'a> { arena.alloc(Constant::Integer(integer_from(arena, i))) } @@ -57,7 +57,7 @@ impl<'a> Constant<'a> { arena.alloc(Constant::Data(d)) } - pub fn unit(arena: &'a Bump) -> &'a Constant { + pub fn unit(arena: &'a Bump) -> &'a Constant<'a> { arena.alloc(Constant::Unit) } diff --git a/crates/uplc/src/flat/decode/decoder.rs b/crates/uplc/src/flat/decode/decoder.rs index 08a92ab..d8eb1db 100644 --- a/crates/uplc/src/flat/decode/decoder.rs +++ b/crates/uplc/src/flat/decode/decoder.rs @@ -3,7 +3,7 @@ use bumpalo::{ Bump, }; -use crate::flat::zigzag::ZigZag; +use crate::{constant::Integer, flat::zigzag::ZigZag}; use super::FlatDecodeError; @@ -169,7 +169,7 @@ impl<'b> Decoder<'b> { Ok(b) } - /// Decode an isize integer. + /// Decode an integer of an arbitrary size. /// /// This is byte alignment agnostic. /// First we decode the next 8 bits of the buffer. @@ -180,8 +180,44 @@ impl<'b> Decoder<'b> { /// so on. If the most significant bit was instead 0 we stop decoding /// any more bits. Finally we use zigzag to convert the unsigned integer /// back to a signed integer. 
- pub fn integer(&mut self) -> Result { - Ok(self.word()?.zigzag()) + pub fn integer(&mut self) -> Result { + Ok(ZigZag::unzigzag(&self.big_word()?)) + } + + /// Decode a word of 128 bits size. + /// This is byte alignment agnostic. + /// First we decode the next 8 bits of the buffer. + /// We take the 7 least significant bits as the 7 least significant bits of + /// the current unsigned integer. If the most significant bit of the 8 + /// bits is 1 then we take the next 8 and repeat the process above, + /// filling in the next 7 least significant bits of the unsigned integer and + /// so on. If the most significant bit was instead 0 we stop decoding + /// any more bits. + pub fn big_word(&mut self) -> Result { + let mut leading_bit = 1; + let mut final_word = Integer::from(0); + let mut shift = 0_u32; // Using u32 for shift as it's more than enough for 128 bits + + // Continue looping if lead bit is 1 (0x80) otherwise exit + while leading_bit > 0 { + let word8 = self.bits8(8)?; + let word7 = word8 & 0x7F; // 127, get 7 least significant bits + + // Create temporary Integer from word7 and shift it + let part = Integer::from(word7); + let shifted_part = part << shift; + + // OR it with our result + final_word |= shifted_part; + + // Increment shift by 7 for next iteration + shift += 7; + + // Check if we should continue (MSB set) + leading_bit = word8 & 0x80; // 128 + } + + Ok(final_word) } /// Decode a byte array. 
diff --git a/crates/uplc/src/flat/decode/mod.rs b/crates/uplc/src/flat/decode/mod.rs index fbfda71..7c57559 100644 --- a/crates/uplc/src/flat/decode/mod.rs +++ b/crates/uplc/src/flat/decode/mod.rs @@ -134,9 +134,9 @@ fn decode_constant<'a>( match &tags.as_slice() { [0] => { - let v = d.integer()?; + let v = ctx.arena.alloc(d.integer()?); - Ok(Constant::integer_from(ctx.arena, v)) + Ok(Constant::integer(ctx.arena, v)) } [1] => { let b = d.bytes(ctx.arena)?; diff --git a/crates/uplc/src/flat/encode/encoder.rs b/crates/uplc/src/flat/encode/encoder.rs index 067ed7d..43582ae 100644 --- a/crates/uplc/src/flat/encode/encoder.rs +++ b/crates/uplc/src/flat/encode/encoder.rs @@ -1,3 +1,5 @@ +use crate::{constant::Integer, flat::zigzag::ZigZag}; + use super::FlatEncodeError; #[derive(Default)] @@ -34,6 +36,33 @@ impl Encoder { self } + /// Encode a `bool` value. This is byte alignment agnostic. + /// Uses the next unused bit in the current byte to encode this information. + /// One for true and Zero for false + pub fn bool(&mut self, x: bool) -> &mut Self { + if x { + self.one(); + } else { + self.zero(); + } + + self + } + + /// Encode an arbitrarily sized integer. + /// + /// This is byte alignment agnostic. + /// First we use zigzag once to double the number and encode the negative + /// sign as the least significant bit. Next we encode the 7 least + /// significant bits of the unsigned integer. If the number is greater than + /// 127 we encode a leading 1 followed by repeating the encoding above for + /// the next 7 bits and so on. + pub fn integer(&mut self, i: &Integer) -> &mut Self { + self.big_word(i.zigzag()); + + self + } + /// Encodes up to 8 bits of information and is byte alignment agnostic. /// Uses unused bits in the current byte to write out the passed in byte /// value. Overflows to the most significant digits of the next byte if @@ -114,6 +143,33 @@ impl Encoder { Ok(self) } + /// Encode an unsigned integer of 128 bits size. 
+ /// This is byte alignment agnostic. + /// We encode the 7 least significant bits of the unsigned integer. If any + /// higher bits remain we set the leading bit to 1 and continue by + /// repeating the above for the next 7 bits and so on. + pub fn big_word(&mut self, c: Integer) -> &mut Self { + let mut d = c; + + loop { + let temp: Integer = d.clone() % 128; + let mut w = temp.to_u8().unwrap(); + + d >>= 7; + + if d != 0 { + w |= 128; + } + self.bits(8, w); + + if d == 0 { + break; + } + } + + self + } + /// Encode a string. /// Convert to byte array and then use byte array encoding. /// Uses filler to byte align the buffer, then writes byte array length up diff --git a/crates/uplc/src/flat/encode/mod.rs b/crates/uplc/src/flat/encode/mod.rs index b5c5280..d0f55b7 100644 --- a/crates/uplc/src/flat/encode/mod.rs +++ b/crates/uplc/src/flat/encode/mod.rs @@ -4,9 +4,9 @@ mod error; pub use encoder::Encoder; pub use error::FlatEncodeError; -use crate::{binder::Binder, program::Program, term::Term}; +use crate::{binder::Binder, constant::Constant, program::Program, term::Term}; -use super::tag::{self, BUILTIN_TAG_WIDTH, TERM_TAG_WIDTH}; +use super::tag; pub fn encode<'a, V>(program: &'a Program<'a, V>) -> Result, FlatEncodeError> where @@ -74,11 +74,15 @@ where encoder.list_with(fields, |e, t| encode_term(e, t))?; } - Term::Constant(c) => todo!(), + Term::Constant(c) => { + encode_term_tag(encoder, tag::CONSTANT)?; + + encode_constant(encoder, c)?; + } Term::Builtin(b) => { encode_term_tag(encoder, tag::BUILTIN)?; - encoder.bits(BUILTIN_TAG_WIDTH as i64, **b as u8); + encoder.bits(tag::BUILTIN_TAG_WIDTH as i64, **b as u8); } Term::Error => { encode_term_tag(encoder, tag::ERROR)?; @@ -88,8 +92,48 @@ where Ok(()) } +fn encode_constant<'a>(e: &mut Encoder, constant: &'a Constant<'a>) -> Result<(), FlatEncodeError> { + match constant { + Constant::Integer(i) => { + e.list_with(&[0], encode_constant_tag)?; + + e.integer(i); + } + Constant::ByteString(b) => { + 
e.list_with(&[1], encode_constant_tag)?; + + e.bytes(b)?; + } + Constant::String(s) => { + e.list_with(&[2], encode_constant_tag)?; + + e.utf8(s)?; + } + Constant::Unit => { + e.list_with(&[3], encode_constant_tag)?; + } + Constant::Boolean(b) => { + e.list_with(&[4], encode_constant_tag)?; + + e.bool(*b); + } + Constant::Data(_) => todo!(), + Constant::ProtoList(_, _) => todo!(), + Constant::ProtoPair(_, _, _, _) => todo!(), + Constant::Bls12_381G1Element(_) => todo!(), + Constant::Bls12_381G2Element(_) => todo!(), + Constant::Bls12_381MlResult(_) => todo!(), + } + + Ok(()) +} + fn encode_term_tag(e: &mut Encoder, tag: u8) -> Result<(), FlatEncodeError> { - safe_encode_bits(e, TERM_TAG_WIDTH, tag) + safe_encode_bits(e, tag::TERM_TAG_WIDTH, tag) +} + +fn encode_constant_tag(e: &mut Encoder, tag: &u8) -> Result<(), FlatEncodeError> { + safe_encode_bits(e, tag::CONST_TAG_WIDTH, *tag) } fn safe_encode_bits(e: &mut Encoder, num_bits: usize, byte: u8) -> Result<(), FlatEncodeError> { diff --git a/crates/uplc/src/flat/zigzag.rs b/crates/uplc/src/flat/zigzag.rs index cdec9a9..fc6aa26 100644 --- a/crates/uplc/src/flat/zigzag.rs +++ b/crates/uplc/src/flat/zigzag.rs @@ -1,52 +1,35 @@ // #[cfg(feature = "num-bigint")] // use num_bigint::{BigInt, BigUint, ToBigInt}; +use crate::constant::Integer; + pub trait ZigZag { type Zag; fn zigzag(self) -> Self::Zag; + fn unzigzag(self) -> Self::Zag; } -// #[cfg(feature = "num-bigint")] -// impl ZigZag for BigInt { -// type Zag = BigUint; - -// fn zigzag(self) -> Self::Zag where { -// if self >= 0.into() { -// self << 1 -// } else { -// let double: BigInt = self << 1; -// -double - >::into(1) -// } -// .to_biguint() -// .expect("number is positive") -// } -// } - -impl ZigZag for i128 { - type Zag = usize; - - fn zigzag(self) -> Self::Zag where { - let bits = i128::BITS as i128; - let i = self; - ((i << 1) ^ (i >> (bits - 1))) as usize +impl ZigZag for &Integer { + type Zag = Integer; + + fn zigzag(self) -> Self::Zag { + if *self >= 0 { + // 
For non-negative numbers, just multiply by 2 (left shift by 1) + self.clone() << 1 + } else { + // For negative numbers: -(2 * n) - 1 + // First multiply by 2 + let double: Integer = self.clone() << 1; + + // Then negate and subtract 1 + -double - 1 + } } -} - -// #[cfg(feature = "num-bigint")] -// impl ZigZag for BigUint { -// type Zag = BigInt; - -// fn zigzag(self) -> Self::Zag where { -// let i = self.to_bigint().expect("always possible"); -// (i.clone() >> 1) ^ -(i & >::into(1)) -// } -// } -impl ZigZag for usize { - type Zag = i128; + fn unzigzag(self) -> Self::Zag { + let temp: Integer = self.clone() & 1; - fn zigzag(self) -> Self::Zag where { - ((self >> 1) as i128) ^ -((self & 1) as i128) + (self.clone() >> 1) ^ -(temp) } } diff --git a/crates/uplc/src/machine/env.rs b/crates/uplc/src/machine/env.rs index 3170373..953660e 100644 --- a/crates/uplc/src/machine/env.rs +++ b/crates/uplc/src/machine/env.rs @@ -25,7 +25,7 @@ where arena.alloc(Self(new_env)) } - pub fn lookup(&'a self, name: usize) -> Option<&&'a Value<'a, V>> { - self.0.get(self.0.len() - name) + pub fn lookup(&'a self, name: usize) -> Option<&'a Value<'a, V>> { + self.0.len().checked_sub(name).and_then(|i| self.0.get(i)).copied() } }