Skip to content

Commit

Permalink
feat: more constant encoding
Browse files Browse the repository at this point in the history
  • Loading branch information
rvcas committed Dec 31, 2024
1 parent 35bf6b1 commit 318b2b1
Show file tree
Hide file tree
Showing 8 changed files with 173 additions and 54 deletions.
2 changes: 1 addition & 1 deletion crates/uplc/src/binder/debruijn.rs
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ impl<'a> Binder<'a> for DeBruijn {
}
}

impl<'a> Eval<'a> for DeBruijn {
impl Eval<'_> for DeBruijn {
fn index(&self) -> usize {
self.0
}
Expand Down
4 changes: 2 additions & 2 deletions crates/uplc/src/constant.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ impl<'a> Constant<'a> {
arena.alloc(Constant::Integer(i))
}

pub fn integer_from(arena: &'a Bump, i: i128) -> &'a Constant {
/// Allocate an integer constant in `arena` from a primitive `i128`.
///
/// The value is first converted with the module-level `integer_from` helper
/// and then wrapped in `Constant::Integer`.
pub fn integer_from(arena: &'a Bump, i: i128) -> &'a Constant<'a> {
    let value = integer_from(arena, i);

    arena.alloc(Constant::Integer(value))
}

Expand All @@ -57,7 +57,7 @@ impl<'a> Constant<'a> {
arena.alloc(Constant::Data(d))
}

pub fn unit(arena: &'a Bump) -> &'a Constant {
/// Allocate a `Constant::Unit` in `arena` and return a reference to it.
pub fn unit(arena: &'a Bump) -> &'a Constant<'a> {
    let unit = Constant::Unit;

    arena.alloc(unit)
}

Expand Down
44 changes: 40 additions & 4 deletions crates/uplc/src/flat/decode/decoder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ use bumpalo::{
Bump,
};

use crate::flat::zigzag::ZigZag;
use crate::{constant::Integer, flat::zigzag::ZigZag};

use super::FlatDecodeError;

Expand Down Expand Up @@ -169,7 +169,7 @@ impl<'b> Decoder<'b> {
Ok(b)
}

/// Decode an isize integer.
/// Decode an integer of an arbitrary size.
///
/// This is byte alignment agnostic.
/// First we decode the next 8 bits of the buffer.
Expand All @@ -180,8 +180,44 @@ impl<'b> Decoder<'b> {
/// so on. If the most significant bit was instead 0 we stop decoding
/// any more bits. Finally we use zigzag to convert the unsigned integer
/// back to a signed integer.
pub fn integer(&mut self) -> Result<i128, FlatDecodeError> {
Ok(self.word()?.zigzag())
pub fn integer(&mut self) -> Result<Integer, FlatDecodeError> {
    // Read the variable-length unsigned word first, then undo the zigzag
    // mapping to recover the signed value.
    let unsigned = self.big_word()?;

    Ok(ZigZag::unzigzag(&unsigned))
}

/// Decode a variable-length word of arbitrary size into an `Integer`.
///
/// This is byte alignment agnostic.
/// Bytes are read 8 bits at a time. The 7 least significant bits of each
/// byte are OR-ed into the result, with the first byte supplying the lowest
/// 7 bits, the second byte the next 7 bits, and so on. The most significant
/// bit of each byte is a continuation flag: 1 means another byte follows,
/// 0 means this was the last one.
///
/// # Errors
///
/// Propagates any error returned by `bits8` while reading a byte.
pub fn big_word(&mut self) -> Result<Integer, FlatDecodeError> {
    let mut accumulated = Integer::from(0);
    // Bit position at which the next 7-bit group is placed.
    let mut offset = 0_u32;

    loop {
        let byte = self.bits8(8)?;

        // Low 7 bits carry the payload for this group.
        accumulated |= Integer::from(byte & 0x7F) << offset;
        offset += 7;

        // A clear top bit (0x80) marks the final byte.
        if (byte & 0x80) == 0 {
            return Ok(accumulated);
        }
    }
}

/// Decode a byte array.
Expand Down
4 changes: 2 additions & 2 deletions crates/uplc/src/flat/decode/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -134,9 +134,9 @@ fn decode_constant<'a>(

match &tags.as_slice() {
[0] => {
let v = d.integer()?;
let v = ctx.arena.alloc(d.integer()?);

Ok(Constant::integer_from(ctx.arena, v))
Ok(Constant::integer(ctx.arena, v))
}
[1] => {
let b = d.bytes(ctx.arena)?;
Expand Down
56 changes: 56 additions & 0 deletions crates/uplc/src/flat/encode/encoder.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
use crate::{constant::Integer, flat::zigzag::ZigZag};

use super::FlatEncodeError;

#[derive(Default)]
Expand Down Expand Up @@ -34,6 +36,33 @@ impl Encoder {
self
}

/// Encode a `bool` as a single bit. This is byte alignment agnostic.
///
/// `true` is written with `one()` and `false` with `zero()`, so the value
/// occupies the next unused bit of the current byte.
pub fn bool(&mut self, x: bool) -> &mut Self {
    match x {
        true => {
            self.one();
        }
        false => {
            self.zero();
        }
    }

    self
}

/// Encode an arbitrarily sized signed integer.
///
/// This is byte alignment agnostic.
/// The value is zigzag-mapped first, which folds the sign into the least
/// significant bit, and the result is then handed to `big_word`. That
/// writes it 7 bits per byte, lowest group first, setting the top bit of
/// every byte except the last.
pub fn integer(&mut self, i: &Integer) -> &mut Self {
    let zigzagged = i.zigzag();

    self.big_word(zigzagged)
}

/// Encodes up to 8 bits of information and is byte alignment agnostic.
/// Uses unused bits in the current byte to write out the passed in byte
/// value. Overflows to the most significant digits of the next byte if
Expand Down Expand Up @@ -114,6 +143,33 @@ impl Encoder {
Ok(self)
}

/// Encode an arbitrarily sized word as a sequence of 7-bit groups.
///
/// This is byte alignment agnostic.
/// Each output byte carries the 7 least significant bits still to be
/// written. When more bits remain after shifting those 7 away, the byte's
/// top bit (128) is set to signal that another byte follows; the final byte
/// has the top bit clear.
///
/// NOTE(review): `c` is presumably expected to be non-negative (it is the
/// zigzag result when called from `integer`); the `to_u8().unwrap()` below
/// relies on each remainder fitting in a `u8` — confirm for other callers.
pub fn big_word(&mut self, c: Integer) -> &mut Self {
    let mut remaining = c;

    loop {
        // Low 7 bits of what is left to encode.
        let group: Integer = remaining.clone() % 128;
        let low = group.to_u8().unwrap();

        remaining >>= 7;

        if remaining == 0 {
            // Last byte: continuation bit stays clear.
            self.bits(8, low);

            return self;
        }

        // More groups follow: flag it in the top bit.
        self.bits(8, low | 128);
    }
}

/// Encode a string.
/// Convert to byte array and then use byte array encoding.
/// Uses filler to byte align the buffer, then writes byte array length up
Expand Down
54 changes: 49 additions & 5 deletions crates/uplc/src/flat/encode/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@ mod error;
pub use encoder::Encoder;
pub use error::FlatEncodeError;

use crate::{binder::Binder, program::Program, term::Term};
use crate::{binder::Binder, constant::Constant, program::Program, term::Term};

use super::tag::{self, BUILTIN_TAG_WIDTH, TERM_TAG_WIDTH};
use super::tag;

pub fn encode<'a, V>(program: &'a Program<'a, V>) -> Result<Vec<u8>, FlatEncodeError>
where
Expand Down Expand Up @@ -74,11 +74,15 @@ where

encoder.list_with(fields, |e, t| encode_term(e, t))?;
}
Term::Constant(c) => todo!(),
Term::Constant(c) => {
encode_term_tag(encoder, tag::CONSTANT)?;

encode_constant(encoder, c)?;
}
Term::Builtin(b) => {
encode_term_tag(encoder, tag::BUILTIN)?;

encoder.bits(BUILTIN_TAG_WIDTH as i64, **b as u8);
encoder.bits(tag::BUILTIN_TAG_WIDTH as i64, **b as u8);
}
Term::Error => {
encode_term_tag(encoder, tag::ERROR)?;
Expand All @@ -88,8 +92,48 @@ where
Ok(())
}

/// Encode a constant as its type-tag list followed by its value.
///
/// The type tag is written with `Encoder::list_with` using
/// `encode_constant_tag` for each element, then the payload (if any) is
/// written with the matching `Encoder` method:
///
/// * `Integer` — tag `0`, then `Encoder::integer`
/// * `ByteString` — tag `1`, then `Encoder::bytes`
/// * `String` — tag `2`, then `Encoder::utf8`
/// * `Unit` — tag `3`, no payload
/// * `Boolean` — tag `4`, then `Encoder::bool`
///
/// # Errors
///
/// Propagates any `FlatEncodeError` raised while writing the tag list or
/// the payload.
///
/// # Panics
///
/// Panics via `todo!` for variants whose encoding is not implemented yet
/// (`Data`, `ProtoList`, `ProtoPair` and the BLS12-381 variants).
fn encode_constant<'a>(e: &mut Encoder, constant: &'a Constant<'a>) -> Result<(), FlatEncodeError> {
    match constant {
        Constant::Integer(i) => {
            e.list_with(&[0], encode_constant_tag)?;

            e.integer(i);
        }
        Constant::ByteString(b) => {
            e.list_with(&[1], encode_constant_tag)?;

            e.bytes(b)?;
        }
        Constant::String(s) => {
            e.list_with(&[2], encode_constant_tag)?;

            e.utf8(s)?;
        }
        Constant::Unit => {
            e.list_with(&[3], encode_constant_tag)?;
        }
        Constant::Boolean(b) => {
            e.list_with(&[4], encode_constant_tag)?;

            e.bool(*b);
        }
        // An empty arm here would return `Ok(())` after writing neither a type
        // tag nor a payload, silently producing a stream that cannot be decoded.
        // Fail loudly like the other unimplemented variants instead.
        Constant::Data(_) => todo!("flat encoding for Constant::Data"),
        Constant::ProtoList(_, _) => todo!(),
        Constant::ProtoPair(_, _, _, _) => todo!(),
        Constant::Bls12_381G1Element(_) => todo!(),
        Constant::Bls12_381G2Element(_) => todo!(),
        Constant::Bls12_381MlResult(_) => todo!(),
    }

    Ok(())
}

fn encode_term_tag(e: &mut Encoder, tag: u8) -> Result<(), FlatEncodeError> {
safe_encode_bits(e, TERM_TAG_WIDTH, tag)
safe_encode_bits(e, tag::TERM_TAG_WIDTH, tag)
}

/// Encode a single constant type tag using `tag::CONST_TAG_WIDTH` bits.
///
/// The tag is taken by reference so this function can be passed directly to
/// `Encoder::list_with` as the per-item callback.
fn encode_constant_tag(e: &mut Encoder, tag: &u8) -> Result<(), FlatEncodeError> {
    let value = *tag;

    safe_encode_bits(e, tag::CONST_TAG_WIDTH, value)
}

fn safe_encode_bits(e: &mut Encoder, num_bits: usize, byte: u8) -> Result<(), FlatEncodeError> {
Expand Down
59 changes: 21 additions & 38 deletions crates/uplc/src/flat/zigzag.rs
Original file line number Diff line number Diff line change
@@ -1,52 +1,35 @@
// #[cfg(feature = "num-bigint")]
// use num_bigint::{BigInt, BigUint, ToBigInt};

use crate::constant::Integer;

/// Zigzag conversion used by the flat integer encoder and decoder.
///
/// Both directions consume `self` and produce a value of type `Zag`.
pub trait ZigZag {
/// Result type of both `zigzag` and `unzigzag`.
type Zag;

/// Map a signed value to its zigzag form. In the `&Integer` impl a
/// non-negative `n` becomes `2n` and a negative `n` becomes `-2n - 1`.
fn zigzag(self) -> Self::Zag;
/// Reverse the zigzag mapping. In the `&Integer` impl this computes
/// `(n >> 1) ^ -(n & 1)`.
fn unzigzag(self) -> Self::Zag;
}

// #[cfg(feature = "num-bigint")]
// impl ZigZag for BigInt {
// type Zag = BigUint;

// fn zigzag(self) -> Self::Zag where {
// if self >= 0.into() {
// self << 1
// } else {
// let double: BigInt = self << 1;
// -double - <u8 as Into<BigInt>>::into(1)
// }
// .to_biguint()
// .expect("number is positive")
// }
// }

impl ZigZag for i128 {
type Zag = usize;

fn zigzag(self) -> Self::Zag where {
let bits = i128::BITS as i128;
let i = self;
((i << 1) ^ (i >> (bits - 1))) as usize
impl ZigZag for &Integer {
type Zag = Integer;

fn zigzag(self) -> Self::Zag {
    // Both branches start from 2 * n (a left shift by one).
    let doubled: Integer = self.clone() << 1;

    if *self >= 0 {
        // Non-negative values map to the even numbers: 2 * n.
        doubled
    } else {
        // Negative values map to the odd numbers: -(2 * n) - 1.
        -doubled - 1
    }
}
}

// #[cfg(feature = "num-bigint")]
// impl ZigZag for BigUint {
// type Zag = BigInt;

// fn zigzag(self) -> Self::Zag where {
// let i = self.to_bigint().expect("always possible");
// (i.clone() >> 1) ^ -(i & <u8 as Into<BigInt>>::into(1))
// }
// }

impl ZigZag for usize {
type Zag = i128;
fn unzigzag(self) -> Self::Zag {
let temp: Integer = self.clone() & 1;

fn zigzag(self) -> Self::Zag where {
((self >> 1) as i128) ^ -((self & 1) as i128)
(self.clone() >> 1) ^ -(temp)
}
}
4 changes: 2 additions & 2 deletions crates/uplc/src/machine/env.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ where
arena.alloc(Self(new_env))
}

pub fn lookup(&'a self, name: usize) -> Option<&&'a Value<'a, V>> {
self.0.get(self.0.len() - name)
/// Fetch the value stored `name` slots back from the end of the environment.
///
/// The slot read is `len - name`, where `len` is the number of entries
/// currently held. Returns `None` when that slot does not exist, including
/// when `name` is larger than `len`.
pub fn lookup(&'a self, name: usize) -> Option<&'a Value<'a, V>> {
    // `checked_sub` stops an index larger than the environment from
    // underflowing the subtraction (a panic in debug builds, a silent wrap
    // in release builds); such lookups now simply yield `None`.
    let slot = self.0.len().checked_sub(name)?;

    self.0.get(slot).copied()
}
}

0 comments on commit 318b2b1

Please sign in to comment.