diff --git a/evm/spec/tables.tex b/evm/spec/tables.tex index 92ee1d2a54..43b45eb584 100644 --- a/evm/spec/tables.tex +++ b/evm/spec/tables.tex @@ -3,6 +3,7 @@ \section{Tables} \input{tables/cpu} \input{tables/arithmetic} +\input{tables/byte-packing} \input{tables/logic} \input{tables/memory} \input{tables/keccak-f} diff --git a/evm/spec/tables/byte-packing.tex b/evm/spec/tables/byte-packing.tex new file mode 100644 index 0000000000..4b78eea118 --- /dev/null +++ b/evm/spec/tables/byte-packing.tex @@ -0,0 +1,4 @@ +\subsection{Byte Packing} +\label{byte-packing} + +TODO diff --git a/evm/spec/zkevm.pdf b/evm/spec/zkevm.pdf index f181eba624..b6f3d90599 100644 Binary files a/evm/spec/zkevm.pdf and b/evm/spec/zkevm.pdf differ diff --git a/evm/src/all_stark.rs b/evm/src/all_stark.rs index 079ff114c4..50d3268883 100644 --- a/evm/src/all_stark.rs +++ b/evm/src/all_stark.rs @@ -23,6 +23,7 @@ use crate::memory::memory_stark; use crate::memory::memory_stark::MemoryStark; use crate::stark::Stark; +/// Structure containing all STARKs and the cross-table lookups. #[derive(Clone)] pub struct AllStark<F: RichField + Extendable<D>, const D: usize> { pub arithmetic_stark: ArithmeticStark<F, D>, @@ -36,6 +37,7 @@ pub struct AllStark<F: RichField + Extendable<D>, const D: usize> { } impl<F: RichField + Extendable<D>, const D: usize> Default for AllStark<F, D> { + /// Returns an `AllStark` containing all the STARKs initialized with default values. fn default() -> Self { Self { arithmetic_stark: ArithmeticStark::default(), @@ -64,6 +66,7 @@ impl<F: RichField + Extendable<D>, const D: usize> AllStark<F, D> { } } +/// Associates STARK tables with a unique index. #[derive(Debug, Copy, Clone, Eq, PartialEq)] pub enum Table { Arithmetic = 0, @@ -75,9 +78,11 @@ pub enum Table { Memory = 6, } +/// Number of STARK tables. pub(crate) const NUM_TABLES: usize = Table::Memory as usize + 1; impl Table { + /// Returns all STARK table indices. pub(crate) fn all() -> [Self; NUM_TABLES] { [ Self::Arithmetic, @@ -91,6 +96,7 @@ impl Table { } } +/// Returns all the `CrossTableLookups` used for proving the EVM. pub(crate) fn all_cross_table_lookups<F: Field>() -> Vec<CrossTableLookup<F>> { vec![ ctl_arithmetic(), @@ -103,6 +109,7 @@ pub(crate) fn all_cross_table_lookups<F: Field>() -> Vec<CrossTableLookup<F>> { ] } +/// `CrossTableLookup` for `ArithmeticStark`, to connect it with the `Cpu` module. fn ctl_arithmetic<F: Field>() -> CrossTableLookup<F> { CrossTableLookup::new( vec![cpu_stark::ctl_arithmetic_base_rows()], @@ -110,6 +117,7 @@ fn ctl_arithmetic<F: Field>() -> CrossTableLookup<F> { ) } +/// `CrossTableLookup` for `BytePackingStark`, to connect it with the `Cpu` module. fn ctl_byte_packing<F: Field>() -> CrossTableLookup<F> { let cpu_packing_looking = TableWithColumns::new( Table::Cpu, @@ -132,9 +140,9 @@ fn ctl_byte_packing<F: Field>() -> CrossTableLookup<F> { ) } -// We now need two different looked tables for `KeccakStark`: -// one for the inputs and one for the outputs. -// They are linked with the timestamp. +/// `CrossTableLookup` for `KeccakStark` inputs, to connect it with the `KeccakSponge` module. +/// `KeccakSpongeStark` looks into `KeccakStark` to give the inputs of the sponge. +/// Its consistency with the 'output' CTL is ensured through a timestamp column on the `KeccakStark` side. fn ctl_keccak_inputs<F: Field>() -> CrossTableLookup<F> { let keccak_sponge_looking = TableWithColumns::new( Table::KeccakSponge, @@ -149,6 +157,8 @@ fn ctl_keccak_inputs<F: Field>() -> CrossTableLookup<F> { CrossTableLookup::new(vec![keccak_sponge_looking], keccak_looked) } +/// `CrossTableLookup` for `KeccakStark` outputs, to connect it with the `KeccakSponge` module. +/// `KeccakSpongeStark` looks into `KeccakStark` to give the outputs of the sponge.
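The two Keccak CTLs are kept consistent by sharing the sponge timestamp column on both the input and the output lookup. A minimal sketch of how such a looking/looked pair is wired — the column indices and filters below are hypothetical placeholders, not the crate's real layout:

```rust
// Sketch only: indices 0..=6 stand in for (timestamp, data..., filter) columns.
fn ctl_example<F: Field>() -> CrossTableLookup<F> {
    // Looking side: sponge rows expose (timestamp, data...) tuples.
    let looking = TableWithColumns::new(
        Table::KeccakSponge,
        Column::singles(0..5).collect(), // timestamp + four data columns
        Some(Column::single(5)),         // filter: rows invoking the permutation
    );
    // Looked side: permutation rows expose the same tuple, so the multiset of
    // filtered rows must match on both sides.
    let looked = TableWithColumns::new(
        Table::Keccak,
        Column::singles(0..5).collect(),
        Some(Column::single(6)),
    );
    CrossTableLookup::new(vec![looking], looked)
}
```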
fn ctl_keccak_outputs() -> CrossTableLookup { let keccak_sponge_looking = TableWithColumns::new( Table::KeccakSponge, @@ -163,6 +173,7 @@ fn ctl_keccak_outputs() -> CrossTableLookup { CrossTableLookup::new(vec![keccak_sponge_looking], keccak_looked) } +/// `CrossTableLookup` for `KeccakSpongeStark` to connect it with the `Cpu` module. fn ctl_keccak_sponge() -> CrossTableLookup { let cpu_looking = TableWithColumns::new( Table::Cpu, @@ -177,6 +188,7 @@ fn ctl_keccak_sponge() -> CrossTableLookup { CrossTableLookup::new(vec![cpu_looking], keccak_sponge_looked) } +/// `CrossTableLookup` for `LogicStark` to connect it with the `Cpu` and `KeccakSponge` modules. fn ctl_logic() -> CrossTableLookup { let cpu_looking = TableWithColumns::new( Table::Cpu, @@ -197,6 +209,7 @@ fn ctl_logic() -> CrossTableLookup { CrossTableLookup::new(all_lookers, logic_looked) } +/// `CrossTableLookup` for `MemoryStark` to connect it with all the modules which need memory accesses. fn ctl_memory() -> CrossTableLookup { let cpu_memory_code_read = TableWithColumns::new( Table::Cpu, diff --git a/evm/src/arithmetic/addcy.rs b/evm/src/arithmetic/addcy.rs index 3366e432ae..4f343b45d5 100644 --- a/evm/src/arithmetic/addcy.rs +++ b/evm/src/arithmetic/addcy.rs @@ -149,7 +149,7 @@ pub(crate) fn eval_packed_generic_addcy( } } -pub fn eval_packed_generic( +pub(crate) fn eval_packed_generic( lv: &[P; NUM_ARITH_COLUMNS], yield_constr: &mut ConstraintConsumer
<P>
, ) { @@ -236,7 +236,7 @@ pub(crate) fn eval_ext_circuit_addcy, const D: usiz } } -pub fn eval_ext_circuit, const D: usize>( +pub(crate) fn eval_ext_circuit, const D: usize>( builder: &mut CircuitBuilder, lv: &[ExtensionTarget; NUM_ARITH_COLUMNS], yield_constr: &mut RecursiveConstraintConsumer, diff --git a/evm/src/arithmetic/arithmetic_stark.rs b/evm/src/arithmetic/arithmetic_stark.rs index 3d281c868c..21dcf91985 100644 --- a/evm/src/arithmetic/arithmetic_stark.rs +++ b/evm/src/arithmetic/arithmetic_stark.rs @@ -22,8 +22,8 @@ use crate::evaluation_frame::{StarkEvaluationFrame, StarkFrame}; use crate::lookup::Lookup; use crate::stark::Stark; -/// Link the 16-bit columns of the arithmetic table, split into groups -/// of N_LIMBS at a time in `regs`, with the corresponding 32-bit +/// Creates a vector of `Columns` to link the 16-bit columns of the arithmetic table, +/// split into groups of N_LIMBS at a time in `regs`, with the corresponding 32-bit /// columns of the CPU table. Does this for all ops in `ops`. /// /// This is done by taking pairs of columns (x, y) of the arithmetic @@ -57,7 +57,8 @@ fn cpu_arith_data_link( res } -pub fn ctl_arithmetic_rows() -> TableWithColumns { +/// Returns the `TableWithColumns` for `ArithmeticStark` rows where one of the arithmetic operations has been called. +pub(crate) fn ctl_arithmetic_rows() -> TableWithColumns { // We scale each filter flag with the associated opcode value. // If an arithmetic operation is happening on the CPU side, // the CTL will enforce that the reconstructed opcode value @@ -102,6 +103,7 @@ pub fn ctl_arithmetic_rows() -> TableWithColumns { ) } +/// Structure representing the `Arithmetic` STARK, which carries out all the arithmetic operations. #[derive(Copy, Clone, Default)] pub struct ArithmeticStark { pub f: PhantomData, @@ -204,11 +206,17 @@ impl, const D: usize> Stark for ArithmeticSta let range_max = P::Scalar::from_canonical_u64((RANGE_MAX - 1) as u64); yield_constr.constraint_last_row(rc1 - range_max); + // Evaluate constraints for the MUL operation. mul::eval_packed_generic(lv, yield_constr); + // Evaluate constraints for ADD, SUB, LT and GT operations. addcy::eval_packed_generic(lv, yield_constr); + // Evaluate constraints for DIV and MOD operations. divmod::eval_packed(lv, nv, yield_constr); + // Evaluate constraints for ADDMOD, SUBMOD, MULMOD and for FP254 modular operations. modular::eval_packed(lv, nv, yield_constr); + // Evaluate constraints for the BYTE operation. byte::eval_packed(lv, yield_constr); + // Evaluate constraints for SHL and SHR operations. shift::eval_packed_generic(lv, nv, yield_constr); } @@ -223,6 +231,9 @@ impl, const D: usize> Stark for ArithmeticSta let nv: &[ExtensionTarget; NUM_ARITH_COLUMNS] = vars.get_next_values().try_into().unwrap(); + // Check the range column: First value must be 0, last row + // must be 2^16-1, and intermediate rows must increment by 0 + // or 1. let rc1 = lv[columns::RANGE_COUNTER]; let rc2 = nv[columns::RANGE_COUNTER]; yield_constr.constraint_first_row(builder, rc1); @@ -234,11 +245,17 @@ impl, const D: usize> Stark for ArithmeticSta let t = builder.sub_extension(rc1, range_max); yield_constr.constraint_last_row(builder, t); + // Evaluate constraints for the MUL operation. mul::eval_ext_circuit(builder, lv, yield_constr); + // Evaluate constraints for ADD, SUB, LT and GT operations. addcy::eval_ext_circuit(builder, lv, yield_constr); + // Evaluate constraints for DIV and MOD operations. 
divmod::eval_ext_circuit(builder, lv, nv, yield_constr); + // Evaluate constraints for ADDMOD, SUBMOD, MULMOD and for FP254 modular operations. modular::eval_ext_circuit(builder, lv, nv, yield_constr); + // Evaluate constraints for the BYTE operation. byte::eval_ext_circuit(builder, lv, yield_constr); + // Evaluate constraints for SHL and SHR operations. shift::eval_ext_circuit(builder, lv, nv, yield_constr); } diff --git a/evm/src/arithmetic/byte.rs b/evm/src/arithmetic/byte.rs index bb8cd12122..b7381ae0fa 100644 --- a/evm/src/arithmetic/byte.rs +++ b/evm/src/arithmetic/byte.rs @@ -197,7 +197,7 @@ pub(crate) fn generate(lv: &mut [F], idx: U256, val: U256) { ); } -pub fn eval_packed( +pub(crate) fn eval_packed( lv: &[P; NUM_ARITH_COLUMNS], yield_constr: &mut ConstraintConsumer
<P>
, ) { @@ -293,7 +293,7 @@ pub fn eval_packed( } } -pub fn eval_ext_circuit, const D: usize>( +pub(crate) fn eval_ext_circuit, const D: usize>( builder: &mut CircuitBuilder, lv: &[ExtensionTarget; NUM_ARITH_COLUMNS], yield_constr: &mut RecursiveConstraintConsumer, @@ -306,6 +306,7 @@ pub fn eval_ext_circuit, const D: usize>( let idx_decomp = &lv[AUX_INPUT_REGISTER_0]; let tree = &lv[AUX_INPUT_REGISTER_1]; + // low 5 bits of the first limb of idx: let mut idx0_lo5 = builder.zero_extension(); for i in 0..5 { let bit = idx_decomp[i]; @@ -316,6 +317,9 @@ pub fn eval_ext_circuit, const D: usize>( let scale = builder.constant_extension(scale); idx0_lo5 = builder.mul_add_extension(bit, scale, idx0_lo5); } + // Verify that idx0_hi is the high (11) bits of the first limb of + // idx (in particular idx0_hi is at most 11 bits, since idx[0] is + // at most 16 bits). let t = F::Extension::from(F::from_canonical_u64(32)); let t = builder.constant_extension(t); let t = builder.mul_add_extension(idx_decomp[5], t, idx0_lo5); @@ -323,6 +327,9 @@ pub fn eval_ext_circuit, const D: usize>( let t = builder.mul_extension(is_byte, t); yield_constr.constraint(builder, t); + // Verify the layers of the tree + // NB: Each of the bit values is negated in place to account for + // the reversed indexing. let one = builder.one_extension(); let bit = idx_decomp[4]; for i in 0..8 { @@ -362,6 +369,8 @@ pub fn eval_ext_circuit, const D: usize>( let t = builder.mul_extension(is_byte, t); yield_constr.constraint(builder, t); + // Check byte decomposition of last limb: + let base8 = F::Extension::from(F::from_canonical_u64(1 << 8)); let base8 = builder.constant_extension(base8); let lo_byte = lv[BYTE_LAST_LIMB_LO]; @@ -380,19 +389,29 @@ pub fn eval_ext_circuit, const D: usize>( yield_constr.constraint(builder, t); let expected_out_byte = tree[15]; + // Sum all higher limbs; sum will be non-zero iff idx >= 32. let mut hi_limb_sum = lv[BYTE_IDX_DECOMP_HI]; for i in 1..N_LIMBS { hi_limb_sum = builder.add_extension(hi_limb_sum, idx[i]); } + // idx_is_large is 0 or 1 let idx_is_large = lv[BYTE_IDX_IS_LARGE]; let t = builder.mul_sub_extension(idx_is_large, idx_is_large, idx_is_large); let t = builder.mul_extension(is_byte, t); yield_constr.constraint(builder, t); + // If hi_limb_sum is nonzero, then idx_is_large must be one. let t = builder.sub_extension(idx_is_large, one); let t = builder.mul_many_extension([is_byte, hi_limb_sum, t]); yield_constr.constraint(builder, t); + // If idx_is_large is 1, then hi_limb_sum_inv must be the inverse + // of hi_limb_sum, hence hi_limb_sum is non-zero, hence idx is + // indeed "large". + // + // Otherwise, if idx_is_large is 0, then hi_limb_sum * hi_limb_sum_inv + // is zero, which is only possible if hi_limb_sum is zero, since + // hi_limb_sum_inv is non-zero. let base16 = F::from_canonical_u64(1 << 16); let hi_limb_sum_inv = builder.mul_const_add_extension( base16, @@ -414,6 +433,7 @@ pub fn eval_ext_circuit, const D: usize>( let t = builder.mul_extension(is_byte, check); yield_constr.constraint(builder, t); + // Check that the rest of the output limbs are zero for i in 1..N_LIMBS { let t = builder.mul_extension(is_byte, out[i]); yield_constr.constraint(builder, t); diff --git a/evm/src/arithmetic/columns.rs b/evm/src/arithmetic/columns.rs index df2d12476b..aa36b3ab71 100644 --- a/evm/src/arithmetic/columns.rs +++ b/evm/src/arithmetic/columns.rs @@ -109,4 +109,5 @@ pub(crate) const RANGE_COUNTER: usize = START_SHARED_COLS + NUM_SHARED_COLS; /// The frequencies column used in logUp. 
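The `idx_is_large` / `hi_limb_sum_inv` constraints above are an instance of the standard pseudoinverse gadget for forcing a boolean flag to equal "this value is non-zero". A self-contained sketch over a toy Mersenne-prime field (the field choice and names are illustrative, not the crate's):

```rust
/// Toy prime field: p = 2^61 - 1 (a Mersenne prime). Inputs assumed reduced mod P.
const P: u128 = (1 << 61) - 1;

/// Returns true iff the three constraints tying `flag` to `z != 0` hold:
///   flag * (flag - 1) == 0   // flag is boolean
///   z * (flag - 1)    == 0   // z != 0 forces flag = 1
///   z * z_inv - flag  == 0   // flag = 1 forces z to be invertible, i.e. z != 0
fn nonzero_flag_holds(z: u128, z_inv: u128, flag: u128) -> bool {
    let sub = |a: u128, b: u128| (a + P - b) % P;
    let c1 = flag * sub(flag, 1) % P;
    let c2 = z * sub(flag, 1) % P;
    let c3 = sub(z * z_inv % P, flag);
    c1 == 0 && c2 == 0 && c3 == 0
}
```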
pub(crate) const RC_FREQUENCIES: usize = RANGE_COUNTER + 1; +/// Number of columns in `ArithmeticStark`. pub const NUM_ARITH_COLUMNS: usize = START_SHARED_COLS + NUM_SHARED_COLS + 2; diff --git a/evm/src/arithmetic/divmod.rs b/evm/src/arithmetic/divmod.rs index e143ded6dd..b284c829c5 100644 --- a/evm/src/arithmetic/divmod.rs +++ b/evm/src/arithmetic/divmod.rs @@ -1,3 +1,7 @@ +//! Support for EVM instructions DIV and MOD. +//! +//! The logic for verifying them is detailed in the `modular` submodule. + use std::ops::Range; use ethereum_types::U256; diff --git a/evm/src/arithmetic/mod.rs b/evm/src/arithmetic/mod.rs index 7763e98a06..2699ee51c4 100644 --- a/evm/src/arithmetic/mod.rs +++ b/evm/src/arithmetic/mod.rs @@ -15,6 +15,9 @@ mod utils; pub mod arithmetic_stark; pub(crate) mod columns; +/// An enum representing different binary operations. +/// +/// `Shl` and `Shr` are handled differently, by leveraging `Mul` and `Div` respectively. #[derive(Clone, Copy, Debug, Eq, PartialEq)] pub(crate) enum BinaryOperator { Add, @@ -33,6 +36,7 @@ pub(crate) enum BinaryOperator { } impl BinaryOperator { + /// Computes the result of a binary arithmetic operation given two inputs. pub(crate) fn result(&self, input0: U256, input1: U256) -> U256 { match self { BinaryOperator::Add => input0.overflowing_add(input1).0, @@ -81,6 +85,7 @@ impl BinaryOperator { } } + /// Maps a binary arithmetic operation to its associated flag column in the trace. pub(crate) fn row_filter(&self) -> usize { match self { BinaryOperator::Add => columns::IS_ADD, @@ -100,6 +105,7 @@ impl BinaryOperator { } } +/// An enum representing different ternary operations. #[allow(clippy::enum_variant_names)] #[derive(Clone, Copy, Debug, Eq, PartialEq)] pub(crate) enum TernaryOperator { @@ -109,6 +115,7 @@ pub(crate) enum TernaryOperator { } impl TernaryOperator { + /// Computes the result of a ternary arithmetic operation given three inputs. pub(crate) fn result(&self, input0: U256, input1: U256, input2: U256) -> U256 { match self { TernaryOperator::AddMod => addmod(input0, input1, input2), @@ -117,6 +124,7 @@ impl TernaryOperator { } } + /// Maps a ternary arithmetic operation to its associated flag column in the trace. pub(crate) fn row_filter(&self) -> usize { match self { TernaryOperator::AddMod => columns::IS_ADDMOD, @@ -145,7 +153,7 @@ pub(crate) enum Operation { } impl Operation { - /// Create a binary operator with given inputs. + /// Creates a binary operator with given inputs. /// /// NB: This works as you would expect, EXCEPT for SHL and SHR, /// whose inputs need a small amount of preprocessing. Specifically, @@ -170,6 +178,7 @@ impl Operation { } } + /// Creates a ternary operator with given inputs. pub(crate) fn ternary( operator: TernaryOperator, input0: U256, @@ -186,6 +195,7 @@ impl Operation { } } + /// Gets the result of an arithmetic operation. pub(crate) fn result(&self) -> U256 { match self { Operation::BinaryOperation { result, .. } => *result, @@ -222,6 +232,7 @@ impl Operation { } } +/// Converts a ternary arithmetic operation to one or two rows of the `ArithmeticStark` table. fn ternary_op_to_rows( row_filter: usize, input0: U256, @@ -239,6 +250,7 @@ fn ternary_op_to_rows( (row1, Some(row2)) } +/// Converts a binary arithmetic operation to one or two rows of the `ArithmeticStark` table. 
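As a usage sketch of the operation constructors just documented (these types are crate-internal, so this would live in a unit test), building a binary ADD and reading back its wrapped result:

```rust
use ethereum_types::U256;

#[test]
fn add_wraps() {
    let op = Operation::binary(BinaryOperator::Add, U256::max_value(), U256::from(3));
    // ADD is wrapping: (2^256 - 1) + 3 == 2 (mod 2^256).
    assert_eq!(op.result(), U256::from(2));
}
```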
fn binary_op_to_rows( op: BinaryOperator, input0: U256, input1: U256, diff --git a/evm/src/arithmetic/modular.rs b/evm/src/arithmetic/modular.rs index 4e6e21a632..05f9cf0da7 100644 --- a/evm/src/arithmetic/modular.rs +++ b/evm/src/arithmetic/modular.rs @@ -1,5 +1,5 @@ -//! Support for the EVM modular instructions ADDMOD, MULMOD and MOD, -//! as well as DIV. +//! Support for the EVM modular instructions ADDMOD, SUBMOD, MULMOD and MOD, +//! as well as DIV and FP254 related modular instructions. //! //! This crate verifies an EVM modular instruction, which takes three //! 256-bit inputs A, B and M, and produces a 256-bit output C satisfying @@ -478,7 +478,7 @@ pub(crate) fn modular_constr_poly<P: PackedField>( let base = P::Scalar::from_canonical_u64(1 << LIMB_BITS); let offset = P::Scalar::from_canonical_u64(AUX_COEFF_ABS_MAX as u64); // constr_poly = c(x) + q(x) * m(x) + (x - β) * s(x) let mut aux = [P::ZEROS; 2 * N_LIMBS]; for (c, i) in aux.iter_mut().zip(MODULAR_AUX_INPUT_LO) { // MODULAR_AUX_INPUT elements were offset by 2^20 in @@ -625,10 +625,13 @@ pub(crate) fn modular_constr_poly_ext_circuit<F: RichField + Extendable<D>, cons ) -> [ExtensionTarget<D>; 2 * N_LIMBS] { let mod_is_zero = nv[MODULAR_MOD_IS_ZERO]; + // Check that mod_is_zero is zero or one let t = builder.mul_sub_extension(mod_is_zero, mod_is_zero, mod_is_zero); let t = builder.mul_extension(filter, t); yield_constr.constraint_transition(builder, t); + // Check that mod_is_zero is zero if modulus is not zero (they + // could both be zero) let limb_sum = builder.add_many_extension(modulus); let t = builder.mul_extension(limb_sum, mod_is_zero); let t = builder.mul_extension(filter, t); @@ -636,13 +639,19 @@ pub(crate) fn modular_constr_poly_ext_circuit<F: RichField + Extendable<D>, cons modulus[0] = builder.add_extension(modulus[0], mod_is_zero); + // Is 1 iff the operation is DIV or SHR and the denominator is zero. let div_denom_is_zero = nv[MODULAR_DIV_DENOM_IS_ZERO]; let div_shr_filter = builder.add_extension(lv[IS_DIV], lv[IS_SHR]); let t = builder.mul_sub_extension(mod_is_zero, div_shr_filter, div_denom_is_zero); let t = builder.mul_extension(filter, t); yield_constr.constraint_transition(builder, t); + + // Needed to compensate for adding mod_is_zero to modulus above, + // since the call eval_packed_generic_addcy() below subtracts modulus + // to verify in the case of a DIV or SHR. output[0] = builder.add_extension(output[0], div_denom_is_zero);
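The `mod_is_zero` adjustment above implements the EVM convention that `x mod 0 = 0` (and that DIV/SHR with a zero denominator yields 0) by quietly bumping a zero modulus to 1. The same trick in plain integer arithmetic, as a sketch:

```rust
// EVM semantics: a mod 0 == 0. The prover sets mod_is_zero = 1 and works
// modulo (m + mod_is_zero) = 1 instead, for which every residue is 0.
fn evm_mod(a: u64, m: u64) -> u64 {
    let mod_is_zero = (m == 0) as u64;
    let m_adjusted = m + mod_is_zero; // 1 if m == 0, else m
    a % m_adjusted                    // always 0 when m == 0
}
```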
+ // Verify that the output is reduced, i.e. output < modulus. let out_aux_red = &nv[MODULAR_OUT_AUX_RED]; let one = builder.one_extension(); let zero = builder.zero_extension(); @@ -660,24 +669,31 @@ pub(crate) fn modular_constr_poly_ext_circuit<F: RichField + Extendable<D>, cons &is_less_than, true, ); + // restore output[0] output[0] = builder.sub_extension(output[0], div_denom_is_zero); + // prod = q(x) * m(x) let prod = pol_mul_wide2_ext_circuit(builder, quot, modulus); + // higher order terms must be zero for &x in prod[2 * N_LIMBS..].iter() { let t = builder.mul_extension(filter, x); yield_constr.constraint_transition(builder, t); } + // constr_poly = c(x) + q(x) * m(x) let mut constr_poly: [_; 2 * N_LIMBS] = prod[0..2 * N_LIMBS].try_into().unwrap(); pol_add_assign_ext_circuit(builder, &mut constr_poly, &output); let offset = builder.constant_extension(F::Extension::from_canonical_u64(AUX_COEFF_ABS_MAX as u64)); let zero = builder.zero_extension(); + + // Compute the coefficients of s(x); the MODULAR_AUX_INPUT elements were + // offset by 2^20 in generation, so we undo that here. let mut aux = [zero; 2 * N_LIMBS]; for (c, i) in aux.iter_mut().zip(MODULAR_AUX_INPUT_LO) { *c = builder.sub_extension(nv[i], offset); } + // add high 16-bits of aux input let base = F::from_canonical_u64(1u64 << LIMB_BITS); for (c, j) in aux.iter_mut().zip(MODULAR_AUX_INPUT_HI) { *c = builder.mul_const_add_extension(base, nv[j], *c); } @@ -700,10 +716,13 @@ pub(crate) fn submod_constr_poly_ext_circuit<F: RichField + Extendable<D>, const modulus: [ExtensionTarget<D>; N_LIMBS], mut quot: [ExtensionTarget<D>; 2 * N_LIMBS], ) -> [ExtensionTarget<D>; 2 * N_LIMBS] { + // quot was offset by 2^16 - 1 if it was negative; we undo that + // offset here: let (lo, hi) = quot.split_at_mut(N_LIMBS); let sign = hi[0]; let t = builder.mul_sub_extension(sign, sign, sign); let t = builder.mul_extension(filter, t); + // sign must be 1 (negative) or 0 (positive) yield_constr.constraint(builder, t); let offset = F::from_canonical_u16(u16::max_value()); for c in lo { @@ -712,6 +731,7 @@ pub(crate) fn submod_constr_poly_ext_circuit<F: RichField + Extendable<D>, const } hi[0] = builder.zero_extension(); for d in hi { + // All higher limbs must be zero let t = builder.mul_extension(filter, *d); yield_constr.constraint(builder, t); } @@ -737,8 +757,12 @@ pub(crate) fn eval_ext_circuit<F: RichField + Extendable<D>, const D: usize>( bn254_filter, ]); + // Ensure that this operation is not the last row of the table; + // needed because we access the next row of the table in nv. yield_constr.constraint_last_row(builder, filter); + // Verify that the modulus is the BN254 modulus for the + // {ADD,MUL,SUB}FP254 operations. let modulus = read_value::<N_LIMBS, _>(lv, MODULAR_MODULUS); for (&mi, bi) in modulus.iter().zip(bn254_modulus_limbs()) { // bn254_filter * (mi - bi) @@ -760,6 +784,7 @@ pub(crate) fn eval_ext_circuit<F: RichField + Extendable<D>, const D: usize>( let mul_filter = builder.add_extension(lv[columns::IS_MULMOD], lv[columns::IS_MULFP254]); let addmul_filter = builder.add_extension(add_filter, mul_filter); + // constr_poly has 2*N_LIMBS limbs let submod_constr_poly = submod_constr_poly_ext_circuit( lv, nv, diff --git a/evm/src/arithmetic/mul.rs b/evm/src/arithmetic/mul.rs index c09c39d8dc..01c9d5c1c0 100644 --- a/evm/src/arithmetic/mul.rs +++ b/evm/src/arithmetic/mul.rs @@ -107,7 +107,7 @@ pub(crate) fn generate_mul<F: PrimeField64>(lv: &mut [F], left_in: [i64; 16], ri .copy_from_slice(&aux_limbs.map(|c| F::from_canonical_u16((c >> 16) as u16))); } -pub fn generate<F: PrimeField64>(lv: &mut [F], left_in: U256, right_in: U256) { +pub(crate) fn generate<F: PrimeField64>(lv: &mut [F], left_in: U256, right_in: U256) { // TODO: It would probably be clearer/cleaner to read the U256 // into an [i64;N] and then copy that to the lv table.
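The constraint polynomial assembled above enforces `a*b = c + q*m` limb-wise, with the `(x - β)·s(x)` term absorbing inter-limb carries at `β = 2^16`. With a single limb the carry term vanishes and the identity is ordinary integer arithmetic; a toy check with hypothetical values:

```rust
#[test]
fn toy_modmul_identity() {
    let (a, b, m) = (7u64, 9, 5);
    let (c, q) = (a * b % m, a * b / m); // c = 3, q = 12
    // The circuit enforces exactly this relation, limb by limb.
    assert_eq!(a * b, c + q * m); // 63 == 3 + 60
}
```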
u256_to_array(&mut lv[INPUT_REGISTER_0], left_in); @@ -173,7 +173,7 @@ pub(crate) fn eval_packed_generic_mul( } } -pub fn eval_packed_generic( +pub(crate) fn eval_packed_generic( lv: &[P; NUM_ARITH_COLUMNS], yield_constr: &mut ConstraintConsumer
<P>
, ) { @@ -195,6 +195,8 @@ pub(crate) fn eval_ext_mul_circuit, const D: usize> let output_limbs = read_value::(lv, OUTPUT_REGISTER); let aux_limbs = { + // MUL_AUX_INPUT was offset by 2^20 in generation, so we undo + // that here let base = builder.constant_extension(F::Extension::from_canonical_u64(1 << LIMB_BITS)); let offset = builder.constant_extension(F::Extension::from_canonical_u64(AUX_COEFF_ABS_MAX as u64)); @@ -211,17 +213,22 @@ pub(crate) fn eval_ext_mul_circuit, const D: usize> let mut constr_poly = pol_mul_lo_ext_circuit(builder, left_in_limbs, right_in_limbs); pol_sub_assign_ext_circuit(builder, &mut constr_poly, &output_limbs); + // This subtracts (x - β) * s(x) from constr_poly. let base = builder.constant_extension(F::Extension::from_canonical_u64(1 << LIMB_BITS)); let rhs = pol_adjoin_root_ext_circuit(builder, aux_limbs, base); pol_sub_assign_ext_circuit(builder, &mut constr_poly, &rhs); + // At this point constr_poly holds the coefficients of the + // polynomial a(x)b(x) - c(x) - (x - β)*s(x). The + // multiplication is valid if and only if all of those + // coefficients are zero. for &c in &constr_poly { let filter = builder.mul_extension(filter, c); yield_constr.constraint(builder, filter); } } -pub fn eval_ext_circuit, const D: usize>( +pub(crate) fn eval_ext_circuit, const D: usize>( builder: &mut CircuitBuilder, lv: &[ExtensionTarget; NUM_ARITH_COLUMNS], yield_constr: &mut RecursiveConstraintConsumer, diff --git a/evm/src/arithmetic/shift.rs b/evm/src/arithmetic/shift.rs index 6600c01e54..bb83798495 100644 --- a/evm/src/arithmetic/shift.rs +++ b/evm/src/arithmetic/shift.rs @@ -38,7 +38,7 @@ use crate::constraint_consumer::{ConstraintConsumer, RecursiveConstraintConsumer /// NB: if `shift >= 256`, then the third register holds 0. /// We leverage the functions in mul.rs and divmod.rs to carry out /// the computation. -pub fn generate( +pub(crate) fn generate( lv: &mut [F], nv: &mut [F], is_shl: bool, @@ -117,7 +117,7 @@ fn eval_packed_shr( ); } -pub fn eval_packed_generic( +pub(crate) fn eval_packed_generic( lv: &[P; NUM_ARITH_COLUMNS], nv: &[P; NUM_ARITH_COLUMNS], yield_constr: &mut ConstraintConsumer
<P>
, @@ -168,7 +168,7 @@ fn eval_ext_circuit_shr<F: RichField + Extendable<D>, const D: usize>( ); } -pub fn eval_ext_circuit<F: RichField + Extendable<D>, const D: usize>( +pub(crate) fn eval_ext_circuit<F: RichField + Extendable<D>, const D: usize>( builder: &mut CircuitBuilder<F, D>, lv: &[ExtensionTarget<D>; NUM_ARITH_COLUMNS], nv: &[ExtensionTarget<D>; NUM_ARITH_COLUMNS], diff --git a/evm/src/arithmetic/utils.rs b/evm/src/arithmetic/utils.rs index 6ea375fef3..7fadb8f14d 100644 --- a/evm/src/arithmetic/utils.rs +++ b/evm/src/arithmetic/utils.rs @@ -319,6 +319,7 @@ pub(crate) fn read_value_i64_limbs( } #[inline] +/// Turns a 64-bit integer into 4 16-bit limbs and converts them to field elements. fn u64_to_array<F: Field>(out: &mut [F], x: u64) { const_assert!(LIMB_BITS == 16); debug_assert!(out.len() == 4); @@ -329,6 +330,7 @@ fn u64_to_array<F: Field>(out: &mut [F], x: u64) { out[3] = F::from_canonical_u16((x >> 48) as u16); } +/// Turns a 256-bit integer into 16 16-bit limbs and converts them to field elements. // TODO: Refactor/replace u256_limbs in evm/src/util.rs pub(crate) fn u256_to_array<F: Field>(out: &mut [F], x: U256) { const_assert!(N_LIMBS == 16); diff --git a/evm/src/byte_packing/byte_packing_stark.rs b/evm/src/byte_packing/byte_packing_stark.rs index c28b055a81..ad485c591e 100644 --- a/evm/src/byte_packing/byte_packing_stark.rs +++ b/evm/src/byte_packing/byte_packing_stark.rs @@ -59,6 +59,8 @@ use crate::witness::memory::MemoryAddress; /// Strict upper bound for the individual bytes range-check. const BYTE_RANGE_MAX: usize = 1usize << 8; +/// Creates the vector of `Columns` for `BytePackingStark` corresponding to the final packed limbs being read/written. +/// `CpuStark` will look into these columns, as the CPU needs the output of byte packing. pub(crate) fn ctl_looked_data<F: Field>() -> Vec<Column<F>> { // Reconstruct the u32 limbs composing the final `U256` word // being read/written from the underlying byte values. For each, @@ -88,12 +90,14 @@ pub(crate) fn ctl_looked_data<F: Field>() -> Vec<Column<F>> { .collect() } +/// CTL filter for the `BytePackingStark` looked table. pub fn ctl_looked_filter<F: Field>() -> Column<F> { // The CPU table is only interested in our sequence end rows, // since those contain the final limbs of our packed int. Column::single(SEQUENCE_END) } +/// Column linear combination for the `BytePackingStark` table reading/writing the `i`th byte sequence from `MemoryStark`. pub(crate) fn ctl_looking_memory<F: Field>(i: usize) -> Vec<Column<F>> { let mut res = Column::singles([IS_READ, ADDR_CONTEXT, ADDR_SEGMENT, ADDR_VIRTUAL]).collect_vec(); @@ -212,6 +216,8 @@ impl<F: RichField + Extendable<D>, const D: usize> BytePackingStark<F, D> { row[index_bytes(i)] = F::ONE; rows.push(row); + + // Update those fields for the next row row[index_bytes(i)] = F::ZERO; row[ADDR_VIRTUAL] -= F::ONE; } diff --git a/evm/src/byte_packing/columns.rs b/evm/src/byte_packing/columns.rs index 4eff0df8f5..16bc4be5fa 100644 --- a/evm/src/byte_packing/columns.rs +++ b/evm/src/byte_packing/columns.rs @@ -11,6 +11,8 @@ pub(crate) const IS_READ: usize = 0; pub(crate) const SEQUENCE_END: usize = IS_READ + 1; pub(super) const BYTES_INDICES_START: usize = SEQUENCE_END + 1; +// There are `NUM_BYTES` columns used to represent the index of the active byte +// for a given row of a byte (un)packing operation. pub(crate) const fn index_bytes(i: usize) -> usize { debug_assert!(i < NUM_BYTES); BYTES_INDICES_START + i @@ -28,6 +30,9 @@ pub(crate) const TIMESTAMP: usize = ADDR_VIRTUAL + 1; // 32 byte limbs hold a total of 256 bits. const BYTES_VALUES_START: usize = TIMESTAMP + 1;
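The index/value column discipline described below can be pictured row by row. A hypothetical three-byte read would lay out as follows (illustrative values, not actual trace data) — note the one-hot index bit, the accumulating value bytes, and the `SEQUENCE_END` flag on the final row:

```rust
// Hypothetical trace fragment for a 3-byte packing sequence. Each row
// activates exactly one index bit; value bytes accumulate, the virtual
// address decrements, and only the last row is exposed to the CPU CTL.
//
// row | index_bytes     | value_bytes        | SEQUENCE_END
//  0  | [1, 0, 0, ...]  | [b0,  0,  0, ...]  | 0
//  1  | [0, 1, 0, ...]  | [b0, b1,  0, ...]  | 0
//  2  | [0, 0, 1, ...]  | [b0, b1, b2, ...]  | 1
```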
+// There are `NUM_BYTES` columns used to store the values of the bytes +// that are being read/written for an (un)packing operation. +// If `index_bytes(i) == 1`, then all `value_bytes(j)` for `j <= i` may be non-zero. pub(crate) const fn value_bytes(i: usize) -> usize { debug_assert!(i < NUM_BYTES); BYTES_VALUES_START + i @@ -38,4 +43,5 @@ pub(crate) const RANGE_COUNTER: usize = BYTES_VALUES_START + NUM_BYTES; /// The frequencies column used in logUp. pub(crate) const RC_FREQUENCIES: usize = RANGE_COUNTER + 1; +/// Number of columns in `BytePackingStark`. pub(crate) const NUM_COLUMNS: usize = RANGE_COUNTER + 2; diff --git a/evm/src/byte_packing/mod.rs b/evm/src/byte_packing/mod.rs index 7cc93374ca..3767b21ed6 100644 --- a/evm/src/byte_packing/mod.rs +++ b/evm/src/byte_packing/mod.rs @@ -6,4 +6,5 @@ pub mod byte_packing_stark; pub mod columns; +/// Maximum number of bytes being processed by a byte (un)packing operation. pub(crate) const NUM_BYTES: usize = 32; diff --git a/evm/src/cpu/bootstrap_kernel.rs b/evm/src/cpu/bootstrap_kernel.rs index 839fdb2367..b04ff379dd 100644 --- a/evm/src/cpu/bootstrap_kernel.rs +++ b/evm/src/cpu/bootstrap_kernel.rs @@ -19,6 +19,7 @@ use crate::memory::segments::Segment; use crate::witness::memory::MemoryAddress; use crate::witness::util::{keccak_sponge_log, mem_write_gp_log_and_fill}; +/// Generates the rows to bootstrap the kernel. pub(crate) fn generate_bootstrap_kernel<F: Field>(state: &mut GenerationState<F>) { // Iterate through chunks of the code, such that we can write one chunk to memory per row. for chunk in &KERNEL.code.iter().enumerate().chunks(NUM_GP_CHANNELS) { @@ -64,6 +65,7 @@ pub(crate) fn generate_bootstrap_kernel<F: Field>(state: &mut GenerationState<F>) log::info!("Bootstrapping took {} cycles", state.traces.clock()); } +/// Evaluates the constraints for kernel bootstrapping. pub(crate) fn eval_bootstrap_kernel_packed<F: Field, P: PackedField<Scalar = F>>( local_values: &CpuColumnsView
<P>
, next_values: &CpuColumnsView
<P>
, @@ -107,6 +109,8 @@ pub(crate) fn eval_bootstrap_kernel_packed> } } +/// Circuit version of `eval_bootstrap_kernel_packed`. +/// Evaluates the constraints for kernel bootstrapping. pub(crate) fn eval_bootstrap_kernel_ext_circuit, const D: usize>( builder: &mut CircuitBuilder, local_values: &CpuColumnsView>, diff --git a/evm/src/cpu/columns/general.rs b/evm/src/cpu/columns/general.rs index d4f3447380..cfc24f51c6 100644 --- a/evm/src/cpu/columns/general.rs +++ b/evm/src/cpu/columns/general.rs @@ -14,52 +14,62 @@ pub(crate) union CpuGeneralColumnsView { } impl CpuGeneralColumnsView { - // SAFETY: Each view is a valid interpretation of the underlying array. + /// View of the columns used for exceptions: they are the exception code bits. + /// SAFETY: Each view is a valid interpretation of the underlying array. pub(crate) fn exception(&self) -> &CpuExceptionView { unsafe { &self.exception } } - // SAFETY: Each view is a valid interpretation of the underlying array. + /// Mutable view of the column required for exceptions: they are the exception code bits. + /// SAFETY: Each view is a valid interpretation of the underlying array. pub(crate) fn exception_mut(&mut self) -> &mut CpuExceptionView { unsafe { &mut self.exception } } - // SAFETY: Each view is a valid interpretation of the underlying array. + /// View of the columns required for logic operations. + /// SAFETY: Each view is a valid interpretation of the underlying array. pub(crate) fn logic(&self) -> &CpuLogicView { unsafe { &self.logic } } - // SAFETY: Each view is a valid interpretation of the underlying array. + /// Mutable view of the columns required for logic operations. + /// SAFETY: Each view is a valid interpretation of the underlying array. pub(crate) fn logic_mut(&mut self) -> &mut CpuLogicView { unsafe { &mut self.logic } } - // SAFETY: Each view is a valid interpretation of the underlying array. + /// View of the columns required for jump operations. + /// SAFETY: Each view is a valid interpretation of the underlying array. pub(crate) fn jumps(&self) -> &CpuJumpsView { unsafe { &self.jumps } } - // SAFETY: Each view is a valid interpretation of the underlying array. + /// Mutable view of the columns required for jump operations. + /// SAFETY: Each view is a valid interpretation of the underlying array. pub(crate) fn jumps_mut(&mut self) -> &mut CpuJumpsView { unsafe { &mut self.jumps } } - // SAFETY: Each view is a valid interpretation of the underlying array. + /// View of the columns required for shift operations. + /// SAFETY: Each view is a valid interpretation of the underlying array. pub(crate) fn shift(&self) -> &CpuShiftView { unsafe { &self.shift } } - // SAFETY: Each view is a valid interpretation of the underlying array. + /// Mutable view of the columns required for shift operations. + /// SAFETY: Each view is a valid interpretation of the underlying array. pub(crate) fn shift_mut(&mut self) -> &mut CpuShiftView { unsafe { &mut self.shift } } - // SAFETY: Each view is a valid interpretation of the underlying array. + /// View of the columns required for the stack top. + /// SAFETY: Each view is a valid interpretation of the underlying array. pub(crate) fn stack(&self) -> &CpuStackView { unsafe { &self.stack } } - // SAFETY: Each view is a valid interpretation of the underlying array. + /// Mutable view of the columns required for the stack top. + /// SAFETY: Each view is a valid interpretation of the underlying array. 
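All of these accessors reinterpret one shared backing array, with one view per operation family. The pattern in miniature, as a standalone sketch (field names and sizes here are hypothetical, not the crate's layout):

```rust
#[derive(Copy, Clone)]
struct JumpsView<T: Copy> {
    should_jump: T,
    cond_sum_pinv: T,
}

#[derive(Copy, Clone)]
struct ShiftView<T: Copy> {
    high_limb_sum_inv: T,
    _padding: T,
}

// Both views occupy the same two columns; only the interpretation differs.
union GeneralView<T: Copy> {
    jumps: JumpsView<T>,
    shift: ShiftView<T>,
}

impl<T: Copy> GeneralView<T> {
    // SAFETY: every variant is a plain array of `T`s of the same size, so
    // reading the bytes under either interpretation is valid.
    fn jumps(&self) -> &JumpsView<T> {
        unsafe { &self.jumps }
    }
}
```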
pub(crate) fn stack_mut(&mut self) -> &mut CpuStackView<T> { unsafe { &mut self.stack } } @@ -94,41 +104,51 @@ impl<T: Copy> BorrowMut<[T; NUM_SHARED_COLUMNS]> for CpuGeneralColumnsView<T> { } } +/// View of the first three `CpuGeneralColumns` containing exception code bits. #[derive(Copy, Clone)] pub(crate) struct CpuExceptionView<T: Copy> { - // Exception code as little-endian bits. + /// Exception code as little-endian bits. pub(crate) exc_code_bits: [T; 3], } +/// View of the `CpuGeneralColumns` storing pseudo-inverses used to prove logic operations. #[derive(Copy, Clone)] pub(crate) struct CpuLogicView<T: Copy> { - // Pseudoinverse of `(input0 - input1)`. Used prove that they are unequal. Assumes 32-bit limbs. + /// Pseudoinverse of `(input0 - input1)`. Used to prove that they are unequal. Assumes 32-bit limbs. pub(crate) diff_pinv: [T; 8], } +/// View of the first two `CpuGeneralColumns` storing a flag and a pseudoinverse used to prove jumps. #[derive(Copy, Clone)] pub(crate) struct CpuJumpsView<T: Copy> { - // A flag. + /// A flag indicating whether a jump should occur. pub(crate) should_jump: T, - // Pseudoinverse of `cond.iter().sum()`. Used to check `should_jump`. + /// Pseudoinverse of `cond.iter().sum()`. Used to check `should_jump`. pub(crate) cond_sum_pinv: T, } +/// View of the first `CpuGeneralColumns` storing a pseudoinverse used to prove shift operations. #[derive(Copy, Clone)] pub(crate) struct CpuShiftView<T: Copy> { - // For a shift amount of displacement: [T], this is the inverse of - // sum(displacement[1..]) or zero if the sum is zero. + /// For a shift amount of displacement: [T], this is the inverse of + /// sum(displacement[1..]) or zero if the sum is zero. pub(crate) high_limb_sum_inv: T, } +/// View of the last three `CpuGeneralColumns` storing the stack length pseudoinverse `stack_inv`, +/// `stack_len * stack_inv` and `filter * stack_inv_aux` when needed. #[derive(Copy, Clone)] pub(crate) struct CpuStackView<T: Copy> { // Used for conditionally enabling and disabling channels when reading the next `stack_top`. _unused: [T; 5], + /// Pseudoinverse of the stack len. pub(crate) stack_inv: T, + /// `stack_inv * stack_len`. pub(crate) stack_inv_aux: T, + /// Holds `filter * stack_inv_aux` when necessary, to reduce the degree of stack constraints. pub(crate) stack_inv_aux_2: T, } -// `u8` is guaranteed to have a `size_of` of 1. +/// Number of columns shared by all the views of `CpuGeneralColumnsView`. +/// `u8` is guaranteed to have a `size_of` of 1. pub const NUM_SHARED_COLUMNS: usize = size_of::<CpuGeneralColumnsView<u8>>(); diff --git a/evm/src/cpu/columns/mod.rs b/evm/src/cpu/columns/mod.rs index b7b4f780e0..b7970c08ca 100644 --- a/evm/src/cpu/columns/mod.rs +++ b/evm/src/cpu/columns/mod.rs @@ -12,23 +12,32 @@ use crate::memory; use crate::util::{indices_arr, transmute_no_compile_time_size_checks}; mod general; +/// Cpu operation flags. pub(crate) mod ops; +/// 32-bit limbs of the value stored in the current memory channel. pub type MemValue<T> = [T; memory::VALUE_LIMBS]; +/// View of the columns required for one memory channel. #[repr(C)] #[derive(Clone, Copy, Debug, Eq, PartialEq)] pub struct MemoryChannelView<T: Copy> { /// 1 if this row includes a memory operation in the `i`th channel of the memory bus, otherwise /// 0. pub used: T, + /// 1 if a read is performed on the `i`th channel of the memory bus, otherwise 0. pub is_read: T, + /// Context of the memory operation in the `i`th channel of the memory bus. pub addr_context: T, + /// Segment of the memory operation in the `i`th channel of the memory bus.
pub addr_segment: T, + /// Virtual address of the memory operation in the `i`th channel of the memory bus. pub addr_virtual: T, + /// Value, subdivided into 32-bit limbs, stored in the `i`th channel of the memory bus. pub value: MemValue<T>, } +/// View of all the columns in `CpuStark`. #[repr(C)] #[derive(Clone, Copy, Eq, PartialEq, Debug)] pub struct CpuColumnsView<T: Copy> { @@ -68,13 +77,18 @@ pub struct CpuColumnsView<T: Copy> { /// Filter. 1 iff a Keccak sponge lookup is performed on this row. pub is_keccak_sponge: T, + /// Columns shared by various operations. pub(crate) general: CpuGeneralColumnsView<T>, + /// CPU clock. pub(crate) clock: T, + + /// Memory bus channels in the CPU. Each channel is comprised of 13 columns. pub mem_channels: [MemoryChannelView<T>; NUM_GP_CHANNELS], } -// `u8` is guaranteed to have a `size_of` of 1. +/// Total number of columns in `CpuStark`. +/// `u8` is guaranteed to have a `size_of` of 1. pub const NUM_CPU_COLUMNS: usize = size_of::<CpuColumnsView<u8>>(); impl<F: Field> Default for CpuColumnsView<F> { @@ -146,4 +160,5 @@ const fn make_col_map() -> CpuColumnsView<usize> { unsafe { transmute::<[usize; NUM_CPU_COLUMNS], CpuColumnsView<usize>>(indices_arr) } } +/// Mapping between [0..NUM_CPU_COLUMNS-1] and the CPU columns. pub const COL_MAP: CpuColumnsView<usize> = make_col_map(); diff --git a/evm/src/cpu/columns/ops.rs b/evm/src/cpu/columns/ops.rs index 1df3a86599..c9039526e4 100644 --- a/evm/src/cpu/columns/ops.rs +++ b/evm/src/cpu/columns/ops.rs @@ -4,35 +4,59 @@ use std::ops::{Deref, DerefMut}; use crate::util::transmute_no_compile_time_size_checks; +/// Structure representing the flags for the various opcodes. #[repr(C)] #[derive(Clone, Copy, Eq, PartialEq, Debug)] pub struct OpsColumnsView<T: Copy> { - pub binary_op: T, // Combines ADD, MUL, SUB, DIV, MOD, LT, GT and BYTE flags. - pub ternary_op: T, // Combines ADDMOD, MULMOD and SUBMOD flags. - pub fp254_op: T, // Combines ADD_FP254, MUL_FP254 and SUB_FP254 flags. - pub eq_iszero: T, // Combines EQ and ISZERO flags. - pub logic_op: T, // Combines AND, OR and XOR flags. - pub not_pop: T, // Combines NOT and POP flags. - pub shift: T, // Combines SHL and SHR flags. + /// Combines ADD, MUL, SUB, DIV, MOD, LT, GT and BYTE flags. + pub binary_op: T, + /// Combines ADDMOD, MULMOD and SUBMOD flags. + pub ternary_op: T, + /// Combines ADD_FP254, MUL_FP254 and SUB_FP254 flags. + pub fp254_op: T, + /// Combines EQ and ISZERO flags. + pub eq_iszero: T, + /// Combines AND, OR and XOR flags. + pub logic_op: T, + /// Combines NOT and POP flags. + pub not_pop: T, + /// Combines SHL and SHR flags. + pub shift: T, + /// Flag for KECCAK_GENERAL. pub keccak_general: T, + /// Flag for PROVER_INPUT. pub prover_input: T, - pub jumps: T, // Combines JUMP and JUMPI flags. + /// Combines JUMP and JUMPI flags. + pub jumps: T, + /// Flag for PC. pub pc: T, + /// Flag for JUMPDEST. pub jumpdest: T, + /// Flag for PUSH0. pub push0: T, + /// Flag for PUSH. pub push: T, - pub dup_swap: T, // Combines DUP and SWAP flags. - pub context_op: T, // Combines GET_CONTEXT and SET_CONTEXT flags. + /// Combines DUP and SWAP flags. + pub dup_swap: T, + /// Combines GET_CONTEXT and SET_CONTEXT flags. + pub context_op: T, + /// Flag for MSTORE_32BYTES. pub mstore_32bytes: T, + /// Flag for MLOAD_32BYTES. pub mload_32bytes: T, + /// Flag for EXIT_KERNEL. pub exit_kernel: T, + /// Combines MSTORE_GENERAL and MLOAD_GENERAL flags. pub m_op_general: T, + /// Flag for syscalls. pub syscall: T, + /// Flag for exceptions. pub exception: T, } -// `u8` is guaranteed to have a `size_of` of 1. +/// Number of operation flag columns in `OpsColumnsView`. +/// `u8` is guaranteed to have a `size_of` of 1. pub const NUM_OPS_COLUMNS: usize = size_of::<OpsColumnsView<u8>>();
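The `size_of`-based column count works because `repr(C)` with one-byte fields leaves no padding, so the byte size of the struct equals its field count. A minimal demonstration with a hypothetical struct:

```rust
use std::mem::size_of;

#[repr(C)]
struct ThreeFlags<T> {
    a: T,
    b: T,
    c: T,
}

// With T = u8 every field occupies exactly one byte and repr(C) adds no
// padding, so the size in bytes equals the number of columns.
const NUM_DEMO_COLUMNS: usize = size_of::<ThreeFlags<u8>>(); // == 3
```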
impl<T: Copy> From<[T; NUM_OPS_COLUMNS]> for OpsColumnsView<T> { diff --git a/evm/src/cpu/contextops.rs b/evm/src/cpu/contextops.rs index edc07e7a29..99367b6253 100644 --- a/evm/src/cpu/contextops.rs +++ b/evm/src/cpu/contextops.rs @@ -11,6 +11,7 @@ use crate::constraint_consumer::{ConstraintConsumer, RecursiveConstraintConsumer use crate::cpu::columns::CpuColumnsView; use crate::cpu::kernel::constants::context_metadata::ContextMetadata; use crate::memory::segments::Segment; +/// Evaluates constraints for GET_CONTEXT. fn eval_packed_get( lv: &CpuColumnsView
<P>
, nv: &CpuColumnsView
<P>
, @@ -37,6 +38,8 @@ fn eval_packed_get( yield_constr.constraint(filter * nv.mem_channels[0].used); } +/// Circuit version of `eval_packed_get`. +/// Evaluates constraints for GET_CONTEXT. fn eval_ext_circuit_get<F: RichField + Extendable<D>, const D: usize>( builder: &mut CircuitBuilder<F, D>, lv: &CpuColumnsView<ExtensionTarget<D>>, @@ -79,6 +82,7 @@ fn eval_ext_circuit_get<F: RichField + Extendable<D>, const D: usize>( } } +/// Evaluates constraints for `SET_CONTEXT`. fn eval_packed_set( lv: &CpuColumnsView
<P>
, nv: &CpuColumnsView
<P>
, @@ -139,6 +143,8 @@ fn eval_packed_set( yield_constr.constraint(filter * new_top_channel.used); } +/// Circuit version of `eval_packed_set`. +/// Evaluates constraints for SET_CONTEXT. fn eval_ext_circuit_set, const D: usize>( builder: &mut CircuitBuilder, lv: &CpuColumnsView>, @@ -265,6 +271,7 @@ fn eval_ext_circuit_set, const D: usize>( } } +/// Evaluates the constraints for the GET and SET opcodes. pub fn eval_packed( lv: &CpuColumnsView
<P>
, nv: &CpuColumnsView
<P>
, @@ -301,6 +308,8 @@ pub fn eval_packed( yield_constr.constraint(new_filter * (channel.addr_virtual - addr_virtual)); } +/// Circuit version of `eval_packed`. +/// Evaluates the constraints for the GET and SET opcodes. pub fn eval_ext_circuit<F: RichField + Extendable<D>, const D: usize>( builder: &mut CircuitBuilder<F, D>, lv: &CpuColumnsView<ExtensionTarget<D>>, diff --git a/evm/src/cpu/control_flow.rs b/evm/src/cpu/control_flow.rs index bf70449454..d050aa1626 100644 --- a/evm/src/cpu/control_flow.rs +++ b/evm/src/cpu/control_flow.rs @@ -31,17 +31,20 @@ const NATIVE_INSTRUCTIONS: [usize; 15] = [ // not exceptions (also jump) ]; +/// Returns `halt`'s program counter. pub(crate) fn get_halt_pc<F: Field>() -> F { let halt_pc = KERNEL.global_labels["halt"]; F::from_canonical_usize(halt_pc) } +/// Returns `main`'s program counter. pub(crate) fn get_start_pc<F: Field>() -> F { let start_pc = KERNEL.global_labels["main"]; F::from_canonical_usize(start_pc) } +/// Evaluates the constraints related to the flow of instructions. pub fn eval_packed_generic( lv: &CpuColumnsView
<P>
, nv: &CpuColumnsView
<P>
, @@ -80,6 +83,8 @@ pub fn eval_packed_generic( yield_constr.constraint_transition(is_last_noncpu_cycle * nv.stack_len); } +/// Circuit version of `eval_packed`. +/// Evaluates the constraints related to the flow of instructions. pub fn eval_ext_circuit, const D: usize>( builder: &mut plonky2::plonk::circuit_builder::CircuitBuilder, lv: &CpuColumnsView>, diff --git a/evm/src/cpu/cpu_stark.rs b/evm/src/cpu/cpu_stark.rs index e75744f92c..926cd7485c 100644 --- a/evm/src/cpu/cpu_stark.rs +++ b/evm/src/cpu/cpu_stark.rs @@ -24,6 +24,8 @@ use crate::memory::segments::Segment; use crate::memory::{NUM_CHANNELS, VALUE_LIMBS}; use crate::stark::Stark; +/// Creates the vector of `Columns` corresponding to the General Purpose channels when calling the Keccak sponge: +/// the CPU reads the output of the sponge directly from the `KeccakSpongeStark` table. pub fn ctl_data_keccak_sponge() -> Vec> { // When executing KECCAK_GENERAL, the GP memory channels are used as follows: // GP channel 0: stack[-1] = context @@ -44,11 +46,12 @@ pub fn ctl_data_keccak_sponge() -> Vec> { cols } +/// CTL filter for a call to the Keccak sponge. pub fn ctl_filter_keccak_sponge() -> Column { Column::single(COL_MAP.is_keccak_sponge) } -/// Create the vector of Columns corresponding to the two inputs and +/// Creates the vector of `Columns` corresponding to the two inputs and /// one output of a binary operation. fn ctl_data_binops() -> Vec> { let mut res = Column::singles(COL_MAP.mem_channels[0].value).collect_vec(); @@ -57,7 +60,7 @@ fn ctl_data_binops() -> Vec> { res } -/// Create the vector of Columns corresponding to the three inputs and +/// Creates the vector of `Columns` corresponding to the three inputs and /// one output of a ternary operation. By default, ternary operations use /// the first three memory channels, and the last one for the result (binary /// operations do not use the third inputs). @@ -69,6 +72,7 @@ fn ctl_data_ternops() -> Vec> { res } +/// Creates the vector of columns corresponding to the opcode, the two inputs and the output of the logic operation. pub fn ctl_data_logic() -> Vec> { // Instead of taking single columns, we reconstruct the entire opcode value directly. let mut res = vec![Column::le_bits(COL_MAP.opcode_bits)]; @@ -76,10 +80,12 @@ pub fn ctl_data_logic() -> Vec> { res } +/// CTL filter for logic operations. pub fn ctl_filter_logic() -> Column { Column::single(COL_MAP.op.logic_op) } +/// Returns the `TableWithColumns` for the CPU rows calling arithmetic operations. pub fn ctl_arithmetic_base_rows() -> TableWithColumns { // Instead of taking single columns, we reconstruct the entire opcode value directly. let mut columns = vec![Column::le_bits(COL_MAP.opcode_bits)]; @@ -101,14 +107,18 @@ pub fn ctl_arithmetic_base_rows() -> TableWithColumns { ) } +/// Creates the vector of `Columns` corresponding to the contents of General Purpose channels when calling byte packing. +/// We use `ctl_data_keccak_sponge` because the `Columns` are the same as the ones computed for `KeccakSpongeStark`. pub fn ctl_data_byte_packing() -> Vec> { ctl_data_keccak_sponge() } +/// CTL filter for the `MLOAD_32BYTES` operation. pub fn ctl_filter_byte_packing() -> Column { Column::single(COL_MAP.op.mload_32bytes) } +/// Creates the vector of `Columns` corresponding to the contents of General Purpose channels when calling byte unpacking. 
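The opcode reconstruction these CTLs rely on (`Column::le_bits(COL_MAP.opcode_bits)`) is just a little-endian weighted sum of the bit columns; in plain arithmetic, as a sketch:

```rust
// What a little-endian bit recombination computes: opcode = Σ b_i * 2^i.
fn opcode_from_bits(bits: [u64; 8]) -> u64 {
    bits.iter().enumerate().map(|(i, &b)| b << i).sum()
}
// e.g. opcode_from_bits([1, 0, 1, 0, 0, 0, 0, 0]) == 5
```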
pub fn ctl_data_byte_unpacking() -> Vec> { // When executing MSTORE_32BYTES, the GP memory channels are used as follows: // GP channel 0: stack[-1] = context @@ -131,11 +141,14 @@ pub fn ctl_data_byte_unpacking() -> Vec> { res } +/// CTL filter for the `MSTORE_32BYTES` operation. pub fn ctl_filter_byte_unpacking() -> Column { Column::single(COL_MAP.op.mstore_32bytes) } +/// Index of the memory channel storing code. pub const MEM_CODE_CHANNEL_IDX: usize = 0; +/// Index of the first general purpose memory channel. pub const MEM_GP_CHANNELS_IDX_START: usize = MEM_CODE_CHANNEL_IDX + 1; /// Make the time/channel column for memory lookups. @@ -145,6 +158,7 @@ fn mem_time_and_channel(channel: usize) -> Column { Column::linear_combination_with_constant([(COL_MAP.clock, scalar)], addend) } +/// Creates the vector of `Columns` corresponding to the contents of the code channel when reading code values. pub fn ctl_data_code_memory() -> Vec> { let mut cols = vec![ Column::constant(F::ONE), // is_read @@ -164,6 +178,7 @@ pub fn ctl_data_code_memory() -> Vec> { cols } +/// Creates the vector of `Columns` corresponding to the contents of General Purpose channels. pub fn ctl_data_gp_memory(channel: usize) -> Vec> { let channel_map = COL_MAP.mem_channels[channel]; let mut cols: Vec<_> = Column::singles([ @@ -181,14 +196,17 @@ pub fn ctl_data_gp_memory(channel: usize) -> Vec> { cols } +/// CTL filter for code read and write operations. pub fn ctl_filter_code_memory() -> Column { Column::sum(COL_MAP.op.iter()) } +/// CTL filter for General Purpose memory read and write operations. pub fn ctl_filter_gp_memory(channel: usize) -> Column { Column::single(COL_MAP.mem_channels[channel].used) } +/// Structure representing the CPU Stark. #[derive(Copy, Clone, Default)] pub struct CpuStark { pub f: PhantomData, @@ -202,6 +220,7 @@ impl, const D: usize> Stark for CpuStark, NUM_CPU_COLUMNS>; + /// Evaluates all CPU constraints. fn eval_packed_generic( &self, vars: &Self::EvaluationFrame, @@ -235,6 +254,8 @@ impl, const D: usize> Stark for CpuStark, diff --git a/evm/src/cpu/decode.rs b/evm/src/cpu/decode.rs index 2fb1082ff0..9a0df4fd2a 100644 --- a/evm/src/cpu/decode.rs +++ b/evm/src/cpu/decode.rs @@ -74,6 +74,7 @@ const fn bits_from_opcode(opcode: u8) -> [bool; 8] { ] } +/// Evaluates the constraints for opcode decoding. pub fn eval_packed_generic( lv: &CpuColumnsView
<P>
, yield_constr: &mut ConstraintConsumer
<P>
, @@ -156,6 +157,8 @@ pub fn eval_packed_generic( yield_constr.constraint(not_pop_op); } +/// Circuit version of `eval_packed_generic`. +/// Evaluates the constraints for opcode decoding. pub fn eval_ext_circuit, const D: usize>( builder: &mut plonky2::plonk::circuit_builder::CircuitBuilder, lv: &CpuColumnsView>, diff --git a/evm/src/cpu/dup_swap.rs b/evm/src/cpu/dup_swap.rs index 94169d173f..fd623c7ee6 100644 --- a/evm/src/cpu/dup_swap.rs +++ b/evm/src/cpu/dup_swap.rs @@ -101,6 +101,7 @@ fn constrain_channel_ext_circuit, const D: usize>( ); yield_constr.constraint(builder, constr); } + // Top of the stack is at `addr = lv.stack_len - 1`. { let constr = builder.add_extension(channel.addr_virtual, offset); let constr = builder.sub_extension(constr, lv.stack_len); @@ -109,6 +110,7 @@ fn constrain_channel_ext_circuit, const D: usize>( } } +/// Evaluates constraints for DUP. fn eval_packed_dup( n: P, lv: &CpuColumnsView
<P>
, @@ -121,10 +123,14 @@ fn eval_packed_dup( let write_channel = &lv.mem_channels[1]; let read_channel = &lv.mem_channels[2]; + // Constrain the input and top of the stack channels to have the same value. channels_equal_packed(filter, write_channel, &lv.mem_channels[0], yield_constr); + // Constrain the output channel's addresses, `is_read` and `used` fields. constrain_channel_packed(false, filter, P::ZEROS, write_channel, lv, yield_constr); + // Constrain the output and top of the stack channels to have the same value. channels_equal_packed(filter, read_channel, &nv.mem_channels[0], yield_constr); + // Constrain the input channel's addresses, `is_read` and `used` fields. constrain_channel_packed(true, filter, n, read_channel, lv, yield_constr); // Constrain nv.stack_len. @@ -139,6 +145,8 @@ fn eval_packed_dup( } } +/// Circuit version of `eval_packed_dup`. +/// Evaluates constraints for DUP. fn eval_ext_circuit_dup, const D: usize>( builder: &mut CircuitBuilder, n: ExtensionTarget, @@ -155,6 +163,7 @@ fn eval_ext_circuit_dup, const D: usize>( let write_channel = &lv.mem_channels[1]; let read_channel = &lv.mem_channels[2]; + // Constrain the input and top of the stack channels to have the same value. channels_equal_ext_circuit( builder, filter, @@ -162,6 +171,7 @@ fn eval_ext_circuit_dup, const D: usize>( &lv.mem_channels[0], yield_constr, ); + // Constrain the output channel's addresses, `is_read` and `used` fields. constrain_channel_ext_circuit( builder, false, @@ -172,6 +182,7 @@ fn eval_ext_circuit_dup, const D: usize>( yield_constr, ); + // Constrain the output and top of the stack channels to have the same value. channels_equal_ext_circuit( builder, filter, @@ -179,6 +190,7 @@ fn eval_ext_circuit_dup, const D: usize>( &nv.mem_channels[0], yield_constr, ); + // Constrain the input channel's addresses, `is_read` and `used` fields. constrain_channel_ext_circuit(builder, true, filter, n, read_channel, lv, yield_constr); // Constrain nv.stack_len. @@ -201,6 +213,7 @@ fn eval_ext_circuit_dup, const D: usize>( } } +/// Evaluates constraints for SWAP. fn eval_packed_swap( n: P, lv: &CpuColumnsView
<P>
, @@ -216,10 +229,15 @@ fn eval_packed_swap( let in2_channel = &lv.mem_channels[1]; let out_channel = &lv.mem_channels[2]; + // Constrain the first input channel value to be equal to the output channel value. channels_equal_packed(filter, in1_channel, out_channel, yield_constr); + // We set `is_read`, `used` and the address for the first input. The first input is + // read from the top of the stack, and is therefore not a memory read. constrain_channel_packed(false, filter, n_plus_one, out_channel, lv, yield_constr); + // Constrain the second input channel value to be equal to the new top of the stack. channels_equal_packed(filter, in2_channel, &nv.mem_channels[0], yield_constr); + // We set `is_read`, `used` and the address for the second input. constrain_channel_packed(true, filter, n_plus_one, in2_channel, lv, yield_constr); // Constrain nv.stack_len. @@ -234,6 +252,8 @@ fn eval_packed_swap( } } +/// Circuit version of `eval_packed_swap`. +/// Evaluates constraints for SWAP. fn eval_ext_circuit_swap, const D: usize>( builder: &mut CircuitBuilder, n: ExtensionTarget, @@ -251,7 +271,10 @@ fn eval_ext_circuit_swap, const D: usize>( let in2_channel = &lv.mem_channels[1]; let out_channel = &lv.mem_channels[2]; + // Constrain the first input channel value to be equal to the output channel value. channels_equal_ext_circuit(builder, filter, in1_channel, out_channel, yield_constr); + // We set `is_read`, `used` and the address for the first input. The first input is + // read from the top of the stack, and is therefore not a memory read. constrain_channel_ext_circuit( builder, false, @@ -262,6 +285,7 @@ fn eval_ext_circuit_swap, const D: usize>( yield_constr, ); + // Constrain the second input channel value to be equal to the new top of the stack. channels_equal_ext_circuit( builder, filter, @@ -269,6 +293,7 @@ fn eval_ext_circuit_swap, const D: usize>( &nv.mem_channels[0], yield_constr, ); + // We set `is_read`, `used` and the address for the second input. constrain_channel_ext_circuit( builder, true, @@ -297,6 +322,7 @@ fn eval_ext_circuit_swap, const D: usize>( } } +/// Evaluates the constraints for the DUP and SWAP opcodes. pub fn eval_packed( lv: &CpuColumnsView
<P>
, nv: &CpuColumnsView
<P>
, @@ -311,6 +337,8 @@ pub fn eval_packed( eval_packed_swap(n, lv, nv, yield_constr); } +/// Circuit version of `eval_packed`. +/// Evaluates the constraints for the DUP and SWAP opcodes. pub fn eval_ext_circuit<F: RichField + Extendable<D>, const D: usize>( builder: &mut CircuitBuilder<F, D>, lv: &CpuColumnsView<ExtensionTarget<D>>, diff --git a/evm/src/cpu/gas.rs b/evm/src/cpu/gas.rs index 6553b81a69..f7ba330d37 100644 --- a/evm/src/cpu/gas.rs +++ b/evm/src/cpu/gas.rs @@ -126,6 +126,7 @@ fn eval_packed_init( yield_constr.constraint_transition(filter * nv.gas[1]); } +/// Evaluates the gas constraints for the opcodes that cost a constant gas. pub fn eval_packed( lv: &CpuColumnsView
<P>
, nv: &CpuColumnsView
<P>
, @@ -271,12 +272,16 @@ fn eval_ext_circuit_init<F: RichField + Extendable<D>, const D: usize>( yield_constr.constraint_transition(builder, constr); } +/// Circuit version of `eval_packed`. +/// Evaluates the gas constraints for the opcodes that cost a constant gas. pub fn eval_ext_circuit<F: RichField + Extendable<D>, const D: usize>( builder: &mut plonky2::plonk::circuit_builder::CircuitBuilder<F, D>, lv: &CpuColumnsView<ExtensionTarget<D>>, nv: &CpuColumnsView<ExtensionTarget<D>>, yield_constr: &mut RecursiveConstraintConsumer<F, D>, ) { + // Evaluates the transition gas constraints. eval_ext_circuit_accumulate(builder, lv, nv, yield_constr); + // Evaluates the initial gas constraints. eval_ext_circuit_init(builder, lv, nv, yield_constr); } diff --git a/evm/src/cpu/halt.rs b/evm/src/cpu/halt.rs index 9ad34344ea..8ed9aa5bcd 100644 --- a/evm/src/cpu/halt.rs +++ b/evm/src/cpu/halt.rs @@ -11,6 +11,7 @@ use crate::constraint_consumer::{ConstraintConsumer, RecursiveConstraintConsumer use crate::cpu::columns::{CpuColumnsView, COL_MAP}; use crate::cpu::membus::NUM_GP_CHANNELS; +/// Evaluates constraints for the `halt` flag. pub fn eval_packed( lv: &CpuColumnsView
<P>
, nv: &CpuColumnsView
<P>
, @@ -45,6 +46,8 @@ pub fn eval_packed( yield_constr.constraint(halt_state * (lv.program_counter - halt_pc)); } +/// Circuit version of `eval_packed`. +/// Evaluates constraints for the `halt` flag. pub fn eval_ext_circuit, const D: usize>( builder: &mut plonky2::plonk::circuit_builder::CircuitBuilder, lv: &CpuColumnsView>, diff --git a/evm/src/cpu/jumps.rs b/evm/src/cpu/jumps.rs index 0c03e2d178..b75ef494e8 100644 --- a/evm/src/cpu/jumps.rs +++ b/evm/src/cpu/jumps.rs @@ -9,6 +9,7 @@ use crate::cpu::columns::CpuColumnsView; use crate::cpu::membus::NUM_GP_CHANNELS; use crate::memory::segments::Segment; +/// Evaluates constraints for EXIT_KERNEL. pub fn eval_packed_exit_kernel( lv: &CpuColumnsView
<P>
, nv: &CpuColumnsView
<P>
, @@ -26,6 +27,8 @@ pub fn eval_packed_exit_kernel( yield_constr.constraint_transition(filter * (input[7] - nv.gas[1])); } +/// Circuit version of `eval_packed_exit_kernel`. +/// Evaluates constraints for EXIT_KERNEL. pub fn eval_ext_circuit_exit_kernel<F: RichField + Extendable<D>, const D: usize>( builder: &mut plonky2::plonk::circuit_builder::CircuitBuilder<F, D>, lv: &CpuColumnsView<ExtensionTarget<D>>, @@ -59,6 +62,7 @@ pub fn eval_ext_circuit_exit_kernel<F: RichField + Extendable<D>, const D: usize } } +/// Evaluates constraints for jump operations: JUMP and JUMPI. pub fn eval_packed_jump_jumpi( lv: &CpuColumnsView

, nv: &CpuColumnsView

, @@ -156,6 +160,8 @@ pub fn eval_packed_jump_jumpi( .constraint_transition(filter * jumps_lv.should_jump * (nv.program_counter - jump_dest)); } +/// Circuit version of `eval_packed_jump_jumpi`. +/// Evaluates constraints for jump operations: JUMP and JUMPI. pub fn eval_ext_circuit_jump_jumpi, const D: usize>( builder: &mut plonky2::plonk::circuit_builder::CircuitBuilder, lv: &CpuColumnsView>, @@ -353,6 +359,7 @@ pub fn eval_ext_circuit_jump_jumpi, const D: usize> } } +/// Evaluates constraints for EXIT_KERNEL, JUMP and JUMPI. pub fn eval_packed( lv: &CpuColumnsView<P>, nv: &CpuColumnsView<P>
, @@ -362,6 +369,8 @@ pub fn eval_packed( eval_packed_jump_jumpi(lv, nv, yield_constr); } +/// Circuit version of `eval_packed`. +/// Evaluates constraints for EXIT_KERNEL, JUMP and JUMPI. pub fn eval_ext_circuit, const D: usize>( builder: &mut plonky2::plonk::circuit_builder::CircuitBuilder, lv: &CpuColumnsView>, diff --git a/evm/src/cpu/membus.rs b/evm/src/cpu/membus.rs index 10dc25a4ca..fe967a2e3e 100644 --- a/evm/src/cpu/membus.rs +++ b/evm/src/cpu/membus.rs @@ -9,6 +9,7 @@ use crate::cpu::columns::CpuColumnsView; /// General-purpose memory channels; they can read and write to all contexts/segments/addresses. pub const NUM_GP_CHANNELS: usize = 5; +/// Indices for code and general purpose memory channels. pub mod channel_indices { use std::ops::Range; @@ -31,6 +32,7 @@ pub mod channel_indices { /// These limitations save us numerous columns in the CPU table. pub const NUM_CHANNELS: usize = channel_indices::GP.end; +/// Evaluates constraints regarding the membus. pub fn eval_packed( lv: &CpuColumnsView
<P>, yield_constr: &mut ConstraintConsumer<P>
, @@ -47,6 +49,8 @@ pub fn eval_packed( } } +/// Circuit version of `eval_packed`. +/// Evaluates constraints regarding the membus. pub fn eval_ext_circuit, const D: usize>( builder: &mut plonky2::plonk::circuit_builder::CircuitBuilder, lv: &CpuColumnsView>, diff --git a/evm/src/cpu/memio.rs b/evm/src/cpu/memio.rs index c0daa8f1c1..71960f5ffc 100644 --- a/evm/src/cpu/memio.rs +++ b/evm/src/cpu/memio.rs @@ -18,16 +18,18 @@ fn get_addr(lv: &CpuColumnsView) -> (T, T, T) { (addr_context, addr_segment, addr_virtual) } +/// Evaluates constraints for MLOAD_GENERAL. fn eval_packed_load( lv: &CpuColumnsView
<P>, nv: &CpuColumnsView<P>, yield_constr: &mut ConstraintConsumer<P>
, ) { - // The opcode for MLOAD_GENERAL is 0xfb. If the operation is MLOAD_GENERAL, lv.opcode_bits[0] = 1 + // The opcode for MLOAD_GENERAL is 0xfb. If the operation is MLOAD_GENERAL, lv.opcode_bits[0] = 1. let filter = lv.op.m_op_general * lv.opcode_bits[0]; let (addr_context, addr_segment, addr_virtual) = get_addr(lv); + // Check that we are loading the correct value from the correct address. let load_channel = lv.mem_channels[3]; yield_constr.constraint(filter * (load_channel.used - P::ONES)); yield_constr.constraint(filter * (load_channel.is_read - P::ONES)); @@ -50,17 +52,21 @@ fn eval_packed_load( ); } +/// Circuit version of `eval_packed_load`. +/// Evaluates constraints for MLOAD_GENERAL. fn eval_ext_circuit_load, const D: usize>( builder: &mut plonky2::plonk::circuit_builder::CircuitBuilder, lv: &CpuColumnsView>, nv: &CpuColumnsView>, yield_constr: &mut RecursiveConstraintConsumer, ) { + // The opcode for MLOAD_GENERAL is 0xfb. If the operation is MLOAD_GENERAL, lv.opcode_bits[0] = 1. let mut filter = lv.op.m_op_general; filter = builder.mul_extension(filter, lv.opcode_bits[0]); let (addr_context, addr_segment, addr_virtual) = get_addr(lv); + // Check that we are loading the correct value from the correct address. let load_channel = lv.mem_channels[3]; { let constr = builder.mul_sub_extension(filter, load_channel.used, filter); @@ -100,6 +106,7 @@ fn eval_ext_circuit_load, const D: usize>( ); } +/// Evaluates constraints for MSTORE_GENERAL. fn eval_packed_store( lv: &CpuColumnsView<P>, nv: &CpuColumnsView<P>
, @@ -109,6 +116,7 @@ fn eval_packed_store( let (addr_context, addr_segment, addr_virtual) = get_addr(lv); + // Check that we are storing the correct value at the correct address. let value_channel = lv.mem_channels[3]; let store_channel = lv.mem_channels[4]; yield_constr.constraint(filter * (store_channel.used - P::ONES)); @@ -171,6 +179,8 @@ fn eval_packed_store( yield_constr.constraint(lv.op.m_op_general * lv.opcode_bits[0] * top_read_channel.used); } +/// Circuit version of `eval_packed_store`. +/// Evaluates constraints for MSTORE_GENERAL. fn eval_ext_circuit_store, const D: usize>( builder: &mut plonky2::plonk::circuit_builder::CircuitBuilder, lv: &CpuColumnsView>, @@ -182,6 +192,7 @@ fn eval_ext_circuit_store, const D: usize>( let (addr_context, addr_segment, addr_virtual) = get_addr(lv); + // Check that we are storing the correct value at the correct address. let value_channel = lv.mem_channels[3]; let store_channel = lv.mem_channels[4]; { @@ -315,6 +326,7 @@ fn eval_ext_circuit_store, const D: usize>( } } +/// Evaluates constraints for MLOAD_GENERAL and MSTORE_GENERAL. pub fn eval_packed( lv: &CpuColumnsView<P>, nv: &CpuColumnsView<P>
, @@ -324,6 +336,8 @@ pub fn eval_packed( eval_packed_store(lv, nv, yield_constr); } +/// Circuit version of `eval_packed`. +/// Evaluates constraints for MLOAD_GENERAL and MSTORE_GENERAL. pub fn eval_ext_circuit, const D: usize>( builder: &mut plonky2::plonk::circuit_builder::CircuitBuilder, lv: &CpuColumnsView>, diff --git a/evm/src/cpu/modfp254.rs b/evm/src/cpu/modfp254.rs index eed497f5d3..c5b130fa04 100644 --- a/evm/src/cpu/modfp254.rs +++ b/evm/src/cpu/modfp254.rs @@ -15,6 +15,7 @@ const P_LIMBS: [u32; 8] = [ 0xd87cfd47, 0x3c208c16, 0x6871ca8d, 0x97816a91, 0x8181585d, 0xb85045b6, 0xe131a029, 0x30644e72, ]; +/// Evaluates constraints to check the modulus in mem_channels[2]. pub fn eval_packed( lv: &CpuColumnsView<P>, yield_constr: &mut ConstraintConsumer<P>
, @@ -31,6 +32,8 @@ pub fn eval_packed( } } +/// Circuit version of `eval_packed`. +/// Evaluates constraints to check the modulus in mem_channels[2]. pub fn eval_ext_circuit, const D: usize>( builder: &mut plonky2::plonk::circuit_builder::CircuitBuilder, lv: &CpuColumnsView>, diff --git a/evm/src/cpu/pc.rs b/evm/src/cpu/pc.rs index 5271ad81aa..6641aea0b3 100644 --- a/evm/src/cpu/pc.rs +++ b/evm/src/cpu/pc.rs @@ -6,6 +6,7 @@ use plonky2::iop::ext_target::ExtensionTarget; use crate::constraint_consumer::{ConstraintConsumer, RecursiveConstraintConsumer}; use crate::cpu::columns::CpuColumnsView; +/// Evaluates constraints to check that we are storing the correct PC. pub fn eval_packed( lv: &CpuColumnsView<P>, nv: &CpuColumnsView<P>
, @@ -19,6 +20,8 @@ pub fn eval_packed( } } +/// Circuit version of `eval_packed`. +/// Evaluates constraints to check that we are storing the correct PC. pub fn eval_ext_circuit, const D: usize>( builder: &mut plonky2::plonk::circuit_builder::CircuitBuilder, lv: &CpuColumnsView>, diff --git a/evm/src/cpu/push0.rs b/evm/src/cpu/push0.rs index d49446cc23..c8c58fefd5 100644 --- a/evm/src/cpu/push0.rs +++ b/evm/src/cpu/push0.rs @@ -6,6 +6,7 @@ use plonky2::iop::ext_target::ExtensionTarget; use crate::constraint_consumer::{ConstraintConsumer, RecursiveConstraintConsumer}; use crate::cpu::columns::CpuColumnsView; +/// Evaluates constraints to check that the pushed value is zero. pub fn eval_packed( lv: &CpuColumnsView<P>, nv: &CpuColumnsView<P>
, @@ -17,6 +18,8 @@ pub fn eval_packed( } } +/// Circuit version of `eval_packed`. +/// Evaluates constraints to check that the pushed value is zero. pub fn eval_ext_circuit, const D: usize>( builder: &mut plonky2::plonk::circuit_builder::CircuitBuilder, lv: &CpuColumnsView>, diff --git a/evm/src/cpu/shift.rs b/evm/src/cpu/shift.rs index 0f92cbd20d..e77762fc5a 100644 --- a/evm/src/cpu/shift.rs +++ b/evm/src/cpu/shift.rs @@ -9,6 +9,8 @@ use crate::cpu::columns::CpuColumnsView; use crate::cpu::membus::NUM_GP_CHANNELS; use crate::memory::segments::Segment; +/// Evaluates constraints for shift operations on the CPU side: +/// the shifting factor is read from memory when displacement < 2^32. pub(crate) fn eval_packed( lv: &CpuColumnsView<P>, yield_constr: &mut ConstraintConsumer<P>
, @@ -59,6 +61,9 @@ pub(crate) fn eval_packed( // last -> last (output is the same) } +/// Circuit version. +/// Evaluates constraints for shift operations on the CPU side: +/// the shifting factor is read from memory when displacement < 2^32. pub(crate) fn eval_ext_circuit, const D: usize>( builder: &mut plonky2::plonk::circuit_builder::CircuitBuilder, lv: &CpuColumnsView>, diff --git a/evm/src/cpu/simple_logic/eq_iszero.rs b/evm/src/cpu/simple_logic/eq_iszero.rs index d75cec4902..7692bb9065 100644 --- a/evm/src/cpu/simple_logic/eq_iszero.rs +++ b/evm/src/cpu/simple_logic/eq_iszero.rs @@ -19,7 +19,11 @@ fn limbs(x: U256) -> [u32; 8] { } res } - +/// Form `diff_pinv`. +/// Let `diff = val0 - val1`. Consider `x[i] = diff[i]^-1` if `diff[i] != 0` and 0 otherwise. +/// Then `diff @ x = num_unequal_limbs`, where `@` denotes the dot product. We set +/// `diff_pinv = num_unequal_limbs^-1 * x` if `num_unequal_limbs != 0` and 0 otherwise. We have +/// `diff @ diff_pinv = 1 - equal` as desired. pub fn generate_pinv_diff(val0: U256, val1: U256, lv: &mut CpuColumnsView) { let val0_limbs = limbs(val0).map(F::from_canonical_u32); let val1_limbs = limbs(val1).map(F::from_canonical_u32); @@ -29,10 +33,6 @@ pub fn generate_pinv_diff(val0: U256, val1: U256, lv: &mut CpuColumnsV .sum(); // Form `diff_pinv`. - // Let `diff = val0 - val1`. Consider `x[i] = diff[i]^-1` if `diff[i] != 0` and 0 otherwise. - // Then `diff @ x = num_unequal_limbs`, where `@` denotes the dot product. We set - // `diff_pinv = num_unequal_limbs^-1 * x` if `num_unequal_limbs != 0` and 0 otherwise. We have - // `diff @ diff_pinv = 1 - equal` as desired. let logic = lv.general.logic_mut(); let num_unequal_limbs_inv = F::from_canonical_usize(num_unequal_limbs) .try_inverse() @@ -42,6 +42,7 @@ pub fn generate_pinv_diff(val0: U256, val1: U256, lv: &mut CpuColumnsV } } +/// Evaluates the constraints for EQ and ISZERO. pub fn eval_packed( lv: &CpuColumnsView
<P>, nv: &CpuColumnsView<P>
, @@ -98,6 +99,8 @@ pub fn eval_packed( ); } +/// Circuit version of `eval_packed`. +/// Evaluates the constraints for EQ and ISZERO. pub fn eval_ext_circuit, const D: usize>( builder: &mut plonky2::plonk::circuit_builder::CircuitBuilder, lv: &CpuColumnsView>, diff --git a/evm/src/cpu/simple_logic/mod.rs b/evm/src/cpu/simple_logic/mod.rs index cfedfa51c7..dfe396fd48 100644 --- a/evm/src/cpu/simple_logic/mod.rs +++ b/evm/src/cpu/simple_logic/mod.rs @@ -9,6 +9,7 @@ use plonky2::iop::ext_target::ExtensionTarget; use crate::constraint_consumer::{ConstraintConsumer, RecursiveConstraintConsumer}; use crate::cpu::columns::CpuColumnsView; +/// Evaluates constraints for NOT, EQ and ISZERO. pub fn eval_packed( lv: &CpuColumnsView
<P>, nv: &CpuColumnsView<P>
, @@ -18,6 +19,8 @@ pub fn eval_packed( eq_iszero::eval_packed(lv, nv, yield_constr); } +/// Circuit version of `eval_packed`. +/// Evaluates constraints for NOT, EQ and ISZERO. pub fn eval_ext_circuit, const D: usize>( builder: &mut plonky2::plonk::circuit_builder::CircuitBuilder, lv: &CpuColumnsView>, diff --git a/evm/src/cpu/simple_logic/not.rs b/evm/src/cpu/simple_logic/not.rs index 6f71ac6f90..727e840c8c 100644 --- a/evm/src/cpu/simple_logic/not.rs +++ b/evm/src/cpu/simple_logic/not.rs @@ -11,6 +11,7 @@ use crate::cpu::stack; const LIMB_SIZE: usize = 32; const ALL_1_LIMB: u64 = (1 << LIMB_SIZE) - 1; +/// Evaluates constraints for NOT. pub fn eval_packed( lv: &CpuColumnsView
<P>, nv: &CpuColumnsView<P>
, @@ -30,6 +31,8 @@ pub fn eval_packed( stack::eval_packed_one(lv, nv, filter, stack::BASIC_UNARY_OP.unwrap(), yield_constr); } +/// Circuit version of `eval_packed`. +/// Evaluates constraints for NOT. pub fn eval_ext_circuit, const D: usize>( builder: &mut plonky2::plonk::circuit_builder::CircuitBuilder, lv: &CpuColumnsView>, diff --git a/evm/src/cpu/stack.rs b/evm/src/cpu/stack.rs index 20f8e0f592..7057d20f30 100644 --- a/evm/src/cpu/stack.rs +++ b/evm/src/cpu/stack.rs @@ -13,6 +13,10 @@ use crate::cpu::columns::CpuColumnsView; use crate::cpu::membus::NUM_GP_CHANNELS; use crate::memory::segments::Segment; +/// Structure representing an opcode's stack behaviour: +/// - the number of pops, +/// - whether the opcode pushes, +/// - whether unused channels should be disabled. #[derive(Clone, Copy)] pub(crate) struct StackBehavior { pub(crate) num_pops: usize, @@ -20,33 +24,37 @@ pub(crate) struct StackBehavior { disable_other_channels: bool, } +/// `StackBehavior` for unary operations. pub(crate) const BASIC_UNARY_OP: Option = Some(StackBehavior { num_pops: 1, pushes: true, disable_other_channels: true, }); +/// `StackBehavior` for binary operations. const BASIC_BINARY_OP: Option = Some(StackBehavior { num_pops: 2, pushes: true, disable_other_channels: true, }); +/// `StackBehavior` for ternary operations. const BASIC_TERNARY_OP: Option = Some(StackBehavior { num_pops: 3, pushes: true, disable_other_channels: true, }); - +/// `StackBehavior` for JUMP. pub(crate) const JUMP_OP: Option = Some(StackBehavior { num_pops: 1, pushes: false, disable_other_channels: false, }); +/// `StackBehavior` for JUMPI. pub(crate) const JUMPI_OP: Option = Some(StackBehavior { num_pops: 2, pushes: false, disable_other_channels: false, }); - +/// `StackBehavior` for MLOAD_GENERAL. pub(crate) const MLOAD_GENERAL_OP: Option = Some(StackBehavior { num_pops: 3, pushes: true, @@ -123,17 +131,20 @@ pub(crate) const STACK_BEHAVIORS: OpsColumnsView> = OpsCol }), }; +/// Stack behavior for EQ. pub(crate) const EQ_STACK_BEHAVIOR: Option = Some(StackBehavior { num_pops: 2, pushes: true, disable_other_channels: true, }); +/// Stack behavior for ISZERO. pub(crate) const IS_ZERO_STACK_BEHAVIOR: Option = Some(StackBehavior { num_pops: 1, pushes: true, disable_other_channels: true, }); +/// Evaluates constraints for one `StackBehavior`. pub(crate) fn eval_packed_one( lv: &CpuColumnsView<P>, nv: &CpuColumnsView<P>
, @@ -243,6 +254,7 @@ pub(crate) fn eval_packed_one( yield_constr.constraint_transition(filter * (nv.stack_len - (lv.stack_len - num_pops + push))); } +/// Evaluates constraints for all opcodes' `StackBehavior`s. pub fn eval_packed( lv: &CpuColumnsView
<P>, nv: &CpuColumnsView<P>
, @@ -299,6 +311,8 @@ pub fn eval_packed( ); } +/// Circuit version of `eval_packed_one`. +/// Evaluates constraints for one `StackBehavior`. pub(crate) fn eval_ext_circuit_one, const D: usize>( builder: &mut plonky2::plonk::circuit_builder::CircuitBuilder, lv: &CpuColumnsView>, @@ -503,6 +517,8 @@ pub(crate) fn eval_ext_circuit_one, const D: usize> yield_constr.constraint_transition(builder, constr); } +/// Circuit version of `eval_packed`. +/// Evaluates constraints for all opcodes' `StackBehavior`s. pub fn eval_ext_circuit, const D: usize>( builder: &mut plonky2::plonk::circuit_builder::CircuitBuilder, lv: &CpuColumnsView>, diff --git a/evm/src/cpu/stack_bounds.rs b/evm/src/cpu/stack_bounds.rs index e66e6686b5..cc8c6aedfe 100644 --- a/evm/src/cpu/stack_bounds.rs +++ b/evm/src/cpu/stack_bounds.rs @@ -20,6 +20,7 @@ use crate::cpu::columns::CpuColumnsView; pub const MAX_USER_STACK_SIZE: usize = 1024; +/// Evaluates constraints to check for stack overflows. pub fn eval_packed( lv: &CpuColumnsView<P>, yield_constr: &mut ConstraintConsumer<P>
, @@ -36,6 +37,8 @@ pub fn eval_packed( yield_constr.constraint(filter * (lhs - rhs)); } +/// Circuit version of `eval_packed`. +/// Evaluates constraints to check for stack overflows. pub fn eval_ext_circuit, const D: usize>( builder: &mut plonky2::plonk::circuit_builder::CircuitBuilder, lv: &CpuColumnsView>, diff --git a/evm/src/cpu/syscalls_exceptions.rs b/evm/src/cpu/syscalls_exceptions.rs index 1437fba02b..78f1e0c1ae 100644 --- a/evm/src/cpu/syscalls_exceptions.rs +++ b/evm/src/cpu/syscalls_exceptions.rs @@ -19,6 +19,7 @@ use crate::memory::segments::Segment; const BYTES_PER_OFFSET: usize = crate::cpu::kernel::assembler::BYTES_PER_OFFSET as usize; const_assert!(BYTES_PER_OFFSET < NUM_GP_CHANNELS); // Reserve one channel for stack push +/// Evaluates constraints for syscalls and exceptions. pub fn eval_packed( lv: &CpuColumnsView
<P>, nv: &CpuColumnsView<P>
, @@ -65,6 +66,7 @@ pub fn eval_packed( exc_jumptable_start + exc_code * P::Scalar::from_canonical_usize(BYTES_PER_OFFSET); for (i, channel) in lv.mem_channels[1..BYTES_PER_OFFSET + 1].iter().enumerate() { + // Set `used` and `is_read`. yield_constr.constraint(total_filter * (channel.used - P::ONES)); yield_constr.constraint(total_filter * (channel.is_read - P::ONES)); @@ -120,6 +122,8 @@ pub fn eval_packed( } } +/// Circuit version of `eval_packed`. +/// Evaluates constraints for syscalls and exceptions. pub fn eval_ext_circuit, const D: usize>( builder: &mut plonky2::plonk::circuit_builder::CircuitBuilder, lv: &CpuColumnsView>, @@ -182,6 +186,7 @@ pub fn eval_ext_circuit, const D: usize>( ); for (i, channel) in lv.mem_channels[1..BYTES_PER_OFFSET + 1].iter().enumerate() { + // Set `used` and `is_read`. { let constr = builder.mul_sub_extension(total_filter, channel.used, total_filter); yield_constr.constraint(builder, constr); diff --git a/evm/src/cross_table_lookup.rs b/evm/src/cross_table_lookup.rs index 621403f912..d5d8b6b36b 100644 --- a/evm/src/cross_table_lookup.rs +++ b/evm/src/cross_table_lookup.rs @@ -1,3 +1,32 @@ +//! This module provides support for cross-table lookups. +//! +//! If a STARK S_1 calls an operation that is carried out by another STARK S_2, +//! S_1 provides the inputs to S_2 and reads the output from S_2. To ensure that +//! the operation was correctly carried out, we must check that the provided inputs +//! and outputs are correctly read. Cross-table lookups carry out that check. +//! +//! To achieve this, smaller CTL tables are created on both sides: looking and looked tables. +//! In our example, we create a table S_1' comprised of columns -- or linear combinations +//! of columns -- of S_1, and rows that call operations carried out in S_2. We also create a +//! table S_2' comprised of columns -- or linear combinations of columns -- of S_2 and rows +//! that carry out the operations needed by other STARKs. Then, S_1' is a looking table for +//! the looked S_2', since we want to check that the operation outputs in S_1' are indeed in S_2'. +//! Furthermore, the concatenation of all tables looking into S_2' must be equal to S_2'. +//! +//! To achieve this, we construct, for each table, a permutation polynomial Z(x). +//! Z(x) is computed as the product of all its column combinations. +//! To check it was correctly constructed, we check: +//! - Z(gw) = Z(w) * combine(w) where combine(w) is the column combination at point w. +//! - Z(g^(n-1)) = combine(1). +//! - The verifier also checks that the product of looking table Z polynomials is equal +//! to the associated looked table Z polynomial. +//! Note that the first two checks are written that way because Z polynomials are computed +//! upside down for convenience. +//! +//! Additionally, we support cross-table lookups over two rows. The permutation principle +//! is similar, but we provide not only `local_values` but also `next_values` -- corresponding to +//! the current and next row values -- when computing the linear combinations. + use std::borrow::Borrow; use std::fmt::Debug; use std::iter::repeat; @@ -26,7 +55,10 @@ use crate::evaluation_frame::StarkEvaluationFrame; use crate::proof::{StarkProofTarget, StarkProofWithMetadata}; use crate::stark::Stark; -/// Represent a linear combination of columns. +/// Represents two linear combinations of columns, corresponding to the current and next row values.
+/// Each linear combination is represented as: +/// - a vector of `(usize, F)` corresponding to the column number and the associated multiplicand +/// - the constant of the linear combination. #[derive(Clone, Debug)] pub struct Column { linear_combination: Vec<(usize, F)>, @@ -35,6 +67,7 @@ pub struct Column { } impl Column { + /// Returns the representation of a single column in the current row. pub fn single(c: usize) -> Self { Self { linear_combination: vec![(c, F::ONE)], @@ -43,12 +76,14 @@ impl Column { } } + /// Returns multiple single columns in the current row. pub fn singles>>( cs: I, ) -> impl Iterator { cs.into_iter().map(|c| Self::single(*c.borrow())) } + /// Returns the representation of a single column in the next row. pub fn single_next_row(c: usize) -> Self { Self { linear_combination: vec![], @@ -57,12 +92,14 @@ impl Column { } } + /// Returns multiple single columns for the next row. pub fn singles_next_row>>( cs: I, ) -> impl Iterator { cs.into_iter().map(|c| Self::single_next_row(*c.borrow())) } + /// Returns a linear combination corresponding to a constant. pub fn constant(constant: F) -> Self { Self { linear_combination: vec![], @@ -71,14 +108,17 @@ impl Column { } } + /// Returns a linear combination corresponding to 0. pub fn zero() -> Self { Self::constant(F::ZERO) } + /// Returns a linear combination corresponding to 1. pub fn one() -> Self { Self::constant(F::ONE) } + /// Given an iterator of `(usize, F)` and a constant, returns the associated linear combination of columns for the current row. pub fn linear_combination_with_constant>( iter: I, constant: F, @@ -97,6 +137,7 @@ impl Column { } } + /// Given an iterator of `(usize, F)` and a constant, returns the associated linear combination of columns for the current and the next rows. pub fn linear_combination_and_next_row_with_constant>( iter: I, next_row_iter: I, @@ -124,14 +165,19 @@ impl Column { } } + /// Returns a linear combination of columns, with no additional constant. pub fn linear_combination>(iter: I) -> Self { Self::linear_combination_with_constant(iter, F::ZERO) } + /// Given an iterator of columns (c_0, ..., c_n) containing bits in little-endian order: + /// returns the representation of c_0 + 2 * c_1 + ... + 2^n * c_n. pub fn le_bits>>(cs: I) -> Self { Self::linear_combination(cs.into_iter().map(|c| *c.borrow()).zip(F::TWO.powers())) } + /// Given an iterator of columns (c_0, ..., c_n) containing bytes in little-endian order: + /// returns the representation of c_0 + 256 * c_1 + ... + 256^n * c_n. pub fn le_bytes>>(cs: I) -> Self { Self::linear_combination( cs.into_iter() @@ -140,10 +186,12 @@ impl Column { ) } + /// Given an iterator of columns, returns the representation of their sum. pub fn sum>>(cs: I) -> Self { Self::linear_combination(cs.into_iter().map(|c| *c.borrow()).zip(repeat(F::ONE))) } + /// Given the column values for the current row, returns the evaluation of the linear combination. pub fn eval(&self, v: &[P]) -> P where FE: FieldExtension, P: PackedField, @@ -156,6 +204,7 @@ impl Column { + FE::from_basefield(self.constant) } + /// Given the column values for the current and next rows, evaluates the current and next linear combinations and returns their sum. pub fn eval_with_next(&self, v: &[P], next_v: &[P]) -> P where FE: FieldExtension, P: PackedField, @@ -173,7 +222,7 @@ impl Column { + FE::from_basefield(self.constant) } - /// Evaluate on an row of a table given in column-major form. + /// Evaluate on a row of a table given in column-major form.
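A `Column` thus reduces to an affine function of trace cells. As a rough, self-contained illustration (toy `u64` arithmetic instead of plonky2 field types; `ToyColumn` and its methods are hypothetical names, not the real API), a `le_bits`-style combination can be built and evaluated like this:

struct ToyColumn {
    // (column index, coefficient) pairs, mirroring `linear_combination`.
    linear_combination: Vec<(usize, u64)>,
    constant: u64,
}

impl ToyColumn {
    // Combine bit columns in little-endian order: c_0 + 2*c_1 + ... + 2^n*c_n.
    fn le_bits(cols: &[usize]) -> Self {
        ToyColumn {
            linear_combination: cols.iter().copied().zip((0..).map(|i: u32| 1u64 << i)).collect(),
            constant: 0,
        }
    }

    // Evaluate the combination on one row of column values.
    fn eval(&self, row: &[u64]) -> u64 {
        self.linear_combination.iter().map(|&(c, f)| row[c] * f).sum::<u64>() + self.constant
    }
}

fn main() {
    // Columns 0..3 hold the bits 1, 0, 1 (little-endian), i.e. the value 5.
    let row = [1u64, 0, 1];
    assert_eq!(ToyColumn::le_bits(&[0, 1, 2]).eval(&row), 5);
}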
pub fn eval_table(&self, table: &[PolynomialValues], row: usize) -> F { let mut res = self .linear_combination @@ -195,6 +244,7 @@ impl Column { res } + /// Circuit version of `eval`: Given a row's targets, returns their linear combination. pub fn eval_circuit( &self, builder: &mut CircuitBuilder, @@ -217,6 +267,8 @@ impl Column { builder.inner_product_extension(F::ONE, constant, pairs) } + /// Circuit version of `eval_with_next`: + /// Given the targets of the current and next row, returns the sum of their linear combinations. pub fn eval_with_next_circuit( &self, builder: &mut CircuitBuilder, @@ -248,6 +300,9 @@ impl Column { } } +/// A `Table` with a linear combination of columns and a filter. +/// `filter_column` is used to determine the rows to select in `Table`. +/// `columns` represents linear combinations of the columns of `Table`. #[derive(Clone, Debug)] pub struct TableWithColumns { table: Table, @@ -256,6 +311,7 @@ pub struct TableWithColumns { } impl TableWithColumns { + /// Generates a new `TableWithColumns` given a `Table`, a linear combination of columns `columns` and a `filter_column`. pub fn new(table: Table, columns: Vec>, filter_column: Option>) -> Self { Self { table, @@ -265,13 +321,19 @@ impl TableWithColumns { } } +/// Cross-table lookup data consisting of the lookup table (`looked_table`) and all the tables that look into `looked_table` (`looking_tables`). +/// Each `looking_table` corresponds to a STARK's table whose rows have been filtered and whose columns have been combined linearly (see `eval_table`). The concatenation of those smaller tables should result in the `looked_table`. #[derive(Clone)] pub struct CrossTableLookup { + /// Column linear combinations for all tables that are looking into the current table. pub(crate) looking_tables: Vec>, + /// Column linear combination for the current table. pub(crate) looked_table: TableWithColumns, } impl CrossTableLookup { + /// Creates a new `CrossTableLookup` given some looking tables and a looked table. + /// All tables should have the same width. pub fn new( looking_tables: Vec>, looked_table: TableWithColumns, @@ -285,6 +347,8 @@ impl CrossTableLookup { } } + /// Given a `Table` t and the number of challenges, returns the number of cross-table lookup polynomials associated with t, + /// i.e. the number of looking and looked tables among all CTLs whose columns are taken from t. pub(crate) fn num_ctl_zs(ctls: &[Self], table: Table, num_challenges: usize) -> usize { let mut num_ctls = 0; for ctl in ctls { @@ -298,27 +362,35 @@ impl CrossTableLookup { /// Cross-table lookup data for one table. #[derive(Clone, Default)] pub struct CtlData { + /// Data associated with all Z(x) polynomials for one table. pub(crate) zs_columns: Vec>, } /// Cross-table lookup data associated with one Z(x) polynomial. #[derive(Clone)] pub(crate) struct CtlZData { + /// Z polynomial values. pub(crate) z: PolynomialValues, + /// Cross-table lookup challenge. pub(crate) challenge: GrandProductChallenge, + /// Column linear combination for the current table. pub(crate) columns: Vec>, + /// Filter column for the current table. It evaluates to either 1 or 0. pub(crate) filter_column: Option>, } impl CtlData { + /// Returns the number of cross-table lookup polynomials. pub fn len(&self) -> usize { self.zs_columns.len() } + /// Returns whether there are no cross-table lookups. pub fn is_empty(&self) -> bool { self.zs_columns.is_empty() } + /// Returns all the cross-table lookup polynomials.
pub fn z_polys(&self) -> Vec> { self.zs_columns .iter() @@ -449,6 +521,11 @@ pub(crate) fn get_grand_product_challenge_set_target< GrandProductChallengeSet { challenges } } +/// Generates all the cross-table lookup data for all tables. +/// - `trace_poly_values` corresponds to the trace values for all tables. +/// - `cross_table_lookups` corresponds to all the cross-table lookups, i.e. the looked and looking tables, as described in `CrossTableLookup`. +/// - `ctl_challenges` corresponds to the challenges used for CTLs. +/// For each `CrossTableLookup`, and each looking/looked table, the partial products for the CTL are computed, and added to that table's `CtlZData`. pub(crate) fn cross_table_lookup_data( trace_poly_values: &[Vec>; NUM_TABLES], cross_table_lookups: &[CrossTableLookup], @@ -499,6 +576,14 @@ pub(crate) fn cross_table_lookup_data( ctl_data_per_table } +/// Computes the cross-table lookup partial products for one table and given column linear combinations. +/// `trace` represents the trace values for the given table. +/// `columns` are all the column linear combinations to evaluate. +/// `filter_column` is a column linear combination used to determine whether a row should be selected. +/// `challenge` is a cross-table lookup challenge. +/// The initial product `p` is 1. +/// For each row, if the `filter_column` evaluates to 1, then the row is selected. All the column linear combinations are evaluated at said row. All those evaluations are combined using the challenge to get a value `v`. +/// The product is updated: `p *= v`, and is pushed to the vector of partial products. fn partial_products( trace: &[PolynomialValues], columns: &[Column], @@ -529,6 +614,7 @@ fn partial_products( res.into() } +/// Data necessary to check the cross-table lookups of a given table. #[derive(Clone)] pub struct CtlCheckVars<'a, F, FE, P, const D2: usize> where @@ -536,22 +622,29 @@ where FE: FieldExtension, P: PackedField, { + /// Evaluation of the trace polynomials at point `zeta`. pub(crate) local_z: P, + /// Evaluation of the trace polynomials at point `g * zeta`. pub(crate) next_z: P, + /// Cross-table lookup challenges. pub(crate) challenges: GrandProductChallenge, + /// Column linear combinations of the `CrossTableLookup`s. pub(crate) columns: &'a [Column], + /// Column linear combination that evaluates to either 1 or 0. pub(crate) filter_column: &'a Option>, } impl<'a, F: RichField + Extendable, const D: usize> CtlCheckVars<'a, F, F::Extension, F::Extension, D> { + /// Extracts the `CtlCheckVars` for each STARK. pub(crate) fn from_proofs>( proofs: &[StarkProofWithMetadata; NUM_TABLES], cross_table_lookups: &'a [CrossTableLookup], ctl_challenges: &'a GrandProductChallengeSet, num_lookup_columns: &[usize; NUM_TABLES], ) -> [Vec; NUM_TABLES] { + // Get all cross-table lookup polynomial openings for each STARK proof. let mut ctl_zs = proofs .iter() .zip(num_lookup_columns) @@ -563,6 +656,7 @@ impl<'a, F: RichField + Extendable, const D: usize> }) .collect::>(); + // Put each cross-table lookup polynomial into the correct table data: if a CTL polynomial is extracted from looking/looked table t, then we add it to the `CtlCheckVars` of table t.
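To make the "upside down" Z construction described above concrete, here is a minimal self-contained sketch with toy `u64` arithmetic (a hypothetical helper, not the library's `partial_products`; the real code works over field elements and also applies `filter_column`, with unselected rows contributing a factor of 1): the value stored at row i is the product over rows i..n-1, so the complete product sits on the first row and the transition check Z(w) = Z(gw) * combine(w) walks backwards through the trace.

fn partial_products_upside_down(combined: &[u64]) -> Vec<u64> {
    // z[i] = combined[i] * combined[i+1] * ... * combined[n-1].
    let mut z = vec![1u64; combined.len()];
    let mut acc = 1u64;
    for (i, &v) in combined.iter().enumerate().rev() {
        acc = acc.wrapping_mul(v);
        z[i] = acc;
    }
    z
}

fn main() {
    // Three selected rows whose combined values are 2, 3 and 5.
    // The first entry is the full product; the last row holds the first term.
    assert_eq!(partial_products_upside_down(&[2, 3, 5]), vec![30, 15, 5]);
}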
let mut ctl_vars_per_table = [0; NUM_TABLES].map(|_| vec![]); for CrossTableLookup { looking_tables, @@ -595,7 +689,10 @@ impl<'a, F: RichField + Extendable, const D: usize> } } -/// CTL Z partial products are upside down: the complete product is on the first row, and +/// Checks the cross-table lookup Z polynomials for each table: +/// - Checks that the CTL `Z` partial products are correctly updated. +/// - Checks that the final value of the CTL product is the combination of all STARKs' CTL polynomials. +/// CTL `Z` partial products are upside down: the complete product is on the first row, and /// the first term is on the last row. This allows the transition constraint to be: /// Z(w) = Z(gw) * combine(w) where combine is called on the local row /// and not the next. This enables CTLs across two rows. @@ -621,6 +718,7 @@ pub(crate) fn eval_cross_table_lookup_checks { + /// Evaluation of the trace polynomials at point `zeta`. pub(crate) local_z: ExtensionTarget, + /// Evaluation of the trace polynomials at point `g * zeta`. pub(crate) next_z: ExtensionTarget, + /// Cross-table lookup challenges. pub(crate) challenges: GrandProductChallenge, + /// Column linear combinations of the `CrossTableLookup`s. pub(crate) columns: &'a [Column], + /// Column linear combination that evaluates to either 1 or 0. pub(crate) filter_column: &'a Option>, } impl<'a, F: Field, const D: usize> CtlCheckVarsTarget<'a, F, D> { + /// Circuit version of `from_proofs`. Extracts the `CtlCheckVarsTarget` for each STARK. pub(crate) fn from_proof( table: Table, proof: &StarkProofTarget, @@ -657,6 +763,7 @@ impl<'a, F: Field, const D: usize> CtlCheckVarsTarget<'a, F, D> { ctl_challenges: &'a GrandProductChallengeSet, num_lookup_columns: usize, ) -> Vec { + // Get all cross-table lookup polynomial openings for each STARK proof. let mut ctl_zs = { let openings = &proof.openings; let ctl_zs = openings.auxiliary_polys.iter().skip(num_lookup_columns); @@ -667,6 +774,7 @@ impl<'a, F: Field, const D: usize> CtlCheckVarsTarget<'a, F, D> { ctl_zs.zip(ctl_zs_next) }; + // Put each cross-table lookup polynomial into the correct table data: if a CTL polynomial is extracted from looking/looked table t, then we add it to the `CtlCheckVars` of table t. let mut ctl_vars = vec![]; for CrossTableLookup { looking_tables, @@ -704,6 +812,13 @@ impl<'a, F: Field, const D: usize> CtlCheckVarsTarget<'a, F, D> { } } +/// Circuit version of `eval_cross_table_lookup_checks`. Checks the cross-table lookups for each table: +/// - Checks that the CTL `Z` partial products are correctly updated. +/// - Checks that the final value of the CTL product is the combination of all STARKs' CTL polynomials. +/// CTL `Z` partial products are upside down: the complete product is on the first row, and +/// the first term is on the last row. This allows the transition constraint to be: +/// Z(w) = Z(gw) * combine(w) where combine is called on the local row +/// and not the next. This enables CTLs across two rows. pub(crate) fn eval_cross_table_lookup_checks_circuit< S: Stark, F: RichField + Extendable, @@ -742,12 +857,14 @@ pub(crate) fn eval_cross_table_lookup_checks_circuit< builder.mul_add_extension(filter, x, tmp) // filter * x + 1 - filter } + // Compute all linear combinations on the current table, and combine them using the challenge. 
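// Note: `combine` here is expected to fold a row's column evaluations
// (v_0, ..., v_{k-1}) into a single element using the CTL challenges
// beta and gamma, roughly as
//     combined = v_0 + v_1 * beta + ... + v_{k-1} * beta^(k-1) + gamma,
// so that a looking row and a looked row contribute equal factors to their
// Z products exactly when all their combined column values match.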
let evals = columns .iter() .map(|c| c.eval_with_next_circuit(builder, local_values, next_values)) .collect::>(); let combined = challenges.combine_circuit(builder, &evals); + // If the filter evaluates to 1, then the previously computed combination is used. let select = select(builder, local_filter, combined); // Check value of `Z(g^(n-1))` @@ -759,6 +876,7 @@ pub(crate) fn eval_cross_table_lookup_checks_circuit< } } +/// Verifies all cross-table lookups. pub(crate) fn verify_cross_table_lookups, const D: usize>( cross_table_lookups: &[CrossTableLookup], ctl_zs_first: [Vec; NUM_TABLES], @@ -774,15 +892,19 @@ pub(crate) fn verify_cross_table_lookups, const D: }, ) in cross_table_lookups.iter().enumerate() { + // Get elements looking into `looked_table` that are not associated to any STARK. let extra_product_vec = &ctl_extra_looking_products[looked_table.table as usize]; for c in 0..config.num_challenges { + // Compute the combination of all looking table CTL polynomial openings. let looking_zs_prod = looking_tables .iter() .map(|table| *ctl_zs_openings[table.table as usize].next().unwrap()) .product::() * extra_product_vec[c]; + // Get the looked table CTL polynomial opening. let looked_z = *ctl_zs_openings[looked_table.table as usize].next().unwrap(); + // Ensure that the combination of looking table openings is equal to the looked table opening. ensure!( looking_zs_prod == looked_z, "Cross-table lookup {:?} verification failed.", @@ -795,6 +917,7 @@ pub(crate) fn verify_cross_table_lookups, const D: Ok(()) } +/// Circuit version of `verify_cross_table_lookups`. Verifies all cross-table lookups. pub(crate) fn verify_cross_table_lookups_circuit, const D: usize>( builder: &mut CircuitBuilder, cross_table_lookups: Vec>, @@ -808,8 +931,10 @@ pub(crate) fn verify_cross_table_lookups_circuit, c looked_table, } in cross_table_lookups.into_iter() { + // Get elements looking into `looked_table` that are not associated to any STARK. let extra_product_vec = &ctl_extra_looking_products[looked_table.table as usize]; for c in 0..inner_config.num_challenges { + // Compute the combination of all looking table CTL polynomial openings. let mut looking_zs_prod = builder.mul_many( looking_tables .iter() @@ -818,7 +943,9 @@ pub(crate) fn verify_cross_table_lookups_circuit, c looking_zs_prod = builder.mul(looking_zs_prod, extra_product_vec[c]); + // Get the looked table CTL polynomial opening. let looked_z = *ctl_zs_openings[looked_table.table as usize].next().unwrap(); + // Verify that the combination of looking table openings is equal to the looked table opening. builder.connect(looked_z, looking_zs_prod); } } diff --git a/evm/src/keccak/keccak_stark.rs b/evm/src/keccak/keccak_stark.rs index 2745d03302..19524e2d8b 100644 --- a/evm/src/keccak/keccak_stark.rs +++ b/evm/src/keccak/keccak_stark.rs @@ -33,22 +33,26 @@ pub(crate) const NUM_ROUNDS: usize = 24; /// Number of 64-bit elements in the Keccak permutation input. pub(crate) const NUM_INPUTS: usize = 25; +/// Create vector of `Columns` corresponding to the permutation input limbs. pub fn ctl_data_inputs() -> Vec> { let mut res: Vec<_> = (0..2 * NUM_INPUTS).map(reg_input_limb).collect(); res.push(Column::single(TIMESTAMP)); res } +/// Create vector of `Columns` corresponding to the permutation output limbs. pub fn ctl_data_outputs() -> Vec> { let mut res: Vec<_> = Column::singles((0..2 * NUM_INPUTS).map(reg_output_limb)).collect(); res.push(Column::single(TIMESTAMP)); res } +/// CTL filter for the first round of the Keccak permutation. 
pub fn ctl_filter_inputs() -> Column { Column::single(reg_step(0)) } +/// CTL filter for the final round of the Keccak permutation. pub fn ctl_filter_outputs() -> Column { Column::single(reg_step(NUM_ROUNDS - 1)) } diff --git a/evm/src/keccak_sponge/columns.rs b/evm/src/keccak_sponge/columns.rs index 431c09e092..f10dfbfd9a 100644 --- a/evm/src/keccak_sponge/columns.rs +++ b/evm/src/keccak_sponge/columns.rs @@ -3,17 +3,27 @@ use std::mem::{size_of, transmute}; use crate::util::{indices_arr, transmute_no_compile_time_size_checks}; +/// Total number of sponge bytes: number of rate bytes + number of capacity bytes. pub(crate) const KECCAK_WIDTH_BYTES: usize = 200; +/// Total number of 32-bit limbs in the sponge. pub(crate) const KECCAK_WIDTH_U32S: usize = KECCAK_WIDTH_BYTES / 4; +/// Number of non-digest 32-bit limbs. pub(crate) const KECCAK_WIDTH_MINUS_DIGEST_U32S: usize = (KECCAK_WIDTH_BYTES - KECCAK_DIGEST_BYTES) / 4; +/// Number of rate bytes. pub(crate) const KECCAK_RATE_BYTES: usize = 136; +/// Number of 32-bit rate limbs. pub(crate) const KECCAK_RATE_U32S: usize = KECCAK_RATE_BYTES / 4; +/// Number of capacity bytes. pub(crate) const KECCAK_CAPACITY_BYTES: usize = 64; +/// Number of 32-bit capacity limbs. pub(crate) const KECCAK_CAPACITY_U32S: usize = KECCAK_CAPACITY_BYTES / 4; +/// Number of output digest bytes used during the squeezing phase. pub(crate) const KECCAK_DIGEST_BYTES: usize = 32; +/// Number of 32-bit digest limbs. pub(crate) const KECCAK_DIGEST_U32S: usize = KECCAK_DIGEST_BYTES / 4; +/// A view of `KeccakSpongeStark`'s columns. #[repr(C)] #[derive(Eq, PartialEq, Debug)] pub(crate) struct KeccakSpongeColumnsView { @@ -21,9 +31,11 @@ pub(crate) struct KeccakSpongeColumnsView { /// not a padding byte; 0 otherwise. pub is_full_input_block: T, - // The base address at which we will read the input block. + /// The context of the base address at which we will read the input block. pub context: T, + /// The segment of the base address at which we will read the input block. pub segment: T, + /// The virtual address at which we will read the input block. pub virt: T, /// The timestamp at which inputs should be read from memory. @@ -66,6 +78,7 @@ pub(crate) struct KeccakSpongeColumnsView { } // `u8` is guaranteed to have a `size_of` of 1. +/// Number of columns in `KeccakSpongeStark`. pub const NUM_KECCAK_SPONGE_COLUMNS: usize = size_of::>(); impl From<[T; NUM_KECCAK_SPONGE_COLUMNS]> for KeccakSpongeColumnsView { @@ -117,4 +130,5 @@ const fn make_col_map() -> KeccakSpongeColumnsView { } } +/// Map between the `KeccakSponge` columns and (0..`NUM_KECCAK_SPONGE_COLUMNS`). pub(crate) const KECCAK_SPONGE_COL_MAP: KeccakSpongeColumnsView = make_col_map(); diff --git a/evm/src/keccak_sponge/keccak_sponge_stark.rs b/evm/src/keccak_sponge/keccak_sponge_stark.rs index e491252ba8..03c1f811a0 100644 --- a/evm/src/keccak_sponge/keccak_sponge_stark.rs +++ b/evm/src/keccak_sponge/keccak_sponge_stark.rs @@ -23,6 +23,11 @@ use crate::stark::Stark; use crate::util::trace_rows_to_poly_values; use crate::witness::memory::MemoryAddress; +/// Creates the vector of `Columns` corresponding to: +/// - the address in memory of the inputs, +/// - the length of the inputs, +/// - the timestamp at which the inputs are read from memory, +/// - the output limbs of the Keccak sponge (packed from the digest bytes; see the sketch below).
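As referenced in the doc comment above, the sponge's output limbs are obtained by packing little-endian digest bytes into 32-bit limbs; the table expresses this sum as a `Column` linear combination with powers of 256. A self-contained sketch of the same packing with plain Rust integers (`pack_le_bytes` is a hypothetical helper, not part of the codebase):

// Pack four little-endian bytes into one u32 limb:
// b_0 + 256*b_1 + 256^2*b_2 + 256^3*b_3.
fn pack_le_bytes(bytes: [u8; 4]) -> u32 {
    bytes.iter().rev().fold(0u32, |acc, &b| (acc << 8) | u32::from(b))
}

fn main() {
    assert_eq!(pack_le_bytes([0x78, 0x56, 0x34, 0x12]), 0x1234_5678);
}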
pub(crate) fn ctl_looked_data() -> Vec> { let cols = KECCAK_SPONGE_COL_MAP; let mut outputs = Vec::with_capacity(8); @@ -47,6 +52,9 @@ pub(crate) fn ctl_looked_data() -> Vec> { .collect() } +/// Creates the vector of `Columns` corresponding to the inputs of the Keccak sponge. +/// This is used to check that the inputs of the sponge correspond to the inputs +/// given by `KeccakStark`. pub(crate) fn ctl_looking_keccak_inputs() -> Vec> { let cols = KECCAK_SPONGE_COL_MAP; let mut res: Vec<_> = Column::singles( @@ -62,6 +70,9 @@ pub(crate) fn ctl_looking_keccak_inputs() -> Vec> { res } +/// Creates the vector of `Columns` corresponding to the outputs of the Keccak sponge. +/// This is used to check that the outputs of the sponge correspond to the outputs +/// given by `KeccakStark`. pub(crate) fn ctl_looking_keccak_outputs() -> Vec> { let cols = KECCAK_SPONGE_COL_MAP; @@ -83,6 +94,7 @@ pub(crate) fn ctl_looking_keccak_outputs() -> Vec> { res } +/// Creates the vector of `Columns` corresponding to the address and value of the byte being read from memory. pub(crate) fn ctl_looking_memory(i: usize) -> Vec> { let cols = KECCAK_SPONGE_COL_MAP; @@ -111,12 +123,16 @@ pub(crate) fn ctl_looking_memory(i: usize) -> Vec> { res } +/// Returns the number of `KeccakSponge` tables looking into the `LogicStark`. pub(crate) fn num_logic_ctls() -> usize { const U8S_PER_CTL: usize = 32; ceil_div_usize(KECCAK_RATE_BYTES, U8S_PER_CTL) } -/// CTL for performing the `i`th logic CTL. Since we need to do 136 byte XORs, and the logic CTL can +/// Creates the vector of `Columns` required to perform the `i`th logic CTL. +/// It comprises the `IS_XOR` flag, the two inputs and the output +/// of the XOR operation. +/// Since we need to do 136 byte XORs, and the logic CTL can /// XOR 32 bytes per CTL, there are 5 such CTLs. pub(crate) fn ctl_looking_logic(i: usize) -> Vec> { const U32S_PER_CTL: usize = 8; @@ -156,6 +172,7 @@ pub(crate) fn ctl_looking_logic(i: usize) -> Vec> { res } +/// CTL filter for the final block rows of the `KeccakSponge` table. pub(crate) fn ctl_looked_filter() -> Column { // The CPU table is only interested in our final-block rows, since those contain the final // sponge output. @@ -181,6 +198,7 @@ pub(crate) fn ctl_looking_logic_filter() -> Column { Column::sum(once(&cols.is_full_input_block).chain(&cols.is_final_input_len)) } +/// CTL filter for looking at the input and output in the Keccak table. pub(crate) fn ctl_looking_keccak_filter() -> Column { let cols = KECCAK_SPONGE_COL_MAP; Column::sum(once(&cols.is_full_input_block).chain(&cols.is_final_input_len)) @@ -199,12 +217,14 @@ pub(crate) struct KeccakSpongeOp { pub(crate) input: Vec, } +/// Structure representing the `KeccakSponge` STARK, which carries out the sponge permutation. #[derive(Copy, Clone, Default)] pub struct KeccakSpongeStark { f: PhantomData, } impl, const D: usize> KeccakSpongeStark { + /// Generates the trace polynomial values for the `KeccakSponge` STARK. pub(crate) fn generate_trace( &self, operations: Vec, @@ -227,6 +247,8 @@ impl, const D: usize> KeccakSpongeStark { trace_polys } + /// Generates the trace rows given the vector of `KeccakSponge` operations. + /// The trace is padded to a power of two with all-zero rows. fn generate_trace_rows( &self, operations: Vec, @@ -237,9 +259,11 @@ impl, const D: usize> KeccakSpongeStark { .map(|op| op.input.len() / KECCAK_RATE_BYTES + 1) .sum(); let mut rows = Vec::with_capacity(base_len.max(min_rows).next_power_of_two()); + // Generate active rows.
for op in operations { rows.extend(self.generate_rows_for_op(op)); } + // Pad the trace. let padded_rows = rows.len().max(min_rows).next_power_of_two(); for _ in rows.len()..padded_rows { rows.push(self.generate_padding_row()); @@ -247,6 +271,9 @@ impl, const D: usize> KeccakSpongeStark { rows } + /// Generates the rows associated with a given operation: + /// Performs a Keccak sponge permutation and fills the STARK's rows accordingly. + /// The number of rows is the number of input chunks of size `KECCAK_RATE_BYTES`. fn generate_rows_for_op(&self, op: KeccakSpongeOp) -> Vec<[F; NUM_KECCAK_SPONGE_COLUMNS]> { let mut rows = Vec::with_capacity(op.input.len() / KECCAK_RATE_BYTES + 1); @@ -255,6 +282,7 @@ impl, const D: usize> KeccakSpongeStark { let mut input_blocks = op.input.chunks_exact(KECCAK_RATE_BYTES); let mut already_absorbed_bytes = 0; for block in input_blocks.by_ref() { + // We compute the updated state of the sponge. let row = self.generate_full_input_row( &op, already_absorbed_bytes, @@ -262,6 +290,9 @@ impl, const D: usize> KeccakSpongeStark { block.try_into().unwrap(), ); + // We update the state limbs for the next block absorption. + // The first `KECCAK_DIGEST_U32S` limbs are stored as bytes after the computation, + // so we recompute the corresponding `u32` and update the first state limbs. sponge_state[..KECCAK_DIGEST_U32S] .iter_mut() .zip(row.updated_digest_state_bytes.chunks_exact(4)) @@ -273,6 +304,8 @@ impl, const D: usize> KeccakSpongeStark { .sum(); }); + // The rest of the bytes are already stored in the expected form, so we can directly + // update the state with the stored values. sponge_state[KECCAK_DIGEST_U32S..] .iter_mut() .zip(row.partial_updated_state_u32s) @@ -295,6 +328,8 @@ impl, const D: usize> KeccakSpongeStark { rows } + /// Generates a row where all bytes are input bytes, not padding bytes. + /// This includes updating the sponge state with a single absorption. fn generate_full_input_row( &self, op: &KeccakSpongeOp, @@ -313,6 +348,10 @@ impl, const D: usize> KeccakSpongeStark { row } + /// Generates a row containing the last input bytes. + /// On top of computing one absorption and padding the input, + /// we indicate the last non-padding input byte by setting + /// `row.is_final_input_len[final_inputs.len()]` to 1. fn generate_final_row( &self, op: &KeccakSpongeOp, @@ -345,6 +384,9 @@ impl, const D: usize> KeccakSpongeStark { /// Generate fields that are common to both full-input-block rows and final-block rows. /// Also updates the sponge state with a single absorption. + /// Given a state S = R || C and a block input B, + /// - R is updated with R XOR B, + /// - S is replaced by keccakf_u32s(S). fn generate_common_fields( row: &mut KeccakSpongeColumnsView, op: &KeccakSpongeOp, diff --git a/evm/src/logic.rs b/evm/src/logic.rs index 319dfab2d0..2382897b1a 100644 --- a/evm/src/logic.rs +++ b/evm/src/logic.rs @@ -19,28 +19,34 @@ use crate::logic::columns::NUM_COLUMNS; use crate::stark::Stark; use crate::util::{limb_from_bits_le, limb_from_bits_le_recursive, trace_rows_to_poly_values}; -// Total number of bits per input/output. +/// Total number of bits per input/output. const VAL_BITS: usize = 256; -// Number of bits stored per field element. Ensure that this fits; it is not checked. +/// Number of bits stored per field element. Ensure that this fits; it is not checked. pub(crate) const PACKED_LIMB_BITS: usize = 32; -// Number of field elements needed to store each input/output at the specified packing.
+/// Number of field elements needed to store each input/output at the specified packing. const PACKED_LEN: usize = ceil_div_usize(VAL_BITS, PACKED_LIMB_BITS); +/// `LogicStark` columns. pub(crate) mod columns { use std::cmp::min; use std::ops::Range; use super::{PACKED_LEN, PACKED_LIMB_BITS, VAL_BITS}; + /// 1 if this is an AND operation, 0 otherwise. pub const IS_AND: usize = 0; + /// 1 if this is an OR operation, 0 otherwise. pub const IS_OR: usize = IS_AND + 1; + /// 1 if this is a XOR operation, 0 otherwise. pub const IS_XOR: usize = IS_OR + 1; - // The inputs are decomposed into bits. + /// First input, decomposed into bits. pub const INPUT0: Range = (IS_XOR + 1)..(IS_XOR + 1) + VAL_BITS; + /// Second input, decomposed into bits. pub const INPUT1: Range = INPUT0.end..INPUT0.end + VAL_BITS; - // The result is packed in limbs of `PACKED_LIMB_BITS` bits. + /// The result is packed in limbs of `PACKED_LIMB_BITS` bits. pub const RESULT: Range = INPUT1.end..INPUT1.end + PACKED_LEN; + /// Returns the column range for each 32-bit chunk in the input. pub fn limb_bit_cols_for_input(input_bits: Range) -> impl Iterator> { (0..PACKED_LEN).map(move |i| { let start = input_bits.start + i * PACKED_LIMB_BITS; @@ -49,9 +55,11 @@ pub(crate) mod columns { }) } + /// Number of columns in `LogicStark`. pub const NUM_COLUMNS: usize = RESULT.end; } +/// Creates the vector of `Columns` corresponding to the opcode, the two inputs and the output of the logic operation. pub fn ctl_data() -> Vec> { // We scale each filter flag with the associated opcode value. // If a logic operation is happening on the CPU side, the CTL @@ -68,15 +76,18 @@ pub fn ctl_data() -> Vec> { res } +/// CTL filter for logic operations. pub fn ctl_filter() -> Column { Column::sum([columns::IS_AND, columns::IS_OR, columns::IS_XOR]) } +/// Structure representing the Logic STARK, which computes all logic operations. #[derive(Copy, Clone, Default)] pub struct LogicStark { pub f: PhantomData, } +/// Logic operations. #[derive(Copy, Clone, Debug, Eq, PartialEq)] pub(crate) enum Op { And, @@ -85,6 +96,7 @@ pub(crate) enum Op { } impl Op { + /// Returns the output of the current logic operation. pub(crate) fn result(&self, a: U256, b: U256) -> U256 { match self { Op::And => a & b, @@ -94,6 +106,8 @@ impl Op { } } +/// A logic operation over `U256` words. It contains an operator, +/// either `AND`, `OR` or `XOR`, two inputs and its expected result. #[derive(Debug)] pub(crate) struct Operation { operator: Op, @@ -103,6 +117,8 @@ pub(crate) struct Operation { } impl Operation { + /// Computes the expected result of an operator with the two provided inputs, + /// and returns the associated logic `Operation`. pub(crate) fn new(operator: Op, input0: U256, input1: U256) -> Self { let result = operator.result(input0, input1); Operation { @@ -113,6 +129,7 @@ impl Operation { } } + /// Given an `Operation`, fills a row with the corresponding flag, inputs and output. fn into_row(self) -> [F; NUM_COLUMNS] { let Operation { operator, @@ -140,17 +157,20 @@ impl Operation { } } impl LogicStark { + /// Generates the trace polynomials for `LogicStark`. pub(crate) fn generate_trace( &self, operations: Vec, min_rows: usize, timing: &mut TimingTree, ) -> Vec> { + // First, turn all provided operations into rows in `LogicStark`, and pad if necessary. let trace_rows = timed!( timing, "generate trace rows", self.generate_trace_rows(operations, min_rows) ); + // Generate the trace polynomials from the trace values.
let trace_polys = timed!( timing, "convert to PolynomialValues", @@ -159,6 +179,8 @@ impl LogicStark { trace_polys } + /// Generates the `LogicStark` traces based on the provided vector of operations. + /// The trace is padded to a power of two with all-zero rows. fn generate_trace_rows( &self, operations: Vec, diff --git a/evm/src/memory/columns.rs b/evm/src/memory/columns.rs index 9a41323200..b77a799cf8 100644 --- a/evm/src/memory/columns.rs +++ b/evm/src/memory/columns.rs @@ -5,10 +5,18 @@ use crate::memory::VALUE_LIMBS; // Columns for memory operations, ordered by (addr, timestamp). /// 1 if this is an actual memory operation, or 0 if it's a padding row. pub(crate) const FILTER: usize = 0; +/// Each memory operation is associated with a unique timestamp. +/// For a given memory operation `op_i`, its timestamp is computed as `C * N + i` +/// where `C` is the CPU clock at that time, `N` is the number of general memory channels, +/// and `i` is the index of the memory channel at which the memory operation is performed. +/// For instance, with `N = 4` channels, an operation on channel `i = 2` at CPU clock `C = 5` gets timestamp `5 * 4 + 2 = 22`. pub(crate) const TIMESTAMP: usize = FILTER + 1; +/// 1 if this is a read operation, 0 if it is a write. pub(crate) const IS_READ: usize = TIMESTAMP + 1; +/// The execution context of this address. pub(crate) const ADDR_CONTEXT: usize = IS_READ + 1; +/// The segment of this address. pub(crate) const ADDR_SEGMENT: usize = ADDR_CONTEXT + 1; +/// The virtual address within the given context and segment. pub(crate) const ADDR_VIRTUAL: usize = ADDR_SEGMENT + 1; // Eight 32-bit limbs hold a total of 256 bits. diff --git a/evm/src/memory/memory_stark.rs b/evm/src/memory/memory_stark.rs index 4a63f50a7a..74aa8787d7 100644 --- a/evm/src/memory/memory_stark.rs +++ b/evm/src/memory/memory_stark.rs @@ -27,6 +27,11 @@ use crate::stark::Stark; use crate::witness::memory::MemoryOpKind::Read; use crate::witness::memory::{MemoryAddress, MemoryOp}; +/// Creates the vector of `Columns` corresponding to: +/// - the memory operation type, +/// - the address in memory of the element being read/written, +/// - the value being read/written, +/// - the timestamp at which the element is read/written. pub fn ctl_data() -> Vec> { let mut res = Column::singles([IS_READ, ADDR_CONTEXT, ADDR_SEGMENT, ADDR_VIRTUAL]).collect_vec(); @@ -35,6 +40,7 @@ pub fn ctl_data() -> Vec> { res } +/// CTL filter for memory operations. pub fn ctl_filter() -> Column { Column::single(FILTER) } diff --git a/evm/src/memory/mod.rs b/evm/src/memory/mod.rs index 4cdfd1be5a..c61119530f 100644 --- a/evm/src/memory/mod.rs +++ b/evm/src/memory/mod.rs @@ -1,7 +1,13 @@ +//! The Memory STARK is used to handle all memory read and write operations happening when +//! executing the EVM. Each non-dummy row of the table corresponds to a single operation, +//! and rows are ordered by the timestamp associated with each memory operation. + pub mod columns; pub mod memory_stark; pub mod segments; // TODO: Move to CPU module, now that channels have been removed from the memory table. pub(crate) const NUM_CHANNELS: usize = crate::cpu::membus::NUM_CHANNELS; +/// The number of limbs holding the value at a memory address. +/// Eight limbs of 32 bits can hold a `U256`. pub(crate) const VALUE_LIMBS: usize = 8; diff --git a/evm/src/proof.rs b/evm/src/proof.rs index 43561e8c8b..c8e7214e88 100644 --- a/evm/src/proof.rs +++ b/evm/src/proof.rs @@ -23,36 +23,52 @@ use crate::cross_table_lookup::GrandProductChallengeSet; /// A STARK proof for each table, plus some metadata used to create recursive wrapper proofs.
#[derive(Debug, Clone)] pub struct AllProof, C: GenericConfig, const D: usize> { + /// Proofs for all the different STARK modules. pub stark_proofs: [StarkProofWithMetadata; NUM_TABLES], + /// Cross-table lookup challenges. pub(crate) ctl_challenges: GrandProductChallengeSet, + /// Public memory values used for the recursive proofs. pub public_values: PublicValues, } impl, C: GenericConfig, const D: usize> AllProof { + /// Returns the degree (i.e. the trace length) of each STARK. pub fn degree_bits(&self, config: &StarkConfig) -> [usize; NUM_TABLES] { core::array::from_fn(|i| self.stark_proofs[i].proof.recover_degree_bits(config)) } } +/// Randomness for all STARKs. pub(crate) struct AllProofChallenges, const D: usize> { + /// Randomness used in each STARK proof. pub stark_challenges: [StarkProofChallenges; NUM_TABLES], + /// Randomness used for cross-table lookups. It is shared by all STARKs. pub ctl_challenges: GrandProductChallengeSet, } /// Memory values which are public. #[derive(Debug, Clone, Default, Deserialize, Serialize)] pub struct PublicValues { + /// Trie hashes before the execution of the local state transition. pub trie_roots_before: TrieRoots, + /// Trie hashes after the execution of the local state transition. pub trie_roots_after: TrieRoots, + /// Block metadata: it remains unchanged within a block. pub block_metadata: BlockMetadata, + /// 256 previous block hashes and current block's hash. pub block_hashes: BlockHashes, + /// Extra block data that is specific to the current proof. pub extra_block_data: ExtraBlockData, } +/// Trie hashes. #[derive(Debug, Clone, Default, Serialize, Deserialize)] pub struct TrieRoots { + /// State trie hash. pub state_root: H256, + /// Transaction trie hash. pub transactions_root: H256, + /// Receipts trie hash. pub receipts_root: H256, } @@ -141,14 +157,20 @@ pub struct ExtraBlockData { /// Note: All the larger integers are encoded with 32-bit limbs in little-endian order. #[derive(Eq, PartialEq, Debug)] pub struct PublicValuesTarget { + /// Trie hashes before the execution of the local state transition. pub trie_roots_before: TrieRootsTarget, + /// Trie hashes after the execution of the local state transition. pub trie_roots_after: TrieRootsTarget, + /// Block metadata: it remains unchanged within a block. pub block_metadata: BlockMetadataTarget, + /// 256 previous block hashes and current block's hash. pub block_hashes: BlockHashesTarget, + /// Extra block data that is specific to the current proof. pub extra_block_data: ExtraBlockDataTarget, } impl PublicValuesTarget { + /// Serializes public value targets. pub fn to_buffer(&self, buffer: &mut Vec) -> IoResult<()> { let TrieRootsTarget { state_root: state_root_before, @@ -221,6 +243,7 @@ impl PublicValuesTarget { Ok(()) } + /// Deserializes public value targets. pub fn from_buffer(buffer: &mut Buffer) -> IoResult { let trie_roots_before = TrieRootsTarget { state_root: buffer.read_target_array()?, @@ -271,6 +294,9 @@ impl PublicValuesTarget { }) } + /// Extracts public value `Target`s from the given public input `Target`s. + /// Public values are always the first public inputs added to the circuit, + /// so we can start extracting at index 0. pub fn from_public_inputs(pis: &[Target]) -> Self { assert!( pis.len() @@ -308,6 +334,7 @@ impl PublicValuesTarget { } } + /// Returns the public values in `pv0` or `pv1` depending on `condition`.
pub fn select, const D: usize>( builder: &mut CircuitBuilder, condition: BoolTarget, @@ -349,16 +376,24 @@ impl PublicValuesTarget { } } +/// Circuit version of `TrieRoots`. +/// `Target`s for trie hashes. Since a `Target` holds a 32-bit limb, each hash requires 8 `Target`s. #[derive(Eq, PartialEq, Debug, Copy, Clone)] pub struct TrieRootsTarget { + /// `Target`s for the state trie hash. pub state_root: [Target; 8], + /// `Target`s for the transactions trie hash. pub transactions_root: [Target; 8], + /// `Target`s for the receipts trie hash. pub receipts_root: [Target; 8], } impl TrieRootsTarget { + /// Number of `Target`s required for all trie hashes. pub const SIZE: usize = 24; + /// Extracts trie hash `Target`s for all tries from the provided public input `Target`s. + /// The provided `pis` should start with the trie hashes. pub fn from_public_inputs(pis: &[Target]) -> Self { let state_root = pis[0..8].try_into().unwrap(); let transactions_root = pis[8..16].try_into().unwrap(); @@ -371,6 +406,8 @@ } } + /// If `condition`, returns the trie hashes in `tr0`, + /// otherwise returns the trie hashes in `tr1`. pub fn select, const D: usize>( builder: &mut CircuitBuilder, condition: BoolTarget, @@ -394,6 +431,7 @@ } } + /// Connects the trie hashes in `tr0` and in `tr1`. pub fn connect, const D: usize>( builder: &mut CircuitBuilder, tr0: Self, @@ -407,23 +445,39 @@ } } +/// Circuit version of `BlockMetadata`. +/// Metadata contained in a block header. These values are identical across +/// all state transition proofs within the same block. #[derive(Eq, PartialEq, Debug, Copy, Clone)] pub struct BlockMetadataTarget { + /// `Target`s for the address of this block's producer. pub block_beneficiary: [Target; 5], + /// `Target` for the timestamp of this block. pub block_timestamp: Target, + /// `Target` for the index of this block. pub block_number: Target, + /// `Target` for the difficulty (before PoS transition) of this block. pub block_difficulty: Target, + /// `Target`s for the `mix_hash` value of this block. pub block_random: [Target; 8], + /// `Target`s for the gas limit of this block. pub block_gaslimit: [Target; 2], + /// `Target` for the chain id of this block. pub block_chain_id: Target, + /// `Target`s for the base fee of this block. pub block_base_fee: [Target; 2], + /// `Target`s for the gas used in this block. pub block_gas_used: [Target; 2], + /// `Target`s for the bloom filter of this block. pub block_bloom: [Target; 64], } impl BlockMetadataTarget { + /// Number of `Target`s required for the block metadata. pub const SIZE: usize = 87; + /// Extracts block metadata `Target`s from the provided public input `Target`s. + /// The provided `pis` should start with the block metadata. pub fn from_public_inputs(pis: &[Target]) -> Self { let block_beneficiary = pis[0..5].try_into().unwrap(); let block_timestamp = pis[5]; @@ -450,6 +504,8 @@ } } + /// If `condition`, returns the block metadata in `bm0`, + /// otherwise returns the block metadata in `bm1`. pub fn select, const D: usize>( builder: &mut CircuitBuilder, condition: BoolTarget, @@ -486,6 +542,7 @@ } } + /// Connects the block metadata in `bm0` to the block metadata in `bm1`. pub fn connect, const D: usize>( builder: &mut CircuitBuilder, bm0: Self, @@ -516,14 +573,29 @@ } } +/// Circuit version of `BlockHashes`. +/// `Target`s for the user-provided previous 256 block hashes and current block hash.
+/// Each block hash requires 8 `Target`s. +/// The proofs across consecutive blocks ensure that these values +/// are consistent (i.e. shifted by eight `Target`s to the left). +/// +/// When the block number is less than 256, dummy values, i.e. `H256::default()`, +/// should be used for the additional block hashes. #[derive(Eq, PartialEq, Debug, Copy, Clone)] pub struct BlockHashesTarget { + /// `Target`s for the hashes of the previous 256 blocks. The leftmost hash, i.e. `prev_hashes[0..8]`, + /// is the oldest, and the rightmost, i.e. `prev_hashes[255 * 8..256 * 8]`, is the hash of the parent block. pub prev_hashes: [Target; 2048], + /// `Target`s for the hash of the current block. pub cur_hash: [Target; 8], } impl BlockHashesTarget { + /// Number of `Target`s required for previous and current block hashes. pub const BLOCK_HASHES_SIZE: usize = 2056; + + /// Extracts the previous and current block hash `Target`s from the public input `Target`s. + /// The provided `pis` should start with the block hashes. pub fn from_public_inputs(pis: &[Target]) -> Self { Self { prev_hashes: pis[0..2048].try_into().unwrap(), @@ -531,6 +603,8 @@ } } + /// If `condition`, returns the block hashes in `bm0`, + /// otherwise returns the block hashes in `bm1`. pub fn select, const D: usize>( builder: &mut CircuitBuilder, condition: BoolTarget, @@ -547,6 +621,7 @@ } } + /// Connects the block hashes in `bm0` to the block hashes in `bm1`. pub fn connect, const D: usize>( builder: &mut CircuitBuilder, bm0: Self, @@ -561,20 +636,38 @@ } } +/// Circuit version of `ExtraBlockData`. +/// Additional block data that is specific to the local transaction being proven, +/// unlike `BlockMetadata`. #[derive(Eq, PartialEq, Debug, Copy, Clone)] pub struct ExtraBlockDataTarget { + /// `Target`s for the state trie digest of the genesis block. pub genesis_state_trie_root: [Target; 8], + /// `Target` for the transaction count prior to the execution of the local state transition, starting + /// at 0 for the initial transaction of a block. pub txn_number_before: Target, + /// `Target` for the transaction count after execution of the local state transition. pub txn_number_after: Target, + /// `Target`s for the accumulated gas used prior to the execution of the local state transition, starting + /// at 0 for the initial transaction of a block. pub gas_used_before: [Target; 2], + /// `Target`s for the accumulated gas used after execution of the local state transition. It should + /// match the `block_gas_used` value after execution of the last transaction in a block. pub gas_used_after: [Target; 2], + /// `Target`s for the accumulated bloom filter of this block prior to the execution of the local state transition, + /// starting with all zeros for the initial transaction of a block. pub block_bloom_before: [Target; 64], + /// `Target`s for the accumulated bloom filter after execution of the local state transition. It should + /// match the `block_bloom` value after execution of the last transaction in a block. pub block_bloom_after: [Target; 64], } impl ExtraBlockDataTarget { + /// Number of `Target`s required for the extra block data. const SIZE: usize = 142; + /// Extracts the extra block data `Target`s from the public input `Target`s. + /// The provided `pis` should start with the extra block data.
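+ /// The `Target`s are read in the order of the struct fields, `Self::SIZE` elements in total.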
pub fn from_public_inputs(pis: &[Target]) -> Self { let genesis_state_trie_root = pis[0..8].try_into().unwrap(); let txn_number_before = pis[8]; @@ -595,6 +688,8 @@ } } + /// If `condition`, returns the extra block data in `ed0`, + /// otherwise returns the extra block data in `ed1`. pub fn select, const D: usize>( builder: &mut CircuitBuilder, condition: BoolTarget, @@ -638,6 +733,7 @@ } } + /// Connects the extra block data in `ed0` to the extra block data in `ed1`. pub fn connect, const D: usize>( builder: &mut CircuitBuilder, ed0: Self, @@ -666,6 +762,7 @@ } } +/// Merkle caps and openings that form the proof of a single STARK. #[derive(Debug, Clone)] pub struct StarkProof, C: GenericConfig, const D: usize> { /// Merkle cap of LDEs of trace values. @@ -688,7 +785,9 @@ where F: RichField + Extendable, C: GenericConfig, { + /// Initial Fiat-Shamir state. pub(crate) init_challenger_state: >::Permutation, + /// Proof for a single STARK. pub(crate) proof: StarkProof, } @@ -703,21 +802,30 @@ impl, C: GenericConfig, const D: usize> S lde_bits - config.fri_config.rate_bits } + /// Returns the number of cross-table lookup polynomials computed for the current STARK. pub fn num_ctl_zs(&self) -> usize { self.openings.ctl_zs_first.len() } } +/// Circuit version of `StarkProof`. +/// Merkle caps and openings that form the proof of a single STARK. #[derive(Eq, PartialEq, Debug)] pub struct StarkProofTarget { + /// `Target` for the Merkle cap of LDEs of trace values. pub trace_cap: MerkleCapTarget, + /// `Target` for the Merkle cap of LDEs of lookup helper and CTL columns. pub auxiliary_polys_cap: MerkleCapTarget, + /// `Target` for the Merkle cap of LDEs of quotient polynomial evaluations. pub quotient_polys_cap: MerkleCapTarget, + /// `Target`s for the purported values of each polynomial at the challenge point. pub openings: StarkOpeningSetTarget, + /// `Target`s for the batch FRI argument for all openings. pub opening_proof: FriProofTarget, } impl StarkProofTarget { + /// Serializes a STARK proof. pub fn to_buffer(&self, buffer: &mut Vec) -> IoResult<()> { buffer.write_target_merkle_cap(&self.trace_cap)?; buffer.write_target_merkle_cap(&self.auxiliary_polys_cap)?; @@ -727,6 +835,7 @@ impl StarkProofTarget { Ok(()) } + /// Deserializes a STARK proof. pub fn from_buffer(buffer: &mut Buffer) -> IoResult { let trace_cap = buffer.read_target_merkle_cap()?; let auxiliary_polys_cap = buffer.read_target_merkle_cap()?; @@ -754,6 +863,7 @@ impl StarkProofTarget { } } +/// Randomness used for a STARK proof. pub(crate) struct StarkProofChallenges, const D: usize> { /// Random values used to combine STARK constraints. pub stark_alphas: Vec, @@ -761,12 +871,17 @@ pub(crate) struct StarkProofChallenges, const D: us /// Point at which the STARK polynomials are opened. pub stark_zeta: F::Extension, + /// Randomness used in FRI. pub fri_challenges: FriChallenges, } +/// Circuit version of `StarkProofChallenges`. pub(crate) struct StarkProofChallengesTarget { + /// `Target`s for the random values used to combine STARK constraints. pub stark_alphas: Vec, + /// `ExtensionTarget` for the point at which the STARK polynomials are opened. pub stark_zeta: ExtensionTarget, + /// `Target`s for the randomness used in FRI.
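+ /// Circuit counterpart of `StarkProofChallenges::fri_challenges`.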
pub fri_challenges: FriChallengesTarget, } @@ -788,6 +903,9 @@ pub struct StarkOpeningSet, const D: usize> { } impl, const D: usize> StarkOpeningSet { + /// Returns a `StarkOpeningSet` given all the polynomial commitments, the number of permutation `Z` polynomials, + /// the evaluation point, and a generator `g`. + /// Polynomials are evaluated at point `zeta` and, if necessary, at `g * zeta`. pub fn new>( zeta: F::Extension, g: F, @@ -796,18 +914,21 @@ impl, const D: usize> StarkOpeningSet { quotient_commitment: &PolynomialBatch, num_lookup_columns: usize, ) -> Self { + // Batch evaluates polynomials on the LDE, at a point `z`. let eval_commitment = |z: F::Extension, c: &PolynomialBatch| { c.polynomials .par_iter() .map(|p| p.to_extension().eval(z)) .collect::>() }; + // Batch evaluates polynomials at a base field point `z`. let eval_commitment_base = |z: F, c: &PolynomialBatch| { c.polynomials .par_iter() .map(|p| p.eval(z)) .collect::>() }; + // `g * zeta`. let zeta_next = zeta.scalar_mul(g); Self { local_values: eval_commitment(zeta, trace_commitment), @@ -821,6 +942,8 @@ impl, const D: usize> StarkOpeningSet { } } + /// Constructs the openings required by FRI. + /// All openings but `ctl_zs_first` are grouped together. pub(crate) fn to_fri_openings(&self) -> FriOpenings { let zeta_batch = FriOpeningBatch { values: self @@ -855,17 +978,26 @@ impl, const D: usize> StarkOpeningSet { } } +/// Circuit version of `StarkOpeningSet`. +/// `Target`s for the purported values of each polynomial at the challenge point. #[derive(Eq, PartialEq, Debug)] pub struct StarkOpeningSetTarget { + /// `ExtensionTarget`s for the openings of trace polynomials at `zeta`. pub local_values: Vec>, + /// `ExtensionTarget`s for the openings of trace polynomials at `g * zeta`. pub next_values: Vec>, + /// `ExtensionTarget`s for the openings of lookups and cross-table lookups `Z` polynomials at `zeta`. pub auxiliary_polys: Vec>, + /// `ExtensionTarget`s for the openings of lookups and cross-table lookups `Z` polynomials at `g * zeta`. pub auxiliary_polys_next: Vec>, + /// `Target`s for the openings of lookups and cross-table lookups `Z` polynomials at 1. pub ctl_zs_first: Vec, + /// `ExtensionTarget`s for the openings of quotient polynomials at `zeta`. pub quotient_polys: Vec>, } impl StarkOpeningSetTarget { + /// Serializes a STARK's opening set. pub fn to_buffer(&self, buffer: &mut Vec) -> IoResult<()> { buffer.write_target_ext_vec(&self.local_values)?; buffer.write_target_ext_vec(&self.next_values)?; @@ -876,6 +1008,7 @@ impl StarkOpeningSetTarget { Ok(()) } + /// Deserializes a STARK's opening set. pub fn from_buffer(buffer: &mut Buffer) -> IoResult { let local_values = buffer.read_target_ext_vec::()?; let next_values = buffer.read_target_ext_vec::()?; @@ -894,6 +1027,9 @@ impl StarkOpeningSetTarget { }) } + /// Circuit version of `to_fri_openings` for `FriOpenings`. + /// Constructs the `Target`s required by the circuit version of FRI. + /// All openings but `ctl_zs_first` are grouped together. pub(crate) fn to_fri_openings(&self, zero: Target) -> FriOpeningsTarget { let zeta_batch = FriOpeningBatchTarget { values: self diff --git a/evm/src/prover.rs b/evm/src/prover.rs index c5729a573f..fcf59738cc 100644 --- a/evm/src/prover.rs +++ b/evm/src/prover.rs @@ -90,6 +90,7 @@ where let rate_bits = config.fri_config.rate_bits; let cap_height = config.fri_config.cap_height; + // For each STARK, we compute the polynomial commitments for the polynomials interpolating its trace.
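+ // Each commitment is a Merkle tree (with cap height `cap_height`) built over the low-degree extension of the trace columns at rate `rate_bits`.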
let trace_commitments = timed!( timing, "compute all trace commitments", @@ -115,6 +116,7 @@ where .collect::>() ); + // Get the Merkle caps for all trace commitments and observe them. let trace_caps = trace_commitments .iter() .map(|c| c.merkle_tree.cap.clone()) @@ -127,7 +129,9 @@ where observe_public_values::(&mut challenger, &public_values) .map_err(|_| anyhow::Error::msg("Invalid conversion of public values."))?; + // Get challenges for the cross-table lookups. let ctl_challenges = get_grand_product_challenge_set(&mut challenger, config.num_challenges); + // For each STARK, compute its cross-table lookup Z polynomials and get the associated `CtlData`. let ctl_data_per_table = timed!( timing, "compute CTL data", @@ -169,6 +173,13 @@ where }) } +/// Generates a proof for each STARK. +/// At this stage, we have computed the trace polynomial commitments for the various STARKs, +/// and we have the cross-table lookup data for each table, including the associated challenges. +/// - `trace_poly_values` are the trace values for each STARK. +/// - `trace_commitments` are the trace polynomial commitments for each STARK. +/// - `ctl_data_per_table` groups all the cross-table lookup data for each STARK. +/// Each STARK uses its associated data to generate a proof. fn prove_with_commitments( all_stark: &AllStark, config: &StarkConfig, @@ -293,7 +304,10 @@ where ]) } -/// Compute proof for a single STARK table. +/// Computes a proof for a single STARK table, including: +/// - the initial state of the challenger, +/// - all the required Merkle caps, +/// - all the required polynomial and FRI argument openings. pub(crate) fn prove_single_table( stark: &S, config: &StarkConfig, @@ -350,6 +364,8 @@ where ); let num_lookup_columns = lookup_helper_columns.as_ref().map(|v| v.len()).unwrap_or(0); + // We add CTLs to the permutation arguments so that we can batch commit to + // all auxiliary polynomials. let auxiliary_polys = match lookup_helper_columns { None => ctl_data.z_polys(), Some(mut lookup_columns) => { @@ -359,6 +375,7 @@ where }; assert!(!auxiliary_polys.is_empty(), "No CTL?"); + // Get the polynomial commitments for all auxiliary polynomials. let auxiliary_polys_commitment = timed!( timing, "compute auxiliary polynomials commitment", @@ -424,6 +441,7 @@ where }) .collect() ); + // Commit to the quotient polynomials. let quotient_commitment = timed!( timing, "compute quotient commitment", @@ -436,6 +454,7 @@ where None, ) ); + // Observe the quotient polynomials Merkle cap. let quotient_polys_cap = quotient_commitment.merkle_tree.cap.clone(); challenger.observe_cap(&quotient_polys_cap); @@ -449,6 +468,7 @@ where "Opening point is in the subgroup." ); + // Compute all openings: evaluate all committed polynomials at `zeta` and, when necessary, at `g * zeta`. let openings = StarkOpeningSet::new( zeta, g, @@ -457,6 +477,7 @@ where &quotient_commitment, stark.num_lookup_helper_columns(config), ); + // Get the FRI openings and observe them. challenger.observe_openings(&openings.to_fri_openings()); let initial_merkle_trees = vec![ @@ -563,10 +584,12 @@ where lagrange_basis_first, lagrange_basis_last, ); + // Get the local and next row evaluations for the current STARK. let vars = S::EvaluationFrame::from_values( &get_trace_values_packed(i_start), &get_trace_values_packed(i_next_start), ); + // Get the local and next row evaluations for the permutation argument, as well as the associated challenges.
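+ // `lookup_vars` is `None` for STARKs without a lookup argument; in that case, only the CTL polynomials contribute auxiliary columns.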
let lookup_vars = lookup_challenges.map(|challenges| LookupCheckVars { local_values: auxiliary_polys_commitment.get_lde_values_packed(i_start, step) [..num_lookup_columns] @@ -574,6 +597,13 @@ where next_values: auxiliary_polys_commitment.get_lde_values_packed(i_next_start, step), challenges: challenges.to_vec(), }); + + // Get all the data for this STARK's CTLs: + // - the local and next row evaluations for the CTL Z polynomials, + // - the associated challenges, + // - for each CTL: + // - the filter `Column`, + // - the `Column`s that form the looking/looked table. let ctl_vars = ctl_data .zs_columns .iter() @@ -588,6 +618,9 @@ where filter_column: &zs_columns.filter_column, }) .collect::>(); + + // Evaluate the polynomial combining all constraints, including those associated + // with the permutation and CTL arguments. eval_vanishing_poly::( stark, &vars, @@ -661,6 +694,7 @@ fn check_constraints<'a, F, C, S, const D: usize>( transpose(&values) }; + // Get batch evaluations of the trace, permutation and CTL polynomials over our subgroup. let trace_subgroup_evals = get_subgroup_evals(trace_commitment); let auxiliary_subgroup_evals = get_subgroup_evals(auxiliary_commitment); @@ -682,16 +716,19 @@ fn check_constraints<'a, F, C, S, const D: usize>( lagrange_basis_first, lagrange_basis_last, ); + // Get the local and next row evaluations for the current STARK's trace. let vars = S::EvaluationFrame::from_values( &trace_subgroup_evals[i], &trace_subgroup_evals[i_next], ); + // Get the local and next row evaluations for the current STARK's permutation argument. let lookup_vars = lookup_challenges.map(|challenges| LookupCheckVars { local_values: auxiliary_subgroup_evals[i][..num_lookup_columns].to_vec(), next_values: auxiliary_subgroup_evals[i_next][..num_lookup_columns].to_vec(), challenges: challenges.to_vec(), }); + // Get the local and next row evaluations for the current STARK's CTL Z polynomials. let ctl_vars = ctl_data .zs_columns .iter() @@ -704,6 +741,8 @@ fn check_constraints<'a, F, C, S, const D: usize>( filter_column: &zs_columns.filter_column, }) .collect::>(); + // Evaluate the polynomial combining all constraints, including those associated + // with the permutation and CTL arguments. eval_vanishing_poly::( stark, &vars, @@ -716,6 +755,7 @@ fn check_constraints<'a, F, C, S, const D: usize>( }) .collect::>(); + // Assert that all constraints evaluate to 0 over our subgroup. for v in constraint_values { assert!( v.iter().all(|x| x.is_zero()), diff --git a/evm/src/vanishing_poly.rs b/evm/src/vanishing_poly.rs index 2ea6010e83..2e1adfc742 100644 --- a/evm/src/vanishing_poly.rs +++ b/evm/src/vanishing_poly.rs @@ -14,6 +14,8 @@ use crate::lookup::{ }; use crate::stark::Stark; +/// Evaluates all constraint, permutation and cross-table lookup polynomials +/// of the current STARK at the local and next values. pub(crate) fn eval_vanishing_poly( stark: &S, vars: &S::EvaluationFrame, @@ -27,8 +29,10 @@ pub(crate) fn eval_vanishing_poly( P: PackedField, S: Stark, { + // Evaluate all of the STARK's table constraints. stark.eval_packed_generic(vars, consumer); if let Some(lookup_vars) = lookup_vars { + // Evaluate the STARK constraints related to the permutation arguments. eval_packed_lookups_generic::( stark, lookups, @@ -37,9 +41,13 @@ pub(crate) fn eval_vanishing_poly( consumer, ); } + // Evaluate the STARK constraints related to the cross-table lookups. eval_cross_table_lookup_checks::(vars, ctl_vars, consumer); } +/// Circuit version of `eval_vanishing_poly`.
+/// Evaluates all constraint, permutation and cross-table lookup polynomials +/// of the current STARK at the local and next values. pub(crate) fn eval_vanishing_poly_circuit( builder: &mut CircuitBuilder, stark: &S, @@ -51,9 +59,12 @@ pub(crate) fn eval_vanishing_poly_circuit( F: RichField + Extendable, S: Stark, { + // Evaluate all of the STARK's table constraints. stark.eval_ext_circuit(builder, vars, consumer); if let Some(lookup_vars) = lookup_vars { + // Evaluate all of the STARK's constraints related to the permutation argument. eval_ext_lookups_circuit::(builder, stark, vars, lookup_vars, consumer); } + // Evaluate all of the STARK's constraints related to the cross-table lookups. eval_cross_table_lookup_checks_circuit::(builder, vars, ctl_vars, consumer); } diff --git a/evm/src/witness/operation.rs b/evm/src/witness/operation.rs index 95059ae3cc..2777269d89 100644 --- a/evm/src/witness/operation.rs +++ b/evm/src/witness/operation.rs @@ -55,6 +55,9 @@ pub(crate) enum Operation { MstoreGeneral, } +/// Adds a CPU row filled with the two inputs and the output of a logic operation. +/// Generates a new logic operation and adds it to the vector of operations in `LogicStark`. +/// Adds three memory read operations to `MemoryStark`: one for each of the two inputs and one for the output. pub(crate) fn generate_binary_logic_op( op: logic::Op, state: &mut GenerationState,