Skip to content

Commit

Permalink
WIP Add limit for nested data structures
Browse files Browse the repository at this point in the history
YAML is not directly vulnerable to the Billion Laughs Attack:
https://en.wikipedia.org/wiki/Billion_laughs_attack#Variations

As long as the constructor uses perl references, everything is fine.

But if you print such data (for example as JSON), then it will get
huge, as JSON does not know references.

This commit is adding an experimental option that limits nested data.
Every anchor that is created gets assigned a number (its depth):
    &anchor [x] # 1
    &anchor [[x]] # 2

Whenever an alias is used, it will increase the total sum by its
depth.
Whenever an alias is used inside of another anchor, the depth of
this anchor will be increased by the alias depth.

    - &a [[x]]       # *a = 2
    - &b [*a,*a,*a]  # *b = 2+2+2 + 1 = 7  total=6
    - &c [*b]        # *c = 7 + 1          total=6+7=13

The total sum of depths must not be higher than a maximum, which is
currently 1024 by default.
  • Loading branch information
perlpunk committed Nov 9, 2023
1 parent 8a2ccd5 commit 19ecfc3
Show file tree
Hide file tree
Showing 8 changed files with 366 additions and 3 deletions.
10 changes: 10 additions & 0 deletions etc/memory/billion.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
---
a: &a ["lol","lol","lol","lol","lol","lol","lol","lol","lol"]
b: &b [*a,*a,*a,*a,*a,*a,*a,*a,*a]
c: &c [*b,*b,*b,*b,*b,*b,*b,*b,*b]
d: &d [*c,*c,*c,*c,*c,*c,*c,*c,*c]
e: &e [*d,*d,*d,*d,*d,*d,*d,*d,*d]
f: &f [*e,*e,*e,*e,*e,*e,*e,*e,*e]
g: &g [*f,*f,*f,*f,*f,*f,*f,*f,*f]
h: &h [*g,*g,*g,*g,*g,*g,*g,*g,*g]
i: &i [*h,*h,*h,*h,*h,*h,*h,*h,*h]
231 changes: 231 additions & 0 deletions etc/memory/long-string.yaml

Large diffs are not rendered by default.

40 changes: 40 additions & 0 deletions etc/memory/test.pl
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
#!/usr/bin/perl
use strict;
use warnings;
use 5.010;
use FindBin '$Bin';
use lib "$Bin/../../lib";

use YAML::PP;
use JSON::PP;

# Memory check for the experimental alias_depth limit.
#
# Usage: test.pl <yaml-file> [alias_depth-limit]
#
# Loads <yaml-file> with YAML::PP, encodes the loaded data with JSON::PP and
# prints the value returned by size() at start, after loading and after
# encoding, together with the growth caused by each step.
my ($file, $limit) = @ARGV;

# Without a file name there is nothing to load; stop early with a usage hint
# instead of handing an undefined value to load_file().
die "Usage: $0 <yaml-file> [alias_depth-limit]\n"
    unless defined $file and length $file;

# A missing (or zero) limit argument falls back to 1024.
$limit ||= 1024;

my $yp = YAML::PP->new(
    limit => {
        alias_depth => $limit,
    },
);
my $j = JSON::PP->new;

my $start = size();
say "Memory at start: $start";

my $data = $yp->load_file($file);
my $mem_load = size();
my $growth = $mem_load - $start;
say "After load: $mem_load (+$growth)";

# JSON does not know references, so this is the step where aliased data
# is written out in full.
my $json = $j->encode($data);
my $mem_json = size();
$growth = $mem_json - $mem_load;
say "After json encode: $mem_json (+$growth)";
say sprintf "length JSON: %s bytes", length $json;

# Return the virtual memory size of the current process as printed by
# `ps -o vsize` (presumably KiB, per procps ps(1) - verify on the target
# platform).
#
# Returns a string of digits. If ps cannot be run, exits non-zero or prints
# something that is not a plain number, a warning naming the problem is
# emitted and 0 is returned, so callers can keep doing arithmetic on the
# result instead of tripping over undef or an empty string.
sub size {
    my $out = qx{ps --no-headers -o vsize:3 --pid $$};

    # qx returns undef when the command could not be started at all;
    # $? carries the exit status otherwise.
    if (defined $out and $? == 0 and $out =~ m{\A \s* (\d+) \s* \z}xms) {
        return $1;
    }

    warn "Could not determine memory size via ps (status $?)\n";
    return 0;
}
2 changes: 2 additions & 0 deletions lib/YAML/PP.pm
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ sub new {
$bool = 'perl' unless defined $bool;
my $schemas = delete $args{schema} || ['+'];
my $cyclic_refs = delete $args{cyclic_refs} || 'fatal';
my $limit = delete $args{limit};
my $indent = delete $args{indent};
my $width = delete $args{width};
my $writer = delete $args{writer};
Expand Down Expand Up @@ -69,6 +70,7 @@ sub new {
default_yaml_version => $default_yaml_version,
preserve => $preserve,
duplicate_keys => $duplicate_keys,
limit => $limit,
);
my $dumper = YAML::PP::Dumper->new(
schema => $default_schema,
Expand Down
34 changes: 31 additions & 3 deletions lib/YAML/PP/Constructor.pm
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ sub new {
$preserve = PRESERVE_ORDER | PRESERVE_SCALAR_STYLE | PRESERVE_FLOW_STYLE | PRESERVE_ALIAS;
}
my $cyclic_refs = delete $args{cyclic_refs} || 'fatal';
my $limit = delete $args{limit} || {};
die "Invalid value for cyclic_refs: $cyclic_refs"
unless $cyclic_refs{ $cyclic_refs };
my $schemas = delete $args{schemas};
Expand All @@ -44,6 +45,7 @@ sub new {
cyclic_refs => $cyclic_refs,
preserve => $preserve,
duplicate_keys => $duplicate_keys,
alias_depth => $limit->{alias_depth} || 1024,
}, $class;
$self->init;
return $self;
Expand Down Expand Up @@ -107,6 +109,7 @@ sub document_start_event {
}
my $ref = [];
push @$stack, { type => 'document', ref => $ref, data => $ref, event => $event };
$self->{alias_depth_count} = 0;
}

sub document_end_event {
Expand Down Expand Up @@ -155,7 +158,8 @@ sub mapping_start_event {
$t->{alias} = $anchor;
}
}
$self->anchors->{ $anchor } = { data => $ref->{data} };
$self->anchors->{ $anchor } = { data => $ref->{data}, alias_depth => 1 };
$self->{open_anchors}->{ $anchor } = 1;
}
}

Expand Down Expand Up @@ -218,8 +222,12 @@ sub mapping_end_event {
};
$on_data->($self, $data, \@ref);
push @{ $stack->[-1]->{ref} }, $$data;
my $depth = ($last->{depth} || 0) + 1;
$stack->[-1]->{depth} = $depth;
if (defined(my $anchor = $last->{event}->{anchor})) {
$self->anchors->{ $anchor }->{finished} = 1;
$self->anchors->{ $anchor }->{alias_depth} *= $depth;
delete $self->{open_anchors}->{ $anchor };
}
return;
}
Expand Down Expand Up @@ -253,7 +261,8 @@ sub sequence_start_event {
$t->{alias} = $anchor;
}
}
$self->anchors->{ $anchor } = { data => $ref->{data} };
$self->anchors->{ $anchor } = { data => $ref->{data}, alias_depth => 1 };
$self->{open_anchors}->{ $anchor } = 1;
}
}

Expand All @@ -270,9 +279,13 @@ sub sequence_end_event {
};
$on_data->($self, $data, $ref);
push @{ $stack->[-1]->{ref} }, $$data;
my $depth = ($last->{depth} || 0) + 1;
$stack->[-1]->{depth} = $depth;
if (defined(my $anchor = $last->{event}->{anchor})) {
my $test = $self->anchors->{ $anchor };
$self->anchors->{ $anchor }->{finished} = 1;
$self->anchors->{ $anchor }->{alias_depth} *= $depth;
delete $self->{open_anchors}->{ $anchor };
}
return;
}
Expand Down Expand Up @@ -306,7 +319,12 @@ sub scalar_event {
$value = YAML::PP::Preserve::Scalar->new( %args );
}
if (defined (my $name = $event->{anchor})) {
$self->anchors->{ $name } = { data => \$value, finished => 1 };
my $d = int( length( $event->{value} ) / 1000 ) + 1;
$self->anchors->{ $name } = {
data => \$value,
finished => 1,
alias_depth => $d,
};
}
push @{ $last->{ref} }, $value;
}
Expand Down Expand Up @@ -334,6 +352,16 @@ sub alias_event {
}
}
$value = $anchor->{data};
if (my $open = $self->{open_anchors}) {
for my $n (sort keys %$open) {
$self->anchors->{ $n }->{alias_depth}
+= $anchor->{alias_depth} || 1;
}
}
$self->{alias_depth_count} += $anchor->{alias_depth} || 1;
if ($self->{alias_depth_count} > $self->{alias_depth}) {
die "Limit of nested aliases reached for alias '$name': $self->{alias_depth_count}";
}
}
else {
croak "No anchor defined for alias '$name'";
Expand Down
2 changes: 2 additions & 0 deletions lib/YAML/PP/Loader.pm
Original file line number Diff line number Diff line change
Expand Up @@ -22,13 +22,15 @@ sub new {
boolean => 'perl',
)
};
my $limit = delete $args{limit};

my $constructor = delete $args{constructor} || YAML::PP::Constructor->new(
schemas => $schemas,
cyclic_refs => $cyclic_refs,
default_yaml_version => $default_yaml_version,
preserve => $preserve,
duplicate_keys => $duplicate_keys,
limit => $limit,
);
my $parser = delete $args{parser};
unless ($parser) {
Expand Down
40 changes: 40 additions & 0 deletions t/54.alias-bomb.t
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
#!/usr/bin/env perl
use strict;
use warnings;
use Test::More;
use Test::Deep;
use FindBin '$Bin';
use Data::Dumper;
use YAML::PP;

# Fixture with nested aliases: anchor "a" is a sequence of ten "lol" scalars,
# and each of the anchors "b" .. "i" is a sequence of ten aliases to the
# previous anchor.
my $file = "$Bin/data/billion.yaml";

# With a small alias_depth limit, loading the fixture has to abort.
subtest "nested aliases limit reached" => sub {
    my $yp = YAML::PP->new(
        schema => ['Failsafe'],
        limit => {
            alias_depth => 100,
        },
    );

    # Only the exception matters here, so the loaded data is not kept.
    eval {
        $yp->load_file($file);
    };
    my $error = $@;
    note $error;
    like(
        $error,
        qr{Limit of nested aliases reached},
        'loading dies once the alias_depth limit is exceeded',
    );
};

# With a very large limit the same fixture loads without an exception.
subtest "nested aliases ok" => sub {
    my $yp = YAML::PP->new(
        schema => ['Failsafe'],
        limit => {
            alias_depth => 1_000_000_000,
        },
    );

    my $data = $yp->load_file($file);

    # Nine [0] steps: i -> h -> g -> f -> e -> d -> c -> b -> a -> 'lol'.
    is(
        $data->{data}->{i}->[0]->[0]->[0]->[0]->[0]->[0]->[0]->[0]->[0],
        'lol',
        'innermost scalar is reachable through all nested aliases',
    );
};

done_testing;
10 changes: 10 additions & 0 deletions t/data/billion.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
data:
a: &a [lol,lol,lol,lol,lol,lol,lol,lol,lol,lol]
b: &b [*a,*a,*a,*a,*a,*a,*a,*a,*a,*a]
c: &c [*b,*b,*b,*b,*b,*b,*b,*b,*b,*b]
d: &d [*c,*c,*c,*c,*c,*c,*c,*c,*c,*c]
e: &e [*d,*d,*d,*d,*d,*d,*d,*d,*d,*d]
f: &f [*e,*e,*e,*e,*e,*e,*e,*e,*e,*e]
g: &g [*f,*f,*f,*f,*f,*f,*f,*f,*f,*f]
h: &h [*g,*g,*g,*g,*g,*g,*g,*g,*g,*g]
i: &i [*h,*h,*h,*h,*h,*h,*h,*h,*h,*h]

0 comments on commit 19ecfc3

Please sign in to comment.