From 12deff500b73c9807ec6a4faf74140d104e5745c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tina=20M=C3=BCller?= Date: Thu, 28 Dec 2023 00:32:25 +0100 Subject: [PATCH] WIP --- lib/YAML/PP/Loader.pm | 8 +++++++- lib/YAML/PP/Reader.pm | 29 +++++++++++++++++++++++------ t/58.utf8.t | 16 ++++++++-------- 3 files changed, 38 insertions(+), 15 deletions(-) diff --git a/lib/YAML/PP/Loader.pm b/lib/YAML/PP/Loader.pm index 5f8f623e..fe192600 100644 --- a/lib/YAML/PP/Loader.pm +++ b/lib/YAML/PP/Loader.pm @@ -79,7 +79,13 @@ sub filename { sub load_string { my ($self, $yaml) = @_; - $self->parser->set_reader(YAML::PP::Reader->new( input => $yaml, utf8 => $self->{utf8} )); + my $utf8 = $self->{utf8}; + my $p = $self->parser; + $self->parser->set_reader(YAML::PP::Reader->new( + input => $yaml, + utf8_in => $self->{utf8}, + utf8_out => 0, + )); $self->load(); } diff --git a/lib/YAML/PP/Reader.pm b/lib/YAML/PP/Reader.pm index 13c4e191..88c99ed8 100644 --- a/lib/YAML/PP/Reader.pm +++ b/lib/YAML/PP/Reader.pm @@ -12,17 +12,34 @@ sub set_input { $_[0]->{input} = $_[1] } sub new { my ($class, %args) = @_; my $input = delete $args{input}; - my $utf8 = delete $args{utf8}; - $utf8 = 0 unless defined $utf8; + my $utf8_in = delete $args{utf8_in}; + my $utf8_out = delete $args{utf8_out}; + $utf8_in = 0 unless defined $utf8_in; + $utf8_out = 0 unless defined $utf8_out; if (keys %args) { die "Unexpected arguments: " . join ', ', sort keys %args; } - if ($utf8) { - $input = decode 'UTF-8', $input, Encode::FB_CROAK; - } - return bless { + my $self = bless { + utf8_in => $utf8_in, + utf8_out => $utf8_out, input => $input, }, $class; + $self->_prepare_input; + return $self; +} + +sub _prepare_input { + my ($self) = @_; + if ($self->{utf8_in} and $self->{utf8_out} == 0) { + warn __PACKAGE__.':'.__LINE__.": !!!!!!!!! DECODE '$self->{input}'\n"; + warn __PACKAGE__.':'.__LINE__.$".Data::Dumper->Dump([\$self], ['self']); + $self->{input} = decode 'UTF-8', $self->{input}, Encode::FB_CROAK; + } + elsif ($self->{utf8_in} == 0 and $self->{utf8_out}) { + warn __PACKAGE__.':'.__LINE__.": !!!!!!!!! ENCODE '$self->{input}'\n"; + warn __PACKAGE__.':'.__LINE__.$".Data::Dumper->Dump([\$self], ['self']); + $self->{input} = encode 'UTF-8', $self->{input}, Encode::FB_CROAK; + } } sub read { diff --git a/t/58.utf8.t b/t/58.utf8.t index 12f4f97f..c523a6c2 100644 --- a/t/58.utf8.t +++ b/t/58.utf8.t @@ -23,8 +23,8 @@ EOM my $perl = decode_utf8 $utf8; -my $bear = "bär"; -my $bear_perl = decode_utf8 $bear; +my $bear_utf8 = "bär"; +my $bear_perl = decode_utf8 $bear_utf8; subtest 'load unicode' => sub { my $data = $p_utf8->load_string($utf8); @@ -40,7 +40,7 @@ subtest 'load unicode' => sub { is $data->[0], $bear_perl, 'load decoded with perl loader'; $data = $p_perl->load_string($utf8); - is $data->[0], $bear, 'load utf8 with perl loader'; + is $data->[0], $bear_utf8, 'load utf8 with perl loader'; $data = $p_default->load_string($perl); is $data->[0], $bear_perl, 'load decoded with default loader'; @@ -52,19 +52,19 @@ subtest 'load unicode' => sub { subtest 'dump unicode' => sub { my $yaml = $p_utf8->dump_string([$bear_perl]); $yaml =~ s/^- //; chomp $yaml; - is $yaml, $bear, 'dump perl data with utf8 dumper -> utf8'; + is $yaml, $bear_utf8, 'dump perl data with utf8 dumper -> utf8'; - $yaml = $p_utf8->dump_string([$bear]); + $yaml = $p_utf8->dump_string([$bear_utf8]); $yaml =~ s/^- //; chomp $yaml; - is $yaml, encode_utf8($bear), 'dump utf8 data with utf8 dumper -> rubbish'; + is $yaml, encode_utf8($bear_utf8), 'dump utf8 data with utf8 dumper -> rubbish'; $yaml = $p_perl->dump_string([$bear_perl]); $yaml =~ s/^- //; chomp $yaml; is $yaml, $bear_perl, 'dump perl data with perl dumper -> perl'; - $yaml = $p_perl->dump_string([$bear]); + $yaml = $p_perl->dump_string([$bear_utf8]); $yaml =~ s/^- //; chomp $yaml; - $yaml, $bear, 'dump utf8 data with perl dumper -> utf8'; + $yaml, $bear_utf8, 'dump utf8 data with perl dumper -> utf8'; }; done_testing;