Skip to content

Commit

Permalink
Deals with Invalid XML characters in oai_dc and other OAI-PMH formats.
Browse files Browse the repository at this point in the history
  • Loading branch information
drn05r committed Nov 24, 2021
1 parent e153792 commit 94b2b57
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 7 deletions.
14 changes: 7 additions & 7 deletions flavours/pub_lib/plugins/EPrints/Plugin/Export/DC.pm
Expand Up @@ -102,7 +102,7 @@ sub convert_dataobj
foreach my $creator ( @{$creators} )
{
next if !defined $creator;
push @dcdata, [ "creator", EPrints::Utils::make_name_string( $creator ) ];
push @dcdata, [ "creator", EPrints::XML::remove_invalid_chars( EPrints::Utils::make_name_string( $creator ) ) ];
}
}
}
Expand All @@ -115,7 +115,7 @@ sub convert_dataobj
my $subject = EPrints::DataObj::Subject->new( $plugin->{session}, $subjectid );
# avoid problems with bad subjects
next unless( defined $subject );
push @dcdata, [ "subject", EPrints::Utils::tree_to_utf8( $subject->render_description() ) ];
push @dcdata, [ "subject", EPrints::XML::remove_invalid_chars( EPrints::Utils::tree_to_utf8( $subject->render_description() ) ) ];
}
}

Expand All @@ -129,7 +129,7 @@ sub convert_dataobj
{
foreach my $editor ( @{$editors} )
{
push @dcdata, [ "contributor", EPrints::Utils::make_name_string( $editor ) ];
push @dcdata, [ "contributor", EPrints::XML::remove_invalid_chars( EPrints::Utils::make_name_string( $editor ) ) ];
}
}
}
Expand Down Expand Up @@ -172,7 +172,7 @@ sub convert_dataobj

# The citation for this eprint
push @dcdata, [ "identifier",
EPrints::Utils::tree_to_utf8( $eprint->render_citation( 'default', %params ) ) ];
EPrints::XML::remove_invalid_chars( EPrints::Utils::tree_to_utf8( $eprint->render_citation( 'default', %params ) ) ) ];

# Most commonly a DOI or journal link
push @dcdata, $plugin->simple_value( $eprint, official_url => "relation" );
Expand Down Expand Up @@ -216,18 +216,18 @@ sub simple_value
$langs = [$langs] if ref($values) ne "ARRAY";
foreach my $i (0..$#$values)
{
push @dcdata, [ $term, $values->[$i], { 'xml:lang' => $langs->[$i] } ];
push @dcdata, [ $term, EPrints::XML::remove_invalid_chars( $values->[$i] ), { 'xml:lang' => $langs->[$i] } ];
}
}
elsif( $field->property( "multiple" ) )
{
push @dcdata, map {
[ $term, $_ ]
[ $term, EPrints::XML::remove_invalid_chars( $_ ) ]
} @{ $field->get_value( $eprint ) };
}
else
{
push @dcdata, [ $term, $field->get_value( $eprint ) ];
push @dcdata, [ $term, EPrints::XML::remove_invalid_chars( $field->get_value( $eprint ) ) ];
}

return @dcdata;
Expand Down
9 changes: 9 additions & 0 deletions perl_lib/EPrints/XML.pm
Expand Up @@ -939,6 +939,15 @@ sub debug_xml
print STDERR "<\n";
}

# Remove control characters that may not appear in an XML 1.0 document.
#
# Of the code points below 0x20, XML 1.0 only permits tab (0x09), line feed
# (0x0A) and carriage return (0x0D); every other one (0x00-0x08, 0x0B, 0x0C,
# 0x0E-0x1F) is deleted from the string.
#
# $value - the string to clean; may be undef.
#
# Returns a cleaned copy of $value (the caller's variable is not modified).
# An undefined $value is returned as undef.
sub remove_invalid_chars
{
	my( $value ) = @_;

	# Unset values have nothing to strip; skipping them avoids an
	# "uninitialized value" warning. "return $value" (not a bare "return")
	# keeps the result a single element when called in list context, e.g.
	# inside an anonymous array constructor.
	return $value if !defined $value;

	# The upper bound is 0x1F, not 0x19: 0x1A-0x1F are control characters too.
	$value =~ s/[\x00-\x08\x0B\x0C\x0E-\x1F]//g;

	return $value;
}

sub is_empty
{
my( $node ) = @_;
Expand Down

0 comments on commit 94b2b57

Please sign in to comment.