[EP-tech] Re: RoMEO autocomplete
Ian Stuart
Ian.Stuart at ed.ac.uk
Fri Apr 11 08:38:34 BST 2014
On 10/04/14 18:05, Gilles Fournié wrote:
> Hi,
>
> We would like to add an autocompletion for the journal fields.
>
> As explained in the Wiki page
> http://wiki.eprints.org/w/Autocompletion_and_Authority_Files_%28Romeo_Autocomplete%29
> we have downloaded and used the file found at
> http://romeo.eprints.org/romeo_journals.autocomplete.
>
> The solution works well.
>
> But we realized that the file romeo_journals.autocomplete is old. Its
> date is Jan 19, 2009. Of course, we checked its content and we noticed
> that journals we can find on the Sherpa/RoMEO website are not in the
> file. So, it seems it has not been updated for a while.
>
> Does anybody know if there is a plan to update it ? Or if we can find
> another one elsewhere ?
I have two files for you, which relate to the wiki page
http://wiki.eprints.org/w/Adding_an_Auto-Completer_to_a_non-workflow_page
update_romeo_publishers lives in eprints/~~/bin and should be run daily
- it creates a set of lookup files
get_journals lives in eprints/~~/cgi and is the bit that does the AJAX
response stuff, using the files created above
--
Ian Stuart.
Developer: ORI, RJ-Broker, and OpenDepot.org
Bibliographics and Multimedia Service Delivery team,
EDINA,
The University of Edinburgh.
http://edina.ac.uk/
This email was sent via the University of Edinburgh.
The University of Edinburgh is a charitable body, registered in
Scotland, with registration number SC005336.
-------------- next part --------------
use strict;
use HTTP::Request;
use LWP::UserAgent;
use XML::Twig;
use File::Slurp;
use EPrints;
# Directory holding the per-publisher policy fragments (<id>.xhtml) and the
# lookup map that ties publisher names to those ids.
my $varpath = $EPrints::SystemSettings::conf->{base_path}.'/var/romeopub';
my $mapfile = "$varpath/map.txt";
# publisher name => publisher id, loaded from map.txt, which holds one
# "<id> <publisher name>" pair per line.
my $pub_map = {};
open( my $map_fh, '<', $mapfile ) or die "can't read mapfile: $mapfile: $!";
while ( my $line = <$map_fh> )
{
    chomp $line;
    # Skip lines that do not match: $1/$2 are NOT reset by a failed match, so
    # using them unconditionally would re-record the previous line's values.
    next unless $line =~ m/^(\d+) (.*)/;
    $pub_map->{$2} = $1;
}
close $map_fh;
#########################
# Some global variables #
#########################
my $journal_data = {};
#######################
# Various subroutines #
#######################
# Decode a URL-encoded string.
#   $url - the encoded text
# Returns the decoded text: every '+' becomes a space and every %XX escape
# becomes the byte with that hexadecimal value.
sub urldecode{
    my ($url) = @_;
    # Translate '+' BEFORE expanding %XX escapes; done the other way round, a
    # literal plus sign that was sent as %2B would be turned into a space too.
    $url =~ tr/+/ /;    # all occurrences, not only the first
    $url =~ s/%([0-9a-f][0-9a-f])/pack("C",hex($1))/egi;
    return $url;
}
# XML::Twig handler for one <journal> element.
#   $twig    - the twig being parsed (unused)
#   $journal - the <journal> element
# Builds the autocompleter <li> entry for the journal and stores it in
# $journal_data, keyed by journal title. Always returns 1.
sub process_journal {
    my ( $twig, $journal ) = @_;

    # URL-decoded text of a child element, or undef when the element is absent.
    my $field = sub {
        my ($tag) = @_;
        my $child = $journal->first_child($tag);
        return defined $child ? urldecode( $child->text ) : undef;
    };

    # get the components
    my $title = $field->('jtitle');
    my $zetoc = $field->('zetocpub');
    my $romeo = $field->('romeopub');
    my $issn  = $field->('issn');

    # Prefer the RoMEO publisher name and fall back to the Zetoc one.
    # ("not $a && $b" would parse as "not ($a && $b)" and discard a valid
    # RoMEO name whenever no Zetoc name is present, hence the explicit '!'.)
    my $publisher = $romeo;
    $publisher = $zetoc if !$publisher && $zetoc;

    # Default policy block, used when we hold no fragment for the publisher.
    my $conditions = qq(<div class='romeo_message'><div class='romeo_yellow_content'><table width="100%"><tbody><tr><td><img class='romeo_message_icon' src='/style/images/alert.png' alt='Archiving of pre- and post-prints is not as straight forward as it could be.'></td><td><div class='publishers'><div style='float:right; padding:5px; width: 10em'><img src='http://www.sherpa.ac.uk/images/romeotiny.jpg' alt='SHERPA/RoMEO logo' title='Record data from the SHERPA/RoMEO database' /><p style='font-size:75%;'>SHERPA/Romeo is a project that categorises publisher policies on OA archiving.</p></div> <p>The publishers conditions are not defined.</p></div></td></tr></table></div></div>);
    if( defined $publisher && defined $pub_map->{$publisher} )
    {
        my $fn = $varpath."/".$pub_map->{$publisher}.".xhtml";
        # Keep the default text rather than dying if the fragment is missing.
        $conditions = read_file($fn) if -r $fn;
    }

    $title = "" unless $title;
    $publisher = "" unless $publisher;
    $issn = "" unless $issn;

    # build a lump of html based on the components
    my $html = "<li>$title";
    $html .= "<br />published by $publisher" if $publisher;
    $html .= "<ul>";
    $html .= "<li id='for:value:component:_publication'>$title</li>";
    $html .= "<li id='for:value:component:_publisher'>$publisher</li>";
    $html .= "<li id='for:value:component:_issn'>$issn</li>";
    $html .= "<li id='for:block:absolute:publisher_policy'>$conditions</li>";
    $html .= "</ul></li>\n";

    # save the html
    $journal_data->{$title} = $html;
    return 1;
} ## end process_journal
# Build the autocompleter reply for a (partial) journal title.
#   $journal - the text typed so far
# Returns a string: an HTML comment when nothing was supplied or nothing
# matched, a "keep typing" list for queries shorter than three characters,
# otherwise a <ul class='journals'> holding one entry per matching journal
# (sorted by title) as built by process_journal.
sub get_journals {
    my $journal = shift;
    if (!$journal)
    {
        return "<!-- No journal name supplied -->\n";
    }
    return ("<ul><li>keep typing....</li></ul>") if (length($journal) < 3);

    # Start from a clean slate so entries collected by an earlier call can
    # never leak into this reply.
    %{$journal_data} = ();

    # percent-encode everything except ASCII letters and digits
    $journal =~ s/([^a-z0-9])/sprintf("%%%02X",ord($1))/ige;
    my $query = "http://www.sherpa.ac.uk/romeo/api29.php?qtype=starts&jtitle=$journal&ak=hC0DitNXMJA";
    my $request = HTTP::Request->new( GET => "$query" );
    my $ua = LWP::UserAgent->new();
    my $response = $ua->request($request);
    if ( !$response->is_success )
    {
        # Do not feed an error page to the XML parser; report and give up.
        warn "RoMEO journal lookup failed: ".$response->status_line."\n";
        return "<!-- no matches -->";
    }
    my $content = $response->content();

    # process_journal fills $journal_data as each <journal> is parsed
    my $twig = XML::Twig->new(
        'keep_encoding' => 1,
        'TwigRoots' => { 'journals' => 1 },
        'TwigHandlers' => { 'journal' => \&process_journal, }
    );
    # XML::Twig dies on malformed input; treat that as "no matches"
    # instead of failing the whole request.
    if ( !eval { $twig->parse($content); 1 } )
    {
        warn "RoMEO journal lookup returned unparsable data: $@";
        return "<!-- no matches -->";
    }

    my @html = ();
    if (!scalar keys %{$journal_data})
    {
        push @html, "<!-- no matches -->";
        return (join "\n", @html)
    }
    push @html, "<ul class='journals'>\n";
    foreach my $title (sort keys %{$journal_data})
    {
        push @html, "$journal_data->{$title}\n";
    } ## end of foreach my $title (sort keys %{$journal_data})
    push @html, "</ul>\n";
    return (join "\n", @html)
} ## end get_journals
# Script body: answer one autocompleter request.
my $session = EPrints::Session->new();

# The reply has to start with an XML declaration ...
print qq{<?xml version="1.0" encoding="UTF-8" ?>\n};

# ... followed by the fragment of html for the autocompleter, built from
# the lower-cased "q" request parameter.
my $q = lc $session->param( "q" );
print get_journals( $q );
#print STDERR get_journals( $q );

$session->terminate;
-------------- next part --------------
#!/home/cpan/bin/perl -w -I/home/oarj/eprints/perl_lib
use strict;
use utf8;
use HTTP::Request;
use LWP::UserAgent;
use XML::Twig;
use EPrints::SystemSettings;
use Data::Dumper;
# Rendered policy HTML keyed by publisher name; filled in by
# process_publisher and emptied by get_publisher before each request.
my $publisher_data = {};
# publisher name => publisher id, recorded by process_publisher for every
# publisher parsed and written out to map.txt at the end of the script.
my $pub_map = {};
# Fetch the ids of all publishers listed by the RoMEO API.
# Returns the list of numeric ids found in the response (at most one per
# line of the response, taken from the first id="..." attribute on it).
# Dies when the request fails or when fewer than 100 ids are found, so that
# a broken response can never be mistaken for a complete publisher list.
# NOTE: the "<your_key>" placeholder in the URL has to be replaced with a
# real API key before the script is used.
sub get_romeo_pub_ids
{
    my $query = "http://www.sherpa.ac.uk/romeo/api.php?all=yes&ak=<your_key>";
    my $request = HTTP::Request->new( GET => "$query" );
    my $ua = LWP::UserAgent->new();
    my $response = $ua->request($request);
    # Fail with the real reason instead of scanning an error page for ids.
    die "could not fetch the RoMEO publisher list: ".$response->status_line
        unless $response->is_success;

    my @pubids = ();
    foreach my $line ( split /\n/, $response->content() ) {
        push @pubids, $1 if $line =~ m/id="([0-9]+)"/;
    }
    warn ("pubids: ".scalar(@pubids)."\n");
    if( @pubids < 100 ) { die "urk, not enough pubids"; }
    return @pubids;
}
# Sentence shown for each archiving permission value, first keyed by the
# kind of print ('pre' or 'post'), then by the permission itself. The two
# sets differ only in the word "preprints" / "postprints", so both are
# generated from a single template.
my %depositing = map {
    my $prints = "${_}prints";
    ( $_ => {
        'can'        => "It permits archiving of $prints",
        'cannot'     => "It prohibits archiving of $prints",
        'restricted' => "It permits OA archiving of $prints subject to restrictions (see below)",
        'unclear'    => "Its policy on OA archiving of $prints is unclear. Please check the publisher policy (see link below)",
    } );
} qw( pre post );
##<strong>pre-print</strong>
#######################
# Various subroutines #
#######################
# Render the archiving policy for one kind of print as a <dl> fragment.
#   $which      - 'pre' or 'post'; selects the "<which>archiving" and
#                 "<which>restrictions" child elements and the matching
#                 sentences in %depositing
#   $print_twig - the element holding those children (may be undef)
# Returns the HTML fragment, or the empty string when the element, its
# "<which>archiving" child or the permission text is missing.
sub process_prints {
    my ($which, $print_twig) = @_;
    return '' unless defined $print_twig;

    my $archiving = $print_twig->first_child("${which}archiving");
    my $permission = defined $archiving ? $archiving->text : '';
    return '' unless $permission;

    # Declared unconditionally: "my ... if ..." has undefined behaviour.
    my $restrictions_elt = $print_twig->first_child("${which}restrictions");
    my @restrictions = defined $restrictions_elt ? $restrictions_elt->children : ();

    # A permission value we hold no sentence for yields an empty <dt>.
    my $sentence = $depositing{$which}{$permission};
    $sentence = '' unless defined $sentence;

    my $text = "<dl><dt>".$sentence."</dt>\n";
    if (@restrictions) {
        $text .= "<dd>The publisher defines the following restriction:\n<ul>\n";
        foreach my $restriction (@restrictions) {
            $text .= "<li>".$restriction->text."</li>\n";
        } ## end of foreach restriction
        $text .= "</ul>\n</dd>\n";
    } ## end of if restrictions
    $text .= "</dl>\n";
    return $text;
}
# XML::Twig handler for one <publisher> element.
#   $twig      - the twig being parsed (unused)
#   $publisher - the <publisher> element
# Records the publisher's id in $pub_map (keyed by publisher name) and stores
# a block of HTML describing the publisher's archiving policy in
# $publisher_data (also keyed by name). Optional child elements may be
# absent; a publisher without a <name> is skipped. Always returns 1.
sub process_publisher {
    my ( $twig, $publisher ) = @_;

    # Text of a direct child of $elt, or undef when the child is absent.
    my $child_text = sub {
        my ( $elt, $tag ) = @_;
        my $child = $elt->first_child($tag);
        return defined $child ? $child->text : undef;
    };
    # Children of a direct child of the publisher, or () when it is absent.
    my $children_of = sub {
        my ($tag) = @_;
        my $child = $publisher->first_child($tag);
        return defined $child ? $child->children : ();
    };

    # get the components
    my $name = $child_text->( $publisher, 'name' );
    return 1 unless defined $name;    # nothing can be keyed without a name
    my $pubid = $publisher->{att}->{id};
    $pub_map->{$name} = $pubid;

    my $homeurl     = $child_text->( $publisher, 'homeurl' );
    my $romeocolour = $child_text->( $publisher, 'romeocolour' );
    my $copyright   = $child_text->( $publisher, 'copyright' );
    #my $alias = $child_text->( $publisher, 'alias' );
    $romeocolour = '' unless defined $romeocolour;

    my @conditions = $children_of->('conditions');
    my @mandates   = $children_of->('mandates');

    # build a lump of html based on the data returned.
    my $html = "<div class='romeo_message'>";
    if ($romeocolour eq 'green') {
        $html .= "<div class='romeo_green_content'><table style='width:100%; border:1px solid blue;'><tbody><tr><td><img class='romeo_message_icon' src='/style/images/good.png' alt='Archiving of pre- and post-prints is permitted'>";
    } elsif ($romeocolour eq 'red') {
        $html .= "<div class='romeo_orange_content'><table style='width:100%; border:1px solid red;'><tbody><tr><td><img class='romeo_message_icon' src='/style/images/warning.png' alt='Archiving of pre- and post-prints is not permitted'>";
    } else {
        $html .= "<div class='romeo_yellow_content'><table style='width:100%; border:1px solid yellow;'><tbody><tr><td><img class='romeo_message_icon' src='/style/images/alert.png' alt='Archiving of pre- and post-prints is not as straight forward as it could be.'>";
    }
    $html .= "</td><td><div class='publishers'>";
    $html .= "<div style='float:right; padding:5px; width: 10em'><img src='http://www.sherpa.ac.uk/images/romeotiny.jpg' alt='SHERPA/RoMEO logo' title='Record data from the SHERPA/RoMEO database' /><p style='font-size:75%;'>SHERPA/Romeo is a project that categorises publisher policies on OA archiving.</p></div>\n";
    $html .= "<p>This journal is published by ";
    if ($homeurl) {
        $html .= "<a href='$homeurl' title='Link to the publishers home page. NOTE: this will open a new window.' target='_new'>$name</a>.";
    } else {
        $html .= $name."."
    }
    #$html .= "<br />(this publisher is also known as $alias)" if ($alias);
    $html .= "<br />\nAccording to the Sherpa/Romeo database, the following conditions apply:</p><dl>";
    foreach my $which (qw( pre post )) {
        # Only render a policy when the <preprints>/<postprints> element is
        # actually there; an absent one gives an empty <dd>.
        my $prints = $publisher->first_child("${which}prints");
        my $policy = defined $prints ? process_prints($which, $prints) : undef;
        $policy = '' unless defined $policy;
        $html .= "<dd>".$policy."</dd>\n";
    }
    $html .= "</dl>\n";

    # if we have any general conditions, we need to add them to the data-set
    if (@conditions) {
        $html .= "<p>The publisher also defines the following general conditions</p>\n<dl>\n";
        foreach my $condition (@conditions) {
            $html .= "<dd>".$condition->text."</dd>\n";
        } ## end of foreach condition
        $html .= "</dl>\n";
    } ## end of if conditions

    if (@mandates) {
        $html .= "<p>Juliet has records on the following mandates:</p>\n<dl>\n";
        foreach my $mandate (@mandates) {
            my $funder = $mandate->first_child('funder');
            next unless defined $funder;
            my $julieturl  = $child_text->( $funder, 'julieturl' );
            my $fundername = $child_text->( $funder, 'fundername' );
            my $acronym    = $child_text->( $funder, 'funderacronym' );
            $fundername = '' unless defined $fundername;

            # Link to the funder's Juliet record when a URL is supplied,
            # otherwise show the bare funder name.
            my $label = $fundername;
            if ( defined $julieturl ) {
                $label = "<a title='Opens new window.' href='$julieturl' onclick=\"dialog = window.open('$julieturl','dialogwindow','directories=no,menubar=no,scrollbars=yes,taskbar=no,resizable=yes,location=no,status=no,toolbar=no;');dialog.focus(); return false\">".$fundername."</a>";
            }
            $label .= " (".$acronym.")" if defined $acronym;
            $html .= "<dd>".$label."</dd>\n";
        }
        $html .= "</dl>\n";
    }

    # Escape angle brackets in the copyright text. It is currently only
    # used by the disabled line below.
    if ( defined $copyright ) {
        $copyright =~ s/</&lt;/g;
        $copyright =~ s/>/&gt;/g;
    }
    # $html .= "<p>The publisher has given $copyright for their copyright references.</p>\n";
    $html .= "</div>\n</td></tr></table>\n</div></div>";

    # save the html
    $publisher_data->{$name} = $html;
    return 1;
} ## end process_publisher
# Fetch one publisher's record from the RoMEO API and render it as HTML.
#   $pubid - the publisher id to look up
# Returns the HTML built by process_publisher for every <publisher> in the
# response (sorted by publisher name), an HTML comment when no id was given,
# or the empty string when the request fails or yields no publisher.
sub get_publisher {
    my $pubid = shift;
    my @html = ();
    if ($pubid) {
        my $query = "http://www.sherpa.ac.uk/romeo/api29.php?id=$pubid&ak=hC0DitNXMJA";
        my $request = HTTP::Request->new( GET => "$query" );
        my $ua = LWP::UserAgent->new();
        my $response = $ua->request($request);
        if ( !$response->is_success ) {
            # Do not feed an error page to the XML parser; report and move on.
            warn "RoMEO lookup for publisher $pubid failed: ".$response->status_line."\n";
            return '';
        }
        my $content = $response->content();

        # process_publisher fills $publisher_data while the twig is parsed,
        # so start from an empty set for every request.
        $publisher_data = {};
        my $twig = XML::Twig->new(
            'keep_encoding' => 1,
            'TwigRoots' => { 'publishers' => 1 },
            'TwigHandlers' => { 'publisher' => \&process_publisher, }
        );
        $twig->parse($content);
        foreach my $name (sort keys %{$publisher_data}) {
            push @html, "$publisher_data->{$name}\n";
        } ## end of foreach my $name (sort keys %{$publisher_data})
    } else {
        push @html, "<!-- No pubid name supplied -->\n";
    }
    return (join "\n", @html)
} ## end get_publisher
# Script body: write one <id>.xhtml policy fragment per publisher, then the
# map.txt file that ties publisher names to those ids.
my $path = $EPrints::SystemSettings::conf->{base_path}.'/var/romeopub';
foreach my $pub_id ( get_romeo_pub_ids() )
{
    # Fetch BEFORE opening the file: opening for writing truncates it, so a
    # failed lookup would otherwise destroy the fragment from the last run.
    my $pubinfo = get_publisher($pub_id);
    if ( !defined $pubinfo || $pubinfo eq '' )
    {
        warn "no data for publisher $pub_id, keeping any existing file\n";
        next;
    }
    my $file = "$path/$pub_id.xhtml";
    open( my $pubinfo_fh, '>', $file ) or die "failed to write $file: $!";
    print {$pubinfo_fh} $pubinfo;
    # buffered write errors only surface when the handle is closed
    close($pubinfo_fh) or die "failed to write $file: $!";
}
use Data::Dumper;
# One "<id> <publisher name>" line per publisher seen during this run.
my $map_file = "$path/map.txt";
open( my $pubmap_fh, '>', $map_file ) or die "failed to write $map_file: $!";
foreach my $name ( sort keys %$pub_map )
{
    my $key = lc $pub_map->{$name};
    print {$pubmap_fh} "$key $name\n";
}
close($pubmap_fh) or die "failed to write $map_file: $!";
More information about the Eprints-tech
mailing list