Today I wanted an HTML page of links to all the articles I had collected for my project this weekend. All of the resources are stored in a BibTeX database, so each entry has a URL and a title field. Unfortunately, I couldn't get Text::BibTeX to install, so I decided to write a parser using Parse::RecDescent as a fun exercise.
The resulting tool, bibtex2html, can't handle comments, and I didn't even look at the btparse source code to see what the real grammar is. Maybe I can redo this the correct way and create Text::BibTeX::PurePerl.
#!/usr/bin/perl
# bibtex2html - turn a BibTeX database into an HTML page of links.
#
# Usage: bibtex2html [FILE ...] > links.html
#
# Slurps BibTeX from the files named on the command line (or STDIN), parses
# it with a small Parse::RecDescent grammar, and renders the entries through
# the Template Toolkit template stored after __DATA__.
#
# Known limitations of the grammar:
#   * BibTeX comments are not supported.
#   * Field values must be a bare word/number or a "double-quoted" string;
#     {brace-delimited} values are not recognised.
#
# NOTE(review): in the copy of this script under review, the String rule was
# cut off after `/".+?(?` and the template after __DATA__ had lost its markup
# (only the text "BibTeX Entries" survived). Both are reconstructed below from
# the visible fragments -- please confirm against the original.
use strict;
use warnings;

use Parse::RecDescent;
use Template;

# Grammar overview (the heredoc is single-quoted, so backslashes are literal):
#   entries - one or more entries up to end of input; yields an array ref.
#   Entry   - '@category{key, field = value, ...}'; yields a hash ref holding
#             'category', 'key', and one lower-cased key per field.
#   Tuple   - 'name = value' with an optional trailing comma; newlines in the
#             value become spaces and runs of 3+ whitespace collapse to one.
#   String  - a double-quoted string whose closing quote is not preceded by a
#             backslash; the surrounding quotes are stripped. `.*?` (rather
#             than `.+?`) lets an empty "" value parse.
my $grammar = <<'ENDGRAMMAR';
entries : Entry(s) /\Z/ { $item[1] }
Entry   : '@' Key '{' Key ',' Tuple(s) '}'
          { +{ category => lc $item[2], key => lc $item[4], map { @$_ } @{ $item[6] } } }
Tuple   : Key '=' Value /,?/
          { my $value = $item[3]; $value =~ s/\n/ /g; $value =~ s/\s{3,}/ /g; [ lc $item[1], $value ] }
Value   : Key | String
Key     : /[\w-]+/
String  : /".*?(?<!\\)"/s { substr $item[1], 1, -1 }
ENDGRAMMAR

# Build the parser separately so a broken grammar is reported as such instead
# of dying with "Can't call method on an undefined value".
my $parser = Parse::RecDescent->new($grammar)
    or die "Couldn't compile grammar\n";

# Slurp all input in one read; $/ is localised so the change cannot leak.
my $bibtex = do { local $/; <> };
$bibtex = '' unless defined $bibtex;

my $entries = $parser->entries($bibtex)
    or die "Couldn't parse input\n";

my $tt = Template->new
    or die "Couldn't create template engine: " . Template->error . "\n";

# Render to STDOUT; field values are HTML-escaped inside the template.
$tt->process( \*DATA, { entries => $entries } )
    or die "Couldn't process template: " . $tt->error;

__DATA__
<!DOCTYPE html>
<html>
<head>
<title>BibTeX Entries</title>
</head>
<body>
<h1>BibTeX Entries</h1>
<ul>
[% FOREACH entry IN entries -%]
  <li><a href="[% entry.url | html %]">[% entry.title | html %]</a></li>
[% END -%]
</ul>
</body>
</html>