...

#!/usr/bin/perl -w

#
#-------------------------------------------------------------------------------
#
# nav.pl
#
#-------------------------------------------------------------------------------
#
# A CGI program which will generate a navigation bar with links from the
# root of a web server to the current page.  Each page leading to the
# current page is linked, but the current page is not linked.  The nav
# bar looks like this:
#
#    <div class="navbar">
#        <a href="/">kirriwa.net</a>
#        &rArr; <a href="/john/">John Clarke</a>
#        &rArr; <a href="/john/photos/">Photos</a>
#        &rArr; 2003
#    </div>
#
# The separator (shown above as " &rArr; "), the opening ("<div class=\"navbar\">")
# and closing ("</div>") tags, and the header
# ("Content-type: text/html; charset=iso-8859-1") are all configurable.
#
#-------------------------------------------------------------------------------
#
# How it works:
#
# It starts with the current page, reading the title of the page, then if 
# the current page is not called index.html, it extracts the title of 
# index.html in the current directory as the next higher link.  This is
# repeated until the top level index.html is read.
#
# I've attempted to handle cases where index.html doesn't exist (the
# directory name is used instead of the title), but since I have an
# index.html in every directory, I've not tested it.
#
#-------------------------------------------------------------------------------
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
# 
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
# 
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
#
#-------------------------------------------------------------------------------
#

use strict;
use File::Basename;
use HTML::TreeBuilder;

#-------------------------------------------------------------------------------
#
# Configurable options
#

# Text placed between consecutive entries of the nav bar.
my $separator   = "\n    &#187; ";
# Markup printed before the first entry and after the last entry.
my $opening_tag = '<div class="navbar">';
my $closing_tag = "\n</div>";
# CGI response header, including the blank line which ends the headers.
my $header      = "Content-type: text/html; charset=iso-8859-1\n\n";
#
#-------------------------------------------------------------------------------


# Working variables used by the main body of the script.
my $uri;      # URI of the calling page, shortened one level at a time
my $links;    # the nav bar entries built so far
my $file;     # file name of the calling page
my $dir;      # directory currently being handled
my $title;    # title most recently read from a page

#
# GetTitle: opens the file passed as $_[0] and attempts to extract the
#           title from the <title>...</title> tag.  Runs of whitespace are
#           collapsed to a single space and leading and trailing
#           whitespace is removed.  An empty string is returned if the
#           file does not exist, cannot be opened, or has no title (or
#           only a blank one).
#
sub GetTitle
{
    my ($file) = @_;
    my ($title, $tree, $t);

    # if the file doesn't exist, there's no title available
    return "" if ! -f $file;

    # assume there is no title until one is found
    $title = "";

    # parse the file; parse_file returns undef if it cannot open the file,
    # in which case there is nothing to search
    $tree = HTML::TreeBuilder->new;
    if (defined $tree->parse_file($file))
    {
        # get the title element, if available
        $t = $tree->look_down('_tag', 'title');

        if ($t)
        {
            # get the title text
            $title = $t->as_text;
            # replace every run of whitespace with a single space ...
            $title =~ s/\s+/ /g;
            # ... and drop the space left at either end, so that a title
            # containing only whitespace counts as no title at all
            $title =~ s/^ //;
            $title =~ s/ $//;
        }
    }

    # release the parse tree
    $tree->delete;

    return $title;
}

#
# EscapeHtml: returns a copy of $_[0] in which the characters that are
#             special in HTML text and in double-quoted attribute values
#             (& < > ") are replaced by entity references.
#
sub EscapeHtml
{
    my ($text) = @_;

    # "&" has to be replaced first, otherwise the entities added by the
    # following substitutions would be escaped a second time
    $text =~ s/&/&amp;/g;
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;
    $text =~ s/"/&quot;/g;

    return $text;
}

# get the current page
$uri = $ENV{"DOCUMENT_URI"};
# do nothing if it's not available
exit if !defined $uri;
# do nothing if it is not an absolute path either: the loop below only
# terminates when repeatedly taking the parent of $uri ends at "/"
exit if $uri !~ m{^/};

# the URI will be of this form:
#   /john/photos/index.html

# the pages are looked up under "../html/" (this script is expected to run
# in the /cgi-bin directory); change to the directory containing the page
# this script is called by
$dir = "../html/" . dirname($uri);
# a failure is reported on STDERR only; the nav bar is still generated,
# falling back to file and directory names where no title can be read
chdir($dir) or warn "nav.pl: cannot chdir to $dir: $!\n";

# start with the calling page
$file = basename($uri);
$uri = dirname($uri) . "/";
$uri = "/" if $uri eq "//";

# get its title, using the file name if there is none (an explicit test
# for the empty string, so that a title such as "0" is kept)
$title = GetTitle($file);
$title = $file if !defined $title or $title eq "";
# the title of the calling page is the end of the nav bar and is not linked
$links = EscapeHtml($title);

# if the calling file is not index.html and index.html exists in
# the current directory, use index.html as the next higher link, otherwise
# move up one directory
if ($file eq "index.html" or ! -f "index.html")
{
    chdir "..";
}
else
{
    $uri .= "index.html";
}

# read the title from every index.html leading to the current directory
# stop when the root directory is reached
while ($uri ne "/")
{
    # step $uri up to the directory this link points at, which is the
    # directory the script is currently in
    $uri = dirname($uri) . "/";
    $uri = "/" if $uri eq "//";

    # read the title of index.html in the current directory, if it exists
    $title = GetTitle("index.html");
    if (!defined $title or $title eq "")
    {
        # no title found (this includes the case of there being no
        # index.html in this directory): use the name of the directory the
        # link points at; the root has no name, so use the server name
        # when the server provides one
        if ($uri ne "/")
        {
            $dir = basename($uri);
        }
        elsif (defined $ENV{"SERVER_NAME"} and $ENV{"SERVER_NAME"} ne "")
        {
            $dir = $ENV{"SERVER_NAME"};
        }
        else
        {
            $dir = "/";
        }
        $title = $dir;
    }

    # and add the link; both the target and the text are escaped because
    # titles are plain text (entities already decoded) and names come from
    # the request
    $links = "<a href=\"" . EscapeHtml($uri) . "\">" . EscapeHtml($title)
           . "</a>${separator}${links}";

    # move up one level
    chdir "..";
}

# Finally, output the headers and nav bar
print "${header}${opening_tag}${links}${closing_tag}\n";