1 #!/usr/bin/perl
2
3 # Blosxom # [1] [2] [3]
4 # Author: Rael Dornfest <rael@oreilly.com>
5 # Version: 2.0
6 # Home/Docs/Licensing: http://www.raelity.org/apps/blosxom/
7
# Site configuration. Everything from here on is compiled into the `blosxom`
# package; the settings below are plain package variables (they are named in
# the `use vars` declaration that follows this section) and are assigned once,
# at start-up, in the order written.
8 package blosxom; # [4]
9
10 # --- Configurable variables ----- # [5]
11
12 # What's this blog's title?
13 $blog_title = "My Weblog"; # [6]
14
15 # What's this blog's description (for outgoing RSS feed)?
16 $blog_description = "Yet another Blosxom weblog.";
17
18 # What's this blog's primary language (for outgoing RSS feed)?
19 $blog_language = "en";
20
21 # Where are this blog's entries kept?
22 $datadir = "/Library/WebServer/Documents/blosxom";
23
24 # What's my preferred base URL for this blog (leave blank for automatic)?
25 $url = "";
26
27 # Should I stick only to the datadir for items or travel down the
28 # directory hierarchy looking for items? If so, to what depth?
29 # 0 = infinite depth (aka grab everything), 1 = datadir only, n = n levels down
30 $depth = 0;
31
32 # How many entries should I show on the home page?
33 $num_entries = 40;
34
35 # What file extension signifies a blosxom entry?
36 $file_extension = "txt";
37
38 # What is the default flavour?
# (Used when the request names no flavour via a file extension or ?flav=.)
39 $default_flavour = "html";
40
41 # Should I show entries from the future (i.e. dated after now)?
42 $show_future_entries = 0;
43
44 # --- Plugins (Optional) -----
45
46 # Where are my plugins kept?
# An empty value disables plugin loading (the loader tests this for truth).
47 $plugin_dir = "";
48
49 # Where should my modules keep their state information?
# The string is interpolated right here, so it captures whatever $plugin_dir
# holds at this point; with the empty default above it evaluates to "/state".
50 $plugin_state_dir = "$plugin_dir/state";
51
52 # --- Static Rendering -----
53
54 # Where are this blog's static files to be created?
55 $static_dir = "/Library/WebServer/Documents/blog";
56
57 # What's my administrative password (you must set this for static rendering)?
# Static mode is entered only when this is non-empty and equals the
# '-password' parameter.
58 $static_password = "";
59
60 # What flavours should I generate statically?
# The first flavour listed is also the one whose index file the default
# entries finder checks (existence and mtime) to decide what to regenerate.
61 @static_flavours = qw/html rss/; # [7]
62
63 # Should I statically generate individual entries?
64 # 0 = no, 1 = yes
65 $static_entries = 0;
66
67 # --------------------------------
68
69 use vars qw! $version $blog_title $blog_description $blog_language $datadir $url %template $template $depth $num_entries $file_extension $default_flavour $static_or_dynamic $plugin_dir $plugin_state_dir @plugins %plugins $static_dir $static_password @static_flavours $static_entries $path_info $path_info_yr $path_info_mo $path_info_da $path_info_mo_num $flavour $static_or_dynamic %month2num @num2month $interpolate $entries $output $header $show_future_entries %files %indexes %others !; # [8]
70
71 use strict; # [9]
72 use FileHandle; # [10] [11]
73 use File::Find; # [12]
74 use File::stat; # [13]
75 use Time::localtime; # [14]
76 use CGI qw/:standard :netscape/; # [15]
77
# Start-up initialisation: lookup tables, URL and directory normalisation,
# static/dynamic mode selection, and parsing of the request path into
# $path_info (category or entry), $flavour, and the year/month/day parts.
# The statements depend on each other and run in the order written.
78 $version = "2.0"; # [16]
79
# File-scoped read handle, reused by the template subroutine and by generate.
80 my $fh = new FileHandle; # [17] [18]
81
# Month abbreviation -> two-digit number; 'nil' => '00' is a placeholder so
# that, after sorting the keys by their numeric value, index 0 of @num2month
# is 'nil' and indexes 1..12 are Jan..Dec.
82 %month2num = (nil=>'00', Jan=>'01', Feb=>'02', Mar=>'03', Apr=>'04', May=>'05', Jun=>'06', Jul=>'07', Aug=>'08', Sep=>'09', Oct=>'10', Nov=>'11', Dec=>'12'); # [19]
83 @num2month = sort { $month2num{$a} <=> $month2num{$b} } keys %month2num; # [20]
84
85 # Use the stated preferred URL or figure it out automatically
86 $url ||= url(); # [21] [22]
87 $url =~ s/^included:/http:/; # Fix for Server Side Includes (SSI) # [23] [24]
88 $url =~ s!/$!!; # [25]
89
90 # Drop any trailing / from dir settings
91 $datadir =~ s!/$!!; $plugin_dir =~ s!/$!!; $static_dir =~ s!/$!!;
92
93 # Fix depth to take into account datadir's path
# Only when $depth is non-zero: add the number of '/' characters in $datadir,
# minus one, so $depth can later be compared directly with the slash count of
# the directory File::Find is visiting.
94 $depth and $depth += ($datadir =~ tr[/][]) - 1; # [26] [27]
95
96 # Global variable to be used in head/foot.{flavour} templates
97 $path_info = '';
98
# 'static' requires all of: no GATEWAY_INTERFACE in the environment, a
# '-password' parameter, a non-empty $static_password, and the two being equal.
# Anything else is 'dynamic', in which case the '-quiet' parameter is forced on.
99 $static_or_dynamic = (!$ENV{GATEWAY_INTERFACE} and param('-password') and $static_password and param('-password') eq $static_password) ? 'static' : 'dynamic'; # [28] [29]
100 $static_or_dynamic eq 'dynamic' and param(-name=>'-quiet', -value=>1); # [30]
101
102 # Path Info Magic
103 # Take a gander at HTTP's PATH_INFO for optional blog name, archive yr/mo/day
# Falls back to the 'path' parameter when path_info() is empty.
104 my @path_info = split m{/}, path_info() || param('path'); # [31] [32]
# Discard the first element (presumably the empty string produced by the
# leading '/' of the path - confirm for the 'path' parameter case).
105 shift @path_info; # [33]
106
# Leading components that start with a letter and contain no '.' are moved,
# one by one, onto $path_info; the loop stops at the first component that
# starts with a non-letter (e.g. a year) or contains a '.' (e.g. entry.html).
107 while ($path_info[0] and $path_info[0] =~ /^[a-zA-Z].*$/ and $path_info[0] !~ /(.*)\.(.*)/) { $path_info .= '/' . shift @path_info; } # [34] [35]
108
109 # Flavour specified by ?flav={flav} or index.{flav}
110 $flavour = '';
111
# If the last remaining component looks like "name.ext", ext is the flavour;
# a name other than 'index' is appended to $path_info as "name.ext", and the
# component is removed so it is not mistaken for a date part below.
112 if ( $path_info[$#path_info] =~ /(.+)\.(.+)$/ ) { # [36] [37]
113 $flavour = $2; # [38]
114 $1 ne 'index' and $path_info .= "/$1.$2"; # [39]
115 pop @path_info; # [40]
116 } else {
117 $flavour = param('flav') || $default_flavour; # [41]
118 }
119
120 # Strip spurious slashes
# (leading and trailing only; interior slashes are kept)
121 $path_info =~ s!(^/*)|(/*$)!!g; # [42]
122
123 # Date fiddling
# Whatever components are left are taken positionally as year, month, day.
124 ($path_info_yr,$path_info_mo,$path_info_da) = @path_info; # [43] [44]
# Month number: a month value containing two consecutive digits is used as-is
# (the \d{2} test is unanchored); otherwise it is looked up by capitalised
# name in %month2num; undef when there is no month or the name is unknown.
125 $path_info_mo_num = $path_info_mo ? ( $path_info_mo =~ /\d{2}/ ? $path_info_mo : ($month2num{ucfirst(lc $path_info_mo)} || undef) ) : undef; # [45] [46]
126
127 # Define standard template subroutine, plugin-overridable at Plugins: Template
# Arguments: ($path, $chunk, $flavour). Returns the text of the template
# component $chunk for $flavour as a single string.
# Lookup order: the file "$datadir/$path/$chunk.$flavour"; then the same file
# name in each parent of $path in turn; then the built-in
# $template{$flavour}{$chunk}; then $template{error}{$chunk}; then ''.
128 $template = # [47] [48]
129 sub {
130 my ($path, $chunk, $flavour) = @_; # [49] [50]
131
# Each pass strips the last "/component" from $path; $1 holds what was
# removed, so the loop ends once nothing (or a false string) was stripped.
# NOTE(review): two-argument open with an interpolated file name; $path and
# $flavour can originate from the request - confirm they cannot carry '..'
# or mode characters.
132 do { # [51]
133 return join '', <$fh> if $fh->open("< $datadir/$path/$chunk.$flavour"); # [52]
134 } while ($path =~ s/(\/*[^\/]*)$// and $1); # [53]
135
136 return join '', ($template{$flavour}{$chunk} || $template{error}{$chunk} || ''); # [54]
137 };
138 # Bring in the templates
# Reads the DATA section line by line until the first empty line or a line
# consisting of __END__. Each line has the form "<flavour> <component> <text>"
# (single whitespace separators); every literal two-character sequence \n in
# the text is turned into a real newline before the text is stored.
139 %template = (); # [55]
140 while (<DATA>) { # [56] [57]
141 last if /^(__END__)?$/; # [58]
142 my($ct, $comp, $txt) = /^(\S+)\s(\S+)\s(.*)$/; # [59]
143 $txt =~ s/\\n/\n/mg; # [60]
144 $template{$ct}{$comp} = $txt; # [61]
145 }
146
147 # Plugins: Start
# Runs only when $plugin_dir is set and can be opened. Candidate plugins are
# the plain files in it whose names consist solely of word characters, taken
# in sorted order.
148 if ( $plugin_dir and opendir PLUGINS, $plugin_dir ) { # [62]
149 foreach my $plugin ( grep { /^\w+$/ && -f "$plugin_dir/$_" } sort readdir(PLUGINS) ) { # [63]
# Leading digits are dropped from the file name to get the plugin (package)
# name; a trailing underscore marks the plugin as off (-1) instead of on (1).
150 my($plugin_name, $off) = $plugin =~ /^\d*(\w+?)(_?)$/; # [64]
151 my $on_off = $off eq '_' ? -1 : 1; # [65]
152 require "$plugin_dir/$plugin"; # [66]
# The plugin is recorded in %plugins and appended to @plugins only when its
# start() method returns a true value. Off plugins are still loaded and
# recorded; the hook loops skip them by testing $plugins{...} > 0.
153 $plugin_name->start() and ( $plugins{$plugin_name} = $on_off ) and push @plugins, $plugin_name; # [67]
154 }
155 closedir PLUGINS; # [68]
156 }
157
158 # Plugins: Template
159 # Allow for the first encountered plugin::template subroutine to override the
160 # default built-in template subroutine
# A plugin counts only if it is on, has a template method, and that method
# returns a defined value; the loop stops after the first true replacement.
161 my $tmp; foreach my $plugin ( @plugins ) { $plugins{$plugin} > 0 and $plugin->can('template') and defined($tmp = $plugin->template()) and $template = $tmp and last; } # [69] [70] [71]
162
163 # Provide backward compatibility for Blosxom < 2.0rc1 plug-ins
# Thin wrapper: passes its arguments unchanged to whatever code reference
# $template currently holds (built-in or plugin-supplied) and returns its
# result.
164 sub load_template { # [72]
165 return &$template(@_);
166 }
167
168 # Define default find subroutine
# Walks $datadir with File::Find and returns three hash references:
#   files   - entry file path => mtime
#   indexes - category path, "yyyy/mm/dd" date path, or entry path that needs
#             static (re)generation
#   others  - every other readable non-directory path => mtime
169 $entries = # [73]
170 sub {
# These lexicals are distinct from the package hashes of the same names.
171 my(%files, %indexes, %others); # [74]
172 find( # [75]
173 sub {
174 my $d;
# Depth is measured as the number of '/' characters in the directory being
# visited; a $depth of 0 means no limit.
175 my $curr_depth = $File::Find::dir =~ tr[/][]; # [76]
176 return if $depth and $curr_depth > $depth; # [77]
177
# After a successful match $1 is the path below $datadir (undefined for
# files directly in $datadir) and $2 is the file name without its extension;
# both are used throughout the chain that follows.
178 if ( # [78]
179 # a match
180 $File::Find::name =~ m!^$datadir/(?:(.*)/)?(.+)\.$file_extension$! # [79]
181 # not an index, .file, and is readable
182 and $2 ne 'index' and $2 !~ /^\./ and (-r $File::Find::name) # [80]
183 ) {
184
# The body is one short-circuiting `and` chain: each step runs only if
# every earlier step produced a true value.
185 # to show or not to show future entries # [81]
186 ( # [82]
187 $show_future_entries
188 or stat($File::Find::name)->mtime < time
189 )
190
191 # add the file and its associated mtime to the list of files
192 and $files{$File::Find::name} = stat($File::Find::name)->mtime # [83]
193
194 # static rendering bits
# Continue only if everything was requested (-all), or the category's static
# index for the first static flavour is missing or older than this entry.
195 and ( # [84]
196 param('-all') # [85]
197 or !-f "$static_dir/$1/index." . $static_flavours[0] # [86]
198 or stat("$static_dir/$1/index." . $static_flavours[0])->mtime < stat($File::Find::name)->mtime # [87]
199 )
# Category indexes are marked with the value 1.
200 and $indexes{$1} = 1 # [88]
# Elements 5, 2, 3 of nice_date's result are year, month number, day.
201 and $d = join('/', (nice_date($files{$File::Find::name}))[5,2,3]) # [89]
202
# Date indexes are marked with the date path itself as the value.
203 and $indexes{$d} = $d # [90]
# Individual entries are added (value 1) only when $static_entries is set.
204 and $static_entries and $indexes{ ($1 ? "$1/" : '') . "$2.$file_extension" } = 1 # [91]
205
206 }
207 else {
208 !-d $File::Find::name and -r $File::Find::name and $others{$File::Find::name} = stat($File::Find::name)->mtime # [92]
209 }
210 }, $datadir # [93]
211 );
212
213 return (\%files, \%indexes, \%others); # [94]
214 };
215
216 # Plugins: Entries
217 # Allow for the first encountered plugin::entries subroutine to override the
218 # default built-in entries subroutine
# NOTE(review): `my $tmp` is already declared at file scope by the
# "Plugins: Template" step; this second declaration in the same scope makes
# Perl report a "masks earlier declaration" warning when warnings are on.
219 my $tmp; foreach my $plugin ( @plugins ) { $plugins{$plugin} > 0 and $plugin->can('entries') and defined($tmp = $plugin->entries()) and $entries = $tmp and last; } # [95]
220
# Run the chosen finder once and copy its results into the package hashes;
# the third reference is optional (an absent one yields an empty %others).
221 my ($files, $indexes, $others) = &$entries(); # [96]
222 %files = %$files; %indexes = %$indexes; %others = ref $others ? %$others : (); # [97]
223
224 # Plugins: Filter
# Every enabled plugin with a filter method gets references to %files and
# %others and may modify them in place.
# NOTE(review): the return value of filter() is assigned to $entries, which
# replaces the finder code reference; this looks unintentional - confirm
# nothing calls $entries after this point.
225 foreach my $plugin ( @plugins ) { $plugins{$plugin} > 0 and $plugin->can('filter') and $entries = $plugin->filter(\%files, \%others) } # [98]
226
227 # Static
# Same four-part test that was used to set $static_or_dynamic to 'static'.
228 if (!$ENV{GATEWAY_INTERFACE} and param('-password') and $static_password and param('-password') eq $static_password) { # [99]
229
230 param('-quiet') or print "Blosxom is generating static index pages...\n"; # [100]
231
232 # Home Page and Directory Indexes
# For every key of %indexes, walk its path one component at a time, starting
# with '' (the home page), and generate each prefix once; %done remembers the
# prefixes already handled.
233 my %done; # [101]
234 foreach my $path ( sort keys %indexes) { # [102]
235 my $p = ''; # [103]
236 foreach ( ('', split /\//, $path) ) { # [104]
237 $p .= "/$_"; # [105]
238 $p =~ s!^/!!;
# $path_info is a package variable that templates may interpolate.
239 $path_info = $p; # [106]
240 $done{$p}++ and next; # [107] [108]
# Create the directory unless it exists or $p names an entry file.
241 (-d "$static_dir/$p" or $p =~ /\.$file_extension$/) or mkdir "$static_dir/$p", 0755; # [109]
# Note: the loop variable is the package variable $flavour itself.
242 foreach $flavour ( @static_flavours ) { # [110]
# Only the first line of the content_type template is kept.
243 my $content_type = (&$template($p,'content_type',$flavour)); # [111]
244 $content_type =~ s!\n.*!!s;
# Entry paths are written as "<entry>.<flavour>", everything else as
# "<dir>/index.<flavour>".
245 my $fn = $p =~ m!^(.+)\.$file_extension$! ? $1 : "$p/index"; # [112]
246 param('-quiet') or print "$fn.$flavour\n";
# NOTE(review): the die message names "$static_dir/$p", not the
# "$static_dir/$fn.$flavour" file that is actually being opened.
247 my $fh_w = new FileHandle "> $static_dir/$fn.$flavour" or die "Couldn't open $static_dir/$p for writing: $!"; # [113]
# generate appends to $output, so it is reset before each page.
248 $output = ''; # [114]
# An %indexes value of 1 marks a category or entry ($p goes in as the
# directory argument); any other value marks a date path ($p goes in as the
# date argument).
249 print $fh_w # [115]
250 $indexes{$path} == 1
251 ? &generate('static', $p, '', $flavour, $content_type) # [116]
252 : &generate('static', '', $p, $flavour, $content_type);
253 $fh_w->close; # [117]
254 }
255 }
256 }
257 }
258
259 # Dynamic
# One page for the current request: the content type is the first line of
# the content_type template, $header carries it for CGI's header(), and the
# date argument is assembled as "year/month-number/day" (parts may be empty).
260 else { # [118]
261 my $content_type = (&$template($path_info,'content_type',$flavour)); # [119]
262 $content_type =~ s!\n.*!!s;
263
264 $header = {-type=>$content_type}; # [120] [121]
265
266 print generate('dynamic', $path_info, "$path_info_yr/$path_info_mo_num/$path_info_da", $flavour, $content_type); # [122]
267 }
268
269 # Plugins: End
# Calls end() on every enabled plugin that defines it; the return value is
# stored in $entries (as in the other hook loops) and not used afterwards here.
270 foreach my $plugin ( @plugins ) { $plugins{$plugin} > 0 and $plugin->can('end') and $entries = $plugin->end() } # [123]
271
272 # Generate # [124]
# generate($static_or_dynamic, $currentdir, $date, $flavour, $content_type)
#
# Builds one page and returns it as a string.
#   $static_or_dynamic - 'static' or 'dynamic'; only 'dynamic' gets the CGI
#                        header prepended (and only when $header is set)
#   $currentdir        - category path, or an entry path of the form
#                        "dir/name.ext"; '' selects everything
#   $date              - "year/month-number/day"; any part may be empty
#   $flavour           - template flavour, e.g. 'html' or 'rss'
#   $content_type      - content type string; one ending in "xml" (after a
#                        non-word character) switches on XML escaping
# The page is appended to the package variable $output, which is also the
# return value, so callers that generate several pages reset $output first.
# Reads the package hashes %files and %others, the plugin list, and the
# $template code reference; sets the package variables that story templates
# interpolate ($path, $fn, $title, $body, $raw, $dw, $mo, $mo_num, $da, $ti,
# $yr, $hr, $min, $hr12, $ampm).
273 sub generate { # [125]
274 my($static_or_dynamic, $currentdir, $date, $flavour, $content_type) = @_; # [126]
275
# Working copy of the entry list; narrowed to one entry further down when
# $currentdir names an individual entry.
276 my %f = %files; # [127]
277
278 # Plugins: Skip
279 # Allow plugins to decide if we can cut short story generation
# ($tmp here is the file-scoped lexical declared outside this subroutine.)
280 my $skip; foreach my $plugin ( @plugins ) { $plugins{$plugin} > 0 and $plugin->can('skip') and defined($tmp = $plugin->skip()) and $skip = $tmp and last; } # [128] [129]
281
282 # Define default interpolation subroutine
# Replaces every $name or $pkg::name in the template text with the value of
# that variable as seen from package blosxom, or '' when it is undefined.
# The /ee modifier evaluates the replacement twice: once to build the
# "defined $x ? $x : ''" expression, once to run it.
283 $interpolate = # [130]
284 sub {
285 package blosxom; # [131]
286 my $template = shift;
287 $template =~ # [132] [133]
288 s/(\$\w+(?:::)?\w*)/"defined $1 ? $1 : ''"/gee;
289 return $template;
290 };
291
292 unless (defined($skip) and $skip) { # [134]
293
294 # Plugins: Interpolate
295 # Allow for the first encountered plugin::interpolate subroutine to
296 # override the default built-in interpolate subroutine
297 my $tmp; foreach my $plugin ( @plugins ) { $plugins{$plugin} > 0 and $plugin->can('interpolate') and defined($tmp = $plugin->interpolate()) and $interpolate = $tmp and last; } # [135]
298
299 # Head
300 my $head = (&$template($currentdir,'head',$flavour)); # [136]
301
302 # Plugins: Head
303 foreach my $plugin ( @plugins ) { $plugins{$plugin} > 0 and $plugin->can('head') and $entries = $plugin->head($currentdir, \$head) } # [137] [138]
304
305 $head = &$interpolate($head); # [139]
306
307 $output .= $head;
308
309 # Stories
310 my $curdate = '';
311 my $ne = $num_entries; # [140]
312
# An entry request ("dir/name.ext" with name other than 'index') is rewritten
# to the entry's source file name, and %f is cut down to that single entry
# when it is known; otherwise a trailing "/index.ext" is simply removed.
313 if ( $currentdir =~ /(.*?)([^\/]+)\.(.+)$/ and $2 ne 'index' ) { # [141] [142]
314 $currentdir = "$1$2.$file_extension"; # [143]
315 $files{"$datadir/$1$2.$file_extension"} and %f = ( "$datadir/$1$2.$file_extension" => $files{"$datadir/$1$2.$file_extension"} ); # [144]
316 }
317 else {
318 $currentdir =~ s!/index\..+$!!; # [145]
319 }
320
321 # Define a default sort subroutine
# Newest first, by the mtime stored as the hash value.
322 my $sort = sub { # [146]
323 my($files_ref) = @_;
324 return sort { $files_ref->{$b} <=> $files_ref->{$a} } keys %$files_ref;
325 };
326
327 # Plugins: Sort
328 # Allow for the first encountered plugin::sort subroutine to override the
329 # default built-in sort subroutine
330 my $tmp; foreach my $plugin ( @plugins ) { $plugins{$plugin} > 0 and $plugin->can('sort') and defined($tmp = $plugin->sort()) and $sort = $tmp and last; } # [147]
331
332 foreach my $path_file ( &$sort(\%f, \%others) ) { # [148]
# The entry limit applies only when no date was requested ($date has no digit).
333 last if $ne <= 0 && $date !~ /\d/; # [149] [150]
334 use vars qw/ $path $fn /; # [151]
335 ($path,$fn) = $path_file =~ m!^$datadir/(?:(.*)/)?(.*)\.$file_extension!; # [152]
336
337 # Only stories in the right hierarchy
# NOTE(review): $currentdir is interpolated into the pattern unquoted and
# matched as a plain prefix, so regex metacharacters in a path are live and
# "foo" also selects "foobar" - confirm whether that is intended.
338 $path =~ /^$currentdir/ or $path_file eq "$datadir/$currentdir" or next; # [153]
339
340 # Prepend a slash for use in templates only if a path exists
341 $path &&= "/$path"; # [154]
342
343 # Date fiddling for by-{year,month,day} archive views
344 use vars qw/ $dw $mo $mo_num $da $ti $yr $hr $min $hr12 $ampm /; # [155]
345 ($dw,$mo,$mo_num,$da,$ti,$yr) = nice_date($files{"$path_file"});
346 ($hr,$min) = split /:/, $ti;
347 ($hr12, $ampm) = $hr >= 12 ? ($hr - 12,'pm') : ($hr, 'am');
348 $hr12 =~ s/^0//; $hr12 == 0 and $hr12 = 12;
349
350 # Only stories from the right date
# NOTE(review): on the year and day lines each `last` follows a `next` that
# already fires whenever the values differ, so the `last` can never trigger.
351 my($path_info_yr,$path_info_mo_num, $path_info_da) = split /\//, $date; # [156]
352 next if $path_info_yr && $yr != $path_info_yr; last if $path_info_yr && $yr < $path_info_yr; # [157]
353 next if $path_info_mo_num && $mo ne $num2month[$path_info_mo_num]; # [158]
354 next if $path_info_da && $da != $path_info_da; last if $path_info_da && $da < $path_info_da; # [159]
355
356 # Date
# (This lexical $date hides the $date argument for the rest of the loop body.)
357 my $date = (&$template($path,'date',$flavour)); # [160] [161]
358
359 # Plugins: Date
360 foreach my $plugin ( @plugins ) { $plugins{$plugin} > 0 and $plugin->can('date') and $entries = $plugin->date($currentdir, \$date, $files{$path_file}, $dw,$mo,$mo_num,$da,$ti,$yr) } # [162]
361
362 $date = &$interpolate($date); # [163]
363
# Emit the date block only when it differs from the previous entry's.
364 $curdate ne $date and $curdate = $date and $output .= $date; # [164]
365
# First line of the entry file is the title, the remainder is the body.
366 use vars qw/ $title $body $raw /; # [165]
367 if (-f "$path_file" && $fh->open("< $path_file")) { # [166]
368 chomp($title = <$fh>);
369 chomp($body = join '', <$fh>);
370 $fh->close;
371 $raw = "$title\n$body"; # [167]
372 }
373 my $story = (&$template($path,'story',$flavour)); # [168]
374
375 # Plugins: Story
376 foreach my $plugin ( @plugins ) { $plugins{$plugin} > 0 and $plugin->can('story') and $entries = $plugin->story($path, $fn, \$story, \$title, \$body) } # [169]
377
378 if ($content_type =~ m{\Wxml$}) { # [170]
379 # Escape <, >, &, and " to produce valid RSS
# Each character maps to its predefined XML entity. The substitution is a
# single pass over the text, so the '&' inside an inserted entity is not
# escaped a second time.
380 my %escape = ('<'=>'&lt;', '>'=>'&gt;', '&'=>'&amp;', '"'=>'&quot;'); # [171]
381 my $escape_re = join '|' => keys %escape; # [172]
382 $title =~ s/($escape_re)/$escape{$1}/g; # [173]
383 $body =~ s/($escape_re)/$escape{$1}/g;
384 }
385
386 $story = &$interpolate($story); # [174]
387
388 $output .= $story;
389 $fh->close; # [175]
390
391 $ne--;
392 }
393
394 # Foot
395 my $foot = (&$template($currentdir,'foot',$flavour)); # [176]
396
397 # Plugins: Foot
398 foreach my $plugin ( @plugins ) { $plugins{$plugin} > 0 and $plugin->can('foot') and $entries = $plugin->foot($currentdir, \$foot) } # [177]
399
400 $foot = &$interpolate($foot); # [178]
401 $output .= $foot;
402
403 # Plugins: Last
404 foreach my $plugin ( @plugins ) { $plugins{$plugin} > 0 and $plugin->can('last') and $entries = $plugin->last() } # [179]
405
406 } # End skip
407
408 # Finally, add the header, if any and running dynamically
409 $static_or_dynamic eq 'dynamic' and $header and $output = header($header) . $output; # [180]
410
411 $output; # [181]
412 }
413
414
# nice_date($unixtime)
# Splits a time value into display parts by formatting it with ctime() and
# picking the fields out of the resulting string.
# Returns the list ($dw, $mo, $mo_num, $da, $ti, $yr): day-of-week
# abbreviation, month abbreviation, two-digit month number (from %month2num),
# two-digit zero-padded day, "HH:MM" time, and four-digit year.
415 sub nice_date { # [182]
416 my($unixtime) = @_; # [183]
417
418 my $c_time = ctime($unixtime); # [184]
# Expected shape: "Www Mmm [d]d HH:MM:SS YYYY"; the seconds are matched but
# not captured, and the year must be the last thing in the string.
419 my($dw,$mo,$da,$ti,$yr) = ( $c_time =~ /(\w{3}) +(\w{3}) +(\d{1,2}) +(\d{2}:\d{2}):\d{2} +(\d{4})$/ ); # [185]
420 $da = sprintf("%02d", $da); # [186]
421 my $mo_num = $month2num{$mo}; # [187]
422
423 return ($dw,$mo,$mo_num,$da,$ti,$yr);
424 }
425
426
427 # Default HTML and RSS template bits # [188]
428 __DATA__
429 html content_type text/html
430 html head <html><head><link rel="alternate" type="application/rss+xml" title="RSS" href="$url/index.rss" /><title>$blog_title $path_info_da $path_info_mo $path_info_yr</title></head><body><center><font size="+3">$blog_title</font><br />$path_info_da $path_info_mo $path_info_yr</center><p />
431 html story <p><a name="$fn"><b>$title</b></a><br />$body<br /><br />posted at: $ti | path: <a href="$url$path">$path</a> | <a href="$url/$yr/$mo_num/$da#$fn">permanent link to this entry</a></p>\n
432 html date <h3>$dw, $da $mo $yr</h3>\n
433 html foot <p /><center><a href="http://www.blosxom.com/"><img src="http://www.blosxom.com/images/pb_blosxom.gif" border="0" /></a></body></html>
434 rss content_type text/xml
435 rss head <?xml version="1.0"?>\n<!-- name="generator" content="blosxom/$version" -->\n<!DOCTYPE rss PUBLIC "-//Netscape Communications//DTD RSS 0.91//EN" "http://my.netscape.com/publish/formats/rss-0.91.dtd">\n\n<rss version="0.91">\n <channel>\n <title>$blog_title $path_info_da $path_info_mo $path_info_yr</title>\n <link>$url</link>\n <description>$blog_description</description>\n <language>$blog_language</language>\n
436 rss story <item>\n <title>$title</title>\n <link>$url/$yr/$mo_num/$da#$fn</link>\n <description>$body</description>\n </item>\n
437 rss date \n
438 rss foot </channel>\n</rss>
439 error content_type text/html
440 error head <html><body><p><font color="red">Error: I'm afraid this is the first I've heard of a "$flavour" flavoured Blosxom. Try dropping the "/+$flavour" bit from the end of the URL.</font>\n\n
441 error story <p><b>$title</b><br />$body <a href="$url/$yr/$mo_num/$da#fn.$default_flavour">#</a></p>\n
442 error date <h3>$dw, $da $mo $yr</h3>\n
443 error foot </body></html>
444 __END__
This is version 0.9 of the Blosxom 2 annotations, by Frank Hecker <hecker@hecker.org>. These annotations to the blosxom.cgi source code are made available under the same license terms as Blosxom itself. Comments and suggestions for changes are welcome.
The online Perl documentation was indispensable in creating these notes. The notes2html script was used to create HTML documents from the inline notes.
For more information see the following URLs:
http://www.blosxom.com/downloads/blosxom.zip
http://www.blosxom.com/license.html
http://www.hecker.org/blosxom/
All lines starting with '#' are comments, not part of the code itself. The first line uses the standard Unix #! convention to identify the location of the Perl interpreter. You would need to change this line if for some reason the Perl interpreter were in a different directory or had a different name.
For more information see the following URL:
http://www.perldoc.com/perl5.8.4/pod/perlrun.html
The Blosxom source file is divided into three major sections:
The main Blosxom code itself is further divided into various sections as discussed below.
In this section the following tasks are carried out:
$fh: a FileHandle for reading files (line 80)
%month2num: hash to convert a month abbreviation to a month number (line 82)
@num2month: array to convert a month number to a month abbreviation (line 83)
$url: if a value for the base URL wasn't defined in the configurable variables section, set a new value as described below (lines 86-88)
$datadir: strip any trailing slash if present (line 91)
$depth: adjust to account for the number of path components in $datadir (line 94)
$url: the part of the requested URL corresponding to the Blosxom script itself (e.g., http://www.example.com/cgi-bin/blosxom.cgi) (lines 86-88)
$static_or_dynamic: set to 'static' if Blosxom is running in static mode, 'dynamic' if Blosxom is running in dynamic mode, i.e., through CGI (line 99)
$path_info: the part of the requested URL identifying a particular category or individual entry to be displayed, e.g., /society/literature or /cooking/italian/bruschetta.html (lines 97, 104-107, 121)
$flavour: the particular flavour of data being requested, e.g., 'html' or 'rss' (lines 110-118)
$path_info_yr, $path_info_mo, and $path_info_da: the dates for which we are requesting that entries be displayed (lines 124-125)
$template (lines 128-137)
%template hash keyed by the content type (e.g., 'html' or 'rss') and template component (e.g., 'head' or 'foot') (lines 139-145)
@plugins array (a list of plugin names, minus prefixes) and the %plugins hash (which stores the enabled/disabled status for each plugin, keyed by the plugin name) (line 153)
$entries) (lines 169-214) and then allow overriding it by the first plugin that defines an alternate entries subroutine (line 219)
This section of the code looks for Blosxom entries and related items of interest, performing the following tasks:
$entries, and build up three hashes (lines 221-222):
%files: files representing individual Blosxom entries (e.g., foo.txt if '.txt' is the standard Blosxom file extension)
%indexes: directories for which index files might need to be created or updated as part of static page generation, as well as individual entry files for which static pages might need to be generated
%others: all other files not falling into the above two categories
%files and %others by invoking the filter subroutine for each and every plugin that defines one (line 225)
The hashes %files and %others are keyed by the name of the entry file (for %files) or other item (for %others), in the form of an absolute pathname; the value for each element in %files or %others is the date/time last modified for the corresponding entry file or other item.
The hash %indexes is keyed by the name of the directory or entry file for which static page generation should be done, expressed as a relative pathname relative to the Blosxom data directory (e.g., 'a/b' or '2004/05/22'); the value for elements in %indexes is 1 for elements corresponding to category directories or individual entries, and for elements corresponding to date directories is the same as the key (e.g., '2004/05/22').
The next section of the Blosxom code generates HTML or other output. For dynamic invocation of Blosxom this is relatively simple, since we need to generate only one page in response to the requested URL (lines 260-267):
generate subroutine to create the page output, based on the category, date, entry, and flavour information from the requested URL (line 266)
generate (which includes the HTTP header for the appropriate content type) (line 266)
For static invocation of Blosxom, page generation is more complex, since we may need to generate several pages (lines 230-257):
%indexes (lines 234-256) and then for each element loop through each directory component of the item (directory or entry file) corresponding to the element (lines 236-255)
@static_flavours (lines 242-254)
generate subroutine to create the output for the page (lines 250-252)
Finally, we loop through the plugins and call each plugin's end subroutine in order to do any final processing (line 270).
generate subroutine (lines 273-412)
The generate subroutine creates the actual output for a page of the desired flavour, taking as input the path information for the category, entry file, and/or date, along with the flavour and content type, and an indication of whether static or dynamic page generation is desired. The generate subroutine also uses the hashes %files, %indexes, and %others previously populated.
The specific tasks performed by the generate subroutine are as follows:
interpolate subroutine for variable interpolation in templates (lines 283-290)
interpolate subroutine (line 297)
$blog_title) in the 'head' template and add the result to the output (lines 305-307)
$currentdir argument, which holds information on the category and/or individual entry for which a page needs to be generated (lines 313-319)
%f hash (a copy of %files) so that it contains information for just that entry (line 315)
%f and %others, each representing an entry to be added to the generated page (lines 332-392)
$num_entries (line 333)
nice_date subroutine (lines 415-424)
The nice_date subroutine converts OS-provided time values (expressed as the number of seconds since some fixed date) into year, month, day, etc., values that we can use for printing date/times and creating date-based URLs. For more information see the notes for lines 415-424.
package defines a namespace for variables, subroutines, etc., so that their names won't conflict with names defined in other Perl code used by Blosxom and pulled in from other places.
See the following URL for more information:
http://www.perldoc.com/perl5.8.4/pod/perlmod.html#Packages
The scope of the configurable variables is within the blosxom package. We put "global" in quotes because, as the Perl documentation notes, "there's really no such thing as a global variable in Perl", in the sense of global variables as used in C and similar languages. However the configurable variables are like global variables in that their values are visible anywhere in the Blosxom code (unless "hidden" by other variable declarations as described in the notes to line 171). See also the notes to line 69.
The configurable variables can be referenced from Blosxom plugins as $blosxom::foo where $foo is a variable. Alternatively, a Blosxom plugin can include a package blosxom statement prior to a section of code to allow Blosxom configurable variables to be referenced within that code section without having to preface the variables' names with "blosxom::". (For example, a plugin would do this when defining its own version of the interpolate subroutine; see the notes to lines 283 and 285 for more information.)
See the following URL for more information on variable scope:
http://www.perldoc.com/perl5.8.4/pod/perlmod.html
Note when reading the documentation that the configurable variables are considered to be "dynamic" (as opposed to "lexical") variables.
In Perl a variable starting with '$' is a scalar (i.e., single-valued) variable. Note that unlike shell syntax the '$' is used when assigning to the variable as well as when using its value.
The $blog_title variable is used to hold a string. Like shell variables Perl scalar variables can have either string or numeric values. String values can be delimited by either single quotes or double quotes; like the Unix shell, if the string is within double quotes then it can include references to other Perl variables (e.g., "A Blog by $author") and the values of those variables will be interpolated into the string, replacing the variable references.
Because of this variable interpolation, if you want to use a '$' in your blog title or description then you need to either precede the '$' with a '\' ("My \$64,000 Blog") or use single quotes to delimit the string ('My $64,000 Blog'). (If you use single quotes for your string delimiter then you will also need to escape any single quote character in the string itself by preceding it with a '\', e.g., 'John\'s $64,000 Blog'; a similar rule holds when you want to include a double quote in a string delimited by double quotes.)
For more information on Perl scalar variables see the following URL:
http://www.perldoc.com/perl5.8.4/pod/perldata.html#Scalar-values
qw: In Perl a variable starting with '@' is an array variable that holds an ordered list of values indexed by array position (starting from 0 as the first position).
Here we define a 2-element array with the string values 'html' and 'rss'. qw is a function that returns a list of words extracted out of a string enclosed within delimiters, e.g., qw/a b/ is the same as 'a', 'b'. (Alternately you could use qw(a b) or qw! a b ! or whatever.) This is a very common Perl idiom, as it eliminates the need to quote each and every word within the list.
For more information see the following URLs:
http://www.perldoc.com/perl5.8.4/pod/perldata.html
http://www.perldoc.com/perl5.8.4/pod/perlop.html#Regexp-Quote-Like-Operators
use vars: Here we declare global variables used in this package (actually, within the file, but the file just contains a single package). Note that use vars was deemed obsolete as of Perl 5.6, being replaced by our, but as used here it supports use of Blosxom with earlier Perl 5.x versions.
For more information see the following URLs:
http://www.perldoc.com/perl5.8.4/pod/perlmodlib.html#Pragmatic-Modules
http://search.cpan.org/~nwclark/perl-5.8.4/lib/vars.pm
use strict: use strict tells Perl to raise compile-time errors for all sorts of unsafe constructs, such as references to variables that were not previously defined or declared.
For more information see the following URL:
http://search.cpan.org/~nwclark/perl-5.8.4/lib/strict.pm
use function: The next few lines import various Perl modules, making their functions and global variables available without needing to qualify the names with package names. (In other words, we can refer to bar() rather than foo::bar() where bar is a function in the package foo.)
On packages vs. modules: per the documentation, "A module is just a set of related functions in a library file, i.e., a Perl package with the same name as the file." Strictly speaking Blosxom 2.0 is a package but not a module; however Blosxom 3.0 will be a full-fledged module.
For more information see the following URL:
http://www.perldoc.com/perl5.8.4/pod/func/use.html
The FileHandle module contains functions for basic file I/O operations: open, new, getc, gets, seek, close, etc.
For more information see the following URL:
http://search.cpan.org/~nwclark/perl-5.8.4/lib/FileHandle.pm
The File::Find module contains functions to traverse a directory tree in the file system, analogous to the Unix find command. Blosxom uses File::Find functions and variables in its own find subroutine below.
For more information see the following URL:
http://search.cpan.org/~nwclark/perl-5.8.4/lib/File/Find.pm
The File::stat module gets a file's attributes, like the Unix stat kernel routine. Blosxom uses File::stat functions and variables to get the date/time modified for entry files and related information.
For more information see the following URL:
http://search.cpan.org/~nwclark/perl-5.8.4/lib/File/stat.pm
The Time::localtime module gets the current date and time and performs other date/time-related operations, like the corresponding Unix functions. Blosxom uses Time::localtime functions in the subroutine nice_date and elsewhere.
For more information see the following URL:
http://search.cpan.org/~nwclark/perl-5.8.4/lib/Time/localtime.pm
The CGI module is used to parse incoming HTTP requests (e.g., to get the URL being requested) and to create HTTP headers and HTML pages sent in response (see the subroutine generate for an example).
Note that :standard imports a standard set of functions and :netscape imports optional functions for Netscape-specific HTML extensions.
For more information see the following URL:
http://search.cpan.org/~lds/CGI.pm-3.05/CGI.pm
Blosxom 2.0 is considered stable. Blosxom 3.0 is currently in development.
my variables: my creates a private variable visible only within the lexical scope within which it is defined (e.g., within a given code block enclosed by curly braces), and not visible anywhere else (including subroutines called from a given code block). In this case the lexical scope is considered to be the entire blosxom package within the blosxom.cgi source file.
For more information see
http://www.perldoc.com/perl5.8.4/pod/perlintro.html#Variable-scoping
http://www.perldoc.com/perl5.8.4/pod/perlsub.html#Private-Variables-via-my()
new: The FileHandle module presents an object-oriented interface, so new in this context produces a new instance of the FileHandle class.
In object-oriented terms new is a "constructor", i.e., a so-called "class method" that creates and initializes new objects. Unlike object-oriented languages like C++, in Perl a constructor could be called something other than "new", but it's a common convention.
For more information see
http://search.cpan.org/~nwclark/perl-5.8.4/lib/FileHandle.pm
http://www.perldoc.com/perl5.8.4/pod/perlobj.html
We create a hash table (or plain "hash" in Perl jargon) with month names being the keys and month numbers (as strings) being the values. Hashes are initialized by providing a list in which the odd entries are the key values and the even entries are the corresponding values, e.g., ('key1', 'value1', 'key2', 'value2'). The syntax (a=>'b', c=>'d') is equivalent to ('a','b','c','d') and is intended to make hash initialization more understandable.
For more information see the following URL:
http://www.perldoc.com/perl5.8.4/pod/perldata.html
keys and sort functions: This takes the list of keys in the previously-defined hash table, i.e., the list ('nil', 'Jan', 'Feb', ..., 'Dec'), sorts it using a comparison function that compares the corresponding values in the hash table for each key, i.e., the values '00', '01', etc., and then assigns the resulting sorted list of keys to an array indexed by month number.
This is equivalent to defining the array as follows:
@num2month = ('nil', 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec');
(Note that the 'nil' value is included because Perl arrays are indexed from 0 but month numbers start at 1.)
keys is a function that takes as an argument a hash and returns either a list consisting of all the keys in the hash (if used in list context) or the number of keys in the hash (if used in scalar context). Here we're using keys in list context, because as noted below the sort function expects a list as an argument.
sort is a function that takes as arguments the list of items to be sorted and (as an optional first argument) a subroutine defining how sort comparisons are to be done; in this case that subroutine is an "anonymous" inline routine enclosed in curly braces. $a and $b are special global variables used to hold the values being sorted at each step of the sort algorithm; <=> is a comparison operator that returns -1, 0, or 1 depending on whether the first item is respectively less than, equal to, or greater than the second. (This is a numeric comparison.)
For more information see the following URLs:
http://www.perldoc.com/perl5.8.4/pod/func/keys.html
http://www.perldoc.com/perl5.8.4/pod/func/sort.html
http://www.perldoc.com/perl5.8.4/pod/perlop.html#Equality-Operators
||= operator
This defines $url to be its existing value (if it has one) or (if it has no value) the value returned by the url function (part of the CGI module) as described in the next note. (Perl has different namespaces for variables and functions, which is why we can name the variable the same as the function.)
The || operator is a logical "or" operator similar (but not identical) to that used in shell or C programming; $url ||= url(); is equivalent to $url = $url || url(); where the original value of $url is considered false if it is undefined or its value is the empty string '', and true otherwise. So if $url already has a value then the second part of the conditional expression (after ||) is not executed, and that existing value is (re)assigned to $url; otherwise the second part is executed to obtain the returned value from url(), and that value is assigned to $url.
For more information see the following URLs:
http://www.perldoc.com/perl5.8.4/pod/perlop.html#C-style-Logical-Or
http://www.perldoc.com/perl5.8.4/pod/perlop.html#Assignment-Operators
url()
Note that url() returns only the URL of the Blosxom CGI script itself, not the full URL being requested. Thus (for example) if the HTTP request were for the URL
http://www.example.com/cgi-bin/blosxom.cgi/2004/05/22
then url() would return (and $url would be set to) the URL
http://www.example.com/cgi-bin/blosxom.cgi
If you have configured the web server to hide the blosxom.cgi part of the URL (as described in the FAQ referenced below) then the value of url() will be that part of the full URL which was translated into the script location. For example, if you configured Apache using the ScriptAlias directive as follows:
ScriptAlias /blog "/var/www/cgi-bin/blosxom.cgi"
then if the requested URL were
http://www.example.com/blog/2004/05/22
then url() would return (and $url would be set to) the URL
http://www.example.com/blog
For more information see the following URLs:
http://search.cpan.org/~lds/CGI.pm-3.05/CGI.pm#OBTAINING_THE_SCRIPT'S_URL
http://www.blosxom.com/faq/cgi/hide_cgi_bit.htm
We assign $url a new value consisting of its previous value with the initial string "included:" (if present) replaced with "http:". This is intended for the case when output from blosxom.cgi is included in an HTML file by a Server Side Include directive like the following:
<!--#include virtual="/cgi-bin/blosxom.cgi/2004/05/19" -->
When invoked in this way the URL returned by url() above would be (for example)
included://www.example.com/cgi-bin/blosxom.cgi
instead of
http://www.example.com/cgi-bin/blosxom.cgi
For more information see the following URL:
http://httpd.apache.org/docs-2.0/howto/ssi.html
=~ operator and regular expression matching
=~ is a special operator that takes the left side ($url) and applies to it a pattern match specified on the right side (s/^included:/http:/), in this case a pattern match that actually does substitution, using regular expressions modeled on those used in the Unix shell and utilities. (So, for example, in this case the '^' tells Perl to look for a match starting at the beginning of the string.) The result is that the value of $url is modified if the match succeeds.
For more information see the following URLs:
http://www.perldoc.com/perl5.8.4/pod/perlop.html#Binding-Operators
http://www.perldoc.com/perl5.8.4/pod/perlop.html#Regexp-Quote-Like-Operators
This statement strips off a trailing slash from the URL value if present; the '$' in the regular expression tells Perl to look for a match at the end of the string.
Note that the value returned by the url() function doesn't have a trailing slash, but the person configuring Blosxom may have included a trailing slash when specifying a non-default value for the $url variable.
$depth
If $depth is non-zero (i.e., limiting search to n directories deep) then we take the $datadir path, count the number of path components, subtract 1, and add that to $depth to get the new value. For example, if the value of $datadir is /a/b/c then a $depth value of 2 would get changed to a value of 4.
The new value of $depth can be interpreted as follows: Search through a directory only if the number of components in its path is $depth or less. So in the previous example the directory /a/b/c/d would be searched but the directory /a/b/c/d/e would not.
The tr and "and" operators
Counting the number of path components is done using the tr operator, which is typically used to modify a string by transliterating one set of characters with another, e.g., $s =~ tr[a-z][A-Z] to change lowercase characters in $s to uppercase. However in this case the set of replacement characters is empty ([]) so no replacement is done; instead we simply use the standard return value from tr, namely the number of times the character(s) in the search list (i.e., the '/' character in this case) was found.
Since the value of $datadir is an absolute path (i.e., it starts with '/') and we trimmed any trailing '/' characters (see above) the number of '/' characters will be equal to the number of components in the path.
[Note: There is a minor potential bug here: If the value of $datadir were specified with multiple trailing slashes, e.g., /a/b/c//, then the code above would remove only a single trailing '/', leaving one extra '/' at the end, and the number of directory components would be miscounted as being higher than it actually is. The fix is simple: Replace the existing statement $datadir =~ s!/$!!; with the statement $datadir =~ s!/*$!!; to look for zero or more '/' characters at the end of the string and remove any found; even safer would be $datadir =~ s!/*\s*$!!; to remove trailing whitespace as well.]
The and operator here is used to conditionally change $depth only if it is non-zero; if $depth is zero then it is interpreted as false and the expression after the and is not executed. However any non-zero value will be interpreted as true and $depth modified as described above.
For more information see the following URLs:
http://www.perldoc.com/perl5.8.4/pod/perlop.html#Quote-and-Quote-like-Operators
http://www.perldoc.com/perl5.8.4/pod/perlreref.html
http://www.perldoc.com/perl5.8.4/pod/perlop.html#Logical-And
We set the variable $static_or_dynamic to 'static' or 'dynamic' to reflect the mode we're in. We're in static mode if all the following are true:
- the -password parameter has a value
- $static_password is defined (see above)
- the value of the -password parameter is the same as the value of $static_password
Otherwise we're in dynamic mode.
eq operator and CGI::param() function
We saw the and operator above. The eq operator tests for string equality. The expression $a ? $b : $c is like that used in C: if $a is true then return $b, otherwise return $c.
param() is a CGI function, but it can also return values when the Perl script is invoked from the command line, e.g.
perl blosxom.cgi -password='secret'
would assign the string value 'secret' to the parameter -password.
For more information see the following URLs:
http://www.perldoc.com/perl5.8.4/pod/perlop.html#Equality-Operators
http://www.perldoc.com/perl5.8.4/pod/perlop.html#Conditional-Operator
http://search.cpan.org/~lds/CGI.pm-3.05/CGI.pm#DEBUGGING
http://www.blosxom.com/documentation/users/configure/static.html
CGI::param() function
If we're in dynamic mode then we set the value of the -quiet parameter to be 1. When setting parameters the param() function takes an argument list similar in syntax to the way hashes are initialized, e.g., param(-name=>'a', -value=>'b') would set the parameter a to the value b.
For more information see the following URL:
http://search.cpan.org/~lds/CGI.pm-3.05/CGI.pm#SETTING_THE_VALUE(S)_OF_A_NAMED_PARAMETER:
PATH_INFO
PATH_INFO (the CGI environment variable whose value is returned by the path_info function) contains any path information in the URL after the part of the URL that identifies the CGI script. For example, if the requested URL were http://www.example.com/cgi-bin/blosxom.cgi/2004/05 then the value returned by path_info() would be /2004/05.
For more information see the following URL:
http://search.cpan.org/~lds/CGI.pm-3.05/CGI.pm#FETCHING_ENVIRONMENT_VARIABLES
split function
We use my to define a private array variable @path_info. To set this variable we first use the split function on the string returned by the path_info() function (if it's non-empty), splitting that value into different components separated by the '/' character. (m{/} is a regular expression that will match a single '/'.) The split function returns a list of strings, which is why we use an array to hold the result.
If for some reason path_info() returns an empty string then we split the value of the path CGI parameter instead. This would allow you to use a URL like
http://www.example.com/cgi-bin/blosxom.cgi?path=/2004/05/22
if you wished to do so. Note that the || operator has a higher precedence than the comma operator, so the decision whether to use the value of path_info() or param('path') is made before that value is passed to the split function.
For more information see the following URL:
http://www.perldoc.com/perl5.8.4/pod/func/split.html
http://www.perldoc.com/perl5.8.4/pod/perlop.html#Operator-Precedence-and-Associativity
shift function
The shift function discards the first element of an array. We do this because path_info() returns a path with an initial '/', and the split function as used above on that path will produce an empty string as the first element of the returned array; for example, the expression split m{/}, "/a/b/c" will return the list ('', 'a', 'b', 'c'). We don't want the initial empty string so we use shift to get the list ('a', 'b', 'c') instead.
For more information see the following URL:
http://www.perldoc.com/perl5.8.4/pod/func/shift.html
Recall that after the part of the URL that references the Blosxom script itself (stored in $url), a Blosxom URL can contain an additional path consisting of three possible parts: an optional set of categories, an optional set of year, month, and day values, and an optional reference to an individual entry. For example, the following are values that might be returned by the path_info() function as applied to Blosxom URLs:
/society/literature
/2004/05/19
/music/index.rss
/personal/resolutions/2003/07
/cooking/italian/bruschetta.html
From the path returned by path_info() we end up setting the following variables:
- $path_info: either an individual entry path including categories, subcategories, and entry name (e.g., /cooking/italian/bruschetta.html) or a category/subcategory path for which we wish to see all entries (e.g., /society/literature or /music)
- $flavour: the desired flavour, whether explicitly specified in the URL (e.g., 'html' for /cooking/italian/bruschetta.html or 'rss' for /music/index.rss) or defaulted (e.g., as in /society/literature/)
- $path_info_yr, $path_info_mo_num, and $path_info_da: the year, month, and day if present in the URL (e.g., for /personal/resolutions/2003/07 the year and month would be '2003' and '07' respectively while the day would be undefined)
Our first task is to extract the path information relating to categories; since we know that category names can't begin with a digit we can simply look for path components starting with alphabetic characters. However we have to stop before we get to any reference to an individual entry; we identify such entries by the presence of a '.' character in their names.
[Note: This implies two additional restrictions in Blosxom as currently designed: you can't have a category name containing a '.', and you can't reference individual entries using URLs that don't have a file extension at the end (as recommended by the W3C, among others).]
For more information see the following URLs:
http://www.blosxom.com/documentation/users/view.html
http://www.w3.org/Provider/Style/URI
while loop
A while loop executes a block of code (in curly braces) as long as a given condition (in parentheses) is true. In this case before executing the code block we first check to see if the first element of @path_info is defined and non-empty; otherwise there are no more components and we're done. ($a[i] is the i'th element of the array @a; note that it's distinct from the scalar variable $a.)
If we have a further component, we then check to see if its value starts with an alphabetic character, by trying to match it against the regular expression character class [a-zA-Z] starting at the beginning of the string ('^'); otherwise the component represents a date and not a category, and we're done.
Finally we check to verify that the component's value does not have a literal period (\.) in it; otherwise the component represents an individual entry (e.g., "a.html") and we're done. (The operator !~ is the reverse of =~, returning a true value when the pattern match fails.)
See the notes for line 112 below for the meaning of the parentheses in the regular expression /(.*)\.(.*)/ used to check for a period in the path component. For now we simply note that as used here the regular expression could have been replaced with the simpler regular expression /.*\..*/ without affecting things.
If the first element of @path_info looks like a category then we append it to the scalar variable $path_info, preceded by a '/', and remove the element from the @path_info array. ($path_info was defined above, with its initial value set to the empty string.) Note that shift @path_info both does the removal and returns the removed element as a result. The . operator concatenates two strings, in this case '/' and the removed first element. The .= assignment operator is like the ||= and += operators seen above, so that $a .= 'b' is the same as $a = $a . 'b', where the . operator concatenates two strings.
For more information see the following URL:
http://www.perldoc.com/perl5.8.4/pod/perlsyn.html#Compound-Statements
http://www.perldoc.com/perl5.8.4/pod/perlreref.html#CHARACTER-CLASSES
http://www.perldoc.com/perl5.8.4/pod/perlop.html#Additive-Operators
If the flavour is specified by index.{flav}, as in
http://www.example.com/cgi-bin/blosxom.cgi/music/index.rss
then it must be parsed from the PATH_INFO value stored in @path_info. However if the flavour is specified by ?flav={flav}, as in
http://www.example.com/cgi-bin/blosxom.cgi/music?flav=rss
then its value must be obtained using param(), since anything in the URL after a '?' is considered a CGI parameter and not part of PATH_INFO.
$#path_info
$#path_info returns the index of the last element of the array @path_info. We match the value of that last element against a regular expression consisting of one or more characters followed by a literal '.' character followed by one or more characters to the end of the string. This match will succeed when the last element looks like, e.g., 'a.b', where we'll interpret 'b' as the flavour.
(Note that this regular expression is slightly different from the one used in the while loop on line 107; the previous expression matched zero or more characters followed by a '.' followed by zero or more characters. In other words, the test at line 107 will match . by itself, .a, a., and so on, while the test here will not. In practice this doesn't matter: the first test was simply intended to reject path components that weren't categories, which can't contain '.'; the second test is intended to find flavour values, and for that purpose we need a component that actually has something after the '.', as well as before.)
The regular expression uses parentheses to save parts of the component that are matched, for later use. In particular, the regular expression /(.+)\.(.+)/ is used (instead of the simpler /.+\..+/) to save the flavour value (matched by the expression in the second set of parentheses) and the entry name (matched by the expression in the first set of parentheses). The saved values can then be referenced by the special variables $1 (first part matched, the entry name) and $2 (second part matched, the flavour).
For more information see the following URLs:
http://www.perldoc.com/perl5.8.4/pod/perldata.html#Variable-names
http://www.perldoc.com/perl5.8.4/pod/perlreref.html#SYNTAX
If the last value in @path_info does contain a '.' character then as noted above the value of the variable $2 will be the string to the right of the '.', and we save that value in $flavour.
Because of the way regular expression matching works, if the final component actually has two or more periods, e.g., "example.com-news.html", $2 will be set to the string after the final '.', not the string after the first one. This "greedy" matching (i.e., match as many characters as you can) is exactly what we want to happen.
For more information see the following URL:
http://www.perldoc.com/perl5.8.4/pod/perlreref.html#QUANTIFIERS
If the first part (before the '.') of the last path component is not equal to 'index' then that component points to an individual entry, and we save both the entry name and flavour by appending them to the $path_info variable that stores the category components of the path.
On the other hand, if the first part is 'index' then the original URL was not a request for an individual entry but rather a request for all entries in a particular category or for a particular day, month, or year, displayed using a specified flavour. For such requests the path might be something like /a/b/index.rss or /2004/05/index.rss. In this case we don't need to save the value 'index.rss' (or whatever) as part of $path_info, since all we need is the flavour value.
pop function
Now that we've extracted the needed information from the last element of @path_info we use the pop function to remove it.
For more information see the following URL:
http://www.perldoc.com/perl5.8.4/pod/func/pop.html
If the final component of the path does not contain a period then either the flavour was specified using the flav parameter, as in the URL
http://www.example.com/cgi-bin/blosxom.cgi/a/b?flav=rss
or the flavour was omitted entirely. In the latter case we set $flavour to the default flavor defined in the configurable variables section.
Using | in a regular expression lets you search for (and in this case replace) two or more alternative patterns, in this case zero or more '/' characters at the beginning of $path_info and zero or more at the end. The 'g' option replaces all patterns found, so we replace both '/' characters found at the beginning and any found at the end.
For more information see the following URL:
http://www.perldoc.com/perl5.8.4/pod/perlretut.html#Matching-this-or-that
At this point we've extracted from @path_info any category names (at the beginning of the path) and any final path component associated with either an individual entry or an index.{flav} reference. So the only components left in @path_info should be date references (if any) from URLs containing sequences like /2004/05/19, /2004/05, or /2004.
This statement assigns $path_info[0] (i.e., the first element in the array @path_info) to $path_info_yr, $path_info[1] to $path_info_mo, and $path_info[2] to $path_info_da. If @path_info doesn't have three elements then some or all of the three variables may end up undefined (starting with $path_info_da).
In general you can assign a list of scalar values into a list of scalar variables:
($a, $b, $c) = (1, 2, 3);
($a, $b, $c) = @d;
where the righthand side could be a constructed list (using ','), an array, a function returning a list, or any other expression returning a list.
For more information see the following URL:
http://www.perldoc.com/perl5.8.4/pod/perldata.html#List-value-constructors
[Note: Although I don't believe the online documentation mentions this, based on this code it appears that you can use Blosxom URLs that identify months by their three-letter abbreviations instead of month numbers; so, for example, rather than identifying the date as /2004/01/31 it appears that you could request it as /2004/Jan/31.
If so, there's no danger in mistaking a month abbreviation for a category name since the month must be preceded by a four-digit year, and Blosxom stops parsing the URL for categories as soon as it hits a component starting with a digit.]
lc, ucfirst, and undef functions
This statement can be paraphrased as follows: if $path_info_mo has a (non-empty) value, then check to see if that value is a string with (at least) two digits (i.e., it matches the regular expression \d{2}); if so, assign the value of $path_info_mo to $path_info_mo_num. If $path_info_mo has a value that doesn't contain two digits, then put the value in "initial cap" form and look it up in the %month2num hash to see if the value is a month abbreviation; if so, assign the month number from the hash to $path_info_mo_num.
If the value of $path_info_mo doesn't look like a month number or month abbreviation, or if it's empty or undefined, then $path_info_mo_num is undefined as well.
The function lc returns the lower-case equivalent of its string argument, and the ucfirst function returns a copy of its argument with the first letter (only) capitalized. Hence ucfirst(lc 'jaN') returns the value 'Jan', which is the capitalization style used in %month2num.
The function undef returns an undefined value that (as in this case) can be assigned to a variable.
For more information see the following URLs:
http://www.perldoc.com/perl5.8.4/pod/perlreref.html#CHARACTER-CLASSES
http://www.perldoc.com/perl5.8.4/pod/perlreref.html#QUANTIFIERS
http://www.perldoc.com/perl5.8.4/pod/func/lc.html
http://www.perldoc.com/perl5.8.4/pod/func/ucfirst.html
http://www.perldoc.com/perl5.8.4/pod/func/undef.html
The template subroutine is used to look for and return the contents of flavour template files (e.g., head.html, foot.html, etc.). It can be overridden by a plugin that defines its own template subroutine; see the notes for line 161.
sub { ... } defines an "anonymous" (i.e., not named) subroutine, a reference to which is then assigned to the variable $template. (References are basically names that can be used to refer to variables and subroutines, and are the third type of value that a scalar variable can have, along with numbers and strings.) The subroutine can then be called using the syntax &$template() where you can put subroutine arguments inside the parentheses.
The template subroutine is defined in this way (using a reference stored in a variable rather than a named subroutine) so that the subroutine can be overridden; a plugin can define its own template subroutine, and a reference to that can be assigned to $template, replacing the reference to the original subroutine defined here.
For more information see the following URLs:
http://www.perldoc.com/perl5.8.4/pod/perlsub.html
http://www.perldoc.com/perl5.8.4/pod/perlref.html
$flavour is the flavour for which we are looking, e.g., 'html', 'rss', etc. $chunk is the type of template we are looking for, e.g., 'head', 'foot', 'story', etc. $path is the directory at which we should start our search, expressed as a relative pathname relative to the Blosxom data directory.
@_: Arguments to the subroutine are passed in a special array variable @_, with the first three elements of that array assigned to the private variables $path, $chunk, and $flavour respectively.
do while loop: A do while loop is like a while loop except that the condition is checked at the bottom (after the loop is executed at least once) instead of at the top.
(The similarity between while and do while loops is only superficial, since in Perl the do {...} while construct isn't considered to be a true loop. In particular, you can't put next and last statements within a do {...} while; see the notes for lines 141 and 240.)
For more information see the following URL:
http://www.perldoc.com/perl5.8.4/pod/func/do.html
join and open functions: The following statement is basically a backwards if statement. First we use the FileHandle $fh (created above) and try to open a template file for read access ("<"), constructing a template pathname from the values of $datadir, $path, $chunk, and $flavour. So, for example, if $datadir is '/blosxom', $path is '/a/b', $chunk is 'head', and $flavour is 'html', we look for a flavour template file '/blosxom/a/b/head.html'.
(Because the FileHandle module provides an object-oriented interface, we use the method invocation $fh->open(...) rather than the function call open($fh, ...). Also note that if we have already opened a file using the FileHandle $fh that file will be closed first before we open a new one.)
If the open succeeds (i.e., the template file exists and is readable) then we read in all the lines of the template file using the $fh FileHandle and return a string containing all those lines concatenated together.
(<$fh> would normally read only one line of the file, but using the join function causes <$fh> to be used in a list context -- because join expects a list as its second argument -- and that causes <$fh> to read all lines and return them as an array, with each array element being a newline-terminated line. The join function then returns a string consisting of all the array elements concatenated together separated by the join function's first argument, which in this case happens to be the empty string. So the returned result is a single string containing all the lines in the flavour template file, each terminated by a newline, e.g.,
<html>\n<body>\n<h1>A Blog</h1>\n...
for a typical head section.)
For more information see the following URLs:
http://search.cpan.org/~nwclark/perl-5.8.4/lib/FileHandle.pm
http://www.perldoc.com/perl5.8.4/pod/perlopentut.html
http://www.perldoc.com/perl5.8.4/pod/func/open.html
http://www.perldoc.com/perl5.8.4/pod/func/join.html
http://www.perldoc.com/perl5.8.4/pod/func/return.html
If the open fails (e.g., there was no file at the location we looked) then we modify the value of $path by stripping off the last path component (e.g., if $path has the value /a/b we change it to /a) and then we go back to the top of the loop and try the open again. (In other words, we search for the template file in the parent directory of the directory we just looked in.)
(To explain the regular expression a bit: '\/' matches a literal '/' and '[^\/]' matches anything but a slash, so '\/*[^\/]*' matches zero or more '/' characters followed by zero or more other characters. The regular expression \/*[^\/]*$ means look for this pattern at the end of the string, so that when the substitution is done -- replacing the matched pattern by an empty string -- it removes the last component of $path. Finally, we use parentheses to save the matched pattern in the $1 variable for later checking, hence (\/*[^\/]*)$ is the final regular expression used.)
If we never succeed in opening a template file then the loop ends once all the path components have been removed: at that point the matched pattern is an empty string, so $1 is empty and hence false, and the and test fails.
If we never succeed in opening a template file (i.e., we drop out of the do while loop) then we return a string consisting of lines from a flavour template already stored in a multidimensional hash, using $flavour and $chunk as keys. (This hash is defined below; recall that right now we are defining the subroutine, not executing it. See the notes for line 144 for more information.)
We set the %template hash variable to contain nothing, i.e., no keys and no values.
Read in and store the default templates defined in the data section of this file, saving them in %template.
<DATA>: The <DATA> operator causes lines to be read from the data section of this file (i.e., blosxom.cgi). The data section starts after a line consisting of __DATA__ by itself. In this context <DATA> returns a line at a time, returning an undefined value (and thus ending the while loop) when we reach the end of the file.
For more information see the following URL:
http://www.perldoc.com/perl5.8.4/pod/perldata.html
__END__ and last: Using <DATA> would continue to read lines after __DATA__ until the end of the blosxom.cgi file. However, in our case we may want to put some additional text after the __END__ line (which marks the end of what the Perl compiler parses). We therefore explicitly check for the presence of __END__ on a line by itself, and if we find it we use the last command to exit the while loop immediately.
Note that since we are not using the =~ operator the string pattern match is done against the special variable $_ that holds the line just read from the data section using <DATA>.
For more information see the following URL:
http://www.perldoc.com/perl5.8.4/pod/perlsyn.html#Loop-Control
Note that there are a couple of subtle points about the test for __END__. First, the test is actually for either zero or one occurrence of __END__, so the test would succeed (and the reading of data would end) if the __DATA__ section contained a blank line at some point. Second, the pattern match requested is for __END__ starting from the beginning of the line (^) and ending at the end of the line ($), with nothing else present. But the string being tested against (the value of the $_ variable) does in fact have something else in it, namely a newline at the end of the string.
Why then does the test work? Because as noted in the Perl online documentation, "the '^' character is guaranteed to match only the beginning of the string, the '$' character only the end (or before the newline at the end), ..." (emphasis added). In other words, the newline at the end of $_ is ignored for the purpose of matching the specified pattern /^(__END__)?$/.
For more information see the following URL:
http://www.perldoc.com/perl5.8.4/pod/perlre.html#Regular-Expressions
As can be seen by looking at the data sections below, the default templates are each defined as a single line containing the flavour, the type of template, and the template data itself, each field separated by whitespace. We therefore parse each line of the data section into three whitespace-separated fields, and then assign the values to the private variables $ct, $comp, and $txt respectively.
In the code thus far we have seen pattern matching done in a scalar context; in that context a pattern match simply returns a true value if the match succeeds and a false value if it does not. However here the pattern match is being done in a list context because of the assignment to ($ct, $comp, $txt). (Recall that this is comparable to an assignment of the form @a = ... where @a is an array variable.)
When done in a list context a pattern match will return an array ($1, $2, ...) containing the parts of the string that were matched. Hence in this context $ct will be assigned the value of $1, $comp will be assigned the value of $2, and $txt will be assigned the value of $3.
To expand a bit on the regular expression: \s matches a whitespace character (space, tab, etc.) and \S matches a non-whitespace character. The first field gets matched by ^(\S+), the second field gets matched by (\S+), and the third field (which can contain spaces) gets matched by (.*)$; the field patterns are then separated by the \s pattern.
[Note: The regular expression looks for a single whitespace character between the fields. On each line in the data section there is in fact only a single space between the flavour specifier and the template type specifier, so this works out OK. However on some lines there is more than one space between the template type specifier and the template content. This does not cause any problem in practice, since the pattern for the third field can match spaces; the extra spaces are simply included as leading whitespace in the value matched for the third field and then assigned to $txt.]
We modify $txt to change literal occurrences of '\n' (i.e., the '\' character followed by the character 'n') to occurrences of the newline character.
'\\' in the pattern being searched for matches a literal '\', and '\n' in the replacement string is interpreted as a newline character. The g option does a global search and replace as noted above while the m option searches in multiline mode.
Multiline mode treats the string as a multiline buffer, so you can use '^' and '$' to match at the beginning and end of newline terminated substrings within the string as a whole.
[Note: It's not exactly clear why multiline mode is used in this context, particularly since the regular expression doesn't use either '^' or '$'; in testing the substitution seemed to work fine even without the m option.]
We store the default flavour template text read from the data section, indexing it by the flavour and type of content.
The usage $a{$b}{$c} is an example of the use of Perl references to simulate multi-dimensional arrays or nested hashes. To expand on this: the syntax $a{$b}{$c} is equivalent to $a{$b}->{$c}, which in turn is equivalent to ${$a{$b}}{$c}. Here %a is a hash, the value of $b is a key for that hash, and the hash value $a{$b} is a reference that points to another hash. (The second hash is anonymous, i.e., it has no name of its own.) To refer to a value in the second hash we use ${$a{$b}}{$c} where the value of $c is a key in the second hash. As noted above we can also use the syntax $a{$b}->{$c} instead, and can in turn shorten that to $a{$b}{$c}.
When we make an assignment like $a{$b}{$c} = "def" Perl automagically creates the anonymous hash and stores a reference to it in $a{$b}. If Perl didn't do this then you'd have to go through the following machinations to make the same assignment (assuming that the hash %a already existed):
%h = (); # Create an empty hash %h
$h{$c} = "def"; # Store value "def" in %h at key $c
$a{$b} = \%h; # Store reference to %h in hash %a at key $b
In this example the value could then be referenced as either $h{$c} or ${$a{$b}}{$c}. Per the online Perl documentation, "Anywhere you'd put an identifier ... as part of a variable ... name, you can replace the identifier with a simple scalar variable containing a reference of the correct type". So we are replacing the identifier "h" in $h{$c} with the scalar variable $a{$b} that contains a hash reference. Note that the extra pair of curly braces is required here: because $a{$b} is not a simple scalar variable, writing $$a{$b}{$c} would instead be parsed as ${$a}{$b}{$c}, i.e., it would treat the scalar $a (not the hash element $a{$b}) as the hash reference. ${$a{$b}}{$c} then becomes $a{$b}{$c} through the alternative syntax discussed above.
For more information see the following URLs:
http://www.perldoc.com/perl5.8.4/pod/perlreftut.html
http://www.perldoc.com/perl5.8.4/pod/perlref.html#Using-References
opendir function: If there's a plugin directory defined we open it and look for plugins, using the file handle PLUGINS; we use the opendir function instead of open because we are opening a directory, not a regular file.
For more information see the following URL:
http://www.perldoc.com/perl5.8.4/pod/func/opendir.html
readdir, grep, and -f functions and the foreach loop: Working backwards from the end of the statement, we use the readdir function to return a list of all the entries in the plugin directory, and then use the sort function to sort those entries in the default (alphabetical) order. (readdir returns all directory entries because it's being executed in a list context, since sort expects a list argument; otherwise readdir would return one directory entry at a time.)
We then use the grep function to test each of the sorted directory entries against the specified expression (in curly braces) and return a list consisting of only those entries for which the expression is true. In this case the expression for grep is a compound expression consisting of a regular expression and a file test function anded together.
We first test using /^\w+$/ to make sure that the directory entry starts with and contains only alphanumeric characters or '_'; this eliminates directory entries for . (the current directory), .. (the parent directory), and hidden files (e.g., .a). (Note that we don't use the =~ operator here because we are matching against the special variable $_ that grep sets in turn to hold the value of each element of the list passed to it.)
We then test using the file test function -f "$plugin_dir/$_" to verify that the directory entry actually is a file and not something else; this eliminates directory entries for the plugin state directory and other subdirectories that might be present, as well as directory entries for special files like device files, named pipes, and the like. (Again we reference the special $_ variable set by grep.)
[Note: Symbolic links do pass the -f test (at least on Unix and Unix-like systems) if (and only if) they point to regular files. Unless other considerations apply, this should allow you to put a plugin file in another directory and put a symlink in the plugin directory itself.]
Finally, we use a foreach loop to iterate over each element in the list of plugins, assigning the value of each element to the variable $plugin in turn and executing the statements in the following code block.
For more information see the following URLs:
http://www.perldoc.com/perl5.8.4/pod/func/readdir.html
http://www.perldoc.com/perl5.8.4/pod/func/grep.html
http://www.perldoc.com/perl5.8.4/pod/func/-X.html
http://www.perldoc.com/perl5.8.4/pod/perlsyn.html#Foreach-Loops
http://www.perldoc.com/perl5.8.4/pod/perlvar.html
Recall that plugins can have a (normally two-digit) number at the beginning of their names (to enforce a particular plugin order) and can also have an underscore character ('_') at the end of their names to disable them from being used.
Here we use a regular expression to match and save the actual plugin name and look for a concluding '_' if present. (We no longer need the numeric prefix since we are now processing the plugins in the proper sort order.) Note that the regular expression as written allows underscores to be used as part of the plugin name itself; only an underscore at the end is special.
The plugin name and the (optional) trailing underscore are saved in the special variables $1 and $2 and then assigned to the private variables $plugin_name and $off respectively. (See the note to line 142 for more information on pattern matching in a list context.)
If the final underscore is present ($off has the value '_') we set $on_off to -1 to indicate that the plugin is disabled; otherwise $on_off is set to 1 to indicate an active plugin.
require function: We include the code for the current plugin. (This is somewhat analogous to #include in C.) Note that since we are supplying a pathname the require function will look for the plugin at the pathname (instead of looking in the directories specified by @INC, the Perl search path analogous to LD_LIBRARY_PATH and similar environment variables in Unix.)
[Note: The Perl online documentation for require mentions only searching in @INC directories for a filename, and does not explicitly address using a full pathname. This is presumably just an oversight.]
For more information see the following URL:
http://www.perldoc.com/perl5.8.4/pod/func/require.html
Now that the code for this plugin has been loaded we can call subroutines defined in the plugin. We first call the plugin's start routine, using the plugin's name in a method invocation (see below). Assuming that the start routine exists and returns a true value, we then use the plugin's name as a key to put the plugin's $on_off value into the %plugins hash. Finally, we create a new element in the @plugins array and set its value to the plugin name. (Recall that %plugins and @plugins are entirely different variables that just happen to share the same name.)
Note that we set $on_off to the value -1 for off instead of 0 because otherwise the middle expression (between the two and's) would have evaluated false, and we would never have executed the third expression to set @plugins.
For those wanting a more in-depth explanation, calling the start routine works as follows:
A plugin "abc" has to define a package abc, as noted in the Blosxom plugin developer documentation. So as a result of the "abc" plugin being loaded (by require) we can now refer to subroutines and variables defined by the package. (Strictly speaking we can't refer to everything defined by the package, but let's ignore that for now.) For example, if a scalar variable $foo is defined by plugin "abc" (i.e., package abc) then we could refer to it as $abc::foo to obtain its value. Similarly we could call the start subroutine in package abc using the notation abc::start().
However we have a problem: the Blosxom code doesn't know beforehand that there's going to be a plugin "abc" (or "foo", or whatever), so the Blosxom code can't use abc::start() to invoke package abc's start subroutine. The solution is to use a different way to call a routine defined in a plugin: Blosxom invokes abc::start as a method rather than calling it as a subroutine.
Methods are a concept from object-oriented (OO) programming, in which (in theory) everything of interest is an "object", objects can belong to "classes", classes can have "methods" that operate on objects of that class, classes can be "subclasses" of higher-level classes, and so on.
For Blosxom (at least Blosxom 2.0) we don't need to worry about the full OO story, we simply need to know that in Perl terms an object is just a reference, a class is simply a package and a method is a subroutine defined by a package. So in our example rather than using abc::start() to call the start subroutine in package abc, we can use the method invocation notation abc->start() instead. (Method invocation doesn't work exactly like subroutine calling, particularly in terms of which arguments are passed, but we can ignore that for now.)
However we still have the problem of Blosxom not knowing about package abc beforehand, so using abc->start() won't work either. Fortunately in method invocation instead of a package identifier to the left of the -> we can substitute a scalar variable whose value is a string representing a valid package name. In particular, rather than using abc->start() to invoke the start subroutine (using the package identifier abc), we can set a scalar variable $foo to the value "abc", and then use $foo->start() to invoke the subroutine. (We're using $foo as an example; Blosxom actually uses the variable $plugin_name previously assigned.)
For more information see the following URLs:
http://www.blosxom.com/documentation/developers/plugins.html
http://www.perldoc.com/perl5.8.4/pod/perlobj.html#Method-Invocation
[Note: (This is for people like me who get led astray reading Perl documentation.) The usage $foo->start()