## Subroutines for converting R documentation into text, HTML, LaTeX and ## R (Examples) format ## Copyright (C) 1997-2009 R Development Core Team ## ## This program is free software; you can redistribute it and/or modify ## it under the terms of the GNU General Public License as published by ## the Free Software Foundation; either version 2, or (at your option) ## any later version. ## ## This program is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## A copy of the GNU General Public License is available at ## http://www.r-project.org/Licenses/ ## Send any bug reports to r-bugs@r-project.org. package R::Rdconv; require Exporter; @ISA = qw(Exporter); @EXPORT = qw(Rdconv); use File::Basename; use FileHandle; use Text::DelimMatch; use Text::Tabs; use Text::Wrap; use R::Utils; use R::Vars; if($main::opt_dosnames) { $HTML = ".htm"; } else { $HTML = ".html"; } ## Names of unique text blocks, these may NOT appear MORE THAN ONCE! @blocknames = ("name", "title", "usage", "arguments", "format", "description", "details", "value", "references", "source", "seealso", "examples", "author", "note", "synopsis", "docType", "encoding", "Rdversion"); ## These may appear multiply but are of simple structure: @multiblocknames = ("alias", "keyword"); ## These should NOT contain letters from $LATEX_SPEC $NB = "normal-bracket"; $BN = "bracket-normal"; $EOB = "escaped-opening-bracket"; $ECB = "escaped-closing-bracket"; $ID = "$NB\\d+$BN"; ## regex for escaped {...} $EPREFORMAT = "this-is-preformat-code"; $ECODE = "this-is-escaped-code"; $LATEX_SPEC = '\$\^&~_#';#-- these **should** be escaped in text2latex(.) $LATEX_SPECIAL = $LATEX_SPEC . '%\{\}\\\\'; ## $MD = ',,,Math,del;;;'; #-- should NOT contain any characters from $LATEX_.. ## $Math_del = "\$"; #UNquoted '$' $MAXLOOPS = 10000; my $EDASH = "escaped-dash"; # maybe something better? my $ECMD = "escaped-command"; # maybe something better? ## In addition to \code, the following commands are special: dashes in ## their arguments need to be left alone (otherwise, e.g. \samp{--no} ## would give '-no' when converted to text). my @special_commands = ("command", "env", "file", "kbd", "option", "samp", "url", "var"); sub isNonASCII { return $_[0] =~ /[^A-Za-z0-9[:punct:][:space:]]/ } sub Rdconv { # Rdconv(foobar.Rd, type, debug, filename, pkgname, version, def_encoding) $Rdname = $_[0]; open(my $rdfile, "<$Rdname") or die "Rdconv(): Couldn't open '$Rdname': $!\n"; ## This was not previously being closed: now closed when ## goes out of scope. $type = $_[1]; $debug = $_[2]; $pkgname = $_[4]; $version = $_[5]; $def_encoding = $_[6]; $def_encoding = "unknown" unless $def_encoding; $Rdfile = basename($Rdname); $Rdversion = "1.0"; # the default if($type !~ /,/) { ## Trivial (R 0.62 case): Only 1 $type at a time ==> one ## filename is ok. ## filename = 0 ==> use stdout $htmlfile = $txtfile = $Sdfile = $latexfile = $Exfile = $chmfile = $_[3]; $Rdname = basename($Rdname, (".Rd", ".rd")); } else { ## Have ',' in $type: Multiple types with multiple output files $dirname = $_[3]; # The super-directory, such as # '/library/' die "Rdconv(): '$dirname' is NOT a valid directory: $!\n" unless -d $dirname; $Rdname = basename($Rdname, (".Rd", ".rd")); $htmlfile = file_path($dirname, "html", $Rdname . $HTML) if $type =~ /html/i; $txtfile= file_path($dirname, "help", $Rdname) if $type =~ /txt/i; $latexfile = file_path($dirname, "latex", $Rdname . ".tex") if $type =~ /tex/i; $Exfile = file_path($dirname, "R-ex" , $Rdname . ".R") if $type =~ /example/i; } $max_bracket = 0; $max_section = 0; undef $complete_text; undef %blocks; undef @section_body; undef @section_title; ## Remove comments (everything after a '%') and CR in CRLF ## terminators. my $skip_level; my @skip_state; my $skip; while(<$rdfile>){ $_ = expand $_; s/\r//; ## ## Copied from Rdtools::Rdpp() so that nested conditionals are ## handled correctly. Should really *call* Rdpp() instead. ## Known OSdirs are actually ASCII, so this test is OK if (/^#ifdef\s+([A-Za-z0-9]+)/o) { $skip = $1 ne $main::OSdir; $skip_level += $skip; push(@skip_state, $skip); next; } if (/^#ifndef\s+([A-Za-z0-9]+)/o) { $skip = $1 eq $main::OSdir; $skip_level += $skip; push(@skip_state, $skip); next; } if (/^#endif/o) { $skip_level -= pop(@skip_state); next; } next if $skip_level > 0; if (/^\s*%/o) { ## replace comment lines by blank lines, to keep line nos. $complete_text .= "%blank_line\n"; } else { my $loopcount = 0; while(checkloop($loopcount++, $_, "\\%") && s/^\\%|([^\\])((\\\\)*)\\%/$1$2escaped_percent_sign/go) {}; s/^([^%]*)%.*$/$1/o; s/escaped_percent_sign/\\%/go; $complete_text .= $_; } } printf STDERR "-- read file '%s';\n",$_[0] if $debug; ## don't want encoding unless non-ASCII $def_encoding = "unknown" unless &isNonASCII($complete_text); ## extract Rdversion early $complete_text =~ /\\Rdversion\{([0-9.]+)\}/; $Rdversion = $1; macro_subs(); mark_brackets(); ## now remove comment-only lines $complete_text =~ s/%blank_line\n//sgo; ##HARD Debug:print "$complete_text\n"; exit; escape_preformats(); escape_codes(); if($debug) { print STDERR "\n--------------\nescape codes: '\@ecodes' =\n"; while(my($id,$code) = each %ecodes) { print STDERR "\t\$ec{$id}='$code'\n"; } } if($type) { ##-- These may be used in all cases : @aliases = get_multi($complete_text,"alias"); @keywords= get_multi($complete_text,"keyword"); get_blocks($complete_text); ## these are intentionally global $Rdversion = $blocks{"Rdversion"} if defined $blocks{"Rdversion"}; $verbatimdollar = $Rdversion >= 1.1; $unescape_amp = $Rdversion < 1.1; print STDERR "-- Rd version is $Rdversion\n" if $debug; if($type =~ /html/i || $type =~ /txt/i || $type =~ /tex/i || $type =~ /chm/i ) { get_sections($complete_text); } elsif($type =~ /example/i ) { ; } else { warn "\n** Rdconv --type '..' : no valid type specified\n"; } ## Remove empty sections. foreach my $key (keys %blocks) { if($blocks{$key} =~ /^[[:space:]]*$/) { warn "Note: removing empty section \\${key} in file '$Rdfile'\n"; delete $blocks{$key}; } } my ($section, $title, @nonempty); for($section = 0; $section < $max_section; $section++) { if($section_body[$section] =~ /^[[:space:]]*$/) { $title = $section_title[$section]; warn "Note: removing empty section \\section\{$title\} in file '$Rdfile'\n"; } else { push(@nonempty, $section); } } @section_title = @section_title[@nonempty]; @section_body = @section_body [@nonempty]; $max_section = scalar(@section_title); $issue_warnings = 1; $issue_warnings = 0 if $Rdversion >= 1.1; rdoc2html($htmlfile, $def_encoding) if $type =~ /html/i; rdoc2txt($txtfile, $def_encoding) if $type =~ /txt/i; rdoc2latex($latexfile, $def_encoding) if $type =~ /tex/i; rdoc2chm($chmfile, $def_encoding) if $type =~ /chm/i; while($text =~ /$EPREFORMAT($ID)/){ my $id = $1; my $ec = $epreformat{$id}; $text =~ s/$EPREFORMAT$id/$ec/; } rdoc2ex($Exfile, $def_encoding) if $type =~ /example/i; } else { warn "\n*** Rdconv(): no type specified\n"; } } sub checkloop { my $loopcount = $_[0]; my $text = $_[1]; my $what = $_[2]; if($loopcount > $MAXLOOPS){ while($text =~ /$ECODE($ID)/){ my $id = $1; my $ec = $ecodes{$id}; $text =~ s/$ECODE$id/$ec/; } $text = unmark_brackets($text); die("\n\n******* Syntax error: $what in\n/-----\n$text\\-----\n"); } 1; } sub macro_subs { # does macro substitution on $complete_text print STDERR "\n-- macro_subs:" if $debug; $complete_text =~ s/\\(R|dots|ldots)\{\}/\\$1/go if $Rdversion >= 1.1; $complete_text =~ s/\\linkS4class\{([^}]*)\}/\\link[=$1-class]{$1}/g; } ## Mark each matching opening and closing bracket with a unique id. ## Idea and original code from latex2html sub mark_brackets { $complete_text =~ s/^\\{|([^\\])((\\\\)*)\\{/$1$2$EOB/gso; $complete_text =~ s/^\\}|([^\\])((\\\\)*)\\}/$1$2$ECB/gso; print STDERR "\n-- mark_brackets:" if $debug; my $loopcount = 0; while(checkloop($loopcount++, $complete_text, "mismatched or missing braces") && $complete_text =~ /{([^{}]*)}/s) { my $id = $NB . ++$max_bracket . $BN; die "too many pairs of braces in file '$Rdfile'" if $max_bracket > $MAXLOOPS; $complete_text =~ s/{([^{}]*)}/$id$1$id/s; print STDERR "." if $debug; } # Any remaining brackets must be unmatched ones. # However, unmatched brackets are sometimes legal, # (e.g. \alias{{}), so only warn. # matching } for editors # But as from R 2.9.0 they are not needed and parse_Rd does not allow them, # so increase Note to Warning. if ($complete_text =~ /([{}])/s) { my $badlineno = 0 ; foreach my $line (split /\n/, $complete_text) { $badlineno++; if ($line =~ /([{}])/) { my $extra_info = "\'$1\'" ; $extra_info = "\'$1\'" if $line =~ /(\\\w+{)/ ; # } if( $extra_info =~ /^'}'$/ ) { warn "Warning: unmatched right brace in file '$Rdfile'". " on line $badlineno\n"; } elsif(! ($extra_info =~ /\\alias{/) ) { # } warn "Warning: unmatched brace ($extra_info) in file '$Rdfile'". " on line $badlineno\n"; } } } } } sub unmark_brackets { my $text = $_[0]; my $loopcount = 0; while(($loopcount++ < $MAXLOOPS) && $text =~ /($ID)(.*)($ID)/s) { $id = $1; if($text =~ s/$id(.*)$id/\{$1\}/s) { $text =~ s/$id(.*)$id/\{$1\}/so; } else{ # return $text; $text =~ s/$id/\{/so; } } $text =~ s/$EOB/\{/gso; $text =~ s/$ECB/\}/gso; $text; } sub escape_codes { print STDERR "\n-- escape_codes:" if $debug; my $loopcount = 0; while(checkloop($loopcount++, $complete_text, "while replacing all \\code{...}") && $complete_text =~ /\\code/) { my ($id, $arg) = get_arguments("code", $complete_text, 1); $complete_text =~ s/\\code$id(.*)$id/$ECODE$id/s; $ecodes{$id} = $1; print STDERR "," if $debug; } } sub escape_preformats { print STDERR "\n-- escape_preformats:" if $debug; my $loopcount = 0; while(checkloop($loopcount++, $complete_text, "while replacing all \\preformatted{...}") && $complete_text =~ /\\preformatted/ ){ my ($id, $arg) = get_arguments("preformatted", $complete_text, 1); $complete_text =~ s/\\preformatted$id(.*)$id/$EPREFORMAT$id/s; my $txt = $1; # strip spaces/tabs on last line from Rd formatting in emacs. $txt =~ s/[ \t]+$//; $epreformats{$id} = $txt; $found_any = 1; print STDERR "," if $debug; } $complete_text } ## Write documentation blocks such as title, usage, etc., into the ## global hash array %blocks. sub get_blocks { my $text = $_[0]; my $id=""; print STDERR "--- Blocks\n" if $debug; foreach $block (@blocknames){ if($text =~ /\\($block)($ID)/){ ($id, $blocks{$block}) = get_arguments($block, $text, 1); print STDERR "found: $block\n" if $debug; if((($block =~ /usage/) || ($block =~ /examples/))) { ## multiple empty lines to one $blocks{$block} =~ s/^[ \t]+$//; $blocks{$block} =~ s/\n\n\n/\n\n/gom; } else { ## remove leading and trailing whitespace $blocks{$block} =~ s/^\s+//so; $blocks{$block} =~ s/\s+$//so; $blocks{$block} =~ s/\n[ \t]+/\n/go; } # no formatting commands allowed in the title string if($block =~ /title/) { if($blocks{"title"} =~ /$ID/){ die("\nERROR in file '$Rdfile': Environment ". "(text enclosed in \{\}) found in \\title\{...\}.\n". "The title must be plain text!\n\n"); } } } } print STDERR "---\n" if $debug; } ## Get *ALL* multiblock things -- their simple arg. is put in array: sub get_multi { my ($text, $name) = @_; # name: "alias" or "keyword" my @res, $k=0; print STDERR "--- Multi: $name\n" if $debug; my $loopcount = 0; my $any = 0; while(checkloop($loopcount++, $text, "\\name") && $text =~ /\\$name$ID/) { my $id = $1; my ($endid, $arg) = get_arguments($name, $text, 1); print STDERR "found:" if $debug && $k==0; print STDERR " $k:$arg" if $debug; $arg =~ s/^\s*(\S)/$1/; $arg =~ s/\n[ \t]*(\S)/\n$1/g; $arg =~ s/\s*$//; if($arg) { $res[$k++] = $arg; } else { $any++; } $text =~ s/\\$name//s; } print STDERR "\n---\n" if $debug; warn "Note: ignoring empty \\${name} entries in file '$Rdfile'\n" if($any); @res; } ## Write the user defined sections into the global hashs @section_body ## and @section_title. sub get_sections { my $text = $_[0]; print STDERR "--- Sections\n" if $debug; my $loopcount = 0; while(checkloop($loopcount++, $text, "\\section") && $text =~ /\\section$ID/){ my $id = $1; my ($endid, $section, $body) = get_arguments_check("section", $text, 2); print STDERR "found: $section\n" if $debug; ## remove leading and trailing whitespace $section =~ s/^\s+//so; $section =~ s/\s+$//so; $body =~ s/^\s+//so; $body =~ s/\s+$//so; $body =~ s/\n[ \t]+/\n/go; $section_body[$max_section] = $body; $section_title[$max_section++] = $section; $text =~ s/\\section//s; } print STDERR "---\n" if $debug; } ## Get the arguments of a command. sub get_arguments { my ($command, $text, $nargs) = @_; ## Arguments of get_arguments: ## 1, command: next occurence of 'command' is searched ## 2, text: 'text' is the text containing the command ## 3, nargs: the optional number of arguments to be extracted; ## default 1 my @retval; ## Returns a list with the id of the last closing bracket and the ## arguments. if($text =~ /\\($command)(\[[^\]]+\])?($ID)/){ $id = $3; $text =~ s/$id(.*)$id/$id/s; $retval[1] = $1; my $k = 2; while(($k <= $nargs) && ($text =~ /$id($ID)/)){ ## this merely checked for a left or right brace: ## we need a left brace starting a pair for \eqn and \deqn $ie = $1; if($command =~ /eqn/) {last unless $text =~ /$id.*$ie.*$ie/s ;} $id = $ie; $text =~ s/$id\s*(.*)$id/$id/s; $retval[$k++] = $1; } } $retval[0] = $id; @retval; } sub get_arguments_check { my ($command, $text, $nargs) = @_; $keep = $text; if($text =~ /\\($command)(\[[^\]]+\])?($ID)/){ $warn = 0; $id = $3; $text =~ s/$id(.*)$id/$id/s; $retval[1] = $1; my $k = 2; while(($k <= $nargs) && ($text =~ /$id(\s*)($ID)/)){ $warn = 1 if (length($1) > 0); $id = $2; $text =~ s/$id\s*(.*)$id/$id/s; $retval[$k++] = $1; } } $retval[0] = $id; if ($warn && $issue_warnings) { $keep =~ s/\n/ /g; $keep =~ /(.*\s$id.*$id)/; $keep = unmark_brackets($1); warn "Warning: invalid whitespace before brace in file '$Rdfile' at '$keep'\n"; } @retval; } ## Get the argument(s) of a link. ## The return value is ($id, $arg, $dest, $opt) ## Here $arg is the argument in {} ## $dest=$arg unless it is of the form \link[=dest]{arg}. It is the ## topic to link to. ## $opt is empty unless the form is \link[opt]{arg}, so it has to be last. sub get_link { my ($text) = @_; my @retval, $id; if($text =~ /\\link\[=([^\]]+)\]($ID)/){ $retval[2] = $1; $id = $2; $text =~ /$id(.*)$id/s; $retval[1] = $1; } elsif($text =~ /\\link\[([^\]]+)\]($ID)/){ $retval[3] = $1; $id = $2; $text =~ /$id(.*)$id/s; $retval[1] = $retval[2] = $1; } elsif($text =~ /\\link($ID)/){ $id = $1; $text =~ /$id(.*)$id/s; $retval[1] = $retval[2] = $1; } $retval[0] = $id; @retval; } ## Print a short vector of strings (utility). sub print_vec { my($F, $nam, $do_nam, $sep, $end) = @_; my($i)=0; $sep = ', ' unless $sep; $end = ".\n" unless $end; print $F "\@$nam = " if $do_nam; foreach (@$nam) { print $F ($i>0 ? $sep : '') . "'$_'"; $i++ } print $F $end; } ## Print the hash %blocks ... for debugging only (I just insert this ## function manually at places where I need it :-) sub print_blocks { while(($block,$text) = each %blocks) { print STDERR "\n\n********** $block **********\n\n"; print STDERR $text; } print STDERR "\n"; } ## Drop the command and leave its inside argument, i.e., replace ## '\abc{longtext}' by 'longtext'. sub undefine_command { my ($text, $cmd) = @_; my $loopcount = 0; while(checkloop($loopcount++, $text, "\\$cmd") && $text =~ /\\$cmd(\[[^\]]+\])?$ID/) { my ($id, $arg) = get_arguments($cmd, $text, 1); $text =~ s/\\$cmd(\[.*\])?$id(.*)$id/$2/s; } $text; } ## Drop the command AND its inside argument, i.e., replace ## '_text1_\abc{longtext}-text2-' by '_text1_-text2-' sub drop_full_command { my ($text, $cmd) = @_; my $loopcount = 0; while(checkloop($loopcount++, $text, "\\$cmd") && $text =~ /\\$cmd(\[[^\]]+\])?$ID/) { my ($id, $arg) = get_arguments($cmd, $text, 1); $text =~ s/\\$cmd$id.*$id//s; } $text; } ## Replace the command and its closing bracket by $before and $after, ## respectively, e.g., replace '\abc{longtext}' by 'longtext'. sub replace_command { my ($text, $cmd, $before, $after) = @_; my $loopcount = 0; while(checkloop($loopcount++, $text, "\\$cmd") && $text =~ /\\$cmd(\[[^\]]+\])?$ID/) { my ($id, $arg) = get_arguments($cmd, $text, 1); $text =~ s/\\$cmd$id(.*)$id/$before$1$after/s; } $text; } # ditto, but add newline before $after unless it starts a new line # and if there is more than one line. sub replace_addnl_command { my ($text, $cmd, $before, $after) = @_; my $loopcount = 0; while(checkloop($loopcount++, $text, "\\$cmd") && $text =~ /\\$cmd(\[[^\]]+\])?$ID/) { my ($id, $arg) = get_arguments($cmd, $text, 1); $text =~ /\\$cmd$id(.*)$id/s; $arg = $1; if ($arg =~ /\n/m) { $arg = "\n" . $arg unless $arg =~ /^\n/m; $arg = $arg . "\n" unless $arg =~ /\n$/m; $text =~ s/\\$cmd$id(.*)$id/$before$arg$after/s; } else { $text =~ s/\\$cmd$id(.*)$id/$before$arg/s; } } $text; } ## Replace the command and its closing bracket by $before and $after, ## respectively, AND PREPEND a comment to each LINE. E.g., replace ## '\abc{line1\nline2\n....}' by '\n##line1\n##line2\n##....' sub replace_prepend_command { my ($text, $cmd, $before, $after, $prepend) = @_; my $loopcount = 0; while(checkloop($loopcount++, $text, "\\$cmd") && $text =~ /\\$cmd(\[[^\]]+\])?$ID/) { my ($id, $arg) = get_arguments($cmd, $text, 1); $text =~ /\\$cmd$id(.*)$id/s; $arg = $1; if ($prepend eq "" || $arg =~ /\n/m) { $arg = "\n" . $arg unless $arg =~ /^\n/m; $arg =~ s/^/$prepend/gmo;# prepend at all line beginnings $arg =~ s/^$prepend//; # but NOT the very beginning.. $arg = $arg . "\n" unless $arg =~ /\n$/m; $text =~ s/\\$cmd$id.*$id/$before$arg$after/s; } else { $text =~ s/\\$cmd$id.*$id/$before$arg/s; } } $text; } sub transform_command { ## Transform the command and its argument. (Only transforming the ## argument causes looping ...) my ($text, $cmd, $tcmd, $from, $to) = @_; my $loopcount = 0; while(checkloop($loopcount++, $text, "\\$cmd") && $text =~ /\\$cmd(\[[^\]]+\])?$ID/) { my ($id, $arg) = get_arguments($cmd, $text, 1); $text =~ /\\$cmd$id(.*)$id/s; $arg = $1; $arg =~ s/$from/$to/g; $text =~ s/\\$cmd$id.*$id/\\$tcmd$id$1$arg$id/s; } $text; } sub transform_S3method { ## \method{GENERIC}{CLASS} ## Note that this markup should really only be used inside \usage. ## NB: \w includes _ as well as [:alnum:], which R now allows in name my ($text) = @_; my ($method, $prefix, $match, $rest); my ($str, $ini, $fun, $cls, $lst, @args); my $delimround = new Text::DelimMatch("\\(", "\\)"); $delimround->quote('"'); $delimround->quote("'"); my $S3method_RE = "([ \t]*)\\\\(S3)?method" . "\{([\\w.]+)\}" . "\{([._[:alnum:]]+|`[^`]+`)\}"; while($text =~ /$S3method_RE(.*)/s) { $ini = $1; $fun = $3; $cls = $4; $method = "method"; ($prefix, $match, $rest) = $delimround->match($5); if(($prefix eq "") && ($rest =~ m/^[ \t]*<-/)) { ## (Note that the RHS should really be called 'value', and ## that we could check for a syntacticaly valid R name.) $method = "replacement method"; } if($cls eq "default") { $str = "$ini\#\# Default S3 $method:\n$ini$fun"; } else { $str = "$ini\#\# S3 $method for class '$cls':\n$ini$fun"; } $text =~ s/$S3method_RE/$str/s; } ## Also try to handle markup for S3 methods for subscripting and ## subassigning. $S3method_RE = "([ \t]*)\\\\(S3)?method" . "\{(\\\$|\\\[\\\[?)\}" . "\{([._[:alnum:]]+|`[^`]+`)\}" . "\\\(([^)]+)\\\)"; while($text =~ /$S3method_RE(.*)/s) { ## ## The hard part is to rewrite the argument list, because ## although something like ## Method for class 'foo': ## `[`(x, i, ..., drop = FALSE) ## is correct, the majority will certainly prefer something like ## Method for class 'foo': ## x[i, ..., drop = FALSE] ## This can be tricky if the argument list contains embedded ## parentheses (e.g., in default argument strings), so we use ## Text::DelimMatch. ## $ini = $1; $fun = $3; $cls = $4; $method = "method"; ## The match was for something ending in a pair of balanced ## parentheses, which are not necessarily the matching ones. ($prefix, $match, $rest) = $delimround->match("($5)$6"); $method = "replacement method" if($rest =~ m/^[ \t]*<-/); ## Extract the first argument from the argument list. substr($match, 1, -1) =~ m/\s*([^,]+),\s*(.*)/s; ## Now put things together. $str = "$ini\#\# S3 $method for class '$cls':\n$ini$1$fun$2"; $str .= "]" x length($fun) if($fun ne "\$"); $text =~ s/$S3method_RE.*/$str$rest/s; } ## Also try to handle markup for S3 methods for binary ops and S3 ## Ops group methods. ## Argh. When converting to HTML/SGML, ampersands get replaced by ## '&' entities early on ... my $amp = "\\\&"; if($type =~ /html/i || $type =~ /Ssgm/ || $type =~ /chm/i) { $amp = "\\\&"; } $S3method_RE = "([ \t]*)\\\\(S3)?method" . "\{(" . join("|", ("\\\+", "\\\-", "\\\*", "\\\/", "\\\^", "<=?", ">=?", "!=?", "==", $amp, "\\\|", "\\\%[[:alnum:][:punct:]]*\\\%")) . ")\}" . "\{([._[:alnum:]]+|`[^`]+`)\}" . "\\\(([^)]+)\\\)"; while($text =~ /$S3method_RE/) { $ini = $1; $fun = $3; $cls = $4; $lst = $5; $str = "$ini\#\# S3 method for class '$cls':\n$ini"; $method = "$2method"; @args = split(/,\s*/, $lst); my $nargs = scalar(@args); if(($nargs == 1) && ($fun eq "!")) { ## Unary: !. $str .= "$fun $args[0]"; } elsif(($nargs == 2) && ($fun ne "!")) { ## Binary: everything but !. $str .= "$args[0] $fun $args[1]"; } else { warn "Warning: arity problem for \\$method{$fun}{$cls} in file '$Rdfile'?\n"; $str .= "`$fun`($lst)"; } $text =~ s/$S3method_RE/$str/s; } $text; } sub transform_S4method { ## \S4method{GENERIC}{SIGLIST} ## Note that this markup should really only be used inside \usage. my ($text) = @_; my ($method, $prefix, $match, $rest); my ($str, $ini, $fun, $sig, @args); my $delimround = new Text::DelimMatch("\\(", "\\)"); $delimround->quote('"'); $delimround->quote("'"); my $S4method_RE = "([ \t]*)\\\\S4method" . "\{([\\w.]+)\}" . "\{((([._[:alnum:]]+|`[^`]+`),)*([._[:alnum:]]+|`[^`]+`))\}"; local($Text::Wrap::columns) = 60; while($text =~ /$S4method_RE(.*)/s) { $ini = $1; $fun = $2; $sig = $3; $method = "method"; ($prefix, $match, $rest) = $delimround->match($7); if(($prefix eq "") && ($rest =~ m/^[ \t]*<-/)) { ## (Note that the RHS should really be called 'value', and ## that we could check for a syntacticaly valid R name.) $method = "replacement method"; } $sig = &format_sig($sig); ## Note that we reformat the siglist so that wrapping becomes ## possible between siglist elements. $str = wrap("$ini\#\# ", "$ini\#\# ", "S4 $method for signature '$sig':\n") . "$ini$fun"; $text =~ s/$S4method_RE/$str/s; } ## Also try to handle markup for S4 methods for subscripting and ## subassigning. See transform_S3method() above for details. $S4method_RE = "([ \t]*)\\\\S4method" . "\{(\\\$|\\\[\\\[?)\}" . "\{((([._[:alnum:]]+|`[^`]+`),)*([._[:alnum:]]+|`[^`]+`))\}" . "\\\(([^)]+)\\\)"; while($text =~ /$S4method_RE(.*)/s) { $ini = $1; $fun = $2; $sig = $3; $method = "method"; ## The match was for something ending in a pair of balanced ## parentheses, which are not necessarily the matching ones. ($prefix, $match, $rest) = $delimround->match("($7)$8"); $method = "replacement method" if($rest =~ m/^[ \t]*<-/); ## Extract the first argument from the argument list. substr($match, 1, -1) =~ m/\s*([^,]+),\s*(.*)/s; ## Now put things together. $sig = &format_sig($sig); $str = wrap("$ini\#\# ", "$ini\#\# ", "S4 $method for signature '$sig':\n") . "$ini$1$fun$2"; $str .= "]" x length($fun) if($fun ne "\$"); $text =~ s/$S4method_RE.*/$str$rest/s; } $text; } sub format_sig { ## Add a space after each comma separating sig list entries. my ($str) = @_; my $out; while($str =~ m/^([._[:alnum:]]+|`[^`]+`),(.*)$/) { $out .= $1 . ", "; $str = $2; } $out . $str; } sub striptitle { # text my $text = $_[0]; $text =~ s/\\//go; $text =~ s/---/-/go; $text =~ s/--/-/go; return $text; } #==************************ HTML ******************************** sub rdoc2html { # (filename) ; 0 for STDOUT local $htmlout; local $encoding = $_[1]; if($_[0]) { $htmlout = new FileHandle; open $htmlout, "> $_[0]"; # will be closed when goes out of scope } else { $htmlout = "STDOUT"; } $using_chm = 0; $encoding = $blocks{"encoding"} if defined $blocks{"encoding"}; $encoding = mime_canonical_encoding($encoding) unless $encoding eq "unknown"; $encoding = "iso-8859-1" if $encoding eq "unknown"; print $htmlout (html_functionhead(html_striptitle($blocks{"title"}), $pkgname, &html_escape_name($blocks{"name"}), $encoding, )); html_print_block("description", "Description"); html_print_codeblock("usage", "Usage"); html_print_argblock("arguments", "Arguments"); html_print_block("format", "Format"); html_print_block("details", "Details"); html_print_argblock("value", "Value"); html_print_sections(); html_print_block("note", "Note"); html_print_block("author", "Author(s)"); html_print_block("source", "Source"); html_print_block("references", "References"); html_print_block("seealso", "See Also"); html_print_codeblock("examples", "Examples"); print $htmlout (html_functionfoot($pkgname, $version)); $issue_warnings = 0; } sub html_striptitle { ## Call striptitle(), and handle LaTeX single and double quotes, < and >. my ($text) = @_; # $text = striptitle($text); $text =~ s/\\//go; $text =~ s/---/—/go; $text =~ s/--/–/go; $text =~ s/\`\`/“/g; $text =~ s/\'\'/”/g; $text =~ s/\`([^']+)'/‘$1’/g; $text =~ s/\`/\'/g; # @samp{'} could be an apostrophe ... $text =~ s//>/g; $text; } sub html_escape_name { my ($text) = @_; $text = unmark_brackets($text); $text =~ s/\\%/%/g; $text =~ s/\\\\/\\/g; $text; } ## Convert a Rdoc text string to HTML, i.e., convert \code to etc. sub text2html { my $text = $_[0]; my $outerpass = $_[1]; my $inarglist = $_[2]; if($outerpass) { $text =~ s/&([^#])/&\1/go; # might have explicit &# in source $text =~ s/>/>/go; $text =~ s/\n/sgo; } else { $text =~ s/\n\s*\n/\n<\/p>\n

\n/sgo; } $text =~ s/\\dots/.../go; $text =~ s/\\ldots/.../go; if($Rdversion < 1.1) { $text =~ s/\\Gamma/Γ/go; $text =~ s/\\alpha/α/go; $text =~ s/\\Alpha/Α/go; $text =~ s/\\pi/π/go; $text =~ s/\\mu/μ/go; $text =~ s/\\sigma/σ/go; $text =~ s/\\Sigma/Σ/go; $text =~ s/\\lambda/λ/go; $text =~ s/\\beta/β/go; $text =~ s/\\epsilon/ε/go; $text =~ s/\\left\(/\(/go; $text =~ s/\\right\)/\)/go; $text =~ s/\\le/<=/go;# \le *after* \left ! $text =~ s/\\ge/>=/go; } $text =~ s/^\\R|([^\\])((\\\\)*)\\R/$1$2R<\/b><\/font>/go; foreach my $cmd (@special_commands) { $text = transform_command($text, $cmd, $ECMD . $cmd, "-", "$EDASH"); } ## ## Can we safely assume HTML 4 these days? ## (HTML 4.0 Specification last revised on 24-Apr-1998) ## See also below for single/double left/right quotes. ## $text =~ s/---/—/go; $text =~ s/---/—/go; ## $text =~ s/--/–/go; $text =~ s/--/–/go; ## foreach my $cmd (@special_commands) { $text = transform_command($text, $ECMD . $cmd, $cmd, "$EDASH", "-"); } $text =~ s/$EOB/\{/go; $text =~ s/$ECB/\}/go; } $text = undefine_command($text, "special"); $text = replace_command($text, "emph", "", ""); $text = replace_command($text, "bold", "", ""); $text = replace_command($text, "file", "‘", "’"); $text = replace_command($text, "strong", "", ""); $text = replace_command($text, "acronym", "", ""); $text = replace_command($text, "cite", "", ""); $text = replace_command($text, "command", "", ""); $text = replace_command($text, "dfn", "", ""); $text = replace_command($text, "env", "", ""); $text = replace_command($text, "kbd", "", ""); $text = replace_command($text, "option", "", ""); $text = replace_command($text, "pkg", "", ""); $text = replace_command($text, "samp", "", ""); $text = replace_command($text, "var", "", ""); $text = replace_command($text, "sQuote", "‘", "’"); $text = replace_command($text, "dQuote", "“", "”"); $text = html_tables($text); $text =~ s/\\cr/
/sgo; my $loopcount = 0; while(checkloop($loopcount++, $text, "\\link") && $text =~ /\\link(\[.*\])?$ID/){ my ($id, $arg, $dest, $opt) = get_link($text); ## fix conversions in key of htmlindex: my $argkey = $dest; $argkey =~ s/<//go; die "\nERROR in file '$Rdfile': command (e.g. \\url) inside \\link\n" if $arg =~ normal-bracket; $htmlfile = $main::htmlindex{$argkey}; if($htmlfile && !length($opt)){ if($using_chm) { if ($htmlfile =~ s+^$pkgname/html/++) { # in the same chm file $text =~ s/\\link(\[.*\])?$id.*$id/$arg<\/a>/s; } else { $tmp = $htmlfile; ($base, $topic) = ($tmp =~ m+(.*)/(.*)+); $base =~ s+/html$++; $htmlfile = mklink($base, $topic); # print "$htmlfile\n"; $text =~ s/\\link(\[.*\])?$id.*$id/$arg<\/a>/s; } } else { if ($htmlfile =~ s+^$pkgname/html/++) { # in the same html file $text =~ s/\\link(\[.*\])?$id.*$id/$arg<\/a>/s; } elsif ($htmlfile =~ s+^$pkgname\_[^/]*/html/++) { # in the same html file, versioned install $text =~ s/\\link(\[.*\])?$id.*$id/$arg<\/a>/s; } else { $text =~ s/\\link(\[.*\])?$id.*$id/$arg<\/a>/s; } } } else { $main::misslink = $main::misslink . " " . $argkey unless $opt ne ""; if($using_chm){ if($opt ne "") { my ($pkg, $topic) = split(/:/, $opt); $topic = $arg if $topic eq ""; $opt =~ s/:.*$//o; if($pkg ne $pkgname) { $htmlfile = mklink($opt, $topic . $HTML); } else { $htmlfile = "href=\"" . $topic . $HTML . "\""; } $text =~ s/\\link(\[.*\])?$id.*$id/$arg<\/a>/s; } else { $text =~ s/\\link(\[.*\])?$id.*$id/$arg/s; } } else{ if($opt ne "") { my ($pkg, $topic) = split(/:/, $opt); $topic = $arg if $topic eq ""; $htmlfile = $pkg."/html/".$topic.$HTML; if ($htmlfile =~ s+^$pkgname/html/++) { # in the same html file $text =~ s/\\link(\[.*\])?$id.*$id/$arg<\/a>/s; } elsif ($htmlfile =~ s+^$pkgname\_[^/]*/html/++) { # in the same html file, versioned install $text =~ s/\\link(\[.*\])?$id.*$id/$arg<\/a>/s; } else { $text =~ s/\\link(\[.*\])?$id.*$id/$arg<\/a>/s; } } else { $text =~ s/\\link(\[.*\])?$id.*$id/$arg<\/a>/s; } } } } $loopcount = 0; while(checkloop($loopcount++, $text, "\\email") && $text =~ /\\email$ID/){ my ($id, $arg) = get_arguments("email", $text, 1); $text =~ s/\\email$id.*$id/$arg<\/a>/s; } $loopcount = 0; while(checkloop($loopcount++, $text, "\\url") && $text =~ /\\url$ID/){ my ($id, $arg) = get_arguments("url", $text, 1); $text =~ s/\\url.*$id/$arg<\/a>/s; } ## Handle equations: $loopcount = 0; while(checkloop($loopcount++, $text, "\\eqn") && $text =~ /\\eqn$ID/){ my ($id, $eqn, $ascii) = get_arguments("eqn", $text, 2); $eqn = $ascii if $ascii; $text =~ s/\\eqn(.*)$id/$eqn<\/i>/s; } $loopcount = 0; while(checkloop($loopcount++, $text, "\\deqn") && $text =~ /\\deqn$ID/){ my ($id, $eqn, $ascii) = get_arguments("deqn", $text, 2); $eqn = $ascii if $ascii; $text =~ s/\\deqn(.*)$id/<\/p>

$eqn<\/i><\/p>

/s; } ## Handle encoded text: $loopcount = 0; while(checkloop($loopcount++, $text, "\\enc") && $text =~ /\\enc$ID/){ my ($id, $enc, $ascii) = get_arguments("enc", $text, 2); $text =~ s/\\enc(.*)$id/$enc/s; } $text = replace_command($text, "itemize", "

"); $text = replace_command($text, "enumerate", "
    ", "
"); $text =~ s/<\/p>\n

\s+\\item\s+/

  • /go; $text =~ s/\\item\s+/
  • /go; ## Handle '\describe': $text = replace_command($text, "describe", "
    ", "
    "); while(checkloop($loopcount++, $text, "\\item") && $text =~ /\\itemnormal/s) { my ($id, $arg, $desc) = get_arguments_check("item", $text, 2); my $descitem; $descitem = "
    " . text2html($arg, 0, $inarglist) . "
    "; $descitem .= "
    " . text2html($desc, 0, $inarglist) . "
    "; $text =~ s/\\itemnormal.*$id/$descitem/s; } if($outerpass) { if($Rdversion < 1.1) { $text =~ s/\\([^\\])/$1/go; #-drop single "\" (as in '\R') } $text =~ s/\\\\/\\/go; $text = html_unescape_codes($text); $text = unmark_brackets($text); $text =~ s/(^[\\])\\R(\{\})?/$1R<\/b><\/font>/go unless $unesape_amp; } $text; } sub code2html { my $text = $_[0]; $text =~ s/&/&/go; $text =~ s/>/>/go; $text =~ s/", ""); my $loopcount = 0; while(checkloop($loopcount++, $text, "\\link") && $text =~ /\\link(\[.*\])?$ID/){ my ($id, $arg, $dest, $opt) = get_link($text); ## fix conversions in key of htmlindex: my $argkey = $dest; $argkey =~ s/<//go; $argkey =~ s/&/&/go; $htmlfile = $main::htmlindex{$argkey}; if($htmlfile && !length($opt)){ if($using_chm) { if ($htmlfile =~ s+^$pkgname/html/++) { # in the same chm file $text =~ s/\\link(\[.*\])?$id.*$id/
    $arg<\/a>/s; } else { $tmp = $htmlfile; ($base, $topic) = ($tmp =~ m+(.*)/(.*)+); $base =~ s+/html$++; $htmlfile = mklink($base, $topic); $text =~ s/\\link(\[.*\])?$id.*$id/$arg<\/a>/s; } } else { my $uxfile = $htmlfile; if ($uxfile =~ s+^$pkgname/html/++) { # in the same html file $text =~ s/\\link(\[.*\])?$id.*$id/$arg<\/a>/s; } elsif ($uxfile =~ s+^$pkgname\_[^/]*/html/++) { # in the same html file, versioned install $text =~ s/\\link(\[.*\])?$id.*$id/$arg<\/a>/s; } else { $text =~ s/\\link(\[.*\])?$id.*$id/$arg<\/a>/s; } } } else{ $main::misslink = $main::misslink . " " . $argkey unless $opt ne ""; if($using_chm){ if($opt ne "") { my ($pkg, $topic) = split(/:/, $opt); $topic = $arg if $topic eq ""; $opt =~ s/:.*$//o; if($pkg ne $pkgname) { $htmlfile = mklink($opt, $topic . $HTML); } else { $htmlfile = "href=\"" . $topic . $HTML . "\""; } $text =~ s/\\link(\[.*\])?$id.*$id/$arg<\/a>/s; } else { $text =~ s/\\link(\[.*\])?$id.*$id/$arg/s; } } else { if($opt ne "") { my ($pkg, $topic) = split(/:/, $opt); $topic = $arg if $topic eq ""; $htmlfile = $pkg."/html/".$topic.$HTML; if ($htmlfile =~ s+^$pkgname/html/++) { # in the same html file $text =~ s/\\link(\[.*\])?$id.*$id/$arg<\/a>/s; } elsif ($htmlfile =~ s+^$pkgname\_[^/]*/html/++) { # in the same html file, versioned install $text =~ s/\\link(\[.*\])?$id.*$id/$arg<\/a>/s; } else { $text =~ s/\\link(\[.*\])?$id.*$id/$arg<\/a>/s; } } else { $text =~ s/\\link(\[.*\])?$id.*$id/$arg<\/a>/s; } } } } $text = replace_addnl_command($text, "dontrun", "## Not run: ", "## End(Not run)"); $text = drop_full_command($text, "testonly"); $text = drop_full_command($text, "dontshow"); $text = undefine_command($text, "donttest"); $text =~ s/\\\\/\\/go; $text = unmark_brackets($text); $text = transform_S3method($text); $text = transform_S4method($text); $text; } ## Print a standard block sub html_print_block { my ($block,$title) = @_; html_print_a_section($title, $blocks{$block}) if defined $blocks{$block}; } ## Print a code block (preformatted) sub html_print_codeblock { my ($block,$title) = @_; if(defined $blocks{$block}){ my $ntext = $blocks{$block}; if ($ntext =~ /$ECODE/) { warn "Warning: \\code inside code block in file '$Rdfile'\n" if $issue_warnings; } my $ntext = code2html($ntext); $ntext = html_unescape_codes($ntext); print $htmlout (html_title3($title), "
    " , $ntext, "
    \n\n"); } } ## Print the value or arguments block sub html_print_argblock { my ($block,$title) = @_; my $loopcount = 0; while(checkloop($loopcount++, $text, "escaped preformat") && $text =~ /$EPREFORMAT($ID)/){ my $id = $1; my $ec = code2html($epreformats{$id}); $text =~ s/$EPREFORMAT$id/
    $ec<\/pre>/;
        }
    
        if(defined $blocks{$block}){
    	print $htmlout (html_title3($title));
    
    	my $text = $blocks{$block};
    
    	## some people have put \itemize inside \value.
    	## as from R 2.8.1, strip with a warning, providing not after \item{
    	if($text =~ /\\item(ize|$ID)/) {
    	    if($1 eq "ize") {
    		warn "Warning: found \\itemize inside \\$block in file '$Rdfile'\n" if $issue_warnings;
    		$text =~ /\\itemize($ID)/;
    		$id = $1;
    		$text =~  s/\\itemize$id//;
    		$text =~ s/$id//;
    	    }
    	}
    
    	if($text =~ /\\item/s){
    	    $text =~ /^(.*)(\\item.*)*/s;
    	    my ($begin, $rest) = split(/\\item/, $text, 2);
    	    if($begin){
    		$text =~ s/^\Q$begin//s;
    		$begin =~ s/(\n)+$//;
    		print $htmlout "

    \n", text2html($begin, 1, 1), "\n

    \n"; } print $htmlout "\n"; my $loopcount = 0; while(checkloop($loopcount++, $text, "\\item") && $text =~ /\\item/s) { my ($id, $arg, $desc) = get_arguments_check("item", $text, 2); print $htmlout ("\n\n"); $text =~ s/.*$id//s; } print $htmlout "
    ", text2html($arg, 1, 1), "\n", text2html($desc, 1, 1), "
    \n"; my $rest = text2html($text, 1, 1); print $htmlout ("

    \n", $rest, "

    \n") if $rest; } else{ my $rest = text2html($text, 1, 1); print $htmlout ("

    \n", $rest, "

    \n") if $rest; } } } ## Print sections sub html_print_sections { my $section; for($section=0; $section<$max_section; $section++){ html_print_a_section(html_striptitle($section_title[$section]), $section_body[$section]); } } sub html_print_a_section { my ($title, $body) = @_; my $htmlbody = text2html($body, 1, 0); $htmlbody =~ s/

    \s*

    \s*<\/p>/<\/p>/g; ## attempt to close paragraphs tags, and remove spurious closings. ## next one gets thrown by the unclosed

  • tags. ## $htmlbody =~ s/([^>]\n+)<(table|dl|ul|ol)/\1<\/p>\n<\2/g; $htmlbody =~ s/<\/(table|dl|ul|ol|dd)>\n+<\/p>\n/<\/\1>\n\n/g; $htmlbody =~ s/<\/(table|dl|ul|ol)>\n+(\w|\n

    \n\2/g; $htmlbody =~ s/

    \s*<(table|dl|ul|ol|dt)/\n<\1/g; ## top and tail with paragraph tags if needed. $htmlbody = "

    \n". $htmlbody unless $htmlbody =~ /^<(table|dl|ul|ol)>/; $htmlbody .= "\n

    \n" unless $htmlbody =~ /<\/(table|dl|ul|ol)>\s*$/; ## remove empty paras $htmlbody =~ s/

    \s*<\/p>//g; print $htmlout (html_title3($title), $htmlbody, "\n"); } sub html_unescape_codes { my $text = $_[0]; my $loopcount = 0; while(checkloop($loopcount++, $text, "escaped code") && $text =~ /$ECODE($ID)/) { my $id = $1; my $ec = code2html($ecodes{$id}); $text =~ s/$ECODE$id/$ec<\/code>/; } my $loopcount = 0; while(checkloop($loopcount++, $text, "escaped preformat") && $text =~ /$EPREFORMAT($ID)/){ my $id = $1; my $ec = code2html($epreformats{$id}); $text =~ s/$EPREFORMAT$id/

    $ec<\/pre>/;
        }
    
        $text;
    }
    
    
    sub html_tables {
    
        my $text = $_[0];
    
        my $loopcount = 0;
        while(checkloop($loopcount++, $text, "\\tabular")
    	  &&  $text =~ /\\tabular/){
    
    	my ($id, $format, $arg)	 =
    	    get_arguments("tabular", $text, 2);
    
    	$arg =~ s/\n/ /sgo;
    
    	## remove trailing \cr (otherwise we get an empty last line)
    	$arg =~ s/\\cr\s*$//go;
    
    	## parse the format of the tabular environment
    	my $ncols = length($format);
    	my @colformat = ();
    	for($k=0; $k<$ncols; $k++){
    	    my $cf = substr($format, $k, 1);
    
    	    if($cf =~ /l/o){
    		$colformat[$k] = "left";
    	    }
    	    elsif($cf =~ /r/o){
    		$colformat[$k] = "right";
    	    }
    	    elsif($cf =~ /c/o){
    		$colformat[$k] = "center";
    	    }
    	    else{
    		die("Error in file '$Rdfile': unknown identifier \{$cf\} in" .
    		    " tabular format \{$format\}\n");
    	    }
    	}
    
    	## now do the real work: split into lines and columns
    	my $table = "\n";
    	my @rows = split(/\\cr/, $arg);
    	for($k=0; $k<=$#rows;$k++){
    	    $table .= "\n";
    	    my @cols = split(/\\tab/, $rows[$k]);
    	    die("Error in file '$Rdfile':\n  $rows[$k]\\cr\n" .
    		"does not fit tabular format \{$format\}\n")
    		if ($#cols != $#colformat);
    	    for($l=0; $l<=$#cols; $l++){
    		$table .= "  ";
    	    }
    	    $table .= "\n\n";
    	}
    	$table .= "
    $cols[$l]
    \n"; $text =~ s/\\tabular.*$id/$table/s; } $text; } sub html_title3 { my $title = $_[0]; "\n

    $title

    \n\n"; } ## The header & footer of a function page sub html_functionhead { my ($title, $pkgname, $name, $enc) = @_; my $retval = "\n" . "R: $title\n" . "\n" . "\n" . "\n\n"; if($pkgname){ $retval .= "" . "" . "
    $name {$pkgname}R Documentation
    "; } $retval .= "\n

    $title

    \n\n"; } sub html_functionfoot { my ($pkgname, $version) = @_; my $retval; if($HTML){ $retval .= "\n\n
    \n\n"; } $retval .= "\n"; } sub chm_functionhead { my ($title, $pkgname, $name, $enc) = @_; my $retval = "$title\n" . "\n" . "\n". "\n\n\n"; if($pkgname){ $retval .= "" . "" . "
    $name($pkgname)R Documentation
    "; } $retval .= "\n"; foreach(@aliases) { # print "alias: $_\n"; $retval .= "\n"; } $title =~ s/\"/'/go; #' $title =~ s/,//go; # commas seem to give problems $retval .= "\n" . "\n\n"; $retval .= "\n

    $title

    \n\n"; } #==************************** txt ****************************** use Text::Tabs qw(expand); sub rdoc2txt { # (filename, def_encoding); 0 for STDOUT local $txtout; if($_[0]) { $txtout = new FileHandle; open $txtout, "> $_[0]"; # will be closed when goes out of scope } else { $txtout = "STDOUT"; } local $encoding = $_[1]; $encoding = $blocks{"encoding"} if defined $blocks{"encoding"}; $encoding = latex_canonical_encoding($encoding) unless $encoding eq "unknown"; $INDENT = 3; # indent for \itemize and \enumerate first line $INDENTD = 0; # indent for \describe list first line $INDENTDD = 5; # indent for \describe list bodies if ($pkgname) { my $pad = 75 - length($blocks{"name"}) - length($pkgname) - 30; $pad = $pad - 2 - length($encoding) if $encoding ne "unknown"; $pad = int($pad/2); print $txtout &html_escape_name($blocks{"name"}), " " x $pad, "package:$pkgname", " " x $pad,"R Documentation"; print $txtout "($encoding)" if $encoding ne "unknown"; print $txtout "\n\n"; } print $txtout (txt_header(txt_striptitle($blocks{"title"})), "\n"); txt_print_block("description", "Description"); txt_print_codeblock("usage", "Usage"); txt_print_argblock("arguments", "Arguments"); txt_print_block("format", "Format"); txt_print_block("details", "Details"); txt_print_argblock("value", "Value"); txt_print_sections(); txt_print_block("note", "Note"); txt_print_block("author", "Author(s)"); txt_print_block("source", "Source"); txt_print_block("references", "References"); txt_print_block("seealso", "See Also"); txt_print_codeblock("examples", "Examples"); print $txtout "\n"; if($_[0]) { close $txtout; } $issue_warnings = 0; } sub txt_striptitle { ## Call striptitle(), and handle LaTeX style single/double quotes. my ($text) = @_; $text = striptitle($text); if(($R::Vars::OSTYPE eq "windows") && $ENV{"LC_ALL"} ne "C" && ($encoding eq "unknown" || $encoding eq "latin1")) { $text =~ s/\`\`/\x93/g; $text =~ s/\'\'/\x94/g; $text =~ s/\`([^']+)'/\x91$1\x92/g; $text =~ s/\`/\'/g; # } elsif (encoding eq "UTF-8") { # $text =~ s/\`\`/\xe2\x80\x9c/g; # $text =~ s/\'\'/\xe2\x80\xdc/g; # $text =~ s/`([^']+)'/\xe2\x80\x98$1\xe2\x80\x99/g; #` for emacs # $text =~ s/\`/\'/g; } else { $text =~ s/(\`\`|\'\')/\"/g; $text =~ s/\`/\'/g; } $text; } ## Underline section headers sub txt_header { my $header = $_[0]; $header =~ s/\\//go; ## '_' . join '_', split //, $header; my @letters = split //, $header; my $out = "", $a; for($l = 0; $l <= $#letters; $l++){ $a = @letters[$l]; if($a =~ /[[:alnum:]]/) { $out .= '_' . $a; } else { $out .= $a; } } return $out; } ## Convert a Rdoc text string to txt ## $_[0]: text to be converted ## $_[1]: (optional) indentation of paragraphs. default = $INDENT sub text2txt { my $text = $_[0]; if($_[1]){ my $indent = $_[1]; } else{ my $indent = $INDENT; } $text =~ s/^\.|([\n\(])\./$1\\\&./g; ## TABs are just whitespace $text =~ s/\t/ /g; $text = txt_tables($text); $text =~ s/\n\s*\n/\n\n/sgo; $text =~ s/\\dots/\\&.../go; $text =~ s/\\ldots/\\&.../go; $text =~ s/\\%/%/sgo; $text =~ s/\\\$/\$/sgo; if ($Rdversion < 1.1) { $text =~ s/\\Gamma/Gamma/go; $text =~ s/\\alpha/alpha/go; $text =~ s/\\Alpha/Alpha/go; $text =~ s/\\pi/pi/go; $text =~ s/\\mu/mu/go; $text =~ s/\\sigma/sigma/go; $text =~ s/\\Sigma/Sigma/go; $text =~ s/\\lambda/lambda/go; $text =~ s/\\beta/beta/go; $text =~ s/\\epsilon/epsilon/go; $text =~ s/\\left\(/\(/go; $text =~ s/\\right\)/\)/go; $text =~ s/\\le/<=/go; $text =~ s/\\ge/>=/go; } $text =~ s/\\R/R/go; foreach my $cmd (@special_commands) { $text = transform_command($text, $cmd, $ECMD . $cmd, "-", "$EDASH"); } $text =~ s/---/--/go; $text =~ s/--/-/go; foreach my $cmd (@special_commands) { $text = transform_command($text, $ECMD . $cmd, $cmd, "$EDASH", "-"); } $text =~ s/$EOB/\{/go; $text =~ s/$ECB/\}/go; $text = undefine_command($text, "special"); $text = undefine_command($text, "link"); $text = undefine_command($text, "textbf"); $text = undefine_command($text, "mathbf"); $text = undefine_command($text, "email"); $text = replace_command($text, "file", "'", "'"); $text = replace_command($text, "url", ""); $text = replace_command($text, "emph", "_", "_"); $text = replace_command($text, "bold", "*", "*"); $text = replace_command($text, "strong", "*", "*"); $text = undefine_command($text, "acronym"); $text = undefine_command($text, "cite"); $text = undefine_command($text, "dfn"); $text = replace_command($text, "command", "'", "'"); $text = replace_command($text, "env", "'", "'"); $text = replace_command($text, "kbd", "'", "'"); $text = replace_command($text, "option", "'", "'"); $text = replace_command($text, "pkg", "'", "'"); $text = replace_command($text, "samp", "'", "'"); ## ## Maybe this should uppercase its argument a la Texinfo? $text = undefine_command($text, "var"); ## if(($R::Vars::OSTYPE eq "windows") && $ENV{"LC_ALL"} ne "C" && ($encoding eq "unknown" || $encoding eq "latin1")) { $text = replace_command($text, "sQuote", "\x91", "\x92"); $text = replace_command($text, "dQuote", "\x93", "\x94"); # } elsif (encoding eq "UTF-8") { # $text = replace_command($text, "sQuote", "\xe2\x80\x98", "\xe2\x80\x99"); # $text = replace_command($text, "dQuote", "\xe2\x80\x9c", "\xe2\x80\x9d"); } else { $text = replace_command($text, "sQuote", "'", "'"); $text = replace_command($text, "dQuote", "\"", "\""); } ## Handle equations: my $loopcount = 0; while(checkloop($loopcount++, $text, "\\eqn") && $text =~ /\\eqn$ID/){ my ($id, $eqn, $ascii) = get_arguments("eqn", $text, 2); $eqn = $ascii if $ascii; $eqn =~ s/\\([^&])/$1/go; $text =~ s/\\eqn(.*)$id/$eqn/s; } $loopcount = 0; while(checkloop($loopcount++, $text, "\\deqn") && $text =~ /\\deqn$ID/) { my ($id, $eqn, $ascii) = get_arguments("deqn", $text, 2); $eqn = $ascii if $ascii; $eqn =~ s/\\([^&])/$1/go; $eqn =~ s/^\n*//o; $eqn =~ s/\n*$//o; $text =~ s/\\deqn(.*)$id/\n\n.DS B\n$eqn\n.DE\n\n/s; } ## Handle encoded text: my $loopcount = 0; while(checkloop($loopcount++, $text, "\\enc") && $text =~ /\\enc$ID/){ my ($id, $enc, $ascii) = get_arguments("enc", $text, 2); $enc = $ascii if $ascii; $enc =~ s/\\([^&])/$1/go; $text =~ s/\\enc(.*)$id/$enc/s; } $list_depth=0; $text = replace_command($text, "itemize", "\n\n.in +$INDENT\n", "\n\n.in -$INDENT\n"); $text = replace_command($text, "enumerate", "\n\n.inen +$INDENT\n", "\n\n.inen -$INDENT\n"); $text =~ s/\\item\s+/\n.ti * \n/go; ## Handle '\describe': $text = replace_command($text, "describe", "\n\n.in +$INDENTDD\n", "\n\n.in -$INDENTDD\n"); while(checkloop($loopcount++, $text, "\\item") && $text =~ /\\itemnormal/s) { my ($id, $arg, $desc) = get_arguments_check("item", $text, 2); my $descitem = text2txt($arg); my $ll = length($desc); $descitem =~ s/\n/ /go; # no NLs in items if($ll > 0) { $descitem = "\n.tide " . $descitem . " \n". text2txt($desc); } else { warn "Warning: missing text for item '$descitem' " . "in \\describe in file '$Rdfile'\n"; $descitem = "\n.tide " . $descitem . " \n \n" } $text =~ s/\\itemnormal.*$id/$descitem/s; } $text = txt_unescape_codes($text); $text = unmark_brackets($text); $text; } sub code2txt { my $text = $_[0]; $text =~ s/^\.|([\n\(])\./$1\\&./g; $text =~ s/\\%/%/go; $text =~ s/\\ldots/.../go; $text =~ s/\\dots/.../go; $text = undefine_command($text, "special"); $text = undefine_command($text, "var"); $text = undefine_command($text, "link"); $text = replace_addnl_command($text, "dontrun", "## Not run: ", "## End(Not run)"); $text = drop_full_command($text, "testonly"); $text = drop_full_command($text, "dontshow"); $text = undefine_command($text, "donttest"); $text = unmark_brackets($text); $text = transform_S3method($text); $text = transform_S4method($text); $text; } sub nounder { my ($text) = @_; $text =~ s/_//g; $text; } ## Modified from Text::Wrap to take account of underlines, not entab. sub Rwrap { my ($ip, $xp, @t) = @_; my $r = ""; my $columns = 72; my $t = expand(join(" ",@t)); my $lead = $ip; my $ll = $columns - length(nounder(expand($ip))) - 1; my $nll = $columns - length(nounder(expand($xp))) - 1; my $nl = ""; my $remainder = ""; $ll = 0 if($ll <= 0); ## Allow for wrapping after the expansion of $ip. if($t =~ s/^([^\n]{0,$ll})(\s|\Z(?!\n))//xm) { $r .= $nl . $lead . $1; $remainder = $2; } else { $r .= $nl . $lead; } $lead = $xp; $ll = $nll; $nl = "\n"; while ($t !~ /^\s*$/) { if ($t =~ s/^([^\n]{0,$ll})(\s|\Z(?!\n))//xm) { $r .= $nl . $lead . $1; $remainder = $2; } elsif ($t =~ s/([^\n]*?)(\s|\Z(?!\n))//xm) { ## Prefer overflow to wrap at fill column, cf Text::Wrap. $r .= $nl . $lead . $1; $remainder = $2; } else { print "$t\n"; die "This shouldn't happen"; } } $r .= $remainder; $r .= $lead . $t if $t ne ""; return $r; } ## generate wrapped text, zap empty lines to \n sub mywrap { my ($pre1, $pre2, $text) = @_; my $ntext; if(length($text) > 0) { $ntext = Rwrap($pre1, $pre2, $text); } else { $ntext = $pre1; } my @lines = split /\n/, $ntext; my $out = "", $line; foreach $line (@lines) { ## $line = expand $line; $line =~ s/^\s+$//o; $out .= $line . "\n"; } $out =~ s/\n$//; return $out; } ## Print text indent and filled: will put out a leading blank line. sub txt_fill { # pre1, base, "text to be formatted" my ($pre1, $base, $text) = @_; my $INDENT = $base; my $indent = " " x $INDENT; ## first split by paragraphs $text =~ s/\\\\/\\bsl{}/go; $text =~ s/\\&\./\./go; # unescape code pieces ## A mess: map & \& \\& \\\& to & & \& \& $text =~ s/\\&/&/go if $unescape_amp; $text =~ s/\\ / /go; $text =~ s/\\_/_/go if $unescape_amp; $text =~ s/\\$/\$/go if $unescape_amp; $text =~ s/\\#/#/go if $unescape_amp; $text =~ s/\\%/%/go; $text =~ s/\\bsl{}/\\/go; $text =~ s/escaped-code-backslash/\\/go; my @paras = split /\n\n/, $text; $indent1 = $pre1; $indent2 = $indent; my $enumlevel = 0, @enum; foreach $para (@paras) { ## strip leading white space $para =~ s/^\s+//; my $para0 = $para; $para0 =~ s/\n\s*/ /go; ## check for a item in itemize etc if ($para =~ s/^[\n]*\.ti //) { $indent1 = $indent; $indent2 = $indent1 . (" " x 3); if ($enum{$enumlevel} > 0) { $para =~ s/\*/$enum{$enumlevel}./; $enum{$enumlevel} += 1; } } ## check for a item in describe etc if ($para =~ s/^[\n]*\.tide ([^\n]+)\n//) { $indent1 = " " x ($INDENT - $INDENTDD) . txt_header($1); $indent2 = $indent; } ## check for .in or .inen command if ($para =~ s/^[\n]*\.in([^\ ]*) (.*)/\2/) { $INDENT = $INDENT + $para; $indent1 = $indent2 = $indent = " " x $INDENT; if ($para > 0) { $enumlevel += 1; if ($1 =~ /en/) { $enum{$enumlevel} = 1; } else { $enum{$enumlevel} = 0; } } else { $enumlevel -= 1; } ## check for a \deqn block } elsif ($para0 =~ s/^\s*\.DS B\s*(.*)\.DE/\1/) { $para0 =~ s/\s*$//o; if(length($para0) > 65) { print $txtout "\n", " ", $para0, "\n"; } else { my $shift = int((70 - length($para0))/2); print $txtout "\n", " " x $shift, $para0, "\n"; } ## check for a \tabular block } elsif ($para =~ s/^\.TS\n//) { my $format = $para; $format =~ s/([rlc]*)\n.*/$1/o; # parse the format of the tabular environment my $ncols = length($format); my @colformat = (); for($k=0; $k<$ncols; $k++){ my $cf = substr($format, $k, 1); if($cf =~ /l/o){ $colformat[$k] = "l"; } elsif($cf =~ /r/o){ $colformat[$k] = "r"; } elsif($cf =~ /c/o){ $colformat[$k] = "c"; } } my @colwidths, $colwidth, $left, $right; for($l = 0; $l < $#colformat; $l++){ $colwidths[$l] = 0; } # now do the real work: split into lines and columns # first scan them and get the field widths. $para =~ s/([^\n]*)\n//o; my @rows = split(/\\cr/, $para); my $tmp, $line = ""; for($k = 0; $k <= $#rows; $k++){ my @cols = split(/\\tab/, $rows[$k]); die("Error in file '$Rdfile':\n $rows[$k]\\cr\n" . "does not fit tabular format \{$format\}\n") if ($#cols != $#colformat); for($l = 0; $l <= $#cols; $l++){ $tmp = $cols[$l]; $tmp =~ s/^\s*//; $tmp =~ s/\s*$//; $colwidth = length($tmp); if ($colwidth > $colwidths[$l]) { $colwidths[$l] = $colwidth; } } } print $txtout "\n"; for($k = 0; $k <= $#rows; $k++){ $line = " "; # indent by two my @cols = split(/\\tab/, $rows[$k]); for($l = 0; $l <= $#cols; $l++){ $tmp = $cols[$l]; $tmp =~ s/^\s*//; $tmp =~ s/\s*$//; $colwidth = length($tmp); if ($colformat[$l] eq "r") { $left = $colwidths[$l] - $colwidth; } elsif ($colformat[$l] eq "c") { $left = int (($colwidths[$l] - $colwidth)/2); } else { $left = 0; } # 2 is the column gap $right = $colwidths[$l] - $colwidth + 2 - $left; if ($l == $#cols) { $right = 0; # don't need to right-pad end. } $line .= " " x $left . $tmp . " " x $right; } print $txtout $indent, "$line\n"; } ## plain text } else { $para =~ s/\n\s*/ /go; print $txtout "\n"; # Now split by \cr blocks my @blocks = split /\\cr/, $para; foreach $text (@blocks) { # $text =~ s/^\s+//o; print $txtout mywrap($indent1, $indent2, $text), "\n"; $indent1 = $indent2; } } } } ## Print a standard block sub txt_print_block { my ($block,$title) = @_; my $next; if(defined $blocks{$block}){ print $txtout "\n"; print $txtout txt_header($title), ":\n"; $ntext = text2txt($blocks{$block}); txt_fill(" ", 5, $ntext); } } ## Print a code block (preformatted) sub txt_print_codeblock { my ($block,$title) = @_; my $ntext; my $indent = " " x 5; if(defined $blocks{$block}){ print $txtout "\n"; print $txtout txt_header($title), ":\n" if $title; $ntext = code2txt($blocks{$block}); if ($ntext =~ /$ECODE/) { warn "Warning: \\code inside code block in file '$Rdfile'\n" if $issue_warnings; } # make sure there is precisely one leading "\n" $ntext =~ s/^[\n]*//go; $ntext = "\n". $ntext; $ntext =~ s/\\&\././go; $ntext = txt_unescape_codes($ntext); foreach $line (split /\n/, $ntext) { $line =~ s/\\\\/\\/go; $line =~ s/^\t/ /o; # $line =~ s/^\s+$//o; if(length($line) > 0) { print $txtout $indent, $line, "\n"; } else { print $txtout "\n"; } } } } ## Print the value or arguments block sub txt_print_argblock { my ($block,$title) = @_; if(defined $blocks{$block}){ print $txtout "\n"; print $txtout txt_header($title), ":\n" if $title; my $text = $blocks{$block}; ## some people have put \itemize inside \value. ## as from R 2.8.1, strip with a warning, providing not after \item{ if($text =~ /\\item(ize|$ID)/) { if($1 eq "ize") { warn "Warning: found \\itemize inside \\$block in file '$Rdfile'\n" if $issue_warnings; $text =~ /\\itemize($ID)/; $id = $1; $text =~ s/\\itemize$id//; $text =~ s/$id//; } } if($text =~ /\\item/s){ $text =~ /^(.*)(\\item.*)*/s; my ($begin, $rest) = split(/\\item/, $text, 2); if($begin){ txt_fill(" ", 5, text2txt($begin)); $text =~ s/^\Q$begin//s; } my $loopcount = 0; while(checkloop($loopcount++, $text, "\\item") && $text =~ /\\item/s){ my ($id, $arg, $desc) = get_arguments_check("item", $text, 2); $arg = text2txt($arg); $arg =~ s/\\&//go; $desc = text2txt($desc); $arg0 = $arg.": "; $short = 10 - length($arg0); $arg0 = " " x $short. $arg0 if $short > 0; if (length($desc) > 0) { txt_fill($arg0, 10, $desc); } else { print $txtout "\n", $arg0, "\n"; } $text =~ s/.*$id//s; } txt_fill(" ", 5, text2txt($text)); } else{ txt_fill(" ", 5, text2txt($text)); } } } ## Print sections sub txt_print_sections { my $section; for($section=0; $section<$max_section; $section++){ print $txtout "\n"; print $txtout txt_header(txt_striptitle($section_title[$section])), ":\n"; txt_fill(" ", 5, text2txt($section_body[$section])); } } sub txt_unescape_codes { my $text = $_[0]; my $loopcount = 0; while(checkloop($loopcount++, $text, "escaped code") && $text =~ /$ECODE($ID)/) { my $id = $1; my $ec = code2txt($ecodes{$id}); ## in code, \ means itself, \\ means \ $ec =~ s/\\\\/escaped-code-backslash/go; $text =~ s/$ECODE$id/\'$ec\'/; } my $loopcount = 0; while(checkloop($loopcount++, $text, "escaped preformat") && $text =~ /$EPREFORMAT($ID)/){ my $id = $1; my $ec = code2txt($epreformats{$id}); $ec =~ s/\n/\\cr/g; $text =~ s/$EPREFORMAT$id/\n$ec\n/; } $text; } sub txt_tables { my $text = $_[0]; my $loopcount = 0; while(checkloop($loopcount++, $text, "\\tabular") && $text =~ /\\tabular/){ my ($id, $format, $arg) = get_arguments("tabular", $text, 2); $arg =~ s/\n/ /sgo; ## remove trailing \cr (otherwise we get an empty last line) $arg =~ s/\\cr\s*$//go; ## parse the format of the tabular environment my $ncols = length($format); my @colformat = (); for($k=0; $k<$ncols; $k++){ my $cf = substr($format, $k, 1); if($cf =~ /l/o){ $colformat[$k] = "l"; } elsif($cf =~ /r/o){ $colformat[$k] = "r"; } elsif($cf =~ /c/o){ $colformat[$k] = "c"; } else{ die("Error in file '$Rdfile': unknown identifier \{$cf\} in" . " tabular format \{$format\}\n"); } } my $table = "\n\n.TS\n$format\n$arg\n\n"; $text =~ s/\\tabular.*$id/$table/s; } $text; } #==********************* Example *********************************** sub rdoc2ex { # (filename) local $encoding = $_[1]; my $tit = striptitle($blocks{"title"}); if(defined $blocks{"examples"}) { local $Exout; if($_[0]) { $Exout = new FileHandle; open $Exout, "> $_[0]"; # will be closed when goes out of scope } else { $Exout = "STDOUT"; } $tit =~ s/\s+/ /g; $encoding = $blocks{"encoding"} if defined $blocks{"encoding"}; if ($encoding ne "unknown") { $Exout->print("### Encoding: $encoding\n\n"); } my $qaliases = ""; foreach my $a (@aliases) { ## % is still escaped here $a =~ s/\\%/%/go; $a = "'" . $a . "'" if $a =~ / / ; $qaliases = $qaliases . " " . $a; } $Exout->print(wrap("### Name: ", "### ", $blocks{"name"}), "\n", wrap("### Title: ", "### ", $tit), "\n", wrap("### Aliases:", "### ", $qaliases), "\n", wrap("### Keywords: ", "### ", @keywords), "\n\n"); ex_print_exampleblock("examples", "Examples"); $Exout->print("\n\n"); } } sub ex_print_exampleblock { my ($block,$env) = @_; if(defined $blocks{$block}) { $Exout->print("### ** Examples\n", code2examp($blocks{$block}), "\n"); } } sub code2examp { ##- similar to ..2latex my $text = $_[0]; $text =~ s/\\%/%/go; $text =~ s/\\ldots/.../go; $text =~ s/\\dots/.../go; $text = undefine_command($text, "special"); $text = undefine_command($text, "var"); $text = undefine_command($text, "link"); $text = replace_prepend_command($text, "dontshow", "## Don't show: ", "## End Don't show", ""); $text = replace_prepend_command($text, "testonly", "## Don't show:", "## End Don't show", ""); $text = replace_prepend_command($text, "dontrun", "## Not run: ", "## End(Not run)", "##D "); $text = replace_addnl_command($text, "donttest", "## No test: ", "## End(No test)"); $text =~ s/\\\\/\\/g; $text = unmark_brackets($text); $text = txt_unescape_codes($text); $text = transform_S3method($text); $text = transform_S4method($text); $text; } #==********************* LaTeX *********************************** sub ltxstriptitle { # text my $text = $_[0]; $text =~ s/\\R/\\R\{\}/go; ## escape unescaped latex specials if ($Rdversion >= 1.1) { $text =~ s/([^\\])([\$\#_&])/$1\\$2/go; } else { $text =~ s/([^\\])([\#_&])/$1\\$2/go; } $text =~ s/\^/\\textasciicircum{}/go; $text =~ s/~/\\textasciitilde{}/go; return $text; } sub foldorder {uc($a) cmp uc($b) or $a cmp $b;} sub rdoc2latex {# (filename) my $c, $a, $blname; local $encoding = $_[1]; $encoding = $blocks{"encoding"} if defined $blocks{"encoding"}; $encoding = latex_canonical_encoding($encoding) unless $encoding eq "unknown"; local $latexout; if($_[0]) { $latexout = new FileHandle; open $latexout, "> $_[0]"; # will be closed when goes out of scope } else { $latexout = "STDOUT"; } $blname = &latex_escape_name($blocks{"name"}); ## This is going to be used in indexing, so it needs ||@ escaped too. ## They render as escaped, though. $blname =~ s/([!@|])/"$1/g; print $latexout "\\inputencoding{$encoding}\n" if $encoding ne "unknown"; print $latexout "\\HeaderA\{"; print $latexout $blname; print $latexout "\}\{"; print $latexout <xstriptitle($blocks{"title"}); print $latexout "\}\{"; print $latexout &latex_link_trans0($blocks{"name"}); print $latexout "\}\n"; my $current = $blocks{"name"}, $generic, $cmd; foreach (sort foldorder @aliases) { next if (/\(/ || /\{/ || /\{-class/); # these two break the PDF # indexing $generic = $a = $_; $generic =~ s/\.data\.frame$/.dataframe/o; $generic =~ s/\.model\.matrix$/.modelmatrix/o; $generic =~ s/\.[^.]+$//o; if ($generic ne "" && $generic eq $current && $generic ne "ar") { $cmd = "methaliasA" } else { $cmd = "aliasA"; $current = $a; } $c = code2latex($_,0, 1); $a = latex_code_alias($c); print STDERR "rdoc2l: alias='$_', code2l(.)='$c', latex_c_a(.)='$a'\n" if $debug; printf $latexout "\\%s\{%s\}\{%s\}\{%s\}\n", $cmd, $a, $blname, latex_link_trans0($a) unless /^\Q$blocks{"name"}\E$/; # Q..E : Quote (escape) Metacharacters } foreach (@keywords) { printf $latexout "\\keyword\{%s\}\{%s\}\n", $_, $blname unless /^$/ ; } latex_print_block("description", "Description"); latex_print_usageblock("usage", "Usage"); latex_print_argblock("arguments", "Arguments"); latex_print_block("format", "Format"); latex_print_block("details", "Details"); latex_print_argblock("value", "Value"); latex_print_sections(); latex_print_block("note", "Note"); latex_print_block("author", "Author"); latex_print_block("source", "Source"); latex_print_block("references", "References"); latex_print_block("seealso", "SeeAlso"); latex_print_exampleblock("examples", "Examples"); print $latexout "\n"; $issue_warnings = 0; } ## The basic translator for 'normal text' sub text2latex { my ($text, $recursive) = @_; ## When processing \item, this calls itself (recursively), hence we ## need to make sure that handling things like \tab and \cr happens ## only once. $text =~ s/$EOB/\\\{/go; $text =~ s/$ECB/\\\}/go; $text = undefine_command($text, "special"); $text =~ s/\\cite/\\Cite/go; $text =~ s/\\itemize/\\Itemize/go; $text =~ s/\\enumerate/\\Enumerate/go; $text =~ s/\\tabular/\\Tabular/go; my $loopcount = 0; if(!$recursive) { ## convert specials while [d]eqn is still escaped. $text =~ s/\^/escaped-textcircum/go; $text =~ s/~/escaped-texttilde/go; $text =~ s//escaped-textgreater/go; $text =~ s/\|/escaped-textbar/go; $text =~ s/\$/escaped-textdollar/go if $verbatimdollar; $text =~ s/\#/escaped-texthash/go; $text =~ s/_/escaped-textunderscore/go; while(checkloop($loopcount++, $text, "\\eqn") && $text =~ /\\eqn/){ my ($id, $eqn, $ascii) = get_arguments("eqn", $text, 2); $eqn = latex_unescape_codes($eqn); $eqn = unmark_brackets($eqn); $eqn =~ s/escaped-textcircum/^/go; $eqn =~ s/escaped-texttilde/~/go; $eqn =~ s/escaped-textless//go; $eqn =~ s/escaped-textbar/|/go; $eqn =~ s/escaped-textdollar/\$/go if $verbatimdollar; $eqn =~ s/escaped-texthash/\#/go; $eqn =~ s/escaped-textunderscore/_/go; ## $ascii may be empty $text =~ s/\\eqn.*$id/\\eeeeqn\{$eqn\}\{\}/s; } $loopcount = 0; while(checkloop($loopcount++, $text, "\\deqn") && $text =~ /\\deqn/) { my ($id, $eqn, $ascii) = get_arguments("deqn", $text, 2); $eqn = latex_unescape_codes($eqn); $eqn = unmark_brackets($eqn); $eqn =~ s/escaped-textcircum/^/go; $eqn =~ s/escaped-texttilde/~/go; $eqn =~ s/escaped-textless//go; $eqn =~ s/escaped-textbar/|/go; $eqn =~ s/escaped-textdollar/\$/go if $verbatimdollar; $eqn =~ s/escaped-texthash/\#/go; $eqn =~ s/escaped-textunderscore/_/go; $text =~ s/\\deqn.*$id/\\dddeqn\{$eqn\}\{\}/s; } ## URLs need to be verbatim $loopcount = 0; while(checkloop($loopcount++, $text, "\\url") && $text =~ /\\url/) { my ($id, $url) = get_arguments("url", $text, 1); $url =~ s/escaped-textcircum/^/go; $url =~ s/escaped-texttilde/~/go; $url =~ s/escaped-textless//go; $url =~ s/escaped-textbar/|/go; $url =~ s/escaped-textdollar/\$/go if $verbatimdollar; $url =~ s/escaped-texthash/\#/go; $url =~ s/escaped-textunderscore/_/go; $text =~ s/\\url.*$id/\\uuuurl\{$url\}/s; } $text =~ s/escaped-textcircum/\\textasciicircum{}/go; $text =~ s/escaped-texttilde/\\textasciitilde{}/go; $text =~ s/escaped-textless/\\textless{}/go; $text =~ s/escaped-textgreater/\\textgreater{}/go; $text =~ s/escaped-textbar/\\textbar{}/go; ## interpret \$ and $ as dollar, possibly preceded by multiple \ ## This works for $ \$ and \\$ but not \\\$ -- implausible in text. $text =~ s/^\\escaped-textdollar|((\\\\)*)\\escaped-textdollar/$1$2\\\$/go if $verbatimdollar; $text =~ s/^\\escaped-texthash|((\\\\)*)\\escaped-texthash/$1$2\\\#/go; $text =~ s/^\\escaped-textunderscore|((\\\\)*)\\escaped-textunderscore/$1$2\\_/go; $text =~ s/escaped-textdollar/\\\$/go if $verbatimdollar; $text =~ s/escaped-texthash/\\\#/go; $text =~ s/escaped-textunderscore/\\_/go; } ## Handle encoded text: $loopcount = 0; while(checkloop($loopcount++, $text, "\\enc") && $text =~ /\\enc/){ my ($id, $enc, $ascii) = get_arguments("enc", $text, 2); if($encoding eq "unknown") { # \enc without \encoding $enc = $ascii if $ascii; $enc =~ s/\\([^&])/$1/go; } $text =~ s/\\enc(.*)$id/$enc/s; } $loopcount = 0; while(checkloop($loopcount++, $text, "\\item") && $text =~ /\\itemnormal/s) { my ($id, $arg, $desc) = get_arguments_check("item", $text, 2); $descitem = "\\DITEM[" . text2latex($arg, 1) . "] " . text2latex($desc, 1); $text =~ s/\\itemnormal.*$id/$descitem/s; } $text =~ s/\\eeeeqn/\\eqn/go; $text =~ s/\\dddeqn/\\deqn/og; $text =~ s/\\DITEM/\\item/og; $text =~ s/\\uuuurl/\\url/go; my $loopcount = 0; while(checkloop($loopcount++, $text, "escaped preformat") && $text =~ /$EPREFORMAT($ID)/){ my $id = $1; my $ec = latex_preformat_cmd(code2latex($epreformats{$id},1,1)); $text =~ s/$EPREFORMAT$id/$ec/; } if(!$recursive) { $text =~ s/\\\\/\\bsl{}/go; ## A mess: map & \& \\& \\\& to \& \& \bsl{}\& \bsl{}\& $text =~ s/([^\\])&/$1\\&/go; $text =~ s/\\R(\s+)/\\R\{\}$1/go; $text =~ s/\\cr\n\[/\\\\\{\}\n\[/go; $text =~ s/\\cr/\\\\/go; $text =~ s/\\tab(\s+)/&$1/go; } ## we need to convert \links's $loopcount = 0; while(checkloop($loopcount++, $text, "\\link") && $text =~ /\\link(\[.*\])?$ID/){ my ($id, $arg, $dest, $opt) = get_link($text); my $mapped_name = &latex_link_trans0($dest); $text =~ s/\\link(\[.*\])?$id.*$id/\\LinkA{$arg}{$mapped_name}/s; } $text = latex_unescape_codes($text); unmark_brackets($text); } sub code2latex { my ($text, $hyper, $var) = @_; $text =~ s/\\%/%/go; $text =~ s/\\ldots/.../go; $text =~ s/\\dots/.../go; $text = undefine_command($text, "special"); $text = undefine_command($text, "var") unless $var > 0; ## $text =~ s/\\\\/\\bsl{}/go; if($hyper) { my $loopcount = 0; while(checkloop($loopcount++, $text, "\\link") && $text =~ /\\link(\[.*\])?$ID/) { my ($id, $arg, $dest, $opt) = get_link($text); $text =~ s/\\link(\[.*\])?$id.*$id/HYPERLINK($arg)($dest)/s; } } else { $text = undefine_command($text, "link"); } $text = replace_addnl_command($text, "dontrun", "## Not run: ", "## End(Not run)"); $text = drop_full_command($text, "testonly"); $text = drop_full_command($text, "dontshow"); $text = undefine_command($text, "donttest"); $text = unmark_brackets($text); $text = transform_S3method($text); $text = transform_S4method($text); $text; } sub latex_preformat_cmd { my $code = $_[0]; $code = latex_code_trans ($code); $code = "\\begin\{alltt\}" . $code . "\\end\{alltt\}"; $code; } sub latex_print_block { my ($block,$env) = @_; if(defined $blocks{$block}){ print $latexout "\\begin\{$env\}\\relax\n"; my $thisblock = &text2latex($blocks{$block}); print $latexout $thisblock; print $latexout "\n" unless $thisblock =~ /\n$/ || length($thisblock) == 0; print $latexout "\\end\{$env\}\n"; } } sub latex_print_codeblock { my ($block,$env) = @_; if(defined $blocks{$block}){ print $latexout "\\begin\{$env\}\n"; print $latexout "\\begin\{verbatim\}"; my $ntext = $blocks{$blocK}; if ($ntext =~ /$ECODE/) { warn "\nWarning: \\code inside code block in file '$Rdfile'\n" if $issue_warnings; } my $out = &code2latex($ntext, 0, 1); $out = latex_unescape_codes($out); $out =~ s/\\\\/\\/go; print $latexout $out; print $latexout "\\end\{verbatim\}\n"; print $latexout "\\end\{$env\}\n"; } } sub latex_print_usageblock { my ($block,$env) = @_; if(defined $blocks{$block}){ print $latexout "\\begin\{$env\}\n"; print $latexout "\\begin\{verbatim\}"; my $out = &code2latex($blocks{$block},0,0); $out =~ s/\\\\/\\/go; print $latexout $out; print $latexout "\\end\{verbatim\}\n"; print $latexout "\\end\{$env\}\n"; } } sub latex_print_exampleblock { my ($block,$env) = @_; if(defined $blocks{$block}){ print $latexout "\\begin\{$env\}\n"; print $latexout "\\begin\{ExampleCode\}"; my $ntext = $blocks{$block}; if ($ntext =~ /$ECODE/) { warn "\nWarning: \\code inside \\examples in file '$Rdfile'\n" if $issue_warnings; } my $out = &code2latex($ntext,0,0); $out = latex_unescape_codes($out); $out =~ s/\\\\/\\/go; print $latexout $out; print $latexout "\\end\{ExampleCode\}\n"; print $latexout "\\end\{$env\}\n"; } } ## used for \arguments and \value sub latex_print_argblock { my ($block,$env) = @_; if(defined $blocks{$block}){ print $latexout "\\begin\{$env\}\n"; my $text = $blocks{$block}; ## some people have put \itemize inside \value. ## as from R 2.8.1, strip with a warning, providing not after \item{ if($text =~ /\\item(ize|$ID)/) { if($1 eq "ize") { warn "\nWarning: found \\itemize inside \\$block in file '$Rdfile'\n" if $issue_warnings; $text =~ /\\itemize($ID)/; $id = $1; $text =~ s/\\itemize$id//; $text =~ s/$id//; } } if($text =~ /\\item/s){ #-- if there are \item's $text =~ /^(.*)(\\item.*)*/s; my ($begin, $rest) = split(/\\item/, $text, 2); if($begin){ print $latexout &text2latex($begin); $text =~ s/^\Q$begin//s; } print $latexout "\\begin\{ldescription\}\n"; my $loopcount = 0; while(checkloop($loopcount++, $text, "\\item") && $text =~ /\\item/s){ my ($id, $arg, $desc) = get_arguments_check("item", $text, 2); print $latexout "\\item\["; print $latexout &latex_code_cmd(code2latex($arg,1,1)); print $latexout "\] "; print $latexout &text2latex($desc), "\n"; $text =~ s/.*$id//s; } print $latexout "\\end\{ldescription\}\n"; my $thisblock = &text2latex($text); print $latexout $thisblock; print $latexout "\n" unless $thisblock =~ /\n$/ || length($thisblock) == 0; } else{ # no \item, typical \value block my $thisblock = &text2latex($text); print $latexout $thisblock; print $latexout "\n" unless $thisblock =~ /\n$/ || length($thisblock) == 0; } print $latexout "\\end\{$env\}\n"; } } sub latex_print_sections { my $section; for($section=0; $section<$max_section; $section++){ print $latexout "\\begin\{Section\}\{" . $section_title[$section] . "\}\n"; my $thisblock = &text2latex($section_body[$section]); print $latexout $thisblock; print $latexout "\n" unless $thisblock =~ /\n$/ || length($thisblock) == 0; print $latexout "\\end\{Section\}\n"; } } sub latex_unescape_codes { my $text = $_[0]; my $loopcount = 0; while(checkloop($loopcount++, $text, "escaped code") && $text =~ /$ECODE($ID)/) { my $id = $1; my $ec = latex_code_cmd(code2latex($ecodes{$id},1,1)); $text =~ s/$ECODE$id/$ec/; } my $loopcount = 0; while(checkloop($loopcount++, $text, "escaped preformat") && $text =~ /$EPREFORMAT($ID)/){ my $id = $1; my $ec = latex_preformat_cmd(code2latex($epreformats{$id},1,0)); $text =~ s/$EPREFORMAT$id/$ec/; } $text; } sub latex_escape_name { my $c = $_[0]; $c = unmark_brackets($c); if($c =~ /[$LATEX_SPECIAL]/){ $c =~ s/[$LATEX_SPECIAL]/\\$&/go; #- escape them } $c =~ s/\\\^/\\textasciicircum{}/go;# ^ is SPECIAL $c =~ s/\\~/\\textasciitilde{}/go; $c =~ s/\\\\\\%/\\Rpercent{}/go; $c =~ s/\\\{/\\textbraceleft{}/go; $c =~ s/\\\}/\\textbraceright{}/go; $c =~ s/\\\\\\\\/\\textbackslash{}/go; ## avoid conversion to guillemets $c =~ s/<>/>\{\}>/go; $c; } ## The next two should transform links and aliases identically so use ## common subroutines sub latex_code_trans { my $c = $_[0]; my $BSL = '@BSL@'; if($c =~ /[$LATEX_SPECIAL]/){ $c =~ s/\\\\/$BSL/go; $c =~ s/\\([$LATEX_SPECIAL])/$1/go; #- unescape them (should not be escaped) $c =~ s/[$LATEX_SPECIAL]/\\$&/go; #- escape them $c =~ s/\\\^/\\textasciicircum{}/go;# ^ is SPECIAL $c =~ s/\\~/\\textasciitilde{}/go; $c =~ s/$BSL/\\bsl{}/go; $c =~ s/\\\\/\\bsl{}/go; } ## avoid conversion to guillemets $c =~ s/<>/>\{\}>/go; $c =~ /HYPERLINK\(([^)]*)\)\(([^)]*)\)/; my $c0 = $2; # destination my $link = latex_link_trans($1); $c0 = latex_link_trans0($c0); $c =~ s/HYPERLINK\([^)]*\)\([^)]*\)/\\LinkA{$link}{$c0}/go; $c =~ s/,,/,{},/g; # ,, is a ligature in the ae font. $c =~ s/\\bsl{}var\\{([^}]+)\\}/\\var{$1}/go; $c; } sub latex_link_trans { my $c = $_[0]; $c =~ s/<-\./<\\Rdash\./go; $c =~ s/<-$/<\\Rdash/go; $c; } sub latex_code_cmd { my $code = $_[0]; $code = latex_code_trans ($code); $code = "\\code\{" . $code . "\}"; $code; } sub latex_link_trans0 { my $c = $_[0]; $c = unmark_brackets($c); $c =~ s/\\Rdash/.Rdash./go; $c =~ s/-/.Rdash./go; $c =~ s/\\_/.Rul./go; $c =~ s/\\\$/.Rdol./go; $c =~ s/\\\^/.Rcaret./go; $c =~ s/\^/.Rcaret./go; $c =~ s/_/.Rul./go; $c =~ s/\$/.Rdol./go; $c =~ s/\\#/.Rhash./go; $c =~ s/#/.Rhash./go; $c =~ s/\\&/.Ramp./go; $c =~ s/&/.Ramp./go; $c =~ s/\\~/.Rtilde./go; $c =~ s/~/.Rtilde./go; $c =~ s/\\%/.Rpcent./go; $c =~ s/%/.Rpcent./go; $c =~ s/\\\\/.Rbl./go; $c =~ s/\{/.Rlbrace./go; $c =~ s/\}/.Rrbrace./go; $c; } ## Tough examples are ## Logic.Rd Arithmetic.Rd Extract.Rd formula.Rd sub latex_code_alias { my $c = $_[0]; ##-- $c is (typically) the OUTPUT of code2latex(.) : $c = latex_code_trans ($c); $c = latex_link_trans ($c); $c =~ s/\!/"!/go; # " This is the indexing escape $c =~ s/\|/"|/go; # " ## $c =~ s/@/"@/go; # " Not currently valid R character $c; } #==************************ Compiled HTML ******************************** sub rdoc2chm { # (filename) ; 0 for STDOUT local $htmlout; if($_[0]) { $htmlout = new FileHandle; open $htmlout, "> $_[0]"; # will be closed when goes out of scope } else { $htmlout = "STDOUT"; } $encoding = $_[1]; $encoding = $blocks{"encoding"} if defined $blocks{"encoding"}; $encoding = mime_canonical_encoding($encoding) unless $encoding eq "unknown"; $encoding = "iso-8859-1" if $encoding eq "unknown"; $using_chm = 1; $nlink = 0; print $htmlout (chm_functionhead(html_striptitle($blocks{"title"}), $pkgname, &html_escape_name($blocks{"name"}), $encoding, )); html_print_block("description", "Description"); html_print_codeblock("usage", "Usage"); html_print_argblock("arguments", "Arguments"); html_print_block("format", "Format"); html_print_block("details", "Details"); html_print_argblock("value", "Value"); html_print_sections(); html_print_block("note", "Note"); html_print_block("author", "Author(s)"); html_print_block("source", "Source"); html_print_block("references", "References"); html_print_block("seealso", "See Also"); html_print_codeblock("examples", "Examples"); JScript() if $using_chm && $nlink > 0; print $htmlout (html_functionfoot($pkgname, $version)); if($_[0]) { close $htmlout; } $using_chm = 0; } sub mklink { $nlink++; "onclick=\"findlink('" . $_[0] . "', '" . $_[1] . "')\" " . "style=\"text-decoration: underline; color: blue; cursor: hand\"" } sub JScript { print $htmlout < function findlink(pkg, fn) { var Y, link; Y = location.href.lastIndexOf("\\\\") + 1; link = location.href.substring(0, Y); link = link + "../../" + pkg + "/chtml/" + pkg + ".chm::/" + fn; location.href = link; } END } # Local variables: ** # perl-indent-level: 4 ** # cperl-indent-level: 4 ** # page-delimiter: "^#==" ** # End: **