SDL: wikiheaders: LaTeX output support.

From b181e7b1ca80d0127b6c5887ccb43ea421ac0b36 Mon Sep 17 00:00:00 2001
From: "Ryan C. Gordon" <[EMAIL REDACTED]>
Date: Sun, 5 May 2024 02:03:44 -0400
Subject: [PATCH] wikiheaders: LaTeX output support.

Reference Issue #9440.
---
 build-scripts/wikiheaders.pl | 408 ++++++++++++++++++++++++++++++++++-
 1 file changed, 401 insertions(+), 7 deletions(-)

diff --git a/build-scripts/wikiheaders.pl b/build-scripts/wikiheaders.pl
index 9bd515d1f0dda..700f4ef35f6cc 100755
--- a/build-scripts/wikiheaders.pl
+++ b/build-scripts/wikiheaders.pl
@@ -42,6 +42,7 @@
     $copy_direction = -1, next if $_ eq '--copy-to-wiki';
     $copy_direction = -2, next if $_ eq '--copy-to-manpages';
     $copy_direction = -3, next if $_ eq '--report-coverage-gaps';
+    $copy_direction = -4, next if $_ eq '--copy-to-latex';
     if (/\A--options=(.*)\Z/) {
         $optionsfname = $1;
         next;
@@ -103,6 +104,12 @@
     close(OPTIONS);
 }
 
+sub escLaTeX {
+    my $str = shift;
+    $str =~ s/([_\#\&\^])/\\$1/g;
+    return $str;
+}
+
 my $wordwrap_mode = 'mediawiki';
 sub wordwrap_atom {   # don't call this directly.
     my $str = shift;
@@ -379,7 +386,7 @@ sub dewikify_chunk {
         }
 
         if (defined $code) {
-            $str .= "```$codelang$code```";
+            $str .= "\n```$codelang$code```\n";
         }
     } elsif ($dewikify_mode eq 'manpage') {
         $str =~ s/\./\\[char46]/gms;  # make sure these can't become control codes.
@@ -429,9 +436,6 @@ sub dewikify_chunk {
 
             # bullets
             $str =~ s/^\- /\n\\\(bu /gm;
-
-        } else {
-            die("Unexpected wikitype when converting to manpages");   # !!! FIXME: need to handle Markdown wiki pages.
         }
 
         if (defined $code) {
@@ -444,6 +448,78 @@ sub dewikify_chunk {
             }
             $str .= ".EX\n$code\n.EE\n.PP\n";
         }
+    } elsif ($dewikify_mode eq 'LaTeX') {
+        if ($wikitype eq 'mediawiki') {
+            # Dump obvious wikilinks.
+            if (defined $apiprefixregex) {
+                $str =~ s/\s*\[\[($apiprefixregex[a-zA-Z0-9_]+)\]\]/$1/gms;
+            }
+
+            # links
+            $str =~ s/\[(https?\:\/\/.*?)\s+(.*?)\]/\\href{$1}{$2}/g;
+
+            # <code></code> is also popular.  :/
+            $str =~ s/\s*\<code>(.*?)<\/code>/ \\texttt{$1}/gms;
+
+            # bold+italic
+            $str =~ s/\s*'''''(.*?)'''''/ \\textbf{\\textit{$1}}/gms;
+
+            # bold
+            $str =~ s/\s*'''(.*?)'''/ \\textbf{$1}/gms;
+
+            # italic
+            $str =~ s/\s*''(.*?)''/ \\textit{$1}/gms;
+
+            # bullets
+            $str =~ s/^\*\s+/  \\item /gm;
+        } elsif ($wikitype eq 'md') {
+            # Dump obvious wikilinks.
+            if (defined $apiprefixregex) {
+                $str =~ s/\[(\`?$apiprefixregex[a-zA-Z0-9_]+\`?)\]\($apiprefixregex[a-zA-Z0-9_]+\)/$1/gms;
+            }
+
+            # links
+            $str =~ s/\[(.*?)]\((https?\:\/\/.*?)\)/\\href{$2}{$1}/g;
+
+            # <code></code> is also popular.  :/
+            $str =~ s/\s*\`(.*?)\`/ \\texttt{$1}/gms;
+
+            # bold+italic
+            $str =~ s/\s*\*\*\*(.*?)\*\*\*/ \\textbf{\\textit{$1}}/gms;
+
+            # bold
+            $str =~ s/\s*\*\*(.*?)\*\*/ \\textbf{$1}/gms;
+
+            # italic
+            $str =~ s/\s*\*(.*?)\*/ \\textit{$1}/gms;
+
+            # bullets
+            $str =~ s/^\-\s+/  \\item /gm;
+        }
+
+        # Wrap bullet lists in itemize blocks...
+        $str =~ s/^(\s*\\item .*?)(\n\n|\Z)/\n\\begin{itemize}\n$1$2\n\\end{itemize}\n\n/gms;
+
+        $str = escLaTeX($str);
+
+        if (defined $code) {
+            $code =~ s/\A\n+//gms;
+            $code =~ s/\n+\Z//gms;
+
+            if (($codelang eq '') || ($codelang eq 'output')) {
+                $str .= "\\begin{verbatim}\n$code\n\\end{verbatim}\n";
+            } else {
+                if ($codelang eq 'c') {
+                    $codelang = 'C';
+                } elsif ($codelang eq 'c++') {
+                    $codelang = 'C++';
+                } else {
+                    die("Unexpected codelang '$codelang'");
+                }
+                $str .= "\n\\lstset{language=$codelang}\n";
+                $str .= "\\begin{lstlisting}\n$code\n\\end{lstlisting}\n";
+            }
+        }
     } else {
         die("Unexpected dewikify_mode");
     }
@@ -464,8 +540,14 @@ sub dewikify {
     $str =~ s/\A[\s\n]*\=\= .*? \=\=\s*?\n+//ms;
 
     my $retval = '';
-    while ($str =~ s/\A(.*?)<syntaxhighlight lang='?(.*?)'?>(.*?)<\/syntaxhighlight\>//ms) {
-        $retval .= dewikify_chunk($wikitype, $1, $2, $3);
+    if ($wikitype eq 'mediawiki') {
+        while ($str =~ s/\A(.*?)<syntaxhighlight lang='?(.*?)'?>(.*?)<\/syntaxhighlight\>//ms) {
+            $retval .= dewikify_chunk($wikitype, $1, $2, $3);
+        }
+    } elsif ($wikitype eq 'md') {
+        while ($str =~ s/\A(.*?)(\n?)```(.*?)\n(.*?\n)```\n//ms) {
+            $retval .= dewikify_chunk($wikitype, $1, $3, "$2$4");
+        }
     }
     $retval .= dewikify_chunk($wikitype, $str, undef, undef);
 
@@ -2178,7 +2260,319 @@ sub print_undocumented_section {
         close(FH);
         rename($tmppath, $path) or die("Can't rename '$tmppath' to '$path': $!\n");
     }
-} elsif ($copy_direction == -3) { # --report-coverage_gaps
+
+} elsif ($copy_direction == -4) { # --copy-to-latex
+    # This only takes from the wiki data, since it has sections we omit from the headers, like code examples.
+
+    print STDERR "\n(The --copy-to-latex code is known to not be ready for serious use; send patches, not bug reports, please.)\n\n";
+
+    $dewikify_mode = 'LaTeX';
+    $wordwrap_mode = 'LaTeX';
+
+    # !!! FIXME: code duplication with --copy-to-manpages section.
+
+    my $introtxt = '';
+    if (0) {
+    open(FH, '<', "$srcpath/LICENSE.txt") or die("Can't open '$srcpath/LICENSE.txt': $!\n");
+    while (<FH>) {
+        chomp;
+        $introtxt .= ".\\\" $_\n";
+    }
+    close(FH);
+    }
+
+    if (!$gitrev) {
+        $gitrev = `cd "$srcpath" ; git rev-list HEAD~..`;
+        chomp($gitrev);
+    }
+
+    # !!! FIXME
+    open(FH, '<', "$srcpath/$versionfname") or die("Can't open '$srcpath/$versionfname': $!\n");
+    my $majorver = 0;
+    my $minorver = 0;
+    my $patchver = 0;
+    while (<FH>) {
+        chomp;
+        if (/$versionmajorregex/) {
+            $majorver = int($1);
+        } elsif (/$versionminorregex/) {
+            $minorver = int($1);
+        } elsif (/$versionpatchregex/) {
+            $patchver = int($1);
+        }
+    }
+    close(FH);
+    my $fullversion = "$majorver.$minorver.$patchver";
+
+    my $latex_fname = "$srcpath/$projectshortname.tex";
+    my $latex_tmpfname = "$latex_fname.tmp";
+    open(TEXFH, '>', "$latex_tmpfname") or die("Can't open '$latex_tmpfname' for writing: $!\n");
+
+    print TEXFH <<__EOF__
+\\documentclass{book}
+
+\\usepackage{listings}
+\\usepackage{color}
+\\usepackage{hyperref}
+
+\\definecolor{dkgreen}{rgb}{0,0.6,0}
+\\definecolor{gray}{rgb}{0.5,0.5,0.5}
+\\definecolor{mauve}{rgb}{0.58,0,0.82}
+
+\\setcounter{secnumdepth}{0}
+
+\\lstset{frame=tb,
+  language=C,
+  aboveskip=3mm,
+  belowskip=3mm,
+  showstringspaces=false,
+  columns=flexible,
+  basicstyle={\\small\\ttfamily},
+  numbers=none,
+  numberstyle=\\tiny\\color{gray},
+  keywordstyle=\\color{blue},
+  commentstyle=\\color{dkgreen},
+  stringstyle=\\color{mauve},
+  breaklines=true,
+  breakatwhitespace=true,
+  tabsize=3
+}
+
+\\begin{document}
+\\frontmatter
+
+\\title{$projectfullname $majorver.$minorver.$patchver Reference Manual}
+\\author{The $projectshortname Developers}
+\\maketitle
+
+\\mainmatter
+
+__EOF__
+;
+
+    # !!! FIXME: Maybe put this in the book intro?  print TEXFH $introtxt;
+
+    # Sort symbols by symbol type, then alphabetically.
+    my @headersymskeys = sort {
+        my $rc = $headersymstype{$a} <=> $headersymstype{$b};
+        if ($rc == 0) {
+            $rc = $a cmp $b;
+        }
+        return $rc;
+    } keys %headersyms;
+
+    my $current_symtype = 0;
+    foreach (@headersymskeys) {
+        my $sym = $_;
+        next if not defined $wikisyms{$sym};  # don't have a page for that function, skip it.
+        my $symtype = $headersymstype{$sym};
+        my $wikitype = $wikitypes{$sym};
+        my $sectionsref = $wikisyms{$sym};
+        my $remarks = $sectionsref->{'Remarks'};
+        my $params = $sectionsref->{'Function Parameters'};
+        my $returns = $sectionsref->{'Return Value'};
+        my $version = $sectionsref->{'Version'};
+        my $threadsafety = $sectionsref->{'Thread Safety'};
+        my $related = $sectionsref->{'See Also'};
+        my $examples = $sectionsref->{'Code Examples'};
+        my $deprecated = $sectionsref->{'Deprecated'};
+        my $headerfile = $manpageheaderfiletext;
+        $headerfile =~ s/\%fname\%/$headersymslocation{$sym}/g;
+        $headerfile .= "\n";
+
+        my $brief = $sectionsref->{'[Brief]'};
+        my $decl = $headerdecls{$sym};
+        my $str = '';
+
+        if ($current_symtype != $symtype) {
+            my $newchapter = '';
+            if ($symtype == 1) {
+                $newchapter = 'Functions';
+            } elsif ($symtype == 2) {
+                $newchapter = 'Macros';
+            } else {
+                $newchapter = 'Datatypes';
+            }
+            $str .= "\n\n\\chapter{$projectshortname $newchapter}\n\n\\clearpage\n\n";
+            $current_symtype = $symtype;
+        }
+
+        $brief = "$brief";
+        $brief =~ s/\A[\s\n]*\= .*? \=\s*?\n+//ms;
+        $brief =~ s/\A[\s\n]*\=\= .*? \=\=\s*?\n+//ms;
+        $brief =~ s/\A(.*?\.) /$1\n/;  # \brief should only be one sentence, delimited by a period+space. Split if necessary.
+        my @briefsplit = split /\n/, $brief;
+        $brief = shift @briefsplit;
+        $brief = dewikify($wikitype, $brief);
+
+        if (defined $remarks) {
+            $remarks = dewikify($wikitype, join("\n", @briefsplit) . $remarks);
+        }
+
+        my $escapedsym = escLaTeX($sym);
+        $str .= "\\hypertarget{$sym}{%\n\\section{$escapedsym}\\label{$sym}}\n\n";
+        $str .= $brief if (defined $brief);
+        $str .= "\n\n";
+
+        if (defined $deprecated) {
+            $str .= "\\subsection{Deprecated}\n\n";
+            $str .= dewikify($wikitype, $deprecated) . "\n";
+        }
+
+        if (defined $headerfile) {
+            $str .= "\\subsection{Header File}\n\n";
+            $str .= dewikify($wikitype, $headerfile) . "\n";
+        }
+
+        $str .= "\\subsection{Syntax}\n\n";
+        $str .= "\\begin{lstlisting}\n$decl\n\\end{lstlisting}\n";
+
+        if (defined $params) {
+            if (($symtype == 1) || ($symtype == 5)) {
+                $str .= "\\subsection{Function Parameters}\n\n";
+            } elsif ($symtype == 2) {  # macro
+                $str .= "\\subsection{Macro Parameters}\n\n";
+            } elsif ($symtype == 3) {  # struct/union
+                $str .= "\\subsection{Fields}\n\n";
+            } elsif ($symtype == 4) {  # enum
+                $str .= "\\subsection{Values}\n\n";
+            } else {
+                die("Unexpected symtype $symtype");
+            }
+
+            $str .= "\\begin{center}\n";
+            $str .= "    \\begin{tabular}{ | l | p{0.75\\textwidth} |}\n";
+            $str .= "    \\hline\n";
+
+            my @lines = split /\n/, $params;
+            if ($wikitype eq 'mediawiki') {
+                die("Unexpected data parsing MediaWiki table") if (shift @lines ne '{|');  # Dump the '{|' start
+                while (scalar(@lines) >= 3) {
+                    my $name = shift @lines;
+                    my $desc = shift @lines;
+                    my $terminator = shift @lines;  # the '|-' or '|}' line.
+                    last if ($terminator ne '|-') and ($terminator ne '|}');  # we seem to have run out of table.
+                    $name =~ s/\A\|\s*//;
+                    $name =~ s/\A\*\*(.*?)\*\*/$1/;
+                    $name =~ s/\A\'\'\'(.*?)\'\'\'/$1/;
+                    $name = escLaTeX($name);
+                    $desc =~ s/\A\|\s*//;
+                    $desc = dewikify($wikitype, $desc);
+                    #print STDERR "FN: $sym   NAME: $name   DESC: $desc TERM: $terminator\n";
+                    $str .= "    \\textbf{$name} & $desc \\\\ \\hline\n";
+                }
+            } elsif ($wikitype eq 'md') {
+                my $l;
+                $l = shift @lines;
+                die("Unexpected data parsing Markdown table") if (not $l =~ /\A\s*\|\s*\|\s*\|\s*\Z/);
+                $l = shift @lines;
+                die("Unexpected data parsing Markdown table") if (not $l =~ /\A\s*\|\s*\-*\s*\|\s*\-*\s*\|\s*\Z/);
+                while (scalar(@lines) >= 1) {
+                    $l = shift @lines;
+                    if ($l =~ /\A\s*\|\s*(.*?)\s*\|\s*(.*?)\s*\|\s*\Z/) {
+                        my $name = $1;
+                        my $desc = $2;
+                        $name =~ s/\A\*\*(.*?)\*\*/$1/;
+                        $name =~ s/\A\'\'\'(.*?)\'\'\'/$1/;
+                        $name = escLaTeX($name);
+                        $desc = dewikify($wikitype, $desc);
+                        $str .= "    \\textbf{$name} & $desc \\\\ \\hline\n";
+                    } else {
+                        last;  # we seem to have run out of table.
+                    }
+                }
+            } else {
+                die("write me");
+            }
+
+            $str .= "    \\end{tabular}\n";
+            $str .= "\\end{center}\n";
+        }
+
+        if (defined $returns) {
+            $str .= "\\subsection{Return Value}\n\n";
+            $str .= dewikify($wikitype, $returns) . "\n";
+        }
+
+        if (defined $remarks) {
+            $str .= "\\subsection{Remarks}\n\n";
+            $str .= $remarks . "\n";
+        }
+
+        if (defined $examples) {
+            $str .= "\\subsection{Code Examples}\n\n";
+            $dewikify_manpage_code_indent = 0;
+            $str .= dewikify($wikitype, $examples) . "\n";
+            $dewikify_manpage_code_indent = 1;
+        }
+
+        if (defined $threadsafety) {
+            $str .= "\\subsection{Thread Safety}\n\n";
+            $str .= dewikify($wikitype, $threadsafety) . "\n";
+        }
+
+        if (defined $version) {
+            $str .= "\\subsection{Version}\n\n";
+            $str .= dewikify($wikitype, $version) . "\n";
+        }
+
+        if (defined $related) {
+            $str .= "\\subsection{See Also}\n\n";
+            $str .= "\\begin{itemize}\n";
+            # !!! FIXME: lots of code duplication in all of these.
+            my $v = dewikify($wikitype, $related);
+            my @desclines = split /\n/, $v;
+            my $nextstr = '';
+            foreach (@desclines) {
+                s/\A(\:|\* )//;
+                s/\(\)\Z//;  # Convert "SDL_Func()" to "SDL_Func"
+                s/\[\[(.*?)\]\]/$1/;  # in case some wikilinks remain.
+                s/\[(.*?)\]\(.*?\)/$1/;  # in case some wikilinks remain.
+                s/\A\*\s*\Z//;
+                s/\A\s*\\item\s*//;
+                s/\A\/*//;
+                s/\A\s+//;
+                s/\s+\Z//;
+                next if $_ eq '';
+                next if $_ eq '\begin{itemize}';
+                next if $_ eq '\end{itemize}';
+                $str .= "    \\item $_\n";
+            }
+            $str .= "\\end{itemize}\n";
+            $str .= "\n";
+        }
+
+        # !!! FIXME: Maybe put copyright in the book intro?
+        if (0) {
+        $str .= ".SH COPYRIGHT\n";
+        $str .= "This manpage is licensed under\n";
+        $str .= ".UR https://creativecommons.org/licenses/by/4.0/\n";
+        $str .= "Creative Commons Attribution 4.0 International (CC BY 4.0)\n";
+        $str .= ".UE\n";
+        $str .= ".PP\n";
+        $str .= "This manpage was generated from\n";
+        $str .= ".UR $wikiurl/$sym\n";
+        $str .= "${projectshortname}'s wiki\n";
+        $str .= ".UE\n";
+        $str .= "using SDL/build-scripts/wikiheaders.pl";
+        $str .= " revision $gitrev" if $gitrev ne '';
+        $str .= ".\n";
+        $str .= "Please report issues in this manpage at\n";
+        $str .= ".UR $bugreporturl\n";
+        $str .= "our bugtracker!\n";
+        $str .= ".UE\n";
+        }
+
+        $str .= "\\clearpage\n\n";
+
+        print TEXFH $str;
+    }
+
+    print TEXFH "\\end{document}\n\n";
+    close(TEXFH);
+    rename($latex_tmpfname, $latex_fname) or die("Can't rename '$latex_tmpfname' to '$latex_fname': $!\n");
+
+} elsif ($copy_direction == -3) { # --report-coverage-gaps
     foreach (@coverage_gap) {
         print("$_\n");
     }