SDL: wikiheaders.pl: A bunch of work on correct wordwrap and (de)wikify.

From d115857c6f63f1014da8f5477b89539fbfe100fd Mon Sep 17 00:00:00 2001
From: "Ryan C. Gordon" <[EMAIL REDACTED]>
Date: Wed, 14 Jul 2021 08:11:18 -0400
Subject: [PATCH] wikiheaders.pl: A bunch of work on correct wordwrap and
 (de)wikify.

---
 build-scripts/wikiheaders.pl | 162 ++++++++++++++++++++++-------------
 1 file changed, 104 insertions(+), 58 deletions(-)

diff --git a/build-scripts/wikiheaders.pl b/build-scripts/wikiheaders.pl
index c7b84f35d..c4019cb67 100755
--- a/build-scripts/wikiheaders.pl
+++ b/build-scripts/wikiheaders.pl
@@ -103,27 +103,36 @@ sub wordwrap {
 
     my $retval = '';
 
-    while ($str =~ s/(.*?)(\n+\`\`\`.*?\`\`\`\n+|\n+\<syntaxhighlight.*?\<\/syntaxhighlight\>\n+)//ms) {
+    #print("\n\nWORDWRAP:\n\n$str\n\n\n");
+
+    while ($str =~ s/(.*?)(\`\`\`.*?\`\`\`|\<syntaxhighlight.*?\<\/syntaxhighlight\>)//ms) {
+        #print("\n\nWORDWRAP BLOCK:\n\n$1\n\n ===\n\n$2\n\n\n");
         $retval .= wordwrap_paragraphs($1); # wrap it.
-        $retval .= $2;  # don't wrap it.
+        $retval .= "$2\n\n";  # don't wrap it.
     }
 
     $retval .= wordwrap_paragraphs($str);  # wrap what's left.
     $retval =~ s/\n+$//;
+
+    #print("\n\nWORDWRAP DONE:\n\n$retval\n\n\n");
     return $retval;
 }
 
-
-sub wikify {
+# This assumes you're moving from Markdown (in the Doxygen data) to Wiki, which
+#  is why the 'md' section is so sparse.
+sub wikify_chunk {
     my $wikitype = shift;
     my $str = shift;
+    my $codelang = shift;
+    my $code = shift;
+
+    #print("\n\nWIKIFY CHUNK:\n\n$str\n\n\n");
 
     if ($wikitype eq 'mediawiki') {
         # Convert obvious SDL things to wikilinks.
         $str =~ s/\b(SDL_[a-zA-Z0-9_]+)/[[$1]]/gms;
 
         # Make some Markdown things into MediaWiki...
-        $str =~ s/\`\`\`(c\+\+|c)(.*?)\`\`\`/<syntaxhighlight lang='$1'>$2<\/syntaxhighlight>/gms;
 
         # <code></code> is also popular.  :/
         $str =~ s/\`(.*?)\`/<code>$1<\/code>/gms;
@@ -139,71 +148,55 @@ sub wikify {
 
         # bullets
         $str =~ s/^\- /* /gm;
+
+        if (defined $code) {
+            $str .= "<syntaxhighlight lang='$codelang'>$code<\/syntaxhighlight>";
+        }
     } elsif ($wikitype eq 'md') {
         # Convert obvious SDL things to wikilinks.
         $str =~ s/\b(SDL_[a-zA-Z0-9_]+)/[$1]($1)/gms;
+        if (defined $code) {
+            $str .= "```$codelang$code```";
+        }
     }
+
+    #print("\n\nWIKIFY CHUNK DONE:\n\n$str\n\n\n");
+
     return $str;
 }
 
-sub dewikify {
+sub wikify {
     my $wikitype = shift;
     my $str = shift;
-    return '' if not defined $str;
-    my @lines = split /\n/, $str;
-    return '' if scalar(@lines) == 0;
+    my $retval = '';
 
-    my $iwikitype = 0;
-    if ($wikitype eq 'mediawiki') {
-        $iwikitype = 1;
-    } elsif ($wikitype eq 'md') {
-        $iwikitype = 2;
-    } else {
-        die("Unexpected wikitype '$wikitype'\n");
-    }
+    #print("WIKIFY WHOLE:\n\n$str\n\n\n");
 
-    while (1) {
-        my $l = shift @lines;
-        last if not defined $l;
-        chomp($l);
-        $l =~ s/\A\s*//;
-        $l =~ s/\s*\Z//;
-        next if ($l eq '');
-        next if ($iwikitype == 1) and ($l =~ /\A\= .*? \=\Z/);
-        next if ($iwikitype == 1) and ($l =~ /\A\=\= .*? \=\=\Z/);
-        next if ($iwikitype == 2) and ($l =~ /\A\#\# /);
-        unshift @lines, $l;
-        last;
+    while ($str =~ s/\A(.*?)\`\`\`(c\+\+|c)(.*?)\`\`\`//ms) {
+        $retval .= wikify_chunk($wikitype, $1, $2, $3);
     }
+    $retval .= wikify_chunk($wikitype, $str, undef, undef);
 
-    while (1) {
-        my $l = pop @lines;
-        last if not defined $l;
-        chomp($l);
-        $l =~ s/\A\s*//;
-        $l =~ s/\s*\Z//;
-        next if ($l eq '');
-        push @lines, $l;
-        last;
-    }
+    #print("WIKIFY WHOLE DONE:\n\n$retval\n\n\n");
+
+    return $retval;
+}
 
-    $str = '';
-    foreach (@lines) {
-        chomp;
-        s/\A\s*//;
-        s/\s*\Z//;
-        $str .= "$_\n";
-    }
 
-    if ($iwikitype == 1) {  #($wikitype eq 'mediawiki')
+sub dewikify_chunk {
+    my $wikitype = shift;
+    my $str = shift;
+    my $codelang = shift;
+    my $code = shift;
+
+    #print("\n\nDEWIKIFY CHUNK:\n\n$str\n\n\n");
+
+    if ($wikitype eq 'mediawiki') {
         # Doxygen supports Markdown (and it just simply looks better than MediaWiki
-        # when looking at the raw headers, so do some conversions here as necessary.
+        # when looking at the raw headers), so do some conversions here as necessary.
 
         $str =~ s/\[\[(SDL_[a-zA-Z0-9_]+)\]\]/$1/gms;  # Dump obvious wikilinks.
 
-        # convert mediawiki syntax highlighting to Markdown backticks.
-        $str =~ s/\<syntaxhighlight lang='?(.*?)'?>(.*?)<\/syntaxhighlight>/```$1$2```/gms;
-
         # <code></code> is also popular.  :/
         $str =~ s/\<code>(.*?)<\/code>/`$1`/gms;
 
@@ -220,9 +213,36 @@ sub dewikify {
         $str =~ s/^\* /- /gm;
     }
 
+    if (defined $code) {
+        $str .= "```$codelang$code```";
+    }
+
+    #print("\n\nDEWIKIFY CHUNK DONE:\n\n$str\n\n\n");
+
     return $str;
 }
 
+sub dewikify {
+    my $wikitype = shift;
+    my $str = shift;
+    return '' if not defined $str;
+
+    #print("DEWIKIFY WHOLE:\n\n$str\n\n\n");
+
+    $str =~ s/\A[\s\n]*\= .*? \=\s*?\n+//ms;
+    $str =~ s/\A[\s\n]*\=\= .*? \=\=\s*?\n+//ms;
+
+    my $retval = '';
+    while ($str =~ s/\A(.*?)<syntaxhighlight lang='?(.*?)'?>(.*?)<\/syntaxhighlight\>//ms) {
+        $retval .= dewikify_chunk($wikitype, $1, $2, $3);
+    }
+    $retval .= dewikify_chunk($wikitype, $str, undef, undef);
+
+    #print("DEWIKIFY WHOLE DONE:\n\n$retval\n\n\n");
+
+    return $retval;
+}
+
 sub usage {
     die("USAGE: $0 <source code git clone path> <wiki git clone path> [--copy-to-headers|--copy-to-wiki] [--warn-about-missing]\n\n");
 }
@@ -280,8 +300,23 @@ sub usage {
             chomp;
             push @templines, $_;
             last if /\A\s*\*\/\Z/;
-            s/\A\s*\*\s*//;
-            $str .= "$_\n";
+            if (s/\A\s*\*\s*\`\`\`/```/) {  # this is a hack, but a lot of other code relies on the whitespace being trimmed, but we can't trim it in code blocks...
+                $str .= "$_\n";
+                while (<FH>) {
+                    chomp;
+                    push @templines, $_;
+                    s/\A\s*\*\s?//;
+                    if (s/\A\s*\`\`\`/```/) {
+                        $str .= "$_\n";
+                        last;
+                    } else {
+                        $str .= "$_\n";
+                    }
+                }
+            } else {
+                s/\A\s*\*\s*//;
+                $str .= "$_\n";
+            }
         }
 
         my $decl = <FH>;
@@ -341,6 +376,7 @@ sub usage {
         }
 
         #print("$fn:\n$str\n\n");
+
         $headerfuncs{$fn} = $str;
         $headerdecls{$fn} = $decl;
         $headerfuncslocation{$fn} = $dent;
@@ -424,8 +460,7 @@ sub usage {
             die("Unexpected wiki file type. Fixme!\n");
         }
 
-        my $str = ($current_section eq 'Code Examples') ? $orig : $_;
-        $sections{$current_section} .= "$str\n";
+        $sections{$current_section} .= "$orig\n";
     }
     close(FH);
 
@@ -646,14 +681,25 @@ sub usage {
         @doxygenlines = (@briefsplit, @doxygenlines);
 
         my $remarks = '';
+        # !!! FIXME: wordwrap and wikify might handle this, now.
         while (@doxygenlines) {
             last if $doxygenlines[0] =~ /\A\\/;  # some sort of doxygen command, assume we're past the general remarks.
             my $l = shift @doxygenlines;
-            $l =~ s/\A\s*//;
-            $l =~ s/\s*\Z//;
-            $remarks .= "$l\n";
+            if ($l =~ /\A\`\`\`/) {  # syntax highlighting, don't reformat.
+                $remarks .= "$l\n";
+                while ((@doxygenlines) && (not $l =~ /\`\`\`\Z/)) {
+                    $l = shift @doxygenlines;
+                    $remarks .= "$l\n";
+                }
+            } else {
+                $l =~ s/\A\s*//;
+                $l =~ s/\s*\Z//;
+                $remarks .= "$l\n";
+            }
         }
 
+        #print("REMARKS:\n\n $remarks\n\n");
+
         $remarks = wordwrap(wikify($wikitype, $remarks));
         $remarks =~ s/\A\s*//;
         $remarks =~ s/\s*\Z//;