ghwikipp: build_categories.php now works per-subdir.

From a85d34db634bf441ed0f8b0c6a89397ba9a94083 Mon Sep 17 00:00:00 2001
From: "Ryan C. Gordon" <[EMAIL REDACTED]>
Date: Fri, 17 Jun 2022 17:07:57 -0400
Subject: [PATCH] build_categories.php now works per-subdir.

---
 build_categories.php | 164 +++++++++++++++++++++++++++----------------
 1 file changed, 102 insertions(+), 62 deletions(-)

diff --git a/build_categories.php b/build_categories.php
index a5ddcf0..923d6be 100755
--- a/build_categories.php
+++ b/build_categories.php
@@ -36,8 +36,9 @@ function build_category_lists($srcdir)
     while (($dent = readdir($dirp)) !== false) {
         if (substr($dent, 0, 1) == '.') { continue; }  // skip ".", "..", and metadata.
         $src = "$srcdir/$dent";
-        if (is_dir($src)) {  // !!! FIXME: we don't actually support subdirs elsewhere.
-            build_category_lists($src);
+        if (is_dir($src)) {
+            // categories are per-subdir, don't walk the tree.
+            continue;  //build_category_lists($src);
         } else {
             $ext = strrchr($dent, '.');
 
@@ -103,85 +104,124 @@ function write_category_list($fp, $pages)
     fputs($fp, "<!-- END CATEGORY LIST -->\n");
 }
 
+function find_subdirs($base, $dname, &$output)  // Recursively appends each non-dot subdirectory found under $dname to $output, as "$base/<name>" (just "<name>" when $base is NULL).
+{
+    $dirp = opendir($dname);
+    if ($dirp === false) {
+        return;  // oh well. (Unreadable directory: skip it. Nothing is returned on this path; the visible callers ignore the return value.)
+    }
 
-// Mainline!
+    $sep = ($base == NULL) ? '' : '/';  // no separator when there is no base, so top-level entries get no leading slash.
 
-$git_repo_lock_fp = fopen($repo_lock_fname, 'c+');
-if ($git_repo_lock_fp === false) {
-    print("Failed to obtain Git repo lock. Please try again later.\n");
-    exit(1);
-} else if (flock($git_repo_lock_fp, LOCK_EX) === false) {
-    print("Exclusive flock of Git repo lock failed. Please try again later.");  // uh...?
-    exit(1);
+    while (($dent = readdir($dirp)) !== false) {
+        $path = "$dname/$dent";  // on-disk path of this entry, used for is_dir() and for recursing.
+        if (substr($dent, 0, 1) == '.') {
+            continue;  // skip ".", "..", and metadata.
+        } else if (is_dir($path)) {
+            $thisbase = "$base$sep$dent";  // name recorded in $output: built from $base, not from $dname.
+            $output[] = $thisbase;  // a directory is recorded before any of its own subdirectories.
+            find_subdirs($thisbase, $path, $output);
+        }
+    }
+
+    closedir($dirp);
+
+    return $output;  // same array that was filled through the by-reference parameter.
 }
 
-build_category_lists($raw_data);
+function handle_subdir($dname)  // Rebuilds the category list pages for the files directly inside $dname (subdirectories are handled by separate calls).
+{
+    global $categories, $escrawdata;  // $escrawdata is used by the git-cleanup system() calls below; without this it is undefined inside the function and the "cd" gets no target.
 
-foreach ($categories as $cat => $pages) {
-    //print("CATEGORY '$cat':\n");
-    //print_r($pages);
+    $categories = array();
+    build_category_lists($dname);
 
-    $path = "$raw_data/$cat.mediawiki";  // for now.
-    $tmppath = "$raw_data/.$cat.mediawiki.tmp";  // for now.
-    $contents = '';
-    if (!file_exists($path)) {
-        file_put_contents($path, "= $cat =\n\n<!-- BEGIN CATEGORY LIST -->\n<!-- END CATEGORY LIST -->\n\n");
-    }
+    foreach ($categories as $cat => $pages) {
+        //print("CATEGORY '$cat':\n");
+        //print_r($pages);
 
-    $in = fopen($path, "r");
-    if ($in === false) {
-        print("Failed to open '$path' for reading\n");
-        system("cd $escrawdata && git clean -dfq && git checkout -- .");
-        exit(1);
-    }
+        $path = "$dname/$cat.mediawiki";  // for now.
+        $tmppath = "$dname/.$cat.mediawiki.tmp";  // for now.
+        $contents = '';
+        if (!file_exists($path)) {
+            file_put_contents($path, "= $cat =\n\n<!-- BEGIN CATEGORY LIST -->\n<!-- END CATEGORY LIST -->\n\n");
+        }
 
-    $out = fopen($tmppath, "w");
-    if ($out === false) {
-        print("Failed to open '$tmppath' for writing\n");
-        system("cd $escrawdata && git clean -dfq && git checkout -- .");
-        exit(1);
-    }
+        $in = fopen($path, "r");
+        if ($in === false) {
+            print("Failed to open '$path' for reading\n");
+            system("cd $escrawdata && git clean -dfq && git checkout -- .");
+            exit(1);
+        }
 
-    $wrote_list = false;
-    while (($line = fgets($in)) !== false) {
-        //print("LINE: [" . trim($line) . "]\n");
-        if (trim($line) == '----') {  // the footer? Just stuff the list before it, oh well.
-            if (!$wrote_list) {
-                write_category_list($out, $pages);
-                $wrote_list = true;
-            }
-            fputs($out, "----\n");
-        } else if (trim($line) == '<!-- BEGIN CATEGORY LIST -->') {
-            if (!$wrote_list) {
-                write_category_list($out, $pages);
-                $wrote_list = true;
-            }
-            while (($line = fgets($in)) !== false) {
-                if (trim($line) == '<!-- END CATEGORY LIST -->') {
-                    break;
+        $out = fopen($tmppath, "w");
+        if ($out === false) {
+            print("Failed to open '$tmppath' for writing\n");
+            system("cd $escrawdata && git clean -dfq && git checkout -- .");
+            exit(1);
+        }
+
+        $wrote_list = false;
+        while (($line = fgets($in)) !== false) {
+            //print("LINE: [" . trim($line) . "]\n");
+            if (trim($line) == '----') {  // the footer? Just stuff the list before it, oh well.
+                if (!$wrote_list) {
+                    write_category_list($out, $pages);
+                    $wrote_list = true;
                 }
+                fputs($out, "----\n");
+            } else if (trim($line) == '<!-- BEGIN CATEGORY LIST -->') {
+                if (!$wrote_list) {
+                    write_category_list($out, $pages);
+                    $wrote_list = true;
+                }
+                while (($line = fgets($in)) !== false) {
+                    if (trim($line) == '<!-- END CATEGORY LIST -->') {
+                        break;
+                    }
+                }
+            } else {
+                fputs($out, $line);
             }
-        } else {
-            fputs($out, $line);
         }
-    }
 
-    fclose($in);
+        fclose($in);
 
-    if (!$wrote_list) {
-        write_category_list($out, $pages);
-    }
+        if (!$wrote_list) {
+            write_category_list($out, $pages);
+        }
 
-    fclose($out);
+        fclose($out);
 
-    if (!rename($tmppath, $path)) {
-        unlink($tmppath);
-        print("Failed to rename '$tmppath' to '$path'!\n");
-        system("cd $escrawdata && git clean -dfq && git checkout -- .");
-        exit(1);
+        if (!rename($tmppath, $path)) {
+            unlink($tmppath);
+            print("Failed to rename '$tmppath' to '$path'!\n");
+            system("cd $escrawdata && git clean -dfq && git checkout -- .");
+            exit(1);
+        }
     }
 }
 
+
+// Mainline!
+
+$git_repo_lock_fp = fopen($repo_lock_fname, 'c+');  // 'c+' opens the lock file (creating it if missing) without truncating it.
+if ($git_repo_lock_fp === false) {
+    print("Failed to obtain Git repo lock. Please try again later.\n");
+    exit(1);
+} else if (flock($git_repo_lock_fp, LOCK_EX) === false) {
+    print("Exclusive flock of Git repo lock failed. Please try again later.\n");  // uh...?
+    exit(1);
+}
+
+handle_subdir($raw_data);  // get the root directory.
+
+$subdirs = array();
+find_subdirs(NULL, $raw_data, $subdirs);  // fills $subdirs with paths relative to $raw_data.
+foreach ($subdirs as $d) {
+    handle_subdir("$raw_data/$d");  // each subdirectory gets its own, independent set of category pages.
+}
+
 unset($output);
 $failed = ((exec("cd $escrawdata && git status -s |wc -l", $output, $rc) === false) || ($rc != 0));
 if ($failed) {