--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/genorg.pl Fri Dec 13 00:39:13 2024 +1000
@@ -0,0 +1,499 @@
+#!/usr/bin/perl
+
+use v5.36.0;
+use strict;
+use warnings;
+
+use XML::LibXML;
+use File::Find;
+use File::Copy qw(copy move);
+use Cwd;
+use experimental qw(declared_refs);
+use Data::Dumper;
+
+# Fix up CLI interface
+# Add option ro set cache to 0 through command line
+# Print required elisp code it is in $req_config
+# Add config option for $max_rss and $max_cat
+# Sort cataogirs by entires
+# Variable for map_css
+# Domain thing as well
+# Dicmuemnt how init.el is required
+my $cache = 1;
+
+my $config_file = "genorg-conf";
+my $dir = "blog";
+my $outdir = "out/";
+my $emacs = "emacs";
+my $css = "style.css";
+my $map_css ="other.css";
+my $no_name_dir = "c";
+my $domain = "p.bauherren.ovh";
+my $max_rss = 30;
+# The first page has one less article. Think of it as a feature.
+my $max_cat = 1;
+
+my $username = $ENV{LOGNAME} || $ENV{USER} || getpwuid ($<);
+my $template = <<"END";
+#+title: Test document
+#+subtitle: This is the subtitle
+#+author: $username
+#+keywords: tag thing | related
+#+options: html-link-use-abs-url:nil html-postamble:auto
+#+options: html-preamble:t html-scripts:nil html-style:t
+#+options: html5-fancy:nil tex:t
+#+options: tex:mathjax
+#+html_doctype: html5
+#+html_container: div
+#+html_content_class: content
+#+html_link_home:
+#+html_link_up:
+#+html_mathjax:
+#+html_equation_reference_format: \eqref{%s}
+#+html_head:
+#+html_head_extra:
+#+infojs_opt:
+#+creator: <a href="https://www.gnu.org/software/emacs/">Emacs</a> 31.0.50 (<a href="https://orgmode.org">Org</a> mode 9.7.11)
+#+latex_header:
+END
+
+my $req_config = <<"END";
+(with-eval-after-load 'org
+ (org-link-set-parameters "genorg"
+ :follow nil
+ :export
+ #'(lambda (link desc _ _)
+ (format "<genorg desc=\"%s\" link=\"%s\"/>" desc link))
+ :store nil))
+END
+
+my $elisp_code = <<"END" =~ s/\n\s+//gr;
+(let ((ls '(LIST)))
+ (require 'org)
+ (mapc
+ (lambda (b)
+ (with-current-buffer b
+ (when (and (not (string-match "^ " (buffer-name b)))
+ (eq major-mode 'org-mode))
+ (org-export-to-file 'html (pop ls)))))
+ (buffer-list))
+ (kill-emacs))
+END
+
+
+my ($h_intro, $h_c_intro, $h_cat_li, $h_chap, $h_en_cat, $h_en, $h_end)
+ = split /SPLIT/ ,<<"END" =~ s/\n\s+//gr;
+<!DOCTYPE html>
+<html lang="en">
+ <head>
+ <meta charset="UTF-8">
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
+ <title>TITLE</title>
+ <link rel="stylesheet" href="style.css">
+ </head>
+SPLIT
+ <body>
+ <h1> Pranshu's Blog </h1>
+ <ul id="cataogries">
+SPLIT
+ <li> <a href="LINK"> CATAG (NUMBER)</a>
+SPLIT
+ </ul>
+ <div>
+ <h2> All articles </h2> <hr>
+SPLIT
+ <a href="LINK">
+ <strong> TITLE </strong> <span>DATE</span>
+ <p>DESCREPTION</p>
+ <i>CATAG</i>
+ <hr>
+ </a>
+SPLIT
+ <a href="LINK">
+ <strong> TITLE </strong> <span>DATE</span>
+ <p>DESCREPTION</p>
+ <hr>
+ </a>
+SPLIT
+ </div>
+ </body>
+</html>
+END
+
+my $help = <<"END";
+genorg [option] [dir|file]
+Option can be:
+ -t FILE : Make a template org file at FILE
+ -h : Help
+ -c FILE : sepcify a new config file instead of genorg-conf
+ -r : unconditionally remake org files
+
+If no option is set, accept a directory to generate.
+
+$config_file follow the syntax of:
+VAR VALUE
+where VAR can be:
+ emacs-path : The value should be path of emacs. 'emacs' by default.
+ css : The path of the css file to use.
+ in_dir : The directory that contains the files.
+ out_dir : the output directory.
+ no_name_dir: The directory for generate files. This means in_dir/no_name_dir
+ cannot exist.
+END
+
+
+my $rss_entry = <<"END" =~ s/\n\s+//gr;
+<?xml version=\"1.0\" encoding=\"UTF-8\" ?>
+<rss version=\"2.0\">
+ <channel>
+ <title>W3Schools Home Page</title>
+ <link>https://www.w3schools.com</link>
+ <description>Free web building tutorials</description>"
+END
+
+my %config_vars =
+ (
+ 'emacs-path' => \$emacs,
+ css => \$css,
+ indir => \$dir,
+ outdir => \$outdir,
+ no_name_dir => \$no_name_dir
+ );
+
+sub read_config ($path) {
+ open my $fh, '<', $path or die "Couldn't open config file\n";
+ while (<$fh>) {
+ next if /^\s+$/;
+ chomp;
+ my ($directive, $rest) = split /\s+/, $_, 2;
+ if (exists($config_vars{$directive})) {
+ ${$config_vars{$directive}} = $rest;
+ } else {
+ print "Unknown variable $directive\n";
+ }
+ }
+ close $fh;
+}
+
+sub help_and_bye {
+ print $help;
+ exit;
+}
+
+sub template ($file) {
+ $file // die "-t needs an argument.\n";
+ $file =~ s/\.org$//;
+ open my $fh, '>', $file . ".org" or die $file . " couldn't be opened\n";
+ print $fh $template;
+ close $fh;
+}
+
+# Reletive file from the perspect of file1, to file2
+sub prel_path ($from, $to) {
+ return '.' if $from eq $to;
+ my \(@f1, @f2) = map [m{/[^/]+}g], ($from, $to);
+ # return substr($f1[$#f1], 1) if $from =~ $to;
+ while (@f1 && @f2 && $f1[0] =~ $f2[0]) {
+ shift @f1;
+ shift @f2;
+ }
+ my $p = @f1-1;
+ local $" = "";
+ "../" x ($p > 0 ? $p : 0) . substr("@f2", 1);
+}
+
+my @org_exps;
+my @files_to_make;
+
+sub wanted {
+ my $n_path = $outdir . prel_path ($dir, "$File::Find::name/");
+ # We need to mirror the direcotry structure
+ if (-d) {
+ push @files_to_make, $n_path || die "Couldn't make directory $n_path\n"
+ unless -d $n_path;
+ return;
+ }
+ if (/\.org$/) {
+ push @org_exps, substr $n_path, length($outdir);
+ } else {
+ copy $_, $n_path;
+ }
+}
+
+
+# Export @org_exps
+sub conv_files {
+ my %changed_files;
+ # Remove files that are already converted
+ for my $rel_file (@org_exps) {
+ # Emacs need the full file path
+ my ($file, $to) = map { getcwd . "/$_" . $rel_file} ($dir, $outdir);
+ s/org$/html/ for ($to, $rel_file);
+ unless ($cache && -f $to && (stat($to))[9] > (stat($file))[9]) {
+ $changed_files{$file} = $to;
+ }
+ }
+ %changed_files // exit;
+ $elisp_code =~ s/LIST/join '', map "\"$_\" ", values %changed_files /e;
+ open my $fh, '-|', ($emacs, '--batch', '~/.emacs.d/init.el', keys %changed_files, "--eval" , "$elisp_code");
+ close $fh;
+ \%changed_files
+}
+
+my $cdir;
+
+$ARGV[0] // die "No arguments provided\n";
+while ($_ = shift @ARGV) {
+ /^(-h|--?help)$/ && help_and_bye;
+ /^-t$/ && do { template shift @ARGV ; exit };
+ /^-c$/ && do { $config_file = shift @ARGV; next };
+ /^-d$/ && do { $cache = 0; next };
+ $cdir = $_;
+}
+$cdir || die "directory not provided\n";
+main();
+
+my %catags;
+my @arts;
+
+sub main {
+ -d $cdir or help_and_bye;
+ mkdir $outdir;
+ chdir $cdir or die "Couldn't access $cdir\n";
+ read_config $config_file;
+ -d $dir or die "$dir doesn't exist \n";
+ find (\&wanted, $dir);
+ mkdir for @files_to_make;
+ values %{conv_files()};
+ chdir $outdir;
+ @arts = sort { $b->{date} <=> $a->{date}} map { html_fixup($_) } @org_exps;
+ finalise_html();
+}
+
+sub entry {
+ my ($data, $catagory) = @_;
+ my $str = defined $catagory
+ ? ($h_en_cat =~ s/CATAG/$data->{catag}/er) : $h_en;
+ my $prel_path = "/$no_name_dir/" . (defined $catagory ? "c" : "c/b");
+ $str =~ s/DATE/$data->{date}->fmt/er
+ =~ s/TITLE/$data->{title}/er
+ =~ s/DESCREPTION/$data->{desc}/er
+ =~ s|LINK|prel_path($prel_path, '/'. $data->{file})|er
+ }
+
+sub rss_en ($data) {
+ my $thing = <<"END" =~ s/\n\s+//gr;
+ <item>
+ <title>TITLE</title>
+ <link>LINK</link>
+ <description>DESC</description>
+ </item>
+END
+ $thing =~ s|LINK|"$domain/" . $data->{file}|er
+ =~ s/TITLE/$data->{title}/er
+ =~ s/DESC/$data->{desc}/er;
+}
+
+sub make_h_list {
+ my $rstr = '<ul>';
+ for (@_) {
+ my ($str, $page_no) = @{$_};
+ $rstr .= "<li> <a href=\"$page_no.html\"> $str </a>";
+ }
+ $rstr . "</ul>"
+}
+
+sub min ($x, $y) {
+ ($x, $y)[$x > $y]
+}
+
+sub panigation ($total_arts, $page_no) {
+ if ($total_arts < 9) {
+ make_h_list ((['«', $page_no - 1]) x!! ($page_no - 1),
+ (map [$_ == $page_no ? "<span>$_</span>" : $_, $_ ],
+ (1..($total_arts + 1))),
+ ['»', $page_no])
+ }
+}
+
+sub move_on($article, $cfh, $next_file, $total_arts) {
+ # say "Article is ". $article;
+ my $page = 1 + int $article / $max_cat;
+ print $cfh panigation int($total_arts / $max_cat), $page;
+ print $cfh $h_end;
+ close $cfh;
+ open $cfh, '>', "$next_file$page.html";
+ say "$next_file$page.html";
+ print $cfh $h_intro;
+ $cfh
+}
+
+sub finalise_html {
+ mkdir $no_name_dir;
+ open my $fh, '>', "$no_name_dir/1.html";
+ open my $mrss, '>', "$no_name_dir/rss.xml"; # The master rss file
+ print $mrss $rss_entry;
+ print $fh $h_intro . $h_c_intro;
+ my (%c_files, %r_files); # Cataogry files and rss
+ while (my ($cat, $num) = each %catags) {
+ # ($h_intro, $h_c_intro, $h_cat_li, $h_chap, $h_en_cat, $h_en, $h_end)
+ print $fh $h_cat_li =~ s/NUMBER/$num/re
+ =~ s/CATAG/$cat/er;
+ mkdir "$no_name_dir/$cat";
+ open $c_files{$cat}, '>', "$no_name_dir/$cat/1.html";
+ open $r_files{$cat}, '>', "$no_name_dir/$cat/rss.xml";
+ print {$r_files{$cat}} $rss_entry;
+ print {$c_files{$cat}} $h_intro;
+ }
+ print $fh $h_chap;
+ # :-)
+ my $total_articles = () = map {(1)x$_} values %catags;
+ # Now we iterate through the articles and add their index
+ my ($n_fh, $n_mrss, %n_rss) = (0,0);
+ my %n_cat;
+ for my $art (@arts) {
+ my ($catag, $title) = @{$art}{qw(catag title)};
+ my $rentry = rss_en $art;
+ if (($n_rss{$catag} // 0) < $max_rss) {
+ print {$r_files{$catag}} $rentry;
+ ++$n_rss{$catag};
+ }
+ if ($n_mrss < $max_rss) {
+ print $mrss $rentry;
+ ++$n_mrss
+ }
+ # my $ncat = \$n_cat{$catag};
+ $n_fh++;
+ if ($n_fh % $max_cat == 0) {
+ say "$n_fh % $max_cat = " . $n_fh % $max_cat ;
+ say "\$n_fh is $n_fh";
+ $fh = move_on($n_fh, $fh, "$no_name_dir/", $total_articles);
+ }
+ print $fh entry $art, 1;
+ print {$c_files{$catag}} entry $art;
+ # say "$catag -> $title($n_cat{$catag})";
+ # say ($n_cat{$catag} // 1);
+ # ++$$ncat
+ }
+ # ($total_arts, $page_no)
+ print $fh panigation ((int $total_articles / $max_cat) x 2) if $total_articles >= $max_cat;
+ print $fh $h_end;
+ close $fh;
+ for (values %c_files) {
+ print $_ $h_end;
+ close;
+ }
+ print $mrss '</channel> </rss>';
+ for (values %r_files) {
+ print $_ '</channel> </rss>';;
+ close;
+ }
+}
+
+sub html_fixup ($filename) {
+ my $dom =
+ XML::LibXML->load_html(location => $filename, recover => 1);
+ my %data = ('file', $filename);
+
+ # We don't need style or table of contents heading (if exists)
+ for my $xpath ('/html/head/style', '/html/body/div/div[@id="table-of-contents"]/h2') {
+ $_->parentNode->removeChild($_) for $dom->findnodes($xpath)
+ }
+
+ # Info we need
+
+ # Date
+ for ($dom->findnodes('/html/body/div[@id="postamble"]/p[@class="date"]')) {
+ my $text = $_->to_literal;
+ if ($text =~ /^Date: (\d+)-(\d+)-(\d+)/) {
+ my $date = PDate->new($1, $2, $3);
+ $data{date} = $date;
+ my $p = $_->parentNode;
+ $p->parentNode->removeChild($p);
+ }
+ }
+ # Title\
+ for ($dom->findnodes('/html/head/title')) {
+ $data{title} = $_->to_literal;
+ }
+ # keywords/catogry
+ for my $node ($dom->findnodes('/html/head/meta')) {
+ (my $cont = $node->getAttribute('content')) || next;
+ for ($node->getAttribute('name')) {
+ if (/description/) {
+ $data{desc} = $cont;
+ } elsif (/keywords/) {
+ my ($key, $rest) = parse_keywords($cont);
+ @data{qw[catag title]} = @{$key};
+ $catags{$data{catag}}++;
+ $rest =~ s/\s+\|\s+//;
+ while ($rest) {
+ (my $t,$rest) = parse_keywords($rest);
+ push @{$data{related}}, $t;
+ }
+ } else {
+ next;
+ }
+ $node->parentNode->removeChild($node);
+ }
+ }
+ # $data{dom} = $dom;
+ \%data;
+
+ # print $newfh $dom->toString;
+ # close $newfh;
+}
+
+sub parse_keywords ($str) {
+ if ($str =~ s/\s*(?:"(.+?)"|(\w+))\s*->(?:\s*(?:"(.+?)"|(\w+)))//) {
+ ([$1 // $2, $3 // $4], $str);
+ } else {
+ die "Keywords not arranged properly\n"
+ }
+}
+
+# I wonder if sean combs has made a similar class
+{
+ package PDate;
+
+ sub new {
+ my $class = shift;
+ my $self = { year => 0 + shift,
+ month => 0 + shift,
+ day => 0 + shift,
+ };
+ bless $self, $class;
+ return $self;
+ }
+
+ # $d1 is greater than $d2
+ sub cmp {
+ my ($d1, $d2) = @_;
+ for ($d1->{year} <=> $d2->{year},
+ $d1->{month} <=> $d2->{month},
+ $d1->{day} <=> $d2->{day}) {
+ return $_ unless $_ == 0
+ }
+ 0
+ }
+ use overload '<=>' => \&cmp;
+
+ sub fmt {
+ my $self = shift;
+ my @months =
+ qw(January Febuary March April May June July August September November October December);
+ my $n = $self->{day};
+ if ($n == 1) { $n = '1st' }
+ elsif (($n - 2) % 10 == 0) { $n = "${n}nd" }
+ elsif (($n - 3) % 10 == 0) { $n = "${n}rd" }
+ else { $n = "${n}st" }
+ $months[$self->{month} - 1] . " $n, " . $self->{year}
+ }
+
+ sub short_fmt {
+ my $self = shift;
+ join "-", ($self->{year}, $self->{month}, $self->{day});
+ }
+
+}
+