# HG changeset patch # User Pranshu Sharma # Date 1734014353 -36000 # Node ID 1f2f88ea2e78c483700c9d6f94da3b57ef237789 inital commit diff -r 000000000000 -r 1f2f88ea2e78 genorg.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/genorg.pl Fri Dec 13 00:39:13 2024 +1000 @@ -0,0 +1,499 @@ +#!/usr/bin/perl + +use v5.36.0; +use strict; +use warnings; + +use XML::LibXML; +use File::Find; +use File::Copy qw(copy move); +use Cwd; +use experimental qw(declared_refs); +use Data::Dumper; + +# Fix up CLI interface +# Add option ro set cache to 0 through command line +# Print required elisp code it is in $req_config +# Add config option for $max_rss and $max_cat +# Sort cataogirs by entires +# Variable for map_css +# Domain thing as well +# Dicmuemnt how init.el is required +my $cache = 1; + +my $config_file = "genorg-conf"; +my $dir = "blog"; +my $outdir = "out/"; +my $emacs = "emacs"; +my $css = "style.css"; +my $map_css ="other.css"; +my $no_name_dir = "c"; +my $domain = "p.bauherren.ovh"; +my $max_rss = 30; +# The first page has one less article. Think of it as a feature. +my $max_cat = 1; + +my $username = $ENV{LOGNAME} || $ENV{USER} || getpwuid ($<); +my $template = <<"END"; +#+title: Test document +#+subtitle: This is the subtitle +#+author: $username +#+keywords: tag thing | related +#+options: html-link-use-abs-url:nil html-postamble:auto +#+options: html-preamble:t html-scripts:nil html-style:t +#+options: html5-fancy:nil tex:t +#+options: tex:mathjax +#+html_doctype: html5 +#+html_container: div +#+html_content_class: content +#+html_link_home: +#+html_link_up: +#+html_mathjax: +#+html_equation_reference_format: \eqref{%s} +#+html_head: +#+html_head_extra: +#+infojs_opt: +#+creator: Emacs 31.0.50 (Org mode 9.7.11) +#+latex_header: +END + +my $req_config = <<"END"; +(with-eval-after-load 'org + (org-link-set-parameters "genorg" + :follow nil + :export + #'(lambda (link desc _ _) + (format "" desc link)) + :store nil)) +END + +my $elisp_code = <<"END" =~ s/\n\s+//gr; +(let ((ls '(LIST))) + (require 'org) + (mapc + (lambda (b) + (with-current-buffer b + (when (and (not (string-match "^ " (buffer-name b))) + (eq major-mode 'org-mode)) + (org-export-to-file 'html (pop ls))))) + (buffer-list)) + (kill-emacs)) +END + + +my ($h_intro, $h_c_intro, $h_cat_li, $h_chap, $h_en_cat, $h_en, $h_end) + = split /SPLIT/ ,<<"END" =~ s/\n\s+//gr; + + + + + + TITLE + + +SPLIT + +

Pranshu's Blog

CATAG (NUMBER) +SPLIT +

All articles

+SPLIT + + TITLE DATE +

DESCREPTION

+ CATAG +

+ +SPLIT + + TITLE DATE +

DESCREPTION

+ +SPLIT +

+ + +END + +my $help = <<"END"; +genorg [option] [dir|file] +Option can be: + -t FILE : Make a template org file at FILE + -h : Help + -c FILE : sepcify a new config file instead of genorg-conf + -r : unconditionally remake org files + +If no option is set, accept a directory to generate. + +$config_file follow the syntax of: +VAR VALUE +where VAR can be: + emacs-path : The value should be path of emacs. 'emacs' by default. + css : The path of the css file to use. + in_dir : The directory that contains the files. + out_dir : the output directory. + no_name_dir: The directory for generate files. This means in_dir/no_name_dir + cannot exist. +END + + +my $rss_entry = <<"END" =~ s/\n\s+//gr; + + + + W3Schools Home Page + https://www.w3schools.com + Free web building tutorials" +END + +my %config_vars = + ( + 'emacs-path' => \$emacs, + css => \$css, + indir => \$dir, + outdir => \$outdir, + no_name_dir => \$no_name_dir + ); + +sub read_config ($path) { + open my $fh, '<', $path or die "Couldn't open config file\n"; + while (<$fh>) { + next if /^\s+$/; + chomp; + my ($directive, $rest) = split /\s+/, $_, 2; + if (exists($config_vars{$directive})) { + ${$config_vars{$directive}} = $rest; + } else { + print "Unknown variable $directive\n"; + } + } + close $fh; +} + +sub help_and_bye { + print $help; + exit; +} + +sub template ($file) { + $file // die "-t needs an argument.\n"; + $file =~ s/\.org$//; + open my $fh, '>', $file . ".org" or die $file . " couldn't be opened\n"; + print $fh $template; + close $fh; +} + +# Reletive file from the perspect of file1, to file2 +sub prel_path ($from, $to) { + return '.' if $from eq $to; + my \(@f1, @f2) = map [m{/[^/]+}g], ($from, $to); + # return substr($f1[$#f1], 1) if $from =~ $to; + while (@f1 && @f2 && $f1[0] =~ $f2[0]) { + shift @f1; + shift @f2; + } + my $p = @f1-1; + local $" = ""; + "../" x ($p > 0 ? $p : 0) . substr("@f2", 1); +} + +my @org_exps; +my @files_to_make; + +sub wanted { + my $n_path = $outdir . prel_path ($dir, "$File::Find::name/"); + # We need to mirror the direcotry structure + if (-d) { + push @files_to_make, $n_path || die "Couldn't make directory $n_path\n" + unless -d $n_path; + return; + } + if (/\.org$/) { + push @org_exps, substr $n_path, length($outdir); + } else { + copy $_, $n_path; + } +} + + +# Export @org_exps +sub conv_files { + my %changed_files; + # Remove files that are already converted + for my $rel_file (@org_exps) { + # Emacs need the full file path + my ($file, $to) = map { getcwd . "/$_" . $rel_file} ($dir, $outdir); + s/org$/html/ for ($to, $rel_file); + unless ($cache && -f $to && (stat($to))[9] > (stat($file))[9]) { + $changed_files{$file} = $to; + } + } + %changed_files // exit; + $elisp_code =~ s/LIST/join '', map "\"$_\" ", values %changed_files /e; + open my $fh, '-|', ($emacs, '--batch', '~/.emacs.d/init.el', keys %changed_files, "--eval" , "$elisp_code"); + close $fh; + \%changed_files +} + +my $cdir; + +$ARGV[0] // die "No arguments provided\n"; +while ($_ = shift @ARGV) { + /^(-h|--?help)$/ && help_and_bye; + /^-t$/ && do { template shift @ARGV ; exit }; + /^-c$/ && do { $config_file = shift @ARGV; next }; + /^-d$/ && do { $cache = 0; next }; + $cdir = $_; +} +$cdir || die "directory not provided\n"; +main(); + +my %catags; +my @arts; + +sub main { + -d $cdir or help_and_bye; + mkdir $outdir; + chdir $cdir or die "Couldn't access $cdir\n"; + read_config $config_file; + -d $dir or die "$dir doesn't exist \n"; + find (\&wanted, $dir); + mkdir for @files_to_make; + values %{conv_files()}; + chdir $outdir; + @arts = sort { $b->{date} <=> $a->{date}} map { html_fixup($_) } @org_exps; + finalise_html(); +} + +sub entry { + my ($data, $catagory) = @_; + my $str = defined $catagory + ? ($h_en_cat =~ s/CATAG/$data->{catag}/er) : $h_en; + my $prel_path = "/$no_name_dir/" . (defined $catagory ? "c" : "c/b"); + $str =~ s/DATE/$data->{date}->fmt/er + =~ s/TITLE/$data->{title}/er + =~ s/DESCREPTION/$data->{desc}/er + =~ s|LINK|prel_path($prel_path, '/'. $data->{file})|er + } + +sub rss_en ($data) { + my $thing = <<"END" =~ s/\n\s+//gr; + + TITLE + LINK + DESC + +END + $thing =~ s|LINK|"$domain/" . $data->{file}|er + =~ s/TITLE/$data->{title}/er + =~ s/DESC/$data->{desc}/er; +} + +sub make_h_list { + my $rstr = '

$str "; + } + $rstr . "

" +} + +sub min ($x, $y) { + ($x, $y)[$x > $y] +} + +sub panigation ($total_arts, $page_no) { + if ($total_arts < 9) { + make_h_list ((['«', $page_no - 1]) x!! ($page_no - 1), + (map [$_ == $page_no ? "$_" : $_, $_ ], + (1..($total_arts + 1))), + ['»', $page_no]) + } +} + +sub move_on($article, $cfh, $next_file, $total_arts) { + # say "Article is ". $article; + my $page = 1 + int $article / $max_cat; + print $cfh panigation int($total_arts / $max_cat), $page; + print $cfh $h_end; + close $cfh; + open $cfh, '>', "$next_file$page.html"; + say "$next_file$page.html"; + print $cfh $h_intro; + $cfh +} + +sub finalise_html { + mkdir $no_name_dir; + open my $fh, '>', "$no_name_dir/1.html"; + open my $mrss, '>', "$no_name_dir/rss.xml"; # The master rss file + print $mrss $rss_entry; + print $fh $h_intro . $h_c_intro; + my (%c_files, %r_files); # Cataogry files and rss + while (my ($cat, $num) = each %catags) { + # ($h_intro, $h_c_intro, $h_cat_li, $h_chap, $h_en_cat, $h_en, $h_end) + print $fh $h_cat_li =~ s/NUMBER/$num/re + =~ s/CATAG/$cat/er; + mkdir "$no_name_dir/$cat"; + open $c_files{$cat}, '>', "$no_name_dir/$cat/1.html"; + open $r_files{$cat}, '>', "$no_name_dir/$cat/rss.xml"; + print {$r_files{$cat}} $rss_entry; + print {$c_files{$cat}} $h_intro; + } + print $fh $h_chap; + # :-) + my $total_articles = () = map {(1)x$_} values %catags; + # Now we iterate through the articles and add their index + my ($n_fh, $n_mrss, %n_rss) = (0,0); + my %n_cat; + for my $art (@arts) { + my ($catag, $title) = @{$art}{qw(catag title)}; + my $rentry = rss_en $art; + if (($n_rss{$catag} // 0) < $max_rss) { + print {$r_files{$catag}} $rentry; + ++$n_rss{$catag}; + } + if ($n_mrss < $max_rss) { + print $mrss $rentry; + ++$n_mrss + } + # my $ncat = \$n_cat{$catag}; + $n_fh++; + if ($n_fh % $max_cat == 0) { + say "$n_fh % $max_cat = " . $n_fh % $max_cat ; + say "\$n_fh is $n_fh"; + $fh = move_on($n_fh, $fh, "$no_name_dir/", $total_articles); + } + print $fh entry $art, 1; + print {$c_files{$catag}} entry $art; + # say "$catag -> $title($n_cat{$catag})"; + # say ($n_cat{$catag} // 1); + # ++$$ncat + } + # ($total_arts, $page_no) + print $fh panigation ((int $total_articles / $max_cat) x 2) if $total_articles >= $max_cat; + print $fh $h_end; + close $fh; + for (values %c_files) { + print $_ $h_end; + close; + } + print $mrss ' '; + for (values %r_files) { + print $_ ' ';; + close; + } +} + +sub html_fixup ($filename) { + my $dom = + XML::LibXML->load_html(location => $filename, recover => 1); + my %data = ('file', $filename); + + # We don't need style or table of contents heading (if exists) + for my $xpath ('/html/head/style', '/html/body/div/div[@id="table-of-contents"]/h2') { + $_->parentNode->removeChild($_) for $dom->findnodes($xpath) + } + + # Info we need + + # Date + for ($dom->findnodes('/html/body/div[@id="postamble"]/p[@class="date"]')) { + my $text = $_->to_literal; + if ($text =~ /^Date: (\d+)-(\d+)-(\d+)/) { + my $date = PDate->new($1, $2, $3); + $data{date} = $date; + my $p = $_->parentNode; + $p->parentNode->removeChild($p); + } + } + # Title\ + for ($dom->findnodes('/html/head/title')) { + $data{title} = $_->to_literal; + } + # keywords/catogry + for my $node ($dom->findnodes('/html/head/meta')) { + (my $cont = $node->getAttribute('content')) || next; + for ($node->getAttribute('name')) { + if (/description/) { + $data{desc} = $cont; + } elsif (/keywords/) { + my ($key, $rest) = parse_keywords($cont); + @data{qw[catag title]} = @{$key}; + $catags{$data{catag}}++; + $rest =~ s/\s+\|\s+//; + while ($rest) { + (my $t,$rest) = parse_keywords($rest); + push @{$data{related}}, $t; + } + } else { + next; + } + $node->parentNode->removeChild($node); + } + } + # $data{dom} = $dom; + \%data; + + # print $newfh $dom->toString; + # close $newfh; +} + +sub parse_keywords ($str) { + if ($str =~ s/\s*(?:"(.+?)"|(\w+))\s*->(?:\s*(?:"(.+?)"|(\w+)))//) { + ([$1 // $2, $3 // $4], $str); + } else { + die "Keywords not arranged properly\n" + } +} + +# I wonder if sean combs has made a similar class +{ + package PDate; + + sub new { + my $class = shift; + my $self = { year => 0 + shift, + month => 0 + shift, + day => 0 + shift, + }; + bless $self, $class; + return $self; + } + + # $d1 is greater than $d2 + sub cmp { + my ($d1, $d2) = @_; + for ($d1->{year} <=> $d2->{year}, + $d1->{month} <=> $d2->{month}, + $d1->{day} <=> $d2->{day}) { + return $_ unless $_ == 0 + } + 0 + } + use overload '<=>' => \&cmp; + + sub fmt { + my $self = shift; + my @months = + qw(January Febuary March April May June July August September November October December); + my $n = $self->{day}; + if ($n == 1) { $n = '1st' } + elsif (($n - 2) % 10 == 0) { $n = "${n}nd" } + elsif (($n - 3) % 10 == 0) { $n = "${n}rd" } + else { $n = "${n}st" } + $months[$self->{month} - 1] . " $n, " . $self->{year} + } + + sub short_fmt { + my $self = shift; + join "-", ($self->{year}, $self->{month}, $self->{day}); + } + +} +