#!/usr/bin/perl
# Author: Pranshu Sharma <pranshu@bauherren.ovh>
use v5.36.0;
use strict;
use warnings;
use XML::LibXML;
use File::Find;
use File::Copy qw(copy move);
use Cwd;
use experimental qw(declared_refs);
use Data::Dumper;
use POSIX;
# !! Add seperatio between title and identifier
# Fix up CLI interface
# Add option ro set cache to 0 through command line
# Print required elisp code it is in $req_config
# Add config option for $max_rss and $max_cat
# Document link to file x from file x no work
# Variable for map_css
# Remove the desc
# Domain thing as well
# 2 articles 1 day better sorting
# cataogry linking
my $cache = 1;
my $config_file = "genorg-conf";
my $dir = "blog";
my $outdir = "out/";
my $emacs = "emacs";
my $art_css = "astyle.css";
my $cat_css ="cstyle.css";
my $no_name_dir = "c";
my $cache_file = "genorg-cache/cache.xml";
my $domain = "p.bauherren.ovh";
my $max_rss = 30;
# The first page has one less article. Think of it as a feature.
my $max_cat = 10;
my $blog_title = "Pranshu's blog";
my ($home, $about) = ("home.html", "about.html");
my $etc_files;
my $nav_id = "nav";
my $username = $ENV{LOGNAME} || $ENV{USER} || getpwuid ($<);
my $template = <<"END";
#+title: Test document
#+subtitle: This is the subtitle
#+author: $username
#+keywords: tag thing | related
#+options: html-link-use-abs-url:nil html-postamble:auto
#+options: html-preamble:t html-scripts:nil html-style:t
#+options: html5-fancy:nil tex:t
#+options: tex:mathjax
#+html_doctype: html5
#+html_container: div
#+html_content_class: content
#+html_link_home:
#+html_link_up:
#+html_mathjax:
#+html_equation_reference_format: \eqref{%s}
#+html_head:
#+html_head_extra:
#+infojs_opt:
#+creator: <a href="https://www.gnu.org/software/emacs/">Emacs</a> 31.0.50 (<a href="https://orgmode.org">Org</a> mode 9.7.11)
#+latex_header:
END
my $req_config = <<"END";
(require 'org)
(require 'ob)
(org-link-set-parameters "genorg"
:follow nil
:export
#'(lambda (link desc _ _)
(format "<a class=\"rakim\" href=\"%s\">%s</a>" desc link))
:store nil)
END
my $custom_code = "(list 1)";
my $navbar = <<"END" =~ s/\n\s+//gr;
<ul>
<li> <a href="LINK">Home</a> </li>
<li> <a href="LINK">Catagories</a> </li>
<li> <a href="LINK">About</a> </li>
</ul>
END
my $elisp_code = <<"END" =~ s/\n\s+//gr;
(let ((ls '(LIST)))
(require 'org)
(mapc
(lambda (b)
(with-current-buffer b
(when (and (not (string-match "^ " (buffer-name b)))
(eq major-mode 'org-mode))
(org-export-to-file 'html (pop ls)))))
(buffer-list))
(kill-emacs))
END
my ($h_intro, $h_c_intro, $h_cat_li, $h_chap, $h_en_cat, $h_en, $h_end)
= split /SPLIT/ ,<<"END" =~ s/\n\s+//gr;
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>TITLE</title>
<link rel="stylesheet" href="CSS">
</head>
<body>
NAV
<h1> BTIT </h1>
SPLIT
<ul id="cataogries">
SPLIT
<li> <a href="LINK"> CATAG (NUMBER)</a>
SPLIT
</ul>
<div>
<h2> All articles <a id="rss" href="rss.xml">(rss)</span> </h2> <hr>
SPLIT
<a href="LINK">
<strong> TITLE </strong> <span>DATE</span> <i>CATAG</i>
<hr>
</a>
SPLIT
<a href="LINK">
<strong> TITLE </strong> <span>DATE</span>
<hr>
</a>
SPLIT
</div>
</body>
</html>
END
my $help = <<"END";
genorg [option] [dir|file]
Option can be:
-t FILE : Make a template org file at FILE
-h : Help
-c FILE : sepcify a new config file instead of genorg-conf
-r : unconditionally remake org files
If no option is set, accept a directory to generate.
$config_file follow the syntax of:
VAR VALUE
where VAR can be:
emacs-path : The value should be path of emacs. 'emacs' by default.
css : The path of the css file to use.
in_dir : The directory that contains the files.
out_dir : the output directory.
no_name_dir: The directory for generate files. This means in_dir/no_name_dir
cannot exist.
etc_files : Space sperated list of files to be navved up
cache : xml cache file
home : home file
about : about file
domain : domain (for rss)
title : title of the blog
As soon as config file comes accross '>>---', the rest of the file
is now the elisp code used in emacs startup.
END
my $rss_entry = <<"END" =~ s/\n\s+//gr;
<?xml version=\"1.0\" encoding=\"UTF-8\" ?>
<rss version=\"2.0\">
<channel>
<title>W3Schools Home Page</title>
<link>https://www.w3schools.com</link>
<description>Free web building tutorials</description>"
END
my %config_vars =
(
'emacs-path' => \$emacs,
css => \$art_css,
indir => \$dir,
outdir => \$outdir,
no_name_dir => \$no_name_dir,
etc_files => \$etc_files,
cache => \$cache_file,
home => \$home,
about => \$about,
domain => \$domain,
title => \&blog_title
);
sub read_config ($path) {
open my $fh, '<', $path or die "Couldn't open config file\n";
while (<$fh>) {
next if /^\s+$/;
if (/^>>---+/) {
local $/;
$custom_code = <$fh>;
return;
}
chomp;
my ($directive, $rest) = split /\s+/, $_, 2;
if (exists($config_vars{$directive})) {
${$config_vars{$directive}} = $rest;
} else {
print "Unknown variable $directive\n";
}
}
close $fh;
}
sub help_and_bye {
print $help;
exit;
}
sub template ($file) {
$file // die "-t needs an argument.\n";
$file =~ s/\.org$//;
open my $fh, '>', $file . ".org" or die $file . " couldn't be opened\n";
print $fh $template;
close $fh;
}
# Reletive file from the perspect of file1, to file2
sub prel_path ($from, $to) {
return $to =~ s/^.//r if $from eq $to;
my \(@f1, @f2) = map [m{/[^/]+}g], ($from, $to);
# return substr($f1[$#f1], 1) if $from =~ $to;
while (@f1 && @f2 && $f1[0] =~ $f2[0]) {
shift @f1;
shift @f2;
}
my $p = @f1-1;
local $" = "";
"../" x ($p > 0 ? $p : 0) . substr("@f2", 1);
}
my @org_exps;
my @files_to_make;
my @files_to_move;
my @navify_files;
sub wanted {
my $n_path = $outdir . prel_path ($dir, "$File::Find::name/");
# We need to mirror the direcotry structure
if (-d) {
push @files_to_make, $n_path || die "Couldn't make directory $n_path\n"
unless -d $n_path;
return;
}
if (/\.org$/) {
push @org_exps, substr $n_path, length($outdir);
} else {
push @files_to_move, $File::Find::name
}
}
my @modified_files;
# Export @org_exps
sub conv_files {
my %changed_files;
# Remove files that are already converted
for my $rel_file (@org_exps) {
# Emacs need the full file path
my ($file, $to) = map { getcwd . "/$_" . $rel_file} ($dir, $outdir);
s/org$/html/ for ($to, $rel_file);
unless ($cache && -f $to && (stat($to))[9] > (stat($file))[9]) {
$changed_files{$file} = $to;
push @modified_files, $rel_file;
}
}
%changed_files // exit;
$elisp_code =~ s/LIST/join '', map "\"$_\" ", values %changed_files /e;
open my $fh, '-|', ($emacs, '-Q','--eval', "(progn $custom_code)", keys %changed_files, "--eval" , "$elisp_code");
close $fh;
\%changed_files
}
my $cdir;
$ARGV[0] // die "No arguments provided\n";
while ($_ = shift @ARGV) {
/^-c$/ && do { print "$req_config\n"; exit };
/^(-h|--?help)$/ && help_and_bye;
/^-t$/ && do { template shift @ARGV ; exit };
/^-c$/ && do { $config_file = shift @ARGV; next };
/^-d$/ && do { $cache = 0; next };
$cdir = $_;
}
$cdir || die "directory not provided\n";
main();
my %catags;
my @arts;
my $cache_dom;
sub main {
s!/*$!/! for ($outdir, $dir);
-d $cdir or help_and_bye;
mkdir $outdir;
chdir $cdir or die "Couldn't access $cdir\n";
read_config $config_file;
my @navify_files = map {"$dir$_"} ($about, $home, $etc_files ? split(/[ \t]+/, $etc_files) : ());
-d $dir or die "$dir doesn't exist \n";
find (\&wanted, $dir);
for my $f (@files_to_move) {
my $dest = $f =~ s#$dir/?#$outdir#er;
if (grep(m#^$f$#, @navify_files)) {
# YAY we have to sub navbar and copy whoop whoop
open my $file_tc, '<', $f or die "file no open, grrr!";
open my $file_out, '>', $dest;
local $/;
print $file_out ((<$file_tc>) =~ s/NAV/nav_up("$dest" =~ s%.+?\/%%r,1)/er);
map \&close, ($file_tc, $file_out);
} else {
copy $f, $dest;
}
}
mkdir for @files_to_make;
values %{conv_files()};
$cache_dom = XML::LibXML->load_xml(location => $cache_file) if $cache && -f $cache_file;
chdir $outdir;
@arts = sort { $b->{date} <=> $a->{date}} map { html_fixup($_) } @org_exps;
finalise_html();
chdir '..';
populate_cache();
}
sub populate_cache {
my $dom = XML::LibXML::Document->new('1.0', 'UTF-8');
my $root = $dom->createElement('root');
my %c_els;
for my $art (@arts) {
# We need the: related, file, date
my ($art_el, $date, $title, $ca, $related, $utitle) =
map {$dom->createElement($_)} ("art", "date", "title", "catag", "related", "utitle");
$art_el->{path} = $art->{file};
$ca->appendText($art->{catag});
$title->appendText($art->{title});
$date->appendText($art->{date}->text_easy);
$utitle->appendText($art->{utitle});
my @ll = @{$art->{links}};
if (@ll) {
my $links = $dom->createElement('links');
for (@ll) {
my $link = $dom->createElement("link");
$link->{catag} = $_->[0];
$link->appendText($_->[1]);
$links->appendChild($link);
}
$art_el->appendChild($links);
}
my $rel_str;
for my $thing ($art->{related}) {
for (@{$thing}) {
my ($catag, $artt) = @{$_};
my $rel_str .= "\"$catag\"" . ($artt && "->\"$artt\"");
$related->appendText("\"$catag\"" . ($artt && "->\"$artt\" "));
}
}
$art_el->appendChild($_) for ($date, $ca, $related, $title, $utitle);
$root->appendChild($art_el);
}
$root->appendChild($_) for values %c_els;
$dom->setDocumentElement($root);
open my $fh, '>' , $cache_file;
print $fh ($dom->toString(1) =~ s/(.+)/$1\n<!-- Nicley formated :() -->/r);
close $fh;
}
sub entry {
my ($data, $catagory) = @_;
my $str = defined $catagory
? ($h_en_cat =~ s/CATAG/$data->{catag}/er) : $h_en;
my $prel_path = "/$no_name_dir/" . (defined $catagory ? "c" : "c/b");
$str =~ s/DATE/$data->{date}->fmt/er
=~ s/TITLE/$data->{utitle}/er
=~ s|LINK|prel_path($prel_path, '/'. $data->{file})|er
}
sub rss_en ($data) {
my $thing = <<"END" =~ s/\n\s+//gr;
<item>
<title>TITLE</title>
<link>LINK</link>
<description>DESC</description>
</item>
END
$thing
=~ s|LINK|"$domain/" . $data->{file}|er
=~ s/TITLE/$data->{utitle}/er;
}
sub min ($x, $y) {
($x, $y)[$x > $y]
}
# @_ has to be in format of [STRING, LINK]
sub list2paginaiton {
my $page = shift;
'<ul>' . join ("", map {
my ($str, $file) = @{$_};
$str = "<span class=\"psel\">$str</span>" if $file && ($file == $page);
defined $file ?
"<li><a href=\"$file.html\">$str</a>" : $str
} ((["←", $page - 1]) x!! ($page - 1),
@_,
(["→", $page + 1]) x ($page != $_[$#_]->[1]))) . '</ul>'
}
sub move_on_if_neccasary ($fh, $art_num, $total_articles, $path, $title, $rp) {
return $fh if $art_num == 0 || $art_num % $max_cat;
my $cpage = $art_num / $max_cat;
print $fh panigation($cpage, ceil($total_articles / $max_cat), $path);
open $fh, '>', $path . '/' . ($cpage + 1).'.html';
# ($h_intro . $h_c_intro, $blog_title, "a/c");
print $fh cssbs($h_intro, $title, $rp);
$fh;
}
sub panigation ($page, $no_pages, $dir) {
if ($no_pages < 9) {
list2paginaiton ($page, (map [$_, $_], (1..$no_pages)))
} elsif (4 < $page <= ($no_pages - 4)) {
list2paginaiton
($page,
[1, 1],
["<li>..."],
(map [$_, $_], (($page-2)..($page+2))),
["<li>..."],
[($no_pages) x 2])
} elsif ($page < 6) {
list2paginaiton $page,
((map [$_, $_], (1..5)),
["<li>..."],
[($no_pages) x 2])
} else {
list2paginaiton $page,
([1,1],
["<li>..."],
(map [$_, $_], (($no_pages - 5)..$no_pages)))
}
}
sub get_art($cat, $id) {
for (@arts) {
return $_ if $_->{catag} eq $cat && $_->{title} eq $id
}
}
sub nav_up {
my ($path) = @_;
my @links = ($home, 'c/1.html', $about);
my $nav = shift ? $navbar : "<div class\"$nav_id\">$navbar</div>";
$nav =~ s#LINK#prel_path("/$path", "/". shift @links)#ger;
}
sub cssbs($s,$t,$p){
$s =~ s|CSS|prel_path("/$p","/$cat_css")|er
=~ s/TITLE|BTIT/$t/ger
=~ s/NAV/nav_up($p)/er;
}
sub modify_art ($file, $dom) {
# I guess we can rely on navbar not changed, if it did then user
# would need to call with -d option
return $dom unless grep /^$file$/, @modified_files;
# We add css, navbar
for ($dom->findnodes('/html/body')) {
# my @cds = $_->childNodes;
my $nav_el = $dom->createElement('div');
$nav_el->{id} = $nav_id;
$nav_el->appendWellBalancedChunk(nav_up($file));
$_->insertBefore($nav_el, $_->childNodes->[0])
}
for ($dom->findnodes('/html/head')) {
my $link = $dom->createElement('link');
$link->{rel} = "stylesheet";
$link->{href} = prel_path("/$file", "/$art_css");
$_->appendChild($link);
}
$dom;
}
sub finalise_html {
mkdir $no_name_dir;
open my $fh, '>', "$no_name_dir/1.html";
open my $mrss, '>', "$no_name_dir/rss.xml"; # The master rss file
print $mrss $rss_entry;
print $fh cssbs($h_intro . $h_c_intro, $blog_title, "a/c");
my (%c_files, %r_files); # Cataogry files and rss
for my $cat (sort { $catags{$b} <=> $catags{$a} } keys(%catags)) {
my $num = $catags{$cat};
# ($h_intro, $h_c_intro, $h_cat_li, $h_chap, $h_en_cat, $h_en, $h_end)
print $fh $h_cat_li =~ s/NUMBER/$num/re
=~ s/CATAG/$cat/er
=~ s=LINK="$cat/1.html"=er;
mkdir "$no_name_dir/$cat";
open $c_files{$cat}, '>', "$no_name_dir/$cat/1.html";
open $r_files{$cat}, '>', "$no_name_dir/$cat/rss.xml";
print {$r_files{$cat}} $rss_entry;
print {$c_files{$cat}}
cssbs($h_intro, "$cat <a id=\"rss\" href=\"rss.xml\">(rss)</span> ", "a/b/c");
}
print $fh $h_chap;
# :-)
my $total_articles = () = map {(1)x$_} values %catags;
# Now we iterate through the articles and add their index
my ($n_fh, $n_mrss, %n_rss) = (0,0);
my %n_cat;
for my $art (@arts) {
# TODO do we need $title?
my ($catag, $title) = @{$art}{qw(catag title)};
my $rentry = rss_en $art;
if (($n_rss{$catag} // 0) < $max_rss) {
print {$r_files{$catag}} $rentry;
++$n_rss{$catag};
}
if ($n_mrss < $max_rss) {
print $mrss $rentry;
++$n_mrss
}
# ($fh, $art_num, $total_articles, $path)
$fh = move_on_if_neccasary
$fh, $n_fh, $total_articles, $no_name_dir, "All articles", "a/c";
$n_fh++;
print $fh entry $art, 1;
my $ncat = \$n_cat{$catag};
++$$ncat;
my $cfh = \$c_files{$catag};
$$cfh = move_on_if_neccasary
$$cfh, $$ncat, $catags{$catag}, "$no_name_dir/$catag", $catag, "a/c/b";
print {$$cfh} entry $art;
my $a_file = $art->{file};
if (1 or grep /^$a_file$/, @modified_files) {
my $doom = modify_art($a_file, linkify($art));
open my $h_file, '>', $a_file;
print $h_file $doom->toStringHTML;
close $h_file;
}
}
print $fh panigation ((ceil $n_fh / $max_cat) x 2, $no_name_dir) if $max_cat < $total_articles;
print $fh $h_end;
close $fh;
for (values %c_files) {
print $_ $h_end;
close $_;
}
print $mrss '</channel> </rss>';
for (values %r_files) {
print $_ '</channel> </rss>';;
close $_;
}
}
sub linkify ($art) {
# if $art->{dom} is undefined, we must sadly parse again
my @links;
my @olinks = @{$art->{links} //[]};
my $bom = $art->{dom} // XML::LibXML->load_html(location => $art->{file});
for ($bom->findnodes('//a[@class="rakim"]')) {
my $key;
my $link_text;
if (@olinks) {
$key = pop @olinks;
} else {
($key) = parse_keywords($_->to_literal);
$link_text = $_->{href};
$_->removeChild($_->firstChild());
$_->appendText($link_text);
}
push @links, $key;
my $l = get_art(@{$key})->{file} or die "Link not found in $art->{filename}\n";
$_->{href} = prel_path("/".$art->{file}, "/$l");
}
$art->{links} = \@links;
return $bom
}
# I don't want to prepopulate, as it is not known which ones are
# usless and outdated, and it would be a waste of cycles parsing
# something to know it is not needed
sub getcache ($fn) {
$cache_dom || return 0;
for my $d ($cache_dom->findnodes("/root/art[\@path=\"$fn\"]")) {
# TODO related
my %dome = ('file' => $fn);
(@dome{'title', 'catag', 'utitle'}, my $date) =
map
{ $d->getChildrenByTagName($_)->[0]->to_literal }
qw[title catag utitle date];
for ($d->getChildrenByTagName("links")) {
my @ll = map [$_->{catag}, $_->to_literal],
($_->getChildrenByTagName('link'));
$dome{links} = \@ll;
}
$catags{$dome{catag}}++;
$dome{date} = PDate->new(split /-/, $date);
return \%dome;
}
return 0;
}
sub html_fixup ($filename) {
if ($cache && !grep /^$filename$/, @modified_files) {
for (getcache($filename)) {
return $_ if $_
}
}
my $dom =
XML::LibXML->load_html(location => $filename);
my %data = ('file', $filename);
# We don't need style or table of contents heading (if exists)
for my $xpath ('/html/head/style', '/html/body/div/div[@id="table-of-contents"]/h2') {
$_->parentNode->removeChild($_) for $dom->findnodes($xpath)
}
# Info we need
# Date
for ($dom->findnodes('/html/body/div[@id="postamble"]/p[@class="date"]')) {
my $text = $_->to_literal;
if ($text =~ /^Date: (\d+)-(\d+)-(\d+)/) {
my $date = PDate->new($1, $2, $3);
$data{date} = $date;
my $p = $_->parentNode;
$p->parentNode->removeChild($p);
}
}
# Title
for ($dom->findnodes('/html/head/title')) {
$data{utitle} = $_->to_literal;
}
# keywords/catogry
for my $node ($dom->findnodes('/html/head/meta')) {
(my $cont = $node->getAttribute('content')) || next;
for ($node->getAttribute('name')) {
if (/keywords/) {
my ($key, $rest) = parse_keywords($cont);
@data{qw[catag title]} = @{$key};
$catags{$data{catag}}++;
$rest =~ s/\s+\|\s+//;
while ($rest) {
(my $t,$rest) = parse_keywords($rest);
push @{$data{related}}, $t;
}
} else {
next;
}
$node->parentNode->removeChild($node);
}
}
$data{dom} = $dom;
\%data;
}
sub parse_keywords ($str) {
if ($str =~ s/\s*(?:"(.+?)"|(\w+))\s*->(?:\s*(?:"(.+?)"|(\w+)))//) {
([$1 // $2, $3 // $4], $str);
} else {
die "Keywords not arranged properly\n"
}
}
# I wonder if sean combs has made a similar class
{
package PDate;
sub new {
my $class = shift;
my $self = { year => 0 + shift,
month => 0 + shift,
day => 0 + shift,
};
bless $self, $class;
return $self;
}
# $d1 is greater than $d2
sub cmp {
my ($d1, $d2) = @_;
for ($d1->{year} <=> $d2->{year},
$d1->{month} <=> $d2->{month},
$d1->{day} <=> $d2->{day}) {
return $_ unless $_ == 0
}
0
}
use overload '<=>' => \&cmp;
sub fmt {
my $self = shift;
my @months =
qw(January Febuary March April May June July August September November October December);
my $n = $self->{day};
if ($n == 1) { $n = '1st' }
elsif (($n - 2) % 10 == 0) { $n = "${n}nd" }
elsif (($n - 3) % 10 == 0) { $n = "${n}rd" }
else { $n = "${n}th" }
$months[$self->{month} - 1] . " $n, " . $self->{year}
}
sub short_fmt {
my $self = shift;
join "-", ($self->{year}, $self->{month}, $self->{day});
}
sub text_easy {
my $self = shift;
join "-", ($self->{year}, $self->{month}, $self->{day});
}
}