#!/usr/bin/perl
# Author: Pranshu Sharma <pranshu@bauherren.ovh>
use v5.36.0;
use strict;
use warnings;
use XML::LibXML;
use File::Find;
use File::Copy qw(copy move);
use Cwd;
use experimental qw(declared_refs);
use Data::Dumper;
use POSIX;
# Fix up CLI interface
# Add option ro set cache to 0 through command line
# Print required elisp code it is in $req_config
# Add config option for $max_rss and $max_cat
# Sort cataogirs by entires
# Variable for map_css
# Remove the desc
# Domain thing as well
# Load custom filpe for htmlize and all that
# 2 articles 1 day better sorting
my $cache = 1;
my $config_file = "genorg-conf";
my $dir = "blog";
my $outdir = "out/";
my $emacs = "emacs";
my $css = "style.css";
my $map_css ="other.css";
my $no_name_dir = "c";
my $cache_file = "genorg-cache/cache.xml";
my $domain = "p.bauherren.ovh";
my $max_rss = 30;
# The first page has one less article. Think of it as a feature.
my $max_cat = 7;
my $username = $ENV{LOGNAME} || $ENV{USER} || getpwuid ($<);
my $template = <<"END";
#+title: Test document
#+subtitle: This is the subtitle
#+author: $username
#+keywords: tag thing | related
#+options: html-link-use-abs-url:nil html-postamble:auto
#+options: html-preamble:t html-scripts:nil html-style:t
#+options: html5-fancy:nil tex:t
#+options: tex:mathjax
#+html_doctype: html5
#+html_container: div
#+html_content_class: content
#+html_link_home:
#+html_link_up:
#+html_mathjax:
#+html_equation_reference_format: \eqref{%s}
#+html_head:
#+html_head_extra:
#+infojs_opt:
#+creator: <a href="https://www.gnu.org/software/emacs/">Emacs</a> 31.0.50 (<a href="https://orgmode.org">Org</a> mode 9.7.11)
#+latex_header:
END
my $req_config = <<"END";
(with-eval-after-load 'org
(org-link-set-parameters "genorg"
:follow nil
:export
#'(lambda (link desc _ _)
(format "<genorg desc=\"%s\" link=\"%s\"/>" desc link))
:store nil))
END
my $custom_code = "(require 'ob-dot) (setq org-confirm-babel-evaluate nil)";
my $elisp_code = <<"END" =~ s/\n\s+//gr;
(let ((ls '(LIST)))
(require 'org)
$custom_code
(mapc
(lambda (b)
(with-current-buffer b
(when (and (not (string-match "^ " (buffer-name b)))
(eq major-mode 'org-mode))
(org-export-to-file 'html (pop ls)))))
(buffer-list))
(kill-emacs))
END
my ($h_intro, $h_c_intro, $h_cat_li, $h_chap, $h_en_cat, $h_en, $h_end)
= split /SPLIT/ ,<<"END" =~ s/\n\s+//gr;
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>TITLE</title>
<link rel="stylesheet" href="style.css">
</head>
SPLIT
<body>
<h1> Pranshu's Blog </h1>
<ul id="cataogries">
SPLIT
<li> <a href="LINK"> CATAG (NUMBER)</a>
SPLIT
</ul>
<div>
<h2> All articles </h2> <hr>
SPLIT
<a href="LINK">
<strong> TITLE </strong> <span>DATE</span>
<p>DESCREPTION</p>
<i>CATAG</i>
<hr>
</a>
SPLIT
<a href="LINK">
<strong> TITLE </strong> <span>DATE</span>
<p>DESCREPTION</p>
<hr>
</a>
SPLIT
</div>
</body>
</html>
END
my $help = <<"END";
genorg [option] [dir|file]
Option can be:
-t FILE : Make a template org file at FILE
-h : Help
-c FILE : sepcify a new config file instead of genorg-conf
-r : unconditionally remake org files
If no option is set, accept a directory to generate.
$config_file follow the syntax of:
VAR VALUE
where VAR can be:
emacs-path : The value should be path of emacs. 'emacs' by default.
css : The path of the css file to use.
in_dir : The directory that contains the files.
out_dir : the output directory.
no_name_dir: The directory for generate files. This means in_dir/no_name_dir
cannot exist.
END
my $rss_entry = <<"END" =~ s/\n\s+//gr;
<?xml version=\"1.0\" encoding=\"UTF-8\" ?>
<rss version=\"2.0\">
<channel>
<title>W3Schools Home Page</title>
<link>https://www.w3schools.com</link>
<description>Free web building tutorials</description>"
END
my %config_vars =
(
'emacs-path' => \$emacs,
css => \$css,
indir => \$dir,
outdir => \$outdir,
no_name_dir => \$no_name_dir
);
sub read_config ($path) {
open my $fh, '<', $path or die "Couldn't open config file\n";
while (<$fh>) {
next if /^\s+$/;
chomp;
my ($directive, $rest) = split /\s+/, $_, 2;
if (exists($config_vars{$directive})) {
${$config_vars{$directive}} = $rest;
} else {
print "Unknown variable $directive\n";
}
}
close $fh;
}
sub help_and_bye {
print $help;
exit;
}
sub template ($file) {
$file // die "-t needs an argument.\n";
$file =~ s/\.org$//;
open my $fh, '>', $file . ".org" or die $file . " couldn't be opened\n";
print $fh $template;
close $fh;
}
# Reletive file from the perspect of file1, to file2
sub prel_path ($from, $to) {
return '.' if $from eq $to;
my \(@f1, @f2) = map [m{/[^/]+}g], ($from, $to);
# return substr($f1[$#f1], 1) if $from =~ $to;
while (@f1 && @f2 && $f1[0] =~ $f2[0]) {
shift @f1;
shift @f2;
}
my $p = @f1-1;
local $" = "";
"../" x ($p > 0 ? $p : 0) . substr("@f2", 1);
}
my @org_exps;
my @files_to_make;
sub wanted {
my $n_path = $outdir . prel_path ($dir, "$File::Find::name/");
# We need to mirror the direcotry structure
if (-d) {
push @files_to_make, $n_path || die "Couldn't make directory $n_path\n"
unless -d $n_path;
return;
}
if (/\.org$/) {
push @org_exps, substr $n_path, length($outdir);
} else {
copy $_, $n_path;
}
}
my @modified_files;
# Export @org_exps
sub conv_files {
my %changed_files;
# Remove files that are already converted
for my $rel_file (@org_exps) {
# Emacs need the full file path
my ($file, $to) = map { getcwd . "/$_" . $rel_file} ($dir, $outdir);
s/org$/html/ for ($to, $rel_file);
unless ($cache && -f $to && (stat($to))[9] > (stat($file))[9]) {
$changed_files{$file} = $to;
push @modified_files, $rel_file;
}
}
%changed_files // exit;
$elisp_code =~ s/LIST/join '', map "\"$_\" ", values %changed_files /e;
open my $fh, '-|', ($emacs, '--batch', '~/.emacs.d/init.el', keys %changed_files, "--eval" , "$elisp_code");
close $fh;
\%changed_files
}
my $cdir;
$ARGV[0] // die "No arguments provided\n";
while ($_ = shift @ARGV) {
/^(-h|--?help)$/ && help_and_bye;
/^-t$/ && do { template shift @ARGV ; exit };
/^-c$/ && do { $config_file = shift @ARGV; next };
/^-d$/ && do { $cache = 0; next };
$cdir = $_;
}
$cdir || die "directory not provided\n";
main();
my %catags;
my @arts;
my $cache_dom;
sub main {
-d $cdir or help_and_bye;
mkdir $outdir;
chdir $cdir or die "Couldn't access $cdir\n";
read_config $config_file;
-d $dir or die "$dir doesn't exist \n";
find (\&wanted, $dir);
mkdir for @files_to_make;
values %{conv_files()};
$cache_dom = XML::LibXML->load_xml(location => $cache_file) if $cache && -f $cache_file;
chdir $outdir;
@arts = sort { $b->{date} <=> $a->{date}} map { html_fixup($_) } @org_exps;
finalise_html();
chdir '..';
populate_cache();
}
sub populate_cache {
my $dom = XML::LibXML::Document->new('1.0', 'UTF-8');
my $root = $dom->createElement('root');
my %c_els;
for my $art (@arts) {
# We need the: related, file, date
my ($art_el, $date, $title, $ca, $related) =
map {$dom->createElement($_)} ("art", "date", "title", "catag", "related");
$art_el->{path} = $art->{file};
$ca->appendText($art->{catag});
$title->appendText($art->{title});
$date->appendText($art->{date}->text_easy);
my $rel_str;
for my $thing ($art->{related}) {
for (@{$thing}) {
my ($catag, $artt) = @{$_};
my $rel_str .= "\"$catag\"" . ($artt && "->\"$artt\"");
$related->appendText("\"$catag\"" . ($artt && "->\"$artt\" "));
}
}
$art_el->appendChild($_) for ($date, $ca, $related, $title);
$root->appendChild($art_el);
}
$root->appendChild($_) for values %c_els;
$dom->setDocumentElement($root);
open my $fh, '>' , $cache_file;
print $fh ($dom->toString(1) =~ s/(.+)/$1\n<!-- Nicley formated :() -->/r);
close $fh;
}
sub entry {
my ($data, $catagory) = @_;
my $str = defined $catagory
? ($h_en_cat =~ s/CATAG/$data->{catag}/er) : $h_en;
my $prel_path = "/$no_name_dir/" . (defined $catagory ? "c" : "c/b");
$str =~ s/DATE/$data->{date}->fmt/er
=~ s/TITLE/$data->{title}/er
=~ s|LINK|prel_path($prel_path, '/'. $data->{file})|er
}
sub rss_en ($data) {
my $thing = <<"END" =~ s/\n\s+//gr;
<item>
<title>TITLE</title>
<link>LINK</link>
<description>DESC</description>
</item>
END
$thing =~ s|LINK|"$domain/" . $data->{file}|er
=~ s/TITLE/$data->{title}/er;
}
sub min ($x, $y) {
($x, $y)[$x > $y]
}
# @_ has to be in format of [STRING, LINK]
sub list2paginaiton {
my $page = shift;
'<ul>' . join ("", map {
my ($str, $file) = @{$_};
$str = "---$str" if $file && ($file == $page);
defined $file ?
"<li><a href=\"$file.html\">$str</a>" : $str
} ((["←", $page - 1]) x!! ($page - 1),
@_,
(["→", $page + 1]) x ($page != $_[$#_]->[1]))) . '</ul>'
}
sub move_on_if_neccasary ($fh, $art_num, $total_articles, $path) {
return $fh if $art_num == 0 || $art_num % $max_cat;
my $cpage = $art_num / $max_cat;
print $fh panigation($cpage, ceil($total_articles / $max_cat), $path);
open $fh, '>', $path . '/' . ($cpage + 1).'.html';
$fh;
}
sub panigation ($page, $no_pages, $dir) {
if ($no_pages < 9) {
list2paginaiton ($page, (map [$_, $_], (1..$no_pages)))
} elsif (4 < $page <= ($no_pages - 4)) {
list2paginaiton
($page,
[1, 1],
["<li>..."],
(map [$_, $_], (($page-2)..($page+2))),
["<li>..."],
[($no_pages) x 2])
} elsif ($page < 6) {
list2paginaiton $page,
((map [$_, $_], (1..5)),
["<li>..."],
[($no_pages) x 2])
} else {
list2paginaiton $page,
([1,1],
["<li>..."],
(map [$_, $_], (($no_pages - 5)..$no_pages)))
}
}
sub linkify ($bom) {
$bom
}
sub finalise_html {
mkdir $no_name_dir;
open my $fh, '>', "$no_name_dir/1.html";
open my $mrss, '>', "$no_name_dir/rss.xml"; # The master rss file
print $mrss $rss_entry;
print $fh $h_intro . $h_c_intro;
my (%c_files, %r_files); # Cataogry files and rss
while (my ($cat, $num) = each %catags) {
# ($h_intro, $h_c_intro, $h_cat_li, $h_chap, $h_en_cat, $h_en, $h_end)
print $fh $h_cat_li =~ s/NUMBER/$num/re
=~ s/CATAG/$cat/er
=~ s=LINK="$cat/1.html"=er;
mkdir "$no_name_dir/$cat";
open $c_files{$cat}, '>', "$no_name_dir/$cat/1.html";
open $r_files{$cat}, '>', "$no_name_dir/$cat/rss.xml";
print {$r_files{$cat}} $rss_entry;
print {$c_files{$cat}} $h_intro;
}
print $fh $h_chap;
# :-)
my $total_articles = () = map {(1)x$_} values %catags;
# Now we iterate through the articles and add their index
my ($n_fh, $n_mrss, %n_rss) = (0,0);
my %n_cat;
for my $art (@arts) {
my ($catag, $title) = @{$art}{qw(catag title)};
my $rentry = rss_en $art;
if (($n_rss{$catag} // 0) < $max_rss) {
print {$r_files{$catag}} $rentry;
++$n_rss{$catag};
}
if ($n_mrss < $max_rss) {
print $mrss $rentry;
++$n_mrss
}
# ($fh, $art_num, $total_articles, $path)
$fh = move_on_if_neccasary $fh, $n_fh, $total_articles, $no_name_dir;
$n_fh++;
print $fh entry $art, 1;
my $ncat = \$n_cat{$catag};
++$$ncat;
my $cfh = \$c_files{$catag};
$$cfh = move_on_if_neccasary $$cfh, $$ncat, $catags{$catag}, "$no_name_dir/$catag";
print {$$cfh} entry $art;
my $a_file = $art->{file};
# say $a_file;
if (grep /^$a_file$/, @modified_files) {
open my $h_file, '>', $a_file;
my $doom = linkify $art->{dom};
print $h_file $doom->toString;
close $h_file;
}
}
print $fh panigation ((ceil $n_fh / $max_cat) x 2, $no_name_dir) if $max_cat < $total_articles;
print $fh $h_end;
close $fh;
for (values %c_files) {
print $_ $h_end;
close;
}
print $mrss '</channel> </rss>';
for (values %r_files) {
print $_ '</channel> </rss>';;
close;
}
}
# I don't want to prepopulate, as it is not known which ones are
# usless and outdated, and it would be a waste of cycles parsing
# something to know it is not needed
sub getcache ($fn) {
$cache_dom || return 0;
for my $d ($cache_dom->findnodes("/root/art[\@path=\"$fn\"]")) {
# TODO related
my %dome = ('file' => $fn);
(@dome{'title', 'catag'}, my $date) =
map
{ $d->getChildrenByTagName($_)->[0]->to_literal }
qw[title catag date];
$catags{$dome{catag}}++;
$dome{date} = PDate->new(split /-/, $date);
return \%dome;
}
return 0;
}
sub html_fixup ($filename) {
# for (getcache($filename)) {
# $_ && return $_;
# }
unless ($cache && grep /^$filename$/, @modified_files) {
for (getcache($filename)) {
return $_ if $_
}
}
my $dom =
XML::LibXML->load_html(location => $filename, recover => 1);
my %data = ('file', $filename);
# We don't need style or table of contents heading (if exists)
for my $xpath ('/html/head/style', '/html/body/div/div[@id="table-of-contents"]/h2') {
$_->parentNode->removeChild($_) for $dom->findnodes($xpath)
}
# Info we need
# Date
for ($dom->findnodes('/html/body/div[@id="postamble"]/p[@class="date"]')) {
my $text = $_->to_literal;
if ($text =~ /^Date: (\d+)-(\d+)-(\d+)/) {
my $date = PDate->new($1, $2, $3);
$data{date} = $date;
my $p = $_->parentNode;
$p->parentNode->removeChild($p);
}
}
# Title\
for ($dom->findnodes('/html/head/title')) {
$data{title} = $_->to_literal;
}
# keywords/catogry
for my $node ($dom->findnodes('/html/head/meta')) {
(my $cont = $node->getAttribute('content')) || next;
for ($node->getAttribute('name')) {
if (/keywords/) {
my ($key, $rest) = parse_keywords($cont);
@data{qw[catag title]} = @{$key};
$catags{$data{catag}}++;
$rest =~ s/\s+\|\s+//;
while ($rest) {
(my $t,$rest) = parse_keywords($rest);
push @{$data{related}}, $t;
}
} else {
next;
}
$node->parentNode->removeChild($node);
}
}
$data{dom} = $dom;
\%data;
}
sub parse_keywords ($str) {
if ($str =~ s/\s*(?:"(.+?)"|(\w+))\s*->(?:\s*(?:"(.+?)"|(\w+)))//) {
([$1 // $2, $3 // $4], $str);
} else {
die "Keywords not arranged properly\n"
}
}
# I wonder if sean combs has made a similar class
{
package PDate;
sub new {
my $class = shift;
my $self = { year => 0 + shift,
month => 0 + shift,
day => 0 + shift,
};
bless $self, $class;
return $self;
}
# $d1 is greater than $d2
sub cmp {
my ($d1, $d2) = @_;
for ($d1->{year} <=> $d2->{year},
$d1->{month} <=> $d2->{month},
$d1->{day} <=> $d2->{day}) {
return $_ unless $_ == 0
}
0
}
use overload '<=>' => \&cmp;
sub fmt {
my $self = shift;
my @months =
qw(January Febuary March April May June July August September November October December);
my $n = $self->{day};
if ($n == 1) { $n = '1st' }
elsif (($n - 2) % 10 == 0) { $n = "${n}nd" }
elsif (($n - 3) % 10 == 0) { $n = "${n}rd" }
else { $n = "${n}th" }
$months[$self->{month} - 1] . " $n, " . $self->{year}
}
sub short_fmt {
my $self = shift;
join "-", ($self->{year}, $self->{month}, $self->{day});
}
sub text_easy {
my $self = shift;
join "-", ($self->{year}, $self->{month}, $self->{day});
}
}