0
|
1 |
#!/usr/bin/perl
|
|
2 |
|
|
3 |
use v5.36.0;
|
|
4 |
use strict;
|
|
5 |
use warnings;
|
|
6 |
|
|
7 |
use XML::LibXML;
|
|
8 |
use File::Find;
|
|
9 |
use File::Copy qw(copy move);
|
|
10 |
use Cwd;
|
|
11 |
use experimental qw(declared_refs);
|
|
12 |
use Data::Dumper;
|
|
13 |
|
|
14 |
# Fix up CLI interface
|
|
15 |
# Add option ro set cache to 0 through command line
|
|
16 |
# Print required elisp code it is in $req_config
|
|
17 |
# Add config option for $max_rss and $max_cat
|
|
18 |
# Sort cataogirs by entires
|
|
19 |
# Variable for map_css
|
|
20 |
# Domain thing as well
|
|
21 |
# Dicmuemnt how init.el is required
|
|
22 |
my $cache = 1;
|
|
23 |
|
|
24 |
my $config_file = "genorg-conf";
|
|
25 |
my $dir = "blog";
|
|
26 |
my $outdir = "out/";
|
|
27 |
my $emacs = "emacs";
|
|
28 |
my $css = "style.css";
|
|
29 |
my $map_css ="other.css";
|
|
30 |
my $no_name_dir = "c";
|
|
31 |
my $domain = "p.bauherren.ovh";
|
|
32 |
my $max_rss = 30;
|
|
33 |
# The first page has one less article. Think of it as a feature.
|
|
34 |
my $max_cat = 1;
|
|
35 |
|
|
36 |
my $username = $ENV{LOGNAME} || $ENV{USER} || getpwuid ($<);
|
|
37 |
my $template = <<"END";
|
|
38 |
#+title: Test document
|
|
39 |
#+subtitle: This is the subtitle
|
|
40 |
#+author: $username
|
|
41 |
#+keywords: tag thing | related
|
|
42 |
#+options: html-link-use-abs-url:nil html-postamble:auto
|
|
43 |
#+options: html-preamble:t html-scripts:nil html-style:t
|
|
44 |
#+options: html5-fancy:nil tex:t
|
|
45 |
#+options: tex:mathjax
|
|
46 |
#+html_doctype: html5
|
|
47 |
#+html_container: div
|
|
48 |
#+html_content_class: content
|
|
49 |
#+html_link_home:
|
|
50 |
#+html_link_up:
|
|
51 |
#+html_mathjax:
|
|
52 |
#+html_equation_reference_format: \eqref{%s}
|
|
53 |
#+html_head:
|
|
54 |
#+html_head_extra:
|
|
55 |
#+infojs_opt:
|
|
56 |
#+creator: <a href="https://www.gnu.org/software/emacs/">Emacs</a> 31.0.50 (<a href="https://orgmode.org">Org</a> mode 9.7.11)
|
|
57 |
#+latex_header:
|
|
58 |
END
|
|
59 |
|
|
60 |
my $req_config = <<"END";
|
|
61 |
(with-eval-after-load 'org
|
|
62 |
(org-link-set-parameters "genorg"
|
|
63 |
:follow nil
|
|
64 |
:export
|
|
65 |
#'(lambda (link desc _ _)
|
|
66 |
(format "<genorg desc=\"%s\" link=\"%s\"/>" desc link))
|
|
67 |
:store nil))
|
|
68 |
END
|
|
69 |
|
|
70 |
my $elisp_code = <<"END" =~ s/\n\s+//gr;
|
|
71 |
(let ((ls '(LIST)))
|
|
72 |
(require 'org)
|
|
73 |
(mapc
|
|
74 |
(lambda (b)
|
|
75 |
(with-current-buffer b
|
|
76 |
(when (and (not (string-match "^ " (buffer-name b)))
|
|
77 |
(eq major-mode 'org-mode))
|
|
78 |
(org-export-to-file 'html (pop ls)))))
|
|
79 |
(buffer-list))
|
|
80 |
(kill-emacs))
|
|
81 |
END
|
|
82 |
|
|
83 |
|
|
84 |
my ($h_intro, $h_c_intro, $h_cat_li, $h_chap, $h_en_cat, $h_en, $h_end)
|
|
85 |
= split /SPLIT/ ,<<"END" =~ s/\n\s+//gr;
|
|
86 |
<!DOCTYPE html>
|
|
87 |
<html lang="en">
|
|
88 |
<head>
|
|
89 |
<meta charset="UTF-8">
|
|
90 |
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
|
91 |
<title>TITLE</title>
|
|
92 |
<link rel="stylesheet" href="style.css">
|
|
93 |
</head>
|
|
94 |
SPLIT
|
|
95 |
<body>
|
|
96 |
<h1> Pranshu's Blog </h1>
|
|
97 |
<ul id="cataogries">
|
|
98 |
SPLIT
|
|
99 |
<li> <a href="LINK"> CATAG (NUMBER)</a>
|
|
100 |
SPLIT
|
|
101 |
</ul>
|
|
102 |
<div>
|
|
103 |
<h2> All articles </h2> <hr>
|
|
104 |
SPLIT
|
|
105 |
<a href="LINK">
|
|
106 |
<strong> TITLE </strong> <span>DATE</span>
|
|
107 |
<p>DESCREPTION</p>
|
|
108 |
<i>CATAG</i>
|
|
109 |
<hr>
|
|
110 |
</a>
|
|
111 |
SPLIT
|
|
112 |
<a href="LINK">
|
|
113 |
<strong> TITLE </strong> <span>DATE</span>
|
|
114 |
<p>DESCREPTION</p>
|
|
115 |
<hr>
|
|
116 |
</a>
|
|
117 |
SPLIT
|
|
118 |
</div>
|
|
119 |
</body>
|
|
120 |
</html>
|
|
121 |
END
|
|
122 |
|
|
123 |
my $help = <<"END";
|
|
124 |
genorg [option] [dir|file]
|
|
125 |
Option can be:
|
|
126 |
-t FILE : Make a template org file at FILE
|
|
127 |
-h : Help
|
|
128 |
-c FILE : sepcify a new config file instead of genorg-conf
|
|
129 |
-r : unconditionally remake org files
|
|
130 |
|
|
131 |
If no option is set, accept a directory to generate.
|
|
132 |
|
|
133 |
$config_file follow the syntax of:
|
|
134 |
VAR VALUE
|
|
135 |
where VAR can be:
|
|
136 |
emacs-path : The value should be path of emacs. 'emacs' by default.
|
|
137 |
css : The path of the css file to use.
|
|
138 |
in_dir : The directory that contains the files.
|
|
139 |
out_dir : the output directory.
|
|
140 |
no_name_dir: The directory for generate files. This means in_dir/no_name_dir
|
|
141 |
cannot exist.
|
|
142 |
END
|
|
143 |
|
|
144 |
|
|
145 |
my $rss_entry = <<"END" =~ s/\n\s+//gr;
|
|
146 |
<?xml version=\"1.0\" encoding=\"UTF-8\" ?>
|
|
147 |
<rss version=\"2.0\">
|
|
148 |
<channel>
|
|
149 |
<title>W3Schools Home Page</title>
|
|
150 |
<link>https://www.w3schools.com</link>
|
|
151 |
<description>Free web building tutorials</description>"
|
|
152 |
END
|
|
153 |
|
|
154 |
my %config_vars =
|
|
155 |
(
|
|
156 |
'emacs-path' => \$emacs,
|
|
157 |
css => \$css,
|
|
158 |
indir => \$dir,
|
|
159 |
outdir => \$outdir,
|
|
160 |
no_name_dir => \$no_name_dir
|
|
161 |
);
|
|
162 |
|
|
163 |
sub read_config ($path) {
|
|
164 |
open my $fh, '<', $path or die "Couldn't open config file\n";
|
|
165 |
while (<$fh>) {
|
|
166 |
next if /^\s+$/;
|
|
167 |
chomp;
|
|
168 |
my ($directive, $rest) = split /\s+/, $_, 2;
|
|
169 |
if (exists($config_vars{$directive})) {
|
|
170 |
${$config_vars{$directive}} = $rest;
|
|
171 |
} else {
|
|
172 |
print "Unknown variable $directive\n";
|
|
173 |
}
|
|
174 |
}
|
|
175 |
close $fh;
|
|
176 |
}
|
|
177 |
|
|
178 |
sub help_and_bye {
|
|
179 |
print $help;
|
|
180 |
exit;
|
|
181 |
}
|
|
182 |
|
|
183 |
sub template ($file) {
|
|
184 |
$file // die "-t needs an argument.\n";
|
|
185 |
$file =~ s/\.org$//;
|
|
186 |
open my $fh, '>', $file . ".org" or die $file . " couldn't be opened\n";
|
|
187 |
print $fh $template;
|
|
188 |
close $fh;
|
|
189 |
}
|
|
190 |
|
|
191 |
# Reletive file from the perspect of file1, to file2
|
|
192 |
sub prel_path ($from, $to) {
|
|
193 |
return '.' if $from eq $to;
|
|
194 |
my \(@f1, @f2) = map [m{/[^/]+}g], ($from, $to);
|
|
195 |
# return substr($f1[$#f1], 1) if $from =~ $to;
|
|
196 |
while (@f1 && @f2 && $f1[0] =~ $f2[0]) {
|
|
197 |
shift @f1;
|
|
198 |
shift @f2;
|
|
199 |
}
|
|
200 |
my $p = @f1-1;
|
|
201 |
local $" = "";
|
|
202 |
"../" x ($p > 0 ? $p : 0) . substr("@f2", 1);
|
|
203 |
}
|
|
204 |
|
|
205 |
my @org_exps;
|
|
206 |
my @files_to_make;
|
|
207 |
|
|
208 |
sub wanted {
|
|
209 |
my $n_path = $outdir . prel_path ($dir, "$File::Find::name/");
|
|
210 |
# We need to mirror the direcotry structure
|
|
211 |
if (-d) {
|
|
212 |
push @files_to_make, $n_path || die "Couldn't make directory $n_path\n"
|
|
213 |
unless -d $n_path;
|
|
214 |
return;
|
|
215 |
}
|
|
216 |
if (/\.org$/) {
|
|
217 |
push @org_exps, substr $n_path, length($outdir);
|
|
218 |
} else {
|
|
219 |
copy $_, $n_path;
|
|
220 |
}
|
|
221 |
}
|
|
222 |
|
|
223 |
|
|
224 |
# Export @org_exps
|
|
225 |
sub conv_files {
|
|
226 |
my %changed_files;
|
|
227 |
# Remove files that are already converted
|
|
228 |
for my $rel_file (@org_exps) {
|
|
229 |
# Emacs need the full file path
|
|
230 |
my ($file, $to) = map { getcwd . "/$_" . $rel_file} ($dir, $outdir);
|
|
231 |
s/org$/html/ for ($to, $rel_file);
|
|
232 |
unless ($cache && -f $to && (stat($to))[9] > (stat($file))[9]) {
|
|
233 |
$changed_files{$file} = $to;
|
|
234 |
}
|
|
235 |
}
|
|
236 |
%changed_files // exit;
|
|
237 |
$elisp_code =~ s/LIST/join '', map "\"$_\" ", values %changed_files /e;
|
|
238 |
open my $fh, '-|', ($emacs, '--batch', '~/.emacs.d/init.el', keys %changed_files, "--eval" , "$elisp_code");
|
|
239 |
close $fh;
|
|
240 |
\%changed_files
|
|
241 |
}
|
|
242 |
|
|
243 |
my $cdir;
|
|
244 |
|
|
245 |
$ARGV[0] // die "No arguments provided\n";
|
|
246 |
while ($_ = shift @ARGV) {
|
|
247 |
/^(-h|--?help)$/ && help_and_bye;
|
|
248 |
/^-t$/ && do { template shift @ARGV ; exit };
|
|
249 |
/^-c$/ && do { $config_file = shift @ARGV; next };
|
|
250 |
/^-d$/ && do { $cache = 0; next };
|
|
251 |
$cdir = $_;
|
|
252 |
}
|
|
253 |
$cdir || die "directory not provided\n";
|
|
254 |
main();
|
|
255 |
|
|
256 |
my %catags;
|
|
257 |
my @arts;
|
|
258 |
|
|
259 |
sub main {
|
|
260 |
-d $cdir or help_and_bye;
|
|
261 |
mkdir $outdir;
|
|
262 |
chdir $cdir or die "Couldn't access $cdir\n";
|
|
263 |
read_config $config_file;
|
|
264 |
-d $dir or die "$dir doesn't exist \n";
|
|
265 |
find (\&wanted, $dir);
|
|
266 |
mkdir for @files_to_make;
|
|
267 |
values %{conv_files()};
|
|
268 |
chdir $outdir;
|
|
269 |
@arts = sort { $b->{date} <=> $a->{date}} map { html_fixup($_) } @org_exps;
|
|
270 |
finalise_html();
|
|
271 |
}
|
|
272 |
|
|
273 |
sub entry {
|
|
274 |
my ($data, $catagory) = @_;
|
|
275 |
my $str = defined $catagory
|
|
276 |
? ($h_en_cat =~ s/CATAG/$data->{catag}/er) : $h_en;
|
|
277 |
my $prel_path = "/$no_name_dir/" . (defined $catagory ? "c" : "c/b");
|
|
278 |
$str =~ s/DATE/$data->{date}->fmt/er
|
|
279 |
=~ s/TITLE/$data->{title}/er
|
|
280 |
=~ s/DESCREPTION/$data->{desc}/er
|
|
281 |
=~ s|LINK|prel_path($prel_path, '/'. $data->{file})|er
|
|
282 |
}
|
|
283 |
|
|
284 |
sub rss_en ($data) {
|
|
285 |
my $thing = <<"END" =~ s/\n\s+//gr;
|
|
286 |
<item>
|
|
287 |
<title>TITLE</title>
|
|
288 |
<link>LINK</link>
|
|
289 |
<description>DESC</description>
|
|
290 |
</item>
|
|
291 |
END
|
|
292 |
$thing =~ s|LINK|"$domain/" . $data->{file}|er
|
|
293 |
=~ s/TITLE/$data->{title}/er
|
|
294 |
=~ s/DESC/$data->{desc}/er;
|
|
295 |
}
|
|
296 |
|
|
297 |
sub make_h_list {
|
|
298 |
my $rstr = '<ul>';
|
|
299 |
for (@_) {
|
|
300 |
my ($str, $page_no) = @{$_};
|
|
301 |
$rstr .= "<li> <a href=\"$page_no.html\"> $str </a>";
|
|
302 |
}
|
|
303 |
$rstr . "</ul>"
|
|
304 |
}
|
|
305 |
|
|
306 |
sub min ($x, $y) {
|
|
307 |
($x, $y)[$x > $y]
|
|
308 |
}
|
|
309 |
|
|
310 |
sub panigation ($total_arts, $page_no) {
|
|
311 |
if ($total_arts < 9) {
|
|
312 |
make_h_list ((['«', $page_no - 1]) x!! ($page_no - 1),
|
|
313 |
(map [$_ == $page_no ? "<span>$_</span>" : $_, $_ ],
|
|
314 |
(1..($total_arts + 1))),
|
|
315 |
['»', $page_no])
|
|
316 |
}
|
|
317 |
}
|
|
318 |
|
|
319 |
sub move_on($article, $cfh, $next_file, $total_arts) {
|
|
320 |
# say "Article is ". $article;
|
|
321 |
my $page = 1 + int $article / $max_cat;
|
|
322 |
print $cfh panigation int($total_arts / $max_cat), $page;
|
|
323 |
print $cfh $h_end;
|
|
324 |
close $cfh;
|
|
325 |
open $cfh, '>', "$next_file$page.html";
|
|
326 |
say "$next_file$page.html";
|
|
327 |
print $cfh $h_intro;
|
|
328 |
$cfh
|
|
329 |
}
|
|
330 |
|
|
331 |
sub finalise_html {
|
|
332 |
mkdir $no_name_dir;
|
|
333 |
open my $fh, '>', "$no_name_dir/1.html";
|
|
334 |
open my $mrss, '>', "$no_name_dir/rss.xml"; # The master rss file
|
|
335 |
print $mrss $rss_entry;
|
|
336 |
print $fh $h_intro . $h_c_intro;
|
|
337 |
my (%c_files, %r_files); # Cataogry files and rss
|
|
338 |
while (my ($cat, $num) = each %catags) {
|
|
339 |
# ($h_intro, $h_c_intro, $h_cat_li, $h_chap, $h_en_cat, $h_en, $h_end)
|
|
340 |
print $fh $h_cat_li =~ s/NUMBER/$num/re
|
|
341 |
=~ s/CATAG/$cat/er;
|
|
342 |
mkdir "$no_name_dir/$cat";
|
|
343 |
open $c_files{$cat}, '>', "$no_name_dir/$cat/1.html";
|
|
344 |
open $r_files{$cat}, '>', "$no_name_dir/$cat/rss.xml";
|
|
345 |
print {$r_files{$cat}} $rss_entry;
|
|
346 |
print {$c_files{$cat}} $h_intro;
|
|
347 |
}
|
|
348 |
print $fh $h_chap;
|
|
349 |
# :-)
|
|
350 |
my $total_articles = () = map {(1)x$_} values %catags;
|
|
351 |
# Now we iterate through the articles and add their index
|
|
352 |
my ($n_fh, $n_mrss, %n_rss) = (0,0);
|
|
353 |
my %n_cat;
|
|
354 |
for my $art (@arts) {
|
|
355 |
my ($catag, $title) = @{$art}{qw(catag title)};
|
|
356 |
my $rentry = rss_en $art;
|
|
357 |
if (($n_rss{$catag} // 0) < $max_rss) {
|
|
358 |
print {$r_files{$catag}} $rentry;
|
|
359 |
++$n_rss{$catag};
|
|
360 |
}
|
|
361 |
if ($n_mrss < $max_rss) {
|
|
362 |
print $mrss $rentry;
|
|
363 |
++$n_mrss
|
|
364 |
}
|
|
365 |
# my $ncat = \$n_cat{$catag};
|
|
366 |
$n_fh++;
|
|
367 |
if ($n_fh % $max_cat == 0) {
|
|
368 |
say "$n_fh % $max_cat = " . $n_fh % $max_cat ;
|
|
369 |
say "\$n_fh is $n_fh";
|
|
370 |
$fh = move_on($n_fh, $fh, "$no_name_dir/", $total_articles);
|
|
371 |
}
|
|
372 |
print $fh entry $art, 1;
|
|
373 |
print {$c_files{$catag}} entry $art;
|
|
374 |
# say "$catag -> $title($n_cat{$catag})";
|
|
375 |
# say ($n_cat{$catag} // 1);
|
|
376 |
# ++$$ncat
|
|
377 |
}
|
|
378 |
# ($total_arts, $page_no)
|
|
379 |
print $fh panigation ((int $total_articles / $max_cat) x 2) if $total_articles >= $max_cat;
|
|
380 |
print $fh $h_end;
|
|
381 |
close $fh;
|
|
382 |
for (values %c_files) {
|
|
383 |
print $_ $h_end;
|
|
384 |
close;
|
|
385 |
}
|
|
386 |
print $mrss '</channel> </rss>';
|
|
387 |
for (values %r_files) {
|
|
388 |
print $_ '</channel> </rss>';;
|
|
389 |
close;
|
|
390 |
}
|
|
391 |
}
|
|
392 |
|
|
393 |
sub html_fixup ($filename) {
|
|
394 |
my $dom =
|
|
395 |
XML::LibXML->load_html(location => $filename, recover => 1);
|
|
396 |
my %data = ('file', $filename);
|
|
397 |
|
|
398 |
# We don't need style or table of contents heading (if exists)
|
|
399 |
for my $xpath ('/html/head/style', '/html/body/div/div[@id="table-of-contents"]/h2') {
|
|
400 |
$_->parentNode->removeChild($_) for $dom->findnodes($xpath)
|
|
401 |
}
|
|
402 |
|
|
403 |
# Info we need
|
|
404 |
|
|
405 |
# Date
|
|
406 |
for ($dom->findnodes('/html/body/div[@id="postamble"]/p[@class="date"]')) {
|
|
407 |
my $text = $_->to_literal;
|
|
408 |
if ($text =~ /^Date: (\d+)-(\d+)-(\d+)/) {
|
|
409 |
my $date = PDate->new($1, $2, $3);
|
|
410 |
$data{date} = $date;
|
|
411 |
my $p = $_->parentNode;
|
|
412 |
$p->parentNode->removeChild($p);
|
|
413 |
}
|
|
414 |
}
|
|
415 |
# Title\
|
|
416 |
for ($dom->findnodes('/html/head/title')) {
|
|
417 |
$data{title} = $_->to_literal;
|
|
418 |
}
|
|
419 |
# keywords/catogry
|
|
420 |
for my $node ($dom->findnodes('/html/head/meta')) {
|
|
421 |
(my $cont = $node->getAttribute('content')) || next;
|
|
422 |
for ($node->getAttribute('name')) {
|
|
423 |
if (/description/) {
|
|
424 |
$data{desc} = $cont;
|
|
425 |
} elsif (/keywords/) {
|
|
426 |
my ($key, $rest) = parse_keywords($cont);
|
|
427 |
@data{qw[catag title]} = @{$key};
|
|
428 |
$catags{$data{catag}}++;
|
|
429 |
$rest =~ s/\s+\|\s+//;
|
|
430 |
while ($rest) {
|
|
431 |
(my $t,$rest) = parse_keywords($rest);
|
|
432 |
push @{$data{related}}, $t;
|
|
433 |
}
|
|
434 |
} else {
|
|
435 |
next;
|
|
436 |
}
|
|
437 |
$node->parentNode->removeChild($node);
|
|
438 |
}
|
|
439 |
}
|
|
440 |
# $data{dom} = $dom;
|
|
441 |
\%data;
|
|
442 |
|
|
443 |
# print $newfh $dom->toString;
|
|
444 |
# close $newfh;
|
|
445 |
}
|
|
446 |
|
|
447 |
sub parse_keywords ($str) {
|
|
448 |
if ($str =~ s/\s*(?:"(.+?)"|(\w+))\s*->(?:\s*(?:"(.+?)"|(\w+)))//) {
|
|
449 |
([$1 // $2, $3 // $4], $str);
|
|
450 |
} else {
|
|
451 |
die "Keywords not arranged properly\n"
|
|
452 |
}
|
|
453 |
}
|
|
454 |
|
|
455 |
# I wonder if sean combs has made a similar class
|
|
456 |
{
|
|
457 |
package PDate;
|
|
458 |
|
|
459 |
sub new {
|
|
460 |
my $class = shift;
|
|
461 |
my $self = { year => 0 + shift,
|
|
462 |
month => 0 + shift,
|
|
463 |
day => 0 + shift,
|
|
464 |
};
|
|
465 |
bless $self, $class;
|
|
466 |
return $self;
|
|
467 |
}
|
|
468 |
|
|
469 |
# $d1 is greater than $d2
|
|
470 |
sub cmp {
|
|
471 |
my ($d1, $d2) = @_;
|
|
472 |
for ($d1->{year} <=> $d2->{year},
|
|
473 |
$d1->{month} <=> $d2->{month},
|
|
474 |
$d1->{day} <=> $d2->{day}) {
|
|
475 |
return $_ unless $_ == 0
|
|
476 |
}
|
|
477 |
0
|
|
478 |
}
|
|
479 |
use overload '<=>' => \&cmp;
|
|
480 |
|
|
481 |
sub fmt {
|
|
482 |
my $self = shift;
|
|
483 |
my @months =
|
|
484 |
qw(January Febuary March April May June July August September November October December);
|
|
485 |
my $n = $self->{day};
|
|
486 |
if ($n == 1) { $n = '1st' }
|
|
487 |
elsif (($n - 2) % 10 == 0) { $n = "${n}nd" }
|
|
488 |
elsif (($n - 3) % 10 == 0) { $n = "${n}rd" }
|
|
489 |
else { $n = "${n}st" }
|
|
490 |
$months[$self->{month} - 1] . " $n, " . $self->{year}
|
|
491 |
}
|
|
492 |
|
|
493 |
sub short_fmt {
|
|
494 |
my $self = shift;
|
|
495 |
join "-", ($self->{year}, $self->{month}, $self->{day});
|
|
496 |
}
|
|
497 |
|
|
498 |
}
|
|
499 |
|