|
1 #!/usr/bin/perl |
|
2 |
|
3 use v5.36.0; |
|
4 use strict; |
|
5 use warnings; |
|
6 |
|
7 use XML::LibXML; |
|
8 use File::Find; |
|
9 use File::Copy qw(copy move); |
|
10 use Cwd; |
|
11 use experimental qw(declared_refs); |
|
12 use Data::Dumper; |
|
13 |
|
14 # Fix up CLI interface |
|
15 # Add option ro set cache to 0 through command line |
|
16 # Print required elisp code it is in $req_config |
|
17 # Add config option for $max_rss and $max_cat |
|
18 # Sort cataogirs by entires |
|
19 # Variable for map_css |
|
20 # Domain thing as well |
|
21 # Dicmuemnt how init.el is required |
|
22 my $cache = 1; |
|
23 |
|
24 my $config_file = "genorg-conf"; |
|
25 my $dir = "blog"; |
|
26 my $outdir = "out/"; |
|
27 my $emacs = "emacs"; |
|
28 my $css = "style.css"; |
|
29 my $map_css ="other.css"; |
|
30 my $no_name_dir = "c"; |
|
31 my $domain = "p.bauherren.ovh"; |
|
32 my $max_rss = 30; |
|
33 # The first page has one less article. Think of it as a feature. |
|
34 my $max_cat = 1; |
|
35 |
|
36 my $username = $ENV{LOGNAME} || $ENV{USER} || getpwuid ($<); |
|
37 my $template = <<"END"; |
|
38 #+title: Test document |
|
39 #+subtitle: This is the subtitle |
|
40 #+author: $username |
|
41 #+keywords: tag thing | related |
|
42 #+options: html-link-use-abs-url:nil html-postamble:auto |
|
43 #+options: html-preamble:t html-scripts:nil html-style:t |
|
44 #+options: html5-fancy:nil tex:t |
|
45 #+options: tex:mathjax |
|
46 #+html_doctype: html5 |
|
47 #+html_container: div |
|
48 #+html_content_class: content |
|
49 #+html_link_home: |
|
50 #+html_link_up: |
|
51 #+html_mathjax: |
|
52 #+html_equation_reference_format: \eqref{%s} |
|
53 #+html_head: |
|
54 #+html_head_extra: |
|
55 #+infojs_opt: |
|
56 #+creator: <a href="https://www.gnu.org/software/emacs/">Emacs</a> 31.0.50 (<a href="https://orgmode.org">Org</a> mode 9.7.11) |
|
57 #+latex_header: |
|
58 END |
|
59 |
|
60 my $req_config = <<"END"; |
|
61 (with-eval-after-load 'org |
|
62 (org-link-set-parameters "genorg" |
|
63 :follow nil |
|
64 :export |
|
65 #'(lambda (link desc _ _) |
|
66 (format "<genorg desc=\"%s\" link=\"%s\"/>" desc link)) |
|
67 :store nil)) |
|
68 END |
|
69 |
|
70 my $elisp_code = <<"END" =~ s/\n\s+//gr; |
|
71 (let ((ls '(LIST))) |
|
72 (require 'org) |
|
73 (mapc |
|
74 (lambda (b) |
|
75 (with-current-buffer b |
|
76 (when (and (not (string-match "^ " (buffer-name b))) |
|
77 (eq major-mode 'org-mode)) |
|
78 (org-export-to-file 'html (pop ls))))) |
|
79 (buffer-list)) |
|
80 (kill-emacs)) |
|
81 END |
|
82 |
|
83 |
|
84 my ($h_intro, $h_c_intro, $h_cat_li, $h_chap, $h_en_cat, $h_en, $h_end) |
|
85 = split /SPLIT/ ,<<"END" =~ s/\n\s+//gr; |
|
86 <!DOCTYPE html> |
|
87 <html lang="en"> |
|
88 <head> |
|
89 <meta charset="UTF-8"> |
|
90 <meta name="viewport" content="width=device-width, initial-scale=1.0"> |
|
91 <title>TITLE</title> |
|
92 <link rel="stylesheet" href="style.css"> |
|
93 </head> |
|
94 SPLIT |
|
95 <body> |
|
96 <h1> Pranshu's Blog </h1> |
|
97 <ul id="cataogries"> |
|
98 SPLIT |
|
99 <li> <a href="LINK"> CATAG (NUMBER)</a> |
|
100 SPLIT |
|
101 </ul> |
|
102 <div> |
|
103 <h2> All articles </h2> <hr> |
|
104 SPLIT |
|
105 <a href="LINK"> |
|
106 <strong> TITLE </strong> <span>DATE</span> |
|
107 <p>DESCREPTION</p> |
|
108 <i>CATAG</i> |
|
109 <hr> |
|
110 </a> |
|
111 SPLIT |
|
112 <a href="LINK"> |
|
113 <strong> TITLE </strong> <span>DATE</span> |
|
114 <p>DESCREPTION</p> |
|
115 <hr> |
|
116 </a> |
|
117 SPLIT |
|
118 </div> |
|
119 </body> |
|
120 </html> |
|
121 END |
|
122 |
|
123 my $help = <<"END"; |
|
124 genorg [option] [dir|file] |
|
125 Option can be: |
|
126 -t FILE : Make a template org file at FILE |
|
127 -h : Help |
|
128 -c FILE : sepcify a new config file instead of genorg-conf |
|
129 -r : unconditionally remake org files |
|
130 |
|
131 If no option is set, accept a directory to generate. |
|
132 |
|
133 $config_file follow the syntax of: |
|
134 VAR VALUE |
|
135 where VAR can be: |
|
136 emacs-path : The value should be path of emacs. 'emacs' by default. |
|
137 css : The path of the css file to use. |
|
138 in_dir : The directory that contains the files. |
|
139 out_dir : the output directory. |
|
140 no_name_dir: The directory for generate files. This means in_dir/no_name_dir |
|
141 cannot exist. |
|
142 END |
|
143 |
|
144 |
|
145 my $rss_entry = <<"END" =~ s/\n\s+//gr; |
|
146 <?xml version=\"1.0\" encoding=\"UTF-8\" ?> |
|
147 <rss version=\"2.0\"> |
|
148 <channel> |
|
149 <title>W3Schools Home Page</title> |
|
150 <link>https://www.w3schools.com</link> |
|
151 <description>Free web building tutorials</description>" |
|
152 END |
|
153 |
|
154 my %config_vars = |
|
155 ( |
|
156 'emacs-path' => \$emacs, |
|
157 css => \$css, |
|
158 indir => \$dir, |
|
159 outdir => \$outdir, |
|
160 no_name_dir => \$no_name_dir |
|
161 ); |
|
162 |
|
163 sub read_config ($path) { |
|
164 open my $fh, '<', $path or die "Couldn't open config file\n"; |
|
165 while (<$fh>) { |
|
166 next if /^\s+$/; |
|
167 chomp; |
|
168 my ($directive, $rest) = split /\s+/, $_, 2; |
|
169 if (exists($config_vars{$directive})) { |
|
170 ${$config_vars{$directive}} = $rest; |
|
171 } else { |
|
172 print "Unknown variable $directive\n"; |
|
173 } |
|
174 } |
|
175 close $fh; |
|
176 } |
|
177 |
|
178 sub help_and_bye { |
|
179 print $help; |
|
180 exit; |
|
181 } |
|
182 |
|
183 sub template ($file) { |
|
184 $file // die "-t needs an argument.\n"; |
|
185 $file =~ s/\.org$//; |
|
186 open my $fh, '>', $file . ".org" or die $file . " couldn't be opened\n"; |
|
187 print $fh $template; |
|
188 close $fh; |
|
189 } |
|
190 |
|
191 # Reletive file from the perspect of file1, to file2 |
|
192 sub prel_path ($from, $to) { |
|
193 return '.' if $from eq $to; |
|
194 my \(@f1, @f2) = map [m{/[^/]+}g], ($from, $to); |
|
195 # return substr($f1[$#f1], 1) if $from =~ $to; |
|
196 while (@f1 && @f2 && $f1[0] =~ $f2[0]) { |
|
197 shift @f1; |
|
198 shift @f2; |
|
199 } |
|
200 my $p = @f1-1; |
|
201 local $" = ""; |
|
202 "../" x ($p > 0 ? $p : 0) . substr("@f2", 1); |
|
203 } |
|
204 |
|
205 my @org_exps; |
|
206 my @files_to_make; |
|
207 |
|
208 sub wanted { |
|
209 my $n_path = $outdir . prel_path ($dir, "$File::Find::name/"); |
|
210 # We need to mirror the direcotry structure |
|
211 if (-d) { |
|
212 push @files_to_make, $n_path || die "Couldn't make directory $n_path\n" |
|
213 unless -d $n_path; |
|
214 return; |
|
215 } |
|
216 if (/\.org$/) { |
|
217 push @org_exps, substr $n_path, length($outdir); |
|
218 } else { |
|
219 copy $_, $n_path; |
|
220 } |
|
221 } |
|
222 |
|
223 |
|
224 # Export @org_exps |
|
225 sub conv_files { |
|
226 my %changed_files; |
|
227 # Remove files that are already converted |
|
228 for my $rel_file (@org_exps) { |
|
229 # Emacs need the full file path |
|
230 my ($file, $to) = map { getcwd . "/$_" . $rel_file} ($dir, $outdir); |
|
231 s/org$/html/ for ($to, $rel_file); |
|
232 unless ($cache && -f $to && (stat($to))[9] > (stat($file))[9]) { |
|
233 $changed_files{$file} = $to; |
|
234 } |
|
235 } |
|
236 %changed_files // exit; |
|
237 $elisp_code =~ s/LIST/join '', map "\"$_\" ", values %changed_files /e; |
|
238 open my $fh, '-|', ($emacs, '--batch', '~/.emacs.d/init.el', keys %changed_files, "--eval" , "$elisp_code"); |
|
239 close $fh; |
|
240 \%changed_files |
|
241 } |
|
242 |
|
243 my $cdir; |
|
244 |
|
245 $ARGV[0] // die "No arguments provided\n"; |
|
246 while ($_ = shift @ARGV) { |
|
247 /^(-h|--?help)$/ && help_and_bye; |
|
248 /^-t$/ && do { template shift @ARGV ; exit }; |
|
249 /^-c$/ && do { $config_file = shift @ARGV; next }; |
|
250 /^-d$/ && do { $cache = 0; next }; |
|
251 $cdir = $_; |
|
252 } |
|
253 $cdir || die "directory not provided\n"; |
|
254 main(); |
|
255 |
|
256 my %catags; |
|
257 my @arts; |
|
258 |
|
259 sub main { |
|
260 -d $cdir or help_and_bye; |
|
261 mkdir $outdir; |
|
262 chdir $cdir or die "Couldn't access $cdir\n"; |
|
263 read_config $config_file; |
|
264 -d $dir or die "$dir doesn't exist \n"; |
|
265 find (\&wanted, $dir); |
|
266 mkdir for @files_to_make; |
|
267 values %{conv_files()}; |
|
268 chdir $outdir; |
|
269 @arts = sort { $b->{date} <=> $a->{date}} map { html_fixup($_) } @org_exps; |
|
270 finalise_html(); |
|
271 } |
|
272 |
|
273 sub entry { |
|
274 my ($data, $catagory) = @_; |
|
275 my $str = defined $catagory |
|
276 ? ($h_en_cat =~ s/CATAG/$data->{catag}/er) : $h_en; |
|
277 my $prel_path = "/$no_name_dir/" . (defined $catagory ? "c" : "c/b"); |
|
278 $str =~ s/DATE/$data->{date}->fmt/er |
|
279 =~ s/TITLE/$data->{title}/er |
|
280 =~ s/DESCREPTION/$data->{desc}/er |
|
281 =~ s|LINK|prel_path($prel_path, '/'. $data->{file})|er |
|
282 } |
|
283 |
|
284 sub rss_en ($data) { |
|
285 my $thing = <<"END" =~ s/\n\s+//gr; |
|
286 <item> |
|
287 <title>TITLE</title> |
|
288 <link>LINK</link> |
|
289 <description>DESC</description> |
|
290 </item> |
|
291 END |
|
292 $thing =~ s|LINK|"$domain/" . $data->{file}|er |
|
293 =~ s/TITLE/$data->{title}/er |
|
294 =~ s/DESC/$data->{desc}/er; |
|
295 } |
|
296 |
|
297 sub make_h_list { |
|
298 my $rstr = '<ul>'; |
|
299 for (@_) { |
|
300 my ($str, $page_no) = @{$_}; |
|
301 $rstr .= "<li> <a href=\"$page_no.html\"> $str </a>"; |
|
302 } |
|
303 $rstr . "</ul>" |
|
304 } |
|
305 |
|
306 sub min ($x, $y) { |
|
307 ($x, $y)[$x > $y] |
|
308 } |
|
309 |
|
310 sub panigation ($total_arts, $page_no) { |
|
311 if ($total_arts < 9) { |
|
312 make_h_list ((['«', $page_no - 1]) x!! ($page_no - 1), |
|
313 (map [$_ == $page_no ? "<span>$_</span>" : $_, $_ ], |
|
314 (1..($total_arts + 1))), |
|
315 ['»', $page_no]) |
|
316 } |
|
317 } |
|
318 |
|
319 sub move_on($article, $cfh, $next_file, $total_arts) { |
|
320 # say "Article is ". $article; |
|
321 my $page = 1 + int $article / $max_cat; |
|
322 print $cfh panigation int($total_arts / $max_cat), $page; |
|
323 print $cfh $h_end; |
|
324 close $cfh; |
|
325 open $cfh, '>', "$next_file$page.html"; |
|
326 say "$next_file$page.html"; |
|
327 print $cfh $h_intro; |
|
328 $cfh |
|
329 } |
|
330 |
|
331 sub finalise_html { |
|
332 mkdir $no_name_dir; |
|
333 open my $fh, '>', "$no_name_dir/1.html"; |
|
334 open my $mrss, '>', "$no_name_dir/rss.xml"; # The master rss file |
|
335 print $mrss $rss_entry; |
|
336 print $fh $h_intro . $h_c_intro; |
|
337 my (%c_files, %r_files); # Cataogry files and rss |
|
338 while (my ($cat, $num) = each %catags) { |
|
339 # ($h_intro, $h_c_intro, $h_cat_li, $h_chap, $h_en_cat, $h_en, $h_end) |
|
340 print $fh $h_cat_li =~ s/NUMBER/$num/re |
|
341 =~ s/CATAG/$cat/er; |
|
342 mkdir "$no_name_dir/$cat"; |
|
343 open $c_files{$cat}, '>', "$no_name_dir/$cat/1.html"; |
|
344 open $r_files{$cat}, '>', "$no_name_dir/$cat/rss.xml"; |
|
345 print {$r_files{$cat}} $rss_entry; |
|
346 print {$c_files{$cat}} $h_intro; |
|
347 } |
|
348 print $fh $h_chap; |
|
349 # :-) |
|
350 my $total_articles = () = map {(1)x$_} values %catags; |
|
351 # Now we iterate through the articles and add their index |
|
352 my ($n_fh, $n_mrss, %n_rss) = (0,0); |
|
353 my %n_cat; |
|
354 for my $art (@arts) { |
|
355 my ($catag, $title) = @{$art}{qw(catag title)}; |
|
356 my $rentry = rss_en $art; |
|
357 if (($n_rss{$catag} // 0) < $max_rss) { |
|
358 print {$r_files{$catag}} $rentry; |
|
359 ++$n_rss{$catag}; |
|
360 } |
|
361 if ($n_mrss < $max_rss) { |
|
362 print $mrss $rentry; |
|
363 ++$n_mrss |
|
364 } |
|
365 # my $ncat = \$n_cat{$catag}; |
|
366 $n_fh++; |
|
367 if ($n_fh % $max_cat == 0) { |
|
368 say "$n_fh % $max_cat = " . $n_fh % $max_cat ; |
|
369 say "\$n_fh is $n_fh"; |
|
370 $fh = move_on($n_fh, $fh, "$no_name_dir/", $total_articles); |
|
371 } |
|
372 print $fh entry $art, 1; |
|
373 print {$c_files{$catag}} entry $art; |
|
374 # say "$catag -> $title($n_cat{$catag})"; |
|
375 # say ($n_cat{$catag} // 1); |
|
376 # ++$$ncat |
|
377 } |
|
378 # ($total_arts, $page_no) |
|
379 print $fh panigation ((int $total_articles / $max_cat) x 2) if $total_articles >= $max_cat; |
|
380 print $fh $h_end; |
|
381 close $fh; |
|
382 for (values %c_files) { |
|
383 print $_ $h_end; |
|
384 close; |
|
385 } |
|
386 print $mrss '</channel> </rss>'; |
|
387 for (values %r_files) { |
|
388 print $_ '</channel> </rss>';; |
|
389 close; |
|
390 } |
|
391 } |
|
392 |
|
393 sub html_fixup ($filename) { |
|
394 my $dom = |
|
395 XML::LibXML->load_html(location => $filename, recover => 1); |
|
396 my %data = ('file', $filename); |
|
397 |
|
398 # We don't need style or table of contents heading (if exists) |
|
399 for my $xpath ('/html/head/style', '/html/body/div/div[@id="table-of-contents"]/h2') { |
|
400 $_->parentNode->removeChild($_) for $dom->findnodes($xpath) |
|
401 } |
|
402 |
|
403 # Info we need |
|
404 |
|
405 # Date |
|
406 for ($dom->findnodes('/html/body/div[@id="postamble"]/p[@class="date"]')) { |
|
407 my $text = $_->to_literal; |
|
408 if ($text =~ /^Date: (\d+)-(\d+)-(\d+)/) { |
|
409 my $date = PDate->new($1, $2, $3); |
|
410 $data{date} = $date; |
|
411 my $p = $_->parentNode; |
|
412 $p->parentNode->removeChild($p); |
|
413 } |
|
414 } |
|
415 # Title\ |
|
416 for ($dom->findnodes('/html/head/title')) { |
|
417 $data{title} = $_->to_literal; |
|
418 } |
|
419 # keywords/catogry |
|
420 for my $node ($dom->findnodes('/html/head/meta')) { |
|
421 (my $cont = $node->getAttribute('content')) || next; |
|
422 for ($node->getAttribute('name')) { |
|
423 if (/description/) { |
|
424 $data{desc} = $cont; |
|
425 } elsif (/keywords/) { |
|
426 my ($key, $rest) = parse_keywords($cont); |
|
427 @data{qw[catag title]} = @{$key}; |
|
428 $catags{$data{catag}}++; |
|
429 $rest =~ s/\s+\|\s+//; |
|
430 while ($rest) { |
|
431 (my $t,$rest) = parse_keywords($rest); |
|
432 push @{$data{related}}, $t; |
|
433 } |
|
434 } else { |
|
435 next; |
|
436 } |
|
437 $node->parentNode->removeChild($node); |
|
438 } |
|
439 } |
|
440 # $data{dom} = $dom; |
|
441 \%data; |
|
442 |
|
443 # print $newfh $dom->toString; |
|
444 # close $newfh; |
|
445 } |
|
446 |
|
447 sub parse_keywords ($str) { |
|
448 if ($str =~ s/\s*(?:"(.+?)"|(\w+))\s*->(?:\s*(?:"(.+?)"|(\w+)))//) { |
|
449 ([$1 // $2, $3 // $4], $str); |
|
450 } else { |
|
451 die "Keywords not arranged properly\n" |
|
452 } |
|
453 } |
|
454 |
|
455 # I wonder if sean combs has made a similar class |
|
456 { |
|
457 package PDate; |
|
458 |
|
459 sub new { |
|
460 my $class = shift; |
|
461 my $self = { year => 0 + shift, |
|
462 month => 0 + shift, |
|
463 day => 0 + shift, |
|
464 }; |
|
465 bless $self, $class; |
|
466 return $self; |
|
467 } |
|
468 |
|
469 # $d1 is greater than $d2 |
|
470 sub cmp { |
|
471 my ($d1, $d2) = @_; |
|
472 for ($d1->{year} <=> $d2->{year}, |
|
473 $d1->{month} <=> $d2->{month}, |
|
474 $d1->{day} <=> $d2->{day}) { |
|
475 return $_ unless $_ == 0 |
|
476 } |
|
477 0 |
|
478 } |
|
479 use overload '<=>' => \&cmp; |
|
480 |
|
481 sub fmt { |
|
482 my $self = shift; |
|
483 my @months = |
|
484 qw(January Febuary March April May June July August September November October December); |
|
485 my $n = $self->{day}; |
|
486 if ($n == 1) { $n = '1st' } |
|
487 elsif (($n - 2) % 10 == 0) { $n = "${n}nd" } |
|
488 elsif (($n - 3) % 10 == 0) { $n = "${n}rd" } |
|
489 else { $n = "${n}st" } |
|
490 $months[$self->{month} - 1] . " $n, " . $self->{year} |
|
491 } |
|
492 |
|
493 sub short_fmt { |
|
494 my $self = shift; |
|
495 join "-", ($self->{year}, $self->{month}, $self->{day}); |
|
496 } |
|
497 |
|
498 } |
|
499 |