genorg.pl
changeset 0 1f2f88ea2e78
child 1 0b7be2e78d3b
equal deleted inserted replaced
-1:000000000000 0:1f2f88ea2e78
       
     1 #!/usr/bin/perl
       
     2 
       
     3 use v5.36.0;
       
     4 use strict;
       
     5 use warnings;
       
     6 
       
     7 use XML::LibXML;
       
     8 use File::Find;
       
     9 use File::Copy qw(copy move);
       
    10 use Cwd;
       
    11 use experimental qw(declared_refs);
       
    12 use Data::Dumper;
       
    13 
       
    14 # Fix up CLI interface
       
    15 # Add option ro set cache to 0 through command line
       
    16 # Print required elisp code it is in $req_config
       
    17 # Add config option for $max_rss and $max_cat
       
    18 # Sort cataogirs by entires
       
    19 # Variable for map_css
       
    20 # Domain thing as well
       
    21 # Dicmuemnt how init.el is required
       
    22 my $cache = 1;
       
    23 
       
    24 my $config_file = "genorg-conf";
       
    25 my $dir = "blog";
       
    26 my $outdir = "out/";
       
    27 my $emacs = "emacs";
       
    28 my $css = "style.css";
       
    29 my $map_css ="other.css";
       
    30 my $no_name_dir = "c";
       
    31 my $domain = "p.bauherren.ovh";
       
    32 my $max_rss = 30;
       
    33 # The first page has one less article.  Think of it as a feature.
       
    34 my $max_cat = 1;
       
    35 
       
    36 my $username = $ENV{LOGNAME} || $ENV{USER} || getpwuid ($<);
       
    37 my $template = <<"END";
       
    38 #+title: Test document
       
    39 #+subtitle: This is the subtitle
       
    40 #+author: $username
       
    41 #+keywords: tag thing | related
       
    42 #+options: html-link-use-abs-url:nil html-postamble:auto
       
    43 #+options: html-preamble:t html-scripts:nil html-style:t
       
    44 #+options: html5-fancy:nil tex:t
       
    45 #+options: tex:mathjax
       
    46 #+html_doctype: html5
       
    47 #+html_container: div
       
    48 #+html_content_class: content
       
    49 #+html_link_home:
       
    50 #+html_link_up:
       
    51 #+html_mathjax:
       
    52 #+html_equation_reference_format: \eqref{%s}
       
    53 #+html_head:
       
    54 #+html_head_extra:
       
    55 #+infojs_opt:
       
    56 #+creator: <a href="https://www.gnu.org/software/emacs/">Emacs</a> 31.0.50 (<a href="https://orgmode.org">Org</a> mode 9.7.11)
       
    57 #+latex_header:
       
    58 END
       
    59 
       
    60 my $req_config = <<"END";
       
    61 (with-eval-after-load 'org
       
    62     (org-link-set-parameters "genorg"
       
    63 			 :follow nil
       
    64 			 :export
       
    65 			 #'(lambda (link desc _ _)
       
    66 			     (format "<genorg desc=\"%s\" link=\"%s\"/>" desc link))
       
    67 			 :store nil))
       
    68 END
       
    69 
       
    70 my $elisp_code = <<"END" =~ s/\n\s+//gr;
       
    71 (let ((ls '(LIST)))
       
    72   (require 'org)
       
    73   (mapc
       
    74    (lambda (b)
       
    75      (with-current-buffer b
       
    76        (when (and (not (string-match "^ " (buffer-name b)))
       
    77 		  (eq major-mode 'org-mode))
       
    78 	 (org-export-to-file 'html (pop ls)))))
       
    79    (buffer-list))
       
    80   (kill-emacs))
       
    81 END
       
    82 
       
    83 
       
    84 my ($h_intro, $h_c_intro, $h_cat_li, $h_chap, $h_en_cat, $h_en, $h_end)
       
    85   = split /SPLIT/ ,<<"END" =~ s/\n\s+//gr;
       
    86 <!DOCTYPE html>
       
    87 <html lang="en">
       
    88   <head>
       
    89     <meta charset="UTF-8">
       
    90     <meta name="viewport" content="width=device-width, initial-scale=1.0">
       
    91     <title>TITLE</title>
       
    92     <link rel="stylesheet" href="style.css">
       
    93   </head>
       
    94 SPLIT
       
    95   <body>
       
    96     <h1> Pranshu's Blog </h1>
       
    97     <ul id="cataogries">
       
    98 SPLIT 
       
    99       <li> <a href="LINK">  CATAG (NUMBER)</a>
       
   100 SPLIT
       
   101     </ul>
       
   102     <div>
       
   103       <h2> All articles </h2> <hr>
       
   104 SPLIT
       
   105    <a href="LINK">
       
   106      <strong> TITLE </strong> <span>DATE</span>
       
   107      <p>DESCREPTION</p>
       
   108      <i>CATAG</i>
       
   109      <hr>
       
   110   </a>
       
   111 SPLIT
       
   112    <a href="LINK">
       
   113      <strong> TITLE </strong> <span>DATE</span>
       
   114      <p>DESCREPTION</p>
       
   115      <hr>
       
   116    </a>
       
   117 SPLIT
       
   118     </div>
       
   119   </body>
       
   120 </html>
       
   121 END
       
   122 
       
   123 my $help = <<"END";
       
   124 genorg [option] [dir|file]
       
   125 Option can be:
       
   126  -t FILE : Make a template org file at FILE
       
   127  -h      : Help
       
   128  -c FILE : sepcify a new config file instead of genorg-conf
       
   129  -r      : unconditionally remake org files
       
   130 
       
   131 If no option is set, accept a directory to generate.
       
   132 
       
   133 $config_file follow the syntax of:
       
   134 VAR VALUE
       
   135 where VAR can be:
       
   136  emacs-path : The value should be path of emacs. 'emacs' by default.
       
   137  css        : The path of the css file to use.
       
   138  in_dir     : The directory that contains the files.
       
   139  out_dir    : the output directory.
       
   140  no_name_dir: The directory for generate files.  This means in_dir/no_name_dir
       
   141               cannot exist.
       
   142 END
       
   143 
       
   144 
       
   145 my $rss_entry = <<"END" =~ s/\n\s+//gr;
       
   146 <?xml version=\"1.0\" encoding=\"UTF-8\" ?>
       
   147 <rss version=\"2.0\">
       
   148  <channel>
       
   149   <title>W3Schools Home Page</title>
       
   150   <link>https://www.w3schools.com</link>
       
   151   <description>Free web building tutorials</description>"
       
   152 END
       
   153   
       
   154 my %config_vars =
       
   155   (
       
   156    'emacs-path' => \$emacs,
       
   157    css => \$css,
       
   158    indir => \$dir,
       
   159    outdir => \$outdir,
       
   160    no_name_dir => \$no_name_dir
       
   161   );
       
   162 
       
   163 sub read_config ($path) {
       
   164   open my $fh, '<', $path or die "Couldn't open config file\n";
       
   165   while (<$fh>) {
       
   166     next if /^\s+$/;
       
   167     chomp;
       
   168     my ($directive, $rest) = split /\s+/, $_, 2;
       
   169     if (exists($config_vars{$directive})) {
       
   170       ${$config_vars{$directive}} = $rest;
       
   171     } else {
       
   172       print "Unknown variable $directive\n";
       
   173     }
       
   174   }
       
   175   close $fh;
       
   176 }
       
   177 
       
   178 sub help_and_bye {
       
   179   print $help;
       
   180   exit;
       
   181 }
       
   182 
       
   183 sub template ($file) {
       
   184   $file // die "-t needs an argument.\n";
       
   185   $file =~ s/\.org$//;
       
   186   open my $fh, '>', $file . ".org" or die $file . " couldn't be opened\n";
       
   187   print $fh $template;
       
   188   close $fh;
       
   189 }
       
   190 
       
   191 # Reletive file from the perspect of file1, to file2
       
   192 sub prel_path ($from, $to) {
       
   193   return '.' if $from eq $to;
       
   194   my \(@f1, @f2) = map [m{/[^/]+}g], ($from, $to);
       
   195   # return substr($f1[$#f1], 1) if $from =~ $to;
       
   196   while (@f1 && @f2 && $f1[0] =~ $f2[0]) {
       
   197     shift @f1;
       
   198     shift @f2;
       
   199   }
       
   200   my $p = @f1-1;
       
   201   local $" = "";
       
   202   "../" x ($p > 0 ? $p : 0) . substr("@f2", 1);
       
   203 }
       
   204 
       
   205 my @org_exps;
       
   206 my @files_to_make;
       
   207 
       
   208 sub wanted {
       
   209   my $n_path = $outdir . prel_path ($dir,  "$File::Find::name/");
       
   210   # We need to mirror the direcotry structure
       
   211   if (-d) {
       
   212     push @files_to_make, $n_path || die "Couldn't make directory $n_path\n"
       
   213       unless -d $n_path;
       
   214     return;
       
   215   }
       
   216   if (/\.org$/) {
       
   217     push @org_exps, substr $n_path, length($outdir);
       
   218   } else {
       
   219     copy $_, $n_path;
       
   220   }
       
   221 }
       
   222 
       
   223 
       
   224 # Export @org_exps
       
   225 sub conv_files {
       
   226   my %changed_files;
       
   227   # Remove files that are already converted
       
   228   for my $rel_file (@org_exps) {
       
   229     # Emacs need the full file path
       
   230     my ($file, $to) = map { getcwd . "/$_" . $rel_file} ($dir, $outdir);
       
   231     s/org$/html/ for ($to, $rel_file);
       
   232     unless ($cache && -f $to && (stat($to))[9] > (stat($file))[9]) {
       
   233       $changed_files{$file} = $to;
       
   234     }
       
   235   }
       
   236   %changed_files // exit;
       
   237   $elisp_code =~ s/LIST/join '', map "\"$_\" ", values %changed_files /e;
       
   238   open my $fh, '-|', ($emacs, '--batch', '~/.emacs.d/init.el', keys %changed_files, "--eval" , "$elisp_code");
       
   239   close $fh;
       
   240   \%changed_files
       
   241 }
       
   242 
       
   243 my $cdir;
       
   244 
       
   245 $ARGV[0] // die "No arguments provided\n";
       
   246 while ($_ = shift @ARGV) {
       
   247   /^(-h|--?help)$/ && help_and_bye;
       
   248   /^-t$/ && do { template shift @ARGV ; exit };
       
   249   /^-c$/ && do { $config_file = shift @ARGV; next };
       
   250   /^-d$/ && do { $cache = 0; next };
       
   251   $cdir = $_;
       
   252 }
       
   253 $cdir || die "directory not provided\n";
       
   254 main();
       
   255 
       
   256 my %catags;
       
   257 my @arts;
       
   258 
       
   259 sub main {
       
   260   -d $cdir or help_and_bye;
       
   261   mkdir $outdir;
       
   262   chdir $cdir or die "Couldn't access $cdir\n";
       
   263   read_config $config_file;
       
   264   -d $dir or die "$dir doesn't exist \n";
       
   265   find (\&wanted, $dir);
       
   266   mkdir for @files_to_make;
       
   267   values %{conv_files()};
       
   268   chdir $outdir;
       
   269   @arts = sort { $b->{date} <=> $a->{date}} map { html_fixup($_) } @org_exps;
       
   270   finalise_html();
       
   271 }
       
   272 
       
   273 sub entry {
       
   274   my ($data, $catagory) = @_;
       
   275   my $str = defined $catagory
       
   276     ? ($h_en_cat =~ s/CATAG/$data->{catag}/er) : $h_en;
       
   277   my $prel_path = "/$no_name_dir/" . (defined $catagory ? "c" : "c/b");
       
   278   $str =~ s/DATE/$data->{date}->fmt/er
       
   279     =~ s/TITLE/$data->{title}/er
       
   280     =~ s/DESCREPTION/$data->{desc}/er
       
   281     =~ s|LINK|prel_path($prel_path, '/'. $data->{file})|er
       
   282   }
       
   283 
       
   284 sub rss_en ($data) {
       
   285   my $thing = <<"END" =~ s/\n\s+//gr;
       
   286   <item>
       
   287     <title>TITLE</title>
       
   288     <link>LINK</link>
       
   289     <description>DESC</description>
       
   290   </item>
       
   291 END
       
   292   $thing =~ s|LINK|"$domain/" . $data->{file}|er
       
   293     =~ s/TITLE/$data->{title}/er
       
   294     =~ s/DESC/$data->{desc}/er;
       
   295 }
       
   296 
       
   297 sub make_h_list {
       
   298   my $rstr = '<ul>';
       
   299   for (@_) {
       
   300     my ($str, $page_no) = @{$_};
       
   301     $rstr .= "<li> <a href=\"$page_no.html\"> $str </a>";
       
   302   }
       
   303   $rstr . "</ul>"
       
   304 }
       
   305 
       
   306 sub min ($x, $y) {
       
   307   ($x, $y)[$x > $y]
       
   308 }
       
   309 
       
   310 sub panigation ($total_arts, $page_no) {
       
   311   if ($total_arts < 9) {
       
   312     make_h_list ((['&laquo', $page_no - 1]) x!! ($page_no - 1),
       
   313 		 (map [$_ == $page_no ? "<span>$_</span>" : $_,  $_ ],
       
   314 		  (1..($total_arts + 1))),
       
   315 		 ['&raquo', $page_no])
       
   316   }
       
   317 }
       
   318 
       
   319 sub move_on($article, $cfh, $next_file, $total_arts) {
       
   320   # say "Article is ".  $article;
       
   321   my $page = 1 + int $article / $max_cat;
       
   322   print $cfh panigation int($total_arts / $max_cat), $page;
       
   323   print $cfh $h_end;
       
   324   close $cfh;
       
   325   open $cfh, '>', "$next_file$page.html";
       
   326   say "$next_file$page.html";
       
   327   print $cfh $h_intro;
       
   328   $cfh
       
   329 }
       
   330 
       
   331 sub finalise_html {
       
   332   mkdir $no_name_dir;
       
   333   open my $fh, '>', "$no_name_dir/1.html";
       
   334   open my $mrss, '>', "$no_name_dir/rss.xml"; # The master rss file
       
   335   print $mrss $rss_entry;
       
   336   print $fh $h_intro . $h_c_intro;
       
   337   my (%c_files, %r_files);	# Cataogry files and rss
       
   338   while (my ($cat, $num) = each %catags) {
       
   339     # ($h_intro, $h_c_intro, $h_cat_li, $h_chap, $h_en_cat, $h_en, $h_end)
       
   340     print $fh $h_cat_li =~ s/NUMBER/$num/re
       
   341       =~ s/CATAG/$cat/er;
       
   342     mkdir "$no_name_dir/$cat";
       
   343     open $c_files{$cat}, '>', "$no_name_dir/$cat/1.html";
       
   344     open $r_files{$cat}, '>', "$no_name_dir/$cat/rss.xml";
       
   345     print {$r_files{$cat}} $rss_entry;
       
   346     print {$c_files{$cat}} $h_intro;
       
   347   }
       
   348   print $fh $h_chap;
       
   349   # :-)
       
   350   my $total_articles = () =  map {(1)x$_} values %catags;
       
   351   # Now we iterate through the articles and add their index
       
   352   my ($n_fh, $n_mrss, %n_rss) = (0,0);
       
   353   my %n_cat;
       
   354   for my $art (@arts) {
       
   355     my ($catag, $title) = @{$art}{qw(catag title)};
       
   356     my $rentry = rss_en $art;
       
   357     if (($n_rss{$catag} // 0) < $max_rss) {
       
   358       print {$r_files{$catag}} $rentry;
       
   359       ++$n_rss{$catag};
       
   360     }
       
   361     if ($n_mrss < $max_rss) {
       
   362       print $mrss $rentry;
       
   363       ++$n_mrss
       
   364     }
       
   365     # my $ncat = \$n_cat{$catag};
       
   366     $n_fh++;
       
   367     if ($n_fh % $max_cat == 0) {
       
   368       say "$n_fh % $max_cat = " . $n_fh % $max_cat ;
       
   369       say "\$n_fh is $n_fh";
       
   370       $fh = move_on($n_fh, $fh, "$no_name_dir/", $total_articles);
       
   371     }
       
   372     print $fh entry $art, 1;
       
   373     print {$c_files{$catag}} entry $art;
       
   374     # say "$catag -> $title($n_cat{$catag})";
       
   375     # say ($n_cat{$catag} // 1);
       
   376     # ++$$ncat
       
   377   }
       
   378   # ($total_arts, $page_no)
       
   379   print $fh panigation ((int $total_articles / $max_cat) x 2) if $total_articles >= $max_cat;
       
   380   print $fh $h_end;
       
   381   close $fh;
       
   382   for (values %c_files) {
       
   383     print $_ $h_end;
       
   384     close;
       
   385   }
       
   386   print $mrss '</channel> </rss>';
       
   387   for (values %r_files) {
       
   388     print $_ '</channel> </rss>';;
       
   389     close;
       
   390   }
       
   391 }
       
   392 
       
   393 sub html_fixup ($filename) {
       
   394   my $dom =
       
   395     XML::LibXML->load_html(location  => $filename, recover   => 1);
       
   396   my %data = ('file', $filename);
       
   397 
       
   398   # We don't need style or table of contents heading (if exists)
       
   399   for my $xpath ('/html/head/style', '/html/body/div/div[@id="table-of-contents"]/h2') {
       
   400     $_->parentNode->removeChild($_) for $dom->findnodes($xpath) 
       
   401   }
       
   402   
       
   403   # Info we need
       
   404 
       
   405   # Date
       
   406   for ($dom->findnodes('/html/body/div[@id="postamble"]/p[@class="date"]')) {
       
   407     my $text = $_->to_literal;
       
   408     if ($text =~ /^Date: (\d+)-(\d+)-(\d+)/) {
       
   409       my $date = PDate->new($1, $2, $3);
       
   410       $data{date} = $date;
       
   411       my $p = $_->parentNode;
       
   412       $p->parentNode->removeChild($p);
       
   413     }
       
   414   }
       
   415   # Title\
       
   416   for ($dom->findnodes('/html/head/title')) {
       
   417     $data{title}  = $_->to_literal;
       
   418   }
       
   419   # keywords/catogry
       
   420   for my $node ($dom->findnodes('/html/head/meta')) {
       
   421     (my $cont = $node->getAttribute('content')) || next;
       
   422     for ($node->getAttribute('name')) {
       
   423       if (/description/) {
       
   424 	$data{desc} = $cont;
       
   425       } elsif (/keywords/) {
       
   426 	my ($key, $rest) = parse_keywords($cont);
       
   427 	@data{qw[catag title]} = @{$key};
       
   428 	$catags{$data{catag}}++;
       
   429 	$rest =~ s/\s+\|\s+//;
       
   430 	while ($rest) {
       
   431 	  (my $t,$rest) = parse_keywords($rest);
       
   432 	  push @{$data{related}}, $t;
       
   433 	}
       
   434       } else {
       
   435 	next;
       
   436       }
       
   437       $node->parentNode->removeChild($node);
       
   438     }
       
   439   }
       
   440   # $data{dom} = $dom;
       
   441   \%data;
       
   442   
       
   443   # print $newfh $dom->toString;
       
   444   # close $newfh;
       
   445 }
       
   446 
       
   447 sub parse_keywords ($str) {
       
   448   if ($str =~ s/\s*(?:"(.+?)"|(\w+))\s*->(?:\s*(?:"(.+?)"|(\w+)))//) {
       
   449     ([$1 // $2, $3 // $4], $str);
       
   450   } else {
       
   451     die "Keywords not arranged properly\n"
       
   452   }
       
   453 }
       
   454 
       
   455 # I wonder if sean combs has made a similar class
       
   456 {
       
   457   package PDate;
       
   458   
       
   459   sub new {
       
   460     my $class = shift;
       
   461     my $self = { year => 0 + shift,
       
   462 		 month => 0 + shift,
       
   463 		 day => 0 + shift,
       
   464 	       };
       
   465     bless $self, $class;
       
   466     return $self;
       
   467   }
       
   468 
       
   469   # $d1 is greater than $d2
       
   470   sub cmp  {
       
   471     my ($d1, $d2) = @_;
       
   472     for ($d1->{year} <=> $d2->{year},
       
   473 	 $d1->{month} <=> $d2->{month},
       
   474 	 $d1->{day} <=> $d2->{day}) {
       
   475       return $_ unless $_ == 0
       
   476     }
       
   477     0
       
   478   }
       
   479   use overload '<=>' => \&cmp;
       
   480  
       
   481   sub fmt {
       
   482     my $self = shift;
       
   483     my @months =
       
   484       qw(January Febuary March April May June July August September November October December);
       
   485     my $n = $self->{day};
       
   486     if ($n == 1) { $n = '1st' }
       
   487     elsif (($n - 2) % 10 == 0) { $n = "${n}nd" }
       
   488     elsif (($n - 3) % 10 == 0) { $n = "${n}rd" }
       
   489     else { $n = "${n}st" }
       
   490     $months[$self->{month} - 1] . " $n, " . $self->{year}
       
   491   }
       
   492     
       
   493   sub short_fmt {
       
   494     my $self = shift;
       
   495     join "-", ($self->{year}, $self->{month}, $self->{day});
       
   496   }
       
   497 
       
   498 }
       
   499