Add new processing rule and improve comments.

2024-07-31 20:46:24 +08:00
parent 033b124b90
commit 9e33f42d34
1 changed files with 28 additions and 11 deletions
@@ -3,25 +3,42 @@
 const html = await Astro.slots.render("default");
 // rule:
-//   1. If a line belongs to the area where the line break should be kept.
+//   1. If a line belongs to the area where the line break should be kept,
-//   2. If a line ends with [a-zA-Z] add a space to the end of the line.
+//      do not process this line.
-//   3. If a line does not start in ASCII chars, join it to the previous line.
+//   2. If a line ends with [a-zA-Z], add a space to the end of this line.
 //   3. If a line ends without [a-zA-Z] and its next line starts with [a-zA-Z],
 //      add a heading space to the next line.
 //   4. If a line does not start with ASCII chars, join it to its previous line.
 // This is important when Chinese text is hard-wrapped with single newlines.
 const arr = html.split("\n");
 let articleHTMLFinal = arr[0];
 let remainIntactArea = false;
 // The first line of the HTML string is a table-of-content head recognized by
 // remark-toc so it can be ignored.
 for (let i = 1; i < arr.length; i++) {
  // The first line of the HTML string is a table-of-content head recognized by
  // remark-toc so it can be ignored.
  if (arr[i].charAt(arr[i].length - 1).match(/[a-zA-Z]/) !== null) {
    arr[i] += " ";
  }
  // Check if the current line belongs to some block area.
  if (arr[i].match(/^(<pre|<code|<blockquote|<table)/) !== null) {
    remainIntactArea = true;
  } else {
    remainIntactArea = false;
  }
  // -------- add space
  // If the last character of the current line is [a-zA-Z], add a space to the
  // end of the line.
  if (arr[i].charAt(arr[i].length - 1).match(/[a-zA-Z]/) !== null) {
    arr[i] += " ";
  }
  // The current line is not the last line.
  // AND
  // The current line ends without [a-zA-Z].
  // AND
  // The next line starts with [a-zA-Z].
  if (i + 1 < arr.length
      && arr[i].charAt(arr[i].length - 1).match(/[a-zA-Z]/) === null
      && arr[i + 1].charAt(0).match(/[a-zA-Z]/) !== null) {
    arr[i] += " ";
  }
  // -------- combine lines
  if (
    // (
    //   If the first character is not ascii character,
@@ -29,9 +46,9 @@ for (let i = 1; i < arr.length; i++) {
    //   the final character of previous line is not ascii character.
    // ) AND
    // The current line should not belong to an area that remains intact.
-    (arr[i].charAt(0).match(/[ -~]/) === null ||
+    (arr[i].charAt(0).match(/[ -~]/) === null
-      arr[i - 1].charAt(arr[i - 1].length - 1).match(/[ -~]/) === null) &&
+      || arr[i - 1].charAt(arr[i - 1].length - 1).match(/[ -~]/) === null)
-    !remainIntactArea
+    && !remainIntactArea
  ) {
    articleHTMLFinal += arr[i];
  } else {