Add new processing rule and improve comments.

This commit is contained in:
liding 2024-08-06 11:00:42 +08:00
parent 9e33f42d34
commit 5d4d388fe8

View File

@ -9,6 +9,10 @@ const html = await Astro.slots.render("default");
// 3. If a line ends without [a-zA-Z] and its next line starts with [a-zA-Z], // 3. If a line ends without [a-zA-Z] and its next line starts with [a-zA-Z],
// add a leading space to the next line. // add a leading space to the next line.
// 4. If a line does not start with an ASCII character, join it to its previous line. // 4. If a line does not start with an ASCII character, join it to its previous line.
// 5. If a line starts with <strong> or <em>, add a leading space to this
// line.
// 6. If a line ends with </strong> or </em>, add a trailing space to this
// line.
// This is important in using one newline character to wrap Chinese. // This is important in using one newline character to wrap Chinese.
const arr = html.split("\n"); const arr = html.split("\n");
let articleHTMLFinal = arr[0]; let articleHTMLFinal = arr[0];
@ -16,6 +20,7 @@ let remainIntactArea = false;
// The first line of the HTML string is a table-of-content head recognized by // The first line of the HTML string is a table-of-content head recognized by
// remark-toc so it can be ignored. // remark-toc so it can be ignored.
for (let i = 1; i < arr.length; i++) { for (let i = 1; i < arr.length; i++) {
// rule 1
// Check if the current line belongs to some block area. // Check if the current line belongs to some block area.
if (arr[i].match(/^(<pre|<code|<blockquote|<table)/) !== null) { if (arr[i].match(/^(<pre|<code|<blockquote|<table)/) !== null) {
remainIntactArea = true; remainIntactArea = true;
@ -23,22 +28,34 @@ for (let i = 1; i < arr.length; i++) {
remainIntactArea = false; remainIntactArea = false;
} }
// -------- add space // -------- add space
// rule 2
// If the last character of the current line is [a-zA-Z], add a space to the // If the last character of the current line is [a-zA-Z], add a space to the
// end of the line. // end of the line.
if (arr[i].charAt(arr[i].length - 1).match(/[a-zA-Z]/) !== null) { if (arr[i].charAt(arr[i].length - 1).match(/[a-zA-Z]/) !== null) {
arr[i] += " "; arr[i] += " ";
} }
// rule 3
// The current line is not the last line. // The current line is not the last line.
// AND // AND
// The current line ends without [a-zA-Z]. // The current line ends without [a-zA-Z].
// AND // AND
// The next line starts with [a-zA-Z]. // The next line starts with [a-zA-Z].
if (i + 1 < arr.length if (i + 1 < arr.length
&& arr[i].charAt(arr[i].length - 1).match(/[a-zA-Z]/) === null && arr[i].charAt(arr[i].length - 1).match(/[a-zA-Z]/) === null
&& arr[i + 1].charAt(0).match(/[a-zA-Z]/) !== null) { && arr[i + 1].charAt(0).match(/[a-zA-Z]/) !== null
) {
arr[i] += " "; arr[i] += " ";
} }
// rule 5
if (arr[i].match(/^(<em>|<strong>)/) !== null){
arr[i] = " " + arr[i];
}
// rule 6
if (arr[i].match(/(<\/em>|<\/strong>)$/) !== null){
arr[i] = arr[i] + " ";
}
// -------- combine lines // -------- combine lines
// rule 4
if ( if (
// ( // (
// If the first character is not ascii character, // If the first character is not ascii character,