| 12345678910111213141516171819202122232425262728293031323334353637383940 |
const fs = require('fs');
const path = require('path');

// Input HTML export and the directory that receives one file per entry.
const IN_FILE = 'knowledgebase.html';
const OUT_DIR = './entries';

// Load the whole export into memory as a UTF-8 string.
const html = fs.readFileSync(IN_FILE, 'utf8');

// Matches one `<tr class="cb_pointer">` row.
//   group 1: everything from the start of the row up to the first `</td>`
//            that follows a `<span name="<digits>"` marker
//   group 2: the digits of that `name` attribute (used as the entry id)
// Every quantifier is lazy, so a match ends at the nearest `</tr>` after
// that `</td>`.
// NOTE(review): the search for the span is not bounded by the row itself; a
// row without such a span would be matched together with the next row that
// has one. Confirm the export never contains such rows.
const entryRegex = /<tr class="cb_pointer">([\s\S]*?<span name="(\d+)"[\s\S]*?<\/td>)[\s\S]*?<\/tr>/g;

// Entry id -> trimmed HTML fragment. When an id occurs more than once, the
// last occurrence overwrites the earlier ones.
const entries = {};
let match;
// exec() on a /g regex resumes from lastIndex, so this walks every row once.
while ((match = entryRegex.exec(html)) !== null) {
  const id = match[2];
  const block = match[1].trim();
  entries[id] = block;
}

// `recursive: true` makes this a no-op when the directory already exists and
// creates missing parent directories, so no separate existsSync() check
// (with its check-then-create race) is needed.
fs.mkdirSync(OUT_DIR, { recursive: true });
/**
 * Post-processing hook for an extracted entry.
 *
 * At present this is an identity function: the input is handed back
 * untouched, whatever its type.
 *
 * NOTE(review): the steps that were previously commented out here look like
 * a chain of entity replacements followed by stripping every tag except
 * `<br>`. Their exact patterns were not recoverable from the source, so
 * confirm the intended replacements before re-enabling any of them.
 *
 * @param {string} str - Raw HTML fragment of one entry.
 * @returns {string} The same value that was passed in.
 */
function decodeHtmlEntities(str) {
  return str;
}
// Writing the per-entry files is switched off for now. Set this to `true` to
// create `<OUT_DIR>/<id>.txt` for every extracted entry.
const WRITE_FILES = false;

// Id of the entry that is printed as a quick sanity check of the extraction.
const PREVIEW_ID = '25';

const entryCount = Object.keys(entries).length;

if (WRITE_FILES) {
  for (const [id, content] of Object.entries(entries)) {
    const filePath = path.join(OUT_DIR, `${id}.txt`);
    fs.writeFileSync(filePath, content, 'utf8');
    console.log(`📄 Gespeichert: ${filePath}`);
  }
}

// Guard the preview: the requested id may not exist in the export, and
// calling .trim() on `undefined` would abort the script with a TypeError.
const preview = entries[PREVIEW_ID];
if (preview === undefined) {
  console.warn(`⚠️ Eintrag ${PREVIEW_ID} nicht gefunden.`);
} else {
  console.log(decodeHtmlEntities(preview).trim());
}

// Only claim that entries were saved when files were actually written.
if (WRITE_FILES) {
  console.log(`✅ ${entryCount} Einträge gespeichert.`);
} else {
  console.log(`ℹ️ ${entryCount} Einträge gefunden (Speichern deaktiviert).`);
}
|