getEntries.js 1.1 KB

12345678910111213141516171819202122232425262728293031323334353637383940
  1. const fs = require('fs');
  2. const path = require('path');
  // Input HTML export and the directory that receives one file per entry.
  // Both paths are resolved relative to the current working directory.
  const IN_FILE = 'knowledgebase.html';
  const OUT_DIR = './entries';

  // Load the complete HTML document into memory as a UTF-8 string.
  const html = fs.readFileSync(IN_FILE, 'utf8');

  // One match per <tr class="cb_pointer"> row:
  //   group 1 - everything from the start of the row up to the first </td>
  //             that follows the <span name="ID"> marker
  //   group 2 - the numeric ID taken from that span's name attribute
  // NOTE(review): the lazy [\s\S]*? segments are not bounded by </tr>, so a
  // row without a <span name="..."> would be merged with the following row.
  const entryRegex = /<tr class="cb_pointer">([\s\S]*?<span name="(\d+)"[\s\S]*?<\/td>)[\s\S]*?<\/tr>/g;

  // Map of entry ID -> trimmed HTML block. A later row with the same ID
  // overwrites an earlier one.
  const entries = {};
  // matchAll iterates over a private copy of the regex, so no lastIndex state
  // leaks out and no module-level loop variable is required.
  for (const [, block, id] of html.matchAll(entryRegex)) {
    entries[id] = block.trim();
  }

  // `recursive: true` makes directory creation idempotent: it is a no-op when
  // OUT_DIR already exists and avoids the existsSync/mkdirSync race.
  fs.mkdirSync(OUT_DIR, { recursive: true });
  /**
   * Converts an HTML fragment to text: decodes the basic character entities
   * and removes every tag except <br>.
   *
   * The steps run in the order of the previously disabled implementation:
   * entities are decoded first, then tags are stripped. Escaped markup such as
   * "&lt;p&gt;" is therefore decoded and then removed like a real tag.
   * NOTE(review): because of that order, literal text such as "a &lt; b &gt; c"
   * also loses the part between the angle brackets - confirm this is intended.
   *
   * @param {string} str - HTML fragment; null/undefined is treated as empty.
   * @returns {string} The decoded text with only <br> tags left in place.
   */
  function decodeHtmlEntities(str) {
    if (str == null) {
      return '';
    }

    const entityChars = { lt: '<', gt: '>', quot: '"', amp: '&' };

    return String(str)
      // Single pass over all four entities, so "&amp;lt;" becomes "&lt;" and is
      // never decoded a second time into "<".
      .replace(/&(lt|gt|quot|amp);/g, (whole, entityName) => entityChars[entityName])
      // Drop every tag except <br>, <br/>, <br ...>. The \b stops the exception
      // from also sparing unrelated tags whose name merely starts with "br".
      .replace(/<(?!br\b)[^>]+>/gi, '');
  }
  // Write every extracted entry to <OUT_DIR>/<id>.txt. The write used to be
  // commented out, so nothing reached the disk even though the summary line
  // below reports the entries as saved.
  for (const [id, content] of Object.entries(entries)) {
    const filePath = path.join(OUT_DIR, `${id}.txt`);
    fs.writeFileSync(filePath, content, 'utf8');
    console.log(`📄 Gespeichert: ${filePath}`);
  }

  // Debug preview of entry 25. Guarded because the entry may be absent from
  // the input; calling .trim() on the missing value would throw a TypeError.
  const previewEntry = entries[25];
  if (previewEntry !== undefined) {
    console.log(decodeHtmlEntities(previewEntry).trim());
  }

  console.log(`✅ ${Object.keys(entries).length} Einträge gespeichert.`);