1
1

4 Commits 81f6f70b6b ... 5ae35a631e

Autor SHA1 Nachricht Datum
  eliasCybob 5ae35a631e Merge branch 'fix/Readme-Clarification' vor 5 Monaten
  eliasCybob 1bebdbb4fb Updated Documentation vor 5 Monaten
  eliasCybob a11c66a893 Documented Code, Removed useless getEntries.js vor 5 Monaten
  eliasCybob 5a76f33018 Modified API vor 5 Monaten
4 geänderte Dateien mit 44 neuen und 45 gelöschten Zeilen
  1. 15 4
      README.md
  2. 21 0
      api/CsvReader.php
  3. 8 1
      api/chatbot.php
  4. 0 40
      scraper/getEntries.js

+ 15 - 4
README.md

@@ -1,5 +1,3 @@
-
-
 # Lokaler Deepseek Chatbot mit Ollama
 
 Dieses Projekt stellt einen lokalen Chatbot bereit, der das Deepseek LLM (7B Chat) Modell über Ollama nutzt. Die Kommunikation erfolgt über eine PHP-API.
@@ -7,15 +5,28 @@ Dieses Projekt stellt einen lokalen Chatbot bereit, der das Deepseek LLM (7B Cha
 ## Voraussetzungen
 
 - PHP (empfohlen: >= 7.4)
-  - [Homebrew](https://brew.sh/): `brew install php`
+    - [Homebrew](https://brew.sh/): `brew install php`
+    - [Poppler](https://formulae.brew.sh/formula/poppler): `brew install poppler`
+
 - [Ollama](https://ollama.com/) mit Deepseek-LLM-7B-Chat Modell
 - Optional: [LM Studio](https://lmstudio.ai/) für Modellverwaltung
 - Optional: Postman für API-Tests [Postman collection](https://danielgraf-5846927.postman.co/workspace/Daniel-Graf's-Workspace~471f4bdd-8623-4b95-8d73-aacd2f6a771a/collection/45973825-cc4e1b49-0480-47b1-ab78-292ec8dc7c98?action=share&source=copy-link&creator=45973825)
+
+> Dies wird später mit Docker überarbeitet.
 ## Installation
 
 1. Stelle sicher, dass PHP installiert ist.
-2. Installiere Ollama und lade das Modell `deepseek-llm-7b-chat` herunter.
+     ```bash
+    php -v
+     ```
+2. Installiere Ollama und lade das Modell `deepseek-llm-7B-chat-GGUF` über LM Studio herunter.
+
 3. Klone dieses Repository und wechsle ins Projektverzeichnis.
+    ```bash
+    git clone https://git.cybob-one.com/Daniel/deepseek_chatbot
+    cd deepseek_chatbot
+    ```
+
 
 ## Starten des Servers
 

+ 21 - 0
api/CsvReader.php

@@ -1,11 +1,32 @@
 <?php
+
+
+/**
+ * CsvReader class to read CSV files from a specified directory.
+ * It reads all CSV files, combines their headers, and returns an array of rows.
+ */
 class CsvReader {
+
+    /**
+     * Directory where CSV files are stored.
+     * @var string
+     */
     private $dir;
 
+    /**
+     * Constructor to initialize the CsvReader with a directory.
+     *
+     * @param string $directory The directory containing CSV files.
+     */
     public function __construct($directory) {
         $this->dir = rtrim($directory, '/') . '/';
     }
 
+    /**
+     * Reads all CSV files in the specified directory and returns an array of rows.
+     *
+     * @return array An array of associative arrays representing the rows in the CSV files.
+     */
     public function readAll() {
         $rows = array();
 

+ 8 - 1
api/chatbot.php

@@ -11,6 +11,12 @@ if ($_SERVER['REQUEST_METHOD'] !== 'POST') {
     exit;
 }
 
+
+/** Extract text from a PDF file using the pdftotext command-line tool.
+ *
+ * @param string $file Path to the PDF file.
+ * @return string Extracted text from the PDF.
+ */
 function extractPdfText($file) {
     $tmp = tempnam(sys_get_temp_dir(), 'pdftext');
     exec("pdftotext " . escapeshellarg($file) . " " . escapeshellarg($tmp));
@@ -19,7 +25,7 @@ function extractPdfText($file) {
     return $text ?: '';
 }
 
-// ### Build prompt for deepseek
+// Build prompt for deepseek
 $input = json_decode(file_get_contents('php://input'), true);
 $question = isset($input['question']) ? $input['question'] : '';
 $roleFile = __DIR__ . '/data/role.txt';
@@ -31,6 +37,7 @@ $contextText = "";
 foreach ($rows as $row) {
     $contextText .= "- " . implode(" | ", $row) . "\n";
 }
+
 $pdf1 = extractPdfText(__DIR__ . '/data/pdf/onboarding.pdf');
 $pdf2 = extractPdfText(__DIR__ . '/data/pdf/urlaub.pdf');
 

+ 0 - 40
scraper/getEntries.js

@@ -1,40 +0,0 @@
-const fs = require('fs');
-const path = require('path');
-
-const IN_FILE = 'knowledgebase.html';
-const OUT_DIR = './entries';
-
-// read file
-const html = fs.readFileSync(IN_FILE, 'utf8');
-
-// MAGIC
-const entryRegex = /<tr class="cb_pointer">([\s\S]*?<span name="(\d+)"[\s\S]*?<\/td>)[\s\S]*?<\/tr>/g;
-const entries = {};
-let match;
-
-while ((match = entryRegex.exec(html)) !== null) {
-    const id = match[2];
-    const block = match[1].trim();
-    entries[id] = block;
-}
-
-if (!fs.existsSync(OUT_DIR)) fs.mkdirSync(OUT_DIR);
-
-function decodeHtmlEntities(str) {
-    return str
-        // .replace(/&lt;/g, "<")
-        // .replace(/&gt;/g, ">")
-        // .replace(/&quot;/g, "\"")
-        // .replace(/&amp;/g, "&")
-        // .replace(/<(?!br\s*\/?)[^>]+>/gi, '');
-}
-
-for (const [id, content] of Object.entries(entries)) {
-    const filePath = path.join(OUT_DIR, `${id}.txt`);
-    // fs.writeFileSync(filePath, content, 'utf8');
-    // console.log(`📄 Gespeichert: ${filePath}`);
-}
-
-console.log(decodeHtmlEntities(entries[25]).trim());
-
-console.log(`✅ ${Object.keys(entries).length} Einträge gespeichert.`);