|
@@ -1,9 +1,17 @@
|
|
|
<?php
|
|
<?php
|
|
|
-require_once 'CsvReader.php';
|
|
|
|
|
-set_time_limit(2500); // 2 Minuten erlauben
|
|
|
|
|
|
|
+
|
|
|
|
|
+require_once 'CSVReader.php';
|
|
|
|
|
+require_once 'PDFReader.php';
|
|
|
|
|
+require_once 'PromptBuilder.php';
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
+// Allow up to 2500 seconds (about 42 minutes) of execution time
|
|
|
|
|
+set_time_limit(2500);
|
|
|
|
|
+
|
|
|
|
|
|
|
|
header('Content-Type: application/json');
|
|
header('Content-Type: application/json');
|
|
|
|
|
|
|
|
|
|
+
|
|
|
// only accept POST requests
|
|
// only accept POST requests
|
|
|
if ($_SERVER['REQUEST_METHOD'] !== 'POST') {
|
|
if ($_SERVER['REQUEST_METHOD'] !== 'POST') {
|
|
|
http_response_code(405);
|
|
http_response_code(405);
|
|
@@ -12,75 +20,52 @@ if ($_SERVER['REQUEST_METHOD'] !== 'POST') {
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
-/** Extract text from a PDF file using pdftotext command line tool.
|
|
|
|
|
- *
|
|
|
|
|
- * @param string $file Path to the PDF file.
|
|
|
|
|
- * @return string Extracted text from the PDF.
|
|
|
|
|
- */
|
|
|
|
|
-function extractPdfText($file) {
|
|
|
|
|
- $tmp = tempnam(sys_get_temp_dir(), 'pdftext');
|
|
|
|
|
- exec("pdftotext " . escapeshellarg($file) . " " . escapeshellarg($tmp));
|
|
|
|
|
- $text = file_get_contents($tmp);
|
|
|
|
|
- unlink($tmp);
|
|
|
|
|
- return $text ?: '';
|
|
|
|
|
-}
|
|
|
|
|
-
|
|
|
|
|
// Build prompt for deepseek
|
|
// Build prompt for deepseek
|
|
|
$input = json_decode(file_get_contents('php://input'), true);
|
|
$input = json_decode(file_get_contents('php://input'), true);
|
|
|
$question = isset($input['question']) ? $input['question'] : '';
|
|
$question = isset($input['question']) ? $input['question'] : '';
|
|
|
$roleFile = __DIR__ . '/data/role.txt';
|
|
$roleFile = __DIR__ . '/data/role.txt';
|
|
|
$role = file_exists($roleFile) ? file_get_contents($roleFile) : '';
|
|
$role = file_exists($roleFile) ? file_get_contents($roleFile) : '';
|
|
|
|
|
|
|
|
-$reader = new CsvReader(__DIR__ . '/data/csv');
|
|
|
|
|
-$rows = $reader->readAll();
|
|
|
|
|
-$contextText = "";
|
|
|
|
|
-foreach ($rows as $row) {
|
|
|
|
|
- $contextText .= "- " . implode(" | ", $row) . "\n";
|
|
|
|
|
-}
|
|
|
|
|
|
|
|
|
|
-$pdf1 = extractPdfText(__DIR__ . '/data/pdf/onboarding.pdf');
|
|
|
|
|
-$pdf2 = extractPdfText(__DIR__ . '/data/pdf/urlaub.pdf');
|
|
|
|
|
|
|
+// Reading CSV data
|
|
|
|
|
+$csvReader = new CSVReader(__DIR__ . '/data/csv');
|
|
|
|
|
+$csvRows = $csvReader->readAll();
|
|
|
|
|
+$contextText = $csvReader->toString($csvRows);
|
|
|
|
|
+
|
|
|
|
|
|
|
|
-$pdfContext = "\n\nAus Onboarding-Dokument:\n" . $pdf1 . "\n\nAus Urlaubsdokument:\n" . $pdf2;
|
|
|
|
|
|
|
+// Reading PDF data
|
|
|
|
|
+$pdfReader = new PDFReader(__DIR__ . '/data/pdf');
|
|
|
|
|
+$pdfData = $pdfReader->readAll();
|
|
|
|
|
+$pdfContext = $pdfReader->toString($pdfData);
|
|
|
|
|
|
|
|
-$htmlDir = __DIR__ . '/data/html';
|
|
|
|
|
|
|
+
|
|
|
|
|
+// TODO: Read HTML data
|
|
|
$htmlContext = '';
|
|
$htmlContext = '';
|
|
|
|
|
|
|
|
-$prompt = "";
|
|
|
|
|
-$prompt = "Deine Rolle ist: ". $role;
|
|
|
|
|
-$prompt .= "\nDeine Frage ist: " . $question;
|
|
|
|
|
-$prompt .= "\nCSV-Daten:\n" . $contextText;
|
|
|
|
|
-$prompt .= "\nPDF-Kontext:\n" . $pdfContext;
|
|
|
|
|
-$prompt .= "\nHTML-Wissensdatenbank:\n" . $htmlContext;
|
|
|
|
|
-
|
|
|
|
|
-// ollama stuff
|
|
|
|
|
-//$payload = json_encode([
|
|
|
|
|
-// "model" => "deepseek-r1",
|
|
|
|
|
-//// "model" => "deepseek-llm",
|
|
|
|
|
-//// "model" => "deepseek-coder:6.7b",
|
|
|
|
|
-// "prompt" => $prompt,
|
|
|
|
|
-// "stream" => false,
|
|
|
|
|
-// "stop" => ["</think>"]
|
|
|
|
|
-//]);
|
|
|
|
|
-//$promptContext = "Kontextdaten:\n\nCSV:\n$contextText\n\nPDF:\n$pdfContext\nHTML:\n$htmlContext";
|
|
|
|
|
-$promptContext = "Kontextdaten:\n\nCSV:\n$contextText\n\nPDF:\n$pdfContext";
|
|
|
|
|
-error_log("System prompt length: " . strlen($promptContext));
|
|
|
|
|
-
|
|
|
|
|
-$payload = json_encode([
|
|
|
|
|
- "model" => "deepseek-llm-7b-chat",
|
|
|
|
|
- "messages" => [
|
|
|
|
|
-// ["role" => "system", "content" => "Rolle:\n$role\n\n$promptContext"],
|
|
|
|
|
- ["role" => "system", "content" => "Rolle:\n$role"],
|
|
|
|
|
- ["role" => "user", "content" => $question]
|
|
|
|
|
-// ["role" => "context", "content" => $promptContext]
|
|
|
|
|
- ],
|
|
|
|
|
- "temperature" => 0.5,
|
|
|
|
|
- "max_tokens" => -1,
|
|
|
|
|
- "stream" => false
|
|
|
|
|
-]);
|
|
|
|
|
|
|
+
|
|
|
|
|
+$prompt = new PromptBuilder();
|
|
|
|
|
+$prompt->setRole($role);
|
|
|
|
|
+$prompt->setQuestion($question);
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
+$promptContext = "\nCSV-Kontext:\n" . $contextText;
|
|
|
|
|
+$promptContext .= "\nPDF-Kontext:\n" . $pdfContext;
|
|
|
|
|
+$promptContext .= "\nHTML-Wissensdatenbank:\n" . $htmlContext;
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
+$prompt->setContext($promptContext); // pass the combined CSV/PDF/HTML context assembled above, not only the CSV text
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
+$fullPrompt = $prompt->buildPromptString();
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
+error_log("System prompt length: " . strlen($fullPrompt));
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
+$payload = $prompt->buildPromptData();
|
|
|
|
|
+
|
|
|
|
|
+// Start timing, then send the completion request to the model
|
|
|
$start = microtime(true);
|
|
$start = microtime(true);
|
|
|
-// ### Send Request
|
|
|
|
|
-//$ch = curl_init('http://localhost:11434/api/generate');
|
|
|
|
|
$ch = curl_init('http://localhost:1234/api/v0/chat/completions');
|
|
$ch = curl_init('http://localhost:1234/api/v0/chat/completions');
|
|
|
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
|
|
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
|
|
|
curl_setopt($ch, CURLOPT_POST, true);
|
|
curl_setopt($ch, CURLOPT_POST, true);
|
|
@@ -91,15 +76,11 @@ curl_close($ch);
|
|
|
$data = json_decode($response, true);
|
|
$data = json_decode($response, true);
|
|
|
$end = microtime(true);
|
|
$end = microtime(true);
|
|
|
|
|
|
|
|
-// ### Prepare Response
|
|
|
|
|
-//$responseText = isset($data['response']) ? $data['response'] : 'No answer from Deepseek!';
|
|
|
|
|
-//$responseText = $data['choices'][0]['message']['content'] ?? 'No answer from Deepseek!';
|
|
|
|
|
|
|
+
|
|
|
$responseText = isset($data['choices'][0]['message']['content']) ? $data['choices'][0]['message']['content'] : '';
|
|
$responseText = isset($data['choices'][0]['message']['content']) ? $data['choices'][0]['message']['content'] : '';
|
|
|
-//$responseText = preg_replace('/<think>.*?<\/think>/s', ' ', $responseText);
|
|
|
|
|
$responseText = trim($responseText);
|
|
$responseText = trim($responseText);
|
|
|
|
|
|
|
|
echo json_encode([
|
|
echo json_encode([
|
|
|
'reply' => $responseText,
|
|
'reply' => $responseText,
|
|
|
-// 'misc' => $htmlContext,
|
|
|
|
|
'duration_seconds' => round($end - $start, 3)
|
|
'duration_seconds' => round($end - $start, 3)
|
|
|
]);
|
|
]);
|