image vision (cooks pcs)
This commit is contained in:
parent
8816fe5b39
commit
b3c772cc20
4 changed files with 417 additions and 5 deletions
18
README.md
18
README.md
|
|
@ -7,6 +7,8 @@ your friendly ai assistant. frontend for ollama.
|
|||
- git clone repository
|
||||
- install ollama from `https://ollama.ai/download`
|
||||
- pull a model from ollama (i recommend gemma3n:e4b for laptops like mine (i7-10750h + rtx 3050ti laptop edition))
|
||||
- for image stuff, you'll need a model like llava:7b
|
||||
- for webcam features, install fswebcam (Linux) or imagesnap (macOS)
|
||||
- copy config.example.toml to config.toml and edit it to have the model you selected, optionally set your name in [user]
|
||||
- npm i
|
||||
- node index.js
|
||||
|
|
@ -31,10 +33,22 @@ lydia is written to be easily configurable through a toml file which is easier t
|
|||
- temperature = the temperature you want lydia to use. basically how random the model is. default is 0.8
|
||||
- max_tokens = the max context tokens you want lydia to use. basically how far she can remember. default is 8192
|
||||
|
||||
## Camera settings
|
||||
- width = webcam capture width. default is 1280
|
||||
- height = webcam capture height. default is 720
|
||||
- quality = webcam capture quality (0-100). default is 100
|
||||
- device = specific camera device (false for default, or "/dev/video0", etc.)
|
||||
|
||||
## Runtime configuration
|
||||
the prompt can be changed by running l!prompt <text> in the chatbox. this only applies to the current session; if you want a persistent change, edit the config file.
|
||||
|
||||
## Image & Webcam Commands
|
||||
- `l!image <path>` or `l!img <path>` - send an image file (or just `l!image` to browse)
|
||||
- `l!webcam` or `l!cam` - take and send a webcam snapshot
|
||||
- press `ESC` to open the menu for image options
|
||||
|
||||
# Other stuff
|
||||
by hitting escape you can tab out of the chatbox, here you can do cool things like:
|
||||
by hitting escape you can open the menu, here you can do cool things like:
|
||||
- send pictures and take webcam snapshots
|
||||
- get help and see all commands
|
||||
- hit Q or CTRL+C to quit lydia (but why would you wanna do that anyway?)
|
||||
- yea thats it
|
||||
|
|
|
|||
|
|
@ -13,3 +13,9 @@ name = "user"
|
|||
[advanced]
|
||||
temperature = 0.8
|
||||
max_tokens = 8192
|
||||
|
||||
[camera]
|
||||
width = 1280
|
||||
height = 720
|
||||
quality = 100
|
||||
device = false # /dev/video0 on linux
|
||||
|
|
|
|||
394
lydia.js
394
lydia.js
|
|
@ -3,6 +3,9 @@ import blessed from "blessed";
|
|||
import { execSync } from "child_process";
|
||||
import toml from "toml";
|
||||
import fs from "fs";
|
||||
import path from "path";
|
||||
import NodeWebcam from "node-webcam";
|
||||
import sharp from "sharp";
|
||||
|
||||
if (!fs.existsSync("./config.toml")) {
|
||||
if (fs.existsSync("./config.example.toml")) {
|
||||
|
|
@ -27,6 +30,12 @@ let username = config.user.name;
|
|||
|
||||
let facefont = config.appearance.facefont;
|
||||
|
||||
// camera settings; each value falls back to a default when the [camera]
// table or the individual key is missing from config.toml
let cameraWidth = config.camera?.width || 1280;
let cameraHeight = config.camera?.height || 720;
// quality is documented as 0-100, so only fall back on null/undefined:
// with `||` an explicit quality of 0 would silently become 100
let cameraQuality = config.camera?.quality ?? 100;
// `false` (or an empty value) means "use the default capture device"
let cameraDevice = config.camera?.device || false;
|
||||
|
||||
let systemprompt = config.assistant.system_prompt
|
||||
.replace("${name}", assistantname)
|
||||
.replace("${username}", username);
|
||||
|
|
@ -81,15 +90,87 @@ const inputBox = blessed.textbox({
|
|||
mouse: true,
|
||||
placeholder: `go on, tell ${assistantname} something!`,
|
||||
});
|
||||
|
||||
// Centered list widget used as the main menu; created hidden.
// The order of `items` matters: handleMenuSelection dispatches on the
// selected index (0 = help, 1 = picture, 2 = webcam, 3 = exit).
const menuBox = blessed.list({
  label: ` ${assistantname} menu `,
  items: [
    "help & commands",
    "send picture",
    "take webcam snapshot",
    `exit ${assistantname}`,
  ],

  // geometry
  top: "center",
  left: "center",
  width: 60,
  height: 16,

  // appearance
  border: { type: "line" },
  style: {
    bg: "black",
    border: { fg: "cyan" },
    item: { fg: "white" },
    selected: { bg: "cyan", fg: "black" },
  },

  // interaction
  keys: true,
  vi: true,
  mouse: true,
  hidden: true,
});
|
||||
|
||||
// Small centered message box attached to the screen; created hidden and
// empty. `tags: true` lets its content use blessed markup such as {center}.
const popup = blessed.box({
  parent: screen,
  content: "",
  tags: true,
  hidden: true,

  // geometry
  top: "center",
  left: "center",
  width: 40,
  height: 8,

  // appearance
  border: { type: "line" },
  style: {
    bg: "black",
    border: { fg: "cyan" },
  },
});
|
||||
|
||||
// "OK" button on the popup's bottom row.
const popupButton = blessed.button({
  parent: popup,
  bottom: 1,
  left: "center",
  width: 8,
  height: 1,
  content: "OK",
  // center the label inside the 8-column button
  align: "center",
  style: {
    bg: "cyan",
    fg: "black",
    focus: {
      bg: "white",
      fg: "black",
    },
  },
  mouse: true,
  keys: true,
});
|
||||
|
||||
// Attach the top-level widgets to the screen.
for (const widget of [faceBox, chatBox, inputBox, menuBox, popup]) {
  screen.append(widget);
}

// Give the chat input the initial focus.
inputBox.focus();

// Session state shared by the functions below.
let chatHistory = [];
// Messages sent to the model on every request (after the system prompt).
let conversationHistory = [];
// Text of the reply currently being streamed.
let currentStreamMessage = "";
// True while the menu is shown.
let menuVisible = false;
|
||||
|
||||
function addMessage(role, content) {
|
||||
let message;
|
||||
|
|
@ -204,6 +285,219 @@ function clearChatHistory() {
|
|||
screen.render();
|
||||
}
|
||||
|
||||
// webcam configuration: size, quality and device come from the camera
// settings read from the config; the rest is fixed
const webcamOptions = {
  // values taken from config
  width: cameraWidth,
  height: cameraHeight,
  quality: cameraQuality,
  device: cameraDevice,

  // fixed capture behaviour
  delay: 0,
  saveShots: true,
  output: "jpeg",
  callbackReturn: "location"
};

// single shared capture handle used by takeWebcamSnapshot
const webcam = NodeWebcam.create(webcamOptions);
|
||||
|
||||
/**
 * Load an image, shrink it for vision models, and return it as base64 JPEG.
 *
 * The image is scaled to fit inside 800x600 (never enlarged) and re-encoded
 * as JPEG at quality 80.
 *
 * @param {string} imagePath - path of the image file to read
 * @returns {Promise<string>} base64-encoded JPEG data
 * @throws {Error} "Failed to process image: ..." with the underlying error
 *   attached as `cause`
 */
async function convertImageToBase64(imagePath) {
  try {
    // resize image to reasonable size for vision models
    const buffer = await sharp(imagePath)
      .resize(800, 600, { fit: 'inside', withoutEnlargement: true })
      .jpeg({ quality: 80 })
      .toBuffer();

    return buffer.toString('base64');
  } catch (error) {
    // keep the original error reachable for debugging instead of dropping it
    throw new Error(`Failed to process image: ${error.message}`, { cause: error });
  }
}
|
||||
|
||||
/**
 * Ask for an image path in a modal textbox, validate it, and send the image
 * to the model.
 *
 * The returned promise always resolves (never rejects): after the prompt is
 * cancelled, after a validation failure, or after the image has been sent.
 * Failures are reported to the user through showPopup.
 *
 * @returns {Promise<void>}
 */
async function sendImageFile() {
  return new Promise((resolve) => {
    // `parent: screen` already attaches the widget, so it is not appended again.
    const fileInput = blessed.textbox({
      parent: screen,
      top: 'center',
      left: 'center',
      width: '80%',
      height: 5,
      border: {
        type: 'line'
      },
      style: {
        border: {
          fg: 'cyan'
        },
        bg: 'black'
      },
      inputOnFocus: true,
      keys: true,
      mouse: true,
      label: ' Enter Image File Path (ESC to cancel) ',
      placeholder: 'Enter full path to image file...'
    });

    let closed = false;

    // Tear the prompt down exactly once. Removing the ESC binding here keeps
    // repeated calls from piling up listeners on the screen.
    // Returns false when the prompt was already closed.
    const closePrompt = () => {
      if (closed) return false;
      closed = true;
      screen.unkey(['escape'], onAbort);
      fileInput.destroy();
      // redraw so the prompt disappears even if nothing else renders afterwards
      screen.render();
      return true;
    };

    // ESC key or textbox cancel: close without sending anything.
    // NOTE(review): the file's global ESC binding toggles the menu and is
    // registered on the same screen, so cancelling with ESC presumably also
    // opens the menu — confirm.
    function onAbort() {
      if (closePrompt()) resolve();
    }

    fileInput.on('submit', async (filePath) => {
      if (!closePrompt()) return;

      const trimmedPath = (filePath ?? '').trim();
      if (!trimmedPath) {
        resolve();
        return;
      }

      // Check if file exists
      if (!fs.existsSync(trimmedPath)) {
        showPopup('Error: File does not exist');
        resolve();
        return;
      }

      const ext = path.extname(trimmedPath).toLowerCase();
      const imageExtensions = ['.jpg', '.jpeg', '.png', '.gif', '.bmp', '.webp'];

      if (!imageExtensions.includes(ext)) {
        showPopup('Error: Please select a valid image file\n(.jpg, .png, .gif, .bmp, .webp)');
        resolve();
        return;
      }

      try {
        const base64Image = await convertImageToBase64(trimmedPath);
        await sendMessageWithImage("Here's an image I'd like you to look at:", base64Image);
        addMessage(username, `[Sent image: ${path.basename(trimmedPath)}]`);
      } catch (error) {
        showPopup(`Error processing image: ${error.message}`);
      } finally {
        resolve();
      }
    });

    fileInput.on('cancel', onAbort);
    screen.key(['escape'], onAbort);

    // Handlers are in place before the textbox starts reading input.
    fileInput.focus();
    screen.render();
  });
}
|
||||
|
||||
/**
 * Capture a webcam frame, send it to the model, and delete the temporary file.
 *
 * The snapshot is written to the current working directory under a
 * timestamped name. The returned promise always resolves (never rejects);
 * failures are reported to the user through showPopup.
 *
 * @returns {Promise<void>}
 */
async function takeWebcamSnapshot() {
  return new Promise((resolve) => {
    showPopup('Taking webcam snapshot...\nPlease wait...');

    // ':' and '.' are replaced so the ISO timestamp is a safe file name
    const timestamp = new Date().toISOString().replace(/[:.]/g, '-');
    const filename = `webcam-${timestamp}.jpg`;
    const filepath = path.join(process.cwd(), filename);

    webcam.capture(filename, async (err) => {
      // dismiss the "please wait" popup shown above
      popup.hide();

      if (err) {
        // the error may arrive as an Error or as a plain string
        const detail = typeof err === 'string' ? err : err.message;
        let errorMsg = detail ? `Webcam error: ${detail}` : 'Webcam error occurred';
        if (detail && detail.includes('No such file or directory')) {
          errorMsg += '\n\nTip: Make sure you have a webcam connected\nand try installing fswebcam or imagesnap';
        }
        showPopup(errorMsg);
        resolve();
        return;
      }

      try {
        // check if file exists with a small delay
        await new Promise((r) => setTimeout(r, 500));

        if (!fs.existsSync(filepath)) {
          showPopup('Error: Snapshot file was not created\nCheck if webcam is connected and accessible');
          return;
        }

        const base64Image = await convertImageToBase64(filepath);
        await sendMessageWithImage("I just took this webcam snapshot:", base64Image);
        addMessage(username, `[Took webcam snapshot: ${filename}]`);
      } catch (error) {
        showPopup(`Error processing snapshot: ${error.message}`);
      } finally {
        // single cleanup path for success and failure; deleting the temporary
        // file is best-effort, so cleanup errors are deliberately ignored
        try {
          if (fs.existsSync(filepath)) {
            fs.unlinkSync(filepath);
          }
        } catch {
          // ignore cleanup errors
        }
        resolve();
      }
    });
  });
}
|
||||
|
||||
/**
 * Show `message` centered in the popup, focus its OK button, and redraw.
 *
 * @param {string} message - text to display (may contain newlines)
 */
function showPopup(message) {
  const centered = `{center}${message}{/center}`;
  popup.setContent(centered);
  popup.show();

  // focus the button so the popup can be dismissed from the keyboard
  popupButton.focus();
  screen.render();
}
|
||||
|
||||
/** Reveal the menu, move focus to it, and redraw the screen. */
function showMenu() {
  // the flag is read by the ESC/Enter/select handlers
  menuVisible = true;

  menuBox.show();
  menuBox.focus();

  screen.render();
}
|
||||
|
||||
/** Hide the menu, return focus to the chat input, and redraw the screen. */
function hideMenu() {
  // the flag is read by the ESC/Enter/select handlers
  menuVisible = false;

  menuBox.hide();
  inputBox.focus();

  screen.render();
}
|
||||
|
||||
/**
 * Run the action for the currently highlighted menu entry.
 *
 * The menu is hidden first; the index is then mapped to an action:
 * 0 = print help, 1 = send an image file, 2 = take a webcam snapshot,
 * 3 = exit the process.
 *
 * @returns {Promise<void>}
 */
async function handleMenuSelection() {
  // read the index before hiding the menu
  const choice = menuBox.selected;
  hideMenu();

  if (choice === 0) {
    const helpText =
      "available commands:\nl!help - if you wanna know what i can do, run this!\nl!clear - clear chat history, if you want me to forget everything, just run this!\nl!face <text> - if you want to force my expression, here you go! not sure i'll be too happy about it though.\nl!prompt <text> - if you want to change how i act, here you go! not sure i'll be too happy about that either.\nl!image <path> or l!img <path> - send an image file (or just l!image to browse)\nl!webcam or l!cam - take and send a webcam snapshot\n\nMenu shortcuts:\nESC - open/close menu\nUp/Down arrows - navigate menu\nEnter - select option\nCtrl+C or q - quit application\n\nMenu options:\n- help & commands - show this help\n- send picture - browse and send an image file\n- take webcam snapshot - capture and send a webcam photo\n- exit - quit the application\n\nNote: Image features require a vision-capable model like llava or bakllava in Ollama!\n";
    addMessage(assistantname, helpText);
    return;
  }

  if (choice === 1) {
    await sendImageFile();
    return;
  }

  if (choice === 2) {
    await takeWebcamSnapshot();
    return;
  }

  if (choice === 3) {
    process.exit(0);
  }
}
|
||||
|
||||
async function sendMessage(message) {
|
||||
if (!message.trim()) return;
|
||||
|
||||
|
|
@ -246,6 +540,47 @@ async function sendMessage(message) {
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * Send a user message with one attached image and stream the reply.
 *
 * The user turn is appended to conversationHistory, then the system prompt
 * plus the whole history is sent to ollama with streaming enabled. Each
 * streamed chunk is accumulated in currentStreamMessage and passed to
 * updateStreamMessage; finalizeStreamMessage runs once the stream ends.
 * A failure is shown as an assistant chat message instead of being thrown.
 *
 * @param {string} message - text accompanying the image; blank text is a no-op
 * @param {string} base64Image - base64-encoded image data
 * @returns {Promise<void>}
 */
async function sendMessageWithImage(message, base64Image) {
  if (message.trim() === "") {
    return;
  }

  const userTurn = { role: "user", content: message, images: [base64Image] };
  conversationHistory.push(userTurn);

  try {
    currentStreamMessage = "";

    const systemTurn = { role: "system", content: systemprompt };
    const stream = await ollama.chat({
      model: assistantmodel,
      messages: [systemTurn, ...conversationHistory],
      stream: true,
      options: {
        num_predict: maxtokens,
        temperature,
      },
    });

    for await (const chunk of stream) {
      const delta = chunk.message?.content;
      // skip chunks that carry no text
      if (!delta) continue;

      currentStreamMessage += delta;
      updateStreamMessage(currentStreamMessage);
    }

    finalizeStreamMessage();
  } catch (error) {
    addMessage(assistantname, `Failed to get response: ${error.message}`);
  }
}
|
||||
|
||||
inputBox.on("submit", async (text) => {
|
||||
if (text.trim()) {
|
||||
inputBox.clearValue();
|
||||
|
|
@ -261,7 +596,7 @@ inputBox.on("submit", async (text) => {
|
|||
case "help":
|
||||
addMessage(
|
||||
assistantname,
|
||||
"available commands:\nl!help - if you wanna know what i can do, run this!\nl!clear - clear chat history, if you want me to forget everything, just run this!\nl!face <text> - if you want to force my expression, here you go! not sure i'll be too happy about it though.\nl!prompt <text> - if you want to change how i act, here you go! not sure i'll be too happy about that either.\n",
|
||||
"available commands:\nl!help - if you wanna know what i can do, run this!\nl!clear - clear chat history, if you want me to forget everything, just run this!\nl!face <text> - if you want to force my expression, here you go! not sure i'll be too happy about it though.\nl!prompt <text> - if you want to change how i act, here you go! not sure i'll be too happy about that either.\nl!image <path> or l!img <path> - send an image file (or just l!image to browse)\nl!webcam or l!cam - take and send a webcam snapshot\n\nMenu shortcuts:\nESC - open/close menu\nUp/Down arrows - navigate menu\nEnter - select option\n\nMenu options:\n- help & commands - show this help\n- send picture - browse and send an image file\n- take webcam snapshot - capture and send a webcam photo\n- exit - quit the application\n",
|
||||
);
|
||||
break;
|
||||
case "clear":
|
||||
|
|
@ -283,6 +618,35 @@ inputBox.on("submit", async (text) => {
|
|||
addMessage("system", "SYSTEM: You didn't provide a prompt.");
|
||||
}
|
||||
break;
|
||||
case "image":
|
||||
case "img":
|
||||
if (args.trim()) {
|
||||
const imagePath = args.trim();
|
||||
if (!fs.existsSync(imagePath)) {
|
||||
addMessage("system", "SYSTEM: Image file not found.");
|
||||
break;
|
||||
}
|
||||
const ext = path.extname(imagePath).toLowerCase();
|
||||
const imageExtensions = ['.jpg', '.jpeg', '.png', '.gif', '.bmp', '.webp'];
|
||||
if (!imageExtensions.includes(ext)) {
|
||||
addMessage("system", "SYSTEM: Invalid image file format.");
|
||||
break;
|
||||
}
|
||||
try {
|
||||
const base64Image = await convertImageToBase64(imagePath);
|
||||
await sendMessageWithImage("Here's an image I'd like you to look at:", base64Image);
|
||||
addMessage(username, `[Sent image: ${path.basename(imagePath)}]`);
|
||||
} catch (error) {
|
||||
addMessage("system", `SYSTEM: Error processing image: ${error.message}`);
|
||||
}
|
||||
} else {
|
||||
await sendImageFile();
|
||||
}
|
||||
break;
|
||||
case "webcam":
|
||||
case "cam":
|
||||
await takeWebcamSnapshot();
|
||||
break;
|
||||
default:
|
||||
addMessage(assistantname, `unknown command: ${command}`);
|
||||
break;
|
||||
|
|
@ -299,7 +663,33 @@ screen.key(["q", "C-c"], () => {
|
|||
});
|
||||
|
||||
// ESC toggles the menu.
screen.key(["escape"], () => {
  const toggle = menuVisible ? hideMenu : showMenu;
  toggle();
});

// The list's own "select" event runs the chosen entry, but only while the
// menu is actually open.
menuBox.on("select", async () => {
  if (!menuVisible) {
    return;
  }
  await handleMenuSelection();
});

// Pressing the popup's OK button dismisses it and returns to the chat input.
popupButton.on("press", () => {
  popup.hide();
  inputBox.focus();
  screen.render();
});

// Enter dismisses an open popup; otherwise it confirms the menu selection
// when the menu is open.
screen.key(["enter"], async () => {
  if (!popup.hidden) {
    popup.hide();
    inputBox.focus();
    screen.render();
    return;
  }

  if (menuVisible) {
    await handleMenuSelection();
  }
});
|
||||
|
||||
screen.on("resize", () => {
|
||||
|
|
|
|||
|
|
@ -7,6 +7,8 @@
|
|||
"blessed": "^0.1.81",
|
||||
"figlet": "^1.8.2",
|
||||
"ollama": "^0.5.16",
|
||||
"toml": "^3.0.0"
|
||||
"toml": "^3.0.0",
|
||||
"node-webcam": "^0.8.1",
|
||||
"sharp": "^0.33.1"
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue