image vision (cooks pcs)

This commit is contained in:
Eri Ishihara 2025-08-02 21:39:23 +02:00
parent 8816fe5b39
commit b3c772cc20
4 changed files with 417 additions and 5 deletions

View file

@ -7,6 +7,8 @@ your friendly ai assistant. frontend for ollama.
- git clone repository
- install ollama from `https://ollama.ai/download`
- pull a model from ollama (i recommend gemma3n:e4b for laptops like mine (i7-10750h + rtx 3050ti laptop edition))
- for image stuff, you'll need a model like llava:7b
- for webcam features, install fswebcam (Linux) or imagesnap (macOS)
- copy config.example.toml to config.toml and edit it to have the model you selected, optionally set your name in [user]
- npm i
- node index.js
@ -31,10 +33,22 @@ lydia is written to be easily configurable through a toml file which is easier t
- temperature = the temperature you want lydia to use. basically how random the model is. default is 0.8
- max_tokens = the max context tokens you want lydia to use. basically how far she can remember. default is 8192
## Camera settings
- width = webcam capture width. default is 1280
- height = webcam capture height. default is 720
- quality = webcam capture quality (0-100). default is 100
- device = specific camera device (false for default, or "/dev/video0", etc.)
## Runtime configuration
the prompt can be changed by running l!prompt <text> in the chatbox. this only applies for the current session, if you want a persistent change, you can edit the config file.
## Image & Webcam Commands
- `l!image <path>` or `l!img <path>` - send an image file (or just `l!image` to browse)
- `l!webcam` or `l!cam` - take and send a webcam snapshot
- press `ESC` to open the menu for image options
# Other stuff
by hitting escape you can tab out of the chatbox, here you can do cool things like:
by hitting escape you can open the menu, here you can do cool things like:
- send pictures and take webcam snapshots
- get help and see all commands
- hit Q or CTRL+C to quit lydia (but why would you wanna do that anyway?)
- yea thats it

View file

@ -13,3 +13,9 @@ name = "user"
[advanced]
temperature = 0.8
max_tokens = 8192
[camera]
width = 1280
height = 720
quality = 100
device = false # /dev/video0 on linux

394
lydia.js
View file

@ -3,6 +3,9 @@ import blessed from "blessed";
import { execSync } from "child_process";
import toml from "toml";
import fs from "fs";
import path from "path";
import NodeWebcam from "node-webcam";
import sharp from "sharp";
if (!fs.existsSync("./config.toml")) {
if (fs.existsSync("./config.example.toml")) {
@ -27,6 +30,12 @@ let username = config.user.name;
let facefont = config.appearance.facefont;
// camera settings (each falls back to a default when missing from the config)
let cameraWidth = config.camera?.width || 1280;
let cameraHeight = config.camera?.height || 720;
// `??` instead of `||`: 0 is inside the documented 0-100 quality range, so an
// explicit 0 must be honoured rather than replaced by the default.
let cameraQuality = config.camera?.quality ?? 100;
let cameraDevice = config.camera?.device || false;
let systemprompt = config.assistant.system_prompt
.replace("${name}", assistantname)
.replace("${username}", username);
@ -81,15 +90,87 @@ const inputBox = blessed.textbox({
mouse: true,
placeholder: `go on, tell ${assistantname} something!`,
});
// Centered menu list; created hidden and shown/hidden via showMenu()/hideMenu().
// NOTE: handleMenuSelection() dispatches on the selected index, so the order of
// `items` must stay in sync with its cases
// (0 = help, 1 = send picture, 2 = webcam snapshot, 3 = exit).
const menuBox = blessed.list({
top: "center",
left: "center",
width: 60,
height: 16,
border: {
type: "line",
},
style: {
border: {
fg: "cyan",
},
bg: "black",
selected: {
bg: "cyan",
fg: "black",
},
item: {
fg: "white",
},
},
keys: true,
vi: true,
mouse: true,
hidden: true,
label: ` ${assistantname} menu `,
items: ["help & commands", "send picture", "take webcam snapshot", `exit ${assistantname}`],
});
// Centered message box used by showPopup(); starts hidden with empty content.
// `tags: true` is required because showPopup() wraps its text in {center} tags.
const popup = blessed.box({
parent: screen,
top: "center",
left: "center",
width: 40,
height: 8,
border: {
type: "line",
},
style: {
border: {
fg: "cyan",
},
bg: "black",
},
tags: true,
hidden: true,
content: "",
});
// "OK" button shown at the bottom of the popup; pressing it dismisses the popup.
const popupButton = blessed.button({
  parent: popup,
  bottom: 1,
  left: "center",
  width: 8,
  height: 1,
  content: "OK",
  // `left: "center"` only positions the button itself; `align` centers the
  // label text inside the button's 8-column width.
  align: "center",
  style: {
    bg: "cyan",
    fg: "black",
    focus: {
      bg: "white",
      fg: "black",
    },
  },
  mouse: true,
  keys: true,
});
// attach the widgets to the screen; menuBox and popup are appended last
screen.append(faceBox);
screen.append(chatBox);
screen.append(inputBox);
screen.append(menuBox);
screen.append(popup);
// start with keyboard focus on the chat input
inputBox.focus();
let chatHistory = [];
// messages sent to the model on every request (after the system prompt)
let conversationHistory = [];
// text of the assistant reply accumulated so far while streaming
let currentStreamMessage = "";
// true while menuBox is shown; kept in sync by showMenu()/hideMenu()
let menuVisible = false;
function addMessage(role, content) {
let message;
@ -204,6 +285,219 @@ function clearChatHistory() {
screen.render();
}
// webcam configuration
// width/height/quality/device come from the camera settings read from the config;
// the remaining values are fixed options passed to node-webcam.
const webcamOptions = {
width: cameraWidth,
height: cameraHeight,
quality: cameraQuality,
delay: 0,
saveShots: true,
output: "jpeg",
device: cameraDevice,
// NOTE(review): presumably makes the capture callback receive the saved file's
// location rather than image data — confirm against node-webcam docs
callbackReturn: "location"
};
// shared webcam instance used by takeWebcamSnapshot()
const webcam = NodeWebcam.create(webcamOptions);
/**
 * Read an image from disk, shrink it for vision models and return it as base64.
 *
 * The image is scaled to fit inside 800x600 (never enlarged) and re-encoded as
 * JPEG at quality 80 before being base64-encoded.
 *
 * @param {string} imagePath - path of the image file to load.
 * @returns {Promise<string>} base64 string of the re-encoded JPEG.
 * @throws {Error} "Failed to process image: ..." with the underlying error
 *   attached as `cause` so the original stack is not lost.
 */
async function convertImageToBase64(imagePath) {
  try {
    // resize image to reasonable size for vision models
    const buffer = await sharp(imagePath)
      .resize(800, 600, { fit: 'inside', withoutEnlargement: true })
      .jpeg({ quality: 80 })
      .toBuffer();
    return buffer.toString('base64');
  } catch (error) {
    throw new Error(`Failed to process image: ${error.message}`, { cause: error });
  }
}
/**
 * Ask for an image path in a temporary textbox and send that image to the model.
 *
 * The path must exist and have a known image extension; otherwise a popup
 * explains the problem. ESC (or the textbox's own cancel) closes the prompt
 * without sending anything.
 *
 * @returns {Promise<void>} resolves when the image was sent, validation failed,
 *   or the prompt was cancelled. Never rejects.
 */
async function sendImageFile() {
  return new Promise((resolve) => {
    // Create a simple input box for file path
    const fileInput = blessed.textbox({
      parent: screen,
      top: 'center',
      left: 'center',
      width: '80%',
      height: 5,
      border: {
        type: 'line'
      },
      style: {
        border: {
          fg: 'cyan'
        },
        bg: 'black'
      },
      inputOnFocus: true,
      keys: true,
      mouse: true,
      label: ' Enter Image File Path (ESC to cancel) ',
      placeholder: 'Enter full path to image file...'
    });

    // Tear down the prompt. Also unregisters this call's ESC listener so that
    // repeated calls do not pile up dead listeners on the screen.
    function closeInput() {
      screen.unkey(['escape'], onEscape);
      if (!fileInput.destroyed) {
        fileInput.destroy();
        screen.render();
      }
    }

    // Shared by the textbox 'cancel' event and the screen-level ESC key;
    // resolving twice is harmless because a promise settles only once.
    function onEscape() {
      closeInput();
      resolve();
    }

    screen.append(fileInput);
    fileInput.focus();
    screen.render();

    fileInput.on('submit', async (filePath) => {
      closeInput();

      const trimmedPath = filePath ? filePath.trim() : '';
      if (!trimmedPath) {
        resolve();
        return;
      }

      // Check if file exists
      if (!fs.existsSync(trimmedPath)) {
        showPopup('Error: File does not exist');
        resolve();
        return;
      }

      const ext = path.extname(trimmedPath).toLowerCase();
      const imageExtensions = ['.jpg', '.jpeg', '.png', '.gif', '.bmp', '.webp'];
      if (!imageExtensions.includes(ext)) {
        showPopup('Error: Please select a valid image file\n(.jpg, .png, .gif, .bmp, .webp)');
        resolve();
        return;
      }

      try {
        const base64Image = await convertImageToBase64(trimmedPath);
        // Log the user's action before the reply starts streaming so the chat
        // reads in chronological order.
        addMessage(username, `[Sent image: ${path.basename(trimmedPath)}]`);
        await sendMessageWithImage("Here's an image I'd like you to look at:", base64Image);
      } catch (error) {
        showPopup(`Error processing image: ${error.message}`);
      }
      resolve();
    });

    fileInput.on('cancel', onEscape);
    screen.key(['escape'], onEscape);
  });
}
/**
 * Capture a webcam frame, send it to the model and delete the temporary file.
 *
 * A "please wait" popup is shown during capture. Capture or processing
 * failures are reported in a popup instead of being thrown.
 *
 * @returns {Promise<void>} resolves when the snapshot was sent or an error was
 *   shown. Never rejects.
 */
async function takeWebcamSnapshot() {
  return new Promise((resolve) => {
    showPopup('Taking webcam snapshot...\nPlease wait...');

    const timestamp = new Date().toISOString().replace(/[:.]/g, '-');
    const filename = `webcam-${timestamp}.jpg`;
    const filepath = path.join(process.cwd(), filename);

    webcam.capture(filename, async (err) => {
      // showPopup() moved focus to the popup button; hand it back to the chat
      // input when the progress popup goes away (same as the OK button does).
      popup.hide();
      inputBox.focus();
      screen.render();

      if (err) {
        let errorMsg = 'Webcam error occurred';
        if (err.message) {
          errorMsg = `Webcam error: ${err.message}`;
        }
        if (err.message && err.message.includes('No such file or directory')) {
          errorMsg += '\n\nTip: Make sure you have a webcam connected\nand try installing fswebcam or imagesnap';
        }
        showPopup(errorMsg);
        resolve();
        return;
      }

      try {
        // check if file exists with a small delay
        await new Promise((r) => setTimeout(r, 500));
        if (!fs.existsSync(filepath)) {
          showPopup('Error: Snapshot file was not created\nCheck if webcam is connected and accessible');
          return;
        }

        const base64Image = await convertImageToBase64(filepath);
        // Log the user's action before the reply starts streaming so the chat
        // reads in chronological order.
        addMessage(username, `[Took webcam snapshot: ${filename}]`);
        await sendMessageWithImage("I just took this webcam snapshot:", base64Image);
      } catch (error) {
        showPopup(`Error processing snapshot: ${error.message}`);
      } finally {
        // Best-effort removal of the temporary file on every path; a failed
        // cleanup must not mask the outcome shown to the user.
        try {
          if (fs.existsSync(filepath)) {
            fs.unlinkSync(filepath);
          }
        } catch (unlinkErr) {
          // ignore cleanup errors
        }
        resolve();
      }
    });
  });
}
/**
 * Show the popup with a centered message and focus its OK button.
 *
 * @param {string} message - plain text to display; may contain newlines.
 */
function showPopup(message) {
  // The popup has tags enabled, so braces in the message (e.g. from error text
  // or file paths) would be parsed as markup. Escape them to show them as-is.
  const safeText = blessed.escape(String(message));
  popup.setContent(`{center}${safeText}{/center}`);
  popup.show();
  popupButton.focus();
  screen.render();
}
// Open the menu: mark it visible, show the list, give it keyboard focus and redraw.
function showMenu() {
menuVisible = true;
menuBox.show();
menuBox.focus();
screen.render();
}
// Close the menu: mark it hidden, hide the list, return focus to the chat input and redraw.
function hideMenu() {
menuVisible = false;
menuBox.hide();
inputBox.focus();
screen.render();
}
/**
 * Run the action for the currently highlighted menu entry.
 *
 * The menu is closed first, then the entry is dispatched by its index in the
 * menu's item list: 0 = help text, 1 = send picture, 2 = webcam snapshot,
 * 3 = exit the process. Any other index does nothing.
 *
 * @returns {Promise<void>} resolves once the chosen action has finished.
 */
async function handleMenuSelection() {
  const choice = menuBox.selected;
  hideMenu();

  if (choice === 0) {
    const helpText =
      "available commands:\nl!help - if you wanna know what i can do, run this!\nl!clear - clear chat history, if you want me to forget everything, just run this!\nl!face <text> - if you want to force my expression, here you go! not sure i'll be too happy about it though.\nl!prompt <text> - if you want to change how i act, here you go! not sure i'll be too happy about that either.\nl!image <path> or l!img <path> - send an image file (or just l!image to browse)\nl!webcam or l!cam - take and send a webcam snapshot\n\nMenu shortcuts:\nESC - open/close menu\nUp/Down arrows - navigate menu\nEnter - select option\nCtrl+C or q - quit application\n\nMenu options:\n- help & commands - show this help\n- send picture - browse and send an image file\n- take webcam snapshot - capture and send a webcam photo\n- exit - quit the application\n\nNote: Image features require a vision-capable model like llava or bakllava in Ollama!\n";
    addMessage(assistantname, helpText);
    return;
  }

  if (choice === 1) {
    await sendImageFile();
    return;
  }

  if (choice === 2) {
    await takeWebcamSnapshot();
    return;
  }

  if (choice === 3) {
    process.exit(0);
  }
}
async function sendMessage(message) {
if (!message.trim()) return;
@ -246,6 +540,47 @@ async function sendMessage(message) {
}
}
/**
 * Send a user message with one attached image to the model and stream the reply.
 *
 * The message is recorded in conversationHistory, the request is sent with the
 * system prompt followed by the whole history, and each streamed chunk is
 * appended to currentStreamMessage and pushed to the chat view. Request
 * failures are reported as a chat message instead of being thrown.
 *
 * @param {string} message - text accompanying the image; blank text is ignored.
 * @param {string} base64Image - base64-encoded image data.
 * @returns {Promise<void>}
 */
async function sendMessageWithImage(message, base64Image) {
  if (message.trim().length === 0) return;

  const userTurn = {
    role: "user",
    content: message,
    images: [base64Image],
  };
  conversationHistory.push(userTurn);

  try {
    currentStreamMessage = "";

    const systemTurn = { role: "system", content: systemprompt };
    const stream = await ollama.chat({
      model: assistantmodel,
      messages: [systemTurn, ...conversationHistory],
      stream: true,
      options: {
        num_predict: maxtokens,
        temperature,
      },
    });

    for await (const chunk of stream) {
      const text = chunk.message?.content;
      // skip chunks that carry no text
      if (!text) continue;
      currentStreamMessage += text;
      updateStreamMessage(currentStreamMessage);
    }

    finalizeStreamMessage();
  } catch (error) {
    addMessage(assistantname, `Failed to get response: ${error.message}`);
  }
}
inputBox.on("submit", async (text) => {
if (text.trim()) {
inputBox.clearValue();
@ -261,7 +596,7 @@ inputBox.on("submit", async (text) => {
case "help":
addMessage(
assistantname,
"available commands:\nl!help - if you wanna know what i can do, run this!\nl!clear - clear chat history, if you want me to forget everything, just run this!\nl!face <text> - if you want to force my expression, here you go! not sure i'll be too happy about it though.\nl!prompt <text> - if you want to change how i act, here you go! not sure i'll be too happy about that either.\n",
"available commands:\nl!help - if you wanna know what i can do, run this!\nl!clear - clear chat history, if you want me to forget everything, just run this!\nl!face <text> - if you want to force my expression, here you go! not sure i'll be too happy about it though.\nl!prompt <text> - if you want to change how i act, here you go! not sure i'll be too happy about that either.\nl!image <path> or l!img <path> - send an image file (or just l!image to browse)\nl!webcam or l!cam - take and send a webcam snapshot\n\nMenu shortcuts:\nESC - open/close menu\nUp/Down arrows - navigate menu\nEnter - select option\n\nMenu options:\n- help & commands - show this help\n- send picture - browse and send an image file\n- take webcam snapshot - capture and send a webcam photo\n- exit - quit the application\n",
);
break;
case "clear":
@ -283,6 +618,35 @@ inputBox.on("submit", async (text) => {
addMessage("system", "SYSTEM: You didn't provide a prompt.");
}
break;
case "image":
case "img":
if (args.trim()) {
const imagePath = args.trim();
if (!fs.existsSync(imagePath)) {
addMessage("system", "SYSTEM: Image file not found.");
break;
}
const ext = path.extname(imagePath).toLowerCase();
const imageExtensions = ['.jpg', '.jpeg', '.png', '.gif', '.bmp', '.webp'];
if (!imageExtensions.includes(ext)) {
addMessage("system", "SYSTEM: Invalid image file format.");
break;
}
try {
const base64Image = await convertImageToBase64(imagePath);
await sendMessageWithImage("Here's an image I'd like you to look at:", base64Image);
addMessage(username, `[Sent image: ${path.basename(imagePath)}]`);
} catch (error) {
addMessage("system", `SYSTEM: Error processing image: ${error.message}`);
}
} else {
await sendImageFile();
}
break;
case "webcam":
case "cam":
await takeWebcamSnapshot();
break;
default:
addMessage(assistantname, `unknown command: ${command}`);
break;
@ -299,7 +663,33 @@ screen.key(["q", "C-c"], () => {
});
// ESC toggles the menu: close it when it is open, open it otherwise
screen.key(["escape"], () => {
if (menuVisible) {
hideMenu();
} else {
showMenu();
}
});
// run the chosen entry when the list reports a selection; ignored while the
// menu is not marked visible
menuBox.on("select", async (item, selected) => {
if (menuVisible) {
await handleMenuSelection();
}
});
// the popup's OK button dismisses the popup and returns focus to the chat input
popupButton.on("press", () => {
popup.hide();
inputBox.focus();
screen.render();
});
// Enter dismisses a visible popup; otherwise, if the menu is open, it runs the
// highlighted entry.
// NOTE(review): the list's "select" listener above may fire for the same Enter
// press; handleMenuSelection() clears menuVisible via hideMenu() before doing
// anything else, which appears to prevent a double run — confirm.
screen.key(["enter"], async () => {
if (!popup.hidden) {
popup.hide();
inputBox.focus();
screen.render();
} else if (menuVisible) {
await handleMenuSelection();
}
});
screen.on("resize", () => {

View file

@ -7,6 +7,8 @@
"blessed": "^0.1.81",
"figlet": "^1.8.2",
"ollama": "^0.5.16",
"toml": "^3.0.0"
"toml": "^3.0.0",
"node-webcam": "^0.8.1",
"sharp": "^0.33.1"
}
}