diff --git a/README.md b/README.md
index 1950554..56c351c 100644
--- a/README.md
+++ b/README.md
@@ -7,6 +7,8 @@ your friendly ai assistant. frontend for ollama.
 - git clone repository
 - install ollama from `https://ollama.ai/download`
 - pull a model from ollama (i recommend gemma3n:e4b for laptops like mine (i7-10750h + rtx 3050ti laptop edition))
+- for image features, you'll need a vision-capable model like llava:7b
+- for webcam features, install fswebcam (Linux) or imagesnap (macOS)
 - copy config.example.toml to config.toml and edit it to have the model you selected, optionally set your name in [user]
 - npm i
 - node index.js
@@ -31,10 +33,22 @@ lydia is written to be easily configurable through a toml file which is easier t
 - temperature = the temperature you want lydia to use. basically how random the model is. default is 0.8
 - max_tokens = the max context tokens you want lydia to use. basically how far she can remember. default is 8192
 
+## Camera settings
+- width = webcam capture width. default is 1280
+- height = webcam capture height. default is 720
+- quality = webcam capture quality (0-100). default is 100
+- device = specific camera device (false for default, or "/dev/video0", etc.)
+
 ## Runtime configuration
 the prompt can be changed by running l!prompt in the chatbox. this only applies for the current session, if you want a persistent change, you can edit the config file.
 
+## Image & Webcam Commands
+- `l!image <path>` or `l!img <path>` - send an image file (or just `l!image` to be asked for a path)
+- `l!webcam` or `l!cam` - take and send a webcam snapshot
+- press `ESC` to open the menu for image options
+
 # Other stuff
-by hitting escape you can tab out of the chatbox, here you can do cool things like:
+by hitting escape you can open the menu, here you can do cool things like:
+- send pictures and take webcam snapshots
+- get help and see all commands
 - hit Q or CTRL+C to quit lydia (but why would you wanna do that anyway?)
-- yea thats it
diff --git a/config.example.toml b/config.example.toml
index b28816e..71ca002 100644
--- a/config.example.toml
+++ b/config.example.toml
@@ -13,3 +13,9 @@ name = "user"
 [advanced]
 temperature = 0.8
 max_tokens = 8192
+
+[camera]
+width = 1280
+height = 720
+quality = 100
+device = false # false = default camera, or a device path such as "/dev/video0" on linux
diff --git a/lydia.js b/lydia.js
index 760afbf..24bd04f 100644
--- a/lydia.js
+++ b/lydia.js
@@ -3,6 +3,9 @@ import blessed from "blessed";
 import { execSync } from "child_process";
 import toml from "toml";
 import fs from "fs";
+import path from "path";
+import NodeWebcam from "node-webcam";
+import sharp from "sharp";
 
 if (!fs.existsSync("./config.toml")) {
   if (fs.existsSync("./config.example.toml")) {
@@ -27,6 +30,12 @@ let username = config.user.name;
 
 let facefont = config.appearance.facefont;
 
+// camera settings
+let cameraWidth = config.camera?.width || 1280;
+let cameraHeight = config.camera?.height || 720;
+let cameraQuality = config.camera?.quality ?? 100; // ?? keeps an explicit quality of 0
+let cameraDevice = config.camera?.device || false;
+
 let systemprompt = config.assistant.system_prompt
   .replace("${name}", assistantname)
   .replace("${username}", username);
@@ -81,15 +90,88 @@ const inputBox = blessed.textbox({
   mouse: true,
   placeholder: `go on, tell ${assistantname} something!`,
 });
+
+const menuBox = blessed.list({
+  top: "center",
+  left: "center",
+  width: 60,
+  height: 16,
+  border: {
+    type: "line",
+  },
+  style: {
+    border: {
+      fg: "cyan",
+    },
+    bg: "black",
+    selected: {
+      bg: "cyan",
+      fg: "black",
+    },
+    item: {
+      fg: "white",
+    },
+  },
+  keys: true,
+  vi: true,
+  mouse: true,
+  hidden: true,
+  label: ` ${assistantname} menu `,
+  items: ["help & commands", "send picture", "take webcam snapshot", `exit ${assistantname}`],
+});
+
+const popup = blessed.box({
+  parent: screen,
+  top: "center",
+  left: "center",
+  width: 40,
+  height: 8,
+  border: {
+    type: "line",
+  },
+  style: {
+    border: {
+      fg: "cyan",
+    },
+    bg: "black",
+  },
+  tags: true,
+  hidden: true,
+  content: "",
+});
+
+const popupButton = blessed.button({
+  parent: popup,
+  bottom: 1,
+  left: "center",
+  width: 8,
+  height: 1,
+  content: "OK",
+  align: "center", // centers the label inside the 8-column button
+  style: {
+    bg: "cyan",
+    fg: "black",
+    focus: {
+      bg: "white",
+      fg: "black",
+    },
+  },
+  mouse: true,
+  keys: true,
+});
+
 screen.append(faceBox);
 screen.append(chatBox);
 screen.append(inputBox);
+screen.append(menuBox);
+screen.append(popup);
 
 inputBox.focus();
 
 let chatHistory = [];
 let conversationHistory = [];
 let currentStreamMessage = "";
+let menuVisible = false;
 
 function addMessage(role, content) {
   let message;
@@ -204,6 +286,285 @@ function clearChatHistory() {
   screen.render();
 }
 
+// webcam configuration
+const webcamOptions = {
+  width: cameraWidth,
+  height: cameraHeight,
+  quality: cameraQuality,
+  delay: 0,
+  saveShots: true,
+  output: "jpeg",
+  device: cameraDevice,
+  callbackReturn: "location",
+};
+
+const webcam = NodeWebcam.create(webcamOptions);
+
+// file extensions accepted by l!image and the "send picture" menu entry
+const imageExtensions = [".jpg", ".jpeg", ".png", ".gif", ".bmp", ".webp"];
+
+// shared by l!help and the "help & commands" menu entry so they cannot drift apart
+const helpText = [
+  "available commands:",
+  "l!help - if you wanna know what i can do, run this!",
+  "l!clear - clear chat history, if you want me to forget everything, just run this!",
+  "l!face - if you want to force my expression, here you go! not sure i'll be too happy about it though.",
+  "l!prompt - if you want to change how i act, here you go! not sure i'll be too happy about that either.",
+  "l!image or l!img - send an image file (or just l!image to browse)",
+  "l!webcam or l!cam - take and send a webcam snapshot",
+  "",
+  "Menu shortcuts:",
+  "ESC - open/close menu",
+  "Up/Down arrows - navigate menu",
+  "Enter - select option",
+  "Ctrl+C or q - quit application",
+  "",
+  "Menu options:",
+  "- help & commands - show this help",
+  "- send picture - browse and send an image file",
+  "- take webcam snapshot - capture and send a webcam photo",
+  "- exit - quit the application",
+  "",
+  "Note: Image features require a vision-capable model like llava or bakllava in Ollama!",
+  "",
+].join("\n");
+
+/**
+ * Downscale an image to fit inside 800x600 (never enlarging it), re-encode
+ * it as JPEG and return the result base64-encoded.
+ *
+ * @param {string} imagePath - path of the image file to read
+ * @returns {Promise<string>} base64-encoded JPEG data
+ * @throws {Error} when sharp cannot read or convert the file
+ */
+async function convertImageToBase64(imagePath) {
+  try {
+    const buffer = await sharp(imagePath)
+      .resize(800, 600, { fit: "inside", withoutEnlargement: true })
+      .jpeg({ quality: 80 })
+      .toBuffer();
+
+    return buffer.toString("base64");
+  } catch (error) {
+    throw new Error(`Failed to process image: ${error.message}`, { cause: error });
+  }
+}
+
+/**
+ * Validate an image path, log the "[Sent image: ...]" line in the chat and
+ * send the picture to the model.
+ *
+ * @param {string} imagePath - path of the image file to send
+ * @returns {Promise<void>}
+ * @throws {Error} when the file is missing, has an unsupported extension or
+ *   cannot be converted; the message is suitable for showing to the user
+ */
+async function sendImageFromPath(imagePath) {
+  if (!fs.existsSync(imagePath)) {
+    throw new Error("Image file not found.");
+  }
+
+  const ext = path.extname(imagePath).toLowerCase();
+  if (!imageExtensions.includes(ext)) {
+    throw new Error(`Invalid image file format (use ${imageExtensions.join(", ")}).`);
+  }
+
+  const base64Image = await convertImageToBase64(imagePath);
+  // log the user's side first so it appears above the streamed reply
+  addMessage(username, `[Sent image: ${path.basename(imagePath)}]`);
+  await sendMessageWithImage("Here's an image I'd like you to look at:", base64Image);
+}
+
+/**
+ * Ask for an image path in a modal textbox and send that image.
+ *
+ * @returns {Promise<void>} resolves once the dialog is closed and, when a
+ *   path was entered, the image has been sent or the error reported
+ */
+function sendImageFile() {
+  return new Promise((resolve) => {
+    const fileInput = blessed.textbox({
+      parent: screen,
+      top: "center",
+      left: "center",
+      width: "80%",
+      height: 5,
+      border: {
+        type: "line",
+      },
+      style: {
+        border: {
+          fg: "cyan",
+        },
+        bg: "black",
+      },
+      inputOnFocus: true,
+      keys: true,
+      mouse: true,
+      label: " Enter Image File Path (ESC to cancel) ",
+      placeholder: "Enter full path to image file...",
+    });
+
+    // remove the dialog and hand the keyboard back to the chat input
+    const closeDialog = () => {
+      fileInput.destroy();
+      inputBox.focus();
+      screen.render();
+    };
+
+    fileInput.on("submit", async (filePath) => {
+      closeDialog();
+
+      const trimmedPath = (filePath ?? "").trim();
+      if (trimmedPath) {
+        try {
+          await sendImageFromPath(trimmedPath);
+        } catch (error) {
+          showPopup(`Error: ${error.message}`);
+        }
+      }
+
+      resolve();
+    });
+
+    // the textbox emits "cancel" itself when ESC is pressed while it is
+    // reading input, so no per-dialog screen-level escape handler is
+    // registered (the old one was never removed and piled up on every call)
+    fileInput.on("cancel", () => {
+      closeDialog();
+      resolve();
+    });
+
+    fileInput.focus();
+    screen.render();
+  });
+}
+
+/**
+ * Capture a webcam frame, send it to the model and delete the temporary
+ * snapshot file afterwards.
+ *
+ * @returns {Promise<void>} resolves when the capture has been handled;
+ *   failures are reported in a popup instead of being thrown
+ */
+function takeWebcamSnapshot() {
+  return new Promise((resolve) => {
+    showPopup("Taking webcam snapshot...\nPlease wait...");
+
+    const timestamp = new Date().toISOString().replace(/[:.]/g, "-");
+    const filename = `webcam-${timestamp}.jpg`;
+    const filepath = path.join(process.cwd(), filename);
+
+    webcam.capture(filename, async (err) => {
+      // also returns focus to the chat input; hiding alone left it on the
+      // invisible OK button after a successful capture
+      hidePopup();
+
+      if (err) {
+        let errorMsg = "Webcam error occurred";
+        if (err.message) {
+          errorMsg = `Webcam error: ${err.message}`;
+        }
+        if (err.message?.includes("No such file or directory")) {
+          errorMsg +=
+            "\n\nTip: Make sure you have a webcam connected\nand try installing fswebcam or imagesnap";
+        }
+        showPopup(errorMsg);
+        resolve();
+        return;
+      }
+
+      try {
+        // wait briefly before checking that the snapshot file exists
+        await new Promise((r) => setTimeout(r, 500));
+
+        if (!fs.existsSync(filepath)) {
+          showPopup(
+            "Error: Snapshot file was not created\nCheck if webcam is connected and accessible",
+          );
+          return;
+        }
+
+        const base64Image = await convertImageToBase64(filepath);
+        // log the user's side first so it appears above the streamed reply
+        addMessage(username, `[Took webcam snapshot: ${filename}]`);
+        await sendMessageWithImage("I just took this webcam snapshot:", base64Image);
+      } catch (error) {
+        showPopup(`Error processing snapshot: ${error.message}`);
+      } finally {
+        // best-effort cleanup; force makes a missing file a no-op
+        try {
+          fs.rmSync(filepath, { force: true });
+        } catch {
+          // a leftover snapshot is harmless, so cleanup errors are ignored
+        }
+        resolve();
+      }
+    });
+  });
+}
+
+/**
+ * Show a modal popup with the given (centered) text and focus its OK button.
+ *
+ * @param {string} message - text to display; blessed tags in it are parsed
+ */
+function showPopup(message) {
+  popup.setContent(`{center}${message}{/center}`);
+  popup.show();
+  popupButton.focus();
+  screen.render();
+}
+
+/** Hide the popup and return focus to the chat input. */
+function hidePopup() {
+  popup.hide();
+  inputBox.focus();
+  screen.render();
+}
+
+/** Show the menu and give it keyboard focus. */
+function showMenu() {
+  menuVisible = true;
+  menuBox.show();
+  menuBox.focus();
+  screen.render();
+}
+
+/** Hide the menu and return focus to the chat input. */
+function hideMenu() {
+  menuVisible = false;
+  menuBox.hide();
+  inputBox.focus();
+  screen.render();
+}
+
+/**
+ * Close the menu and run the action of the entry that was highlighted.
+ *
+ * @returns {Promise<void>} resolves when the chosen action has finished
+ */
+async function handleMenuSelection() {
+  const selectedIndex = menuBox.selected;
+  hideMenu();
+
+  // indices follow the order of the `items` array of menuBox
+  switch (selectedIndex) {
+    case 0:
+      addMessage(assistantname, helpText);
+      break;
+    case 1:
+      await sendImageFile();
+      break;
+    case 2:
+      await takeWebcamSnapshot();
+      break;
+    case 3:
+      process.exit(0);
+      break;
+  }
+}
+
 async function sendMessage(message) {
   if (!message.trim()) return;
 
@@ -246,6 +607,55 @@ async function sendMessage(message) {
   }
 }
 
+/**
+ * Send a user message with one attached image to the model and stream the
+ * reply into the chat.
+ *
+ * @param {string} message - text that accompanies the image
+ * @param {string} base64Image - base64-encoded image data
+ * @returns {Promise<void>} request failures are shown in the chat, not thrown
+ */
+async function sendMessageWithImage(message, base64Image) {
+  if (!message.trim()) return;
+
+  conversationHistory.push({
+    role: "user",
+    content: message,
+    images: [base64Image],
+  });
+
+  try {
+    currentStreamMessage = "";
+
+    const response = await ollama.chat({
+      model: assistantmodel,
+      messages: [
+        {
+          role: "system",
+          content: systemprompt,
+        },
+        ...conversationHistory,
+      ],
+      stream: true,
+      options: {
+        num_predict: maxtokens,
+        temperature: temperature,
+      },
+    });
+
+    for await (const part of response) {
+      if (part.message && part.message.content) {
+        currentStreamMessage += part.message.content;
+        updateStreamMessage(currentStreamMessage);
+      }
+    }
+
+    finalizeStreamMessage();
+  } catch (error) {
+    addMessage(assistantname, `Failed to get response: ${error.message}`);
+  }
+}
+
 inputBox.on("submit", async (text) => {
   if (text.trim()) {
     inputBox.clearValue();
@@ -261,7 +671,7 @@ inputBox.on("submit", async (text) => {
         case "help":
           addMessage(
             assistantname,
-            "available commands:\nl!help - if you wanna know what i can do, run this!\nl!clear - clear chat history, if you want me to forget everything, just run this!\nl!face - if you want to force my expression, here you go! not sure i'll be too happy about it though.\nl!prompt - if you want to change how i act, here you go! not sure i'll be too happy about that either.\n",
+            helpText,
           );
           break;
         case "clear":
@@ -283,6 +693,22 @@ inputBox.on("submit", async (text) => {
             addMessage("system", "SYSTEM: You didn't provide a prompt.");
           }
           break;
+        case "image":
+        case "img":
+          if (args.trim()) {
+            try {
+              await sendImageFromPath(args.trim());
+            } catch (error) {
+              addMessage("system", `SYSTEM: ${error.message}`);
+            }
+          } else {
+            await sendImageFile();
+          }
+          break;
+        case "webcam":
+        case "cam":
+          await takeWebcamSnapshot();
+          break;
         default:
           addMessage(assistantname, `unknown command: ${command}`);
           break;
@@ -299,7 +725,27 @@ screen.key(["q", "C-c"], () => {
 });
 
 screen.key(["escape"], () => {
-  // menu soon when i decide that i wanna do it
+  if (menuVisible) {
+    hideMenu();
+  } else {
+    showMenu();
+  }
+});
+
+menuBox.on("select", async () => {
+  if (menuVisible) {
+    await handleMenuSelection();
+  }
+});
+
+popupButton.on("press", hidePopup);
+
+screen.key(["enter"], async () => {
+  if (!popup.hidden) {
+    hidePopup();
+  } else if (menuVisible) {
+    await handleMenuSelection();
+  }
 });
 
 screen.on("resize", () => {
diff --git a/package.json b/package.json
index 80a88c0..684e337 100644
--- a/package.json
+++ b/package.json
@@ -7,6 +7,8 @@
     "blessed": "^0.1.81",
     "figlet": "^1.8.2",
+    "node-webcam": "^0.8.1",
     "ollama": "^0.5.16",
+    "sharp": "^0.33.1",
     "toml": "^3.0.0"
   }
 }