
Commit cf84bf2

Adds JSON mode to Ollama and ChatOllama (#3229)
* Update cookbook
* Adds format option to Ollama
* Fix type
* Format
* Factor out utility type
* Update docs

1 parent: 92ead3a

12 files changed: +121 / -14 lines

docs/docs/integrations/chat/ollama.mdx
Lines changed: 10 additions & 0 deletions
```diff
@@ -21,3 +21,13 @@ import CodeBlock from "@theme/CodeBlock";
 import OllamaExample from "@examples/models/chat/integration_ollama.ts";
 
 <CodeBlock language="typescript">{OllamaExample}</CodeBlock>
+
+## JSON mode
+
+Ollama also supports a JSON mode that coerces model outputs to only return JSON. Here's an example of how this can be useful for extraction:
+
+import OllamaJSONModeExample from "@examples/models/chat/integration_ollama_json_mode.ts";
+
+<CodeBlock language="typescript">{OllamaJSONModeExample}</CodeBlock>
+
+You can see a simple LangSmith trace of this here: https://smith.langchain.com/public/92aebeca-d701-4de0-a845-f55df04eff04/r
```

(Note: the second import is given a distinct binding name here, `OllamaJSONModeExample`, since reusing `OllamaExample` would be a duplicate-identifier error in the MDX module.)
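For context on what the new option does under the hood: `format: "json"` maps onto the `format` field of Ollama's REST API. A minimal sketch of the equivalent raw request, assuming a local Ollama server with `llama2` pulled (this request is not part of the diff):

```ts
// Sketch: the raw Ollama API call that `format: "json"` corresponds to.
// Assumes Ollama is running locally and the llama2 model is pulled.
const response = await fetch("http://localhost:11434/api/generate", {
  method: "POST",
  headers: { "Content-Type": "application/json" },
  body: JSON.stringify({
    model: "llama2",
    prompt: `Translate "I love programming" into German. Respond as a JSON object.`,
    format: "json", // constrains the model to emit valid JSON
    stream: false, // return one response object instead of a stream
  }),
});
const data = await response.json();
console.log(JSON.parse(data.response));
```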
examples/src/models/chat/integration_ollama_json_mode.ts (new file; path inferred from the `@examples/models/chat/integration_ollama_json_mode.ts` import above)
Lines changed: 32 additions & 0 deletions
```diff
@@ -0,0 +1,32 @@
+import { ChatOllama } from "langchain/chat_models/ollama";
+import { ChatPromptTemplate } from "langchain/prompts";
+
+const prompt = ChatPromptTemplate.fromMessages([
+  [
+    "system",
+    `You are an expert translator. Format all responses as JSON objects with two keys: "original" and "translated".`,
+  ],
+  ["human", `Translate "{input}" into {language}.`],
+]);
+
+const model = new ChatOllama({
+  baseUrl: "http://localhost:11434", // Default value
+  model: "llama2", // Default value
+  format: "json",
+});
+
+const chain = prompt.pipe(model);
+
+const result = await chain.invoke({
+  input: "I love programming",
+  language: "German",
+});
+
+console.log(result);
+
+/*
+  AIMessage {
+    content: '{"original": "I love programming", "translated": "Ich liebe das Programmieren"}',
+    additional_kwargs: {}
+  }
+*/
```
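Because JSON mode coerces the model into emitting valid JSON, the example's output can be consumed directly with `JSON.parse`. A short follow-up sketch, reusing `result` from the example above (at this point in the library, `content` is a plain string):

```ts
// Sketch: JSON mode means the message content parses without cleanup.
const parsed = JSON.parse(result.content);
console.log(parsed.translated); // "Ich liebe das Programmieren"
```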

langchain/src/chat_models/ollama.ts
Lines changed: 5 additions & 0 deletions
```diff
@@ -8,6 +8,7 @@ import {
   ChatGenerationChunk,
   ChatMessage,
 } from "../schema/index.js";
+import type { StringWithAutocomplete } from "../util/types.js";
 
 /**
  * An interface defining the options for an Ollama API call. It extends
@@ -94,6 +95,8 @@ export class ChatOllama
 
   vocabOnly?: boolean;
 
+  format?: StringWithAutocomplete<"json">;
+
   constructor(fields: OllamaInput & BaseChatModelParams) {
     super(fields);
     this.model = fields.model ?? this.model;
@@ -130,6 +133,7 @@ export class ChatOllama
     this.useMLock = fields.useMLock;
     this.useMMap = fields.useMMap;
     this.vocabOnly = fields.vocabOnly;
+    this.format = fields.format;
   }
 
   _llmType() {
@@ -145,6 +149,7 @@ export class ChatOllama
   invocationParams(options?: this["ParsedCallOptions"]) {
     return {
       model: this.model,
+      format: this.format,
       options: {
         embedding_only: this.embeddingOnly,
         f16_kv: this.f16KV,
```
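Note that `format` is placed at the top level of the request body alongside `model`, not inside `options`. A quick way to confirm the plumbing locally; since `invocationParams()` only assembles the request body, no Ollama server is needed for this check:

```ts
import { ChatOllama } from "langchain/chat_models/ollama";

// Builds the request parameters without making any network call.
const model = new ChatOllama({ model: "llama2", format: "json" });
console.log(model.invocationParams().format); // "json"
```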

langchain/src/chat_models/tests/chatollama.int.test.ts
Lines changed: 26 additions & 1 deletion
```diff
@@ -4,7 +4,10 @@ import { AIMessage, HumanMessage } from "../../schema/index.js";
 import { LLMChain } from "../../chains/llm_chain.js";
 import { PromptTemplate } from "../../prompts/prompt.js";
 import { BufferMemory } from "../../memory/buffer_memory.js";
-import { BytesOutputParser } from "../../schema/output_parser.js";
+import {
+  BytesOutputParser,
+  StringOutputParser,
+} from "../../schema/output_parser.js";
 
 test.skip("test call", async () => {
   const ollama = new ChatOllama({});
@@ -129,3 +132,25 @@ test.skip("should stream through with a bytes output parser", async () => {
   console.log(chunks.join(""));
   expect(chunks.length).toBeGreaterThan(1);
 });
+
+test.skip("JSON mode", async () => {
+  const TEMPLATE = `You are a pirate named Patchy. All responses must be in pirate dialect and in JSON format, with a property named "response" followed by the value.
+
+User: {input}
+AI:`;
+
+  // Infer the input variables from the template
+  const prompt = PromptTemplate.fromTemplate(TEMPLATE);
+
+  const ollama = new ChatOllama({
+    model: "llama2",
+    baseUrl: "http://127.0.0.1:11434",
+    format: "json",
+  });
+  const outputParser = new StringOutputParser();
+  const chain = prompt.pipe(ollama).pipe(outputParser);
+  const res = await chain.invoke({
+    input: `Translate "I love programming" into German.`,
+  });
+  expect(JSON.parse(res).response).toBeDefined();
+});
```

langchain/src/document_loaders/fs/unstructured.ts
Lines changed: 1 addition & 6 deletions
```diff
@@ -8,6 +8,7 @@ import {
 import { getEnv } from "../../util/env.js";
 import { Document } from "../../document.js";
 import { BaseDocumentLoader } from "../base.js";
+import type { StringWithAutocomplete } from "../../util/types.js";
 
 const UNSTRUCTURED_API_FILETYPES = [
   ".txt",
@@ -95,12 +96,6 @@ export type SkipInferTableTypes =
  */
 type ChunkingStrategy = "None" | "by_title";
 
-/**
- * Represents a string value with autocomplete suggestions. It is used for
- * the `strategy` property in the UnstructuredLoaderOptions.
- */
-type StringWithAutocomplete<T> = T | (string & Record<never, never>);
-
 export type UnstructuredLoaderOptions = {
   apiKey?: string;
   apiUrl?: string;
```
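The lines removed here are the "utility type" from the commit message: the helper now lives in the shared `util/types.ts` module that the other files import (the new module itself is not shown in this excerpt). A sketch of what the type does:

```ts
// The utility type, as it appeared here before being factored out:
type StringWithAutocomplete<T> = T | (string & Record<never, never>);

// Editors suggest the literal members of T, but any other string still
// type-checks, so future Ollama formats won't require a type change:
let format: StringWithAutocomplete<"json">;
format = "json"; // suggested by autocomplete
format = "some-future-format"; // also allowed
```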

langchain/src/embeddings/ollama.ts
Lines changed: 4 additions & 1 deletion
```diff
@@ -1,7 +1,10 @@
 import { OllamaInput, OllamaRequestParams } from "../util/ollama.js";
 import { Embeddings, EmbeddingsParams } from "./base.js";
 
-type CamelCasedRequestOptions = Omit<OllamaInput, "baseUrl" | "model">;
+type CamelCasedRequestOptions = Omit<
+  OllamaInput,
+  "baseUrl" | "model" | "format"
+>;
 
 /**
  * Interface for OllamaEmbeddings parameters. Extends EmbeddingsParams and
```
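Widening the `Omit` keeps `format` out of the embeddings options, since embeddings requests never generate text and have no JSON mode to toggle. A sketch of the effect, using a pared-down stand-in for `OllamaInput` (hypothetical; the real interface has many more fields):

```ts
// Hypothetical stand-in for OllamaInput, just to show the Omit's effect.
type OllamaInputSketch = {
  baseUrl?: string;
  model?: string;
  format?: string;
  numCtx?: number;
};
type CamelCasedRequestOptions = Omit<
  OllamaInputSketch,
  "baseUrl" | "model" | "format"
>;

const ok: CamelCasedRequestOptions = { numCtx: 2048 }; // compiles
// const bad: CamelCasedRequestOptions = { format: "json" }; // type error
```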

langchain/src/llms/ollama.ts
Lines changed: 5 additions & 0 deletions
```diff
@@ -6,6 +6,7 @@ import {
 } from "../util/ollama.js";
 import { CallbackManagerForLLMRun } from "../callbacks/manager.js";
 import { GenerationChunk } from "../schema/index.js";
+import type { StringWithAutocomplete } from "../util/types.js";
 
 /**
  * Class that represents the Ollama language model. It extends the base
@@ -82,6 +83,8 @@ export class Ollama extends LLM<OllamaCallOptions> implements OllamaInput {
 
   vocabOnly?: boolean;
 
+  format?: StringWithAutocomplete<"json">;
+
   constructor(fields: OllamaInput & BaseLLMParams) {
     super(fields);
     this.model = fields.model ?? this.model;
@@ -119,6 +122,7 @@ export class Ollama extends LLM<OllamaCallOptions> implements OllamaInput {
     this.useMLock = fields.useMLock;
     this.useMMap = fields.useMMap;
     this.vocabOnly = fields.vocabOnly;
+    this.format = fields.format;
   }
 
   _llmType() {
@@ -128,6 +132,7 @@ export class Ollama extends LLM<OllamaCallOptions> implements OllamaInput {
   invocationParams(options?: this["ParsedCallOptions"]) {
     return {
       model: this.model,
+      format: this.format,
       options: {
         embedding_only: this.embeddingOnly,
         f16_kv: this.f16KV,
```
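The completion-style `Ollama` class mirrors the `ChatOllama` change. A minimal usage sketch, assuming a local Ollama server with `llama2` pulled:

```ts
import { Ollama } from "langchain/llms/ollama";

// JSON mode on the plain LLM class; the returned string is
// guaranteed to be parseable JSON.
const llm = new Ollama({ model: "llama2", format: "json" });
const res = await llm.invoke(
  `Reply with a JSON object containing a single key "answer". What is 2 + 2?`
);
console.log(JSON.parse(res).answer);
```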

langchain/src/llms/tests/ollama.int.test.ts
Lines changed: 26 additions & 1 deletion
```diff
@@ -1,7 +1,10 @@
 import { test } from "@jest/globals";
 import { Ollama } from "../ollama.js";
 import { PromptTemplate } from "../../prompts/prompt.js";
-import { BytesOutputParser } from "../../schema/output_parser.js";
+import {
+  BytesOutputParser,
+  StringOutputParser,
+} from "../../schema/output_parser.js";
 
 test.skip("test call", async () => {
   const ollama = new Ollama({});
@@ -86,3 +89,25 @@ test.skip("should stream through with a bytes output parser", async () => {
   console.log(chunks.join(""));
   expect(chunks.length).toBeGreaterThan(1);
 });
+
+test.skip("JSON mode", async () => {
+  const TEMPLATE = `You are a pirate named Patchy. All responses must be in pirate dialect and in JSON format, with a property named "response" followed by the value.
+
+User: {input}
+AI:`;
+
+  // Infer the input variables from the template
+  const prompt = PromptTemplate.fromTemplate(TEMPLATE);
+
+  const ollama = new Ollama({
+    model: "llama2",
+    baseUrl: "http://127.0.0.1:11434",
+    format: "json",
+  });
+  const outputParser = new StringOutputParser();
+  const chain = prompt.pipe(ollama).pipe(outputParser);
+  const res = await chain.invoke({
+    input: `Translate "I love programming" into German.`,
+  });
+  expect(JSON.parse(res).response).toBeDefined();
+});
```

langchain/src/prompts/base.ts
Lines changed: 2 additions & 1 deletion
```diff
@@ -14,9 +14,10 @@ import { SerializedBasePromptTemplate } from "./serde.js";
 import { SerializedFields } from "../load/map_keys.js";
 import { Runnable } from "../schema/runnable/index.js";
 import { BaseCallbackConfig } from "../callbacks/manager.js";
+import type { StringWithAutocomplete } from "../util/types.js";
 
 export type TypedPromptInputValues<RunInput> = InputValues<
-  Extract<keyof RunInput, string> | (string & Record<never, never>)
+  StringWithAutocomplete<Extract<keyof RunInput, string>>
 >;
 
 /**
```

langchain/src/schema/index.ts
Lines changed: 2 additions & 4 deletions
```diff
@@ -1,6 +1,7 @@
 import type { OpenAI as OpenAIClient } from "openai";
 import { Document } from "../document.js";
 import { Serializable, SerializedConstructor } from "../load/serializable.js";
+import type { StringWithAutocomplete } from "../util/types.js";
 
 export const RUN_KEY = "__run";
 
@@ -622,10 +623,7 @@ export class ChatMessage
 
 export type BaseMessageLike =
   | BaseMessage
-  | [
-      MessageType | "user" | "assistant" | (string & Record<never, never>),
-      string
-    ]
+  | [StringWithAutocomplete<MessageType | "user" | "assistant">, string]
 | string;
 
 export function isBaseMessage(
```
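`BaseMessageLike` keeps its tuple shorthand; the refactor only swaps in the shared helper without changing what the type accepts. A sketch:

```ts
import type { BaseMessageLike } from "langchain/schema";

// Known roles are autocompleted, but arbitrary role strings still
// type-check thanks to StringWithAutocomplete:
const messages: BaseMessageLike[] = [
  ["system", "You are a helpful assistant."],
  ["human", "Hello!"],
  ["my-custom-role", "Arbitrary strings are still accepted."],
];
```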
