From 9262d5b4f6980cb9cd44a75188e5283a03e32938 Mon Sep 17 00:00:00 2001 From: Rohit Rajan Date: Fri, 24 Apr 2026 13:38:03 +0530 Subject: [PATCH 1/3] feat: add support for document extraction --- src/client/maxun-client.ts | 39 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/src/client/maxun-client.ts b/src/client/maxun-client.ts index cbc287b..560788c 100644 --- a/src/client/maxun-client.ts +++ b/src/client/maxun-client.ts @@ -6,6 +6,8 @@ import axios, { AxiosInstance, AxiosError } from 'axios'; import http from 'http'; import https from 'https'; +import FormData from 'form-data'; +import * as fs from 'fs'; import { Config, RobotData, @@ -287,6 +289,43 @@ export class Client { return response.data.data; } + /** + * Create a document-extraction robot from a PDF file path or Buffer. + */ + async createDocumentRobot( + file: string | Buffer, + prompt: string, + options?: { robotName?: string; ollamaModel?: string; fileName?: string } + ): Promise<{ robot: RobotData; extractionSchema: Record }> { + const form = new FormData(); + + if (typeof file === 'string') { + form.append('file', fs.createReadStream(file), options?.fileName || require('path').basename(file)); + } else { + form.append('file', file, { filename: options?.fileName || 'document.pdf', contentType: 'application/pdf' }); + } + + form.append('prompt', prompt); + if (options?.robotName) form.append('robotName', options.robotName); + if (options?.ollamaModel) form.append('ollamaModel', options.ollamaModel); + + const response = await this.axios.post( + '/robots/document', + form, + { headers: form.getHeaders(), timeout: 120000 } + ); + + const data = response.data; + if (!data?.data && !data?.robot) { + throw new MaxunError('Failed to create document robot'); + } + + return { + robot: data.data || data.robot, + extractionSchema: data.extractionSchema || {}, + }; + } + /** * Create a crawl robot to discover and scrape multiple pages */ From 65018a16efe69e936fdcb252a0ed801513a5e739 Mon Sep 17 00:00:00 2001 From: Rohit Rajan Date: Fri, 8 May 2026 12:08:18 +0530 Subject: [PATCH 2/3] feat: add document parsing support --- src/client/maxun-client.ts | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/src/client/maxun-client.ts b/src/client/maxun-client.ts index 560788c..0fc094b 100644 --- a/src/client/maxun-client.ts +++ b/src/client/maxun-client.ts @@ -326,6 +326,42 @@ export class Client { }; } + /** + * Create a document-parse robot from a PDF file path or Buffer. + */ + async createDocumentParseRobot( + file: string | Buffer, + outputFormats: ('markdown' | 'html' | 'links')[], + options?: { robotName?: string; fileName?: string } + ): Promise<{ robot: RobotData; parsedOutput: Record }> { + const form = new FormData(); + + if (typeof file === 'string') { + form.append('file', fs.createReadStream(file), options?.fileName || require('path').basename(file)); + } else { + form.append('file', file, { filename: options?.fileName || 'document.pdf', contentType: 'application/pdf' }); + } + + if (options?.robotName) form.append('robotName', options.robotName); + outputFormats.forEach((f) => form.append('outputFormats[]', f)); + + const response = await this.axios.post( + '/robots/document-parse', + form, + { headers: form.getHeaders(), timeout: 120000 } + ); + + const data = response.data; + if (!data?.data && !data?.robot) { + throw new MaxunError('Failed to create document-parse robot'); + } + + return { + robot: data.data || data.robot, + parsedOutput: data.parsedOutput || {}, + }; + } + /** * Create a crawl robot to discover and scrape multiple pages */ From 941422073305e741fc6d93e0e6349281bdbc6a6b Mon Sep 17 00:00:00 2001 From: Rohit Rajan Date: Fri, 8 May 2026 12:19:41 +0530 Subject: [PATCH 3/3] chore: rename doc methods --- src/client/maxun-client.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/client/maxun-client.ts b/src/client/maxun-client.ts index 0fc094b..9617b70 100644 --- a/src/client/maxun-client.ts +++ b/src/client/maxun-client.ts @@ -292,7 +292,7 @@ export class Client { /** * Create a document-extraction robot from a PDF file path or Buffer. */ - async createDocumentRobot( + async createDocumentExtractRobot( file: string | Buffer, prompt: string, options?: { robotName?: string; ollamaModel?: string; fileName?: string }