Files
assistant-storefront/spec/enterprise/jobs/captain/documents/crawl_job_spec.rb
Liang XJ 092fb2e083
Some checks failed
Lock Threads / action (push) Has been cancelled
Mark stale issues and pull requests / stale (push) Has been cancelled
Publish Chatwoot EE docker images / build (linux/amd64, ubuntu-latest) (push) Has been cancelled
Publish Chatwoot EE docker images / build (linux/arm64, ubuntu-22.04-arm) (push) Has been cancelled
Publish Chatwoot EE docker images / merge (push) Has been cancelled
Publish Chatwoot CE docker images / build (linux/amd64, ubuntu-latest) (push) Has been cancelled
Publish Chatwoot CE docker images / build (linux/arm64, ubuntu-22.04-arm) (push) Has been cancelled
Publish Chatwoot CE docker images / merge (push) Has been cancelled
Run Chatwoot CE spec / lint-backend (push) Has been cancelled
Run Chatwoot CE spec / lint-frontend (push) Has been cancelled
Run Chatwoot CE spec / frontend-tests (push) Has been cancelled
Run Chatwoot CE spec / backend-tests (0, 16) (push) Has been cancelled
Run Chatwoot CE spec / backend-tests (1, 16) (push) Has been cancelled
Run Chatwoot CE spec / backend-tests (10, 16) (push) Has been cancelled
Run Chatwoot CE spec / backend-tests (11, 16) (push) Has been cancelled
Run Chatwoot CE spec / backend-tests (12, 16) (push) Has been cancelled
Run Chatwoot CE spec / backend-tests (13, 16) (push) Has been cancelled
Run Chatwoot CE spec / backend-tests (14, 16) (push) Has been cancelled
Run Chatwoot CE spec / backend-tests (15, 16) (push) Has been cancelled
Run Chatwoot CE spec / backend-tests (2, 16) (push) Has been cancelled
Run Chatwoot CE spec / backend-tests (3, 16) (push) Has been cancelled
Run Chatwoot CE spec / backend-tests (4, 16) (push) Has been cancelled
Run Chatwoot CE spec / backend-tests (5, 16) (push) Has been cancelled
Run Chatwoot CE spec / backend-tests (6, 16) (push) Has been cancelled
Run Chatwoot CE spec / backend-tests (7, 16) (push) Has been cancelled
Run Chatwoot CE spec / backend-tests (8, 16) (push) Has been cancelled
Run Chatwoot CE spec / backend-tests (9, 16) (push) Has been cancelled
Run Linux nightly installer / nightly (push) Has been cancelled
Initial commit: Add logistics and order_detail message types
- Add Logistics component with progress tracking
- Add OrderDetail component for order information
- Support data-driven steps and actions
- Add blue color scale to widget SCSS
- Fix node overflow and progress bar rendering issues
- Add English translations for dashboard components

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
2026-01-26 11:16:56 +08:00

134 lines
4.7 KiB
Ruby

require 'rails_helper'
# Specs for Captain::Documents::CrawlJob#perform. Three paths are exercised:
#   * CAPTAIN_FIRECRAWL_API_KEY present -> the crawl goes through FirecrawlService
#   * CAPTAIN_FIRECRAWL_API_KEY absent  -> links come from SimplePageCrawlService and
#     each one is queued on SimplePageCrawlParserJob
#   * the document reports itself as a PDF -> PdfProcessingService handles it
RSpec.describe Captain::Documents::CrawlJob, type: :job do
  let(:document) { create(:captain_document, external_link: 'https://example.com/page') }
  let(:assistant_id) { document.assistant_id }
  let(:webhook_url) { Rails.application.routes.url_helpers.enterprise_webhooks_firecrawl_url }

  describe '#perform' do
    context 'when CAPTAIN_FIRECRAWL_API_KEY is configured' do
      let(:firecrawl_service) { instance_double(Captain::Tools::FirecrawlService) }
      let(:account) { document.account }
      # NOTE(review): '-key' is presumably the last four characters of the 'test-key'
      # value configured below -- confirm against the job's token generation.
      let(:token) { Digest::SHA256.hexdigest("-key#{document.assistant_id}#{document.account_id}") }
      # Webhook URL the job is expected to pass to Firecrawl.
      let(:callback_url) { "#{webhook_url}?assistant_id=#{assistant_id}&token=#{token}" }

      before do
        allow(Captain::Tools::FirecrawlService).to receive(:new).and_return(firecrawl_service)
        allow(firecrawl_service).to receive(:perform)
        create(:installation_config, name: 'CAPTAIN_FIRECRAWL_API_KEY', value: 'test-key')
        # Each nested context supplies its own `stubbed_limits`.
        allow(account).to receive(:usage_limits).and_return(stubbed_limits)
      end

      # Sets the expectation that FirecrawlService#perform receives the document
      # link, the callback URL and the given page limit, then runs the job.
      def expect_crawl_with_limit(limit)
        expect(firecrawl_service).to receive(:perform).with(document.external_link, callback_url, limit)
        described_class.perform_now(document)
      end

      context 'with account usage limits' do
        let(:stubbed_limits) { { captain: { documents: { current_available: 20 } } } }

        it 'uses FirecrawlService with the correct crawl limit' do
          expect_crawl_with_limit(20)
        end
      end

      context 'when crawl limit exceeds maximum' do
        let(:stubbed_limits) { { captain: { documents: { current_available: 1000 } } } }

        it 'caps the crawl limit at 500' do
          expect_crawl_with_limit(500)
        end
      end

      context 'with no usage limits configured' do
        let(:stubbed_limits) { {} }

        it 'uses default crawl limit of 10' do
          expect_crawl_with_limit(10)
        end
      end
    end

    context 'when CAPTAIN_FIRECRAWL_API_KEY is not configured' do
      let(:page_links) { ['https://example.com/page1', 'https://example.com/page2'] }
      let(:simple_crawler) { instance_double(Captain::Tools::SimplePageCrawlService) }

      before do
        crawler_class = Captain::Tools::SimplePageCrawlService
        allow(crawler_class).to receive(:new).with(document.external_link).and_return(simple_crawler)
        allow(simple_crawler).to receive(:page_links).and_return(page_links)
      end

      it 'enqueues SimplePageCrawlParserJob for each discovered link' do
        # Every discovered link, plus the document's own link, must be queued.
        [*page_links, document.external_link].each do |link|
          expect(Captain::Tools::SimplePageCrawlParserJob)
            .to receive(:perform_later)
            .with(assistant_id: assistant_id, page_link: link)
        end

        described_class.perform_now(document)
      end

      it 'uses SimplePageCrawlService to discover page links' do
        expect(simple_crawler).to receive(:page_links)

        described_class.perform_now(document)
      end
    end

    context 'when document is a PDF' do
      # A persisted document stubbed to report itself as a PDF; update! is stubbed too.
      let(:pdf_document) do
        create(:captain_document, external_link: 'https://example.com/document').tap do |doc|
          allow(doc).to receive_messages(pdf_document?: true, update!: true)
        end
      end

      it 'processes PDF using PdfProcessingService' do
        processor = instance_double(Captain::Llm::PdfProcessingService)
        expect(Captain::Llm::PdfProcessingService).to receive(:new).with(pdf_document).and_return(processor)
        expect(processor).to receive(:process)
        expect(pdf_document).to receive(:update!).with(status: :available)

        described_class.perform_now(pdf_document)
      end

      it 'handles PDF processing errors' do
        allow(Captain::Llm::PdfProcessingService).to receive(:new).and_raise(StandardError, 'Processing failed')

        # The job is expected to let the error propagate rather than swallow it.
        expect { described_class.perform_now(pdf_document) }.to raise_error(StandardError, 'Processing failed')
      end
    end
  end
end