-
Notifications
You must be signed in to change notification settings - Fork 2k
Added firecrawl as a toolcall to crawl website contents from url #1797
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 5 commits
47b20b3
2b43d82
6c05577
db20380
793342e
3ee56d4
3857b50
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,64 @@ | ||
| import FirecrawlApp from '@mendable/firecrawl-js'; | ||
|
|
||
| /** | ||
| * Optional settings that `CrawlerService.crawlUrl` forwards unchanged to | ||
| * `FirecrawlApp.crawlUrl`. | ||
| */ | ||
| export interface CrawlOptions { | ||
| /** Crawl limit forwarded to Firecrawl (presumably a page cap; confirm against the SDK docs). */ | ||
| limit?: number; | ||
| /** Settings applied to each scraped page. */ | ||
| scrapeOptions?: { | ||
| /** Output formats requested for the scraped content. */ | ||
| formats?: Array< | ||
| | 'markdown' | ||
| | 'html' | ||
| | 'rawHtml' | ||
| | 'content' | ||
| | 'links' | ||
| | 'screenshot' | ||
| | 'screenshot@fullPage' | ||
| | 'extract' | ||
| | 'json' | ||
| | 'changeTracking' | ||
| >; | ||
| }; | ||
| } | ||
|
|
||
| /** | ||
| * Singleton wrapper around a `FirecrawlApp` client. | ||
| * | ||
| * The constructor is private; callers obtain the shared instance through | ||
| * `CrawlerService.getInstance()`. | ||
| */ | ||
| export class CrawlerService { | ||
| private static instance: CrawlerService; | ||
|
|
||
| private app: FirecrawlApp; | ||
|
|
||
| /** | ||
| * Builds the Firecrawl client from `import.meta.env.VITE_FIRECRAWL_API_KEY`. | ||
| * | ||
| * @throws Error when the environment variable is missing or empty. | ||
| */ | ||
| private constructor() { | ||
| const key = import.meta.env.VITE_FIRECRAWL_API_KEY; | ||
| if (!key) { | ||
| throw new Error('VITE_FIRECRAWL_API_KEY is not defined. Please provide a valid API key.'); | ||
| } | ||
| this.app = new FirecrawlApp({ apiKey: key }); | ||
| } | ||
|
|
||
| /** | ||
| * Returns the shared service, creating it on first use. | ||
| * | ||
| * @throws Error from the constructor when the API key is not configured. | ||
| */ | ||
| static getInstance(): CrawlerService { | ||
| if (this.instance) { | ||
| return this.instance; | ||
| } | ||
| this.instance = new CrawlerService(); | ||
| return this.instance; | ||
| } | ||
|
|
||
| /** | ||
| * Crawls `url` through the Firecrawl client. | ||
| * | ||
| * @param url - Address handed to `FirecrawlApp.crawlUrl`. | ||
| * @param options - Crawl settings; when omitted, a limit of 100 and the | ||
| * `markdown` and `html` formats are requested. | ||
| * @returns The client response once it reports success. | ||
| * @throws Error when the response is not successful; any failure is logged | ||
| * with `console.error` and then rethrown. | ||
| */ | ||
| async crawlUrl( | ||
| url: string, | ||
| options: CrawlOptions = { limit: 100, scrapeOptions: { formats: ['markdown', 'html'] } }, | ||
| ) { | ||
| try { | ||
| const result = await this.app.crawlUrl(url, options); | ||
| if (result.success) { | ||
| return result; | ||
| } | ||
| // Unsuccessful responses become exceptions so callers have one failure path. | ||
| throw new Error(`Failed to crawl: ${result.error}`); | ||
| } catch (err) { | ||
| console.error('Error during crawling:', err); | ||
| throw err; | ||
| } | ||
| } | ||
| } |
| Original file line number | Diff line number | Diff line change | ||||||||
|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -73,6 +73,16 @@ | |||||||||
| "fileReference": "File Reference", | ||||||||||
| "submit": "Start building your site" | ||||||||||
| }, | ||||||||||
| "crawl": { | ||||||||||
| "title": "Duplicate a website", | ||||||||||
| "description": "Paste a link to a website that you want to duplicate", | ||||||||||
| "input": { | ||||||||||
| "placeholder": "Paste a link to a website", | ||||||||||
| "ariaLabel": "URL input for web page crawling", | ||||||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Typo detected: the key `arialLabel` should be `ariaLabel`.
Suggested change
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Typo: the key `arialLabel` should be `ariaLabel`.
Suggested change
SoloDevAbu marked this conversation as resolved.
Outdated
|
||||||||||
| "crawling": "Getting Data", | ||||||||||
| "submit": "Get Data" | ||||||||||
| } | ||||||||||
| }, | ||||||||||
| "blankStart": "Start from a blank page" | ||||||||||
| } | ||||||||||
| }, | ||||||||||
|
|
||||||||||
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -14,6 +14,8 @@ import { useEffect, useRef, useState } from 'react'; | |||||
| import useResizeObserver from 'use-resize-observer'; | ||||||
| import { DraftImagePill } from '../../editor/EditPanel/ChatTab/ContextPills/DraftingImagePill'; | ||||||
| import { useTranslation } from 'react-i18next'; | ||||||
| import { CrawlerService } from '@/lib/services/crawler'; | ||||||
| import { toast } from '@onlook/ui/use-toast'; | ||||||
|
|
||||||
| export const PromptingCard = () => { | ||||||
| const projectsManager = useProjectsManager(); | ||||||
|
|
@@ -28,6 +30,9 @@ export const PromptingCard = () => { | |||||
| const [isComposing, setIsComposing] = useState(false); | ||||||
| const imageRef = useRef<HTMLInputElement>(null); | ||||||
| const { t } = useTranslation(); | ||||||
| const [urlInput, setUrlInput] = useState(''); | ||||||
| const [isCrawling, setIsCrawling] = useState(false); | ||||||
| const [crawledValue, setCrawledValue] = useState(''); | ||||||
|
|
||||||
| useEffect(() => { | ||||||
| const handleEscapeKey = (e: KeyboardEvent) => { | ||||||
|
|
@@ -45,11 +50,12 @@ export const PromptingCard = () => { | |||||
| console.warn('Input is too short'); | ||||||
| return; | ||||||
| } | ||||||
| projectsManager.create.sendPrompt(inputValue, selectedImages, false); | ||||||
| projectsManager.create.sendPrompt(inputValue, selectedImages, crawledValue, false); | ||||||
|
SoloDevAbu marked this conversation as resolved.
Outdated
|
||||||
| setCrawledValue(''); | ||||||
| }; | ||||||
|
|
||||||
| const handleBlankSubmit = async () => { | ||||||
| projectsManager.create.sendPrompt('', [], true); | ||||||
| projectsManager.create.sendPrompt('', [], '', true); | ||||||
| }; | ||||||
|
|
||||||
| const handleDragOver = (e: React.DragEvent) => { | ||||||
|
|
@@ -179,6 +185,65 @@ export const PromptingCard = () => { | |||||
| } | ||||||
| }; | ||||||
|
|
||||||
| const handleCrawlSubmit = async () => { | ||||||
| const trimmedUrlInput = urlInput.trim(); | ||||||
| if (!trimmedUrlInput) { | ||||||
| console.warn('URL input is empty'); | ||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. For an empty URL input, consider showing a user-visible error via toast rather than just logging a warning.
Suggested change
|
||||||
| return; | ||||||
| } | ||||||
|
|
||||||
| try { | ||||||
| const url = new URL(trimmedUrlInput); | ||||||
| if (!['http:', 'https:'].includes(url.protocol)) { | ||||||
| console.warn('URL must start with http or https'); | ||||||
| toast({ | ||||||
| title: 'Invalid URL', | ||||||
| description: 'Please enter a URL that starts with http or https.', | ||||||
| variant: 'destructive', | ||||||
| }); | ||||||
| return; | ||||||
| } | ||||||
| } catch (error) { | ||||||
| console.warn('Invalid URL:', trimmedUrlInput); | ||||||
| toast({ | ||||||
| title: 'Invalid URL', | ||||||
| description: 'Please enter a valid URL format.', | ||||||
| variant: 'destructive', | ||||||
| }); | ||||||
| return; | ||||||
| } | ||||||
|
|
||||||
| setIsCrawling(true); | ||||||
|
|
||||||
| try { | ||||||
| const crawler = CrawlerService.getInstance(); | ||||||
|
|
||||||
| const response = await crawler.crawlUrl(trimmedUrlInput); | ||||||
|
|
||||||
| const responseData = response.data; | ||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Consider validating the structure of the crawled response (e.g., ensuring `response.data` is a non-empty array) before reading `responseData[0]`.
|
||||||
| const html = responseData[0]?.html || ''; | ||||||
| const markdown = responseData[0]?.markdown || ''; | ||||||
|
|
||||||
| setCrawledValue(`---MARKDOWN---\n${markdown}\n\n---HTML---\n${html}`); | ||||||
|
SoloDevAbu marked this conversation as resolved.
Outdated
|
||||||
|
|
||||||
| toast({ | ||||||
| title: 'URL Crawled', | ||||||
| description: `Data for ${trimmedUrlInput} has been crawled successfully.`, | ||||||
| }); | ||||||
|
SoloDevAbu marked this conversation as resolved.
Outdated
|
||||||
|
|
||||||
| setUrlInput(''); | ||||||
| } catch (error) { | ||||||
| console.error('Failed to crawl URL:', error); | ||||||
| toast({ | ||||||
| title: 'Failed to Crawl URL', | ||||||
| description: error instanceof Error ? error.message : 'An unknown error occurred', | ||||||
| variant: 'destructive', | ||||||
| }); | ||||||
| } finally { | ||||||
|
SoloDevAbu marked this conversation as resolved.
Outdated
|
||||||
| setIsCrawling(false); | ||||||
| } | ||||||
| }; | ||||||
|
|
||||||
| return ( | ||||||
| <MotionConfig transition={{ duration: 0.5, type: 'spring', bounce: 0 }}> | ||||||
| <div className="flex flex-col gap-4 mb-12"> | ||||||
|
|
@@ -382,6 +447,69 @@ export const PromptingCard = () => { | |||||
| </CardContent> | ||||||
| </motion.div> | ||||||
| </MotionCard> | ||||||
| <MotionCard | ||||||
| initial={{ opacity: 0, y: 20 }} | ||||||
| animate={{ opacity: 1, y: 0 }} | ||||||
| exit={{ opacity: 0, y: 20 }} | ||||||
| className="w-[600px] backdrop-blur-md bg-background/30 overflow-hidden" | ||||||
| > | ||||||
| <CardHeader> | ||||||
| <motion.h2 | ||||||
| initial={{ opacity: 0, y: 20 }} | ||||||
| animate={{ opacity: 1, y: 0 }} | ||||||
| className="text-2xl text-foreground-primary" | ||||||
| > | ||||||
| {t('projects.prompt.crawl.title')} | ||||||
| </motion.h2> | ||||||
| <motion.p | ||||||
| initial={{ opacity: 0, y: 20 }} | ||||||
| animate={{ opacity: 1, y: 0 }} | ||||||
| transition={{ delay: 0.1 }} | ||||||
| className="text-sm text-foreground-secondary" | ||||||
| > | ||||||
| {t('projects.prompt.crawl.description')} | ||||||
| </motion.p> | ||||||
| </CardHeader> | ||||||
| <CardContent className="flex flex-col gap-4"> | ||||||
| <div className="flex flex-col gap-2"> | ||||||
| <div className="flex flex-row gap-2"> | ||||||
| <input | ||||||
|
SoloDevAbu marked this conversation as resolved.
Outdated
|
||||||
| type="url" | ||||||
| value={urlInput} | ||||||
| onChange={(e) => setUrlInput(e.target.value)} | ||||||
| aria-label={t('projects.prompt.crawl.input.ariaLabel')} | ||||||
| placeholder={t('projects.prompt.crawl.input.placeholder')} | ||||||
| className={cn( | ||||||
| 'flex-1 h-9 px-3 rounded-md', | ||||||
| 'bg-background-secondary/80 backdrop-blur-sm', | ||||||
| 'border border-border', | ||||||
| 'text-sm text-foreground-primary', | ||||||
| 'placeholder:text-foreground-secondary', | ||||||
| 'focus:outline-none focus:ring-2 focus:ring-ring', | ||||||
| )} | ||||||
| /> | ||||||
| <Button | ||||||
| variant="secondary" | ||||||
| className="gap-2" | ||||||
| disabled={!urlInput.trim() || isCrawling} | ||||||
| onClick={handleCrawlSubmit} | ||||||
| > | ||||||
| {isCrawling ? ( | ||||||
| <> | ||||||
| <Icons.Circle className="w-4 h-4 animate-spin" />{' '} | ||||||
| {t('projects.prompt.crawl.input.crawling')} | ||||||
| </> | ||||||
| ) : ( | ||||||
| <> | ||||||
| <Icons.ArrowRight className="w-4 h-4" />{' '} | ||||||
| {t('projects.prompt.crawl.input.submit')} | ||||||
| </> | ||||||
| )} | ||||||
| </Button> | ||||||
| </div> | ||||||
| </div> | ||||||
| </CardContent> | ||||||
| </MotionCard> | ||||||
| <Button | ||||||
| variant="outline" | ||||||
| className="w-fit mx-auto bg-background-secondary/90 text-sm border text-foreground-secondary" | ||||||
|
|
||||||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -73,6 +73,16 @@ | |
| "fileReference": "File Reference", | ||
| "submit": "Start building your site" | ||
| }, | ||
| "crawl": { | ||
| "title": "Duplicate a website", | ||
| "description": "Paste a link to a website that you want to duplicate", | ||
| "input": { | ||
| "placeholder": "Paste a link to a website", | ||
| "ariaLabel": "URL input for web page crawling", | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Consider renaming the key `arialLabel` to `ariaLabel`.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Typo in translation key `arialLabel`; it should be `ariaLabel`.
|
||
| "crawling": "Getting Data", | ||
| "submit": "Get Data" | ||
| } | ||
| }, | ||
| "blankStart": "Start from a blank page" | ||
| } | ||
| }, | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Consider renaming the translation key
`arialLabel` to `ariaLabel` to better align with ARIA naming conventions.