Commit 1c0a4b
2024-11-01 08:33:58 Qwas: Save archivebox 使用| /dev/null .. archivebox 使用.md | |
| @@ 0,0 1,77 @@ | |
| + | # archivebox 使用 |
| + | |
| + | (未完待续) |
| + | |
| + | ## 安装 archivebox |
| + | |
| + | ```sh |
| + | # create a folder to store your data (can be anywhere) |
| + | mkdir -p ~/archivebox/data && cd ~/archivebox |
| + | |
| + | # download the compose file into the directory |
| + | # curl -fsSL 'https://docker-compose.archivebox.io' > docker-compose.yml |
| + | curl --proxy http://127.0.0.1:7890 -fsSL 'https://docker-compose.archivebox.io' > docker-compose.yml |
| + | |
| + | # (shortcut for getting https://raw.githubusercontent.com/ArchiveBox/ArchiveBox/stable/docker-compose.yml) |
| + | |
| + | # initialize your collection and create an admin user for the Web UI (or set ADMIN_USERNAME/ADMIN_PASSWORD env vars) |
| + | docker compose run archivebox init |
| + | docker compose run archivebox manage createsuperuser |
| + | ``` |
| + | |
| + | ## sonic 全文检索 |
| + | |
| + | ```sh |
| + | # download the sonic config file into your data folder (e.g. ~/archivebox) |
| + | # curl -fsSL 'https://raw.githubusercontent.com/ArchiveBox/ArchiveBox/dev/etc/sonic.cfg' > sonic.cfg |
| + | curl --proxy http://127.0.0.1:7890 -fsSL 'https://raw.githubusercontent.com/ArchiveBox/ArchiveBox/dev/etc/sonic.cfg' > sonic.cfg |
| + | |
| + | # then uncomment the sonic-related sections in docker-compose.yml |
| + | vi docker-compose.yml |
| + | |
| + | # to backfill any existing archive data into the search index, run: |
| + | docker compose run archivebox update --index-only |
| + | ``` |
| + | |
| + | ```sh |
| + | docker compose up -d |
| + | ``` |
| + | |
| + | 此时访问 `http://<ip>:8000` 即可浏览页面。 |
| + | |
| + | |
| + | |
| + | ## 安装 chrome |
| + | |
| + | 要抓取需要登录才能访问的内容,需要通过 cookie 来设置登录状态。 |
| + | |
| + | ```sh |
| + | sudo apt update |
| + | sudo apt install chromium-browser |
| + | # or on some systems: |
| + | sudo apt install chromium |
| + | ``` |
| + | |
| + | 修改 `docker-compose.yml`: |
| + | |
| + | ```yml |
| + | services: |
| + |   archivebox: |
| + |     ... |
| + |     volumes: |
| + |       ... |
| + |       - ./data/personas/Default:/data/personas/Default |
| + |     environment: |
| + |       - CHROME_USER_DATA_DIR=/data/personas/Default/chrome_profile |
| + |       - DISPLAY=novnc:0.0 |
| + |   novnc: |
| + |     image: theasp/novnc:latest |
| + |     environment: |
| + |       - DISPLAY_WIDTH=1920 |
| + |       - DISPLAY_HEIGHT=1080 |
| + |       - RUN_XTERM=no |
| + |     ports: |
| + |       - "8080:8080" |
| + | ``` |
| + | |
| + | 添加 `CHROME_USER_DATA_DIR` 和 `DISPLAY` 两个环境变量。 |