diff --git a/apps/archivebox/config.json b/apps/archivebox/config.json new file mode 100755 index 00000000..eb18ee4d --- /dev/null +++ b/apps/archivebox/config.json @@ -0,0 +1,35 @@ +{ + "$schema": "../schema.json", + "name": "ArchiveBox", + "available": true, + "exposable": true, + "port": 8015, + "id": "archivebox", + "tipi_version": 1, + "version": "0.7.3", + "categories": ["media"], + "description": "ArchiveBox is a powerful, self-hosted internet archiving solution to collect, save, and view websites offline.", + "short_desc": "Open source self-hosted web archiving.", + "author": "archivebox", + "source": "https://github.com/ArchiveBox/ArchiveBox", + "website": "https://archivebox.io", + "form_fields": [ + { + "type": "text", + "label": "ArchiveBox Username", + "max": 50, + "min": 3, + "required": true, + "env_variable": "ARCHIVEBOX_USERNAME" + }, + { + "type": "password", + "label": "ArchiveBox Password", + "max": 50, + "min": 12, + "required": true, + "env_variable": "ARCHIVEBOX_PASSWORD" + } + ], + "supported_architectures": ["arm64", "amd64"] +} diff --git a/apps/archivebox/docker-compose.yml b/apps/archivebox/docker-compose.yml new file mode 100755 index 00000000..5b244d2b --- /dev/null +++ b/apps/archivebox/docker-compose.yml @@ -0,0 +1,54 @@ +services: + archivebox: + image: archivebox/archivebox:0.7.3 + restart: unless-stopped + container_name: archivebox + command: server --quick-init 0.0.0.0:8015 + environment: + - PORT=8015 + - PUBLIC_INDEX=${ARCHIVEBOX_PUBLIC_INDEX-true} # set to False to prevent anonymous users from viewing snapshot list + - PUBLIC_SNAPSHOTS=${ARCHIVEBOX_PUBLIC_SNAPSHOTS-true} # set to False to prevent anonymous users from viewing snapshot content + - PUBLIC_ADD_VIEW=${ARCHIVEBOX_PUBLIC_ADD_VIEW-false} # set to True to allow anonymous users to submit new URLs to archive + - ADMIN_USERNAME=${ARCHIVEBOX_USERNAME} # create an admin user on first run with the given user/pass combo + - ADMIN_PASSWORD=${ARCHIVEBOX_PASSWORD} + - PUID=1000 
# set to your host user's UID & GID if you encounter permissions issues + - PGID=1000 + - SEARCH_BACKEND_ENGINE=${ARCHIVEBOX_SEARCH_BACKEND_ENGINE} # optional search backend settings (empty when unset); no sonic container is defined in this file + - SEARCH_BACKEND_HOST_NAME=${ARCHIVEBOX_SEARCH_BACKEND_HOST_NAME} + - SEARCH_BACKEND_PASSWORD=${ARCHIVEBOX_SEARCH_BACKEND_PASSWORD} + - MEDIA_MAX_SIZE=${ARCHIVEBOX_MEDIA_MAX_SIZE-750m} # increase this filesize limit to allow archiving larger audio/video files + - TIMEOUT=${ARCHIVEBOX_TIMEOUT-60} # increase this number to 120+ seconds if you see many slow downloads timing out + - CHECK_SSL_VALIDITY=${ARCHIVEBOX_CHECK_SSL_VALIDITY-true} # set to False to disable strict SSL checking (allows saving URLs w/ broken certs) + - SAVE_ARCHIVE_DOT_ORG=${ARCHIVEBOX_SAVE_ARCHIVE_DOT_ORG-true} # set to False to disable submitting all URLs to Archive.org when archiving + networks: + - tipi_main_network + ports: + - "${APP_PORT}:8015" + volumes: + - "${APP_DATA_DIR}/data:/data" + - "${APP_DATA_DIR}/crontabs:/var/spool/cron/crontabs" + labels: + # Main + traefik.enable: true + traefik.http.middlewares.archivebox-web-redirect.redirectscheme.scheme: https + traefik.http.services.archivebox.loadbalancer.server.port: 8015 + # Web + traefik.http.routers.archivebox-insecure.rule: Host(`${APP_DOMAIN}`) + traefik.http.routers.archivebox-insecure.entrypoints: web + traefik.http.routers.archivebox-insecure.service: archivebox + traefik.http.routers.archivebox-insecure.middlewares: archivebox-web-redirect + # Websecure + traefik.http.routers.archivebox.rule: Host(`${APP_DOMAIN}`) + traefik.http.routers.archivebox.entrypoints: websecure + traefik.http.routers.archivebox.service: archivebox + traefik.http.routers.archivebox.tls.certresolver: myresolver + # Local domain + traefik.http.routers.archivebox-local-insecure.rule: Host(`archivebox.${LOCAL_DOMAIN}`) + traefik.http.routers.archivebox-local-insecure.entrypoints: web + 
traefik.http.routers.archivebox-local-insecure.service: archivebox + traefik.http.routers.archivebox-local-insecure.middlewares: archivebox-web-redirect + # Local domain secure + traefik.http.routers.archivebox-local.rule: Host(`archivebox.${LOCAL_DOMAIN}`) + traefik.http.routers.archivebox-local.entrypoints: websecure + traefik.http.routers.archivebox-local.service: archivebox + traefik.http.routers.archivebox-local.tls: true diff --git a/apps/archivebox/metadata/description.md b/apps/archivebox/metadata/description.md new file mode 100755 index 00000000..981d321a --- /dev/null +++ b/apps/archivebox/metadata/description.md @@ -0,0 +1,24 @@ +# ArchiveBox + +ArchiveBox is a powerful, self-hosted internet archiving solution to collect, save, and view websites offline. + +--- + +![](https://github.com/ArchiveBox/ArchiveBox/assets/511499/90f1ce3c-75bb-401d-88ed-6297694b76ae?raw=true) + +--- + +Without active preservation effort, everything on the internet eventually disappears or degrades. Archive.org does a great job as a centralized service, but saved URLs have to be public, and they can't save every type of content. + +ArchiveBox is an open source tool that lets organizations & individuals archive both public & private web content while retaining control over their data. It can be used to save copies of bookmarks, preserve evidence for legal cases, back up photos from FB/Insta/Flickr or media from YT/SoundCloud/etc., save research papers, and more... + +📥 **You can feed ArchiveBox URLs one at a time, or schedule regular imports** from your bookmarks or history, social media feeds or RSS, link-saving services like Pocket/Pinboard, our [Browser Extension](https://chromewebstore.google.com/detail/archivebox-exporter/habonpimjphpdnmcfkaockjnffodikoj), and more. 
+ +**It saves snapshots of the URLs you feed it in several redundant formats.** +It also detects any content featured *inside* pages & extracts it out into a folder: + +- 🌐 **HTML**/**Any websites** ➡️ `original HTML+CSS+JS`, `singlefile HTML`, `screenshot PNG`, `PDF`, `WARC`, `title`, `article text`, `favicon`, `headers`, ... +- 🎥 **Social Media**/**News** ➡️ `post content TXT`, `comments`, `title`, `author`, `images`, ... +- 🎬 **YouTube**/**SoundCloud**/etc. ➡️ `MP3/MP4`s, `subtitles`, `metadata`, `thumbnail`, ... +- 💾 **GitHub**/**GitLab**/etc. links ➡️ `clone of Git source code`, `README`, `images`, ... +- ✨ *and more ...* diff --git a/apps/archivebox/metadata/logo.jpg b/apps/archivebox/metadata/logo.jpg new file mode 100755 index 00000000..56e38b7f Binary files /dev/null and b/apps/archivebox/metadata/logo.jpg differ