From c407343e4644d7c0553a6bb11947f3a79d324d1a Mon Sep 17 00:00:00 2001 From: Neo <151435968+nrvo@users.noreply.github.com> Date: Sun, 4 Feb 2024 13:40:05 +0100 Subject: [PATCH] [App] ArchiveBox (#2393) * [App] ArchiveBox * Update apps/archivebox/metadata/description.md Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com> * feat(docker-compose): add more env vars for app and their default value --------- Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com> Co-authored-by: JigSawFr --- apps/archivebox/config.json | 35 +++++++++++++++ apps/archivebox/docker-compose.yml | 54 ++++++++++++++++++++++++ apps/archivebox/metadata/description.md | 24 +++++++++++ apps/archivebox/metadata/logo.jpg | Bin 0 -> 17154 bytes 4 files changed, 113 insertions(+) create mode 100755 apps/archivebox/config.json create mode 100755 apps/archivebox/docker-compose.yml create mode 100755 apps/archivebox/metadata/description.md create mode 100755 apps/archivebox/metadata/logo.jpg diff --git a/apps/archivebox/config.json b/apps/archivebox/config.json new file mode 100755 index 00000000..eb18ee4d --- /dev/null +++ b/apps/archivebox/config.json @@ -0,0 +1,35 @@ +{ + "$schema": "../schema.json", + "name": "ArchiveBox", + "available": true, + "exposable": true, + "port": 8015, + "id": "archivebox", + "tipi_version": 1, + "version": "0.7.3", + "categories": ["media"], + "description": "ArchiveBox is a powerful, self-hosted internet archiving solution to collect, save, and view websites offline.", + "short_desc": "Open source self-hosted web archiving.", + "author": "archivebox", + "source": "https://github.com/ArchiveBox/ArchiveBox", + "website": "https://archivebox.io", + "form_fields": [ + { + "type": "text", + "label": "ArchiveBox Username", + "max": 50, + "min": 3, + "required": true, + "env_variable": "ARCHIVEBOX_USERNAME" + }, + { + "type": "password", + "label": "ArchiveBox Password", + "max": 50, + "min": 12, + 
"required": true, + "env_variable": "ARCHIVEBOX_PASSWORD" + } + ], + "supported_architectures": ["arm64", "amd64"] +} diff --git a/apps/archivebox/docker-compose.yml b/apps/archivebox/docker-compose.yml new file mode 100755 index 00000000..5b244d2b --- /dev/null +++ b/apps/archivebox/docker-compose.yml @@ -0,0 +1,54 @@ +services: + archivebox: + image: archivebox/archivebox:0.7.3 + restart: unless-stopped + container_name: archivebox + command: server --quick-init 0.0.0.0:8015 + environment: + - PORT=8015 + - PUBLIC_INDEX=${ARCHIVEBOX_PUBLIC_INDEX-true} # set to False to prevent anonymous users from viewing snapshot list + - PUBLIC_SNAPSHOTS=${ARCHIVEBOX_PUBLIC_SNAPSHOTS-true} # set to False to prevent anonymous users from viewing snapshot content + - PUBLIC_ADD_VIEW=${ARCHIVEBOX_PUBLIC_ADD_VIEW-false} # set to True to allow anonymous users to submit new URLs to archive + - ADMIN_USERNAME=${ARCHIVEBOX_USERNAME} # create an admin user on first run with the given user/pass combo + - ADMIN_PASSWORD=${ARCHIVEBOX_PASSWORD} + - PUID=1000 # set to your host user's UID & GID if you encounter permissions issues + - PGID=1000 + - SEARCH_BACKEND_ENGINE=${ARCHIVEBOX_SEARCH_BACKEND_ENGINE} # optional full-text search backend settings (e.g. sonic); these three interpolate to empty strings unless the ARCHIVEBOX_SEARCH_BACKEND_* variables are set, and no sonic container is defined in this file + - SEARCH_BACKEND_HOST_NAME=${ARCHIVEBOX_SEARCH_BACKEND_HOST_NAME} + - SEARCH_BACKEND_PASSWORD=${ARCHIVEBOX_SEARCH_BACKEND_PASSWORD} + - MEDIA_MAX_SIZE=${ARCHIVEBOX_MEDIA_MAX_SIZE-750m} # increase this filesize limit to allow archiving larger audio/video files + - TIMEOUT=${ARCHIVEBOX_TIMEOUT-60} # increase this number to 120+ seconds if you see many slow downloads timing out + - CHECK_SSL_VALIDITY=${ARCHIVEBOX_CHECK_SSL_VALIDITY-true} # set to False to disable strict SSL checking (allows saving URLs w/ broken certs) + - SAVE_ARCHIVE_DOT_ORG=${ARCHIVEBOX_SAVE_ARCHIVE_DOT_ORG-true} # set to False to disable submitting all URLs to Archive.org when archiving + networks: + - tipi_main_network + ports: + - "${APP_PORT}:8015" 
+ volumes: + - "${APP_DATA_DIR}/data:/data" + - "${APP_DATA_DIR}/crontabs:/var/spool/cron/crontabs" + labels: + # Main + traefik.enable: true + traefik.http.middlewares.archivebox-web-redirect.redirectscheme.scheme: https + traefik.http.services.archivebox.loadbalancer.server.port: 8015 + # Web + traefik.http.routers.archivebox-insecure.rule: Host(`${APP_DOMAIN}`) + traefik.http.routers.archivebox-insecure.entrypoints: web + traefik.http.routers.archivebox-insecure.service: archivebox + traefik.http.routers.archivebox-insecure.middlewares: archivebox-web-redirect + # Websecure + traefik.http.routers.archivebox.rule: Host(`${APP_DOMAIN}`) + traefik.http.routers.archivebox.entrypoints: websecure + traefik.http.routers.archivebox.service: archivebox + traefik.http.routers.archivebox.tls.certresolver: myresolver + # Local domain + traefik.http.routers.archivebox-local-insecure.rule: Host(`archivebox.${LOCAL_DOMAIN}`) + traefik.http.routers.archivebox-local-insecure.entrypoints: web + traefik.http.routers.archivebox-local-insecure.service: archivebox + traefik.http.routers.archivebox-local-insecure.middlewares: archivebox-web-redirect + # Local domain secure + traefik.http.routers.archivebox-local.rule: Host(`archivebox.${LOCAL_DOMAIN}`) + traefik.http.routers.archivebox-local.entrypoints: websecure + traefik.http.routers.archivebox-local.service: archivebox + traefik.http.routers.archivebox-local.tls: true diff --git a/apps/archivebox/metadata/description.md b/apps/archivebox/metadata/description.md new file mode 100755 index 00000000..981d321a --- /dev/null +++ b/apps/archivebox/metadata/description.md @@ -0,0 +1,24 @@ +# ArchiveBox + +ArchiveBox is a powerful, self-hosted internet archiving solution to collect, save, and view websites offline. + +--- + +![](https://github.com/ArchiveBox/ArchiveBox/assets/511499/90f1ce3c-75bb-401d-88ed-6297694b76ae?raw=true) + +--- + +Without active preservation effort, everything on the internet eventually disappears or degrades. 
Archive.org does a great job as a centralized service, but saved URLs have to be public, and they can't save every type of content. + +ArchiveBox is an open source tool that lets organizations & individuals archive both public & private web content while retaining control over their data. It can be used to save copies of bookmarks, preserve evidence for legal cases, back up photos from FB/Insta/Flickr or media from YT/SoundCloud/etc., save research papers, and more... + +📥 **You can feed ArchiveBox URLs one at a time, or schedule regular imports** from your bookmarks or history, social media feeds or RSS, link-saving services like Pocket/Pinboard, our [Browser Extension](https://chromewebstore.google.com/detail/archivebox-exporter/habonpimjphpdnmcfkaockjnffodikoj), and more. + +**It saves snapshots of the URLs you feed it in several redundant formats.** +It also detects any content featured *inside* pages & extracts it out into a folder: + +- 🌐 **HTML**/**Any websites** ➡️ `original HTML+CSS+JS`, `singlefile HTML`, `screenshot PNG`, `PDF`, `WARC`, `title`, `article text`, `favicon`, `headers`, ... +- 🎥 **Social Media**/**News** ➡️ `post content TXT`, `comments`, `title`, `author`, `images`, ... +- 🎬 **YouTube**/**SoundCloud**/etc. ➡️ `MP3/MP4`s, `subtitles`, `metadata`, `thumbnail`, ... +- 💾 **GitHub**/**GitLab**/etc. links ➡️ `clone of GIT source code`, `README`, `images`, ... +- ✨ *and more...* 
diff --git a/apps/archivebox/metadata/logo.jpg b/apps/archivebox/metadata/logo.jpg new file mode 100755 index 0000000000000000000000000000000000000000..56e38b7fa92019d432cf2768b0ef0d358e7d19e2 GIT binary patch literal 17154 zcmeHN3p`ZY{$FD-Ats?h$DH0ul8Bym(xg+TAmzK(2GDX0=)?IBG8LKF9N*?^diuUKraHl2=pS*i@-ldfUX2P0Hfnj zVn7`Xx%fYzBq+`j)Q%;;bm3}@W^Fr3I7cb@JYAdS@F<0$x z>&RNUPG{(ZZGCiS#vDz{Q5$BlK&w*ONaeN^b9K6C}> z%VfX~V`>8g5CMr1CdEkcjKm0I@EU8gc$AuWarX~2wa7c3A|+DZ2!&E{DYGY?i- z#21&N8DV5!9~uM?p+T*G6RGQGj-JskBvl{K;MxnyqclufNG25#ccS0d*-(Z5&7oo{ z6}k+0myZx@uDT^gc|74a*sP<0Z6+$7j2>*k2&;i_2BR?+N4AhMkV_$+N8|YNW;P8BkJBK}CAdV;VsRHXy;31wvyX1O zU@Z-nosX7jrqUo}1m$s`FD?^gc#wTd(T-PJQw9yv-HP}4cD_d#UzdDG9wxhc2rc&E z6Z&S&Hh-r<=p{0d26gW7of6i4R?_fNX8dv`b>4&1V#uR`>O7+87*c%}^TcHOP&8~j z4J4O7D`>wOG5HttbU6g@Aq{f&(!ipgje83#%&1T=?A;v*SoH57ulgoNK`5-=!>544 zuB!@3T??R2NeBP!Dt)g|DS(*x>Ww}P?50(PwVZ3Kb7tkRVp;yQxw9exb%(Fi&Xn0vM+7IITyY6+BUL}*l9CSVy!3IoR zMuUt1vTt)|hX<8$2jAg$0=U1^{!)mEGr=r$Ov|Jys@kLbJPF+VQ&RKY!q0;5#&}H4aD|=P!FdD@|41)MH zSP91oY1B&U8b4R3{HaNOgH}tRCpq!#C=HUHofD=bnvz1YP>r&t!LcLGTwR_yQK9R| z1^?=~MNdBbVzhL>ctr^AKPk2C5~hT@%`TU7aog;PeP|FPtXf@vG>tSDZKA=BI}T(J z4JyBP3wdB0 zk}+zBq@vy7xn?j~Cdz8u?JE^lSma9B?e9#`s__@8{qj<0ODiO0TEA#w_>UuK;D8v< zGER(mLMFf~=z7xK#!?L13fr$zYs?}!X{tLV-UVexrj=Z=C3pJ;&x-ALca2Bnv1|GZ zPOHBfyvM(uzs7Mv&5=D4zpI>U?kxih$EVy?ew(!XfMejZ2s9ilV)ahAk+PY9L$ zV)-#tblAm$(>!4*bl@jVS|cK&U3F?{P}5&rM&Kwu7`sEPmlQRc)L_g&i-nT?&$2zH zZE`;U#PS=%(3OMksRjz6xrt)=;@N0L0HuD1GIl60TU4A;h0h|k1(4^boqOAyQ+fV& z#JA)#zX;hr=?)s~HLSUV7LR*HgBcU$S;pWnZ5mALT!NzOv@%Pqx zI^M3ntv~bKlGf}?0s$&fwI+1is8E3tY}3Dp2183|(9adkaX{<5#AXz01r3@^(G$*k zG3~o37eSBf>oR(4cg3(5lh5r6VloC3FGBF)=o>>CD91~pLxZ&CvtBPBm+W7Yy6o&5 z!`X4)#4J!%w`-3n4B1z}5NhJC^8R>yjqlbCRYs4!A8kxqZG7+Cl{L;*Y2VGM#W(X> zU1`9Aqu^#9F^~r5j1=yBFE$8SRn%@B-IoSsYG{l1qn{JwACSZHv<6>$HF^IEE1SW^ zk=7&JA6+SNo_0`jIE&B;yS|SGXC&_NY`GXtsp%(EFhXZ{eu>Q^iCc*9^pYk4so@iR zvcyR~NbsvrZIahiIc!c5Z$_aWg2z>P92LTimG8WHS?tmWr?q^3hUvUw1B ze@6SdqQ9xj4I;+W>SoxNKfkEO8l506-(oYzMf$KI@o7V;%$BRW*yvgp->&fW47x}B 
zj9G|yIvWRY(yUB^& z6x){u`@(QD8vHQbjt2H4g2xBn6g2r7!D#Dje>ho{ojI{4__ipTi1LR0nL>l)YqBvB z#%GgcL0#v8PoL=?F-iq+-J{Y|-~07~Gwo}*z}s;}88I>#(vyTU0+Mo&BvCpDq|?PqiYOY4!i!n+xPE-` zK^&pBUqIX1k%DOaTLO{>TPH+L2x@11WvX`<&Git`K&D;}jYZ&Tv)&^x1i?N<{_=QeA1v*j`U;- zd0>sWiR~$Ss8~ZO4epD!=s{B8NZNEq2vEA)49bTT=$~W~*v(Q(Ya@9gcp4r{sb|(y zF|XG+&n@~^oNJ%rB|Q+kAkX0I;jdPoGmOWId9v$eIPAHc8%WbZ&MrErsHat9o^r*F}0v}%(b_5_H!{BXywviJ2Z?E6I=jU<69cw zr=Wkw%HL6Q%(i2VD$tA9?GXNuz!o==Nj92j4i{NvUpkabv8*x()X$6=l4fwF%!)$y zN6M{{sp~EH=4fss4rgY&jhS%XZHAQGpS+9aysK&&h-=bdMXP|!5{g{5PM=B*^eFI^ z8D;6Is$@WWq2AOR6WrT0`(oy5PR~GMD528iuI{M@G!P zkTqfUwWq*$qKO>74%Gh%`DKupY|fh+Q0Tt7d66nMQ*(TNX;=$h|A? zXGRXoFCLzi#;-CxbfInTcQg8Gy%;vAa4w(XI2Ez@KD|EBrebyEGIb^P3&==?Um*h@$Z z*p7#6PN=i+smE4CrnR40M1$~eFi|?c#WXMgI)6u0V#y%xmhekQ5i{q*)?pI=sCOQQ zPuWd_T}jjs7gX0icSra~Tl0>k5wn*P4{*jQ4-kZRyGE6H6sIDjE!7p`Xtz~Dns+ssc&QvRAb!-hoSgn5-$*ulm_jS(Csvk zG9Mr<>v^@TMwpNsDXA97s?Dg#O(cxP{?}~4YC}Of4r8QYaWrVHr$PN%n+UjJ-~=U5 zH%{F8k4~hgOdnI&IHj=h&J>Av7aPaaFi&m4Hp+z4s&NrABF*4N2y_g;fwH1O_Hnds z0&n7$5~8#=jM%dlYlXa`-d@Hm;p%O}!@rQa>6)tV`plr#3&fjI;t-O9${DXYvdJcr zFex;reSzXaa&0UPW@iT%eTlzt%B|n?7YE2+GBu0JuV}-SgX^fQ1v0D?oLNw&2dG2W z1(HvnIdsoh{tX)hCfBqjFne>txk!;%a_x$mdazA2p6)V6fon$%r!F-*e24ITdV~o> z+2|G)>{h1ay%k-T7-b*Fd3j~eMm^%+K40qd;?m)?!)dw6ztlcf2Nj7}IL8Qf#A_Oy zFAKfmlNuP1Cs51GUXYvn)_e2j>le2TsG0Qm)D7JWGk%!Bs9zCF%zhzWNQD=`KHRpE z2E<~3`fuQg;R3Pt#UNc8B$}T^Ma~!v^z8rNS9RGZhAFsNi$alx9RfwRFW^gcs9T&SUzb5JX#WX;xmrlH-4#T8Q1p9iw(E&C_bl~tGkDq_ zLQet-!h2(O=whmv1~s)WG3Y~$-4o?7q5QiQmK6`%?3?*fNYnUSblWE$AW-1}7Ehm< zdwcOY!Mq2F%g(%Y^-PW|SX@$|9#dzpw>|<;K|y}-Z8>NTCn%r>rbY+31Wk*(w`J~6 z<3>b1k_A??hYj#Kd0BlzAQNoVgKj+^rpxX5GVOPm*cA>#J$}hUT?SF}`4{%Y%v6|A z_lMJvksLN*0r~M_@+-#~oO@dR;h6jsTM88HPhv(sjeKLx->CBbhitY31^@zOmr!Ej+Ag(8(;vms|C7YXZx1AZe|Qd57S1s zZ6@_421{minlm(3IOYD%SovXy0WTnzqhk!yeQsqGtPsKGb4qQyk&M;-4uB6q3$y5|>8z1BuVzk>UGh_yi8?N< z6yg(1NVK4;Eu)B3olAI0&W8BautSZ;o-ZvAeqgza2;Y_W?b{{O!>3*tbZ8T^o_~Oo zj=X{4o=SgmdnFG^56u+fV;ipWE2*2S91g!&H7+3k>4Z(ZiEe4ct6Dep{p)^l@KAlD 
zZ#pmzsZ56}Ge)yvjM#$a_?NyTmPx*smD|G|L1W+pS^ z=dPY{YUeyb)7sdef|MoL9{m7q2_K@+DStA7I}PM(NGZ{K8n&<2bC7&^E;JJVdIkwDUFSr zg4`t>iV5|C-AeB4Y{!y^*TL^~D2+{l9Q`0@LtBd#O0R`2ZAC@DvQi3={-|gy#B(?M zbt@V}7Rm>d@M}*aB1|A&v9Ppu;4V}=z>Kn3z{bt8h=HT8zJrFNBam-^w!O!37ih29B?ff%KcSQSw7DuR9g9zvNfghjEG2E@~^rZA%DegQ1wMl2m`KiBuI$D07c zs0uDAu*H1p+NLlnYA&&pJOVh(X``cMs!nj+4aKG)-blqhN?lF9*}DP>M^|Uh<4cmI zyLBp;&I?$zC~xqw0gmYt2MyQCl*Nc^^GromVKTFw&*f69wYhm`A8|o+X{N6&Co6FD zw)};KKc0GU{J~HCA36yMW(JHfk5Q5Th%_k5k?!}-d~g>^OWortR{fggIymOet*a7qQY-6> z(|GUA7n6n^FCT;sTh?@YY*R*aT7cgH_sPTKq9T-&^wObT+q+w?Wp}?-=7Y|E^e3}t k%i4du7(H4f{^P~y5gGgA#pqEK{l|;ZqeZhnUX1ep0TiZ?$p8QV literal 0 HcmV?d00001