diff --git a/projects.yaml b/projects.yaml index d0c7074..27f961c 100644 --- a/projects.yaml +++ b/projects.yaml @@ -26,6 +26,10 @@ projects: reason: "Official dependency management tool from Golang. To quote the first line of the readme, 'dep is safe for production use.'" - name: Apache Kafka gh_url: https://github.com/apache/kafka + tag_regex_subs: + - remove: ^kafka- + - search: -incubating-candidate-(\d+)$ + replace: rc\1 - name: Minikube gh_url: https://github.com/kubernetes/minikube reason: Official kubernetes project with a logo, but no major release. @@ -36,10 +40,22 @@ projects: - name: Tor gh_url: https://github.com/torproject/tor url: https://blog.torproject.org/ + tag_regex_subs: + - remove: ^tor- + - remove: ^debian-version- + - remove: -root$ + - search: "@(\\d+)$" + replace: +\1 + - search: -cvs-(\d+-\d+)$ + replace: +\1 + - remove: -pre\.\d+$ - name: Home Assistant gh_url: https://github.com/home-assistant/home-assistant - name: Vala gh_url: https://github.com/GNOME/vala + tag_regex_subs: + - search: ^VALA_(\d)+_(\d)+_(\d)+$ + replace: \1.\2.\3 - name: Onion gh_url: https://github.com/davidmoreno/onion - name: Nim @@ -47,19 +63,31 @@ projects: - name: Windows Terminal url: https://www.microsoft.com/en-ca/p/windows-terminal-preview/9n0dx20hk701 gh_url: https://github.com/microsoft/terminal + tag_regex_subs: + - remove: ^\d{4}\.\d{5}$ - name: Bitcoin gh_url: https://github.com/bitcoin/bitcoin + tag_regex_subs: + - remove: -final$ + - remove: _closest$ + - remove: _notexact$ + - remove: -guixtest1$ + - remove: test1$ - name: Caddy gh_url: https://github.com/caddyserver/caddy - name: Werkzeug gh_url: https://github.com/pallets/werkzeug reason: Depended on by Flask and many other web frameworks. 
+ tag_regex_subs: + - remove: \.x$ - name: Cython gh_url: https://github.com/cython/cython - name: TOML gh_url: https://github.com/toml-lang/toml - name: Flask gh_url: https://github.com/pallets/flask + tag_regex_subs: + - remove: \.x$ - name: datadogpy gh_url: https://github.com/DataDog/datadogpy reason: Part of a paid product. @@ -76,12 +104,21 @@ projects: gh_url: https://github.com/jakubroztocil/httpie - name: scikit-learn gh_url: https://github.com/scikit-learn/scikit-learn + tag_regex_subs: + - remove: -branching$ - name: certbot gh_url: https://github.com/certbot/certbot + tag_regex_subs: + - search: -corrected$ + replace: "-1" - name: sshuttle gh_url: https://github.com/sshuttle/sshuttle + tag_regex_subs: + - remove: ^sshuttle- - name: Theano gh_url: https://github.com/Theano/Theano + tag_regex_subs: + - remove: ^rel- - name: Bokeh gh_url: https://github.com/bokeh/bokeh - name: Magic Wormhole @@ -98,22 +135,33 @@ projects: gh_url: https://github.com/gohugoio/hugo - name: drone gh_url: https://github.com/drone/drone + tag_regex_subs: + - search: -debug$ + replace: -dev + - search: -gitspaces-beta$ + replace: -beta - name: HashiCorp Terraform gh_url: https://github.com/hashicorp/terraform - name: HashiCorp Nomad gh_url: https://github.com/hashicorp/nomad + tag_regex_subs: + - remove: ^ent-changelog- - name: HashiCorp Vault gh_url: https://github.com/hashicorp/vault - emeritus: true - last_zv_release_version: v0.11.6 - first_release_date: 2015-04-29 - first_release_version: v0.1.0 + tag_regex_subs: + - search: -rebuild$ + replace: "-1" + - remove: ^ent-changelog- + - search: -rc(\d+)\.(\d+)$ + replace: -rc\1+\2 - name: xhyve gh_url: https://github.com/mist64/xhyve - name: zeal gh_url: https://github.com/zealdocs/zeal - name: html5lib-python gh_url: https://github.com/html5lib/html5lib-python + tag_regex_subs: + - remove: ish$ - name: MyPy gh_url: https://github.com/python/mypy - name: asn1crypto @@ -123,12 +171,17 @@ projects: url: 
https://cgit.freedesktop.org/gstreamer/orc gh_url: https://github.com/GStreamer/orc reason: Depended on by Ubuntu and other free desktop operating systems + tag_regex_subs: + - remove: ^orc- - name: Gephi gh_url: https://github.com/gephi/gephi - name: vim-airline gh_url: https://github.com/vim-airline/vim-airline - name: Julia gh_url: https://github.com/JuliaLang/julia + tag_regex_subs: + - search: -pre\.([a-z]+)$ + replace: -\1 - name: Flatpak gh_url: https://github.com/flatpak/flatpak - name: Meson Build System @@ -149,19 +202,37 @@ projects: - name: Pilosa gh_url: https://github.com/pilosa/pilosa url: https://www.pilosa.com/ + tag_regex_subs: + - search: -alpha\.(\d+)(-pre|\.)(\d+)$ + replace: -alpha\1+\3 - name: fail2ban gh_url: https://github.com/fail2ban/fail2ban + tag_regex_subs: + - remove: -smartos$ + - remove: -PROPAGATE_DEBIAN_PATCHES$ + - search: ^(\d+)_(\d+)_(\d+)$ + replace: \1.\2.\3 - name: qtile gh_url: https://github.com/qtile/qtile - name: autokey gh_url: https://github.com/autokey/autokey + tag_regex_subs: + - remove: ^ak- - name: ClamAV Antivirus gh_url: https://github.com/Cisco-Talos/clamav-devel + tag_regex_subs: + - search: CLAMAV_(\d+)_(\d+)(RC\d+)? + replace: \1.\2\3 + - remove: ^clamav- + - search: "@(\\d+)$" + replace: +\1 - name: OpenRCT2 url: https://openrct2.io/ gh_url: https://github.com/OpenRCT2/OpenRCT2 - name: bup gh_url: https://github.com/bup/bup + tag_regex_subs: + - remove: ^bup- - name: You-Get gh_url: https://github.com/soimort/you-get - name: Ramda @@ -182,17 +253,32 @@ projects: url: https://marketplace.visualstudio.com/items?itemName=ms-vscode.cpptools gh_url: https://github.com/microsoft/vscode-cpptools reason: Created by Microsoft and with almost 8 million installs, this is the standard extension you want if working with C or C++ in VS Code. 
+ tag_regex_subs: + - search: -insiders$ + replace: rc1 + - search: -insiders(\d+)$ + replace: rc\1 + - search: -debug$ + replace: dev + - search: -prerelease(\d+)$ + replace: pre\1 + - remove: ^v\. - name: pywinauto url: http://pywinauto.github.io/ gh_url: https://github.com/pywinauto/pywinauto - name: React url: https://reactjs.org gh_url: https://github.com/facebook/react - first_release_date: 2013-05-29T19:46:11Z - first_release_version: 0.3.0 # A later v0.0.0 tag breaks this + tag_regex_subs: + - search: -alpha\.(.*)$ + replace: -alpha+\1 - name: Rake gh_url: https://github.com/ruby/rake reason: The pioneer of the zero-to-double-digits jump. + tag_regex_subs: + - remove: ^rake- + - remove: ^drake- + - remove: ^comp_tree- - name: Chocolatey url: https://chocolatey.org/ gh_url: https://github.com/chocolatey/choco @@ -207,10 +293,17 @@ projects: - name: Nuitka url: https://nuitka.net/ gh_url: https://github.com/Nuitka/Nuitka + tag_regex_subs: + - search: ^(\d+\.\d+\.\d+)([a-z])$ + replace: \1+\2 - name: StreamEx gh_url: https://github.com/amaembo/streamex + tag_regex_subs: + - remove: ^streamex- - name: 3proxy gh_url: https://github.com/z3APA3A/3proxy + tag_regex_subs: + - remove: ^3proxy- - name: Flow gh_url: https://github.com/facebook/flow - name: GoReleaser @@ -229,6 +322,9 @@ projects: gh_url: https://github.com/digitalbazaar/forge - name: Stellarium gh_url: https://github.com/Stellarium/stellarium + tag_regex_subs: + - search: ^stellarium-(\d+)-(\d+)-(\d+)$ + replace: \1.\2.\3 - name: xonsh gh_url: https://github.com/xonsh/xonsh - name: ccls @@ -238,6 +334,12 @@ projects: - name: Teeworlds url: https://teeworlds.com/ gh_url: https://github.com/teeworlds/teeworlds + tag_regex_subs: + - remove: -start$ + - remove: -release$ + - remove: -endofline$ + - search: -cp-r(\d+)$ + replace: rc\1 - name: Numba url: https://numba.pydata.org gh_url: https://github.com/numba/numba @@ -253,12 +355,19 @@ projects: gh_url: https://github.com/simonw/datasette - name: 
Tendermint gh_url: https://github.com/tendermint/tendermint/ + tag_regex_subs: + - search: -dev0-fix0$ + replace: dev + - search: ^dev-(.*) + replace: \1dev - name: Cosmos-sdk gh_url: https://github.com/cosmos/cosmos-sdk first_release_date: 2017-03-06 first_release_version: 0.2.0 # https://github.com/tendermint/basecoin/blob/master/CHANGELOG.md#020-march-6-2017 - latest_release_date: 2024-12-16 - latest_release_version: 0.50.11 + tag_regex_subs: + - remove: -circuit$ + - search: -patch$ + replace: "-1" - name: LocalStack url: https://localstack.cloud gh_url: https://github.com/localstack/localstack @@ -275,10 +384,15 @@ projects: gh_url: https://github.com/paperjs/paper.js - name: Knex.js gh_url: https://github.com/knex/knex + tag_regex_subs: + - search: ^(\d+),(\d+),(\d+)$ + replace: \1.\2.\3 - name: zoxide gh_url: https://github.com/ajeetdsouza/zoxide - name: OpenRC gh_url: https://github.com/OpenRC/openrc + tag_regex_subs: + - remove: ^openrc- - name: Notary gh_url: https://github.com/notaryproject/notary - name: GoodbyeDPI @@ -296,6 +410,10 @@ projects: gh_url: https://github.com/stylus/stylus - name: nw.js gh_url: https://github.com/nwjs/nw.js + tag_regex_subs: + - search: ^nw-v(\d+)\.(\d+),(\d+)$ + replace: \1.\2.\3 + - remove: ^nw- - name: Video Speed Controller gh_url: https://github.com/igrigorik/videospeed url: https://chrome.google.com/webstore/detail/video-speed-controller/nffaoalbilbmmfgbnbgppjihopabppdk @@ -328,6 +446,11 @@ projects: - name: graphile-worker url: https://www.graphile.org/ gh_url: https://github.com/graphile/worker + tag_regex_subs: + - search: -canary\.([a-z\d]+)$ + replace: +\1 + - search: -bridge\.0$ + replace: "-0" - name: Monero url: https://getmonero.org gh_url: https://github.com/monero-project/monero @@ -335,14 +458,24 @@ projects: url: https://multimc.org/ gh_url: https://github.com/MultiMC/Launcher reason: Prominent launcher for maintaining multiple instances of MineCraft. 
+ tag_regex_subs: + - search: -final$ + replace: "-1" - name: Factor url: https://factorcode.org/ gh_url: https://github.com/factor/factor reason: A concatenative stack-based programming language. + tag_regex_subs: + - remove: ^import- + - remove: ^similar- + - search: (release|factor)-(\d+)-(\d+)$ + replace: \2.\3 - name: Tectonic url: https://tectonic-typesetting.github.io/ gh_url: https://github.com/tectonic-typesetting/tectonic reason: A TeX distributon that has been version 0 since 2016 + tag_regex_subs: + - remove: ^tectonic@ - name: Flipper url: https://www.flippercloud.io/docs gh_url: https://github.com/flippercloud/flipper @@ -353,6 +486,9 @@ projects: - name: Apache Druid url: https://druid.apache.org/ gh_url: https://github.com/apache/druid/ + tag_regex_subs: + - remove: ^druid- + - remove: -incubating - name: seaborn url: https://seaborn.pydata.org/ gh_url: https://github.com/mwaskom/seaborn @@ -394,6 +530,8 @@ projects: - name: Create url: https://createmod.net/ gh_url: https://github.com/Creators-of-Create/Create + tag_regex_subs: + - remove: ^mc\d+\.\d+/ - name: FastAPI gh_url: https://github.com/fastapi/fastapi - name: atlantis @@ -405,21 +543,26 @@ projects: - name: Nushell url: https://www.nushell.sh gh_url: https://github.com/nushell/nushell + tag_regex_subs: + - search: ^(\d+)_(\d+)_(\d+)$ + replace: \1.\2.\3 - name: Thanos url: https://thanos.io gh_url: https://github.com/thanos-io/thanos - first_release_date: 2018-05-18 - first_release_version: 0.1.0-rc.0 # https://github.com/thanos-io/thanos/releases/tag/v0.1.0-rc.0 - name: PyVista url: https://docs.pyvista.org/ gh_url: https://github.com/pyvista/pyvista - name: Apache Thrift gh_url: https://github.com/apache/thrift + tag_regex_subs: + - remove: ^thrift- - name: TypeORM url: http://typeorm.io/ gh_url: https://github.com/typeorm/typeorm - name: Sodium gh_url: https://github.com/CaffeineMC/sodium + tag_regex_subs: + - remove: ^mc\d+\.\d+(\.\d+)?- - name: The Clipboard project url: 
https://getclipboard.app/ gh_url: https://github.com/Slackadays/Clipboard @@ -427,11 +570,19 @@ projects: - name: Uncrustify url: http://uncrustify.sourceforge.net/ gh_url: https://github.com/uncrustify/uncrustify + tag_regex_subs: + - remove: ^uncrustify- - name: lazygit gh_url: https://github.com/jesseduffield/lazygit + tag_regex_subs: + - search: ^pre-(release-)?([\d\.]*)(-2)? + replace: \2pre\3 - name: OpenBLAS url: http://www.openblas.net/ gh_url: https://github.com/OpenMathLib/OpenBLAS + tag_regex_subs: + - search: alpha(\d+)\.(\d+)$ + replace: alpha\1+\2 - name: libc (Rust) url: https://docs.rs/libc gh_url: https://github.com/rust-lang/libc @@ -439,6 +590,9 @@ projects: - name: Metabase url: https://www.metabase.com/ gh_url: https://github.com/metabase/metabase + tag_regex_subs: # Metabase has really weird mixed versioning + - remove: ^v2015060\d-alpha + - remove: v1.[345]\d\.\d+(\.\d+)?.* # Hide non-0vers, not sure why they use them... - name: esbuild url: https://esbuild.github.io gh_url: https://github.com/evanw/esbuild @@ -448,6 +602,104 @@ projects: reason: 7+ years of history, over 2M weekly downloads on npm - name: Unmanic gh_url: https://github.com/Unmanic/unmanic + - name: OpenSSL + gh_url: https://github.com/openssl/openssl + emeritus: true + tag_regex_subs: + - search: ^(OpenSSL_|SSLeay_|OpenSSL-fips-|OpenSSL-fips-)(\d+)_(\d+)_(\d+)([a-z]{1,2}) + replace: \2.\3.\4+\5 + - search: ^(OpenSSL_|SSLeay_|OpenSSL-fips-|OpenSSL-fips-)(\d+)_(\d+)_(\d+) + replace: \2.\3.\4 + - search: ^OpenSSL-fips-(\d+)_(\d+) + replace: \1.\2 + - remove: ^openssl- + - search: -auto-reformat$ + replace: "-1" + - search: -format$ + replace: "-2" + - search: -reformat$ + replace: "-3" + - name: MAME + gh_url: https://github.com/mamedev/mame + wp_url: https://en.wikipedia.org/wiki/MAME + first_release_date: 1997-02-05 + first_release_version: 0.1 + tag_regex_subs: + - search: ^mame(\d)(\d+)(u\d+)$ + replace: \1.\2+\3 + - search: ^mame(\d)(\d+)$ + replace: \1.\2 + - name: Window
Maker + url: https://windowmaker.org/ + gh_url: https://github.com/window-maker/wmaker + wp_url: https://en.wikipedia.org/wiki/Window_Maker + first_release_date: 1997-01-01 # Exact date unknown + first_release_version: 0.0.3 + tag_regex_subs: + - remove: ^wmaker- + - remove: ^wm- + - remove: ^release- + - remove: -crm$ + - name: ReactOS + url: https://www.reactos.org/ + gh_url: https://github.com/reactos/reactos # Many tags are missing, use manual data below + reason: A free Windows-compatible Operating System + tag_regex_subs: + - remove: ^ReactOS- + - remove: -release$ + - name: three.js + url: https://threejs.org/ + gh_url: https://github.com/mrdoob/three.js + tag_regex_subs: + - remove: /ROME$ + - search: ^r(\d+)$ + replace: 0.\1.0 + - name: google-api-client (ruby) + url: https://rubygems.org/gems/google-api-client/ + gh_url: https://github.com/googleapis/google-api-ruby-client + # Their versioning is really hard to figure out... + tag_regex_subs: + - remove: ^google-api-client- + - remove: ^google-api-client/ + - remove: ^google-api-ruby-client- + - remove: ^google-api-ruby-client/ + - name: rand + url: https://rust-random.github.io/book/ + gh_url: https://github.com/rust-random/rand + reason: The most downloaded Rust crate + - name: suhosin + url: https://suhosin.org/ + gh_url: https://github.com/sektioneins/suhosin + first_release_date: 2006-09-16T00:00:00 + first_release_version: 0.9.1 + tag_regex_subs: + - remove: ^suhosin- + - name: Pure Data + wp_url: https://en.wikipedia.org/wiki/Pure_Data + gh_url: https://github.com/pure-data/pure-data + url: https://puredata.info/ + first_release_date: 1996-06-01 + first_release_version: 0.1 + tag_regex_subs: + - search: test(\d+)\.?([a-z\d])$ + replace: dev\1+\2 + - search: test(\d+)$ + replace: dev\1 + - name: cargo-audit + url: https://rustsec.org/ + gh_url: https://github.com/rustsec/rustsec + tag_regex_subs: + - remove: ^cargo-audit/ + - name: pg (Ruby) + url: https://rubygems.org/gems/pg + gh_url: 
https://github.com/ged/ruby-pg/tags + reason: Leading library for connecting Ruby to Postgres + first_release_date: 2008-01-26 + first_release_version: 0.7.9.2008.01.28 + - name: distlib + url: https://distlib.readthedocs.io + gh_url: https://github.com/pypa/distlib + reason: Depended on by PyPA/Pip - name: n8n url: https://n8n.io/ gh_url: https://github.com/n8n-io/n8n @@ -463,6 +715,7 @@ projects: latest_release_version: 0.9.8 - name: Dash url: https://git.kernel.org/pub/scm/utils/dash/dash.git + repo_url: https://git.kernel.org/pub/scm/utils/dash/dash.git first_release_date: 1997-06-19T09:29:16 first_release_version: 0.3.1 latest_release_date: 2022-12-11T06:33:43 @@ -477,33 +730,19 @@ projects: latest_release_date: 2024-04-23 - name: Inkscape url: https://inkscape.org/ - repo_url: https://launchpad.net/inkscape + repo_url: https://gitlab.com/inkscape/inkscape emeritus: true first_release_date: 2000-09-01 first_release_version: 0.16 # sodipodi cvs import according to changelog first_nonzv_release_date: 2020-05-01 last_zv_release_version: 0.92.5 + star_count: 3482 - name: Compiz url: https://www.compiz-fusion.org/ repo_url: https://launchpad.net/compiz first_release_date: 2006-05-22 latest_release_date: 2022-08-22 latest_release_version: 0.9.14.2 - - name: distlib - url: https://bitbucket.org/pypa/distlib - reason: Depended on by PyPA/Pip - first_release_version: 0.1.0 - first_release_date: 2013-03-02 - latest_release_version: 0.3.4 - latest_release_date: 2021-12-08 - - name: OpenSSL - gh_url: https://github.com/openssl/openssl - emeritus: true - first_release_date: 1998-12-23 - first_release_version: 0.9.1 # release name convention is weird, hardcode counts 2018-03-31 - last_zv_release_version: 0.9.8n - first_nonzv_release_date: 2010-03-29 - release_count_zv: 51 # technically only counts til 1.0 by date (2010). 0.9.8zh was the last 0ver and came out in 2015. - name: Factorio url: https://factorio.com/ reason: Popular, for-profit game. 
@@ -512,37 +751,21 @@ projects: first_release_version: 0.1.0 first_nonzv_release_date: 2020-08-14 last_zv_release_version: 0.18.47 - - name: pg (Ruby) - url: https://rubygems.org/gems/pg - reason: Leading library for connecting Ruby to Postgres - emeritus: true - first_release_version: 0.7.9.2008.01.28 - first_release_date: 2008-01-26 - last_zv_release_version: 0.21.0 - first_nonzv_release_date: 2017-06-13 - release_count_zv: 123 - name: PuTTY - url: https://www.chiark.greenend.org.uk/~sgtatham/putty/ + url: https://www.chiark.greenend.org.uk/~sgtatham/putty + repo_url: https://git.tartarus.org/?p=simon/putty.git reason: Probably the most popular SSH client in Windows history first_release_version: 0.43beta first_release_date: 1999-01-08 latest_release_version: 0.82 latest_release_date: 2024-11-27 - - name: MAME - gh_url: https://github.com/mamedev/mame # GitHub releases are mame0272 - wp_url: https://en.wikipedia.org/wiki/MAME - first_release_version: 0.1 - first_release_date: 1997-02-05 - latest_release_version: 0.272 - latest_release_date: 2024-11-29 - release_count: 304 - name: slrn # thanks hynek url: http://slrn.sourceforge.net/ wp_url: https://en.wikipedia.org/wiki/Slrn emeritus: true first_release_date: 1994-08-13 # https://sourceforge.net/p/slrn/mailman/message/6405527/ first_release_version: 0.1.0.0 - last_zv_release_version: 0.9.9p1 + last_zv_release_version: 0.9.9-1 first_nonzv_release_date: 2012-12-21 release_count_zv: 71 # changelog_text.count('\nChanges since 0') - name: Dwarf Fortress @@ -555,31 +778,13 @@ projects: release_count_zv: 142 # appx, based on df wiki release history - name: "Cataclysm: Dark Days Ahead" url: https://cataclysmdda.org - gh_url: https://github.com/CleverRaven/Cataclysm-DDA # GitHub is returning tags out of order + repo_url: https://github.com/CleverRaven/Cataclysm-DDA # There are way too many tags on GitHub and the 0.A release system is hard to translate reason: Immensely popular cross-platform open-source game under
continuous development for 6 years. first_release_version: 0.1 first_release_date: 2013-02-26 - latest_release_version: 0.H + # latest_release_version: 0.H # TODO: decide what to do here latest_release_date: 2024-11-22 release_count: 24 # https://cataclysmdda.org/releases/ - - name: Window Maker - url: https://windowmaker.org/ - gh_url: https://github.com/window-maker/wmaker # Tags are prefixed - wp_url: https://en.wikipedia.org/wiki/Window_Maker - first_release_date: 1997-01-01 # exact date unknown - first_release_version: 0.0.3 - latest_release_version: 0.96.0 - latest_release_date: 2023-08-05 - release_count: 94 - - name: ReactOS - url: https://www.reactos.org/ - gh_url: https://github.com/reactos/reactos # Many tags are missing, use manual data below - reason: A free Windows-compatible Operating System - first_release_version: 0.0.7 - first_release_date: 1996-01-23 - latest_release_version: 0.4.14 - latest_release_date: 2021-12-16 - release_count: 59 # Ignore GitHub saying >250 releases, ~80% of them are some kind of weird backup non-releases - name: OpenStreetMap API/website url: https://openstreetmap.org/ gh_url: https://github.com/openstreetmap/openstreetmap-website # Doesn't use releases on GitHub @@ -587,48 +792,9 @@ projects: reason: "Open map data used almost anywhere there's a non-Google map." 
first_release_version: 0.3 first_release_date: 2004-08-09 - latest_release_version: 0.6 February 2021 # See https://wiki.openstreetmap.org/wiki/API_v0.6#Semantic_versioning + latest_release_version: 0.6 latest_release_date: 2021-02-01 release_count: 14 # 0.3 → 0.6 and then the dated changes to 0.6 - - name: three.js - url: https://threejs.org/ - gh_url: https://github.com/mrdoob/three.js - first_release_version: 0.1 # GitHub releases are r1-r130 - first_release_date: 2013-07-03T11:49:48 - latest_release_version: 0.171.0 - latest_release_date: 2024-11-29 - - name: google-api-client (ruby) - url: https://rubygems.org/gems/google-api-client/ - gh_url: https://github.com/googleapis/google-api-ruby-client # GitHub releases are namespace prefixed - first_release_version: 0.1.0 - first_release_date: 2010-10-14 - latest_release_version: 0.15.1 # Their versioning is really hard to figure out... - latest_release_date: 2024-07-29 - release_count: 254 - - name: rand - url: https://rust-random.github.io/book/ - gh_url: https://github.com/rust-random/rand # GitHub releases are rand_distr-0.5.0-beta.0 - reason: The most downloaded Rust crate - first_release_version: 0.1.1 - first_release_date: 2015-02-03 - latest_release_version: 0.8.5 - latest_release_date: 2022-02-14 - release_count: 68 - - name: suhosin - url: https://suhosin.org/ - gh_url: https://github.com/sektioneins/suhosin # Some GitHub releases are suhosin-0.9.35-RC1 - first_release_version: 0.9.1 - first_release_date: 2006-09-16T00:00:00 - latest_release_version: 0.9.38 - latest_release_date: 2015-05-21T00:00:00 - - name: Pure Data - wp_url: https://en.wikipedia.org/wiki/Pure_Data - gh_url: https://github.com/pure-data/pure-data/ # Some GitHub releases are 0.55-0test3a - url: https://puredata.info/ - first_release_date: 1996-06-01 - first_release_version: 0.1 - latest_release_version: 0.55-2 - latest_release_date: 2024-11-17 - name: XeTeX wp_url: https://en.wikipedia.org/wiki/XeTeX url: http://xetex.sourceforge.net/ @@ 
-646,6 +812,7 @@ projects: last_zv_release_version: 0.3.85 first_nonzv_release_date: 2023-11-26 release_count_zv: 119 + star_count: 410 - name: gettext url: https://www.gnu.org/software/gettext/ repo_url: https://savannah.gnu.org/projects/gettext/ @@ -663,14 +830,8 @@ projects: first_release_version: 0.0.2 first_nonzv_release_date: 2008-06-17 last_zv_release_version: 0.9.61 - - name: cargo-audit - url: https://rustsec.org/ - # Technically on github, but has a weird monorepo version tagging scheme that'll need code to handle - gh_url: https://github.com/rustsec/rustsec - first_release_date: 2017-02-27 - first_release_version: 0.1.0 - latest_release_date: 2024-10-29 - latest_release_version: 0.21.0 + release_count_zv: 298 + star_count: 118 - name: transformers url: https://hackage.haskell.org/package/transformers repo_url: https://hub.darcs.net/ross/transformers diff --git a/requirements.in b/requirements.in index ac1b8e3..a5694be 100644 --- a/requirements.in +++ b/requirements.in @@ -2,4 +2,5 @@ boltons chert>=24.0.0 hyperlink pyyaml +requests schema diff --git a/requirements.txt b/requirements.txt index 5215a29..59d14a8 100644 --- a/requirements.txt +++ b/requirements.txt @@ -12,6 +12,10 @@ boltons==24.1.0 # chert # face # lithoxyl +certifi==2024.12.14 + # via requests +charset-normalizer==3.4.1 + # via requests chert==24.0.0 # via -r requirements.in face==24.0.0 @@ -23,7 +27,9 @@ hyperlink==21.0.0 # -r requirements.in # chert idna==3.10 - # via hyperlink + # via + # hyperlink + # requests lithoxyl==21.0.0 # via chert markdown==3.7 @@ -34,11 +40,15 @@ pyyaml==6.0.2 # via # -r requirements.in # chert +requests==2.32.3 + # via -r requirements.in schema==0.7.7 # via -r requirements.in six==1.17.0 # via # html5lib # python-dateutil +urllib3==2.3.0 + # via requests webencodings==0.5.1 # via html5lib diff --git a/tools/check_projects_yaml.py b/tools/check_projects_yaml.py index 36fca82..a692e11 100644 --- a/tools/check_projects_yaml.py +++ b/tools/check_projects_yaml.py 
@@ -5,6 +5,7 @@ import yaml from boltons.iterutils import redundant from hyperlink import parse +from packaging.version import InvalidVersion, Version from schema import Optional, Or, Schema @@ -14,6 +15,14 @@ def check_url(url_str: str): return True +def check_version(version_str: str | float): + try: + Version(str(version_str)) + except InvalidVersion: + return False + return True + + OPTIONAL = { Optional("gh_url"): check_url, Optional("repo_url"): str, @@ -22,6 +31,17 @@ def check_url(url_str: str): Optional("reason"): str, Optional("star_count"): int, } +REGEX = [ + Or( + { + "remove": str, + }, # type: ignore + { + "search": str, + "replace": str, + }, # type: ignore + ), +] IN_SCHEMA = Schema( { "projects": [ @@ -32,16 +52,17 @@ def check_url(url_str: str): "name": str, "gh_url": check_url, Optional("emeritus"): False, + Optional("tag_regex_subs"): REGEX, Optional("url"): check_url, # Overrides gh_url for the hyperlink Optional("release_count"): int, Optional("latest_release_date"): Or( datetime.date, datetime.datetime ), - Optional("latest_release_version"): Or(float, str), + Optional("latest_release_version"): check_version, Optional("first_release_date"): Or( datetime.date, datetime.datetime ), - Optional("first_release_version"): Or(float, str), + Optional("first_release_version"): check_version, }, # type: ignore # Emeritus GitHub projects { @@ -49,17 +70,18 @@ def check_url(url_str: str): "name": str, "gh_url": check_url, "emeritus": True, + Optional("tag_regex_subs"): REGEX, Optional("url"): check_url, # Overrides gh_url for the hyperlink Optional("release_count_zv"): int, Optional("first_release_date"): Or( datetime.date, datetime.datetime ), - Optional("first_release_version"): Or(float, str), + Optional("first_release_version"): check_version, Optional("first_nonzv_release_date"): Or( datetime.date, datetime.datetime ), - Optional("first_nonzv_release_version"): Or(float, str), - Optional("last_zv_release_version"): Or(float, str), + 
Optional("first_nonzv_release_version"): check_version, + Optional("last_zv_release_version"): check_version, }, # type: ignore # Non-GitHub projects { @@ -72,11 +94,11 @@ def check_url(url_str: str): Optional("latest_release_date"): Or( datetime.date, datetime.datetime ), - Optional("latest_release_version"): Or(float, str), + Optional("latest_release_version"): check_version, Optional("first_release_date"): Or( datetime.date, datetime.datetime ), - Optional("first_release_version"): Or(float, str), + Optional("first_release_version"): check_version, }, # type: ignore # Emeritus Non-GitHub projects { @@ -86,12 +108,12 @@ def check_url(url_str: str): "emeritus": True, Optional("release_count_zv"): int, "first_release_date": Or(datetime.date, datetime.datetime), - Optional("first_release_version"): Or(float, str), + Optional("first_release_version"): check_version, Optional("first_nonzv_release_date"): Or( datetime.date, datetime.datetime ), - Optional("first_nonzv_release_version"): Or(float, str), - Optional("last_zv_release_version"): Or(float, str), + Optional("first_nonzv_release_version"): check_version, + Optional("last_zv_release_version"): check_version, }, # type: ignore ) ], diff --git a/tools/gen_projects_json.py b/tools/gen_projects_json.py index b4c8705..06b341a 100644 --- a/tools/gen_projects_json.py +++ b/tools/gen_projects_json.py @@ -1,332 +1,402 @@ import argparse -import base64 import datetime import json import os import re import sys import time -import urllib.request from pathlib import Path from pprint import pprint +from typing import TypedDict +import requests import yaml -from boltons.urlutils import URL - -PROJECT_ROOT_PATH = Path(__file__).parent.parent -VTAG_RE = re.compile( - r""" - ^ - [^0-9]* - (?P\d+) - \. 
- [0-9a-zA-Z_.]+ - """, - re.VERBOSE, -) - -# Tags matching these patterns will be completely skipped -SKIP_PATTERNS = [ - r"^ciflow/", # pytorch has loads of this noise - r"^ci/", # pytorch has loads of this noise - r"^nightly", # FreeCol -] - -# Version numbers after these patterns should be extracted -STRIP_PATTERNS = [ - r"^mc[0-9.]+-", # Sodium tags include minecraft version numbers -] - - -def strip_prefix(tag_name: str) -> str: - """Strip any non-numeric prefix from the tag name.""" - _, _, tag_name = tag_name.rpartition("/") - - if "-" in tag_name: - _, _, version = tag_name.partition("-") - if re.search(r"^\d", version): - return version - - match = re.search(r"\d", tag_name) - if match: - return tag_name[match.start() :] - return tag_name - - -def match_vtag(tag_name: str) -> re.Match | None: - """Match version tags using a more general approach.""" - tag_name = strip_prefix(tag_name) - return VTAG_RE.match(tag_name) - - -def version_key(version: str) -> tuple: - """Extract and convert version numbers to tuple for comparison.""" - clean_version = strip_prefix(version) - try: - return tuple( - int(x) for x in re.split(r"\D+", clean_version) if x and x.isdigit() +from hyperlink import parse +from packaging.version import InvalidVersion, Version + + +class RegexSubstituionDict(TypedDict): + remove: str + """The regex pattern to remove from the tag name.""" + search: str + """The regex pattern to search for in the tag name to replace with `replace`.""" + replace: str + """The string to replace the `search` pattern with.""" + + +class GitHubTag: + def __init__(self, name: str, commit_url: str, committed_date: datetime.datetime): + self.name = name + self.processed_name = name + self.commit_url = commit_url + self.committed_date = committed_date + self.version: Version | None = None + + def is_version_compatible(self) -> bool: + try: + Version(self.processed_name) + except InvalidVersion: + return False + return True + + def process_name(self, regex_subs: 
list[RegexSubstituionDict] | None = None): + for sub in regex_subs or []: + if sub.get("remove"): + self.processed_name = re.sub(sub["remove"], "", self.processed_name) + else: + self.processed_name = re.sub( + sub["search"], sub["replace"], self.processed_name + ) + + def parse_version(self): + self.version = Version(self.processed_name) + + +class GitHubAPI: + def __init__(self, user: str, token: str, org: str, repo: str): + self.user = user + self.token = token + self.headers = {"Authorization": f"Bearer {self.token}"} + self.org = org + self.repo = repo + + def get_repo_info(self) -> dict: + query = """ + query($owner: String!, $repo: String!) { + rateLimit { + remaining + } + repository(owner: $owner, name: $repo) { + stargazerCount + } + } + """ + variables = {"owner": self.org, "repo": self.repo} + response = requests.post( + "https://api.github.com/graphql", + json={"query": query, "variables": variables}, + headers=self.headers, ) - except (TypeError, ValueError): - return tuple() - - -def _get_gh_json( - url: str, user: str | None = None, token: str | None = None -) -> dict | list[dict]: - """ - Get paginated results from GitHub, possibly authorized based on command - line arguments or environment variables. 
- """ - req = urllib.request.Request(url) - if user and token: - auth_str = f"{user}:{token}" - auth_bytes = auth_str.encode("ascii") - auth_header_val = f'Basic {base64.b64encode(auth_bytes).decode("ascii")}' - req.add_header("Authorization", auth_header_val) - - resp = urllib.request.urlopen(req) - body = resp.read() - res = json.loads(body) - rate_rem = int(resp.info().get("x-ratelimit-remaining", "-1")) - - if not isinstance(res, list) or not res: - print(f" (( {rate_rem} requests remaining") - return res - - page = 2 - ret = res - while res: - paged_url = f"{url}?page={page}" - req = urllib.request.Request(paged_url) - if user and token: - req.add_header("Authorization", auth_header_val) - resp = urllib.request.urlopen(req) - body = resp.read() - res = json.loads(body) - ret.extend(res) - page += 1 - - rate_rem = int(resp.info().get("x-ratelimit-remaining", "-1")) - print(f" (( {rate_rem} requests remaining") - return ret - - -def _get_gh_rel_data( - rel_info: dict, user: str | None = None, token: str | None = None -) -> dict: - ret = {} - ret["tag"] = rel_info["name"] - ret["version"] = None - if match_vtag(ret["tag"]): - ret["version"] = strip_prefix(ret["tag"]) - ret["api_commit_url"] = rel_info["commit"]["url"] - rel_data = _get_gh_json(ret["api_commit_url"], user, token) - if isinstance(rel_data, dict): - ret["date"] = rel_data["commit"]["author"]["date"] - ret["link"] = rel_data["html_url"] - return ret - - -def _find_dominant_version_pattern(tags: list[dict]) -> list[dict]: - """Find the most common version tag pattern in a project's tags.""" - patterns = {} - for tag in tags: - _, _, tag_name = tag["name"].rpartition("/") - - if any( - re.search(pattern, tag["name"]) or re.search(pattern, tag_name) - for pattern in SKIP_PATTERNS - ): - continue - - for pattern in STRIP_PATTERNS: - if re.search(pattern, tag_name): - prefix, _, version = tag_name.partition("-") - if re.search(r"^\d", version): - tag_name = version - break - - match = re.search(r"\d", 
tag_name) - if not match: - continue - prefix = tag_name[: match.start()] - if prefix in patterns: - patterns[prefix].append(tag) - else: - patterns[prefix] = [tag] - - if not patterns: - return [] - return max(patterns.values(), key=len) - - -def get_gh_project_info( - info: dict, user: str | None = None, token: str | None = None -) -> dict: - gh_info = {} - url = info.get("gh_url") - if url is None: - return gh_info - - org, repo = URL(url.rstrip("/")).path_parts[1:] - gh_url = URL("https://api.github.com/repos") - gh_url.path_parts += (org, repo) - - project_data = _get_gh_json(gh_url.to_text(), user, token) - if isinstance(project_data, dict): - gh_info["star_count"] = project_data["stargazers_count"] - - gh_url.path_parts += ("tags",) - tags_data = _get_gh_json(gh_url.to_text(), user, token) - if isinstance(tags_data, dict): - tags_data = [] - - main_tags = _find_dominant_version_pattern(tags_data) - vtags_data = [td for td in main_tags if match_vtag(td["name"])] - if not vtags_data: - return gh_info - - gh_info["release_count"] = len(vtags_data) - - latest_release = vtags_data[0] - latest_release_data = _get_gh_rel_data(latest_release, user, token) - for k, v in latest_release_data.items(): - gh_info[f"latest_release_{k}"] = v - - vtags_data.sort(key=lambda x: version_key(x["name"]), reverse=True) - - first_release_version = info.get("first_release_version") - first_release = None - if first_release_version is None: - first_release = [ - v - for v in vtags_data - if version_key(v["name"]) < version_key(latest_release["name"]) - ][-1] - else: - first_releases = [v for v in vtags_data if v["name"] == first_release_version] - if first_releases: - first_release = first_releases[0] - if first_release: - first_release_data = _get_gh_rel_data(first_release, user, token) - for k, v in first_release_data.items(): - gh_info[f"first_release_{k}"] = v - - zv_releases = [] - for rel in vtags_data: - match = match_vtag(rel["name"]) - if match and match.group("major") == 
"0": - zv_releases.append(rel) - gh_info["release_count_zv"] = len(zv_releases) - print( - f' .. {gh_info["release_count"]} releases, {gh_info["release_count_zv"]} 0ver' - ) - - is_zerover = latest_release in zv_releases - - gh_info["is_zerover"] = is_zerover - - if is_zerover: - return gh_info - - last_zv_release = zv_releases[0] - first_nonzv_release = vtags_data[vtags_data.index(last_zv_release) - 1] - first_nonzv_release_data = _get_gh_rel_data(first_nonzv_release, user, token) + data = response.json() + + print(f" (( {data["data"]["rateLimit"]["remaining"]} requests remaining") + + return {"star_count": data["data"]["repository"]["stargazerCount"]} + + def fetch_tags(self) -> list[GitHubTag]: + query = """ + query($owner: String!, $repo: String!, $cursor: String) { + rateLimit { + remaining + } + repository(owner: $owner, name: $repo) { + refs(refPrefix: "refs/tags/", first: 100, after: $cursor, orderBy: {field: TAG_COMMIT_DATE, direction: DESC}) { + edges { + node { + name + target { + commitUrl + ... on Commit { + committedDate + } + ... on Tag { + target { + ... on Commit { + committedDate + } + ... on Tag { + target { + ... 
on Commit { + committedDate + } + } + } + } + } + } + } + } + pageInfo { + hasNextPage + endCursor + } + } + } + } + """ + cursor = None + all_tags = [] + + while True: + variables = {"owner": self.org, "repo": self.repo, "cursor": cursor} + response = requests.post( + "https://api.github.com/graphql", + json={"query": query, "variables": variables}, + headers=self.headers, + ) + data = response.json() + + refs = data["data"]["repository"]["refs"] + all_tags.extend(refs["edges"]) + + if refs["pageInfo"]["hasNextPage"]: + cursor = refs["pageInfo"]["endCursor"] + else: + break - gh_info["last_zv_release_version"] = last_zv_release["name"] - for k, v in first_nonzv_release_data.items(): - gh_info[f"first_nonzv_release_{k}"] = v + print(f" (( {data["data"]["rateLimit"]["remaining"]} requests remaining") - return gh_info + return [ + GitHubTag( + name=t["node"]["name"], + commit_url=t["node"]["target"]["commitUrl"], + committed_date=datetime.datetime.fromisoformat( + t["node"]["target"].get("committedDate") + or t["node"]["target"]["target"].get("committedDate") + or t["node"]["target"]["target"]["target"]["committedDate"] + ), + ) + for t in all_tags + ] def json_default(obj): if isinstance(obj, (datetime.datetime, datetime.date)): return obj.isoformat() + if isinstance(obj, Version): + return str(obj) raise TypeError(f"{obj} is not serializable") -def fetch_entries( - projects: list[dict], user: str | None = None, token: str | None = None -) -> list[dict]: - entries = [] - - for p in projects: - print("Processing", p["name"]) - info = dict(p) - if info.get("skip"): - continue - - info["url"] = info.get("url", info.get("gh_url")) - - if info.get("gh_url"): - gh_info = get_gh_project_info(info, user, token) - # Only add new data, preserve any manual information - info.update({k: v for k, v in gh_info.items() if k not in info}) +class ProjectsEntry: + def __init__( + self, + name: str, + url: str | None = None, + gh_url: str | None = None, + repo_url: str | None = None, 
+ wp_url: str | None = None, + emeritus: bool | None = None, + reason: str | None = None, + tag_regex_subs: list[RegexSubstituionDict] | None = None, + star_count: int | None = None, + release_count: int | None = None, + release_count_zv: int | None = None, + latest_release_date: datetime.datetime | datetime.date | None = None, + latest_release_version: str | Version | None = None, + first_release_date: datetime.datetime | datetime.date | None = None, + first_release_version: str | Version | None = None, + first_nonzv_release_date: datetime.datetime | datetime.date | None = None, + first_nonzv_release_version: str | Version | None = None, + last_zv_release_version: str | Version | None = None, + ): + self.name: str = name + """The name of the project.""" + self.url: str | None = url + """The project's home page.""" + self.gh_url: str | None = gh_url + """The project's GitHub repository link.""" + self.repo_url: str | None = repo_url + """The project's non-GitHub repository link.""" + self.wp_url: str | None = wp_url + """The project's Wikipedia link.""" + self.is_zerover: bool = bool(emeritus) + """Whether the project is still ZeroVer.""" + self.reason: str | None = reason + """The reason this project was added to the 0ver website listing.""" + self.tag_regex_subs: list[RegexSubstituionDict] | None = tag_regex_subs + """The list of regex substitutions to apply to the tag names before parsing.""" + self.star_count: int | None = star_count + """The number of stars the project has.""" + self.release_count: int | None = release_count + """The number of releases the project has had.""" + self.release_count_zv: int | None = release_count_zv + """The number of releases the project has before it left 0ver.""" + self.latest_release_date: datetime.datetime | datetime.date | None = ( + latest_release_date + ) + """The date of the latest release.""" + self.latest_release_version: Version | None = ( + Version(latest_release_version) + if isinstance(latest_release_version, str) 
+ else latest_release_version + ) + """The version of the latest release.""" + self.latest_release_tag: str | None = None + """The tag name of the latest release.""" + self.latest_release_link: str | None = None + """The URL of the latest release commit.""" + self.first_release_date: datetime.datetime | datetime.date | None = ( + first_release_date + ) + """The date of the first release.""" + self.first_release_version: Version | None = ( + Version(first_release_version) + if isinstance(first_release_version, str) + else first_release_version + ) + self.first_release_tag: str | None = None + """The tag name of the first release.""" + self.first_release_link: str | None = None + """The URL of the first release commit.""" + """The version of the first release.""" + self.first_nonzv_release_date: datetime.datetime | datetime.date | None = ( + first_nonzv_release_date + ) + """The date of the first non-0ver release.""" + self.first_nonzv_release_version: Version | None = ( + Version(first_nonzv_release_version) + if isinstance(first_nonzv_release_version, str) + else first_nonzv_release_version + ) + """The version of the first non-0ver release.""" + self.first_nonzv_release_tag: str | None = None + """The tag name of the first non-0ver release.""" + self.first_nonzv_release_link: str | None = None + """The URL of the first non-0ver release commit.""" + self.last_zv_release_version: Version | None = ( + Version(last_zv_release_version) + if isinstance(last_zv_release_version, str) + else last_zv_release_version + ) + """The last 0ver release before the project left ZeroVer.""" + + @classmethod + def from_dict(cls, info: dict): + return cls(**info) + + def to_dict(self) -> dict: + hide = ["tag_regex_subs"] + return { + k: v for k, v in self.__dict__.items() if v is not None and k not in hide + } + + +class Entry: + def __init__(self, info: dict, args: argparse.Namespace): + self.info = ProjectsEntry.from_dict(info) + if self.info.gh_url: + self.gh_org = 
self.info.gh_url.split("/")[3] + self.gh_repo = self.info.gh_url.split("/")[4] + self.api = GitHubAPI(args.user, args.token, self.gh_org, self.gh_repo) + else: + self.gh_org = None + self.gh_repo = None + self.api = None + self.tags: list[GitHubTag] = [] + self.failed_tags: list[GitHubTag] = [] + self.duplicate_tags: list[GitHubTag] = [] + + def update_gh_project_info(self): + if self.api is None: + return + + repo_info = self.api.get_repo_info() + self.info.star_count = repo_info["star_count"] + + self.get_tags() + if not self.tags: + return + + self.info.release_count = len(self.tags) + + # Latest release + if not self.info.latest_release_version: + latest_release = self.tags[0] + self.info.latest_release_tag = latest_release.name + self.info.latest_release_link = latest_release.commit_url + self.info.latest_release_date = latest_release.committed_date + self.info.latest_release_version = latest_release.version + + # First release + first_release = None + if self.info.first_release_version: + first_releases = [ + v for v in self.tags if v.version == self.info.first_release_version + ] + if first_releases: + first_release = first_releases[0] + else: + first_release = self.tags[-1] + if first_release: + self.info.first_release_tag = first_release.name + self.info.first_release_link = first_release.commit_url + self.info.first_release_date = first_release.committed_date + self.info.first_release_version = first_release.version + + # ZeroVer releases + zv_releases = [t for t in self.tags if t.version and t.version.major == 0] + self.info.release_count_zv = len(zv_releases) + print( + f" .. 
{self.info.release_count} releases, {self.info.release_count_zv} 0ver" + ) - is_zerover = info.get("is_zerover") - if is_zerover is None: - is_zerover = info.get("emeritus") - if is_zerover is not None: - is_zerover = not is_zerover + self.info.is_zerover = ( + self.info.latest_release_version is not None + and self.info.latest_release_version.major == 0 + ) + if self.info.is_zerover: + return + + # Last ZeroVer release + if not self.info.last_zv_release_version: + last_zv_release = zv_releases[0] + self.info.last_zv_release_version = last_zv_release.version + + # First non-ZeroVer release + if not self.info.first_nonzv_release_version: + nonzv_releases = [ + t for t in self.tags if t.version and t.version.major != 0 + ] + first_nonzv_release = nonzv_releases[-1] + self.info.first_nonzv_release_tag = first_nonzv_release.name + self.info.first_nonzv_release_link = first_nonzv_release.commit_url + self.info.first_nonzv_release_date = first_nonzv_release.committed_date + self.info.first_nonzv_release_version = first_nonzv_release.version + + def get_tags(self): + if not self.api: + return + + tags_data = self.api.fetch_tags() + + tag_names = set() + self.tags = [] + self.failed_tags = [] + self.duplicate_tags = [] + for tag in reversed(tags_data): + tag.process_name(self.info.tag_regex_subs) + if tag.processed_name and tag.processed_name in tag_names: + self.duplicate_tags.append(tag) + continue else: - is_zerover = info.get("last_zv_release_version") is not None - if is_zerover is None: - is_zerover = False - - info["is_zerover"] = is_zerover - - entries.append(info) - - return sorted(entries, key=lambda e: e["name"]) - - -def parse_args(): - parser = argparse.ArgumentParser( - description="Generate or update project.json using projects.yaml." - ) - - parser.add_argument( - "-u", - "--user", - type=str, - default=os.getenv("GH_USER", ""), - help='GitHub Username for API authentication. 
Falls back to the "GH_USER" environment variable.', - ) - parser.add_argument( - "-k", - "--token", - type=str, - default=os.getenv("GH_TOKEN", ""), - help='A path to a file containing a GitHub personal access token for API authentication. Falls back to the "GH_TOKEN" environment variable.', - ) - parser.add_argument( - "--disable-caching", - action="store_true", - default=os.getenv("ZV_DISABLE_CACHING", "false").lower() - in [ - "true", - "1", - "yes", - ], - help='Flag to disable caching. Falls back to the "ZV_DISABLE_CACHING" environment variable.', - ) + tag_names.add(tag.processed_name) + + if tag.is_version_compatible(): + tag.parse_version() + if tag.version and ( + tag.version.is_devrelease + or tag.version.is_postrelease + or tag.version.is_prerelease + ): + self.duplicate_tags.append(tag) + else: + self.tags.append(tag) + else: + self.failed_tags.append(tag) - args = parser.parse_args() - if Path(args.token).is_file(): - with Path(args.token).open() as f: - args.token = f.read().strip() - return args + self.tags = list(reversed(self.tags)) + self.duplicate_tags = list(reversed(self.duplicate_tags)) + if self.duplicate_tags: + print(self.info.name, [t.name for t in self.duplicate_tags]) + self.failed_tags = list(reversed(self.failed_tags)) -def main(): +def generate(args: argparse.Namespace): start_time = time.time() - - args = parse_args() - projects_yaml_path = Path(__file__).parent.parent / "projects.yaml" with projects_yaml_path.open() as f: - projects = yaml.safe_load(f)["projects"] + projects: list[dict] = yaml.safe_load(f)["projects"] if not projects: return @@ -335,7 +405,7 @@ def main(): try: with projects_json_path.open() as f: cur_data = json.load(f) - cur_projects = cur_data["projects"] + cur_projects: list[dict] = cur_data["projects"] cur_gen_date = datetime.datetime.fromisoformat(cur_data["gen_date"]) except (IOError, KeyError): cur_projects = [] @@ -352,7 +422,23 @@ def main(): new_names = sorted([n["name"] for n in projects]) if 
fetch_outdated or cur_names != new_names or args.disable_caching: - entries = fetch_entries(projects, args.user, args.token) + entries: list[dict] = [] + + for p in projects: + print("Processing", p["name"]) + if p.get("skip"): + continue + + entry = Entry(p, args) + if not entry.info.url and entry.info.gh_url: + entry.info.url = entry.info.gh_url + + if entry.info.gh_url: + entry.update_gh_project_info() + + entries.append(entry.info.to_dict()) + + entries = sorted(entries, key=lambda e: e["name"]) else: print("Current data already up to date, exiting.") return @@ -368,7 +454,143 @@ def main(): with projects_json_path.open("w") as f: json.dump(res, f, indent=2, sort_keys=True, default=json_default) - sys.exit(0) + +def info(args: argparse.Namespace): + print("Processing", args.name_or_link) + + if parse(args.name_or_link).scheme in ("http", "https"): + info = {"gh_url": args.name_or_link} + else: + info = get_entry_from_name(args.name_or_link) + + entry = Entry(info, args) + entry.update_gh_project_info() + + print() + pprint(entry.info.to_dict()) + + +def tags(args: argparse.Namespace): + print("Processing", args.name_or_link) + + if parse(args.name_or_link).scheme in ("http", "https"): + info = {"gh_url": args.name_or_link} + else: + info = get_entry_from_name(args.name_or_link) + + entry = Entry(info, args) + entry.get_tags() + + print("\nParsed tags:") + for t in entry.tags: + print(f"{t.name} (parsed as {t.version})") + if not entry.tags: + print("No tags parsed.") + if entry.duplicate_tags: + print("\nDuplicate tags:") + for t in entry.duplicate_tags: + print(f"{t.name} (parsed as {t.processed_name})") + if entry.failed_tags: + print("\nFailed tags:") + for t in entry.failed_tags: + print(f"{t.name} (tried {t.processed_name})") + + +def parse_args(): + def add_options(parser: argparse.ArgumentParser, *, caching: bool = False): + parser.add_argument( + "-u", + "--user", + type=str, + default=os.getenv("GH_USER", ""), + help='GitHub Username for API 
authentication. Falls back to the "GH_USER" environment variable.', + ) + parser.add_argument( + "-k", + "--token", + type=str, + default=os.getenv("GH_TOKEN", ""), + help='A path to a file containing a GitHub personal access token for API authentication. Falls back to the "GH_TOKEN" environment variable.', + ) + if caching: + parser.add_argument( + "--disable-caching", + action="store_true", + default=os.getenv("ZV_DISABLE_CACHING", "false").lower() + in [ + "true", + "1", + "yes", + ], + help='Flag to disable caching. Falls back to the "ZV_DISABLE_CACHING" environment variable.', + ) + + parser = argparse.ArgumentParser( + description="Generate or update project.json using projects.yaml." + ) + add_options(parser, caching=True) + subparsers = parser.add_subparsers(dest="command", help="Available commands") + + # Generate + generate_parser = subparsers.add_parser( + "generate", help="Generate an updated projects.json file." + ) + add_options(generate_parser, caching=True) + + # Info + info_parser = subparsers.add_parser( + "info", + help="Print automatically pulled info for a GitHub project for debugging.", + ) + info_parser.add_argument( + "name_or_link", + type=str, + help="The project.yaml exact entry name or GitHub link.", + ) + add_options(info_parser) + + # Tags + tags_parser = subparsers.add_parser( + "tags", help="Print all sorted tags for a GitHub project for debugging." 
+ ) + tags_parser.add_argument( + "name_or_link", + type=str, + help="The project.yaml exact entry name or GitHub link.", + ) + add_options(tags_parser) + + args = parser.parse_args() + + if args.command is None: + args.command = "generate" + + if Path(args.token).is_file(): + with Path(args.token).open() as f: + args.token = f.read().strip() + + return args + + +def get_entry_from_name(name: str) -> dict: + projects_yaml_path = Path(__file__).parent.parent / "projects.yaml" + with projects_yaml_path.open() as f: + projects = yaml.safe_load(f)["projects"] + matching_info = [p for p in projects if p["name"] == name] + if not matching_info: + print("No matching project found.") + sys.exit(1) + return matching_info[0] + + +def main(): + args = parse_args() + if args.command == "generate": + generate(args) + elif args.command == "info": + info(args) + elif args.command == "tags": + tags(args) if __name__ == "__main__":