diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml
index 2f68432ba..aebd3b95d 100644
--- a/.github/workflows/main.yaml
+++ b/.github/workflows/main.yaml
@@ -36,8 +36,10 @@ jobs:
     name: Run Linter and Formatter
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v4
-      - uses: actions/setup-python@v5
+      - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
+      - name: Lint pinned actions
+        run: bash tools/linter_actions_pinned.sh
+      - uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5
         with:
           python-version: "3.11"
           cache: 'pip' # caching pip dependencies
@@ -66,13 +68,13 @@ jobs:
         version: ["3.10", "3.11", "3.12", "3.13", "3.14"]
         folder: ["weaviate", "integration", "integration_embedded"]
     steps:
-      - uses: actions/checkout@v4
-      - uses: actions/setup-python@v5
+      - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
+      - uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5
         with:
           python-version: ${{ matrix.version }}
           cache: 'pip' # caching pip dependencies
       - run: pip install -r requirements-devel.txt
-      - uses: jakebailey/pyright-action@v2
+      - uses: jakebailey/pyright-action@6cabc0f01c4994be48fd45cd9dbacdd6e1ee6e5e # v2
         with:
           version: 1.1.399
           working-directory: ${{ matrix.folder }}
@@ -86,8 +88,8 @@ jobs:
         version: ["3.10", "3.11", "3.12", "3.13", "3.14"]
         folder: ["test", "mock_tests"]
     steps:
-      - uses: actions/checkout@v4
-      - uses: actions/setup-python@v5
+      - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
+      - uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5
         with:
           python-version: ${{ matrix.version }}
           cache: 'pip' # caching pip dependencies
@@ -96,7 +98,7 @@ jobs:
         run: pytest --cov -v --cov-report=term-missing --cov=weaviate --cov-report xml:coverage-${{ matrix.folder }}.xml ${{ matrix.folder }}
       - name: Archive code coverage results
         if: matrix.version == '3.10' && (github.ref_name != 'main')
-        uses: actions/upload-artifact@v4
+        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4
         with:
           name: coverage-report-${{ matrix.folder }}
           path: coverage-${{ matrix.folder }}.xml
@@ -110,8 +112,8 @@ jobs:
         grpc: ["1.59.5", "1.63.0", "1.65.0", "1.66.0", "1.68.0", "1.72.1", "1.73.0", "1.74.0"]
         protobuf: ["4.25.8", "5.26.0", "5.27.4", "5.28.3", "5.29.0", "6.30.0", "6.31.1", "6.32.0"]
     steps:
-      - uses: actions/checkout@v4
-      - uses: actions/setup-python@v5
+      - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
+      - uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5
         with:
           python-version: "3.11"
           cache: 'pip' # caching pip dependencies
@@ -129,11 +131,11 @@ jobs:
         version: ["3.10", "3.11", "3.12", "3.13", "3.14"]
         optional_dependencies: [false]
     steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
         with:
           fetch-depth: 0
           fetch-tags: true
-      - uses: actions/setup-python@v5
+      - uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5
         with:
           python-version: ${{ matrix.version }}
           cache: 'pip' # caching pip dependencies
@@ -145,7 +147,7 @@ jobs:
         run: pytest -v --cov --cov-report=term-missing --cov=weaviate --cov-report xml:coverage-integration-embedded.xml integration_embedded
       - name: Archive code coverage results
         if: matrix.version == '3.10' && (github.ref_name != 'main') && !github.event.pull_request.head.repo.fork
-        uses: actions/upload-artifact@v4
+        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4
         with:
           name: coverage-report-integration-embedded
           path: coverage-integration-embedded.xml
@@ -165,16 +167,16 @@ jobs:
         ]
         optional_dependencies: [false]
     steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
         with:
           fetch-depth: 0
           fetch-tags: true
-      - uses: actions/setup-python@v5
+      - uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5
         with:
           python-version: ${{ matrix.versions.py }}
           cache: 'pip' # caching pip dependencies
       - name: Login to Docker Hub
-        uses: docker/login-action@v3
+        uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # v3
         if: ${{ !github.event.pull_request.head.repo.fork && github.triggering_actor != 'dependabot[bot]' }}
         with:
           username: ${{secrets.DOCKER_USERNAME}}
@@ -198,7 +200,7 @@ jobs:
         run: pytest -n auto --dist loadgroup -v --cov --cov-report=term-missing --cov=weaviate --cov-report xml:coverage-integration.xml integration
       - name: Archive code coverage results
         if: matrix.versions.py == '3.10' && (github.ref_name != 'main')
-        uses: actions/upload-artifact@v4
+        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4
         with:
           name: coverage-report-integration
           path: coverage-integration.xml
@@ -220,13 +222,13 @@ jobs:
         ]
         optional_dependencies: [false]
     steps:
-      - uses: actions/checkout@v4
-      - uses: actions/setup-python@v5
+      - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
+      - uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5
         with:
           python-version: "3.11"
           cache: 'pip' # caching pip dependencies
       - name: Login to Docker Hub
-        uses: docker/login-action@v3
+        uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # v3
         if: ${{ !github.event.pull_request.head.repo.fork && github.triggering_actor != 'dependabot[bot]' }}
         with:
           username: ${{secrets.DOCKER_USERNAME}}
@@ -245,25 +247,25 @@ jobs:
     runs-on: ubuntu-latest
     if: github.ref_name != 'main' && !github.event.pull_request.head.repo.fork
     steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
       - name: Download coverage artifacts mock
-        uses: actions/download-artifact@v4
+        uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4
         with:
           name: coverage-report-mock_tests
       - name: Download coverage artifacts unit
-        uses: actions/download-artifact@v4
+        uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4
         with:
           name: coverage-report-test
       - name: Download coverage integration
-        uses: actions/download-artifact@v4
+        uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4
         with:
           name: coverage-report-integration
       - name: Download coverage integration embedded
-        uses: actions/download-artifact@v4
+        uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4
         with:
           name: coverage-report-integration-embedded
       - name: Codecov
-        uses: codecov/codecov-action@v4
+        uses: codecov/codecov-action@b9fd7d16f6d7d1b5d2bec1a2887e65ceed900238 # v4
         with:
           fail_ci_if_error: true
           files: ./coverage-integration.xml, ./coverage-integration-embedded.xml, ./coverage-test.xml, ./coverage-mock_tests.xml
@@ -275,11 +277,11 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - name: Checkout
-        uses: actions/checkout@v4
+        uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
         with:
           fetch-depth: 0
       - name: Set up Python 3.11
-        uses: actions/setup-python@v5
+        uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5
         with:
           python-version: "3.11"
           cache: 'pip' # caching pip dependencies
@@ -288,7 +290,7 @@ jobs:
       - name: Build a binary wheel
         run: python -m build
       - name: Create Wheel Artifacts
-        uses: actions/upload-artifact@v4
+        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4
         with:
           path: "dist/*.whl"
           name: weaviate-python-client-wheel
@@ -315,17 +317,17 @@ jobs:
         ]
     steps:
       - name: Checkout
-        uses: actions/checkout@v4
+        uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
         with:
           fetch-depth: 0
       - name: Login to Docker Hub
-        uses: docker/login-action@v3
+        uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # v3
         if: ${{ !github.event.pull_request.head.repo.fork && github.triggering_actor != 'dependabot[bot]' }}
         with:
           username: ${{secrets.DOCKER_USERNAME}}
           password: ${{secrets.DOCKER_PASSWORD}}
       - name: Download build artifact to append to release
-        uses: actions/download-artifact@v4
+        uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4
         with:
           name: weaviate-python-client-wheel
       - run: |
@@ -343,11 +345,11 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - name: Checkout
-        uses: actions/checkout@v4
+        uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
         with:
           fetch-depth: 0
       - name: Set up Python 3.11
-        uses: actions/setup-python@v5
+        uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5
         with:
           python-version: "3.11"
           cache: 'pip' # caching pip dependencies
@@ -357,7 +359,7 @@ jobs:
         run: python -m build
       - name: Publish distribution 📦 to PyPI on new tags
         if: startsWith(github.ref, 'refs/tags')
-        uses: pypa/gh-action-pypi-publish@release/v1
+        uses: pypa/gh-action-pypi-publish@cef221092ed1bacb1cc03d23a2d87d1d172e277b # release/v1
         with:
           verbose: true
           password: ${{ secrets.PYPI_API_TOKEN }}
@@ -369,12 +371,12 @@ jobs:
     needs: [build-and-publish]
     steps:
       - name: Download build artifact to append to release
-        uses: actions/download-artifact@v4
+        uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4
         with:
           name: weaviate-python-client-wheel
           path: dist
       - name: Release
-        uses: softprops/action-gh-release@v1
+        uses: softprops/action-gh-release@de2c0eb89ae2a093876385947365aca7b0e5f844 # v1
         with:
           generate_release_notes: true
           draft: true
diff --git a/.github/workflows/pr-security-lint.yaml b/.github/workflows/pr-security-lint.yaml
new file mode 100644
index 000000000..3dc27e534
--- /dev/null
+++ b/.github/workflows/pr-security-lint.yaml
@@ -0,0 +1,32 @@
+name: PR Security Lint
+
+on:
+  pull_request_target:
+    types: [opened, synchronize, reopened]
+
+# No permissions at workflow level — grant only what's needed at job level
+permissions: {}
+
+jobs:
+  hidden-unicode-check:
+    name: Check for hidden Unicode characters
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+      pull-requests: read
+    steps:
+      - name: Checkout base branch
+        uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5
+        with:
+          ref: ${{ github.event.pull_request.base.sha }}
+
+      - name: Check PR diff for hidden Unicode
+        # `shell: bash` is explicit on purpose: only then does the runner start
+        # bash with `-eo pipefail`, so a failing `gh pr diff` fails the step
+        # instead of piping an empty diff into the linter (which would pass).
+        shell: bash
+        env:
+          GH_TOKEN: ${{ github.token }}
+          PR_NUMBER: ${{ github.event.pull_request.number }}
+        run: |
+          gh pr diff "$PR_NUMBER" | bash tools/linter_hidden_unicode.sh --stdin
diff --git a/tools/linter_actions_pinned.sh b/tools/linter_actions_pinned.sh
new file mode 100755
index 000000000..ebe39b378
--- /dev/null
+++ b/tools/linter_actions_pinned.sh
@@ -0,0 +1,50 @@
+#!/usr/bin/env bash
+# Lint GitHub Actions workflow files to ensure all external actions are pinned to SHA hashes.
+# Usage: bash tools/linter_actions_pinned.sh
+
+set -euo pipefail
+
+ERRORS=0
+
+for workflow in .github/workflows/*.yaml .github/workflows/*.yml; do
+  [ -f "$workflow" ] || continue
+
+  # grep -n (below) yields "<lineno>:<text>"; split the two fields apart.
+  while IFS= read -r line; do
+    lineno=$(echo "$line" | cut -d: -f1)
+    content=$(echo "$line" | cut -d: -f2-)
+
+    # Extract the action reference (everything after "uses:")
+    action_ref=$(echo "$content" | sed -n 's/.*uses:[[:space:]]*//p' | xargs)
+
+    # Skip local actions (starting with ./)
+    if [[ "$action_ref" == ./* ]]; then
+      continue
+    fi
+
+    # Extract the version part (after @, before space or # comment)
+    version=$(echo "$action_ref" | sed -n 's/.*@\([^ #]*\).*/\1/p')
+
+    if [ -z "$version" ]; then
+      echo "::error file=${workflow},line=${lineno}::Action missing version pin: ${action_ref}"
+      ERRORS=$((ERRORS + 1))
+      continue
+    fi
+
+    # Check that the version is a 40-character hex SHA
+    if ! echo "$version" | grep -qE '^[0-9a-f]{40}$'; then
+      echo "::error file=${workflow},line=${lineno}::Action not pinned to SHA: ${action_ref} (version: ${version})"
+      ERRORS=$((ERRORS + 1))
+    fi
+  done < <(grep -n 'uses:' "$workflow")
+done
+
+if [ "$ERRORS" -gt 0 ]; then
+  echo ""
+  echo "ERROR: Found ${ERRORS} action(s) not pinned to a SHA hash."
+  echo "Replace tag references (e.g., @v5) with the full commit SHA (e.g., @93cb6ef...)"
+  echo "Preserve the tag as a comment: uses: actions/checkout@<sha> # v5"
+  exit 1
+fi
+
+echo "All GitHub Actions are pinned to SHA hashes."
diff --git a/tools/linter_hidden_unicode.sh b/tools/linter_hidden_unicode.sh
new file mode 100755
index 000000000..c63a6edb2
--- /dev/null
+++ b/tools/linter_hidden_unicode.sh
@@ -0,0 +1,147 @@
+#!/usr/bin/env bash
+# Lint for hidden/invisible Unicode characters in diffs (trojan-source attack prevention).
+# Requires Perl (pre-installed on GitHub Actions Ubuntu runners).
+#
+# Usage:
+#   bash tools/linter_hidden_unicode.sh --stdin      # read diff from stdin (CI mode)
+#   bash tools/linter_hidden_unicode.sh <base-ref>   # diff against a base ref
+#   bash tools/linter_hidden_unicode.sh              # diff staged changes (git diff --cached)
+
+set -euo pipefail
+
+# Binary file extensions to skip
+BINARY_PATTERN='\.(png|jpg|jpeg|gif|ico|svg|woff2?|ttf|eot|otf|zip|tar|gz|bz2|xz|7z|rar|pdf|dll|exe|so|dylib|o|obj|class|jar|war|pyc|pyo|wasm|bin|dat|db|sqlite|nupkg|snupkg)$'
+
+get_diff() {
+  if [[ "${1:-}" == "--stdin" ]]; then
+    cat
+  elif [[ -n "${1:-}" ]]; then
+    # Validate ref argument to prevent command injection
+    if ! [[ "$1" =~ ^[a-zA-Z0-9._/-]+$ ]]; then
+      echo "ERROR: Invalid ref argument: $1" >&2
+      exit 1
+    fi
+    if ! git rev-parse --verify "$1" >/dev/null 2>&1; then
+      echo "ERROR: Git ref not found: $1" >&2
+      exit 2
+    fi
+    git diff "$1"
+  else
+    git diff --cached
+  fi
+}
+
+# Perl script that:
+#   1. Tracks current file from diff headers
+#   2. Skips binary files
+#   3. Scans only added lines (starting with +, excluding +++ headers)
+#   4. Detects ~30+ categories of invisible/suspicious Unicode characters
+PERL_SCRIPT='
+use utf8;
+use strict;
+use warnings;
+
+sub escape_property {
+    my ($s) = @_;
+    $s =~ s/%/%25/g;
+    $s =~ s/\r/%0D/g;
+    $s =~ s/\n/%0A/g;
+    $s =~ s/:/%3A/g;
+    $s =~ s/,/%2C/g;
+    return $s;
+}
+
+sub escape_message {
+    my ($s) = @_;
+    $s =~ s/%/%25/g;
+    $s =~ s/\r/%0D/g;
+    $s =~ s/\n/%0A/g;
+    return $s;
+}
+
+my $file = "";
+my $line_in_file = 0;
+my $errors = 0;
+my $in_binary = 0;
+my $binary_pattern = qr/'"$BINARY_PATTERN"'/i;
+
+# Read the diff from standard input, one line per iteration.
+while (<STDIN>) {
+    chomp;
+
+    # Track file from diff headers
+    if (/^\+\+\+ b\/(.+)$/) {
+        $file = $1;
+        $line_in_file = 0;
+        $in_binary = ($file =~ $binary_pattern) ? 1 : 0;
+        next;
+    }
+
+    # Skip binary file markers
+    if (/^Binary files/) {
+        $in_binary = 1;
+        next;
+    }
+
+    # Track hunk headers for line numbers
+    if (/^@@ -\d+(?:,\d+)? \+(\d+)/) {
+        $line_in_file = $1 - 1;
+        next;
+    }
+
+    # Count lines in the new file
+    if (/^\+/ || /^ /) {
+        $line_in_file++;
+    }
+
+    # Only scan added lines, skip binary files
+    next if $in_binary;
+    next unless /^\+/;
+    next if /^\+\+\+ (?:$|b\/|\/dev\/null)/;
+
+    # Remove the leading + for scanning
+    my $content = substr($_, 1);
+
+    # Check for suspicious invisible Unicode characters:
+    #   - Bidi overrides and isolates (U+200E-200F, U+202A-202E, U+2066-2069)
+    #   - Zero-width characters (U+200B-200D, U+2060)
+    #   - Byte order mark mid-line (U+FEFF)
+    #   - Soft hyphen (U+00AD)
+    #   - Mongolian vowel separator (U+180E)
+    #   - Combining grapheme joiner (U+034F)
+    #   - Function application and invisible operators (U+2061-2064)
+    #   - Hangul fillers (U+115F, U+1160, U+3164, U+FFA0)
+    #   - Variation selectors (U+FE00-FE0F)
+    #   - Interlinear annotation (U+FFF9-FFFB)
+    #   - Unicode tag block (U+E0001, U+E0020-E007F)
+    #   - Deprecated format chars (U+206A-206F)
+    # Not matched: object replacement / replacement char (U+FFFC-FFFD);
+    # FFFD is sometimes legitimate.
+    # The /g loop reports every occurrence on the line, not just the first.
+    while ($content =~ /([\x{00AD}\x{034F}\x{115F}\x{1160}\x{180E}\x{200B}-\x{200F}\x{202A}-\x{202E}\x{2060}-\x{2064}\x{2066}-\x{2069}\x{206A}-\x{206F}\x{3164}\x{FE00}-\x{FE0F}\x{FEFF}\x{FFA0}\x{FFF9}-\x{FFFB}\x{E0001}\x{E0020}-\x{E007F}])/g) {
+        my $char = $1;
+        my $codepoint = sprintf("U+%04X", ord($char));
+        my $col = $-[1] + 1;
+
+        if ($ENV{GITHUB_ACTIONS}) {
+            my $efile = escape_property($file);
+            my $emsg = escape_message("Hidden Unicode character ${codepoint} found");
+            print "::error file=${efile},line=${line_in_file},col=${col}::${emsg}\n";
+        } else {
+            print "ERROR: $file:$line_in_file:$col - Hidden Unicode character $codepoint found\n";
+        }
+        $errors++;
+    }
+}
+
+if ($errors > 0) {
+    print "\nFound $errors hidden Unicode character(s) in added lines.\n";
+    print "These may indicate a trojan-source attack. See https://trojansource.codes/\n";
+    exit 1;
+} else {
+    print "No hidden Unicode characters detected.\n";
+    exit 0;
+}
+'
+
+get_diff "$@" | perl -CS -e "$PERL_SCRIPT"