Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions .github/workflows/create-release.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ jobs:
|| (github.event_name == 'pull_request' && github.event.pull_request.merged == true)
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
- name: Unpack secrets
env:
GPG_PASSPHRASE: ${{ secrets.GPG_PASSPHRASE }}
Expand All @@ -37,7 +37,7 @@ jobs:
-Dcentral-publishing.waitUntil=published \
deploy
- name: Archive artifacts
uses: actions/upload-artifact@v4
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4
with:
name: jar-files
path: "target/*.jar"
Expand All @@ -49,12 +49,12 @@ jobs:
needs: [ release ]
steps:
- name: Download artifacts to append to release
uses: actions/download-artifact@v4
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4
with:
name: jar-files
path: target
- name: Create a GitHub Release
uses: softprops/action-gh-release@v1
uses: softprops/action-gh-release@de2c0eb89ae2a093876385947365aca7b0e5f844 # v1
with:
generate_release_notes: true
draft: true
Expand Down
27 changes: 27 additions & 0 deletions .github/workflows/pr-security-lint.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
name: PR Security Lint

# pull_request_target runs with the base repository's token even for PRs from
# forks, so this workflow must only ever execute code from the base branch.
on:
  pull_request_target:
    types: [opened, synchronize, reopened]

# No permissions at workflow level — grant only what's needed at job level
permissions: {}

jobs:
  hidden-unicode-check:
    name: Check for hidden Unicode characters
    runs-on: ubuntu-latest
    permissions:
      contents: read
      pull-requests: read
    steps:
      # Check out the trusted base commit so the linter script that runs below
      # is the reviewed one, not a copy modified by the pull request.
      - name: Checkout base branch
        uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5
        with:
          ref: ${{ github.event.pull_request.base.sha }}

      - name: Check PR diff for hidden Unicode
        # Naming the shell explicitly makes GitHub run the step with
        # `bash -eo pipefail`. The implicit default is `bash -e` WITHOUT
        # pipefail, in which case a failing `gh pr diff` would feed an empty
        # diff to the linter and the check would pass without scanning anything.
        shell: bash
        env:
          GH_TOKEN: ${{ github.token }}
          # Passed through the environment rather than expanded inside the
          # script text, so no event data is ever interpolated into shell code.
          PR_NUMBER: ${{ github.event.pull_request.number }}
        run: |
          gh pr diff "$PR_NUMBER" | bash tools/linter_hidden_unicode.sh --stdin
26 changes: 14 additions & 12 deletions .github/workflows/test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,8 @@ jobs:
name: Cache Maven dependencies
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: actions/setup-java@v4
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
- uses: actions/setup-java@c1e323688fd81a25caa38c78aa6df2d33d3e20d9 # v4
with:
distribution: "zulu"
java-version: "17"
Expand All @@ -31,8 +31,10 @@ jobs:
name: Unit tests
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: actions/setup-java@v4
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
- name: Lint pinned actions
run: bash tools/linter_actions_pinned.sh
- uses: actions/setup-java@c1e323688fd81a25caa38c78aa6df2d33d3e20d9 # v4
with:
distribution: "zulu"
java-version: "17"
Expand All @@ -46,20 +48,20 @@ jobs:
steps:
- name: Login to Docker Hub
if: ${{ !github.event.pull_request.head.repo.fork }}
uses: docker/login-action@v3
uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_PASSWORD }}
- id: cache-check
uses: actions/cache/restore@v4
uses: actions/cache/restore@0057852bfaa89a56745cba8c7296529d2fc39830 # v4
env:
DOCKER_CACHE_KEY: docker-images-${{ env.IMG2VEC }}-${{ env.MINIO }}-${{ env.MODEL2VEC }}
with:
path: ${{ env.DOCKER_IMAGES_TAR }}
key: ${{ env.DOCKER_CACHE_KEY }}
lookup-only: true # Only check if cache exists, don't download
- name: Free Disk Space (Ubuntu)
uses: jlumbroso/free-disk-space@v1.3.1
uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1
with:
tool-cache: false
android: true
Expand All @@ -77,7 +79,7 @@ jobs:
docker save $IMG2VEC $MINIO -o $DOCKER_IMAGES_TAR
- name: Cache images
if: steps.cache-check.outputs.cache-hit != 'true'
uses: actions/cache/save@v4
uses: actions/cache/save@0057852bfaa89a56745cba8c7296529d2fc39830 # v4
env:
DOCKER_CACHE_KEY: docker-images-${{ env.IMG2VEC }}-${{ env.MINIO }}-${{ env.MODEL2VEC }}
with:
Expand All @@ -94,16 +96,16 @@ jobs:
WEAVIATE_VERSION:
["1.32.24", "1.33.11", "1.34.7", "1.35.2", "1.36.9", "1.37.0-rc.0"]
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4

- uses: actions/cache/restore@v4
- uses: actions/cache/restore@0057852bfaa89a56745cba8c7296529d2fc39830 # v4
env:
DOCKER_CACHE_KEY: docker-images-${{ env.IMG2VEC }}-${{ env.MINIO }}-${{ env.MODEL2VEC }}
with:
path: ${{ env.DOCKER_IMAGES_TAR }}
key: ${{ env.DOCKER_CACHE_KEY }}
- name: Free Disk Space (Ubuntu)
uses: jlumbroso/free-disk-space@v1.3.1
uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1
with:
tool-cache: false
android: true
Expand All @@ -117,7 +119,7 @@ jobs:
if [ -f $DOCKER_IMAGES_TAR ]; then
docker load -i $DOCKER_IMAGES_TAR
fi
- uses: actions/setup-java@v4
- uses: actions/setup-java@c1e323688fd81a25caa38c78aa6df2d33d3e20d9 # v4
name: Setup JDK
with:
distribution: "zulu"
Expand Down
49 changes: 49 additions & 0 deletions tools/linter_actions_pinned.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
#!/usr/bin/env bash
# Lint GitHub Actions workflow files to ensure all external actions are pinned to SHA hashes.
# Usage: bash tools/linter_actions_pinned.sh
#
# Scans .github/workflows/*.yaml and *.yml relative to the current directory.
# Every `uses:` reference must end in @<40-character lowercase hex commit SHA>;
# local references starting with ./ are exempt. Fully commented-out lines and
# trailing YAML comments are ignored.
#
# Exit status: 0 when every reference is pinned, 1 otherwise. Each violation is
# printed as a GitHub Actions `::error` annotation.

set -euo pipefail

ERRORS=0

# Patterns are kept in variables so they can be used unquoted with [[ =~ ]].
COMMENT_LINE_RE='^[[:space:]]*#'
USES_RE='uses:[[:space:]]*(.*)$'
SHA_RE='^[0-9a-f]{40}$'

for workflow in .github/workflows/*.yaml .github/workflows/*.yml; do
    # An unmatched glob stays literal; skip it.
    [ -f "$workflow" ] || continue

    lineno=0
    # `|| [ -n "$line" ]` also processes a final line without a trailing newline.
    while IFS= read -r line || [ -n "$line" ]; do
        lineno=$((lineno + 1))

        # A commented-out step is not executed, so it is not a violation.
        if [[ "$line" =~ $COMMENT_LINE_RE ]]; then
            continue
        fi

        # Drop a trailing YAML comment (whitespace followed by #) before
        # parsing, so an "@" or "uses:" inside the comment cannot be mistaken
        # for part of the reference.
        line="${line%%[[:space:]]#*}"

        if ! [[ "$line" =~ $USES_RE ]]; then
            continue
        fi
        action_ref="${BASH_REMATCH[1]}"

        # Trim surrounding whitespace (including a CR from CRLF files) with
        # parameter expansion; unlike xargs this cannot fail on stray quotes.
        action_ref="${action_ref#"${action_ref%%[![:space:]]*}"}"
        action_ref="${action_ref%"${action_ref##*[![:space:]]}"}"

        # Remove one pair of matching YAML quotes around the value.
        case "$action_ref" in
            \"*\")
                action_ref="${action_ref#\"}"
                action_ref="${action_ref%\"}"
                ;;
            \'*\')
                action_ref="${action_ref#\'}"
                action_ref="${action_ref%\'}"
                ;;
        esac

        # Skip local actions (starting with ./)
        if [[ "$action_ref" == ./* ]]; then
            continue
        fi

        # The version is whatever follows the last "@"; no "@" means no pin at all.
        version=""
        if [[ "$action_ref" == *@* ]]; then
            version="${action_ref##*@}"
        fi

        if [ -z "$version" ]; then
            echo "::error file=${workflow},line=${lineno}::Action missing version pin: ${action_ref}"
            ERRORS=$((ERRORS + 1))
            continue
        fi

        # Check that the version is a 40-character hex SHA
        if ! [[ "$version" =~ $SHA_RE ]]; then
            echo "::error file=${workflow},line=${lineno}::Action not pinned to SHA: ${action_ref} (version: ${version})"
            ERRORS=$((ERRORS + 1))
        fi
    done < "$workflow"
done

if [ "$ERRORS" -gt 0 ]; then
    echo ""
    echo "ERROR: Found ${ERRORS} action(s) not pinned to a SHA hash."
    echo "Replace tag references (e.g., @v5) with the full commit SHA (e.g., @93cb6ef...)"
    echo "Preserve the tag as a comment: uses: actions/checkout@<SHA> # v5"
    exit 1
fi

echo "All GitHub Actions are pinned to SHA hashes."
143 changes: 143 additions & 0 deletions tools/linter_hidden_unicode.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
#!/usr/bin/env bash
# Lint for hidden/invisible Unicode characters in diffs (trojan-source attack prevention).
# Requires Perl (pre-installed on GitHub Actions Ubuntu runners).
#
# Usage:
# bash tools/linter_hidden_unicode.sh --stdin # read diff from stdin (CI mode)
# bash tools/linter_hidden_unicode.sh <base-ref> # diff against a base ref
# bash tools/linter_hidden_unicode.sh # diff staged changes (git diff --cached)

set -euo pipefail

# Binary file extensions to skip.
# This regex is spliced into the Perl script below as qr/.../i, so it is
# matched case-insensitively against the new-side path taken from each
# "+++ b/<path>" diff header. Added lines of matching files are not scanned.
BINARY_PATTERN='\.(png|jpg|jpeg|gif|ico|svg|woff2?|ttf|eot|otf|zip|tar|gz|bz2|xz|7z|rar|pdf|dll|exe|so|dylib|o|obj|class|jar|war|pyc|pyo|wasm|bin|dat|db|sqlite|nupkg|snupkg)$'

# Write the unified diff to be scanned to stdout.
#
# Arguments:
#   --stdin     pass stdin through unchanged (CI mode)
#   <base-ref>  diff the working tree against the given git ref
#   (none)      diff the staged changes (git diff --cached)
#
# Exits 1 when the ref contains characters outside [a-zA-Z0-9._/-] and 2 when
# the ref does not resolve in the current repository.
get_diff() {
    # Force a plain unified diff with the standard a/ and b/ prefixes, whatever
    # the user's git configuration says (color.ui=always, diff.external,
    # diff.noprefix, diff.mnemonicPrefix). The scanner relies on uncoloured
    # "+++ b/<path>" headers.
    local -a diff_opts=(--no-color --no-ext-diff --src-prefix=a/ --dst-prefix=b/)

    if [[ "${1:-}" == "--stdin" ]]; then
        cat
    elif [[ -n "${1:-}" ]]; then
        # Validate ref argument to prevent command injection
        if ! [[ "$1" =~ ^[a-zA-Z0-9._/-]+$ ]]; then
            echo "ERROR: Invalid ref argument: $1" >&2
            exit 1
        fi
        if ! git rev-parse --verify "$1" >/dev/null 2>&1; then
            echo "ERROR: Git ref not found: $1" >&2
            exit 2
        fi
        # The trailing -- keeps git from interpreting the ref as a path when a
        # file of the same name exists.
        git diff "${diff_opts[@]}" "$1" --
    else
        git diff "${diff_opts[@]}" --cached
    fi
}

# Perl script that:
#   1. Tracks current file from diff headers
#   2. Skips binary files
#   3. Scans only added lines (starting with +, excluding +++ headers)
#   4. Reports every invisible/suspicious Unicode character found (the exact
#      code points are listed next to the regex inside the script)
#
# Hunk line counts from each "@@" header are tracked so that an added line whose
# content merely looks like a diff header (content "++ b/image.png" shows up in
# the diff as "+++ b/image.png") is scanned as content. Without that tracking
# such a line would be taken for a file header, could switch the scanner into
# binary-skip mode, and would hide every added line that follows it.
#
# The script exits 1 when at least one character was reported and 0 otherwise.
#
# NOTE: the script is a single-quoted shell string; it must not contain single
# quotes except at the point where $BINARY_PATTERN is spliced in.
PERL_SCRIPT='
use utf8;
use strict;
use warnings;

# Escape a value for use as a property (file=...) of a GitHub workflow command.
sub escape_property {
    my ($s) = @_;
    $s =~ s/%/%25/g;
    $s =~ s/\r/%0D/g;
    $s =~ s/\n/%0A/g;
    $s =~ s/:/%3A/g;
    $s =~ s/,/%2C/g;
    return $s;
}

# Escape a value for use as the message part of a GitHub workflow command.
sub escape_message {
    my ($s) = @_;
    $s =~ s/%/%25/g;
    $s =~ s/\r/%0D/g;
    $s =~ s/\n/%0A/g;
    return $s;
}

my $file = "";
my $line_in_file = 0;
my $errors = 0;
my $in_binary = 0;
my $old_left = 0;    # old-side lines still expected in the current hunk
my $new_left = 0;    # new-side lines still expected in the current hunk
my $binary_pattern = qr/'"$BINARY_PATTERN"'/i;

# Suspicious invisible Unicode characters that are flagged:
#   - Bidi marks, overrides and isolates (U+200E-200F, U+202A-202E, U+2066-2069)
#   - Zero-width characters (U+200B-200D, U+2060)
#   - Byte order mark mid-line (U+FEFF)
#   - Soft hyphen (U+00AD)
#   - Mongolian vowel separator (U+180E)
#   - Combining grapheme joiner (U+034F)
#   - Function application and invisible operators (U+2061-2064)
#   - Hangul fillers (U+115F, U+1160, U+3164, U+FFA0)
#   - Variation selectors (U+FE00-FE0F)
#   - Interlinear annotation (U+FFF9-FFFB)
#   - Unicode tag block (U+E0001, U+E0020-E007F)
#   - Deprecated format chars (U+206A-206F)
# NOT flagged: object replacement / replacement char (U+FFFC-FFFD) -- FFFD is
# sometimes legitimate.
my $hidden_char = qr/([\x{00AD}\x{034F}\x{115F}\x{1160}\x{180E}\x{200B}-\x{200F}\x{202A}-\x{202E}\x{2060}-\x{2064}\x{2066}-\x{2069}\x{206A}-\x{206F}\x{3164}\x{FE00}-\x{FE0F}\x{FEFF}\x{FFA0}\x{FFF9}-\x{FFFB}\x{E0001}\x{E0020}-\x{E007F}])/;

while (my $line = <STDIN>) {
    chomp $line;

    # A line that cannot be part of a hunk body (for example "diff --git ...")
    # means the hunk ended early; fall back to header parsing.
    if (($old_left > 0 || $new_left > 0) && $line ne "" && $line !~ /^[ +\\-]/) {
        $old_left = 0;
        $new_left = 0;
    }

    my $is_added = 0;

    if ($old_left > 0 || $new_left > 0) {
        # Inside a hunk body every line is content, whatever it looks like.
        if ($line =~ /^\+/) {
            $new_left-- if $new_left > 0;
            $line_in_file++;
            $is_added = 1;
        }
        elsif ($line =~ /^-/) {
            $old_left-- if $old_left > 0;
        }
        elsif ($line !~ /^\\/) {
            # Context line (leading space, or empty if whitespace was stripped).
            # Lines starting with a backslash are the "No newline at end of
            # file" marker and belong to neither side.
            $old_left-- if $old_left > 0;
            $new_left-- if $new_left > 0;
            $line_in_file++;
        }
    }
    else {
        # Track file from diff headers. Git wraps unusual paths in double
        # quotes and appends a TAB after paths that contain spaces.
        if ($line =~ /^\+\+\+ "?b\/(.+?)"?\t?$/) {
            $file = $1;
            $line_in_file = 0;
            $in_binary = ($file =~ $binary_pattern) ? 1 : 0;
            next;
        }

        # Skip binary file markers
        if ($line =~ /^Binary files/) {
            $in_binary = 1;
            next;
        }

        # Track hunk headers for line numbers and hunk length; a missing
        # count means 1.
        if ($line =~ /^@@ -\d+(?:,(\d+))? \+(\d+)(?:,(\d+))?/) {
            my ($old_count, $new_start, $new_count) = ($1, $2, $3);
            $old_left = defined $old_count ? $old_count : 1;
            $new_left = defined $new_count ? $new_count : 1;
            $line_in_file = $new_start - 1;
            next;
        }

        # Outside any recognised hunk (unusual diff format): keep scanning
        # added lines rather than silently ignoring them.
        $line_in_file++ if $line =~ /^[+ ]/;
        next unless $line =~ /^\+/;
        next if $line =~ /^\+\+\+ (?:$|b\/|\/dev\/null)/;
        $is_added = 1;
    }

    # Only scan added lines, skip binary files
    next unless $is_added;
    next if $in_binary;

    # Remove the leading + for scanning
    my $content = substr($line, 1);

    # Report every occurrence on the line, not just the first one.
    while ($content =~ /$hidden_char/g) {
        # Copy the match data before calling anything else that uses regexes.
        my $char = $1;
        my $col = $-[1] + 1;
        my $codepoint = sprintf("U+%04X", ord($char));

        if ($ENV{GITHUB_ACTIONS}) {
            my $efile = escape_property($file);
            my $emsg = escape_message("Hidden Unicode character ${codepoint} found");
            print "::error file=${efile},line=${line_in_file},col=${col}::${emsg}\n";
        } else {
            print "ERROR: $file:$line_in_file:$col - Hidden Unicode character $codepoint found\n";
        }
        $errors++;
    }
}

if ($errors > 0) {
    print "\nFound $errors hidden Unicode character(s) in added lines.\n";
    print "These may indicate a trojan-source attack. See https://trojansource.codes/\n";
    exit 1;
} else {
    print "No hidden Unicode characters detected.\n";
    exit 0;
}
'

# Feed the selected diff to the Perl scanner. -CS makes perl treat
# STDIN/STDOUT/STDERR as UTF-8, so the \x{...} code points in the script match
# decoded characters rather than raw bytes. Because pipefail is set above, a
# failure in get_diff fails the script even if the scanner itself exits 0.
get_diff "$@" | perl -CS -e "$PERL_SCRIPT"
Loading