Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
17 commits
Select commit Hold shift + click to select a range
88fad29
feat(sca): migrate SCA Reachability to method-level symbol database f…
jandro996 Jun 10, 2026
5741a6e
Merge branch 'master' into alejandro.gonzalez/sca-reachability-add-sy…
jandro996 Jun 10, 2026
68478a1
fix(sca): handle JARs without pom.properties in version resolution
jandro996 Jun 10, 2026
8db9729
nit(sca): inline single-line findArtifactInUrl wrapper
jandro996 Jun 10, 2026
60d05f0
fix(sca): guard against null dep.name in matchVersion fallback
jandro996 Jun 10, 2026
a4fbf95
nit(sca): spotlessApply
jandro996 Jun 10, 2026
cc9f261
fix(sca): pre-warm jarCache before retransformClasses to avoid deadlock
jandro996 Jun 17, 2026
17f7a5b
fix(sca): use cache-only lookup in processClass to prevent JAR I/O un…
jandro996 Jun 17, 2026
32d7a87
revert(sca): remove local-only spring-web CVE entry from sca_cves.json
jandro996 Jun 17, 2026
3eb7707
fix(sca): prevent hit re-emission when DependencyService re-detects t…
jandro996 Jun 17, 2026
5d06a89
Merge branch 'master' into alejandro.gonzalez/sca-reachability-add-sy…
jandro996 Jun 17, 2026
8b36942
fix(sca): pre-warm classpathArtifactCache before retransformClasses t…
jandro996 Jun 17, 2026
2aa7b87
Merge branch 'master' into alejandro.gonzalez/sca-reachability-add-sy…
jandro996 Jun 18, 2026
752fb4f
Merge branch 'master' into alejandro.gonzalez/sca-reachability-add-sy…
jandro996 Jun 18, 2026
4fd274b
fix(sca): replace junrar:7.5.5 test dependency with minimal stubs
jandro996 Jun 22, 2026
173414e
Merge branch 'master' into alejandro.gonzalez/sca-reachability-add-sy…
jandro996 Jun 22, 2026
85233a5
Merge branch 'master' into alejandro.gonzalez/sca-reachability-add-sy…
jandro996 Jun 23, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@ import org.gradle.api.Plugin
import org.gradle.api.Project

/**
* Registers the [generateScaCvesJson] task that downloads GHSA enrichments from
* `sca-reachability-database` and generates `sca_cves.json` bundled in the appsec JAR.
* Registers the [generateScaCvesJson] task that downloads GHSA symbol files from
* `sca-reachability-symbols` and generates `sca_cves.json` bundled in the appsec JAR.
*
* This is a **temporary** build-time approach. The symbol database will be delivered
* via Remote Config in a future iteration, at which point this plugin and the committed
Expand All @@ -25,7 +25,7 @@ class ScaEnrichmentsPlugin : Plugin<Project> {

companion object {
private const val SCA_ENRICHMENTS_API_DEFAULT =
"https://api.github.com/repos/DataDog/sca-reachability-database/contents/enrichments"
"https://api.github.com/repos/DataDog/sca-reachability-symbols/contents/jvm"
}

override fun apply(project: Project) {
Expand All @@ -34,7 +34,7 @@ class ScaEnrichmentsPlugin : Plugin<Project> {
val generateTask =
project.tasks.register("generateScaCvesJson") {
description =
"Downloads GHSA enrichments from sca-reachability-database and updates " +
"Downloads GHSA symbol files from sca-reachability-symbols and updates " +
"src/main/resources/sca_cves.json. Run with -PrefreshSca to force a refresh. " +
"Override the source URL with -PscaEnrichmentsUrl=<url>. " +
"sca_cves.json is committed to the repo so CI does not need network access."
Expand All @@ -51,27 +51,26 @@ class ScaEnrichmentsPlugin : Plugin<Project> {
val apiUrl =
project.findProperty("scaEnrichmentsUrl")?.toString() ?: SCA_ENRICHMENTS_API_DEFAULT

logger.lifecycle("Fetching GHSA enrichment index from $apiUrl ...")
logger.lifecycle("Fetching GHSA symbol index from $apiUrl ...")
@Suppress("UNCHECKED_CAST")
val fileList = githubFetch(apiUrl, token) as List<Map<String, Any>>
val ghsaFiles =
fileList.filter {
it["name"]?.toString()?.endsWith(".json") == true && it["type"] == "file"
}
logger.lifecycle("Found ${ghsaFiles.size} enrichment files")
logger.lifecycle("Found ${ghsaFiles.size} symbol files")

val entries = mutableListOf<Any>()
ghsaFiles.forEach { fileInfo ->
val ghsaId = fileInfo["name"]!!.toString().removeSuffix(".json")
val rawContent = githubFetchRaw(fileInfo["download_url"]!!.toString(), token)
entries.addAll(GhsaEnrichmentParser.parse(ghsaId, rawContent))
entries.addAll(GhsaEnrichmentParser.parse(rawContent))
}

outputFile.writeText(JsonOutput.toJson(mapOf("version" to 1, "entries" to entries)))
logger.lifecycle(
"sca_cves.json: ${entries.size} entries from ${ghsaFiles.size} GHSA files")
logger.lifecycle(
"Remember to commit src/main/resources/sca_cves.json after updating the database.")
"Remember to commit src/main/resources/sca_cves.json after updating the symbols.")
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,78 +4,90 @@ import com.fasterxml.jackson.databind.JsonNode
import com.fasterxml.jackson.databind.ObjectMapper

/**
* Parses GHSA enrichment JSON files from the sca-reachability-database into the internal
* Parses GHSA symbol JSON files from the sca-reachability-symbols repository into the internal
* sca_cves.json format consumed by SCA Reachability at runtime.
*
* Key transformations:
* - Filters entries to JVM language only
* - Expands multi-package GHSA entries into N records (one per Maven artifact), because
* each artifact may have different version ranges for the same set of class symbols
* - Filters entries to Maven ecosystem only (lang == "maven")
* - Each array entry maps 1:1 to one sca_cves.json record (one dependency_name per entry)
* - Parses target strings "package:ClassName.method" using lastIndexOf to split at the colon
* and lastIndexOf on the class+method part to split class from method
* - Converts class FQNs to JVM internal format (slashes) so the ClassFileTransformer
* can do O(1) map lookups without per-class string conversion
* - Sets method=null for all symbols — field exists for forward compatibility when the
* database adds method-level symbols in the future (see APPSEC-62260)
* can do O(1) map lookups without per-class string conversion at runtime
*/
object GhsaEnrichmentParser {

private val mapper = ObjectMapper()

/**
* Parses a single GHSA enrichment file.
* Parses a single GHSA symbols file.
*
* @param ghsaId the GHSA identifier (e.g. "GHSA-645p-88qh-w398"), used as vuln_id
* @param jsonContent the raw JSON content of the enrichment file
* @param jsonContent the raw JSON content of the symbols file
* @return list of sca_cves.json entry maps, one per affected Maven artifact
*/
fun parse(ghsaId: String, jsonContent: String): List<Map<String, Any?>> {
fun parse(jsonContent: String): List<Map<String, Any?>> {
val root = mapper.readTree(jsonContent)
require(root.isArray) { "GHSA enrichment file $ghsaId must be a JSON array, got ${root.nodeType}" }
require(root.isArray) { "GHSA enrichment file must be a JSON array, got ${root.nodeType}" }

val entries = mutableListOf<Map<String, Any?>>()

for (entry in root) {
if (entry.path("language").asText() != "jvm") continue
if (entry.path("lang").asText() != "maven") continue

val symbols = extractSymbols(entry)
if (symbols.isEmpty()) continue
val ghsaId =
entry.path("vulnerability").path("id").asText().takeIf { it.isNotEmpty() } ?: continue
val artifact = entry.path("dependency_name").asText().takeIf { it.isNotEmpty() } ?: continue
val versionRanges = entry.path("package_versions").map { it.asText() }

for (pkg in entry.path("package")) {
if (pkg.path("ecosystem").asText() != "maven") continue
val artifact = pkg.path("name").asText().takeIf { it.isNotEmpty() } ?: continue
val versionRanges = pkg.path("version_range").map { it.asText() }
val symbols = extractTargets(entry)
if (symbols.isEmpty()) continue

entries += mapOf(
"vuln_id" to ghsaId,
"artifact" to artifact,
"version_ranges" to versionRanges,
"symbols" to symbols,
)
}
entries +=
mapOf(
"vuln_id" to ghsaId,
"artifact" to artifact,
"version_ranges" to versionRanges,
"symbols" to symbols,
)
}

return entries
}

private fun extractSymbols(entry: JsonNode): List<Map<String, Any?>> {
/**
* Parses the targets array from a GHSA entry.
*
* Each target string has the format "package:ClassName.method". Parsing uses
* lastIndexOf(':') to split package from class+method, then lastIndexOf('.') on the
* class+method part to split class name from method name. Malformed targets (missing ':'
* or missing '.' after ':') are silently skipped.
*
* Targets within one entry may come from different packages; no assumption is made that
* all targets share a common package prefix.
*
* TODO(APPSEC-62260): if the database adds inner-class targets (e.g. "pkg:Outer.Inner.method"),
* the current replace('.', '/') will produce pkg/Outer/Inner instead of the correct
* pkg/Outer$Inner. Update when the database team defines the inner-class format.
*/
private fun extractTargets(entry: JsonNode): List<Map<String, Any?>> {
val symbols = mutableListOf<Map<String, Any?>>()
val imports = entry.path("ecosystem_specific").path("imports")
if (imports.isMissingNode || !imports.isArray) return symbols
val targets = entry.path("targets")
if (targets.isMissingNode || !targets.isArray) return symbols

for (importGroup in imports) {
for (symbol in importGroup.path("symbols")) {
if (symbol.path("type").asText() != "class") continue
val pkg = symbol.path("value").asText().takeIf { it.isNotEmpty() } ?: continue
val name = symbol.path("name").asText().takeIf { it.isNotEmpty() } ?: continue
for (target in targets) {
val t = target.asText().takeIf { it.isNotEmpty() } ?: continue
val colonIdx = t.lastIndexOf(':')
if (colonIdx < 0) continue
val pkg = t.substring(0, colonIdx)
val classAndMethod = t.substring(colonIdx + 1)
val dotIdx = classAndMethod.lastIndexOf('.')
if (dotIdx < 0) continue
val simpleClass = classAndMethod.substring(0, dotIdx)
val method = classAndMethod.substring(dotIdx + 1)
if (pkg.isEmpty() || simpleClass.isEmpty() || method.isEmpty()) continue

// JVM internal format (slashes) — avoids per-class conversion in the
// ClassFileTransformer hot path at runtime.
// TODO(APPSEC-62260): verify inner-class format when database adds method-level symbols.
// If GHSA uses dot notation for inner classes (e.g. name="Outer.Inner"), the replace below
// produces com/example/Outer/Inner instead of the correct com/example/Outer$Inner.
// When the database team defines the format, update this to handle the $ separator.
val internalName = "$pkg.$name".replace('.', '/')
symbols += mapOf("class" to internalName, "method" to null)
}
val internalName = "$pkg.$simpleClass".replace('.', '/')
symbols += mapOf("class" to internalName, "method" to method)
}

return symbols
Expand Down
Loading