diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index e55e8c2cea02b..a4f92eb4ef078 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -1688,6 +1688,59 @@ object Unidoc { .map(_.filterNot(_.data.getCanonicalPath.contains("connect-shims"))) } + // genjavadoc emits top-level package-private Scala types (`private` or `private[x]`, e.g. + // `private[spark] trait Foo` or a bare `private class Bar`) as *public* Java stubs even with + // `-P:genjavadoc:strictVisibility=true`, because a top-level package-private type compiles to a + // JVM-public symbol, and the Javadoc `-public` option cannot drop a stub that really is public. + // ScalaDoc honors the qualifier and hides such types, so we drop their stubs here to keep the + // published Java API doc aligned with the Scala one. A stub `/target/java//.java` + // is dropped iff EVERY top-level Scala declaration of `` in that package is `private` or + // `private[...]`; a public class with a private companion object (e.g. `SparkConf`) is kept, since + // the class itself is public. The private regex tolerates other modifiers around the access + // qualifier (e.g. `final private[x] class`). + private val publicTopTypeRe = + """(?m)^(?:@\w+(?:\([^\n)]*\))?\s+)*(?:(?:final|sealed|abstract|implicit|case)\s+)*(?:class|trait|object)\s+(\w+)""".r + private val privateTopTypeRe = + """(?m)^(?:@\w+(?:\([^\n)]*\))?\s+)*(?:(?:final|sealed|abstract|implicit|case)\s+)*private(?:\[[^\]]+\])?\s+(?:(?:final|sealed|abstract|implicit|case)\s+)*(?:class|trait|object)\s+(\w+)""".r + + private def dropPackagePrivateJavaStubs(sources: Seq[Seq[File]]): Seq[Seq[File]] = { + val cache = scala.collection.mutable.Map.empty[String, (Set[String], Set[String])] + def scanPkg(dir: String): (Set[String], Set[String]) = cache.getOrElseUpdate(dir, { + val d = new File(dir) + val scalaFiles = + if (d.isDirectory) d.listFiles.filter(_.getName.endsWith(".scala")) else Array.empty[File] + val text = scalaFiles + .map(f => new String(java.nio.file.Files.readAllBytes(f.toPath), "UTF-8")) + .mkString("\n") + (publicTopTypeRe.findAllMatchIn(text).map(_.group(1)).toSet, + privateTopTypeRe.findAllMatchIn(text).map(_.group(1)).toSet) + }) + val marker = "/target/java/" + sources.map(_.filterNot { f => + val path = f.getCanonicalPath.replace('\\', '/') + val idx = path.indexOf(marker) + if (idx < 0 || !path.endsWith(".java")) { + false + } else { + val rel = path.substring(idx + marker.length) // /.java + val slash = rel.lastIndexOf('/') + if (slash < 0) { + false + } else { + val moduleRoot = path.substring(0, idx) + val pkgPath = rel.substring(0, slash) + val name = f.getName.stripSuffix(".java") + val (pub, priv) = Seq("src/main/scala", "src/main/scala-2.13") + .map(d => scanPkg(s"$moduleRoot/$d/$pkgPath")) + .foldLeft((Set.empty[String], Set.empty[String])) { + case ((p, q), (a, b)) => (p ++ a, q ++ b) + } + priv.contains(name) && !pub.contains(name) + } + } + }) + } + val unidocSourceBase = settingKey[String]("Base URL of source links in Scaladoc.") lazy val settings = BaseUnidocPlugin.projectSettings ++ @@ -1712,8 +1765,9 @@ object Unidoc { // Skip class names containing $ and some internal packages in Javadocs (JavaUnidoc / unidoc / unidocAllSources) := { - ignoreUndocumentedPackages((JavaUnidoc / unidoc / unidocAllSources).value) - .map(_.filterNot(_.getCanonicalPath.contains("org/apache/hadoop"))) + dropPackagePrivateJavaStubs( + ignoreUndocumentedPackages((JavaUnidoc / unidoc / unidocAllSources).value) + .map(_.filterNot(_.getCanonicalPath.contains("org/apache/hadoop")))) }, (JavaUnidoc / unidoc / javacOptions) := {