Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
75 changes: 75 additions & 0 deletions src/wp-includes/compat.php
Original file line number Diff line number Diff line change
Expand Up @@ -535,6 +535,81 @@ function array_last( array $array ) { // phpcs:ignore Universal.NamingConvention
}
}

if ( ! function_exists( 'mb_trim' ) ) {
	/**
	 * Polyfill for `mb_trim()` function added in PHP 8.4.
	 *
	 * Strips a set of characters (Unicode whitespace by default) from the
	 * beginning and end of a UTF-8 string.
	 *
	 * This polyfill is UTF-8-only. The string is always matched as UTF-8; when a
	 * non-UTF-8 `$encoding` is explicitly requested, an `E_USER_WARNING` is raised
	 * through `wp_trigger_error()` and the string is returned unchanged.
	 *
	 * Note: PHP's native function names its first parameter `$string`, so calls
	 * using a named first argument are not portable between the native function
	 * and this polyfill. Pass the arguments positionally.
	 *
	 * @since 6.9.0
	 *
	 * @param string      $str        The string to trim.
	 * @param string|null $characters Optional. The characters to trim from the string. Each character is
	 *                                treated literally. An empty string disables trimming.
	 *                                Without the second parameter, mb_trim() will strip these characters:
	 *                                - " " (Unicode U+0020), an ordinary space.
	 *                                - "\t" (Unicode U+0009), a tab.
	 *                                - "\n" (Unicode U+000A), a new line (line feed).
	 *                                - "\r" (Unicode U+000D), a carriage return.
	 *                                - "\0" (Unicode U+0000), the NUL-byte.
	 *                                - "\v" (Unicode U+000B), a vertical tab.
	 *                                - "\f" (Unicode U+000C), a form feed.
	 *                                - "\u00A0" (Unicode U+00A0), a NO-BREAK SPACE.
	 *                                - "\u1680" (Unicode U+1680), an OGHAM SPACE MARK.
	 *                                - "\u2000" (Unicode U+2000), an EN QUAD.
	 *                                - "\u2001" (Unicode U+2001), an EM QUAD.
	 *                                - "\u2002" (Unicode U+2002), an EN SPACE.
	 *                                - "\u2003" (Unicode U+2003), an EM SPACE.
	 *                                - "\u2004" (Unicode U+2004), a THREE-PER-EM SPACE.
	 *                                - "\u2005" (Unicode U+2005), a FOUR-PER-EM SPACE.
	 *                                - "\u2006" (Unicode U+2006), a SIX-PER-EM SPACE.
	 *                                - "\u2007" (Unicode U+2007), a FIGURE SPACE.
	 *                                - "\u2008" (Unicode U+2008), a PUNCTUATION SPACE.
	 *                                - "\u2009" (Unicode U+2009), a THIN SPACE.
	 *                                - "\u200A" (Unicode U+200A), a HAIR SPACE.
	 *                                - "\u2028" (Unicode U+2028), a LINE SEPARATOR.
	 *                                - "\u2029" (Unicode U+2029), a PARAGRAPH SEPARATOR.
	 *                                - "\u202F" (Unicode U+202F), a NARROW NO-BREAK SPACE.
	 *                                - "\u205F" (Unicode U+205F), a MEDIUM MATHEMATICAL SPACE.
	 *                                - "\u3000" (Unicode U+3000), an IDEOGRAPHIC SPACE.
	 *                                - "\u0085" (Unicode U+0085), a NEXT LINE (NEL).
	 *                                - "\u180E" (Unicode U+180E), a MONGOLIAN VOWEL SEPARATOR.
	 * @param string|null $encoding   Optional. The character encoding. When omitted or null, this polyfill
	 *                                treats the string as UTF-8. Any value that `_is_utf8_charset()` does
	 *                                not accept triggers a warning and leaves the string untouched.
	 * @return string The trimmed string, or the original string when trimming is not possible.
	 */
	function mb_trim( string $str, ?string $characters = null, ?string $encoding = null ) {
		if ( null === $characters ) {
			$characters = " \t\n\r\0\v\f\u{00A0}\u{1680}\u{2000}\u{2001}\u{2002}\u{2003}\u{2004}\u{2005}\u{2006}\u{2007}\u{2008}\u{2009}\u{200A}\u{2028}\u{2029}\u{202F}\u{205F}\u{3000}\u{0085}\u{180E}";
		}

		// Nothing to trim; also avoids building an empty (invalid) character class.
		if ( '' === $characters ) {
			return $str;
		}

		/*
		 * Keep this polyfill UTF-8-only: if a non-UTF-8 encoding is explicitly
		 * requested, bail out unchanged instead of attempting lossy conversions.
		 */
		if ( null !== $encoding && ! _is_utf8_charset( $encoding ) ) {
			wp_trigger_error(
				__FUNCTION__,
				'mb_trim() polyfill only supports UTF-8 encoding. The provided encoding "' . $encoding . '" is not supported.',
				E_USER_WARNING
			);
			return $str;
		}

		// Quote once; preg_quote() escapes `]`, `^`, `-`, `\` and the delimiter, so the class is literal.
		$char_class = '[' . preg_quote( $characters, '/' ) . ']';

		/*
		 * Leading run:  anchored at the start of the string.
		 * Trailing run: the negative lookbehind only lets a match begin at the
		 * first character of a run, and the possessive quantifier prevents
		 * backtracking into it. Without both, every position inside a long
		 * interior run of trimmable characters would be retried up to the end
		 * of that run, making the match quadratic in the run length.
		 * The `D` modifier makes `$` match only at the very end of the string.
		 */
		$pattern        = '/^' . $char_class . '++|(?<!' . $char_class . ')' . $char_class . '++$/uD';
		$trimmed_string = preg_replace( $pattern, '', $str );

		// preg_replace() returns null on failure (for example when the subject is not valid UTF-8).
		if ( null === $trimmed_string ) {
			return $str;
		}

		return $trimmed_string;
	}
}

// IMAGETYPE_AVIF constant is only defined in PHP 8.x or later.
if ( ! defined( 'IMAGETYPE_AVIF' ) ) {
define( 'IMAGETYPE_AVIF', 19 );
Expand Down
29 changes: 29 additions & 0 deletions src/wp-includes/formatting.php
Original file line number Diff line number Diff line change
Expand Up @@ -6294,3 +6294,32 @@ function maybe_hash_hex_color( $color ) {

return $color;
}

/**
 * Global variable containing the characters to trim from the beginning and end of a string.
 *
 * This variable is used by the `js_trim()` function to define which characters
 * should be trimmed from a string. It holds the tab, line feed, vertical tab,
 * form feed, carriage return, space, the Unicode space separators
 * (U+00A0, U+1680, U+2000 through U+200A, U+202F, U+205F, U+3000), the line
 * and paragraph separators (U+2028, U+2029) and the byte order mark (U+FEFF).
 *
 * The explicit `global` declaration ensures the value is stored in the global
 * scope even when this file is included from inside a function, where a plain
 * assignment would only create a local variable.
 *
 * @since 6.9.0
 *
 * @global string $js_trimmables
 *
 * @var string
 */
global $js_trimmables;

$js_trimmables = "\u{0009}\u{000A}\u{000B}\u{000C}\u{000D}\u{0020}\u{00A0}\u{1680}\u{2000}\u{2001}\u{2002}\u{2003}\u{2004}\u{2005}\u{2006}\u{2007}\u{2008}\u{2009}\u{200A}\u{2028}\u{2029}\u{202F}\u{205F}\u{3000}\u{FEFF}";

/**
 * Trims whitespace from the beginning and end of a string.
 *
 * This function is similar to `trim()`, but it uses the set of characters
 * defined in the global `$js_trimmables` variable and delegates the actual
 * trimming to `mb_trim()` with the UTF-8 encoding.
 *
 * If the global is not a string (for example because it was never
 * initialised in the global scope), a built-in copy of the same character
 * list is used. Passing `null` through to `mb_trim()` instead would silently
 * switch to that function's own default set, which differs from this one.
 *
 * @since 6.9.0
 *
 * @global string $js_trimmables Characters stripped by this function.
 *
 * @param string $string The string to trim.
 * @return string The trimmed string.
 */
function js_trim( $string ) {
	global $js_trimmables;

	if ( is_string( $js_trimmables ) ) {
		$characters = $js_trimmables;
	} else {
		// Fallback; keep in sync with the default value of the `$js_trimmables` global.
		$characters = "\u{0009}\u{000A}\u{000B}\u{000C}\u{000D}\u{0020}\u{00A0}\u{1680}\u{2000}\u{2001}\u{2002}\u{2003}\u{2004}\u{2005}\u{2006}\u{2007}\u{2008}\u{2009}\u{200A}\u{2028}\u{2029}\u{202F}\u{205F}\u{3000}\u{FEFF}";
	}

	return mb_trim( $string, $characters, 'UTF-8' );
}
125 changes: 125 additions & 0 deletions tests/phpunit/tests/compat/mbTrim.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
<?php

/**
 * Tests for the `mb_trim()` compat function.
 *
 * @group compat
 *
 * @covers ::mb_trim
 */
class Tests_Compat_mbTrim extends WP_UnitTestCase {

	/**
	 * Tests that mb_trim() is always available (either from PHP or WP).
	 *
	 * @ticket 63804
	 */
	public function test_mb_trim_availability(): void {
		$this->assertTrue( function_exists( 'mb_trim' ) );
	}

	/**
	 * Tests that mb_trim() strips the expected characters from both ends.
	 *
	 * @ticket 63804
	 *
	 * @dataProvider data_mb_trim
	 *
	 * @param string      $input      The input string to be trimmed.
	 * @param string      $expected   The expected trimmed result.
	 * @param string|null $characters Optional. The characters to trim. Default null (whitespace).
	 * @param string|null $encoding   Optional. The character encoding. Default null (internal encoding).
	 */
	public function test_mb_trim( $input, $expected, $characters = null, $encoding = null ): void {
		$this->assertSame(
			$expected,
			mb_trim( $input, $characters, $encoding )
		);
	}

	/**
	 * Data provider for mb_trim tests.
	 *
	 * Each dataset is: input, expected result, optional characters, optional encoding.
	 *
	 * @return array[]
	 */
	public static function data_mb_trim(): array {
		return array(
			// Basic ASCII whitespace.
			array( ' hello ', 'hello' ),
			array( "\t\n\rhello\n\r\t", 'hello' ),
			// Unicode whitespace.
			array( "\u{00A0}hello\u{00A0}", 'hello' ),
			array( "\u{3000}hello\u{3000}", 'hello' ),
			array( "\u{00A0}\u{3000} hello \u{3000}\u{00A0}", 'hello' ),
			// Custom characters.
			array( 'xxhelloxx', 'hello', 'x' ),
			array( 'xyhelloyx', 'hello', 'xy' ),
			// No trimming needed.
			array( 'hello', 'hello' ),
			// Interior whitespace is preserved.
			array( " a \t b ", "a \t b" ),
			// Empty string.
			array( '', '' ),
			// With encoding.
			array( ' hello ', 'hello', null, 'UTF-8' ),
			// Null characters.
			array( "\0hello\0", 'hello' ),
			// Vertical tab and form feed.
			array( "\v\fhello\f\v", 'hello' ),
		);
	}

	/**
	 * Tests that passing a non-UTF-8 encoding to the WP polyfill triggers a
	 * warning and returns the original string unchanged, rather than attempting
	 * a lossy re-encoding that could silently corrupt data.
	 *
	 * The warning is recorded by a temporary error handler instead of being
	 * turned into an exception, so that both the warning and the returned
	 * value can be asserted in the same run.
	 *
	 * Note: when PHP's native mb_trim() is available this test is skipped,
	 * because the polyfill is not in use and its bail-out does not apply.
	 *
	 * @ticket 63804
	 *
	 * @dataProvider data_mb_trim_non_utf8
	 *
	 * @param string $input    The input string to be trimmed.
	 * @param string $encoding The non-UTF-8 character encoding to pass.
	 */
	public function test_mb_trim_non_utf8_encoding_bails_with_warning( string $input, string $encoding ): void {
		if ( extension_loaded( 'mbstring' ) && version_compare( PHP_VERSION, '8.4', '>=' ) ) {
			$this->markTestSkipped( 'Native mb_trim() is available; polyfill bail-out behaviour does not apply.' );
		}

		$warnings = array();

		// Record E_USER_WARNING messages and mark them handled so execution continues.
		set_error_handler(
			static function ( int $errno, string $errstr ) use ( &$warnings ): bool {
				$warnings[] = $errstr;
				return true;
			},
			E_USER_WARNING
		);

		try {
			$result = mb_trim( $input, null, $encoding );
		} finally {
			// Always put the previous handler back, even if mb_trim() throws.
			restore_error_handler();
		}

		$this->assertCount( 1, $warnings, 'Polyfill should raise exactly one warning for unsupported encodings.' );
		$this->assertStringContainsString( 'mb_trim() polyfill only supports UTF-8 encoding', $warnings[0] );
		$this->assertSame( $input, $result, 'Polyfill should return the original string unchanged for unsupported encodings.' );
	}

	/**
	 * Data provider for non-UTF-8 encoding bail-out tests.
	 *
	 * Each dataset is: input, encoding name.
	 *
	 * @return array[]
	 */
	public static function data_mb_trim_non_utf8(): array {
		return array(
			'ISO-8859-1 latin string' => array( ' café ', 'ISO-8859-1' ),
			'SJIS japanese string'    => array( ' test ', 'SJIS' ),
			'Windows-1252 string'     => array( ' hello ', 'Windows-1252' ),
		);
	}
}
55 changes: 55 additions & 0 deletions tests/phpunit/tests/formatting/jsTrim.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
<?php

/**
 * Tests for the `js_trim()` formatting function.
 *
 * @group formatting
 *
 * @covers ::js_trim
 */
class Tests_Formatting_JsTrim extends WP_UnitTestCase {

	/**
	 * Tests that js_trim() is defined.
	 *
	 * @ticket 63804
	 */
	public function test_js_trim_availability(): void {
		$this->assertTrue( function_exists( 'js_trim' ) );
	}

	/**
	 * Tests that js_trim() strips exactly the characters listed in `$js_trimmables`.
	 *
	 * @ticket 63804
	 *
	 * @dataProvider data_js_trim
	 *
	 * @param string $input    The input string to be trimmed.
	 * @param string $expected The expected trimmed result.
	 */
	public function test_js_trim( $input, $expected ): void {
		$this->assertSame( $expected, js_trim( $input ) );
	}

	/**
	 * Data provider for js_trim tests.
	 *
	 * Each dataset is: input, expected result.
	 *
	 * @return array[]
	 */
	public static function data_js_trim(): array {
		return array(
			// Basic ASCII whitespace.
			'spaces'                           => array( ' hello ', 'hello' ),
			'tabs and line breaks'             => array( "\t\n\rhello\n\r\t", 'hello' ),
			// Unicode whitespace.
			'no-break space'                   => array( "\u{00A0}hello\u{00A0}", 'hello' ),
			'ideographic space'                => array( "\u{3000}hello\u{3000}", 'hello' ),
			'mixed unicode and ascii'          => array( "\u{00A0}\u{3000} hello \u{3000}\u{00A0}", 'hello' ),
			'line and paragraph separators'    => array( "\u{2028}hello\u{2029}", 'hello' ),
			// The byte order mark is part of the js_trim() set.
			'byte order mark'                  => array( "\u{FEFF}hello\u{FEFF}", 'hello' ),
			// Characters in mb_trim()'s default set that are absent from the js_trim() set.
			'null bytes are kept'              => array( "\0hello\0", "\0hello\0" ),
			'next line (U+0085) is kept'       => array( "\u{0085}hello\u{0085}", "\u{0085}hello\u{0085}" ),
			'mongolian vowel separator is kept' => array( "\u{180E}hello\u{180E}", "\u{180E}hello\u{180E}" ),
			// Vertical tab and form feed are trimmed.
			'vertical tab and form feed'       => array( "\v\fhello\f\v", 'hello' ),
			// Interior whitespace is preserved.
			'interior whitespace'              => array( " a \t b ", "a \t b" ),
			// No trimming needed.
			'nothing to trim'                  => array( 'hello', 'hello' ),
			// Empty string.
			'empty string'                     => array( '', '' ),
		);
	}
}
Loading