SoFunction
Updated on 2025-03-10

PHP source code: how explode works

When we need to split a string into an array on a given character or substring, explode is a pleasure to use, but do you know how explode actually works?
First of all, it is certain that explode allocates memory, without a doubt.
The code is as follows:

//File 1: ext/standard/string.c
//Look at the source code of explode first
/*
 * explode(delimiter, string [, limit])
 *
 * Splits `string` on `delimiter` and stores the pieces in return_value.
 *   - an empty delimiter raises an E_WARNING and yields FALSE;
 *   - an empty input string yields array("") when limit >= 0, otherwise
 *     an empty array (this is why explode('|', '') is legal);
 *   - limit > 1 is handled by php_explode();
 *   - limit < 0 is handled by php_explode_negative_limit();
 *   - limit of 0 or 1 returns the whole input string as the single element.
 */
PHP_FUNCTION(explode)
{
    char *delim, *str;
    int delim_len = 0, str_len = 0;
    long limit = LONG_MAX; /* no limit unless the caller supplies one */
    zval zstr, zdelim;

    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|l", &delim, &delim_len, &str, &str_len, &limit) == FAILURE) {
        return;
    }

    if (!delim_len) {
        php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty delimiter");
        RETURN_FALSE;
    }

    /* The result array that will receive the pieces. */
    array_init(return_value);

    if (!str_len) {
        if (limit >= 0) {
            add_next_index_stringl(return_value, "", sizeof("") - 1, 1);
        }
        return;
    }

    /*
     * Wrap the raw buffers in stack zvals for the helpers. The last
     * argument (duplicate) is 0, so per the ZVAL_STRINGL definition quoted
     * later in this file the zvals point at the caller's buffers and no
     * copy is made here.
     */
    ZVAL_STRINGL(&zstr, str, str_len, 0);
    ZVAL_STRINGL(&zdelim, delim, delim_len, 0);

    /* limit is the optional third argument; it may be positive or negative. */
    if (limit < 0) {
        php_explode_negative_limit(&zdelim, &zstr, return_value, limit);
        return;
    }
    if (limit > 1) {
        php_explode(&zdelim, &zstr, return_value, limit);
        return;
    }

    /* limit is 0 or 1: the whole string becomes element 0. */
    add_index_stringl(return_value, 0, str, str_len, 1);
}

The code is as follows:

//ZVAL_STRINGL source code:
//File 2: zend/zend_API.c
/*
 * ZVAL_STRINGL(z, s, l, duplicate): turn the zval pointed to by `z` into a
 * string of length `l`.
 *
 * When `duplicate` is non-zero the bytes of `s` are copied with estrndup();
 * when it is zero the zval simply points at `s` and nothing is allocated.
 *
 * NOTE(review): `l` and `duplicate` are expanded without parentheses and the
 * body is a bare brace block, so pass simple expressions and take care when
 * using the macro as the sole statement of an unbraced if/else.
 */
#define ZVAL_STRINGL(z, s, l, duplicate) { \
const char *__s=(s); int __l=l; \
Z_STRLEN_P(z) = __l; \
Z_STRVAL_P(z) = (duplicate?estrndup(__s, __l):(char*)__s);\
Z_TYPE_P(z) = IS_STRING; \
}
....
//estrndup is the main course:
//File 3: zend/zend_alloc.h
/*
 * estrndup(s, length): convenience wrapper that forwards to _estrndup(),
 * appending the ZEND_FILE_LINE_CC / ZEND_FILE_LINE_EMPTY_CC arguments
 * (presumably call-site file/line information for debug builds; confirm
 * against their definitions).
 */
#define estrndup(s, length) _estrndup((s), (length) ZEND_FILE_LINE_CC ZEND_FILE_LINE_EMPTY_CC)
....
// Implementation of _estrndup: zend/zend_alloc.c
/*
 * Duplicate the first `length` bytes of `s` into a newly allocated buffer
 * and append a terminating NUL byte.
 *
 * Returns the new buffer, or NULL when the allocation fails or when
 * `length + 1` would wrap around to zero.
 */
ZEND_API char *_estrndup(const char *s, uint length ZEND_FILE_LINE_DC ZEND_FILE_LINE_ORIG_DC)
{
    char *p;

    /*
     * For the largest uint value, length + 1 wraps to 0: the allocation
     * would be far too small and the memcpy below would overrun it.
     * Refuse the request instead.
     */
    if (UNEXPECTED(length + 1 == 0)) {
        return NULL;
    }

    /* This is where the space is allocated: payload plus one byte for the NUL. */
    p = (char *) _emalloc(length + 1 ZEND_FILE_LINE_RELAY_CC ZEND_FILE_LINE_ORIG_RELAY_CC);
    if (UNEXPECTED(p == NULL)) {
        return p;
    }

    memcpy(p, s, length); /* copy the payload into the new buffer */
    p[length] = 0;        /* NUL-terminate */
    return p;
}
//In addition, the ZVAL_STRING macro used by substr, strrchr and strstr also relies on the implementation above

Next, we analyze which call is made for each value of explode's third parameter, limit. The three cases correspond to the final if / else if / else chain in explode and differ only in the condition on limit.
Note: When limit is omitted, it defaults to LONG_MAX, which falls under case 1.
1、limit > 1 :
This calls php_explode, which is also found in ext/standard/string.c, immediately above the explode implementation (so it is easy to locate: the helpers that a function in this file calls are, almost without exception, defined directly above that function ^_^).
The code is as follows:

/*
 * Split `str` on `delim` and append each piece to `return_value`.
 *
 * Called by explode() when limit > 1. At most `limit` elements are
 * produced; once the limit is reached, the remainder of the string
 * (including any further delimiters) becomes the last element.
 */
PHPAPI void php_explode(zval *delim, zval *str, zval *return_value, long limit)
{
    /* Start of the piece currently being collected. */
    char *piece = Z_STRVAL_P(str);
    /* One past the last byte of the source string. */
    char *end = piece + Z_STRLEN_P(str);
    /* First occurrence of the delimiter (the same search helper strpos-style functions use). */
    char *sep = php_memnstr(piece, Z_STRVAL_P(delim), Z_STRLEN_P(delim), end);

    if (sep == NULL) {
        /* No delimiter at all: explode('|', 'abc') is legal and gives array(0 => 'abc'). */
        add_next_index_stringl(return_value, piece, Z_STRLEN_P(str), 1);
        return;
    }

    for (;;) {
        /* Emit the bytes between the previous position and this delimiter. */
        add_next_index_stringl(return_value, piece, sep - piece, 1);

        /*
         * Skip over the delimiter. Example: delimiter '|', string 'ab|c',
         * delimiter found at offset 2, so the next piece starts at 2 + 1 = 3.
         */
        piece = sep + Z_STRLEN_P(delim);

        sep = php_memnstr(piece, Z_STRVAL_P(delim), Z_STRLEN_P(delim), end);
        if (sep == NULL) {
            break;
        }
        /* limit is only decremented when another delimiter was found. */
        if (--limit <= 1) {
            break;
        }
    }

    /*
     * Whatever follows the last consumed delimiter is the final element:
     * explode('|', 'avc|sdf') => array(0 => 'avc', 1 => 'sdf')
     */
    if (piece <= end) {
        add_next_index_stringl(return_value, piece, end - piece, 1);
    }
}

2、limit < 0 :
This calls the php_explode_negative_limit function.
The code is as follows:

/*
 * Split `str` on `delim` and append every piece except the last -limit
 * ones to `return_value`.
 *
 * Called by explode() when limit < 0. If the delimiter does not occur,
 * nothing is appended, so explode('|', 'abc', -1) yields an empty array.
 */
PHPAPI void php_explode_negative_limit(zval *delim, zval *str, zval *return_value, long limit)
{
#define EXPLODE_ALLOC_STEP 64
    char *start = Z_STRVAL_P(str);
    char *end = start + Z_STRLEN_P(str);
    char *sep = php_memnstr(start, Z_STRVAL_P(delim), Z_STRLEN_P(delim), end);

    /*
     * With no delimiter there is only one chunk, and since limit <= -1,
     * 1 + limit <= 0: nothing is added and the array is left as it was.
     */
    if (sep != NULL) {
        int capacity = EXPLODE_ALLOC_STEP, count = 0;
        long idx, keep;
        /* starts[k] is the address at which the k-th piece begins. */
        char **starts = emalloc(capacity * sizeof(char *));

        /* The first piece begins at the start of the string. */
        starts[count++] = start;

        /* Pass 1: record where the piece after each delimiter begins. */
        while (sep != NULL) {
            if (count >= capacity) {
                /* Grow the table so the next entry fits. */
                capacity = count + EXPLODE_ALLOC_STEP;
                starts = erealloc(starts, capacity * sizeof(char *));
            }
            start = sep + Z_STRLEN_P(delim);
            starts[count++] = start;
            sep = php_memnstr(start, Z_STRVAL_P(delim), Z_STRLEN_P(delim), end);
        }

        /* Number of leading pieces to return once the last -limit are dropped. */
        keep = limit + count;

        /*
         * Pass 2: limit <= -1, so keep < count and starts[idx + 1] is always
         * a valid entry. The loop condition also covers keep <= 0.
         */
        for (idx = 0; idx < keep; idx++) {
            add_next_index_stringl(return_value, starts[idx],
                (starts[idx + 1] - Z_STRLEN_P(delim)) - starts[idx],
                1
            );
        }

        /* Important: release the position table. */
        efree(starts);
    }
#undef EXPLODE_ALLOC_STEP
}

3、limit = 1 or limit = 0 :
When neither the first nor the second condition is satisfied, this branch is taken. It is very simple: it puts the source string into the output array unchanged, so explode('|', 'avc|sd', 1) and explode('|', 'avc|sd', 0) both return array(0 => 'avc|sd');
The code is as follows:

//Source of add_next_index_stringl (a sibling of the add_index_stringl call used in this branch)
//File 4: zend/zend_API.c
/*
 * Append a string element to the array zval `arg`.
 *
 * A new zval is created, set to the string (`str`, `length`) via
 * ZVAL_STRINGL, with `duplicate` passed through to control whether the bytes
 * are copied, and inserted at the next free index of the array's hash table.
 * Returns the result of zend_hash_next_index_insert().
 */
ZEND_API int add_next_index_stringl(zval *arg, const char *str, uint length, int duplicate) /* {{{ */
{
    zval *element;
    int status;

    MAKE_STD_ZVAL(element);
    ZVAL_STRINGL(element, str, length, duplicate);

    /* The hash table stores the zval pointer itself, hence sizeof(zval *). */
    status = zend_hash_next_index_insert(Z_ARRVAL_P(arg), &element, sizeof(zval *), NULL);
    return status;
}
//zend_hash_next_index_insert
//zend/zend_hash.h
/*
 * zend_hash_next_index_insert(ht, pData, nDataSize, pDest): forwards to
 * _zend_hash_index_update_or_next_insert() with an index argument of 0 and
 * the HASH_NEXT_INSERT flag (presumably "append at the next free numeric
 * index"; confirm in zend_hash.c).
 */
#define zend_hash_next_index_insert(ht, pData, nDataSize, pDest) \
_zend_hash_index_update_or_next_insert(ht, 0, pData, nDataSize, pDest, HASH_NEXT_INSERT ZEND_FILE_LINE_CC)
//zend/zend_hash.c
//It's too long, so I won't paste it here

As we can see (ignoring the cost of memory allocation):
When limit>1, the efficiency is O(N) [N is the limit value].
When limit<0, the efficiency is O(N+M) [N is the limit value, M is the number of times the delimiter appears].
When limit=1 or limit=0, the efficiency is O(1).