function sqrt(uint256 x) internal pure returns (uint256 z) {
assembly {
z := 181
let r := shl(7, lt(87112285931760246646623899502532662132735, x))
r := or(r, shl(6, lt(4722366482869645213695, shr(r, x))))
r := or(r, shl(5, lt(1099511627775, shr(r, x))))
@>
@> r := or(r, shl(4, lt(16777002, shr(r, x))))
z := shl(shr(1, r), z)
z := shr(18, mul(z, add(shr(r, x), 65536)))
z := shr(1, add(z, div(x, z)))
z := shr(1, add(z, div(x, z)))
z := shr(1, add(z, div(x, z)))
z := shr(1, add(z, div(x, z)))
z := shr(1, add(z, div(x, z)))
z := shr(1, add(z, div(x, z)))
z := shr(1, add(z, div(x, z)))
z := sub(z, lt(div(x, z), z))
}
}
function sqrt(uint256 x) internal pure returns (uint256 z) {
/// @solidity memory-safe-assembly
assembly {
z := 181
// This segment is to get a reasonable initial estimate for the Babylonian method. With a bad
// start, the correct # of bits increases ~linearly each iteration instead of ~quadratically.
let r := shl(7, lt(87112285931760246646623899502532662132735, x))
r := or(r, shl(6, lt(4722366482869645213695, shr(r, x))))
r := or(r, shl(5, lt(1099511627775, shr(r, x))))
// Correct: 16777215 0xffffff
- r := or(r, shl(4, lt(16777002, shr(r, x))))
+ r := or(r, shl(4, lt(16777215, shr(r, x))))
z := shl(shr(1, r), z)
// There is no overflow risk here since `y < 2**136` after the first branch above.
z := shr(18, mul(z, add(shr(r, x), 65536))) // A `mul()` is saved from starting `z` at 181.
// Given the worst case multiplicative error of 2.84 above, 7 iterations should be enough.
z := shr(1, add(z, div(x, z)))
z := shr(1, add(z, div(x, z)))
z := shr(1, add(z, div(x, z)))
z := shr(1, add(z, div(x, z)))
z := shr(1, add(z, div(x, z)))
z := shr(1, add(z, div(x, z)))
z := shr(1, add(z, div(x, z)))
// If `x+1` is a perfect square, the Babylonian method cycles between
// `floor(sqrt(x))` and `ceil(sqrt(x))`. This statement ensures we return floor.
// See: https://en.wikipedia.org/wiki/Integer_square_root#Using_only_integer_division
z := sub(z, lt(div(x, z), z))
}
}