Picolibc Updates

Tue Mar 1 23:28:58 2022 · semihost

$ arm-none-eabi-gcc -mcpu=cortex-m7 -mfloat-abi=hard

I have been researching old terminal and X games recently, and realized
that much of the code from 'xmille' originated from the terminal game
'mille', which is part of bsdgames.

...

[The copyright and license information] has been stripped out of all
code in the xmille distribution.  Also, none of the included materials
give credit to the original author, Ken Arnold.

void * nano_calloc(malloc_size_t n, malloc_size_t elem)
{
    ptrdiff_t bytes;
    void * mem;

    if (__builtin_mul_overflow (n, elem, &bytes))
    {
    RERRNO = ENOMEM;
    return NULL;
    }
    mem = nano_malloc(bytes);
    if (mem != NULL) memset(mem, 0, bytes);
    return mem;
}

foo = malloc(n);
if (foo) memset(foo, '\0', n);

foo = calloc(n, 1);

chunk->size = foo
nano_free((char *) chunk + CHUNK_OFFSET);

nano_free((char *) chunk + CHUNK_OFFSET);

mem = nano_malloc(new_size);
if (mem) {
    memcpy(mem, old, MIN(old_size, new_size));
    nano_free(old);
}
return mem;

for (element = head; element; element = element->next)
    do stuff ...

prev = NULL;
for (element = head; element; element = element->next)
    ...
    if (found)
        break;
    ...
    prev = element

if (prev != NULL)
    prev->next = element->next;
else
    head = element->next;

for (ptr = &head; (element = *ptr); ptr = &(element->next))
    ...
    if (found)
        break;

*ptr = element->next;

for (ptr = &head; (element = *ptr); ptr = &(element->next))
    if (found)
        break;

new_element->next = element;
*ptr = new_element;

typedef struct {
    char c;
    union {
    void *p;
    double d;
    long long ll;
    size_t s;
    } u;
} align_t;

#define MALLOC_ALIGN        (offsetof(align_t, u))

int
__dtoa_engine(double x, struct dtoa *dtoa, uint8_t max_digits, uint8_t max_decimals);

int
__ftoa_engine(float x, struct ftoa *ftoa, uint8_t max_digits, uint8_t max_decimals);

double
__atod_engine(uint64_t m10, int e10);

float
__atof_engine(uint32_t m10, int e10);

   text    data     bss     dec     hex filename
  59068      44   37968   97080   17b38 snek-qemu-riscv-orig.elf
  59430      44   37968   97442   17ca2 snek-qemu-riscv-ryu.elf
    362

t = 1.0f;

/* Iterate until s1 is flat */
do {
    t = t/2.0f;
    _de_casteljau(s, s1, s2, t);
} while (!_is_flat(s1));

flatness(t) = tolerance

float       hi = 1.0f;
float       lo = 0.0f;

/* Search for an initial section of the spline which
 * is flat, but not too flat
 */
for (;;) {

    /* Average the lo and hi values for our
     * next estimate
     */
    float t = (hi + lo) / 2.0f;

    /* Split the spline at the target location
     */
    _de_casteljau(s, s1, s2, t);

    /* Compute the flatness and see if s1 is flat
     * enough
     */
    float flat = _flatness(s1);

    if (flat <= SCALE_FLAT(SNEK_DRAW_TOLERANCE)) {

        /* Stop looking when s1 is close
         * enough to the target tolerance
         */
        if (flat >= SCALE_FLAT(SNEK_DRAW_TOLERANCE - SNEK_FLAT_TOLERANCE))
            break;

        /* Flat: t is the new lower interval bound */
        lo = t;
    } else {

        /* Not flat: t is the new upper interval bound */
        hi =  t;
    }
}

ux = (3×b.x - 2×a.x - d.x)²
uy = (3×b.y - 2×a.y - d.y)²
vx = (3×c.x - 2×d.x - a.x)²
vy = (3×c.y - 2×d.y - a.y)²

flat = max(ux, vx) + max(uy, vy)

/*
 * Copyright © 2020 Keith Packard <keithp@keithp.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
 */

#include <stdbool.h>
#include <stdio.h>
#include <string.h>
#include <stdint.h>
#include <math.h>

typedef float point_t[2];
typedef point_t spline_t[4];

uint64_t num_flats;
uint64_t num_points;

#define SNEK_DRAW_TOLERANCE 0.5f
#define SNEK_FLAT_TOLERANCE 0.01f

/*
 * This actually returns flatness² * 16,
 * so we need to compare against scaled values
 * using the SCALE_FLAT macro
 */
static float
_flatness(spline_t spline)
{
    /*
     * This computes the maximum deviation of the spline from a
     * straight line between the end points.
     *
     * From https://hcklbrrfnn.files.wordpress.com/2012/08/bez.pdf
     */
    float ux = 3.0f * spline[1][0] - 2.0f * spline[0][0] - spline[3][0];
    float uy = 3.0f * spline[1][1] - 2.0f * spline[0][1] - spline[3][1];
    float vx = 3.0f * spline[2][0] - 2.0f * spline[3][0] - spline[0][0];
    float vy = 3.0f * spline[2][1] - 2.0f * spline[3][1] - spline[0][1];

    ux *= ux;
    uy *= uy;
    vx *= vx;
    vy *= vy;
    if (ux < vx)
        ux = vx;
    if (uy < vy)
        uy = vy;
    ++num_flats;

    /*
     *If we wanted to return the true flatness, we'd use:
     *
     * return sqrtf((ux + uy)/16.0f)
     */
    return ux + uy;
}

/* Convert constants to values usable with _flatness() */
#define SCALE_FLAT(f)   ((f) * (f) * 16.0f)

/*
 * Linear interpolate from a to b using distance t (0 <= t <= 1)
 */
static void
_lerp (point_t a, point_t b, point_t r, float t)
{
    int i;
    for (i = 0; i < 2; i++)
        r[i] = a[i]*(1.0f - t) + b[i]*t;
}

/*
 * Split 's' into two splines at distance t (0 <= t <= 1)
 */
static void
_de_casteljau(spline_t s, spline_t s1, spline_t s2, float t)
{
    point_t first[3];
    point_t second[2];
    int i;

    for (i = 0; i < 3; i++)
        _lerp(s[i], s[i+1], first[i], t);

    for (i = 0; i < 2; i++)
        _lerp(first[i], first[i+1], second[i], t);

    _lerp(second[0], second[1], s1[3], t);

    for (i = 0; i < 2; i++) {
        s1[0][i] = s[0][i];
        s1[1][i] = first[0][i];
        s1[2][i] = second[0][i];

        s2[0][i] = s1[3][i];
        s2[1][i] = second[1][i];
        s2[2][i] = first[2][i];
        s2[3][i] = s[3][i];
    }
}

/*
 * Decompose 's' into straight lines which are
 * within SNEK_DRAW_TOLERANCE of the spline
 */
static void
_spline_decompose(void (*draw)(float x, float y), spline_t s)
{
    /* Start at the beginning of the spline. */
    (*draw)(s[0][0], s[0][1]);

    /* Split the spline until it is flat enough */
    while (_flatness(s) > SCALE_FLAT(SNEK_DRAW_TOLERANCE)) {
        spline_t    s1, s2;
        float       hi = 1.0f;
        float       lo = 0.0f;

        /* Search for an initial section of the spline which
         * is flat, but not too flat
         */
        for (;;) {

            /* Average the lo and hi values for our
             * next estimate
             */
            float t = (hi + lo) / 2.0f;

            /* Split the spline at the target location
             */
            _de_casteljau(s, s1, s2, t);

            /* Compute the flatness and see if s1 is flat
             * enough
             */
            float flat = _flatness(s1);

            if (flat <= SCALE_FLAT(SNEK_DRAW_TOLERANCE)) {

                /* Stop looking when s1 is close
                 * enough to the target tolerance
                 */
                if (flat >= SCALE_FLAT(SNEK_DRAW_TOLERANCE - SNEK_FLAT_TOLERANCE))
                    break;

                /* Flat: t is the new lower interval bound */
                lo = t;
            } else {

                /* Not flat: t is the new upper interval bound */
                hi =  t;
            }
        }

        /* Draw to the end of s1 */
        (*draw)(s1[3][0], s1[3][1]);

        /* Replace s with s2 */
        memcpy(&s[0], &s2[0], sizeof (spline_t));
    }

    /* S is now flat enough, so draw to the end */
    (*draw)(s[3][0], s[3][1]);
}

void draw(float x, float y)
{
    ++num_points;
    printf("%8g, %8g\n", x, y);
}

int main(int argc, char **argv)
{
    spline_t spline = {
        { 0.0f, 0.0f },
        { 0.0f, 256.0f },
        { 256.0f, -256.0f },
        { 256.0f, 0.0f }
    };
    _spline_decompose(draw, spline);
    fprintf(stderr, "flats %lu points %lu\n", num_flats, num_points);
    return 0;
}

static void
_twin_spline_decompose (twin_path_t *path,
            twin_spline_t   *spline, 
            twin_dfixed_t   tolerance_squared)
{
    if (_twin_spline_error_squared (spline) <= tolerance_squared)
    {
    _twin_path_sdraw (path, spline->a.x, spline->a.y);
    }
    else
    {
    twin_spline_t s1, s2;
    _de_casteljau (spline, &s1, &s2);
    _twin_spline_decompose (path, &s1, tolerance_squared);
    _twin_spline_decompose (path, &s2, tolerance_squared);
    }
}

static void
_lerp_half (twin_spoint_t *a, twin_spoint_t *b, twin_spoint_t *result)
{
    result->x = a->x + ((b->x - a->x) >> 1);
    result->y = a->y + ((b->y - a->y) >> 1);
}

static void
_de_casteljau (twin_spline_t *spline, twin_spline_t *s1, twin_spline_t *s2)
{
    twin_spoint_t ab, bc, cd;
    twin_spoint_t abbc, bccd;
    twin_spoint_t final;

    _lerp_half (&spline->a, &spline->b, &ab);
    _lerp_half (&spline->b, &spline->c, &bc);
    _lerp_half (&spline->c, &spline->d, &cd);
    _lerp_half (&ab, &bc, &abbc);
    _lerp_half (&bc, &cd, &bccd);
    _lerp_half (&abbc, &bccd, &final);

    s1->a = spline->a;
    s1->b = ab;
    s1->c = abbc;
    s1->d = final;

    s2->a = final;
    s2->b = bccd;
    s2->c = cd;
    s2->d = spline->d;
}

S' = _de_casteljau(s, 1/2)

S[n] = _de_casteljau(s0, (1/2)ⁿ)

while S is not flat:
    n = 1
    do
        Sleft, Sright = _decasteljau(S, (1/2)ⁿ)
        n = n + 1
    until Sleft is flat
    result ← Sleft
    S = Sright
result ← S

/*
 * Copyright © 2020 Keith Packard <keithp@keithp.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
 */

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

typedef float point_t[2];
typedef point_t spline_t[4];

#define SNEK_DRAW_TOLERANCE 0.5f

/* Is this spline flat within the defined tolerance */
static bool
_is_flat(spline_t spline)
{
    /*
     * This computes the maximum deviation of the spline from a
     * straight line between the end points.
     *
     * From https://hcklbrrfnn.files.wordpress.com/2012/08/bez.pdf
     */
    float ux = 3.0f * spline[1][0] - 2.0f * spline[0][0] - spline[3][0];
    float uy = 3.0f * spline[1][1] - 2.0f * spline[0][1] - spline[3][1];
    float vx = 3.0f * spline[2][0] - 2.0f * spline[3][0] - spline[0][0];
    float vy = 3.0f * spline[2][1] - 2.0f * spline[3][1] - spline[0][1];

    ux *= ux;
    uy *= uy;
    vx *= vx;
    vy *= vy;
    if (ux < vx)
        ux = vx;
    if (uy < vy)
        uy = vy;
    return (ux + uy <= 16.0f * SNEK_DRAW_TOLERANCE * SNEK_DRAW_TOLERANCE);
}

static void
_lerp (point_t a, point_t b, point_t r, float t)
{
    int i;
    for (i = 0; i < 2; i++)
        r[i] = a[i]*(1.0f - t) + b[i]*t;
}

static void
_de_casteljau(spline_t s, spline_t s1, spline_t s2, float t)
{
    point_t first[3];
    point_t second[2];
    int i;

    for (i = 0; i < 3; i++)
        _lerp(s[i], s[i+1], first[i], t);

    for (i = 0; i < 2; i++)
        _lerp(first[i], first[i+1], second[i], t);

    _lerp(second[0], second[1], s1[3], t);

    for (i = 0; i < 2; i++) {
        s1[0][i] = s[0][i];
        s1[1][i] = first[0][i];
        s1[2][i] = second[0][i];

        s2[0][i] = s1[3][i];
        s2[1][i] = second[1][i];
        s2[2][i] = first[2][i];
        s2[3][i] = s[3][i];
    }
}

static void
_spline_decompose(void (*draw)(float x, float y), spline_t s)
{
    float       t;
    spline_t    s1, s2;

    (*draw)(s[0][0], s[0][1]);

    /* If s is flat, we're done */
    while (!_is_flat(s)) {
        t = 1.0f;

        /* Iterate until s1 is flat */
        do {
            t = t/2.0f;
            _de_casteljau(s, s1, s2, t);
        } while (!_is_flat(s1));

        /* Draw to the end of s1 */
        (*draw)(s1[3][0], s1[3][1]);

        /* Replace s with s2 */
        memcpy(&s[0], &s2[0], sizeof (spline_t));
    }
    (*draw)(s[3][0], s[3][1]);
}

void draw(float x, float y)
{
    printf("%8g, %8g\n", x, y);
}

int main(int argc, char **argv)
{
    spline_t spline = {
        { 0.0f, 0.0f },
        { 0.0f, 256.0f },
        { 256.0f, -256.0f },
        { 256.0f, 0.0f }
    };
    _spline_decompose(draw, spline);
    return 0;
}

typedef struct VkPresentPeriodMESA {
    VkStructureType    sType;
    const void*        pNext;
    uint32_t           swapchainCount;
    const int64_t*     pPresentPeriods;
} VkPresentPeriodMESA;

Architecture	Alignment
x86_64	8
RISC-V	8
aarch64	8
arm	8
x86	4

Search	Calls	Points
Simple	150	33
Bisect	229	25

Testing Picolibc with the glibc tests

Parallel meson build files

Pre-loading a pile of hacks

Supporting command line parameters

Avoiding glibc headers

Adding more POSIX to Picolibc

Which tests are working?

Bugs found and fixed in Picolibc

Tests added to Picolibc

Where's the source code?

Concluding thoughts

Reviving Very Old X Code

Fixing The Bug

Building Kgames in 2020

Kgames 1.0 uploaded to Debian New Queue

But They Look So Old

Xkw version 2.0

SVG Playing Cards

SVG Xmille Cards

The Results

Picolibc Updates

Cleaning up the Picolibc Math Library

New Code Sponsored by ARM

Errno Adventures

Exceptions

Fixing Memory Allocator bugs

Something Something Padding

A Few Bugs

Time For Testing

GCC Optimizations

Reworking Malloc

Padding

realloc

List Walking

System Parameters

memalign

Sending Patches Back to Newlib

Float/String Conversion in Picolibc: Enter “Ryū”

Getting the Ryū Code

Picolibc String/Float conversion interface

Porting Ryū to Picolibc

Correctness Results

Size Results

Performance

Pointers to the Code

Float/String Conversion in Picolibc

Newlib Float/String Conversion

Testing Every Possible Allocation Failure

Bits Pushed to the Repository

Picolibc's “Tinystdio” Float/String Conversion

Slightly Better Iterative Spline Decomposition

The Initial Search

Bisection Method

A Closed Form Approach?

Update: Fancier Method Found!

Current Implementation

Decomposing Splines Without Recursion

The Usual Method

De Casteljau Splits At Any Point

Iterative Left-most Spline Decomposition

Final Code

Prototyping a Vulkan Extension — VK_MESA_present_period

VK_GOOGLE_display_timing

Specifying Presentation Periods

Going In The Wrong Direction

Window-system Independent Implementation

VK_MESA_present_period

Status and Plans

Snekboard's Crowd Supply Campaign

Getting Things Together

Launching the Campaign

Interest is Strong

Creating Teaching Materials

Free Software / Free Hardware

Linux.conf.au 2020

Picolibc

Snek

X History and Politics