I have experimented and have a sort of solution to sending 9-bit data at 1.25 Mbps.
It involves modifications to my pigpio library.
I've added a gpioWaveAddSerialX() function which extends the gpioWaveAddSerial() function to allow for setting the number of data bits. This function constructs a waveform representing the serial data to be transmitted.
This also required an update to the internal waveBitDelay() function to allow for a variable number of data bits.
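The other change needed was to raise the base clock from 1 MHz to 1.25 MHz by changing clkDivI = 50 * micros to clkDivI = 40 * micros in the internal initClock() function. With the faster clock each nominal microsecond of waveform delay lasts 0.8 µs, so a waveform built for a nominal 1000000 baud is sent at 1.25 Mbps.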
The manifest constant PI_WAVE_MAX_BAUD in pigpio.h was changed to 1000000.

waveBitDelay
/*
 * Compute the per-bit delays, in microseconds, for one serial frame.
 *
 * baud     - bit rate; must be non-zero (it is used as a divisor).
 * bits     - number of data bits in the frame.
 * bitDelay - output array with room for at least bits+2 entries:
 *            [0]         start bit
 *            [1..bits]   data bits
 *            [bits+1]    final slot, sized so the frame ends on the
 *                        rounded (bits+2)-bit boundary
 *
 * Every entry is clamped to a minimum of 1.
 */
static void waveBitDelay(unsigned baud, unsigned bits, unsigned *bitDelay)
{
   /* All intermediate times are microseconds scaled by 100. */
   const unsigned bitTime  = 100000000 / baud;
   const unsigned halfTime = 50000000 / baud;

   /* Nominal bit time rounded down to a multiple of 200 (2 us). */
   const unsigned step  = (bitTime / 200) * 200;
   const unsigned slack = step / 3;

   unsigned begin, end, micros, bit;

   /* Start bit. */
   micros = step / 100;
   bitDelay[0] = (micros != 0) ? micros : 1;
   end = step;

   for (bit = 0; bit < bits; bit++)
   {
      /* Ideal centre of this data bit, measured from the frame start. */
      const unsigned centre = halfTime + (bit + 1) * bitTime;

      begin = end;
      end += step;

      /*
       * Stretch the bit by 2 us when its end lands only just past the
       * ideal centre.  The subtraction is unsigned, so when end is
       * before centre the difference wraps to a large value and no
       * stretch is applied.
       */
      if ((end - centre) < slack)
      {
         end += 200;
      }

      micros = (end - begin) / 100;
      bitDelay[bit + 1] = (micros != 0) ? micros : 1;
   }

   /* Final slot: whatever remains up to the rounded frame length. */
   begin = end;
   end = ((100 * (bits + 2) * 1000000 / baud) + 100) / 200 * 200;
   micros = (end - begin) / 100;
   bitDelay[bits + 1] = (micros != 0) ? micros : 1;
}
gpioWaveAddSerialX
/*
 * Error code for an unsupported number of data bits.  Current pigpio
 * headers define it; the fallback keeps this function buildable against
 * a header that predates it.
 */
#ifndef PI_BAD_DATABITS
#define PI_BAD_DATABITS -101
#endif

/*
 * Add a bit-banged serial waveform with a configurable number of data
 * bits to the waveform under construction.
 *
 * gpio    - user gpio to transmit on (0..PI_MAX_USER_GPIO).
 * bbBaud  - baud rate (PI_WAVE_MIN_BAUD..PI_WAVE_MAX_BAUD).
 * bbBits  - data bits per character (1..16).
 * offset  - minimum idle (high) time before the first start bit; the
 *           larger of this and one start-bit delay is used.
 * numChar - length of bstr in BYTES.  When bbBits > 8 each character
 *           occupies two bytes, so numChar/2 characters are sent.
 * bstr    - data to send.  For bbBits <= 8 one byte per character; for
 *           bbBits > 8 the buffer is read as native-endian uint16_t
 *           values and so must be suitably aligned for uint16_t.
 *
 * Each character is sent as a low start bit, bbBits data bits (least
 * significant bit first) and a high stop period.
 *
 * Returns 0 if there are no characters to send, otherwise the result of
 * rawWaveAddGeneric().  Invalid arguments are reported via SOFT_ERROR
 * with PI_BAD_USER_GPIO, PI_BAD_WAVE_BAUD, PI_BAD_DATABITS,
 * PI_TOO_MANY_CHARS or PI_BAD_SER_OFFSET.
 */
int gpioWaveAddSerialX
   (unsigned gpio,
    unsigned bbBaud,
    unsigned bbBits,
    unsigned offset,
    unsigned numChar,
    char *bstr)
{
   /* Characters wider than 8 bits are read as uint16_t, hence 16. */
   enum { MAX_DATA_BITS = 16 };

   int i, b, p, lev, c, v;
   uint16_t *wstr = (uint16_t *)bstr;

   /* start bit + data bits + stop slot */
   unsigned bitDelay[MAX_DATA_BITS + 2];
   unsigned stopSlot;

   DBG(DBG_USER,
      "gpio=%d baud=%d bits=%d offset=%d numChar=%d str=[%s]",
      gpio, bbBaud, bbBits, offset, numChar, myBuf2Str(numChar,
      (char *)bstr));

   CHECK_INITED;

   if (gpio > PI_MAX_USER_GPIO)
      SOFT_ERROR(PI_BAD_USER_GPIO, "bad gpio (%d)", gpio);

   if ((bbBaud < PI_WAVE_MIN_BAUD) || (bbBaud > PI_WAVE_MAX_BAUD))
      SOFT_ERROR(PI_BAD_WAVE_BAUD,
         "gpio %d, bad baud rate (%d)", gpio, bbBaud);

   /* waveBitDelay() writes bbBits+2 entries; reject anything that
      would not fit in bitDelay[]. */
   if ((bbBits < 1) || (bbBits > MAX_DATA_BITS))
      SOFT_ERROR(PI_BAD_DATABITS,
         "gpio %d, bad data bits (%d)", gpio, bbBits);

   if (numChar > PI_WAVE_MAX_CHARS)
      SOFT_ERROR(PI_TOO_MANY_CHARS, "too many chars (%d)", numChar);

   if (offset > PI_WAVE_MAX_MICROS)
      SOFT_ERROR(PI_BAD_SER_OFFSET, "offset too large (%d)", offset);

   /* numChar arrives as a byte count; wide characters use two bytes. */
   if (bbBits > 8) numChar /= 2;

   if (!numChar) return 0;

   waveBitDelay(bbBaud, bbBits, bitDelay);

   stopSlot = bbBits + 1;

   for (i=0; i<=bbBits+1; i++) DBG(0, "bit%d delay=%d", i, bitDelay[i]);

   p = 0;

   /* initial idle (high) period */

   wf[2][p].gpioOn  = (1<<gpio);
   wf[2][p].gpioOff = 0;
   wf[2][p].flags   = 0;

   if (offset > bitDelay[0]) wf[2][p].usDelay = offset;
   else                      wf[2][p].usDelay = bitDelay[0];

   for (i=0; i<numChar; i++)
   {
      p++;

      /* start bit */

      wf[2][p].gpioOn  = 0;
      wf[2][p].gpioOff = (1<<gpio);
      wf[2][p].usDelay = bitDelay[0];
      wf[2][p].flags   = 0;

      lev = 0;

      if (bbBits < 9) c = bstr[i];
      else            c = wstr[i];

      DBG(0, "i=%d c=%x", i , c);

      /* data bits, least significant first */

      for (b=0; b<bbBits; b++)
      {
         if (c & (1<<b)) v=1; else v=0;

         if (v == lev)
         {
            /* same level as the previous bit: extend the current pulse */
            wf[2][p].usDelay += bitDelay[b+1];
         }
         else
         {
            /* level change: start a new pulse */
            p++;

            lev = v;

            if (lev)
            {
               wf[2][p].gpioOn  = (1<<gpio);
               wf[2][p].gpioOff = 0;
               wf[2][p].flags   = 0;
            }
            else
            {
               wf[2][p].gpioOn  = 0;
               wf[2][p].gpioOff = (1<<gpio);
               wf[2][p].flags   = 0;
            }

            wf[2][p].usDelay = bitDelay[b+1];
         }
      }

      /* stop bit: its delay lives in the slot after the last data bit */

      if (lev) wf[2][p].usDelay += bitDelay[stopSlot];
      else
      {
         p++;

         wf[2][p].gpioOn  = (1<<gpio);
         wf[2][p].gpioOff = 0;
         wf[2][p].usDelay = bitDelay[stopSlot];
         wf[2][p].flags   = 0;
      }
   }

   /* trailing idle (high) period */

   p++;

   wf[2][p].gpioOn  = (1<<gpio);
   wf[2][p].gpioOff = 0;
   wf[2][p].usDelay = bitDelay[0];
   wf[2][p].flags   = 0;

   /* NOTE(review): p is the index of the last pulse written, so the
      count passed here excludes the trailing idle pulse -- confirm
      that this is intended. */
   return rawWaveAddGeneric(p, wf[2]);
}
Test program.
#include <stdio.h>
#include <stdint.h>
#include <pigpio.h>
#define GPIO 14
/*
 * Test program: builds a waveform that sends 600 nine-bit characters
 * (values 0..599) on GPIO with gpioWaveAddSerialX(), then transmits it
 * once after the user presses return, so the output can be captured
 * with a recorder.
 *
 * Returns 0 on success, 1 if the library fails to initialise or the
 * waveform cannot be built.
 */
int main(int argc, char *argv[])
{
   enum { NUM_WORDS = 600 };

   uint16_t buf[1024];
   int i, wid, rc;
   int status = 0;

   (void)argc;
   (void)argv;

   if (gpioInitialise() < 0) return 1;

   gpioSetMode(GPIO, PI_OUTPUT);

   for (i=0; i<NUM_WORDS; i++) buf[i] = i;

   /*
    * Arguments: gpio, baud, data bits, offset, byte count, data.
    * gpioWaveAddSerialX() takes a single offset argument; the length
    * is in bytes, i.e. two per 9-bit character.
    */
   rc = gpioWaveAddSerialX(GPIO, 1000000, 9, 0,
           (unsigned)(NUM_WORDS * sizeof buf[0]), (char*)buf);

   if (rc < 0)
   {
      printf("error %d\n", rc);
      gpioTerminate();
      return 1;
   }

   wid = gpioWaveCreate();

   if (wid >= 0)
   {
      printf("ready recorder, then return\n");
      getchar();

      gpioWaveTxSend(wid, 0);

      printf("stop recorder, then return\n");
      getchar();
   }
   else
   {
      printf("error %d\n", wid);
      status = 1;
   }

   gpioTerminate();

   return status;
}