/* VGAlib version 1.2 - (c) 1993 Tommy Frandsen 		   */
/*								   */
/* This library is free software; you can redistribute it and/or   */
/* modify it without any restrictions. This library is distributed */
/* in the hope that it will be useful, but without any warranty.   */

/* Cirrus support Copyright (C) 1993 Harm Hanemaayer */
/* partially copyrighted (C) 1993 by Hartmut Schirmer */

#include <stdio.h>	/* for printf */
#include <string.h>	/* for memset */
#include "vga.h"
#include "libvga.h"
#include "driver.h"

/*
	Cirrus registers addition:	
			4 extra CRT regs 
		    	3 extra Graphics regs 
			26 extra Sequencer regs (last one is not changed)
			Hicolor DAC register

	For SVGA modes, if a 5426 is present, bit 5 of Graphics register 0x0b
	is set to enable 16K bank granularity (the default is 4K). This way,
	2Mb of video memory is addressable.

	Currently the default (lowest) dot clocks (and frequencies) are used
	for each mode. All Cirrus cards support higher clocks for most modes;
	up to 75 MHz, which is not that high. But 90 MHz may be available
	on 5426's at least; my BIOS provides a 1024x768x16bit interlaced mode
	which uses a clock of 90 MHz (although the clock registers say 45 MHz;
	perhaps it uses a multiplexing mode), but it is above the official
	spec (which is 86 MHz).	In the following table, dot clock is in
	Mbytes/sec.

			Default		BIOS max.	Hypothetical max.
			Dot	Freq	Dot 	Freq	Dot	Freq
	640x480x256	25 MHz	60 Hz	31.5	72		90
	640x480x32k	50	60	63	72		90
	640x480x16M	75	60	75	60	90	72
	800x600x256	36	56	50	72		90
	800x600x32k	72	56	72	56	90	68
	800x600x16M	96.4	50
	800x600x16M*					90	90i
	1024x768x256	45	87i	75	70	90	84
	1024x768x32k	90	87i	90	87i	90	87i
	1280x1024x256	75	87i	75	87i
	1150x910x256					90	~60
	* - very hypothetical

	Note that the 90 MHz dot clock would make 640x480x16M and 800x600x32k
	flicker free (i.e. really usable), although it slows down the card
	a lot. My card seems to be able to go up to 100 MHz (this may not
	be safe though).

	The Hicolor DAC register seems to have the following values:

	0x00	256 color
	0xf0	32K color
	0xe1	64K color
	0xe5	16M color 

	The dot clock must be multiplied by the number of bytes per pixel.

	Dot clock formulae (this was taken from the XFree86 linkkit driver):

*/

#if 0

CLOCK_FACTOR is double the osc freq in kHz (osc = 14.31818 MHz)
#define CLOCK_FACTOR 28636

clock in kHz is (numer * CLOCK_FACTOR / (denom & 0x3E)) >> (denom & 1)
#define CLOCKVAL(n, d) \
     ((((n) & 0x7F) * CLOCK_FACTOR / ((d) & 0x3E)) >> ((d) & 1))

#endif


#define CIRRUS_HIGHDOTCLOCK 0
/*

	0 : no 800x600x16M NI
	1 : 800x600x16M NI at 49.7 Hz (dotclock 96.1 MHz)
		Horizontal Sync: 31.5 KHz
		This is a bit flickery of course, and needs some
		adjusting on a fixed frequency monitor.
	2 : 800x600x16M NI at 56 Hz (dotclock 108.3 MHz)
		Horizontal Sync: 35.5 KHz
		Most cards probably cannot handle this dot clock (my card
		manages up to 100 MHz).

	800x600x16M interlaced at 90Hz (dot clock 90 MHz) would be easier on
	the hardware (and the eye, probably), but is hard to define.

	I have disabled 800x600x16M since the official limit (due to
	heat/power fluctuation considerations etc.) is 86 MHz, which probably
	means that using it non-stop for a couple of hours is not safe.
	Picture viewing (about the only thing that it is useful for) would
	probably be OK.
*/

/* This macro relies on compiler optimization for acceptable code size :*) */
/* Set dot clock (clk / 10) MHz */
/* Expands to a switch on clk that stores the two clock register values in */
/* the variables clockreg0x0e and clockreg0x1e, which must be declared in */
/* the scope where the macro is used. A clk value that is not listed below */
/* leaves both variables untouched. */
#define setclock(clk) \
	switch (clk) { \
	case 250 : clockreg0x0e = 0x4a; clockreg0x1e = 0x2b; break; \
	case 315 : clockreg0x0e = 0x42; clockreg0x1e = 0x1f; break; \
	case 360 : clockreg0x0e = 0x7e; clockreg0x1e = 0x33; break; \
	case 400 : clockreg0x0e = 0x51; clockreg0x1e = 0x3a; break; \
	case 450 : clockreg0x0e = 0x55; clockreg0x1e = 0x36; break; \
	case 500 : clockreg0x0e = 0x65; clockreg0x1e = 0x3a; break; \
	case 630 : clockreg0x0e = 0x42; clockreg0x1e = 0x1e; break; \
	case 650 : clockreg0x0e = 0x76; clockreg0x1e = 0x34; break; \
	case 720 : clockreg0x0e = 0x7e; clockreg0x1e = 0x32; break; \
	case 750 : clockreg0x0e = 0x3a; clockreg0x1e = 0x16; break; \
	case 800 : clockreg0x0e = 0x5f; clockreg0x1e = 0x22; break; \
	case 900 : clockreg0x0e = 0x7e; clockreg0x1e = 0x28; break; \
	case 960 : clockreg0x0e = 0x5e; clockreg0x1e = 0x1c; break; \
	case 1000 : clockreg0x0e = 0x7e; clockreg0x1e = 0x24; break; \
	}


/* Forward declarations of functions that are used before their definition. */
static int cirrus_init(int, int, int);
static int cirrus_interlaced( int mode );
static int cirrus_unlock();

/* Amount of video memory in Kbytes (set by cirrus_init). */
static int cirrus_memory;
/* Chip type, one of the CLGD54xx constants below (set by cirrus_init). */
static int cirrus_chiptype;

/* Chip types; the values are also used as index into cirrus_typenumber. */
enum { CLGD5420 = 0, CLGD5422, CLGD5424, CLGD5426, CLGD5428 };

#include "cirrus.regs"

/* Mode table */
/* Associates each mode handled by this driver with a register set */
/* (presumably defined in "cirrus.regs", included above). */
/* The 1024x768 16/256 color and 1280x1024x16 entries refer to the */
/* *i_regs sets; cirrus_setmode may substitute the non-interlaced 1024x768 */
/* sets depending on the monitor type. */
/* The 800x600x16M entry is only present if CIRRUS_HIGHDOTCLOCK is nonzero. */
static ModeTable cirrus_modes[] = {
	OneModeEntry(320x200x256),	/* SVGA-type 320x200x256 */
	OneModeEntry(800x600x16),
	{ G1024x768x16, g1024x768x16i_regs },
	{ G1280x1024x16, g1280x1024x16i_regs },
	OneModeEntry(640x480x256),
	OneModeEntry(800x600x256),
	{ G1024x768x256, g1024x768x256i_regs },
	OneModeEntry(320x200x32K),
	OneModeEntry(640x480x32K),
	OneModeEntry(800x600x32K),
	OneModeEntry(1024x768x32K),
	OneModeEntry(320x200x64K),
	OneModeEntry(640x480x64K),
	OneModeEntry(800x600x64K),
	OneModeEntry(1024x768x64K),
	OneModeEntry(320x200x16M),
	OneModeEntry(640x480x16M),
#if CIRRUS_HIGHDOTCLOCK
	OneModeEntry(800x600x16M),
#endif
	END_OF_MODE_TABLE
};


/* Return 1 if any of bits 4-7 of the register at index 0x07 of port */
/* 0x3c4 is set (this driver takes that to mean linear addressing is */
/* active), 0 otherwise. */
int cirrus_inlinearmode() {
	int value;

	outb(0x3c4, 0x07);
	value = inb(0x3c5);
	if (value & 0xf0)
		return 1;
	return 0;
}

 
/* Fill in chipset specific mode information */
/* Sets maxpixels, maxlogicalwidth, startaddressrange and haveblit in */
/* *modeinfo and updates the IS_INTERLACED, HAVE_RWPAGE and IS_LINEAR bits */
/* of modeinfo->flags. Reads bytesperpixel, linewidth and height from */
/* *modeinfo, which must already be filled in. Always returns 0. */

static int cirrus_getmodeinfo( int mode, vga_modeinfo *modeinfo ) {
	if (modeinfo->bytesperpixel > 0)
		modeinfo->maxpixels = cirrus_memory * 1024 / modeinfo->bytesperpixel;
	else
		/* 16-color SVGA mode */
		/* Value taken from the air. */
		modeinfo->maxpixels = cirrus_memory * 2048;
	modeinfo->maxlogicalwidth = 4088;
#if 0
	if (mode != G320x200x256) {
	/* No need to check for 320x200x256, we now have a special */
	/* SVGA-derived 320x200x256 mode that fully supports page */
	/* flipping etc. */
#endif	
		modeinfo->startaddressrange = 0x1fffff;
		/* Blits are only offered for 1 byte per pixel modes on a */
		/* 5426 or 5428, and only if the claimed video memory (or, */
		/* when VMEM is -1, one screen) leaves 64 bytes free. */
		if (modeinfo->bytesperpixel == 1 &&
		    cirrus_chiptype >= CLGD5426 &&
		    (VMEM == -1 ? modeinfo->linewidth * modeinfo->height :
		    VMEM) <= (cirrus_memory * 1024 - 64)) {
			modeinfo->haveblit = HAVE_FILLBLIT | HAVE_BITBLIT |
				HAVE_IMAGEBLIT;
			/* 64 bytes are used as a fill pattern */
			modeinfo->maxpixels = (cirrus_memory * 1024 - 64)
				/ modeinfo->bytesperpixel;
		}
		else
			modeinfo->haveblit = 0;
#if 0
	}
	else {
		modeinfo->startaddressrange = 0xffff;
		modeinfo->maxpixels = 65536;
		modeinfo->haveblit = 0;
	}
#endif
	if (cirrus_interlaced(mode))
	  modeinfo->flags |= IS_INTERLACED;
	/* This driver has no separate read/write paging. */
	modeinfo->flags &= ~HAVE_RWPAGE;

#if 0	/* We can't be sure it will work. We'd have to test the amount of */
	/* memory installed (which would have to be 14Mb or less). */
	/* Linear mode can still be forced by vga_setlinearaddressing(). */
	modeinfo->flags |= CAPABLE_LINEAR;
#endif
	if (cirrus_inlinearmode())
		modeinfo->flags |= IS_LINEAR;
	return 0;
}


/* Read and save chipset-specific registers */
/* The values are stored in regs[] starting at offset EXT: */
/*	+0..+3		CRTC registers 0x18-0x1b */
/*	+4..+6		graphics registers 0x09-0x0b */
/*	+7..+32		sequencer registers 0x05-0x1e */
/*	+34		Hicolor DAC register */
/* Returns the number of additional registers (35). */

static int cirrus_saveregs( unsigned char regs[] ) {
	unsigned char *ext = regs + EXT;
	int reg;
	int n;

	/* May be locked again by other programs (eg. X) */
	cirrus_unlock();

	/* Extended CRTC registers. */
	for (reg = 0x18; reg < 0x18 + 4; reg++) {
		port_out(reg, CRT_I);
		ext[reg - 0x18] = port_in(CRT_D);
	}

	/* Extended graphics registers. */
	for (reg = 0x09; reg < 0x09 + 3; reg++) {
		port_out(reg, GRA_I);
		ext[4 + (reg - 0x09)] = port_in(GRA_D);
	}

	/* Extended sequencer registers. Only 26 are saved (it used to be */
	/* 27); the last register indicates the internal memory clock, so */
	/* a higher speed set with a DOS driver disk program or setmclk */
	/* is left alone. */
	for (reg = 0x05; reg < 0x05 + 26; reg++) {
		port_out(reg, SEQ_I);
		ext[7 + (reg - 0x05)] = port_in(SEQ_D);
	}

	/* Hicolor DAC register: one read of 0x3c8, then the fifth */
	/* consecutive read of 0x3c6 is the value that is kept. */
	inb(0x3c8);
	for (n = 0; n < 4; n++)
		inb(0x3c6);
	ext[34] = inb(0x3c6);

	return 35;  /* Cirrus has 35 additional registers */
}


/* Write c to the Hicolor DAC register: read 0x3c8 once, read 0x3c6 four */
/* times, write the value to 0x3c6 and finish with a read of 0x3c8. */
static void writehicolordac( unsigned char c ) {
	int reads;

	inb(0x3c8);
	for (reads = 0; reads < 4; reads++)
		inb(0x3c6);
	outb(0x3c6, c);
	inb(0x3c8);
}


/* Set chipset-specific registers */
/* Writes back the values stored at regs[EXT..EXT+34] in the layout used */
/* by cirrus_saveregs. The mode argument is not used. Always returns 0. */

static int cirrus_setregs( const unsigned char regs[], int mode )
{
	const unsigned char *ext = regs + EXT;
	int reg;

	/* May be locked again by other programs (eg. X) */
	cirrus_unlock();

	/* Extended CRTC registers 0x18-0x1b. */
	for (reg = 0x18; reg < 0x18 + 4; reg++) {
		port_out(reg, CRT_IC);
		port_out(ext[reg - 0x18], CRT_DC);
	}

	/* Extended graphics registers 0x09-0x0b. */
	for (reg = 0x09; reg < 0x09 + 3; reg++) {
		port_out(reg, GRA_I);
		port_out(ext[4 + (reg - 0x09)], GRA_D);
	}

	/* Extended sequencer registers 0x05-0x1e. */
	for (reg = 0x05; reg < 0x05 + 26; reg++) {
		port_out(reg, SEQ_I);
		port_out(ext[7 + (reg - 0x05)], SEQ_D);
	}

	/* Hicolor DAC register. */
	writehicolordac(ext[34]);

	return 0;
}


/* Return nonzero if mode is available */
/* Modes without an entry in cirrus_modes (and GPLANE16) are passed on to */
/* the standard VGA driver. For the other modes SVGADRV is returned if the */
/* mode fits in video memory and suits the monitor type, 0 otherwise. */

static int cirrus_modeavailable( int mode ) {
	const unsigned char *modregs;
	struct info *mi;

	modregs = LOOKUPMODE(cirrus_modes, mode);
	if (modregs == NULL || mode == GPLANE16)
		return vga_chipsetfunctions[CHIPSET_MODEAVAILABLE](mode);
	if (modregs == DISABLE_MODE)
		return 0;
	if (mode <= TEXT || mode > GLASTMODE)
		return 0;

	/* The whole screen must fit in video memory. */
	mi = &__svgalib_infotable[mode];
	if (mi->ydim * mi->xbytes > cirrus_memory * 1024)
		return 0;

	/* Monitor considerations. */
	if (mi->xdim == 800 && mi->ydim == 600) {
		if (__svgalib_monitortype < MON800_56)
			return 0;
	}
	else if (mi->xdim == 1024 && mi->ydim == 768) {
		if (__svgalib_monitortype < MON1024_43I)
			return 0;
	}
	else if (mi->xdim == 1280 && mi->ydim == 1024) {
		if (__svgalib_monitortype < MON1024_60)
			return 0;
	}

	return SVGADRV;
}


/* Check if mode is interlaced */

static int cirrus_interlaced( int mode ) { 
	const unsigned char *regs;

	if (cirrus_modeavailable(mode) != SVGADRV)
	    return 0;
	regs = LOOKUPMODE(cirrus_modes, mode);
	if (regs == NULL || regs == DISABLE_MODE)
	    return 0;
	return (regs[EXT+2]&0x01) != 0;  /* CRTC 0x1a */
}



/* Set a mode */
/* Modes without an entry in cirrus_modes are handed to the standard VGA */
/* driver and its result is returned. Otherwise returns 1 if the mode is */
/* not available and 0 after the registers have been programmed. */
/* The prv_mode argument is not used. */

/* Value written to bits 2-4 of the monitor ID register (index 0x09 of */
/* port 0x3c4), indexed by __svgalib_monitortype. */
static unsigned char cirrus_monidtable[7] = {
	0, 2, 3, 4, 5, 6, 7 };

static int cirrus_setmode( int mode, int prv_mode ) {
	const unsigned char *regs;
	/* Only used by the disabled dot clock selection code below. */
	unsigned char clockreg0x0e, clockreg0x1e;

	regs = LOOKUPMODE(cirrus_modes, mode);
	if (regs == NULL) {
		/* Let the standard VGA driver set standard VGA modes */
		/* But first reset a Cirrus extended register that */
		/* the XFree86 Trident probe corrupts. */
		outw(0x3d4, 0x4a0b);
		return vga_chipsetfunctions[CHIPSET_SETMODE](mode);
	}
	if (!cirrus_modeavailable(mode))
	        return 1;

	if (regs == g1024x768x16i_regs && __svgalib_monitortype >= MON1024_60)
		/* Can do 1024x768x16 non-interlaced. */
		regs = (unsigned char *)g1024x768x16_regs;
	if (regs == g1024x768x256i_regs && __svgalib_monitortype >= MON1024_60)
		/* Can do 1024x768x256 non-interlaced. */
		regs = (unsigned char *)g1024x768x256_regs;

	__vga_setregs(regs);		/* set standard regs */
	cirrus_setregs(regs, mode);	/* set extended regs */
	if (cirrus_memory == 2048) {
		/* Setting this bit is highly dubious, but it seems to */
		/* be required on the 2Mb card I have. */
		outb(0x3c4, 0x0f);	/* enable 2M */
		outb(0x3c5, inb(0x3c5) | 0x80);
		outb(0x3ce, 0x0b);
		outb(0x3cf, inb(0x3cf) | 0x20);	/* set 16K bank granularity */
	}
	if (__svgalib_infotable[mode].colors == (1 << 16)) {
		/* For 64K color modes, we set the 32K registers to */
		/* save space. Now set 64K color mode. */
		writehicolordac(0xe1);	/* 64K color mode */
	}

	/* Now set best dot clock for monitor type. */
	/* Note that the 320x200 modes return here, before the monitor ID */
	/* register is written. */
	if (mode == G320x200x256 || mode == G320x200x32K || mode == G320x200x64K || mode == G320x200x16M)
		/* Don't mess with this one; it's 70 Hz low-res VGA. */
		return 0;

	/* Blow it off for now. Use the default (lowest) clock. */
	/* We do have 1024x768 non-interlaced though. */
#if 0
	if (mode == G640x480x256) {
		setclock(250);			/* 60 Hz */
		if (__svgalib_monitortype >= MON800_60)
			setclock(315);		/* 72 Hz */
	}
	if (mode == G640x480x32K || mode == G640x480x64K) {
		setclock(500);			/* 60 Hz */
		if (__svgalib_monitortype >= MON800_60)
			setclock(630);		/* 72 Hz */
	}
	if (mode == G640x480x16M) {
		setclock(750);			/* 60 Hz */
		#ifdef CIRRUS_HIGHDOTCLOCK
		if (cirrus_chiptype >= CLGD5426 &&
		__svgalib_monitortype >= MON800_60)
			setclock(900);		/* 72 Hz */
		#endif
	}
	if (mode == G800x600x256 || mode == G800x600x16) {
		setclock(360);			/* 56 Hz */
		if (__svgalib_monitortype >= MON800_60)
			setclock(400);		/* 60 Hz */
		if (__svgalib_monitortype >= MON1024_60)
			setclock(500);		/* 72 Hz */
	}
	if (mode == G800x600x32K || mode == G800x600x64K) {
		setclock(720);			/* 56 Hz */
		if (__svgalib_monitortype >= MON800_60)
			setclock(800);		/* 60 Hz */
		#ifdef CIRRUS_HIGHDOTCLOCK			
		if (cirrus_chiptype >= CLGD5426 &&
		__svgalib_monitortype >= MON1024_60)
			setclock(1000);		/* 72 Hz */
		#endif
	}
	if (mode == G800x600x16M) {
		setclock(960);			/* 50 Hz */
	}
	if (mode == G1024x768x256 || mode == G1024x768x16) {
		setclock(450);			/* 43 Hz i */
	}
	if (mode == G1024x768x256 || mode == G1024x768x16) {
		/* Non-interlaced 1024x768. */
		if (__svgalib_monitortype >= MON1024_60)
			setclock(650);		/* 60 Hz */
		if (__svgalib_monitortype >= MON1024_70)
			setclock(750);		/* 70 Hz */
	}
	if (mode == G1024x768x32K || mode == G1024x768x64K) {
		/* Weird case, dot clock instead of byteclock (expect 900). */
		/* I wonder what high clocks mean in this configuration. */
		setclock(450);			/* 43 Hz i */
	}
	if (mode == G1280x1024x16) {
		setclock(750);			/* 43 Hz i */
	}
	outb(0x3c4, 0x0e);
	outb(0x3c5, clockreg0x0e);
	outb(0x3c4, 0x1e);
	outb(0x3c5, clockreg0x1e);
#endif

	/* Set Cirrus monitor ID register. */
	/* NOTE(review): cirrus_monidtable has 7 entries; this assumes */
	/* __svgalib_monitortype is in the range 0-6 -- confirm. */
	outb(0x3c4, 0x09);	/* Monitor ID register. */
	outb(0x3c5, (inb(0x3c5) & 0xe3) |
		(cirrus_monidtable[__svgalib_monitortype] << 2));
	return 0;
}


/* Unlock chipset-specific registers */
/* Also clears bit 7 of CRTC register 0x11 so that CRTC registers 0-7 */
/* (the timing) can be modified. Always returns 0. */

static int cirrus_unlock() {
	int crtcbase;
	int retrace_end;

	/* Are we Mono or Color? */
	if (inb(0x3CC) & 0x01)
		crtcbase = 0x3D0;
	else
		crtcbase = 0x3B0;

	/* Unlock the Cirrus special registers. */
	outb(0x3C4, 0x06);
	outb(0x3C5, 0x12);

	/* Fetch the Vert. Retrace End register ... */
	outb(crtcbase + 4, 0x11);
	retrace_end = inb(crtcbase + 5);

	/* ... and put it back with the PR bit set to 0. */
	outb(crtcbase + 5, retrace_end & 0x7F);

	return 0;
}


/* Relock chipset-specific registers */
/* (currently not used) */
/* Writes 0x0F to the register at index 0x06 of port 0x3C4, the same */
/* register cirrus_unlock writes 0x12 to. Always returns 0. */

static int cirrus_lock() {
	const int index_port = 0x3C4;
	const int data_port = 0x3C5;

	outb(index_port, 0x06);
	outb(data_port, 0x0F);	/* relock cirrus special */
	return 0;
}


/* Identify chipset, initialize and return non-zero if detected */
/* If no supported chip is found, the previous contents of the lock */
/* register are written back and 0 is returned. */

static int cirrus_test() {
	int saved_lock;
	int detected;
	int chip_id;

	outb(0x3c4, 0x06);
	saved_lock = inb(0x3c5);

	cirrus_unlock();

	/* If it's a Cirrus at all, we should be able to read back the */
	/* lock register; anything but 0x12 means it is not a Cirrus542X. */
	outb(0x3C4, 0x06);
	detected = (inb(0x3C5) == 0x12);

	if (detected) {
		/* The above check seems to be weak, so we also check the */
		/* chip ID: 0x22-0x26 are accepted, and 0x29 as well */
		/* (assume 5434 works). */
		outb(CRT_I, 0x27);
		chip_id = inb(CRT_D) >> 2;
		detected = (chip_id >= 0x22 && chip_id <= 0x26) ||
			chip_id == 0x29;
	}

	if (!detected) {
		outb(0x3c4, 0x06);
		outb(0x3c5, saved_lock);
		return 0;
	}

	cirrus_init(0, 0, 0);
	return 1;
}


/* Bank switching function -- set 64K page number */

/* Variant for when the Cirrus banking register has been set to 16K */
/* granularity: a 64K page is 4 banking units. */
static void cirrus_setpage_2M( unsigned page ) {
	unsigned bank = page << 2;

	/* index 0x09 in the low byte, bank value in the high byte */
	outw(0x3ce, (bank << 8) + 0x09);
}

/* Variant for the default 4K granularity: a 64K page is 16 banking units. */
static void cirrus_setpage( unsigned page ) {
	unsigned bank = page << 4;

	/* index 0x09 in the low byte, bank value in the high byte */
	outw(0x3ce, (bank << 8) + 0x09);
}


/* No r/w paging */
/* Both functions ignore the page number and return 0. */
static int cirrus_setrdpage(unsigned page)
{
	(void) page;
	return 0;
}

static int cirrus_setwrpage(unsigned page)
{
	(void) page;
	return 0;
}


/* Set display start address (not for 16 color modes) */
/* Cirrus supports any address in video memory (up to 2Mb) */
/* Address bits 2-17 go to CRTC registers 0x0d/0x0c, bits 0-1 to ATC */
/* register 0x13 and bits 18-20 to CRTC register 0x1b. Always returns 0. */

static int cirrus_setdisplaystart( int address ) {
	int quads = address >> 2;
	int topbits;
	int old;

	outw(0x3d4, 0x0d + (quads & 0x00ff) * 256);	/* sa2-sa9 */
	outw(0x3d4, 0x0c + (quads & 0xff00));		/* sa10-sa17 */

	inb(0x3da);			/* set ATC to addressing mode */
	outb(0x3c0, 0x13 + 0x20);	/* select ATC reg 0x13 */
	/* write sa0-1 to bits 0-1; other cards use bits 1-2 */
	old = inb(0x3c1);
	outb(0x3c0, (old & 0xf0) | (address & 3));

	/* Cirrus specific: sa18 goes to bit 0, sa19 to bit 2 and sa20 to */
	/* bit 3 of CRTC register 0x1b. */
	topbits = ((address & 0x40000) >> 18)
		| ((address & 0x80000) >> 17)
		| ((address & 0x100000) >> 17);
	outb(0x3d4, 0x1b);
	old = inb(0x3d5);
	outb(0x3d5, (old & 0xf2) | topbits);

	return 0;
}


/* Set logical scanline length (usually multiple of 8) */
/* Cirrus supports multiples of 8, up to 4088 */
/* Always returns 0. */

static int cirrus_setlogicalwidth( int width ) { 
	int units = width >> 3;
	int old;

	/* width / 8 goes to CRTC register 0x13 */
	outw(0x3d4, 0x13 + units * 256);

	/* bit 11 of width (0x800) goes to bit 4 of CRTC register 0x1b */
	outb(0x3d4, 0x1b);
	old = inb(0x3d5);
	outb(0x3d5, (old & 0xef) | ((width & 0x800) >> 7));

	return 0;
}


/* Function table (exported) */
/* cirrus_init installs this table in chipsetfunctions. The entries are */
/* accessed by position (for example CHIPSET_SETPAGE), so their order */
/* must not be changed. cirrus_init replaces the set page entry by */
/* cirrus_setpage_2M on cards with 2048K of memory. */

/* The accelerated functions are defined further down. */
static int cirrus_bitblt( int, int, int, int, int );
static int cirrus_imageblt( unsigned *, int, int, int, int );
static int cirrus_fillblt( int, int, int, int, int );
static int cirrus_hlinelistblt( int, int, int *, int *, int, int );
static int cirrus_bltwait();

int (*cirrus_chipsetfunctions[])() = {
	cirrus_saveregs,
	cirrus_setregs,
	cirrus_unlock,
	cirrus_lock,
	cirrus_test,
	cirrus_init,
	(int (*)()) cirrus_setpage,
	cirrus_setrdpage,
	cirrus_setwrpage,
	cirrus_setmode,
	cirrus_modeavailable,
	cirrus_setdisplaystart,
	cirrus_setlogicalwidth,
	cirrus_getmodeinfo,
	cirrus_bitblt,
	cirrus_imageblt,
	cirrus_fillblt,
	cirrus_hlinelistblt,
	cirrus_bltwait
};


/* Initialize chipset (called after detection) */

static int cirrus_typenumber[] = { 5420, 5422, 5424, 5426, 5428 };

static int cirrus_init( int force, int par1, int par2 ) {
	unsigned char v;
	if (force) {
		cirrus_memory = par1;
		cirrus_chiptype = par2;
	}
	else {
		outb(0x3c4,0x0a);	/* read memory register */
		v = inb(0x3c5);
		cirrus_memory = 256 << ((v >> 3) & 3);
		outb(CRT_I, 0x27);
		cirrus_chiptype = inb(CRT_D) >> 2;
		switch (cirrus_chiptype) {
			case 0x22 : cirrus_chiptype = CLGD5420; break;
			case 0x23 : cirrus_chiptype = CLGD5422; break;
			case 0x24 : cirrus_chiptype = CLGD5426; break;
			case 0x25 : cirrus_chiptype = CLGD5424; break;
			case 0x26 : cirrus_chiptype = CLGD5428; break;
			default :
				printf("Unknown Cirrus chip %2x. Assuming "
					"GD5426 compatibility.\n",
					cirrus_chiptype);
				cirrus_chiptype = CLGD5426;
				break;
		}
	}
	if (__svgalib_driver_report) {
		printf("Using Cirrus Logic GD542x driver (%d, %dK).\n",
			cirrus_typenumber[cirrus_chiptype], cirrus_memory);
	}

	/* Set up the correct paging routine */
	if (cirrus_memory == 2048)
		cirrus_chipsetfunctions[CHIPSET_SETPAGE] = 
			(int (*)()) cirrus_setpage_2M;

	chipsetfunctions = cirrus_chipsetfunctions;

	return 0;
}



/* 	Some information on the accelerated features of the 5426,
	derived from the Databook.
	
	port	index

	Addresses have 21 bits (2Mb of memory).
	0x3ce,	0x28	bits 0-7 of the destination address
	0x3ce,  0x29	bits 8-15
	0x3ce,	0x2a	bits 16-20

	0x3ce,	0x2c	bits 0-7 of the source address
	0x3ce,	0x2d	bits 8-15
	0x3ce,	0x2e	bits 16-20

	Maximum pitch is 4095.
	0x3ce,	0x24	bits 0-7 of the destination pitch (screen width)
	0x3ce,	0x25	bits 8-11
	
	0x3ce,	0x26	bits 0-7 of the source pitch (screen width)
	0x3ce,	0x27	bits 8-11
	
	Maximum width is 2047.
	0x3ce,	0x20	bits 0-7 of the box width - 1
	0x3ce,	0x21	bits 8-10
	
	Maximum height is 1023.
	0x3ce,	0x22	bits 0-7 of the box height - 1
	0x3ce,	0x23	bits 8-9

	0x3ce,	0x30	BLT mode
			bit 0: direction (0 = down, 1 = up)
			bit 1: destination
			bit 2: source (0 = video memory, 1 = system memory)
			bit 3: enable transparency compare
			bit 4: 16-bit color expand/transparency
			bit 6: 8x8 pattern copy
			bit 7: enable color expand

		0x31	BLT status
			bit 0: busy
			bit 1: start operation (1)/suspend (0)
			bit 2: reset
			bit 3: set while blit busy/suspended

		0x32	BLT raster operation
			0x00	black
			0x01	white
			0x0d	copy source
			0xd0	copy inverted source
			0x0b	invert destination
			0x05	logical AND
			0x6d	logical OR (paint)
			0x59	XOR

		0x34	BLT transparent color
		0x35	high byte

	0x3ce,  0x00	background color (for color expansion)
	0x3ce,  0x01	foreground color
	0x3ce,  0x10	high byte of background color for 16-bit pixels
	0x3ce,  0x11	high byte of foreground color

	0x3ce,	0x0b	bit 1: enable BY8 addressing
	0x3c4,	0x02	8-bit plane mask for BY8 (corresponds to 8 pixels)
			(write mode 1, 4, 5)
	0x3c5,  0x05	bits 0-2: VGA write mode
			extended write mode 4: write up to 8 pixels in
				foreground color (BY8)
			extended write mode 5: write 8 pixels in foreground/
				background color (BY8)
			This may also work in normal non-BY8 packed-pixel mode.

	When doing blits from system memory to video memory, pixel data
	can apparently be written to any video address in 16-bit words, with
	each scanline padded to 4-byte alignment. This is handy because
	the chip handles line transitions and alignment automatically (and
	can do, for example, masking).

	The pattern copy requires an 8x8 pattern (64 pixels) at the source
	address in video memory, and fills a box with specified size and 
	destination address with the pattern. This is in fact the way to do
	solid fills.
	
	mode			pattern
	Color Expansion		8 bytes (monochrome bitmap)
	8-bit pixels		64 bytes
	16-bit pixels		128 bytes

*/


/* Wait until the blitter has finished. Always returns 0. */
static int cirrus_bltwait() {
	/* Select the BLT status register first. The blit functions leave */
	/* index 0x31 selected, but this function is also exported through */
	/* the function table, and then any graphics register may have */
	/* been selected last. */
	outb(0x3ce, 0x31);
	while (inb(0x3cf) & 1);		/* wait until finished (busy bit) */
	return 0;
}

/* Copy a box of w x h within video memory from srcaddr to destaddr. */
/* The same pitch is used for source and destination. Waits for the */
/* blitter to finish and returns the result of cirrus_bltwait(). */
static int cirrus_bitblt( int srcaddr, int destaddr, int w, int h, 
int pitch ) {
	/* the chip takes width - 1 and height - 1 */
	int wreg = w - 1;
	int hreg = h - 1;

	/* box width: bits 0-7, bits 8-10 */
	outw(0x3ce, 0x20 + (wreg << 8));
	outw(0x3ce, 0x21 + (wreg & 0xff00));

	/* box height: bits 0-7, bits 8-9 */
	outw(0x3ce, 0x22 + (hreg << 8));
	outw(0x3ce, 0x23 + (hreg & 0xff00));

	/* destination pitch (bits 0-7, bits 8-11), then source pitch */
	outw(0x3ce, 0x24 + (pitch << 8));
	outw(0x3ce, 0x25 + (pitch & 0xff00));
	outw(0x3ce, 0x26 + (pitch << 8));
	outw(0x3ce, 0x27 + (pitch & 0xff00));

	/* destination address: bits 0-7, 8-15, 16-20 */
	outw(0x3ce, 0x28 + (destaddr << 8));
	outw(0x3ce, 0x29 + (destaddr & 0xff00));
	outw(0x3ce, 0x2a + ((destaddr & 0x1f0000) >> 8));

	/* source address: bits 0-7, 8-15, 16-20 */
	outw(0x3ce, 0x2c + (srcaddr << 8));
	outw(0x3ce, 0x2d + (srcaddr & 0xff00));
	outw(0x3ce, 0x2e + ((srcaddr & 0x1f0000) >> 8));

	outw(0x3ce, 0x30);		/* BLT mode: normal blit */
	outw(0x3ce, 0x32 + 0x0d00);	/* raster operation: copy source */
	outw(0x3ce, 0x31 + 0x0200);	/* BLT status: start operation */

	return cirrus_bltwait();
}


/* Copy a w x h image from system memory (srcaddr) to video memory at */
/* destaddr, using the blitter in "source is system memory" mode. The */
/* pixel data is fed to the chip by writing longwords to a fixed address */
/* in the video memory window. Waits for the blitter to finish and */
/* returns the result of cirrus_bltwait(). */
static int cirrus_imageblt( unsigned *srcaddr, int destaddr, int w, int h, 
int pitch ) {
	int count;
	/* Every store to this address is a separate transfer to the */
	/* blitter, so the pointer must be volatile; otherwise the compiler */
	/* is free to merge the repeated stores to the same location. */
	volatile unsigned *base;
	
	w--;
	outw(0x3ce, 0x20 + (w << 8));		/* bits 0-7 of width */
	outw(0x3ce, 0x21 + (w & 0xff00));	/* bits 8-10 */

	h--;
	outw(0x3ce, 0x22 + (h << 8));		/* bits 0-7 of height */
	outw(0x3ce, 0x23 + (h & 0xff00));	/* bits 8-9 */

	outw(0x3ce, 0x24 + (pitch << 8));	/* bits 0-7 of dest. pitch */
	outw(0x3ce, 0x25 + (pitch & 0xff00));	/* bits 8-11 */

	/* source pitch not required */

	outw(0x3ce, 0x28 + (destaddr << 8));	/* bits 0-7 of dest. address */
	outw(0x3ce, 0x29 + (destaddr & 0xff00));	  /* bits 8-15 */
	outw(0x3ce, 0x2a + ((destaddr & 0x1f0000) >> 8)); /* bits 16-20 */

	/* blit source address not required */

	outw(0x3ce, 0x30 + 0x0400);	/* from system memory */
	outw(0x3ce, 0x32 + 0x0d00);	/* operation: copy */
	outw(0x3ce, 0x31 + 0x0200);	/* start operation */
	
	w++;
	h++;
	count = (w * h + 3) / 4;	/* #longwords */
	/* This may not be quite right for the last few bytes */
	/* NOTE(review): the notes on the blitter above say each scanline */
	/* is padded to 4-byte alignment, which this count does not take */
	/* into account when w is not a multiple of 4 -- confirm. */

	base = (volatile unsigned *)(__svgalib_graph_mem + 4);
	
#define USE_ASM	

	{
	unsigned *srcp;
	unsigned *endp;
	srcp = srcaddr;
	endp = srcaddr + count;
	#ifdef USE_ASM
	/* Unrolled transfer of 16 longwords at a time. The comparison is */
	/* done on the pointer difference so that no pointer beyond the */
	/* end of the source data is formed. */
	while (endp - srcp >= 16) {
		int t;	/* temp. storage only */ 
		/* t is written by the asm, so it is an (early clobber) */
		/* output operand; volatile and the memory clobber keep */
		/* the stores from being removed or reordered. */
		__asm__ __volatile__(
			"movl (%1),%0\n\t"
			"movl %0,(%2)\n\t"
			"movl 4(%1),%0\n\t"
			"movl %0,(%2)\n\t"
			"movl 8(%1),%0\n\t"
			"movl %0,(%2)\n\t"
			"movl 12(%1),%0\n\t"
			"movl %0,(%2)\n\t"
			"movl 16(%1),%0\n\t"
			"movl %0,(%2)\n\t"
			"movl 20(%1),%0\n\t"
			"movl %0,(%2)\n\t"
			"movl 24(%1),%0\n\t"
			"movl %0,(%2)\n\t"
			"movl 28(%1),%0\n\t"
			"movl %0,(%2)\n\t"
			"movl 32(%1),%0\n\t"
			"movl %0,(%2)\n\t"
			"movl 36(%1),%0\n\t"
			"movl %0,(%2)\n\t"
			"movl 40(%1),%0\n\t"
			"movl %0,(%2)\n\t"
			"movl 44(%1),%0\n\t"
			"movl %0,(%2)\n\t"
			"movl 48(%1),%0\n\t"
			"movl %0,(%2)\n\t"
			"movl 52(%1),%0\n\t"
			"movl %0,(%2)\n\t"
			"movl 56(%1),%0\n\t"
			"movl %0,(%2)\n\t"
			"movl 60(%1),%0\n\t"
			"movl %0,(%2)\n\t"
			: "=&r" (t)
			: "r" (srcp), "r" (base)
			: "memory"
		);
		srcp += 16;	/* add 16 * 4 to pointer */
	}
	#endif
	/* remaining longwords (all of them without USE_ASM) */
	while (srcp < endp)
		*base = *(srcp++);
	}

	return cirrus_bltwait();
}


#if 0
/* Disabled: copy a w x h box from video memory (srcaddr) to system memory */
/* (destaddr) by reading longwords from a fixed address in the video */
/* memory window. */
/* NOTE(review): before this is enabled: the function is declared int but */
/* has no return statement; in the asm, t is written but listed as an */
/* input operand and there is no memory clobber; base should probably be */
/* a volatile pointer. */
static int cirrus_getimageblt( int srcaddr, unsigned *destaddr, int w, int h, 
int pitch ) {
	int count;
	unsigned *base;
	
	w--;
	outw(0x3ce, 0x20 + (w << 8));		/* bits 0-7 of width */
	outw(0x3ce, 0x21 + (w & 0xff00));	/* bits 8-10 */

	h--;
	outw(0x3ce, 0x22 + (h << 8));		/* bits 0-7 of height */
	outw(0x3ce, 0x23 + (h & 0xff00));	/* bits 8-9 */

	outw(0x3ce, 0x26 + (pitch << 8));	/* bits 0-7 of source pitch */
	outw(0x3ce, 0x27 + (pitch & 0xff00));	/* bits 8-11 */

	/* destination pitch not required */

	outw(0x3ce, 0x2c + (srcaddr << 8));	/* bits 0-7 of source address */
	outw(0x3ce, 0x2d + (srcaddr & 0xff00));		  /* bits 8-15 */
	outw(0x3ce, 0x2e + ((srcaddr & 0x1f0000) >> 8)); /* bits 16-20 */

	/* blit destination address not required */

	/* BLT mode bit 1 (destination); presumably this selects system */
	/* memory as the destination. */
	outw(0x3ce, 0x30 + 0x0200);
	outw(0x3ce, 0x32 + 0x0d00);	/* operation: copy */
	outw(0x3ce, 0x31 + 0x0200);	/* start operation */

	w++;
	h++;
	count = (w * h + 3) / 4;	/* #longwords */
	/* This may not be quite right for the last few bytes */

	base = (unsigned *)(__svgalib_graph_mem + 4);
	
#define USE_ASM	

	{
	unsigned *destp;
	unsigned *endp;
	destp = destaddr;
	endp = destaddr + count;
	#ifdef USE_ASM
	/* Unrolled: read 16 longwords from base and store them at destp. */
	while (destp + 16 <= endp) {
		int t;
		asm(
			"movl (%2),%0\n\t"
			"movl %0,(%1)\n\t"
			"movl (%2),%0\n\t"
			"movl %0,4(%1)\n\t"
			"movl (%2),%0\n\t"
			"movl %0,8(%1)\n\t"
			"movl (%2),%0\n\t"
			"movl %0,12(%1)\n\t"
			"movl (%2),%0\n\t"
			"movl %0,16(%1)\n\t"
			"movl (%2),%0\n\t"
			"movl %0,20(%1)\n\t"
			"movl (%2),%0\n\t"
			"movl %0,24(%1)\n\t"
			"movl (%2),%0\n\t"
			"movl %0,28(%1)\n\t"
			"movl (%2),%0\n\t"
			"movl %0,32(%1)\n\t"
			"movl (%2),%0\n\t"
			"movl %0,36(%1)\n\t"
			"movl (%2),%0\n\t"
			"movl %0,40(%1)\n\t"
			"movl (%2),%0\n\t"
			"movl %0,44(%1)\n\t"
			"movl (%2),%0\n\t"
			"movl %0,48(%1)\n\t"
			"movl (%2),%0\n\t"
			"movl %0,52(%1)\n\t"
			"movl (%2),%0\n\t"
			"movl %0,56(%1)\n\t"
			"movl (%2),%0\n\t"
			"movl %0,60(%1)\n\t"
			: : "r" (t), "r" (destp), "r" (base)
		);
		destp += 16;	/* add 16 * 4 to pointer */
	}
	#endif
	/* remaining longwords */
	while (destp < endp) {
		*destp = *base;
		destp++;
	}
	}

	cirrus_bltwait();
}
#endif


/* Video memory address of the fill pattern, or -1 if the pattern has not */
/* been written yet. */
static int pattern = -1;

/* Claim 8 more bytes of video memory and fill them with 0xff; they serve */
/* as the 8x8 monochrome pattern for the fill functions. */
/* Returns 0 on success, 1 if the video memory could not be claimed (in */
/* that case pattern is left unchanged). */
static int initializepattern() {
	int v;
	/* Write 8x8 monochrome pattern above claimed video memory. */
	v = VMEM;
	if (vga_claimvideomemory(VMEM + 8)) {
		printf("svgalib: Cannot allocate video memory for Cirrus fill pattern.\n");
		return 1;
	}
	pattern = v;
	if (cirrus_inlinearmode())
		memset(vga_getgraphmem() + pattern, 0xff, 8);
	else {
		/* Banked: select the 64K page that holds the pattern and */
		/* write at its offset within the page. */
		cirrus_chipsetfunctions[CHIPSET_SETPAGE](pattern >> 16);
		memset(__svgalib_graph_mem + (pattern & 0xffff), 0xff, 8);
	}
	return 0;
}

/* Fill a box of w x h at video address destaddr with color c, using a */
/* pattern fill with color expansion of the all-ones pattern. */
/* Returns -1 if the pattern could not be set up, otherwise waits for the */
/* blitter and returns the result of cirrus_bltwait(). */
static int cirrus_fillblt( int destaddr, int w, int h, int pitch, int c ) {
	int pat;
	int wreg;
	int hreg;

	if (pattern == -1 && initializepattern())
		return -1;

	/* source address is the pattern: bits 0-7, 8-15, 16-20 */
	pat = pattern;
	outw(0x3ce, 0x2c + (pat << 8));
	outw(0x3ce, 0x2d + (pat & 0xff00));
	outw(0x3ce, 0x2e + ((pat & 0x1f0000) >> 8));

	/* box width - 1: bits 0-7, bits 8-10 */
	wreg = w - 1;
	outw(0x3ce, 0x20 + (wreg << 8));
	outw(0x3ce, 0x21 + (wreg & 0xff00));

	/* box height - 1: bits 0-7, bits 8-9 */
	hreg = h - 1;
	outw(0x3ce, 0x22 + (hreg << 8));
	outw(0x3ce, 0x23 + (hreg & 0xff00));

	/* destination pitch: bits 0-7, bits 8-11 */
	outw(0x3ce, 0x24 + (pitch << 8));
	outw(0x3ce, 0x25 + (pitch & 0xff00));

	/* destination address: bits 0-7, 8-15, 16-20 */
	outw(0x3ce, 0x28 + (destaddr << 8));
	outw(0x3ce, 0x29 + (destaddr & 0xff00));
	outw(0x3ce, 0x2a + ((destaddr & 0x1f0000) >> 8));

	/* foreground color */
	outw(0x3ce, 0x01 + (c << 8));

	outw(0x3ce, 0x30 + 0xc000);	/* BLT mode: pattern copy + color expand */
	outw(0x3ce, 0x32 + 0x0d00);	/* raster operation: copy source */
	outw(0x3ce, 0x31 + 0x0200);	/* BLT status: start operation */

	return cirrus_bltwait();
}

/* Draw n horizontal lines in color c, one per scanline starting at ymin. */
/* Line i covers xmin[i] up to but not including xmax[i]; empty lines are */
/* skipped. Returns -1 if the fill pattern could not be set up, otherwise */
/* 0 after the last line has been drawn. */
static int cirrus_hlinelistblt( int ymin, int n, int *xmin, int *xmax,
int pitch, int c ) {
	int srcaddr, i;

	if (pattern == -1)
		if (initializepattern())
			return -1;

	srcaddr = pattern;

	/* registers that won't change */

	outw(0x3ce, 0x2c + (srcaddr << 8));	/* bits 0-7 of src address */
	outw(0x3ce, 0x2d + (srcaddr & 0xff00));		 /* bits 8-15 */
	outw(0x3ce, 0x2e + ((srcaddr & 0x1f0000) >> 8)); /* bits 16-20 */

	outw(0x3ce, 0x24 + (pitch << 8));	/* bits 0-7 of dest. pitch */
	outw(0x3ce, 0x25 + (pitch & 0xff00));	/* bits 8-11 */

	outw(0x3ce, 0x30 + 0xc000);	/* pattern fill, color expansion */
	outw(0x3ce, 0x01 + (c << 8));	/* foreground color */
	outw(0x3ce, 0x32 + 0x0d00);	/* operation: copy */

	for (i = 0; i < n; i++) {
		int w;
		int destaddr;
		w = xmax[i] - xmin[i] - 1;	/* discard rightmost pixel */
		if (w < 0)			/* (and -1 for chip) */
			continue;

		/* Select the BLT status register before polling it. For */
		/* the first line the index still points at the raster */
		/* operation register (0x32, just written with 0x0d), so */
		/* without this the busy test would read the wrong */
		/* register. */
		outb(0x3ce, 0x31);
		while (inb(0x3cf) & 1);		/* wait for blitter */

		outw(0x3ce, 0x20 + (w << 8));		/* bits 0-7 of width */
		outw(0x3ce, 0x21 + (w & 0x0700));	/* bits 8-10 */

		/* set height to 1 */
		outw(0x3ce, 0x22 + (0 << 8));		/* bits 0-7 of height */
		outw(0x3ce, 0x23 + (0 & 0x0300));	/* bits 8-9 */

		destaddr = (ymin + i) * pitch + xmin[i];

		outw(0x3ce, 0x28 + (destaddr << 8));
		outw(0x3ce, 0x29 + (destaddr & 0xff00));
		outw(0x3ce, 0x2a + ((destaddr & 0x1f0000) >> 8));

		outw(0x3ce, 0x31 + 0x0200);	/* start operation */

		CRITICAL = 1;
	}
	/* Same as above: if every line was skipped, the index register */
	/* still points at 0x32, so select the status register explicitly. */
	outb(0x3ce, 0x31);
	cirrus_bltwait();
	CRITICAL = 0;

	return 0;
}
