Handle number of M68K cycles used when tracing in debugger mode
[clinton/Virtual-Jaguar-Rx.git] / src / op.cpp
CommitLineData
cf76e892
JPM
1//
2// Object Processor
3//
4// Original source by David Raingeard (Cal2)
5// GCC/SDL port by Niels Wagenaar (Linux/WIN32) and Caz (BeOS)
6// Extensive cleanups/fixes/rewrites by James Hammons
7// (C) 2010 Underground Software
8//
9// JLH = James Hammons <jlhamm@acm.org>
10// JPM = Jean-Paul Mari <djipi.mari@gmail.com>
11//
12// Who When What
13// --- ---------- -----------------------------------------------------------
14// JLH 01/16/2010 Created this log ;-)
15// JPM 06/06/2016 Visual Studio support
16//
17
18#include "op.h"
19
20#include <stdlib.h>
21#include <string.h>
22#include "gpu.h"
23#include "jaguar.h"
24#include "log.h"
25#include "m68000/m68kinterface.h"
26#include "memory.h"
27#include "tom.h"
28
29//#define OP_DEBUG
30//#define OP_DEBUG_BMP
31
// Blend table lookups. The table index is built from two bytes: `dst` becomes
// the high 8 bits and `src` the low 8 bits. Both arguments are parenthesized
// before being cast, so an expression argument (e.g. `a | b`) is cast and
// shifted as a whole rather than having the cast bind to its first operand.
#define BLEND_Y(dst, src)	op_blend_y[(((uint16_t)(dst)) << 8) | ((uint16_t)(src))]
#define BLEND_CR(dst, src)	op_blend_cr[(((uint16_t)(dst)) << 8) | ((uint16_t)(src))]
34
// Object type codes. OPProcessList() reads these from the low 3 bits of an
// object's first phrase (p0 & 0x07) to decide how to handle the object.
35#define OBJECT_TYPE_BITMAP 0 // 000
36#define OBJECT_TYPE_SCALE 1 // 001
37#define OBJECT_TYPE_GPU 2 // 010
38#define OBJECT_TYPE_BRANCH 3 // 011
39#define OBJECT_TYPE_STOP 4 // 100
40
// Branch object condition codes. OPProcessList() reads these from bits 14-16
// of a branch object's phrase ((p0 >> 14) & 0x07); when the condition holds,
// the OP continues at the branch object's link address.
41#define CONDITION_EQUAL 0 // VC == YPOS
42#define CONDITION_LESS_THAN 1 // VC < YPOS
43#define CONDITION_GREATER_THAN 2 // VC > YPOS
// Taken when bit 0 of the value returned by OPGetStatusRegister() is set
44#define CONDITION_OP_FLAG_SET 3
// Taken when bit 10 of the value returned by TOMGetHC() is set
45#define CONDITION_SECOND_HALF_LINE 4
46
47#if 0
48#define OPFLAG_RELEASE 8 // Bus release bit
49#define OPFLAG_TRANS 4 // Transparency bit
50#define OPFLAG_RMW 2 // Read-Modify-Write bit
51#define OPFLAG_REFLECT 1 // Horizontal mirror bit
52#endif
53
54// Private function prototypes
55
56void OPProcessFixedBitmap(uint64_t p0, uint64_t p1, bool render);
57void OPProcessScaledBitmap(uint64_t p0, uint64_t p1, uint64_t p2, bool render);
58void OPDiscoverObjects(uint32_t address);
59void OPDumpObjectList(void);
60void DumpScaledObject(uint64_t p0, uint64_t p1, uint64_t p2);
61void DumpFixedObject(uint64_t p0, uint64_t p1);
62void DumpBitmapCore(uint64_t p0, uint64_t p1);
63uint64_t OPLoadPhrase(uint32_t offset);
64
65// Local global variables
66
67// Blend tables (64K each)
68static uint8_t op_blend_y[0x10000];
69static uint8_t op_blend_cr[0x10000];
70// There may be a problem with this "RAM" overlapping (and thus being independent of)
71// some of the regular TOM RAM...
72//#warning objectp_ram is separated from TOM RAM--need to fix that!
73//static uint8_t objectp_ram[0x40]; // This is based at $F00000
74uint8_t objectp_running = 0;
75//bool objectp_stop_reading_list;
76
77static uint8_t op_bitmap_bit_depth[8] = { 1, 2, 4, 8, 16, 24, 32, 0 };
78//static uint32_t op_bitmap_bit_size[8] =
79// { (uint32_t)(0.125*65536), (uint32_t)(0.25*65536), (uint32_t)(0.5*65536), (uint32_t)(1*65536),
80// (uint32_t)(2*65536), (uint32_t)(1*65536), (uint32_t)(1*65536), (uint32_t)(1*65536) };
81static uint32_t op_pointer;
82
83int32_t phraseWidthToPixels[8] = { 64, 32, 16, 8, 4, 2, 0, 0 };
84
85
86//
87// Object Processor initialization
88//
89void OPInit(void)
90{
91 // Here we calculate the saturating blend of a signed 4-bit value and an
92 // existing Cyan/Red value as well as a signed 8-bit value and an existing intensity...
93 // Note: CRY is 4 bits Cyan, 4 bits Red, 16 bits intensitY
94 for(int i=0; i<256*256; i++)
95 {
96 int y = (i >> 8) & 0xFF;
97 int dy = (int8_t)i; // Sign extend the Y index
98 int c1 = (i >> 8) & 0x0F;
99 int dc1 = (int8_t)(i << 4) >> 4; // Sign extend the R index
100 int c2 = (i >> 12) & 0x0F;
101 int dc2 = (int8_t)(i & 0xF0) >> 4; // Sign extend the C index
102
103 y += dy;
104
105 if (y < 0)
106 y = 0;
107 else if (y > 0xFF)
108 y = 0xFF;
109
110 op_blend_y[i] = y;
111
112 c1 += dc1;
113
114 if (c1 < 0)
115 c1 = 0;
116 else if (c1 > 0x0F)
117 c1 = 0x0F;
118
119 c2 += dc2;
120
121 if (c2 < 0)
122 c2 = 0;
123 else if (c2 > 0x0F)
124 c2 = 0x0F;
125
126 op_blend_cr[i] = (c2 << 4) | c1;
127 }
128
129 OPReset();
130}
131
132
133//
134// Object Processor reset
135//
// Clears the objectp_running flag. This is also the last step of OPInit().
136void OPReset(void)
137{
138// memset(objectp_ram, 0x00, 0x40);
139 objectp_running = 0;
140}
141
142
// Log labels for the 3-bit object type field (indexed with lo & 0x07)
143static const char * opType[8] =
144{ "(BITMAP)", "(SCALED BITMAP)", "(GPU INT)", "(BRANCH)", "(STOP)", "???", "???", "???" };
// Log labels for the 3-bit branch condition code field
145static const char * ccType[8] =
146 { "==", "<", ">", "(opflag set)", "(second half line)", "?", "?", "?" };
// Addresses of the objects recorded by OPDiscoverObjects(), in discovery order
147static uint32_t object[8192];
// Count of valid entries in object[]
148static uint32_t numberOfObjects;
149//static uint32_t objectLink[8192];
150//static uint32_t numberOfLinks;
151
152
153void OPDone(void)
154{
155//#warning "!!! Fix OL dump so that it follows links !!!"
156// const char * opType[8] =
157// { "(BITMAP)", "(SCALED BITMAP)", "(GPU INT)", "(BRANCH)", "(STOP)", "???", "???", "???" };
158// const char * ccType[8] =
159// { "\"==\"", "\"<\"", "\">\"", "(opflag set)", "(second half line)", "?", "?", "?" };
160
161 uint32_t olp = OPGetListPointer();
162 WriteLog("\nOP: OLP = $%08X\n", olp);
163 WriteLog("OP: Phrase dump\n ----------\n");
164
165#if 0
166 for(uint32_t i=0; i<0x100; i+=8)
167 {
168 uint32_t hi = JaguarReadLong(olp + i, OP), lo = JaguarReadLong(olp + i + 4, OP);
169 WriteLog("\t%08X: %08X %08X %s", olp + i, hi, lo, opType[lo & 0x07]);
170
171 if ((lo & 0x07) == 3)
172 {
173 uint16_t ypos = (lo >> 3) & 0x7FF;
174 uint8_t cc = (lo >> 14) & 0x03;
175 uint32_t link = ((hi << 11) | (lo >> 21)) & 0x3FFFF8;
176 WriteLog(" YPOS=%u, CC=%s, link=%08X", ypos, ccType[cc], link);
177 }
178
179 WriteLog("\n");
180
181 if ((lo & 0x07) == 0)
182 DumpFixedObject(OPLoadPhrase(olp+i), OPLoadPhrase(olp+i+8));
183
184 if ((lo & 0x07) == 1)
185 DumpScaledObject(OPLoadPhrase(olp+i), OPLoadPhrase(olp+i+8), OPLoadPhrase(olp+i+16));
186 }
187
188 WriteLog("\n");
189#else
190//#warning "!!! Fix lockup in OPDiscoverObjects() !!!"
191//temp, to keep the following function from locking up on bad/weird OLs
192//return;
193
194 numberOfObjects = 0;
195 OPDiscoverObjects(olp);
196 OPDumpObjectList();
197#endif
198}
199
200
201bool OPObjectExists(uint32_t address)
202{
203 // Yes, we really do a linear search, every time. :-/
204 for(uint32_t i=0; i<numberOfObjects; i++)
205 {
206 if (address == object[i])
207 return true;
208 }
209
210 return false;
211}
212
213
214void OPDiscoverObjects(uint32_t address)
215{
216 uint8_t objectType = 0;
217
218 do
219 {
220 // If we've seen this object already, bail out!
221 // Otherwise, add it to the list
222 if (OPObjectExists(address))
223 return;
224
225 object[numberOfObjects++] = address;
226
227 // Get the object & decode its type, link address
228 uint32_t hi = JaguarReadLong(address + 0, OP);
229 uint32_t lo = JaguarReadLong(address + 4, OP);
230 objectType = lo & 0x07;
231 uint32_t link = ((hi << 11) | (lo >> 21)) & 0x3FFFF8;
232
233 if (objectType == 3)
234 {
235 // Branch if YPOS < 2047 (or YPOS > 0) can be treated as a GOTO, so
236 // don't do any discovery in that case. Otherwise, have at it:
237 if (((lo & 0xFFFF) != 0x7FFB) && ((lo & 0xFFFF) != 0x8003))
238 // Recursion needed to follow all links! This does depth-first
239 // recursion on the not-taken objects
240 OPDiscoverObjects(address + 8);
241 }
242
243 // Get the next object...
244 address = link;
245 }
246 while (objectType != 4);
247}
248
249
250void OPDumpObjectList(void)
251{
252 for(uint32_t i=0; i<numberOfObjects; i++)
253 {
254 uint32_t address = object[i];
255
256 uint32_t hi = JaguarReadLong(address + 0, OP);
257 uint32_t lo = JaguarReadLong(address + 4, OP);
258 uint8_t objectType = lo & 0x07;
259 uint32_t link = ((hi << 11) | (lo >> 21)) & 0x3FFFF8;
260 WriteLog("%08X: %08X %08X %s -> $%08X", address, hi, lo, opType[objectType], link);
261
262 if (objectType == 3)
263 {
264 uint16_t ypos = (lo >> 3) & 0x7FF;
265 uint8_t cc = (lo >> 14) & 0x07; // Proper # of bits == 3
266 WriteLog(" YPOS %s %u", ccType[cc], ypos);
267 }
268
269 WriteLog("\n");
270
271 // Yes, this is how the OP finds follow-on phrases for bitmap/scaled
272 // bitmap objects...!
273 if (objectType == 0)
274 DumpFixedObject(OPLoadPhrase(address + 0),
275 OPLoadPhrase(address | 0x08));
276
277 if (objectType == 1)
278 DumpScaledObject(OPLoadPhrase(address + 0),
279 OPLoadPhrase(address | 0x08), OPLoadPhrase(address | 0x10));
280
281 if (address == link) // Ruh roh...
282 {
283 // Runaway recursive link is bad!
284 WriteLog("***** SELF REFERENTIAL LINK *****\n\n");
285 }
286 }
287
288 WriteLog("\n");
289}
290
291
292//
293// Object Processor memory access
294// Memory range: F00010 - F00027
295//
296// F00010-F00017 R xxxxxxxx xxxxxxxx OB - current object code from the graphics processor
297// F00020-F00023 W xxxxxxxx xxxxxxxx OLP - start of the object list
298// F00026 W -------- -------x OBF - object processor flag
299//
300
301#if 0
302uint8_t OPReadByte(uint32_t offset, uint32_t who/*=UNKNOWN*/)
303{
304 offset &= 0x3F;
305 return objectp_ram[offset];
306}
307
308uint16_t OPReadWord(uint32_t offset, uint32_t who/*=UNKNOWN*/)
309{
310 offset &= 0x3F;
311 return GET16(objectp_ram, offset);
312}
313
314void OPWriteByte(uint32_t offset, uint8_t data, uint32_t who/*=UNKNOWN*/)
315{
316 offset &= 0x3F;
317 objectp_ram[offset] = data;
318}
319
320void OPWriteWord(uint32_t offset, uint16_t data, uint32_t who/*=UNKNOWN*/)
321{
322 offset &= 0x3F;
323 SET16(objectp_ram, offset, data);
324
325/*if (offset == 0x20)
326WriteLog("OP: Setting lo list pointer: %04X\n", data);
327if (offset == 0x22)
328WriteLog("OP: Setting hi list pointer: %04X\n", data);//*/
329}
330#endif
331
332
333uint32_t OPGetListPointer(void)
334{
335 // Note: This register is LO / HI WORD, hence the funky look of this...
336 return GET16(tomRam8, 0x20) | (GET16(tomRam8, 0x22) << 16);
337}
338
339
340// This is WRONG, since the OBF is only 16 bits wide!!! [FIXED]
341
342uint32_t OPGetStatusRegister(void)
343{
344 return GET16(tomRam8, 0x26);
345}
346
347
348// This is WRONG, since the OBF is only 16 bits wide!!! [FIXED]
349
350void OPSetStatusRegister(uint32_t data)
351{
352 tomRam8[0x26] = (data & 0x0000FF00) >> 8;
353 tomRam8[0x27] |= (data & 0xFE);
354}
355
356
357void OPSetCurrentObject(uint64_t object)
358{
359//Not sure this is right... Wouldn't it just be stored 64 bit BE?
360 // Stored as least significant 32 bits first, ms32 last in big endian
361/* objectp_ram[0x13] = object & 0xFF; object >>= 8;
362 objectp_ram[0x12] = object & 0xFF; object >>= 8;
363 objectp_ram[0x11] = object & 0xFF; object >>= 8;
364 objectp_ram[0x10] = object & 0xFF; object >>= 8;
365
366 objectp_ram[0x17] = object & 0xFF; object >>= 8;
367 objectp_ram[0x16] = object & 0xFF; object >>= 8;
368 objectp_ram[0x15] = object & 0xFF; object >>= 8;
369 objectp_ram[0x14] = object & 0xFF;*/
370// Let's try regular good old big endian...
371 tomRam8[0x17] = object & 0xFF; object >>= 8;
372 tomRam8[0x16] = object & 0xFF; object >>= 8;
373 tomRam8[0x15] = object & 0xFF; object >>= 8;
374 tomRam8[0x14] = object & 0xFF; object >>= 8;
375
376 tomRam8[0x13] = object & 0xFF; object >>= 8;
377 tomRam8[0x12] = object & 0xFF; object >>= 8;
378 tomRam8[0x11] = object & 0xFF; object >>= 8;
379 tomRam8[0x10] = object & 0xFF;
380}
381
382
383uint64_t OPLoadPhrase(uint32_t offset)
384{
385 offset &= ~0x07; // 8 byte alignment
386 return ((uint64_t)JaguarReadLong(offset, OP) << 32) | (uint64_t)JaguarReadLong(offset+4, OP);
387}
388
389
390void OPStorePhrase(uint32_t offset, uint64_t p)
391{
392 offset &= ~0x07; // 8 byte alignment
393 JaguarWriteLong(offset, p >> 32, OP);
394 JaguarWriteLong(offset + 4, p & 0xFFFFFFFF, OP);
395}
396
397
398//
399// Debugging routines
400//
401void DumpScaledObject(uint64_t p0, uint64_t p1, uint64_t p2)
402{
403 WriteLog(" %08X %08X\n", (uint32_t)(p1>>32), (uint32_t)(p1&0xFFFFFFFF));
404 WriteLog(" %08X %08X\n", (uint32_t)(p2>>32), (uint32_t)(p2&0xFFFFFFFF));
405 DumpBitmapCore(p0, p1);
406 uint32_t hscale = p2 & 0xFF;
407 uint32_t vscale = (p2 >> 8) & 0xFF;
408 uint32_t remainder = (p2 >> 16) & 0xFF;
409 WriteLog(" [hsc: %02X, vsc: %02X, rem: %02X]\n", hscale, vscale, remainder);
410}
411
412
413void DumpFixedObject(uint64_t p0, uint64_t p1)
414{
415 WriteLog(" %08X %08X\n", (uint32_t)(p1>>32), (uint32_t)(p1&0xFFFFFFFF));
416 DumpBitmapCore(p0, p1);
417}
418
419
420void DumpBitmapCore(uint64_t p0, uint64_t p1)
421{
422 uint32_t bdMultiplier[8] = { 64, 32, 16, 8, 4, 2, 1, 1 };
423 uint8_t bitdepth = (p1 >> 12) & 0x07;
424//WAS: int16_t ypos = ((p0 >> 3) & 0x3FF); // ??? What if not interlaced (/2)?
425 int16_t ypos = ((p0 >> 3) & 0x7FF); // ??? What if not interlaced (/2)?
426 int32_t xpos = p1 & 0xFFF;
427 xpos = (xpos & 0x800 ? xpos | 0xFFFFF000 : xpos); // Sign extend that mutha!
428 uint32_t iwidth = ((p1 >> 28) & 0x3FF);
429 uint32_t dwidth = ((p1 >> 18) & 0x3FF); // Unsigned!
430 uint16_t height = ((p0 >> 14) & 0x3FF);
431 uint32_t link = ((p0 >> 24) & 0x7FFFF) << 3;
432 uint32_t ptr = ((p0 >> 43) & 0x1FFFFF) << 3;
433 uint32_t firstPix = (p1 >> 49) & 0x3F;
434 uint8_t flags = (p1 >> 45) & 0x0F;
435 uint8_t idx = (p1 >> 38) & 0x7F;
436 uint32_t pitch = (p1 >> 15) & 0x07;
437 WriteLog(" [%u x %u @ (%i, %u) (iw:%u, dw:%u) (%u bpp), p:%08X fp:%02X, fl:%s%s%s%s, idx:%02X, pt:%02X]\n",
438 iwidth * bdMultiplier[bitdepth],
439 height, xpos, ypos, iwidth, dwidth, op_bitmap_bit_depth[bitdepth],
440 ptr, firstPix, (flags&OPFLAG_REFLECT ? "REFLECT " : ""),
441 (flags&OPFLAG_RMW ? "RMW " : ""), (flags&OPFLAG_TRANS ? "TRANS " : ""),
442 (flags&OPFLAG_RELEASE ? "RELEASE" : ""), idx, pitch);
443}
444
445
446//
447// Object Processor main routine
448//
449#ifdef _MSC_VER
450#pragma message("Warning: Need to fix this so that when an GPU object IRQ happens, we can pick up OP processing where we left off. !!! FIX !!!")
451#else
452#warning "Need to fix this so that when an GPU object IRQ happens, we can pick up OP processing where we left off. !!! FIX !!!"
453#endif // _MSC_VER
// Runs the object list for one half-line.
//
// Starting at the address returned by OPGetListPointer(), one object is
// handled per loop iteration, selected by the low 3 bits of its first phrase:
//  - BITMAP / SCALED BITMAP: drawn via OPProcessFixedBitmap() /
//    OPProcessScaledBitmap() when halfline >= YPOS and height > 0, followed by
//    the write-back of the updated height/data pointer (and remainder for
//    scaled objects) into the object itself; then the link is followed.
//  - GPU: the phrase is stored as the current object and GPU IRQ line 3 is
//    asserted; processing then continues with the next phrase.
//  - BRANCH: the link is followed when the condition code is satisfied.
//  - STOP: the phrase is stored as the current object, an interrupt is raised
//    when bit 3 of the phrase is set and IRQ_OPFLAG is enabled, and the
//    function returns.
//
// Parameters:
//  halfline - current half-line; only the low 11 bits are used
//  render   - passed through unchanged to the bitmap processing routines
//
// The loop also ends when op_pointer becomes zero or after 30000 iterations
// (a safety limit against malformed lists).
454void OPProcessList(int halfline, bool render)
455{
456#ifdef _MSC_VER
457#pragma message("Warning: !!! NEED TO HANDLE MULTIPLE FIELDS PROPERLY !!!")
458#else
459#warning "!!! NEED TO HANDLE MULTIPLE FIELDS PROPERLY !!!"
460#endif // _MSC_VER
461// We ignore them, for now; not good D-:
462// N.B.: Half-lines are exactly that, half-lines. When in interlaced mode, it
463// draws the screen exactly the same way as it does in non, one line at a
464// time. The only way you know you're in field #2 is that the topmost bit
465// of VC is set. Half-line mode is so you can draw higher horizontal
466// resolutions than you normally could, as the line buffer is only 720
467// pixels wide...
468 halfline &= 0x7FF;
469
470extern int op_start_log;
471
472 op_pointer = OPGetListPointer();
473
474// objectp_stop_reading_list = false;
475
476//WriteLog("OP: Processing line #%u (OLP=%08X)...\n", halfline, op_pointer);
477//op_done();
478
479// *** BEGIN OP PROCESSOR TESTING ONLY ***
480extern bool interactiveMode;
481extern bool iToggle;
482extern int objectPtr;
483bool inhibit;
484int bitmapCounter = 0;
485// *** END OP PROCESSOR TESTING ONLY ***
486
487 uint32_t opCyclesToRun = 30000; // This is a pulled-out-of-the-air value (will need to be fixed, obviously!)
488
489// if (op_pointer) WriteLog(" new op list at 0x%.8x halfline %i\n",op_pointer,halfline);
490 while (op_pointer)
491 {
492// *** BEGIN OP PROCESSOR TESTING ONLY ***
// In interactive mode, the bitmap whose ordinal matches objectPtr can be
// suppressed; inhibit is recomputed for every object.
493if (interactiveMode && bitmapCounter == objectPtr)
494 inhibit = iToggle;
495else
496 inhibit = false;
497// *** END OP PROCESSOR TESTING ONLY ***
498// if (objectp_stop_reading_list)
499// return;
500
// Fetch the object's first phrase; op_pointer now addresses the phrase after it
501 uint64_t p0 = OPLoadPhrase(op_pointer);
502 op_pointer += 8;
503//WriteLog("\t%08X type %i\n", op_pointer, (uint8_t)p0 & 0x07);
504
505#if 1
// Diagnostic logging only: active when op_start_log is set and the half-line
// equals the value returned by TOMGetVDB(). Nothing in this section changes
// OP state.
506if (halfline == TOMGetVDB() && op_start_log)
507//if (halfline == 215 && op_start_log)
508//if (halfline == 28 && op_start_log)
509//if (halfline == 0)
510{
511WriteLog("%08X --> phrase %08X %08X", op_pointer - 8, (int)(p0>>32), (int)(p0&0xFFFFFFFF));
512if ((p0 & 0x07) == OBJECT_TYPE_BITMAP)
513{
514WriteLog(" (BITMAP) ");
515uint64_t p1 = OPLoadPhrase(op_pointer);
516WriteLog("\n%08X --> phrase %08X %08X ", op_pointer, (int)(p1>>32), (int)(p1&0xFFFFFFFF));
517 uint8_t bitdepth = (p1 >> 12) & 0x07;
518//WAS: int16_t ypos = ((p0 >> 3) & 0x3FF); // ??? What if not interlaced (/2)?
519 int16_t ypos = ((p0 >> 3) & 0x7FF); // ??? What if not interlaced (/2)?
520int32_t xpos = p1 & 0xFFF;
521xpos = (xpos & 0x800 ? xpos | 0xFFFFF000 : xpos);
522 uint32_t iwidth = ((p1 >> 28) & 0x3FF);
523 uint32_t dwidth = ((p1 >> 18) & 0x3FF); // Unsigned!
524 uint16_t height = ((p0 >> 14) & 0x3FF);
525 uint32_t link = ((p0 >> 24) & 0x7FFFF) << 3;
526 uint32_t ptr = ((p0 >> 43) & 0x1FFFFF) << 3;
527 uint32_t firstPix = (p1 >> 49) & 0x3F;
528 uint8_t flags = (p1 >> 45) & 0x0F;
529 uint8_t idx = (p1 >> 38) & 0x7F;
530 uint32_t pitch = (p1 >> 15) & 0x07;
531WriteLog("\n [%u (%u) x %u @ (%i, %u) (%u bpp), l: %08X, p: %08X fp: %02X, fl:%s%s%s%s, idx:%02X, pt:%02X]\n",
532 iwidth, dwidth, height, xpos, ypos, op_bitmap_bit_depth[bitdepth], link, ptr, firstPix, (flags&OPFLAG_REFLECT ? "REFLECT " : ""), (flags&OPFLAG_RMW ? "RMW " : ""), (flags&OPFLAG_TRANS ? "TRANS " : ""), (flags&OPFLAG_RELEASE ? "RELEASE" : ""), idx, pitch);
533}
534if ((p0 & 0x07) == OBJECT_TYPE_SCALE)
535{
536WriteLog(" (SCALED BITMAP)");
537uint64_t p1 = OPLoadPhrase(op_pointer), p2 = OPLoadPhrase(op_pointer+8);
538WriteLog("\n%08X --> phrase %08X %08X ", op_pointer, (int)(p1>>32), (int)(p1&0xFFFFFFFF));
539WriteLog("\n%08X --> phrase %08X %08X ", op_pointer+8, (int)(p2>>32), (int)(p2&0xFFFFFFFF));
540 uint8_t bitdepth = (p1 >> 12) & 0x07;
541//WAS: int16_t ypos = ((p0 >> 3) & 0x3FF); // ??? What if not interlaced (/2)?
542 int16_t ypos = ((p0 >> 3) & 0x7FF); // ??? What if not interlaced (/2)?
543int32_t xpos = p1 & 0xFFF;
544xpos = (xpos & 0x800 ? xpos | 0xFFFFF000 : xpos);
545 uint32_t iwidth = ((p1 >> 28) & 0x3FF);
546 uint32_t dwidth = ((p1 >> 18) & 0x3FF); // Unsigned!
547 uint16_t height = ((p0 >> 14) & 0x3FF);
548 uint32_t link = ((p0 >> 24) & 0x7FFFF) << 3;
549 uint32_t ptr = ((p0 >> 43) & 0x1FFFFF) << 3;
550 uint32_t firstPix = (p1 >> 49) & 0x3F;
551 uint8_t flags = (p1 >> 45) & 0x0F;
552 uint8_t idx = (p1 >> 38) & 0x7F;
553 uint32_t pitch = (p1 >> 15) & 0x07;
554WriteLog("\n [%u (%u) x %u @ (%i, %u) (%u bpp), l: %08X, p: %08X fp: %02X, fl:%s%s%s%s, idx:%02X, pt:%02X]\n",
555 iwidth, dwidth, height, xpos, ypos, op_bitmap_bit_depth[bitdepth], link, ptr, firstPix, (flags&OPFLAG_REFLECT ? "REFLECT " : ""), (flags&OPFLAG_RMW ? "RMW " : ""), (flags&OPFLAG_TRANS ? "TRANS " : ""), (flags&OPFLAG_RELEASE ? "RELEASE" : ""), idx, pitch);
556 uint32_t hscale = p2 & 0xFF;
557 uint32_t vscale = (p2 >> 8) & 0xFF;
558 uint32_t remainder = (p2 >> 16) & 0xFF;
559WriteLog(" [hsc: %02X, vsc: %02X, rem: %02X]\n", hscale, vscale, remainder);
560}
561if ((p0 & 0x07) == OBJECT_TYPE_GPU)
562WriteLog(" (GPU)\n");
563if ((p0 & 0x07) == OBJECT_TYPE_BRANCH)
564{
565WriteLog(" (BRANCH)\n");
// NOTE(review): this indexes the buffer returned by GetRamPtr() directly with
// the object address and no range check -- confirm the object list can never
// lie outside that buffer while logging is enabled.
566uint8_t * jaguarMainRam = GetRamPtr();
567WriteLog("[RAM] --> ");
568for(int k=0; k<8; k++)
569 WriteLog("%02X ", jaguarMainRam[op_pointer-8 + k]);
570WriteLog("\n");
571}
572if ((p0 & 0x07) == OBJECT_TYPE_STOP)
573WriteLog(" --> List end\n\n");
574}
575#endif
576
577 switch ((uint8_t)p0 & 0x07)
578 {
579 case OBJECT_TYPE_BITMAP:
580 {
581 uint16_t ypos = (p0 >> 3) & 0x7FF;
582// This is only theory implied by Rayman...!
583// It seems that if the YPOS is zero, then bump the YPOS value so that it
584// coincides with the VDB value. With interlacing, this would be slightly more
585// tricky. There's probably another bit somewhere that enables this mode--but
586// so far, doesn't seem to affect any other game in a negative way (that I've
587// seen). Either that, or it's an undocumented bug...
588
589//No, the reason this was needed is that the OP code before was wrong. Any value
590//less than VDB will get written to the top line of the display!
591#if 0
592// Not so sure... Let's see what happens here...
593// No change...
594 if (ypos == 0)
595 ypos = TOMReadWord(0xF00046, OP) / 2; // Get the VDB value
596#endif
597// Actually, no. Any item less than VDB will get only the lines that hang over
598// VDB displayed. Actually, this is incorrect. It seems that VDB value is wrong
599// somewhere and that's what's causing things to fuck up. Still no idea why.
600
601 uint32_t height = (p0 & 0xFFC000) >> 14;
602 uint32_t oldOPP = op_pointer - 8;
603// *** BEGIN OP PROCESSOR TESTING ONLY ***
604if (inhibit && op_start_log)
605 WriteLog("!!! ^^^ This object is INHIBITED! ^^^ !!!\n");
606bitmapCounter++;
607if (!inhibit) // For OP testing only!
608// *** END OP PROCESSOR TESTING ONLY ***
609 if (halfline >= ypos && height > 0)
610 {
611 // Believe it or not, this is what the OP actually does...
612 // which is why they're required to be on a dphrase boundary!
613 uint64_t p1 = OPLoadPhrase(oldOPP | 0x08);
614//unneeded op_pointer += 8;
615//WriteLog("OP: Writing halfline %d with ypos == %d...\n", halfline, ypos);
616//WriteLog("--> Writing %u BPP bitmap...\n", op_bitmap_bit_depth[(p1 >> 12) & 0x07]);
617// OPProcessFixedBitmap(halfline, p0, p1, render);
618 OPProcessFixedBitmap(p0, p1, render);
619
620 // OP write-backs
// One line was consumed: decrement the height and advance the data pointer
// field by the data width, then store the modified first phrase back.
621
622 height--;
623
624 uint64_t data = (p0 & 0xFFFFF80000000000LL) >> 40;
625 uint64_t dwidth = (p1 & 0xFFC0000) >> 15;
626 data += dwidth;
627
628 p0 &= ~0xFFFFF80000FFC000LL; // Mask out old data...
629 p0 |= (uint64_t)height << 14;
630 p0 |= data << 40;
631 OPStorePhrase(oldOPP, p0);
632 }
633
634 // OP bottom 3 bits are hardwired to zero. The link address
635 // reflects this, so we only need the top 19 bits of the address
636 // (which is why we only shift 21, and not 24).
637 op_pointer = (p0 & 0x000007FFFF000000LL) >> 21;
638
639 // KLUDGE: Seems that memory access is mirrored in the first 8MB of
640 // memory...
641 if (op_pointer > 0x1FFFFF && op_pointer < 0x800000)
642 op_pointer &= 0xFF1FFFFF; // Knock out bits 21-23
643
644 break;
645 }
646 case OBJECT_TYPE_SCALE:
647 {
648//WAS: uint16_t ypos = (p0 >> 3) & 0x3FF;
649 uint16_t ypos = (p0 >> 3) & 0x7FF;
650 uint32_t height = (p0 & 0xFFC000) >> 14;
651 uint32_t oldOPP = op_pointer - 8;
652//WriteLog("OP: Scaled Object (ypos=%04X, height=%04X", ypos, height);
653// *** BEGIN OP PROCESSOR TESTING ONLY ***
654if (inhibit && op_start_log)
655{
656 WriteLog("!!! ^^^ This object is INHIBITED! ^^^ !!! (halfline=%u, ypos=%u, height=%u)\n", halfline, ypos, height);
657 DumpScaledObject(p0, OPLoadPhrase(op_pointer), OPLoadPhrase(op_pointer+8));
658}
659bitmapCounter++;
660if (!inhibit) // For OP testing only!
661// *** END OP PROCESSOR TESTING ONLY ***
662 if (halfline >= ypos && height > 0)
663 {
664 // Believe it or not, this is what the OP actually does...
665 // which is why they're required to be on a qphrase boundary!
666 uint64_t p1 = OPLoadPhrase(oldOPP | 0x08);
667 uint64_t p2 = OPLoadPhrase(oldOPP | 0x10);
668//unneeded op_pointer += 16;
669 OPProcessScaledBitmap(p0, p1, p2, render);
670
671 // OP write-backs
672
673 uint16_t remainder = (p2 >> 16) & 0xFF;//, vscale = p2 >> 8;
674 uint8_t /*remainder = p2 >> 16,*/ vscale = p2 >> 8;
675//Actually, we should skip this object if it has a vscale of zero.
676//Or do we? Not sure... Atari Karts has a few lines that look like:
677// (SCALED BITMAP)
678//000E8268 --> phrase 00010000 7000B00D
679// [7 (0) x 1 @ (13, 0) (8 bpp), l: 000E82A0, p: 000E0FC0 fp: 00, fl:RELEASE, idx:00, pt:01]
680// [hsc: 9A, vsc: 00, rem: 00]
681// Could it be the vscale is overridden if the DWIDTH is zero? Hmm...
682//WriteLog("OP: Scaled bitmap processing (rem=%02X, vscale=%02X)...\n", remainder, vscale);//*/
683
// A zero vscale is replaced by 0x20 (1.0 in the 3.5 fixed point format used
// here); this also guarantees that the while loop below makes progress.
684 if (vscale == 0)
685 vscale = 0x20; // OP bug??? Nope, it isn't...! Or is it?
686
687//extern int start_logging;
688//if (start_logging)
689// WriteLog("--> Returned from scaled bitmap processing (rem=%02X, vscale=%02X)...\n", remainder, vscale);//*/
690//Locks up here:
691//--> Returned from scaled bitmap processing (rem=20, vscale=80)...
692//There are other problems here, it looks like...
693//Another lock up:
694//About to execute OP (508)...
695/*
Excerpt of the log captured for that lock up (the same two 4bpp scaled bitmaps
were processed over and over, with the remainder cycling):
OP: Scaled bitmap 4x? 4bpp at 38,? hscale=7C fpix=0 data=00075E28 pitch 1 hflipped=no dwidth=? (linked to 00071118) Transluency=no
--> Returned from scaled bitmap processing (rem=50, vscale=7C)...
--> Returned from scaled bitmap processing (rem=30, vscale=7C)...
--> Returned from scaled bitmap processing (rem=10, vscale=7C)...
OP: Scaled bitmap 4x? 4bpp at 36,? hscale=7E fpix=0 data=000756A8 pitch 1 hflipped=no dwidth=? (linked to 00073058) Transluency=no
--> Returned from scaled bitmap processing (rem=00, vscale=7E)...
OP: Scaled bitmap 4x? 4bpp at 34,? hscale=80 fpix=0 data=000756C8 pitch 1 hflipped=no dwidth=? (linked to 00073078) Transluency=no
--> Returned from scaled bitmap processing (rem=00, vscale=80)...
OP: Scaled bitmap 4x? 4bpp at 36,? hscale=7E fpix=0 data=000756C8 pitch 1 hflipped=no dwidth=? (linked to 00073058) Transluency=no
--> Returned from scaled bitmap processing (rem=5E, vscale=7E)...
OP: Scaled bitmap 4x? 4bpp at 34,? hscale=80 fpix=0 data=000756E8 pitch 1 hflipped=no dwidth=? (linked to 00073078) Transluency=no
--> Returned from scaled bitmap processing (rem=60, vscale=80)...
...
--> Returned from scaled bitmap processing (rem=1E, vscale=7E)...
--> Returned from scaled bitmap processing (rem=20, vscale=80)...
718*/
719//Here's another problem:
720// [hsc: 20, vsc: 20, rem: 00]
721// Since we're not checking for $E0 (but that's what we get from the above), we
722// end up repeating this halfline unnecessarily... !!! FIX !!! [DONE, but...
723// still not quite right. Either that, or the Accolade team that wrote Bubsy
724// screwed up royal.]
725//Also note: $E0 = 7.0 which IS a legal vscale value...
726
727// if (remainder & 0x80) // I.e., it's negative
728// if ((remainder & 0x80) || remainder == 0) // I.e., it's <= 0
729// if ((remainder - 1) >= 0xE0) // I.e., it's <= 0
730// if ((remainder >= 0xE1) || remainder == 0)// I.e., it's <= 0
731// if ((remainder >= 0xE1 && remainder <= 0xFF) || remainder == 0)// I.e., it's <= 0
732// if (remainder <= 0x20) // I.e., it's <= 1.0
733 // I.e., it's < 1.0f -> means it'll go negative when we subtract 1.0f.
734 if (remainder < 0x20)
735 {
736 uint64_t data = (p0 & 0xFFFFF80000000000LL) >> 40;
737 uint64_t dwidth = (p1 & 0xFFC0000) >> 15;
738
739// while (remainder & 0x80)
740// while ((remainder & 0x80) || remainder == 0)
741// while ((remainder - 1) >= 0xE0)
742// while ((remainder >= 0xE1) || remainder == 0)
743// while ((remainder >= 0xE1 && remainder <= 0xFF) || remainder == 0)
744// while (remainder <= 0x20)
// Each pass adds vscale to the remainder, consumes one source line (height is
// not decremented below zero) and advances the data pointer by the data width.
745 while (remainder < 0x20)
746 {
747 remainder += vscale;
748
749 if (height)
750 height--;
751
752 data += dwidth;
753 }
754
755 p0 &= ~0xFFFFF80000FFC000LL; // Mask out old data...
756 p0 |= (uint64_t)height << 14;
757 p0 |= data << 40;
758 OPStorePhrase(oldOPP, p0);
759 }
760
761 remainder -= 0x20; // 1.0f in [3.5] fixed point format
762
763//if (start_logging)
764// WriteLog("--> Finished writebacks...\n");//*/
765
766//WriteLog(" [%08X%08X -> ", (uint32_t)(p2>>32), (uint32_t)(p2&0xFFFFFFFF));
// Store the updated remainder back into bits 16-23 of the third phrase
767 p2 &= ~0x0000000000FF0000LL;
768 p2 |= (uint64_t)remainder << 16;
769//WriteLog("%08X%08X]\n", (uint32_t)(p2>>32), (uint32_t)(p2&0xFFFFFFFF));
770 OPStorePhrase(oldOPP + 16, p2);
771//remainder = (uint8_t)(p2 >> 16), vscale = (uint8_t)(p2 >> 8);
772//WriteLog(" [after]: rem=%02X, vscale=%02X\n", remainder, vscale);
773 }
774
775 // OP bottom 3 bits are hardwired to zero. The link address
776 // reflects this, so we only need the top 19 bits of the address
777 // (which is why we only shift 21, and not 24).
778 op_pointer = (p0 & 0x000007FFFF000000LL) >> 21;
779
780 // KLUDGE: Seems that memory access is mirrored in the first 8MB of
781 // memory...
782 if (op_pointer > 0x1FFFFF && op_pointer < 0x800000)
783 op_pointer &= 0xFF1FFFFF; // Knock out bits 21-23
784
785 break;
786 }
787 case OBJECT_TYPE_GPU:
788 {
789//WriteLog("OP: Asserting GPU IRQ #3...\n");
790#ifdef _MSC_VER
791#pragma message("Warning: Need to fix OP GPU IRQ handling! !!! FIX !!!")
792#else
793#warning "Need to fix OP GPU IRQ handling! !!! FIX !!!"
794#endif // _MSC_VER
795 OPSetCurrentObject(p0);
796 GPUSetIRQLine(3, ASSERT_LINE);
797//Also, OP processing is suspended from this point until OBF (F00026) is written to...
798// !!! FIX !!!
799//Do something like:
800//OPSuspendedByGPU = true;
801//Dunno if the OP keeps processing from where it was interrupted, or if it just continues
802//on the next halfline...
803// --> It continues from where it was interrupted! !!! FIX !!!
// op_pointer is left untouched here, so the loop carries on with the phrase
// that follows this object.
804 break;
805 }
806 case OBJECT_TYPE_BRANCH:
807 {
808 uint16_t ypos = (p0 >> 3) & 0x7FF;
809 // JTRM is wrong: CC is bits 14-16 (3 bits, *not* 2)
810 uint8_t cc = (p0 >> 14) & 0x07;
811 uint32_t link = (p0 >> 21) & 0x3FFFF8;
812
// When the condition does not hold, op_pointer keeps pointing at the phrase
// after the branch object (the not-taken path).
813 switch (cc)
814 {
815 case CONDITION_EQUAL:
816 if (halfline == ypos || ypos == 0x7FF)
817 op_pointer = link;
818 break;
819 case CONDITION_LESS_THAN:
820 if (halfline < ypos)
821 op_pointer = link;
822 break;
823 case CONDITION_GREATER_THAN:
824 if (halfline > ypos)
825 op_pointer = link;
826 break;
827 case CONDITION_OP_FLAG_SET:
828 if (OPGetStatusRegister() & 0x01)
829 op_pointer = link;
830 break;
831 case CONDITION_SECOND_HALF_LINE:
832 // Branch if bit 10 of HC is set...
833 if (TOMGetHC() & 0x0400)
834 op_pointer = link;
835 break;
836 default:
837 // Basically, if you do this, the OP does nothing. :-)
838 WriteLog("OP: Unimplemented branch condition %i\n", cc);
839 }
840 break;
841 }
842 case OBJECT_TYPE_STOP:
843 {
844 OPSetCurrentObject(p0);
845
846 if ((p0 & 0x08) && TOMIRQEnabled(IRQ_OPFLAG))
847 {
848 TOMSetPendingObjectInt();
849 m68k_set_irq(2); // Cause a 68K IPL 2 to occur...
850 }
851
852 // Bail out, we're done...
853 return;
854 }
855 default:
856 WriteLog("OP: Unknown object type %i\n", (uint8_t)p0 & 0x07);
857 }
858
859 // Here is a little sanity check to keep the OP from locking up the
860 // machine when fed bad data. Better would be to count how many actual
861 // cycles it used and bail out/reenter to properly simulate an
862 // overloaded OP... !!! FIX !!!
863#ifdef _MSC_VER
864#pragma message("Warning: Better would be to count how many actual cycles it used and bail out/reenter to properly simulate an overloaded OP... !!! FIX !!!")
865#else
866#warning "Better would be to count how many actual cycles it used and bail out/reenter to properly simulate an overloaded OP... !!! FIX !!!"
867#endif // _MSC_VER
// The counter drops by one per object processed, whatever its type
868 opCyclesToRun--;
869
870 if (!opCyclesToRun)
871 return;
872 }
873}
874
875
876//
877// Store fixed size bitmap in line buffer
878//
879void OPProcessFixedBitmap(uint64_t p0, uint64_t p1, bool render)
880{
881// Need to make sure that when writing that it stays within the line buffer...
882// LBUF ($F01800 - $F01D9E) 360 x 32-bit RAM
883 uint8_t depth = (p1 >> 12) & 0x07; // Color depth of image
884 int32_t xpos = ((int16_t)((p1 << 4) & 0xFFFF)) >> 4;// Image xpos in LBUF
885 uint32_t iwidth = (p1 >> 28) & 0x3FF; // Image width in *phrases*
886 uint32_t data = (p0 >> 40) & 0xFFFFF8; // Pixel data address
887 uint32_t firstPix = (p1 >> 49) & 0x3F;
888 // "The LSB is significant only for scaled objects..." -JTRM
889 // "In 1 BPP mode, all five bits are significant. In 2 BPP mode, the top
890 // four are significant..."
891 firstPix &= 0x3E;
892
893// We can ignore the RELEASE (high order) bit for now--probably forever...!
894// uint8_t flags = (p1 >> 45) & 0x0F; // REFLECT, RMW, TRANS, RELEASE
895//Optimize: break these out to their own BOOL values
896 uint8_t flags = (p1 >> 45) & 0x07; // REFLECT (0), RMW (1), TRANS (2)
897 bool flagREFLECT = (flags & OPFLAG_REFLECT ? true : false),
898 flagRMW = (flags & OPFLAG_RMW ? true : false),
899 flagTRANS = (flags & OPFLAG_TRANS ? true : false);
900// "For images with 1 to 4 bits/pixel the top 7 to 4 bits of the index
901// provide the most significant bits of the palette address."
902 uint8_t index = (p1 >> 37) & 0xFE; // CLUT index offset (upper pix, 1-4 bpp)
903 uint32_t pitch = (p1 >> 15) & 0x07; // Phrase pitch
904 pitch <<= 3; // Optimization: Multiply pitch by 8
905
906// int16_t scanlineWidth = tom_getVideoModeWidth();
907 uint8_t * tomRam8 = TOMGetRamPointer();
908 uint8_t * paletteRAM = &tomRam8[0x400];
909 // This is OK as long as it's used correctly: For 16-bit RAM to RAM direct
910 // copies--NOT for use when using endian-corrected data (i.e., any of the
911 // *_word_read functions!)
912 uint16_t * paletteRAM16 = (uint16_t *)paletteRAM;
913
914// WriteLog("bitmap %ix? %ibpp at %i,? firstpix=? data=0x%.8x pitch %i hflipped=%s dwidth=? (linked to ?) RMW=%s Tranparent=%s\n",
915// iwidth, op_bitmap_bit_depth[bitdepth], xpos, ptr, pitch, (flags&OPFLAG_REFLECT ? "yes" : "no"), (flags&OPFLAG_RMW ? "yes" : "no"), (flags&OPFLAG_TRANS ? "yes" : "no"));
916
917// Is it OK to have a 0 for the data width??? (i.e., undocumented?)
918// Seems to be... Seems that dwidth *can* be zero (i.e., reuse same line) as
919// well.
920// Pitch == 0 is OK too...
921
922//kludge: Seems that the OP treats iwidth == 0 as iwidth == 1... Need to
923// investigate on real hardware...
924#ifdef _MSC_VER
925#pragma message("Warning: !!! Need to investigate iwidth == 0 behavior on real hardware !!!")
926#else
927#warning "!!! Need to investigate iwidth == 0 behavior on real hardware !!!"
928#endif // _MSC_VER
929if (iwidth == 0)
930 iwidth = 1;
931
932// if (!render || op_pointer == 0 || ptr == 0 || pitch == 0)
933//I'm not convinced that we need to concern ourselves with data & op_pointer
934//here either!
935 if (!render || iwidth == 0)
936 return;
937
938//OK, so we know the position in the line buffer is correct. It's the clipping
939//in 24bpp mode that's wrong!
940#if 0
941//This is a total kludge, based upon the fact that 24BPP mode puts *4* bytes
942//into the line buffer for each pixel.
943if (depth == 5) // i.e., 24bpp mode...
944 xpos >>= 1; // Cut it in half...
945#endif
946
947//#define OP_DEBUG_BMP
948//#ifdef OP_DEBUG_BMP
949// WriteLog("bitmap %ix%i %ibpp at %i,%i firstpix=%i data=0x%.8x pitch %i hflipped=%s dwidth=%i (linked to 0x%.8x) Transluency=%s\n",
950// iwidth, height, op_bitmap_bit_depth[bitdepth], xpos, ypos, firstPix, ptr, pitch, (flags&OPFLAG_REFLECT ? "yes" : "no"), dwidth, op_pointer, (flags&OPFLAG_RMW ? "yes" : "no"));
951//#endif
952
953// int32_t leftMargin = xpos, rightMargin = (xpos + (phraseWidthToPixels[depth] * iwidth)) - 1;
954 int32_t startPos = xpos, endPos = xpos +
955 (!flagREFLECT ? (phraseWidthToPixels[depth] * iwidth) - 1
956 : -((phraseWidthToPixels[depth] * iwidth) + 1));
957 uint32_t clippedWidth = 0, phraseClippedWidth = 0, dataClippedWidth = 0;//, phrasePixel = 0;
958 bool in24BPPMode = (((GET16(tomRam8, 0x0028) >> 1) & 0x03) == 1 ? true : false); // VMODE
959 // This is correct, the OP line buffer is a constant size...
960 int32_t limit = 720;
961 int32_t lbufWidth = 719;
962
963 // If the image is completely to the left or right of the line buffer, then
964 // bail.
965//If in REFLECT mode, then these values are swapped! !!! FIX !!! [DONE]
966//There are four possibilities:
967// 1. image sits on left edge and no REFLECT; starts out of bounds but ends in bounds.
968// 2. image sits on left edge and REFLECT; starts in bounds but ends out of bounds.
969// 3. image sits on right edge and REFLECT; starts out of bounds but ends in bounds.
970// 4. image sits on right edge and no REFLECT; starts in bounds but ends out of bounds.
971//Numbers 2 & 4 can be caught by checking the LBUF clip while in the inner loop,
972// numbers 1 & 3 are of concern.
973// This *indirectly* handles only cases 2 & 4! And is WRONG is REFLECT is set...!
974// if (rightMargin < 0 || leftMargin > lbufWidth)
975
976// It might be easier to swap these (if REFLECTed) and just use XPOS down below...
977// That way, you could simply set XPOS to leftMargin if !REFLECT and to rightMargin otherwise.
978// Still have to be careful with the DATA and IWIDTH values though...
979
980// if ((!flagREFLECT && (rightMargin < 0 || leftMargin > lbufWidth))
981// || (flagREFLECT && (leftMargin < 0 || rightMargin > lbufWidth)))
982// return;
983 if ((!flagREFLECT && (endPos < 0 || startPos > lbufWidth))
984 || (flagREFLECT && (startPos < 0 || endPos > lbufWidth)))
985 return;
986
987 // Otherwise, find the clip limits and clip the phrase as well...
988 // NOTE: I'm fudging here by letting the actual blit overstep the bounds of the
989 // line buffer, but it shouldn't matter since there are two unused line
990 // buffers below and nothing above and I'll at most write 8 bytes outside
991 // the line buffer... I could use a fractional clip begin/end value, but
992 // this makes the blit a *lot* more hairy. I might fix this in the future
993 // if it becomes necessary. (JLH)
994 // Probably wouldn't be *that* hairy. Just use a delta that tells the inner loop
995 // which pixel in the phrase is being written, and quit when either end of phrases
996 // is reached or line buffer extents are surpassed.
997
998//This stuff is probably wrong as well... !!! FIX !!!
999//The strange thing is that it seems to work, but that's no guarantee that it's bulletproof!
1000//Yup. Seems that JagMania doesn't work correctly with this...
1001//Dunno if this is the problem, but Atari Karts is showing *some* of the road now...
1002// if (!flagREFLECT)
1003
1004/*
1005 if (leftMargin < 0)
1006 clippedWidth = 0 - leftMargin,
1007 phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth],
1008 leftMargin = 0 - (clippedWidth % phraseWidthToPixels[depth]);
1009// leftMargin = 0;
1010
1011 if (rightMargin > lbufWidth)
1012 clippedWidth = rightMargin - lbufWidth,
1013 phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth];//,
1014// rightMargin = lbufWidth + (clippedWidth % phraseWidthToPixels[depth]);
1015// rightMargin = lbufWidth;
1016*/
1017if (depth > 5)
1018 WriteLog("OP: We're about to encounter a divide by zero error!\n");
1019 // NOTE: We're just using endPos to figure out how much, if any, to clip by.
1020 // ALSO: There may be another case where we start out of bounds and end out
1021 // of bounds...!
1022 // !!! FIX !!!
1023 if (startPos < 0) // Case #1: Begin out, end in, L to R
1024 clippedWidth = 0 - startPos,
1025 dataClippedWidth = phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth],
1026 startPos = 0 - (clippedWidth % phraseWidthToPixels[depth]);
1027
1028 if (endPos < 0) // Case #2: Begin in, end out, R to L
1029 clippedWidth = 0 - endPos,
1030 phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth];
1031
1032 if (endPos > lbufWidth) // Case #3: Begin in, end out, L to R
1033 clippedWidth = endPos - lbufWidth,
1034 phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth];
1035
1036 if (startPos > lbufWidth) // Case #4: Begin out, end in, R to L
1037 clippedWidth = startPos - lbufWidth,
1038 dataClippedWidth = phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth],
1039 startPos = lbufWidth + (clippedWidth % phraseWidthToPixels[depth]);
1040//printf("<OP:spos=%i,epos=%i]", startPos, endPos);
1041
1042 // If the image is sitting on the line buffer left or right edge, we need to compensate
1043 // by decreasing the image phrase width accordingly.
1044 iwidth -= phraseClippedWidth;
1045
1046 // Also, if we're clipping the phrase we need to make sure we're in the correct part of
1047 // the pixel data.
1048// data += phraseClippedWidth * (pitch << 3);
1049 data += dataClippedWidth * pitch;
1050
1051 // NOTE: When the bitmap is in REFLECT mode, the XPOS marks the *right* side of the
1052 // bitmap! This makes clipping & etc. MUCH, much easier...!
1053// uint32_t lbufAddress = 0x1800 + (!in24BPPMode ? leftMargin * 2 : leftMargin * 4);
1054//Why does this work right when multiplying startPos by 2 (instead of 4) for 24 BPP mode?
1055//Is this a bug in the OP?
1056//It's because in 24bpp mode, each pixel takes *4* bytes, instead of the usual 2.
1057//Though it looks like we're doing it here no matter what...
1058// uint32_t lbufAddress = 0x1800 + (!in24BPPMode ? startPos * 2 : startPos * 2);
1059//Let's try this:
1060 uint32_t lbufAddress = 0x1800 + (startPos * 2);
1061 uint8_t * currentLineBuffer = &tomRam8[lbufAddress];
1062
1063 // Render.
1064
1065// Hmm. We check above for 24 BPP mode, but don't do anything about it below...
1066// If we *were* in 24 BPP mode, how would you convert CRY to RGB24? Seems to me
1067// that if you're in CRY mode then you wouldn't be able to use 24 BPP bitmaps
1068// anyway.
1069// This seems to be the case (at least according to the Midsummer docs)...!
1070
1071// This is to test using palette zeroes instead of bit zeroes...
1072// And it seems that this is wrong, index == 0 is transparent apparently... :-/
1073//#define OP_USES_PALETTE_ZERO
1074
1075 if (depth == 0) // 1 BPP
1076 {
1077 // The LSB of flags is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1078 int32_t lbufDelta = ((int8_t)((flags << 7) & 0xFF) >> 5) | 0x02;
1079
1080 // Fetch 1st phrase...
1081 uint64_t pixels = ((uint64_t)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1082//Note that firstPix should only be honored *if* we start with the 1st phrase of the bitmap
1083//i.e., we didn't clip on the margin... !!! FIX !!!
1084 pixels <<= firstPix; // Skip first N pixels (N=firstPix)...
1085 int i = firstPix; // Start counter at right spot...
1086
1087 while (iwidth--)
1088 {
1089 while (i++ < 64)
1090 {
1091 uint8_t bit = pixels >> 63;
1092#ifndef OP_USES_PALETTE_ZERO
1093 if (flagTRANS && bit == 0)
1094#else
1095 if (flagTRANS && (paletteRAM16[index | bit] == 0))
1096#endif
1097 ; // Do nothing...
1098 else
1099 {
1100 if (!flagRMW)
1101//Optimize: Set palleteRAM16 to beginning of palette RAM + index*2 and use only [bit] as index...
1102//Won't optimize RMW case though...
1103 // This is the *only* correct use of endian-dependent code
1104 // (i.e., mem-to-mem direct copying)!
1105 *(uint16_t *)currentLineBuffer = paletteRAM16[index | bit];
1106 else
1107 *currentLineBuffer =
1108 BLEND_CR(*currentLineBuffer, paletteRAM[(index | bit) << 1]),
1109 *(currentLineBuffer + 1) =
1110 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bit) << 1) + 1]);
1111 }
1112
1113 currentLineBuffer += lbufDelta;
1114 pixels <<= 1;
1115 }
1116 i = 0;
1117 // Fetch next phrase...
1118 data += pitch;
1119 pixels = ((uint64_t)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1120 }
1121 }
1122 else if (depth == 1) // 2 BPP
1123 {
1124if (firstPix)
1125 WriteLog("OP: Fixed bitmap @ 2 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
1126 index &= 0xFC; // Top six bits form CLUT index
1127 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1128 int32_t lbufDelta = ((int8_t)((flags << 7) & 0xFF) >> 5) | 0x02;
1129
1130 while (iwidth--)
1131 {
1132 // Fetch phrase...
1133 uint64_t pixels = ((uint64_t)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1134 data += pitch;
1135
1136 for(int i=0; i<32; i++)
1137 {
1138 uint8_t bits = pixels >> 62;
1139// Seems to me that both of these are in the same endian, so we could cast it as
1140// uint16_t * and do straight across copies (what about 24 bpp? Treat it differently...)
1141// This only works for the palettized modes (1 - 8 BPP), since we actually have to
1142// copy data from memory in 16 BPP mode (or does it? Isn't this the same as the CLUT case?)
1143// No, it isn't because we read the memory in an endian safe way--this *won't* work...
1144#ifndef OP_USES_PALETTE_ZERO
1145 if (flagTRANS && bits == 0)
1146#else
1147 if (flagTRANS && (paletteRAM16[index | bits] == 0))
1148#endif
1149 ; // Do nothing...
1150 else
1151 {
1152 if (!flagRMW)
1153 *(uint16_t *)currentLineBuffer = paletteRAM16[index | bits];
1154 else
1155 *currentLineBuffer =
1156 BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
1157 *(currentLineBuffer + 1) =
1158 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
1159 }
1160
1161 currentLineBuffer += lbufDelta;
1162 pixels <<= 2;
1163 }
1164 }
1165 }
1166 else if (depth == 2) // 4 BPP
1167 {
1168if (firstPix)
1169 WriteLog("OP: Fixed bitmap @ 4 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
1170 index &= 0xF0; // Top four bits form CLUT index
1171 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1172 int32_t lbufDelta = ((int8_t)((flags << 7) & 0xFF) >> 5) | 0x02;
1173
1174 while (iwidth--)
1175 {
1176 // Fetch phrase...
1177 uint64_t pixels = ((uint64_t)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1178 data += pitch;
1179
1180 for(int i=0; i<16; i++)
1181 {
1182 uint8_t bits = pixels >> 60;
1183// Seems to me that both of these are in the same endian, so we could cast it as
1184// uint16_t * and do straight across copies (what about 24 bpp? Treat it differently...)
1185// This only works for the palettized modes (1 - 8 BPP), since we actually have to
1186// copy data from memory in 16 BPP mode (or does it? Isn't this the same as the CLUT case?)
1187// No, it isn't because we read the memory in an endian safe way--this *won't* work...
1188#ifndef OP_USES_PALETTE_ZERO
1189 if (flagTRANS && bits == 0)
1190#else
1191 if (flagTRANS && (paletteRAM16[index | bits] == 0))
1192#endif
1193 ; // Do nothing...
1194 else
1195 {
1196 if (!flagRMW)
1197 *(uint16_t *)currentLineBuffer = paletteRAM16[index | bits];
1198 else
1199 *currentLineBuffer =
1200 BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
1201 *(currentLineBuffer + 1) =
1202 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
1203 }
1204
1205 currentLineBuffer += lbufDelta;
1206 pixels <<= 4;
1207 }
1208 }
1209 }
1210 else if (depth == 3) // 8 BPP
1211 {
1212 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1213 int32_t lbufDelta = ((int8_t)((flags << 7) & 0xFF) >> 5) | 0x02;
1214
1215 // Fetch 1st phrase...
1216 uint64_t pixels = ((uint64_t)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1217//Note that firstPix should only be honored *if* we start with the 1st phrase of the bitmap
1218//i.e., we didn't clip on the margin... !!! FIX !!!
1219 firstPix &= 0x30; // Only top two bits are valid for 8 BPP
1220 pixels <<= firstPix; // Skip first N pixels (N=firstPix)...
1221 int i = firstPix >> 3; // Start counter at right spot...
1222
1223 while (iwidth--)
1224 {
1225 while (i++ < 8)
1226 {
1227 uint8_t bits = pixels >> 56;
1228// Seems to me that both of these are in the same endian, so we could cast it as
1229// uint16_t * and do straight across copies (what about 24 bpp? Treat it differently...)
1230// This only works for the palettized modes (1 - 8 BPP), since we actually have to
1231// copy data from memory in 16 BPP mode (or does it? Isn't this the same as the CLUT case?)
1232// No, it isn't because we read the memory in an endian safe way--this *won't* work...
1233//This would seem to be problematic...
1234//Because it's the palette entry being zero that makes the pixel transparent...
1235//Let's try it and see.
1236#ifndef OP_USES_PALETTE_ZERO
1237 if (flagTRANS && bits == 0)
1238#else
1239 if (flagTRANS && (paletteRAM16[bits] == 0))
1240#endif
1241 ; // Do nothing...
1242 else
1243 {
1244 if (!flagRMW)
1245 *(uint16_t *)currentLineBuffer = paletteRAM16[bits];
1246 else
1247 *currentLineBuffer =
1248 BLEND_CR(*currentLineBuffer, paletteRAM[bits << 1]),
1249 *(currentLineBuffer + 1) =
1250 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[(bits << 1) + 1]);
1251 }
1252
1253 currentLineBuffer += lbufDelta;
1254 pixels <<= 8;
1255 }
1256 i = 0;
1257 // Fetch next phrase...
1258 data += pitch;
1259 pixels = ((uint64_t)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1260 }
1261 }
1262 else if (depth == 4) // 16 BPP
1263 {
1264if (firstPix)
1265 WriteLog("OP: Fixed bitmap @ 16 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
1266 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1267 int32_t lbufDelta = ((int8_t)((flags << 7) & 0xFF) >> 5) | 0x02;
1268
1269 while (iwidth--)
1270 {
1271 // Fetch phrase...
1272 uint64_t pixels = ((uint64_t)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1273 data += pitch;
1274
1275 for(int i=0; i<4; i++)
1276 {
1277 uint8_t bitsHi = pixels >> 56, bitsLo = pixels >> 48;
1278// Seems to me that both of these are in the same endian, so we could cast it
1279// as uint16_t * and do straight across copies (what about 24 bpp? Treat it
1280// differently...) This only works for the palettized modes (1 - 8 BPP), since
1281// we actually have to copy data from memory in 16 BPP mode (or does it? Isn't
1282// this the same as the CLUT case?) No, it isn't because we read the memory in
1283// an endian safe way--it *won't* work...
1284//This doesn't seem right... Let's try the encoded black value ($8800):
1285//Apparently, CRY 0 maps to $8800...
1286 if (flagTRANS && ((bitsLo | bitsHi) == 0))
1287// if (flagTRANS && (bitsHi == 0x88) && (bitsLo == 0x00))
1288 ; // Do nothing...
1289 else
1290 {
1291 if (!flagRMW)
1292 *currentLineBuffer = bitsHi,
1293 *(currentLineBuffer + 1) = bitsLo;
1294 else
1295 *currentLineBuffer =
1296 BLEND_CR(*currentLineBuffer, bitsHi),
1297 *(currentLineBuffer + 1) =
1298 BLEND_Y(*(currentLineBuffer + 1), bitsLo);
1299 }
1300
1301 currentLineBuffer += lbufDelta;
1302 pixels <<= 16;
1303 }
1304 }
1305 }
1306 else if (depth == 5) // 24 BPP
1307 {
1308//Looks like Iron Soldier is the only game that uses 24BPP mode...
1309//There *might* be others...
1310//WriteLog("OP: Writing 24 BPP bitmap!\n");
1311if (firstPix)
1312 WriteLog("OP: Fixed bitmap @ 24 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
1313 // Not sure, but I think RMW only works with 16 BPP and below, and only in CRY mode...
1314 // The LSB of flags is OPFLAG_REFLECT, so sign extend it and OR 4 into it.
1315 int32_t lbufDelta = ((int8_t)((flags << 7) & 0xFF) >> 4) | 0x04;
1316
1317 while (iwidth--)
1318 {
1319 // Fetch phrase...
1320 uint64_t pixels = ((uint64_t)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1321 data += pitch;
1322
1323 for(int i=0; i<2; i++)
1324 {
1325 // We don't use a 32-bit var here because of endian issues...!
1326 uint8_t bits3 = pixels >> 56, bits2 = pixels >> 48,
1327 bits1 = pixels >> 40, bits0 = pixels >> 32;
1328
1329 if (flagTRANS && (bits3 | bits2 | bits1 | bits0) == 0)
1330 ; // Do nothing...
1331 else
1332 *currentLineBuffer = bits3,
1333 *(currentLineBuffer + 1) = bits2,
1334 *(currentLineBuffer + 2) = bits1,
1335 *(currentLineBuffer + 3) = bits0;
1336
1337 currentLineBuffer += lbufDelta;
1338 pixels <<= 32;
1339 }
1340 }
1341 }
1342}
1343
1344
1345//
1346// Store scaled bitmap in line buffer
1347//
1348void OPProcessScaledBitmap(uint64_t p0, uint64_t p1, uint64_t p2, bool render)
1349{
1350// Need to make sure that when writing that it stays within the line buffer...
1351// LBUF ($F01800 - $F01D9E) 360 x 32-bit RAM
1352 uint8_t depth = (p1 >> 12) & 0x07; // Color depth of image
1353 int32_t xpos = ((int16_t)((p1 << 4) & 0xFFFF)) >> 4;// Image xpos in LBUF
1354 uint32_t iwidth = (p1 >> 28) & 0x3FF; // Image width in *phrases*
1355 uint32_t data = (p0 >> 40) & 0xFFFFF8; // Pixel data address
1356//#ifdef OP_DEBUG_BMP
1357// Prolly should use this... Though not sure exactly how.
1358//Use the upper bits as an offset into the phrase depending on the BPP. That's how!
1359 uint32_t firstPix = (p1 >> 49) & 0x3F;
1360//This is WEIRD! I'm sure I saw Atari Karts request 8 BPP FIRSTPIX! What happened???
1361if (firstPix)
1362 WriteLog("OP: FIRSTPIX != 0! (Scaled BM)\n");
1363//#endif
1364// We can ignore the RELEASE (high order) bit for now--probably forever...!
1365// uint8_t flags = (p1 >> 45) & 0x0F; // REFLECT, RMW, TRANS, RELEASE
1366//Optimize: break these out to their own BOOL values [DONE]
1367 uint8_t flags = (p1 >> 45) & 0x07; // REFLECT (0), RMW (1), TRANS (2)
1368 bool flagREFLECT = (flags & OPFLAG_REFLECT ? true : false),
1369 flagRMW = (flags & OPFLAG_RMW ? true : false),
1370 flagTRANS = (flags & OPFLAG_TRANS ? true : false);
1371 uint8_t index = (p1 >> 37) & 0xFE; // CLUT index offset (upper pix, 1-4 bpp)
1372 uint32_t pitch = (p1 >> 15) & 0x07; // Phrase pitch
1373
1374 uint8_t * tomRam8 = TOMGetRamPointer();
1375 uint8_t * paletteRAM = &tomRam8[0x400];
1376 // This is OK as long as it's used correctly: For 16-bit RAM to RAM direct
1377 // copies--NOT for use when using endian-corrected data (i.e., any of the
1378 // *ReadWord functions!)
1379 uint16_t * paletteRAM16 = (uint16_t *)paletteRAM;
1380
1381 uint16_t hscale = p2 & 0xFF;
1382// Hmm. It seems that fixing the horizontal scale necessitated re-fixing this.
1383// Not sure why, but seems to be consistent with the vertical scaling now (and
1384// it may turn out to be wrong!)...
1385 uint16_t horizontalRemainder = hscale; // Not sure if it starts full, but seems reasonable [It's not!]
1386// uint8_t horizontalRemainder = 0; // Let's try zero! Seems to work! Yay! [No, it doesn't!]
1387 int32_t scaledWidthInPixels = (iwidth * phraseWidthToPixels[depth] * hscale) >> 5;
1388 uint32_t scaledPhrasePixels = (phraseWidthToPixels[depth] * hscale) >> 5;
1389
1390// WriteLog("bitmap %ix? %ibpp at %i,? firstpix=? data=0x%.8x pitch %i hflipped=%s dwidth=? (linked to ?) RMW=%s Tranparent=%s\n",
1391// iwidth, op_bitmap_bit_depth[bitdepth], xpos, ptr, pitch, (flags&OPFLAG_REFLECT ? "yes" : "no"), (flags&OPFLAG_RMW ? "yes" : "no"), (flags&OPFLAG_TRANS ? "yes" : "no"));
1392
1393// Looks like an hscale of zero means don't draw!
1394 if (!render || iwidth == 0 || hscale == 0)
1395 return;
1396
1397/*extern int start_logging;
1398if (start_logging)
1399 WriteLog("OP: Scaled bitmap %ix? %ibpp at %i,? hscale=%02X fpix=%i data=%08X pitch %i hflipped=%s dwidth=? (linked to %08X) Transluency=%s\n",
1400 iwidth, op_bitmap_bit_depth[depth], xpos, hscale, firstPix, data, pitch, (flagREFLECT ? "yes" : "no"), op_pointer, (flagRMW ? "yes" : "no"));*/
1401//#define OP_DEBUG_BMP
1402//#ifdef OP_DEBUG_BMP
1403// WriteLog("OP: Scaled bitmap %ix%i %ibpp at %i,%i firstpix=%i data=0x%.8x pitch %i hflipped=%s dwidth=%i (linked to 0x%.8x) Transluency=%s\n",
1404// iwidth, height, op_bitmap_bit_depth[bitdepth], xpos, ypos, firstPix, ptr, pitch, (flags&OPFLAG_REFLECT ? "yes" : "no"), dwidth, op_pointer, (flags&OPFLAG_RMW ? "yes" : "no"));
1405//#endif
1406
1407 int32_t startPos = xpos, endPos = xpos +
1408 (!flagREFLECT ? scaledWidthInPixels - 1 : -(scaledWidthInPixels + 1));
1409 uint32_t clippedWidth = 0, phraseClippedWidth = 0, dataClippedWidth = 0;
1410 bool in24BPPMode = (((GET16(tomRam8, 0x0028) >> 1) & 0x03) == 1 ? true : false); // VMODE
1411 // Not sure if this is Jaguar Two only location or what...
1412 // From the docs, it is... If we want to limit here we should think of something else.
1413// int32_t limit = GET16(tom_ram_8, 0x0008); // LIMIT
1414 int32_t limit = 720;
1415// int32_t lbufWidth = (!in24BPPMode ? limit - 1 : (limit / 2) - 1); // Zero based limit...
1416 int32_t lbufWidth = 719; // Zero based limit...
1417
1418 // If the image is completely to the left or right of the line buffer, then bail.
1419//If in REFLECT mode, then these values are swapped! !!! FIX !!! [DONE]
1420//There are four possibilities:
1421// 1. image sits on left edge and no REFLECT; starts out of bounds but ends in bounds.
1422// 2. image sits on left edge and REFLECT; starts in bounds but ends out of bounds.
1423// 3. image sits on right edge and REFLECT; starts out of bounds but ends in bounds.
1424// 4. image sits on right edge and no REFLECT; starts in bounds but ends out of bounds.
1425//Numbers 2 & 4 can be caught by checking the LBUF clip while in the inner loop,
1426// numbers 1 & 3 are of concern.
1427// This *indirectly* handles only cases 2 & 4! And is WRONG if REFLECT is set...!
1428// if (rightMargin < 0 || leftMargin > lbufWidth)
1429
1430// It might be easier to swap these (if REFLECTed) and just use XPOS down below...
1431// That way, you could simply set XPOS to leftMargin if !REFLECT and to rightMargin otherwise.
1432// Still have to be careful with the DATA and IWIDTH values though...
1433
1434 if ((!flagREFLECT && (endPos < 0 || startPos > lbufWidth))
1435 || (flagREFLECT && (startPos < 0 || endPos > lbufWidth)))
1436 return;
1437
1438 // Otherwise, find the clip limits and clip the phrase as well...
1439 // NOTE: I'm fudging here by letting the actual blit overstep the bounds of
1440 // the line buffer, but it shouldn't matter since there are two
1441 // unused line buffers below and nothing above and I'll at most write
1442 // 40 bytes outside the line buffer... I could use a fractional clip
1443 // begin/end value, but this makes the blit a *lot* more hairy. I
1444 // might fix this in the future if it becomes necessary. (JLH)
1445 // Probably wouldn't be *that* hairy. Just use a delta that tells the
1446 // inner loop which pixel in the phrase is being written, and quit
1447 // when either end of phrases is reached or line buffer extents are
1448 // surpassed.
1449
1450//This stuff is probably wrong as well... !!! FIX !!!
1451//The strange thing is that it seems to work, but that's no guarantee that it's
1452//bulletproof!
1453//Yup. Seems that JagMania doesn't work correctly with this...
1454//Dunno if this is the problem, but Atari Karts is showing *some* of the road
1455//now...
1456//Actually, it is! Or, it was. It doesn't seem to be clipping here, so the
1457//problem lies elsewhere! Hmm. Putting the scaling code into the 1/2/8 BPP cases
1458//seems to draw the ground a bit more accurately... Strange!
1459//It's probably a case of the REFLECT flag being set and the background being
1460//written from the right side of the screen...
1461//But no, it isn't... At least if the diagnostics are telling the truth!
1462
1463 // NOTE: We're just using endPos to figure out how much, if any, to clip by.
1464 // ALSO: There may be another case where we start out of bounds and end out
1465 // of bounds...!
1466 // !!! FIX !!!
1467
1468//There's a problem here with scaledPhrasePixels in that it can be forced to
1469//zero when the scaling factor is small. So fix it already! !!! FIX !!!
1470/*if (scaledPhrasePixels == 0)
1471{
1472 WriteLog("OP: [Scaled] We're about to encounter a divide by zero error!\n");
1473 DumpScaledObject(p0, p1, p2);
1474}//*/
1475//NOTE: I'm almost 100% sure that this is wrong... And it is! :-p
1476
1477//Try a simple example...
1478// Let's say we have a 8 BPP scanline with an hscale of $80 (4). Our xpos is -10,
1479// non-flipped. Pixels in the bitmap are XYZXYZXYZXYZXYZ.
1480// Scaled up, they would be XXXXYYYYZZZZXXXXYYYYZZZZXXXXYYYYZZZZ...
1481//
1482// Normally, we would expect this in the line buffer:
1483// ZZXXXXYYYYZZZZXXXXYYYYZZZZ...
1484//
1485// But instead we're getting:
1486// XXXXYYYYZZZZXXXXYYYYZZZZ...
1487//
1488// or are we??? It would seem so, simply by virtue of the fact that we're NOT starting
1489// on negative boundary--or are we? Hmm...
1490// cw = 10, dcw = pcw = 10 / ([8 * 4 = 32] 32) = 0, sp = -10
1491//
1492// Let's try a real world example:
1493//
1494//OP: Scaled bitmap (70, 8 BPP, spp=28) sp (-400) < 0... [new sp=-8, cw=400, dcw=pcw=14]
1495//OP: Scaled bitmap (6F, 8 BPP, spp=27) sp (-395) < 0... [new sp=-17, cw=395, dcw=pcw=14]
1496//
1497// Really, spp is 27.75 in the second case...
1498// So... If we do 395 / 27.75, we get 14. Ok so far... If we scale that against the
1499// start position (14 * 27.75), we get -6.5... NOT -17!
1500
1501//Now it seems we're working OK, at least for the first case...
1502uint32_t scaledPhrasePixelsUS = phraseWidthToPixels[depth] * hscale;
1503
1504 if (startPos < 0) // Case #1: Begin out, end in, L to R
1505{
1506extern int start_logging;
1507if (start_logging)
1508 WriteLog("OP: Scaled bitmap (%02X, %u BPP, spp=%u) start pos (%i) < 0...", hscale, op_bitmap_bit_depth[depth], scaledPhrasePixels, startPos);
1509// clippedWidth = 0 - startPos,
1510 clippedWidth = (0 - startPos) << 5,
1511// dataClippedWidth = phraseClippedWidth = clippedWidth / scaledPhrasePixels,
1512 dataClippedWidth = phraseClippedWidth = (clippedWidth / scaledPhrasePixelsUS) >> 5,
1513// startPos = 0 - (clippedWidth % scaledPhrasePixels);
1514 startPos += (dataClippedWidth * scaledPhrasePixelsUS) >> 5;
1515if (start_logging)
1516 WriteLog(" [new sp=%i, cw=%i, dcw=pcw=%i]\n", startPos, clippedWidth, dataClippedWidth);
1517}
1518
1519 if (endPos < 0) // Case #2: Begin in, end out, R to L
1520 clippedWidth = 0 - endPos,
1521 phraseClippedWidth = clippedWidth / scaledPhrasePixels;
1522
1523 if (endPos > lbufWidth) // Case #3: Begin in, end out, L to R
1524 clippedWidth = endPos - lbufWidth,
1525 phraseClippedWidth = clippedWidth / scaledPhrasePixels;
1526
1527 if (startPos > lbufWidth) // Case #4: Begin out, end in, R to L
1528 clippedWidth = startPos - lbufWidth,
1529 dataClippedWidth = phraseClippedWidth = clippedWidth / scaledPhrasePixels,
1530 startPos = lbufWidth + (clippedWidth % scaledPhrasePixels);
1531
1532extern int op_start_log;
1533if (op_start_log && clippedWidth != 0)
1534 WriteLog("OP: Clipped line. SP=%i, EP=%i, clip=%u, iwidth=%u, hscale=%02X\n", startPos, endPos, clippedWidth, iwidth, hscale);
1535if (op_start_log && startPos == 13)
1536{
1537 WriteLog("OP: Scaled line. SP=%i, EP=%i, clip=%u, iwidth=%u, hscale=%02X, depth=%u, firstPix=%u\n", startPos, endPos, clippedWidth, iwidth, hscale, depth, firstPix);
1538 DumpScaledObject(p0, p1, p2);
1539 if (iwidth == 7)
1540 {
1541 WriteLog(" %08X: ", data);
1542 for(int i=0; i<7*8; i++)
1543 WriteLog("%02X ", JaguarReadByte(data+i));
1544 WriteLog("\n");
1545 }
1546}
1547 // If the image is sitting on the line buffer left or right edge, we need to compensate
1548 // by decreasing the image phrase width accordingly.
1549 iwidth -= phraseClippedWidth;
1550
1551 // Also, if we're clipping the phrase we need to make sure we're in the correct part of
1552 // the pixel data.
1553// data += phraseClippedWidth * (pitch << 3);
1554 data += dataClippedWidth * (pitch << 3);
1555
1556 // NOTE: When the bitmap is in REFLECT mode, the XPOS marks the *right* side of the
1557 // bitmap! This makes clipping & etc. MUCH, much easier...!
1558// uint32_t lbufAddress = 0x1800 + (!in24BPPMode ? leftMargin * 2 : leftMargin * 4);
1559// uint32_t lbufAddress = 0x1800 + (!in24BPPMode ? startPos * 2 : startPos * 4);
1560 uint32_t lbufAddress = 0x1800 + startPos * 2;
1561 uint8_t * currentLineBuffer = &tomRam8[lbufAddress];
1562//uint8_t * lineBufferLowerLimit = &tom_ram_8[0x1800],
1563// * lineBufferUpperLimit = &tom_ram_8[0x1800 + 719];
1564
1565 // Render.
1566
1567// Hmm. We check above for 24 BPP mode, but don't do anything about it below...
1568// If we *were* in 24 BPP mode, how would you convert CRY to RGB24? Seems to me
1569// that if you're in CRY mode then you wouldn't be able to use 24 BPP bitmaps
1570// anyway.
1571// This seems to be the case (at least according to the Midsummer docs)...!
1572
1573 if (depth == 0) // 1 BPP
1574 {
1575if (firstPix != 0)
1576 WriteLog("OP: Scaled bitmap @ 1 BPP requesting FIRSTPIX!\n");
1577 // The LSB of flags is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1578 int32_t lbufDelta = ((int8_t)((flags << 7) & 0xFF) >> 5) | 0x02;
1579
1580 int pixCount = 0;
1581 uint64_t pixels = ((uint64_t)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1582
1583 while ((int32_t)iwidth > 0)
1584 {
1585 uint8_t bits = pixels >> 63;
1586
1587#ifndef OP_USES_PALETTE_ZERO
1588 if (flagTRANS && bits == 0)
1589#else
1590 if (flagTRANS && (paletteRAM16[index | bits] == 0))
1591#endif
1592 ; // Do nothing...
1593 else
1594 {
1595 if (!flagRMW)
1596 // This is the *only* correct use of endian-dependent code
1597 // (i.e., mem-to-mem direct copying)!
1598 *(uint16_t *)currentLineBuffer = paletteRAM16[index | bits];
1599 else
1600 *currentLineBuffer =
1601 BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
1602 *(currentLineBuffer + 1) =
1603 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
1604 }
1605
1606 currentLineBuffer += lbufDelta;
1607
1608/*
1609The reason we subtract the horizontalRemainder *after* the test is because we had too few
1610bits for horizontalRemainder to properly recognize a negative number. But now it's 16 bits
1611wide, so we could probably go back to that (as long as we make it an int16_t and not a uint16_t!)
1612*/
1613/* horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format
1614 while (horizontalRemainder & 0x80)
1615 {
1616 horizontalRemainder += hscale;
1617 pixCount++;
1618 pixels <<= 1;
1619 }//*/
1620// while (horizontalRemainder <= 0x20) // I.e., it's <= 1.0 (*before* subtraction)
1621 while (horizontalRemainder < 0x20) // I.e., it's <= 1.0 (*before* subtraction)
1622 {
1623 horizontalRemainder += hscale;
1624 pixCount++;
1625 pixels <<= 1;
1626 }
1627 horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format
1628
1629 if (pixCount > 63)
1630 {
1631 int phrasesToSkip = pixCount / 64, pixelShift = pixCount % 64;
1632
1633 data += (pitch << 3) * phrasesToSkip;
1634 pixels = ((uint64_t)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1635 pixels <<= 1 * pixelShift;
1636 iwidth -= phrasesToSkip;
1637 pixCount = pixelShift;
1638 }
1639 }
1640 }
1641 else if (depth == 1) // 2 BPP
1642 {
1643if (firstPix != 0)
1644 WriteLog("OP: Scaled bitmap @ 2 BPP requesting FIRSTPIX!\n");
1645 index &= 0xFC; // Top six bits form CLUT index
1646 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1647 int32_t lbufDelta = ((int8_t)((flags << 7) & 0xFF) >> 5) | 0x02;
1648
1649 int pixCount = 0;
1650 uint64_t pixels = ((uint64_t)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1651
1652 while ((int32_t)iwidth > 0)
1653 {
1654 uint8_t bits = pixels >> 62;
1655
1656#ifndef OP_USES_PALETTE_ZERO
1657 if (flagTRANS && bits == 0)
1658#else
1659 if (flagTRANS && (paletteRAM16[index | bits] == 0))
1660#endif
1661 ; // Do nothing...
1662 else
1663 {
1664 if (!flagRMW)
1665 // This is the *only* correct use of endian-dependent code
1666 // (i.e., mem-to-mem direct copying)!
1667 *(uint16_t *)currentLineBuffer = paletteRAM16[index | bits];
1668 else
1669 *currentLineBuffer =
1670 BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
1671 *(currentLineBuffer + 1) =
1672 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
1673 }
1674
1675 currentLineBuffer += lbufDelta;
1676
1677/* horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format
1678 while (horizontalRemainder & 0x80)
1679 {
1680 horizontalRemainder += hscale;
1681 pixCount++;
1682 pixels <<= 2;
1683 }//*/
1684// while (horizontalRemainder <= 0x20) // I.e., it's <= 0 (*before* subtraction)
1685 while (horizontalRemainder < 0x20) // I.e., it's <= 1.0 (*before* subtraction)
1686 {
1687 horizontalRemainder += hscale;
1688 pixCount++;
1689 pixels <<= 2;
1690 }
1691 horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format
1692
1693 if (pixCount > 31)
1694 {
1695 int phrasesToSkip = pixCount / 32, pixelShift = pixCount % 32;
1696
1697 data += (pitch << 3) * phrasesToSkip;
1698 pixels = ((uint64_t)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1699 pixels <<= 2 * pixelShift;
1700 iwidth -= phrasesToSkip;
1701 pixCount = pixelShift;
1702 }
1703 }
1704 }
1705 else if (depth == 2) // 4 BPP
1706 {
1707if (firstPix != 0)
1708 WriteLog("OP: Scaled bitmap @ 4 BPP requesting FIRSTPIX!\n");
1709 index &= 0xF0; // Top four bits form CLUT index
1710 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1711 int32_t lbufDelta = ((int8_t)((flags << 7) & 0xFF) >> 5) | 0x02;
1712
1713 int pixCount = 0;
1714 uint64_t pixels = ((uint64_t)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1715
1716 while ((int32_t)iwidth > 0)
1717 {
1718 uint8_t bits = pixels >> 60;
1719
1720#ifndef OP_USES_PALETTE_ZERO
1721 if (flagTRANS && bits == 0)
1722#else
1723 if (flagTRANS && (paletteRAM16[index | bits] == 0))
1724#endif
1725 ; // Do nothing...
1726 else
1727 {
1728 if (!flagRMW)
1729 // This is the *only* correct use of endian-dependent code
1730 // (i.e., mem-to-mem direct copying)!
1731 *(uint16_t *)currentLineBuffer = paletteRAM16[index | bits];
1732 else
1733 *currentLineBuffer =
1734 BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
1735 *(currentLineBuffer + 1) =
1736 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
1737 }
1738
1739 currentLineBuffer += lbufDelta;
1740
1741/* horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format
1742 while (horizontalRemainder & 0x80)
1743 {
1744 horizontalRemainder += hscale;
1745 pixCount++;
1746 pixels <<= 4;
1747 }//*/
1748// while (horizontalRemainder <= 0x20) // I.e., it's <= 0 (*before* subtraction)
1749 while (horizontalRemainder < 0x20) // I.e., it's <= 0 (*before* subtraction)
1750 {
1751 horizontalRemainder += hscale;
1752 pixCount++;
1753 pixels <<= 4;
1754 }
1755 horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format
1756
1757 if (pixCount > 15)
1758 {
1759 int phrasesToSkip = pixCount / 16, pixelShift = pixCount % 16;
1760
1761 data += (pitch << 3) * phrasesToSkip;
1762 pixels = ((uint64_t)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1763 pixels <<= 4 * pixelShift;
1764 iwidth -= phrasesToSkip;
1765 pixCount = pixelShift;
1766 }
1767 }
1768 }
1769 else if (depth == 3) // 8 BPP
1770 {
1771if (firstPix)
1772 WriteLog("OP: Scaled bitmap @ 8 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
1773 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1774 int32_t lbufDelta = ((int8_t)((flags << 7) & 0xFF) >> 5) | 0x02;
1775
1776 int pixCount = 0;
1777 uint64_t pixels = ((uint64_t)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1778
1779 while ((int32_t)iwidth > 0)
1780 {
1781 uint8_t bits = pixels >> 56;
1782
1783#ifndef OP_USES_PALETTE_ZERO
1784 if (flagTRANS && bits == 0)
1785#else
1786 if (flagTRANS && (paletteRAM16[bits] == 0))
1787#endif
1788 ; // Do nothing...
1789 else
1790 {
1791 if (!flagRMW)
1792 // This is the *only* correct use of endian-dependent code
1793 // (i.e., mem-to-mem direct copying)!
1794 *(uint16_t *)currentLineBuffer = paletteRAM16[bits];
1795/* {
1796 if (currentLineBuffer >= lineBufferLowerLimit && currentLineBuffer <= lineBufferUpperLimit)
1797 *(uint16_t *)currentLineBuffer = paletteRAM16[bits];
1798 }*/
1799 else
1800 *currentLineBuffer =
1801 BLEND_CR(*currentLineBuffer, paletteRAM[bits << 1]),
1802 *(currentLineBuffer + 1) =
1803 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[(bits << 1) + 1]);
1804 }
1805
1806 currentLineBuffer += lbufDelta;
1807
1808// while (horizontalRemainder <= 0x20) // I.e., it's <= 0 (*before* subtraction)
1809 while (horizontalRemainder < 0x20) // I.e., it's <= 1.0 (*before* subtraction)
1810 {
1811 horizontalRemainder += hscale;
1812 pixCount++;
1813 pixels <<= 8;
1814 }
1815 horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format
1816
1817 if (pixCount > 7)
1818 {
1819 int phrasesToSkip = pixCount / 8, pixelShift = pixCount % 8;
1820
1821 data += (pitch << 3) * phrasesToSkip;
1822 pixels = ((uint64_t)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1823 pixels <<= 8 * pixelShift;
1824 iwidth -= phrasesToSkip;
1825 pixCount = pixelShift;
1826 }
1827 }
1828 }
1829 else if (depth == 4) // 16 BPP
1830 {
1831if (firstPix != 0)
1832 WriteLog("OP: Scaled bitmap @ 16 BPP requesting FIRSTPIX!\n");
1833 // The LSB is OPFLAG_REFLECT, so sign extend it and OR 2 into it.
1834 int32_t lbufDelta = ((int8_t)((flags << 7) & 0xFF) >> 5) | 0x02;
1835
1836 int pixCount = 0;
1837 uint64_t pixels = ((uint64_t)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1838
1839 while ((int32_t)iwidth > 0)
1840 {
1841 uint8_t bitsHi = pixels >> 56, bitsLo = pixels >> 48;
1842
1843//This doesn't seem right... Let's try the encoded black value ($8800):
1844//Apparently, CRY 0 maps to $8800...
1845 if (flagTRANS && ((bitsLo | bitsHi) == 0))
1846// if (flagTRANS && (bitsHi == 0x88) && (bitsLo == 0x00))
1847 ; // Do nothing...
1848 else
1849 {
1850 if (!flagRMW)
1851 *currentLineBuffer = bitsHi,
1852 *(currentLineBuffer + 1) = bitsLo;
1853 else
1854 *currentLineBuffer =
1855 BLEND_CR(*currentLineBuffer, bitsHi),
1856 *(currentLineBuffer + 1) =
1857 BLEND_Y(*(currentLineBuffer + 1), bitsLo);
1858 }
1859
1860 currentLineBuffer += lbufDelta;
1861
1862/* horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format
1863 while (horizontalRemainder & 0x80)
1864 {
1865 horizontalRemainder += hscale;
1866 pixCount++;
1867 pixels <<= 16;
1868 }//*/
1869// while (horizontalRemainder <= 0x20) // I.e., it's <= 0 (*before* subtraction)
1870 while (horizontalRemainder < 0x20) // I.e., it's <= 1.0 (*before* subtraction)
1871 {
1872 horizontalRemainder += hscale;
1873 pixCount++;
1874 pixels <<= 16;
1875 }
1876 horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format
1877//*/
1878 if (pixCount > 3)
1879 {
1880 int phrasesToSkip = pixCount / 4, pixelShift = pixCount % 4;
1881
1882 data += (pitch << 3) * phrasesToSkip;
1883 pixels = ((uint64_t)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1884 pixels <<= 16 * pixelShift;
1885
1886 iwidth -= phrasesToSkip;
1887
1888 pixCount = pixelShift;
1889 }
1890 }
1891 }
1892 else if (depth == 5) // 24 BPP
1893 {
1894//I'm not sure that you can scale a 24 BPP bitmap properly--the JTRM seems to indicate as much.
1895WriteLog("OP: Writing 24 BPP scaled bitmap!\n");
1896if (firstPix != 0)
1897 WriteLog("OP: Scaled bitmap @ 24 BPP requesting FIRSTPIX!\n");
1898 // Not sure, but I think RMW only works with 16 BPP and below, and only in CRY mode...
1899 // The LSB is OPFLAG_REFLECT, so sign extend it and or 4 into it.
1900 int32_t lbufDelta = ((int8_t)((flags << 7) & 0xFF) >> 4) | 0x04;
1901
1902 while (iwidth--)
1903 {
1904 // Fetch phrase...
1905 uint64_t pixels = ((uint64_t)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1906 data += pitch << 3; // Multiply pitch * 8 (optimize: precompute this value)
1907
1908 for(int i=0; i<2; i++)
1909 {
1910 uint8_t bits3 = pixels >> 56, bits2 = pixels >> 48,
1911 bits1 = pixels >> 40, bits0 = pixels >> 32;
1912
1913 if (flagTRANS && (bits3 | bits2 | bits1 | bits0) == 0)
1914 ; // Do nothing...
1915 else
1916 *currentLineBuffer = bits3,
1917 *(currentLineBuffer + 1) = bits2,
1918 *(currentLineBuffer + 2) = bits1,
1919 *(currentLineBuffer + 3) = bits0;
1920
1921 currentLineBuffer += lbufDelta;
1922 pixels <<= 32;
1923 }
1924 }
1925 }
1926}