Code refactoring for the variables support
[clinton/Virtual-Jaguar-Rx.git] / src / op.cpp
1 //
2 // Object Processor
3 //
4 // Original source by David Raingeard (Cal2)
5 // GCC/SDL port by Niels Wagenaar (Linux/WIN32) and Caz (BeOS)
6 // Extensive cleanups/fixes/rewrites by James Hammons
7 // (C) 2010 Underground Software
8 //
9 // JLH = James Hammons <jlhamm@acm.org>
10 // JPM = Jean-Paul Mari <djipi.mari@gmail.com>
11 //
12 // Who When What
13 // --- ---------- -----------------------------------------------------------
14 // JLH 01/16/2010 Created this log ;-)
15 // JPM 06/06/2016 Visual Studio support
16 //
17
18 #include "op.h"
19
20 #include <stdlib.h>
21 #include <string.h>
22 #include "gpu.h"
23 #include "jaguar.h"
24 #include "log.h"
25 #include "m68000/m68kinterface.h"
26 #include "memory.h"
27 #include "tom.h"
28
29 //#define OP_DEBUG
30 //#define OP_DEBUG_BMP
31
// Blend table lookups. The table index carries the existing (destination)
// byte in bits 8-15 and the incoming (source) byte in bits 0-7. Both arguments
// are fully parenthesized so that any expression can be passed safely.
#define BLEND_Y(dst, src)	op_blend_y[(((uint16_t)(dst)) << 8) | ((uint16_t)(src))]
#define BLEND_CR(dst, src)	op_blend_cr[(((uint16_t)(dst)) << 8) | ((uint16_t)(src))]
34
// Object types, as found in the low three bits of an object's first phrase
enum
{
	OBJECT_TYPE_BITMAP = 0,				// 000
	OBJECT_TYPE_SCALE = 1,				// 001
	OBJECT_TYPE_GPU = 2,				// 010
	OBJECT_TYPE_BRANCH = 3,				// 011
	OBJECT_TYPE_STOP = 4				// 100
};

// Condition codes of a branch object
enum
{
	CONDITION_EQUAL = 0,				// VC == YPOS
	CONDITION_LESS_THAN = 1,			// VC < YPOS
	CONDITION_GREATER_THAN = 2,			// VC > YPOS
	CONDITION_OP_FLAG_SET = 3,
	CONDITION_SECOND_HALF_LINE = 4
};
46
47 #if 0
48 #define OPFLAG_RELEASE 8 // Bus release bit
49 #define OPFLAG_TRANS 4 // Transparency bit
50 #define OPFLAG_RMW 2 // Read-Modify-Write bit
51 #define OPFLAG_REFLECT 1 // Horizontal mirror bit
52 #endif
53
54 // Private function prototypes
55
56 void OPProcessFixedBitmap(uint64_t p0, uint64_t p1, bool render);
57 void OPProcessScaledBitmap(uint64_t p0, uint64_t p1, uint64_t p2, bool render);
58 void OPDiscoverObjects(uint32_t address);
59 void OPDumpObjectList(void);
60 void DumpScaledObject(uint64_t p0, uint64_t p1, uint64_t p2);
61 void DumpFixedObject(uint64_t p0, uint64_t p1);
62 void DumpBitmapCore(uint64_t p0, uint64_t p1);
63 uint64_t OPLoadPhrase(uint32_t offset);
64
65 // Local global variables
66
// Blend lookup tables (64K entries each), filled in by OPInit()
static uint8_t op_blend_y[0x10000];
static uint8_t op_blend_cr[0x10000];

// Cleared by OPReset()
uint8_t objectp_running = 0;

// Bits per pixel for each value of the 3-bit bitmap depth field. This is a
// pure lookup table, hence const.
static const uint8_t op_bitmap_bit_depth[8] = { 1, 2, 4, 8, 16, 24, 32, 0 };

// Address of the next object phrase to fetch while the list is being processed
static uint32_t op_pointer;

// NOTE(review): indexed by the same 3-bit depth value as op_bitmap_bit_depth;
// presumably the number of pixels held in one 64-bit phrase -- confirm
int32_t phraseWidthToPixels[8] = { 64, 32, 16, 8, 4, 2, 0, 0 };
84
85
86 //
87 // Object Processor initialization
88 //
89 void OPInit(void)
90 {
91 // Here we calculate the saturating blend of a signed 4-bit value and an
92 // existing Cyan/Red value as well as a signed 8-bit value and an existing intensity...
93 // Note: CRY is 4 bits Cyan, 4 bits Red, 16 bits intensitY
94 for(int i=0; i<256*256; i++)
95 {
96 int y = (i >> 8) & 0xFF;
97 int dy = (int8_t)i; // Sign extend the Y index
98 int c1 = (i >> 8) & 0x0F;
99 int dc1 = (int8_t)(i << 4) >> 4; // Sign extend the R index
100 int c2 = (i >> 12) & 0x0F;
101 int dc2 = (int8_t)(i & 0xF0) >> 4; // Sign extend the C index
102
103 y += dy;
104
105 if (y < 0)
106 y = 0;
107 else if (y > 0xFF)
108 y = 0xFF;
109
110 op_blend_y[i] = y;
111
112 c1 += dc1;
113
114 if (c1 < 0)
115 c1 = 0;
116 else if (c1 > 0x0F)
117 c1 = 0x0F;
118
119 c2 += dc2;
120
121 if (c2 < 0)
122 c2 = 0;
123 else if (c2 > 0x0F)
124 c2 = 0x0F;
125
126 op_blend_cr[i] = (c2 << 4) | c1;
127 }
128
129 OPReset();
130 }
131
132
133 //
134 // Object Processor reset
135 //
136 void OPReset(void)
137 {
138 // memset(objectp_ram, 0x00, 0x40);
139 objectp_running = 0;
140 }
141
142
// Printable name for every value of the 3-bit object type field
static const char * opType[8] =
{
	"(BITMAP)",
	"(SCALED BITMAP)",
	"(GPU INT)",
	"(BRANCH)",
	"(STOP)",
	"???",
	"???",
	"???"
};

// Printable name for every value of the 3-bit branch condition code field
static const char * ccType[8] =
{
	"==",
	"<",
	">",
	"(opflag set)",
	"(second half line)",
	"?",
	"?",
	"?"
};

// Addresses of the objects recorded by OPDiscoverObjects(), and how many of
// the slots are in use
static uint32_t object[8192];
static uint32_t numberOfObjects;
151
152
153 void OPDone(void)
154 {
155 //#warning "!!! Fix OL dump so that it follows links !!!"
156 // const char * opType[8] =
157 // { "(BITMAP)", "(SCALED BITMAP)", "(GPU INT)", "(BRANCH)", "(STOP)", "???", "???", "???" };
158 // const char * ccType[8] =
159 // { "\"==\"", "\"<\"", "\">\"", "(opflag set)", "(second half line)", "?", "?", "?" };
160
161 uint32_t olp = OPGetListPointer();
162 WriteLog("\nOP: OLP = $%08X\n", olp);
163 WriteLog("OP: Phrase dump\n ----------\n");
164
165 #if 0
166 for(uint32_t i=0; i<0x100; i+=8)
167 {
168 uint32_t hi = JaguarReadLong(olp + i, OP), lo = JaguarReadLong(olp + i + 4, OP);
169 WriteLog("\t%08X: %08X %08X %s", olp + i, hi, lo, opType[lo & 0x07]);
170
171 if ((lo & 0x07) == 3)
172 {
173 uint16_t ypos = (lo >> 3) & 0x7FF;
174 uint8_t cc = (lo >> 14) & 0x03;
175 uint32_t link = ((hi << 11) | (lo >> 21)) & 0x3FFFF8;
176 WriteLog(" YPOS=%u, CC=%s, link=%08X", ypos, ccType[cc], link);
177 }
178
179 WriteLog("\n");
180
181 if ((lo & 0x07) == 0)
182 DumpFixedObject(OPLoadPhrase(olp+i), OPLoadPhrase(olp+i+8));
183
184 if ((lo & 0x07) == 1)
185 DumpScaledObject(OPLoadPhrase(olp+i), OPLoadPhrase(olp+i+8), OPLoadPhrase(olp+i+16));
186 }
187
188 WriteLog("\n");
189 #else
190 //#warning "!!! Fix lockup in OPDiscoverObjects() !!!"
191 //temp, to keep the following function from locking up on bad/weird OLs
192 //return;
193
194 numberOfObjects = 0;
195 OPDiscoverObjects(olp);
196 OPDumpObjectList();
197 #endif
198 }
199
200
201 bool OPObjectExists(uint32_t address)
202 {
203 // Yes, we really do a linear search, every time. :-/
204 for(uint32_t i=0; i<numberOfObjects; i++)
205 {
206 if (address == object[i])
207 return true;
208 }
209
210 return false;
211 }
212
213
214 void OPDiscoverObjects(uint32_t address)
215 {
216 uint8_t objectType = 0;
217
218 do
219 {
220 // If we've seen this object already, bail out!
221 // Otherwise, add it to the list
222 if (OPObjectExists(address))
223 return;
224
225 object[numberOfObjects++] = address;
226
227 // Get the object & decode its type, link address
228 uint32_t hi = JaguarReadLong(address + 0, OP);
229 uint32_t lo = JaguarReadLong(address + 4, OP);
230 objectType = lo & 0x07;
231 uint32_t link = ((hi << 11) | (lo >> 21)) & 0x3FFFF8;
232
233 if (objectType == 3)
234 {
235 // Branch if YPOS < 2047 (or YPOS > 0) can be treated as a GOTO, so
236 // don't do any discovery in that case. Otherwise, have at it:
237 if (((lo & 0xFFFF) != 0x7FFB) && ((lo & 0xFFFF) != 0x8003))
238 // Recursion needed to follow all links! This does depth-first
239 // recursion on the not-taken objects
240 OPDiscoverObjects(address + 8);
241 }
242
243 // Get the next object...
244 address = link;
245 }
246 while (objectType != 4);
247 }
248
249
250 void OPDumpObjectList(void)
251 {
252 for(uint32_t i=0; i<numberOfObjects; i++)
253 {
254 uint32_t address = object[i];
255
256 uint32_t hi = JaguarReadLong(address + 0, OP);
257 uint32_t lo = JaguarReadLong(address + 4, OP);
258 uint8_t objectType = lo & 0x07;
259 uint32_t link = ((hi << 11) | (lo >> 21)) & 0x3FFFF8;
260 WriteLog("%08X: %08X %08X %s -> $%08X", address, hi, lo, opType[objectType], link);
261
262 if (objectType == 3)
263 {
264 uint16_t ypos = (lo >> 3) & 0x7FF;
265 uint8_t cc = (lo >> 14) & 0x07; // Proper # of bits == 3
266 WriteLog(" YPOS %s %u", ccType[cc], ypos);
267 }
268
269 WriteLog("\n");
270
271 // Yes, this is how the OP finds follow-on phrases for bitmap/scaled
272 // bitmap objects...!
273 if (objectType == 0)
274 DumpFixedObject(OPLoadPhrase(address + 0),
275 OPLoadPhrase(address | 0x08));
276
277 if (objectType == 1)
278 DumpScaledObject(OPLoadPhrase(address + 0),
279 OPLoadPhrase(address | 0x08), OPLoadPhrase(address | 0x10));
280
281 if (address == link) // Ruh roh...
282 {
283 // Runaway recursive link is bad!
284 WriteLog("***** SELF REFERENTIAL LINK *****\n\n");
285 }
286 }
287
288 WriteLog("\n");
289 }
290
291
292 //
293 // Object Processor memory access
294 // Memory range: F00010 - F00027
295 //
296 // F00010-F00017 R xxxxxxxx xxxxxxxx OB - current object code from the graphics processor
297 // F00020-F00023 W xxxxxxxx xxxxxxxx OLP - start of the object list
298 // F00026 W -------- -------x OBF - object processor flag
299 //
300
301 #if 0
302 uint8_t OPReadByte(uint32_t offset, uint32_t who/*=UNKNOWN*/)
303 {
304 offset &= 0x3F;
305 return objectp_ram[offset];
306 }
307
308 uint16_t OPReadWord(uint32_t offset, uint32_t who/*=UNKNOWN*/)
309 {
310 offset &= 0x3F;
311 return GET16(objectp_ram, offset);
312 }
313
314 void OPWriteByte(uint32_t offset, uint8_t data, uint32_t who/*=UNKNOWN*/)
315 {
316 offset &= 0x3F;
317 objectp_ram[offset] = data;
318 }
319
320 void OPWriteWord(uint32_t offset, uint16_t data, uint32_t who/*=UNKNOWN*/)
321 {
322 offset &= 0x3F;
323 SET16(objectp_ram, offset, data);
324
325 /*if (offset == 0x20)
326 WriteLog("OP: Setting lo list pointer: %04X\n", data);
327 if (offset == 0x22)
328 WriteLog("OP: Setting hi list pointer: %04X\n", data);//*/
329 }
330 #endif
331
332
333 uint32_t OPGetListPointer(void)
334 {
335 // Note: This register is LO / HI WORD, hence the funky look of this...
336 return GET16(tomRam8, 0x20) | (GET16(tomRam8, 0x22) << 16);
337 }
338
339
340 // This is WRONG, since the OBF is only 16 bits wide!!! [FIXED]
341
342 uint32_t OPGetStatusRegister(void)
343 {
344 return GET16(tomRam8, 0x26);
345 }
346
347
348 // This is WRONG, since the OBF is only 16 bits wide!!! [FIXED]
349
350 void OPSetStatusRegister(uint32_t data)
351 {
352 tomRam8[0x26] = (data & 0x0000FF00) >> 8;
353 tomRam8[0x27] |= (data & 0xFE);
354 }
355
356
357 void OPSetCurrentObject(uint64_t object)
358 {
359 //Not sure this is right... Wouldn't it just be stored 64 bit BE?
360 // Stored as least significant 32 bits first, ms32 last in big endian
361 /* objectp_ram[0x13] = object & 0xFF; object >>= 8;
362 objectp_ram[0x12] = object & 0xFF; object >>= 8;
363 objectp_ram[0x11] = object & 0xFF; object >>= 8;
364 objectp_ram[0x10] = object & 0xFF; object >>= 8;
365
366 objectp_ram[0x17] = object & 0xFF; object >>= 8;
367 objectp_ram[0x16] = object & 0xFF; object >>= 8;
368 objectp_ram[0x15] = object & 0xFF; object >>= 8;
369 objectp_ram[0x14] = object & 0xFF;*/
370 // Let's try regular good old big endian...
371 tomRam8[0x17] = object & 0xFF; object >>= 8;
372 tomRam8[0x16] = object & 0xFF; object >>= 8;
373 tomRam8[0x15] = object & 0xFF; object >>= 8;
374 tomRam8[0x14] = object & 0xFF; object >>= 8;
375
376 tomRam8[0x13] = object & 0xFF; object >>= 8;
377 tomRam8[0x12] = object & 0xFF; object >>= 8;
378 tomRam8[0x11] = object & 0xFF; object >>= 8;
379 tomRam8[0x10] = object & 0xFF;
380 }
381
382
383 uint64_t OPLoadPhrase(uint32_t offset)
384 {
385 offset &= ~0x07; // 8 byte alignment
386 return ((uint64_t)JaguarReadLong(offset, OP) << 32) | (uint64_t)JaguarReadLong(offset+4, OP);
387 }
388
389
390 void OPStorePhrase(uint32_t offset, uint64_t p)
391 {
392 offset &= ~0x07; // 8 byte alignment
393 JaguarWriteLong(offset, p >> 32, OP);
394 JaguarWriteLong(offset + 4, p & 0xFFFFFFFF, OP);
395 }
396
397
398 //
399 // Debugging routines
400 //
401 void DumpScaledObject(uint64_t p0, uint64_t p1, uint64_t p2)
402 {
403 WriteLog(" %08X %08X\n", (uint32_t)(p1>>32), (uint32_t)(p1&0xFFFFFFFF));
404 WriteLog(" %08X %08X\n", (uint32_t)(p2>>32), (uint32_t)(p2&0xFFFFFFFF));
405 DumpBitmapCore(p0, p1);
406 uint32_t hscale = p2 & 0xFF;
407 uint32_t vscale = (p2 >> 8) & 0xFF;
408 uint32_t remainder = (p2 >> 16) & 0xFF;
409 WriteLog(" [hsc: %02X, vsc: %02X, rem: %02X]\n", hscale, vscale, remainder);
410 }
411
412
413 void DumpFixedObject(uint64_t p0, uint64_t p1)
414 {
415 WriteLog(" %08X %08X\n", (uint32_t)(p1>>32), (uint32_t)(p1&0xFFFFFFFF));
416 DumpBitmapCore(p0, p1);
417 }
418
419
420 void DumpBitmapCore(uint64_t p0, uint64_t p1)
421 {
422 uint32_t bdMultiplier[8] = { 64, 32, 16, 8, 4, 2, 1, 1 };
423 uint8_t bitdepth = (p1 >> 12) & 0x07;
424 //WAS: int16_t ypos = ((p0 >> 3) & 0x3FF); // ??? What if not interlaced (/2)?
425 int16_t ypos = ((p0 >> 3) & 0x7FF); // ??? What if not interlaced (/2)?
426 int32_t xpos = p1 & 0xFFF;
427 xpos = (xpos & 0x800 ? xpos | 0xFFFFF000 : xpos); // Sign extend that mutha!
428 uint32_t iwidth = ((p1 >> 28) & 0x3FF);
429 uint32_t dwidth = ((p1 >> 18) & 0x3FF); // Unsigned!
430 uint16_t height = ((p0 >> 14) & 0x3FF);
431 uint32_t link = ((p0 >> 24) & 0x7FFFF) << 3;
432 uint32_t ptr = ((p0 >> 43) & 0x1FFFFF) << 3;
433 uint32_t firstPix = (p1 >> 49) & 0x3F;
434 uint8_t flags = (p1 >> 45) & 0x0F;
435 uint8_t idx = (p1 >> 38) & 0x7F;
436 uint32_t pitch = (p1 >> 15) & 0x07;
437 WriteLog(" [%u x %u @ (%i, %u) (iw:%u, dw:%u) (%u bpp), p:%08X fp:%02X, fl:%s%s%s%s, idx:%02X, pt:%02X]\n",
438 iwidth * bdMultiplier[bitdepth],
439 height, xpos, ypos, iwidth, dwidth, op_bitmap_bit_depth[bitdepth],
440 ptr, firstPix, (flags&OPFLAG_REFLECT ? "REFLECT " : ""),
441 (flags&OPFLAG_RMW ? "RMW " : ""), (flags&OPFLAG_TRANS ? "TRANS " : ""),
442 (flags&OPFLAG_RELEASE ? "RELEASE" : ""), idx, pitch);
443 }
444
445
446 //
447 // Object Processor main routine
448 //
449 #ifdef _MSC_VER
450 #pragma message("Warning: Need to fix this so that when an GPU object IRQ happens, we can pick up OP processing where we left off. !!! FIX !!!")
451 #else
452 #warning "Need to fix this so that when an GPU object IRQ happens, we can pick up OP processing where we left off. !!! FIX !!!"
453 #endif // _MSC_VER
// Runs the object list once for the given half-line.
//
// Starting from the address returned by OPGetListPointer(), phrases are fetched
// one at a time and dispatched on the object type in their low three bits:
//   BITMAP / SCALE - rendered through OPProcessFixedBitmap() /
//                    OPProcessScaledBitmap() when halfline >= YPOS and the
//                    height is non-zero, after which the updated height and
//                    data address are written back to the object in memory
//   GPU            - latches the object and asserts GPU IRQ line 3
//   BRANCH         - conditionally replaces op_pointer with the link address
//   STOP           - latches the object, optionally raises the 68K interrupt,
//                    and returns
// The walk also ends when op_pointer becomes zero or after 30000 objects.
//
// halfline: line being processed; only the low 11 bits are used
// render:   passed through unchanged to the two bitmap routines
454 void OPProcessList(int halfline, bool render)
455 {
456 #ifdef _MSC_VER
457 #pragma message("Warning: !!! NEED TO HANDLE MULTIPLE FIELDS PROPERLY !!!")
458 #else
459 #warning "!!! NEED TO HANDLE MULTIPLE FIELDS PROPERLY !!!"
460 #endif // _MSC_VER
461 // We ignore them, for now; not good D-:
462 // N.B.: Half-lines are exactly that, half-lines. When in interlaced mode, it
463 // draws the screen exactly the same way as it does in non, one line at a
464 // time. The only way you know you're in field #2 is that the topmost bit
465 // of VC is set. Half-line mode is so you can draw higher horizontal
466 // resolutions than you normally could, as the line buffer is only 720
467 // pixels wide...
468 halfline &= 0x7FF;
469
470 extern int op_start_log;
471
472 op_pointer = OPGetListPointer();
473
474 // objectp_stop_reading_list = false;
475
476 //WriteLog("OP: Processing line #%u (OLP=%08X)...\n", halfline, op_pointer);
477 //op_done();
478
479 // *** BEGIN OP PROCESSOR TESTING ONLY ***
480 extern bool interactiveMode;
481 extern bool iToggle;
482 extern int objectPtr;
483 bool inhibit;
484 int bitmapCounter = 0;
485 // *** END OP PROCESSOR TESTING ONLY ***
486
487 uint32_t opCyclesToRun = 30000; // This is a pulled-out-of-the-air value (will need to be fixed, obviously!)
488
489 // if (op_pointer) WriteLog(" new op list at 0x%.8x halfline %i\n",op_pointer,halfline);
490 while (op_pointer)
491 {
492 // *** BEGIN OP PROCESSOR TESTING ONLY ***
493 if (interactiveMode && bitmapCounter == objectPtr)
494 inhibit = iToggle;
495 else
496 inhibit = false;
497 // *** END OP PROCESSOR TESTING ONLY ***
498 // if (objectp_stop_reading_list)
499 // return;
500
// Fetch the first phrase of the object and step past it: from here on
// op_pointer addresses the phrase that follows, and op_pointer - 8 is the
// object itself
501 uint64_t p0 = OPLoadPhrase(op_pointer);
502 op_pointer += 8;
503 //WriteLog("\t%08X type %i\n", op_pointer, (uint8_t)p0 & 0x07);
504
505 #if 1
// Verbose decode of the current object. It only fires on the half-line equal
// to TOMGetVDB() and only while op_start_log is non-zero.
506 if (halfline == TOMGetVDB() && op_start_log)
507 //if (halfline == 215 && op_start_log)
508 //if (halfline == 28 && op_start_log)
509 //if (halfline == 0)
510 {
511 WriteLog("%08X --> phrase %08X %08X", op_pointer - 8, (int)(p0>>32), (int)(p0&0xFFFFFFFF));
512 if ((p0 & 0x07) == OBJECT_TYPE_BITMAP)
513 {
514 WriteLog(" (BITMAP) ");
515 uint64_t p1 = OPLoadPhrase(op_pointer);
516 WriteLog("\n%08X --> phrase %08X %08X ", op_pointer, (int)(p1>>32), (int)(p1&0xFFFFFFFF));
517 uint8_t bitdepth = (p1 >> 12) & 0x07;
518 //WAS: int16_t ypos = ((p0 >> 3) & 0x3FF); // ??? What if not interlaced (/2)?
519 int16_t ypos = ((p0 >> 3) & 0x7FF); // ??? What if not interlaced (/2)?
520 int32_t xpos = p1 & 0xFFF;
521 xpos = (xpos & 0x800 ? xpos | 0xFFFFF000 : xpos);
522 uint32_t iwidth = ((p1 >> 28) & 0x3FF);
523 uint32_t dwidth = ((p1 >> 18) & 0x3FF); // Unsigned!
524 uint16_t height = ((p0 >> 14) & 0x3FF);
525 uint32_t link = ((p0 >> 24) & 0x7FFFF) << 3;
526 uint32_t ptr = ((p0 >> 43) & 0x1FFFFF) << 3;
527 uint32_t firstPix = (p1 >> 49) & 0x3F;
528 uint8_t flags = (p1 >> 45) & 0x0F;
529 uint8_t idx = (p1 >> 38) & 0x7F;
530 uint32_t pitch = (p1 >> 15) & 0x07;
531 WriteLog("\n [%u (%u) x %u @ (%i, %u) (%u bpp), l: %08X, p: %08X fp: %02X, fl:%s%s%s%s, idx:%02X, pt:%02X]\n",
532 iwidth, dwidth, height, xpos, ypos, op_bitmap_bit_depth[bitdepth], link, ptr, firstPix, (flags&OPFLAG_REFLECT ? "REFLECT " : ""), (flags&OPFLAG_RMW ? "RMW " : ""), (flags&OPFLAG_TRANS ? "TRANS " : ""), (flags&OPFLAG_RELEASE ? "RELEASE" : ""), idx, pitch);
533 }
534 if ((p0 & 0x07) == OBJECT_TYPE_SCALE)
535 {
536 WriteLog(" (SCALED BITMAP)");
537 uint64_t p1 = OPLoadPhrase(op_pointer), p2 = OPLoadPhrase(op_pointer+8);
538 WriteLog("\n%08X --> phrase %08X %08X ", op_pointer, (int)(p1>>32), (int)(p1&0xFFFFFFFF));
539 WriteLog("\n%08X --> phrase %08X %08X ", op_pointer+8, (int)(p2>>32), (int)(p2&0xFFFFFFFF));
540 uint8_t bitdepth = (p1 >> 12) & 0x07;
541 //WAS: int16_t ypos = ((p0 >> 3) & 0x3FF); // ??? What if not interlaced (/2)?
542 int16_t ypos = ((p0 >> 3) & 0x7FF); // ??? What if not interlaced (/2)?
543 int32_t xpos = p1 & 0xFFF;
544 xpos = (xpos & 0x800 ? xpos | 0xFFFFF000 : xpos);
545 uint32_t iwidth = ((p1 >> 28) & 0x3FF);
546 uint32_t dwidth = ((p1 >> 18) & 0x3FF); // Unsigned!
547 uint16_t height = ((p0 >> 14) & 0x3FF);
548 uint32_t link = ((p0 >> 24) & 0x7FFFF) << 3;
549 uint32_t ptr = ((p0 >> 43) & 0x1FFFFF) << 3;
550 uint32_t firstPix = (p1 >> 49) & 0x3F;
551 uint8_t flags = (p1 >> 45) & 0x0F;
552 uint8_t idx = (p1 >> 38) & 0x7F;
553 uint32_t pitch = (p1 >> 15) & 0x07;
554 WriteLog("\n [%u (%u) x %u @ (%i, %u) (%u bpp), l: %08X, p: %08X fp: %02X, fl:%s%s%s%s, idx:%02X, pt:%02X]\n",
555 iwidth, dwidth, height, xpos, ypos, op_bitmap_bit_depth[bitdepth], link, ptr, firstPix, (flags&OPFLAG_REFLECT ? "REFLECT " : ""), (flags&OPFLAG_RMW ? "RMW " : ""), (flags&OPFLAG_TRANS ? "TRANS " : ""), (flags&OPFLAG_RELEASE ? "RELEASE" : ""), idx, pitch);
556 uint32_t hscale = p2 & 0xFF;
557 uint32_t vscale = (p2 >> 8) & 0xFF;
558 uint32_t remainder = (p2 >> 16) & 0xFF;
559 WriteLog(" [hsc: %02X, vsc: %02X, rem: %02X]\n", hscale, vscale, remainder);
560 }
561 if ((p0 & 0x07) == OBJECT_TYPE_GPU)
562 WriteLog(" (GPU)\n");
563 if ((p0 & 0x07) == OBJECT_TYPE_BRANCH)
564 {
565 WriteLog(" (BRANCH)\n");
// NOTE(review): the eight bytes below are read by indexing the buffer returned
// by GetRamPtr() with op_pointer - 8 directly; nothing in this function checks
// that the address lies inside that buffer -- confirm it always does here
566 uint8_t * jaguarMainRam = GetRamPtr();
567 WriteLog("[RAM] --> ");
568 for(int k=0; k<8; k++)
569 WriteLog("%02X ", jaguarMainRam[op_pointer-8 + k]);
570 WriteLog("\n");
571 }
572 if ((p0 & 0x07) == OBJECT_TYPE_STOP)
573 WriteLog(" --> List end\n\n");
574 }
575 #endif
576
577 switch ((uint8_t)p0 & 0x07)
578 {
579 case OBJECT_TYPE_BITMAP:
580 {
581 uint16_t ypos = (p0 >> 3) & 0x7FF;
582 // This is only theory implied by Rayman...!
583 // It seems that if the YPOS is zero, then bump the YPOS value so that it
584 // coincides with the VDB value. With interlacing, this would be slightly more
585 // tricky. There's probably another bit somewhere that enables this mode--but
586 // so far, doesn't seem to affect any other game in a negative way (that I've
587 // seen). Either that, or it's an undocumented bug...
588
589 //No, the reason this was needed is that the OP code before was wrong. Any value
590 //less than VDB will get written to the top line of the display!
591 #if 0
592 // Not so sure... Let's see what happens here...
593 // No change...
594 if (ypos == 0)
595 ypos = TOMReadWord(0xF00046, OP) / 2; // Get the VDB value
596 #endif
597 // Actually, no. Any item less than VDB will get only the lines that hang over
598 // VDB displayed. Actually, this is incorrect. It seems that VDB value is wrong
599 // somewhere and that's what's causing things to break. Still no idea why.
600
601 uint32_t height = (p0 & 0xFFC000) >> 14;
602 uint32_t oldOPP = op_pointer - 8;
603 // *** BEGIN OP PROCESSOR TESTING ONLY ***
604 if (inhibit && op_start_log)
605 WriteLog("!!! ^^^ This object is INHIBITED! ^^^ !!!\n");
606 bitmapCounter++;
607 if (!inhibit) // For OP testing only!
608 // *** END OP PROCESSOR TESTING ONLY ***
609 if (halfline >= ypos && height > 0)
610 {
611 // Believe it or not, this is what the OP actually does...
612 // which is why they're required to be on a dphrase boundary!
613 uint64_t p1 = OPLoadPhrase(oldOPP | 0x08);
614 //unneeded op_pointer += 8;
615 //WriteLog("OP: Writing halfline %d with ypos == %d...\n", halfline, ypos);
616 //WriteLog("--> Writing %u BPP bitmap...\n", op_bitmap_bit_depth[(p1 >> 12) & 0x07]);
617 // OPProcessFixedBitmap(halfline, p0, p1, render);
618 OPProcessFixedBitmap(p0, p1, render);
619
620 // OP write-backs
621
622 height--;
623
// Both values below are byte quantities: the data field (bits 43-63) shifted
// down by 40 is the field times 8, and the dwidth field (bits 18-27) shifted
// down by 15 is likewise the field times 8. One line has been consumed, so
// the data address moves on by dwidth.
624 uint64_t data = (p0 & 0xFFFFF80000000000LL) >> 40;
625 uint64_t dwidth = (p1 & 0xFFC0000) >> 15;
626 data += dwidth;
627
628 p0 &= ~0xFFFFF80000FFC000LL; // Mask out old data...
629 p0 |= (uint64_t)height << 14;
630 p0 |= data << 40;
631 OPStorePhrase(oldOPP, p0);
632 }
633
634 // OP bottom 3 bits are hardwired to zero. The link address
635 // reflects this, so we only need the top 19 bits of the address
636 // (which is why we only shift 21, and not 24).
637 op_pointer = (p0 & 0x000007FFFF000000LL) >> 21;
638
639 // KLUDGE: Seems that memory access is mirrored in the first 8MB of
640 // memory...
641 if (op_pointer > 0x1FFFFF && op_pointer < 0x800000)
642 op_pointer &= 0xFF1FFFFF; // Knock out bits 21-23
643
644 break;
645 }
646 case OBJECT_TYPE_SCALE:
647 {
648 //WAS: uint16_t ypos = (p0 >> 3) & 0x3FF;
649 uint16_t ypos = (p0 >> 3) & 0x7FF;
650 uint32_t height = (p0 & 0xFFC000) >> 14;
651 uint32_t oldOPP = op_pointer - 8;
652 //WriteLog("OP: Scaled Object (ypos=%04X, height=%04X", ypos, height);
653 // *** BEGIN OP PROCESSOR TESTING ONLY ***
654 if (inhibit && op_start_log)
655 {
656 WriteLog("!!! ^^^ This object is INHIBITED! ^^^ !!! (halfline=%u, ypos=%u, height=%u)\n", halfline, ypos, height);
657 DumpScaledObject(p0, OPLoadPhrase(op_pointer), OPLoadPhrase(op_pointer+8));
658 }
659 bitmapCounter++;
660 if (!inhibit) // For OP testing only!
661 // *** END OP PROCESSOR TESTING ONLY ***
662 if (halfline >= ypos && height > 0)
663 {
664 // Believe it or not, this is what the OP actually does...
665 // which is why they're required to be on a qphrase boundary!
666 uint64_t p1 = OPLoadPhrase(oldOPP | 0x08);
667 uint64_t p2 = OPLoadPhrase(oldOPP | 0x10);
668 //unneeded op_pointer += 16;
669 OPProcessScaledBitmap(p0, p1, p2, render);
670
671 // OP write-backs
672
673 uint16_t remainder = (p2 >> 16) & 0xFF;//, vscale = p2 >> 8;
674 uint8_t /*remainder = p2 >> 16,*/ vscale = p2 >> 8;
675 //Actually, we should skip this object if it has a vscale of zero.
676 //Or do we? Not sure... Atari Karts has a few lines that look like:
677 // (SCALED BITMAP)
678 //000E8268 --> phrase 00010000 7000B00D
679 // [7 (0) x 1 @ (13, 0) (8 bpp), l: 000E82A0, p: 000E0FC0 fp: 00, fl:RELEASE, idx:00, pt:01]
680 // [hsc: 9A, vsc: 00, rem: 00]
681 // Could it be the vscale is overridden if the DWIDTH is zero? Hmm...
682 //WriteLog("OP: Scaled bitmap processing (rem=%02X, vscale=%02X)...\n", remainder, vscale);//*/
683
// A zero vscale is replaced by $20 (1.0 in the 3.5 fixed point format used
// below), which also guarantees that the catch-up loop further down makes
// progress
684 if (vscale == 0)
685 vscale = 0x20; // OP bug??? Nope, it isn't...! Or is it?
686
687 //extern int start_logging;
688 //if (start_logging)
689 // WriteLog("--> Returned from scaled bitmap processing (rem=%02X, vscale=%02X)...\n", remainder, vscale);//*/
690 //Locks up here:
691 //--> Returned from scaled bitmap processing (rem=20, vscale=80)...
692 //There are other problems here, it looks like...
693 //Another lock up:
694 //About to execute OP (508)...
695 /*
696 OP: Scaled bitmap 4x? 4bpp at 38,? hscale=7C fpix=0 data=00075E28 pitch 1 hflipped=no dwidth=? (linked to 00071118) Transluency=no
697 --> Returned from scaled bitmap processing (rem=50, vscale=7C)...
698 OP: Scaled bitmap 4x? 4bpp at 38,? hscale=7C fpix=0 data=00075E28 pitch 1 hflipped=no dwidth=? (linked to 00071118) Transluency=no
699 --> Returned from scaled bitmap processing (rem=30, vscale=7C)...
700 OP: Scaled bitmap 4x? 4bpp at 38,? hscale=7C fpix=0 data=00075E28 pitch 1 hflipped=no dwidth=? (linked to 00071118) Transluency=no
701 --> Returned from scaled bitmap processing (rem=10, vscale=7C)...
702 OP: Scaled bitmap 4x? 4bpp at 36,? hscale=7E fpix=0 data=000756A8 pitch 1 hflipped=no dwidth=? (linked to 00073058) Transluency=no
703 --> Returned from scaled bitmap processing (rem=00, vscale=7E)...
704 OP: Scaled bitmap 4x? 4bpp at 34,? hscale=80 fpix=0 data=000756C8 pitch 1 hflipped=no dwidth=? (linked to 00073078) Transluency=no
705 --> Returned from scaled bitmap processing (rem=00, vscale=80)...
706 OP: Scaled bitmap 4x? 4bpp at 36,? hscale=7E fpix=0 data=000756C8 pitch 1 hflipped=no dwidth=? (linked to 00073058) Transluency=no
707 --> Returned from scaled bitmap processing (rem=5E, vscale=7E)...
708 OP: Scaled bitmap 4x? 4bpp at 34,? hscale=80 fpix=0 data=000756E8 pitch 1 hflipped=no dwidth=? (linked to 00073078) Transluency=no
709 --> Returned from scaled bitmap processing (rem=60, vscale=80)...
710 OP: Scaled bitmap 4x? 4bpp at 36,? hscale=7E fpix=0 data=000756C8 pitch 1 hflipped=no dwidth=? (linked to 00073058) Transluency=no
711 --> Returned from scaled bitmap processing (rem=3E, vscale=7E)...
712 OP: Scaled bitmap 4x? 4bpp at 34,? hscale=80 fpix=0 data=000756E8 pitch 1 hflipped=no dwidth=? (linked to 00073078) Transluency=no
713 --> Returned from scaled bitmap processing (rem=40, vscale=80)...
714 OP: Scaled bitmap 4x? 4bpp at 36,? hscale=7E fpix=0 data=000756C8 pitch 1 hflipped=no dwidth=? (linked to 00073058) Transluency=no
715 --> Returned from scaled bitmap processing (rem=1E, vscale=7E)...
716 OP: Scaled bitmap 4x? 4bpp at 34,? hscale=80 fpix=0 data=000756E8 pitch 1 hflipped=no dwidth=? (linked to 00073078) Transluency=no
717 --> Returned from scaled bitmap processing (rem=20, vscale=80)...
718 */
719 //Here's another problem:
720 // [hsc: 20, vsc: 20, rem: 00]
721 // Since we're not checking for $E0 (but that's what we get from the above), we
722 // end up repeating this halfline unnecessarily... !!! FIX !!! [DONE, but...
723 // still not quite right. Either that, or the Accolade team that wrote Bubsy
724 // screwed up royal.]
725 //Also note: $E0 = 7.0 which IS a legal vscale value...
726
727 // if (remainder & 0x80) // I.e., it's negative
728 // if ((remainder & 0x80) || remainder == 0) // I.e., it's <= 0
729 // if ((remainder - 1) >= 0xE0) // I.e., it's <= 0
730 // if ((remainder >= 0xE1) || remainder == 0)// I.e., it's <= 0
731 // if ((remainder >= 0xE1 && remainder <= 0xFF) || remainder == 0)// I.e., it's <= 0
732 // if (remainder <= 0x20) // I.e., it's <= 1.0
733 // I.e., it's < 1.0f -> means it'll go negative when we subtract 1.0f.
734 if (remainder < 0x20)
735 {
736 uint64_t data = (p0 & 0xFFFFF80000000000LL) >> 40;
737 uint64_t dwidth = (p1 & 0xFFC0000) >> 15;
738
739 // while (remainder & 0x80)
740 // while ((remainder & 0x80) || remainder == 0)
741 // while ((remainder - 1) >= 0xE0)
742 // while ((remainder >= 0xE1) || remainder == 0)
743 // while ((remainder >= 0xE1 && remainder <= 0xFF) || remainder == 0)
744 // while (remainder <= 0x20)
// Each pass consumes one source line: vscale is added to the remainder, the
// data address moves on by dwidth, and the height drops by one (but never
// below zero)
745 while (remainder < 0x20)
746 {
747 remainder += vscale;
748
749 if (height)
750 height--;
751
752 data += dwidth;
753 }
754
755 p0 &= ~0xFFFFF80000FFC000LL; // Mask out old data...
756 p0 |= (uint64_t)height << 14;
757 p0 |= data << 40;
758 OPStorePhrase(oldOPP, p0);
759 }
760
761 remainder -= 0x20; // 1.0f in [3.5] fixed point format
762
763 //if (start_logging)
764 // WriteLog("--> Finished writebacks...\n");//*/
765
766 //WriteLog(" [%08X%08X -> ", (uint32_t)(p2>>32), (uint32_t)(p2&0xFFFFFFFF));
767 p2 &= ~0x0000000000FF0000LL;
768 p2 |= (uint64_t)remainder << 16;
769 //WriteLog("%08X%08X]\n", (uint32_t)(p2>>32), (uint32_t)(p2&0xFFFFFFFF));
// NOTE(review): p2 was loaded from (oldOPP | 0x10) but is stored back to
// (oldOPP + 16). The two addresses are the same only while bit 4 of oldOPP is
// clear -- confirm which one is intended for objects that are not aligned.
770 OPStorePhrase(oldOPP + 16, p2);
771 //remainder = (uint8_t)(p2 >> 16), vscale = (uint8_t)(p2 >> 8);
772 //WriteLog(" [after]: rem=%02X, vscale=%02X\n", remainder, vscale);
773 }
774
775 // OP bottom 3 bits are hardwired to zero. The link address
776 // reflects this, so we only need the top 19 bits of the address
777 // (which is why we only shift 21, and not 24).
778 op_pointer = (p0 & 0x000007FFFF000000LL) >> 21;
779
780 // KLUDGE: Seems that memory access is mirrored in the first 8MB of
781 // memory...
782 if (op_pointer > 0x1FFFFF && op_pointer < 0x800000)
783 op_pointer &= 0xFF1FFFFF; // Knock out bits 21-23
784
785 break;
786 }
787 case OBJECT_TYPE_GPU:
788 {
789 //WriteLog("OP: Asserting GPU IRQ #3...\n");
790 #ifdef _MSC_VER
791 #pragma message("Warning: Need to fix OP GPU IRQ handling! !!! FIX !!!")
792 #else
793 #warning "Need to fix OP GPU IRQ handling! !!! FIX !!!"
794 #endif // _MSC_VER
795 OPSetCurrentObject(p0);
796 GPUSetIRQLine(3, ASSERT_LINE);
797 //Also, OP processing is suspended from this point until OBF (F00026) is written to...
798 // !!! FIX !!!
799 //Do something like:
800 //OPSuspendedByGPU = true;
801 //Dunno if the OP keeps processing from where it was interrupted, or if it just continues
802 //on the next halfline...
803 // --> It continues from where it was interrupted! !!! FIX !!!
// op_pointer is not changed here, so the walk simply carries on with the
// phrase that follows this object
804 break;
805 }
806 case OBJECT_TYPE_BRANCH:
807 {
808 uint16_t ypos = (p0 >> 3) & 0x7FF;
809 // JTRM is wrong: CC is bits 14-16 (3 bits, *not* 2)
810 uint8_t cc = (p0 >> 14) & 0x07;
811 uint32_t link = (p0 >> 21) & 0x3FFFF8;
812
// When the condition does not hold op_pointer is left alone, i.e. the phrase
// right after the branch object is processed next
813 switch (cc)
814 {
815 case CONDITION_EQUAL:
816 if (halfline == ypos || ypos == 0x7FF)
817 op_pointer = link;
818 break;
819 case CONDITION_LESS_THAN:
820 if (halfline < ypos)
821 op_pointer = link;
822 break;
823 case CONDITION_GREATER_THAN:
824 if (halfline > ypos)
825 op_pointer = link;
826 break;
827 case CONDITION_OP_FLAG_SET:
828 if (OPGetStatusRegister() & 0x01)
829 op_pointer = link;
830 break;
831 case CONDITION_SECOND_HALF_LINE:
832 // Branch if bit 10 of HC is set...
833 if (TOMGetHC() & 0x0400)
834 op_pointer = link;
835 break;
836 default:
837 // Basically, if you do this, the OP does nothing. :-)
838 WriteLog("OP: Unimplemented branch condition %i\n", cc);
839 }
840 break;
841 }
842 case OBJECT_TYPE_STOP:
843 {
844 OPSetCurrentObject(p0);
845
// The 68K interrupt is raised only when bit 3 of the STOP object is set and
// the OP flag interrupt is enabled in TOM
846 if ((p0 & 0x08) && TOMIRQEnabled(IRQ_OPFLAG))
847 {
848 TOMSetPendingObjectInt();
849 m68k_set_irq(2); // Cause a 68K IPL 2 to occur...
850 }
851
852 // Bail out, we're done...
853 return;
854 }
855 default:
856 WriteLog("OP: Unknown object type %i\n", (uint8_t)p0 & 0x07);
857 }
858
859 // Here is a little sanity check to keep the OP from locking up the
860 // machine when fed bad data. Better would be to count how many actual
861 // cycles it used and bail out/reenter to properly simulate an
862 // overloaded OP... !!! FIX !!!
863 #ifdef _MSC_VER
864 #pragma message("Warning: Better would be to count how many actual cycles it used and bail out/reenter to properly simulate an overloaded OP... !!! FIX !!!")
865 #else
866 #warning "Better would be to count how many actual cycles it used and bail out/reenter to properly simulate an overloaded OP... !!! FIX !!!"
867 #endif // _MSC_VER
// One unit of the budget is spent per object processed, whatever its type
868 opCyclesToRun--;
869
870 if (!opCyclesToRun)
871 return;
872 }
873 }
874
875
876 //
877 // Store fixed size bitmap in line buffer
878 //
879 void OPProcessFixedBitmap(uint64_t p0, uint64_t p1, bool render)
880 {
881 // Need to make sure that when writing that it stays within the line buffer...
882 // LBUF ($F01800 - $F01D9E) 360 x 32-bit RAM
883 uint8_t depth = (p1 >> 12) & 0x07; // Color depth of image
884 int32_t xpos = ((int16_t)((p1 << 4) & 0xFFFF)) >> 4;// Image xpos in LBUF
885 uint32_t iwidth = (p1 >> 28) & 0x3FF; // Image width in *phrases*
886 uint32_t data = (p0 >> 40) & 0xFFFFF8; // Pixel data address
887 uint32_t firstPix = (p1 >> 49) & 0x3F;
888 // "The LSB is significant only for scaled objects..." -JTRM
889 // "In 1 BPP mode, all five bits are significant. In 2 BPP mode, the top
890 // four are significant..."
891 firstPix &= 0x3E;
892
893 // We can ignore the RELEASE (high order) bit for now--probably forever...!
894 // uint8_t flags = (p1 >> 45) & 0x0F; // REFLECT, RMW, TRANS, RELEASE
895 //Optimize: break these out to their own BOOL values
896 uint8_t flags = (p1 >> 45) & 0x07; // REFLECT (0), RMW (1), TRANS (2)
897 bool flagREFLECT = (flags & OPFLAG_REFLECT ? true : false),
898 flagRMW = (flags & OPFLAG_RMW ? true : false),
899 flagTRANS = (flags & OPFLAG_TRANS ? true : false);
900 // "For images with 1 to 4 bits/pixel the top 7 to 4 bits of the index
901 // provide the most significant bits of the palette address."
902 uint8_t index = (p1 >> 37) & 0xFE; // CLUT index offset (upper pix, 1-4 bpp)
903 uint32_t pitch = (p1 >> 15) & 0x07; // Phrase pitch
904 pitch <<= 3; // Optimization: Multiply pitch by 8
905
906 // int16_t scanlineWidth = tom_getVideoModeWidth();
907 uint8_t * tomRam8 = TOMGetRamPointer();
908 uint8_t * paletteRAM = &tomRam8[0x400];
909 // This is OK as long as it's used correctly: For 16-bit RAM to RAM direct
910 // copies--NOT for use when using endian-corrected data (i.e., any of the
911 // *_word_read functions!)
912 uint16_t * paletteRAM16 = (uint16_t *)paletteRAM;
913
914 // WriteLog("bitmap %ix? %ibpp at %i,? firstpix=? data=0x%.8x pitch %i hflipped=%s dwidth=? (linked to ?) RMW=%s Tranparent=%s\n",
915 // iwidth, op_bitmap_bit_depth[bitdepth], xpos, ptr, pitch, (flags&OPFLAG_REFLECT ? "yes" : "no"), (flags&OPFLAG_RMW ? "yes" : "no"), (flags&OPFLAG_TRANS ? "yes" : "no"));
916
917 // Is it OK to have a 0 for the data width??? (i.e., undocumented?)
918 // Seems to be... Seems that dwidth *can* be zero (i.e., reuse same line) as
919 // well.
920 // Pitch == 0 is OK too...
921
922 //kludge: Seems that the OP treats iwidth == 0 as iwidth == 1... Need to
923 // investigate on real hardware...
924 #ifdef _MSC_VER
925 #pragma message("Warning: !!! Need to investigate iwidth == 0 behavior on real hardware !!!")
926 #else
927 #warning "!!! Need to investigate iwidth == 0 behavior on real hardware !!!"
928 #endif // _MSC_VER
929 if (iwidth == 0)
930 iwidth = 1;
931
932 // if (!render || op_pointer == 0 || ptr == 0 || pitch == 0)
933 //I'm not convinced that we need to concern ourselves with data & op_pointer
934 //here either!
935 if (!render || iwidth == 0)
936 return;
937
938 //OK, so we know the position in the line buffer is correct. It's the clipping
939 //in 24bpp mode that's wrong!
940 #if 0
941 //This is a total kludge, based upon the fact that 24BPP mode puts *4* bytes
942 //into the line buffer for each pixel.
943 if (depth == 5) // i.e., 24bpp mode...
944 xpos >>= 1; // Cut it in half...
945 #endif
946
947 //#define OP_DEBUG_BMP
948 //#ifdef OP_DEBUG_BMP
949 // WriteLog("bitmap %ix%i %ibpp at %i,%i firstpix=%i data=0x%.8x pitch %i hflipped=%s dwidth=%i (linked to 0x%.8x) Transluency=%s\n",
950 // iwidth, height, op_bitmap_bit_depth[bitdepth], xpos, ypos, firstPix, ptr, pitch, (flags&OPFLAG_REFLECT ? "yes" : "no"), dwidth, op_pointer, (flags&OPFLAG_RMW ? "yes" : "no"));
951 //#endif
952
953 // int32_t leftMargin = xpos, rightMargin = (xpos + (phraseWidthToPixels[depth] * iwidth)) - 1;
954 int32_t startPos = xpos, endPos = xpos +
955 (!flagREFLECT ? (phraseWidthToPixels[depth] * iwidth) - 1
956 : -((phraseWidthToPixels[depth] * iwidth) + 1));
957 uint32_t clippedWidth = 0, phraseClippedWidth = 0, dataClippedWidth = 0;//, phrasePixel = 0;
958 bool in24BPPMode = (((GET16(tomRam8, 0x0028) >> 1) & 0x03) == 1 ? true : false); // VMODE
959 // This is correct, the OP line buffer is a constant size...
960 int32_t limit = 720;
961 int32_t lbufWidth = 719;
962
963 // If the image is completely to the left or right of the line buffer, then
964 // bail.
965 //If in REFLECT mode, then these values are swapped! !!! FIX !!! [DONE]
966 //There are four possibilities:
967 // 1. image sits on left edge and no REFLECT; starts out of bounds but ends in bounds.
968 // 2. image sits on left edge and REFLECT; starts in bounds but ends out of bounds.
969 // 3. image sits on right edge and REFLECT; starts out of bounds but ends in bounds.
970 // 4. image sits on right edge and no REFLECT; starts in bounds but ends out of bounds.
971 //Numbers 2 & 4 can be caught by checking the LBUF clip while in the inner loop,
972 // numbers 1 & 3 are of concern.
973 // This *indirectly* handles only cases 2 & 4! And is WRONG is REFLECT is set...!
974 // if (rightMargin < 0 || leftMargin > lbufWidth)
975
976 // It might be easier to swap these (if REFLECTed) and just use XPOS down below...
977 // That way, you could simply set XPOS to leftMargin if !REFLECT and to rightMargin otherwise.
978 // Still have to be careful with the DATA and IWIDTH values though...
979
980 // if ((!flagREFLECT && (rightMargin < 0 || leftMargin > lbufWidth))
981 // || (flagREFLECT && (leftMargin < 0 || rightMargin > lbufWidth)))
982 // return;
983 if ((!flagREFLECT && (endPos < 0 || startPos > lbufWidth))
984 || (flagREFLECT && (startPos < 0 || endPos > lbufWidth)))
985 return;
986
987 // Otherwise, find the clip limits and clip the phrase as well...
988 // NOTE: I'm fudging here by letting the actual blit overstep the bounds of the
989 // line buffer, but it shouldn't matter since there are two unused line
990 // buffers below and nothing above and I'll at most write 8 bytes outside
991 // the line buffer... I could use a fractional clip begin/end value, but
992 // this makes the blit a *lot* more hairy. I might fix this in the future
993 // if it becomes necessary. (JLH)
994 // Probably wouldn't be *that* hairy. Just use a delta that tells the inner loop
995 // which pixel in the phrase is being written, and quit when either end of phrases
996 // is reached or line buffer extents are surpassed.
997
998 //This stuff is probably wrong as well... !!! FIX !!!
999 //The strange thing is that it seems to work, but that's no guarantee that it's bulletproof!
1000 //Yup. Seems that JagMania doesn't work correctly with this...
1001 //Dunno if this is the problem, but Atari Karts is showing *some* of the road now...
1002 // if (!flagREFLECT)
1003
1004 /*
1005 if (leftMargin < 0)
1006 clippedWidth = 0 - leftMargin,
1007 phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth],
1008 leftMargin = 0 - (clippedWidth % phraseWidthToPixels[depth]);
1009 // leftMargin = 0;
1010
1011 if (rightMargin > lbufWidth)
1012 clippedWidth = rightMargin - lbufWidth,
1013 phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth];//,
1014 // rightMargin = lbufWidth + (clippedWidth % phraseWidthToPixels[depth]);
1015 // rightMargin = lbufWidth;
1016 */
1017 if (depth > 5)
1018 WriteLog("OP: We're about to encounter a divide by zero error!\n");
1019 // NOTE: We're just using endPos to figure out how much, if any, to clip by.
1020 // ALSO: There may be another case where we start out of bounds and end out
1021 // of bounds...!
1022 // !!! FIX !!!
1023 if (startPos < 0) // Case #1: Begin out, end in, L to R
1024 clippedWidth = 0 - startPos,
1025 dataClippedWidth = phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth],
1026 startPos = 0 - (clippedWidth % phraseWidthToPixels[depth]);
1027
1028 if (endPos < 0) // Case #2: Begin in, end out, R to L
1029 clippedWidth = 0 - endPos,
1030 phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth];
1031
1032 if (endPos > lbufWidth) // Case #3: Begin in, end out, L to R
1033 clippedWidth = endPos - lbufWidth,
1034 phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth];
1035
1036 if (startPos > lbufWidth) // Case #4: Begin out, end in, R to L
1037 clippedWidth = startPos - lbufWidth,
1038 dataClippedWidth = phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth],
1039 startPos = lbufWidth + (clippedWidth % phraseWidthToPixels[depth]);
1040 //printf("<OP:spos=%i,epos=%i]", startPos, endPos);
1041
1042 // If the image is sitting on the line buffer left or right edge, we need to compensate
1043 // by decreasing the image phrase width accordingly.
1044 iwidth -= phraseClippedWidth;
1045
1046 // Also, if we're clipping the phrase we need to make sure we're in the correct part of
1047 // the pixel data.
1048 // data += phraseClippedWidth * (pitch << 3);
1049 data += dataClippedWidth * pitch;
1050
1051 // NOTE: When the bitmap is in REFLECT mode, the XPOS marks the *right* side of the
1052 // bitmap! This makes clipping & etc. MUCH, much easier...!
1053 // uint32_t lbufAddress = 0x1800 + (!in24BPPMode ? leftMargin * 2 : leftMargin * 4);
1054 //Why does this work right when multiplying startPos by 2 (instead of 4) for 24 BPP mode?
1055 //Is this a bug in the OP?
1056 //It's because in 24bpp mode, each pixel takes *4* bytes, instead of the usual 2.
1057 //Though it looks like we're doing it here no matter what...
1058 // uint32_t lbufAddress = 0x1800 + (!in24BPPMode ? startPos * 2 : startPos * 2);
1059 //Let's try this:
1060 uint32_t lbufAddress = 0x1800 + (startPos * 2);
1061 uint8_t * currentLineBuffer = &tomRam8[lbufAddress];
1062
1063 // Render.
1064
1065 // Hmm. We check above for 24 BPP mode, but don't do anything about it below...
1066 // If we *were* in 24 BPP mode, how would you convert CRY to RGB24? Seems to me
1067 // that if you're in CRY mode then you wouldn't be able to use 24 BPP bitmaps
1068 // anyway.
1069 // This seems to be the case (at least according to the Midsummer docs)...!
1070
1071 // This is to test using palette zeroes instead of bit zeroes...
1072 // And it seems that this is wrong, index == 0 is transparent apparently... :-/
1073 //#define OP_USES_PALETTE_ZERO
1074
1075 if (depth == 0) // 1 BPP
1076 {
1077 // The LSB of flags is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1078 int32_t lbufDelta = ((int8_t)((flags << 7) & 0xFF) >> 5) | 0x02;
1079
1080 // Fetch 1st phrase...
1081 uint64_t pixels = ((uint64_t)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1082 //Note that firstPix should only be honored *if* we start with the 1st phrase of the bitmap
1083 //i.e., we didn't clip on the margin... !!! FIX !!!
1084 pixels <<= firstPix; // Skip first N pixels (N=firstPix)...
1085 int i = firstPix; // Start counter at right spot...
1086
1087 while (iwidth--)
1088 {
1089 while (i++ < 64)
1090 {
1091 uint8_t bit = pixels >> 63;
1092 #ifndef OP_USES_PALETTE_ZERO
1093 if (flagTRANS && bit == 0)
1094 #else
1095 if (flagTRANS && (paletteRAM16[index | bit] == 0))
1096 #endif
1097 ; // Do nothing...
1098 else
1099 {
1100 if (!flagRMW)
1101 //Optimize: Set palleteRAM16 to beginning of palette RAM + index*2 and use only [bit] as index...
1102 //Won't optimize RMW case though...
1103 // This is the *only* correct use of endian-dependent code
1104 // (i.e., mem-to-mem direct copying)!
1105 *(uint16_t *)currentLineBuffer = paletteRAM16[index | bit];
1106 else
1107 *currentLineBuffer =
1108 BLEND_CR(*currentLineBuffer, paletteRAM[(index | bit) << 1]),
1109 *(currentLineBuffer + 1) =
1110 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bit) << 1) + 1]);
1111 }
1112
1113 currentLineBuffer += lbufDelta;
1114 pixels <<= 1;
1115 }
1116 i = 0;
1117 // Fetch next phrase...
1118 data += pitch;
1119 pixels = ((uint64_t)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1120 }
1121 }
1122 else if (depth == 1) // 2 BPP
1123 {
1124 if (firstPix)
1125 WriteLog("OP: Fixed bitmap @ 2 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
1126 index &= 0xFC; // Top six bits form CLUT index
1127 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1128 int32_t lbufDelta = ((int8_t)((flags << 7) & 0xFF) >> 5) | 0x02;
1129
1130 while (iwidth--)
1131 {
1132 // Fetch phrase...
1133 uint64_t pixels = ((uint64_t)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1134 data += pitch;
1135
1136 for(int i=0; i<32; i++)
1137 {
1138 uint8_t bits = pixels >> 62;
1139 // Seems to me that both of these are in the same endian, so we could cast it as
1140 // uint16_t * and do straight across copies (what about 24 bpp? Treat it differently...)
1141 // This only works for the palettized modes (1 - 8 BPP), since we actually have to
1142 // copy data from memory in 16 BPP mode (or does it? Isn't this the same as the CLUT case?)
1143 // No, it isn't because we read the memory in an endian safe way--this *won't* work...
1144 #ifndef OP_USES_PALETTE_ZERO
1145 if (flagTRANS && bits == 0)
1146 #else
1147 if (flagTRANS && (paletteRAM16[index | bits] == 0))
1148 #endif
1149 ; // Do nothing...
1150 else
1151 {
1152 if (!flagRMW)
1153 *(uint16_t *)currentLineBuffer = paletteRAM16[index | bits];
1154 else
1155 *currentLineBuffer =
1156 BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
1157 *(currentLineBuffer + 1) =
1158 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
1159 }
1160
1161 currentLineBuffer += lbufDelta;
1162 pixels <<= 2;
1163 }
1164 }
1165 }
1166 else if (depth == 2) // 4 BPP
1167 {
1168 if (firstPix)
1169 WriteLog("OP: Fixed bitmap @ 4 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
1170 index &= 0xF0; // Top four bits form CLUT index
1171 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1172 int32_t lbufDelta = ((int8_t)((flags << 7) & 0xFF) >> 5) | 0x02;
1173
1174 while (iwidth--)
1175 {
1176 // Fetch phrase...
1177 uint64_t pixels = ((uint64_t)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1178 data += pitch;
1179
1180 for(int i=0; i<16; i++)
1181 {
1182 uint8_t bits = pixels >> 60;
1183 // Seems to me that both of these are in the same endian, so we could cast it as
1184 // uint16_t * and do straight across copies (what about 24 bpp? Treat it differently...)
1185 // This only works for the palettized modes (1 - 8 BPP), since we actually have to
1186 // copy data from memory in 16 BPP mode (or does it? Isn't this the same as the CLUT case?)
1187 // No, it isn't because we read the memory in an endian safe way--this *won't* work...
1188 #ifndef OP_USES_PALETTE_ZERO
1189 if (flagTRANS && bits == 0)
1190 #else
1191 if (flagTRANS && (paletteRAM16[index | bits] == 0))
1192 #endif
1193 ; // Do nothing...
1194 else
1195 {
1196 if (!flagRMW)
1197 *(uint16_t *)currentLineBuffer = paletteRAM16[index | bits];
1198 else
1199 *currentLineBuffer =
1200 BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
1201 *(currentLineBuffer + 1) =
1202 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
1203 }
1204
1205 currentLineBuffer += lbufDelta;
1206 pixels <<= 4;
1207 }
1208 }
1209 }
1210 else if (depth == 3) // 8 BPP
1211 {
1212 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1213 int32_t lbufDelta = ((int8_t)((flags << 7) & 0xFF) >> 5) | 0x02;
1214
1215 // Fetch 1st phrase...
1216 uint64_t pixels = ((uint64_t)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1217 //Note that firstPix should only be honored *if* we start with the 1st phrase of the bitmap
1218 //i.e., we didn't clip on the margin... !!! FIX !!!
1219 firstPix &= 0x30; // Only top two bits are valid for 8 BPP
1220 pixels <<= firstPix; // Skip first N pixels (N=firstPix)...
1221 int i = firstPix >> 3; // Start counter at right spot...
1222
1223 while (iwidth--)
1224 {
1225 while (i++ < 8)
1226 {
1227 uint8_t bits = pixels >> 56;
1228 // Seems to me that both of these are in the same endian, so we could cast it as
1229 // uint16_t * and do straight across copies (what about 24 bpp? Treat it differently...)
1230 // This only works for the palettized modes (1 - 8 BPP), since we actually have to
1231 // copy data from memory in 16 BPP mode (or does it? Isn't this the same as the CLUT case?)
1232 // No, it isn't because we read the memory in an endian safe way--this *won't* work...
1233 //This would seem to be problematic...
1234 //Because it's the palette entry being zero that makes the pixel transparent...
1235 //Let's try it and see.
1236 #ifndef OP_USES_PALETTE_ZERO
1237 if (flagTRANS && bits == 0)
1238 #else
1239 if (flagTRANS && (paletteRAM16[bits] == 0))
1240 #endif
1241 ; // Do nothing...
1242 else
1243 {
1244 if (!flagRMW)
1245 *(uint16_t *)currentLineBuffer = paletteRAM16[bits];
1246 else
1247 *currentLineBuffer =
1248 BLEND_CR(*currentLineBuffer, paletteRAM[bits << 1]),
1249 *(currentLineBuffer + 1) =
1250 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[(bits << 1) + 1]);
1251 }
1252
1253 currentLineBuffer += lbufDelta;
1254 pixels <<= 8;
1255 }
1256 i = 0;
1257 // Fetch next phrase...
1258 data += pitch;
1259 pixels = ((uint64_t)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1260 }
1261 }
1262 else if (depth == 4) // 16 BPP
1263 {
1264 if (firstPix)
1265 WriteLog("OP: Fixed bitmap @ 16 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
1266 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1267 int32_t lbufDelta = ((int8_t)((flags << 7) & 0xFF) >> 5) | 0x02;
1268
1269 while (iwidth--)
1270 {
1271 // Fetch phrase...
1272 uint64_t pixels = ((uint64_t)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1273 data += pitch;
1274
1275 for(int i=0; i<4; i++)
1276 {
1277 uint8_t bitsHi = pixels >> 56, bitsLo = pixels >> 48;
1278 // Seems to me that both of these are in the same endian, so we could cast it
1279 // as uint16_t * and do straight across copies (what about 24 bpp? Treat it
1280 // differently...) This only works for the palettized modes (1 - 8 BPP), since
1281 // we actually have to copy data from memory in 16 BPP mode (or does it? Isn't
1282 // this the same as the CLUT case?) No, it isn't because we read the memory in
1283 // an endian safe way--it *won't* work...
1284 //This doesn't seem right... Let's try the encoded black value ($8800):
1285 //Apparently, CRY 0 maps to $8800...
1286 if (flagTRANS && ((bitsLo | bitsHi) == 0))
1287 // if (flagTRANS && (bitsHi == 0x88) && (bitsLo == 0x00))
1288 ; // Do nothing...
1289 else
1290 {
1291 if (!flagRMW)
1292 *currentLineBuffer = bitsHi,
1293 *(currentLineBuffer + 1) = bitsLo;
1294 else
1295 *currentLineBuffer =
1296 BLEND_CR(*currentLineBuffer, bitsHi),
1297 *(currentLineBuffer + 1) =
1298 BLEND_Y(*(currentLineBuffer + 1), bitsLo);
1299 }
1300
1301 currentLineBuffer += lbufDelta;
1302 pixels <<= 16;
1303 }
1304 }
1305 }
1306 else if (depth == 5) // 24 BPP
1307 {
1308 //Looks like Iron Soldier is the only game that uses 24BPP mode...
1309 //There *might* be others...
1310 //WriteLog("OP: Writing 24 BPP bitmap!\n");
1311 if (firstPix)
1312 WriteLog("OP: Fixed bitmap @ 24 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
1313 // Not sure, but I think RMW only works with 16 BPP and below, and only in CRY mode...
1314 // The LSB of flags is OPFLAG_REFLECT, so sign extend it and OR 4 into it.
1315 int32_t lbufDelta = ((int8_t)((flags << 7) & 0xFF) >> 4) | 0x04;
1316
1317 while (iwidth--)
1318 {
1319 // Fetch phrase...
1320 uint64_t pixels = ((uint64_t)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1321 data += pitch;
1322
1323 for(int i=0; i<2; i++)
1324 {
1325 // We don't use a 32-bit var here because of endian issues...!
1326 uint8_t bits3 = pixels >> 56, bits2 = pixels >> 48,
1327 bits1 = pixels >> 40, bits0 = pixels >> 32;
1328
1329 if (flagTRANS && (bits3 | bits2 | bits1 | bits0) == 0)
1330 ; // Do nothing...
1331 else
1332 *currentLineBuffer = bits3,
1333 *(currentLineBuffer + 1) = bits2,
1334 *(currentLineBuffer + 2) = bits1,
1335 *(currentLineBuffer + 3) = bits0;
1336
1337 currentLineBuffer += lbufDelta;
1338 pixels <<= 32;
1339 }
1340 }
1341 }
1342 }
1343
1344
1345 //
1346 // Store scaled bitmap in line buffer
1347 //
1348 void OPProcessScaledBitmap(uint64_t p0, uint64_t p1, uint64_t p2, bool render)
1349 {
1350 // Need to make sure that when writing that it stays within the line buffer...
1351 // LBUF ($F01800 - $F01D9E) 360 x 32-bit RAM
1352 uint8_t depth = (p1 >> 12) & 0x07; // Color depth of image
1353 int32_t xpos = ((int16_t)((p1 << 4) & 0xFFFF)) >> 4;// Image xpos in LBUF
1354 uint32_t iwidth = (p1 >> 28) & 0x3FF; // Image width in *phrases*
1355 uint32_t data = (p0 >> 40) & 0xFFFFF8; // Pixel data address
1356 //#ifdef OP_DEBUG_BMP
1357 // Prolly should use this... Though not sure exactly how.
1358 //Use the upper bits as an offset into the phrase depending on the BPP. That's how!
1359 uint32_t firstPix = (p1 >> 49) & 0x3F;
1360 //This is WEIRD! I'm sure I saw Atari Karts request 8 BPP FIRSTPIX! What happened???
1361 if (firstPix)
1362 WriteLog("OP: FIRSTPIX != 0! (Scaled BM)\n");
1363 //#endif
1364 // We can ignore the RELEASE (high order) bit for now--probably forever...!
1365 // uint8_t flags = (p1 >> 45) & 0x0F; // REFLECT, RMW, TRANS, RELEASE
1366 //Optimize: break these out to their own BOOL values [DONE]
1367 uint8_t flags = (p1 >> 45) & 0x07; // REFLECT (0), RMW (1), TRANS (2)
1368 bool flagREFLECT = (flags & OPFLAG_REFLECT ? true : false),
1369 flagRMW = (flags & OPFLAG_RMW ? true : false),
1370 flagTRANS = (flags & OPFLAG_TRANS ? true : false);
1371 uint8_t index = (p1 >> 37) & 0xFE; // CLUT index offset (upper pix, 1-4 bpp)
1372 uint32_t pitch = (p1 >> 15) & 0x07; // Phrase pitch
1373
1374 uint8_t * tomRam8 = TOMGetRamPointer();
1375 uint8_t * paletteRAM = &tomRam8[0x400];
1376 // This is OK as long as it's used correctly: For 16-bit RAM to RAM direct
1377 // copies--NOT for use when using endian-corrected data (i.e., any of the
1378 // *ReadWord functions!)
1379 uint16_t * paletteRAM16 = (uint16_t *)paletteRAM;
1380
1381 uint16_t hscale = p2 & 0xFF;
1382 // Hmm. It seems that fixing the horizontal scale necessitated re-fixing this.
1383 // Not sure why, but seems to be consistent with the vertical scaling now (and
1384 // it may turn out to be wrong!)...
1385 uint16_t horizontalRemainder = hscale; // Not sure if it starts full, but seems reasonable [It's not!]
1386 // uint8_t horizontalRemainder = 0; // Let's try zero! Seems to work! Yay! [No, it doesn't!]
1387 int32_t scaledWidthInPixels = (iwidth * phraseWidthToPixels[depth] * hscale) >> 5;
1388 uint32_t scaledPhrasePixels = (phraseWidthToPixels[depth] * hscale) >> 5;
1389
1390 // WriteLog("bitmap %ix? %ibpp at %i,? firstpix=? data=0x%.8x pitch %i hflipped=%s dwidth=? (linked to ?) RMW=%s Tranparent=%s\n",
1391 // iwidth, op_bitmap_bit_depth[bitdepth], xpos, ptr, pitch, (flags&OPFLAG_REFLECT ? "yes" : "no"), (flags&OPFLAG_RMW ? "yes" : "no"), (flags&OPFLAG_TRANS ? "yes" : "no"));
1392
1393 // Looks like an hscale of zero means don't draw!
1394 if (!render || iwidth == 0 || hscale == 0)
1395 return;
1396
1397 /*extern int start_logging;
1398 if (start_logging)
1399 WriteLog("OP: Scaled bitmap %ix? %ibpp at %i,? hscale=%02X fpix=%i data=%08X pitch %i hflipped=%s dwidth=? (linked to %08X) Transluency=%s\n",
1400 iwidth, op_bitmap_bit_depth[depth], xpos, hscale, firstPix, data, pitch, (flagREFLECT ? "yes" : "no"), op_pointer, (flagRMW ? "yes" : "no"));*/
1401 //#define OP_DEBUG_BMP
1402 //#ifdef OP_DEBUG_BMP
1403 // WriteLog("OP: Scaled bitmap %ix%i %ibpp at %i,%i firstpix=%i data=0x%.8x pitch %i hflipped=%s dwidth=%i (linked to 0x%.8x) Transluency=%s\n",
1404 // iwidth, height, op_bitmap_bit_depth[bitdepth], xpos, ypos, firstPix, ptr, pitch, (flags&OPFLAG_REFLECT ? "yes" : "no"), dwidth, op_pointer, (flags&OPFLAG_RMW ? "yes" : "no"));
1405 //#endif
1406
1407 int32_t startPos = xpos, endPos = xpos +
1408 (!flagREFLECT ? scaledWidthInPixels - 1 : -(scaledWidthInPixels + 1));
1409 uint32_t clippedWidth = 0, phraseClippedWidth = 0, dataClippedWidth = 0;
1410 bool in24BPPMode = (((GET16(tomRam8, 0x0028) >> 1) & 0x03) == 1 ? true : false); // VMODE
1411 // Not sure if this is Jaguar Two only location or what...
1412 // From the docs, it is... If we want to limit here we should think of something else.
1413 // int32_t limit = GET16(tom_ram_8, 0x0008); // LIMIT
1414 int32_t limit = 720;
1415 // int32_t lbufWidth = (!in24BPPMode ? limit - 1 : (limit / 2) - 1); // Zero based limit...
1416 int32_t lbufWidth = 719; // Zero based limit...
1417
1418 // If the image is completely to the left or right of the line buffer, then bail.
1419 //If in REFLECT mode, then these values are swapped! !!! FIX !!! [DONE]
1420 //There are four possibilities:
1421 // 1. image sits on left edge and no REFLECT; starts out of bounds but ends in bounds.
1422 // 2. image sits on left edge and REFLECT; starts in bounds but ends out of bounds.
1423 // 3. image sits on right edge and REFLECT; starts out of bounds but ends in bounds.
1424 // 4. image sits on right edge and no REFLECT; starts in bounds but ends out of bounds.
1425 //Numbers 2 & 4 can be caught by checking the LBUF clip while in the inner loop,
1426 // numbers 1 & 3 are of concern.
1427 // This *indirectly* handles only cases 2 & 4! And is WRONG if REFLECT is set...!
1428 // if (rightMargin < 0 || leftMargin > lbufWidth)
1429
1430 // It might be easier to swap these (if REFLECTed) and just use XPOS down below...
1431 // That way, you could simply set XPOS to leftMargin if !REFLECT and to rightMargin otherwise.
1432 // Still have to be careful with the DATA and IWIDTH values though...
1433
1434 if ((!flagREFLECT && (endPos < 0 || startPos > lbufWidth))
1435 || (flagREFLECT && (startPos < 0 || endPos > lbufWidth)))
1436 return;
1437
1438 // Otherwise, find the clip limits and clip the phrase as well...
1439 // NOTE: I'm fudging here by letting the actual blit overstep the bounds of
1440 // the line buffer, but it shouldn't matter since there are two
1441 // unused line buffers below and nothing above and I'll at most write
1442 // 40 bytes outside the line buffer... I could use a fractional clip
1443 // begin/end value, but this makes the blit a *lot* more hairy. I
1444 // might fix this in the future if it becomes necessary. (JLH)
1445 // Probably wouldn't be *that* hairy. Just use a delta that tells the
1446 // inner loop which pixel in the phrase is being written, and quit
1447 // when either end of phrases is reached or line buffer extents are
1448 // surpassed.
1449
1450 //This stuff is probably wrong as well... !!! FIX !!!
1451 //The strange thing is that it seems to work, but that's no guarantee that it's
1452 //bulletproof!
1453 //Yup. Seems that JagMania doesn't work correctly with this...
1454 //Dunno if this is the problem, but Atari Karts is showing *some* of the road
1455 //now...
1456 //Actually, it is! Or, it was. It doesn't seem to be clipping here, so the
1457 //problem lies elsewhere! Hmm. Putting the scaling code into the 1/2/8 BPP cases
1458 //seems to draw the ground a bit more accurately... Strange!
1459 //It's probably a case of the REFLECT flag being set and the background being
1460 //written from the right side of the screen...
1461 //But no, it isn't... At least if the diagnostics are telling the truth!
1462
1463 // NOTE: We're just using endPos to figure out how much, if any, to clip by.
1464 // ALSO: There may be another case where we start out of bounds and end out
1465 // of bounds...!
1466 // !!! FIX !!!
1467
1468 //There's a problem here with scaledPhrasePixels in that it can be forced to
1469 //zero when the scaling factor is small. So fix it already! !!! FIX !!!
1470 /*if (scaledPhrasePixels == 0)
1471 {
1472 WriteLog("OP: [Scaled] We're about to encounter a divide by zero error!\n");
1473 DumpScaledObject(p0, p1, p2);
1474 }//*/
1475 //NOTE: I'm almost 100% sure that this is wrong... And it is! :-p
1476
1477 //Try a simple example...
1478 // Let's say we have a 8 BPP scanline with an hscale of $80 (4). Our xpos is -10,
1479 // non-flipped. Pixels in the bitmap are XYZXYZXYZXYZXYZ.
1480 // Scaled up, they would be XXXXYYYYZZZZXXXXYYYYZZZZXXXXYYYYZZZZ...
1481 //
1482 // Normally, we would expect this in the line buffer:
1483 // ZZXXXXYYYYZZZZXXXXYYYYZZZZ...
1484 //
1485 // But instead we're getting:
1486 // XXXXYYYYZZZZXXXXYYYYZZZZ...
1487 //
1488 // or are we??? It would seem so, simply by virtue of the fact that we're NOT starting
1489 // on negative boundary--or are we? Hmm...
1490 // cw = 10, dcw = pcw = 10 / ([8 * 4 = 32] 32) = 0, sp = -10
1491 //
1492 // Let's try a real world example:
1493 //
1494 //OP: Scaled bitmap (70, 8 BPP, spp=28) sp (-400) < 0... [new sp=-8, cw=400, dcw=pcw=14]
1495 //OP: Scaled bitmap (6F, 8 BPP, spp=27) sp (-395) < 0... [new sp=-17, cw=395, dcw=pcw=14]
1496 //
1497 // Really, spp is 27.75 in the second case...
1498 // So... If we do 395 / 27.75, we get 14. Ok so far... If we scale that against the
1499 // start position (14 * 27.75), we get -6.5... NOT -17!
1500
1501 //Now it seems we're working OK, at least for the first case...
1502 uint32_t scaledPhrasePixelsUS = phraseWidthToPixels[depth] * hscale;
1503
1504 if (startPos < 0) // Case #1: Begin out, end in, L to R
1505 {
1506 extern int start_logging;
1507 if (start_logging)
1508 WriteLog("OP: Scaled bitmap (%02X, %u BPP, spp=%u) start pos (%i) < 0...", hscale, op_bitmap_bit_depth[depth], scaledPhrasePixels, startPos);
1509 // clippedWidth = 0 - startPos,
1510 clippedWidth = (0 - startPos) << 5,
1511 // dataClippedWidth = phraseClippedWidth = clippedWidth / scaledPhrasePixels,
1512 dataClippedWidth = phraseClippedWidth = (clippedWidth / scaledPhrasePixelsUS) >> 5,
1513 // startPos = 0 - (clippedWidth % scaledPhrasePixels);
1514 startPos += (dataClippedWidth * scaledPhrasePixelsUS) >> 5;
1515 if (start_logging)
1516 WriteLog(" [new sp=%i, cw=%i, dcw=pcw=%i]\n", startPos, clippedWidth, dataClippedWidth);
1517 }
1518
1519 if (endPos < 0) // Case #2: Begin in, end out, R to L
1520 clippedWidth = 0 - endPos,
1521 phraseClippedWidth = clippedWidth / scaledPhrasePixels;
1522
1523 if (endPos > lbufWidth) // Case #3: Begin in, end out, L to R
1524 clippedWidth = endPos - lbufWidth,
1525 phraseClippedWidth = clippedWidth / scaledPhrasePixels;
1526
1527 if (startPos > lbufWidth) // Case #4: Begin out, end in, R to L
1528 clippedWidth = startPos - lbufWidth,
1529 dataClippedWidth = phraseClippedWidth = clippedWidth / scaledPhrasePixels,
1530 startPos = lbufWidth + (clippedWidth % scaledPhrasePixels);
1531
1532 extern int op_start_log;
1533 if (op_start_log && clippedWidth != 0)
1534 WriteLog("OP: Clipped line. SP=%i, EP=%i, clip=%u, iwidth=%u, hscale=%02X\n", startPos, endPos, clippedWidth, iwidth, hscale);
1535 if (op_start_log && startPos == 13)
1536 {
1537 WriteLog("OP: Scaled line. SP=%i, EP=%i, clip=%u, iwidth=%u, hscale=%02X, depth=%u, firstPix=%u\n", startPos, endPos, clippedWidth, iwidth, hscale, depth, firstPix);
1538 DumpScaledObject(p0, p1, p2);
1539 if (iwidth == 7)
1540 {
1541 WriteLog(" %08X: ", data);
1542 for(int i=0; i<7*8; i++)
1543 WriteLog("%02X ", JaguarReadByte(data+i));
1544 WriteLog("\n");
1545 }
1546 }
1547 // If the image is sitting on the line buffer left or right edge, we need to compensate
1548 // by decreasing the image phrase width accordingly.
1549 iwidth -= phraseClippedWidth;
1550
1551 // Also, if we're clipping the phrase we need to make sure we're in the correct part of
1552 // the pixel data.
1553 // data += phraseClippedWidth * (pitch << 3);
1554 data += dataClippedWidth * (pitch << 3);
1555
1556 // NOTE: When the bitmap is in REFLECT mode, the XPOS marks the *right* side of the
1557 // bitmap! This makes clipping & etc. MUCH, much easier...!
1558 // uint32_t lbufAddress = 0x1800 + (!in24BPPMode ? leftMargin * 2 : leftMargin * 4);
1559 // uint32_t lbufAddress = 0x1800 + (!in24BPPMode ? startPos * 2 : startPos * 4);
1560 uint32_t lbufAddress = 0x1800 + startPos * 2;
1561 uint8_t * currentLineBuffer = &tomRam8[lbufAddress];
1562 //uint8_t * lineBufferLowerLimit = &tom_ram_8[0x1800],
1563 // * lineBufferUpperLimit = &tom_ram_8[0x1800 + 719];
1564
1565 // Render.
1566
1567 // Hmm. We check above for 24 BPP mode, but don't do anything about it below...
1568 // If we *were* in 24 BPP mode, how would you convert CRY to RGB24? Seems to me
1569 // that if you're in CRY mode then you wouldn't be able to use 24 BPP bitmaps
1570 // anyway.
1571 // This seems to be the case (at least according to the Midsummer docs)...!
1572
1573 if (depth == 0) // 1 BPP
1574 {
1575 if (firstPix != 0)
1576 WriteLog("OP: Scaled bitmap @ 1 BPP requesting FIRSTPIX!\n");
1577 // The LSB of flags is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1578 int32_t lbufDelta = ((int8_t)((flags << 7) & 0xFF) >> 5) | 0x02;
1579
1580 int pixCount = 0;
1581 uint64_t pixels = ((uint64_t)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1582
1583 while ((int32_t)iwidth > 0)
1584 {
1585 uint8_t bits = pixels >> 63;
1586
1587 #ifndef OP_USES_PALETTE_ZERO
1588 if (flagTRANS && bits == 0)
1589 #else
1590 if (flagTRANS && (paletteRAM16[index | bits] == 0))
1591 #endif
1592 ; // Do nothing...
1593 else
1594 {
1595 if (!flagRMW)
1596 // This is the *only* correct use of endian-dependent code
1597 // (i.e., mem-to-mem direct copying)!
1598 *(uint16_t *)currentLineBuffer = paletteRAM16[index | bits];
1599 else
1600 *currentLineBuffer =
1601 BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
1602 *(currentLineBuffer + 1) =
1603 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
1604 }
1605
1606 currentLineBuffer += lbufDelta;
1607
1608 /*
1609 The reason we subtract the horizontalRemainder *after* the test is because we had too few
1610 bytes for horizontalRemainder to properly recognize a negative number. But now it's 16 bits
1611 wide, so we could probably go back to that (as long as we make it an int16_t and not a uint16!)
1612 */
1613 /* horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format
1614 while (horizontalRemainder & 0x80)
1615 {
1616 horizontalRemainder += hscale;
1617 pixCount++;
1618 pixels <<= 1;
1619 }//*/
1620 // while (horizontalRemainder <= 0x20) // I.e., it's <= 1.0 (*before* subtraction)
1621 while (horizontalRemainder < 0x20) // I.e., it's <= 1.0 (*before* subtraction)
1622 {
1623 horizontalRemainder += hscale;
1624 pixCount++;
1625 pixels <<= 1;
1626 }
1627 horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format
1628
1629 if (pixCount > 63)
1630 {
1631 int phrasesToSkip = pixCount / 64, pixelShift = pixCount % 64;
1632
1633 data += (pitch << 3) * phrasesToSkip;
1634 pixels = ((uint64_t)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1635 pixels <<= 1 * pixelShift;
1636 iwidth -= phrasesToSkip;
1637 pixCount = pixelShift;
1638 }
1639 }
1640 }
1641 else if (depth == 1) // 2 BPP
1642 {
1643 if (firstPix != 0)
1644 WriteLog("OP: Scaled bitmap @ 2 BPP requesting FIRSTPIX!\n");
1645 index &= 0xFC; // Top six bits form CLUT index
1646 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1647 int32_t lbufDelta = ((int8_t)((flags << 7) & 0xFF) >> 5) | 0x02;
1648
1649 int pixCount = 0;
1650 uint64_t pixels = ((uint64_t)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1651
1652 while ((int32_t)iwidth > 0)
1653 {
1654 uint8_t bits = pixels >> 62;
1655
1656 #ifndef OP_USES_PALETTE_ZERO
1657 if (flagTRANS && bits == 0)
1658 #else
1659 if (flagTRANS && (paletteRAM16[index | bits] == 0))
1660 #endif
1661 ; // Do nothing...
1662 else
1663 {
1664 if (!flagRMW)
1665 // This is the *only* correct use of endian-dependent code
1666 // (i.e., mem-to-mem direct copying)!
1667 *(uint16_t *)currentLineBuffer = paletteRAM16[index | bits];
1668 else
1669 *currentLineBuffer =
1670 BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
1671 *(currentLineBuffer + 1) =
1672 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
1673 }
1674
1675 currentLineBuffer += lbufDelta;
1676
1677 /* horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format
1678 while (horizontalRemainder & 0x80)
1679 {
1680 horizontalRemainder += hscale;
1681 pixCount++;
1682 pixels <<= 2;
1683 }//*/
1684 // while (horizontalRemainder <= 0x20) // I.e., it's <= 0 (*before* subtraction)
1685 while (horizontalRemainder < 0x20) // I.e., it's <= 1.0 (*before* subtraction)
1686 {
1687 horizontalRemainder += hscale;
1688 pixCount++;
1689 pixels <<= 2;
1690 }
1691 horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format
1692
1693 if (pixCount > 31)
1694 {
1695 int phrasesToSkip = pixCount / 32, pixelShift = pixCount % 32;
1696
1697 data += (pitch << 3) * phrasesToSkip;
1698 pixels = ((uint64_t)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1699 pixels <<= 2 * pixelShift;
1700 iwidth -= phrasesToSkip;
1701 pixCount = pixelShift;
1702 }
1703 }
1704 }
1705 else if (depth == 2) // 4 BPP
1706 {
1707 if (firstPix != 0)
1708 WriteLog("OP: Scaled bitmap @ 4 BPP requesting FIRSTPIX!\n");
1709 index &= 0xF0; // Top four bits form CLUT index
1710 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1711 int32_t lbufDelta = ((int8_t)((flags << 7) & 0xFF) >> 5) | 0x02;
1712
1713 int pixCount = 0;
1714 uint64_t pixels = ((uint64_t)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1715
1716 while ((int32_t)iwidth > 0)
1717 {
1718 uint8_t bits = pixels >> 60;
1719
1720 #ifndef OP_USES_PALETTE_ZERO
1721 if (flagTRANS && bits == 0)
1722 #else
1723 if (flagTRANS && (paletteRAM16[index | bits] == 0))
1724 #endif
1725 ; // Do nothing...
1726 else
1727 {
1728 if (!flagRMW)
1729 // This is the *only* correct use of endian-dependent code
1730 // (i.e., mem-to-mem direct copying)!
1731 *(uint16_t *)currentLineBuffer = paletteRAM16[index | bits];
1732 else
1733 *currentLineBuffer =
1734 BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]),
1735 *(currentLineBuffer + 1) =
1736 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]);
1737 }
1738
1739 currentLineBuffer += lbufDelta;
1740
1741 /* horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format
1742 while (horizontalRemainder & 0x80)
1743 {
1744 horizontalRemainder += hscale;
1745 pixCount++;
1746 pixels <<= 4;
1747 }//*/
1748 // while (horizontalRemainder <= 0x20) // I.e., it's <= 0 (*before* subtraction)
1749 while (horizontalRemainder < 0x20) // I.e., it's <= 0 (*before* subtraction)
1750 {
1751 horizontalRemainder += hscale;
1752 pixCount++;
1753 pixels <<= 4;
1754 }
1755 horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format
1756
1757 if (pixCount > 15)
1758 {
1759 int phrasesToSkip = pixCount / 16, pixelShift = pixCount % 16;
1760
1761 data += (pitch << 3) * phrasesToSkip;
1762 pixels = ((uint64_t)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1763 pixels <<= 4 * pixelShift;
1764 iwidth -= phrasesToSkip;
1765 pixCount = pixelShift;
1766 }
1767 }
1768 }
1769 else if (depth == 3) // 8 BPP
1770 {
1771 if (firstPix)
1772 WriteLog("OP: Scaled bitmap @ 8 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix);
1773 // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it.
1774 int32_t lbufDelta = ((int8_t)((flags << 7) & 0xFF) >> 5) | 0x02;
1775
1776 int pixCount = 0;
1777 uint64_t pixels = ((uint64_t)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1778
1779 while ((int32_t)iwidth > 0)
1780 {
1781 uint8_t bits = pixels >> 56;
1782
1783 #ifndef OP_USES_PALETTE_ZERO
1784 if (flagTRANS && bits == 0)
1785 #else
1786 if (flagTRANS && (paletteRAM16[bits] == 0))
1787 #endif
1788 ; // Do nothing...
1789 else
1790 {
1791 if (!flagRMW)
1792 // This is the *only* correct use of endian-dependent code
1793 // (i.e., mem-to-mem direct copying)!
1794 *(uint16_t *)currentLineBuffer = paletteRAM16[bits];
1795 /* {
1796 if (currentLineBuffer >= lineBufferLowerLimit && currentLineBuffer <= lineBufferUpperLimit)
1797 *(uint16_t *)currentLineBuffer = paletteRAM16[bits];
1798 }*/
1799 else
1800 *currentLineBuffer =
1801 BLEND_CR(*currentLineBuffer, paletteRAM[bits << 1]),
1802 *(currentLineBuffer + 1) =
1803 BLEND_Y(*(currentLineBuffer + 1), paletteRAM[(bits << 1) + 1]);
1804 }
1805
1806 currentLineBuffer += lbufDelta;
1807
1808 // while (horizontalRemainder <= 0x20) // I.e., it's <= 0 (*before* subtraction)
1809 while (horizontalRemainder < 0x20) // I.e., it's <= 1.0 (*before* subtraction)
1810 {
1811 horizontalRemainder += hscale;
1812 pixCount++;
1813 pixels <<= 8;
1814 }
1815 horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format
1816
1817 if (pixCount > 7)
1818 {
1819 int phrasesToSkip = pixCount / 8, pixelShift = pixCount % 8;
1820
1821 data += (pitch << 3) * phrasesToSkip;
1822 pixels = ((uint64_t)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1823 pixels <<= 8 * pixelShift;
1824 iwidth -= phrasesToSkip;
1825 pixCount = pixelShift;
1826 }
1827 }
1828 }
1829 else if (depth == 4) // 16 BPP
1830 {
1831 if (firstPix != 0)
1832 WriteLog("OP: Scaled bitmap @ 16 BPP requesting FIRSTPIX!\n");
1833 // The LSB is OPFLAG_REFLECT, so sign extend it and OR 2 into it.
1834 int32_t lbufDelta = ((int8_t)((flags << 7) & 0xFF) >> 5) | 0x02;
1835
1836 int pixCount = 0;
1837 uint64_t pixels = ((uint64_t)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1838
1839 while ((int32_t)iwidth > 0)
1840 {
1841 uint8_t bitsHi = pixels >> 56, bitsLo = pixels >> 48;
1842
1843 //This doesn't seem right... Let's try the encoded black value ($8800):
1844 //Apparently, CRY 0 maps to $8800...
1845 if (flagTRANS && ((bitsLo | bitsHi) == 0))
1846 // if (flagTRANS && (bitsHi == 0x88) && (bitsLo == 0x00))
1847 ; // Do nothing...
1848 else
1849 {
1850 if (!flagRMW)
1851 *currentLineBuffer = bitsHi,
1852 *(currentLineBuffer + 1) = bitsLo;
1853 else
1854 *currentLineBuffer =
1855 BLEND_CR(*currentLineBuffer, bitsHi),
1856 *(currentLineBuffer + 1) =
1857 BLEND_Y(*(currentLineBuffer + 1), bitsLo);
1858 }
1859
1860 currentLineBuffer += lbufDelta;
1861
1862 /* horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format
1863 while (horizontalRemainder & 0x80)
1864 {
1865 horizontalRemainder += hscale;
1866 pixCount++;
1867 pixels <<= 16;
1868 }//*/
1869 // while (horizontalRemainder <= 0x20) // I.e., it's <= 0 (*before* subtraction)
1870 while (horizontalRemainder < 0x20) // I.e., it's <= 1.0 (*before* subtraction)
1871 {
1872 horizontalRemainder += hscale;
1873 pixCount++;
1874 pixels <<= 16;
1875 }
1876 horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format
1877 //*/
1878 if (pixCount > 3)
1879 {
1880 int phrasesToSkip = pixCount / 4, pixelShift = pixCount % 4;
1881
1882 data += (pitch << 3) * phrasesToSkip;
1883 pixels = ((uint64_t)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1884 pixels <<= 16 * pixelShift;
1885
1886 iwidth -= phrasesToSkip;
1887
1888 pixCount = pixelShift;
1889 }
1890 }
1891 }
1892 else if (depth == 5) // 24 BPP
1893 {
1894 //I'm not sure that you can scale a 24 BPP bitmap properly--the JTRM seem to indicate as much.
1895 WriteLog("OP: Writing 24 BPP scaled bitmap!\n");
1896 if (firstPix != 0)
1897 WriteLog("OP: Scaled bitmap @ 24 BPP requesting FIRSTPIX!\n");
1898 // Not sure, but I think RMW only works with 16 BPP and below, and only in CRY mode...
1899 // The LSB is OPFLAG_REFLECT, so sign extend it and or 4 into it.
1900 int32_t lbufDelta = ((int8_t)((flags << 7) & 0xFF) >> 4) | 0x04;
1901
1902 while (iwidth--)
1903 {
1904 // Fetch phrase...
1905 uint64_t pixels = ((uint64_t)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP);
1906 data += pitch << 3; // Multiply pitch * 8 (optimize: precompute this value)
1907
1908 for(int i=0; i<2; i++)
1909 {
1910 uint8_t bits3 = pixels >> 56, bits2 = pixels >> 48,
1911 bits1 = pixels >> 40, bits0 = pixels >> 32;
1912
1913 if (flagTRANS && (bits3 | bits2 | bits1 | bits0) == 0)
1914 ; // Do nothing...
1915 else
1916 *currentLineBuffer = bits3,
1917 *(currentLineBuffer + 1) = bits2,
1918 *(currentLineBuffer + 2) = bits1,
1919 *(currentLineBuffer + 3) = bits0;
1920
1921 currentLineBuffer += lbufDelta;
1922 pixels <<= 32;
1923 }
1924 }
1925 }
1926 }