Commit | Line | Data |
---|---|---|
cf76e892 JPM |
1 | // |
2 | // Object Processor | |
3 | // | |
4 | // Original source by David Raingeard (Cal2) | |
5 | // GCC/SDL port by Niels Wagenaar (Linux/WIN32) and Caz (BeOS) | |
6 | // Extensive cleanups/fixes/rewrites by James Hammons | |
7 | // (C) 2010 Underground Software | |
8 | // | |
9 | // JLH = James Hammons <jlhamm@acm.org> | |
10 | // JPM = Jean-Paul Mari <djipi.mari@gmail.com> | |
11 | // | |
12 | // Who When What | |
13 | // --- ---------- ----------------------------------------------------------- | |
14 | // JLH 01/16/2010 Created this log ;-) | |
15 | // JPM 06/06/2016 Visual Studio support | |
16 | // | |
17 | ||
18 | #include "op.h" | |
19 | ||
20 | #include <stdlib.h> | |
21 | #include <string.h> | |
22 | #include "gpu.h" | |
23 | #include "jaguar.h" | |
24 | #include "log.h" | |
25 | #include "m68000/m68kinterface.h" | |
26 | #include "memory.h" | |
27 | #include "tom.h" | |
28 | ||
29 | //#define OP_DEBUG | |
30 | //#define OP_DEBUG_BMP | |
31 | ||
// Blend table lookups. The index is (destination byte << 8) | source byte,
// matching the way OPInit() fills op_blend_y / op_blend_cr.
// Both arguments are fully parenthesized so that a compound argument such as
// "a | b" is evaluated as a unit before the cast and shift are applied.
#define BLEND_Y(dst, src)	op_blend_y[(((uint16_t)(dst)) << 8) | ((uint16_t)(src))]
#define BLEND_CR(dst, src)	op_blend_cr[(((uint16_t)(dst)) << 8) | ((uint16_t)(src))]
34 | ||
35 | #define OBJECT_TYPE_BITMAP 0 // 000 | |
36 | #define OBJECT_TYPE_SCALE 1 // 001 | |
37 | #define OBJECT_TYPE_GPU 2 // 010 | |
38 | #define OBJECT_TYPE_BRANCH 3 // 011 | |
39 | #define OBJECT_TYPE_STOP 4 // 100 | |
40 | ||
41 | #define CONDITION_EQUAL 0 // VC == YPOS | |
42 | #define CONDITION_LESS_THAN 1 // VC < YPOS | |
43 | #define CONDITION_GREATER_THAN 2 // VC > YPOS | |
44 | #define CONDITION_OP_FLAG_SET 3 | |
45 | #define CONDITION_SECOND_HALF_LINE 4 | |
46 | ||
47 | #if 0 | |
48 | #define OPFLAG_RELEASE 8 // Bus release bit | |
49 | #define OPFLAG_TRANS 4 // Transparency bit | |
50 | #define OPFLAG_RMW 2 // Read-Modify-Write bit | |
51 | #define OPFLAG_REFLECT 1 // Horizontal mirror bit | |
52 | #endif | |
53 | ||
54 | // Private function prototypes | |
55 | ||
56 | void OPProcessFixedBitmap(uint64_t p0, uint64_t p1, bool render); | |
57 | void OPProcessScaledBitmap(uint64_t p0, uint64_t p1, uint64_t p2, bool render); | |
58 | void OPDiscoverObjects(uint32_t address); | |
59 | void OPDumpObjectList(void); | |
60 | void DumpScaledObject(uint64_t p0, uint64_t p1, uint64_t p2); | |
61 | void DumpFixedObject(uint64_t p0, uint64_t p1); | |
62 | void DumpBitmapCore(uint64_t p0, uint64_t p1); | |
63 | uint64_t OPLoadPhrase(uint32_t offset); | |
64 | ||
65 | // Local global variables | |
66 | ||
67 | // Blend tables (64K each) | |
68 | static uint8_t op_blend_y[0x10000]; | |
69 | static uint8_t op_blend_cr[0x10000]; | |
70 | // There may be a problem with this "RAM" overlapping (and thus being independent of) | |
71 | // some of the regular TOM RAM... | |
72 | //#warning objectp_ram is separated from TOM RAM--need to fix that! | |
73 | //static uint8_t objectp_ram[0x40]; // This is based at $F00000 | |
74 | uint8_t objectp_running = 0; | |
75 | //bool objectp_stop_reading_list; | |
76 | ||
77 | static uint8_t op_bitmap_bit_depth[8] = { 1, 2, 4, 8, 16, 24, 32, 0 }; | |
78 | //static uint32_t op_bitmap_bit_size[8] = | |
79 | // { (uint32_t)(0.125*65536), (uint32_t)(0.25*65536), (uint32_t)(0.5*65536), (uint32_t)(1*65536), | |
80 | // (uint32_t)(2*65536), (uint32_t)(1*65536), (uint32_t)(1*65536), (uint32_t)(1*65536) }; | |
81 | static uint32_t op_pointer; | |
82 | ||
83 | int32_t phraseWidthToPixels[8] = { 64, 32, 16, 8, 4, 2, 0, 0 }; | |
84 | ||
85 | ||
86 | // | |
87 | // Object Processor initialization | |
88 | // | |
89 | void OPInit(void) | |
90 | { | |
91 | // Here we calculate the saturating blend of a signed 4-bit value and an | |
92 | // existing Cyan/Red value as well as a signed 8-bit value and an existing intensity... | |
93 | // Note: CRY is 4 bits Cyan, 4 bits Red, 16 bits intensitY | |
94 | for(int i=0; i<256*256; i++) | |
95 | { | |
96 | int y = (i >> 8) & 0xFF; | |
97 | int dy = (int8_t)i; // Sign extend the Y index | |
98 | int c1 = (i >> 8) & 0x0F; | |
99 | int dc1 = (int8_t)(i << 4) >> 4; // Sign extend the R index | |
100 | int c2 = (i >> 12) & 0x0F; | |
101 | int dc2 = (int8_t)(i & 0xF0) >> 4; // Sign extend the C index | |
102 | ||
103 | y += dy; | |
104 | ||
105 | if (y < 0) | |
106 | y = 0; | |
107 | else if (y > 0xFF) | |
108 | y = 0xFF; | |
109 | ||
110 | op_blend_y[i] = y; | |
111 | ||
112 | c1 += dc1; | |
113 | ||
114 | if (c1 < 0) | |
115 | c1 = 0; | |
116 | else if (c1 > 0x0F) | |
117 | c1 = 0x0F; | |
118 | ||
119 | c2 += dc2; | |
120 | ||
121 | if (c2 < 0) | |
122 | c2 = 0; | |
123 | else if (c2 > 0x0F) | |
124 | c2 = 0x0F; | |
125 | ||
126 | op_blend_cr[i] = (c2 << 4) | c1; | |
127 | } | |
128 | ||
129 | OPReset(); | |
130 | } | |
131 | ||
132 | ||
133 | // | |
134 | // Object Processor reset | |
135 | // | |
136 | void OPReset(void) | |
137 | { | |
138 | // memset(objectp_ram, 0x00, 0x40); | |
139 | objectp_running = 0; | |
140 | } | |
141 | ||
142 | ||
// Printable names for the 3-bit object type field (low 3 bits of an object's
// first phrase); used by the object list dump
static const char * opType[8] = {
	"(BITMAP)",
	"(SCALED BITMAP)",
	"(GPU INT)",
	"(BRANCH)",
	"(STOP)",
	"???",
	"???",
	"???"
};

// Printable names for the 3-bit condition code field of a branch object
static const char * ccType[8] = {
	"==",
	"<",
	">",
	"(opflag set)",
	"(second half line)",
	"?",
	"?",
	"?"
};

// Addresses of the objects found by OPDiscoverObjects(), and how many of the
// slots are currently in use
static uint32_t object[8192];
static uint32_t numberOfObjects;
149 | //static uint32_t objectLink[8192]; | |
150 | //static uint32_t numberOfLinks; | |
151 | ||
152 | ||
153 | void OPDone(void) | |
154 | { | |
155 | //#warning "!!! Fix OL dump so that it follows links !!!" | |
156 | // const char * opType[8] = | |
157 | // { "(BITMAP)", "(SCALED BITMAP)", "(GPU INT)", "(BRANCH)", "(STOP)", "???", "???", "???" }; | |
158 | // const char * ccType[8] = | |
159 | // { "\"==\"", "\"<\"", "\">\"", "(opflag set)", "(second half line)", "?", "?", "?" }; | |
160 | ||
161 | uint32_t olp = OPGetListPointer(); | |
162 | WriteLog("\nOP: OLP = $%08X\n", olp); | |
163 | WriteLog("OP: Phrase dump\n ----------\n"); | |
164 | ||
165 | #if 0 | |
166 | for(uint32_t i=0; i<0x100; i+=8) | |
167 | { | |
168 | uint32_t hi = JaguarReadLong(olp + i, OP), lo = JaguarReadLong(olp + i + 4, OP); | |
169 | WriteLog("\t%08X: %08X %08X %s", olp + i, hi, lo, opType[lo & 0x07]); | |
170 | ||
171 | if ((lo & 0x07) == 3) | |
172 | { | |
173 | uint16_t ypos = (lo >> 3) & 0x7FF; | |
174 | uint8_t cc = (lo >> 14) & 0x03; | |
175 | uint32_t link = ((hi << 11) | (lo >> 21)) & 0x3FFFF8; | |
176 | WriteLog(" YPOS=%u, CC=%s, link=%08X", ypos, ccType[cc], link); | |
177 | } | |
178 | ||
179 | WriteLog("\n"); | |
180 | ||
181 | if ((lo & 0x07) == 0) | |
182 | DumpFixedObject(OPLoadPhrase(olp+i), OPLoadPhrase(olp+i+8)); | |
183 | ||
184 | if ((lo & 0x07) == 1) | |
185 | DumpScaledObject(OPLoadPhrase(olp+i), OPLoadPhrase(olp+i+8), OPLoadPhrase(olp+i+16)); | |
186 | } | |
187 | ||
188 | WriteLog("\n"); | |
189 | #else | |
190 | //#warning "!!! Fix lockup in OPDiscoverObjects() !!!" | |
191 | //temp, to keep the following function from locking up on bad/weird OLs | |
192 | //return; | |
193 | ||
194 | numberOfObjects = 0; | |
195 | OPDiscoverObjects(olp); | |
196 | OPDumpObjectList(); | |
197 | #endif | |
198 | } | |
199 | ||
200 | ||
201 | bool OPObjectExists(uint32_t address) | |
202 | { | |
203 | // Yes, we really do a linear search, every time. :-/ | |
204 | for(uint32_t i=0; i<numberOfObjects; i++) | |
205 | { | |
206 | if (address == object[i]) | |
207 | return true; | |
208 | } | |
209 | ||
210 | return false; | |
211 | } | |
212 | ||
213 | ||
214 | void OPDiscoverObjects(uint32_t address) | |
215 | { | |
216 | uint8_t objectType = 0; | |
217 | ||
218 | do | |
219 | { | |
220 | // If we've seen this object already, bail out! | |
221 | // Otherwise, add it to the list | |
222 | if (OPObjectExists(address)) | |
223 | return; | |
224 | ||
225 | object[numberOfObjects++] = address; | |
226 | ||
227 | // Get the object & decode its type, link address | |
228 | uint32_t hi = JaguarReadLong(address + 0, OP); | |
229 | uint32_t lo = JaguarReadLong(address + 4, OP); | |
230 | objectType = lo & 0x07; | |
231 | uint32_t link = ((hi << 11) | (lo >> 21)) & 0x3FFFF8; | |
232 | ||
233 | if (objectType == 3) | |
234 | { | |
235 | // Branch if YPOS < 2047 (or YPOS > 0) can be treated as a GOTO, so | |
236 | // don't do any discovery in that case. Otherwise, have at it: | |
237 | if (((lo & 0xFFFF) != 0x7FFB) && ((lo & 0xFFFF) != 0x8003)) | |
238 | // Recursion needed to follow all links! This does depth-first | |
239 | // recursion on the not-taken objects | |
240 | OPDiscoverObjects(address + 8); | |
241 | } | |
242 | ||
243 | // Get the next object... | |
244 | address = link; | |
245 | } | |
246 | while (objectType != 4); | |
247 | } | |
248 | ||
249 | ||
250 | void OPDumpObjectList(void) | |
251 | { | |
252 | for(uint32_t i=0; i<numberOfObjects; i++) | |
253 | { | |
254 | uint32_t address = object[i]; | |
255 | ||
256 | uint32_t hi = JaguarReadLong(address + 0, OP); | |
257 | uint32_t lo = JaguarReadLong(address + 4, OP); | |
258 | uint8_t objectType = lo & 0x07; | |
259 | uint32_t link = ((hi << 11) | (lo >> 21)) & 0x3FFFF8; | |
260 | WriteLog("%08X: %08X %08X %s -> $%08X", address, hi, lo, opType[objectType], link); | |
261 | ||
262 | if (objectType == 3) | |
263 | { | |
264 | uint16_t ypos = (lo >> 3) & 0x7FF; | |
265 | uint8_t cc = (lo >> 14) & 0x07; // Proper # of bits == 3 | |
266 | WriteLog(" YPOS %s %u", ccType[cc], ypos); | |
267 | } | |
268 | ||
269 | WriteLog("\n"); | |
270 | ||
271 | // Yes, this is how the OP finds follow-on phrases for bitmap/scaled | |
272 | // bitmap objects...! | |
273 | if (objectType == 0) | |
274 | DumpFixedObject(OPLoadPhrase(address + 0), | |
275 | OPLoadPhrase(address | 0x08)); | |
276 | ||
277 | if (objectType == 1) | |
278 | DumpScaledObject(OPLoadPhrase(address + 0), | |
279 | OPLoadPhrase(address | 0x08), OPLoadPhrase(address | 0x10)); | |
280 | ||
281 | if (address == link) // Ruh roh... | |
282 | { | |
283 | // Runaway recursive link is bad! | |
284 | WriteLog("***** SELF REFERENTIAL LINK *****\n\n"); | |
285 | } | |
286 | } | |
287 | ||
288 | WriteLog("\n"); | |
289 | } | |
290 | ||
291 | ||
292 | // | |
293 | // Object Processor memory access | |
294 | // Memory range: F00010 - F00027 | |
295 | // | |
296 | // F00010-F00017 R xxxxxxxx xxxxxxxx OB - current object code from the graphics processor | |
297 | // F00020-F00023 W xxxxxxxx xxxxxxxx OLP - start of the object list | |
298 | // F00026 W -------- -------x OBF - object processor flag | |
299 | // | |
300 | ||
#if 0
// Disabled: byte/word accessors for a separate 64-byte OP register array
// (objectp_ram). Offsets are wrapped into that array with a $3F mask.

uint8_t OPReadByte(uint32_t offset, uint32_t who/*=UNKNOWN*/)
{
	return objectp_ram[offset & 0x3F];
}

uint16_t OPReadWord(uint32_t offset, uint32_t who/*=UNKNOWN*/)
{
	const uint32_t reg = offset & 0x3F;
	return GET16(objectp_ram, reg);
}

void OPWriteByte(uint32_t offset, uint8_t data, uint32_t who/*=UNKNOWN*/)
{
	objectp_ram[offset & 0x3F] = data;
}

void OPWriteWord(uint32_t offset, uint16_t data, uint32_t who/*=UNKNOWN*/)
{
	const uint32_t reg = offset & 0x3F;
	SET16(objectp_ram, reg, data);
}
#endif
331 | ||
332 | ||
333 | uint32_t OPGetListPointer(void) | |
334 | { | |
335 | // Note: This register is LO / HI WORD, hence the funky look of this... | |
336 | return GET16(tomRam8, 0x20) | (GET16(tomRam8, 0x22) << 16); | |
337 | } | |
338 | ||
339 | ||
340 | // This is WRONG, since the OBF is only 16 bits wide!!! [FIXED] | |
341 | ||
342 | uint32_t OPGetStatusRegister(void) | |
343 | { | |
344 | return GET16(tomRam8, 0x26); | |
345 | } | |
346 | ||
347 | ||
348 | // This is WRONG, since the OBF is only 16 bits wide!!! [FIXED] | |
349 | ||
350 | void OPSetStatusRegister(uint32_t data) | |
351 | { | |
352 | tomRam8[0x26] = (data & 0x0000FF00) >> 8; | |
353 | tomRam8[0x27] |= (data & 0xFE); | |
354 | } | |
355 | ||
356 | ||
357 | void OPSetCurrentObject(uint64_t object) | |
358 | { | |
359 | //Not sure this is right... Wouldn't it just be stored 64 bit BE? | |
360 | // Stored as least significant 32 bits first, ms32 last in big endian | |
361 | /* objectp_ram[0x13] = object & 0xFF; object >>= 8; | |
362 | objectp_ram[0x12] = object & 0xFF; object >>= 8; | |
363 | objectp_ram[0x11] = object & 0xFF; object >>= 8; | |
364 | objectp_ram[0x10] = object & 0xFF; object >>= 8; | |
365 | ||
366 | objectp_ram[0x17] = object & 0xFF; object >>= 8; | |
367 | objectp_ram[0x16] = object & 0xFF; object >>= 8; | |
368 | objectp_ram[0x15] = object & 0xFF; object >>= 8; | |
369 | objectp_ram[0x14] = object & 0xFF;*/ | |
370 | // Let's try regular good old big endian... | |
371 | tomRam8[0x17] = object & 0xFF; object >>= 8; | |
372 | tomRam8[0x16] = object & 0xFF; object >>= 8; | |
373 | tomRam8[0x15] = object & 0xFF; object >>= 8; | |
374 | tomRam8[0x14] = object & 0xFF; object >>= 8; | |
375 | ||
376 | tomRam8[0x13] = object & 0xFF; object >>= 8; | |
377 | tomRam8[0x12] = object & 0xFF; object >>= 8; | |
378 | tomRam8[0x11] = object & 0xFF; object >>= 8; | |
379 | tomRam8[0x10] = object & 0xFF; | |
380 | } | |
381 | ||
382 | ||
383 | uint64_t OPLoadPhrase(uint32_t offset) | |
384 | { | |
385 | offset &= ~0x07; // 8 byte alignment | |
386 | return ((uint64_t)JaguarReadLong(offset, OP) << 32) | (uint64_t)JaguarReadLong(offset+4, OP); | |
387 | } | |
388 | ||
389 | ||
390 | void OPStorePhrase(uint32_t offset, uint64_t p) | |
391 | { | |
392 | offset &= ~0x07; // 8 byte alignment | |
393 | JaguarWriteLong(offset, p >> 32, OP); | |
394 | JaguarWriteLong(offset + 4, p & 0xFFFFFFFF, OP); | |
395 | } | |
396 | ||
397 | ||
398 | // | |
399 | // Debugging routines | |
400 | // | |
//
// Logs a scaled bitmap object: the raw second and third phrases, the decoded
// bitmap fields shared with fixed bitmaps, and finally the three scaling
// bytes taken from the low 24 bits of the third phrase.
//
void DumpScaledObject(uint64_t p0, uint64_t p1, uint64_t p2)
{
	const uint32_t p1Upper = (uint32_t)(p1 >> 32), p1Lower = (uint32_t)(p1 & 0xFFFFFFFF);
	const uint32_t p2Upper = (uint32_t)(p2 >> 32), p2Lower = (uint32_t)(p2 & 0xFFFFFFFF);

	WriteLog(" %08X %08X\n", p1Upper, p1Lower);
	WriteLog(" %08X %08X\n", p2Upper, p2Lower);
	DumpBitmapCore(p0, p1);

	const uint32_t hsc = p2 & 0xFF;
	const uint32_t vsc = (p2 >> 8) & 0xFF;
	const uint32_t rem = (p2 >> 16) & 0xFF;
	WriteLog(" [hsc: %02X, vsc: %02X, rem: %02X]\n", hsc, vsc, rem);
}
411 | ||
412 | ||
//
// Logs a fixed (unscaled) bitmap object: the raw second phrase followed by
// the decoded bitmap fields.
//
void DumpFixedObject(uint64_t p0, uint64_t p1)
{
	const uint32_t p1Upper = (uint32_t)(p1 >> 32);
	const uint32_t p1Lower = (uint32_t)(p1 & 0xFFFFFFFF);

	WriteLog(" %08X %08X\n", p1Upper, p1Lower);
	DumpBitmapCore(p0, p1);
}
418 | ||
419 | ||
420 | void DumpBitmapCore(uint64_t p0, uint64_t p1) | |
421 | { | |
422 | uint32_t bdMultiplier[8] = { 64, 32, 16, 8, 4, 2, 1, 1 }; | |
423 | uint8_t bitdepth = (p1 >> 12) & 0x07; | |
424 | //WAS: int16_t ypos = ((p0 >> 3) & 0x3FF); // ??? What if not interlaced (/2)? | |
425 | int16_t ypos = ((p0 >> 3) & 0x7FF); // ??? What if not interlaced (/2)? | |
426 | int32_t xpos = p1 & 0xFFF; | |
427 | xpos = (xpos & 0x800 ? xpos | 0xFFFFF000 : xpos); // Sign extend that mutha! | |
428 | uint32_t iwidth = ((p1 >> 28) & 0x3FF); | |
429 | uint32_t dwidth = ((p1 >> 18) & 0x3FF); // Unsigned! | |
430 | uint16_t height = ((p0 >> 14) & 0x3FF); | |
431 | uint32_t link = ((p0 >> 24) & 0x7FFFF) << 3; | |
432 | uint32_t ptr = ((p0 >> 43) & 0x1FFFFF) << 3; | |
433 | uint32_t firstPix = (p1 >> 49) & 0x3F; | |
434 | uint8_t flags = (p1 >> 45) & 0x0F; | |
435 | uint8_t idx = (p1 >> 38) & 0x7F; | |
436 | uint32_t pitch = (p1 >> 15) & 0x07; | |
437 | WriteLog(" [%u x %u @ (%i, %u) (iw:%u, dw:%u) (%u bpp), p:%08X fp:%02X, fl:%s%s%s%s, idx:%02X, pt:%02X]\n", | |
438 | iwidth * bdMultiplier[bitdepth], | |
439 | height, xpos, ypos, iwidth, dwidth, op_bitmap_bit_depth[bitdepth], | |
440 | ptr, firstPix, (flags&OPFLAG_REFLECT ? "REFLECT " : ""), | |
441 | (flags&OPFLAG_RMW ? "RMW " : ""), (flags&OPFLAG_TRANS ? "TRANS " : ""), | |
442 | (flags&OPFLAG_RELEASE ? "RELEASE" : ""), idx, pitch); | |
443 | } | |
444 | ||
445 | ||
446 | // | |
447 | // Object Processor main routine | |
448 | // | |
449 | #ifdef _MSC_VER | |
450 | #pragma message("Warning: Need to fix this so that when an GPU object IRQ happens, we can pick up OP processing where we left off. !!! FIX !!!") | |
451 | #else | |
452 | #warning "Need to fix this so that when an GPU object IRQ happens, we can pick up OP processing where we left off. !!! FIX !!!" | |
453 | #endif // _MSC_VER | |
454 | void OPProcessList(int halfline, bool render) | |
455 | { | |
456 | #ifdef _MSC_VER | |
457 | #pragma message("Warning: !!! NEED TO HANDLE MULTIPLE FIELDS PROPERLY !!!") | |
458 | #else | |
459 | #warning "!!! NEED TO HANDLE MULTIPLE FIELDS PROPERLY !!!" | |
460 | #endif // _MSC_VER | |
461 | // We ignore them, for now; not good D-: | |
462 | // N.B.: Half-lines are exactly that, half-lines. When in interlaced mode, it | |
463 | // draws the screen exactly the same way as it does in non, one line at a | |
464 | // time. The only way you know you're in field #2 is that the topmost bit | |
465 | // of VC is set. Half-line mode is so you can draw higher horizontal | |
466 | // resolutions than you normally could, as the line buffer is only 720 | |
467 | // pixels wide... | |
468 | halfline &= 0x7FF; | |
469 | ||
470 | extern int op_start_log; | |
471 | ||
472 | op_pointer = OPGetListPointer(); | |
473 | ||
474 | // objectp_stop_reading_list = false; | |
475 | ||
476 | //WriteLog("OP: Processing line #%u (OLP=%08X)...\n", halfline, op_pointer); | |
477 | //op_done(); | |
478 | ||
479 | // *** BEGIN OP PROCESSOR TESTING ONLY *** | |
480 | extern bool interactiveMode; | |
481 | extern bool iToggle; | |
482 | extern int objectPtr; | |
483 | bool inhibit; | |
484 | int bitmapCounter = 0; | |
485 | // *** END OP PROCESSOR TESTING ONLY *** | |
486 | ||
487 | uint32_t opCyclesToRun = 30000; // This is a pulled-out-of-the-air value (will need to be fixed, obviously!) | |
488 | ||
489 | // if (op_pointer) WriteLog(" new op list at 0x%.8x halfline %i\n",op_pointer,halfline); | |
490 | while (op_pointer) | |
491 | { | |
492 | // *** BEGIN OP PROCESSOR TESTING ONLY *** | |
493 | if (interactiveMode && bitmapCounter == objectPtr) | |
494 | inhibit = iToggle; | |
495 | else | |
496 | inhibit = false; | |
497 | // *** END OP PROCESSOR TESTING ONLY *** | |
498 | // if (objectp_stop_reading_list) | |
499 | // return; | |
500 | ||
501 | uint64_t p0 = OPLoadPhrase(op_pointer); | |
502 | op_pointer += 8; | |
503 | //WriteLog("\t%08X type %i\n", op_pointer, (uint8_t)p0 & 0x07); | |
504 | ||
505 | #if 1 | |
506 | if (halfline == TOMGetVDB() && op_start_log) | |
507 | //if (halfline == 215 && op_start_log) | |
508 | //if (halfline == 28 && op_start_log) | |
509 | //if (halfline == 0) | |
510 | { | |
511 | WriteLog("%08X --> phrase %08X %08X", op_pointer - 8, (int)(p0>>32), (int)(p0&0xFFFFFFFF)); | |
512 | if ((p0 & 0x07) == OBJECT_TYPE_BITMAP) | |
513 | { | |
514 | WriteLog(" (BITMAP) "); | |
515 | uint64_t p1 = OPLoadPhrase(op_pointer); | |
516 | WriteLog("\n%08X --> phrase %08X %08X ", op_pointer, (int)(p1>>32), (int)(p1&0xFFFFFFFF)); | |
517 | uint8_t bitdepth = (p1 >> 12) & 0x07; | |
518 | //WAS: int16_t ypos = ((p0 >> 3) & 0x3FF); // ??? What if not interlaced (/2)? | |
519 | int16_t ypos = ((p0 >> 3) & 0x7FF); // ??? What if not interlaced (/2)? | |
520 | int32_t xpos = p1 & 0xFFF; | |
521 | xpos = (xpos & 0x800 ? xpos | 0xFFFFF000 : xpos); | |
522 | uint32_t iwidth = ((p1 >> 28) & 0x3FF); | |
523 | uint32_t dwidth = ((p1 >> 18) & 0x3FF); // Unsigned! | |
524 | uint16_t height = ((p0 >> 14) & 0x3FF); | |
525 | uint32_t link = ((p0 >> 24) & 0x7FFFF) << 3; | |
526 | uint32_t ptr = ((p0 >> 43) & 0x1FFFFF) << 3; | |
527 | uint32_t firstPix = (p1 >> 49) & 0x3F; | |
528 | uint8_t flags = (p1 >> 45) & 0x0F; | |
529 | uint8_t idx = (p1 >> 38) & 0x7F; | |
530 | uint32_t pitch = (p1 >> 15) & 0x07; | |
531 | WriteLog("\n [%u (%u) x %u @ (%i, %u) (%u bpp), l: %08X, p: %08X fp: %02X, fl:%s%s%s%s, idx:%02X, pt:%02X]\n", | |
532 | iwidth, dwidth, height, xpos, ypos, op_bitmap_bit_depth[bitdepth], link, ptr, firstPix, (flags&OPFLAG_REFLECT ? "REFLECT " : ""), (flags&OPFLAG_RMW ? "RMW " : ""), (flags&OPFLAG_TRANS ? "TRANS " : ""), (flags&OPFLAG_RELEASE ? "RELEASE" : ""), idx, pitch); | |
533 | } | |
534 | if ((p0 & 0x07) == OBJECT_TYPE_SCALE) | |
535 | { | |
536 | WriteLog(" (SCALED BITMAP)"); | |
537 | uint64_t p1 = OPLoadPhrase(op_pointer), p2 = OPLoadPhrase(op_pointer+8); | |
538 | WriteLog("\n%08X --> phrase %08X %08X ", op_pointer, (int)(p1>>32), (int)(p1&0xFFFFFFFF)); | |
539 | WriteLog("\n%08X --> phrase %08X %08X ", op_pointer+8, (int)(p2>>32), (int)(p2&0xFFFFFFFF)); | |
540 | uint8_t bitdepth = (p1 >> 12) & 0x07; | |
541 | //WAS: int16_t ypos = ((p0 >> 3) & 0x3FF); // ??? What if not interlaced (/2)? | |
542 | int16_t ypos = ((p0 >> 3) & 0x7FF); // ??? What if not interlaced (/2)? | |
543 | int32_t xpos = p1 & 0xFFF; | |
544 | xpos = (xpos & 0x800 ? xpos | 0xFFFFF000 : xpos); | |
545 | uint32_t iwidth = ((p1 >> 28) & 0x3FF); | |
546 | uint32_t dwidth = ((p1 >> 18) & 0x3FF); // Unsigned! | |
547 | uint16_t height = ((p0 >> 14) & 0x3FF); | |
548 | uint32_t link = ((p0 >> 24) & 0x7FFFF) << 3; | |
549 | uint32_t ptr = ((p0 >> 43) & 0x1FFFFF) << 3; | |
550 | uint32_t firstPix = (p1 >> 49) & 0x3F; | |
551 | uint8_t flags = (p1 >> 45) & 0x0F; | |
552 | uint8_t idx = (p1 >> 38) & 0x7F; | |
553 | uint32_t pitch = (p1 >> 15) & 0x07; | |
554 | WriteLog("\n [%u (%u) x %u @ (%i, %u) (%u bpp), l: %08X, p: %08X fp: %02X, fl:%s%s%s%s, idx:%02X, pt:%02X]\n", | |
555 | iwidth, dwidth, height, xpos, ypos, op_bitmap_bit_depth[bitdepth], link, ptr, firstPix, (flags&OPFLAG_REFLECT ? "REFLECT " : ""), (flags&OPFLAG_RMW ? "RMW " : ""), (flags&OPFLAG_TRANS ? "TRANS " : ""), (flags&OPFLAG_RELEASE ? "RELEASE" : ""), idx, pitch); | |
556 | uint32_t hscale = p2 & 0xFF; | |
557 | uint32_t vscale = (p2 >> 8) & 0xFF; | |
558 | uint32_t remainder = (p2 >> 16) & 0xFF; | |
559 | WriteLog(" [hsc: %02X, vsc: %02X, rem: %02X]\n", hscale, vscale, remainder); | |
560 | } | |
561 | if ((p0 & 0x07) == OBJECT_TYPE_GPU) | |
562 | WriteLog(" (GPU)\n"); | |
563 | if ((p0 & 0x07) == OBJECT_TYPE_BRANCH) | |
564 | { | |
565 | WriteLog(" (BRANCH)\n"); | |
566 | uint8_t * jaguarMainRam = GetRamPtr(); | |
567 | WriteLog("[RAM] --> "); | |
568 | for(int k=0; k<8; k++) | |
569 | WriteLog("%02X ", jaguarMainRam[op_pointer-8 + k]); | |
570 | WriteLog("\n"); | |
571 | } | |
572 | if ((p0 & 0x07) == OBJECT_TYPE_STOP) | |
573 | WriteLog(" --> List end\n\n"); | |
574 | } | |
575 | #endif | |
576 | ||
577 | switch ((uint8_t)p0 & 0x07) | |
578 | { | |
579 | case OBJECT_TYPE_BITMAP: | |
580 | { | |
581 | uint16_t ypos = (p0 >> 3) & 0x7FF; | |
582 | // This is only theory implied by Rayman...! | |
583 | // It seems that if the YPOS is zero, then bump the YPOS value so that it | |
584 | // coincides with the VDB value. With interlacing, this would be slightly more | |
585 | // tricky. There's probably another bit somewhere that enables this mode--but | |
586 | // so far, doesn't seem to affect any other game in a negative way (that I've | |
587 | // seen). Either that, or it's an undocumented bug... | |
588 | ||
589 | //No, the reason this was needed is that the OP code before was wrong. Any value | |
590 | //less than VDB will get written to the top line of the display! | |
591 | #if 0 | |
592 | // Not so sure... Let's see what happens here... | |
593 | // No change... | |
594 | if (ypos == 0) | |
595 | ypos = TOMReadWord(0xF00046, OP) / 2; // Get the VDB value | |
596 | #endif | |
597 | // Actually, no. Any item less than VDB will get only the lines that hang over | |
598 | // VDB displayed. Actually, this is incorrect. It seems that VDB value is wrong | |
599 | // somewhere and that's what's causing things to fuck up. Still no idea why. | |
600 | ||
601 | uint32_t height = (p0 & 0xFFC000) >> 14; | |
602 | uint32_t oldOPP = op_pointer - 8; | |
603 | // *** BEGIN OP PROCESSOR TESTING ONLY *** | |
604 | if (inhibit && op_start_log) | |
605 | WriteLog("!!! ^^^ This object is INHIBITED! ^^^ !!!\n"); | |
606 | bitmapCounter++; | |
607 | if (!inhibit) // For OP testing only! | |
608 | // *** END OP PROCESSOR TESTING ONLY *** | |
609 | if (halfline >= ypos && height > 0) | |
610 | { | |
611 | // Believe it or not, this is what the OP actually does... | |
612 | // which is why they're required to be on a dphrase boundary! | |
613 | uint64_t p1 = OPLoadPhrase(oldOPP | 0x08); | |
614 | //unneeded op_pointer += 8; | |
615 | //WriteLog("OP: Writing halfline %d with ypos == %d...\n", halfline, ypos); | |
616 | //WriteLog("--> Writing %u BPP bitmap...\n", op_bitmap_bit_depth[(p1 >> 12) & 0x07]); | |
617 | // OPProcessFixedBitmap(halfline, p0, p1, render); | |
618 | OPProcessFixedBitmap(p0, p1, render); | |
619 | ||
620 | // OP write-backs | |
621 | ||
622 | height--; | |
623 | ||
624 | uint64_t data = (p0 & 0xFFFFF80000000000LL) >> 40; | |
625 | uint64_t dwidth = (p1 & 0xFFC0000) >> 15; | |
626 | data += dwidth; | |
627 | ||
628 | p0 &= ~0xFFFFF80000FFC000LL; // Mask out old data... | |
629 | p0 |= (uint64_t)height << 14; | |
630 | p0 |= data << 40; | |
631 | OPStorePhrase(oldOPP, p0); | |
632 | } | |
633 | ||
634 | // OP bottom 3 bits are hardwired to zero. The link address | |
635 | // reflects this, so we only need the top 19 bits of the address | |
636 | // (which is why we only shift 21, and not 24). | |
637 | op_pointer = (p0 & 0x000007FFFF000000LL) >> 21; | |
638 | ||
639 | // KLUDGE: Seems that memory access is mirrored in the first 8MB of | |
640 | // memory... | |
641 | if (op_pointer > 0x1FFFFF && op_pointer < 0x800000) | |
642 | op_pointer &= 0xFF1FFFFF; // Knock out bits 21-23 | |
643 | ||
644 | break; | |
645 | } | |
646 | case OBJECT_TYPE_SCALE: | |
647 | { | |
648 | //WAS: uint16_t ypos = (p0 >> 3) & 0x3FF; | |
649 | uint16_t ypos = (p0 >> 3) & 0x7FF; | |
650 | uint32_t height = (p0 & 0xFFC000) >> 14; | |
651 | uint32_t oldOPP = op_pointer - 8; | |
652 | //WriteLog("OP: Scaled Object (ypos=%04X, height=%04X", ypos, height); | |
653 | // *** BEGIN OP PROCESSOR TESTING ONLY *** | |
654 | if (inhibit && op_start_log) | |
655 | { | |
656 | WriteLog("!!! ^^^ This object is INHIBITED! ^^^ !!! (halfline=%u, ypos=%u, height=%u)\n", halfline, ypos, height); | |
657 | DumpScaledObject(p0, OPLoadPhrase(op_pointer), OPLoadPhrase(op_pointer+8)); | |
658 | } | |
659 | bitmapCounter++; | |
660 | if (!inhibit) // For OP testing only! | |
661 | // *** END OP PROCESSOR TESTING ONLY *** | |
662 | if (halfline >= ypos && height > 0) | |
663 | { | |
664 | // Believe it or not, this is what the OP actually does... | |
665 | // which is why they're required to be on a qphrase boundary! | |
666 | uint64_t p1 = OPLoadPhrase(oldOPP | 0x08); | |
667 | uint64_t p2 = OPLoadPhrase(oldOPP | 0x10); | |
668 | //unneeded op_pointer += 16; | |
669 | OPProcessScaledBitmap(p0, p1, p2, render); | |
670 | ||
671 | // OP write-backs | |
672 | ||
673 | uint16_t remainder = (p2 >> 16) & 0xFF;//, vscale = p2 >> 8; | |
674 | uint8_t /*remainder = p2 >> 16,*/ vscale = p2 >> 8; | |
675 | //Actually, we should skip this object if it has a vscale of zero. | |
676 | //Or do we? Not sure... Atari Karts has a few lines that look like: | |
677 | // (SCALED BITMAP) | |
678 | //000E8268 --> phrase 00010000 7000B00D | |
679 | // [7 (0) x 1 @ (13, 0) (8 bpp), l: 000E82A0, p: 000E0FC0 fp: 00, fl:RELEASE, idx:00, pt:01] | |
680 | // [hsc: 9A, vsc: 00, rem: 00] | |
681 | // Could it be the vscale is overridden if the DWIDTH is zero? Hmm... | |
682 | //WriteLog("OP: Scaled bitmap processing (rem=%02X, vscale=%02X)...\n", remainder, vscale);//*/ | |
683 | ||
684 | if (vscale == 0) | |
685 | vscale = 0x20; // OP bug??? Nope, it isn't...! Or is it? | |
686 | ||
687 | //extern int start_logging; | |
688 | //if (start_logging) | |
689 | // WriteLog("--> Returned from scaled bitmap processing (rem=%02X, vscale=%02X)...\n", remainder, vscale);//*/ | |
690 | //Locks up here: | |
691 | //--> Returned from scaled bitmap processing (rem=20, vscale=80)... | |
692 | //There are other problems here, it looks like... | |
693 | //Another lock up: | |
694 | //About to execute OP (508)... | |
695 | /* | |
696 | OP: Scaled bitmap 4x? 4bpp at 38,? hscale=7C fpix=0 data=00075E28 pitch 1 hflipped=no dwidth=? (linked to 00071118) Transluency=no | |
697 | --> Returned from scaled bitmap processing (rem=50, vscale=7C)... | |
698 | OP: Scaled bitmap 4x? 4bpp at 38,? hscale=7C fpix=0 data=00075E28 pitch 1 hflipped=no dwidth=? (linked to 00071118) Transluency=no | |
699 | --> Returned from scaled bitmap processing (rem=30, vscale=7C)... | |
700 | OP: Scaled bitmap 4x? 4bpp at 38,? hscale=7C fpix=0 data=00075E28 pitch 1 hflipped=no dwidth=? (linked to 00071118) Transluency=no | |
701 | --> Returned from scaled bitmap processing (rem=10, vscale=7C)... | |
702 | OP: Scaled bitmap 4x? 4bpp at 36,? hscale=7E fpix=0 data=000756A8 pitch 1 hflipped=no dwidth=? (linked to 00073058) Transluency=no | |
703 | --> Returned from scaled bitmap processing (rem=00, vscale=7E)... | |
704 | OP: Scaled bitmap 4x? 4bpp at 34,? hscale=80 fpix=0 data=000756C8 pitch 1 hflipped=no dwidth=? (linked to 00073078) Transluency=no | |
705 | --> Returned from scaled bitmap processing (rem=00, vscale=80)... | |
706 | OP: Scaled bitmap 4x? 4bpp at 36,? hscale=7E fpix=0 data=000756C8 pitch 1 hflipped=no dwidth=? (linked to 00073058) Transluency=no | |
707 | --> Returned from scaled bitmap processing (rem=5E, vscale=7E)... | |
708 | OP: Scaled bitmap 4x? 4bpp at 34,? hscale=80 fpix=0 data=000756E8 pitch 1 hflipped=no dwidth=? (linked to 00073078) Transluency=no | |
709 | --> Returned from scaled bitmap processing (rem=60, vscale=80)... | |
710 | OP: Scaled bitmap 4x? 4bpp at 36,? hscale=7E fpix=0 data=000756C8 pitch 1 hflipped=no dwidth=? (linked to 00073058) Transluency=no | |
711 | --> Returned from scaled bitmap processing (rem=3E, vscale=7E)... | |
712 | OP: Scaled bitmap 4x? 4bpp at 34,? hscale=80 fpix=0 data=000756E8 pitch 1 hflipped=no dwidth=? (linked to 00073078) Transluency=no | |
713 | --> Returned from scaled bitmap processing (rem=40, vscale=80)... | |
714 | OP: Scaled bitmap 4x? 4bpp at 36,? hscale=7E fpix=0 data=000756C8 pitch 1 hflipped=no dwidth=? (linked to 00073058) Transluency=no | |
715 | --> Returned from scaled bitmap processing (rem=1E, vscale=7E)... | |
716 | OP: Scaled bitmap 4x? 4bpp at 34,? hscale=80 fpix=0 data=000756E8 pitch 1 hflipped=no dwidth=? (linked to 00073078) Transluency=no | |
717 | --> Returned from scaled bitmap processing (rem=20, vscale=80)... | |
718 | */ | |
719 | //Here's another problem: | |
720 | // [hsc: 20, vsc: 20, rem: 00] | |
721 | // Since we're not checking for $E0 (but that's what we get from the above), we | |
722 | // end up repeating this halfline unnecessarily... !!! FIX !!! [DONE, but... | |
723 | // still not quite right. Either that, or the Accolade team that wrote Bubsy | |
724 | // screwed up royal.] | |
725 | //Also note: $E0 = 7.0 which IS a legal vscale value... | |
726 | ||
727 | // if (remainder & 0x80) // I.e., it's negative | |
728 | // if ((remainder & 0x80) || remainder == 0) // I.e., it's <= 0 | |
729 | // if ((remainder - 1) >= 0xE0) // I.e., it's <= 0 | |
730 | // if ((remainder >= 0xE1) || remainder == 0)// I.e., it's <= 0 | |
731 | // if ((remainder >= 0xE1 && remainder <= 0xFF) || remainder == 0)// I.e., it's <= 0 | |
732 | // if (remainder <= 0x20) // I.e., it's <= 1.0 | |
733 | // I.e., it's < 1.0f -> means it'll go negative when we subtract 1.0f. | |
734 | if (remainder < 0x20) | |
735 | { | |
736 | uint64_t data = (p0 & 0xFFFFF80000000000LL) >> 40; | |
737 | uint64_t dwidth = (p1 & 0xFFC0000) >> 15; | |
738 | ||
739 | // while (remainder & 0x80) | |
740 | // while ((remainder & 0x80) || remainder == 0) | |
741 | // while ((remainder - 1) >= 0xE0) | |
742 | // while ((remainder >= 0xE1) || remainder == 0) | |
743 | // while ((remainder >= 0xE1 && remainder <= 0xFF) || remainder == 0) | |
744 | // while (remainder <= 0x20) | |
745 | while (remainder < 0x20) | |
746 | { | |
747 | remainder += vscale; | |
748 | ||
749 | if (height) | |
750 | height--; | |
751 | ||
752 | data += dwidth; | |
753 | } | |
754 | ||
755 | p0 &= ~0xFFFFF80000FFC000LL; // Mask out old data... | |
756 | p0 |= (uint64_t)height << 14; | |
757 | p0 |= data << 40; | |
758 | OPStorePhrase(oldOPP, p0); | |
759 | } | |
760 | ||
761 | remainder -= 0x20; // 1.0f in [3.5] fixed point format | |
762 | ||
763 | //if (start_logging) | |
764 | // WriteLog("--> Finished writebacks...\n");//*/ | |
765 | ||
766 | //WriteLog(" [%08X%08X -> ", (uint32_t)(p2>>32), (uint32_t)(p2&0xFFFFFFFF)); | |
767 | p2 &= ~0x0000000000FF0000LL; | |
768 | p2 |= (uint64_t)remainder << 16; | |
769 | //WriteLog("%08X%08X]\n", (uint32_t)(p2>>32), (uint32_t)(p2&0xFFFFFFFF)); | |
770 | OPStorePhrase(oldOPP + 16, p2); | |
771 | //remainder = (uint8_t)(p2 >> 16), vscale = (uint8_t)(p2 >> 8); | |
772 | //WriteLog(" [after]: rem=%02X, vscale=%02X\n", remainder, vscale); | |
773 | } | |
774 | ||
775 | // OP bottom 3 bits are hardwired to zero. The link address | |
776 | // reflects this, so we only need the top 19 bits of the address | |
777 | // (which is why we only shift 21, and not 24). | |
778 | op_pointer = (p0 & 0x000007FFFF000000LL) >> 21; | |
779 | ||
780 | // KLUDGE: Seems that memory access is mirrored in the first 8MB of | |
781 | // memory... | |
782 | if (op_pointer > 0x1FFFFF && op_pointer < 0x800000) | |
783 | op_pointer &= 0xFF1FFFFF; // Knock out bits 21-23 | |
784 | ||
785 | break; | |
786 | } | |
787 | case OBJECT_TYPE_GPU: | |
788 | { | |
789 | //WriteLog("OP: Asserting GPU IRQ #3...\n"); | |
790 | #ifdef _MSC_VER | |
791 | #pragma message("Warning: Need to fix OP GPU IRQ handling! !!! FIX !!!") | |
792 | #else | |
793 | #warning "Need to fix OP GPU IRQ handling! !!! FIX !!!" | |
794 | #endif // _MSC_VER | |
795 | OPSetCurrentObject(p0); | |
796 | GPUSetIRQLine(3, ASSERT_LINE); | |
797 | //Also, OP processing is suspended from this point until OBF (F00026) is written to... | |
798 | // !!! FIX !!! | |
799 | //Do something like: | |
800 | //OPSuspendedByGPU = true; | |
801 | //Dunno if the OP keeps processing from where it was interrupted, or if it just continues | |
802 | //on the next halfline... | |
803 | // --> It continues from where it was interrupted! !!! FIX !!! | |
804 | break; | |
805 | } | |
806 | case OBJECT_TYPE_BRANCH: | |
807 | { | |
808 | uint16_t ypos = (p0 >> 3) & 0x7FF; | |
809 | // JTRM is wrong: CC is bits 14-16 (3 bits, *not* 2) | |
810 | uint8_t cc = (p0 >> 14) & 0x07; | |
811 | uint32_t link = (p0 >> 21) & 0x3FFFF8; | |
812 | ||
813 | switch (cc) | |
814 | { | |
815 | case CONDITION_EQUAL: | |
816 | if (halfline == ypos || ypos == 0x7FF) | |
817 | op_pointer = link; | |
818 | break; | |
819 | case CONDITION_LESS_THAN: | |
820 | if (halfline < ypos) | |
821 | op_pointer = link; | |
822 | break; | |
823 | case CONDITION_GREATER_THAN: | |
824 | if (halfline > ypos) | |
825 | op_pointer = link; | |
826 | break; | |
827 | case CONDITION_OP_FLAG_SET: | |
828 | if (OPGetStatusRegister() & 0x01) | |
829 | op_pointer = link; | |
830 | break; | |
831 | case CONDITION_SECOND_HALF_LINE: | |
832 | // Branch if bit 10 of HC is set... | |
833 | if (TOMGetHC() & 0x0400) | |
834 | op_pointer = link; | |
835 | break; | |
836 | default: | |
837 | // Basically, if you do this, the OP does nothing. :-) | |
838 | WriteLog("OP: Unimplemented branch condition %i\n", cc); | |
839 | } | |
840 | break; | |
841 | } | |
842 | case OBJECT_TYPE_STOP: | |
843 | { | |
844 | OPSetCurrentObject(p0); | |
845 | ||
846 | if ((p0 & 0x08) && TOMIRQEnabled(IRQ_OPFLAG)) | |
847 | { | |
848 | TOMSetPendingObjectInt(); | |
849 | m68k_set_irq(2); // Cause a 68K IPL 2 to occur... | |
850 | } | |
851 | ||
852 | // Bail out, we're done... | |
853 | return; | |
854 | } | |
855 | default: | |
856 | WriteLog("OP: Unknown object type %i\n", (uint8_t)p0 & 0x07); | |
857 | } | |
858 | ||
859 | // Here is a little sanity check to keep the OP from locking up the | |
860 | // machine when fed bad data. Better would be to count how many actual | |
861 | // cycles it used and bail out/reenter to properly simulate an | |
862 | // overloaded OP... !!! FIX !!! | |
863 | #ifdef _MSC_VER | |
864 | #pragma message("Warning: Better would be to count how many actual cycles it used and bail out/reenter to properly simulate an overloaded OP... !!! FIX !!!") | |
865 | #else | |
866 | #warning "Better would be to count how many actual cycles it used and bail out/reenter to properly simulate an overloaded OP... !!! FIX !!!" | |
867 | #endif // _MSC_VER | |
868 | opCyclesToRun--; | |
869 | ||
870 | if (!opCyclesToRun) | |
871 | return; | |
872 | } | |
873 | } | |
874 | ||
875 | ||
876 | // | |
877 | // Store fixed size bitmap in line buffer | |
878 | // | |
879 | void OPProcessFixedBitmap(uint64_t p0, uint64_t p1, bool render) | |
880 | { | |
881 | // Need to make sure that when writing that it stays within the line buffer... | |
882 | // LBUF ($F01800 - $F01D9E) 360 x 32-bit RAM | |
883 | uint8_t depth = (p1 >> 12) & 0x07; // Color depth of image | |
884 | int32_t xpos = ((int16_t)((p1 << 4) & 0xFFFF)) >> 4;// Image xpos in LBUF | |
885 | uint32_t iwidth = (p1 >> 28) & 0x3FF; // Image width in *phrases* | |
886 | uint32_t data = (p0 >> 40) & 0xFFFFF8; // Pixel data address | |
887 | uint32_t firstPix = (p1 >> 49) & 0x3F; | |
888 | // "The LSB is significant only for scaled objects..." -JTRM | |
889 | // "In 1 BPP mode, all five bits are significant. In 2 BPP mode, the top | |
890 | // four are significant..." | |
891 | firstPix &= 0x3E; | |
892 | ||
893 | // We can ignore the RELEASE (high order) bit for now--probably forever...! | |
894 | // uint8_t flags = (p1 >> 45) & 0x0F; // REFLECT, RMW, TRANS, RELEASE | |
895 | //Optimize: break these out to their own BOOL values | |
896 | uint8_t flags = (p1 >> 45) & 0x07; // REFLECT (0), RMW (1), TRANS (2) | |
897 | bool flagREFLECT = (flags & OPFLAG_REFLECT ? true : false), | |
898 | flagRMW = (flags & OPFLAG_RMW ? true : false), | |
899 | flagTRANS = (flags & OPFLAG_TRANS ? true : false); | |
900 | // "For images with 1 to 4 bits/pixel the top 7 to 4 bits of the index | |
901 | // provide the most significant bits of the palette address." | |
902 | uint8_t index = (p1 >> 37) & 0xFE; // CLUT index offset (upper pix, 1-4 bpp) | |
903 | uint32_t pitch = (p1 >> 15) & 0x07; // Phrase pitch | |
904 | pitch <<= 3; // Optimization: Multiply pitch by 8 | |
905 | ||
906 | // int16_t scanlineWidth = tom_getVideoModeWidth(); | |
907 | uint8_t * tomRam8 = TOMGetRamPointer(); | |
908 | uint8_t * paletteRAM = &tomRam8[0x400]; | |
909 | // This is OK as long as it's used correctly: For 16-bit RAM to RAM direct | |
910 | // copies--NOT for use when using endian-corrected data (i.e., any of the | |
911 | // *_word_read functions!) | |
912 | uint16_t * paletteRAM16 = (uint16_t *)paletteRAM; | |
913 | ||
914 | // WriteLog("bitmap %ix? %ibpp at %i,? firstpix=? data=0x%.8x pitch %i hflipped=%s dwidth=? (linked to ?) RMW=%s Tranparent=%s\n", | |
915 | // iwidth, op_bitmap_bit_depth[bitdepth], xpos, ptr, pitch, (flags&OPFLAG_REFLECT ? "yes" : "no"), (flags&OPFLAG_RMW ? "yes" : "no"), (flags&OPFLAG_TRANS ? "yes" : "no")); | |
916 | ||
917 | // Is it OK to have a 0 for the data width??? (i.e., undocumented?) | |
918 | // Seems to be... Seems that dwidth *can* be zero (i.e., reuse same line) as | |
919 | // well. | |
920 | // Pitch == 0 is OK too... | |
921 | ||
922 | //kludge: Seems that the OP treats iwidth == 0 as iwidth == 1... Need to | |
923 | // investigate on real hardware... | |
924 | #ifdef _MSC_VER | |
925 | #pragma message("Warning: !!! Need to investigate iwidth == 0 behavior on real hardware !!!") | |
926 | #else | |
927 | #warning "!!! Need to investigate iwidth == 0 behavior on real hardware !!!" | |
928 | #endif // _MSC_VER | |
929 | if (iwidth == 0) | |
930 | iwidth = 1; | |
931 | ||
932 | // if (!render || op_pointer == 0 || ptr == 0 || pitch == 0) | |
933 | //I'm not convinced that we need to concern ourselves with data & op_pointer | |
934 | //here either! | |
935 | if (!render || iwidth == 0) | |
936 | return; | |
937 | ||
938 | //OK, so we know the position in the line buffer is correct. It's the clipping | |
939 | //in 24bpp mode that's wrong! | |
940 | #if 0 | |
941 | //This is a total kludge, based upon the fact that 24BPP mode puts *4* bytes | |
942 | //into the line buffer for each pixel. | |
943 | if (depth == 5) // i.e., 24bpp mode... | |
944 | xpos >>= 1; // Cut it in half... | |
945 | #endif | |
946 | ||
947 | //#define OP_DEBUG_BMP | |
948 | //#ifdef OP_DEBUG_BMP | |
949 | // WriteLog("bitmap %ix%i %ibpp at %i,%i firstpix=%i data=0x%.8x pitch %i hflipped=%s dwidth=%i (linked to 0x%.8x) Transluency=%s\n", | |
950 | // iwidth, height, op_bitmap_bit_depth[bitdepth], xpos, ypos, firstPix, ptr, pitch, (flags&OPFLAG_REFLECT ? "yes" : "no"), dwidth, op_pointer, (flags&OPFLAG_RMW ? "yes" : "no")); | |
951 | //#endif | |
952 | ||
953 | // int32_t leftMargin = xpos, rightMargin = (xpos + (phraseWidthToPixels[depth] * iwidth)) - 1; | |
954 | int32_t startPos = xpos, endPos = xpos + | |
955 | (!flagREFLECT ? (phraseWidthToPixels[depth] * iwidth) - 1 | |
956 | : -((phraseWidthToPixels[depth] * iwidth) + 1)); | |
957 | uint32_t clippedWidth = 0, phraseClippedWidth = 0, dataClippedWidth = 0;//, phrasePixel = 0; | |
958 | bool in24BPPMode = (((GET16(tomRam8, 0x0028) >> 1) & 0x03) == 1 ? true : false); // VMODE | |
959 | // This is correct, the OP line buffer is a constant size... | |
960 | int32_t limit = 720; | |
961 | int32_t lbufWidth = 719; | |
962 | ||
963 | // If the image is completely to the left or right of the line buffer, then | |
964 | // bail. | |
965 | //If in REFLECT mode, then these values are swapped! !!! FIX !!! [DONE] | |
966 | //There are four possibilities: | |
967 | // 1. image sits on left edge and no REFLECT; starts out of bounds but ends in bounds. | |
968 | // 2. image sits on left edge and REFLECT; starts in bounds but ends out of bounds. | |
969 | // 3. image sits on right edge and REFLECT; starts out of bounds but ends in bounds. | |
970 | // 4. image sits on right edge and no REFLECT; starts in bounds but ends out of bounds. | |
971 | //Numbers 2 & 4 can be caught by checking the LBUF clip while in the inner loop, | |
972 | // numbers 1 & 3 are of concern. | |
973 | // This *indirectly* handles only cases 2 & 4! And is WRONG is REFLECT is set...! | |
974 | // if (rightMargin < 0 || leftMargin > lbufWidth) | |
975 | ||
976 | // It might be easier to swap these (if REFLECTed) and just use XPOS down below... | |
977 | // That way, you could simply set XPOS to leftMargin if !REFLECT and to rightMargin otherwise. | |
978 | // Still have to be careful with the DATA and IWIDTH values though... | |
979 | ||
980 | // if ((!flagREFLECT && (rightMargin < 0 || leftMargin > lbufWidth)) | |
981 | // || (flagREFLECT && (leftMargin < 0 || rightMargin > lbufWidth))) | |
982 | // return; | |
983 | if ((!flagREFLECT && (endPos < 0 || startPos > lbufWidth)) | |
984 | || (flagREFLECT && (startPos < 0 || endPos > lbufWidth))) | |
985 | return; | |
986 | ||
987 | // Otherwise, find the clip limits and clip the phrase as well... | |
988 | // NOTE: I'm fudging here by letting the actual blit overstep the bounds of the | |
989 | // line buffer, but it shouldn't matter since there are two unused line | |
990 | // buffers below and nothing above and I'll at most write 8 bytes outside | |
991 | // the line buffer... I could use a fractional clip begin/end value, but | |
992 | // this makes the blit a *lot* more hairy. I might fix this in the future | |
993 | // if it becomes necessary. (JLH) | |
994 | // Probably wouldn't be *that* hairy. Just use a delta that tells the inner loop | |
995 | // which pixel in the phrase is being written, and quit when either end of phrases | |
996 | // is reached or line buffer extents are surpassed. | |
997 | ||
998 | //This stuff is probably wrong as well... !!! FIX !!! | |
999 | //The strange thing is that it seems to work, but that's no guarantee that it's bulletproof! | |
1000 | //Yup. Seems that JagMania doesn't work correctly with this... | |
1001 | //Dunno if this is the problem, but Atari Karts is showing *some* of the road now... | |
1002 | // if (!flagREFLECT) | |
1003 | ||
1004 | /* | |
1005 | if (leftMargin < 0) | |
1006 | clippedWidth = 0 - leftMargin, | |
1007 | phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth], | |
1008 | leftMargin = 0 - (clippedWidth % phraseWidthToPixels[depth]); | |
1009 | // leftMargin = 0; | |
1010 | ||
1011 | if (rightMargin > lbufWidth) | |
1012 | clippedWidth = rightMargin - lbufWidth, | |
1013 | phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth];//, | |
1014 | // rightMargin = lbufWidth + (clippedWidth % phraseWidthToPixels[depth]); | |
1015 | // rightMargin = lbufWidth; | |
1016 | */ | |
1017 | if (depth > 5) | |
1018 | WriteLog("OP: We're about to encounter a divide by zero error!\n"); | |
1019 | // NOTE: We're just using endPos to figure out how much, if any, to clip by. | |
1020 | // ALSO: There may be another case where we start out of bounds and end out | |
1021 | // of bounds...! | |
1022 | // !!! FIX !!! | |
1023 | if (startPos < 0) // Case #1: Begin out, end in, L to R | |
1024 | clippedWidth = 0 - startPos, | |
1025 | dataClippedWidth = phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth], | |
1026 | startPos = 0 - (clippedWidth % phraseWidthToPixels[depth]); | |
1027 | ||
1028 | if (endPos < 0) // Case #2: Begin in, end out, R to L | |
1029 | clippedWidth = 0 - endPos, | |
1030 | phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth]; | |
1031 | ||
1032 | if (endPos > lbufWidth) // Case #3: Begin in, end out, L to R | |
1033 | clippedWidth = endPos - lbufWidth, | |
1034 | phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth]; | |
1035 | ||
1036 | if (startPos > lbufWidth) // Case #4: Begin out, end in, R to L | |
1037 | clippedWidth = startPos - lbufWidth, | |
1038 | dataClippedWidth = phraseClippedWidth = clippedWidth / phraseWidthToPixels[depth], | |
1039 | startPos = lbufWidth + (clippedWidth % phraseWidthToPixels[depth]); | |
1040 | //printf("<OP:spos=%i,epos=%i]", startPos, endPos); | |
1041 | ||
1042 | // If the image is sitting on the line buffer left or right edge, we need to compensate | |
1043 | // by decreasing the image phrase width accordingly. | |
1044 | iwidth -= phraseClippedWidth; | |
1045 | ||
1046 | // Also, if we're clipping the phrase we need to make sure we're in the correct part of | |
1047 | // the pixel data. | |
1048 | // data += phraseClippedWidth * (pitch << 3); | |
1049 | data += dataClippedWidth * pitch; | |
1050 | ||
1051 | // NOTE: When the bitmap is in REFLECT mode, the XPOS marks the *right* side of the | |
1052 | // bitmap! This makes clipping & etc. MUCH, much easier...! | |
1053 | // uint32_t lbufAddress = 0x1800 + (!in24BPPMode ? leftMargin * 2 : leftMargin * 4); | |
1054 | //Why does this work right when multiplying startPos by 2 (instead of 4) for 24 BPP mode? | |
1055 | //Is this a bug in the OP? | |
1056 | //It's because in 24bpp mode, each pixel takes *4* bytes, instead of the usual 2. | |
1057 | //Though it looks like we're doing it here no matter what... | |
1058 | // uint32_t lbufAddress = 0x1800 + (!in24BPPMode ? startPos * 2 : startPos * 2); | |
1059 | //Let's try this: | |
1060 | uint32_t lbufAddress = 0x1800 + (startPos * 2); | |
1061 | uint8_t * currentLineBuffer = &tomRam8[lbufAddress]; | |
1062 | ||
1063 | // Render. | |
1064 | ||
1065 | // Hmm. We check above for 24 BPP mode, but don't do anything about it below... | |
1066 | // If we *were* in 24 BPP mode, how would you convert CRY to RGB24? Seems to me | |
1067 | // that if you're in CRY mode then you wouldn't be able to use 24 BPP bitmaps | |
1068 | // anyway. | |
1069 | // This seems to be the case (at least according to the Midsummer docs)...! | |
1070 | ||
1071 | // This is to test using palette zeroes instead of bit zeroes... | |
1072 | // And it seems that this is wrong, index == 0 is transparent apparently... :-/ | |
1073 | //#define OP_USES_PALETTE_ZERO | |
1074 | ||
1075 | if (depth == 0) // 1 BPP | |
1076 | { | |
1077 | // The LSB of flags is OPFLAG_REFLECT, so sign extend it and or 2 into it. | |
1078 | int32_t lbufDelta = ((int8_t)((flags << 7) & 0xFF) >> 5) | 0x02; | |
1079 | ||
1080 | // Fetch 1st phrase... | |
1081 | uint64_t pixels = ((uint64_t)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP); | |
1082 | //Note that firstPix should only be honored *if* we start with the 1st phrase of the bitmap | |
1083 | //i.e., we didn't clip on the margin... !!! FIX !!! | |
1084 | pixels <<= firstPix; // Skip first N pixels (N=firstPix)... | |
1085 | int i = firstPix; // Start counter at right spot... | |
1086 | ||
1087 | while (iwidth--) | |
1088 | { | |
1089 | while (i++ < 64) | |
1090 | { | |
1091 | uint8_t bit = pixels >> 63; | |
1092 | #ifndef OP_USES_PALETTE_ZERO | |
1093 | if (flagTRANS && bit == 0) | |
1094 | #else | |
1095 | if (flagTRANS && (paletteRAM16[index | bit] == 0)) | |
1096 | #endif | |
1097 | ; // Do nothing... | |
1098 | else | |
1099 | { | |
1100 | if (!flagRMW) | |
1101 | //Optimize: Set palleteRAM16 to beginning of palette RAM + index*2 and use only [bit] as index... | |
1102 | //Won't optimize RMW case though... | |
1103 | // This is the *only* correct use of endian-dependent code | |
1104 | // (i.e., mem-to-mem direct copying)! | |
1105 | *(uint16_t *)currentLineBuffer = paletteRAM16[index | bit]; | |
1106 | else | |
1107 | *currentLineBuffer = | |
1108 | BLEND_CR(*currentLineBuffer, paletteRAM[(index | bit) << 1]), | |
1109 | *(currentLineBuffer + 1) = | |
1110 | BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bit) << 1) + 1]); | |
1111 | } | |
1112 | ||
1113 | currentLineBuffer += lbufDelta; | |
1114 | pixels <<= 1; | |
1115 | } | |
1116 | i = 0; | |
1117 | // Fetch next phrase... | |
1118 | data += pitch; | |
1119 | pixels = ((uint64_t)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP); | |
1120 | } | |
1121 | } | |
1122 | else if (depth == 1) // 2 BPP | |
1123 | { | |
1124 | if (firstPix) | |
1125 | WriteLog("OP: Fixed bitmap @ 2 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix); | |
1126 | index &= 0xFC; // Top six bits form CLUT index | |
1127 | // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it. | |
1128 | int32_t lbufDelta = ((int8_t)((flags << 7) & 0xFF) >> 5) | 0x02; | |
1129 | ||
1130 | while (iwidth--) | |
1131 | { | |
1132 | // Fetch phrase... | |
1133 | uint64_t pixels = ((uint64_t)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP); | |
1134 | data += pitch; | |
1135 | ||
1136 | for(int i=0; i<32; i++) | |
1137 | { | |
1138 | uint8_t bits = pixels >> 62; | |
1139 | // Seems to me that both of these are in the same endian, so we could cast it as | |
1140 | // uint16_t * and do straight across copies (what about 24 bpp? Treat it differently...) | |
1141 | // This only works for the palettized modes (1 - 8 BPP), since we actually have to | |
1142 | // copy data from memory in 16 BPP mode (or does it? Isn't this the same as the CLUT case?) | |
1143 | // No, it isn't because we read the memory in an endian safe way--this *won't* work... | |
1144 | #ifndef OP_USES_PALETTE_ZERO | |
1145 | if (flagTRANS && bits == 0) | |
1146 | #else | |
1147 | if (flagTRANS && (paletteRAM16[index | bits] == 0)) | |
1148 | #endif | |
1149 | ; // Do nothing... | |
1150 | else | |
1151 | { | |
1152 | if (!flagRMW) | |
1153 | *(uint16_t *)currentLineBuffer = paletteRAM16[index | bits]; | |
1154 | else | |
1155 | *currentLineBuffer = | |
1156 | BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]), | |
1157 | *(currentLineBuffer + 1) = | |
1158 | BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]); | |
1159 | } | |
1160 | ||
1161 | currentLineBuffer += lbufDelta; | |
1162 | pixels <<= 2; | |
1163 | } | |
1164 | } | |
1165 | } | |
1166 | else if (depth == 2) // 4 BPP | |
1167 | { | |
1168 | if (firstPix) | |
1169 | WriteLog("OP: Fixed bitmap @ 4 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix); | |
1170 | index &= 0xF0; // Top four bits form CLUT index | |
1171 | // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it. | |
1172 | int32_t lbufDelta = ((int8_t)((flags << 7) & 0xFF) >> 5) | 0x02; | |
1173 | ||
1174 | while (iwidth--) | |
1175 | { | |
1176 | // Fetch phrase... | |
1177 | uint64_t pixels = ((uint64_t)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP); | |
1178 | data += pitch; | |
1179 | ||
1180 | for(int i=0; i<16; i++) | |
1181 | { | |
1182 | uint8_t bits = pixels >> 60; | |
1183 | // Seems to me that both of these are in the same endian, so we could cast it as | |
1184 | // uint16_t * and do straight across copies (what about 24 bpp? Treat it differently...) | |
1185 | // This only works for the palettized modes (1 - 8 BPP), since we actually have to | |
1186 | // copy data from memory in 16 BPP mode (or does it? Isn't this the same as the CLUT case?) | |
1187 | // No, it isn't because we read the memory in an endian safe way--this *won't* work... | |
1188 | #ifndef OP_USES_PALETTE_ZERO | |
1189 | if (flagTRANS && bits == 0) | |
1190 | #else | |
1191 | if (flagTRANS && (paletteRAM16[index | bits] == 0)) | |
1192 | #endif | |
1193 | ; // Do nothing... | |
1194 | else | |
1195 | { | |
1196 | if (!flagRMW) | |
1197 | *(uint16_t *)currentLineBuffer = paletteRAM16[index | bits]; | |
1198 | else | |
1199 | *currentLineBuffer = | |
1200 | BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]), | |
1201 | *(currentLineBuffer + 1) = | |
1202 | BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]); | |
1203 | } | |
1204 | ||
1205 | currentLineBuffer += lbufDelta; | |
1206 | pixels <<= 4; | |
1207 | } | |
1208 | } | |
1209 | } | |
1210 | else if (depth == 3) // 8 BPP | |
1211 | { | |
1212 | // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it. | |
1213 | int32_t lbufDelta = ((int8_t)((flags << 7) & 0xFF) >> 5) | 0x02; | |
1214 | ||
1215 | // Fetch 1st phrase... | |
1216 | uint64_t pixels = ((uint64_t)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP); | |
1217 | //Note that firstPix should only be honored *if* we start with the 1st phrase of the bitmap | |
1218 | //i.e., we didn't clip on the margin... !!! FIX !!! | |
1219 | firstPix &= 0x30; // Only top two bits are valid for 8 BPP | |
1220 | pixels <<= firstPix; // Skip first N pixels (N=firstPix)... | |
1221 | int i = firstPix >> 3; // Start counter at right spot... | |
1222 | ||
1223 | while (iwidth--) | |
1224 | { | |
1225 | while (i++ < 8) | |
1226 | { | |
1227 | uint8_t bits = pixels >> 56; | |
1228 | // Seems to me that both of these are in the same endian, so we could cast it as | |
1229 | // uint16_t * and do straight across copies (what about 24 bpp? Treat it differently...) | |
1230 | // This only works for the palettized modes (1 - 8 BPP), since we actually have to | |
1231 | // copy data from memory in 16 BPP mode (or does it? Isn't this the same as the CLUT case?) | |
1232 | // No, it isn't because we read the memory in an endian safe way--this *won't* work... | |
1233 | //This would seem to be problematic... | |
1234 | //Because it's the palette entry being zero that makes the pixel transparent... | |
1235 | //Let's try it and see. | |
1236 | #ifndef OP_USES_PALETTE_ZERO | |
1237 | if (flagTRANS && bits == 0) | |
1238 | #else | |
1239 | if (flagTRANS && (paletteRAM16[bits] == 0)) | |
1240 | #endif | |
1241 | ; // Do nothing... | |
1242 | else | |
1243 | { | |
1244 | if (!flagRMW) | |
1245 | *(uint16_t *)currentLineBuffer = paletteRAM16[bits]; | |
1246 | else | |
1247 | *currentLineBuffer = | |
1248 | BLEND_CR(*currentLineBuffer, paletteRAM[bits << 1]), | |
1249 | *(currentLineBuffer + 1) = | |
1250 | BLEND_Y(*(currentLineBuffer + 1), paletteRAM[(bits << 1) + 1]); | |
1251 | } | |
1252 | ||
1253 | currentLineBuffer += lbufDelta; | |
1254 | pixels <<= 8; | |
1255 | } | |
1256 | i = 0; | |
1257 | // Fetch next phrase... | |
1258 | data += pitch; | |
1259 | pixels = ((uint64_t)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP); | |
1260 | } | |
1261 | } | |
1262 | else if (depth == 4) // 16 BPP | |
1263 | { | |
1264 | if (firstPix) | |
1265 | WriteLog("OP: Fixed bitmap @ 16 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix); | |
1266 | // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it. | |
1267 | int32_t lbufDelta = ((int8_t)((flags << 7) & 0xFF) >> 5) | 0x02; | |
1268 | ||
1269 | while (iwidth--) | |
1270 | { | |
1271 | // Fetch phrase... | |
1272 | uint64_t pixels = ((uint64_t)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP); | |
1273 | data += pitch; | |
1274 | ||
1275 | for(int i=0; i<4; i++) | |
1276 | { | |
1277 | uint8_t bitsHi = pixels >> 56, bitsLo = pixels >> 48; | |
1278 | // Seems to me that both of these are in the same endian, so we could cast it | |
1279 | // as uint16_t * and do straight across copies (what about 24 bpp? Treat it | |
1280 | // differently...) This only works for the palettized modes (1 - 8 BPP), since | |
1281 | // we actually have to copy data from memory in 16 BPP mode (or does it? Isn't | |
1282 | // this the same as the CLUT case?) No, it isn't because we read the memory in | |
1283 | // an endian safe way--it *won't* work... | |
1284 | //This doesn't seem right... Let's try the encoded black value ($8800): | |
1285 | //Apparently, CRY 0 maps to $8800... | |
1286 | if (flagTRANS && ((bitsLo | bitsHi) == 0)) | |
1287 | // if (flagTRANS && (bitsHi == 0x88) && (bitsLo == 0x00)) | |
1288 | ; // Do nothing... | |
1289 | else | |
1290 | { | |
1291 | if (!flagRMW) | |
1292 | *currentLineBuffer = bitsHi, | |
1293 | *(currentLineBuffer + 1) = bitsLo; | |
1294 | else | |
1295 | *currentLineBuffer = | |
1296 | BLEND_CR(*currentLineBuffer, bitsHi), | |
1297 | *(currentLineBuffer + 1) = | |
1298 | BLEND_Y(*(currentLineBuffer + 1), bitsLo); | |
1299 | } | |
1300 | ||
1301 | currentLineBuffer += lbufDelta; | |
1302 | pixels <<= 16; | |
1303 | } | |
1304 | } | |
1305 | } | |
1306 | else if (depth == 5) // 24 BPP | |
1307 | { | |
1308 | //Looks like Iron Soldier is the only game that uses 24BPP mode... | |
1309 | //There *might* be others... | |
1310 | //WriteLog("OP: Writing 24 BPP bitmap!\n"); | |
1311 | if (firstPix) | |
1312 | WriteLog("OP: Fixed bitmap @ 24 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix); | |
1313 | // Not sure, but I think RMW only works with 16 BPP and below, and only in CRY mode... | |
1314 | // The LSB of flags is OPFLAG_REFLECT, so sign extend it and OR 4 into it. | |
1315 | int32_t lbufDelta = ((int8_t)((flags << 7) & 0xFF) >> 4) | 0x04; | |
1316 | ||
1317 | while (iwidth--) | |
1318 | { | |
1319 | // Fetch phrase... | |
1320 | uint64_t pixels = ((uint64_t)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP); | |
1321 | data += pitch; | |
1322 | ||
1323 | for(int i=0; i<2; i++) | |
1324 | { | |
1325 | // We don't use a 32-bit var here because of endian issues...! | |
1326 | uint8_t bits3 = pixels >> 56, bits2 = pixels >> 48, | |
1327 | bits1 = pixels >> 40, bits0 = pixels >> 32; | |
1328 | ||
1329 | if (flagTRANS && (bits3 | bits2 | bits1 | bits0) == 0) | |
1330 | ; // Do nothing... | |
1331 | else | |
1332 | *currentLineBuffer = bits3, | |
1333 | *(currentLineBuffer + 1) = bits2, | |
1334 | *(currentLineBuffer + 2) = bits1, | |
1335 | *(currentLineBuffer + 3) = bits0; | |
1336 | ||
1337 | currentLineBuffer += lbufDelta; | |
1338 | pixels <<= 32; | |
1339 | } | |
1340 | } | |
1341 | } | |
1342 | } | |
1343 | ||
1344 | ||
1345 | // | |
1346 | // Store scaled bitmap in line buffer | |
1347 | // | |
1348 | void OPProcessScaledBitmap(uint64_t p0, uint64_t p1, uint64_t p2, bool render) | |
1349 | { | |
1350 | // Need to make sure that when writing that it stays within the line buffer... | |
1351 | // LBUF ($F01800 - $F01D9E) 360 x 32-bit RAM | |
1352 | uint8_t depth = (p1 >> 12) & 0x07; // Color depth of image | |
1353 | int32_t xpos = ((int16_t)((p1 << 4) & 0xFFFF)) >> 4;// Image xpos in LBUF | |
1354 | uint32_t iwidth = (p1 >> 28) & 0x3FF; // Image width in *phrases* | |
1355 | uint32_t data = (p0 >> 40) & 0xFFFFF8; // Pixel data address | |
1356 | //#ifdef OP_DEBUG_BMP | |
1357 | // Prolly should use this... Though not sure exactly how. | |
1358 | //Use the upper bits as an offset into the phrase depending on the BPP. That's how! | |
1359 | uint32_t firstPix = (p1 >> 49) & 0x3F; | |
1360 | //This is WEIRD! I'm sure I saw Atari Karts request 8 BPP FIRSTPIX! What happened??? | |
1361 | if (firstPix) | |
1362 | WriteLog("OP: FIRSTPIX != 0! (Scaled BM)\n"); | |
1363 | //#endif | |
1364 | // We can ignore the RELEASE (high order) bit for now--probably forever...! | |
1365 | // uint8_t flags = (p1 >> 45) & 0x0F; // REFLECT, RMW, TRANS, RELEASE | |
1366 | //Optimize: break these out to their own BOOL values [DONE] | |
1367 | uint8_t flags = (p1 >> 45) & 0x07; // REFLECT (0), RMW (1), TRANS (2) | |
1368 | bool flagREFLECT = (flags & OPFLAG_REFLECT ? true : false), | |
1369 | flagRMW = (flags & OPFLAG_RMW ? true : false), | |
1370 | flagTRANS = (flags & OPFLAG_TRANS ? true : false); | |
1371 | uint8_t index = (p1 >> 37) & 0xFE; // CLUT index offset (upper pix, 1-4 bpp) | |
1372 | uint32_t pitch = (p1 >> 15) & 0x07; // Phrase pitch | |
1373 | ||
1374 | uint8_t * tomRam8 = TOMGetRamPointer(); | |
1375 | uint8_t * paletteRAM = &tomRam8[0x400]; | |
1376 | // This is OK as long as it's used correctly: For 16-bit RAM to RAM direct | |
1377 | // copies--NOT for use when using endian-corrected data (i.e., any of the | |
1378 | // *ReadWord functions!) | |
1379 | uint16_t * paletteRAM16 = (uint16_t *)paletteRAM; | |
1380 | ||
1381 | uint16_t hscale = p2 & 0xFF; | |
1382 | // Hmm. It seems that fixing the horizontal scale necessitated re-fixing this. | |
1383 | // Not sure why, but seems to be consistent with the vertical scaling now (and | |
1384 | // it may turn out to be wrong!)... | |
1385 | uint16_t horizontalRemainder = hscale; // Not sure if it starts full, but seems reasonable [It's not!] | |
1386 | // uint8_t horizontalRemainder = 0; // Let's try zero! Seems to work! Yay! [No, it doesn't!] | |
1387 | int32_t scaledWidthInPixels = (iwidth * phraseWidthToPixels[depth] * hscale) >> 5; | |
1388 | uint32_t scaledPhrasePixels = (phraseWidthToPixels[depth] * hscale) >> 5; | |
1389 | ||
1390 | // WriteLog("bitmap %ix? %ibpp at %i,? firstpix=? data=0x%.8x pitch %i hflipped=%s dwidth=? (linked to ?) RMW=%s Tranparent=%s\n", | |
1391 | // iwidth, op_bitmap_bit_depth[bitdepth], xpos, ptr, pitch, (flags&OPFLAG_REFLECT ? "yes" : "no"), (flags&OPFLAG_RMW ? "yes" : "no"), (flags&OPFLAG_TRANS ? "yes" : "no")); | |
1392 | ||
1393 | // Looks like an hscale of zero means don't draw! | |
1394 | if (!render || iwidth == 0 || hscale == 0) | |
1395 | return; | |
1396 | ||
1397 | /*extern int start_logging; | |
1398 | if (start_logging) | |
1399 | WriteLog("OP: Scaled bitmap %ix? %ibpp at %i,? hscale=%02X fpix=%i data=%08X pitch %i hflipped=%s dwidth=? (linked to %08X) Transluency=%s\n", | |
1400 | iwidth, op_bitmap_bit_depth[depth], xpos, hscale, firstPix, data, pitch, (flagREFLECT ? "yes" : "no"), op_pointer, (flagRMW ? "yes" : "no"));*/ | |
1401 | //#define OP_DEBUG_BMP | |
1402 | //#ifdef OP_DEBUG_BMP | |
1403 | // WriteLog("OP: Scaled bitmap %ix%i %ibpp at %i,%i firstpix=%i data=0x%.8x pitch %i hflipped=%s dwidth=%i (linked to 0x%.8x) Transluency=%s\n", | |
1404 | // iwidth, height, op_bitmap_bit_depth[bitdepth], xpos, ypos, firstPix, ptr, pitch, (flags&OPFLAG_REFLECT ? "yes" : "no"), dwidth, op_pointer, (flags&OPFLAG_RMW ? "yes" : "no")); | |
1405 | //#endif | |
1406 | ||
1407 | int32_t startPos = xpos, endPos = xpos + | |
1408 | (!flagREFLECT ? scaledWidthInPixels - 1 : -(scaledWidthInPixels + 1)); | |
1409 | uint32_t clippedWidth = 0, phraseClippedWidth = 0, dataClippedWidth = 0; | |
1410 | bool in24BPPMode = (((GET16(tomRam8, 0x0028) >> 1) & 0x03) == 1 ? true : false); // VMODE | |
1411 | // Not sure if this is Jaguar Two only location or what... | |
1412 | // From the docs, it is... If we want to limit here we should think of something else. | |
1413 | // int32_t limit = GET16(tom_ram_8, 0x0008); // LIMIT | |
1414 | int32_t limit = 720; | |
1415 | // int32_t lbufWidth = (!in24BPPMode ? limit - 1 : (limit / 2) - 1); // Zero based limit... | |
1416 | int32_t lbufWidth = 719; // Zero based limit... | |
1417 | ||
1418 | // If the image is completely to the left or right of the line buffer, then bail. | |
1419 | //If in REFLECT mode, then these values are swapped! !!! FIX !!! [DONE] | |
1420 | //There are four possibilities: | |
1421 | // 1. image sits on left edge and no REFLECT; starts out of bounds but ends in bounds. | |
1422 | // 2. image sits on left edge and REFLECT; starts in bounds but ends out of bounds. | |
1423 | // 3. image sits on right edge and REFLECT; starts out of bounds but ends in bounds. | |
1424 | // 4. image sits on right edge and no REFLECT; starts in bounds but ends out of bounds. | |
1425 | //Numbers 2 & 4 can be caught by checking the LBUF clip while in the inner loop, | |
1426 | // numbers 1 & 3 are of concern. | |
1427 | // This *indirectly* handles only cases 2 & 4! And is WRONG if REFLECT is set...! | |
1428 | // if (rightMargin < 0 || leftMargin > lbufWidth) | |
1429 | ||
1430 | // It might be easier to swap these (if REFLECTed) and just use XPOS down below... | |
1431 | // That way, you could simply set XPOS to leftMargin if !REFLECT and to rightMargin otherwise. | |
1432 | // Still have to be careful with the DATA and IWIDTH values though... | |
1433 | ||
1434 | if ((!flagREFLECT && (endPos < 0 || startPos > lbufWidth)) | |
1435 | || (flagREFLECT && (startPos < 0 || endPos > lbufWidth))) | |
1436 | return; | |
1437 | ||
1438 | // Otherwise, find the clip limits and clip the phrase as well... | |
1439 | // NOTE: I'm fudging here by letting the actual blit overstep the bounds of | |
1440 | // the line buffer, but it shouldn't matter since there are two | |
1441 | // unused line buffers below and nothing above and I'll at most write | |
1442 | // 40 bytes outside the line buffer... I could use a fractional clip | |
1443 | // begin/end value, but this makes the blit a *lot* more hairy. I | |
1444 | // might fix this in the future if it becomes necessary. (JLH) | |
1445 | // Probably wouldn't be *that* hairy. Just use a delta that tells the | |
1446 | // inner loop which pixel in the phrase is being written, and quit | |
1447 | // when either end of phrases is reached or line buffer extents are | |
1448 | // surpassed. | |
1449 | ||
1450 | //This stuff is probably wrong as well... !!! FIX !!! | |
1451 | //The strange thing is that it seems to work, but that's no guarantee that it's | |
1452 | //bulletproof! | |
1453 | //Yup. Seems that JagMania doesn't work correctly with this... | |
1454 | //Dunno if this is the problem, but Atari Karts is showing *some* of the road | |
1455 | //now... | |
1456 | //Actually, it is! Or, it was. It doesn't seem to be clipping here, so the | |
1457 | //problem lies elsewhere! Hmm. Putting the scaling code into the 1/2/8 BPP cases | |
1458 | //seems to draw the ground a bit more accurately... Strange! | |
1459 | //It's probably a case of the REFLECT flag being set and the background being | |
1460 | //written from the right side of the screen... | |
1461 | //But no, it isn't... At least if the diagnostics are telling the truth! | |
1462 | ||
1463 | // NOTE: We're just using endPos to figure out how much, if any, to clip by. | |
1464 | // ALSO: There may be another case where we start out of bounds and end out | |
1465 | // of bounds...! | |
1466 | // !!! FIX !!! | |
1467 | ||
1468 | //There's a problem here with scaledPhrasePixels in that it can be forced to | |
1469 | //zero when the scaling factor is small. So fix it already! !!! FIX !!! | |
1470 | /*if (scaledPhrasePixels == 0) | |
1471 | { | |
1472 | WriteLog("OP: [Scaled] We're about to encounter a divide by zero error!\n"); | |
1473 | DumpScaledObject(p0, p1, p2); | |
1474 | }//*/ | |
1475 | //NOTE: I'm almost 100% sure that this is wrong... And it is! :-p | |
1476 | ||
1477 | //Try a simple example... | |
1478 | // Let's say we have an 8 BPP scanline with an hscale of $80 (4). Our xpos is -10, | |
1479 | // non-flipped. Pixels in the bitmap are XYZXYZXYZXYZXYZ. | |
1480 | // Scaled up, they would be XXXXYYYYZZZZXXXXYYYYZZZZXXXXYYYYZZZZ... | |
1481 | // | |
1482 | // Normally, we would expect this in the line buffer: | |
1483 | // ZZXXXXYYYYZZZZXXXXYYYYZZZZ... | |
1484 | // | |
1485 | // But instead we're getting: | |
1486 | // XXXXYYYYZZZZXXXXYYYYZZZZ... | |
1487 | // | |
1488 | // or are we??? It would seem so, simply by virtue of the fact that we're NOT starting | |
1489 | // on a negative boundary--or are we? Hmm... | |
1490 | // cw = 10, dcw = pcw = 10 / ([8 * 4 = 32] 32) = 0, sp = -10 | |
1491 | // | |
1492 | // Let's try a real world example: | |
1493 | // | |
1494 | //OP: Scaled bitmap (70, 8 BPP, spp=28) sp (-400) < 0... [new sp=-8, cw=400, dcw=pcw=14] | |
1495 | //OP: Scaled bitmap (6F, 8 BPP, spp=27) sp (-395) < 0... [new sp=-17, cw=395, dcw=pcw=14] | |
1496 | // | |
1497 | // Really, spp is 27.75 in the second case... | |
1498 | // So... If we do 395 / 27.75, we get 14. Ok so far... If we scale that against the | |
1499 | // start position (14 * 27.75), we get -6.5... NOT -17! | |
1500 | ||
1501 | //Now it seems we're working OK, at least for the first case... | |
1502 | uint32_t scaledPhrasePixelsUS = phraseWidthToPixels[depth] * hscale; | |
1503 | ||
1504 | if (startPos < 0) // Case #1: Begin out, end in, L to R | |
1505 | { | |
1506 | extern int start_logging; | |
1507 | if (start_logging) | |
1508 | WriteLog("OP: Scaled bitmap (%02X, %u BPP, spp=%u) start pos (%i) < 0...", hscale, op_bitmap_bit_depth[depth], scaledPhrasePixels, startPos); | |
1509 | // clippedWidth = 0 - startPos, | |
1510 | clippedWidth = (0 - startPos) << 5, | |
1511 | // dataClippedWidth = phraseClippedWidth = clippedWidth / scaledPhrasePixels, | |
1512 | dataClippedWidth = phraseClippedWidth = (clippedWidth / scaledPhrasePixelsUS) >> 5, | |
1513 | // startPos = 0 - (clippedWidth % scaledPhrasePixels); | |
1514 | startPos += (dataClippedWidth * scaledPhrasePixelsUS) >> 5; | |
1515 | if (start_logging) | |
1516 | WriteLog(" [new sp=%i, cw=%i, dcw=pcw=%i]\n", startPos, clippedWidth, dataClippedWidth); | |
1517 | } | |
1518 | ||
1519 | if (endPos < 0) // Case #2: Begin in, end out, R to L | |
1520 | clippedWidth = 0 - endPos, | |
1521 | phraseClippedWidth = clippedWidth / scaledPhrasePixels; | |
1522 | ||
1523 | if (endPos > lbufWidth) // Case #3: Begin in, end out, L to R | |
1524 | clippedWidth = endPos - lbufWidth, | |
1525 | phraseClippedWidth = clippedWidth / scaledPhrasePixels; | |
1526 | ||
1527 | if (startPos > lbufWidth) // Case #4: Begin out, end in, R to L | |
1528 | clippedWidth = startPos - lbufWidth, | |
1529 | dataClippedWidth = phraseClippedWidth = clippedWidth / scaledPhrasePixels, | |
1530 | startPos = lbufWidth + (clippedWidth % scaledPhrasePixels); | |
1531 | ||
1532 | extern int op_start_log; | |
1533 | if (op_start_log && clippedWidth != 0) | |
1534 | WriteLog("OP: Clipped line. SP=%i, EP=%i, clip=%u, iwidth=%u, hscale=%02X\n", startPos, endPos, clippedWidth, iwidth, hscale); | |
1535 | if (op_start_log && startPos == 13) | |
1536 | { | |
1537 | WriteLog("OP: Scaled line. SP=%i, EP=%i, clip=%u, iwidth=%u, hscale=%02X, depth=%u, firstPix=%u\n", startPos, endPos, clippedWidth, iwidth, hscale, depth, firstPix); | |
1538 | DumpScaledObject(p0, p1, p2); | |
1539 | if (iwidth == 7) | |
1540 | { | |
1541 | WriteLog(" %08X: ", data); | |
1542 | for(int i=0; i<7*8; i++) | |
1543 | WriteLog("%02X ", JaguarReadByte(data+i)); | |
1544 | WriteLog("\n"); | |
1545 | } | |
1546 | } | |
1547 | // If the image is sitting on the line buffer left or right edge, we need to compensate | |
1548 | // by decreasing the image phrase width accordingly. | |
1549 | iwidth -= phraseClippedWidth; | |
1550 | ||
1551 | // Also, if we're clipping the phrase we need to make sure we're in the correct part of | |
1552 | // the pixel data. | |
1553 | // data += phraseClippedWidth * (pitch << 3); | |
1554 | data += dataClippedWidth * (pitch << 3); | |
1555 | ||
1556 | // NOTE: When the bitmap is in REFLECT mode, the XPOS marks the *right* side of the | |
1557 | // bitmap! This makes clipping & etc. MUCH, much easier...! | |
1558 | // uint32_t lbufAddress = 0x1800 + (!in24BPPMode ? leftMargin * 2 : leftMargin * 4); | |
1559 | // uint32_t lbufAddress = 0x1800 + (!in24BPPMode ? startPos * 2 : startPos * 4); | |
1560 | uint32_t lbufAddress = 0x1800 + startPos * 2; | |
1561 | uint8_t * currentLineBuffer = &tomRam8[lbufAddress]; | |
1562 | //uint8_t * lineBufferLowerLimit = &tom_ram_8[0x1800], | |
1563 | // * lineBufferUpperLimit = &tom_ram_8[0x1800 + 719]; | |
1564 | ||
1565 | // Render. | |
1566 | ||
1567 | // Hmm. We check above for 24 BPP mode, but don't do anything about it below... | |
1568 | // If we *were* in 24 BPP mode, how would you convert CRY to RGB24? Seems to me | |
1569 | // that if you're in CRY mode then you wouldn't be able to use 24 BPP bitmaps | |
1570 | // anyway. | |
1571 | // This seems to be the case (at least according to the Midsummer docs)...! | |
1572 | ||
1573 | if (depth == 0) // 1 BPP | |
1574 | { | |
1575 | if (firstPix != 0) | |
1576 | WriteLog("OP: Scaled bitmap @ 1 BPP requesting FIRSTPIX!\n"); | |
1577 | // The LSB of flags is OPFLAG_REFLECT, so sign extend it and or 2 into it. | |
1578 | int32_t lbufDelta = ((int8_t)((flags << 7) & 0xFF) >> 5) | 0x02; | |
1579 | ||
1580 | int pixCount = 0; | |
1581 | uint64_t pixels = ((uint64_t)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP); | |
1582 | ||
1583 | while ((int32_t)iwidth > 0) | |
1584 | { | |
1585 | uint8_t bits = pixels >> 63; | |
1586 | ||
1587 | #ifndef OP_USES_PALETTE_ZERO | |
1588 | if (flagTRANS && bits == 0) | |
1589 | #else | |
1590 | if (flagTRANS && (paletteRAM16[index | bits] == 0)) | |
1591 | #endif | |
1592 | ; // Do nothing... | |
1593 | else | |
1594 | { | |
1595 | if (!flagRMW) | |
1596 | // This is the *only* correct use of endian-dependent code | |
1597 | // (i.e., mem-to-mem direct copying)! | |
1598 | *(uint16_t *)currentLineBuffer = paletteRAM16[index | bits]; | |
1599 | else | |
1600 | *currentLineBuffer = | |
1601 | BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]), | |
1602 | *(currentLineBuffer + 1) = | |
1603 | BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]); | |
1604 | } | |
1605 | ||
1606 | currentLineBuffer += lbufDelta; | |
1607 | ||
1608 | /* | |
1609 | The reason we subtract the horizontalRemainder *after* the test is because we had too few | |
1610 | bytes for horizontalRemainder to properly recognize a negative number. But now it's 16 bits | |
1611 | wide, so we could probably go back to that (as long as we make it an int16_t and not a uint16!) | |
1612 | */ | |
1613 | /* horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format | |
1614 | while (horizontalRemainder & 0x80) | |
1615 | { | |
1616 | horizontalRemainder += hscale; | |
1617 | pixCount++; | |
1618 | pixels <<= 1; | |
1619 | }//*/ | |
1620 | // while (horizontalRemainder <= 0x20) // I.e., it's <= 1.0 (*before* subtraction) | |
1621 | while (horizontalRemainder < 0x20) // I.e., it's <= 1.0 (*before* subtraction) | |
1622 | { | |
1623 | horizontalRemainder += hscale; | |
1624 | pixCount++; | |
1625 | pixels <<= 1; | |
1626 | } | |
1627 | horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format | |
1628 | ||
1629 | if (pixCount > 63) | |
1630 | { | |
1631 | int phrasesToSkip = pixCount / 64, pixelShift = pixCount % 64; | |
1632 | ||
1633 | data += (pitch << 3) * phrasesToSkip; | |
1634 | pixels = ((uint64_t)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP); | |
1635 | pixels <<= 1 * pixelShift; | |
1636 | iwidth -= phrasesToSkip; | |
1637 | pixCount = pixelShift; | |
1638 | } | |
1639 | } | |
1640 | } | |
1641 | else if (depth == 1) // 2 BPP | |
1642 | { | |
1643 | if (firstPix != 0) | |
1644 | WriteLog("OP: Scaled bitmap @ 2 BPP requesting FIRSTPIX!\n"); | |
1645 | index &= 0xFC; // Top six bits form CLUT index | |
1646 | // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it. | |
1647 | int32_t lbufDelta = ((int8_t)((flags << 7) & 0xFF) >> 5) | 0x02; | |
1648 | ||
1649 | int pixCount = 0; | |
1650 | uint64_t pixels = ((uint64_t)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP); | |
1651 | ||
1652 | while ((int32_t)iwidth > 0) | |
1653 | { | |
1654 | uint8_t bits = pixels >> 62; | |
1655 | ||
1656 | #ifndef OP_USES_PALETTE_ZERO | |
1657 | if (flagTRANS && bits == 0) | |
1658 | #else | |
1659 | if (flagTRANS && (paletteRAM16[index | bits] == 0)) | |
1660 | #endif | |
1661 | ; // Do nothing... | |
1662 | else | |
1663 | { | |
1664 | if (!flagRMW) | |
1665 | // This is the *only* correct use of endian-dependent code | |
1666 | // (i.e., mem-to-mem direct copying)! | |
1667 | *(uint16_t *)currentLineBuffer = paletteRAM16[index | bits]; | |
1668 | else | |
1669 | *currentLineBuffer = | |
1670 | BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]), | |
1671 | *(currentLineBuffer + 1) = | |
1672 | BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]); | |
1673 | } | |
1674 | ||
1675 | currentLineBuffer += lbufDelta; | |
1676 | ||
1677 | /* horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format | |
1678 | while (horizontalRemainder & 0x80) | |
1679 | { | |
1680 | horizontalRemainder += hscale; | |
1681 | pixCount++; | |
1682 | pixels <<= 2; | |
1683 | }//*/ | |
1684 | // while (horizontalRemainder <= 0x20) // I.e., it's <= 0 (*before* subtraction) | |
1685 | while (horizontalRemainder < 0x20) // I.e., it's <= 1.0 (*before* subtraction) | |
1686 | { | |
1687 | horizontalRemainder += hscale; | |
1688 | pixCount++; | |
1689 | pixels <<= 2; | |
1690 | } | |
1691 | horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format | |
1692 | ||
1693 | if (pixCount > 31) | |
1694 | { | |
1695 | int phrasesToSkip = pixCount / 32, pixelShift = pixCount % 32; | |
1696 | ||
1697 | data += (pitch << 3) * phrasesToSkip; | |
1698 | pixels = ((uint64_t)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP); | |
1699 | pixels <<= 2 * pixelShift; | |
1700 | iwidth -= phrasesToSkip; | |
1701 | pixCount = pixelShift; | |
1702 | } | |
1703 | } | |
1704 | } | |
1705 | else if (depth == 2) // 4 BPP | |
1706 | { | |
1707 | if (firstPix != 0) | |
1708 | WriteLog("OP: Scaled bitmap @ 4 BPP requesting FIRSTPIX!\n"); | |
1709 | index &= 0xF0; // Top four bits form CLUT index | |
1710 | // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it. | |
1711 | int32_t lbufDelta = ((int8_t)((flags << 7) & 0xFF) >> 5) | 0x02; | |
1712 | ||
1713 | int pixCount = 0; | |
1714 | uint64_t pixels = ((uint64_t)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP); | |
1715 | ||
1716 | while ((int32_t)iwidth > 0) | |
1717 | { | |
1718 | uint8_t bits = pixels >> 60; | |
1719 | ||
1720 | #ifndef OP_USES_PALETTE_ZERO | |
1721 | if (flagTRANS && bits == 0) | |
1722 | #else | |
1723 | if (flagTRANS && (paletteRAM16[index | bits] == 0)) | |
1724 | #endif | |
1725 | ; // Do nothing... | |
1726 | else | |
1727 | { | |
1728 | if (!flagRMW) | |
1729 | // This is the *only* correct use of endian-dependent code | |
1730 | // (i.e., mem-to-mem direct copying)! | |
1731 | *(uint16_t *)currentLineBuffer = paletteRAM16[index | bits]; | |
1732 | else | |
1733 | *currentLineBuffer = | |
1734 | BLEND_CR(*currentLineBuffer, paletteRAM[(index | bits) << 1]), | |
1735 | *(currentLineBuffer + 1) = | |
1736 | BLEND_Y(*(currentLineBuffer + 1), paletteRAM[((index | bits) << 1) + 1]); | |
1737 | } | |
1738 | ||
1739 | currentLineBuffer += lbufDelta; | |
1740 | ||
1741 | /* horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format | |
1742 | while (horizontalRemainder & 0x80) | |
1743 | { | |
1744 | horizontalRemainder += hscale; | |
1745 | pixCount++; | |
1746 | pixels <<= 4; | |
1747 | }//*/ | |
1748 | // while (horizontalRemainder <= 0x20) // I.e., it's <= 0 (*before* subtraction) | |
1749 | while (horizontalRemainder < 0x20) // I.e., it's <= 0 (*before* subtraction) | |
1750 | { | |
1751 | horizontalRemainder += hscale; | |
1752 | pixCount++; | |
1753 | pixels <<= 4; | |
1754 | } | |
1755 | horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format | |
1756 | ||
1757 | if (pixCount > 15) | |
1758 | { | |
1759 | int phrasesToSkip = pixCount / 16, pixelShift = pixCount % 16; | |
1760 | ||
1761 | data += (pitch << 3) * phrasesToSkip; | |
1762 | pixels = ((uint64_t)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP); | |
1763 | pixels <<= 4 * pixelShift; | |
1764 | iwidth -= phrasesToSkip; | |
1765 | pixCount = pixelShift; | |
1766 | } | |
1767 | } | |
1768 | } | |
1769 | else if (depth == 3) // 8 BPP | |
1770 | { | |
1771 | if (firstPix) | |
1772 | WriteLog("OP: Scaled bitmap @ 8 BPP requesting FIRSTPIX! (fp=%u)\n", firstPix); | |
1773 | // The LSB is OPFLAG_REFLECT, so sign extend it and or 2 into it. | |
1774 | int32_t lbufDelta = ((int8_t)((flags << 7) & 0xFF) >> 5) | 0x02; | |
1775 | ||
1776 | int pixCount = 0; | |
1777 | uint64_t pixels = ((uint64_t)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP); | |
1778 | ||
1779 | while ((int32_t)iwidth > 0) | |
1780 | { | |
1781 | uint8_t bits = pixels >> 56; | |
1782 | ||
1783 | #ifndef OP_USES_PALETTE_ZERO | |
1784 | if (flagTRANS && bits == 0) | |
1785 | #else | |
1786 | if (flagTRANS && (paletteRAM16[bits] == 0)) | |
1787 | #endif | |
1788 | ; // Do nothing... | |
1789 | else | |
1790 | { | |
1791 | if (!flagRMW) | |
1792 | // This is the *only* correct use of endian-dependent code | |
1793 | // (i.e., mem-to-mem direct copying)! | |
1794 | *(uint16_t *)currentLineBuffer = paletteRAM16[bits]; | |
1795 | /* { | |
1796 | if (currentLineBuffer >= lineBufferLowerLimit && currentLineBuffer <= lineBufferUpperLimit) | |
1797 | *(uint16_t *)currentLineBuffer = paletteRAM16[bits]; | |
1798 | }*/ | |
1799 | else | |
1800 | *currentLineBuffer = | |
1801 | BLEND_CR(*currentLineBuffer, paletteRAM[bits << 1]), | |
1802 | *(currentLineBuffer + 1) = | |
1803 | BLEND_Y(*(currentLineBuffer + 1), paletteRAM[(bits << 1) + 1]); | |
1804 | } | |
1805 | ||
1806 | currentLineBuffer += lbufDelta; | |
1807 | ||
1808 | // while (horizontalRemainder <= 0x20) // I.e., it's <= 0 (*before* subtraction) | |
1809 | while (horizontalRemainder < 0x20) // I.e., it's <= 1.0 (*before* subtraction) | |
1810 | { | |
1811 | horizontalRemainder += hscale; | |
1812 | pixCount++; | |
1813 | pixels <<= 8; | |
1814 | } | |
1815 | horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format | |
1816 | ||
1817 | if (pixCount > 7) | |
1818 | { | |
1819 | int phrasesToSkip = pixCount / 8, pixelShift = pixCount % 8; | |
1820 | ||
1821 | data += (pitch << 3) * phrasesToSkip; | |
1822 | pixels = ((uint64_t)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP); | |
1823 | pixels <<= 8 * pixelShift; | |
1824 | iwidth -= phrasesToSkip; | |
1825 | pixCount = pixelShift; | |
1826 | } | |
1827 | } | |
1828 | } | |
1829 | else if (depth == 4) // 16 BPP | |
1830 | { | |
1831 | if (firstPix != 0) | |
1832 | WriteLog("OP: Scaled bitmap @ 16 BPP requesting FIRSTPIX!\n"); | |
1833 | // The LSB is OPFLAG_REFLECT, so sign extend it and OR 2 into it. | |
1834 | int32_t lbufDelta = ((int8_t)((flags << 7) & 0xFF) >> 5) | 0x02; | |
1835 | ||
1836 | int pixCount = 0; | |
1837 | uint64_t pixels = ((uint64_t)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP); | |
1838 | ||
1839 | while ((int32_t)iwidth > 0) | |
1840 | { | |
1841 | uint8_t bitsHi = pixels >> 56, bitsLo = pixels >> 48; | |
1842 | ||
1843 | //This doesn't seem right... Let's try the encoded black value ($8800): | |
1844 | //Apparently, CRY 0 maps to $8800... | |
1845 | if (flagTRANS && ((bitsLo | bitsHi) == 0)) | |
1846 | // if (flagTRANS && (bitsHi == 0x88) && (bitsLo == 0x00)) | |
1847 | ; // Do nothing... | |
1848 | else | |
1849 | { | |
1850 | if (!flagRMW) | |
1851 | *currentLineBuffer = bitsHi, | |
1852 | *(currentLineBuffer + 1) = bitsLo; | |
1853 | else | |
1854 | *currentLineBuffer = | |
1855 | BLEND_CR(*currentLineBuffer, bitsHi), | |
1856 | *(currentLineBuffer + 1) = | |
1857 | BLEND_Y(*(currentLineBuffer + 1), bitsLo); | |
1858 | } | |
1859 | ||
1860 | currentLineBuffer += lbufDelta; | |
1861 | ||
1862 | /* horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format | |
1863 | while (horizontalRemainder & 0x80) | |
1864 | { | |
1865 | horizontalRemainder += hscale; | |
1866 | pixCount++; | |
1867 | pixels <<= 16; | |
1868 | }//*/ | |
1869 | // while (horizontalRemainder <= 0x20) // I.e., it's <= 0 (*before* subtraction) | |
1870 | while (horizontalRemainder < 0x20) // I.e., it's <= 1.0 (*before* subtraction) | |
1871 | { | |
1872 | horizontalRemainder += hscale; | |
1873 | pixCount++; | |
1874 | pixels <<= 16; | |
1875 | } | |
1876 | horizontalRemainder -= 0x20; // Subtract 1.0f in [3.5] fixed point format | |
1877 | //*/ | |
1878 | if (pixCount > 3) | |
1879 | { | |
1880 | int phrasesToSkip = pixCount / 4, pixelShift = pixCount % 4; | |
1881 | ||
1882 | data += (pitch << 3) * phrasesToSkip; | |
1883 | pixels = ((uint64_t)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP); | |
1884 | pixels <<= 16 * pixelShift; | |
1885 | ||
1886 | iwidth -= phrasesToSkip; | |
1887 | ||
1888 | pixCount = pixelShift; | |
1889 | } | |
1890 | } | |
1891 | } | |
1892 | else if (depth == 5) // 24 BPP | |
1893 | { | |
1894 | //I'm not sure that you can scale a 24 BPP bitmap properly--the JTRM seems to indicate as much. | |
1895 | WriteLog("OP: Writing 24 BPP scaled bitmap!\n"); | |
1896 | if (firstPix != 0) | |
1897 | WriteLog("OP: Scaled bitmap @ 24 BPP requesting FIRSTPIX!\n"); | |
1898 | // Not sure, but I think RMW only works with 16 BPP and below, and only in CRY mode... | |
1899 | // The LSB is OPFLAG_REFLECT, so sign extend it and or 4 into it. | |
1900 | int32_t lbufDelta = ((int8_t)((flags << 7) & 0xFF) >> 4) | 0x04; | |
1901 | ||
1902 | while (iwidth--) | |
1903 | { | |
1904 | // Fetch phrase... | |
1905 | uint64_t pixels = ((uint64_t)JaguarReadLong(data, OP) << 32) | JaguarReadLong(data + 4, OP); | |
1906 | data += pitch << 3; // Multiply pitch * 8 (optimize: precompute this value) | |
1907 | ||
1908 | for(int i=0; i<2; i++) | |
1909 | { | |
1910 | uint8_t bits3 = pixels >> 56, bits2 = pixels >> 48, | |
1911 | bits1 = pixels >> 40, bits0 = pixels >> 32; | |
1912 | ||
1913 | if (flagTRANS && (bits3 | bits2 | bits1 | bits0) == 0) | |
1914 | ; // Do nothing... | |
1915 | else | |
1916 | *currentLineBuffer = bits3, | |
1917 | *(currentLineBuffer + 1) = bits2, | |
1918 | *(currentLineBuffer + 2) = bits1, | |
1919 | *(currentLineBuffer + 3) = bits0; | |
1920 | ||
1921 | currentLineBuffer += lbufDelta; | |
1922 | pixels <<= 32; | |
1923 | } | |
1924 | } | |
1925 | } | |
1926 | } |