From 3edc5c6f0a0943292c0fc99a45772bc81ab2b57e Mon Sep 17 00:00:00 2001 From: IntelOrca Date: Fri, 19 Feb 2016 23:41:03 +0000 Subject: [PATCH 1/6] compile multiple versions of draw RLE sprite --- openrct2.vcxproj | 1 + openrct2.vcxproj.filters | 3 + src/drawing/drawing.c | 2 +- src/drawing/drawing.h | 10 +- src/drawing/drawing_fast.cpp | 190 +++++++++++++++++++++++++++++++++++ src/drawing/sprite.c | 128 +---------------------- 6 files changed, 203 insertions(+), 131 deletions(-) create mode 100644 src/drawing/drawing_fast.cpp diff --git a/openrct2.vcxproj b/openrct2.vcxproj index 73fe4a6021..3ab8b8c9aa 100644 --- a/openrct2.vcxproj +++ b/openrct2.vcxproj @@ -37,6 +37,7 @@ + diff --git a/openrct2.vcxproj.filters b/openrct2.vcxproj.filters index 3fddff8512..d9a36792a2 100644 --- a/openrct2.vcxproj.filters +++ b/openrct2.vcxproj.filters @@ -581,6 +581,9 @@ + + Source\Drawing + diff --git a/src/drawing/drawing.c b/src/drawing/drawing.c index 9f13e83dbf..708633277c 100644 --- a/src/drawing/drawing.c +++ b/src/drawing/drawing.c @@ -537,7 +537,7 @@ void gfx_draw_pickedup_peep() * * rct2: 0x00681DE2 */ -void gfx_draw_sprite_raw_masked(rct_drawpixelinfo *dpi, int x, int y, int maskImage, int colourImage) +void __fastcall gfx_draw_sprite_raw_masked(rct_drawpixelinfo *dpi, int x, int y, int maskImage, int colourImage) { int left, top, right, bottom, width, height; rct_g1_element *imgMask = &g1Elements[maskImage & 0x7FFFF]; diff --git a/src/drawing/drawing.h b/src/drawing/drawing.h index 1be8be7213..cac779ff52 100644 --- a/src/drawing/drawing.h +++ b/src/drawing/drawing.h @@ -123,11 +123,11 @@ int gfx_load_g2(); void gfx_unload_g1(); void gfx_unload_g2(); void sub_68371D(); -void gfx_bmp_sprite_to_buffer(uint8* palette_pointer, uint8* unknown_pointer, uint8* source_pointer, uint8* dest_pointer, rct_g1_element* source_image, rct_drawpixelinfo *dest_dpi, int height, int width, int image_type); -void gfx_rle_sprite_to_buffer(const uint8* source_bits_pointer, uint8* dest_bits_pointer, const uint8* palette_pointer, const rct_drawpixelinfo *dpi, int image_type, int source_y_start, int height, int source_x_start, int width); -void gfx_draw_sprite(rct_drawpixelinfo *dpi, int image_id, int x, int y, uint32 tertiary_colour); -void gfx_draw_sprite_palette_set(rct_drawpixelinfo *dpi, int image_id, int x, int y, uint8* palette_pointer, uint8* unknown_pointer); -void gfx_draw_sprite_raw_masked(rct_drawpixelinfo *dpi, int x, int y, int maskImage, int colourImage); +void __fastcall gfx_bmp_sprite_to_buffer(uint8* palette_pointer, uint8* unknown_pointer, uint8* source_pointer, uint8* dest_pointer, rct_g1_element* source_image, rct_drawpixelinfo *dest_dpi, int height, int width, int image_type); +void __fastcall gfx_rle_sprite_to_buffer(const uint8* source_bits_pointer, uint8* dest_bits_pointer, const uint8* palette_pointer, const rct_drawpixelinfo *dpi, int image_type, int source_y_start, int height, int source_x_start, int width); +void __fastcall gfx_draw_sprite(rct_drawpixelinfo *dpi, int image_id, int x, int y, uint32 tertiary_colour); +void __fastcall gfx_draw_sprite_palette_set(rct_drawpixelinfo *dpi, int image_id, int x, int y, uint8* palette_pointer, uint8* unknown_pointer); +void __fastcall gfx_draw_sprite_raw_masked(rct_drawpixelinfo *dpi, int x, int y, int maskImage, int colourImage); // string int clip_text(char *buffer, int width); diff --git a/src/drawing/drawing_fast.cpp b/src/drawing/drawing_fast.cpp new file mode 100644 index 0000000000..3dadd2b1fb --- /dev/null +++ b/src/drawing/drawing_fast.cpp @@ -0,0 +1,190 @@ +extern "C" +{ + #include "drawing.h" +} + +template +static void __fastcall DrawRLESprite2(const uint8* source_bits_pointer, + uint8* dest_bits_pointer, + const uint8* palette_pointer, + const rct_drawpixelinfo *dpi, + int source_y_start, + int height, + int source_x_start, + int width) +{ + int zoom_amount = 1 << zoom_level; + int zoom_mask = 0xFFFFFFFF << zoom_level; + uint8* next_dest_pointer = dest_bits_pointer; + + int line_width = (dpi->width >> zoom_level) + dpi->pitch; + + if (source_y_start < 0) { + source_y_start += zoom_amount; + next_dest_pointer += line_width; + height -= zoom_amount; + } + + //For every line in the image + for (int y = source_y_start; y < (height + source_y_start); y += zoom_amount) { + + //The first part of the source pointer is a list of offsets to different lines + //This will move the pointer to the correct source line. + const uint8 *next_source_pointer = source_bits_pointer + ((uint16*)source_bits_pointer)[y]; + + uint8 last_data_line = 0; + + //For every data section in the line + while (!last_data_line) { + const uint8* source_pointer = next_source_pointer; + uint8* dest_pointer = next_dest_pointer; + + int no_pixels = *source_pointer++; + //gap_size is the number of non drawn pixels you require to + //jump over on your destination + uint8 gap_size = *source_pointer++; + //The last bit in no_pixels tells you if you have reached the end of a line + last_data_line = no_pixels & 0x80; + //Clear the last data line bit so we have just the no_pixels + no_pixels &= 0x7f; + //Have our next source pointer point to the next data section + next_source_pointer = source_pointer + no_pixels; + + //Calculates the start point of the image + int x_start = gap_size - source_x_start; + + if (x_start & ~zoom_mask) { + no_pixels -= (x_start&~zoom_mask); + x_start += ~zoom_mask; + source_pointer += (x_start&~zoom_mask); + if (no_pixels <= 0) continue; + } + + if (x_start > 0) { + //Since the start is positive + //We need to move the drawing surface to the correct position + dest_pointer += x_start >> zoom_level; + } else { + //If the start is negative we require to remove part of the image. + //This is done by moving the image pointer to the correct position. + source_pointer -= x_start; + //The no_pixels will be reduced in this operation + no_pixels += x_start; + //If there are no pixels there is nothing to draw this data section + if (no_pixels <= 0) continue; + //Reset the start position to zero as we have taken into account all moves + x_start = 0; + } + + int x_end = x_start + no_pixels; + //If the end position is further out than the whole image + //end position then we need to shorten the line again + if (x_end > width) { + //Shorten the line + no_pixels -= x_end - width; + //If there are no pixels there is nothing to draw. + if (no_pixels <= 0) continue; + } + + //Finally after all those checks, copy the image onto the drawing surface + //If the image type is not a basic one we require to mix the pixels + if (image_type & IMAGE_TYPE_USE_PALETTE) {//In the .exe these are all unraveled loops + for (; no_pixels > 0; no_pixels -= zoom_amount, source_pointer += zoom_amount, dest_pointer++) { + uint8 al = *source_pointer; + uint8 ah = *dest_pointer; + if (image_type & IMAGE_TYPE_MIX_BACKGROUND) + al = palette_pointer[(((uint16)al << 8) | ah) - 0x100]; + else + al = palette_pointer[al]; + *dest_pointer = al; + } + } else if (image_type & IMAGE_TYPE_MIX_BACKGROUND) {//In the .exe these are all unraveled loops + //Doesnt use source pointer ??? mix with background only? + //Not Tested + + for (; no_pixels > 0; no_pixels -= zoom_amount, dest_pointer++) { + uint8 pixel = *dest_pointer; + pixel = palette_pointer[pixel]; + *dest_pointer = pixel; + } + } else + { + if (zoom_amount == 1) { + memcpy(dest_pointer, source_pointer, no_pixels); + } else { + for (; no_pixels > 0; no_pixels -= zoom_amount, source_pointer += zoom_amount, dest_pointer++) { + *dest_pointer = *source_pointer; + } + } + } + } + + //Add a line to the drawing surface pointer + next_dest_pointer += line_width; + } +} + +#define DrawRLESpriteHelper2(image_type, zoom_level) \ + DrawRLESprite2(source_bits_pointer, dest_bits_pointer, palette_pointer, dpi, source_y_start, height, source_x_start, width) + +template +static void __fastcall DrawRLESprite1(const uint8* source_bits_pointer, + uint8* dest_bits_pointer, + const uint8* palette_pointer, + const rct_drawpixelinfo *dpi, + int source_y_start, + int height, + int source_x_start, + int width) +{ + int zoom_level = dpi->zoom_level; + switch (zoom_level) { + case 0: DrawRLESpriteHelper2(image_type, 0); break; + case 1: DrawRLESpriteHelper2(image_type, 1); break; + case 2: DrawRLESpriteHelper2(image_type, 2); break; + case 3: DrawRLESpriteHelper2(image_type, 3); break; + default: assert(false); break; + } +} + +#define DrawRLESpriteHelper1(image_type) \ + DrawRLESprite1(source_bits_pointer, dest_bits_pointer, palette_pointer, dpi, source_y_start, height, source_x_start, width) + +extern "C" +{ + /** + * Transfers readied images onto buffers + * This function copies the sprite data onto the screen + * rct2: 0x0067AA18 + */ + void __fastcall gfx_rle_sprite_to_buffer(const uint8* source_bits_pointer, + uint8* dest_bits_pointer, + const uint8* palette_pointer, + const rct_drawpixelinfo *dpi, + int image_type, + int source_y_start, + int height, + int source_x_start, + int width) + { + if (image_type & IMAGE_TYPE_USE_PALETTE) + { + if (image_type & IMAGE_TYPE_MIX_BACKGROUND) + { + DrawRLESpriteHelper1(IMAGE_TYPE_USE_PALETTE | IMAGE_TYPE_MIX_BACKGROUND); + } + else + { + DrawRLESpriteHelper1(IMAGE_TYPE_USE_PALETTE); + } + } + else if (image_type & IMAGE_TYPE_MIX_BACKGROUND) + { + DrawRLESpriteHelper1(IMAGE_TYPE_MIX_BACKGROUND); + } + else + { + DrawRLESpriteHelper1(0); + } + } +} diff --git a/src/drawing/sprite.c b/src/drawing/sprite.c index d1b52d549f..c4948bfb96 100644 --- a/src/drawing/sprite.c +++ b/src/drawing/sprite.c @@ -142,7 +142,7 @@ void sub_68371D() * image. * rct2: 0x0067A690 */ -void gfx_bmp_sprite_to_buffer(uint8* palette_pointer, uint8* unknown_pointer, uint8* source_pointer, uint8* dest_pointer, rct_g1_element* source_image, rct_drawpixelinfo *dest_dpi, int height, int width, int image_type){ +void __fastcall gfx_bmp_sprite_to_buffer(uint8* palette_pointer, uint8* unknown_pointer, uint8* source_pointer, uint8* dest_pointer, rct_g1_element* source_image, rct_drawpixelinfo *dest_dpi, int height, int width, int image_type){ uint16 zoom_level = dest_dpi->zoom_level; uint8 zoom_amount = 1 << zoom_level; uint32 dest_line_width = (dest_dpi->width / zoom_amount) + dest_dpi->pitch; @@ -270,128 +270,6 @@ void gfx_bmp_sprite_to_buffer(uint8* palette_pointer, uint8* unknown_pointer, ui return; } -/** - * Transfers readied images onto buffers - * This function copies the sprite data onto the screen - * rct2: 0x0067AA18 - */ -void gfx_rle_sprite_to_buffer(const uint8* source_bits_pointer, uint8* dest_bits_pointer, const uint8* palette_pointer, const rct_drawpixelinfo *dpi, int image_type, int source_y_start, int height, int source_x_start, int width){ - int zoom_level = dpi->zoom_level; - int zoom_amount = 1 << zoom_level; - int zoom_mask = 0xFFFFFFFF << zoom_level; - uint8* next_dest_pointer = dest_bits_pointer; - - int line_width = (dpi->width >> zoom_level) + dpi->pitch; - - if (source_y_start < 0){ - source_y_start += zoom_amount; - next_dest_pointer += line_width; - height -= zoom_amount; - } - - //For every line in the image - for (int y = source_y_start; y < (height + source_y_start); y += zoom_amount){ - - //The first part of the source pointer is a list of offsets to different lines - //This will move the pointer to the correct source line. - const uint8 *next_source_pointer = source_bits_pointer + ((uint16*)source_bits_pointer)[y]; - - uint8 last_data_line = 0; - - //For every data section in the line - while (!last_data_line){ - const uint8* source_pointer = next_source_pointer; - uint8* dest_pointer = next_dest_pointer; - - int no_pixels = *source_pointer++; - //gap_size is the number of non drawn pixels you require to - //jump over on your destination - uint8 gap_size = *source_pointer++; - //The last bit in no_pixels tells you if you have reached the end of a line - last_data_line = no_pixels & 0x80; - //Clear the last data line bit so we have just the no_pixels - no_pixels &= 0x7f; - //Have our next source pointer point to the next data section - next_source_pointer = source_pointer + no_pixels; - - //Calculates the start point of the image - int x_start = gap_size - source_x_start; - - if (x_start & ~zoom_mask){ - no_pixels -= (x_start&~zoom_mask); - x_start += ~zoom_mask; - source_pointer += (x_start&~zoom_mask); - if (no_pixels <= 0) continue; - } - - if (x_start > 0){ - //Since the start is positive - //We need to move the drawing surface to the correct position - dest_pointer += x_start >> zoom_level; - } - else{ - //If the start is negative we require to remove part of the image. - //This is done by moving the image pointer to the correct position. - source_pointer -= x_start; - //The no_pixels will be reduced in this operation - no_pixels += x_start; - //If there are no pixels there is nothing to draw this data section - if (no_pixels <= 0) continue; - //Reset the start position to zero as we have taken into account all moves - x_start = 0; - } - - int x_end = x_start + no_pixels; - //If the end position is further out than the whole image - //end position then we need to shorten the line again - if (x_end > width){ - //Shorten the line - no_pixels -= x_end - width; - //If there are no pixels there is nothing to draw. - if (no_pixels <= 0) continue; - } - - //Finally after all those checks, copy the image onto the drawing surface - //If the image type is not a basic one we require to mix the pixels - if (image_type & IMAGE_TYPE_USE_PALETTE){//In the .exe these are all unraveled loops - for (; no_pixels > 0; no_pixels -= zoom_amount, source_pointer += zoom_amount, dest_pointer++){ - uint8 al = *source_pointer; - uint8 ah = *dest_pointer; - if (image_type & IMAGE_TYPE_MIX_BACKGROUND) - al = palette_pointer[(((uint16)al << 8) | ah) - 0x100]; - else - al = palette_pointer[al]; - *dest_pointer = al; - } - } - else if (image_type & IMAGE_TYPE_MIX_BACKGROUND){//In the .exe these are all unraveled loops - //Doesnt use source pointer ??? mix with background only? - //Not Tested - - for (; no_pixels > 0; no_pixels -= zoom_amount, dest_pointer++){ - uint8 pixel = *dest_pointer; - pixel = palette_pointer[pixel]; - *dest_pointer = pixel; - } - } - else - { - if (zoom_amount == 1) { - memcpy(dest_pointer, source_pointer, no_pixels); - } - else { - for (; no_pixels > 0; no_pixels -= zoom_amount, source_pointer += zoom_amount, dest_pointer++) { - *dest_pointer = *source_pointer; - } - } - } - } - - //Add a line to the drawing surface pointer - next_dest_pointer += line_width; - } -} - /** * * rct2: 0x0067A28E @@ -408,7 +286,7 @@ void gfx_rle_sprite_to_buffer(const uint8* source_bits_pointer, uint8* dest_bits * dpi (esi) * tertiary_colour (ebp) */ -void gfx_draw_sprite(rct_drawpixelinfo *dpi, int image_id, int x, int y, uint32 tertiary_colour) +void __fastcall gfx_draw_sprite(rct_drawpixelinfo *dpi, int image_id, int x, int y, uint32 tertiary_colour) { int image_type = (image_id & 0xE0000000) >> 28; int image_sub_type = (image_id & 0x1C000000) >> 26; @@ -488,7 +366,7 @@ void gfx_draw_sprite(rct_drawpixelinfo *dpi, int image_id, int x, int y, uint32 * x (cx) * y (dx) */ -void gfx_draw_sprite_palette_set(rct_drawpixelinfo *dpi, int image_id, int x, int y, uint8* palette_pointer, uint8* unknown_pointer) +void __fastcall gfx_draw_sprite_palette_set(rct_drawpixelinfo *dpi, int image_id, int x, int y, uint8* palette_pointer, uint8* unknown_pointer) { int image_element = image_id & 0x7FFFF; int image_type = (image_id & 0xE0000000) >> 28; From aaf572ab276420f3d5e282451a809c30ff4f3688 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Janiszewski?= Date: Sat, 20 Feb 2016 11:12:00 +0100 Subject: [PATCH 2/6] Fix fastcall for non-MSVC toolchains --- src/common.h | 9 +++++++++ src/drawing/drawing.c | 2 +- src/drawing/drawing.h | 10 +++++----- src/drawing/drawing_fast.cpp | 6 +++--- src/drawing/sprite.c | 6 +++--- 5 files changed, 21 insertions(+), 12 deletions(-) diff --git a/src/common.h b/src/common.h index b488f7c8e7..e58c85ae93 100644 --- a/src/common.h +++ b/src/common.h @@ -34,4 +34,13 @@ #endif #define abstract = 0 +#ifdef __GNUC__ +#define FASTCALL __attribute__((fastcall)) +#elif _MSC_VER +#define FASTCALL __fastcall +#else +#pragma message "Not using fastcall calling convention, please check your compiler support" +#define FASTCALL +#endif + #endif diff --git a/src/drawing/drawing.c b/src/drawing/drawing.c index 708633277c..e51c8f4c8d 100644 --- a/src/drawing/drawing.c +++ b/src/drawing/drawing.c @@ -537,7 +537,7 @@ void gfx_draw_pickedup_peep() * * rct2: 0x00681DE2 */ -void __fastcall gfx_draw_sprite_raw_masked(rct_drawpixelinfo *dpi, int x, int y, int maskImage, int colourImage) +void FASTCALL gfx_draw_sprite_raw_masked(rct_drawpixelinfo *dpi, int x, int y, int maskImage, int colourImage) { int left, top, right, bottom, width, height; rct_g1_element *imgMask = &g1Elements[maskImage & 0x7FFFF]; diff --git a/src/drawing/drawing.h b/src/drawing/drawing.h index cac779ff52..5ac68c701a 100644 --- a/src/drawing/drawing.h +++ b/src/drawing/drawing.h @@ -123,11 +123,11 @@ int gfx_load_g2(); void gfx_unload_g1(); void gfx_unload_g2(); void sub_68371D(); -void __fastcall gfx_bmp_sprite_to_buffer(uint8* palette_pointer, uint8* unknown_pointer, uint8* source_pointer, uint8* dest_pointer, rct_g1_element* source_image, rct_drawpixelinfo *dest_dpi, int height, int width, int image_type); -void __fastcall gfx_rle_sprite_to_buffer(const uint8* source_bits_pointer, uint8* dest_bits_pointer, const uint8* palette_pointer, const rct_drawpixelinfo *dpi, int image_type, int source_y_start, int height, int source_x_start, int width); -void __fastcall gfx_draw_sprite(rct_drawpixelinfo *dpi, int image_id, int x, int y, uint32 tertiary_colour); -void __fastcall gfx_draw_sprite_palette_set(rct_drawpixelinfo *dpi, int image_id, int x, int y, uint8* palette_pointer, uint8* unknown_pointer); -void __fastcall gfx_draw_sprite_raw_masked(rct_drawpixelinfo *dpi, int x, int y, int maskImage, int colourImage); +void FASTCALL gfx_bmp_sprite_to_buffer(uint8* palette_pointer, uint8* unknown_pointer, uint8* source_pointer, uint8* dest_pointer, rct_g1_element* source_image, rct_drawpixelinfo *dest_dpi, int height, int width, int image_type); +void FASTCALL gfx_rle_sprite_to_buffer(const uint8* source_bits_pointer, uint8* dest_bits_pointer, const uint8* palette_pointer, const rct_drawpixelinfo *dpi, int image_type, int source_y_start, int height, int source_x_start, int width); +void FASTCALL gfx_draw_sprite(rct_drawpixelinfo *dpi, int image_id, int x, int y, uint32 tertiary_colour); +void FASTCALL gfx_draw_sprite_palette_set(rct_drawpixelinfo *dpi, int image_id, int x, int y, uint8* palette_pointer, uint8* unknown_pointer); +void FASTCALL gfx_draw_sprite_raw_masked(rct_drawpixelinfo *dpi, int x, int y, int maskImage, int colourImage); // string int clip_text(char *buffer, int width); diff --git a/src/drawing/drawing_fast.cpp b/src/drawing/drawing_fast.cpp index 3dadd2b1fb..1d2732e0d6 100644 --- a/src/drawing/drawing_fast.cpp +++ b/src/drawing/drawing_fast.cpp @@ -4,7 +4,7 @@ extern "C" } template -static void __fastcall DrawRLESprite2(const uint8* source_bits_pointer, +static void FASTCALL DrawRLESprite2(const uint8* source_bits_pointer, uint8* dest_bits_pointer, const uint8* palette_pointer, const rct_drawpixelinfo *dpi, @@ -128,7 +128,7 @@ static void __fastcall DrawRLESprite2(const uint8* source_bits_pointer, DrawRLESprite2(source_bits_pointer, dest_bits_pointer, palette_pointer, dpi, source_y_start, height, source_x_start, width) template -static void __fastcall DrawRLESprite1(const uint8* source_bits_pointer, +static void FASTCALL DrawRLESprite1(const uint8* source_bits_pointer, uint8* dest_bits_pointer, const uint8* palette_pointer, const rct_drawpixelinfo *dpi, @@ -157,7 +157,7 @@ extern "C" * This function copies the sprite data onto the screen * rct2: 0x0067AA18 */ - void __fastcall gfx_rle_sprite_to_buffer(const uint8* source_bits_pointer, + void FASTCALL gfx_rle_sprite_to_buffer(const uint8* source_bits_pointer, uint8* dest_bits_pointer, const uint8* palette_pointer, const rct_drawpixelinfo *dpi, diff --git a/src/drawing/sprite.c b/src/drawing/sprite.c index c4948bfb96..f6f2e55c3c 100644 --- a/src/drawing/sprite.c +++ b/src/drawing/sprite.c @@ -142,7 +142,7 @@ void sub_68371D() * image. * rct2: 0x0067A690 */ -void __fastcall gfx_bmp_sprite_to_buffer(uint8* palette_pointer, uint8* unknown_pointer, uint8* source_pointer, uint8* dest_pointer, rct_g1_element* source_image, rct_drawpixelinfo *dest_dpi, int height, int width, int image_type){ +void FASTCALL gfx_bmp_sprite_to_buffer(uint8* palette_pointer, uint8* unknown_pointer, uint8* source_pointer, uint8* dest_pointer, rct_g1_element* source_image, rct_drawpixelinfo *dest_dpi, int height, int width, int image_type){ uint16 zoom_level = dest_dpi->zoom_level; uint8 zoom_amount = 1 << zoom_level; uint32 dest_line_width = (dest_dpi->width / zoom_amount) + dest_dpi->pitch; @@ -286,7 +286,7 @@ void __fastcall gfx_bmp_sprite_to_buffer(uint8* palette_pointer, uint8* unknown_ * dpi (esi) * tertiary_colour (ebp) */ -void __fastcall gfx_draw_sprite(rct_drawpixelinfo *dpi, int image_id, int x, int y, uint32 tertiary_colour) +void FASTCALL gfx_draw_sprite(rct_drawpixelinfo *dpi, int image_id, int x, int y, uint32 tertiary_colour) { int image_type = (image_id & 0xE0000000) >> 28; int image_sub_type = (image_id & 0x1C000000) >> 26; @@ -366,7 +366,7 @@ void __fastcall gfx_draw_sprite(rct_drawpixelinfo *dpi, int image_id, int x, int * x (cx) * y (dx) */ -void __fastcall gfx_draw_sprite_palette_set(rct_drawpixelinfo *dpi, int image_id, int x, int y, uint8* palette_pointer, uint8* unknown_pointer) +void FASTCALL gfx_draw_sprite_palette_set(rct_drawpixelinfo *dpi, int image_id, int x, int y, uint8* palette_pointer, uint8* unknown_pointer) { int image_element = image_id & 0x7FFFF; int image_type = (image_id & 0xE0000000) >> 28; From 14bf53bb306dadde75b6e27a5d28be8a09abe873 Mon Sep 17 00:00:00 2001 From: Marijn van der Werf Date: Sat, 20 Feb 2016 11:52:24 +0100 Subject: [PATCH 3/6] Add drawing_fast.cpp to project --- OpenRCT2.xcodeproj/project.pbxproj | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/OpenRCT2.xcodeproj/project.pbxproj b/OpenRCT2.xcodeproj/project.pbxproj index 70e37754c6..1a85788b3e 100644 --- a/OpenRCT2.xcodeproj/project.pbxproj +++ b/OpenRCT2.xcodeproj/project.pbxproj @@ -7,6 +7,7 @@ objects = { /* Begin PBXBuildFile section */ + C62A08D51C787C2A00F3AA76 /* drawing_fast.cpp in Sources */ = {isa = PBXBuildFile; fileRef = C62A08D41C787C2A00F3AA76 /* drawing_fast.cpp */; }; D41B73EF1C2101890080A7B9 /* libcurl.tbd in Frameworks */ = {isa = PBXBuildFile; fileRef = D41B73EE1C2101890080A7B9 /* libcurl.tbd */; }; D41B73F11C21018C0080A7B9 /* libssl.tbd in Frameworks */ = {isa = PBXBuildFile; fileRef = D41B73F01C21018C0080A7B9 /* libssl.tbd */; }; D41B741D1C210A7A0080A7B9 /* libiconv.tbd in Frameworks */ = {isa = PBXBuildFile; fileRef = D41B741C1C210A7A0080A7B9 /* libiconv.tbd */; }; @@ -220,6 +221,7 @@ /* End PBXCopyFilesBuildPhase section */ /* Begin PBXFileReference section */ + C62A08D41C787C2A00F3AA76 /* drawing_fast.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = drawing_fast.cpp; sourceTree = ""; }; D4163F671C2A044D00B83136 /* version.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = version.h; path = src/version.h; sourceTree = ""; }; D41B73EE1C2101890080A7B9 /* libcurl.tbd */ = {isa = PBXFileReference; lastKnownFileType = "sourcecode.text-based-dylib-definition"; name = libcurl.tbd; path = usr/lib/libcurl.tbd; sourceTree = SDKROOT; }; D41B73F01C21018C0080A7B9 /* libssl.tbd */ = {isa = PBXFileReference; lastKnownFileType = "sourcecode.text-based-dylib-definition"; name = libssl.tbd; path = usr/lib/libssl.tbd; sourceTree = SDKROOT; }; @@ -762,6 +764,7 @@ children = ( D4EC46F41C26342F0024B507 /* drawing.c */, D4EC46F51C26342F0024B507 /* drawing.h */, + C62A08D41C787C2A00F3AA76 /* drawing_fast.cpp */, D4EC46F61C26342F0024B507 /* font.c */, D4EC46F71C26342F0024B507 /* font.h */, D4EC46F81C26342F0024B507 /* line.c */, @@ -1465,6 +1468,7 @@ D4B63B951C43028200367A37 /* Console.cpp in Sources */, D4EC481E1C26342F0024B507 /* rct2.c in Sources */, D4C0EDD11C52EA6400A03A06 /* player.c in Sources */, + C62A08D51C787C2A00F3AA76 /* drawing_fast.cpp in Sources */, D4EC48801C26342F0024B507 /* sprite.c in Sources */, D4EC486C1C26342F0024B507 /* top_toolbar.c in Sources */, D4EC48621C26342F0024B507 /* themes.c in Sources */, From 0e42bef94e66b7475eb4cf6c1b34fc824c0c7d13 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Janiszewski?= Date: Sat, 20 Feb 2016 17:10:09 +0100 Subject: [PATCH 4/6] Trade control-flow dependency for data-dependency Removes branching in drawing code --- src/drawing/drawing_fast.cpp | 53 +++++++++++++++++------------------- 1 file changed, 25 insertions(+), 28 deletions(-) diff --git a/src/drawing/drawing_fast.cpp b/src/drawing/drawing_fast.cpp index 1d2732e0d6..f143950e18 100644 --- a/src/drawing/drawing_fast.cpp +++ b/src/drawing/drawing_fast.cpp @@ -3,6 +3,9 @@ extern "C" #include "drawing.h" } +// This will have 1 for val > 0, 0 otherwise +#define greater_than_zero(val) (((val - 1) >> (sizeof(val) * 8 - 1)) + 1) + template static void FASTCALL DrawRLESprite2(const uint8* source_bits_pointer, uint8* dest_bits_pointer, @@ -24,9 +27,10 @@ static void FASTCALL DrawRLESprite2(const uint8* source_bits_pointer, next_dest_pointer += line_width; height -= zoom_amount; } + const int source_y_end = height + source_y_start; //For every line in the image - for (int y = source_y_start; y < (height + source_y_start); y += zoom_amount) { + for (int y = source_y_start; y < source_y_end; y += zoom_amount) { //The first part of the source pointer is a list of offsets to different lines //This will move the pointer to the correct source line. @@ -52,39 +56,31 @@ static void FASTCALL DrawRLESprite2(const uint8* source_bits_pointer, //Calculates the start point of the image int x_start = gap_size - source_x_start; + const int x_diff = x_start & ~zoom_mask; - if (x_start & ~zoom_mask) { - no_pixels -= (x_start&~zoom_mask); - x_start += ~zoom_mask; - source_pointer += (x_start&~zoom_mask); - if (no_pixels <= 0) continue; - } + no_pixels -= x_diff; + x_start += ~zoom_mask * greater_than_zero(x_diff - 1); + source_pointer += (x_start&~zoom_mask); - if (x_start > 0) { - //Since the start is positive - //We need to move the drawing surface to the correct position - dest_pointer += x_start >> zoom_level; - } else { - //If the start is negative we require to remove part of the image. - //This is done by moving the image pointer to the correct position. - source_pointer -= x_start; - //The no_pixels will be reduced in this operation - no_pixels += x_start; - //If there are no pixels there is nothing to draw this data section - if (no_pixels <= 0) continue; - //Reset the start position to zero as we have taken into account all moves - x_start = 0; - } + // This will have 1 for x_start > 0, 0 otherwise + uint8 sign = greater_than_zero(x_start); + + dest_pointer += (x_start >> zoom_level) * sign; + + //If the start is negative we require to remove part of the image. + //This is done by moving the image pointer to the correct position. + source_pointer -= x_start * (1 - sign); + //The no_pixels will be reduced in this operation + no_pixels += x_start * (1 - sign); + //Reset the start position to zero as we have taken into account all moves + x_start *= sign; int x_end = x_start + no_pixels; //If the end position is further out than the whole image //end position then we need to shorten the line again - if (x_end > width) { - //Shorten the line - no_pixels -= x_end - width; - //If there are no pixels there is nothing to draw. - if (no_pixels <= 0) continue; - } + const int pixels_till_end = x_end - width; + //Shorten the line + no_pixels -= pixels_till_end * greater_than_zero(pixels_till_end); //Finally after all those checks, copy the image onto the drawing surface //If the image type is not a basic one we require to mix the pixels @@ -110,6 +106,7 @@ static void FASTCALL DrawRLESprite2(const uint8* source_bits_pointer, } else { if (zoom_amount == 1) { + no_pixels *= greater_than_zero(no_pixels); memcpy(dest_pointer, source_pointer, no_pixels); } else { for (; no_pixels > 0; no_pixels -= zoom_amount, source_pointer += zoom_amount, dest_pointer++) { From 2744db77d986b0210a39c38d44f6c18b020d7345 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Janiszewski?= Date: Mon, 22 Feb 2016 15:54:46 +0100 Subject: [PATCH 5/6] Prefer masking over multiplication in rendering code --- src/drawing/drawing_fast.cpp | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/src/drawing/drawing_fast.cpp b/src/drawing/drawing_fast.cpp index f143950e18..29a1c6ab9e 100644 --- a/src/drawing/drawing_fast.cpp +++ b/src/drawing/drawing_fast.cpp @@ -3,8 +3,10 @@ extern "C" #include "drawing.h" } -// This will have 1 for val > 0, 0 otherwise -#define greater_than_zero(val) (((val - 1) >> (sizeof(val) * 8 - 1)) + 1) +// This will have -1 (0xffffffff) for (val <= 0), 0 otherwise, so it can act as a mask +// This is expected to generate +// sar $0x1f, %eax +#define less_or_equal_zero_mask(val) (((val - 1) >> (sizeof(val) * 8 - 1))) template static void FASTCALL DrawRLESprite2(const uint8* source_bits_pointer, @@ -59,28 +61,28 @@ static void FASTCALL DrawRLESprite2(const uint8* source_bits_pointer, const int x_diff = x_start & ~zoom_mask; no_pixels -= x_diff; - x_start += ~zoom_mask * greater_than_zero(x_diff - 1); + x_start += ~zoom_mask & ~less_or_equal_zero_mask(x_diff - 1); source_pointer += (x_start&~zoom_mask); // This will have 1 for x_start > 0, 0 otherwise - uint8 sign = greater_than_zero(x_start); + int sign = less_or_equal_zero_mask(x_start); - dest_pointer += (x_start >> zoom_level) * sign; + dest_pointer += (x_start >> zoom_level) & ~sign; //If the start is negative we require to remove part of the image. //This is done by moving the image pointer to the correct position. - source_pointer -= x_start * (1 - sign); + source_pointer -= x_start & sign; //The no_pixels will be reduced in this operation - no_pixels += x_start * (1 - sign); + no_pixels += x_start & sign; //Reset the start position to zero as we have taken into account all moves - x_start *= sign; + x_start &= ~sign; int x_end = x_start + no_pixels; //If the end position is further out than the whole image //end position then we need to shorten the line again const int pixels_till_end = x_end - width; //Shorten the line - no_pixels -= pixels_till_end * greater_than_zero(pixels_till_end); + no_pixels -= pixels_till_end & ~(less_or_equal_zero_mask(pixels_till_end)); //Finally after all those checks, copy the image onto the drawing surface //If the image type is not a basic one we require to mix the pixels @@ -106,7 +108,7 @@ static void FASTCALL DrawRLESprite2(const uint8* source_bits_pointer, } else { if (zoom_amount == 1) { - no_pixels *= greater_than_zero(no_pixels); + no_pixels &= ~less_or_equal_zero_mask(no_pixels); memcpy(dest_pointer, source_pointer, no_pixels); } else { for (; no_pixels > 0; no_pixels -= zoom_amount, source_pointer += zoom_amount, dest_pointer++) { From 60d01a3ddf24f76a96fcc284502ef1c3bc2dcd27 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Janiszewski?= Date: Mon, 22 Feb 2016 16:16:35 +0100 Subject: [PATCH 6/6] More optimizations to drawing code --- src/drawing/drawing_fast.cpp | 29 ++++++++++++++--------------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/src/drawing/drawing_fast.cpp b/src/drawing/drawing_fast.cpp index 29a1c6ab9e..8490099e48 100644 --- a/src/drawing/drawing_fast.cpp +++ b/src/drawing/drawing_fast.cpp @@ -5,7 +5,7 @@ extern "C" // This will have -1 (0xffffffff) for (val <= 0), 0 otherwise, so it can act as a mask // This is expected to generate -// sar $0x1f, %eax +// sar eax, 0x1f (arithmetic shift right by 31) #define less_or_equal_zero_mask(val) (((val - 1) >> (sizeof(val) * 8 - 1))) template @@ -24,26 +24,27 @@ static void FASTCALL DrawRLESprite2(const uint8* source_bits_pointer, int line_width = (dpi->width >> zoom_level) + dpi->pitch; - if (source_y_start < 0) { - source_y_start += zoom_amount; - next_dest_pointer += line_width; - height -= zoom_amount; - } - const int source_y_end = height + source_y_start; + const int source_y_start_mask = less_or_equal_zero_mask(source_y_start + 1); + source_y_start += zoom_amount & source_y_start_mask; + next_dest_pointer += line_width & source_y_start_mask; + height -= zoom_amount & source_y_start_mask; //For every line in the image - for (int y = source_y_start; y < source_y_end; y += zoom_amount) { + for (int i = 0; i < height; i += zoom_amount) { + int y = source_y_start + i; + uint8 i2 = i >> zoom_level; //The first part of the source pointer is a list of offsets to different lines //This will move the pointer to the correct source line. const uint8 *next_source_pointer = source_bits_pointer + ((uint16*)source_bits_pointer)[y]; + uint8* loop_dest_pointer = next_dest_pointer + line_width * i2; uint8 last_data_line = 0; //For every data section in the line while (!last_data_line) { const uint8* source_pointer = next_source_pointer; - uint8* dest_pointer = next_dest_pointer; + uint8* dest_pointer = loop_dest_pointer; int no_pixels = *source_pointer++; //gap_size is the number of non drawn pixels you require to @@ -59,12 +60,13 @@ static void FASTCALL DrawRLESprite2(const uint8* source_bits_pointer, //Calculates the start point of the image int x_start = gap_size - source_x_start; const int x_diff = x_start & ~zoom_mask; + const int x_mask = ~less_or_equal_zero_mask(x_diff); no_pixels -= x_diff; - x_start += ~zoom_mask & ~less_or_equal_zero_mask(x_diff - 1); - source_pointer += (x_start&~zoom_mask); + x_start += ~zoom_mask & x_mask; + source_pointer += (x_start&~zoom_mask) & x_mask; - // This will have 1 for x_start > 0, 0 otherwise + // This will have -1 (0xffffffff) for (x_start <= 0), 0 otherwise int sign = less_or_equal_zero_mask(x_start); dest_pointer += (x_start >> zoom_level) & ~sign; @@ -117,9 +119,6 @@ static void FASTCALL DrawRLESprite2(const uint8* source_bits_pointer, } } } - - //Add a line to the drawing surface pointer - next_dest_pointer += line_width; } }