Qbasicnews.com
December 12, 2019, 03:57:16 AM *
Welcome, Guest. Please login or register.

Login with username, password and session length
News: Back to Qbasicnews.com | QB Online Help | FAQ | Chat | All Basic Code | QB Knowledge Base
 
   Home   Help Search Login Register  
Pages: 1 [2] 3
  Print  
Author Topic: Who's up for a real challenge?  (Read 17599 times)
Blitz
I hold this place together
*****
Posts: 853



WWW
« Reply #15 on: April 22, 2003, 12:43:18 PM »

I never said I was going to use it; it's pointless on a 486. Since no one entered, I didn't bother myself either. So I never finished it, and it doesn't align to VRAM. But beat this: it's faster than UGL on a 486.

Code:

            .model medium, basic            ;; medium memory model, BASIC language convention
            .386                            ;; allow 80386 instructions (32-bit registers, setnz, fs/gs)
            .data
innerloop   word        ?                   ;; one uninitialised word in the data segment (room for a near code offset)
            .code
           
;;
;; Blit486 - transparent sprite blit (source byte 0 = transparent) onto a
;;           byte-per-pixel surface that is BLIT_DST_PITCH bytes wide.
;;
;; In:   pdst -> destination surface (far pointer)
;;       x, y -> destination position of the sprite's first pixel
;;       psrc -> source image (far pointer):
;;                 word 0 = width shl 3 (shifted right by 3 here)
;;                 word 1 = height in rows
;;                 byte 4.. = pixel rows, width bytes each
;; Out:  nothing.  ax, cx, dx and flags are clobbered; bx, di, si, ds
;;       and es are preserved through the USES list.
;; Note: no clipping is performed.
;;
BLIT_VRAM_SEG   equ     0a000h          ;; destination segment that selects vram_inner
BLIT_DST_PITCH  equ     320             ;; bytes per destination row

Blit486     proc    public uses bx di si ds es,\
                    pdst:far ptr, x:word, y:word, psrc:far ptr

            local   dstXres:word
            local   srcXres:word, srcYres:word
            local   rowproc:word

            ;;
            ;; Pick the per-row routine.  The pointer lives in a stack
            ;; local (addressed through ss:bp) because ds is about to be
            ;; reloaded with the source segment; a variable in .data
            ;; would then be read and written inside the source segment.
            ;;
            mov     rowproc, offset sram_inner
            cmp     word ptr [pdst+2], BLIT_VRAM_SEG
            jne     @@setup
            mov     rowproc, offset vram_inner

            ;;
            ;; es:di -> dst
            ;; ds:si -> src
            ;;
@@setup:    lds     si, psrc
            les     di, pdst

            ;;
            ;; Destination pitch
            ;;
            mov     dstXres, BLIT_DST_PITCH

            ;;
            ;; Get source width and height from the image header
            ;;
            mov     ax, ds:[si]
            mov     cx, ds:[si+2]
            shr     ax, 3
            mov     srcYres, cx
            mov     srcXres, ax

            ;;
            ;; Setup destination address: di += y * pitch + x
            ;; (mul leaves the low word in ax and overwrites dx)
            ;;
            mov     ax, y
            mul     dstXres
            add     di, x
            add     di, ax
            add     si, 4                   ;; skip the 4-byte header

            mov     bx, srcXres             ;; bx = pixels per row for the row routine
            mov     cx, srcYres             ;; cx = rows left

            ;;
            ;; Nothing to draw for an empty image
            ;;
            or      bx, bx
            jz      @@exit
            or      cx, cx
            jz      @@exit

            ;;
            ;; One call per row; the row routines preserve bx, cx, si
            ;; and di, so the pointers are advanced here.
            ;;
@@oloop:    call    rowproc
            add     si, srcXres
            add     di, dstXres
            dec     cx
            jnz     @@oloop

@@exit:     ret
Blit486     endp
                   
           

           
           
;;
;; sram_inner - transparent copy of one pixel row: a source byte of 0
;;              leaves the destination byte unchanged, any other value
;;              replaces it.
;;
;; bx -> pixel count
;; ds:si -> source
;; es:di -> destination
;;
;; The registers in the USES list are saved and restored.  fs and gs are
;; used as scratch counters and are not restored, and the quad loop
;; overwrites the upper halves of eax, ebx, ecx and edx.
;;
sram_inner  proc    near private uses ax bx cx dx di si bp
           
            ;;
            ;; fs -> single loop count
            ;; gs -> quad loop count
            ;;
            ;; Default: every pixel goes through the single loop (fs = bx,
            ;; gs = 0).  Rows of 8 or more pixels are split below.
            ;;
            xor     ax, ax
            mov     fs, bx            
            mov     gs, ax
            cmp     bx, 8
            jl      @@bloop_s               ;; signed compare: fewer than 8 -> single loop only
           
            ;;
            ;; Quad pixels: gs = count rounded down to a multiple of 4
            ;;            
            mov     bp, bx
            shr     bp, 2
            shl     bp, 2
            mov     gs, bp
           
            ;;
            ;; Single pixels: the remaining count mod 4 (0..3); when it is
            ;; zero the single loop is skipped entirely
            ;;
            and     bx, 3
            jz      @@qloop_s
            mov     fs, bx            
           
           
            ;;
            ;; Single pixel loop
            ;;
            ;; si/di are moved past the run and bp counts up from -count
            ;; to 0, so the loop test is just inc/jnz.
            ;;            
@@bloop_s:  mov    bp, fs
            add    si, bp
            add    di, bp
            neg    bp            
           
@@bloop_i:  mov    cl, ds:[si+bp]  ;; 1
            mov    dl, es:[di+bp]  ;; 2

            ;;
            ;; Create mask: cmp sets carry only when cl is 0, so sbb
            ;; yields al = 0ffh for a transparent pixel, 0 otherwise
            ;;
            cmp    cl, 1           ;; 3
            sbb    al, al          ;; 4
           
            ;;
            ;; Combine pixels: keep dst where the mask is set, then or in
            ;; the source byte
            ;;            
            and    al, dl          ;; 5
            or     al, cl          ;; 6
           
            mov    es:[di+bp], al  ;; 7
           
            ;;
            ;; Pixels left ?
            ;;
            inc    bp              ;; 8
            jnz    @@bloop_i       ;; 8-11
           
           
            ;;
            ;; Quad pixel loop: same pointer/negative-index scheme, four
            ;; pixels per iteration; skipped when gs is 0
            ;;
@@qloop_s:  mov    bp, gs
            or     bp, bp
            jz     @@exit
           
            add    si, bp
            add    di, bp
            neg    bp            
           
@@qloop_i:  mov    ecx, ds:[si+bp] ;; 1
            mov    edx, es:[di+bp] ;; 2

            ;; ecx -> source
            ;; edx -> destination
            ;; bx  -> upper two source pixels (ecx shr 16)
            mov    ebx, ecx        ;; 3
            shr    ebx, 16         ;; 3-5
           
            ;;
            ;; Create mask: one 0ffh/00h byte per pixel, built for the
            ;; upper pair first, shifted into the high half of eax, then
            ;; for the lower pair
            ;;
            cmp    bl, 1           ;; 6
            sbb    al, al          ;; 7
            cmp    bh, 1           ;; 8
            sbb    ah, ah          ;; 9
            shl    eax, 16         ;; 9-11
            cmp    cl, 1           ;; 12
            sbb    al, al          ;; 13
            cmp    ch, 1           ;; 14
            sbb    ah, ah          ;; 15
           
            ;;
            ;; Combine source and destination
            ;;            
            and    eax, edx        ;; 16
            or     eax, ecx        ;; 17
            mov    es:[di+bp], eax ;; 18 (Hopefully)
           
            ;;
            ;; More pixels?
            ;;
            add    bp, 4           ;; 19
            jnz    @@qloop_i       ;; 19-22
                       
@@exit:     ret
sram_inner  endp


;;
;; vram_inner - transparent copy of one pixel row that only writes the
;;              opaque (non-zero) source pixels; the destination is
;;              never read.
;;
;; bx -> pixel count (any value, including 0 and odd counts)
;; ds:si -> source
;; es:di -> destination
;;
;; The registers in the USES list are saved and restored.
;;
vram_inner  proc    near private uses ax bx cx di si bp
            jmp     @@begin

            ;;
            ;; Dispatch table indexed by (al != 0)*2 + (ah != 0)*4:
            ;;   0 -> both transparent, 2 -> low pixel only,
            ;;   4 -> high pixel only,  6 -> both pixels
            ;;
vramTB      word    @@cont, @@casea, @@caseb, @@casec

@@begin:    mov     cx, bx                  ;; keep the full count for the odd-pixel test
            and     bx, 0fffeh              ;; bx = pixels handled two at a time
            jz      @@tail                  ;; 0 or 1 pixel: no pairs to process

            ;;
            ;; si/di are moved past the paired part and bp counts up
            ;; from -pairs*2 to 0 in steps of 2.
            ;;
            mov     bp, bx
            add     si, bx
            add     di, bx
            neg     bp

@@loop:     mov     ax, ds:[si+bp]

            or      al, al
            setnz   bl
            or      ah, ah
            setnz   bh

            shl     bl, 1
            shl     bh, 2
            or      bl, bh
            xor     bh, bh
            jmp     cs:[vramTB+bx]          ;; table lives in the code segment; ds -> source here

@@casea:    mov     es:[di+bp+0], al
            jmp     @@cont
@@caseb:    mov     es:[di+bp+1], ah
            jmp     @@cont
@@casec:    mov     es:[di+bp+0], ax

@@cont:     add     bp, 2
            jnz     @@loop

            ;;
            ;; Odd count: one pixel is left at ds:[si] / es:[di].  The
            ;; pair loop above steps by 2 and could never end on it.
            ;;
@@tail:     test    cl, 1
            jz      @@exit
            mov     al, ds:[si]
            or      al, al
            jz      @@exit                  ;; transparent: leave the destination alone
            mov     es:[di], al

@@exit:     ret
vram_inner  endp
            end
Logged

oship me and i will give you lots of guurrls and beeea
Blitz
I hold this place together
*****
Posts: 853



WWW
« Reply #16 on: April 22, 2003, 12:53:55 PM »

Oh, and it's.
Code:

        add     si, bx
        add     di, bx
        neg     bx
@@iloop:
        mov     al, ds:[si+bx]
        or      al, al
        jz      @@skip
        mov     es:[di+bx], al
@@skip: inc     bx
        jnz     @@iloop
Logged

oship me and i will give you lots of guurrls and beeea
Blitz
I hold this place together
*****
Posts: 853



WWW
« Reply #17 on: April 25, 2003, 09:12:10 AM »

No comments rel? That's a first Tongue
Logged

oship me and i will give you lots of guurrls and beeea
Agamemnus
x/ \z
*****
Posts: 3491



« Reply #18 on: April 25, 2003, 06:07:41 PM »

I'd post something but this whole thread isn't in English...
Logged

Peace cannot be obtained without war. Why? If there is already peace, it is unnecessary for war. If there is no peace, there is already war."

Visit www.neobasic.net to see rubbish in all its finest.
LooseCaboose
I hold this place together
*****
Posts: 981



« Reply #19 on: April 26, 2003, 02:02:48 AM »

I just glanced at this thread so I could be way off, but wouldnt something like the following be sufficient:

Code:

/*
 * Transparent blit of an 8-bit image onto a 320-byte-wide surface.
 *
 * dst  - destination surface, one byte per pixel, 320 bytes per row
 * x, y - destination position of the image's top-left pixel
 * src  - image: a 16-bit little-endian (width << 3), a 16-bit
 *        little-endian height, then width * height pixel bytes
 *
 * A source pixel of 0 is transparent and leaves the destination
 * unchanged. No clipping is performed.
 */
void blit(void *dst, int x, int y, void *src) {
  enum { DST_PITCH = 320, HEADER_SIZE = 4 };
  const unsigned char *in = (const unsigned char *)src;
  unsigned char *out = (unsigned char *)dst;
  unsigned width, height, row, col;

  /* Header layout matches what the assembly blitter reads. */
  width = ((unsigned)in[0] | ((unsigned)in[1] << 8)) >> 3;
  height = (unsigned)in[2] | ((unsigned)in[3] << 8);
  in += HEADER_SIZE;

  out += (long)y * DST_PITCH + x;

  for (row = 0; row < height; row++) {
    for (col = 0; col < width; col++) {
      /* Both positions advance on every pixel; only opaque ones are stored. */
      if (in[col]) {
        out[col] = in[col];
      }
    }
    in += width;
    out += DST_PITCH;
  }
}


Much nicer than programming in assembly, plus it's completely portable and should give reasonably good object code if you use a good compiler (gcc -O3).
Logged

esus saves.... Passes to Moses, shoots, he scores!
Blitz
I hold this place together
*****
Posts: 853



WWW
« Reply #20 on: April 26, 2003, 05:02:15 AM »

Well, sure, it would work if you changed the loop a little. But that wouldn't give 1/10 of the performance of my loops.
Logged

oship me and i will give you lots of guurrls and beeea
relsoft
*/-\*
*****
Posts: 3927



WWW
« Reply #21 on: April 27, 2003, 03:12:13 AM »

Blitz: How much speed difference? Which was faster?  Man, that masking thingy is kool!!!!
Logged

y smiley is 24 bit.


Genso's Junkyard:
http://rel.betterwebber.com/
Blitz
I hold this place together
*****
Posts: 853



WWW
« Reply #22 on: April 27, 2003, 06:07:07 AM »

The sram loop is a lot faster than UGL's 386 loop. And the vram loop is basically the same.
Logged

oship me and i will give you lots of guurrls and beeea
LooseCaboose
I hold this place together
*****
Posts: 981



« Reply #23 on: April 27, 2003, 11:54:43 PM »

Like I said, it would depend on the compiler and the optimization flags used, but a hand-written asm version would probably still beat it. I could make it slightly faster with register keywords on the variables and some more tidying.

I just like the C version better, because it's far more portable and flexible. Coding graphics in asm is okay for a hobby, but most new graphics cards have hardware blitters (and most other functions too), so learning how to write one in asm for a 486 does seem kind of pointless to me. However, as always, you have churned out some truly impressive code :lol:
Logged

esus saves.... Passes to Moses, shoots, he scores!
Blitz
I hold this place together
*****
Posts: 853



WWW
« Reply #24 on: April 30, 2003, 05:08:42 AM »

Good asm code is an art. And in the game industry, it always has been and will be very important.
Logged

oship me and i will give you lots of guurrls and beeea
Blitz
I hold this place together
*****
Posts: 853



WWW
« Reply #25 on: April 30, 2003, 05:18:14 AM »

Also, knowing asm is very useful even when using high level languages. You'd think compilers generate stable code, but that's not always the case with for instance watcom or borland. By dissasembling it i can see exactly what it does and find out where the bug is.

Or when I'm trying to get a certain loop to run as fast as possible, I usually tinker with the loop and disassemble it until I find code that makes the compiler generate as good code as it can (which always pretty much sucks).

Knowing asm makes a better high level coder as well, since you know how the computer works you know how to code optimal code.

So i don't agree, it will always be important. There's a huge difference between knowing asm and being good at it. DQB is the perfect example of why some people should just stick to compilers.
Logged

oship me and i will give you lots of guurrls and beeea
Blitz
I hold this place together
*****
Posts: 853



WWW
« Reply #26 on: May 20, 2005, 06:19:43 AM »

So who won?
Logged

oship me and i will give you lots of guurrls and beeea
relsoft
*/-\*
*****
Posts: 3927



WWW
« Reply #27 on: May 20, 2005, 07:42:56 AM »

Digs up his ancient plaque and gives it to Blitz....
 Cheesy
Logged

y smiley is 24 bit.


Genso's Junkyard:
http://rel.betterwebber.com/
Blitz
I hold this place together
*****
Posts: 853



WWW
« Reply #28 on: May 20, 2005, 08:33:59 AM »

Looking through old posts, this place was much more fun 2 years ago.
Logged

oship me and i will give you lots of guurrls and beeea
KiZ
__/--\__
*****
Posts: 2879


WWW
« Reply #29 on: May 20, 2005, 05:47:23 PM »

Get over it already.

Most of the pros left.

Most of the new pros are into non-demo related stuff. Its just the way it develops. Demo, GFX and game programming arent real big anymore. There is no point in creating high speed blitters or any of that stuff, it would be reinventing the wheel.

Stop bitching because you feel nostalgic looking at this thread.

(Bitching might have been too harsh a word, sorry.)
Logged
Pages: 1 [2] 3
  Print  
 
Jump to:  

Powered by MySQL Powered by PHP Powered by SMF 1.1.21 | SMF © 2015, Simple Machines Valid XHTML 1.0! Valid CSS!