rebase master update
@@ -7,6 +7,7 @@ all:
|
||||
$(MAKE) -C printf
|
||||
$(MAKE) -C diverge
|
||||
$(MAKE) -C fence
|
||||
$(MAKE) -C tex
|
||||
|
||||
run-simx:
|
||||
$(MAKE) -C basic run-simx
|
||||
@@ -17,6 +18,7 @@ run-simx:
|
||||
$(MAKE) -C printf run-simx
|
||||
$(MAKE) -C diverge run-simx
|
||||
$(MAKE) -C fence run-simx
|
||||
$(MAKE) -C tex run-simx
|
||||
|
||||
run-vlsim:
|
||||
$(MAKE) -C basic run-vlsim
|
||||
@@ -27,6 +29,7 @@ run-vlsim:
|
||||
$(MAKE) -C printf run-vlsim
|
||||
$(MAKE) -C diverge run-vlsim
|
||||
$(MAKE) -C fence run-vlsim
|
||||
$(MAKE) -C tex run-vlsim
|
||||
|
||||
clean:
|
||||
$(MAKE) -C basic clean
|
||||
@@ -37,6 +40,7 @@ clean:
|
||||
$(MAKE) -C printf clean
|
||||
$(MAKE) -C diverge clean
|
||||
$(MAKE) -C fence clean
|
||||
$(MAKE) -C tex clean
|
||||
|
||||
clean-all:
|
||||
$(MAKE) -C basic clean-all
|
||||
@@ -47,4 +51,5 @@ clean-all:
|
||||
$(MAKE) -C printf clean-all
|
||||
$(MAKE) -C diverge clean-all
|
||||
$(MAKE) -C fence clean-all
|
||||
$(MAKE) -C tex clean-all
|
||||
|
||||
|
||||
70
tests/regression/tex/Makefile
Normal file
@@ -0,0 +1,70 @@
|
||||
# Build and run rules for the "tex" regression test: a host program
# ($(PROJECT)) plus a RISC-V device kernel (kernel.elf/.bin/.dump).

RISCV_TOOLCHAIN_PATH ?= /opt/riscv-gnu-toolchain
VORTEX_DRV_PATH ?= $(realpath ../../../driver)
VORTEX_RT_PATH ?= $(wildcard ../../../runtime)

# Command-line options handed to the host program by the run-* targets.
OPTS ?= -f1

# RISC-V cross tools used for the device kernel.
VX_CC  = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-gcc
VX_CXX = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-g++
VX_DP  = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-objdump
VX_CP  = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-objcopy

VX_CFLAGS += -march=rv32imf -mabi=ilp32f -O3 -Wstack-usage=1024 -ffreestanding -nostartfiles -fdata-sections -ffunction-sections
VX_CFLAGS += -I$(VORTEX_RT_PATH)/include -I$(VORTEX_RT_PATH)/../hw

VX_LDFLAGS += -Wl,-Bstatic,-T,$(VORTEX_RT_PATH)/linker/vx_link.ld -Wl,--gc-sections $(VORTEX_RT_PATH)/libvortexrt.a

VX_SRCS = kernel.c
# Local headers included by kernel.c; listed so that editing them rebuilds
# the kernel image.
VX_HDRS = common.h texsw.h

# Host-side flags (debug build).
CXXFLAGS += -std=c++11 -O0 -g -Wall -Wextra -Wfatal-errors

CXXFLAGS += -I$(VORTEX_DRV_PATH)/include

LDFLAGS += -L$(VORTEX_DRV_PATH)/stub -lvortex

PROJECT = tex

SRCS = main.cpp utils.cpp

# None of these names a file produced by the build.
.PHONY: all run-simx run-fpga run-asesim run-vlsim run-rtlsim clean clean-all

all: $(PROJECT) kernel.bin kernel.dump

kernel.dump: kernel.elf
	$(VX_DP) -D kernel.elf > kernel.dump

kernel.bin: kernel.elf
	$(VX_CP) -O binary kernel.elf kernel.bin

kernel.elf: $(VX_SRCS) $(VX_HDRS)
	$(VX_CC) $(VX_CFLAGS) $(VX_SRCS) $(VX_LDFLAGS) -o kernel.elf

$(PROJECT): $(SRCS)
	$(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -o $@

# The run-* targets differ only in which driver library directory is put on
# LD_LIBRARY_PATH. POCL_RT_PATH is not assigned in this file; it expands to
# nothing unless supplied by the environment or the command line.
run-simx: $(PROJECT) kernel.bin
	LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/simx:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)

run-fpga: $(PROJECT) kernel.bin
	LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/opae:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)

run-asesim: $(PROJECT) kernel.bin
	LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)

run-vlsim: $(PROJECT) kernel.bin
	LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/opae/vlsim:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)

run-rtlsim: $(PROJECT) kernel.bin
	LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/rtlsim:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)

.depend: $(SRCS)
	$(CXX) $(CXXFLAGS) -MM $^ > .depend

clean:
	rm -rf $(PROJECT) *.o .depend

clean-all: clean
	rm -rf *.elf *.bin *.dump

# Do not (re)generate .depend when the only goals are cleaning targets;
# otherwise "make clean-all" would run the compiler just to delete its output.
ifeq ($(filter clean clean-all,$(MAKECMDGOALS)),)
-include .depend
endif
|
||||
260
tests/regression/tex/blitter.h
Normal file
@@ -0,0 +1,260 @@
|
||||
#include "format.h"
|
||||
|
||||
struct SurfaceDesc {
|
||||
ePixelFormat Format;
|
||||
uint8_t *pBits;
|
||||
uint32_t Width;
|
||||
uint32_t Height;
|
||||
uint32_t Pitch;
|
||||
};
|
||||
|
||||
class BlitTable {
|
||||
public:
|
||||
typedef int (*PfnCopy)(const SurfaceDesc &dstDesc,
|
||||
uint32_t dstOffsetX,
|
||||
uint32_t dstOffsetY,
|
||||
uint32_t copyWidth,
|
||||
uint32_t copyHeight,
|
||||
const SurfaceDesc &srcDesc,
|
||||
uint32_t srcOffsetX,
|
||||
uint32_t srcOffsetY);
|
||||
|
||||
BlitTable() {
|
||||
for (uint32_t s = 0; s < FORMAT_COLOR_SIZE_; ++s) {
|
||||
for (uint32_t d = 0; d < FORMAT_COLOR_SIZE_; ++d) {
|
||||
copyFuncs_[s][d] = CopyInvalid;
|
||||
}
|
||||
}
|
||||
|
||||
for (uint32_t s = 0; s < FORMAT_COLOR_SIZE_; ++s) {
|
||||
switch (s) {
|
||||
case FORMAT_A8:
|
||||
case FORMAT_L8:
|
||||
copyFuncs_[s][s] = CopyFast<uint8_t>;
|
||||
break;
|
||||
|
||||
case FORMAT_A8L8:
|
||||
copyFuncs_[FORMAT_A8L8][FORMAT_A8] = Copy<FORMAT_A8L8, FORMAT_A8>;
|
||||
copyFuncs_[FORMAT_A8L8][FORMAT_A8L8] = CopyFast<uint16_t>;
|
||||
break;
|
||||
|
||||
case FORMAT_R5G6B5:
|
||||
copyFuncs_[FORMAT_R5G6B5][FORMAT_L8] = Copy<FORMAT_R5G6B5, FORMAT_L8>;
|
||||
copyFuncs_[FORMAT_R5G6B5][FORMAT_R5G6B5] = CopyFast<uint16_t>;
|
||||
copyFuncs_[FORMAT_R5G6B5][FORMAT_R8G8B8] =
|
||||
Copy<FORMAT_R5G6B5, FORMAT_R8G8B8>;
|
||||
copyFuncs_[FORMAT_R5G6B5][FORMAT_B8G8R8] =
|
||||
Copy<FORMAT_R5G6B5, FORMAT_B8G8R8>;
|
||||
copyFuncs_[FORMAT_R5G6B5][FORMAT_A8B8G8R8] =
|
||||
Copy<FORMAT_R5G6B5, FORMAT_A8B8G8R8>;
|
||||
copyFuncs_[FORMAT_R5G6B5][FORMAT_A8R8G8B8] =
|
||||
Copy<FORMAT_R5G6B5, FORMAT_A8R8G8B8>;
|
||||
break;
|
||||
|
||||
case FORMAT_A1R5G5B5:
|
||||
copyFuncs_[FORMAT_A1R5G5B5][FORMAT_A8] =
|
||||
Copy<FORMAT_A1R5G5B5, FORMAT_A8>;
|
||||
copyFuncs_[FORMAT_A1R5G5B5][FORMAT_L8] =
|
||||
Copy<FORMAT_A1R5G5B5, FORMAT_L8>;
|
||||
copyFuncs_[FORMAT_A1R5G5B5][FORMAT_A8L8] =
|
||||
Copy<FORMAT_A1R5G5B5, FORMAT_A8L8>;
|
||||
copyFuncs_[FORMAT_A1R5G5B5][FORMAT_R8G8B8] =
|
||||
Copy<FORMAT_A1R5G5B5, FORMAT_R8G8B8>;
|
||||
copyFuncs_[FORMAT_A1R5G5B5][FORMAT_A8R8G8B8] =
|
||||
Copy<FORMAT_A1R5G5B5, FORMAT_A8R8G8B8>;
|
||||
copyFuncs_[FORMAT_A1R5G5B5][FORMAT_R5G5B5A1] =
|
||||
Copy<FORMAT_A1R5G5B5, FORMAT_R5G5B5A1>;
|
||||
copyFuncs_[FORMAT_A1R5G5B5][FORMAT_R4G4B4A4] =
|
||||
Copy<FORMAT_A1R5G5B5, FORMAT_R4G4B4A4>;
|
||||
copyFuncs_[FORMAT_A1R5G5B5][FORMAT_B8G8R8] =
|
||||
Copy<FORMAT_A1R5G5B5, FORMAT_B8G8R8>;
|
||||
copyFuncs_[FORMAT_A1R5G5B5][FORMAT_A8B8G8R8] =
|
||||
Copy<FORMAT_A1R5G5B5, FORMAT_A8B8G8R8>;
|
||||
break;
|
||||
|
||||
case FORMAT_A4R4G4B4:
|
||||
copyFuncs_[FORMAT_A4R4G4B4][FORMAT_A8] =
|
||||
Copy<FORMAT_A4R4G4B4, FORMAT_A8>;
|
||||
copyFuncs_[FORMAT_A4R4G4B4][FORMAT_L8] =
|
||||
Copy<FORMAT_A4R4G4B4, FORMAT_L8>;
|
||||
copyFuncs_[FORMAT_A4R4G4B4][FORMAT_A8L8] =
|
||||
Copy<FORMAT_A4R4G4B4, FORMAT_A8L8>;
|
||||
copyFuncs_[FORMAT_A4R4G4B4][FORMAT_R8G8B8] =
|
||||
Copy<FORMAT_A4R4G4B4, FORMAT_R8G8B8>;
|
||||
copyFuncs_[FORMAT_A4R4G4B4][FORMAT_A8R8G8B8] =
|
||||
Copy<FORMAT_A4R4G4B4, FORMAT_A8R8G8B8>;
|
||||
copyFuncs_[FORMAT_A4R4G4B4][FORMAT_R5G5B5A1] =
|
||||
Copy<FORMAT_A4R4G4B4, FORMAT_R5G5B5A1>;
|
||||
copyFuncs_[FORMAT_A4R4G4B4][FORMAT_R4G4B4A4] =
|
||||
Copy<FORMAT_A4R4G4B4, FORMAT_R4G4B4A4>;
|
||||
copyFuncs_[FORMAT_A4R4G4B4][FORMAT_B8G8R8] =
|
||||
Copy<FORMAT_A4R4G4B4, FORMAT_B8G8R8>;
|
||||
copyFuncs_[FORMAT_A4R4G4B4][FORMAT_A8B8G8R8] =
|
||||
Copy<FORMAT_A4R4G4B4, FORMAT_A8B8G8R8>;
|
||||
break;
|
||||
|
||||
case FORMAT_R8G8B8:
|
||||
copyFuncs_[FORMAT_R8G8B8][FORMAT_L8] = Copy<FORMAT_R8G8B8, FORMAT_L8>;
|
||||
copyFuncs_[FORMAT_R8G8B8][FORMAT_R5G6B5] =
|
||||
Copy<FORMAT_R8G8B8, FORMAT_R5G6B5>;
|
||||
copyFuncs_[FORMAT_R8G8B8][FORMAT_R8G8B8] = CopyFast<uint24_t>;
|
||||
copyFuncs_[FORMAT_R8G8B8][FORMAT_B8G8R8] =
|
||||
Copy<FORMAT_R8G8B8, FORMAT_B8G8R8>;
|
||||
copyFuncs_[FORMAT_R8G8B8][FORMAT_A8B8G8R8] =
|
||||
Copy<FORMAT_R8G8B8, FORMAT_A8B8G8R8>;
|
||||
copyFuncs_[FORMAT_R8G8B8][FORMAT_A8R8G8B8] =
|
||||
Copy<FORMAT_R8G8B8, FORMAT_A8R8G8B8>;
|
||||
break;
|
||||
|
||||
case FORMAT_A8R8G8B8:
|
||||
copyFuncs_[FORMAT_A8R8G8B8][FORMAT_A8] =
|
||||
Copy<FORMAT_A8R8G8B8, FORMAT_A8>;
|
||||
copyFuncs_[FORMAT_A8R8G8B8][FORMAT_L8] =
|
||||
Copy<FORMAT_A8R8G8B8, FORMAT_L8>;
|
||||
copyFuncs_[FORMAT_A8R8G8B8][FORMAT_A8L8] =
|
||||
Copy<FORMAT_A8R8G8B8, FORMAT_A8L8>;
|
||||
copyFuncs_[FORMAT_A8R8G8B8][FORMAT_R5G6B5] =
|
||||
Copy<FORMAT_A8R8G8B8, FORMAT_R5G6B5>;
|
||||
copyFuncs_[FORMAT_A8R8G8B8][FORMAT_R8G8B8] =
|
||||
Copy<FORMAT_A8R8G8B8, FORMAT_R8G8B8>;
|
||||
copyFuncs_[FORMAT_A8R8G8B8][FORMAT_A8R8G8B8] = CopyFast<uint32_t>;
|
||||
copyFuncs_[FORMAT_A8R8G8B8][FORMAT_R5G5B5A1] =
|
||||
Copy<FORMAT_A8R8G8B8, FORMAT_R5G5B5A1>;
|
||||
copyFuncs_[FORMAT_A8R8G8B8][FORMAT_R4G4B4A4] =
|
||||
Copy<FORMAT_A8R8G8B8, FORMAT_R4G4B4A4>;
|
||||
copyFuncs_[FORMAT_A8R8G8B8][FORMAT_B8G8R8] =
|
||||
Copy<FORMAT_A8R8G8B8, FORMAT_B8G8R8>;
|
||||
copyFuncs_[FORMAT_A8R8G8B8][FORMAT_A8B8G8R8] =
|
||||
Copy<FORMAT_A8R8G8B8, FORMAT_A8B8G8R8>;
|
||||
break;
|
||||
|
||||
case FORMAT_R5G5B5A1:
|
||||
copyFuncs_[FORMAT_R5G5B5A1][FORMAT_A8] =
|
||||
Copy<FORMAT_R5G5B5A1, FORMAT_A8>;
|
||||
copyFuncs_[FORMAT_R5G5B5A1][FORMAT_L8] =
|
||||
Copy<FORMAT_R5G5B5A1, FORMAT_L8>;
|
||||
copyFuncs_[FORMAT_R5G5B5A1][FORMAT_A8L8] =
|
||||
Copy<FORMAT_R5G5B5A1, FORMAT_A8L8>;
|
||||
copyFuncs_[FORMAT_R5G5B5A1][FORMAT_RGB] =
|
||||
Copy<FORMAT_R5G5B5A1, FORMAT_RGB>;
|
||||
copyFuncs_[FORMAT_R5G5B5A1][FORMAT_ARGB] =
|
||||
Copy<FORMAT_R5G5B5A1, FORMAT_ARGB>;
|
||||
break;
|
||||
|
||||
case FORMAT_R4G4B4A4:
|
||||
copyFuncs_[FORMAT_R4G4B4A4][FORMAT_A8] =
|
||||
Copy<FORMAT_R4G4B4A4, FORMAT_A8>;
|
||||
copyFuncs_[FORMAT_R4G4B4A4][FORMAT_L8] =
|
||||
Copy<FORMAT_R4G4B4A4, FORMAT_L8>;
|
||||
copyFuncs_[FORMAT_R4G4B4A4][FORMAT_A8L8] =
|
||||
Copy<FORMAT_R4G4B4A4, FORMAT_A8L8>;
|
||||
copyFuncs_[FORMAT_R4G4B4A4][FORMAT_RGB] =
|
||||
Copy<FORMAT_R4G4B4A4, FORMAT_RGB>;
|
||||
copyFuncs_[FORMAT_R4G4B4A4][FORMAT_ARGB] =
|
||||
Copy<FORMAT_R4G4B4A4, FORMAT_ARGB>;
|
||||
break;
|
||||
|
||||
case FORMAT_B8G8R8:
|
||||
copyFuncs_[FORMAT_B8G8R8][FORMAT_L8] = Copy<FORMAT_B8G8R8, FORMAT_L8>;
|
||||
copyFuncs_[FORMAT_B8G8R8][FORMAT_RGB] = Copy<FORMAT_B8G8R8, FORMAT_RGB>;
|
||||
break;
|
||||
|
||||
case FORMAT_A8B8G8R8:
|
||||
copyFuncs_[FORMAT_A8B8G8R8][FORMAT_A8] =
|
||||
Copy<FORMAT_A8B8G8R8, FORMAT_A8>;
|
||||
copyFuncs_[FORMAT_A8B8G8R8][FORMAT_L8] =
|
||||
Copy<FORMAT_A8B8G8R8, FORMAT_L8>;
|
||||
copyFuncs_[FORMAT_A8B8G8R8][FORMAT_A8L8] =
|
||||
Copy<FORMAT_A8B8G8R8, FORMAT_A8L8>;
|
||||
copyFuncs_[FORMAT_A8B8G8R8][FORMAT_RGB] =
|
||||
Copy<FORMAT_A8B8G8R8, FORMAT_RGB>;
|
||||
copyFuncs_[FORMAT_A8B8G8R8][FORMAT_ARGB] =
|
||||
Copy<FORMAT_A8B8G8R8, FORMAT_ARGB>;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
PfnCopy get(uint32_t srcFormat, uint32_t dstFormat) const {
|
||||
assert(srcFormat < FORMAT_COLOR_SIZE_);
|
||||
assert(dstFormat < FORMAT_COLOR_SIZE_);
|
||||
return copyFuncs_[srcFormat][dstFormat];
|
||||
}
|
||||
|
||||
private:
|
||||
template <ePixelFormat SrcFormat, ePixelFormat DstFormat>
|
||||
static int Copy(const SurfaceDesc &dstDesc,
|
||||
uint32_t dstOffsetX,
|
||||
uint32_t dstOffsetY,
|
||||
uint32_t copyWidth,
|
||||
uint32_t copyHeight,
|
||||
const SurfaceDesc &srcDesc,
|
||||
uint32_t srcOffsetX,
|
||||
uint32_t srcOffsetY) {
|
||||
auto srcBPP = TFormatInfo<SrcFormat>::CBSIZE;
|
||||
auto dstBPP = TFormatInfo<DstFormat>::CBSIZE;
|
||||
auto srcNextLine = srcDesc.Pitch;
|
||||
auto dstNextLine = dstDesc.Pitch;
|
||||
|
||||
auto pbSrc = srcDesc.pBits + srcOffsetX * srcBPP + srcOffsetY * srcDesc.Pitch;
|
||||
auto pbDst = dstDesc.pBits + dstOffsetX * dstBPP + dstOffsetY * dstDesc.Pitch;
|
||||
|
||||
while (copyHeight--) {
|
||||
auto pSrc = reinterpret_cast<const typename TFormatInfo<SrcFormat>::TYPE *>(pbSrc);
|
||||
for (auto *pDst = reinterpret_cast<typename TFormatInfo<DstFormat>::TYPE *>(
|
||||
pbDst),
|
||||
*const pEnd = pDst + copyWidth;
|
||||
pDst != pEnd; ++pDst, ++pSrc) {
|
||||
auto tmp = Format::ConvertFrom<SrcFormat, true>(pSrc);
|
||||
Format::ConvertTo<DstFormat>(pDst, tmp);
|
||||
}
|
||||
|
||||
pbSrc += srcNextLine;
|
||||
pbDst += dstNextLine;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
template <typename Type>
|
||||
static int CopyFast(const SurfaceDesc &dstDesc,
|
||||
uint32_t dstOffsetX,
|
||||
uint32_t dstOffsetY,
|
||||
uint32_t copyWidth,
|
||||
uint32_t copyHeight,
|
||||
const SurfaceDesc &srcDesc,
|
||||
uint32_t srcOffsetX,
|
||||
uint32_t srcOffsetY) {
|
||||
auto nBPP = sizeof(Type);
|
||||
auto srcNextLine = srcDesc.Pitch;
|
||||
auto dstNextLine = dstDesc.Pitch;
|
||||
|
||||
auto pbSrc = srcDesc.pBits + srcOffsetX * nBPP + srcOffsetY * srcDesc.Pitch;
|
||||
auto pbDst = dstDesc.pBits + dstOffsetX * nBPP + dstOffsetY * dstDesc.Pitch;
|
||||
|
||||
while (copyHeight--) {
|
||||
auto pSrc = reinterpret_cast<const Type *>(pbSrc);
|
||||
for (auto *pDst = reinterpret_cast<Type *>(pbDst), *const pEnd = pDst + copyWidth;
|
||||
pDst != pEnd; ++pDst, ++pSrc) {
|
||||
*pDst = *pSrc;
|
||||
}
|
||||
pbSrc += srcNextLine;
|
||||
pbDst += dstNextLine;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int CopyInvalid(const SurfaceDesc & /*dstDesc*/,
|
||||
uint32_t /*dstOffsetX*/,
|
||||
uint32_t /*dstOffsetY*/,
|
||||
uint32_t /*copyWidth*/,
|
||||
uint32_t /*copyHeight*/,
|
||||
const SurfaceDesc & /*srcDesc*/,
|
||||
uint32_t /*srcOffsetX*/,
|
||||
uint32_t /*srcOffsetY*/)
|
||||
{
|
||||
std::cout << "Error: invalid format" << std::endl;
|
||||
return -1;
|
||||
}
|
||||
|
||||
PfnCopy copyFuncs_[FORMAT_COLOR_SIZE_][FORMAT_COLOR_SIZE_];
|
||||
};
|
||||
68
tests/regression/tex/color.h
Normal file
@@ -0,0 +1,68 @@
|
||||
//
|
||||
// Copyright (c) Blaise Tine. All rights reserved.
|
||||
//
|
||||
//
|
||||
// Use of this sample source code is subject to the terms of the Microsoft
|
||||
// license agreement under which you licensed this sample source code. If
|
||||
// you did not accept the terms of the license agreement, you are not
|
||||
// authorized to use this sample source code. For the terms of the license,
|
||||
// please see the license agreement between you and Microsoft or, if applicable,
|
||||
// see the LICENSE.RTF on your install media or the root of your tools
|
||||
// installation.
|
||||
// THE SAMPLE SOURCE CODE IS PROVIDED "AS IS", WITH NO WARRANTIES OR
|
||||
// INDEMNITIES.
|
||||
//
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
#include <assert.h>
|
||||
|
||||
// 32-bit colour with four 8-bit channels. The same four bytes can be viewed
// as one packed word (value), as named channels stored in memory order
// b, g, r, a, or as a byte array (m).
struct ColorARGB {
  union {
    struct {
      uint32_t value;
    };
    struct {
      uint8_t b, g, r, a;
    };
    struct {
      uint8_t m[4];
    };
  };

  // Leaves the channels uninitialised.
  ColorARGB() {}

  // Builds a colour from channel values; each must lie in [0, 0xff].
  ColorARGB(int a, int r, int g, int b) {
    assert((a >= 0) && (a <= 0xff));
    assert((r >= 0) && (r <= 0xff));
    assert((g >= 0) && (g <= 0xff));
    assert((b >= 0) && (b <= 0xff));

    this->b = static_cast<uint8_t>(b);
    this->g = static_cast<uint8_t>(g);
    this->r = static_cast<uint8_t>(r);
    this->a = static_cast<uint8_t>(a);
  }

  // Builds a colour from r, g, b; each must lie in [0, 0xff].
  // Alpha is set to 0xff so that `value` never carries an indeterminate byte
  // (it was previously left uninitialised).
  ColorARGB(int r, int g, int b) {
    assert((r >= 0) && (r <= 0xff));
    assert((g >= 0) && (g <= 0xff));
    assert((b >= 0) && (b <= 0xff));

    this->b = static_cast<uint8_t>(b);
    this->g = static_cast<uint8_t>(g);
    this->r = static_cast<uint8_t>(r);
    this->a = 0xff;
  }

  // Builds a colour from an already packed word.
  ColorARGB(int value) {
    this->value = value;
  }

  // Copies the packed word; returns *this so assignments can be chained.
  ColorARGB &operator=(const ColorARGB &rhs) {
    this->value = rhs.value;
    return *this;
  }

  // Yields the packed word.
  operator uint32_t() const {
    return this->value;
  }
};
|
||||
25
tests/regression/tex/common.h
Normal file
@@ -0,0 +1,25 @@
|
||||
#ifndef _COMMON_H_
|
||||
#define _COMMON_H_
|
||||
|
||||
#define KERNEL_ARG_DEV_MEM_ADDR 0x7ffff000
|
||||
|
||||
/* Argument block shared by the host program and the device kernel; the host
 * copies it to KERNEL_ARG_DEV_MEM_ADDR. Field order and types define the
 * layout seen by both sides. */
struct kernel_arg_t {
  uint32_t num_tasks;     /* number of tasks spawned by the kernel */
  uint8_t  format;        /* texture format code written to CSR_TEX_FORMAT */
  uint8_t  filter;        /* 0: no filtering; 2 selects the tex3 sampling path */
  uint8_t  wrap;          /* addressing mode, applied to both axes */
  uint8_t  use_sw;        /* non-zero: sample with the software routines */
  uint32_t lod;           /* level-of-detail value passed to the sampler */
  uint8_t  src_logWidth;  /* log2 of the source texture width */
  uint8_t  src_logHeight; /* log2 of the source texture height */
  uint8_t  src_stride;    /* source bytes per pixel */
  uint8_t  src_pitch;     /* source bytes per row -- NOTE(review): 8 bits cannot
                             hold rows of 256 bytes or more; confirm intent */
  uint32_t src_ptr;       /* device address of the source texture */
  uint32_t dst_width;     /* destination width in pixels */
  uint32_t dst_height;    /* destination height in pixels */
  uint8_t  dst_stride;    /* destination bytes per pixel */
  uint32_t dst_pitch;     /* destination bytes per row */
  uint32_t dst_ptr;       /* device address of the destination buffer */
};
|
||||
|
||||
#endif
|
||||
BIN
tests/regression/tex/earth.tga
Normal file
|
After Width: | Height: | Size: 48 MiB |
BIN
tests/regression/tex/flower.tga
Normal file
|
After Width: | Height: | Size: 16 MiB |
BIN
tests/regression/tex/football.tga
Normal file
|
After Width: | Height: | Size: 12 KiB |
1022
tests/regression/tex/format.h
Normal file
37
tests/regression/tex/int24.h
Normal file
@@ -0,0 +1,37 @@
|
||||
//
|
||||
// Copyright (c) Blaise Tine. All rights reserved.
|
||||
//
|
||||
//
|
||||
// Use of this sample source code is subject to the terms of the Microsoft
|
||||
// license agreement under which you licensed this sample source code. If
|
||||
// you did not accept the terms of the license agreement, you are not
|
||||
// authorized to use this sample source code. For the terms of the license,
|
||||
// please see the license agreement between you and Microsoft or, if applicable,
|
||||
// see the LICENSE.RTF on your install media or the root of your tools
|
||||
// installation.
|
||||
// THE SAMPLE SOURCE CODE IS PROVIDED "AS IS", WITH NO WARRANTIES OR
|
||||
// INDEMNITIES.
|
||||
//
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
|
||||
// Three-byte unsigned integer; m[0] holds the least significant byte.
struct uint24_t {
  uint8_t m[3];

  // Keeps the low 24 bits of value; bits 24..31 are discarded.
  explicit uint24_t(uint32_t value)
      : m{static_cast<uint8_t>(value & 0xff),
          static_cast<uint8_t>((value >> 8) & 0xff),
          static_cast<uint8_t>((value >> 16) & 0xff)} {}

  // Stores the three bytes as given: x is the lowest byte, z the highest.
  explicit uint24_t(uint8_t x, uint8_t y, uint8_t z)
      : m{x, y, z} {}

  // Reassembles the three bytes into a 32-bit value.
  operator uint32_t() const {
    return m[0] | (m[1] << 8) | (m[2] << 16);
  }
};
|
||||
BIN
tests/regression/tex/kernel.bin
Executable file
67
tests/regression/tex/kernel.c
Normal file
@@ -0,0 +1,67 @@
|
||||
#include <stdint.h>
|
||||
#include <vx_intrinsics.h>
|
||||
#include "common.h"
|
||||
#include "texsw.h"
|
||||
|
||||
#define ENABLE_SW
|
||||
|
||||
/* Per-launch parameters handed to every kernel_body task. */
struct tile_arg_t {
  struct kernel_arg_t* state; /* argument block received from the host */
  uint32_t tile_width;        /* pixels per row processed by a task */
  uint32_t tile_height;       /* rows processed by a task */
  float deltaX;               /* texture-coordinate step per destination pixel */
  float deltaY;               /* texture-coordinate step per destination row */
};
|
||||
|
||||
void kernel_body(int task_id, void* arg) {
|
||||
struct tile_arg_t* _arg = (struct tile_arg_t*)(arg);
|
||||
struct kernel_arg_t* state = _arg->state;
|
||||
|
||||
uint32_t xoffset = 0;
|
||||
uint32_t yoffset = task_id * _arg->tile_height;
|
||||
uint8_t* dst_ptr = (uint8_t*)(state->dst_ptr + xoffset * state->dst_stride + yoffset * state->dst_pitch);
|
||||
|
||||
float fv = yoffset * _arg->deltaY;
|
||||
for (uint32_t y = 0; y < _arg->tile_height; ++y) {
|
||||
uint32_t* dst_row = (uint32_t*)dst_ptr;
|
||||
float fu = xoffset * _arg->deltaX;
|
||||
for (uint32_t x = 0; x < _arg->tile_width; ++x) {
|
||||
int32_t u = (int32_t)(fu * (1<<20));
|
||||
int32_t v = (int32_t)(fv * (1<<20));
|
||||
#ifdef ENABLE_SW
|
||||
if (state->use_sw) {
|
||||
dst_row[x] = (state->filter == 2) ? tex3_sw(state, 0, u, v, state->lod) : tex_sw(state, 0, u, v, state->lod);
|
||||
} else {
|
||||
#endif
|
||||
dst_row[x] = (state->filter == 2) ? vx_tex3(0, u, v, state->lod) : vx_tex(0, u, v, state->lod);
|
||||
#ifdef ENABLE_SW
|
||||
}
|
||||
#endif
|
||||
fu += _arg->deltaX;
|
||||
}
|
||||
dst_ptr += state->dst_pitch;
|
||||
fv += _arg->deltaY;
|
||||
}
|
||||
}
|
||||
|
||||
int main() {
|
||||
struct kernel_arg_t* arg = (struct kernel_arg_t*)KERNEL_ARG_DEV_MEM_ADDR;
|
||||
|
||||
// configure texture unit
|
||||
vx_csr_write(CSR_TEX_ADDR(0), arg->src_ptr);
|
||||
vx_csr_write(CSR_TEX_MIPOFF(0), 0);
|
||||
vx_csr_write(CSR_TEX_WIDTH(0), arg->src_logWidth);
|
||||
vx_csr_write(CSR_TEX_HEIGHT(0), arg->src_logHeight);
|
||||
vx_csr_write(CSR_TEX_FORMAT(0), arg->format);
|
||||
vx_csr_write(CSR_TEX_WRAP(0), (arg->wrap << 2) | arg->wrap);
|
||||
vx_csr_write(CSR_TEX_FILTER(0), (arg->filter ? 1 : 0));
|
||||
|
||||
struct tile_arg_t targ;
|
||||
targ.state = arg;
|
||||
targ.tile_width = arg->dst_width;
|
||||
targ.tile_height = (arg->dst_height + arg->num_tasks - 1) / arg->num_tasks;
|
||||
targ.deltaX = 1.0f / arg->dst_width;
|
||||
targ.deltaY = 1.0f / arg->dst_height;
|
||||
|
||||
vx_spawn_tasks(arg->num_tasks, kernel_body, &targ);
|
||||
}
|
||||
1514
tests/regression/tex/kernel.dump
Normal file
BIN
tests/regression/tex/kernel.elf
Executable file
260
tests/regression/tex/main.cpp
Normal file
@@ -0,0 +1,260 @@
|
||||
#include <iostream>
|
||||
#include <vector>
|
||||
#include <unistd.h>
|
||||
#include <string.h>
|
||||
#include <chrono>
|
||||
#include <cmath>
|
||||
#include <assert.h>
|
||||
#include <vortex.h>
|
||||
#include "common.h"
|
||||
#include "utils.h"
|
||||
|
||||
// Evaluates _expr once; on a non-zero result prints the failing expression
// with its value, runs cleanup() and terminates the process with status -1.
#define RT_CHECK(_expr)                                          \
  do {                                                           \
    int _ret = _expr;                                            \
    if (0 != _ret) {                                             \
      printf("Error: '%s' returned %d!\n", #_expr, (int)_ret);   \
      cleanup();                                                 \
      exit(-1);                                                  \
    }                                                            \
  } while (false)
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
const char* kernel_file = "kernel.bin";
|
||||
const char* input_file = "palette64.tga";
|
||||
const char* output_file = "output.tga";
|
||||
int wrap = 0;
|
||||
int filter = 0;
|
||||
float scale = 1.0f;
|
||||
int format = 0;
|
||||
bool use_sw = false;
|
||||
ePixelFormat eformat = FORMAT_A8R8G8B8;
|
||||
|
||||
vx_device_h device = nullptr;
|
||||
vx_buffer_h buffer = nullptr;
|
||||
|
||||
// Prints the program banner and the command-line synopsis to stdout.
static void show_usage() {
  std::cout << "Vortex Texture Test." << std::endl
            << "Usage: [-k: kernel] [-i image] [-o image] [-s scale] [-w wrap] [-f format] [-g filter] [-z no_hw] [-h: help]" << std::endl;
}
|
||||
|
||||
static void parse_args(int argc, char **argv) {
|
||||
int c;
|
||||
while ((c = getopt(argc, argv, "zi:o:k:w:f:g:h?")) != -1) {
|
||||
switch (c) {
|
||||
case 'i':
|
||||
input_file = optarg;
|
||||
break;
|
||||
case 'o':
|
||||
output_file = optarg;
|
||||
break;
|
||||
case 's':
|
||||
scale = std::stof(optarg, NULL);
|
||||
break;
|
||||
case 'w':
|
||||
wrap = std::atoi(optarg);
|
||||
break;
|
||||
case 'z':
|
||||
use_sw = true;
|
||||
break;
|
||||
case 'f': {
|
||||
format = std::atoi(optarg);
|
||||
switch (format) {
|
||||
case 0: eformat = FORMAT_A8R8G8B8; break;
|
||||
case 1: eformat = FORMAT_R5G6B5; break;
|
||||
case 2: eformat = FORMAT_R4G4B4A4; break;
|
||||
case 3: eformat = FORMAT_L8; break;
|
||||
case 4: eformat = FORMAT_A8; break;
|
||||
default:
|
||||
std::cout << "Error: invalid format: " << format << std::endl;
|
||||
exit(1);
|
||||
}
|
||||
} break;
|
||||
case 'g':
|
||||
filter = std::atoi(optarg);
|
||||
break;
|
||||
case 'k':
|
||||
kernel_file = optarg;
|
||||
break;
|
||||
case 'h':
|
||||
case '?': {
|
||||
show_usage();
|
||||
exit(0);
|
||||
} break;
|
||||
default:
|
||||
show_usage();
|
||||
exit(-1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void cleanup() {
|
||||
if (buffer) {
|
||||
vx_buf_release(buffer);
|
||||
}
|
||||
if (device) {
|
||||
vx_dev_close(device);
|
||||
}
|
||||
}
|
||||
|
||||
int run_test(const kernel_arg_t& kernel_arg,
|
||||
uint32_t buf_size,
|
||||
uint32_t width,
|
||||
uint32_t height,
|
||||
uint32_t bpp) {
|
||||
auto time_start = std::chrono::high_resolution_clock::now();
|
||||
|
||||
// start device
|
||||
std::cout << "start device" << std::endl;
|
||||
RT_CHECK(vx_start(device));
|
||||
|
||||
// wait for completion
|
||||
std::cout << "wait for completion" << std::endl;
|
||||
RT_CHECK(vx_ready_wait(device, -1));
|
||||
|
||||
auto time_end = std::chrono::high_resolution_clock::now();
|
||||
double elapsed = std::chrono::duration_cast<std::chrono::milliseconds>(time_end - time_start).count();
|
||||
printf("Elapsed time: %lg ms\n", elapsed);
|
||||
|
||||
// download destination buffer
|
||||
std::cout << "download destination buffer" << std::endl;
|
||||
RT_CHECK(vx_copy_from_dev(buffer, kernel_arg.dst_ptr, buf_size, 0));
|
||||
|
||||
std::vector<uint8_t> dst_pixels(buf_size);
|
||||
auto buf_ptr = (uint8_t*)vx_host_ptr(buffer);
|
||||
for (uint32_t i = 0; i < buf_size; ++i) {
|
||||
dst_pixels[i] = buf_ptr[i];
|
||||
}
|
||||
|
||||
// save output image
|
||||
std::cout << "save output image" << std::endl;
|
||||
//dump_image(dst_pixels, width, height, bpp);
|
||||
RT_CHECK(SaveTGA(output_file, dst_pixels, width, height, bpp));
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
kernel_arg_t kernel_arg;
|
||||
std::vector<uint8_t> src_pixels;
|
||||
uint32_t src_width;
|
||||
uint32_t src_height;
|
||||
uint32_t src_bpp;
|
||||
|
||||
// parse command arguments
|
||||
parse_args(argc, argv);
|
||||
|
||||
std::vector<uint8_t> tmp_pixels;
|
||||
RT_CHECK(LoadTGA(input_file, tmp_pixels, &src_width, &src_height));
|
||||
|
||||
// check power of two support
|
||||
if (!ISPOW2(src_width) || !ISPOW2(src_height)) {
|
||||
std::cout << "Error: only power of two textures supported: width=" << src_width << ", heigth=" << src_height << std::endl;
|
||||
return -1;
|
||||
}
|
||||
|
||||
RT_CHECK(ConvertImage(src_pixels, tmp_pixels, src_width, src_height, FORMAT_A8R8G8B8, eformat));
|
||||
src_bpp = Format::GetInfo(eformat).BytePerPixel;
|
||||
|
||||
//dump_image(src_pixels, src_width, src_height, src_bpp);
|
||||
|
||||
uint32_t src_bufsize = src_bpp * src_width * src_height;
|
||||
|
||||
uint32_t dst_width = (uint32_t)(src_width * scale);
|
||||
uint32_t dst_height = (uint32_t)(src_height * scale);
|
||||
uint32_t dst_bpp = 4;
|
||||
uint32_t dst_bufsize = dst_bpp * dst_width * dst_height;
|
||||
|
||||
// open device connection
|
||||
std::cout << "open device connection" << std::endl;
|
||||
RT_CHECK(vx_dev_open(&device));
|
||||
|
||||
unsigned max_cores, max_warps, max_threads;
|
||||
RT_CHECK(vx_dev_caps(device, VX_CAPS_MAX_CORES, &max_cores));
|
||||
RT_CHECK(vx_dev_caps(device, VX_CAPS_MAX_WARPS, &max_warps));
|
||||
RT_CHECK(vx_dev_caps(device, VX_CAPS_MAX_THREADS, &max_threads));
|
||||
|
||||
uint32_t num_tasks = max_cores * max_warps * max_threads;
|
||||
|
||||
std::cout << "number of tasks: " << std::dec << num_tasks << std::endl;
|
||||
std::cout << "source buffer: width=" << src_width << ", heigth=" << src_height << ", size=" << src_bufsize << " bytes" << std::endl;
|
||||
std::cout << "destination buffer: width=" << dst_width << ", heigth=" << dst_height << ", size=" << dst_bufsize << " bytes" << std::endl;
|
||||
|
||||
// upload program
|
||||
std::cout << "upload program" << std::endl;
|
||||
RT_CHECK(vx_upload_kernel_file(device, kernel_file));
|
||||
|
||||
// allocate device memory
|
||||
std::cout << "allocate device memory" << std::endl;
|
||||
size_t src_addr, dst_addr;
|
||||
RT_CHECK(vx_alloc_dev_mem(device, src_bufsize, &src_addr));
|
||||
RT_CHECK(vx_alloc_dev_mem(device, dst_bufsize, &dst_addr));
|
||||
|
||||
std::cout << "src_addr=0x" << std::hex << src_addr << std::endl;
|
||||
std::cout << "dst_addr=0x" << std::hex << dst_addr << std::endl;
|
||||
|
||||
// allocate staging shared memory
|
||||
std::cout << "allocate shared memory" << std::endl;
|
||||
uint32_t alloc_size = std::max<uint32_t>(sizeof(kernel_arg_t), std::max<uint32_t>(src_bufsize, dst_bufsize));
|
||||
RT_CHECK(vx_alloc_shared_mem(device, alloc_size, &buffer));
|
||||
|
||||
// upload kernel argument
|
||||
std::cout << "upload kernel argument" << std::endl;
|
||||
{
|
||||
kernel_arg.num_tasks = std::min<uint32_t>(num_tasks, dst_height);
|
||||
kernel_arg.format = format;
|
||||
kernel_arg.filter = filter;
|
||||
kernel_arg.wrap = wrap;
|
||||
kernel_arg.use_sw = use_sw;
|
||||
kernel_arg.lod = 0x0;
|
||||
|
||||
kernel_arg.src_logWidth = (uint32_t)std::log2(src_width);
|
||||
kernel_arg.src_logHeight = (uint32_t)std::log2(src_height);
|
||||
kernel_arg.src_stride = src_bpp;
|
||||
kernel_arg.src_pitch = src_bpp * src_width;
|
||||
kernel_arg.src_ptr = src_addr;
|
||||
|
||||
kernel_arg.dst_width = dst_width;
|
||||
kernel_arg.dst_height = dst_height;
|
||||
kernel_arg.dst_stride = dst_bpp;
|
||||
kernel_arg.dst_pitch = dst_bpp * dst_width;
|
||||
kernel_arg.dst_ptr = dst_addr;
|
||||
|
||||
auto buf_ptr = (int*)vx_host_ptr(buffer);
|
||||
memcpy(buf_ptr, &kernel_arg, sizeof(kernel_arg_t));
|
||||
RT_CHECK(vx_copy_to_dev(buffer, KERNEL_ARG_DEV_MEM_ADDR, sizeof(kernel_arg_t), 0));
|
||||
}
|
||||
|
||||
// upload source buffer
|
||||
std::cout << "upload source buffer" << std::endl;
|
||||
{
|
||||
auto buf_ptr = (int8_t*)vx_host_ptr(buffer);
|
||||
for (uint32_t i = 0; i < src_bufsize; ++i) {
|
||||
buf_ptr[i] = src_pixels[i];
|
||||
}
|
||||
RT_CHECK(vx_copy_to_dev(buffer, kernel_arg.src_ptr, src_bufsize, 0));
|
||||
}
|
||||
|
||||
// clear destination buffer
|
||||
std::cout << "clear destination buffer" << std::endl;
|
||||
{
|
||||
auto buf_ptr = (int32_t*)vx_host_ptr(buffer);
|
||||
for (uint32_t i = 0; i < (dst_bufsize/4); ++i) {
|
||||
buf_ptr[i] = 0xdeadbeef;
|
||||
}
|
||||
RT_CHECK(vx_copy_to_dev(buffer, kernel_arg.dst_ptr, dst_bufsize, 0));
|
||||
}
|
||||
|
||||
// run tests
|
||||
std::cout << "run tests" << std::endl;
|
||||
RT_CHECK(run_test(kernel_arg, dst_bufsize, dst_width, dst_height, dst_bpp));
|
||||
|
||||
// cleanup
|
||||
std::cout << "cleanup" << std::endl;
|
||||
cleanup();
|
||||
|
||||
std::cout << "PASSED!" << std::endl;
|
||||
|
||||
return 0;
|
||||
}
|
||||
BIN
tests/regression/tex/output.tga
Normal file
|
After Width: | Height: | Size: 16 KiB |
BIN
tests/regression/tex/palette16.tga
Normal file
|
After Width: | Height: | Size: 1.0 KiB |
BIN
tests/regression/tex/palette4.tga
Normal file
|
After Width: | Height: | Size: 108 B |
BIN
tests/regression/tex/palette64.tga
Normal file
|
After Width: | Height: | Size: 16 KiB |
167
tests/regression/tex/texsw.h
Normal file
@@ -0,0 +1,167 @@
|
||||
#ifndef _TEXSW_H_
/* The guard macro must be defined here, otherwise a second inclusion of this
 * header is not suppressed. */
#define _TEXSW_H_

#include "common.h"
|
||||
|
||||
/* Upper bound applied to the level-of-detail value. */
#define TEX_LOD_MAX 11

/* Smaller / larger of two values. Arguments are fully parenthesized so that
 * operands with low-precedence operators (e.g. `a | b`) compare correctly.
 * Each argument may be evaluated twice. */
#define MIN(x, y) (((x) < (y)) ? (x) : (y))

#define MAX(x, y) (((x) > (y)) ? (x) : (y))
|
||||
|
||||
/*
 * Applies the wrap mode to a texture coordinate held in the low 20 bits.
 *   wrap == 1: keep only the low 20 bits of value (repeat).
 *   otherwise: clamp value to the range [0, 0xfffff].
 *
 * Declared `static inline`: with plain C99 `inline` no out-of-line definition
 * is emitted, so the link fails whenever the compiler does not inline a call.
 */
static inline int address(int wrap, int value) {
	switch (wrap) {
	case 1:
		return value & 0xfffff;
	default:
	case 0:
		if (value < 0)
			return 0;
		if (value > 0xfffff)
			return 0xfffff;
		return value;
	}
}
|
||||
|
||||
/*
 * Splits a texel into two words (*l, *h) whose fields are spread apart so
 * they can be interpolated in parallel by lerp().
 *   format 1, 2: value is passed through in *l.
 *   format 3:    two 8-bit fields are spread into bits 0..7 and 16..23.
 *   format 4:    fields are separated with mask 0x07e0f81f.
 *   format 5:    four 4-bit fields are spread into one byte each.
 *   otherwise:   four 8-bit fields; even bytes go to *l, odd bytes to *h.
 * *h is 0 for every format except the last.
 *
 * Declared `static inline`: with plain C99 `inline` no out-of-line definition
 * is emitted, so the link fails whenever the compiler does not inline a call.
 */
static inline void unpack(int format, int value, int* l, int* h) {
	switch (format) {
	case 1:
	case 2:
		*l = value;
		*h = 0;
		break;
	case 3:
		*l = (value | (value << 8)) & 0x00ff00ff;
		*h = 0;
		break;
	case 4:
		*l = (value | (value << 16)) & 0x07e0f81f;
		*h = 0;
		break;
	case 5:
		*l = (value | (value << 12)) & 0x0f0f0f0f;
		*h = 0;
		break;
	default:
	case 0:
		*l = value & 0x00ff00ff;
		*h = (value >> 8) & 0x00ff00ff;
		break;
	}
}
|
||||
|
||||
/*
 * Interpolates between the packed words (al, ah) and (bl, bh):
 * result = a + (((b - a) * frac) >> 8), masked to 0x00ff00ff.
 * frac == 0 yields a (masked).
 *
 * Declared `static inline`: with plain C99 `inline` no out-of-line definition
 * is emitted, so the link fails whenever the compiler does not inline a call.
 */
static inline void lerp(int al, int ah, int bl, int bh, int frac, int* l, int* h) {
	*l = (al + (((bl - al) * frac) >> 8)) & 0x00ff00ff;
	*h = (ah + (((bh - ah) * frac) >> 8)) & 0x00ff00ff;
}
|
||||
|
||||
/*
 * Inverse of unpack(): folds the spread fields of (l, h) back into a texel.
 *   format 1, 2: returns l unchanged.
 *   format 3, 4, 5: folds the upper fields of l down by 8, 16 or 12 bits and
 *                   keeps the low 16 bits.
 *   otherwise:   returns (h << 8) | l.
 *
 * Declared `static inline`: with plain C99 `inline` no out-of-line definition
 * is emitted, so the link fails whenever the compiler does not inline a call.
 */
static inline int pack(int format, int l, int h) {
	switch (format) {
	case 1:
	case 2:
		return l;
	case 3:
		return (l | (l >> 8)) & 0xffff;
	case 4:
		return (l | (l >> 16)) & 0xffff;
	case 5:
		return (l | (l >> 12)) & 0xffff;
	default:
	case 0:
		return (h << 8) | l;
	}
}
|
||||
|
||||
inline int tex_sw(struct kernel_arg_t* state, int stage, int u, int v, int lod) {
|
||||
int base_addr = state->src_ptr;
|
||||
int mip_offset = 0;
|
||||
int log_width = state->src_logWidth;
|
||||
int log_height = state->src_logHeight;
|
||||
int format = state->format;
|
||||
int wrap = state->wrap;
|
||||
int filter = state->filter;
|
||||
|
||||
int32_t* pBits = ((uint32_t*)base_addr) + mip_offset;
|
||||
|
||||
if (filter) {
|
||||
int u0 = address(wrap, u - (0x80000 >> log_width));
|
||||
int v0 = address(wrap, v - (0x80000 >> log_height));
|
||||
int u1 = address(wrap, u + (0x80000 >> log_width));
|
||||
int v1 = address(wrap, v + (0x80000 >> log_height));
|
||||
|
||||
int x0 = u0 >> (20 - log_width);
|
||||
int y0 = v0 >> (20 - log_height);
|
||||
int x1 = u1 >> (20 - log_width);
|
||||
int y1 = v1 >> (20 - log_height);
|
||||
|
||||
// memory lookup
|
||||
|
||||
int c0 = pBits[x0 + (y0 << log_width)];
|
||||
int c1 = pBits[x1 + (y0 << log_width)];
|
||||
int c2 = pBits[x0 + (y1 << log_width)];
|
||||
int c3 = pBits[x1 + (y1 << log_width)];
|
||||
|
||||
// filtering
|
||||
|
||||
int alpha = x0 & 0xff;
|
||||
int beta = y0 & 0xff;
|
||||
|
||||
int c0a, c0b;
|
||||
int c1a, c1b;
|
||||
int c01a, c01b;
|
||||
|
||||
unpack(format, c0, &c0a, &c0b);
|
||||
unpack(format, c1, &c1a, &c1b);
|
||||
lerp(c0a, c0b, c1a, c1b, alpha, &c01a, &c01b);
|
||||
|
||||
int c2a, c2b;
|
||||
int c3a, c3b;
|
||||
int c23a, c23b;
|
||||
|
||||
unpack(format, c2, &c2a, &c2b);
|
||||
unpack(format, c3, &c3a, &c3b);
|
||||
lerp(c2a, c2b, c3a, c3b, alpha, &c23a, &c23b);
|
||||
|
||||
int c4a, c4b;
|
||||
lerp(c01a, c01b, c23a, c23b, beta, &c4a, &c4b);
|
||||
return pack(format, c4a, c4b);
|
||||
} else {
|
||||
int u0 = address(wrap, u);
|
||||
int v0 = address(wrap, v);
|
||||
|
||||
int x0 = u0 >> (20 - log_width);
|
||||
int y0 = v0 >> (20 - log_height);
|
||||
|
||||
int c0 = pBits[x0 + (y0 <<log_width)];
|
||||
|
||||
int c0a, c0b;
|
||||
unpack(format, c0, &c0a, &c0b);
|
||||
return pack(format, c0a, c0b);
|
||||
}
|
||||
}
|
||||
|
||||
inline int vx_tex3(int stage, int u, int v, int lod) {
|
||||
int lodn = MIN(lod + 0x100000, TEX_LOD_MAX);
|
||||
int a = vx_tex(0, u, v, lod);
|
||||
int b = vx_tex(0, u, v, lodn);
|
||||
int al = a & 0x00ff00ff;
|
||||
int ah = (a >> 8) & 0x00ff00ff;
|
||||
int bl = b & 0x00ff00ff;
|
||||
int bh = (b >> 8) & 0x00ff00ff;
|
||||
int frac = (lod >> 12) & 0xff;
|
||||
int cl = (al + (((bl - al) * frac) >> 8)) & 0x00ff00ff;
|
||||
int ch = (ah + (((bh - ah) * frac) >> 8)) & 0x00ff00ff;
|
||||
int c = al | (ah << 8);
|
||||
return c;
|
||||
}
|
||||
|
||||
inline int tex3_sw(struct kernel_arg_t* state, int stage, int u, int v, int lod) {
|
||||
int lodn = MIN(lod + 0x10000, TEX_LOD_MAX);
|
||||
int a = tex_sw(state, 0, u, v, lod);
|
||||
int b = tex_sw(state, 0, u, v, lodn);
|
||||
int al = a & 0x00ff00ff;
|
||||
int ah = (a >> 8) & 0x00ff00ff;
|
||||
|
||||
int bl = b & 0x00ff00ff;
|
||||
int bh = (b >> 8) & 0x00ff00ff;
|
||||
int frac = (lod >> 12) & 0xff;
|
||||
int cl = (al + (((bl - al) * frac) >> 8)) & 0x00ff00ff;
|
||||
int ch = (ah + (((bh - ah) * frac) >> 8)) & 0x00ff00ff;
|
||||
int c = al | (ah << 8);
|
||||
return c;
|
||||
}
|
||||
|
||||
#endif
|
||||
BIN
tests/regression/tex/toad.tga
Normal file
|
After Width: | Height: | Size: 16 KiB |
217
tests/regression/tex/utils.cpp
Normal file
@@ -0,0 +1,217 @@
|
||||
#include "utils.h"
|
||||
#include <fstream>
|
||||
#include <assert.h>
|
||||
#include "format.h"
|
||||
|
||||
// On-disk TGA file header. The struct is packed so that it can be read and
// written as one 18-byte blob. All fields are unsigned: with signed types an
// ID length above 127 or a dimension above 32767 would be read back negative.
struct __attribute__((__packed__)) tga_header_t {
  uint8_t  idlength;        // bytes of image ID that follow the header
  uint8_t  colormaptype;    // 0 = no palette
  uint8_t  imagetype;       // 2 is the only type LoadTGA/SaveTGA handle
  uint16_t colormaporigin;
  uint16_t colormaplength;
  uint8_t  colormapdepth;
  uint16_t xoffset;
  uint16_t yoffset;
  uint16_t width;           // image width in pixels
  uint16_t height;          // image height in pixels
  uint8_t  bitsperpixel;    // 16, 24 or 32 are accepted by LoadTGA
  uint8_t  imagedescriptor;
};

static_assert(sizeof(tga_header_t) == 18, "TGA header must be exactly 18 bytes");
|
||||
|
||||
int LoadTGA(const char *filename,
|
||||
std::vector<uint8_t> &pixels,
|
||||
uint32_t *width,
|
||||
uint32_t *height) {
|
||||
std::ifstream ifs(filename, std::ios::in | std::ios::binary);
|
||||
if (!ifs.is_open()) {
|
||||
std::cerr << "couldn't open file: " << filename << "!" << std::endl;
|
||||
return -1;
|
||||
}
|
||||
|
||||
tga_header_t header;
|
||||
ifs.read(reinterpret_cast<char *>(&header), sizeof(tga_header_t));
|
||||
if (ifs.fail()) {
|
||||
std::cerr << "invalid TGA file header!" << std::endl;
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (header.imagetype != 2) {
|
||||
std::cerr << "unsupported TGA encoding format!" << std::endl;
|
||||
return -1;
|
||||
}
|
||||
|
||||
ifs.seekg(header.idlength, std::ios::cur); // skip string
|
||||
if (ifs.fail()) {
|
||||
std::cerr << "invalid TGA file!" << std::endl;
|
||||
return -1;
|
||||
}
|
||||
|
||||
switch (header.bitsperpixel) {
|
||||
case 16:
|
||||
case 24:
|
||||
case 32: {
|
||||
auto stride = header.bitsperpixel / 8;
|
||||
std::vector<uint8_t> staging(stride * header.width * header.height);
|
||||
|
||||
// Read pixels data
|
||||
ifs.read((char*)staging.data(), staging.size());
|
||||
if (ifs.fail()) {
|
||||
std::cerr << "invalid TGA file!" << std::endl;
|
||||
return -1;
|
||||
}
|
||||
|
||||
// format conversion to RGBA
|
||||
pixels.resize(4 * header.width * header.height);
|
||||
const uint8_t* src_bytes = staging.data();
|
||||
uint32_t* dst_bytes = (uint32_t*)pixels.data();
|
||||
for (const uint8_t* const src_end = src_bytes + staging.size();
|
||||
src_bytes != src_end;
|
||||
src_bytes += stride) {
|
||||
ColorARGB color;
|
||||
switch (stride) {
|
||||
case 2:
|
||||
color = Format::ConvertFrom<FORMAT_A1R5G5B5, true>(src_bytes);
|
||||
break;
|
||||
case 3:
|
||||
color = Format::ConvertFrom<FORMAT_R8G8B8, true>(src_bytes);
|
||||
break;
|
||||
case 4:
|
||||
color = Format::ConvertFrom<FORMAT_A8R8G8B8, true>(src_bytes);
|
||||
break;
|
||||
default:
|
||||
std::abort();
|
||||
}
|
||||
*dst_bytes++ = color;
|
||||
}
|
||||
break;
|
||||
}
|
||||
default:
|
||||
std::cerr << "unsupported TGA bitsperpixel!" << std::endl;
|
||||
return -1;
|
||||
}
|
||||
|
||||
*width = header.width;
|
||||
*height = header.height;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Writes `pixels` as an uncompressed true-colour TGA file (image type 2).
//
//   filename : path of the file to create
//   pixels   : at least width * height * bpp bytes of tightly packed rows
//   width    : image width in pixels (at most 65535)
//   height   : image height in pixels (at most 65535)
//   bpp      : BYTES per pixel: 2, 3 or 4
//
// Rows are written starting with the last row of `pixels`.
// Returns 0 on success, or -1 after printing a message to std::cerr. The
// arguments are validated before the file is opened, so an invalid call no
// longer leaves an empty file behind.
int SaveTGA(const char *filename,
            const std::vector<uint8_t> &pixels,
            uint32_t width,
            uint32_t height,
            uint32_t bpp) {
  if (bpp < 2 || bpp > 4) {
    std::cerr << "unsupported pixel stride: " << bpp << "!" << std::endl;
    return -1;
  }

  // the header stores each dimension in 16 bits
  if (width > 0xffff || height > 0xffff) {
    std::cerr << "unsupported image size: " << width << "x" << height << "!" << std::endl;
    return -1;
  }

  const size_t pitch = static_cast<size_t>(bpp) * width;
  if (pixels.size() < pitch * height) {
    std::cerr << "pixel buffer too small for a " << width << "x" << height
              << " image!" << std::endl;
    return -1;
  }

  std::ofstream ofs(filename, std::ios::out | std::ios::binary);
  if (!ofs.is_open()) {
    std::cerr << "couldn't create file: " << filename << "!" << std::endl;
    return -1;
  }

  tga_header_t header;
  header.idlength = 0;
  header.colormaptype = 0; // no palette
  header.imagetype = 2;    // uncompressed true-colour data
  header.colormaporigin = 0;
  header.colormaplength = 0;
  header.colormapdepth = 0;
  header.xoffset = 0;
  header.yoffset = 0;
  header.width = width;
  header.height = height;
  header.bitsperpixel = bpp * 8;
  header.imagedescriptor = 0;

  // write header
  ofs.write(reinterpret_cast<char *>(&header), sizeof(tga_header_t));

  // Write pixel data one whole row at a time, last row first. Counting y
  // down from `height` never forms a pointer before the buffer, even when
  // height is 0.
  for (uint32_t y = height; y-- > 0;) {
    const uint8_t* pixel_row = pixels.data() + y * pitch;
    ofs.write(reinterpret_cast<const char *>(pixel_row), pitch);
  }

  if (ofs.fail()) {
    std::cerr << "couldn't write file: " << filename << "!" << std::endl;
    return -1;
  }

  return 0;
}
|
||||
|
||||
// Prints an image to std::cout as hexadecimal pixel values: one text line per
// image row, pixels separated by ", ". The bytes of each pixel are combined
// with the first byte in the least significant position.
//
//   pixels : width * height * bpp bytes (checked with assert)
//   bpp    : BYTES per pixel; values above 4 do not fit the 32-bit accumulator
//
// The formatting flags of std::cout are restored before returning, so the
// hex mode does not leak into later output.
void dump_image(const std::vector<uint8_t>& pixels, uint32_t width, uint32_t height, uint32_t bpp) {
  assert(width * height * bpp == pixels.size());

  const std::ios_base::fmtflags saved_flags = std::cout.flags();

  const uint8_t* pixel_bytes = pixels.data();
  for (uint32_t y = 0; y < height; ++y) {
    for (uint32_t x = 0; x < width; ++x) {
      uint32_t pixel32 = 0;
      for (uint32_t b = 0; b < bpp; ++b) {
        uint32_t pixel8 = *pixel_bytes++;
        pixel32 |= pixel8 << (b * 8);
      }
      if (x) std::cout << ", ";
      std::cout << std::hex << pixel32;
    }
    std::cout << std::endl;
  }

  std::cout.flags(saved_flags);
}
|
||||
|
||||
// Copies a rectangle from srcDesc to dstDesc, converting between pixel
// formats with the blit routine selected from a BlitTable.
//
//   dstOffsetX/Y, srcOffsetX/Y : top-left corner of the rectangle in each
//                                surface
//   copyWidth, copyHeight      : requested size; reduced so that the
//                                rectangle fits both surfaces
//
// Returns -1 when an offset lies at or beyond its surface's width/height;
// otherwise returns whatever the selected blit routine returns.
int CopyBuffers(SurfaceDesc &dstDesc,
                int32_t dstOffsetX,
                int32_t dstOffsetY,
                uint32_t copyWidth,
                uint32_t copyHeight,
                const SurfaceDesc &srcDesc,
                int32_t srcOffsetX,
                int32_t srcOffsetY) {

  static const BlitTable s_blitTable;

  if ((srcOffsetX >= (int32_t)srcDesc.Width) || (srcOffsetY >= (int32_t)srcDesc.Height) ||
      (dstOffsetX >= (int32_t)dstDesc.Width) || (dstOffsetY >= (int32_t)dstDesc.Height)) {
    return -1;
  }

  // never copy more than a full surface ...
  if (copyWidth > dstDesc.Width) {
    copyWidth = dstDesc.Width;
  }
  if (copyWidth > srcDesc.Width) {
    copyWidth = srcDesc.Width;
  }
  if (copyHeight > dstDesc.Height) {
    copyHeight = dstDesc.Height;
  }
  if (copyHeight > srcDesc.Height) {
    copyHeight = srcDesc.Height;
  }

  // ... and never more than what is left to the right of / below each
  // offset. Computed in 64-bit signed arithmetic; the offset checks above
  // guarantee that every remaining extent is at least 1. For offsets <= 0
  // these limits are no tighter than the ones already applied.
  const int64_t dstRemainW = (int64_t)dstDesc.Width  - dstOffsetX;
  const int64_t srcRemainW = (int64_t)srcDesc.Width  - srcOffsetX;
  const int64_t dstRemainH = (int64_t)dstDesc.Height - dstOffsetY;
  const int64_t srcRemainH = (int64_t)srcDesc.Height - srcOffsetY;

  if ((int64_t)copyWidth > dstRemainW) {
    copyWidth = (uint32_t)dstRemainW;
  }
  if ((int64_t)copyWidth > srcRemainW) {
    copyWidth = (uint32_t)srcRemainW;
  }
  if ((int64_t)copyHeight > dstRemainH) {
    copyHeight = (uint32_t)dstRemainH;
  }
  if ((int64_t)copyHeight > srcRemainH) {
    copyHeight = (uint32_t)srcRemainH;
  }

  return s_blitTable.get(srcDesc.Format, dstDesc.Format)(
      dstDesc, dstOffsetX, dstOffsetY, copyWidth, copyHeight, srcDesc,
      srcOffsetX, srcOffsetY);
}
|
||||
|
||||
int ConvertImage(std::vector<uint8_t>& dst_pixels,
|
||||
const std::vector<uint8_t>& src_pixels,
|
||||
uint32_t width,
|
||||
uint32_t height,
|
||||
ePixelFormat src_format,
|
||||
ePixelFormat dst_format) {
|
||||
|
||||
uint32_t src_pitch = Format::GetInfo(src_format).BytePerPixel * width;
|
||||
uint32_t dst_pitch = Format::GetInfo(dst_format).BytePerPixel * width;
|
||||
|
||||
dst_pixels.resize(dst_pitch * height);
|
||||
|
||||
SurfaceDesc srcDesc{src_format, (uint8_t*)src_pixels.data(), width, height, src_pitch};
|
||||
SurfaceDesc dstDesc{dst_format, dst_pixels.data(), width, height, dst_pitch};
|
||||
|
||||
return CopyBuffers(dstDesc, 0, 0, width, height, srcDesc, 0, 0);
|
||||
}
|
||||
42
tests/regression/tex/utils.h
Normal file
@@ -0,0 +1,42 @@
|
||||
#include <cstdint>
|
||||
#include <vector>
|
||||
#include <iostream>
|
||||
#include "blitter.h"
|
||||
|
||||
// True when x is a non-zero power of two.
#define ISPOW2(x) (((x) != 0) && (0 == ((x) & ((x) - 1))))

// Base-2 logarithm rounded up: the smallest n such that (1 << n) >= value.
// Returns 0 for value 0 and value 1.
//
// The computation uses an explicit 64-bit word. The original paired
// __builtin_clzl (whose operand is `unsigned long`) with a hard-coded 32-bit
// width, which gives wrong results wherever `unsigned long` is 64 bits wide.
inline uint32_t ilog2 (uint32_t value) {
  if (value <= 1) {
    return 0;
  }
  // 2 * value - 1 has its highest set bit at position ceil(log2(value))
  const uint64_t bits = (static_cast<uint64_t>(value) << 1) - 1;
  return 63u - static_cast<uint32_t>(__builtin_clzll(bits));
}
|
||||
|
||||
int LoadTGA(const char *filename,
|
||||
std::vector<uint8_t> &pixels,
|
||||
uint32_t *width,
|
||||
uint32_t *height);
|
||||
|
||||
int SaveTGA(const char *filename,
|
||||
const std::vector<uint8_t> &pixels,
|
||||
uint32_t width,
|
||||
uint32_t height,
|
||||
uint32_t bpp);
|
||||
|
||||
int CopyBuffers(SurfaceDesc &dstDesc,
|
||||
int32_t dstOffsetX,
|
||||
int32_t dstOffsetY,
|
||||
uint32_t copyWidth,
|
||||
uint32_t copyHeight,
|
||||
const SurfaceDesc &srcDesc,
|
||||
int32_t srcOffsetX,
|
||||
int32_t srcOffsetY);
|
||||
|
||||
int ConvertImage(std::vector<uint8_t>& dst_pixels,
|
||||
const std::vector<uint8_t>& src_pixels,
|
||||
uint32_t width,
|
||||
uint32_t height,
|
||||
ePixelFormat src_format,
|
||||
ePixelFormat dst_format);
|
||||
|
||||
void dump_image(const std::vector<uint8_t>& pixels,
|
||||
uint32_t width,
|
||||
uint32_t height,
|
||||
uint32_t bpp);
|
||||