Commit d28b4c52 authored by Hans-Kristian Arntzen
Browse files

git subrepo pull mupen64plus-video-paraLLEl/parallel-rdp

subrepo:
  subdir:   "mupen64plus-video-paraLLEl/parallel-rdp"
  merged:   "e79d0cb"
upstream:
  origin:   "https://github.com/Themaister/parallel-rdp-standalone.git"
  branch:   "master"
  commit:   "e79d0cb"
git-subrepo:
  version:  "0.4.3"
  origin:   "???"
  commit:   "???"
parent aec53488
Pipeline #58878 failed with stages
in 4 minutes and 26 seconds
......@@ -6,7 +6,7 @@
[subrepo]
remote = https://github.com/Themaister/parallel-rdp-standalone.git
branch = master
commit = 94518dde3b72703c18273a13e50bb2313b908b0b
parent = f9c7a9df872723f98c1a2098c7335c5aef6c88ed
commit = e79d0cb775faa3953457bf29eb1ca27f540cf370
parent = aec5348830ebe206ea72292091a040ab622585c4
method = rebase
cmdver = 0.4.1
cmdver = 0.4.3
00d1d0252203afa6883ae0a6652e1c4fd56cd093
b3eeb49e708a215ee990415f303ce9936e55bd66
......@@ -48,5 +48,6 @@ ifeq (,$(findstring win,$(platform)))
PARALLEL_RDP_LDFLAGS += -ldl
else
PARALLEL_RDP_CFLAGS += -DVK_USE_PLATFORM_WIN32_KHR
PARALLEL_RDP_LDFLAGS += -lwinmm
endif
......@@ -57,6 +57,21 @@ CommandProcessor::CommandProcessor(Vulkan::Device &device_, void *rdram_ptr,
info.domain = BufferDomain::CachedCoherentHostPreferCached;
info.misc = BUFFER_MISC_ZERO_INITIALIZE_BIT;
if (const char *env = getenv("PARALLEL_RDP_DUMP_PATH"))
{
dump_writer.reset(new RDPDumpWriter);
if (!dump_writer->init(env, rdram_size, hidden_rdram_size))
{
LOGE("Failed to init RDP dump: %s.\n", env);
dump_writer.reset();
}
else
{
LOGI("Dumping RDP commands to: %s.\n", env);
flags |= COMMAND_PROCESSOR_FLAG_HOST_VISIBLE_HIDDEN_RDRAM_BIT;
}
}
if (rdram_ptr)
{
bool allow_memory_host = true;
......@@ -70,10 +85,13 @@ CommandProcessor::CommandProcessor(Vulkan::Device &device_, void *rdram_ptr,
import_size = (import_size + align - 1) & ~(align - 1);
info.size = import_size;
rdram = device.create_imported_host_buffer(info, VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT, rdram_ptr);
if (!rdram)
LOGE("Failed to allocate RDRAM with VK_EXT_external_memory_host.\n");
}
else
if (!rdram)
{
LOGW("VK_EXT_external_memory_host is not supported on this device. Falling back to a slower path.\n");
LOGW("VK_EXT_external_memory_host not supported or failed, falling back to a slower path.\n");
is_host_coherent = false;
rdram_offset = 0;
host_rdram = static_cast<uint8_t *>(rdram_ptr) + rdram_offset_;
......@@ -840,7 +858,7 @@ OP(sync_load) OP(sync_pipe)
OP(sync_tile)
#undef OP
void CommandProcessor::enqueue_command(unsigned num_words, const uint32_t *words)
void CommandProcessor::enqueue_command_inner(unsigned num_words, const uint32_t *words)
{
if (single_threaded_processing)
enqueue_command_direct(num_words, words);
......@@ -848,6 +866,31 @@ void CommandProcessor::enqueue_command(unsigned num_words, const uint32_t *words
ring.enqueue_command(num_words, words);
}
// Public entry point for submitting a command.
// When an RDP dump writer is active this additionally:
//  - at the start of a command list, waits on a freshly signalled timeline value
//    and writes RDRAM / hidden RDRAM changes to the dump before the command is queued;
//  - after queueing, records the command in the dump, or records a
//    "signal complete" marker (and closes the command list) when the command is SyncFull.
void CommandProcessor::enqueue_command(unsigned num_words, const uint32_t *words)
{
	const bool starts_command_list = dump_writer && !dump_in_command_list;
	if (starts_command_list)
	{
		// Wait for the timeline before reading memory back for the dump.
		wait_for_timeline(signal_timeline());
		dump_writer->flush_dram(begin_read_rdram(), rdram_size);
		dump_writer->flush_hidden_dram(begin_read_hidden_rdram(), hidden_rdram->get_create_info().size);
		dump_in_command_list = true;
	}

	enqueue_command_inner(num_words, words);

	if (!dump_writer)
		return;

	// The command ID is taken from bits [29:24] of the first word.
	const uint32_t cmd_id = (words[0] >> 24) & 63;
	if (Op(cmd_id) != Op::SyncFull)
	{
		dump_writer->emit_command(cmd_id, words, num_words);
		return;
	}

	// SyncFull ends the current command list as far as the dump is concerned.
	dump_writer->signal_complete();
	dump_in_command_list = false;
}
void CommandProcessor::enqueue_command_direct(unsigned, const uint32_t *words)
{
#define OP(x) &CommandProcessor::op_##x
......@@ -916,12 +959,14 @@ void CommandProcessor::set_quirks(const Quirks &quirks_)
uint32_t(Op::MetaSetQuirks) << 24u,
quirks_.u.words[0],
};
enqueue_command(2, words);
enqueue_command_inner(2, words);
}
// Forwards a VI register write to the VI object and, if an RDP dump
// is being written, records the same write in the dump.
void CommandProcessor::set_vi_register(VIRegister reg, uint32_t value)
{
	vi.set_vi_register(reg, value);

	if (!dump_writer)
		return;

	dump_writer->set_vi_register(static_cast<uint32_t>(reg), value);
}
void *CommandProcessor::begin_read_rdram()
......@@ -977,7 +1022,7 @@ void CommandProcessor::flush()
const uint32_t words[1] = {
uint32_t(Op::MetaFlush) << 24,
};
enqueue_command(1, words);
enqueue_command_inner(1, words);
}
uint64_t CommandProcessor::signal_timeline()
......@@ -989,7 +1034,7 @@ uint64_t CommandProcessor::signal_timeline()
uint32_t(timeline_value),
uint32_t(timeline_value >> 32),
};
enqueue_command(3, words);
enqueue_command_inner(3, words);
return timeline_value;
}
......@@ -1014,6 +1059,14 @@ Vulkan::ImageHandle CommandProcessor::scanout(const ScanoutOptions &opts, VkImag
Vulkan::QueryPoolHandle start_ts, end_ts;
drain_command_ring();
if (dump_writer)
{
wait_for_timeline(signal_timeline());
dump_writer->flush_dram(begin_read_rdram(), rdram_size);
dump_writer->flush_hidden_dram(begin_read_hidden_rdram(), hidden_rdram->get_create_info().size);
dump_writer->end_frame();
}
// Block idle callbacks triggering while we're doing this.
renderer.lock_command_processing();
{
......
......@@ -31,6 +31,7 @@
#include "rdp_common.hpp"
#include "command_ring.hpp"
#include "worker_thread.hpp"
#include "rdp_dump_write.hpp"
#ifndef GRANITE_VULKAN_MT
#error "Granite Vulkan backend must be built with multithreading support."
......@@ -175,6 +176,7 @@ private:
void clear_tmem();
void clear_buffer(Vulkan::Buffer &buffer, uint32_t value);
void init_renderer();
void enqueue_command_inner(unsigned num_words, const uint32_t *words);
Vulkan::ImageHandle scanout(const ScanoutOptions &opts, VkImageLayout target_layout);
......@@ -234,5 +236,8 @@ private:
void decode_triangle_setup(TriangleSetup &setup, const uint32_t *words) const;
Quirks quirks;
std::unique_ptr<RDPDumpWriter> dump_writer;
bool dump_in_command_list = false;
};
}
/* Copyright (c) 2021 Themaister
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "rdp_dump_write.hpp"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
namespace RDP
{
// Finalizes the dump when the writer is destroyed.
RDPDumpWriter::~RDPDumpWriter()
{
	// end() writes the EOF marker, closes the stream and resets `file` to nullptr
	// (or does nothing if no file is open), so no additional fclose() is needed here.
	end();
}
// Opens `path` for binary writing and emits the dump header:
// the 8-byte magic "RDPDUMP2" followed by dram_size and hidden_dram_size
// as raw 32-bit values (written in host byte order).
// The two shadow caches are reset to zero-filled buffers of the given sizes.
// Returns false if a dump file is already open or if the file cannot be opened.
bool RDPDumpWriter::init(const char *path, uint32_t dram_size, uint32_t hidden_dram_size)
{
	if (file != nullptr)
		return false;

	// Zero-filled shadow copies used by flush() to detect changed blocks.
	rdp_dram_cache.assign(dram_size, 0);
	rdp_hidden_dram_cache.assign(hidden_dram_size, 0);

	file = fopen(path, "wb");
	if (file != nullptr)
	{
		fwrite("RDPDUMP2", 8, 1, file);
		fwrite(&dram_size, sizeof(dram_size), 1, file);
		fwrite(&hidden_dram_size, sizeof(hidden_dram_size), 1, file);
		return true;
	}

	return false;
}
// Appends an END_FRAME marker to the dump. No-op when no dump file is open.
void RDPDumpWriter::end_frame()
{
	if (file)
	{
		const uint32_t marker = RDP_DUMP_CMD_END_FRAME;
		fwrite(&marker, sizeof(marker), 1, file);
	}
}
// Terminates the dump: writes the EOF marker, closes the file and drops the
// shadow caches. No-op when no dump file is open, so it is safe to call twice.
void RDPDumpWriter::end()
{
	if (file)
	{
		const uint32_t eof_marker = RDP_DUMP_CMD_EOF;
		fwrite(&eof_marker, sizeof(eof_marker), 1, file);

		fclose(file);
		file = nullptr;

		rdp_dram_cache.clear();
		rdp_hidden_dram_cache.clear();
	}
}
// Writes every block of `dram_` that differs from `cache` to the dump and
// brings `cache` up to date, then writes `flush_cmd`.
//
// Memory is compared in 4 KiB blocks. Each changed block is emitted as:
//   block_cmd, byte offset, byte count, raw bytes.
// The final block is clamped to the bytes remaining, so a `size` that is not a
// multiple of 4 KiB neither reads past the end of `dram_` / `cache` nor writes
// past the end of `cache`.
//
// NOTE(review): `cache` is assumed to hold at least `size` bytes; this function
// cannot verify that - confirm callers pass the same size that init() received.
// No-op when no dump file is open.
void RDPDumpWriter::flush(const void *dram_, uint32_t size,
                          RDPDumpCmd block_cmd, RDPDumpCmd flush_cmd,
                          uint8_t *cache)
{
	if (!file)
		return;

	const auto *dram = static_cast<const uint8_t *>(dram_);
	const uint32_t block_size = 4 * 1024;

	uint32_t offset = 0;
	while (offset < size)
	{
		// Clamp the last block so we never touch bytes beyond `size`.
		const uint32_t remaining = size - offset;
		const uint32_t chunk_size = remaining < block_size ? remaining : block_size;

		if (memcmp(dram + offset, cache + offset, chunk_size) != 0)
		{
			const uint32_t cmd = block_cmd;
			fwrite(&cmd, sizeof(cmd), 1, file);
			fwrite(&offset, sizeof(offset), 1, file);
			fwrite(&chunk_size, sizeof(chunk_size), 1, file);
			fwrite(dram + offset, 1, chunk_size, file);
			memcpy(cache + offset, dram + offset, chunk_size);
		}

		// Advancing by chunk_size (<= remaining) also rules out 32-bit wrap-around.
		offset += chunk_size;
	}

	const uint32_t cmd = flush_cmd;
	fwrite(&cmd, sizeof(cmd), 1, file);
}
// Dumps changed blocks of `dram_` (compared against rdp_dram_cache) using the
// UPDATE_DRAM block command, followed by the UPDATE_DRAM_FLUSH command.
void RDPDumpWriter::flush_dram(const void *dram_, uint32_t size)
{
flush(dram_, size, RDP_DUMP_CMD_UPDATE_DRAM, RDP_DUMP_CMD_UPDATE_DRAM_FLUSH, rdp_dram_cache.data());
}
// Dumps changed blocks of `dram_` (compared against rdp_hidden_dram_cache) using the
// UPDATE_HIDDEN_DRAM block command, followed by the UPDATE_HIDDEN_DRAM_FLUSH command.
void RDPDumpWriter::flush_hidden_dram(const void *dram_, uint32_t size)
{
flush(dram_, size, RDP_DUMP_CMD_UPDATE_HIDDEN_DRAM, RDP_DUMP_CMD_UPDATE_HIDDEN_DRAM_FLUSH, rdp_hidden_dram_cache.data());
}
// Appends a SIGNAL_COMPLETE marker to the dump. No-op when no dump file is open.
void RDPDumpWriter::signal_complete()
{
	if (file)
	{
		const uint32_t marker = RDP_DUMP_CMD_SIGNAL_COMPLETE;
		fwrite(&marker, sizeof(marker), 1, file);
	}
}
// Appends one RDP command record to the dump:
//   RDP_COMMAND tag, command ID, word count, then `cmd_words` 32-bit words from `cmd_data`.
// No-op when no dump file is open.
void RDPDumpWriter::emit_command(uint32_t command, const uint32_t *cmd_data, uint32_t cmd_words)
{
	if (file)
	{
		const uint32_t tag = RDP_DUMP_CMD_RDP_COMMAND;
		fwrite(&tag, sizeof(tag), 1, file);
		fwrite(&command, sizeof(command), 1, file);
		fwrite(&cmd_words, sizeof(cmd_words), 1, file);
		fwrite(cmd_data, sizeof(*cmd_data), cmd_words, file);
	}
}
// Appends a VI register write record to the dump:
//   SET_VI_REGISTER tag, register index, value.
// No-op when no dump file is open.
void RDPDumpWriter::set_vi_register(uint32_t vi_register, uint32_t value)
{
	if (file)
	{
		const uint32_t tag = RDP_DUMP_CMD_SET_VI_REGISTER;
		fwrite(&tag, sizeof(tag), 1, file);
		fwrite(&vi_register, sizeof(vi_register), 1, file);
		fwrite(&value, sizeof(value), 1, file);
	}
}
}
/* Copyright (c) 2021 Themaister
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#pragma once
#include <stdint.h>
#include <stdio.h>
#include <vector>
namespace RDP
{
// Writes a stream of RDP commands, VI register writes and RDRAM updates to a
// dump file ("RDPDUMP2" format).
//
// The writer owns the underlying FILE handle and closes it on destruction, so
// it is deliberately non-copyable: a copy would end up closing the same FILE twice.
class RDPDumpWriter
{
public:
	RDPDumpWriter() = default;
	~RDPDumpWriter();

	RDPDumpWriter(const RDPDumpWriter &) = delete;
	RDPDumpWriter &operator=(const RDPDumpWriter &) = delete;

	// Opens `path` and writes the dump header. Returns false on failure
	// or if a dump is already open.
	bool init(const char *path, uint32_t dram_size, uint32_t hidden_dram_size);

	// Write the blocks of (hidden) RDRAM that changed since the previous flush.
	void flush_dram(const void *dram, uint32_t size);
	void flush_hidden_dram(const void *dram, uint32_t size);

	// Record a "signal complete" marker.
	void signal_complete();
	// Record one RDP command consisting of `cmd_words` 32-bit words.
	void emit_command(uint32_t command, const uint32_t *cmd_data, uint32_t cmd_words);
	// Record a VI register write.
	void set_vi_register(uint32_t vi_register, uint32_t value);
	// Record an end-of-frame marker.
	void end_frame();

private:
	// Record tags written to the dump file. The numeric values are part of the
	// file format and must not be renumbered.
	enum RDPDumpCmd : uint32_t
	{
		RDP_DUMP_CMD_INVALID = 0,
		RDP_DUMP_CMD_UPDATE_DRAM = 1,
		RDP_DUMP_CMD_RDP_COMMAND = 2,
		RDP_DUMP_CMD_SET_VI_REGISTER = 3,
		RDP_DUMP_CMD_END_FRAME = 4,
		RDP_DUMP_CMD_SIGNAL_COMPLETE = 5,
		RDP_DUMP_CMD_EOF = 6,
		RDP_DUMP_CMD_UPDATE_DRAM_FLUSH = 7,
		RDP_DUMP_CMD_UPDATE_HIDDEN_DRAM = 8,
		RDP_DUMP_CMD_UPDATE_HIDDEN_DRAM_FLUSH = 9,
		RDP_DUMP_CMD_INT_MAX = 0x7fffffff
	};

	// Open dump file, or nullptr when no dump is active.
	FILE *file = nullptr;
	// Shadow copies of the last dumped memory contents, used to emit only changed blocks.
	std::vector<uint8_t> rdp_dram_cache;
	std::vector<uint8_t> rdp_hidden_dram_cache;

	void flush(const void *dram_, uint32_t size, RDPDumpCmd block_cmd, RDPDumpCmd flush_cmd, uint8_t *cache);
	// Writes the EOF marker and closes the file.
	void end();
};
}
......@@ -3085,9 +3085,6 @@ void Renderer::load_tile(uint32_t tile, const LoadTileInfo &info)
auto &meta = tiles[tile].meta;
unsigned pixels_coverered_per_line = (((info.shi >> 2) - (info.slo >> 2)) + 1) & 0xfff;
if (meta.fmt == TextureFormat::YUV)
pixels_coverered_per_line *= 2;
// Technically, 32-bpp TMEM upload and YUV upload will work like 16bpp, just split into two halves, but that also means
// we get 2kB wraparound instead of 4kB wraparound, so this works out just fine for our purposes.
unsigned quad_words_covered_per_line = ((pixels_coverered_per_line << unsigned(meta.size)) + 15) >> 4;
......@@ -3101,16 +3098,22 @@ void Renderer::load_tile(uint32_t tile, const LoadTileInfo &info)
// Compute a conservative estimate for how many bytes we're going to splat down into TMEM.
unsigned bytes_covered_per_line = std::max<unsigned>(quad_words_covered_per_line * 8, meta.stride);
unsigned max_bytes_per_line = 0x1000;
// We need to write lower and upper halves at once,
// so we need to wrap around at 2k boundary.
if (meta.fmt == TextureFormat::YUV)
max_bytes_per_line /= 2;
unsigned num_lines = ((info.thi >> 2) - (info.tlo >> 2)) + 1;
unsigned total_bytes_covered = bytes_covered_per_line * num_lines;
if (total_bytes_covered > 0x1000)
if (total_bytes_covered > max_bytes_per_line)
{
// Welp, for whatever reason, the game wants to write more than 4k of texture data to TMEM in one go.
// We can only handle 4kB in one go due to wrap-around effects,
// so split up the upload in multiple chunks.
unsigned max_lines_per_iteration = 0x1000u / bytes_covered_per_line;
unsigned max_lines_per_iteration = max_bytes_per_line / bytes_covered_per_line;
// Align T-state.
max_lines_per_iteration &= ~1u;
......
......@@ -23,12 +23,16 @@
#include "timer.hpp"
#ifdef _WIN32
#define WIN32_LEAN_AND_MEAN
#include <windows.h>
#else
#include <time.h>
#endif
#ifdef __linux__
#include <sys/timerfd.h>
#include <unistd.h>
#endif
namespace Util
{
FrameTimer::FrameTimer()
......@@ -98,6 +102,11 @@ struct QPCFreq
} static static_qpc_freq;
#endif
#if !defined(_WIN32) && !defined(CLOCK_MONOTONIC_RAW)
#warning "CLOCK_MONOTONIC_RAW is not defined?"
#define CLOCK_MONOTONIC_RAW CLOCK_MONOTONIC
#endif
int64_t get_current_time_nsecs()
{
#ifdef _WIN32
......@@ -107,7 +116,7 @@ int64_t get_current_time_nsecs()
return int64_t(double(li.QuadPart) * static_qpc_freq.inv_freq);
#else
struct timespec ts = {};
if (clock_gettime(CLOCK_MONOTONIC, &ts) < 0)
if (clock_gettime(CLOCK_MONOTONIC_RAW, &ts) < 0)
return 0;
return ts.tv_sec * 1000000000ll + ts.tv_nsec;
#endif
......@@ -123,4 +132,123 @@ double Timer::end()
auto nt = get_current_time_nsecs();
return double(nt - t) * 1e-9;
}
// Platform-specific periodic timer backing FrameLimiter.
//  - Windows: a waitable timer; the system timer resolution is raised with
//    timeBeginPeriod(1) for as long as the timer handle exists.
//  - Linux: a timerfd on CLOCK_MONOTONIC.
//  - Other platforms: unsupported, every operation reports failure.
struct FrameLimiter::Impl
{
#ifdef _WIN32
	HANDLE timer_handle = nullptr;
#else
	int timer_fd = -1;
#endif

	// Releases the OS timer (if any) and returns to the inactive state.
	// On Windows every successful timer creation called timeBeginPeriod(1),
	// so it is always paired with timeEndPeriod(1) here.
	void release_timer()
	{
#ifdef _WIN32
		if (timer_handle)
		{
			CloseHandle(timer_handle);
			timeEndPeriod(1);
			timer_handle = nullptr;
		}
#elif defined(__linux__)
		// Guarded by __linux__: <unistd.h> is only included on Linux and
		// timer_fd can never become valid on other platforms.
		if (timer_fd >= 0)
		{
			::close(timer_fd);
			timer_fd = -1;
		}
#endif
	}

	// Arms the timer to fire every `ns` nanoseconds, creating it on first use.
	// Returns true if the timer is armed; on failure the timer is released.
	bool begin_interval_ns(uint64_t ns)
	{
#ifdef _WIN32
		if (!timer_handle)
		{
			timer_handle = CreateWaitableTimerA(nullptr, FALSE, nullptr);
			if (timer_handle)
				timeBeginPeriod(1);
		}

		if (!timer_handle)
			return false;

		// Negative due time is relative, in 100 ns units; the period is in milliseconds.
		LARGE_INTEGER due_time;
		due_time.QuadPart = -int64_t(ns) / 100;
		if (!SetWaitableTimer(timer_handle, &due_time, ns / 1000000,
		                      nullptr, nullptr, FALSE))
		{
			release_timer();
			return false;
		}

		return true;
#elif defined(__linux__)
		if (timer_fd < 0)
			timer_fd = timerfd_create(CLOCK_MONOTONIC, TFD_CLOEXEC);

		if (timer_fd >= 0)
		{
			itimerspec timerspec = {};
			timerspec.it_value.tv_nsec = ns % (1000 * 1000 * 1000);
			timerspec.it_value.tv_sec = ns / (1000 * 1000 * 1000);
			timerspec.it_interval = timerspec.it_value;
			if (timerfd_settime(timer_fd, TFD_TIMER_CANCEL_ON_SET, &timerspec, nullptr) < 0)
				release_timer();
		}

		return timer_fd >= 0;
#else
		(void)ns;
		return false;
#endif
	}

	// Blocks until the next timer tick. Returns false if the timer is not
	// active or the wait failed.
	bool wait_interval()
	{
#ifdef _WIN32
		if (!timer_handle)
			return false;
		return WaitForSingleObject(timer_handle, INFINITE) == WAIT_OBJECT_0;
#elif defined(__linux__)
		if (timer_fd < 0)
			return false;
		uint64_t expirations = 0;
		return ::read(timer_fd, &expirations, sizeof(expirations)) > 0;
#else
		return false;
#endif
	}

	// True while an OS timer object is held.
	bool is_active() const
	{
#if defined(_WIN32)
		return timer_handle != nullptr;
#else
		return timer_fd >= 0;
#endif
	}

	~Impl()
	{
		release_timer();
	}
};
// Creates the platform-specific implementation object; no OS timer is
// created until begin_interval_ns() is called.
FrameLimiter::FrameLimiter()
{
impl.reset(new Impl);
}
// Defined in this translation unit, where Impl is a complete type,
// so that `impl` can be destroyed here.
FrameLimiter::~FrameLimiter()
{
}
// Reports whether the implementation currently holds an OS timer.
bool FrameLimiter::is_active() const
{
return impl->is_active();
}
// Arms the periodic timer with an interval of `ns` nanoseconds.
// Returns the implementation's result (false on failure or unsupported platforms).
bool FrameLimiter::begin_interval_ns(uint64_t ns)
{
return impl->begin_interval_ns(ns);
}
// Waits for the next timer tick via the implementation.
// Returns false if no timer is active or the wait failed.
bool FrameLimiter::wait_interval()
{
return impl->wait_interval();
}
}
\ No newline at end of file