Trac-Export/extract.cpp
2024-07-28 22:13:57 +02:00

295 lines
7.8 KiB
C++

#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <exception>
#include <filesystem>
#include <fstream>
#include <functional>
#include <future>
#include <iomanip>
#include <memory>
#include <sstream>
#include <stdexcept>
#include <string>
#include <system_error>
#include <thread>
#include <unordered_map>
#include <utility>
#include <vector>
#include <sqlite3.h>
#include <openssl/evp.h>
#include <openssl/types.h>
// One wiki page revision as read from the `wiki` table.
struct Data {
    std::string id{};       // value of the `name` column
    uint32_t version{0};    // value of the `version` column; 0 until a row is stored
    std::string content{};  // value of the `text` column
};
// One row of the `attachment` table.
struct Datb {
    std::string page{};      // value of the `id` column (the owner of the attachment)
    std::string filename{};  // value of the `filename` column
};
// One row of the `ticket` table.
struct Datc {
    uint32_t id = 0;          // `id` column; initialized so a default Datc is well defined
    std::string component;    // `component` column
    std::string summary;      // `summary` column
    std::string description;  // `description` column
    std::string status;       // `status` column
};
// Owner of a single SQLite connection to a Trac database.
//
// The constructor opens the database file and the destructor closes it.
// The query_* members run fixed SELECT statements and return the rows as
// plain structs; SQLite errors are reported as std::runtime_error.
class Database {
private:
    sqlite3* m_db = nullptr;  // connection handle; owned by this object
public:
    Database(std::string const& filepath);
    ~Database() noexcept;
    // Not copyable: a copy would share m_db and close it a second time.
    Database(Database const&) = delete;
    Database& operator=(Database const&) = delete;
public:
    // Rows of the `wiki` table, keyed by page name (newest version per page).
    std::unordered_map<std::string, Data> query_wiki();
    // Rows of the `attachment` table whose `type` column equals `type`.
    std::vector<Datb> query_attachments(std::string const& type);
    // All rows of the `ticket` table.
    std::vector<Datc> query_tickets();
private:
    // Throws std::runtime_error unless rc is SQLITE_OK, SQLITE_ROW or SQLITE_DONE.
    void check(int rc);
};
// Opens the SQLite database at `filepath`.
//
// Throws std::runtime_error (same message layout as check()) when the
// database cannot be opened.
Database::Database(std::string const& filepath)
    : m_db(nullptr)
{
    int const rc = ::sqlite3_open(filepath.c_str(), &m_db);
    if (rc != SQLITE_OK) {
        // Build the message before closing: sqlite3_errmsg() needs the handle.
        std::string msg = std::to_string(rc) + ", msg: " + ::sqlite3_errmsg(m_db) + ", err: " + ::sqlite3_errstr(rc);
        // sqlite3_open() may hand back a handle even on failure, and the
        // destructor will not run once we throw, so release it here.
        ::sqlite3_close(m_db);
        m_db = nullptr;
        throw std::runtime_error(msg);
    }
}
// Closes the connection.
//
// A failure is only logged: throwing from this noexcept destructor would
// terminate the process.
Database::~Database() noexcept
{
    int const rc = ::sqlite3_close(m_db);
    if (rc != SQLITE_OK) {
        std::fprintf(stderr, "sqlite3_close failed: %d, err: %s\n", rc, ::sqlite3_errstr(rc));
    }
}
std::unordered_map<std::string, Data> Database::query_wiki()
{
sqlite3_stmt * stmt;
char const * tail = 0;
std::unordered_map<std::string, Data> data;
this->check(::sqlite3_prepare_v2(m_db, "SELECT name, version, text FROM wiki", -1, &stmt, &tail));
int sqlrc = ::sqlite3_step(stmt);
while (sqlrc == SQLITE_ROW) {
std::string id = (char const*)::sqlite3_column_text(stmt, 0);
uint32_t version = ::sqlite3_column_int64(stmt, 1);
std::string content = (char const*)::sqlite3_column_text(stmt, 2);
if (data[id].version < version) {
data[id].id = id;
data[id].version = version;
data[id].content = content;
}
sqlrc = ::sqlite3_step(stmt);
}
this->check(sqlrc);
this->check(::sqlite3_finalize(stmt));
return data;
}
std::vector<Datb> Database::query_attachments(std::string const& type)
{
sqlite3_stmt * stmt;
char const * tail = 0;
std::vector<Datb> data;
this->check(::sqlite3_prepare_v2(m_db, "SELECT id, filename FROM attachment WHERE type = (?)", -1, &stmt, &tail));
this->check(::sqlite3_bind_text(stmt, 1, type.c_str(), -1, SQLITE_TRANSIENT));
int sqlrc = ::sqlite3_step(stmt);
while (sqlrc == SQLITE_ROW) {
std::string id = (char const*)::sqlite3_column_text(stmt, 0);
std::string filename = (char const*)::sqlite3_column_text(stmt, 1);
Datb dat;
dat.page = id;
dat.filename = filename;
data.push_back(dat);
sqlrc = ::sqlite3_step(stmt);
}
this->check(sqlrc);
this->check(::sqlite3_finalize(stmt));
return data;
}
std::vector<Datc> Database::query_tickets()
{
sqlite3_stmt * stmt;
char const * tail = 0;
std::vector<Datc> data;
this->check(::sqlite3_prepare_v2(m_db, "SELECT id, component, summary, description, status FROM ticket", -1, &stmt, &tail));
int sqlrc = ::sqlite3_step(stmt);
while (sqlrc == SQLITE_ROW) {
Datc dat;
dat.id = ::sqlite3_column_int64(stmt, 0);
dat.component = (char const*)::sqlite3_column_text(stmt, 1);
dat.summary = (char const*)::sqlite3_column_text(stmt, 2);
dat.description = (char const*)::sqlite3_column_text(stmt, 3);
dat.status = (char const*)::sqlite3_column_text(stmt, 4);
data.push_back(dat);
sqlrc = ::sqlite3_step(stmt);
}
this->check(sqlrc);
this->check(::sqlite3_finalize(stmt));
return data;
}
// Turns a SQLite result code into an exception.
//
// SQLITE_OK, SQLITE_ROW and SQLITE_DONE pass silently; any other code throws
// std::runtime_error carrying the numeric code, the connection's current
// error message and the generic description of the code.
void Database::check(int rc)
{
    switch (rc) {
    case SQLITE_OK:
    case SQLITE_ROW:
    case SQLITE_DONE:
        return;
    default:
        break;
    }

    std::string text = std::to_string(rc);
    text += ", msg: ";
    text += sqlite3_errmsg(m_db);
    text += ", err: ";
    text += sqlite3_errstr(rc);
    throw std::runtime_error(text);
}
std::string sha1(std::string const& data)
{
EVP_MD_CTX* ctx = ::EVP_MD_CTX_new();
::EVP_DigestInit_ex(ctx, ::EVP_sha1(), nullptr);
::EVP_DigestUpdate(ctx, data.data(), data.size());
uint8_t hash[EVP_MAX_MD_SIZE];
unsigned int size = 0;
::EVP_DigestFinal(ctx, hash, &size);
std::stringstream ss;
for (uint32_t i=0; i<size; ++i) {
ss << std::hex << std::setw(2) << std::setfill('0') << static_cast<uint32_t>(hash[i]);
}
return ss.str();
}
// Converts one MediaWiki-formatted file to Markdown by running the external
// `pandoc` program.
//
// path: input file; the output is written next to it with the extension
//       replaced by "md".
//
// Prints "<input> -> <output>" to stdout, and a diagnostic to stderr when the
// command does not finish with status 0.
void pandoc(std::filesystem::path path)
{
    auto out = path;
    out.replace_extension("md");
    std::string const source = path.string();
    std::string const target = out.string();
    std::printf("%s -> %s\n", source.c_str(), target.c_str());

    // NOTE(review): the paths are only wrapped in double quotes. A name that
    // contains a double quote (or, on POSIX shells, `$` or a backtick) is still
    // interpreted by the shell; names come from the database, so treat with care.
    std::string const cmd = "pandoc -f mediawiki -t markdown -o \"" + target + "\" \"" + source + "\"";
    int const rc = std::system(cmd.c_str());
    if (rc != 0) {
        std::fprintf(stderr, "pandoc failed (status %d): %s\n", rc, source.c_str());
    }
}
int main(int argc, char** argv)
{
if (argc < 3) {
return 0;
}
std::string trac = argv[1];
std::string subfolder = argv[2];
std::filesystem::path trac_dir = std::filesystem::absolute(trac);
auto work_dir = std::filesystem::current_path();
auto wiki_output = work_dir / subfolder / "wiki";
auto tickets_output = work_dir / subfolder / "tickets";
std::vector<std::packaged_task<void()>> tasks;
auto db_path = trac_dir / subfolder / "trac.db";
Database db(db_path.string());
auto wdata = db.query_wiki();
std::printf("%ld wiki pages\n", wdata.size());
std::filesystem::create_directories(wiki_output);
std::filesystem::current_path(wiki_output);
for (auto const& [id, dat] : wdata) {
std::filesystem::path path(id + ".wiki");
if (!path.parent_path().empty()) {
std::filesystem::create_directories(path.parent_path());
}
std::ofstream file(path);
if (!file.is_open()) {
std::printf("Could not open file: %s\n", id.c_str());
break;
}
file.write(dat.content.c_str(), dat.content.size());
file.close();
tasks.push_back(std::packaged_task<void()>(std::bind(pandoc, std::filesystem::absolute(path))));
}
wdata = {};
auto wiki_files_dir = trac_dir / subfolder / "files" / "attachments" / "wiki";
auto wadata = db.query_attachments("wiki");
std::printf("%ld attachments\n", wadata.size());
for (auto const& dat : wadata) {
auto page_hash = sha1(dat.page);
std::string group_dir = page_hash.substr(0, 3);
auto file_hash = sha1(dat.filename);
auto origin = wiki_files_dir / group_dir / page_hash / (file_hash + std::filesystem::path(dat.filename).extension().string());
std::filesystem::path path(dat.page);
path = path.parent_path();
auto target = path.empty() ? dat.filename : (path / dat.filename).string();
std::error_code ec;
std::filesystem::copy(origin, target, ec);
}
wadata = {};
std::filesystem::create_directories(tickets_output);
std::filesystem::current_path(tickets_output);
auto tdata = db.query_tickets();
std::printf("%ld tickets\n", tdata.size());
for (auto const& dat : tdata) {
std::filesystem::create_directories(dat.component);
auto const path = std::filesystem::path(dat.component) / (std::to_string(dat.id) + ".wiki");
std::ofstream file(path);
if (!file.is_open()) {
std::printf("Could not open file: %s\n", path.c_str());
break;
}
file << "== " << dat.summary << " ==" << std::endl;
file << std::endl;
file << "'''Status:''' " << dat.status << std::endl;
file << std::endl;
file << dat.description << std::endl;
file.close();
tasks.push_back(std::packaged_task<void()>(std::bind(pandoc, std::filesystem::absolute(path))));
}
tdata = {};
auto ticket_files_dir = trac_dir / subfolder / "files" / "attachments" / "ticket";
auto tadata = db.query_attachments("ticket");
std::printf("%ld attachments\n", tadata.size());
for (auto const& dat : tadata) {
auto page_hash = sha1(dat.page);
std::string group_dir = page_hash.substr(0, 3);
auto file_hash = sha1(dat.filename);
auto origin = ticket_files_dir / group_dir / page_hash / (file_hash + std::filesystem::path(dat.filename).extension().string());
std::filesystem::path path(dat.page);
path = path.parent_path();
auto target = path.empty() ? dat.filename : (path / dat.filename).string();
std::error_code ec;
std::filesystem::copy(origin, target, ec);
}
tadata = {};
uint32_t nthreads = std::thread::hardware_concurrency();
std::vector<std::thread> threads(nthreads);
for (uint32_t i=0; i<nthreads; ++i) {
threads[i] = std::thread([i, nthreads, &tasks]{
for (uint32_t j=i; j<tasks.size(); j+=nthreads) {
tasks[j]();
}
});
}
for (uint32_t i=0; i<nthreads; ++i) {
if (threads[i].joinable()) {
threads[i].join();
}
}
}