From e4d4eb3f30f776b94c9a2aaf2425b39cbfc987ef Mon Sep 17 00:00:00 2001 From: mhaas Date: Sun, 28 Jul 2024 22:13:57 +0200 Subject: [PATCH] =?UTF-8?q?Code=20hinzugef=C3=BCgt?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CMakeLists.txt | 15 +++ README.md | 6 + extract.cpp | 294 +++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 315 insertions(+) create mode 100644 CMakeLists.txt create mode 100644 extract.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000..16750c4 --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,15 @@ +cmake_minimum_required(VERSION 3.10) + +project(TracExtractor VERSION 1.0.0) + +find_package(OpenSSL REQUIRED) +find_package(SQLite3 REQUIRED) + +add_executable(TracExtractor + extract.cpp +) + +target_link_libraries(TracExtractor PRIVATE + OpenSSL::Crypto + SQLite::SQLite3 +) diff --git a/README.md b/README.md index e69de29..293edcd 100644 --- a/README.md +++ b/README.md @@ -0,0 +1,6 @@ +# Trac-Export + +Es wird eine Kopie der Trac-Datenbanken benötigt. Die ausführbare Datei wird mit dem Pfad zum Trac-Ordner und mit dem Namen des Sub-Tracs aufgerufen. +Benötigt werden pandoc, sqlite und OpenSSL. + +Der Code hier drin ist nicht premium (an einem Tag geschrieben), aber reicht aus, um die Daten als lesbare Markdown zusammen mit den Anhängen aus der Datenbank zu kriegen. diff --git a/extract.cpp b/extract.cpp new file mode 100644 index 0000000..7af954c --- /dev/null +++ b/extract.cpp @@ -0,0 +1,294 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + + +struct Data { + std::string id; + uint32_t version = 0; + std::string content; +}; + + +struct Datb { + std::string page; + std::string filename; +}; + + +struct Datc { + uint32_t id; + std::string component; + std::string summary; + std::string description; + std::string status; +}; + + + +class Database { + + private: + sqlite3* m_db; + + public: + Database(std::string const& filepath); + ~Database() noexcept; + + public: + std::unordered_map query_wiki(); + std::vector query_attachments(std::string const& type); + std::vector query_tickets(); + + private: + void check(int rc); +}; + + +Database::Database(std::string const& filepath) +{ + this->check(::sqlite3_open(filepath.c_str(), &m_db)); +} + +Database::~Database() noexcept +{ + this->check(::sqlite3_close(m_db)); +} + +std::unordered_map Database::query_wiki() +{ + sqlite3_stmt * stmt; + char const * tail = 0; + + std::unordered_map data; + + this->check(::sqlite3_prepare_v2(m_db, "SELECT name, version, text FROM wiki", -1, &stmt, &tail)); + int sqlrc = ::sqlite3_step(stmt); + while (sqlrc == SQLITE_ROW) { + std::string id = (char const*)::sqlite3_column_text(stmt, 0); + uint32_t version = ::sqlite3_column_int64(stmt, 1); + std::string content = (char const*)::sqlite3_column_text(stmt, 2); + if (data[id].version < version) { + data[id].id = id; + data[id].version = version; + data[id].content = content; + } + sqlrc = ::sqlite3_step(stmt); + } + this->check(sqlrc); + this->check(::sqlite3_finalize(stmt)); + + return data; +} + +std::vector Database::query_attachments(std::string const& type) +{ + sqlite3_stmt * stmt; + char const * tail = 0; + + std::vector data; + + this->check(::sqlite3_prepare_v2(m_db, "SELECT id, filename FROM attachment WHERE type = (?)", -1, &stmt, &tail)); + this->check(::sqlite3_bind_text(stmt, 1, type.c_str(), -1, SQLITE_TRANSIENT)); + int sqlrc = ::sqlite3_step(stmt); + while (sqlrc == SQLITE_ROW) { + std::string id = (char const*)::sqlite3_column_text(stmt, 0); + std::string filename = (char const*)::sqlite3_column_text(stmt, 1); + Datb dat; + dat.page = id; + dat.filename = filename; + data.push_back(dat); + sqlrc = ::sqlite3_step(stmt); + } + this->check(sqlrc); + this->check(::sqlite3_finalize(stmt)); + + return data; +} + +std::vector Database::query_tickets() +{ + sqlite3_stmt * stmt; + char const * tail = 0; + + std::vector data; + + this->check(::sqlite3_prepare_v2(m_db, "SELECT id, component, summary, description, status FROM ticket", -1, &stmt, &tail)); + int sqlrc = ::sqlite3_step(stmt); + while (sqlrc == SQLITE_ROW) { + Datc dat; + dat.id = ::sqlite3_column_int64(stmt, 0); + dat.component = (char const*)::sqlite3_column_text(stmt, 1); + dat.summary = (char const*)::sqlite3_column_text(stmt, 2); + dat.description = (char const*)::sqlite3_column_text(stmt, 3); + dat.status = (char const*)::sqlite3_column_text(stmt, 4); + data.push_back(dat); + + sqlrc = ::sqlite3_step(stmt); + } + this->check(sqlrc); + this->check(::sqlite3_finalize(stmt)); + + return data; +} + +void Database::check(int rc) +{ + if (rc != SQLITE_ROW && rc != SQLITE_DONE && rc != SQLITE_OK) { + std::string msg = std::to_string(rc) + ", msg: " + sqlite3_errmsg(m_db) + ", err: " + sqlite3_errstr(rc); + throw std::runtime_error(msg); + } +} + + +std::string sha1(std::string const& data) +{ + EVP_MD_CTX* ctx = ::EVP_MD_CTX_new(); + ::EVP_DigestInit_ex(ctx, ::EVP_sha1(), nullptr); + + ::EVP_DigestUpdate(ctx, data.data(), data.size()); + uint8_t hash[EVP_MAX_MD_SIZE]; + unsigned int size = 0; + ::EVP_DigestFinal(ctx, hash, &size); + + std::stringstream ss; + for (uint32_t i=0; i(hash[i]); + } + return ss.str(); +} + +void pandoc(std::filesystem::path path) +{ + auto out = path; + out.replace_extension("md"); + std::printf("%s -> %s\n", path.string().c_str(), out.string().c_str()); + std::string cmd = "pandoc -f mediawiki -t markdown -o \"" + out.string() + "\" \"" + path.string() + "\""; + std::system(cmd.c_str()); +} + + +int main(int argc, char** argv) +{ + if (argc < 3) { + return 0; + } + + std::string trac = argv[1]; + std::string subfolder = argv[2]; + std::filesystem::path trac_dir = std::filesystem::absolute(trac); + auto work_dir = std::filesystem::current_path(); + auto wiki_output = work_dir / subfolder / "wiki"; + auto tickets_output = work_dir / subfolder / "tickets"; + + std::vector> tasks; + + auto db_path = trac_dir / subfolder / "trac.db"; + Database db(db_path.string()); + auto wdata = db.query_wiki(); + std::printf("%ld wiki pages\n", wdata.size()); + + std::filesystem::create_directories(wiki_output); + std::filesystem::current_path(wiki_output); + + for (auto const& [id, dat] : wdata) { + std::filesystem::path path(id + ".wiki"); + if (!path.parent_path().empty()) { + std::filesystem::create_directories(path.parent_path()); + } + + std::ofstream file(path); + if (!file.is_open()) { + std::printf("Could not open file: %s\n", id.c_str()); + break; + } + file.write(dat.content.c_str(), dat.content.size()); + file.close(); + + tasks.push_back(std::packaged_task(std::bind(pandoc, std::filesystem::absolute(path)))); + } + wdata = {}; + + auto wiki_files_dir = trac_dir / subfolder / "files" / "attachments" / "wiki"; + auto wadata = db.query_attachments("wiki"); + std::printf("%ld attachments\n", wadata.size()); + for (auto const& dat : wadata) { + auto page_hash = sha1(dat.page); + std::string group_dir = page_hash.substr(0, 3); + auto file_hash = sha1(dat.filename); + auto origin = wiki_files_dir / group_dir / page_hash / (file_hash + std::filesystem::path(dat.filename).extension().string()); + std::filesystem::path path(dat.page); + path = path.parent_path(); + auto target = path.empty() ? dat.filename : (path / dat.filename).string(); + std::error_code ec; + std::filesystem::copy(origin, target, ec); + } + wadata = {}; + + std::filesystem::create_directories(tickets_output); + std::filesystem::current_path(tickets_output); + + auto tdata = db.query_tickets(); + std::printf("%ld tickets\n", tdata.size()); + for (auto const& dat : tdata) { + std::filesystem::create_directories(dat.component); + + auto const path = std::filesystem::path(dat.component) / (std::to_string(dat.id) + ".wiki"); + std::ofstream file(path); + if (!file.is_open()) { + std::printf("Could not open file: %s\n", path.c_str()); + break; + } + file << "== " << dat.summary << " ==" << std::endl; + file << std::endl; + file << "'''Status:''' " << dat.status << std::endl; + file << std::endl; + file << dat.description << std::endl; + file.close(); + + tasks.push_back(std::packaged_task(std::bind(pandoc, std::filesystem::absolute(path)))); + } + tdata = {}; + + auto ticket_files_dir = trac_dir / subfolder / "files" / "attachments" / "ticket"; + auto tadata = db.query_attachments("ticket"); + std::printf("%ld attachments\n", tadata.size()); + for (auto const& dat : tadata) { + auto page_hash = sha1(dat.page); + std::string group_dir = page_hash.substr(0, 3); + auto file_hash = sha1(dat.filename); + auto origin = ticket_files_dir / group_dir / page_hash / (file_hash + std::filesystem::path(dat.filename).extension().string()); + std::filesystem::path path(dat.page); + path = path.parent_path(); + auto target = path.empty() ? dat.filename : (path / dat.filename).string(); + std::error_code ec; + std::filesystem::copy(origin, target, ec); + } + tadata = {}; + + uint32_t nthreads = std::thread::hardware_concurrency(); + std::vector threads(nthreads); + for (uint32_t i=0; i