Code hinzugefügt

This commit is contained in:
mhaas 2024-07-28 22:13:57 +02:00
parent c2405e1175
commit e4d4eb3f30
3 changed files with 315 additions and 0 deletions

15
CMakeLists.txt Normal file
View file

@ -0,0 +1,15 @@
# Build script for the TracExtractor command line tool (single source file: extract.cpp).

# 3.14 is the first CMake release that ships the FindSQLite3 module used below.
cmake_minimum_required(VERSION 3.14)
project(TracExtractor VERSION 1.0.0)

find_package(OpenSSL REQUIRED)
find_package(SQLite3 REQUIRED)
# extract.cpp starts std::thread workers; on some platforms that needs an explicit thread library.
find_package(Threads REQUIRED)

add_executable(TracExtractor
    extract.cpp
)

# extract.cpp relies on std::filesystem and structured bindings, i.e. C++17.
target_compile_features(TracExtractor PRIVATE cxx_std_17)

target_link_libraries(TracExtractor PRIVATE
    OpenSSL::Crypto
    SQLite::SQLite3
    Threads::Threads
)

View file

@ -0,0 +1,6 @@
# Trac-Export
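Es wird eine Kopie der Trac-Datenbanken benötigt (pro Sub-Trac die Datei `trac.db` sowie die Anhänge unter `files/attachments`). Die ausführbare Datei wird mit dem Pfad zum Trac-Ordner und mit dem Namen des Sub-Tracs aufgerufen, z. B. `TracExtractor /pfad/zum/trac-ordner mein-projekt`. Die Ausgabe wird im aktuellen Arbeitsverzeichnis unter `<Sub-Trac>/wiki` und `<Sub-Trac>/tickets` abgelegt.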
Benötigt werden pandoc (zur Laufzeit im `PATH`), SQLite und OpenSSL.
Der Code ist nicht von höchster Qualität (an einem Tag geschrieben), reicht aber aus, um die Daten als lesbares Markdown zusammen mit den Anhängen aus der Datenbank zu exportieren.

294
extract.cpp Normal file
View file

@ -0,0 +1,294 @@
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <filesystem>
#include <fstream>
#include <functional>
#include <future>
#include <iomanip>
#include <memory>
#include <sstream>
#include <stdexcept>
#include <string>
#include <system_error>
#include <thread>
#include <unordered_map>
#include <utility>
#include <vector>
#include <sqlite3.h>
#include <openssl/evp.h>
#include <openssl/types.h>
// One wiki page revision as read from the `wiki` table.
struct Data {
    std::string id{};          // value of the `name` column
    std::uint32_t version{0};  // value of the `version` column; 0 means "nothing stored yet"
    std::string content{};     // value of the `text` column
};
// One row of the `attachment` table: the `id` column and the stored file name.
struct Datb {
    std::string page{};      // value of the `id` column (the entry the attachment belongs to)
    std::string filename{};  // value of the `filename` column
};
// One row of the `ticket` table, restricted to the columns this tool exports.
struct Datc {
    uint32_t id = 0;          // value of the `id` column; zero-initialized so a default object is never indeterminate
    std::string component;    // value of the `component` column
    std::string summary;      // value of the `summary` column
    std::string description;  // value of the `description` column
    std::string status;       // value of the `status` column
};
// Wrapper around a single SQLite connection with the queries this tool needs.
// The connection is opened in the constructor and closed in the destructor.
class Database {
private:
    sqlite3* m_db = nullptr;
public:
    // Opens the SQLite database stored at `filepath`.
    Database(std::string const& filepath);
    ~Database() noexcept;
    // The destructor closes m_db, so a copy would close the same handle twice.
    Database(Database const&) = delete;
    Database& operator=(Database const&) = delete;
public:
    // Returns, per page name, the row of the `wiki` table with the highest version.
    std::unordered_map<std::string, Data> query_wiki();
    // Returns the (id, filename) pairs of the `attachment` table whose `type` equals `type`.
    std::vector<Datb> query_attachments(std::string const& type);
    // Returns id, component, summary, description and status of every row of the `ticket` table.
    std::vector<Datc> query_tickets();
private:
    // Throws std::runtime_error unless `rc` is SQLITE_OK, SQLITE_ROW or SQLITE_DONE.
    void check(int rc);
};
// Opens the SQLite database at `filepath`.
// Throws std::runtime_error when the database cannot be opened.
Database::Database(std::string const& filepath)
    : m_db(nullptr)
{
    int const rc = ::sqlite3_open(filepath.c_str(), &m_db);
    if (rc != SQLITE_OK) {
        // Build the message before closing: sqlite3_errmsg() reads from the handle.
        std::string const msg = std::to_string(rc) + ", msg: " + ::sqlite3_errmsg(m_db) + ", err: " + ::sqlite3_errstr(rc);
        // The destructor does not run when the constructor throws, so release the handle here.
        ::sqlite3_close(m_db);
        m_db = nullptr;
        throw std::runtime_error(msg);
    }
}
// Closes the connection. Failures are only reported on stderr: throwing out of this
// noexcept destructor would terminate the program.
Database::~Database() noexcept
{
    if (m_db == nullptr) {
        return;
    }
    int const rc = ::sqlite3_close(m_db);
    if (rc != SQLITE_OK) {
        std::fprintf(stderr, "sqlite3_close failed: %d, err: %s\n", rc, ::sqlite3_errstr(rc));
    }
    m_db = nullptr;
}
std::unordered_map<std::string, Data> Database::query_wiki()
{
sqlite3_stmt * stmt;
char const * tail = 0;
std::unordered_map<std::string, Data> data;
this->check(::sqlite3_prepare_v2(m_db, "SELECT name, version, text FROM wiki", -1, &stmt, &tail));
int sqlrc = ::sqlite3_step(stmt);
while (sqlrc == SQLITE_ROW) {
std::string id = (char const*)::sqlite3_column_text(stmt, 0);
uint32_t version = ::sqlite3_column_int64(stmt, 1);
std::string content = (char const*)::sqlite3_column_text(stmt, 2);
if (data[id].version < version) {
data[id].id = id;
data[id].version = version;
data[id].content = content;
}
sqlrc = ::sqlite3_step(stmt);
}
this->check(sqlrc);
this->check(::sqlite3_finalize(stmt));
return data;
}
std::vector<Datb> Database::query_attachments(std::string const& type)
{
sqlite3_stmt * stmt;
char const * tail = 0;
std::vector<Datb> data;
this->check(::sqlite3_prepare_v2(m_db, "SELECT id, filename FROM attachment WHERE type = (?)", -1, &stmt, &tail));
this->check(::sqlite3_bind_text(stmt, 1, type.c_str(), -1, SQLITE_TRANSIENT));
int sqlrc = ::sqlite3_step(stmt);
while (sqlrc == SQLITE_ROW) {
std::string id = (char const*)::sqlite3_column_text(stmt, 0);
std::string filename = (char const*)::sqlite3_column_text(stmt, 1);
Datb dat;
dat.page = id;
dat.filename = filename;
data.push_back(dat);
sqlrc = ::sqlite3_step(stmt);
}
this->check(sqlrc);
this->check(::sqlite3_finalize(stmt));
return data;
}
std::vector<Datc> Database::query_tickets()
{
sqlite3_stmt * stmt;
char const * tail = 0;
std::vector<Datc> data;
this->check(::sqlite3_prepare_v2(m_db, "SELECT id, component, summary, description, status FROM ticket", -1, &stmt, &tail));
int sqlrc = ::sqlite3_step(stmt);
while (sqlrc == SQLITE_ROW) {
Datc dat;
dat.id = ::sqlite3_column_int64(stmt, 0);
dat.component = (char const*)::sqlite3_column_text(stmt, 1);
dat.summary = (char const*)::sqlite3_column_text(stmt, 2);
dat.description = (char const*)::sqlite3_column_text(stmt, 3);
dat.status = (char const*)::sqlite3_column_text(stmt, 4);
data.push_back(dat);
sqlrc = ::sqlite3_step(stmt);
}
this->check(sqlrc);
this->check(::sqlite3_finalize(stmt));
return data;
}
// Converts an SQLite result code into an exception.
// SQLITE_OK, SQLITE_ROW and SQLITE_DONE pass silently; every other code throws
// std::runtime_error carrying the code, the connection's error message and the code's description.
void Database::check(int rc)
{
    switch (rc) {
    case SQLITE_OK:
    case SQLITE_ROW:
    case SQLITE_DONE:
        return;
    default:
        break;
    }
    std::string message = std::to_string(rc);
    message += ", msg: ";
    message += sqlite3_errmsg(m_db);
    message += ", err: ";
    message += sqlite3_errstr(rc);
    throw std::runtime_error(message);
}
std::string sha1(std::string const& data)
{
EVP_MD_CTX* ctx = ::EVP_MD_CTX_new();
::EVP_DigestInit_ex(ctx, ::EVP_sha1(), nullptr);
::EVP_DigestUpdate(ctx, data.data(), data.size());
uint8_t hash[EVP_MAX_MD_SIZE];
unsigned int size = 0;
::EVP_DigestFinal(ctx, hash, &size);
std::stringstream ss;
for (uint32_t i=0; i<size; ++i) {
ss << std::hex << std::setw(2) << std::setfill('0') << static_cast<uint32_t>(hash[i]);
}
return ss.str();
}
// Quotes `arg` so that the command interpreter behind std::system passes it on as one literal argument.
static std::string shell_quote(std::string const& arg)
{
#ifdef _WIN32
    // Same quoting as before on Windows.
    return "\"" + arg + "\"";
#else
    // POSIX sh: nothing is special inside single quotes; an embedded single quote is written as '\''.
    std::string quoted = "'";
    for (char const c : arg) {
        if (c == '\'') {
            quoted += "'\\''";
        } else {
            quoted += c;
        }
    }
    quoted += '\'';
    return quoted;
#endif
}

// Runs the external `pandoc` program on `path`, reading it as MediaWiki markup and writing
// Markdown to a sibling file with the extension replaced by "md".
// A non-zero status from std::system is reported on stderr; nothing is thrown for it.
void pandoc(std::filesystem::path path)
{
    auto out = path;
    out.replace_extension("md");
    std::printf("%s -> %s\n", path.string().c_str(), out.string().c_str());
    // The paths come from database content, so they are quoted rather than pasted into the command.
    std::string const cmd = "pandoc -f mediawiki -t markdown -o " + shell_quote(out.string()) + " " + shell_quote(path.string());
    int const status = std::system(cmd.c_str());
    if (status != 0) {
        std::fprintf(stderr, "pandoc failed (status %d) for %s\n", status, path.string().c_str());
    }
}
int main(int argc, char** argv)
{
if (argc < 3) {
return 0;
}
std::string trac = argv[1];
std::string subfolder = argv[2];
std::filesystem::path trac_dir = std::filesystem::absolute(trac);
auto work_dir = std::filesystem::current_path();
auto wiki_output = work_dir / subfolder / "wiki";
auto tickets_output = work_dir / subfolder / "tickets";
std::vector<std::packaged_task<void()>> tasks;
auto db_path = trac_dir / subfolder / "trac.db";
Database db(db_path.string());
auto wdata = db.query_wiki();
std::printf("%ld wiki pages\n", wdata.size());
std::filesystem::create_directories(wiki_output);
std::filesystem::current_path(wiki_output);
for (auto const& [id, dat] : wdata) {
std::filesystem::path path(id + ".wiki");
if (!path.parent_path().empty()) {
std::filesystem::create_directories(path.parent_path());
}
std::ofstream file(path);
if (!file.is_open()) {
std::printf("Could not open file: %s\n", id.c_str());
break;
}
file.write(dat.content.c_str(), dat.content.size());
file.close();
tasks.push_back(std::packaged_task<void()>(std::bind(pandoc, std::filesystem::absolute(path))));
}
wdata = {};
auto wiki_files_dir = trac_dir / subfolder / "files" / "attachments" / "wiki";
auto wadata = db.query_attachments("wiki");
std::printf("%ld attachments\n", wadata.size());
for (auto const& dat : wadata) {
auto page_hash = sha1(dat.page);
std::string group_dir = page_hash.substr(0, 3);
auto file_hash = sha1(dat.filename);
auto origin = wiki_files_dir / group_dir / page_hash / (file_hash + std::filesystem::path(dat.filename).extension().string());
std::filesystem::path path(dat.page);
path = path.parent_path();
auto target = path.empty() ? dat.filename : (path / dat.filename).string();
std::error_code ec;
std::filesystem::copy(origin, target, ec);
}
wadata = {};
std::filesystem::create_directories(tickets_output);
std::filesystem::current_path(tickets_output);
auto tdata = db.query_tickets();
std::printf("%ld tickets\n", tdata.size());
for (auto const& dat : tdata) {
std::filesystem::create_directories(dat.component);
auto const path = std::filesystem::path(dat.component) / (std::to_string(dat.id) + ".wiki");
std::ofstream file(path);
if (!file.is_open()) {
std::printf("Could not open file: %s\n", path.c_str());
break;
}
file << "== " << dat.summary << " ==" << std::endl;
file << std::endl;
file << "'''Status:''' " << dat.status << std::endl;
file << std::endl;
file << dat.description << std::endl;
file.close();
tasks.push_back(std::packaged_task<void()>(std::bind(pandoc, std::filesystem::absolute(path))));
}
tdata = {};
auto ticket_files_dir = trac_dir / subfolder / "files" / "attachments" / "ticket";
auto tadata = db.query_attachments("ticket");
std::printf("%ld attachments\n", tadata.size());
for (auto const& dat : tadata) {
auto page_hash = sha1(dat.page);
std::string group_dir = page_hash.substr(0, 3);
auto file_hash = sha1(dat.filename);
auto origin = ticket_files_dir / group_dir / page_hash / (file_hash + std::filesystem::path(dat.filename).extension().string());
std::filesystem::path path(dat.page);
path = path.parent_path();
auto target = path.empty() ? dat.filename : (path / dat.filename).string();
std::error_code ec;
std::filesystem::copy(origin, target, ec);
}
tadata = {};
uint32_t nthreads = std::thread::hardware_concurrency();
std::vector<std::thread> threads(nthreads);
for (uint32_t i=0; i<nthreads; ++i) {
threads[i] = std::thread([i, nthreads, &tasks]{
for (uint32_t j=i; j<tasks.size(); j+=nthreads) {
tasks[j]();
}
});
}
for (uint32_t i=0; i<nthreads; ++i) {
if (threads[i].joinable()) {
threads[i].join();
}
}
}