// Loading data


// sys_proc_callgraph.cpp
// Build:  g++ -std=c++17 -O2 -Wall -Wextra sys_proc_callgraph.cpp -o sys_proc_callgraph
// Run:    sudo ./sys_proc_callgraph <pid> [samples=20] [delay_ms=100] [out=callgraph.dot] [--threads]
// Render: dot -Tpng callgraph.dot -o callgraph.png

#include <sys/ptrace.h>
#include <sys/types.h>
#include <sys/uio.h>
#include <sys/user.h>
#include <sys/wait.h>
#include <unistd.h>
#include <dirent.h>

#include <cerrno>
#include <climits>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cstring>

#include <algorithm>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <map>
#include <sstream>
#include <stdexcept>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

#ifndef __linux__
#error This program is intended for Linux.
#endif

#ifndef __x86_64__
#error This program supports x86_64 Linux only.
#endif

/// One saved stack-frame record as it is read out of the target's memory at
/// the address held in RBP: the caller's saved RBP followed by the return
/// address.
///
/// The record is filled by copying raw bytes from the traced process, so it
/// must stay exactly two consecutive 64-bit words in this order.
struct Frame {
    std::uint64_t previousRbp = 0;    ///< Saved frame pointer of the caller.
    std::uint64_t returnAddress = 0;  ///< Address at which the caller resumes.
};

static_assert(sizeof(Frame) == 2 * sizeof(std::uint64_t),
              "Frame must match the in-memory layout of a saved rbp/return-address pair");

/// One mapping parsed from a single line of /proc/<pid>/maps.
struct MapEntry {
    std::uint64_t start{0};   ///< First address covered by the mapping.
    std::uint64_t end{0};     ///< One past the last covered address.
    std::uint64_t offset{0};  ///< Offset field of the maps line.
    std::string perms{};      ///< Permission field exactly as it appears in the line.
    std::string path{};       ///< Trailing pathname field; empty when the line has none.
};

/// Formats @p value as lowercase hexadecimal with a "0x" prefix and no
/// zero padding (zero becomes "0x0").
static std::string hex64(std::uint64_t value) {
    static const char kDigits[] = "0123456789abcdef";

    // A 64-bit value has at most 16 hex digits; fill the buffer from the end.
    char digits[16];
    std::size_t first = sizeof(digits);
    do {
        digits[--first] = kDigits[value & 0xFu];
        value >>= 4;
    } while (value != 0);

    std::string text = "0x";
    text.append(digits + first, sizeof(digits) - first);
    return text;
}

/// Strips leading spaces/tabs and trailing spaces/tabs/CR/LF from @p s.
///
/// The two ends are intentionally asymmetric: leading newlines are kept,
/// trailing ones are removed. Runs in a single pass per end instead of
/// erasing one leading character at a time.
static std::string trim(std::string s) {
    // Cut the tail first; if nothing but whitespace remains the result is empty.
    const std::size_t last = s.find_last_not_of(" \t\n\r");
    if (last == std::string::npos) return std::string();
    s.erase(last + 1);

    // s now ends in a non-whitespace character, so this search always succeeds.
    s.erase(0, s.find_first_not_of(" \t"));
    return s;
}

/// Returns the part of @p path after its last '/', or the whole string when
/// it contains no '/'. A path ending in '/' yields an empty string.
static std::string baseName(const std::string& path) {
    const std::size_t slash = path.rfind('/');
    return slash == std::string::npos ? path : path.substr(slash + 1);
}

/// Wraps @p s in single quotes for use as one shell word. Every embedded
/// single quote is emitted as the sequence '\'' (close quote, escaped quote,
/// reopen quote).
static std::string shellQuote(const std::string& s) {
    static const char kEscapedQuote[] = "'\\''";

    std::string quoted;
    quoted.reserve(s.size() + 2);
    quoted.push_back('\'');

    // Copy the runs between quotes verbatim and splice the escape in between.
    std::size_t from = 0;
    for (;;) {
        const std::size_t hit = s.find('\'', from);
        if (hit == std::string::npos) {
            quoted.append(s, from, std::string::npos);
            break;
        }
        quoted.append(s, from, hit - from);
        quoted += kEscapedQuote;
        from = hit + 1;
    }

    quoted.push_back('\'');
    return quoted;
}

/// Escapes @p s for placement inside a double-quoted DOT string: double
/// quotes and backslashes are backslash-escaped, and a newline character
/// becomes the two-character sequence "\n".
static std::string dotEscape(const std::string& s) {
    std::string escaped;
    escaped.reserve(s.size());

    for (const char ch : s) {
        switch (ch) {
            case '"':
                escaped.append("\\\"");
                break;
            case '\\':
                escaped.append("\\\\");
                break;
            case '\n':
                escaped.append("\\n");
                break;
            default:
                escaped.push_back(ch);
                break;
        }
    }
    return escaped;
}

/// True when @p path is non-empty and begins with '/'.
static bool isRealFilePath(const std::string& path) {
    if (path.empty()) {
        return false;
    }
    return path.front() == '/';
}

/// Reads the whole file at @p path and returns its contents passed through
/// trim(). Returns an empty string when the file cannot be opened.
static std::string readTextFile(const std::string& path) {
    std::ifstream file(path);
    if (file.fail()) {
        return "";
    }

    std::ostringstream contents;
    contents << file.rdbuf();
    return trim(contents.str());
}

/// True when @p s is a non-null, non-empty C string made up only of the
/// ASCII digits '0'..'9'.
static bool isNumberString(const char* s) {
    if (s == nullptr || s[0] == '\0') {
        return false;
    }
    const char* const stop = s + std::strlen(s);
    return std::all_of(s, stop, [](char ch) { return ch >= '0' && ch <= '9'; });
}

/// Lists the thread ids found as all-digit entries under /proc/<pid>/task,
/// sorted ascending. When the directory cannot be opened or holds no such
/// entry, the result is a single element equal to @p pid.
static std::vector<pid_t> listThreads(pid_t pid) {
    const std::string taskDir = "/proc/" + std::to_string(pid) + "/task";
    std::vector<pid_t> result;

    if (DIR* handle = opendir(taskDir.c_str())) {
        for (dirent* entry = readdir(handle); entry != nullptr; entry = readdir(handle)) {
            // Skips "." and ".." along with anything else that is not purely numeric.
            if (!isNumberString(entry->d_name)) continue;
            result.push_back(static_cast<pid_t>(std::atoi(entry->d_name)));
        }
        closedir(handle);
        std::sort(result.begin(), result.end());
    }

    if (result.empty()) {
        result.push_back(pid);
    }
    return result;
}

/// Returns the trimmed contents of /proc/<pid>/comm, or "pid_<pid>" when
/// that file yields nothing.
static std::string processName(pid_t pid) {
    const std::string pidText = std::to_string(pid);
    std::string comm = readTextFile("/proc/" + pidText + "/comm");
    if (comm.empty()) {
        return "pid_" + pidText;
    }
    return comm;
}

class ProcessMaps {
public:
    explicit ProcessMaps(pid_t pid) {
        std::ifstream in("/proc/" + std::to_string(pid) + "/maps");
        if (!in) {
            throw std::runtime_error("cannot open /proc/" + std::to_string(pid) + "/maps");
        }

        std::string line;
        while (std::getline(in, line)) {
            MapEntry e;
            std::string range, dev, inode;
            std::istringstream ss(line);
            ss >> range >> e.perms >> std::hex >> e.offset >> dev >> inode;
            std::string path;
            std::getline(ss, path);
            e.path = trim(path);

            std::size_t dash = range.find('-');
            if (dash == std::string::npos) continue;
            e.start = std::stoull(range.substr(0, dash), nullptr, 16);
            e.end = std::stoull(range.substr(dash + 1), nullptr, 16);
            entries_.push_back(e);
        }

        if (entries_.empty()) {
            throw std::runtime_error("/proc/" + std::to_string(pid) + "/maps is empty; this may be a kernel thread or a protected process");
        }
    }

    const MapEntry* find(std::uint64_t address) const {
        for (const auto& e : entries_) {
            if (address >= e.start && address < e.end) return &e;
        }
        return nullptr;
    }

private:
    std::vector<MapEntry> entries_;
};

/// Turns instruction addresses of the target process into readable labels,
/// using the process's memory map and the external `addr2line` tool.
/// Results are cached per (mapping path, address, adjusted address).
class Symbolizer {
public:
    /// Snapshots the memory map of @p pid. Propagates any exception thrown
    /// by the ProcessMaps constructor.
    explicit Symbolizer(pid_t pid) : maps_(pid) {}

    /// Produces a label for @p address.
    ///
    /// @return  - the bare hex address when no mapping contains it;
    ///          - "<module>+0x<offset>" when addr2line gives nothing;
    ///          - addr2line's output followed by "\n<module>+0x<offset>"
    ///            otherwise.
    std::string name(std::uint64_t address) {
        const MapEntry* m = maps_.find(address);
        if (!m) return hex64(address);

        const std::string module = m->path.empty() ? "unknown" : baseName(m->path);
        const std::uint64_t adjusted = (address - m->start) + m->offset;

        // Build the cache key once and reuse it for lookup and insertion.
        std::ostringstream keyStream;
        keyStream << m->path << ":" << std::hex << address << ":" << adjusted;
        const std::string key = keyStream.str();

        const auto it = cache_.find(key);
        if (it != cache_.end()) return it->second;

        std::string result;
        if (isRealFilePath(m->path)) {
            // Shared libraries and PIE executables are usually resolved by adjusted address.
            // Some non-PIE executables may resolve by absolute runtime address.
            result = addr2line(m->path, adjusted);
            if (result.empty()) result = addr2line(m->path, address);
        }

        if (result.empty()) {
            result = module + "+" + hex64(adjusted);
        } else {
            result += "\n" + module + "+" + hex64(adjusted);
        }

        cache_.emplace(key, result);
        return result;
    }

private:
    ProcessMaps maps_;
    std::unordered_map<std::string, std::string> cache_;

    /// Reads one complete line from @p stream, however long it is. Returns
    /// an empty string at end of input. A trailing newline, if any, is kept.
    static std::string readLine(FILE* stream) {
        std::string line;
        char chunk[4096];
        while (fgets(chunk, sizeof(chunk), stream)) {
            line += chunk;
            // A short chunk or one ending in '\n' completes the line; otherwise
            // the line was longer than the buffer and more pieces follow.
            if (!line.empty() && line.back() == '\n') break;
        }
        return line;
    }

    /// Runs `addr2line -f -C -e <file> <address>` through the shell and
    /// returns "<function>" or "<function>\n<location>". Returns an empty
    /// string when the tool cannot be started or reports "??".
    static std::string addr2line(const std::string& file, std::uint64_t address) {
        std::ostringstream cmd;
        cmd << "addr2line -f -C -e " << shellQuote(file) << " " << hex64(address) << " 2>/dev/null";

        FILE* pipe = popen(cmd.str().c_str(), "r");
        if (!pipe) return "";

        // First line is the function name, second the file:line location.
        // Whole lines are read so that a very long demangled name cannot
        // spill into the location.
        const std::string functionName = trim(readLine(pipe));
        const std::string location = trim(readLine(pipe));

        // The exit status is deliberately ignored; the output decides.
        (void)pclose(pipe);

        if (functionName.empty() || functionName == "??") return "";
        if (!location.empty() && location != "??:0") {
            return functionName + "\n" + location;
        }
        return functionName;
    }
};

/// Accumulates sampled call stacks into node and edge hit counts and writes
/// them out as a Graphviz DOT digraph.
class CallGraph {
public:
    /// Records one stack given in root-to-leaf order. Every element bumps
    /// its node counter (once per occurrence), and every adjacent pair bumps
    /// the caller -> callee edge counter. An empty stack is ignored.
    void addStack(const std::vector<std::string>& rootToLeaf) {
        if (rootToLeaf.empty()) return;

        for (const auto& n : rootToLeaf) nodeHits_[n]++;
        for (std::size_t i = 1; i < rootToLeaf.size(); ++i) {
            edges_[{rootToLeaf[i - 1], rootToLeaf[i]}]++;
        }
    }

    /// Writes the graph to @p fileName in DOT format with @p title as the
    /// graph label.
    ///
    /// @throws std::runtime_error when the file cannot be opened or when
    ///         writing to it fails.
    void writeDot(const std::string& fileName, const std::string& title) const {
        std::ofstream out(fileName);
        if (!out) throw std::runtime_error("cannot write " + fileName);

        out << "digraph ProcessCallGraph {\n";
        out << "  rankdir=TB;\n";
        out << "  label=\"" << dotEscape(title) << "\";\n";
        out << "  labelloc=t;\n";
        out << "  graph [fontname=\"Arial\"];\n";
        out << "  node [shape=box, style=rounded, fontname=\"Arial\"];\n";
        out << "  edge [fontname=\"Arial\"];\n\n";

        // The escaped name doubles as the node id, so edges below refer to
        // nodes by the same escaped string.
        for (const auto& [name, hits] : nodeHits_) {
            out << "  \"" << dotEscape(name) << "\" [label=\"" << dotEscape(name)
                << "\\nhits: " << hits << "\"];\n";
        }

        out << "\n";
        for (const auto& [edge, count] : edges_) {
            out << "  \"" << dotEscape(edge.first) << "\" -> \""
                << dotEscape(edge.second) << "\" [label=\"" << count << "\"];\n";
        }

        out << "}\n";

        // Push buffered data out now so that a failed write is reported here
        // rather than being lost when the stream is destroyed.
        out.flush();
        if (!out) throw std::runtime_error("cannot write " + fileName);
    }

    /// True while no stack has been recorded.
    bool empty() const {
        return nodeHits_.empty();
    }

private:
    std::map<std::string, int> nodeHits_;                       // label -> occurrences
    std::map<std::pair<std::string, std::string>, int> edges_;  // (caller, callee) -> occurrences
};

/// Copies @p size bytes from @p remoteAddress in another process into
/// @p localBuffer.
///
/// A single process_vm_readv() call against @p memoryPid is tried first. If
/// it does not transfer exactly @p size bytes, the data is fetched one word
/// at a time with PTRACE_PEEKDATA on @p tracedTid.
///
/// @return true when all bytes were copied, false as soon as a ptrace read
///         reports an error.
static bool readProcessMemory(pid_t memoryPid, pid_t tracedTid, std::uint64_t remoteAddress, void* localBuffer, std::size_t size) {
    iovec source{};
    source.iov_base = reinterpret_cast<void*>(remoteAddress);
    source.iov_len = size;

    iovec destination{};
    destination.iov_base = localBuffer;
    destination.iov_len = size;

    const ssize_t transferred = process_vm_readv(memoryPid, &destination, 1, &source, 1, 0);
    if (transferred == static_cast<ssize_t>(size)) {
        return true;
    }

    // Slow path: PEEKDATA returns the data in its result, so errno is the
    // only way to tell a failure from a legitimate word value of -1.
    auto* out = static_cast<std::uint8_t*>(localBuffer);
    for (std::size_t done = 0; done < size;) {
        errno = 0;
        const long word = ptrace(PTRACE_PEEKDATA, tracedTid, reinterpret_cast<void*>(remoteAddress + done), nullptr);
        if (errno != 0) {
            return false;
        }

        // The final word may be only partly needed.
        const std::size_t take = std::min(sizeof(word), size - done);
        std::memcpy(out + done, &word, take);
        done += take;
    }

    return true;
}

/// Scope guard around a ptrace attachment: the constructor attaches to a
/// thread and waits for it to stop, the destructor detaches again.
class TraceAttach {
public:
    /// Attaches to @p tid with PTRACE_ATTACH and blocks until the thread
    /// reports a stop.
    ///
    /// @throws std::runtime_error when the attach fails, when waiting for
    ///         the stop fails, or when the thread terminates instead of
    ///         stopping. On a failed wait a best-effort detach is issued so
    ///         the thread is not left traced.
    explicit TraceAttach(pid_t tid) : tid_(tid) {
        if (ptrace(PTRACE_ATTACH, tid_, nullptr, nullptr) == -1) {
            throw std::runtime_error("PTRACE_ATTACH failed for tid " + std::to_string(tid_) + ": " + std::string(std::strerror(errno)));
        }

        // __WALL makes the wait cover threads created with clone() as well;
        // without it, waiting on a non-leader thread can fail with ECHILD on
        // kernels that do not imply the flag for traced tasks. A wait that is
        // merely interrupted by a signal is retried.
        int status = 0;
        pid_t waited = -1;
        do {
            waited = waitpid(tid_, &status, __WALL);
        } while (waited == -1 && errno == EINTR);

        if (waited == -1) {
            const int waitErrno = errno;
            // Best effort: the destructor will not run for a half-built
            // object, so undo the attach here before reporting the error.
            ptrace(PTRACE_DETACH, tid_, nullptr, nullptr);
            throw std::runtime_error("waitpid failed after attach: " + std::string(std::strerror(waitErrno)));
        }

        if (!WIFSTOPPED(status)) {
            // The thread exited or was killed; there is nothing left to trace
            // or to detach from.
            throw std::runtime_error("tid " + std::to_string(tid_) + " terminated before it could be stopped");
        }

        attached_ = true;
    }

    /// Detaches from the thread if the attach completed.
    ~TraceAttach() {
        if (attached_) {
            ptrace(PTRACE_DETACH, tid_, nullptr, nullptr);
        }
    }

    // The attachment is tied to this object; copying it would detach twice.
    TraceAttach(const TraceAttach&) = delete;
    TraceAttach& operator=(const TraceAttach&) = delete;

private:
    pid_t tid_;
    bool attached_ = false;
};

static std::vector<std::uint64_t> captureAddresses(pid_t memoryPid, pid_t tid) {
    user_regs_struct regs{};
    if (ptrace(PTRACE_GETREGS, tid, nullptr, &regs) == -1) {
        throw std::runtime_error("PTRACE_GETREGS failed for tid " + std::to_string(tid) + ": " + std::string(std::strerror(errno)));
    }

    std::vector<std::uint64_t> addresses;
    addresses.push_back(static_cast<std::uint64_t>(regs.rip));

    std::uint64_t rbp = static_cast<std::uint64_t>(regs.rbp);
    constexpr int MAX_DEPTH = 96;
    constexpr std::uint64_t MAX_FRAME_DISTANCE = 4ULL * 1024ULL * 1024ULL;

    for (int depth = 0; depth < MAX_DEPTH && rbp != 0; ++depth) {
        Frame frame{};
        if (!readProcessMemory(memoryPid, tid, rbp, &frame, sizeof(frame))) break;
        if (frame.returnAddress == 0) break;

        addresses.push_back(frame.returnAddress);

        if (frame.previousRbp <= rbp) break;
        if (frame.previousRbp - rbp > MAX_FRAME_DISTANCE) break;

        rbp = frame.previousRbp;
    }

    return addresses;
}

/// Takes one stack sample of thread @p tid and returns it as labels in
/// root-to-leaf order, ready for CallGraph::addStack().
///
/// The thread is kept stopped only while its registers and stack memory are
/// read; symbolization, which may launch external processes, happens after
/// it has been released.
///
/// @param processPid   process id used for memory reads
/// @param tid          thread to attach to and sample
/// @param symbolizer   address-to-label resolver (its cache is updated)
/// @param labelThread  when true, a "thread tid=<tid>" entry is placed
///                     in front of the stack as an artificial root
/// @throws std::runtime_error when attaching or reading registers fails
static std::vector<std::string> sampleThreadStack(pid_t processPid, pid_t tid, Symbolizer& symbolizer, bool labelThread) {
    std::vector<std::uint64_t> addresses;
    {
        // Detach as soon as the raw addresses are captured so the target is
        // not held stopped during symbol lookup.
        TraceAttach attach(tid);
        addresses = captureAddresses(processPid, tid);
    }

    std::vector<std::string> names;
    names.reserve(addresses.size() + 1);

    if (labelThread && !addresses.empty()) {
        names.push_back("thread tid=" + std::to_string(tid));
    }

    // Addresses were collected leaf -> root; the DOT graph wants root -> leaf.
    for (auto it = addresses.rbegin(); it != addresses.rend(); ++it) {
        names.push_back(symbolizer.name(*it));
    }

    return names;
}

/// Parses @p s as a strictly positive decimal integer.
///
/// @return the parsed value, or @p fallback when @p s is null, is not
///         entirely a number, is zero or negative, or does not fit in an
///         int (so an oversized value is rejected rather than truncated).
static int toInt(const char* s, int fallback) {
    if (!s) return fallback;

    errno = 0;  // strtol reports overflow only through errno
    char* end = nullptr;
    const long v = std::strtol(s, &end, 10);

    if (end == s || *end != '\0') return fallback;
    if (errno == ERANGE || v <= 0 || v > INT_MAX) return fallback;
    return static_cast<int>(v);
}

/// Writes the command-line synopsis, usage examples and a build hint for
/// target programs to standard error. @p argv0 is shown as the program name.
static void printUsage(const char* argv0) {
    std::ostream& err = std::cerr;

    err << "Usage:\n";
    err << "  sudo " << argv0 << " <pid> [samples=20] [delay_ms=100] [out=callgraph.dot] [--threads]\n\n";

    err << "Examples:\n";
    err << "  sudo " << argv0 << " 1 10 200 systemd.dot\n";
    err << "  sudo " << argv0 << " 1234 40 100 app.dot --threads\n";
    err << "  dot -Tpng systemd.dot -o systemd.png\n\n";

    err << "Target programs with frame pointers give better stacks:\n";
    err << "  g++ -O0 -g -fno-omit-frame-pointer -rdynamic target.cpp -o target\n";
}

/// Writes a short troubleshooting list for attach/permission problems to
/// standard error.
static void printPermissionHints() {
    static const char* const kHintLines[] = {
        "\nHints for system processes:\n",
        "  1) Run as root: sudo ./sys_proc_callgraph <pid>\n",
        "  2) Check Yama ptrace policy: cat /proc/sys/kernel/yama/ptrace_scope\n",
        "  3) ptrace_scope=3 disables attach until reboot on many systems.\n",
        "  4) Kernel threads like [kworker/...], [rcu_...] are not normal user-space targets.\n",
    };

    for (const char* line : kHintLines) {
        std::cerr << line;
    }
}

/// Entry point: samples the stacks of a target process and writes the
/// aggregated call graph as a DOT file.
///
/// Arguments: <pid> [samples=20] [delay_ms=100] [out=callgraph.dot], plus an
/// optional "--threads" anywhere on the command line to sample every thread
/// instead of only the main one. A positional slot holding something that
/// starts with "--" keeps its default.
///
/// @return 0 on success (or when help was requested), 1 on bad usage, on an
///         invalid pid, when nothing could be sampled, or on any error.
int main(int argc, char** argv) {
    if (argc < 2 || std::string(argv[1]) == "-h" || std::string(argv[1]) == "--help") {
        printUsage(argv[0]);
        return argc < 2 ? 1 : 0;
    }

    bool allThreads = false;
    for (int i = 1; i < argc; ++i) {
        if (std::string(argv[i]) == "--threads") allThreads = true;
    }

    pid_t pid = static_cast<pid_t>(toInt(argv[1], -1));
    int samples = argc > 2 && std::string(argv[2]).rfind("--", 0) != 0 ? toInt(argv[2], 20) : 20;
    int delayMs = argc > 3 && std::string(argv[3]).rfind("--", 0) != 0 ? toInt(argv[3], 100) : 100;
    std::string outFile = argc > 4 && std::string(argv[4]).rfind("--", 0) != 0 ? argv[4] : "callgraph.dot";

    if (pid <= 0) {
        std::cerr << "Invalid PID\n";
        return 1;
    }

    std::string yama = readTextFile("/proc/sys/kernel/yama/ptrace_scope");
    std::cerr << "target pid=" << pid << " name=" << processName(pid)
              << " samples=" << samples << " delay_ms=" << delayMs
              << " mode=" << (allThreads ? "all threads" : "main thread") << "\n";
    if (!yama.empty()) {
        std::cerr << "kernel.yama.ptrace_scope=" << yama << "\n";
    }

    CallGraph graph;

    try {
        Symbolizer symbolizer(pid);

        for (int i = 0; i < samples; ++i) {
            // The thread list is refreshed for every sample because threads
            // may come and go between rounds.
            std::vector<pid_t> tids = allThreads ? listThreads(pid) : std::vector<pid_t>{pid};
            int collected = 0;

            for (pid_t tid : tids) {
                try {
                    std::vector<std::string> stack = sampleThreadStack(pid, tid, symbolizer, allThreads);
                    graph.addStack(stack);
                    collected++;
                } catch (const std::exception& e) {
                    // One thread failing (e.g. it exited) must not abort the round.
                    std::cerr << "skip tid " << tid << ": " << e.what() << "\n";
                }
            }

            // Report against the list that was actually sampled rather than
            // re-reading /proc, which could yield a different thread count.
            std::cerr << "sample " << (i + 1) << "/" << samples
                      << ": threads_collected=" << collected << "/" << tids.size() << "\n";

            if (i + 1 < samples) {
                // Whole seconds and the sub-second remainder are slept
                // separately: this keeps the usleep() argument below one
                // million and avoids overflowing useconds_t for long delays.
                sleep(static_cast<unsigned>(delayMs / 1000));
                usleep(static_cast<useconds_t>(delayMs % 1000) * 1000);
            }
        }

        if (graph.empty()) {
            std::cerr << "No stack frames collected.\n";
            printPermissionHints();
            return 1;
        }

        graph.writeDot(outFile, "Runtime call graph for pid " + std::to_string(pid) + " (" + processName(pid) + ")");
        std::cout << "Call graph saved to " << outFile << "\n";
        std::cout << "Render example: dot -Tpng " << outFile << " -o callgraph.png\n";
        return 0;
    } catch (const std::exception& e) {
        std::cerr << "Error: " << e.what() << "\n";
        printPermissionHints();
        return 1;
    }
}