This is a quick write-up about QEMU DOE emulation. DOE is part of the PCIe and CXL extended capabilities. Technically, it’s optional, but it is an important part of IDE starting with Gen5.

Let’s start with cxl_usp_read_config, which is one of the places where pcie_doe_read_config is called. It seems DOE is only supported with CXL, which kind of matches what the kernel implements. If the address is not a DOE extended capability address, it falls back to pci_default_read_config.

/*
 * Config-space read hook for the CXL upstream port.
 *
 * The access is first offered to the port's CDAT DOE capability. When the
 * DOE code does not claim it, the read is forwarded to the generic PCI
 * config-space reader.
 */
static uint32_t cxl_usp_read_config(PCIDevice *d, uint32_t address, int len)
{
    uint32_t doe_val;
    CXLUpstreamPort *port = CXL_USP(d);

    /* Not a DOE register: let the default handler serve the read. */
    if (!pcie_doe_read_config(&port->doe_cdat, address, len, &doe_val)) {
        return pci_default_read_config(d, address, len);
    }

    return doe_val;
}

pcie_doe_read_config reads the fields when the address matches a DW in the DOE extended configuration address space. Other than configuration access, the important thing here is reading/writing from read_mbox.

/*
 * Service a config-space read that may target the DOE capability.
 *
 * @doe_cap: DOE capability state (offset, register fields, read mailbox)
 * @addr:    config-space address; rebased below so that it is relative to
 *           the start of the DOE capability
 * @size:    access width in bytes
 * @buf:     out-parameter; zeroed first, then filled with the register value
 *
 * The return statement is in the elided tail of this excerpt. The caller
 * shown in this write-up (cxl_usp_read_config) uses *buf only when the
 * result is true.
 */
bool pcie_doe_read_config(DOECap *doe_cap, uint32_t addr, int size,
                          uint32_t *buf)
{
    uint16_t doe_offset = doe_cap->offset;


    /* From here on, addr is an offset inside the DOE capability. */
    addr -= doe_offset;
    /* Any field not explicitly deposited below reads back as 0. */
    *buf = 0;

    if (range_covers_byte(PCI_EXP_DOE_CAP, DWORD_BYTE, addr)) {
        /* Capability register: interrupt support and interrupt message number. */
        *buf = FIELD_DP32(*buf, PCI_DOE_CAP_REG, INTR_SUPP,
                          doe_cap->cap.intr);
        *buf = FIELD_DP32(*buf, PCI_DOE_CAP_REG, DOE_INTR_MSG_NUM,
                          doe_cap->cap.vec);
    } else if (range_covers_byte(PCI_EXP_DOE_CTRL, DWORD_BYTE, addr)) {
        /* Must return ABORT=0 and GO=0 */
        /* Only the interrupt-enable bit is reported back. */
        *buf = FIELD_DP32(*buf, PCI_DOE_CAP_CONTROL, DOE_INTR_EN,
                          doe_cap->ctrl.intr);
    } else if (range_covers_byte(PCI_EXP_DOE_STATUS, DWORD_BYTE, addr)) {
        /* Status register: busy, interrupt status, error and object-ready. */
        *buf = FIELD_DP32(*buf, PCI_DOE_CAP_STATUS, DOE_BUSY,
                          doe_cap->status.busy);
        *buf = FIELD_DP32(*buf, PCI_DOE_CAP_STATUS, DOE_INTR_STATUS,
                          doe_cap->status.intr);
        *buf = FIELD_DP32(*buf, PCI_DOE_CAP_STATUS, DOE_ERROR,
                          doe_cap->status.error);
        *buf = FIELD_DP32(*buf, PCI_DOE_CAP_STATUS, DATA_OBJ_RDY,
                          doe_cap->status.ready);
    /* Mailbox should be DW accessed */
    } else if (addr == PCI_EXP_DOE_RD_DATA_MBOX && size == DWORD_BYTE) {
        /*
         * Return the DW at the current read index, but only while a response
         * is ready and no error is flagged; otherwise *buf stays 0. Reading
         * does not advance read_mbox_idx here.
         */
        if (doe_cap->status.ready && !doe_cap->status.error) {
            *buf = doe_cap->read_mbox[doe_cap->read_mbox_idx];
        }
    }

  ...
}

It’s probably a good idea to start from the top. pcie_doe_init adds the capability and creates the mailboxes. It’s called from cxl_usp_realize.

/*
 * Set up a DOE capability instance on a PCIe device.
 *
 * Registers the DOE extended capability (version 0x1) at @offset in the
 * config space of @dev, allocates zero-filled read and write mailboxes of
 * PCI_DOE_DW_SIZE_MAX dwords each, and resets the mailbox state.
 *
 * @protocols, @intr and @vec are not used in the portion of the function
 * shown here.
 */
void pcie_doe_init(PCIDevice *dev, DOECap *doe_cap, uint16_t offset,
                   DOEProtocol *protocols, bool intr, uint16_t vec)
{
    /* Both mailboxes have the same capacity, expressed in bytes. */
    const size_t mbox_bytes = PCI_DOE_DW_SIZE_MAX * DWORD_BYTE;

    pcie_add_capability(dev, PCI_EXT_CAP_ID_DOE, 0x1, offset, PCI_DOE_SIZEOF);

    doe_cap->read_mbox = g_malloc0(mbox_bytes);
    doe_cap->write_mbox = g_malloc0(mbox_bytes);

    pcie_doe_reset_mbox(doe_cap);
}

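Back to post-init writes: when the sender (the Linux kernel) writes to the control register, QEMU calls handlers for the control bits that were set. Also, when the sender writes to the read mailbox, QEMU advances the read index and clears the ready bit in the status register once the whole response has been consumed.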

/*
 * Handle a config-space write to the DOE capability (abbreviated excerpt).
 *
 * @doe_cap: DOE capability state
 * @addr:    register being written; compared against the PCI_EXP_DOE_*
 *           offsets below
 * @val:     value written
 * @size:    access width in bytes
 *
 * NOTE(review): the case labels are capability-relative offsets, so addr is
 * presumably rebased in code omitted from this excerpt - confirm against the
 * full source.
 */
void pcie_doe_write_config(DOECap *doe_cap,
                           uint32_t addr, uint32_t val, int size)
{

    switch (addr) {
    case PCI_EXP_DOE_CTRL:
        /*
         * ABORT takes priority: clear ready/error, reset the mailboxes and
         * return without looking at GO.
         */
        if (FIELD_EX32(val, PCI_DOE_CAP_CONTROL, DOE_ABORT)) {
            pcie_doe_set_ready(doe_cap, 0);
            pcie_doe_set_error(doe_cap, 0);
            pcie_doe_reset_mbox(doe_cap);
            return;
        }

        /* GO: process the request accumulated in the write mailbox. */
        if (FIELD_EX32(val, PCI_DOE_CAP_CONTROL, DOE_GO)) {
            pcie_doe_prepare_rsp(doe_cap);
        }
    ...
    ...
    ...
    case PCI_EXP_DOE_RD_DATA_MBOX:
        /* Mailbox should be DW accessed */
        if (size != DWORD_BYTE) {
            return;
        }
        /* A write to the read mailbox consumes one DW of the response. */
        doe_cap->read_mbox_idx++;
        if (doe_cap->read_mbox_idx == doe_cap->read_mbox_len) {
            /* Entire response has been read: reset and drop the ready flag. */
            pcie_doe_reset_mbox(doe_cap);
            pcie_doe_set_ready(doe_cap, 0);
        } else if (doe_cap->read_mbox_idx > doe_cap->read_mbox_len) {
            /* Underflow */
            pcie_doe_set_error(doe_cap, 1);
        }
        break;
    case PCI_EXP_DOE_WR_DATA_MBOX:
        /* Mailbox should be DW accessed */
        if (size != DWORD_BYTE) {
            return;
        }
        /*
         * Append the DW to the request being assembled.
         * NOTE(review): no check of write_mbox_len against the mailbox
         * capacity is visible in this excerpt - confirm one exists upstream.
         */
        doe_cap->write_mbox[doe_cap->write_mbox_len] = val;
        doe_cap->write_mbox_len++;
        break;

handle_request is set depending on the protocol. When a discovery request is sent, pcie_doe_discovery is called.

/*
 * Dispatch the request held in the write mailbox and publish the outcome
 * (abbreviated excerpt).
 *
 * The first DW of the write mailbox selects the handler: the built-in
 * discovery handler, or the handle_request callback of a matching entry in
 * doe_cap->protocols. On success the ready flag is set; otherwise the
 * mailboxes are reset.
 *
 * The locals p, handle_request and success are declared in parts of the
 * function that this excerpt omits.
 */
static void pcie_doe_prepare_rsp(DOECap *doe_cap)
{

    /* Header DW 1 identifies the vendor ID and data object type. */
    if (doe_cap->write_mbox[0] ==
        DATA_OBJ_BUILD_HEADER1(PCI_VENDOR_ID_PCI_SIG, PCI_SIG_DOE_DISCOVERY)) {
        handle_request = pcie_doe_discovery;
    } else {
        /*
         * NOTE(review): the "- 1" presumably excludes the built-in discovery
         * protocol from protocol_num, matching the "index - 1" lookup in
         * pcie_doe_discovery - confirm.
         */
        for (p = 0; p < doe_cap->protocol_num - 1; p++) {
            if (doe_cap->write_mbox[0] ==
                pcie_doe_build_protocol(&doe_cap->protocols[p])) {
                handle_request = doe_cap->protocols[p].handle_request;
                break;
            }
        }
    }

   ...
   ...
    /*
     * Run the handler only if one was found and the number of DWs written
     * equals the length of the object in the write mailbox.
     */
    if (handle_request && (doe_cap->write_mbox_len ==
        pcie_doe_get_obj_len(pcie_doe_get_write_mbox_ptr(doe_cap)))) {
        success = handle_request(doe_cap);
    }

    if (success) {
        /* Response is available in the read mailbox. */
        pcie_doe_set_ready(doe_cap, 1);
    } else {
        /* No handler, length mismatch or handler failure: discard state. */
        pcie_doe_reset_mbox(doe_cap);
    }
}

In pcie_doe_discovery, the response is created (including setting the next index) and pcie_doe_set_rsp is called.

/*
 * Build the response to a DOE discovery request and queue it in the read
 * mailbox (abbreviated excerpt).
 *
 * The locals rsp, prot and index are set up in the part of the function
 * that this excerpt omits.
 *
 * Returns true on every path shown here.
 */
static bool pcie_doe_discovery(DOECap *doe_cap)
{
    ...
    ...
    /* Response header: PCI-SIG discovery object, length expressed in DWs. */
    rsp.header = (DOEHeader) {
        .vendor_id = PCI_VENDOR_ID_PCI_SIG,
        .data_obj_type = PCI_SIG_DOE_DISCOVERY,
        .length = DIV_ROUND_UP(sizeof(DoeDiscoveryRsp), DWORD_BYTE),
    };

    if (index == 0) {
        /* Index 0 describes the discovery protocol itself. */
        rsp.vendor_id = PCI_VENDOR_ID_PCI_SIG;
        rsp.data_obj_type = PCI_SIG_DOE_DISCOVERY;
    } else {
        if (index < doe_cap->protocol_num) {
            /* Indices 1.. map onto protocols[0..]. */
            prot = &doe_cap->protocols[index - 1];
            rsp.vendor_id = prot->vendor_id;
            rsp.data_obj_type = prot->data_obj_type;
        } else {
            /* Out-of-range index: answer with all-ones fields. */
            rsp.vendor_id = 0xFFFF;
            rsp.data_obj_type = 0xFF;
        }
    }

    /* next_index is 0 when this entry is the last one, else index + 1. */
    if (index + 1 == doe_cap->protocol_num) {
        rsp.next_index = 0;
    } else {
        rsp.next_index = index + 1;
    }

    pcie_doe_set_rsp(doe_cap, &rsp);

    return true;
}

pcie_doe_set_rsp just appends the response to read_mbox.

/*
 * Append a response object to the read mailbox.
 *
 * @doe_cap: DOE capability whose read mailbox receives the data
 * @rsp:     response object; its length in DWs is obtained with
 *           pcie_doe_get_obj_len()
 *
 * The object is copied to the current end of the read mailbox and
 * read_mbox_len grows by the object's DW count.
 */
void pcie_doe_set_rsp(DOECap *doe_cap, void *rsp)
{
    uint32_t dw_count = pcie_doe_get_obj_len(rsp);
    void *dst = doe_cap->read_mbox + doe_cap->read_mbox_len;

    memcpy(dst, rsp, dw_count * DWORD_BYTE);
    doe_cap->read_mbox_len += dw_count;
}