Single Root I/O Virtualization (SR-IOV) is a PCIe specification that provides virtual functions (VFs); in this context, an ordinary PCIe function is called a physical function (PF). Virtual functions are used by VMs for better performance. A virtual function lets data flow as it would through a physical function, but with limited capabilities.

Finding the SR-IOV capability space

 897 int pci_iov_init(struct pci_dev *dev)
 898 {
 899     int pos;
 900
 901     if (!pci_is_pcie(dev))
 902         return -ENODEV;
 903
 904     pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_SRIOV);
 905     if (pos)
 906         return sriov_init(dev, pos);
 907
 908     return -ENODEV;
 909 }

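pos is found by pci_find_ext_capability, which calls pci_find_next_ext_capability to walk the extended capabilities in configuration space and look for the one with the SR-IOV ID. Note the comparison at line 566: a position is returned only when the capability ID in the header matches the requested one and the position is not the starting position.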

 540 u16 pci_find_next_ext_capability(struct pci_dev *dev, u16 start, int cap)
 541 {
...
...
 565     while (ttl-- > 0) {
 566         if (PCI_EXT_CAP_ID(header) == cap && pos != start)
 567             return pos;
 568
 569         pos = PCI_EXT_CAP_NEXT(header);
 570         if (pos < PCI_CFG_SPACE_SIZE)
 571             break;
 572
 573         if (pci_read_config_dword(dev, pos, &header) != PCIBIOS_SUCCESSFUL)
 574             break;
 575     }
 576

Once we have the position, the next phase is reading the SR-IOV configuration space in sriov_init.

IOV initialization

The configuration space for SR-IOV is as follows: Example image

In sriov_init, the first thing is to read some of the parameters shown above.

 768
 769     pci_read_config_word(dev, pos + PCI_SRIOV_TOTAL_VF, &total);
 770     if (!total)
 771         return 0;
 772
 773     pci_read_config_dword(dev, pos + PCI_SRIOV_SUP_PGSIZE, &pgsz);
 774     i = PAGE_SHIFT > 12 ? PAGE_SHIFT - 12 : 0;
 775     pgsz &= ~((1 << i) - 1);
 776     if (!pgsz)
 777         return -EIO;
 778
 779     pgsz &= ~(pgsz - 1);
 780     pci_write_config_dword(dev, pos + PCI_SRIOV_SYS_PGSIZE, pgsz);

The next step is parsing the virtual function BARs (base address registers) and storing the information in iov.

 786     nres = 0;
 787     for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
 788         res = &dev->resource[i + PCI_IOV_RESOURCES];
 789         /*
 790          * If it is already FIXED, don't change it, something
 791          * (perhaps EA or header fixups) wants it this way.
 792          */
 793         if (res->flags & IORESOURCE_PCI_FIXED)
 794             bar64 = (res->flags & IORESOURCE_MEM_64) ? 1 : 0;
 795         else
 796             bar64 = __pci_read_base(dev, pci_bar_unknown, res,
 797                         pos + PCI_SRIOV_BAR + i * 4);
 798         if (!res->flags)
 799             continue;
 800         if (resource_size(res) & (PAGE_SIZE - 1)) {
 801             rc = -EIO;
 802             goto failed;
 803         }
 804         iov->barsz[i] = resource_size(res);
 805         res->end = res->start + resource_size(res) * total - 1;
 806         pci_info(dev, "VF(n) BAR%d space: %pR (contains BAR%d for %d VFs)\n",
 807              i, res, i, total);
 808         i += bar64;
 809         nres++;
 810     }

Once we have all the info in iov, iov is assigned to dev->sriov.

 812     iov->pos = pos;
 813     iov->nres = nres;
 814     iov->ctrl = ctrl;
 815     iov->total_VFs = total;
 816     iov->driver_max_VFs = total;
 817     pci_read_config_word(dev, pos + PCI_SRIOV_VF_DID, &iov->vf_device);
 818     iov->pgsz = pgsz;
 819     iov->self = dev;
 820     iov->drivers_autoprobe = true;
 821     pci_read_config_dword(dev, pos + PCI_SRIOV_CAP, &iov->cap);
 822     pci_read_config_byte(dev, pos + PCI_SRIOV_FUNC_LINK, &iov->link);
 823     if (pci_pcie_type(dev) == PCI_EXP_TYPE_RC_END)
 824         iov->link = PCI_DEVFN(PCI_SLOT(dev->devfn), iov->link);
 825
 826     if (pdev)
 827         iov->dev = pci_dev_get(pdev);
 828     else
 829         iov->dev = dev;
 830
 831     dev->sriov = iov;
 832     dev->is_physfn = 1;
 833     rc = compute_max_vf_buses(dev);
 834     if (rc)
 835         goto fail_max_buses;
 836