HFI1: port to IFS 10.7 rpv1 and support queued_spin_lock in Linux 3.10.0-693.11.6

Balazs Gerofi
2018-04-09 22:21:45 +09:00
parent af22ce62d2
commit 8164b63fc2
11 changed files with 110 additions and 75 deletions

View File

@@ -1,40 +1,40 @@
 struct hfi1_ctxtdata {
 	union {
-		char whole_struct[1456];
+		char whole_struct[1408];
 		struct {
 			char padding0[144];
-			unsigned int ctxt;
+			u16 ctxt;
 		};
 		struct {
-			char padding1[172];
+			char padding1[168];
 			u32 rcv_array_groups;
 		};
 		struct {
-			char padding2[176];
+			char padding2[172];
 			u32 eager_base;
 		};
 		struct {
-			char padding3[180];
+			char padding3[176];
 			u32 expected_count;
 		};
 		struct {
-			char padding4[184];
+			char padding4[180];
 			u32 expected_base;
 		};
 		struct {
-			char padding5[192];
+			char padding5[184];
 			struct exp_tid_set tid_group_list;
 		};
 		struct {
-			char padding6[216];
+			char padding6[208];
 			struct exp_tid_set tid_used_list;
 		};
 		struct {
-			char padding7[240];
+			char padding7[232];
 			struct exp_tid_set tid_full_list;
 		};
 		struct {
-			char padding8[432];
+			char padding8[392];
 			struct hfi1_devdata *dd;
 		};
 	};
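The padded unions in this and the following headers mirror the Linux driver's structure layouts: each anonymous struct overlays the same storage and places one mirrored field at its exact byte offset in the Linux build, while whole_struct pins the total size. A minimal standalone sketch of the pattern, with made-up offsets rather than values from the generated headers:

#include <assert.h>
#include <stddef.h>
#include <stdint.h>

/* Hypothetical mirror of a 352-byte Linux struct that has a u16 at
 * byte offset 4 and a pointer at byte offset 16 (illustrative only). */
struct linux_mirror {
	union {
		char whole_struct[352];		/* sizeof() on the Linux side */
		struct {
			char padding0[4];	/* bytes preceding the field */
			uint16_t n_max_reqs;	/* lands at offset 4 */
		};
		struct {
			char padding1[16];
			void *dd;		/* lands at offset 16 */
		};
	};
};

int main(void)
{
	/* All anonymous structs share the union's storage, so each
	 * mirrored field sits at its Linux offset and the type can be
	 * used to dereference objects allocated by the Linux driver. */
	_Static_assert(sizeof(struct linux_mirror) == 352, "size mismatch");
	assert(offsetof(struct linux_mirror, n_max_reqs) == 4);
	assert(offsetof(struct linux_mirror, dd) == 16);
	return 0;
}

The generator script further below extracts these offsets from the hfi1.ko debug information, so the mirrors track the exact kernel build instead of hard-coding them.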

View File

@@ -1,64 +1,64 @@
 struct hfi1_devdata {
 	union {
-		char whole_struct[7232];
+		char whole_struct[7872];
 		struct {
-			char padding0[2984];
+			char padding0[3368];
 			u8 *kregbase1;
 		};
 		struct {
-			char padding1[2992];
+			char padding1[3376];
 			resource_size_t physaddr;
 		};
 		struct {
-			char padding2[3320];
+			char padding2[3704];
 			u64 default_desc1;
 		};
 		struct {
-			char padding3[3352];
+			char padding3[3736];
 			dma_addr_t sdma_pad_phys;
 		};
 		struct {
-			char padding4[3376];
+			char padding4[3760];
 			struct sdma_engine *per_sdma;
 		};
 		struct {
-			char padding5[3384];
+			char padding5[3768];
 			struct sdma_vl_map *sdma_map;
 		};
 		struct {
-			char padding6[3432];
+			char padding6[3816];
 			void *piobase;
 		};
 		struct {
-			char padding7[3440];
+			char padding7[3824];
 			void *rcvarray_wc;
 		};
 		struct {
-			char padding8[3648];
+			char padding8[4040];
 			long unsigned int *events;
 		};
 		struct {
-			char padding9[3684];
+			char padding9[4076];
 			u32 chip_rcv_contexts;
 		};
 		struct {
-			char padding10[3688];
+			char padding10[4080];
 			u32 chip_rcv_array_count;
 		};
 		struct {
-			char padding11[6872];
+			char padding11[7392];
 			struct hfi1_pportdata *pport;
 		};
 		struct {
-			char padding12[6896];
+			char padding12[7416];
 			u16 flags;
 		};
 		struct {
-			char padding13[6899];
-			u8 first_user_ctxt;
+			char padding13[7419];
+			u8 first_dyn_alloc_ctxt;
 		};
 		struct {
-			char padding14[6920];
+			char padding14[7432];
 			u64 sc2vl[4];
 		};
 	};

View File

@@ -0,0 +1,29 @@
+struct hfi1_user_sdma_pkt_q {
+	union {
+		char whole_struct[352];
+		struct {
+			char padding0[4];
+			u16 n_max_reqs;
+		};
+		struct {
+			char padding1[8];
+			atomic_t n_reqs;
+		};
+		struct {
+			char padding2[16];
+			struct hfi1_devdata *dd;
+		};
+		struct {
+			char padding3[32];
+			struct user_sdma_request *reqs;
+		};
+		struct {
+			char padding4[40];
+			long unsigned int *req_in_use;
+		};
+		struct {
+			char padding5[288];
+			unsigned int state;
+		};
+	};
+};

View File

@@ -1,8 +1,8 @@
 struct hfi1_pportdata {
 	union {
-		char whole_struct[12544];
+		char whole_struct[12928];
 		struct {
-			char padding0[1907];
+			char padding0[2113];
 			u8 vls_operational;
 		};
 	};

View File

@@ -88,19 +88,39 @@
 #define atomic_t ihk_atomic_t
 typedef ihk_spinlock_t spinlock_t;
 /* From: kernel-xppsl_1.5.2/include/linux/irqsave.h */
-#define spin_lock_irqsave(lock, flags) \
+/*
+ * Linux queued_spin_lock compatible spin_lock, without the queue.
+ * We use _Q_PENDING_VAL as locked value to make sure no Linux cores
+ * enter the queue phase in queued_spin_lock_slowpath().
+ */
+#define _Q_LOCKED_OFFSET	0
+#define _Q_LOCKED_BITS		8
+#define _Q_PENDING_OFFSET	(_Q_LOCKED_OFFSET + _Q_LOCKED_BITS)
+#define _Q_PENDING_VAL		(1U << _Q_PENDING_OFFSET)
+#define linux_spin_lock_irqsave(lock, flags) \
 do { \
-	flags = ihk_mc_spinlock_lock(lock); \
+	uint32_t val; \
+	flags = cpu_disable_interrupt_save(); \
+	do { \
+		val = atomic_cmpxchg4( \
+			(unsigned int *)lock, 0, \
+			_Q_PENDING_VAL); \
+		if (val == 0) \
+			break; \
+		cpu_pause(); \
+	} \
+	while (1); \
 } while (0)
-#define spin_unlock_irqrestore(lock, flags) \
+#define linux_spin_unlock_irqrestore(lock, flags) \
 do { \
-	ihk_mc_spinlock_unlock(lock, flags); \
+	ihk_atomic_set((ihk_atomic_t *)lock, 0); \
+	cpu_restore_interrupt(flags); \
 } while (0)
 #define spin_lock ihk_mc_spinlock_lock_noirq
 #define spin_unlock ihk_mc_spinlock_unlock_noirq
 /*****************************************************/
 #define ____cacheline_aligned_in_smp __attribute__((aligned(64)))
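A note on the scheme above: on this RHEL kernel (3.10.0-693.x), queued_spin_lock_slowpath() treats a lock word equal to exactly _Q_PENDING_VAL as a pending-to-locked hand-off in progress and spins until it changes, so a McKernel core holding the lock with that value keeps contending Linux cores out of the MCS queue, which McKernel does not participate in. Below is a standalone user-space sketch of the same acquire/release protocol, using GCC atomic builtins in place of atomic_cmpxchg4() and cpu_pause(); an illustration under those assumptions, not the macros verbatim:

#include <stdint.h>
#include <stdio.h>

#define _Q_LOCKED_OFFSET	0
#define _Q_LOCKED_BITS		8
#define _Q_PENDING_OFFSET	(_Q_LOCKED_OFFSET + _Q_LOCKED_BITS)
#define _Q_PENDING_VAL		(1U << _Q_PENDING_OFFSET)

static uint32_t lock_word;	/* qspinlock value; 0 means unlocked */

static void compat_lock(void)
{
	uint32_t expected;

	do {
		expected = 0;
		/* Install the pending bit rather than the locked byte:
		 * a Linux core observing exactly _Q_PENDING_VAL waits for
		 * the "hand-off" to finish instead of entering the queue. */
		if (__atomic_compare_exchange_n(&lock_word, &expected,
						_Q_PENDING_VAL, 0,
						__ATOMIC_ACQUIRE,
						__ATOMIC_RELAXED))
			return;
	} while (1);
}

static void compat_unlock(void)
{
	/* Return the word to 0, as linux_spin_unlock_irqrestore() does. */
	__atomic_store_n(&lock_word, 0, __ATOMIC_RELEASE);
}

int main(void)
{
	compat_lock();
	printf("lock word while held: 0x%x\n", (unsigned)lock_word); /* 0x100 */
	compat_unlock();
	return 0;
}

Later mainline kernels appear to bound this pending-wait (_Q_PENDING_LOOPS), so the trick is tied to the kernel version named in the commit title.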

View File

@@ -386,11 +386,9 @@ static inline int sdma_running(struct sdma_engine *engine)
 	unsigned long flags;
 	int ret;
-	hfi1_cdbg(AIOWRITE, "+");
-	spin_lock_irqsave(&engine->tail_lock, flags);
+	linux_spin_lock_irqsave(&engine->tail_lock, flags);
 	ret = __sdma_running(engine);
-	spin_unlock_irqrestore(&engine->tail_lock, flags);
-	hfi1_cdbg(AIOWRITE, "-");
+	linux_spin_unlock_irqrestore(&engine->tail_lock, flags);
 	return ret;
 }

View File

@@ -114,26 +114,7 @@ extern uint extended_psn;
 #define KDETH_OM_LARGE_SHIFT	6
 #define KDETH_OM_MAX_SIZE	(1 << ((KDETH_OM_LARGE / KDETH_OM_SMALL) + 1))
 /* The original size on Linux is 376 B */
-struct hfi1_user_sdma_pkt_q {
-	struct list_head list;
-	unsigned ctxt;
-	u16 subctxt;
-	u16 n_max_reqs;
-	atomic_t n_reqs;
-	u16 reqidx;
-	struct hfi1_devdata *dd;
-	void *txreq_cache; // struct kmem_cache *txreq_cache;
-	struct user_sdma_request *reqs;
-	unsigned long *req_in_use;
-	struct iowait busy;
-	unsigned state;
-	wait_queue_head_t wait;
-	unsigned long unpinned;
-	void *handler; // struct mmu_rb_handler *handler;
-	atomic_t n_locked;
-	void *mm; // struct mm_struct *mm;
-};
+#include <hfi1/hfi1_generated_hfi1_user_sdma_pkt_q.h>
 struct hfi1_user_sdma_comp_q {
 	u16 nentries;

View File

@@ -37,7 +37,7 @@ HFI1_KO="${1-$(modinfo -n hfi1)}" || \
 "$DES_BIN" "$HFI1_KO" hfi1_devdata \
 	per_sdma sdma_pad_phys sdma_map pport chip_rcv_array_count \
 	kregbase1 piobase physaddr rcvarray_wc default_desc1 flags \
-	sc2vl events first_user_ctxt chip_rcv_contexts \
+	sc2vl events first_dyn_alloc_ctxt chip_rcv_contexts \
 	> "${HDR_PREFIX}devdata.h"
 "$DES_BIN" "$HFI1_KO" hfi1_filedata \
@@ -65,3 +65,7 @@ HFI1_KO="${1-$(modinfo -n hfi1)}" || \
 "$DES_BIN" "$HFI1_KO" user_sdma_txreq \
 	hdr txreq list req flags busycount seqnum \
 	> "${HDR_PREFIX}user_sdma_txreq.h"
+
+"$DES_BIN" "$HFI1_KO" hfi1_user_sdma_pkt_q \
+	dd req_in_use reqs n_reqs state n_max_reqs \
+	> "${HDR_PREFIX}hfi1_user_sdma_pkt_q.h"

View File

@@ -347,7 +347,7 @@ int sdma_send_txlist(struct sdma_engine *sde, struct iowait_work *wait,
 	u32 submit_count = 0, flush_count = 0, total_count;
 retry_lock:
-	spin_lock_irqsave(&sde->tail_lock, flags);
+	linux_spin_lock_irqsave(&sde->tail_lock, flags);
 retry:
 	list_for_each_entry_safe(tx, tx_next, tx_list, list) {
 		tx->wait = iowait_ioww_to_iow(wait);
@@ -378,7 +378,7 @@ update_tail:
 	iowait_sdma_add(iowait_ioww_to_iow(wait), total_count);
 	if (tail != INVALID_TAIL)
 		sdma_update_tail(sde, tail);
-	spin_unlock_irqrestore(&sde->tail_lock, flags);
+	linux_spin_unlock_irqrestore(&sde->tail_lock, flags);
 	*count_out = total_count;
 	return ret;
@@ -404,7 +404,7 @@ nodesc:
 	}
 	dkprintf("%s: releasing lock and reiterating.. \n", __FUNCTION__);
-	spin_unlock_irqrestore(&sde->tail_lock, flags);
+	linux_spin_unlock_irqrestore(&sde->tail_lock, flags);
 	cpu_pause();
 	ret = 0;
 	goto retry_lock;

View File

@@ -429,7 +429,7 @@ int hfi1_user_exp_rcv_invalid(struct hfi1_filedata *fd, struct hfi1_tid_info *ti
 {
 	struct hfi1_ctxtdata *uctxt = fd->uctxt;
 	unsigned long *ev = uctxt->dd->events +
-		(((uctxt->ctxt - uctxt->dd->first_user_ctxt) *
+		(((uctxt->ctxt - uctxt->dd->first_dyn_alloc_ctxt) *
 		  HFI1_MAX_SHARED_CTXTS) + fd->subctxt);
 	int ret = 0;
@@ -764,7 +764,7 @@ static int tid_rb_invalidate(struct hfi1_filedata *fdata,
 	 * process in question.
 	 */
 	ev = uctxt->dd->events +
-		(((uctxt->ctxt - uctxt->dd->first_user_ctxt) *
+		(((uctxt->ctxt - uctxt->dd->first_dyn_alloc_ctxt) *
 		  HFI1_MAX_SHARED_CTXTS) + fdata->subctxt);
 	set_bit(_HFI1_EVENT_TID_MMU_NOTIFY_BIT, ev);
 }

View File

@@ -87,7 +87,7 @@ struct user_sdma_iovec {
 	 * Physical address corresponding to the page that contains
	 * iov.iov_base and the corresponding page size.
	 */
-	unsigned base_pgsize;
+	unsigned int base_pgsize;
 	unsigned long base_phys;
 #endif
	/*
/*
@@ -552,6 +552,7 @@ int hfi1_user_sdma_process_request(void *private_data, struct iovec *iovec,
 	int req_queued = 0;
 	u16 dlid;
 	u32 selector;
+	unsigned long size_info = sizeof(info);
 	struct kmalloc_cache_header *txreq_cache =
 		&cpu_local_var(txreq_cache);
@@ -561,10 +562,10 @@ int hfi1_user_sdma_process_request(void *private_data, struct iovec *iovec,
 			  SDMA,
 			  "[%u:%u:%u] First vector not big enough for header %lu/%lu",
 			  dd->unit, uctxt->ctxt, fd->subctxt,
-			  iovec[idx].iov_len, sizeof(info) + sizeof(req->hdr));
+			  iovec[idx].iov_len, size_info + sizeof(req->hdr));
 		return -EINVAL;
 	}
-	ret = copy_from_user(&info, iovec[idx].iov_base, sizeof(info));
+	ret = copy_from_user(&info, iovec[idx].iov_base, size_info);
 	if (ret) {
 		hfi1_cdbg(SDMA, "[%u:%u:%u] Failed to copy info QW (%d)",
 			  dd->unit, uctxt->ctxt, fd->subctxt, ret);
@@ -600,6 +601,7 @@ int hfi1_user_sdma_process_request(void *private_data, struct iovec *iovec,
 		return -EINVAL;
 	}
+	/* Try to claim the request. */
 	if (test_and_set_bit(info.comp_idx, pq->req_in_use)) {
 		hfi1_cdbg(SDMA, "[%u:%u:%u] Entry %u is in use",
@@ -611,8 +613,8 @@ int hfi1_user_sdma_process_request(void *private_data, struct iovec *iovec,
 	/*
	 * All safety checks have been done and this request has been claimed.
	 */
-	hfi1_cdbg(SDMA, "[%u:%u:%u] Using req/comp entry %u\n", dd->unit,
-		  uctxt->ctxt, fd->subctxt, info.comp_idx);
+	//trace_hfi1_sdma_user_process_request(dd, uctxt->ctxt, fd->subctxt,
+	//				     info.comp_idx);
 	req = pq->reqs + info.comp_idx;
 	req->data_iovs = req_iovcnt(info.ctrl) - 1; /* subtract header vector */
 	req->data_len = 0;
@@ -631,7 +633,7 @@ int hfi1_user_sdma_process_request(void *private_data, struct iovec *iovec,
 	INIT_LIST_HEAD(&req->txps);
-	fast_memcpy(&req->info, &info, sizeof(info));
+	fast_memcpy(&req->info, &info, size_info);
 	if (req_opcode(info.ctrl) == EXPECTED) {
 		/* expected must have a TID info and at least one data vector */
@@ -651,7 +653,7 @@ int hfi1_user_sdma_process_request(void *private_data, struct iovec *iovec,
 		goto free_req;
 	}
	/* Copy the header from the user buffer */
-	ret = copy_from_user(&req->hdr, iovec[idx].iov_base + sizeof(info),
+	ret = copy_from_user(&req->hdr, iovec[idx].iov_base + size_info,
 			     sizeof(req->hdr));
 	if (ret) {
 		SDMA_DBG(req, "Failed to copy header template (%d)", ret);
@@ -841,6 +843,7 @@ int hfi1_user_sdma_process_request(void *private_data, struct iovec *iovec,
 	set_comp_state(pq, cq, info.comp_idx, QUEUED, 0);
+	atomic_inc(&pq->n_reqs);
 	req_queued = 1;
	/* Send the first N packets in the request to buy us some time */
 	ret = user_sdma_send_pkts(req, pcount, txreq_cache);
 	if (unlikely(ret < 0 && ret != -EBUSY)) {