]> git.karo-electronics.de Git - karo-tx-linux.git/blob - Documentation/mic/mpssd/mpssd.c
Sample Implementation of Intel MIC User Space Daemon.
[karo-tx-linux.git] / Documentation / mic / mpssd / mpssd.c
1 /*
2  * Intel MIC Platform Software Stack (MPSS)
3  *
4  * Copyright(c) 2013 Intel Corporation.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License, version 2, as
8  * published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13  * General Public License for more details.
14  *
15  * The full GNU General Public License is included in this distribution in
16  * the file called "COPYING".
17  *
18  * Intel MIC User Space Tools.
19  */
20
21 #define _GNU_SOURCE
22
23 #include <stdlib.h>
24 #include <fcntl.h>
25 #include <getopt.h>
26 #include <assert.h>
27 #include <unistd.h>
28 #include <stdbool.h>
29 #include <signal.h>
30 #include <poll.h>
31 #include <features.h>
32 #include <sys/types.h>
33 #include <sys/stat.h>
34 #include <sys/mman.h>
35 #include <sys/socket.h>
36 #include <linux/virtio_ring.h>
37 #include <linux/virtio_net.h>
38 #include <linux/virtio_console.h>
39 #include <linux/virtio_blk.h>
40 #include <linux/version.h>
41 #include "mpssd.h"
42 #include <linux/mic_ioctl.h>
43 #include <linux/mic_common.h>
44
45 static void init_mic(struct mic_info *mic);
46
47 static FILE *logfp;
48 static struct mic_info mic_list;
49
/* Number of elements in a true array (not valid on pointers). */
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))

/* Type-checked minimum; each argument is evaluated exactly once. */
#define min_t(type, x, y) ({				\
		type __min1 = (x);			\
		type __min2 = (y);			\
		__min1 < __min2 ? __min1 : __min2; })

/*
 * align addr on a size boundary - adjust address up/down if needed.
 * size must be a power of two. Every argument is parenthesized so that
 * compound expressions (e.g. "a ? b : c" or "n << 1") expand correctly.
 */
#define _ALIGN_DOWN(addr, size)	((addr) & (~((size) - 1)))
#define _ALIGN_UP(addr, size)	_ALIGN_DOWN((addr) + (size) - 1, (size))

/* align addr on a size boundary - adjust address up if needed */
#define _ALIGN(addr, size)	_ALIGN_UP((addr), (size))

/* to align the pointer to the (next) page boundary */
#define PAGE_ALIGN(addr)	_ALIGN((addr), PAGE_SIZE)

/* Force a single volatile access to x. */
#define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x))

#define GSO_ENABLED		1
#define MAX_GSO_SIZE		(64 * 1024)
#define ETH_H_LEN		14
/* Largest packet buffer: GSO payload plus Ethernet header, 64-byte aligned */
#define MAX_NET_PKT_SIZE	(_ALIGN_UP(MAX_GSO_SIZE + ETH_H_LEN, 64))
#define MIC_DEVICE_PAGE_END	0x1000

#ifndef VIRTIO_NET_HDR_F_DATA_VALID
#define VIRTIO_NET_HDR_F_DATA_VALID	2	/* Csum is valid */
#endif
78
79 static struct {
80         struct mic_device_desc dd;
81         struct mic_vqconfig vqconfig[2];
82         __u32 host_features, guest_acknowledgements;
83         struct virtio_console_config cons_config;
84 } virtcons_dev_page = {
85         .dd = {
86                 .type = VIRTIO_ID_CONSOLE,
87                 .num_vq = ARRAY_SIZE(virtcons_dev_page.vqconfig),
88                 .feature_len = sizeof(virtcons_dev_page.host_features),
89                 .config_len = sizeof(virtcons_dev_page.cons_config),
90         },
91         .vqconfig[0] = {
92                 .num = htole16(MIC_VRING_ENTRIES),
93         },
94         .vqconfig[1] = {
95                 .num = htole16(MIC_VRING_ENTRIES),
96         },
97 };
98
99 static struct {
100         struct mic_device_desc dd;
101         struct mic_vqconfig vqconfig[2];
102         __u32 host_features, guest_acknowledgements;
103         struct virtio_net_config net_config;
104 } virtnet_dev_page = {
105         .dd = {
106                 .type = VIRTIO_ID_NET,
107                 .num_vq = ARRAY_SIZE(virtnet_dev_page.vqconfig),
108                 .feature_len = sizeof(virtnet_dev_page.host_features),
109                 .config_len = sizeof(virtnet_dev_page.net_config),
110         },
111         .vqconfig[0] = {
112                 .num = htole16(MIC_VRING_ENTRIES),
113         },
114         .vqconfig[1] = {
115                 .num = htole16(MIC_VRING_ENTRIES),
116         },
117 #if GSO_ENABLED
118                 .host_features = htole32(
119                 1 << VIRTIO_NET_F_CSUM |
120                 1 << VIRTIO_NET_F_GSO |
121                 1 << VIRTIO_NET_F_GUEST_TSO4 |
122                 1 << VIRTIO_NET_F_GUEST_TSO6 |
123                 1 << VIRTIO_NET_F_GUEST_ECN |
124                 1 << VIRTIO_NET_F_GUEST_UFO),
125 #else
126                 .host_features = 0,
127 #endif
128 };
129
130 static const char *mic_config_dir = "/etc/sysconfig/mic";
131 static const char *virtblk_backend = "VIRTBLK_BACKEND";
132 static struct {
133         struct mic_device_desc dd;
134         struct mic_vqconfig vqconfig[1];
135         __u32 host_features, guest_acknowledgements;
136         struct virtio_blk_config blk_config;
137 } virtblk_dev_page = {
138         .dd = {
139                 .type = VIRTIO_ID_BLOCK,
140                 .num_vq = ARRAY_SIZE(virtblk_dev_page.vqconfig),
141                 .feature_len = sizeof(virtblk_dev_page.host_features),
142                 .config_len = sizeof(virtblk_dev_page.blk_config),
143         },
144         .vqconfig[0] = {
145                 .num = htole16(MIC_VRING_ENTRIES),
146         },
147         .host_features =
148                 htole32(1<<VIRTIO_BLK_F_SEG_MAX),
149         .blk_config = {
150                 .seg_max = htole32(MIC_VRING_ENTRIES - 2),
151                 .capacity = htole64(0),
152          }
153 };
154
155 static char *myname;
156
157 static int
158 tap_configure(struct mic_info *mic, char *dev)
159 {
160         pid_t pid;
161         char *ifargv[7];
162         char ipaddr[IFNAMSIZ];
163         int ret = 0;
164
165         pid = fork();
166         if (pid == 0) {
167                 ifargv[0] = "ip";
168                 ifargv[1] = "link";
169                 ifargv[2] = "set";
170                 ifargv[3] = dev;
171                 ifargv[4] = "up";
172                 ifargv[5] = NULL;
173                 mpsslog("Configuring %s\n", dev);
174                 ret = execvp("ip", ifargv);
175                 if (ret < 0) {
176                         mpsslog("%s execvp failed errno %s\n",
177                                 mic->name, strerror(errno));
178                         return ret;
179                 }
180         }
181         if (pid < 0) {
182                 mpsslog("%s fork failed errno %s\n",
183                         mic->name, strerror(errno));
184                 return ret;
185         }
186
187         ret = waitpid(pid, NULL, 0);
188         if (ret < 0) {
189                 mpsslog("%s waitpid failed errno %s\n",
190                         mic->name, strerror(errno));
191                 return ret;
192         }
193
194         snprintf(ipaddr, IFNAMSIZ, "172.31.%d.254/24", mic->id);
195
196         pid = fork();
197         if (pid == 0) {
198                 ifargv[0] = "ip";
199                 ifargv[1] = "addr";
200                 ifargv[2] = "add";
201                 ifargv[3] = ipaddr;
202                 ifargv[4] = "dev";
203                 ifargv[5] = dev;
204                 ifargv[6] = NULL;
205                 mpsslog("Configuring %s ipaddr %s\n", dev, ipaddr);
206                 ret = execvp("ip", ifargv);
207                 if (ret < 0) {
208                         mpsslog("%s execvp failed errno %s\n",
209                                 mic->name, strerror(errno));
210                         return ret;
211                 }
212         }
213         if (pid < 0) {
214                 mpsslog("%s fork failed errno %s\n",
215                         mic->name, strerror(errno));
216                 return ret;
217         }
218
219         ret = waitpid(pid, NULL, 0);
220         if (ret < 0) {
221                 mpsslog("%s waitpid failed errno %s\n",
222                         mic->name, strerror(errno));
223                 return ret;
224         }
225         mpsslog("MIC name %s %s %d DONE!\n",
226                 mic->name, __func__, __LINE__);
227         return 0;
228 }
229
230 static int tun_alloc(struct mic_info *mic, char *dev)
231 {
232         struct ifreq ifr;
233         int fd, err;
234 #if GSO_ENABLED
235         unsigned offload;
236 #endif
237         fd = open("/dev/net/tun", O_RDWR);
238         if (fd < 0) {
239                 mpsslog("Could not open /dev/net/tun %s\n", strerror(errno));
240                 goto done;
241         }
242
243         memset(&ifr, 0, sizeof(ifr));
244
245         ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR;
246         if (*dev)
247                 strncpy(ifr.ifr_name, dev, IFNAMSIZ);
248
249         err = ioctl(fd, TUNSETIFF, (void *) &ifr);
250         if (err < 0) {
251                 mpsslog("%s %s %d TUNSETIFF failed %s\n",
252                         mic->name, __func__, __LINE__, strerror(errno));
253                 close(fd);
254                 return err;
255         }
256 #if GSO_ENABLED
257         offload = TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 |
258                 TUN_F_TSO_ECN | TUN_F_UFO;
259
260         err = ioctl(fd, TUNSETOFFLOAD, offload);
261         if (err < 0) {
262                 mpsslog("%s %s %d TUNSETOFFLOAD failed %s\n",
263                         mic->name, __func__, __LINE__, strerror(errno));
264                 close(fd);
265                 return err;
266         }
267 #endif
268         strcpy(dev, ifr.ifr_name);
269         mpsslog("Created TAP %s\n", dev);
270 done:
271         return fd;
272 }
273
/* Indices into the pollfd array used by the virtio net thread */
#define NET_FD_VIRTIO_NET 0
#define NET_FD_TUN (NET_FD_VIRTIO_NET + 1)
#define MAX_NET_FD (NET_FD_TUN + 1)  /* must be the last one + 1 */
277
278 static void set_dp(struct mic_info *mic, int type, void *dp)
279 {
280         switch (type) {
281         case VIRTIO_ID_CONSOLE:
282                 mic->mic_console.console_dp = dp;
283                 return;
284         case VIRTIO_ID_NET:
285                 mic->mic_net.net_dp = dp;
286                 return;
287         case VIRTIO_ID_BLOCK:
288                 mic->mic_virtblk.block_dp = dp;
289                 return;
290         }
291         mpsslog("%s %s %d not found\n", mic->name, __func__, type);
292         assert(0);
293 }
294
295 static void *get_dp(struct mic_info *mic, int type)
296 {
297         switch (type) {
298         case VIRTIO_ID_CONSOLE:
299                 return mic->mic_console.console_dp;
300         case VIRTIO_ID_NET:
301                 return mic->mic_net.net_dp;
302         case VIRTIO_ID_BLOCK:
303                 return mic->mic_virtblk.block_dp;
304         }
305         mpsslog("%s %s %d not found\n", mic->name, __func__, type);
306         assert(0);
307         return NULL;
308 }
309
310 static struct mic_device_desc *get_device_desc(struct mic_info *mic, int type)
311 {
312         struct mic_device_desc *d;
313         int i;
314         void *dp = get_dp(mic, type);
315
316         for (i = mic_aligned_size(struct mic_bootparam); i < PAGE_SIZE;
317                 i += mic_total_desc_size(d)) {
318                 d = dp + i;
319
320                 /* End of list */
321                 if (d->type == 0)
322                         break;
323
324                 if (d->type == -1)
325                         continue;
326
327                 mpsslog("%s %s d-> type %d d %p\n",
328                         mic->name, __func__, d->type, d);
329
330                 if (d->type == (__u8)type)
331                         return d;
332         }
333         mpsslog("%s %s %d not found\n", mic->name, __func__, type);
334         assert(0);
335         return NULL;
336 }
337
338 /* See comments in vhost.c for explanation of next_desc() */
339 static unsigned next_desc(struct vring_desc *desc)
340 {
341         unsigned int next;
342
343         if (!(le16toh(desc->flags) & VRING_DESC_F_NEXT))
344                 return -1U;
345         next = le16toh(desc->next);
346         return next;
347 }
348
349 /* Sum up all the IOVEC length */
350 static ssize_t
351 sum_iovec_len(struct mic_copy_desc *copy)
352 {
353         ssize_t sum = 0;
354         int i;
355
356         for (i = 0; i < copy->iovcnt; i++)
357                 sum += copy->iov[i].iov_len;
358         return sum;
359 }
360
361 static inline void verify_out_len(struct mic_info *mic,
362         struct mic_copy_desc *copy)
363 {
364         if (copy->out_len != sum_iovec_len(copy)) {
365                 mpsslog("%s %s %d BUG copy->out_len 0x%x len 0x%x\n",
366                                 mic->name, __func__, __LINE__,
367                                 copy->out_len, sum_iovec_len(copy));
368                 assert(copy->out_len == sum_iovec_len(copy));
369         }
370 }
371
372 /* Display an iovec */
373 static void
374 disp_iovec(struct mic_info *mic, struct mic_copy_desc *copy,
375         const char *s, int line)
376 {
377         int i;
378
379         for (i = 0; i < copy->iovcnt; i++)
380                 mpsslog("%s %s %d copy->iov[%d] addr %p len 0x%lx\n",
381                         mic->name, s, line, i,
382                         copy->iov[i].iov_base, copy->iov[i].iov_len);
383 }
384
385 static inline __u16 read_avail_idx(struct mic_vring *vr)
386 {
387         return ACCESS_ONCE(vr->info->avail_idx);
388 }
389
390 static inline void txrx_prepare(int type, bool tx, struct mic_vring *vr,
391                                 struct mic_copy_desc *copy, ssize_t len)
392 {
393         copy->vr_idx = tx ? 0 : 1;
394         copy->update_used = true;
395         if (type == VIRTIO_ID_NET)
396                 copy->iov[1].iov_len = len - sizeof(struct virtio_net_hdr);
397         else
398                 copy->iov[0].iov_len = len;
399 }
400
401 /* Central API which triggers the copies */
402 static int
403 mic_virtio_copy(struct mic_info *mic, int fd,
404         struct mic_vring *vr, struct mic_copy_desc *copy)
405 {
406         int ret;
407
408         ret = ioctl(fd, MIC_VIRTIO_COPY_DESC, copy);
409         if (ret) {
410                 mpsslog("%s %s %d errno %s ret %d\n",
411                         mic->name, __func__, __LINE__,
412                         strerror(errno), ret);
413         }
414         return ret;
415 }
416
417 /*
418  * This initialization routine requires at least one
419  * vring i.e. vr0. vr1 is optional.
420  */
421 static void *
422 init_vr(struct mic_info *mic, int fd, int type,
423         struct mic_vring *vr0, struct mic_vring *vr1, int num_vq)
424 {
425         int vr_size;
426         char *va;
427
428         vr_size = PAGE_ALIGN(vring_size(MIC_VRING_ENTRIES,
429                 MIC_VIRTIO_RING_ALIGN) + sizeof(struct _mic_vring_info));
430         va = mmap(NULL, MIC_DEVICE_PAGE_END + vr_size * num_vq,
431                 PROT_READ, MAP_SHARED, fd, 0);
432         if (MAP_FAILED == va) {
433                 mpsslog("%s %s %d mmap failed errno %s\n",
434                         mic->name, __func__, __LINE__,
435                         strerror(errno));
436                 goto done;
437         }
438         set_dp(mic, type, va);
439         vr0->va = (struct mic_vring *)&va[MIC_DEVICE_PAGE_END];
440         vr0->info = vr0->va +
441                 vring_size(MIC_VRING_ENTRIES, MIC_VIRTIO_RING_ALIGN);
442         vring_init(&vr0->vr,
443                 MIC_VRING_ENTRIES, vr0->va, MIC_VIRTIO_RING_ALIGN);
444         mpsslog("%s %s vr0 %p vr0->info %p vr_size 0x%x vring 0x%x ",
445                 __func__, mic->name, vr0->va, vr0->info, vr_size,
446                 vring_size(MIC_VRING_ENTRIES, MIC_VIRTIO_RING_ALIGN));
447         mpsslog("magic 0x%x expected 0x%x\n",
448                 vr0->info->magic, MIC_MAGIC + type);
449         assert(vr0->info->magic == MIC_MAGIC + type);
450         if (vr1) {
451                 vr1->va = (struct mic_vring *)
452                         &va[MIC_DEVICE_PAGE_END + vr_size];
453                 vr1->info = vr1->va + vring_size(MIC_VRING_ENTRIES,
454                         MIC_VIRTIO_RING_ALIGN);
455                 vring_init(&vr1->vr,
456                         MIC_VRING_ENTRIES, vr1->va, MIC_VIRTIO_RING_ALIGN);
457                 mpsslog("%s %s vr1 %p vr1->info %p vr_size 0x%x vring 0x%x ",
458                         __func__, mic->name, vr1->va, vr1->info, vr_size,
459                         vring_size(MIC_VRING_ENTRIES, MIC_VIRTIO_RING_ALIGN));
460                 mpsslog("magic 0x%x expected 0x%x\n",
461                         vr1->info->magic, MIC_MAGIC + type + 1);
462                 assert(vr1->info->magic == MIC_MAGIC + type + 1);
463         }
464 done:
465         return va;
466 }
467
468 static void
469 wait_for_card_driver(struct mic_info *mic, int fd, int type)
470 {
471         struct pollfd pollfd;
472         int err;
473         struct mic_device_desc *desc = get_device_desc(mic, type);
474
475         pollfd.fd = fd;
476         mpsslog("%s %s Waiting .... desc-> type %d status 0x%x\n",
477                 mic->name, __func__, type, desc->status);
478         while (1) {
479                 pollfd.events = POLLIN;
480                 pollfd.revents = 0;
481                 err = poll(&pollfd, 1, -1);
482                 if (err < 0) {
483                         mpsslog("%s %s poll failed %s\n",
484                                 mic->name, __func__, strerror(errno));
485                         continue;
486                 }
487
488                 if (pollfd.revents) {
489                         mpsslog("%s %s Waiting... desc-> type %d status 0x%x\n",
490                                 mic->name, __func__, type, desc->status);
491                         if (desc->status & VIRTIO_CONFIG_S_DRIVER_OK) {
492                                 mpsslog("%s %s poll.revents %d\n",
493                                         mic->name, __func__, pollfd.revents);
494                                 mpsslog("%s %s desc-> type %d status 0x%x\n",
495                                         mic->name, __func__, type,
496                                         desc->status);
497                                 break;
498                         }
499                 }
500         }
501 }
502
503 /* Spin till we have some descriptors */
504 static void
505 spin_for_descriptors(struct mic_info *mic, struct mic_vring *vr)
506 {
507         __u16 avail_idx = read_avail_idx(vr);
508
509         while (avail_idx == le16toh(ACCESS_ONCE(vr->vr.avail->idx))) {
510 #ifdef DEBUG
511                 mpsslog("%s %s waiting for desc avail %d info_avail %d\n",
512                         mic->name, __func__,
513                         le16toh(vr->vr.avail->idx), vr->info->avail_idx);
514 #endif
515                 sched_yield();
516         }
517 }
518
519 static void *
520 virtio_net(void *arg)
521 {
522         static __u8 vnet_hdr[2][sizeof(struct virtio_net_hdr)];
523         static __u8 vnet_buf[2][MAX_NET_PKT_SIZE] __aligned(64);
524         struct iovec vnet_iov[2][2] = {
525                 { { .iov_base = vnet_hdr[0], .iov_len = sizeof(vnet_hdr[0]) },
526                   { .iov_base = vnet_buf[0], .iov_len = sizeof(vnet_buf[0]) } },
527                 { { .iov_base = vnet_hdr[1], .iov_len = sizeof(vnet_hdr[1]) },
528                   { .iov_base = vnet_buf[1], .iov_len = sizeof(vnet_buf[1]) } },
529         };
530         struct iovec *iov0 = vnet_iov[0], *iov1 = vnet_iov[1];
531         struct mic_info *mic = (struct mic_info *)arg;
532         char if_name[IFNAMSIZ];
533         struct pollfd net_poll[MAX_NET_FD];
534         struct mic_vring tx_vr, rx_vr;
535         struct mic_copy_desc copy;
536         struct mic_device_desc *desc;
537         int err;
538
539         snprintf(if_name, IFNAMSIZ, "mic%d", mic->id);
540         mic->mic_net.tap_fd = tun_alloc(mic, if_name);
541         if (mic->mic_net.tap_fd < 0)
542                 goto done;
543
544         if (tap_configure(mic, if_name))
545                 goto done;
546         mpsslog("MIC name %s id %d\n", mic->name, mic->id);
547
548         net_poll[NET_FD_VIRTIO_NET].fd = mic->mic_net.virtio_net_fd;
549         net_poll[NET_FD_VIRTIO_NET].events = POLLIN;
550         net_poll[NET_FD_TUN].fd = mic->mic_net.tap_fd;
551         net_poll[NET_FD_TUN].events = POLLIN;
552
553         if (MAP_FAILED == init_vr(mic, mic->mic_net.virtio_net_fd,
554                 VIRTIO_ID_NET, &tx_vr, &rx_vr,
555                 virtnet_dev_page.dd.num_vq)) {
556                 mpsslog("%s init_vr failed %s\n",
557                         mic->name, strerror(errno));
558                 goto done;
559         }
560
561         copy.iovcnt = 2;
562         desc = get_device_desc(mic, VIRTIO_ID_NET);
563
564         while (1) {
565                 ssize_t len;
566
567                 net_poll[NET_FD_VIRTIO_NET].revents = 0;
568                 net_poll[NET_FD_TUN].revents = 0;
569
570                 /* Start polling for data from tap and virtio net */
571                 err = poll(net_poll, 2, -1);
572                 if (err < 0) {
573                         mpsslog("%s poll failed %s\n",
574                                 __func__, strerror(errno));
575                         continue;
576                 }
577                 if (!(desc->status & VIRTIO_CONFIG_S_DRIVER_OK))
578                         wait_for_card_driver(mic, mic->mic_net.virtio_net_fd,
579                                         VIRTIO_ID_NET);
580                 /*
581                  * Check if there is data to be read from TUN and write to
582                  * virtio net fd if there is.
583                  */
584                 if (net_poll[NET_FD_TUN].revents & POLLIN) {
585                         copy.iov = iov0;
586                         len = readv(net_poll[NET_FD_TUN].fd,
587                                 copy.iov, copy.iovcnt);
588                         if (len > 0) {
589                                 struct virtio_net_hdr *hdr
590                                         = (struct virtio_net_hdr *) vnet_hdr[0];
591
592                                 /* Disable checksums on the card since we are on
593                                    a reliable PCIe link */
594                                 hdr->flags |= VIRTIO_NET_HDR_F_DATA_VALID;
595 #ifdef DEBUG
596                                 mpsslog("%s %s %d hdr->flags 0x%x ", mic->name,
597                                         __func__, __LINE__, hdr->flags);
598                                 mpsslog("copy.out_len %d hdr->gso_type 0x%x\n",
599                                         copy.out_len, hdr->gso_type);
600 #endif
601 #ifdef DEBUG
602                                 disp_iovec(mic, copy, __func__, __LINE__);
603                                 mpsslog("%s %s %d read from tap 0x%lx\n",
604                                         mic->name, __func__, __LINE__,
605                                         len);
606 #endif
607                                 spin_for_descriptors(mic, &tx_vr);
608                                 txrx_prepare(VIRTIO_ID_NET, 1, &tx_vr, &copy,
609                                         len);
610
611                                 err = mic_virtio_copy(mic,
612                                         mic->mic_net.virtio_net_fd, &tx_vr,
613                                         &copy);
614                                 if (err < 0) {
615                                         mpsslog("%s %s %d mic_virtio_copy %s\n",
616                                                 mic->name, __func__, __LINE__,
617                                                 strerror(errno));
618                                 }
619                                 if (!err)
620                                         verify_out_len(mic, &copy);
621 #ifdef DEBUG
622                                 disp_iovec(mic, copy, __func__, __LINE__);
623                                 mpsslog("%s %s %d wrote to net 0x%lx\n",
624                                         mic->name, __func__, __LINE__,
625                                         sum_iovec_len(&copy));
626 #endif
627                                 /* Reinitialize IOV for next run */
628                                 iov0[1].iov_len = MAX_NET_PKT_SIZE;
629                         } else if (len < 0) {
630                                 disp_iovec(mic, &copy, __func__, __LINE__);
631                                 mpsslog("%s %s %d read failed %s ", mic->name,
632                                         __func__, __LINE__, strerror(errno));
633                                 mpsslog("cnt %d sum %d\n",
634                                         copy.iovcnt, sum_iovec_len(&copy));
635                         }
636                 }
637
638                 /*
639                  * Check if there is data to be read from virtio net and
640                  * write to TUN if there is.
641                  */
642                 if (net_poll[NET_FD_VIRTIO_NET].revents & POLLIN) {
643                         while (rx_vr.info->avail_idx !=
644                                 le16toh(rx_vr.vr.avail->idx)) {
645                                 copy.iov = iov1;
646                                 txrx_prepare(VIRTIO_ID_NET, 0, &rx_vr, &copy,
647                                         MAX_NET_PKT_SIZE
648                                         + sizeof(struct virtio_net_hdr));
649
650                                 err = mic_virtio_copy(mic,
651                                         mic->mic_net.virtio_net_fd, &rx_vr,
652                                         &copy);
653                                 if (!err) {
654 #ifdef DEBUG
655                                         struct virtio_net_hdr *hdr
656                                                 = (struct virtio_net_hdr *)
657                                                         vnet_hdr[1];
658
659                                         mpsslog("%s %s %d hdr->flags 0x%x, ",
660                                                 mic->name, __func__, __LINE__,
661                                                 hdr->flags);
662                                         mpsslog("out_len %d gso_type 0x%x\n",
663                                                 copy.out_len,
664                                                 hdr->gso_type);
665 #endif
666                                         /* Set the correct output iov_len */
667                                         iov1[1].iov_len = copy.out_len -
668                                                 sizeof(struct virtio_net_hdr);
669                                         verify_out_len(mic, &copy);
670 #ifdef DEBUG
671                                         disp_iovec(mic, copy, __func__,
672                                                 __LINE__);
673                                         mpsslog("%s %s %d ",
674                                                 mic->name, __func__, __LINE__);
675                                         mpsslog("read from net 0x%lx\n",
676                                                 sum_iovec_len(copy));
677 #endif
678                                         len = writev(net_poll[NET_FD_TUN].fd,
679                                                 copy.iov, copy.iovcnt);
680                                         if (len != sum_iovec_len(&copy)) {
681                                                 mpsslog("Tun write failed %s ",
682                                                         strerror(errno));
683                                                 mpsslog("len 0x%x ", len);
684                                                 mpsslog("read_len 0x%x\n",
685                                                         sum_iovec_len(&copy));
686                                         } else {
687 #ifdef DEBUG
688                                                 disp_iovec(mic, &copy, __func__,
689                                                         __LINE__);
690                                                 mpsslog("%s %s %d ",
691                                                         mic->name, __func__,
692                                                         __LINE__);
693                                                 mpsslog("wrote to tap 0x%lx\n",
694                                                         len);
695 #endif
696                                         }
697                                 } else {
698                                         mpsslog("%s %s %d mic_virtio_copy %s\n",
699                                                 mic->name, __func__, __LINE__,
700                                                 strerror(errno));
701                                         break;
702                                 }
703                         }
704                 }
705                 if (net_poll[NET_FD_VIRTIO_NET].revents & POLLERR)
706                         mpsslog("%s: %s: POLLERR\n", __func__, mic->name);
707         }
708 done:
709         pthread_exit(NULL);
710 }
711
/* virtio_console: indices into the pollfd array of the console thread */
#define VIRTIO_CONSOLE_FD	0
#define MONITOR_FD		(VIRTIO_CONSOLE_FD + 1)
#define MAX_CONSOLE_FD		(MONITOR_FD + 1)  /* must be the last one + 1 */
#define MAX_BUFFER_SIZE		PAGE_SIZE
717
718 static void *
719 virtio_console(void *arg)
720 {
721         static __u8 vcons_buf[2][PAGE_SIZE];
722         struct iovec vcons_iov[2] = {
723                 { .iov_base = vcons_buf[0], .iov_len = sizeof(vcons_buf[0]) },
724                 { .iov_base = vcons_buf[1], .iov_len = sizeof(vcons_buf[1]) },
725         };
726         struct iovec *iov0 = &vcons_iov[0], *iov1 = &vcons_iov[1];
727         struct mic_info *mic = (struct mic_info *)arg;
728         int err;
729         struct pollfd console_poll[MAX_CONSOLE_FD];
730         int pty_fd;
731         char *pts_name;
732         ssize_t len;
733         struct mic_vring tx_vr, rx_vr;
734         struct mic_copy_desc copy;
735         struct mic_device_desc *desc;
736
737         pty_fd = posix_openpt(O_RDWR);
738         if (pty_fd < 0) {
739                 mpsslog("can't open a pseudoterminal master device: %s\n",
740                         strerror(errno));
741                 goto _return;
742         }
743         pts_name = ptsname(pty_fd);
744         if (pts_name == NULL) {
745                 mpsslog("can't get pts name\n");
746                 goto _close_pty;
747         }
748         printf("%s console message goes to %s\n", mic->name, pts_name);
749         mpsslog("%s console message goes to %s\n", mic->name, pts_name);
750         err = grantpt(pty_fd);
751         if (err < 0) {
752                 mpsslog("can't grant access: %s %s\n",
753                                 pts_name, strerror(errno));
754                 goto _close_pty;
755         }
756         err = unlockpt(pty_fd);
757         if (err < 0) {
758                 mpsslog("can't unlock a pseudoterminal: %s %s\n",
759                                 pts_name, strerror(errno));
760                 goto _close_pty;
761         }
762         console_poll[MONITOR_FD].fd = pty_fd;
763         console_poll[MONITOR_FD].events = POLLIN;
764
765         console_poll[VIRTIO_CONSOLE_FD].fd = mic->mic_console.virtio_console_fd;
766         console_poll[VIRTIO_CONSOLE_FD].events = POLLIN;
767
768         if (MAP_FAILED == init_vr(mic, mic->mic_console.virtio_console_fd,
769                 VIRTIO_ID_CONSOLE, &tx_vr, &rx_vr,
770                 virtcons_dev_page.dd.num_vq)) {
771                 mpsslog("%s init_vr failed %s\n",
772                         mic->name, strerror(errno));
773                 goto _close_pty;
774         }
775
776         copy.iovcnt = 1;
777         desc = get_device_desc(mic, VIRTIO_ID_CONSOLE);
778
779         for (;;) {
780                 console_poll[MONITOR_FD].revents = 0;
781                 console_poll[VIRTIO_CONSOLE_FD].revents = 0;
782                 err = poll(console_poll, MAX_CONSOLE_FD, -1);
783                 if (err < 0) {
784                         mpsslog("%s %d: poll failed: %s\n", __func__, __LINE__,
785                                 strerror(errno));
786                         continue;
787                 }
788                 if (!(desc->status & VIRTIO_CONFIG_S_DRIVER_OK))
789                         wait_for_card_driver(mic,
790                                 mic->mic_console.virtio_console_fd,
791                                 VIRTIO_ID_CONSOLE);
792
793                 if (console_poll[MONITOR_FD].revents & POLLIN) {
794                         copy.iov = iov0;
795                         len = readv(pty_fd, copy.iov, copy.iovcnt);
796                         if (len > 0) {
797 #ifdef DEBUG
798                                 disp_iovec(mic, copy, __func__, __LINE__);
799                                 mpsslog("%s %s %d read from tap 0x%lx\n",
800                                         mic->name, __func__, __LINE__,
801                                         len);
802 #endif
803                                 spin_for_descriptors(mic, &tx_vr);
804                                 txrx_prepare(VIRTIO_ID_CONSOLE, 1, &tx_vr,
805                                         &copy, len);
806
807                                 err = mic_virtio_copy(mic,
808                                         mic->mic_console.virtio_console_fd,
809                                         &tx_vr, &copy);
810                                 if (err < 0) {
811                                         mpsslog("%s %s %d mic_virtio_copy %s\n",
812                                                 mic->name, __func__, __LINE__,
813                                                 strerror(errno));
814                                 }
815                                 if (!err)
816                                         verify_out_len(mic, &copy);
817 #ifdef DEBUG
818                                 disp_iovec(mic, copy, __func__, __LINE__);
819                                 mpsslog("%s %s %d wrote to net 0x%lx\n",
820                                         mic->name, __func__, __LINE__,
821                                         sum_iovec_len(copy));
822 #endif
823                                 /* Reinitialize IOV for next run */
824                                 iov0->iov_len = PAGE_SIZE;
825                         } else if (len < 0) {
826                                 disp_iovec(mic, &copy, __func__, __LINE__);
827                                 mpsslog("%s %s %d read failed %s ",
828                                         mic->name, __func__, __LINE__,
829                                         strerror(errno));
830                                 mpsslog("cnt %d sum %d\n",
831                                         copy.iovcnt, sum_iovec_len(&copy));
832                         }
833                 }
834
835                 if (console_poll[VIRTIO_CONSOLE_FD].revents & POLLIN) {
836                         while (rx_vr.info->avail_idx !=
837                                 le16toh(rx_vr.vr.avail->idx)) {
838                                 copy.iov = iov1;
839                                 txrx_prepare(VIRTIO_ID_CONSOLE, 0, &rx_vr,
840                                         &copy, PAGE_SIZE);
841
842                                 err = mic_virtio_copy(mic,
843                                         mic->mic_console.virtio_console_fd,
844                                         &rx_vr, &copy);
845                                 if (!err) {
846                                         /* Set the correct output iov_len */
847                                         iov1->iov_len = copy.out_len;
848                                         verify_out_len(mic, &copy);
849 #ifdef DEBUG
850                                         disp_iovec(mic, copy, __func__,
851                                                 __LINE__);
852                                         mpsslog("%s %s %d ",
853                                                 mic->name, __func__, __LINE__);
854                                         mpsslog("read from net 0x%lx\n",
855                                                 sum_iovec_len(copy));
856 #endif
857                                         len = writev(pty_fd,
858                                                 copy.iov, copy.iovcnt);
859                                         if (len != sum_iovec_len(&copy)) {
860                                                 mpsslog("Tun write failed %s ",
861                                                         strerror(errno));
862                                                 mpsslog("len 0x%x ", len);
863                                                 mpsslog("read_len 0x%x\n",
864                                                         sum_iovec_len(&copy));
865                                         } else {
866 #ifdef DEBUG
867                                                 disp_iovec(mic, copy, __func__,
868                                                         __LINE__);
869                                                 mpsslog("%s %s %d ",
870                                                         mic->name, __func__,
871                                                         __LINE__);
872                                                 mpsslog("wrote to tap 0x%lx\n",
873                                                         len);
874 #endif
875                                         }
876                                 } else {
877                                         mpsslog("%s %s %d mic_virtio_copy %s\n",
878                                                 mic->name, __func__, __LINE__,
879                                                 strerror(errno));
880                                         break;
881                                 }
882                         }
883                 }
884                 if (console_poll[NET_FD_VIRTIO_NET].revents & POLLERR)
885                         mpsslog("%s: %s: POLLERR\n", __func__, mic->name);
886         }
887 _close_pty:
888         close(pty_fd);
889 _return:
890         pthread_exit(NULL);
891 }
892
893 static void
894 add_virtio_device(struct mic_info *mic, struct mic_device_desc *dd)
895 {
896         char path[PATH_MAX];
897         int fd, err;
898
899         snprintf(path, PATH_MAX, "/dev/mic%d", mic->id);
900         fd = open(path, O_RDWR);
901         if (fd < 0) {
902                 mpsslog("Could not open %s %s\n", path, strerror(errno));
903                 return;
904         }
905
906         err = ioctl(fd, MIC_VIRTIO_ADD_DEVICE, dd);
907         if (err < 0) {
908                 mpsslog("Could not add %d %s\n", dd->type, strerror(errno));
909                 close(fd);
910                 return;
911         }
912         switch (dd->type) {
913         case VIRTIO_ID_NET:
914                 mic->mic_net.virtio_net_fd = fd;
915                 mpsslog("Added VIRTIO_ID_NET for %s\n", mic->name);
916                 break;
917         case VIRTIO_ID_CONSOLE:
918                 mic->mic_console.virtio_console_fd = fd;
919                 mpsslog("Added VIRTIO_ID_CONSOLE for %s\n", mic->name);
920                 break;
921         case VIRTIO_ID_BLOCK:
922                 mic->mic_virtblk.virtio_block_fd = fd;
923                 mpsslog("Added VIRTIO_ID_BLOCK for %s\n", mic->name);
924                 break;
925         }
926 }
927
928 static bool
929 set_backend_file(struct mic_info *mic)
930 {
931         FILE *config;
932         char buff[PATH_MAX], *line, *evv, *p;
933
934         snprintf(buff, PATH_MAX, "%s/mpssd%03d.conf", mic_config_dir, mic->id);
935         config = fopen(buff, "r");
936         if (config == NULL)
937                 return false;
938         do {  /* look for "virtblk_backend=XXXX" */
939                 line = fgets(buff, PATH_MAX, config);
940                 if (line == NULL)
941                         break;
942                 if (*line == '#')
943                         continue;
944                 p = strchr(line, '\n');
945                 if (p)
946                         *p = '\0';
947         } while (strncmp(line, virtblk_backend, strlen(virtblk_backend)) != 0);
948         fclose(config);
949         if (line == NULL)
950                 return false;
951         evv = strchr(line, '=');
952         if (evv == NULL)
953                 return false;
954         mic->mic_virtblk.backend_file = malloc(strlen(evv) + 1);
955         if (mic->mic_virtblk.backend_file == NULL) {
956                 mpsslog("can't allocate memory\n", mic->name, mic->id);
957                 return false;
958         }
959         strcpy(mic->mic_virtblk.backend_file, evv + 1);
960         return true;
961 }
962
963 #define SECTOR_SIZE 512
964 static bool
965 set_backend_size(struct mic_info *mic)
966 {
967         mic->mic_virtblk.backend_size = lseek(mic->mic_virtblk.backend, 0,
968                 SEEK_END);
969         if (mic->mic_virtblk.backend_size < 0) {
970                 mpsslog("%s: can't seek: %s\n",
971                         mic->name, mic->mic_virtblk.backend_file);
972                 return false;
973         }
974         virtblk_dev_page.blk_config.capacity =
975                 mic->mic_virtblk.backend_size / SECTOR_SIZE;
976         if ((mic->mic_virtblk.backend_size % SECTOR_SIZE) != 0)
977                 virtblk_dev_page.blk_config.capacity++;
978
979         virtblk_dev_page.blk_config.capacity =
980                 htole64(virtblk_dev_page.blk_config.capacity);
981
982         return true;
983 }
984
985 static bool
986 open_backend(struct mic_info *mic)
987 {
988         if (!set_backend_file(mic))
989                 goto _error_exit;
990         mic->mic_virtblk.backend = open(mic->mic_virtblk.backend_file, O_RDWR);
991         if (mic->mic_virtblk.backend < 0) {
992                 mpsslog("%s: can't open: %s\n", mic->name,
993                         mic->mic_virtblk.backend_file);
994                 goto _error_free;
995         }
996         if (!set_backend_size(mic))
997                 goto _error_close;
998         mic->mic_virtblk.backend_addr = mmap(NULL,
999                 mic->mic_virtblk.backend_size,
1000                 PROT_READ|PROT_WRITE, MAP_SHARED,
1001                 mic->mic_virtblk.backend, 0L);
1002         if (mic->mic_virtblk.backend_addr == MAP_FAILED) {
1003                 mpsslog("%s: can't map: %s %s\n",
1004                         mic->name, mic->mic_virtblk.backend_file,
1005                         strerror(errno));
1006                 goto _error_close;
1007         }
1008         return true;
1009
1010  _error_close:
1011         close(mic->mic_virtblk.backend);
1012  _error_free:
1013         free(mic->mic_virtblk.backend_file);
1014  _error_exit:
1015         return false;
1016 }
1017
1018 static void
1019 close_backend(struct mic_info *mic)
1020 {
1021         munmap(mic->mic_virtblk.backend_addr, mic->mic_virtblk.backend_size);
1022         close(mic->mic_virtblk.backend);
1023         free(mic->mic_virtblk.backend_file);
1024 }
1025
1026 static bool
1027 start_virtblk(struct mic_info *mic, struct mic_vring *vring)
1028 {
1029         if (((__u64)&virtblk_dev_page.blk_config % 8) != 0) {
1030                 mpsslog("%s: blk_config is not 8 byte aligned.\n",
1031                         mic->name);
1032                 return false;
1033         }
1034         add_virtio_device(mic, &virtblk_dev_page.dd);
1035         if (MAP_FAILED == init_vr(mic, mic->mic_virtblk.virtio_block_fd,
1036                 VIRTIO_ID_BLOCK, vring, NULL, virtblk_dev_page.dd.num_vq)) {
1037                 mpsslog("%s init_vr failed %s\n",
1038                         mic->name, strerror(errno));
1039                 return false;
1040         }
1041         return true;
1042 }
1043
1044 static void
1045 stop_virtblk(struct mic_info *mic)
1046 {
1047         int vr_size, ret;
1048
1049         vr_size = PAGE_ALIGN(vring_size(MIC_VRING_ENTRIES,
1050                 MIC_VIRTIO_RING_ALIGN) + sizeof(struct _mic_vring_info));
1051         ret = munmap(mic->mic_virtblk.block_dp,
1052                 MIC_DEVICE_PAGE_END + vr_size * virtblk_dev_page.dd.num_vq);
1053         if (ret < 0)
1054                 mpsslog("%s munmap errno %d\n", mic->name, errno);
1055         close(mic->mic_virtblk.virtio_block_fd);
1056 }
1057
1058 static __u8
1059 header_error_check(struct vring_desc *desc)
1060 {
1061         if (le32toh(desc->len) != sizeof(struct virtio_blk_outhdr)) {
1062                 mpsslog("%s() %d: length is not sizeof(virtio_blk_outhd)\n",
1063                                 __func__, __LINE__);
1064                 return -EIO;
1065         }
1066         if (!(le16toh(desc->flags) & VRING_DESC_F_NEXT)) {
1067                 mpsslog("%s() %d: alone\n",
1068                         __func__, __LINE__);
1069                 return -EIO;
1070         }
1071         if (le16toh(desc->flags) & VRING_DESC_F_WRITE) {
1072                 mpsslog("%s() %d: not read\n",
1073                         __func__, __LINE__);
1074                 return -EIO;
1075         }
1076         return 0;
1077 }
1078
1079 static int
1080 read_header(int fd, struct virtio_blk_outhdr *hdr, __u32 desc_idx)
1081 {
1082         struct iovec iovec;
1083         struct mic_copy_desc copy;
1084
1085         iovec.iov_len = sizeof(*hdr);
1086         iovec.iov_base = hdr;
1087         copy.iov = &iovec;
1088         copy.iovcnt = 1;
1089         copy.vr_idx = 0;  /* only one vring on virtio_block */
1090         copy.update_used = false;  /* do not update used index */
1091         return ioctl(fd, MIC_VIRTIO_COPY_DESC, &copy);
1092 }
1093
1094 static int
1095 transfer_blocks(int fd, struct iovec *iovec, __u32 iovcnt)
1096 {
1097         struct mic_copy_desc copy;
1098
1099         copy.iov = iovec;
1100         copy.iovcnt = iovcnt;
1101         copy.vr_idx = 0;  /* only one vring on virtio_block */
1102         copy.update_used = false;  /* do not update used index */
1103         return ioctl(fd, MIC_VIRTIO_COPY_DESC, &copy);
1104 }
1105
1106 static __u8
1107 status_error_check(struct vring_desc *desc)
1108 {
1109         if (le32toh(desc->len) != sizeof(__u8)) {
1110                 mpsslog("%s() %d: length is not sizeof(status)\n",
1111                         __func__, __LINE__);
1112                 return -EIO;
1113         }
1114         return 0;
1115 }
1116
1117 static int
1118 write_status(int fd, __u8 *status)
1119 {
1120         struct iovec iovec;
1121         struct mic_copy_desc copy;
1122
1123         iovec.iov_base = status;
1124         iovec.iov_len = sizeof(*status);
1125         copy.iov = &iovec;
1126         copy.iovcnt = 1;
1127         copy.vr_idx = 0;  /* only one vring on virtio_block */
1128         copy.update_used = true; /* Update used index */
1129         return ioctl(fd, MIC_VIRTIO_COPY_DESC, &copy);
1130 }
1131
1132 static void *
1133 virtio_block(void *arg)
1134 {
1135         struct mic_info *mic = (struct mic_info *) arg;
1136         int ret;
1137         struct pollfd block_poll;
1138         struct mic_vring vring;
1139         __u16 avail_idx;
1140         __u32 desc_idx;
1141         struct vring_desc *desc;
1142         struct iovec *iovec, *piov;
1143         __u8 status;
1144         __u32 buffer_desc_idx;
1145         struct virtio_blk_outhdr hdr;
1146         void *fos;
1147
1148         for (;;) {  /* forever */
1149                 if (!open_backend(mic)) { /* No virtblk */
1150                         for (mic->mic_virtblk.signaled = 0;
1151                                 !mic->mic_virtblk.signaled;)
1152                                 sleep(1);
1153                         continue;
1154                 }
1155
1156                 /* backend file is specified. */
1157                 if (!start_virtblk(mic, &vring))
1158                         goto _close_backend;
1159                 iovec = malloc(sizeof(*iovec) *
1160                         le32toh(virtblk_dev_page.blk_config.seg_max));
1161                 if (!iovec) {
1162                         mpsslog("%s: can't alloc iovec: %s\n",
1163                                 mic->name, strerror(ENOMEM));
1164                         goto _stop_virtblk;
1165                 }
1166
1167                 block_poll.fd = mic->mic_virtblk.virtio_block_fd;
1168                 block_poll.events = POLLIN;
1169                 for (mic->mic_virtblk.signaled = 0;
1170                      !mic->mic_virtblk.signaled;) {
1171                         block_poll.revents = 0;
1172                                         /* timeout in 1 sec to see signaled */
1173                         ret = poll(&block_poll, 1, 1000);
1174                         if (ret < 0) {
1175                                 mpsslog("%s %d: poll failed: %s\n",
1176                                         __func__, __LINE__,
1177                                         strerror(errno));
1178                                 continue;
1179                         }
1180
1181                         if (!(block_poll.revents & POLLIN)) {
1182 #ifdef DEBUG
1183                                 mpsslog("%s %d: block_poll.revents=0x%x\n",
1184                                         __func__, __LINE__, block_poll.revents);
1185 #endif
1186                                 continue;
1187                         }
1188
1189                         /* POLLIN */
1190                         while (vring.info->avail_idx !=
1191                                 le16toh(vring.vr.avail->idx)) {
1192                                 /* read header element */
1193                                 avail_idx =
1194                                         vring.info->avail_idx &
1195                                         (vring.vr.num - 1);
1196                                 desc_idx = le16toh(
1197                                         vring.vr.avail->ring[avail_idx]);
1198                                 desc = &vring.vr.desc[desc_idx];
1199 #ifdef DEBUG
1200                                 mpsslog("%s() %d: avail_idx=%d ",
1201                                         __func__, __LINE__,
1202                                         vring.info->avail_idx);
1203                                 mpsslog("vring.vr.num=%d desc=%p\n",
1204                                         vring.vr.num, desc);
1205 #endif
1206                                 status = header_error_check(desc);
1207                                 ret = read_header(
1208                                         mic->mic_virtblk.virtio_block_fd,
1209                                         &hdr, desc_idx);
1210                                 if (ret < 0) {
1211                                         mpsslog("%s() %d %s: ret=%d %s\n",
1212                                                 __func__, __LINE__,
1213                                                 mic->name, ret,
1214                                                 strerror(errno));
1215                                         break;
1216                                 }
1217                                 /* buffer element */
1218                                 piov = iovec;
1219                                 status = 0;
1220                                 fos = mic->mic_virtblk.backend_addr +
1221                                         (hdr.sector * SECTOR_SIZE);
1222                                 buffer_desc_idx = desc_idx =
1223                                         next_desc(desc);
1224                                 for (desc = &vring.vr.desc[buffer_desc_idx];
1225                                      desc->flags & VRING_DESC_F_NEXT;
1226                                      desc_idx = next_desc(desc),
1227                                              desc = &vring.vr.desc[desc_idx]) {
1228                                         piov->iov_len = desc->len;
1229                                         piov->iov_base = fos;
1230                                         piov++;
1231                                         fos += desc->len;
1232                                 }
1233                                 /* Returning NULLs for VIRTIO_BLK_T_GET_ID. */
1234                                 if (hdr.type & ~(VIRTIO_BLK_T_OUT |
1235                                         VIRTIO_BLK_T_GET_ID)) {
1236                                         /*
1237                                           VIRTIO_BLK_T_IN - does not do
1238                                           anything. Probably for documenting.
1239                                           VIRTIO_BLK_T_SCSI_CMD - for
1240                                           virtio_scsi.
1241                                           VIRTIO_BLK_T_FLUSH - turned off in
1242                                           config space.
1243                                           VIRTIO_BLK_T_BARRIER - defined but not
1244                                           used in anywhere.
1245                                         */
1246                                         mpsslog("%s() %d: type %x ",
1247                                                 __func__, __LINE__,
1248                                                 hdr.type);
1249                                         mpsslog("is not supported\n");
1250                                         status = -ENOTSUP;
1251
1252                                 } else {
1253                                         ret = transfer_blocks(
1254                                         mic->mic_virtblk.virtio_block_fd,
1255                                                 iovec,
1256                                                 piov - iovec);
1257                                         if (ret < 0 &&
1258                                                 status != 0)
1259                                                 status = ret;
1260                                 }
1261                                 /* write status and update used pointer */
1262                                 if (status != 0)
1263                                         status = status_error_check(desc);
1264                                 ret = write_status(
1265                                         mic->mic_virtblk.virtio_block_fd,
1266                                         &status);
1267 #ifdef DEBUG
1268                                 mpsslog("%s() %d: write status=%d on desc=%p\n",
1269                                         __func__, __LINE__,
1270                                         status, desc);
1271 #endif
1272                         }
1273                 }
1274                 free(iovec);
1275 _stop_virtblk:
1276                 stop_virtblk(mic);
1277 _close_backend:
1278                 close_backend(mic);
1279         }  /* forever */
1280
1281         pthread_exit(NULL);
1282 }
1283
1284 static void
1285 reset(struct mic_info *mic)
1286 {
1287 #define RESET_TIMEOUT 120
1288         int i = RESET_TIMEOUT;
1289         setsysfs(mic->name, "state", "reset");
1290         while (i) {
1291                 char *state;
1292                 state = readsysfs(mic->name, "state");
1293                 if (!state)
1294                         goto retry;
1295                 mpsslog("%s: %s %d state %s\n",
1296                         mic->name, __func__, __LINE__, state);
1297                 if ((!strcmp(state, "offline"))) {
1298                         free(state);
1299                         break;
1300                 }
1301                 free(state);
1302 retry:
1303                 sleep(1);
1304                 i--;
1305         }
1306 }
1307
1308 static int
1309 get_mic_shutdown_status(struct mic_info *mic, char *shutdown_status)
1310 {
1311         if (!strcmp(shutdown_status, "nop"))
1312                 return MIC_NOP;
1313         if (!strcmp(shutdown_status, "crashed"))
1314                 return MIC_CRASHED;
1315         if (!strcmp(shutdown_status, "halted"))
1316                 return MIC_HALTED;
1317         if (!strcmp(shutdown_status, "poweroff"))
1318                 return MIC_POWER_OFF;
1319         if (!strcmp(shutdown_status, "restart"))
1320                 return MIC_RESTART;
1321         mpsslog("%s: BUG invalid status %s\n", mic->name, shutdown_status);
1322         /* Invalid state */
1323         assert(0);
1324 };
1325
1326 static int get_mic_state(struct mic_info *mic, char *state)
1327 {
1328         if (!strcmp(state, "offline"))
1329                 return MIC_OFFLINE;
1330         if (!strcmp(state, "online"))
1331                 return MIC_ONLINE;
1332         if (!strcmp(state, "shutting_down"))
1333                 return MIC_SHUTTING_DOWN;
1334         if (!strcmp(state, "reset_failed"))
1335                 return MIC_RESET_FAILED;
1336         mpsslog("%s: BUG invalid state %s\n", mic->name, state);
1337         /* Invalid state */
1338         assert(0);
1339 };
1340
1341 static void mic_handle_shutdown(struct mic_info *mic)
1342 {
1343 #define SHUTDOWN_TIMEOUT 60
1344         int i = SHUTDOWN_TIMEOUT, ret, stat = 0;
1345         char *shutdown_status;
1346         while (i) {
1347                 shutdown_status = readsysfs(mic->name, "shutdown_status");
1348                 if (!shutdown_status)
1349                         continue;
1350                 mpsslog("%s: %s %d shutdown_status %s\n",
1351                         mic->name, __func__, __LINE__, shutdown_status);
1352                 switch (get_mic_shutdown_status(mic, shutdown_status)) {
1353                 case MIC_RESTART:
1354                         mic->restart = 1;
1355                 case MIC_HALTED:
1356                 case MIC_POWER_OFF:
1357                 case MIC_CRASHED:
1358                         free(shutdown_status);
1359                         goto reset;
1360                 default:
1361                         break;
1362                 }
1363                 free(shutdown_status);
1364                 sleep(1);
1365                 i--;
1366         }
1367 reset:
1368         ret = kill(mic->pid, SIGTERM);
1369         mpsslog("%s: %s %d kill pid %d ret %d\n",
1370                 mic->name, __func__, __LINE__,
1371                 mic->pid, ret);
1372         if (!ret) {
1373                 ret = waitpid(mic->pid, &stat,
1374                         WIFSIGNALED(stat));
1375                 mpsslog("%s: %s %d waitpid ret %d pid %d\n",
1376                         mic->name, __func__, __LINE__,
1377                         ret, mic->pid);
1378         }
1379         if (ret == mic->pid)
1380                 reset(mic);
1381 }
1382
1383 static void *
1384 mic_config(void *arg)
1385 {
1386         struct mic_info *mic = (struct mic_info *)arg;
1387         char *state = NULL;
1388         char pathname[PATH_MAX];
1389         int fd, ret;
1390         struct pollfd ufds[1];
1391         char value[4096];
1392
1393         snprintf(pathname, PATH_MAX - 1, "%s/%s/%s",
1394                 MICSYSFSDIR, mic->name, "state");
1395
1396         fd = open(pathname, O_RDONLY);
1397         if (fd < 0) {
1398                 mpsslog("%s: opening file %s failed %s\n",
1399                         mic->name, pathname, strerror(errno));
1400                 goto error;
1401         }
1402
1403         do {
1404                 ret = read(fd, value, sizeof(value));
1405                 if (ret < 0) {
1406                         mpsslog("%s: Failed to read sysfs entry '%s': %s\n",
1407                                 mic->name, pathname, strerror(errno));
1408                         goto close_error1;
1409                 }
1410 retry:
1411                 state = readsysfs(mic->name, "state");
1412                 if (!state)
1413                         goto retry;
1414                 mpsslog("%s: %s %d state %s\n",
1415                         mic->name, __func__, __LINE__, state);
1416                 switch (get_mic_state(mic, state)) {
1417                 case MIC_SHUTTING_DOWN:
1418                         mic_handle_shutdown(mic);
1419                         goto close_error;
1420                 default:
1421                         break;
1422                 }
1423                 free(state);
1424
1425                 ufds[0].fd = fd;
1426                 ufds[0].events = POLLERR | POLLPRI;
1427                 ret = poll(ufds, 1, -1);
1428                 if (ret < 0) {
1429                         mpsslog("%s: poll failed %s\n",
1430                                 mic->name, strerror(errno));
1431                         goto close_error1;
1432                 }
1433         } while (1);
1434 close_error:
1435         free(state);
1436 close_error1:
1437         close(fd);
1438 error:
1439         init_mic(mic);
1440         pthread_exit(NULL);
1441 }
1442
1443 static void
1444 set_cmdline(struct mic_info *mic)
1445 {
1446         char buffer[PATH_MAX];
1447         int len;
1448
1449         len = snprintf(buffer, PATH_MAX,
1450                 "clocksource=tsc highres=off nohz=off ");
1451         len += snprintf(buffer + len, PATH_MAX,
1452                 "cpufreq_on;corec6_off;pc3_off;pc6_off ");
1453         len += snprintf(buffer + len, PATH_MAX,
1454                 "ifcfg=static;address,172.31.%d.1;netmask,255.255.255.0",
1455                 mic->id);
1456
1457         setsysfs(mic->name, "cmdline", buffer);
1458         mpsslog("%s: Command line: \"%s\"\n", mic->name, buffer);
1459         snprintf(buffer, PATH_MAX, "172.31.%d.1", mic->id);
1460         mpsslog("%s: IPADDR: \"%s\"\n", mic->name, buffer);
1461 }
1462
1463 static void
1464 set_log_buf_info(struct mic_info *mic)
1465 {
1466         int fd;
1467         off_t len;
1468         char system_map[] = "/lib/firmware/mic/System.map";
1469         char *map, *temp, log_buf[17] = {'\0'};
1470
1471         fd = open(system_map, O_RDONLY);
1472         if (fd < 0) {
1473                 mpsslog("%s: Opening System.map failed: %d\n",
1474                         mic->name, errno);
1475                 return;
1476         }
1477         len = lseek(fd, 0, SEEK_END);
1478         if (len < 0) {
1479                 mpsslog("%s: Reading System.map size failed: %d\n",
1480                         mic->name, errno);
1481                 close(fd);
1482                 return;
1483         }
1484         map = mmap(NULL, len, PROT_READ, MAP_PRIVATE, fd, 0);
1485         if (map == MAP_FAILED) {
1486                 mpsslog("%s: mmap of System.map failed: %d\n",
1487                         mic->name, errno);
1488                 close(fd);
1489                 return;
1490         }
1491         temp = strstr(map, "__log_buf");
1492         if (!temp) {
1493                 mpsslog("%s: __log_buf not found: %d\n", mic->name, errno);
1494                 munmap(map, len);
1495                 close(fd);
1496                 return;
1497         }
1498         strncpy(log_buf, temp - 19, 16);
1499         setsysfs(mic->name, "log_buf_addr", log_buf);
1500         mpsslog("%s: log_buf_addr: %s\n", mic->name, log_buf);
1501         temp = strstr(map, "log_buf_len");
1502         if (!temp) {
1503                 mpsslog("%s: log_buf_len not found: %d\n", mic->name, errno);
1504                 munmap(map, len);
1505                 close(fd);
1506                 return;
1507         }
1508         strncpy(log_buf, temp - 19, 16);
1509         setsysfs(mic->name, "log_buf_len", log_buf);
1510         mpsslog("%s: log_buf_len: %s\n", mic->name, log_buf);
1511         munmap(map, len);
1512         close(fd);
1513 }
1514
1515 static void init_mic(struct mic_info *mic);
1516
1517 static void
1518 change_virtblk_backend(int x, siginfo_t *siginfo, void *p)
1519 {
1520         struct mic_info *mic;
1521
1522         for (mic = mic_list.next; mic != NULL; mic = mic->next)
1523                 mic->mic_virtblk.signaled = 1/* true */;
1524 }
1525
1526 static void
1527 init_mic(struct mic_info *mic)
1528 {
1529         struct sigaction ignore = {
1530                 .sa_flags = 0,
1531                 .sa_handler = SIG_IGN
1532         };
1533         struct sigaction act = {
1534                 .sa_flags = SA_SIGINFO,
1535                 .sa_sigaction = change_virtblk_backend,
1536         };
1537         char buffer[PATH_MAX];
1538         int err;
1539
1540         /*
1541          * Currently, one virtio block device is supported for each MIC card
1542          * at a time. Any user (or test) can send a SIGUSR1 to the MIC daemon.
1543          * The signal informs the virtio block backend about a change in the
1544          * configuration file which specifies the virtio backend file name on
1545          * the host. Virtio block backend then re-reads the configuration file
1546          * and switches to the new block device. This signalling mechanism may
1547          * not be required once multiple virtio block devices are supported by
1548          * the MIC daemon.
1549          */
1550         sigaction(SIGUSR1, &ignore, NULL);
1551
1552         mic->pid = fork();
1553         switch (mic->pid) {
1554         case 0:
1555                 set_log_buf_info(mic);
1556                 set_cmdline(mic);
1557                 add_virtio_device(mic, &virtcons_dev_page.dd);
1558                 add_virtio_device(mic, &virtnet_dev_page.dd);
1559                 err = pthread_create(&mic->mic_console.console_thread, NULL,
1560                         virtio_console, mic);
1561                 if (err)
1562                         mpsslog("%s virtcons pthread_create failed %s\n",
1563                         mic->name, strerror(err));
1564                 err = pthread_create(&mic->mic_net.net_thread, NULL,
1565                         virtio_net, mic);
1566                 if (err)
1567                         mpsslog("%s virtnet pthread_create failed %s\n",
1568                         mic->name, strerror(err));
1569                 err = pthread_create(&mic->mic_virtblk.block_thread, NULL,
1570                         virtio_block, mic);
1571                 if (err)
1572                         mpsslog("%s virtblk pthread_create failed %s\n",
1573                         mic->name, strerror(err));
1574                 sigemptyset(&act.sa_mask);
1575                 err = sigaction(SIGUSR1, &act, NULL);
1576                 if (err)
1577                         mpsslog("%s sigaction SIGUSR1 failed %s\n",
1578                         mic->name, strerror(errno));
1579                 while (1)
1580                         sleep(60);
1581         case -1:
1582                 mpsslog("fork failed MIC name %s id %d errno %d\n",
1583                         mic->name, mic->id, errno);
1584                 break;
1585         default:
1586                 if (mic->restart) {
1587                         snprintf(buffer, PATH_MAX, "boot");
1588                         setsysfs(mic->name, "state", buffer);
1589                         mpsslog("%s restarting mic %d\n",
1590                                 mic->name, mic->restart);
1591                         mic->restart = 0;
1592                 }
1593                 pthread_create(&mic->config_thread, NULL, mic_config, mic);
1594         }
1595 }
1596
1597 static void
1598 start_daemon(void)
1599 {
1600         struct mic_info *mic;
1601
1602         for (mic = mic_list.next; mic != NULL; mic = mic->next)
1603                 init_mic(mic);
1604
1605         while (1)
1606                 sleep(60);
1607 }
1608
1609 static int
1610 init_mic_list(void)
1611 {
1612         struct mic_info *mic = &mic_list;
1613         struct dirent *file;
1614         DIR *dp;
1615         int cnt = 0;
1616
1617         dp = opendir(MICSYSFSDIR);
1618         if (!dp)
1619                 return 0;
1620
1621         while ((file = readdir(dp)) != NULL) {
1622                 if (!strncmp(file->d_name, "mic", 3)) {
1623                         mic->next = malloc(sizeof(struct mic_info));
1624                         if (mic->next) {
1625                                 mic = mic->next;
1626                                 mic->next = NULL;
1627                                 memset(mic, 0, sizeof(struct mic_info));
1628                                 mic->id = atoi(&file->d_name[3]);
1629                                 mic->name = malloc(strlen(file->d_name) + 16);
1630                                 if (mic->name)
1631                                         strcpy(mic->name, file->d_name);
1632                                 mpsslog("MIC name %s id %d\n", mic->name,
1633                                         mic->id);
1634                                 cnt++;
1635                         }
1636                 }
1637         }
1638
1639         closedir(dp);
1640         return cnt;
1641 }
1642
1643 void
1644 mpsslog(char *format, ...)
1645 {
1646         va_list args;
1647         char buffer[4096];
1648         char ts[52], *ts1;
1649         time_t t;
1650
1651         if (logfp == NULL)
1652                 return;
1653
1654         va_start(args, format);
1655         vsprintf(buffer, format, args);
1656         va_end(args);
1657
1658         time(&t);
1659         ts1 = ctime_r(&t, ts);
1660         ts1[strlen(ts1) - 1] = '\0';
1661         fprintf(logfp, "%s: %s", ts1, buffer);
1662
1663         fflush(logfp);
1664 }
1665
1666 int
1667 main(int argc, char *argv[])
1668 {
1669         int cnt;
1670         pid_t pid;
1671
1672         myname = argv[0];
1673
1674         logfp = fopen(LOGFILE_NAME, "a+");
1675         if (!logfp) {
1676                 fprintf(stderr, "cannot open logfile '%s'\n", LOGFILE_NAME);
1677                 exit(1);
1678         }
1679         pid = fork();
1680         switch (pid) {
1681         case 0:
1682                 break;
1683         case -1:
1684                 exit(2);
1685         default:
1686                 exit(0);
1687         }
1688
1689         mpsslog("MIC Daemon start\n");
1690
1691         cnt = init_mic_list();
1692         if (cnt == 0) {
1693                 mpsslog("MIC module not loaded\n");
1694                 exit(3);
1695         }
1696         mpsslog("MIC found %d devices\n", cnt);
1697
1698         start_daemon();
1699
1700         exit(0);
1701 }