diff --git a/include/linux/feather_buffer.h b/include/linux/feather_buffer.h
new file mode 100644
index 0000000..ca49bf2
--- /dev/null
+++ b/include/linux/feather_buffer.h
@@ -0,0 +1,123 @@
+#ifndef _FEATHER_BUFFER_H_
+#define _FEATHER_BUFFER_H_
+
+/* requires UINT_MAX and memcpy */
+
+/* Atomically add 1 to *val (lock; xaddl) and return the previous value. */
+static inline int fetch_and_inc(int *val)
+{
+	int ret = 1;
+	__asm__ __volatile__("lock; xaddl %0, %1" : "+r" (ret), "+m" (*val) : : "memory" );
+	return ret;
+}
+
+/* Atomically subtract 1 from *val (lock; xaddl) and return the previous value. */
+static inline int fetch_and_dec(int *val)
+{
+	int ret = -1;
+	__asm__ __volatile__("lock; xaddl %0, %1" : "+r" (ret), "+m" (*val) : : "memory" );
+	return ret;
+}
+
+/* per-slot state markers kept in ft_buffer.slots[] */
+#define SLOT_FREE	0
+#define SLOT_BUSY	1
+#define SLOT_READY	2
+
+struct ft_buffer {
+	unsigned int	slot_count;
+	unsigned int	slot_size;
+
+	int		free_count;
+	unsigned int	write_idx;
+	unsigned int	read_idx;
+
+	char*		slots;
+	void*		buffer_mem;
+	unsigned int	failed_writes;
+};
+
+/* Set up buf over caller-provided storage: slots holds one state byte per
+ * slot, buffer_mem holds slot_count * slot_size bytes of payload.
+ * Returns 1 on success, 0 if slot_count is rejected.
+ */
+static inline int init_ft_buffer(struct ft_buffer* buf,
+				 unsigned int slot_count,
+				 unsigned int slot_size,
+				 char* slots,
+				 void* buffer_mem)
+{
+	unsigned int i;
+	if (!slot_count || UINT_MAX % slot_count != slot_count - 1) {
+		/* The slot count must divide UINT_MAX + 1 so that when it
+		 * wraps around the index correctly points to 0.
+		 */
+		return 0;
+	} else {
+		buf->slot_count = slot_count;
+		buf->slot_size = slot_size;
+		buf->slots = slots;
+		buf->buffer_mem = buffer_mem;
+		buf->free_count = slot_count;
+		buf->write_idx = 0;
+		buf->read_idx = 0;
+		buf->failed_writes = 0;
+		for (i = 0; i < slot_count; i++)
+			buf->slots[i] = SLOT_FREE;
+		return 1;
+	}
+}
+
+/* Reserve a slot for writing. On success stores the slot address in *ptr
+ * and returns 1; if no slot is free, sets *ptr to 0, counts the failure
+ * and returns 0.
+ */
+static inline int ft_buffer_start_write(struct ft_buffer* buf, void **ptr)
+{
+	int free = fetch_and_dec(&buf->free_count);
+	unsigned int idx;
+	if (free <= 0) {
+		fetch_and_inc(&buf->free_count);
+		*ptr = 0;
+		/* failed_writes is unsigned; cast as done for write_idx below */
+		fetch_and_inc((int*) &buf->failed_writes);
+		return 0;
+	} else {
+		idx = fetch_and_inc((int*) &buf->write_idx) % buf->slot_count;
+		buf->slots[idx] = SLOT_BUSY;
+		*ptr = ((char*) buf->buffer_mem) + idx * buf->slot_size;
+		return 1;
+	}
+}
+
+/* Mark the slot at ptr (as handed out by ft_buffer_start_write) as ready. */
+static inline void ft_buffer_finish_write(struct ft_buffer* buf, void *ptr)
+{
+	unsigned int idx;
+
+	idx = ((char*) ptr - (char*) buf->buffer_mem) / buf->slot_size;
+	buf->slots[idx] = SLOT_READY;
+}
+
+
+/* exclusive reader access is assumed */
+static inline int ft_buffer_read(struct ft_buffer* buf, void* dest)
+{
+	unsigned int idx;
+	if (buf->free_count == buf->slot_count)
+		/* nothing available */
+		return 0;
+	idx = buf->read_idx % buf->slot_count;
+	if (buf->slots[idx] == SLOT_READY) {
+		memcpy(dest, ((char*) buf->buffer_mem) + idx * buf->slot_size,
+		       buf->slot_size);
+		buf->slots[idx] = SLOT_FREE;
+		buf->read_idx++;
+		fetch_and_inc(&buf->free_count);
+		return 1;
+	} else
+		return 0;
+}
+
+
+#endif
diff --git a/include/linux/feather_trace.h b/include/linux/feather_trace.h
new file mode 100644
index 0000000..1b576a8
--- /dev/null
+++ b/include/linux/feather_trace.h
@@ -0,0 +1,112 @@
+#ifndef _FEATHER_TRACE_H_
+#define _FEATHER_TRACE_H_
+
+#define feather_callback __attribute__((regparm(0)))
+
+/* make the compiler reload any register that is not saved in
+ * a cdecl function call
+ */
+#define __FT_CLOBBER "memory", "cc", "eax", "ecx", "edx"
+#define __FT_PAR "ri"
+
+/* Each ft_eventN() site starts with a short jump over the callback
+ * invocation and records (id, 0, site start, site end) in the
+ * __event_table section. Arguments are pushed right to left with id
+ * pushed last, and the site pops them again after the call.
+ */
+#define ft_event(id, callback)                                  \
+	__asm__ __volatile__(                                   \
+	    "1: jmp 2f \n\t"                                    \
+	    " call " #callback " \n\t"                          \
+	    ".section __event_table, \"aw\" \n\t"               \
+	    ".long " #id ", 0, 1b, 2f \n\t"                     \
+	    ".previous \n\t"                                    \
+	    "2: \n\t"                                           \
+	    : : : __FT_CLOBBER)
+
+#define ft_event0(id, callback)                                 \
+	__asm__ __volatile__(                                   \
+	    "1: jmp 2f \n\t"                                    \
+	    " pushl $" #id " \n\t"                              \
+	    " call " #callback " \n\t"                          \
+	    " addl $4, %%esp \n\t"                              \
+	    ".section __event_table, \"aw\" \n\t"               \
+	    ".long " #id ", 0, 1b, 2f \n\t"                     \
+	    ".previous \n\t"                                    \
+	    "2: \n\t"                                           \
+	    : : : __FT_CLOBBER)
+
+#define ft_event1(id, callback, param)                          \
+	__asm__ __volatile__(                                   \
+	    "1: jmp 2f \n\t"                                    \
+	    " pushl %0 \n\t"                                    \
+	    " pushl $" #id " \n\t"                              \
+	    " call " #callback " \n\t"                          \
+	    " addl $8, %%esp \n\t"                              \
+	    ".section __event_table, \"aw\" \n\t"               \
+	    ".long " #id ", 0, 1b, 2f \n\t"                     \
+	    ".previous \n\t"                                    \
+	    "2: \n\t"                                           \
+	    : : __FT_PAR (param) : __FT_CLOBBER)
+
+#define ft_event2(id, callback, param, param2)                  \
+	__asm__ __volatile__(                                   \
+	    "1: jmp 2f \n\t"                                    \
+	    " pushl %1 \n\t"                                    \
+	    " pushl %0 \n\t"                                    \
+	    " pushl $" #id " \n\t"                              \
+	    " call " #callback " \n\t"                          \
+	    " addl $12, %%esp \n\t"                             \
+	    ".section __event_table, \"aw\" \n\t"               \
+	    ".long " #id ", 0, 1b, 2f \n\t"                     \
+	    ".previous \n\t"                                    \
+	    "2: \n\t"                                           \
+	    : : __FT_PAR (param), __FT_PAR (param2) : __FT_CLOBBER)
+
+
+#define ft_event3(id, callback, p, p2, p3)                      \
+	__asm__ __volatile__(                                   \
+	    "1: jmp 2f \n\t"                                    \
+	    " pushl %2 \n\t"                                    \
+	    " pushl %1 \n\t"                                    \
+	    " pushl %0 \n\t"                                    \
+	    " pushl $" #id " \n\t"                              \
+	    " call " #callback " \n\t"                          \
+	    " addl $16, %%esp \n\t"                             \
+	    ".section __event_table, \"aw\" \n\t"               \
+	    ".long " #id ", 0, 1b, 2f \n\t"                     \
+	    ".previous \n\t"                                    \
+	    "2: \n\t"                                           \
+	    : : __FT_PAR (p), __FT_PAR (p2), __FT_PAR (p3) : __FT_CLOBBER)
+
+#define ft_event4(id, callback, p, p2, p3, p4)                  \
+	__asm__ __volatile__(                                   \
+	    "1: jmp 2f \n\t"                                    \
+	    " pushl %3 \n\t"                                    \
+	    " pushl %2 \n\t"                                    \
+	    " pushl %1 \n\t"                                    \
+	    " pushl %0 \n\t"                                    \
+	    " pushl $" #id " \n\t"                              \
+	    " call " #callback " \n\t"                          \
+	    " addl $20, %%esp \n\t"                             \
+	    ".section __event_table, \"aw\" \n\t"               \
+	    ".long " #id ", 0, 1b, 2f \n\t"                     \
+	    ".previous \n\t"                                    \
+	    "2: \n\t"                                           \
+	    : : __FT_PAR (p), __FT_PAR (p2), __FT_PAR (p3),     \
+	    __FT_PAR (p4) : __FT_CLOBBER)
+
+
+/* Read the processor's time stamp counter (rdtsc) as a 64-bit value. */
+static inline unsigned long long ft_read_tsc(void)
+{
+	unsigned long long ret;
+	__asm__ __volatile__("rdtsc" : "=A" (ret));
+	return ret;
+}
+
+int ft_enable_event(unsigned long id);
+int ft_disable_event(unsigned long id);
+int ft_is_event_enabled(unsigned long id);
+int ft_disable_all_events(void);
+#endif
diff --git a/include/linux/trace.h b/include/linux/trace.h
new file mode 100644
index 0000000..a1570a6
--- /dev/null
+++ b/include/linux/trace.h
@@ -0,0 +1,31 @@
+
+#ifndef _SYS_TRACE_H_
+#define _SYS_TRACE_H_
+
+/* NOTE(review): header names were missing from this copy of the patch; they
+ * are reconstructed from usage and must be confirmed against the original.
+ */
+#include <linux/feather_trace.h>
+#include <linux/feather_buffer.h>
+
+/*********************** EVENTS ************************/
+
+struct ft_event_data {
+	unsigned long event;
+	unsigned long long timestamp;
+
+	/* add your own event data */
+};
+
+
+/* buffer holding time stamps - will be provided by driver */
+extern struct ft_buffer* trace_ts_buf;
+
+
+/* NOTE(review): the argument l is not used; every FT_TRACE site is
+ * registered as event 1 - confirm that this is intended.
+ */
+#define FT_TRACE(l) ft_event0(1, save_timestamp)
+
+
+#endif
diff --git a/kernel/Makefile b/kernel/Makefile
index 14f4d45..438547a 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -8,7 +8,8 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o profile.o \
 	    signal.o sys.o kmod.o workqueue.o pid.o \
 	    rcupdate.o extable.o params.o posix-timers.o \
 	    kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \
-	    hrtimer.o rwsem.o latency.o nsproxy.o srcu.o
+	    hrtimer.o rwsem.o latency.o nsproxy.o srcu.o \
+	    trace.o ft_event.o
 
 obj-$(CONFIG_STACKTRACE) += stacktrace.o
 obj-y += time/
diff --git a/kernel/ft_event.c b/kernel/ft_event.c
new file mode 100644
index 0000000..7f0ba9a
--- /dev/null
+++ b/kernel/ft_event.c
@@ -0,0 +1,121 @@
+/* NOTE(review): header names were missing from this copy of the patch; they
+ * are reconstructed from usage and must be confirmed against the original.
+ */
+#include <linux/types.h>
+
+#include <linux/feather_trace.h>
+
+/* the feather trace management functions assume
+ * exclusive access to the event table
+ */
+
+
+#define BYTE_JUMP 0xeb
+#define BYTE_JUMP_LEN 0x02
+
+/* for each event, there is an entry in the event table */
+struct trace_event {
+	long id;
+	long count;
+	long start_addr;
+	long end_addr;
+};
+
+extern struct trace_event __start___event_table[];
+extern struct trace_event __stop___event_table[];
+
+/* Arm every site of event id whose enable count goes from 0 to 1 by zeroing
+ * the displacement of its short jump, so execution falls through into the
+ * callback. Returns the number of table entries with this id.
+ */
+int ft_enable_event(unsigned long id)
+{
+	struct trace_event* te = __start___event_table;
+	int count = 0;
+	unsigned char* delta;
+	unsigned char* instr;
+
+	while (te < __stop___event_table) {
+		if (te->id == id && ++te->count == 1) {
+			instr = (unsigned char*) te->start_addr;
+			/* make sure we don't clobber something wrong */
+			if (*instr == BYTE_JUMP) {
+				delta = (((unsigned char*) te->start_addr) + 1);
+				*delta = 0;
+			}
+		}
+		if (te->id == id)
+			count++;
+		te++;
+	}
+	return count;
+}
+
+/* Restore the skip-over jump at every site with a non-zero enable count and
+ * reset that count. Returns the number of sites disabled.
+ */
+int ft_disable_all_events(void)
+{
+	struct trace_event* te = __start___event_table;
+	int count = 0;
+	unsigned char* delta;
+	unsigned char* instr;
+
+	while (te < __stop___event_table) {
+		if (te->count) {
+			instr = (unsigned char*) te->start_addr;
+			if (*instr == BYTE_JUMP) {
+				delta = (((unsigned char*) te->start_addr)
+					 + 1);
+				*delta = te->end_addr - te->start_addr -
+					BYTE_JUMP_LEN;
+				te->count = 0;
+				count++;
+			}
+		}
+		te++;
+	}
+	return count;
+}
+
+
+/* Drop one enable reference from every site of event id and restore the
+ * skip-over jump where the count reaches 0. Sites that are not enabled are
+ * left alone so that an unmatched disable cannot drive the count negative.
+ * Returns the number of table entries with this id.
+ */
+int ft_disable_event(unsigned long id)
+{
+	struct trace_event* te = __start___event_table;
+	int count = 0;
+	unsigned char* delta;
+	unsigned char* instr;
+
+	while (te < __stop___event_table) {
+		if (te->id == id && te->count > 0 && --te->count == 0) {
+			instr = (unsigned char*) te->start_addr;
+			if (*instr == BYTE_JUMP) {
+				delta = (((unsigned char*) te->start_addr) + 1);
+				*delta = te->end_addr - te->start_addr -
+					BYTE_JUMP_LEN;
+			}
+		}
+		if (te->id == id)
+			count++;
+		te++;
+	}
+	return count;
+}
+
+/* Returns the enable count of the first table entry with this id, 0 if none. */
+int ft_is_event_enabled(unsigned long id)
+{
+	struct trace_event* te = __start___event_table;
+
+	while (te < __stop___event_table) {
+		if (te->id == id)
+			return te->count;
+		te++;
+	}
+	return 0;
+}
diff --git a/kernel/trace.c b/kernel/trace.c
new file mode 100644
index 0000000..5f83abf
--- /dev/null
+++ b/kernel/trace.c
@@ -0,0 +1,299 @@
+/* NOTE(review): the header names were missing from this copy of the patch;
+ * the list below is reconstructed from the symbols this file uses and must
+ * be confirmed against the original.
+ */
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/fs.h>
+#include <linux/cdev.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/mm.h>
+#include <linux/string.h>
+#include <asm/uaccess.h>
+
+#include <linux/trace.h>
+
+/******************************************************************************/
+/* Allocation */
+/******************************************************************************/
+
+struct ft_buffer* trace_ts_buf = NULL;
+
+/* Feather-Trace callback: record the event id and the current TSC value in
+ * trace_ts_buf; the sample is dropped if no slot is free.
+ */
+feather_callback void save_timestamp(unsigned long event)
+{
+	struct ft_event_data *ts;
+	if (ft_buffer_start_write(trace_ts_buf, (void**) &ts)) {
+		ts->event = event;
+		ts->timestamp = ft_read_tsc();
+		ft_buffer_finish_write(trace_ts_buf, ts);
+	}
+}
+
+/* Allocate a buffer of count slots of size bytes each, plus one marker
+ * byte per slot placed after the slot data. Returns NULL on failure.
+ */
+static struct ft_buffer* alloc_ft_buffer(unsigned int count, size_t size)
+{
+	struct ft_buffer* buf;
+	size_t total = (size + 1) * count;
+	char* mem;
+	int order = 0, pages = 1;
+
+	buf = kmalloc(sizeof(struct ft_buffer), GFP_KERNEL);
+	if (!buf)
+		return NULL;
+
+	/* round up to whole pages, then to a power-of-two page count */
+	total = (total / PAGE_SIZE) + (total % PAGE_SIZE != 0);
+	while (pages < total) {
+		order++;
+		pages *= 2;
+	}
+
+	mem = (char*) __get_free_pages(GFP_KERNEL, order);
+	if (!mem) {
+		kfree(buf);
+		return NULL;
+	}
+
+	if (!init_ft_buffer(buf, count, size,
+			    mem + (count * size), /* markers at the end */
+			    mem)) { /* buffer objects */
+		free_pages((unsigned long) mem, order);
+		kfree(buf);
+		return NULL;
+	}
+	return buf;
+}
+
+/* Release a buffer obtained from alloc_ft_buffer(); NULL is ignored. */
+static void free_ft_buffer(struct ft_buffer* buf)
+{
+	int order = 0, pages = 1;
+	size_t total;
+
+	if (buf) {
+		/* recompute the page order used at allocation time */
+		total = (buf->slot_size + 1) * buf->slot_count;
+		total = (total / PAGE_SIZE) + (total % PAGE_SIZE != 0);
+		while (pages < total) {
+			order++;
+			pages *= 2;
+		}
+		free_pages((unsigned long) buf->buffer_mem, order);
+		kfree(buf);
+	}
+}
+
+
+/******************************************************************************/
+/* DEVICE FILE DRIVER */
+/******************************************************************************/
+
+#define NO_TIMESTAMPS 262144
+
+/* Disable all events, give in-flight callbacks a second to finish, then
+ * free the timestamp buffer.
+ */
+static int trace_release(struct inode *in, struct file *filp)
+{
+	/* disable events */
+	ft_disable_all_events();
+	set_current_state(TASK_UNINTERRUPTIBLE);
+	/* wait for any pending events to complete */
+	schedule_timeout(HZ);
+	/* trace_open() keeps an existing buffer, so opens share it and an
+	 * earlier release may already have freed it
+	 */
+	if (trace_ts_buf) {
+		printk(KERN_ALERT "Failed trace writes: %u\n",
+		       trace_ts_buf->failed_writes);
+		free_ft_buffer(trace_ts_buf);
+		trace_ts_buf = NULL;
+	}
+	/* teardown itself cannot fail */
+	return 0;
+}
+
+/* Copy whole ft_event_data records to user space, sleeping while the buffer
+ * is empty. Returns the number of bytes copied; -EFAULT or -ERESTARTSYS is
+ * returned only if nothing was copied before the failure.
+ */
+static ssize_t trace_read(struct file *filp, char __user *to, size_t len,
+			  loff_t *f_pos)
+{
+	/* we ignore f_pos, this is strictly sequential */
+	ssize_t error = 0;
+	struct ft_event_data ts;
+
+	while (len >= sizeof(struct ft_event_data)) {
+		if (ft_buffer_read(trace_ts_buf, &ts)) {
+			if (copy_to_user(to, &ts, sizeof(struct ft_event_data))) {
+				if (!error)
+					error = -EFAULT;
+				break;
+			} else {
+				len -= sizeof(struct ft_event_data);
+				to += sizeof(struct ft_event_data);
+				error += sizeof(struct ft_event_data);
+			}
+		} else {
+			set_current_state(TASK_INTERRUPTIBLE);
+			schedule_timeout(50);
+			if (signal_pending(current)) {
+				if (!error)
+					error = -ERESTARTSYS;
+				break;
+			}
+		}
+	}
+	return error;
+}
+
+#define ENABLE_CMD 0
+#define DISABLE_CMD 1
+
+/* Command format: one long (ENABLE_CMD or DISABLE_CMD) followed by one or
+ * more long event ids. Returns the bytes consumed, -EINVAL or -EFAULT.
+ */
+static ssize_t trace_write(struct file *filp, const char __user *from,
+			   size_t len, loff_t *f_pos)
+{
+	ssize_t error = -EINVAL;
+	unsigned long cmd;
+	unsigned long id;
+
+	if (len % sizeof(long) || len < 2 * sizeof(long))
+		goto out;
+
+	if (copy_from_user(&cmd, from, sizeof(long))) {
+		error = -EFAULT;
+		goto out;
+	}
+	len -= sizeof(long);
+	from += sizeof(long);
+
+	if (cmd != ENABLE_CMD && cmd != DISABLE_CMD)
+		goto out;
+
+	error = sizeof(long);
+	while (len) {
+		if (copy_from_user(&id, from, sizeof(long))) {
+			error = -EFAULT;
+			goto out;
+		}
+		len -= sizeof(long);
+		from += sizeof(long);
+		if (cmd) {
+			printk(KERN_INFO
+			       "Disabling feather-trace event %lu.\n", id);
+			ft_disable_event(id);
+		} else {
+			printk(KERN_INFO
+			       "Enabling feather-trace event %lu.\n", id);
+			ft_enable_event(id);
+		}
+		error += sizeof(long);
+	}
+
+
+ out:
+	return error;
+}
+
+/* Allocate the timestamp buffer if none exists, halving the slot count
+ * after each failed attempt. Returns 0 or -ENOMEM.
+ */
+static int trace_open(struct inode *in, struct file *filp)
+{
+	int err = 0;
+	unsigned int count = NO_TIMESTAMPS;
+	while (count && !trace_ts_buf) {
+		printk("trace: trying to allocate %u time stamps.\n", count);
+		trace_ts_buf = alloc_ft_buffer(count, sizeof(struct ft_event_data));
+		count /= 2;
+	}
+	if (!trace_ts_buf)
+		err = -ENOMEM;
+
+	return err;
+}
+
+/******************************************************************************/
+/* Device Registration */
+/******************************************************************************/
+
+#define FT_TRACE_MAJOR 252
+
+struct file_operations ft_trace_fops = {
+	.owner = THIS_MODULE,
+	.open = trace_open,
+	.release = trace_release,
+	.write = trace_write,
+	.read = trace_read,
+};
+
+
+/* Reserve count minors starting at (major, 0) and attach fops to them.
+ * Everything acquired is released again on failure.
+ */
+static int __init register_buffer_dev(const char* name,
+				      struct file_operations* fops,
+				      int major, int count)
+{
+	dev_t trace_dev;
+	struct cdev *cdev;
+	int error = 0;
+
+	trace_dev = MKDEV(major, 0);
+	error = register_chrdev_region(trace_dev, count, name);
+	if (error)
+	{
+		printk(KERN_WARNING "trace: "
+		       "Could not register major/minor number %d\n", major);
+		return error;
+	}
+	cdev = cdev_alloc();
+	if (!cdev) {
+		printk(KERN_WARNING "trace: "
+		       "Could not get a cdev for %s.\n", name);
+		error = -ENOMEM;
+		goto out_region;
+	}
+	cdev->owner = THIS_MODULE;
+	cdev->ops = fops;
+	error = cdev_add(cdev, trace_dev, count);
+	if (error) {
+		printk(KERN_WARNING "trace: "
+		       "add_cdev failed for %s.\n", name);
+		goto out_cdev;
+	}
+	return 0;
+
+out_cdev:
+	/* drop the reference held since cdev_alloc() */
+	kobject_put(&cdev->kobj);
+out_region:
+	unregister_chrdev_region(trace_dev, count);
+	return error;
+}
+
+static int __init init_feather_trace(void)
+{
+	int error = 0;
+
+	printk("Initializing Feather-Trace device\n");
+	/* dummy entry to make linker happy */
+	ft_event0(666, save_timestamp);
+
+	error = register_buffer_dev("ft_trace", &ft_trace_fops,
+				    FT_TRACE_MAJOR, 1);
+	return error;
+}
+
+module_init(init_feather_trace);