/*
 * NOTE(review): this chunk is a garbled extraction. The integer fused onto
 * the front of each line is the original file's line number, and the jumps
 * in that numbering (25 -> 29 -> 33 ...) show that most lines are missing.
 * Comments below describe only what the visible fragments establish.
 */
25 #include <linux/module.h>
29 #include <linux/slab.h>
/* Sentinel index meaning "release cached info for every SPU" -- presumably
 * tested inside release_cached_info(); confirm against the full file. */
33 #define RELEASE_ALL 9999
/* Number of SPU nodes; used as the loop bound for per-SPU iteration and as
 * the upper bound for index-validity checks in the cache accessors below. */
37 static int num_spu_nodes;
/* Per-SPU sample-buffer capacity (in entries); compared against
 * spu_buff[spu].head in spu_buff_add() to detect a full buffer. */
42 static unsigned max_spu_buff;
/*
 * spu_buff_add() -- append one value to SPU @spu's sample buffer.
 * NOTE(review): fragment only; original lines 45-72, 74-95, 97-109,
 * 111-116 and 118-120 are missing from this extraction, so the overflow
 * handling and the flush loop bodies cannot be documented from here.
 */
44 static void spu_buff_add(
unsigned long int value,
int spu)
/* Full-buffer check: head has reached the configured capacity.
 * Presumably the sample is dropped or an overflow flag is set -- the
 * handling code is not visible here; confirm against the full file. */
73 if (
spu_buff[spu].head >= max_spu_buff)
/* Per-SPU flush loop (likely the buffer-sync path); body elided. */
96 for (spu = 0; spu < num_spu_nodes; spu++) {
/* buffer_lock is held with IRQs saved around buffer manipulation;
 * two distinct unlock sites suggest an early-exit path plus the
 * normal path -- the matching lock calls are in the elided lines. */
110 spin_unlock_irqrestore(&buffer_lock, flags);
/* Tail of a debug/diagnostic call printing head vs. capacity. */
117 curr_head, max_spu_buff);
121 spin_unlock_irqrestore(&buffer_lock, flags);
/*
 * destroy_cached_info() -- kref release callback for a cached-SPU-info
 * object (passed to kref_put elsewhere in this file).
 * NOTE(review): only the signature is visible; the body (lines 146-163)
 * is missing from this extraction.
 */
145 static void destroy_cached_info(
struct kref *
kref)
/* The lines below appear to belong to a separate lookup helper
 * (get_cached_info-style, judging by the calls at lines 200 and 572 of
 * the embedded numbering); its signature is not visible here. */
/* Bounds-check the SPU index against the node count before touching
 * the spu_info[] cache; invalid indices are reported and rejected. */
164 if (spu_num >= num_spu_nodes) {
166 "%s, line %d: Invalid index %d into spu info cache\n",
167 __func__, __LINE__, spu_num);
/* Cache-miss path taken only when a struct spu was supplied; the
 * repopulation logic (lines 172-178) is elided. */
171 if (!spu_info[spu_num] && the_spu) {
/* Return the (possibly NULL) cached entry for this SPU. */
179 ret_info = spu_info[spu_num];
/*
 * prepare_cached_spu_info() -- ensure a cached-info entry exists for @spu,
 * creating its vma map for the SPU binary identified by @objectId on a miss.
 * NOTE(review): fragment only; the return type, the allocation code and the
 * error-path bodies are in elided lines (191-199, 204-212, ...).
 */
190 prepare_cached_spu_info(
struct spu *spu,
unsigned long objectId)
/* Fast path: reuse an existing cache entry for this SPU number. */
200 info = get_cached_info(spu, spu->number);
203 pr_debug(
"Found cached SPU info.\n");
/* Two distinct vma_map creation failure reports -- presumably one per
 * failure site (e.g. alloc vs. map build); surrounding code elided. */
213 "%s, line %d: create vma_map failed\n",
221 "%s, line %d: create vma_map failed\n",
/* Publish the new entry into the per-SPU cache slot. */
232 spu_info[spu->number] =
info;
/* Tail of a kref_put/kref-init-style call taking destroy_cached_info as
 * the release function; the call itself is in an elided line. */
244 destroy_cached_info);
/* cache_lock held (IRQ-safe) around the spu_info[] update above. */
245 spin_unlock_irqrestore(&cache_lock, flags);
/*
 * release_cached_info() -- drop the cached-info reference(s) for one SPU,
 * or for all SPUs (RELEASE_ALL sentinel is defined above; the check for it
 * and the index/end setup are in elided lines 259-265 and 271-275).
 */
258 static int release_cached_info(
int spu_index)
/* Reject out-of-range indices, mirroring the check in the lookup helper. */
266 if (spu_index >= num_spu_nodes) {
269 "Invalid index %d into spu info cache\n",
270 __func__, __LINE__, spu_index);
/* Walk the chosen range of cache slots; for each populated slot, drop
 * a reference (the kref_put call is split across elided lines, with
 * destroy_cached_info as the release callback). */
276 for (; index <
end; index++) {
277 if (spu_info[index]) {
279 destroy_cached_info);
/*
 * fast_get_dcookie() -- return a "dcookie" identifying @path's dentry.
 * The visible return statement simply casts the dentry pointer to
 * unsigned long; any slow-path (e.g. a real get_dcookie() call for
 * dentries without a cookie) would be in the elided lines 298-301.
 */
297 static inline unsigned long fast_get_dcookie(
struct path *
path)
302 return (
unsigned long)path->
dentry;
/*
 * get_exec_dcookie_and_offset() -- for the process owning @spu, compute:
 *   - the dcookie of the executable (returned; type not visible here),
 *   - the dcookie of the mapped SPU ELF binary (*spu_bin_dcookie),
 *   - the offset of @spu_ref within that binary's vma (*offsetp).
 * NOTE(review): fragment only. The mm lookup, the vma-walk loop and most
 * error handling are in elided lines; the control flow around
 * fail_no_image_cookie cannot be fully verified from this view.
 */
318 get_exec_dcookie_and_offset(
struct spu *spu,
unsigned int *offsetp,
319 unsigned long *spu_bin_dcookie,
320 unsigned long spu_ref)
322 unsigned long app_cookie = 0;
323 unsigned int my_offset = 0;
/* Cookie for the task's executable, via its mm->exe_file path. */
333 app_cookie = fast_get_dcookie(&mm->
exe_file->f_path);
/* Tail of a debug print naming the executable (d_name of exe_file). */
335 mm->
exe_file->f_dentry->d_name.name);
/* Offset of the reference address inside the matching vma. */
341 my_offset = spu_ref - vma->
vm_start;
/* Bail out when no file-backed vma covers spu_ref (condition elided). */
343 goto fail_no_image_cookie;
345 pr_debug(
"Found spu ELF at %X(object-id:%lx) for file %s\n",
347 vma->
vm_file->f_dentry->d_name.name);
348 *offsetp = my_offset;
/* Cookie for the SPU binary's backing file. */
352 *spu_bin_dcookie = fast_get_dcookie(&vma->
vm_file->f_path);
/* Error path: report and (presumably) return a failure value. */
360 fail_no_image_cookie:
364 "%s, line %d: Cannot find dcookie for SPU binary\n",
/*
 * process_context_switch() -- record an SPU context switch: refresh the
 * cached info for @spu/@objectId, resolve the app and SPU-binary dcookies,
 * then append a fixed-order context-switch record to the SPU's buffer:
 *   spu number, pid, tgid, app dcookie, SPU-binary dcookie, offset.
 * NOTE(review): fragment; error handling between the visible lines
 * (383-388, 391-398, 405-409) is elided.
 */
375 static int process_context_switch(
struct spu *spu,
unsigned long objectId)
380 unsigned long spu_cookie = 0, app_dcookie;
/* Make sure the vma map / cached info exists before resolving cookies. */
382 retval = prepare_cached_spu_info(spu, objectId);
389 app_dcookie = get_exec_dcookie_and_offset(spu, &offset, &spu_cookie, objectId);
/* Either cookie being 0 means resolution failed; handling elided. */
390 if (!app_dcookie || !spu_cookie) {
/* Context-switch record, written under buffer_lock (the lock call is
 * elided; the matching unlock is visible below). Field order matters:
 * consumers presumably parse these six entries positionally. */
399 spu_buff_add(spu->number, spu->number);
400 spu_buff_add(spu->pid, spu->number);
401 spu_buff_add(spu->tgid, spu->number);
402 spu_buff_add(app_dcookie, spu->number);
403 spu_buff_add(spu_cookie, spu->number);
404 spu_buff_add(offset, spu->number);
/* Mark that at least one context switch has been recorded for this SPU. */
410 spu_buff[spu->number].ctx_sw_seen = 1;
412 spin_unlock_irqrestore(&buffer_lock, flags);
/*
 * Notifier callback for SPU activity events (registered via the
 * notifier_block below). On one event type it releases the cached info
 * for the SPU; on another it records a context switch -- the switch/if
 * dispatch on the event value is in elided lines (431, 433-434, 437).
 */
430 struct spu *the_spu =
data;
432 pr_debug(
"SPU event notification arrived\n");
/* Teardown path: drop this SPU's cache entry under cache_lock. */
435 retval = release_cached_info(the_spu->number);
436 spin_unlock_irqrestore(&cache_lock, flags);
/* Activation path: log the context switch identified by val. */
438 retval = process_context_switch(the_spu, val);
/* Notifier block wiring the callback above into the SPU switch-event
 * notification chain (registration happens elsewhere in the file). */
444 .notifier_call = spu_active_notify,
/*
 * number_of_online_nodes() -- count online Cell nodes (body elided,
 * lines 448-458 missing from this extraction).
 */
447 static int number_of_online_nodes(
void)
/*
 * oprofile_spu_buff_create() -- allocate one zeroed sample buffer of
 * max_spu_buff entries for every SPU node. On allocation failure for a
 * given SPU it reports the error; the cleanup of earlier allocations
 * (if any) is in elided lines.
 */
459 static int oprofile_spu_buff_create(
void)
465 for (spu = 0; spu < num_spu_nodes; spu++) {
/* kzalloc so the buffer starts zero-filled; size = capacity * entry
 * size (entries are unsigned long, matching spu_buff_add's value type). */
478 spu_buff[spu].buff = kzalloc((max_spu_buff
479 *
sizeof(
unsigned long)),
484 "%s, line %d: oprofile_spu_buff_create "
485 "failed to allocate spu buffer %d.\n",
486 __func__, __LINE__, spu);
/*
 * Fragment of spu_sync_start() (per the pr_debug below): allocates the
 * per-SPU buffers, then seeds each buffer with the node count so the
 * consumer knows how many SPU streams to expect. The enclosing function
 * signature and most of the body are in elided lines.
 */
515 unsigned long flags = 0;
524 ret = oprofile_spu_buff_create();
/* First record in every SPU buffer is num_spu_nodes (header entry),
 * written under buffer_lock (lock call elided; unlock visible below). */
529 for (spu = 0; spu < num_spu_nodes; spu++) {
532 spu_buff_add(num_spu_nodes, spu);
534 spin_unlock_irqrestore(&buffer_lock, flags);
/* Second per-SPU loop; its body (lines 537-547) is elided --
 * presumably per-SPU state initialization. */
536 for (spu = 0; spu < num_spu_nodes; spu++) {
548 pr_debug(
"spu_sync_start -- running.\n");
/*
 * Fragment of the sample-delivery path (spu_sync_buffer-style): tags each
 * 32-bit sample with the SPU number in the upper 32 bits and appends it to
 * that SPU's buffer, skipping SPUs with no cached context. The enclosing
 * signature, the guard-value extraction and parts of the loop are elided.
 */
/* Pre-shift the SPU number once; each record is (sample | spu << 32). */
562 unsigned long long spu_num_ll = spu_num;
563 unsigned long long spu_num_shifted = spu_num_ll << 32;
/* NULL spu is acceptable here: lookup is by number only on this path. */
572 c_info = get_cached_info(
NULL, spu_num);
/* No cached context for this SPU: samples cannot be attributed, drop
 * them. (Note the typo "contex" in the original message -- left as-is
 * since message text is runtime behavior.) */
578 pr_debug(
"SPU_PROF: No cached SPU contex "
579 "for SPU #%d. Dropping samples.\n", spu_num);
/* Plain spin_lock here (not irqsave) -- presumably this path already
 * runs with IRQs disabled or in a context where that is safe; the
 * outer cache_lock IS irqsave (see unlock below). Confirm in full file. */
585 spin_lock(&buffer_lock);
586 for (i = 0; i < num_samples; i++) {
587 unsigned int sample = *(samples+
i);
/* Track guard-value changes per SPU; the derivation of grd_val from
 * the sample and the action taken on change are partly elided. */
599 if (grd_val && grd_val !=
spu_buff[spu_num].last_guard_val) {
600 spu_buff[spu_num].last_guard_val = grd_val;
/* Append the tagged sample (file offset in low bits, SPU# in high). */
611 spu_buff_add((file_offset | spu_num_shifted),
614 spin_unlock(&buffer_lock);
616 spin_unlock_irqrestore(&cache_lock, flags);
/*
 * Fragment of spu_sync_stop() (per the pr_debug below): unregisters the
 * SPU switch-event notifier, tears down cached state under cache_lock,
 * and frees the per-SPU buffers. Most of the body is in elided lines.
 */
622 unsigned long flags = 0;
/* Unregister failure is reported but (presumably) not fatal to teardown. */
630 "%s, line %d: spu_switch_event_unregister " \
632 __func__, __LINE__, ret);
639 spin_unlock_irqrestore(&cache_lock, flags);
/* Per-SPU cleanup loop -- likely freeing spu_buff[k].buff; body elided. */
647 for (k = 0; k < num_spu_nodes; k++) {
657 pr_debug(
"spu_sync_stop -- done.\n");