% PhD thesis record (key 13190). The granting institution is stored in
% `school` (required for this entry type), the thesis label in `type`, and
% the month as the standard three-letter macro so styles can localise it.
@phdthesis{13190,
  author    = {Stensland, H{\r a}kon},
  title     = {Processing Multimedia Workloads on Heterogeneous Multicore Architectures},
  school    = {University of Oslo},
  type      = {{PhD} thesis},
  year      = {2015},
  month     = feb,
  publisher = {UiO},
  url       = {https://www.duo.uio.no/handle/10852/50618},
  abstract  = {Processor architectures have been evolving quickly since the introduction of the central processing unit. For a very long time, one of the important means of increasing performance was to increase the clock frequency. However, in the last decade, processor manufacturers have hit the so-called power wall, with high heat dissipation. To overcome this problem, processors were designed with reduced clock frequencies but with multiple cores and, later, heterogeneous processing elements. This shift introduced a new challenge for programmers: Legacy applications, written without parallelization in mind, gain no benefits from moving to multicore and heterogeneous architectures. Another challenge for the programmers is that heterogeneous architecture designs are very different with respect to caches, memory types, execution unit organization, and so forth and, in the worst case, a programmer must completely rewrite the application to obtain the best performance on the new architecture.
Multimedia workloads, such as video encoding, are often time sensitive and interactive. These workloads differ from traditional batch processing workloads with no real-time requirements. This work investigates how to use modern heterogeneous architectures efficiently to process multimedia workloads. To do so, we investigate both simple and complex workloads on multiple architectures to learn about the properties of these architectures. When programming multimedia workloads, it is very important to know how the algorithms perform on the target architecture. In addition, achieving high performance on heterogeneous architectures is not a trivial task, often requiring detailed knowledge about the architecture. We therefore evaluate several optimizations so we can learn how best to write programs for these architectures and avoid potential pitfalls.
We later use the knowledge gained to propose a framework design and language called Parallel Processing Graph (P2G). The P2G framework is designed for multimedia workloads and supports heterogeneous architectures. To demonstrate the feasibility of the framework, we construct a proof-of-concept implementation. Two simple workloads show that we can express multimedia workloads in the system. We also demonstrate the scalability of the designed solution.},
}