library(languageR)
library(MASS)

# Kernel density estimation ----

# Net rents; the first column is the sample used for the density estimates.
mieten <- read.table("nettomieten.txt")
x <- mieten[, 1]

# Gaussian kernel density estimate.
#
# x:  numeric vector of points at which the density is evaluated.
# d:  numeric vector of observations; each one is the centre of a normal bump.
# sd: standard deviation of every bump (plays the role of the bandwidth).
#
# Returns a numeric vector, one value per element of `x`, holding the average
# of the normal densities centred at the observations in `d`.
myKernel <- function(x, d, sd = 10) {
  w <- 0
  for (z in d) {
    # dnorm() is vectorised over x, so one pass per observation is enough.
    w <- w + dnorm(x, mean = z, sd = sd)
  }
  w / length(d)
}

# Evaluation grid 0, 1, ..., 600.
xx <- seq(0, 600, 1)
plot(xx, myKernel(xx, d = x, sd = 30), type = "l")

# `adjust` scales the bandwidth, so it is an argument of density(), not plot().
plot(density(x, kernel = "rectangular", adjust = 5))
lines(density(x), col = "red")

hist(ratings$Frequency, probability = TRUE)
lines(density(ratings$Frequency))

# Log token frequencies of `alice`, overlaid with density estimates for the
# default bandwidth and for a range of fixed bandwidths.
alice.logfreq <- log(table(alice))
hist(
  alice.logfreq,
  probability = TRUE,
  main = "Histogram of log(table(alice))",
  xlab = "log(table(alice))"
)
lines(density(alice.logfreq))
alice.bw <- c(0.1, 0.2, 0.3, 0.4, 0.5)
alice.col <- c("red", "green", "blue", "brown", "orange")
for (i in seq_along(alice.bw)) {
  lines(density(alice.logfreq, bw = alice.bw[i]), col = alice.col[i])
}

# truehist() takes `prob`, not `probability`.
truehist(exp(lexdec$RT), prob = TRUE)
lines(density(exp(lexdec$RT), bw = 50))

# NOTE(review): lexdec$RT appears to be on a log scale (the script
# exponentiates it above); bw = 50 is then far wider than the data range and
# the curve will look flat -- confirm the intended bandwidth.
truehist(lexdec$RT, prob = TRUE)
lines(density(lexdec$RT, bw = 50))

# Back-transformed reaction times split by the Sex column.
rt.all <- exp(lexdec$RT)
rt.male <- rt.all[lexdec$Sex == "M"]
rt.female <- rt.all[lexdec$Sex == "F"]
# `names` labels the individual boxes; `xlab` would only set the axis title.
boxplot(rt.male, rt.female, names = c("M", "F"))

par(mfrow = c(1, 2))
hist(rt.male)
hist(rt.female)
par(mfrow = c(1, 1))

# Variance and standard deviation ----

x <- lexdec$RT
x^2                      # every value squared
mean(x^2)                # E(x^2)
mean(x)^2                # E(x)^2
mean(x^2) - mean(x)^2    # definition of the variance

# Alternative approach: centre the data first.
x.centered <- x - mean(x)
h.x <- hist(x)
h.xc <- hist(x.centered)
plot(h.x, xlim = c(min(x.centered), max(x)))
plot(h.xc, add = TRUE, col = "blue")

mean(x.centered)         # theoretically 0; small rounding error
mean(x.centered^2)       # variance

# Three routes to the standard deviation. The last one divides by n, whereas
# sd() and var() divide by n - 1, so it is slightly smaller.
sd(x)
sqrt(var(x))
sqrt(mean(x.centered^2))

# Alternative approach: centre and scale (standardise).
x.centered.and.scaled <- x.centered / sd(x.centered)
h.xcs <- hist(x.centered.and.scaled)
plot(h.xc, xlim = range(x.centered.and.scaled), col = "red")
plot(h.xcs, add = TRUE)
mean(x.centered.and.scaled)  # theoretically 0
var(x.centered.and.scaled)   # 1

# Normal distribution ----

xx <- seq(-5, 5, 0.005)

# Density curve, evaluated on the grid `xx` (not on the data vector `x`).
plot(xx, dnorm(xx), type = "l")
# Further normal density curves on the current plot: shifted mean (red) and
# larger standard deviation (blue).
lines(xx, dnorm(xx, mean = -1), col = "red")
lines(xx, dnorm(xx, sd = 2), col = "blue")

# Random variables ----

# 100 standard-normal draws: histogram, kernel density estimate, and the
# theoretical density (red) for comparison.
r <- rnorm(100)
hist(r, freq = FALSE)
lines(density(r))
lines(xx, dnorm(xx), col = "red")

# Central limit theorem ----

# 100 replications of the sum of 1000 Bernoulli(0.5) draws. vapply() makes the
# same random-number calls in the same order as an appending loop would, but
# without re-copying the result vector on every iteration.
r <- vapply(
  seq_len(100),
  function(i) sum(rbinom(1000, 1, 0.5)),
  integer(1)
)
hist(r, freq = FALSE)
lines(density(r))
xx <- seq(min(r), max(r), 0.01)
# Normal density with the sample mean and standard deviation of the sums.
lines(xx, dnorm(xx, mean = mean(r), sd = sd(r)), col = "red")

# Standardise the sums and compare them with the normal quantiles; the line
# with intercept 0 and slope 1 marks perfect agreement.
r.sc <- (r - mean(r)) / sd(r)
qqnorm(r.sc)
abline(0, 1)
shapiro.test(r)