0

私は matplotlib と、積み上げ棒グラフを描画するために他の人が書いた stackedbarchart プログラムを使用しています。

私のグラフ:

x 軸には、バーごとに 1 つずつ、計 8 つの収入分布があります。

y 軸は、各所得分布における人の割合です。person type-a は最初のスタック、person type-b は 2 番目のスタック、person type-c は 3 番目のスタックです。

私の棒グラフは中央揃えで、棒の間隔を空けてグラフの見栄えを良くし、ラベルを読みやすくする方法を見つけようとしています。提案や説明はありますか?

プログラムは stackedBarGraph.py で、コードは次のとおりです。ここで、widths は各棒の幅に対応する 8 つの値の配列です。

さらに情報が必要な場合はお知らせください(すべて関連性を保つように努めました)。ありがとう!

完全なコード (読むのが難しくないことを願っています):

   from __future__ import division
from pylab import * 
import seaborn as sns
import pandas as pd
import numpy as np
from stackedbars import StackedBarGrapher

# Load the survey data.
# NOTE(review): csv2rec is pulled in by `from pylab import *`; it has been
# removed from recent matplotlib releases -- confirm the installed version
# still provides it before running.
data = csv2rec('coa.csv', delimiter=',')

df = pd.DataFrame(dict(grant=data['totalgrantaid'],
                       stud=data['studenteffort'],
                       par=data['parentcontim'],
                       income=data['parentincomeim']))

# One bar per income bracket; each bar is stacked from these components,
# bottom to top.
income_brackets = [(0, 25000), (25000, 50000), (50000, 75000), (75000, 100000), (100000, 150000), (150000,200000), (200000,250000), (250000,300000)]
components = ('grant', 'stud', 'par')
num_bars = len(income_brackets)

# The bar widths are scaled so that they add up to (about) this many
# x-axis units.
TOTAL_WIDTH = 10.

# source  -> median of each component per bracket (the stacked values)
# source2 -> number of non-missing values per bracket (drives the widths)
# NOTE(review): both bounds are strict, so an income exactly equal to a
# bracket boundary (e.g. 25000) falls into no bracket, although it is still
# counted in `total` below -- confirm this is intended.
source = dict((key, []) for key in components)
source2 = dict((key, []) for key in components)
for lower, upper in income_brackets:
    # Query each bracket once and reuse the subset for every component.
    bracket = df.query('income > {} and income < {}'.format(lower, upper))
    for key in components:
        source[key].append(np.median(bracket[key]))
        source2[key].append(bracket[key].count())

# Number of non-missing grant values inside the overall income range,
# expressed per unit of bar width.
total = df.query('income > 0 and income < 300000')['grant'].count() / TOTAL_WIDTH

#graph specifications
d_widths = [count / total for count in source2['grant']]
d_colors = ['r','g','b']
d_labels = ('<25000', '25000-\n50000', '50000-\n75000', '75000-\n100000', '100000-\n150000', '150000-\n200000', '200000-\n250000', '250000-\n300000')
# Rows are bars (brackets), columns are stack levels (components).
d = np.array([[source[k][i] for k in components] for i in range(num_bars)])

#the graph
fig = plt.figure()
ax1 = fig.add_subplot(111)
mygraph = StackedBarGrapher()
mygraph.stackedBarPlot(ax1, d, d_colors, edgeCols=['#000000'] * len(d_colors), widths=d_widths, showFirst=num_bars, xLabels=d_labels, scale=True)

積み上げ棒グラフ プログラム:

    def stackedBarPlot(self,
                       ax,                                 # axes to plot onto
                       data,                               # data to plot
                       cols,                               # colors for each level
                       xLabels = None,                     # bar specific labels
                       yTicks = 6.,                        # information used for making y ticks ["none", <int> or [[tick_pos1, tick_pos2, ... ],[tick_label_1, tick_label2, ...]]
                       edgeCols=None,                      # colors for edges
                       showFirst=-1,                       # only plot the first <showFirst> bars
                       scale=False,                        # scale bars to same height
                       widths=None,                        # set widths for each bar
                       heights=None,                       # set heights for each bar
                       ylabel='',                          # label for y axis
                       xlabel=''                           # label for x axis
                       ):
        """Draw a stacked bar chart of ``data`` onto ``ax``.

        Parameters
        ----------
        ax : matplotlib Axes
            Axes that receives the bars, ticks, limits and labels.
        data : 2-D array-like
            One row per bar, one column per stack level (bottom level first).
        cols : sequence
            One color per stack level.
        xLabels : sequence or None
            One tick label per bar; ``None`` removes the x ticks.
        yTicks : "none", number, or (positions, labels)
            ``"none"`` removes the y ticks, a number requests that many
            evenly spaced ticks, and a pair of sequences is used verbatim.
        edgeCols : sequence or None
            One edge color per stack level; ``None`` draws no edges.
        showFirst : int
            Plot only the first ``showFirst`` bars; ``-1`` plots all of them.
        scale : bool
            Normalize every bar to a total height of 1 (ticks read 0-100).
        widths : sequence or None
            Width of each bar; bars are laid out edge to edge. ``None`` uses
            unit widths at integer positions.
        heights : sequence or None
            Total height of each bar (each bar is normalized, then multiplied
            by its height). Ignored, with a printed warning, if ``scale`` is
            set.
        ylabel, xlabel : str
            Axis labels; an empty string leaves the label untouched.
        """

        #----------------------------------------------------------------------
        # data preparation

        if showFirst != -1:
            showFirst = min(showFirst, np.shape(data)[0])
            data = data[:showFirst]
            if heights is not None:
                heights = heights[:showFirst]
            if widths is not None:
                widths = widths[:showFirst]

        # Always work on a float copy: the scaling below divides in place,
        # which fails on an integer array.
        # After the transpose: one row per level, one column per bar.
        data_copy = np.array(data, dtype=float).transpose()
        levels, num_bars = data_copy.shape

        if widths is None:
            widths = np.ones(num_bars)
            x = np.arange(num_bars)
        else:
            widths = np.asarray(widths, dtype=float)
            # Bars touch each other: neighbouring centers are half of each
            # bar's width apart. The first bar is centered on 0.
            x = np.concatenate(([0.], np.cumsum((widths[:-1] + widths[1:]) / 2.)))

        # stack the data --
        # each level holds the cumulative sum of itself and all levels below
        data_stack = np.cumsum(data_copy, axis=0)

        if scale or heights is not None:
            if scale and heights is not None:
                print("WARNING: setting scale and heights does not make sense.")
                heights = None
            # Copy the totals first so that dividing data_stack does not
            # divide by a row that is being modified at the same time.
            # NOTE: a bar whose total is 0 yields NaN heights here.
            totals = data_stack[levels - 1].copy()
            data_copy /= totals
            data_stack /= totals
            if heights is not None:
                bar_heights = np.asarray(heights, dtype=float)[:num_bars]
                data_copy *= bar_heights
                data_stack *= bar_heights

        #----------------------------------------------------------------------
        # ticks

        # Compare by value, and only for strings: yTicks may also be a number
        # or a pair of sequences.
        no_y_ticks = isinstance(yTicks, str) and yTicks == "none"

        if not no_y_ticks:
            # it is either a set of ticks or the number of auto ticks to make
            try:
                len(yTicks[1])
                real_ticks = True
            except (TypeError, IndexError, KeyError):
                real_ticks = False

            if not real_ticks:
                yTicks = float(yTicks)
                fractions = np.arange(yTicks) / (yTicks - 1)
                if scale:
                    # make the ticks line up to 100 %
                    y_ticks_at = fractions
                    y_tick_labels = np.array(["%0.0f" % (i * 100) for i in y_ticks_at])
                else:
                    # space the ticks along the y axis
                    y_ticks_at = fractions * np.max(data_stack)
                    y_tick_labels = np.array([str(i) for i in y_ticks_at])
                yTicks = (y_ticks_at, y_tick_labels)

        #----------------------------------------------------------------------
        # plot

        if edgeCols is None:
            edgeCols = ["none"] * len(cols)

        # bars: every level sits on top of the cumulative height below it
        for i in range(levels):
            ax.bar(x,
                   data_copy[i],
                   bottom=data_stack[i - 1] if i > 0 else 0,
                   color=cols[i], alpha=0.7,
                   edgecolor=edgeCols[i],
                   width=widths,
                   linewidth=0.5,
                   align='center'
                   )

        # borders
        for side in ("top", "right", "bottom", "left"):
            ax.spines[side].set_visible(False)

        # make ticks if necessary; set them on `ax` itself rather than through
        # pyplot, which would act on whatever axes happens to be current
        if no_y_ticks:
            ax.set_yticks([])
        else:
            ax.tick_params(axis='y', which='both', labelsize=8, direction="out")
            ax.yaxis.tick_left()
            ax.set_yticks(yTicks[0])
            ax.set_yticklabels(yTicks[1])

        if xLabels is not None:
            ax.tick_params(axis='x', which='both', labelsize=8, direction="out")
            ax.xaxis.tick_bottom()
            ax.set_xticks(x)
            # drop labels of bars that showFirst cut off
            ax.set_xticklabels(list(xLabels)[:num_bars], rotation='horizontal')
        else:
            ax.set_xticks([])

        # limits: from the left edge of the first bar to the right edge of
        # the last one, whatever the individual widths are
        ax.set_xlim(x[0] - widths[0] / 2., x[-1] + widths[-1] / 2.)
        ax.set_ylim(0, np.max(data_stack))

        # labels
        if xlabel != '':
            ax.set_xlabel(xlabel)
        if ylabel != '':
            ax.set_ylabel(ylabel)

これまでの様子

4

回答 1 件

0

では、ご意見をお寄せいただきありがとうございます (また、リスト内包表記を効果的に使用する方法を教えてくれた Bill にも感謝します)。

プログラムを変更して、私が望んでいたことを達成することができました(と思います)。プログラムの以下の部分に、棒の間隔を指定する axspacing と、x 軸の範囲を指定する xaxlim という新しい引数を追加しました。

def stackedBarPlot(self,
                   ax,                                 # axes to plot onto
                   data,                               # data to plot
                   cols,                               # colors for each level
                   xLabels = None,                     # bar specific labels
                   yTicks = 6.,                        # information used for making y ticks ["none", <int> or [[tick_pos1, tick_pos2, ... ],[tick_label_1, tick_label2, ...]]
                   edgeCols=None,                      # colors for edges
                   showFirst=-1,                       # only plot the first <showFirst> bars
                   scale=False,                        # scale bars to same height
                   widths=None,                        # set widths for each bar
                   heights=None,                       # set heights for each bar
                   ylabel='',                          # label for x axis
                   xlabel='',                          # label for y axis
                   xaxlim=None,
                   axspacing=0,
                   ):

.

    if widths is None:
        widths = np.array([1] * num_bars)
        x = np.arange(num_bars)
    else:
        x = [0]
        for i in range(1, len(widths)):
            x.append(x[i-1] + (widths[i-1] + widths[i])/2 + axspacing)

.

    # limits
    #ax.set_xlim(-1.*widths[0]/2., np.sum(widths)-0.5)
    ax.set_ylim(0, np.max(data_stack))
    if xaxlim is None:
        ax.set_xlim(-1.*widths[0]/2., np.sum(widths)-0.5 + num_bars * axspacing)
    else:
        ax.set_xlim(xaxlim)
2014-04-29T14:18:58.447 に回答